//===- llvm/Analysis/TargetTransformInfo.cpp ------------------------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/TargetTransformInfoImpl.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include using namespace llvm; using namespace PatternMatch; #define DEBUG_TYPE "tti" static cl::opt EnableReduxCost("costmodel-reduxcost", cl::init(false), cl::Hidden, cl::desc("Recognize reduction patterns.")); namespace { /// \brief No-op implementation of the TTI interface using the utility base /// classes. /// /// This is used when no target specific information is available. struct NoTTIImpl : TargetTransformInfoImplCRTPBase { explicit NoTTIImpl(const DataLayout &DL) : TargetTransformInfoImplCRTPBase(DL) {} }; } TargetTransformInfo::TargetTransformInfo(const DataLayout &DL) : TTIImpl(new Model(NoTTIImpl(DL))) {} TargetTransformInfo::~TargetTransformInfo() {} TargetTransformInfo::TargetTransformInfo(TargetTransformInfo &&Arg) : TTIImpl(std::move(Arg.TTIImpl)) {} TargetTransformInfo &TargetTransformInfo::operator=(TargetTransformInfo &&RHS) { TTIImpl = std::move(RHS.TTIImpl); return *this; } int TargetTransformInfo::getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) const { int Cost = TTIImpl->getOperationCost(Opcode, Ty, OpTy); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } int TargetTransformInfo::getCallCost(FunctionType *FTy, int NumArgs) const { int Cost = TTIImpl->getCallCost(FTy, NumArgs); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } int TargetTransformInfo::getCallCost(const Function *F, ArrayRef Arguments) const { int Cost = TTIImpl->getCallCost(F, Arguments); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } unsigned TargetTransformInfo::getInliningThresholdMultiplier() const { return TTIImpl->getInliningThresholdMultiplier(); } int TargetTransformInfo::getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef Operands) const { return TTIImpl->getGEPCost(PointeeType, Ptr, Operands); } int TargetTransformInfo::getExtCost(const Instruction *I, const Value *Src) const { return TTIImpl->getExtCost(I, Src); } int TargetTransformInfo::getIntrinsicCost( Intrinsic::ID IID, Type *RetTy, ArrayRef Arguments) const { int Cost = TTIImpl->getIntrinsicCost(IID, RetTy, Arguments); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } unsigned TargetTransformInfo::getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize) const { return TTIImpl->getEstimatedNumberOfCaseClusters(SI, JTSize); } int TargetTransformInfo::getUserCost(const User *U, ArrayRef Operands) const { int Cost = TTIImpl->getUserCost(U, Operands); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } bool TargetTransformInfo::hasBranchDivergence() const { return TTIImpl->hasBranchDivergence(); } bool TargetTransformInfo::isSourceOfDivergence(const Value *V) const { return TTIImpl->isSourceOfDivergence(V); } bool llvm::TargetTransformInfo::isAlwaysUniform(const Value *V) const { return TTIImpl->isAlwaysUniform(V); } unsigned TargetTransformInfo::getFlatAddressSpace() const { return TTIImpl->getFlatAddressSpace(); } bool TargetTransformInfo::isLoweredToCall(const Function *F) const { return TTIImpl->isLoweredToCall(F); } void TargetTransformInfo::getUnrollingPreferences( Loop *L, ScalarEvolution &SE, UnrollingPreferences &UP) const { return TTIImpl->getUnrollingPreferences(L, SE, UP); } bool TargetTransformInfo::isLegalAddImmediate(int64_t Imm) const { return TTIImpl->isLegalAddImmediate(Imm); } bool TargetTransformInfo::isLegalICmpImmediate(int64_t Imm) const { return TTIImpl->isLegalICmpImmediate(Imm); } bool TargetTransformInfo::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I) const { return TTIImpl->isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale, AddrSpace, I); } bool TargetTransformInfo::isLSRCostLess(LSRCost &C1, LSRCost &C2) const { return TTIImpl->isLSRCostLess(C1, C2); } bool TargetTransformInfo::isLegalMaskedStore(Type *DataType) const { return TTIImpl->isLegalMaskedStore(DataType); } bool TargetTransformInfo::isLegalMaskedLoad(Type *DataType) const { return TTIImpl->isLegalMaskedLoad(DataType); } bool TargetTransformInfo::isLegalMaskedGather(Type *DataType) const { return TTIImpl->isLegalMaskedGather(DataType); } bool TargetTransformInfo::isLegalMaskedScatter(Type *DataType) const { return TTIImpl->isLegalMaskedScatter(DataType); } bool TargetTransformInfo::hasDivRemOp(Type *DataType, bool IsSigned) const { return TTIImpl->hasDivRemOp(DataType, IsSigned); } bool TargetTransformInfo::hasVolatileVariant(Instruction *I, unsigned AddrSpace) const { return TTIImpl->hasVolatileVariant(I, AddrSpace); } bool TargetTransformInfo::prefersVectorizedAddressing() const { return TTIImpl->prefersVectorizedAddressing(); } int TargetTransformInfo::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const { int Cost = TTIImpl->getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, Scale, AddrSpace); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } bool TargetTransformInfo::LSRWithInstrQueries() const { return TTIImpl->LSRWithInstrQueries(); } bool TargetTransformInfo::isTruncateFree(Type *Ty1, Type *Ty2) const { return TTIImpl->isTruncateFree(Ty1, Ty2); } bool TargetTransformInfo::isProfitableToHoist(Instruction *I) const { return TTIImpl->isProfitableToHoist(I); } bool TargetTransformInfo::isTypeLegal(Type *Ty) const { return TTIImpl->isTypeLegal(Ty); } unsigned TargetTransformInfo::getJumpBufAlignment() const { return TTIImpl->getJumpBufAlignment(); } unsigned TargetTransformInfo::getJumpBufSize() const { return TTIImpl->getJumpBufSize(); } bool TargetTransformInfo::shouldBuildLookupTables() const { return TTIImpl->shouldBuildLookupTables(); } bool TargetTransformInfo::shouldBuildLookupTablesForConstant(Constant *C) const { return TTIImpl->shouldBuildLookupTablesForConstant(C); } unsigned TargetTransformInfo:: getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const { return TTIImpl->getScalarizationOverhead(Ty, Insert, Extract); } unsigned TargetTransformInfo:: getOperandsScalarizationOverhead(ArrayRef Args, unsigned VF) const { return TTIImpl->getOperandsScalarizationOverhead(Args, VF); } bool TargetTransformInfo::supportsEfficientVectorElementLoadStore() const { return TTIImpl->supportsEfficientVectorElementLoadStore(); } bool TargetTransformInfo::enableAggressiveInterleaving(bool LoopHasReductions) const { return TTIImpl->enableAggressiveInterleaving(LoopHasReductions); } const TargetTransformInfo::MemCmpExpansionOptions * TargetTransformInfo::enableMemCmpExpansion(bool IsZeroCmp) const { return TTIImpl->enableMemCmpExpansion(IsZeroCmp); } bool TargetTransformInfo::enableInterleavedAccessVectorization() const { return TTIImpl->enableInterleavedAccessVectorization(); } bool TargetTransformInfo::isFPVectorizationPotentiallyUnsafe() const { return TTIImpl->isFPVectorizationPotentiallyUnsafe(); } bool TargetTransformInfo::allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, unsigned AddressSpace, unsigned Alignment, bool *Fast) const { return TTIImpl->allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace, Alignment, Fast); } TargetTransformInfo::PopcntSupportKind TargetTransformInfo::getPopcntSupport(unsigned IntTyWidthInBit) const { return TTIImpl->getPopcntSupport(IntTyWidthInBit); } bool TargetTransformInfo::haveFastSqrt(Type *Ty) const { return TTIImpl->haveFastSqrt(Ty); } bool TargetTransformInfo::isFCmpOrdCheaperThanFCmpZero(Type *Ty) const { return TTIImpl->isFCmpOrdCheaperThanFCmpZero(Ty); } int TargetTransformInfo::getFPOpCost(Type *Ty) const { int Cost = TTIImpl->getFPOpCost(Ty); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } int TargetTransformInfo::getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty) const { int Cost = TTIImpl->getIntImmCodeSizeCost(Opcode, Idx, Imm, Ty); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } int TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty) const { int Cost = TTIImpl->getIntImmCost(Imm, Ty); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } int TargetTransformInfo::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty) const { int Cost = TTIImpl->getIntImmCost(Opcode, Idx, Imm, Ty); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } int TargetTransformInfo::getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty) const { int Cost = TTIImpl->getIntImmCost(IID, Idx, Imm, Ty); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } unsigned TargetTransformInfo::getNumberOfRegisters(bool Vector) const { return TTIImpl->getNumberOfRegisters(Vector); } unsigned TargetTransformInfo::getRegisterBitWidth(bool Vector) const { return TTIImpl->getRegisterBitWidth(Vector); } unsigned TargetTransformInfo::getMinVectorRegisterBitWidth() const { return TTIImpl->getMinVectorRegisterBitWidth(); } bool TargetTransformInfo::shouldConsiderAddressTypePromotion( const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const { return TTIImpl->shouldConsiderAddressTypePromotion( I, AllowPromotionWithoutCommonHeader); } unsigned TargetTransformInfo::getCacheLineSize() const { return TTIImpl->getCacheLineSize(); } llvm::Optional TargetTransformInfo::getCacheSize(CacheLevel Level) const { return TTIImpl->getCacheSize(Level); } llvm::Optional TargetTransformInfo::getCacheAssociativity( CacheLevel Level) const { return TTIImpl->getCacheAssociativity(Level); } unsigned TargetTransformInfo::getPrefetchDistance() const { return TTIImpl->getPrefetchDistance(); } unsigned TargetTransformInfo::getMinPrefetchStride() const { return TTIImpl->getMinPrefetchStride(); } unsigned TargetTransformInfo::getMaxPrefetchIterationsAhead() const { return TTIImpl->getMaxPrefetchIterationsAhead(); } unsigned TargetTransformInfo::getMaxInterleaveFactor(unsigned VF) const { return TTIImpl->getMaxInterleaveFactor(VF); } int TargetTransformInfo::getArithmeticInstrCost( unsigned Opcode, Type *Ty, OperandValueKind Opd1Info, OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo, OperandValueProperties Opd2PropInfo, ArrayRef Args) const { int Cost = TTIImpl->getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo, Args); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } int TargetTransformInfo::getShuffleCost(ShuffleKind Kind, Type *Ty, int Index, Type *SubTp) const { int Cost = TTIImpl->getShuffleCost(Kind, Ty, Index, SubTp); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } int TargetTransformInfo::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, const Instruction *I) const { assert ((I == nullptr || I->getOpcode() == Opcode) && "Opcode should reflect passed instruction."); int Cost = TTIImpl->getCastInstrCost(Opcode, Dst, Src, I); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } int TargetTransformInfo::getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index) const { int Cost = TTIImpl->getExtractWithExtendCost(Opcode, Dst, VecTy, Index); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } int TargetTransformInfo::getCFInstrCost(unsigned Opcode) const { int Cost = TTIImpl->getCFInstrCost(Opcode); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } int TargetTransformInfo::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, const Instruction *I) const { assert ((I == nullptr || I->getOpcode() == Opcode) && "Opcode should reflect passed instruction."); int Cost = TTIImpl->getCmpSelInstrCost(Opcode, ValTy, CondTy, I); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } int TargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) const { int Cost = TTIImpl->getVectorInstrCost(Opcode, Val, Index); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } int TargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace, const Instruction *I) const { assert ((I == nullptr || I->getOpcode() == Opcode) && "Opcode should reflect passed instruction."); int Cost = TTIImpl->getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } int TargetTransformInfo::getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace) const { int Cost = TTIImpl->getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } int TargetTransformInfo::getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr, bool VariableMask, unsigned Alignment) const { int Cost = TTIImpl->getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask, Alignment); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } int TargetTransformInfo::getInterleavedMemoryOpCost( unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef Indices, unsigned Alignment, unsigned AddressSpace) const { int Cost = TTIImpl->getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, Alignment, AddressSpace); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } int TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef Tys, FastMathFlags FMF, unsigned ScalarizationCostPassed) const { int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Tys, FMF, ScalarizationCostPassed); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } int TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef Args, FastMathFlags FMF, unsigned VF) const { int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } int TargetTransformInfo::getCallInstrCost(Function *F, Type *RetTy, ArrayRef Tys) const { int Cost = TTIImpl->getCallInstrCost(F, RetTy, Tys); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } unsigned TargetTransformInfo::getNumberOfParts(Type *Tp) const { return TTIImpl->getNumberOfParts(Tp); } int TargetTransformInfo::getAddressComputationCost(Type *Tp, ScalarEvolution *SE, const SCEV *Ptr) const { int Cost = TTIImpl->getAddressComputationCost(Tp, SE, Ptr); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } int TargetTransformInfo::getArithmeticReductionCost(unsigned Opcode, Type *Ty, bool IsPairwiseForm) const { int Cost = TTIImpl->getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } int TargetTransformInfo::getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwiseForm, bool IsUnsigned) const { int Cost = TTIImpl->getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } unsigned TargetTransformInfo::getCostOfKeepingLiveOverCall(ArrayRef Tys) const { return TTIImpl->getCostOfKeepingLiveOverCall(Tys); } bool TargetTransformInfo::getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const { return TTIImpl->getTgtMemIntrinsic(Inst, Info); } unsigned TargetTransformInfo::getAtomicMemIntrinsicMaxElementSize() const { return TTIImpl->getAtomicMemIntrinsicMaxElementSize(); } Value *TargetTransformInfo::getOrCreateResultFromMemIntrinsic( IntrinsicInst *Inst, Type *ExpectedType) const { return TTIImpl->getOrCreateResultFromMemIntrinsic(Inst, ExpectedType); } Type *TargetTransformInfo::getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, unsigned SrcAlign, unsigned DestAlign) const { return TTIImpl->getMemcpyLoopLoweringType(Context, Length, SrcAlign, DestAlign); } void TargetTransformInfo::getMemcpyLoopResidualLoweringType( SmallVectorImpl &OpsOut, LLVMContext &Context, unsigned RemainingBytes, unsigned SrcAlign, unsigned DestAlign) const { TTIImpl->getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes, SrcAlign, DestAlign); } bool TargetTransformInfo::areInlineCompatible(const Function *Caller, const Function *Callee) const { return TTIImpl->areInlineCompatible(Caller, Callee); } unsigned TargetTransformInfo::getLoadStoreVecRegBitWidth(unsigned AS) const { return TTIImpl->getLoadStoreVecRegBitWidth(AS); } bool TargetTransformInfo::isLegalToVectorizeLoad(LoadInst *LI) const { return TTIImpl->isLegalToVectorizeLoad(LI); } bool TargetTransformInfo::isLegalToVectorizeStore(StoreInst *SI) const { return TTIImpl->isLegalToVectorizeStore(SI); } bool TargetTransformInfo::isLegalToVectorizeLoadChain( unsigned ChainSizeInBytes, unsigned Alignment, unsigned AddrSpace) const { return TTIImpl->isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment, AddrSpace); } bool TargetTransformInfo::isLegalToVectorizeStoreChain( unsigned ChainSizeInBytes, unsigned Alignment, unsigned AddrSpace) const { return TTIImpl->isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment, AddrSpace); } unsigned TargetTransformInfo::getLoadVectorFactor(unsigned VF, unsigned LoadSize, unsigned ChainSizeInBytes, VectorType *VecTy) const { return TTIImpl->getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy); } unsigned TargetTransformInfo::getStoreVectorFactor(unsigned VF, unsigned StoreSize, unsigned ChainSizeInBytes, VectorType *VecTy) const { return TTIImpl->getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy); } bool TargetTransformInfo::useReductionIntrinsic(unsigned Opcode, Type *Ty, ReductionFlags Flags) const { return TTIImpl->useReductionIntrinsic(Opcode, Ty, Flags); } bool TargetTransformInfo::shouldExpandReduction(const IntrinsicInst *II) const { return TTIImpl->shouldExpandReduction(II); } int TargetTransformInfo::getInstructionLatency(const Instruction *I) const { return TTIImpl->getInstructionLatency(I); } static bool isReverseVectorMask(ArrayRef Mask) { for (unsigned i = 0, MaskSize = Mask.size(); i < MaskSize; ++i) if (Mask[i] >= 0 && Mask[i] != (int)(MaskSize - 1 - i)) return false; return true; } static bool isSingleSourceVectorMask(ArrayRef Mask) { bool Vec0 = false; bool Vec1 = false; for (unsigned i = 0, NumVecElts = Mask.size(); i < NumVecElts; ++i) { if (Mask[i] >= 0) { if ((unsigned)Mask[i] >= NumVecElts) Vec1 = true; else Vec0 = true; } } return !(Vec0 && Vec1); } static bool isZeroEltBroadcastVectorMask(ArrayRef Mask) { for (unsigned i = 0; i < Mask.size(); ++i) if (Mask[i] > 0) return false; return true; } static bool isAlternateVectorMask(ArrayRef Mask) { bool isAlternate = true; unsigned MaskSize = Mask.size(); // Example: shufflevector A, B, <0,5,2,7> for (unsigned i = 0; i < MaskSize && isAlternate; ++i) { if (Mask[i] < 0) continue; isAlternate = Mask[i] == (int)((i & 1) ? MaskSize + i : i); } if (isAlternate) return true; isAlternate = true; // Example: shufflevector A, B, <4,1,6,3> for (unsigned i = 0; i < MaskSize && isAlternate; ++i) { if (Mask[i] < 0) continue; isAlternate = Mask[i] == (int)((i & 1) ? i : MaskSize + i); } return isAlternate; } static TargetTransformInfo::OperandValueKind getOperandInfo(Value *V) { TargetTransformInfo::OperandValueKind OpInfo = TargetTransformInfo::OK_AnyValue; // Check for a splat of a constant or for a non uniform vector of constants. if (isa(V) || isa(V)) { OpInfo = TargetTransformInfo::OK_NonUniformConstantValue; if (cast(V)->getSplatValue() != nullptr) OpInfo = TargetTransformInfo::OK_UniformConstantValue; } // Check for a splat of a uniform value. This is not loop aware, so return // true only for the obviously uniform cases (argument, globalvalue) const Value *Splat = getSplatValue(V); if (Splat && (isa(Splat) || isa(Splat))) OpInfo = TargetTransformInfo::OK_UniformValue; return OpInfo; } static bool matchPairwiseShuffleMask(ShuffleVectorInst *SI, bool IsLeft, unsigned Level) { // We don't need a shuffle if we just want to have element 0 in position 0 of // the vector. if (!SI && Level == 0 && IsLeft) return true; else if (!SI) return false; SmallVector Mask(SI->getType()->getVectorNumElements(), -1); // Build a mask of 0, 2, ... (left) or 1, 3, ... (right) depending on whether // we look at the left or right side. for (unsigned i = 0, e = (1 << Level), val = !IsLeft; i != e; ++i, val += 2) Mask[i] = val; SmallVector ActualMask = SI->getShuffleMask(); return Mask == ActualMask; } namespace { /// Kind of the reduction data. enum ReductionKind { RK_None, /// Not a reduction. RK_Arithmetic, /// Binary reduction data. RK_MinMax, /// Min/max reduction data. RK_UnsignedMinMax, /// Unsigned min/max reduction data. }; /// Contains opcode + LHS/RHS parts of the reduction operations. struct ReductionData { ReductionData() = delete; ReductionData(ReductionKind Kind, unsigned Opcode, Value *LHS, Value *RHS) : Opcode(Opcode), LHS(LHS), RHS(RHS), Kind(Kind) { assert(Kind != RK_None && "expected binary or min/max reduction only."); } unsigned Opcode = 0; Value *LHS = nullptr; Value *RHS = nullptr; ReductionKind Kind = RK_None; bool hasSameData(ReductionData &RD) const { return Kind == RD.Kind && Opcode == RD.Opcode; } }; } // namespace static Optional getReductionData(Instruction *I) { Value *L, *R; if (m_BinOp(m_Value(L), m_Value(R)).match(I)) return ReductionData(RK_Arithmetic, I->getOpcode(), L, R); if (auto *SI = dyn_cast(I)) { if (m_SMin(m_Value(L), m_Value(R)).match(SI) || m_SMax(m_Value(L), m_Value(R)).match(SI) || m_OrdFMin(m_Value(L), m_Value(R)).match(SI) || m_OrdFMax(m_Value(L), m_Value(R)).match(SI) || m_UnordFMin(m_Value(L), m_Value(R)).match(SI) || m_UnordFMax(m_Value(L), m_Value(R)).match(SI)) { auto *CI = cast(SI->getCondition()); return ReductionData(RK_MinMax, CI->getOpcode(), L, R); } if (m_UMin(m_Value(L), m_Value(R)).match(SI) || m_UMax(m_Value(L), m_Value(R)).match(SI)) { auto *CI = cast(SI->getCondition()); return ReductionData(RK_UnsignedMinMax, CI->getOpcode(), L, R); } } return llvm::None; } static ReductionKind matchPairwiseReductionAtLevel(Instruction *I, unsigned Level, unsigned NumLevels) { // Match one level of pairwise operations. // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, // <4 x i32> // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, // <4 x i32> // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 if (!I) return RK_None; assert(I->getType()->isVectorTy() && "Expecting a vector type"); Optional RD = getReductionData(I); if (!RD) return RK_None; ShuffleVectorInst *LS = dyn_cast(RD->LHS); if (!LS && Level) return RK_None; ShuffleVectorInst *RS = dyn_cast(RD->RHS); if (!RS && Level) return RK_None; // On level 0 we can omit one shufflevector instruction. if (!Level && !RS && !LS) return RK_None; // Shuffle inputs must match. Value *NextLevelOpL = LS ? LS->getOperand(0) : nullptr; Value *NextLevelOpR = RS ? RS->getOperand(0) : nullptr; Value *NextLevelOp = nullptr; if (NextLevelOpR && NextLevelOpL) { // If we have two shuffles their operands must match. if (NextLevelOpL != NextLevelOpR) return RK_None; NextLevelOp = NextLevelOpL; } else if (Level == 0 && (NextLevelOpR || NextLevelOpL)) { // On the first level we can omit the shufflevector <0, undef,...>. So the // input to the other shufflevector <1, undef> must match with one of the // inputs to the current binary operation. // Example: // %NextLevelOpL = shufflevector %R, <1, undef ...> // %BinOp = fadd %NextLevelOpL, %R if (NextLevelOpL && NextLevelOpL != RD->RHS) return RK_None; else if (NextLevelOpR && NextLevelOpR != RD->LHS) return RK_None; NextLevelOp = NextLevelOpL ? RD->RHS : RD->LHS; } else return RK_None; // Check that the next levels binary operation exists and matches with the // current one. if (Level + 1 != NumLevels) { Optional NextLevelRD = getReductionData(cast(NextLevelOp)); if (!NextLevelRD || !RD->hasSameData(*NextLevelRD)) return RK_None; } // Shuffle mask for pairwise operation must match. if (matchPairwiseShuffleMask(LS, /*IsLeft=*/true, Level)) { if (!matchPairwiseShuffleMask(RS, /*IsLeft=*/false, Level)) return RK_None; } else if (matchPairwiseShuffleMask(RS, /*IsLeft=*/true, Level)) { if (!matchPairwiseShuffleMask(LS, /*IsLeft=*/false, Level)) return RK_None; } else { return RK_None; } if (++Level == NumLevels) return RD->Kind; // Match next level. return matchPairwiseReductionAtLevel(cast(NextLevelOp), Level, NumLevels); } static ReductionKind matchPairwiseReduction(const ExtractElementInst *ReduxRoot, unsigned &Opcode, Type *&Ty) { if (!EnableReduxCost) return RK_None; // Need to extract the first element. ConstantInt *CI = dyn_cast(ReduxRoot->getOperand(1)); unsigned Idx = ~0u; if (CI) Idx = CI->getZExtValue(); if (Idx != 0) return RK_None; auto *RdxStart = dyn_cast(ReduxRoot->getOperand(0)); if (!RdxStart) return RK_None; Optional RD = getReductionData(RdxStart); if (!RD) return RK_None; Type *VecTy = RdxStart->getType(); unsigned NumVecElems = VecTy->getVectorNumElements(); if (!isPowerOf2_32(NumVecElems)) return RK_None; // We look for a sequence of shuffle,shuffle,add triples like the following // that builds a pairwise reduction tree. // // (X0, X1, X2, X3) // (X0 + X1, X2 + X3, undef, undef) // ((X0 + X1) + (X2 + X3), undef, undef, undef) // // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, // <4 x i32> // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, // <4 x i32> // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 // %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, // <4 x i32> // %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, // <4 x i32> // %bin.rdx8 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 // %r = extractelement <4 x float> %bin.rdx8, i32 0 if (matchPairwiseReductionAtLevel(RdxStart, 0, Log2_32(NumVecElems)) == RK_None) return RK_None; Opcode = RD->Opcode; Ty = VecTy; return RD->Kind; } static std::pair getShuffleAndOtherOprd(Value *L, Value *R) { ShuffleVectorInst *S = nullptr; if ((S = dyn_cast(L))) return std::make_pair(R, S); S = dyn_cast(R); return std::make_pair(L, S); } static ReductionKind matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot, unsigned &Opcode, Type *&Ty) { if (!EnableReduxCost) return RK_None; // Need to extract the first element. ConstantInt *CI = dyn_cast(ReduxRoot->getOperand(1)); unsigned Idx = ~0u; if (CI) Idx = CI->getZExtValue(); if (Idx != 0) return RK_None; auto *RdxStart = dyn_cast(ReduxRoot->getOperand(0)); if (!RdxStart) return RK_None; Optional RD = getReductionData(RdxStart); if (!RD) return RK_None; Type *VecTy = ReduxRoot->getOperand(0)->getType(); unsigned NumVecElems = VecTy->getVectorNumElements(); if (!isPowerOf2_32(NumVecElems)) return RK_None; // We look for a sequence of shuffles and adds like the following matching one // fadd, shuffle vector pair at a time. // // %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef, // <4 x i32> // %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf // %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, // <4 x i32> // %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7 // %r = extractelement <4 x float> %bin.rdx8, i32 0 unsigned MaskStart = 1; Instruction *RdxOp = RdxStart; SmallVector ShuffleMask(NumVecElems, 0); unsigned NumVecElemsRemain = NumVecElems; while (NumVecElemsRemain - 1) { // Check for the right reduction operation. if (!RdxOp) return RK_None; Optional RDLevel = getReductionData(RdxOp); if (!RDLevel || !RDLevel->hasSameData(*RD)) return RK_None; Value *NextRdxOp; ShuffleVectorInst *Shuffle; std::tie(NextRdxOp, Shuffle) = getShuffleAndOtherOprd(RDLevel->LHS, RDLevel->RHS); // Check the current reduction operation and the shuffle use the same value. if (Shuffle == nullptr) return RK_None; if (Shuffle->getOperand(0) != NextRdxOp) return RK_None; // Check that shuffle masks matches. for (unsigned j = 0; j != MaskStart; ++j) ShuffleMask[j] = MaskStart + j; // Fill the rest of the mask with -1 for undef. std::fill(&ShuffleMask[MaskStart], ShuffleMask.end(), -1); SmallVector Mask = Shuffle->getShuffleMask(); if (ShuffleMask != Mask) return RK_None; RdxOp = dyn_cast(NextRdxOp); NumVecElemsRemain /= 2; MaskStart *= 2; } Opcode = RD->Opcode; Ty = VecTy; return RD->Kind; } int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const { switch (I->getOpcode()) { case Instruction::GetElementPtr: return getUserCost(I); case Instruction::Ret: case Instruction::PHI: case Instruction::Br: { return getCFInstrCost(I->getOpcode()); } case Instruction::Add: case Instruction::FAdd: case Instruction::Sub: case Instruction::FSub: case Instruction::Mul: case Instruction::FMul: case Instruction::UDiv: case Instruction::SDiv: case Instruction::FDiv: case Instruction::URem: case Instruction::SRem: case Instruction::FRem: case Instruction::Shl: case Instruction::LShr: case Instruction::AShr: case Instruction::And: case Instruction::Or: case Instruction::Xor: { TargetTransformInfo::OperandValueKind Op1VK = getOperandInfo(I->getOperand(0)); TargetTransformInfo::OperandValueKind Op2VK = getOperandInfo(I->getOperand(1)); SmallVector Operands(I->operand_values()); return getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK, Op2VK, TargetTransformInfo::OP_None, TargetTransformInfo::OP_None, Operands); } case Instruction::Select: { const SelectInst *SI = cast(I); Type *CondTy = SI->getCondition()->getType(); return getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy, I); } case Instruction::ICmp: case Instruction::FCmp: { Type *ValTy = I->getOperand(0)->getType(); return getCmpSelInstrCost(I->getOpcode(), ValTy, I->getType(), I); } case Instruction::Store: { const StoreInst *SI = cast(I); Type *ValTy = SI->getValueOperand()->getType(); return getMemoryOpCost(I->getOpcode(), ValTy, SI->getAlignment(), SI->getPointerAddressSpace(), I); } case Instruction::Load: { const LoadInst *LI = cast(I); return getMemoryOpCost(I->getOpcode(), I->getType(), LI->getAlignment(), LI->getPointerAddressSpace(), I); } case Instruction::ZExt: case Instruction::SExt: case Instruction::FPToUI: case Instruction::FPToSI: case Instruction::FPExt: case Instruction::PtrToInt: case Instruction::IntToPtr: case Instruction::SIToFP: case Instruction::UIToFP: case Instruction::Trunc: case Instruction::FPTrunc: case Instruction::BitCast: case Instruction::AddrSpaceCast: { Type *SrcTy = I->getOperand(0)->getType(); return getCastInstrCost(I->getOpcode(), I->getType(), SrcTy, I); } case Instruction::ExtractElement: { const ExtractElementInst * EEI = cast(I); ConstantInt *CI = dyn_cast(I->getOperand(1)); unsigned Idx = -1; if (CI) Idx = CI->getZExtValue(); // Try to match a reduction sequence (series of shufflevector and vector // adds followed by a extractelement). unsigned ReduxOpCode; Type *ReduxType; switch (matchVectorSplittingReduction(EEI, ReduxOpCode, ReduxType)) { case RK_Arithmetic: return getArithmeticReductionCost(ReduxOpCode, ReduxType, /*IsPairwiseForm=*/false); case RK_MinMax: return getMinMaxReductionCost( ReduxType, CmpInst::makeCmpResultType(ReduxType), /*IsPairwiseForm=*/false, /*IsUnsigned=*/false); case RK_UnsignedMinMax: return getMinMaxReductionCost( ReduxType, CmpInst::makeCmpResultType(ReduxType), /*IsPairwiseForm=*/false, /*IsUnsigned=*/true); case RK_None: break; } switch (matchPairwiseReduction(EEI, ReduxOpCode, ReduxType)) { case RK_Arithmetic: return getArithmeticReductionCost(ReduxOpCode, ReduxType, /*IsPairwiseForm=*/true); case RK_MinMax: return getMinMaxReductionCost( ReduxType, CmpInst::makeCmpResultType(ReduxType), /*IsPairwiseForm=*/true, /*IsUnsigned=*/false); case RK_UnsignedMinMax: return getMinMaxReductionCost( ReduxType, CmpInst::makeCmpResultType(ReduxType), /*IsPairwiseForm=*/true, /*IsUnsigned=*/true); case RK_None: break; } return getVectorInstrCost(I->getOpcode(), EEI->getOperand(0)->getType(), Idx); } case Instruction::InsertElement: { const InsertElementInst * IE = cast(I); ConstantInt *CI = dyn_cast(IE->getOperand(2)); unsigned Idx = -1; if (CI) Idx = CI->getZExtValue(); return getVectorInstrCost(I->getOpcode(), IE->getType(), Idx); } case Instruction::ShuffleVector: { const ShuffleVectorInst *Shuffle = cast(I); Type *VecTypOp0 = Shuffle->getOperand(0)->getType(); unsigned NumVecElems = VecTypOp0->getVectorNumElements(); SmallVector Mask = Shuffle->getShuffleMask(); if (NumVecElems == Mask.size()) { if (isReverseVectorMask(Mask)) return getShuffleCost(TargetTransformInfo::SK_Reverse, VecTypOp0, 0, nullptr); if (isAlternateVectorMask(Mask)) return getShuffleCost(TargetTransformInfo::SK_Alternate, VecTypOp0, 0, nullptr); if (isZeroEltBroadcastVectorMask(Mask)) return getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTypOp0, 0, nullptr); if (isSingleSourceVectorMask(Mask)) return getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, VecTypOp0, 0, nullptr); return getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, VecTypOp0, 0, nullptr); } return -1; } case Instruction::Call: if (const IntrinsicInst *II = dyn_cast(I)) { SmallVector Args(II->arg_operands()); FastMathFlags FMF; if (auto *FPMO = dyn_cast(II)) FMF = FPMO->getFastMathFlags(); return getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(), Args, FMF); } return -1; default: // We don't have any information on this instruction. return -1; } } TargetTransformInfo::Concept::~Concept() {} TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {} TargetIRAnalysis::TargetIRAnalysis( std::function TTICallback) : TTICallback(std::move(TTICallback)) {} TargetIRAnalysis::Result TargetIRAnalysis::run(const Function &F, FunctionAnalysisManager &) { return TTICallback(F); } AnalysisKey TargetIRAnalysis::Key; TargetIRAnalysis::Result TargetIRAnalysis::getDefaultTTI(const Function &F) { return Result(F.getParent()->getDataLayout()); } // Register the basic pass. INITIALIZE_PASS(TargetTransformInfoWrapperPass, "tti", "Target Transform Information", false, true) char TargetTransformInfoWrapperPass::ID = 0; void TargetTransformInfoWrapperPass::anchor() {} TargetTransformInfoWrapperPass::TargetTransformInfoWrapperPass() : ImmutablePass(ID) { initializeTargetTransformInfoWrapperPassPass( *PassRegistry::getPassRegistry()); } TargetTransformInfoWrapperPass::TargetTransformInfoWrapperPass( TargetIRAnalysis TIRA) : ImmutablePass(ID), TIRA(std::move(TIRA)) { initializeTargetTransformInfoWrapperPassPass( *PassRegistry::getPassRegistry()); } TargetTransformInfo &TargetTransformInfoWrapperPass::getTTI(const Function &F) { FunctionAnalysisManager DummyFAM; TTI = TIRA.run(F, DummyFAM); return *TTI; } ImmutablePass * llvm::createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA) { return new TargetTransformInfoWrapperPass(std::move(TIRA)); }