diff options
-rw-r--r-- | include/llvm/Analysis/TargetTransformInfo.h | 7 | ||||
-rw-r--r-- | include/llvm/Analysis/TargetTransformInfoImpl.h | 2 | ||||
-rw-r--r-- | include/llvm/CodeGen/BasicTTIImpl.h | 4 | ||||
-rw-r--r-- | lib/Analysis/TargetTransformInfo.cpp | 4 | ||||
-rw-r--r-- | lib/Transforms/Utils/SimplifyCFG.cpp | 179 | ||||
-rw-r--r-- | test/Transforms/SimplifyCFG/X86/if-conversion.ll | 231 |
6 files changed, 0 insertions, 427 deletions
diff --git a/include/llvm/Analysis/TargetTransformInfo.h b/include/llvm/Analysis/TargetTransformInfo.h index cecd8958e9d..c20f20cfbe4 100644 --- a/include/llvm/Analysis/TargetTransformInfo.h +++ b/include/llvm/Analysis/TargetTransformInfo.h @@ -646,9 +646,6 @@ public: /// \brief Additional properties of an operand's values. enum OperandValueProperties { OP_None = 0, OP_PowerOf2 = 1 }; - /// \return True if target can execute instructions out of order. - bool isOutOfOrder() const; - /// \return The number of scalar or vector registers that the target has. /// If 'Vectors' is true, it returns the number of vector registers. If it is /// set to false, it returns the number of scalar registers. @@ -1021,7 +1018,6 @@ public: Type *Ty) = 0; virtual int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty) = 0; - virtual bool isOutOfOrder() const = 0; virtual unsigned getNumberOfRegisters(bool Vector) = 0; virtual unsigned getRegisterBitWidth(bool Vector) const = 0; virtual unsigned getMinVectorRegisterBitWidth() = 0; @@ -1299,9 +1295,6 @@ public: Type *Ty) override { return Impl.getIntImmCost(IID, Idx, Imm, Ty); } - bool isOutOfOrder() const override { - return Impl.isOutOfOrder(); - } unsigned getNumberOfRegisters(bool Vector) override { return Impl.getNumberOfRegisters(Vector); } diff --git a/include/llvm/Analysis/TargetTransformInfoImpl.h b/include/llvm/Analysis/TargetTransformInfoImpl.h index 3625675d53d..4c37402278e 100644 --- a/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -337,8 +337,6 @@ public: return TTI::TCC_Free; } - bool isOutOfOrder() const { return false; } - unsigned getNumberOfRegisters(bool Vector) { return 8; } unsigned getRegisterBitWidth(bool Vector) const { return 32; } diff --git a/include/llvm/CodeGen/BasicTTIImpl.h b/include/llvm/CodeGen/BasicTTIImpl.h index f1f9275b078..526ddb1b970 100644 --- a/include/llvm/CodeGen/BasicTTIImpl.h +++ b/include/llvm/CodeGen/BasicTTIImpl.h @@ -402,10 +402,6 @@ public: return BaseT::getInstructionLatency(I); } - bool isOutOfOrder() const { - return getST()->getSchedModel().isOutOfOrder(); - } - /// @} /// \name Vector TTI Implementations diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp index c9e9c6d1a41..b744cae51ed 100644 --- a/lib/Analysis/TargetTransformInfo.cpp +++ b/lib/Analysis/TargetTransformInfo.cpp @@ -314,10 +314,6 @@ int TargetTransformInfo::getIntImmCost(Intrinsic::ID IID, unsigned Idx, return Cost; } -bool TargetTransformInfo::isOutOfOrder() const { - return TTIImpl->isOutOfOrder(); -} - unsigned TargetTransformInfo::getNumberOfRegisters(bool Vector) const { return TTIImpl->getNumberOfRegisters(Vector); } diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index b3c80424c8b..e7358dbcb62 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -127,16 +127,6 @@ static cl::opt<unsigned> MaxSpeculationDepth( cl::desc("Limit maximum recursion depth when calculating costs of " "speculatively executed instructions")); -static cl::opt<unsigned> DependenceChainLatency( - "dependence-chain-latency", cl::Hidden, cl::init(8), - cl::desc("Limit the maximum latency of dependence chain containing cmp " - "for if conversion")); - -static cl::opt<unsigned> SmallBBSize( - "small-bb-size", cl::Hidden, cl::init(40), - cl::desc("Check dependence chain latency only in basic block smaller than " - "this number")); - STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps"); STATISTIC(NumLinearMaps, "Number of switch instructions turned into linear mapping"); @@ -405,166 +395,6 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, return true; } -/// Estimate the code size of the specified BB. -static unsigned CountBBCodeSize(BasicBlock *BB, - const TargetTransformInfo &TTI) { - unsigned Size = 0; - for (auto II = BB->begin(); !isa<TerminatorInst>(II); ++II) - Size += TTI.getInstructionCost(&(*II), TargetTransformInfo::TCK_CodeSize); - return Size; -} - -/// Find out the latency of the longest dependence chain in the BB if -/// LongestChain is true, or the dependence chain containing the compare -/// instruction feeding the block's conditional branch. -static unsigned FindDependenceChainLatency(BasicBlock *BB, - DenseMap<Instruction *, unsigned> &Instructions, - const TargetTransformInfo &TTI, - bool LongestChain) { - unsigned MaxLatency = 0; - - BasicBlock::iterator II; - for (II = BB->begin(); !isa<TerminatorInst>(II); ++II) { - unsigned Latency = 0; - for (unsigned O = 0, E = II->getNumOperands(); O != E; ++O) { - Instruction *Op = dyn_cast<Instruction>(II->getOperand(O)); - if (Op && Instructions.count(Op)) { - auto OpLatency = Instructions[Op]; - if (OpLatency > Latency) - Latency = OpLatency; - } - } - Latency += TTI.getInstructionCost(&(*II), TargetTransformInfo::TCK_Latency); - Instructions[&(*II)] = Latency; - - if (Latency > MaxLatency) - MaxLatency = Latency; - } - - if (LongestChain) - return MaxLatency; - - // The length of the dependence chain containing the compare instruction is - // wanted, so the terminator must be a BranchInst. - assert(isa<BranchInst>(II)); - BranchInst* Br = cast<BranchInst>(II); - Instruction *Cmp = dyn_cast<Instruction>(Br->getCondition()); - if (Cmp && Instructions.count(Cmp)) - return Instructions[Cmp]; - else - return 0; -} - -/// Instructions in BB2 may depend on instructions in BB1, and instructions -/// in BB1 may have users in BB2. If the last (in terms of latency) such kind -/// of instruction in BB1 is I, then the instructions after I can be executed -/// in parallel with instructions in BB2. -/// This function returns the latency of I. -static unsigned LatencyAdjustment(BasicBlock *BB1, BasicBlock *BB2, - BasicBlock *IfBlock1, BasicBlock *IfBlock2, - DenseMap<Instruction *, unsigned> &BB1Instructions) { - unsigned LastLatency = 0; - SmallVector<Instruction *, 16> Worklist; - BasicBlock::iterator II; - for (II = BB2->begin(); !isa<TerminatorInst>(II); ++II) { - if (PHINode *PN = dyn_cast<PHINode>(II)) { - // Look for users in BB2. - bool InBBUser = false; - for (User *U : PN->users()) { - if (cast<Instruction>(U)->getParent() == BB2) { - InBBUser = true; - break; - } - } - // No such user, we don't care about this instruction and its operands. - if (!InBBUser) - break; - } - Worklist.push_back(&(*II)); - } - - while (!Worklist.empty()) { - Instruction *I = Worklist.pop_back_val(); - for (unsigned O = 0, E = I->getNumOperands(); O != E; ++O) { - if (Instruction *Op = dyn_cast<Instruction>(I->getOperand(O))) { - if (Op->getParent() == IfBlock1 || Op->getParent() == IfBlock2) - Worklist.push_back(Op); - else if (Op->getParent() == BB1 && BB1Instructions.count(Op)) { - if (BB1Instructions[Op] > LastLatency) - LastLatency = BB1Instructions[Op]; - } - } - } - } - - return LastLatency; -} - -/// If after if conversion, most of the instructions in this new BB construct a -/// long and slow dependence chain, it may be slower than cmp/branch, even -/// if the branch has a high miss rate, because the control dependence is -/// transformed into data dependence, and control dependence can be speculated, -/// and thus, the second part can execute in parallel with the first part on -/// modern OOO processor. -/// -/// To check this condition, this function finds the length of the dependence -/// chain in BB1 (only the part that can be executed in parallel with code after -/// branch in BB2) containing cmp, and if the length is longer than a threshold, -/// don't perform if conversion. -/// -/// BB1, BB2, IfBlock1 and IfBlock2 are candidate BBs for if conversion. -/// SpeculationSize contains the code size of IfBlock1 and IfBlock2. -static bool FindLongDependenceChain(BasicBlock *BB1, BasicBlock *BB2, - BasicBlock *IfBlock1, BasicBlock *IfBlock2, - unsigned SpeculationSize, - const TargetTransformInfo &TTI) { - // Accumulated latency of each instruction in their BBs. - DenseMap<Instruction *, unsigned> BB1Instructions; - DenseMap<Instruction *, unsigned> BB2Instructions; - - if (!TTI.isOutOfOrder()) - return false; - - unsigned NewBBSize = CountBBCodeSize(BB1, TTI) + CountBBCodeSize(BB2, TTI) - + SpeculationSize; - - // We check small BB only since it is more difficult to find unrelated - // instructions to fill functional units in a small BB. - if (NewBBSize > SmallBBSize) - return false; - - auto BB1Chain = - FindDependenceChainLatency(BB1, BB1Instructions, TTI, false); - auto BB2Chain = - FindDependenceChainLatency(BB2, BB2Instructions, TTI, true); - - // If there are many unrelated instructions in the new BB, there will be - // other instructions for the processor to issue regardless of the length - // of this new dependence chain. - // Modern processors can issue 3 or more instructions in each cycle. But in - // real world applications, an IPC of 2 is already very good for non-loop - // code with small basic blocks. Higher IPC is usually found in programs with - // small kernel. So IPC of 2 is more reasonable for most applications. - if ((BB1Chain + BB2Chain) * 2 <= NewBBSize) - return false; - - // We only care about part of the dependence chain in BB1 that can be - // executed in parallel with BB2, so adjust the latency. - BB1Chain -= - LatencyAdjustment(BB1, BB2, IfBlock1, IfBlock2, BB1Instructions); - - // Correctly predicted branch instruction can skip the dependence chain in - // BB1, but misprediction has a penalty, so only when the dependence chain is - // longer than DependenceChainLatency, then branch is better than select. - // Besides misprediction penalty, the threshold value DependenceChainLatency - // also depends on branch misprediction rate, taken branch latency and cmov - // latency. - if (BB1Chain >= DependenceChainLatency) - return true; - - return false; -} - /// Extract ConstantInt from value, looking through IntToPtr /// and PointerNullValue. Return NULL if value is not a constant int. static ConstantInt *GetConstantInt(Value *V, const DataLayout &DL) { @@ -2214,11 +2044,6 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, if (!HaveRewritablePHIs && !(HoistCondStores && SpeculatedStoreValue)) return false; - // Don't do if conversion for long dependence chain. - if (FindLongDependenceChain(BB, EndBB, ThenBB, nullptr, - CountBBCodeSize(ThenBB, TTI), TTI)) - return false; - // If we get here, we can hoist the instruction and if-convert. DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";); @@ -2526,10 +2351,6 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, } } - if (FindLongDependenceChain(DomBlock, BB, IfBlock1, IfBlock2, - AggressiveInsts.size(), TTI)) - return false; - DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond << " T: " << IfTrue->getName() << " F: " << IfFalse->getName() << "\n"); diff --git a/test/Transforms/SimplifyCFG/X86/if-conversion.ll b/test/Transforms/SimplifyCFG/X86/if-conversion.ll deleted file mode 100644 index 28702572d48..00000000000 --- a/test/Transforms/SimplifyCFG/X86/if-conversion.ll +++ /dev/null @@ -1,231 +0,0 @@ -; RUN: opt < %s -simplifycfg -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -S | FileCheck %s -; Avoid if-conversion if there is a long dependence chain. - -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" - -; The first several cases test FindLongDependenceChain returns true, so -; if-conversion is blocked. - -define i64 @test1(i64** %pp, i64* %p) { -entry: - %0 = load i64*, i64** %pp, align 8 - %1 = load i64, i64* %0, align 8 - %cmp = icmp slt i64 %1, 0 - %pint = ptrtoint i64* %p to i64 - br i1 %cmp, label %cond.true, label %cond.false - -cond.true: - %p1 = add i64 %pint, 8 - br label %cond.end - -cond.false: - %p2 = or i64 %pint, 16 - br label %cond.end - -cond.end: - %p3 = phi i64 [%p1, %cond.true], [%p2, %cond.false] - %ptr = inttoptr i64 %p3 to i64* - %val = load i64, i64* %ptr, align 8 - ret i64 %val - -; CHECK-NOT: select -} - -define i64 @test2(i64** %pp, i64* %p) { -entry: - %0 = load i64*, i64** %pp, align 8 - %1 = load i64, i64* %0, align 8 - %cmp = icmp slt i64 %1, 0 - %pint = ptrtoint i64* %p to i64 - br i1 %cmp, label %cond.true, label %cond.false - -cond.true: - %p1 = add i64 %pint, 8 - br label %cond.end - -cond.false: - %p2 = add i64 %pint, 16 - br label %cond.end - -cond.end: - %p3 = phi i64 [%p1, %cond.true], [%p2, %cond.false] - %ptr = inttoptr i64 %p3 to i64* - %val = load i64, i64* %ptr, align 8 - ret i64 %val - -; CHECK-LABEL: @test2 -; CHECK-NOT: select -} - -; The following cases test FindLongDependenceChain returns false, so -; if-conversion will proceed. - -; Non trivial LatencyAdjustment. -define i64 @test3(i64** %pp, i64* %p) { -entry: - %0 = load i64*, i64** %pp, align 8 - %1 = load i64, i64* %0, align 8 - %cmp = icmp slt i64 %1, 0 - %pint = ptrtoint i64* %p to i64 - br i1 %cmp, label %cond.true, label %cond.false - -cond.true: - %p1 = add i64 %pint, 8 - br label %cond.end - -cond.false: - %p2 = or i64 %pint, 16 - br label %cond.end - -cond.end: - %p3 = phi i64 [%p1, %cond.true], [%p2, %cond.false] - %p4 = add i64 %p3, %1 - %ptr = inttoptr i64 %p4 to i64* - %val = load i64, i64* %ptr, align 8 - ret i64 %val - -; CHECK-LABEL: @test3 -; CHECK: select -} - -; Short dependence chain. -define i64 @test4(i64* %pp, i64* %p) { -entry: - %0 = load i64, i64* %pp, align 8 - %cmp = icmp slt i64 %0, 0 - %pint = ptrtoint i64* %p to i64 - br i1 %cmp, label %cond.true, label %cond.false - -cond.true: - %p1 = add i64 %pint, 8 - br label %cond.end - -cond.false: - %p2 = or i64 %pint, 16 - br label %cond.end - -cond.end: - %p3 = phi i64 [%p1, %cond.true], [%p2, %cond.false] - %ptr = inttoptr i64 %p3 to i64* - %val = load i64, i64* %ptr, align 8 - ret i64 %val - -; CHECK-LABEL: @test4 -; CHECK: select -} - -; High IPC. -define i64 @test5(i64** %pp, i64* %p) { -entry: - %0 = load i64*, i64** %pp, align 8 - %1 = load i64, i64* %0, align 8 - %cmp = icmp slt i64 %1, 0 - %pint = ptrtoint i64* %p to i64 - %2 = add i64 %pint, 2 - %3 = add i64 %pint, 3 - %4 = or i64 %pint, 16 - %5 = and i64 %pint, 255 - - %6 = or i64 %2, 9 - %7 = and i64 %3, 255 - %8 = add i64 %4, 4 - %9 = add i64 %5, 5 - - %10 = add i64 %6, 2 - %11 = add i64 %7, 3 - %12 = add i64 %8, 4 - %13 = add i64 %9, 5 - - %14 = add i64 %10, 6 - %15 = add i64 %11, 7 - %16 = add i64 %12, 8 - %17 = add i64 %13, 9 - - %18 = add i64 %14, 10 - %19 = add i64 %15, 11 - %20 = add i64 %16, 12 - %21 = add i64 %17, 13 - - br i1 %cmp, label %cond.true, label %cond.false - -cond.true: - %p1 = add i64 %pint, 8 - br label %cond.end - -cond.false: - %p2 = or i64 %pint, 16 - br label %cond.end - -cond.end: - %p3 = phi i64 [%p1, %cond.true], [%p2, %cond.false] - %ptr = inttoptr i64 %p3 to i64* - %val = load i64, i64* %ptr, align 8 - - ret i64 %val - -; CHECK-LABEL: @test5 -; CHECK: select -} - -; Large BB size. -define i64 @test6(i64** %pp, i64* %p) { -entry: - %0 = load i64*, i64** %pp, align 8 - %1 = load i64, i64* %0, align 8 - %cmp = icmp slt i64 %1, 0 - %pint = ptrtoint i64* %p to i64 - br i1 %cmp, label %cond.true, label %cond.false - -cond.true: - %p1 = add i64 %pint, 8 - br label %cond.end - -cond.false: - %p2 = or i64 %pint, 16 - br label %cond.end - -cond.end: - %p3 = phi i64 [%p1, %cond.true], [%p2, %cond.false] - %ptr = inttoptr i64 %p3 to i64* - %val = load i64, i64* %ptr, align 8 - %2 = add i64 %pint, 2 - %3 = add i64 %pint, 3 - %4 = add i64 %2, 4 - %5 = add i64 %3, 5 - %6 = add i64 %4, 6 - %7 = add i64 %5, 7 - %8 = add i64 %6, 6 - %9 = add i64 %7, 7 - %10 = add i64 %8, 6 - %11 = add i64 %9, 7 - %12 = add i64 %10, 6 - %13 = add i64 %11, 7 - %14 = add i64 %12, 6 - %15 = add i64 %13, 7 - %16 = add i64 %14, 6 - %17 = add i64 %15, 7 - %18 = add i64 %16, 6 - %19 = add i64 %17, 7 - %20 = add i64 %18, 6 - %21 = add i64 %19, 7 - %22 = add i64 %20, 6 - %23 = add i64 %21, 7 - %24 = add i64 %22, 6 - %25 = add i64 %23, 7 - %26 = add i64 %24, 6 - %27 = add i64 %25, 7 - %28 = add i64 %26, 6 - %29 = add i64 %27, 7 - %30 = add i64 %28, 6 - %31 = add i64 %29, 7 - %32 = add i64 %30, 8 - %33 = add i64 %31, 9 - %34 = add i64 %32, %33 - %35 = and i64 %34, 255 - %res = add i64 %val, %35 - - ret i64 %res - -; CHECK-LABEL: @test6 -; CHECK: select -} |