diff options
author | Chad Rosier <mcrosier@codeaurora.org> | 2016-11-07 15:27:22 +0000 |
---|---|---|
committer | Chad Rosier <mcrosier@codeaurora.org> | 2016-11-07 15:27:22 +0000 |
commit | ea453ce2582e5c0c8aa5531a27df683a4304fbf5 (patch) | |
tree | e6bab8cb2f0696e0a7ef210f497d67bb341ab90b /lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp | |
parent | d5d412ea27897603c546452a08ff79140be98a02 (diff) |
[AArch64] Removed the narrow load merging code in the ld/st optimizer.
This feature has been disabled for some time now, so remove cruft.
Differential Revision: https://reviews.llvm.org/D26248
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@286110 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp')
-rw-r--r-- | lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp | 271 |
1 file changed, 42 insertions, 229 deletions
diff --git a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp index 0eebec63c3a..c83f08ec40a 100644 --- a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp +++ b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp @@ -38,7 +38,6 @@ STATISTIC(NumPostFolded, "Number of post-index updates folded"); STATISTIC(NumPreFolded, "Number of pre-index updates folded"); STATISTIC(NumUnscaledPairCreated, "Number of load/store from unscaled generated"); -STATISTIC(NumNarrowLoadsPromoted, "Number of narrow loads promoted"); STATISTIC(NumZeroStoresPromoted, "Number of narrow zero stores promoted"); STATISTIC(NumLoadsFromStoresPromoted, "Number of loads from stores promoted"); @@ -51,10 +50,6 @@ static cl::opt<unsigned> LdStLimit("aarch64-load-store-scan-limit", static cl::opt<unsigned> UpdateLimit("aarch64-update-scan-limit", cl::init(100), cl::Hidden); -static cl::opt<bool> EnableNarrowLdMerge("enable-narrow-ld-merge", cl::Hidden, - cl::init(false), - cl::desc("Enable narrow load merge")); - #define AARCH64_LOAD_STORE_OPT_NAME "AArch64 load / store optimization pass" namespace { @@ -107,11 +102,11 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass { bool findMatchingStore(MachineBasicBlock::iterator I, unsigned Limit, MachineBasicBlock::iterator &StoreI); - // Merge the two instructions indicated into a wider instruction. + // Merge the two instructions indicated into a wider narrow store instruction. MachineBasicBlock::iterator - mergeNarrowInsns(MachineBasicBlock::iterator I, - MachineBasicBlock::iterator MergeMI, - const LdStPairFlags &Flags); + mergeNarrowZeroStores(MachineBasicBlock::iterator I, + MachineBasicBlock::iterator MergeMI, + const LdStPairFlags &Flags); // Merge the two instructions indicated into a single pair-wise instruction. 
MachineBasicBlock::iterator @@ -147,8 +142,8 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass { mergeUpdateInsn(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Update, bool IsPreIdx); - // Find and merge foldable ldr/str instructions. - bool tryToMergeLdStInst(MachineBasicBlock::iterator &MBBI); + // Find and merge zero store instructions. + bool tryToMergeZeroStInst(MachineBasicBlock::iterator &MBBI); // Find and pair ldr/str instructions. bool tryToPairLdStInst(MachineBasicBlock::iterator &MBBI); @@ -156,7 +151,7 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass { // Find and promote load instructions which read directly from store. bool tryToPromoteLoadFromStore(MachineBasicBlock::iterator &MBBI); - bool optimizeBlock(MachineBasicBlock &MBB, bool enableNarrowLdOpt); + bool optimizeBlock(MachineBasicBlock &MBB, bool EnableNarrowZeroStOpt); bool runOnMachineFunction(MachineFunction &Fn) override; @@ -173,23 +168,6 @@ char AArch64LoadStoreOpt::ID = 0; INITIALIZE_PASS(AArch64LoadStoreOpt, "aarch64-ldst-opt", AARCH64_LOAD_STORE_OPT_NAME, false, false) -static unsigned getBitExtrOpcode(MachineInstr &MI) { - switch (MI.getOpcode()) { - default: - llvm_unreachable("Unexpected opcode."); - case AArch64::LDRBBui: - case AArch64::LDURBBi: - case AArch64::LDRHHui: - case AArch64::LDURHHi: - return AArch64::UBFMWri; - case AArch64::LDRSBWui: - case AArch64::LDURSBWi: - case AArch64::LDRSHWui: - case AArch64::LDURSHWi: - return AArch64::SBFMWri; - } -} - static bool isNarrowStore(unsigned Opc) { switch (Opc) { default: @@ -202,30 +180,6 @@ static bool isNarrowStore(unsigned Opc) { } } -static bool isNarrowLoad(unsigned Opc) { - switch (Opc) { - default: - return false; - case AArch64::LDRHHui: - case AArch64::LDURHHi: - case AArch64::LDRBBui: - case AArch64::LDURBBi: - case AArch64::LDRSHWui: - case AArch64::LDURSHWi: - case AArch64::LDRSBWui: - case AArch64::LDURSBWi: - return true; - } -} - -static bool isNarrowLoad(MachineInstr &MI) { - return 
isNarrowLoad(MI.getOpcode()); -} - -static bool isNarrowLoadOrStore(unsigned Opc) { - return isNarrowLoad(Opc) || isNarrowStore(Opc); -} - // Scaling factor for unscaled load or store. static int getMemScale(MachineInstr &MI) { switch (MI.getOpcode()) { @@ -317,23 +271,11 @@ static unsigned getMatchingNonSExtOpcode(unsigned Opc, case AArch64::STURSi: case AArch64::LDRSui: case AArch64::LDURSi: - case AArch64::LDRHHui: - case AArch64::LDURHHi: - case AArch64::LDRBBui: - case AArch64::LDURBBi: return Opc; case AArch64::LDRSWui: return AArch64::LDRWui; case AArch64::LDURSWi: return AArch64::LDURWi; - case AArch64::LDRSBWui: - return AArch64::LDRBBui; - case AArch64::LDRSHWui: - return AArch64::LDRHHui; - case AArch64::LDURSBWi: - return AArch64::LDURBBi; - case AArch64::LDURSHWi: - return AArch64::LDURHHi; } } @@ -353,18 +295,6 @@ static unsigned getMatchingWideOpcode(unsigned Opc) { return AArch64::STURXi; case AArch64::STRWui: return AArch64::STRXui; - case AArch64::LDRHHui: - case AArch64::LDRSHWui: - return AArch64::LDRWui; - case AArch64::LDURHHi: - case AArch64::LDURSHWi: - return AArch64::LDURWi; - case AArch64::LDRBBui: - case AArch64::LDRSBWui: - return AArch64::LDRHHui; - case AArch64::LDURBBi: - case AArch64::LDURSBWi: - return AArch64::LDURHHi; } } @@ -608,23 +538,20 @@ static bool isLdOffsetInRangeOfSt(MachineInstr &LoadInst, (UnscaledLdOffset + LoadSize <= (UnscaledStOffset + StoreSize)); } -static bool isPromotableZeroStoreOpcode(unsigned Opc) { - return isNarrowStore(Opc) || Opc == AArch64::STRWui || Opc == AArch64::STURWi; -} - -static bool isPromotableZeroStoreOpcode(MachineInstr &MI) { - return isPromotableZeroStoreOpcode(MI.getOpcode()); -} - static bool isPromotableZeroStoreInst(MachineInstr &MI) { - return (isPromotableZeroStoreOpcode(MI)) && + unsigned Opc = MI.getOpcode(); + return (Opc == AArch64::STRWui || Opc == AArch64::STURWi || + isNarrowStore(Opc)) && getLdStRegOp(MI).getReg() == AArch64::WZR; } MachineBasicBlock::iterator 
-AArch64LoadStoreOpt::mergeNarrowInsns(MachineBasicBlock::iterator I, - MachineBasicBlock::iterator MergeMI, - const LdStPairFlags &Flags) { +AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I, + MachineBasicBlock::iterator MergeMI, + const LdStPairFlags &Flags) { + assert(isPromotableZeroStoreInst(*I) && isPromotableZeroStoreInst(*MergeMI) && + "Expected promotable zero stores."); + MachineBasicBlock::iterator NextI = I; ++NextI; // If NextI is the second of the two instructions to be merged, we need @@ -665,105 +592,9 @@ AArch64LoadStoreOpt::mergeNarrowInsns(MachineBasicBlock::iterator I, OffsetImm /= 2; } + // Construct the new instruction. DebugLoc DL = I->getDebugLoc(); MachineBasicBlock *MBB = I->getParent(); - if (isNarrowLoad(Opc)) { - MachineInstr *RtNewDest = &*(MergeForward ? I : MergeMI); - // When merging small (< 32 bit) loads for big-endian targets, the order of - // the component parts gets swapped. - if (!Subtarget->isLittleEndian()) - std::swap(RtMI, Rt2MI); - // Construct the new load instruction. - MachineInstr *NewMemMI, *BitExtMI1, *BitExtMI2; - NewMemMI = - BuildMI(*MBB, InsertionPoint, DL, TII->get(getMatchingWideOpcode(Opc))) - .addOperand(getLdStRegOp(*RtNewDest)) - .addOperand(BaseRegOp) - .addImm(OffsetImm) - .setMemRefs(I->mergeMemRefsWith(*MergeMI)); - (void)NewMemMI; - - DEBUG( - dbgs() - << "Creating the new load and extract. Replacing instructions:\n "); - DEBUG(I->print(dbgs())); - DEBUG(dbgs() << " "); - DEBUG(MergeMI->print(dbgs())); - DEBUG(dbgs() << " with instructions:\n "); - DEBUG((NewMemMI)->print(dbgs())); - - int Width = getMemScale(*I) == 1 ? 8 : 16; - int LSBLow = 0; - int LSBHigh = Width; - int ImmsLow = LSBLow + Width - 1; - int ImmsHigh = LSBHigh + Width - 1; - MachineInstr *ExtDestMI = &*(MergeForward ? MergeMI : I); - if ((ExtDestMI == Rt2MI) == Subtarget->isLittleEndian()) { - // Create the bitfield extract for high bits. 
- BitExtMI1 = - BuildMI(*MBB, InsertionPoint, DL, TII->get(getBitExtrOpcode(*Rt2MI))) - .addOperand(getLdStRegOp(*Rt2MI)) - .addReg(getLdStRegOp(*RtNewDest).getReg()) - .addImm(LSBHigh) - .addImm(ImmsHigh); - // Create the bitfield extract for low bits. - if (RtMI->getOpcode() == getMatchingNonSExtOpcode(RtMI->getOpcode())) { - // For unsigned, prefer to use AND for low bits. - BitExtMI2 = BuildMI(*MBB, InsertionPoint, DL, TII->get(AArch64::ANDWri)) - .addOperand(getLdStRegOp(*RtMI)) - .addReg(getLdStRegOp(*RtNewDest).getReg()) - .addImm(ImmsLow); - } else { - BitExtMI2 = - BuildMI(*MBB, InsertionPoint, DL, TII->get(getBitExtrOpcode(*RtMI))) - .addOperand(getLdStRegOp(*RtMI)) - .addReg(getLdStRegOp(*RtNewDest).getReg()) - .addImm(LSBLow) - .addImm(ImmsLow); - } - } else { - // Create the bitfield extract for low bits. - if (RtMI->getOpcode() == getMatchingNonSExtOpcode(RtMI->getOpcode())) { - // For unsigned, prefer to use AND for low bits. - BitExtMI1 = BuildMI(*MBB, InsertionPoint, DL, TII->get(AArch64::ANDWri)) - .addOperand(getLdStRegOp(*RtMI)) - .addReg(getLdStRegOp(*RtNewDest).getReg()) - .addImm(ImmsLow); - } else { - BitExtMI1 = - BuildMI(*MBB, InsertionPoint, DL, TII->get(getBitExtrOpcode(*RtMI))) - .addOperand(getLdStRegOp(*RtMI)) - .addReg(getLdStRegOp(*RtNewDest).getReg()) - .addImm(LSBLow) - .addImm(ImmsLow); - } - - // Create the bitfield extract for high bits. - BitExtMI2 = - BuildMI(*MBB, InsertionPoint, DL, TII->get(getBitExtrOpcode(*Rt2MI))) - .addOperand(getLdStRegOp(*Rt2MI)) - .addReg(getLdStRegOp(*RtNewDest).getReg()) - .addImm(LSBHigh) - .addImm(ImmsHigh); - } - (void)BitExtMI1; - (void)BitExtMI2; - - DEBUG(dbgs() << " "); - DEBUG((BitExtMI1)->print(dbgs())); - DEBUG(dbgs() << " "); - DEBUG((BitExtMI2)->print(dbgs())); - DEBUG(dbgs() << "\n"); - - // Erase the old instructions. 
- I->eraseFromParent(); - MergeMI->eraseFromParent(); - return NextI; - } - assert(isPromotableZeroStoreInst(*I) && isPromotableZeroStoreInst(*MergeMI) && - "Expected promotable zero store"); - - // Construct the new instruction. MachineInstrBuilder MIB; MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(getMatchingWideOpcode(Opc))) .addReg(isNarrowStore(Opc) ? AArch64::WZR : AArch64::XZR) @@ -772,7 +603,7 @@ AArch64LoadStoreOpt::mergeNarrowInsns(MachineBasicBlock::iterator I, .setMemRefs(I->mergeMemRefsWith(*MergeMI)); (void)MIB; - DEBUG(dbgs() << "Creating wider load/store. Replacing instructions:\n "); + DEBUG(dbgs() << "Creating wider store. Replacing instructions:\n "); DEBUG(I->print(dbgs())); DEBUG(dbgs() << " "); DEBUG(MergeMI->print(dbgs())); @@ -1179,13 +1010,14 @@ static bool areCandidatesToMergeOrPair(MachineInstr &FirstMI, MachineInstr &MI, return true; } - // If the second instruction isn't even a load/store, bail out. + // If the second instruction isn't even a mergable/pairable load/store, bail + // out. if (!PairIsValidLdStrOpc) return false; - // FIXME: We don't support merging narrow loads/stores with mixed - // scaled/unscaled offsets. - if (isNarrowLoadOrStore(OpcA) || isNarrowLoadOrStore(OpcB)) + // FIXME: We don't support merging narrow stores with mixed scaled/unscaled + // offsets. + if (isNarrowStore(OpcA) || isNarrowStore(OpcB)) return false; // Try to match an unscaled load/store with a scaled load/store. @@ -1596,37 +1428,26 @@ bool AArch64LoadStoreOpt::tryToPromoteLoadFromStore( return false; } -// Find narrow loads that can be converted into a single wider load with -// bitfield extract instructions. Also merge adjacent zero stores into a wider -// store. -bool AArch64LoadStoreOpt::tryToMergeLdStInst( +// Merge adjacent zero stores into a wider store. 
+bool AArch64LoadStoreOpt::tryToMergeZeroStInst( MachineBasicBlock::iterator &MBBI) { - assert((isNarrowLoad(*MBBI) || isPromotableZeroStoreOpcode(*MBBI)) && - "Expected narrow op."); + assert(isPromotableZeroStoreInst(*MBBI) && "Expected narrow store."); MachineInstr &MI = *MBBI; MachineBasicBlock::iterator E = MI.getParent()->end(); if (!TII->isCandidateToMergeOrPair(MI)) return false; - // For promotable zero stores, the stored value should be WZR. - if (isPromotableZeroStoreOpcode(MI) && - getLdStRegOp(MI).getReg() != AArch64::WZR) - return false; - // Look ahead up to LdStLimit instructions for a mergable instruction. LdStPairFlags Flags; MachineBasicBlock::iterator MergeMI = findMatchingInsn(MBBI, Flags, LdStLimit, /* FindNarrowMerge = */ true); if (MergeMI != E) { - if (isNarrowLoad(MI)) { - ++NumNarrowLoadsPromoted; - } else if (isPromotableZeroStoreInst(MI)) { - ++NumZeroStoresPromoted; - } + ++NumZeroStoresPromoted; + // Keeping the iterator straight is a pain, so we let the merge routine tell // us what the next instruction is after it's done mucking about. - MBBI = mergeNarrowInsns(MBBI, MergeMI, Flags); + MBBI = mergeNarrowZeroStores(MBBI, MergeMI, Flags); return true; } return false; @@ -1667,7 +1488,7 @@ bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) { } bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB, - bool enableNarrowLdOpt) { + bool EnableNarrowZeroStOpt) { bool Modified = false; // Four tranformations to do here: // 1) Find loads that directly read from stores and promote them by @@ -1706,29 +1527,21 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB, } } } - // 2) Find narrow loads that can be converted into a single wider load - // with bitfield extract instructions. - // e.g., - // ldrh w0, [x2] - // ldrh w1, [x2, #2] - // ; becomes - // ldr w0, [x2] - // ubfx w1, w0, #16, #16 - // and w0, w0, #ffff - // - // Also merge adjacent zero stores into a wider store. 
+ // 2) Merge adjacent zero stores into a wider store. // e.g., // strh wzr, [x0] // strh wzr, [x0, #2] // ; becomes // str wzr, [x0] + // e.g., + // str wzr, [x0] + // str wzr, [x0, #4] + // ; becomes + // str xzr, [x0] for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); - enableNarrowLdOpt && MBBI != E;) { - MachineInstr &MI = *MBBI; - unsigned Opc = MI.getOpcode(); - if (isPromotableZeroStoreOpcode(Opc) || - (EnableNarrowLdMerge && isNarrowLoad(Opc))) { - if (tryToMergeLdStInst(MBBI)) { + EnableNarrowZeroStOpt && MBBI != E;) { + if (isPromotableZeroStoreInst(*MBBI)) { + if (tryToMergeZeroStInst(MBBI)) { Modified = true; } else ++MBBI; @@ -1889,10 +1702,10 @@ bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { UsedRegs.resize(TRI->getNumRegs()); bool Modified = false; - bool enableNarrowLdOpt = - Subtarget->mergeNarrowLoads() && !Subtarget->requiresStrictAlign(); + bool enableNarrowZeroStOpt = + Subtarget->mergeNarrowStores() && !Subtarget->requiresStrictAlign(); for (auto &MBB : Fn) - Modified |= optimizeBlock(MBB, enableNarrowLdOpt); + Modified |= optimizeBlock(MBB, enableNarrowZeroStOpt); return Modified; } |