From 3bbf394145bab93a33e211a5489007b54d8f8b49 Mon Sep 17 00:00:00 2001
From: Nirav Dave
Date: Tue, 14 Mar 2017 00:34:14 +0000
Subject: In visitSTORE, always use FindBetterChain, rather than only when
 UseAA is enabled.

Recommitting with compile time improvements.

Recommitting after fixing a 32-bit aliasing sign-offset bug in
DAGCombiner.

* Simplify Consecutive Merge Store Candidate Search

Now that address aliasing is much less conservative, push through a
simplified store-merging search and a chain alias analysis which only
checks for parallel stores through the chain subgraph. This is cleaner,
as it separates the handling of non-interfering loads/stores from the
store-merging logic.

When merging stores, search up the chain through a single load, and
find all possible stores by looking down through a load and a
TokenFactor to all stores visited. This improves the quality of the
output SelectionDAG and the output CodeGen (save perhaps for some ARM
cases where we correctly construct wider loads but then promote them to
float operations, which requires more expensive constant generation).

Some minor peephole optimizations deal with the improved SubDAG shapes
(listed below).

Additional Minor Changes:

  1. Finish removing the unused AliasLoad code.
  2. Unify the chain aggregation in the merged stores across code
     paths.
  3. Re-add the Store node to the worklist after calling
     SimplifyDemandedBits.
  4. Increase GatherAllAliasesMaxDepth from 6 to 18. That number is
     arbitrary, but seems sufficient to not cause regressions in
     tests.
  5. Remove Chain dependencies of Memory operations on CopyFromReg
     nodes, as these are captured by data dependence.
  6. Forward load-store values through TokenFactors containing
     {CopyToReg,CopyFromReg} Values.
  7. Peephole to convert a build_vector of extract_vector_elt to
     extract_subvector if possible (see
     CodeGen/AArch64/merge-store.ll).
  8. Store merging for the ARM target is restricted to 32 bits, as in
     some contexts invalid 64-bit operations are being generated. This
     can be removed once appropriate checks are added.

This finishes the change Matt Arsenault started in r246307 and
jyknight's original patch.

Many tests required some changes, as memory operations are now
reorderable, improving load-store forwarding. One test in particular
is worth noting:

  CodeGen/PowerPC/ppc64-align-long-double.ll - Improved load-store
  forwarding converts a load-store pair into a parallel store and a
  memory-realized bitcast of the same value. However, because we lose
  the sharing of the explicit and implicit store values, we must
  create another local store. A similar transformation happens before
  SelectionDAG as well.
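For illustration only (not part of the patch), here is a hypothetical
IR sketch of the kind of pattern the chain-based candidate search can
now handle; the function and value names are made up:

    ; Hypothetical example, not from this patch's tests. Both i16
    ; stores hang off the same chain root, so getStoreMergeCandidates
    ; can find them as merge candidates even with the unrelated
    ; (non-aliasing) load of %q chained between them, allowing them to
    ; be merged into a single wider store.
    define void @merge_sketch(i16* %p, i32* %q) {
      %p0 = getelementptr inbounds i16, i16* %p, i64 0
      %p1 = getelementptr inbounds i16, i16* %p, i64 1
      store i16 1, i16* %p0, align 4
      %v = load i32, i32* %q
      store i16 2, i16* %p1, align 2
      ret void
    }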
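Similarly, a hedged sketch of the build_vector-to-extract_subvector
peephole (change 7 above); names are again hypothetical:

    ; Hypothetical example. The BUILD_VECTOR formed from lanes 2 and 3
    ; of %v reads consecutive elements starting at an offset that is a
    ; multiple of the result width, so the peephole can select it as
    ; an EXTRACT_SUBVECTOR of %v instead of two lane extracts.
    define <2 x i32> @extract_sub_sketch(<4 x i32> %v) {
      %e2 = extractelement <4 x i32> %v, i32 2
      %e3 = extractelement <4 x i32> %v, i32 3
      %r0 = insertelement <2 x i32> undef, i32 %e2, i32 0
      %r1 = insertelement <2 x i32> %r0, i32 %e3, i32 1
      ret <2 x i32> %r1
    }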
Reviewers: arsenm, hfinkel, tstellarAMD, jyknight, nhaehnle git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@297695 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Target/TargetLowering.h | 3 + lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 760 +++++++------- lib/CodeGen/TargetLoweringBase.cpp | 2 +- lib/Target/AArch64/AArch64ISelLowering.cpp | 2 +- lib/Target/ARM/ARMISelLowering.h | 5 + test/CodeGen/AArch64/argument-blocks.ll | 4 +- test/CodeGen/AArch64/arm64-abi.ll | 5 +- test/CodeGen/AArch64/arm64-memset-inline.ll | 4 +- test/CodeGen/AArch64/arm64-variadic-aapcs.ll | 2 +- test/CodeGen/AArch64/merge-store.ll | 3 +- test/CodeGen/AArch64/vector_merge_dep_check.ll | 3 +- test/CodeGen/AMDGPU/debugger-insert-nops.ll | 24 +- test/CodeGen/AMDGPU/insert_vector_elt.ll | 6 +- test/CodeGen/AMDGPU/merge-stores.ll | 24 +- test/CodeGen/AMDGPU/private-element-size.ll | 12 +- test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll | 17 +- test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll | 3 +- test/CodeGen/ARM/alloc-no-stack-realign.ll | 100 +- test/CodeGen/ARM/gpr-paired-spill.ll | 18 +- test/CodeGen/ARM/ifcvt10.ll | 2 - test/CodeGen/ARM/illegal-bitfield-loadstore.ll | 86 +- test/CodeGen/ARM/static-addr-hoisting.ll | 6 +- test/CodeGen/BPF/undef.ll | 65 +- test/CodeGen/MSP430/Inst16mm.ll | 2 +- test/CodeGen/Mips/cconv/arguments-float.ll | 24 +- test/CodeGen/Mips/cconv/arguments-varargs.ll | 44 +- test/CodeGen/Mips/fastcc.ll | 76 +- test/CodeGen/Mips/load-store-left-right.ll | 126 +-- test/CodeGen/Mips/micromips-li.ll | 2 +- test/CodeGen/Mips/mips64-f128-call.ll | 15 +- test/CodeGen/Mips/mips64-f128.ll | 2 +- test/CodeGen/Mips/mno-ldc1-sdc1.ll | 46 +- test/CodeGen/Mips/msa/f16-llvm-ir.ll | 14 +- test/CodeGen/Mips/msa/i5_ld_st.ll | 32 +- test/CodeGen/Mips/o32_cc_byval.ll | 54 +- test/CodeGen/Mips/o32_cc_vararg.ll | 4 +- test/CodeGen/PowerPC/anon_aggr.ll | 59 +- test/CodeGen/PowerPC/complex-return.ll | 12 +- test/CodeGen/PowerPC/jaggedstructs.ll | 52 +- test/CodeGen/PowerPC/ppc64-align-long-double.ll | 41 +- test/CodeGen/PowerPC/structsinmem.ll | 28 +- test/CodeGen/PowerPC/structsinregs.ll | 60 +- test/CodeGen/SystemZ/unaligned-01.ll | 5 +- test/CodeGen/Thumb/2010-07-15-debugOrdering.ll | 2 +- test/CodeGen/Thumb/stack-access.ll | 26 +- test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll | 2 +- test/CodeGen/X86/2012-11-28-merge-store-alias.ll | 2 +- test/CodeGen/X86/MergeConsecutiveStores.ll | 17 +- test/CodeGen/X86/avx-vbroadcast.ll | 18 +- test/CodeGen/X86/avx512-mask-op.ll | 4 - test/CodeGen/X86/chain_order.ll | 4 +- .../CodeGen/X86/clear_upper_vector_element_bits.ll | 400 ++++---- test/CodeGen/X86/combiner-aa-0.ll | 20 - test/CodeGen/X86/combiner-aa-1.ll | 23 - test/CodeGen/X86/copy-eflags.ll | 17 +- test/CodeGen/X86/dag-merge-fast-accesses.ll | 12 +- .../X86/dont-trunc-store-double-to-float.ll | 6 +- .../extractelement-legalization-store-ordering.ll | 15 +- test/CodeGen/X86/i256-add.ll | 350 +++++-- test/CodeGen/X86/i386-shrink-wrapping.ll | 5 +- test/CodeGen/X86/live-range-nosubreg.ll | 5 +- test/CodeGen/X86/longlong-deadload.ll | 2 - test/CodeGen/X86/merge-consecutive-loads-128.ll | 20 +- test/CodeGen/X86/merge-consecutive-loads-256.ll | 8 +- .../X86/merge-store-partially-alias-loads.ll | 8 +- test/CodeGen/X86/pr18023.ll | 31 - test/CodeGen/X86/pr32108.ll | 20 + test/CodeGen/X86/split-store.ll | 27 +- test/CodeGen/X86/stores-merging.ll | 11 +- test/CodeGen/X86/vector-compare-results.ll | 730 +++++++------- test/CodeGen/X86/vector-shuffle-variable-128.ll | 1060 +++++++++----------- 
test/CodeGen/X86/vector-shuffle-variable-256.ll | 233 ++--- test/CodeGen/X86/vectorcall.ll | 4 +- test/CodeGen/X86/win32-eh.ll | 157 +-- test/CodeGen/XCore/varargs.ll | 2 +- 75 files changed, 2542 insertions(+), 2553 deletions(-) delete mode 100644 test/CodeGen/X86/combiner-aa-0.ll delete mode 100644 test/CodeGen/X86/combiner-aa-1.ll delete mode 100644 test/CodeGen/X86/pr18023.ll create mode 100644 test/CodeGen/X86/pr32108.ll diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index b03d35a00b2..736bf1e1e17 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -363,6 +363,9 @@ public: return false; } + /// Returns if it's reasonable to merge stores to MemVT size. + virtual bool canMergeStoresTo(EVT MemVT) const { return true; } + /// \brief Return true if it is cheap to speculate a call to intrinsic cttz. virtual bool isCheapToSpeculateCttz() const { return false; diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index d539bbeb653..30330e4588c 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -52,10 +52,6 @@ STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int"); STATISTIC(SlicedLoads, "Number of load sliced"); namespace { - static cl::opt - CombinerAA("combiner-alias-analysis", cl::Hidden, - cl::desc("Enable DAG combiner alias-analysis heuristics")); - static cl::opt CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden, cl::desc("Enable DAG combiner's use of IR alias analysis")); @@ -133,6 +129,9 @@ namespace { /// Add to the worklist making sure its instance is at the back (next to be /// processed.) void AddToWorklist(SDNode *N) { + assert(N->getOpcode() != ISD::DELETED_NODE && + "Deleted Node added to Worklist"); + // Skip handle nodes as they can't usefully be combined and confuse the // zero-use deletion strategy. if (N->getOpcode() == ISD::HANDLENODE) @@ -177,6 +176,7 @@ namespace { void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO); private: + unsigned MaximumLegalStoreInBits; /// Check the specified integer node value to see if it can be simplified or /// if things it uses can be simplified by bit propagation. @@ -422,15 +422,12 @@ namespace { /// Holds a pointer to an LSBaseSDNode as well as information on where it /// is located in a sequence of memory operations connected by a chain. struct MemOpLink { - MemOpLink (LSBaseSDNode *N, int64_t Offset, unsigned Seq): - MemNode(N), OffsetFromBase(Offset), SequenceNum(Seq) { } + MemOpLink(LSBaseSDNode *N, int64_t Offset) + : MemNode(N), OffsetFromBase(Offset) {} // Ptr to the mem node. LSBaseSDNode *MemNode; // Offset from the base ptr. int64_t OffsetFromBase; - // What is the sequence number of this mem node. - // Lowest mem operand in the DAG starts at zero. - unsigned SequenceNum; }; /// This is a helper function for visitMUL to check the profitability @@ -441,12 +438,6 @@ namespace { SDValue &AddNode, SDValue &ConstNode); - /// This is a helper function for MergeStoresOfConstantsOrVecElts. Returns a - /// constant build_vector of the stored constant values in Stores. - SDValue getMergedConstantVectorStore(SelectionDAG &DAG, const SDLoc &SL, - ArrayRef Stores, - SmallVectorImpl &Chains, - EVT Ty) const; /// This is a helper function for visitAND and visitZERO_EXTEND. Returns /// true if the (and (load x) c) pattern matches an extload. 
ExtVT returns @@ -460,18 +451,15 @@ namespace { /// This is a helper function for MergeConsecutiveStores. When the source /// elements of the consecutive stores are all constants or all extracted /// vector elements, try to merge them into one larger store. - /// \return number of stores that were merged into a merged store (always - /// a prefix of \p StoreNode). - bool MergeStoresOfConstantsOrVecElts( - SmallVectorImpl &StoreNodes, EVT MemVT, unsigned NumStores, - bool IsConstantSrc, bool UseVector); + /// \return True if a merged store was created. + bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl &StoreNodes, + EVT MemVT, unsigned NumStores, + bool IsConstantSrc, bool UseVector); /// This is a helper function for MergeConsecutiveStores. /// Stores that may be merged are placed in StoreNodes. - /// Loads that may alias with those stores are placed in AliasLoadNodes. - void getStoreMergeAndAliasCandidates( - StoreSDNode* St, SmallVectorImpl &StoreNodes, - SmallVectorImpl &AliasLoadNodes); + void getStoreMergeCandidates(StoreSDNode *St, + SmallVectorImpl &StoreNodes); /// Helper function for MergeConsecutiveStores. Checks if /// Candidate stores have indirect dependency through their @@ -483,8 +471,7 @@ namespace { /// This optimization uses wide integers or vectors when possible. /// \return number of stores that were merged into a merged store (the /// affected nodes are stored as a prefix in \p StoreNodes). - bool MergeConsecutiveStores(StoreSDNode *N, - SmallVectorImpl &StoreNodes); + bool MergeConsecutiveStores(StoreSDNode *N); /// \brief Try to transform a truncation where C is a constant: /// (trunc (and X, C)) -> (and (trunc X), (trunc C)) @@ -499,6 +486,13 @@ namespace { : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes), OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) { ForCodeSize = DAG.getMachineFunction().getFunction()->optForSize(); + + MaximumLegalStoreInBits = 0; + for (MVT VT : MVT::all_valuetypes()) + if (EVT(VT).isSimple() && VT != MVT::Other && + TLI.isTypeLegal(EVT(VT)) && + VT.getSizeInBits() >= MaximumLegalStoreInBits) + MaximumLegalStoreInBits = VT.getSizeInBits(); } /// Runs the dag combiner on all nodes in the work list @@ -1589,7 +1583,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { } SmallVector TFs; // List of token factors to visit. - SmallVector Ops; // Ops for replacing token factor. + SmallVector Ops; // Ops for replacing token factor. SmallPtrSet SeenOps; bool Changed = false; // If we should replace this token factor. @@ -1633,6 +1627,86 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { } } + // Remove Nodes that are chained to another node in the list. Do so + // by walking up chains breath-first stopping when we've seen + // another operand. In general we must climb to the EntryNode, but we can exit + // early if we find all remaining work is associated with just one operand as + // no further pruning is possible. + + // List of nodes to search through and original Ops from which they originate. + SmallVector, 8> Worklist; + SmallVector OpWorkCount; // Count of work for each Op. + SmallPtrSet SeenChains; + bool DidPruneOps = false; + + unsigned NumLeftToConsider = 0; + for (const SDValue &Op : Ops) { + Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++)); + OpWorkCount.push_back(1); + } + + auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) { + // If this is an Op, we can remove the op from the list. 
Remark any + // search associated with it as from the current OpNumber. + if (SeenOps.count(Op) != 0) { + Changed = true; + DidPruneOps = true; + unsigned OrigOpNumber = 0; + while (Ops[OrigOpNumber].getNode() != Op && OrigOpNumber < Ops.size()) + OrigOpNumber++; + assert((OrigOpNumber != Ops.size()) && + "expected to find TokenFactor Operand"); + // Re-mark worklist from OrigOpNumber to OpNumber + for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) { + if (Worklist[i].second == OrigOpNumber) { + Worklist[i].second = OpNumber; + } + } + OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber]; + OpWorkCount[OrigOpNumber] = 0; + NumLeftToConsider--; + } + // Add if it's a new chain + if (SeenChains.insert(Op).second) { + OpWorkCount[OpNumber]++; + Worklist.push_back(std::make_pair(Op, OpNumber)); + } + }; + + for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) { + // We need at least be consider at least 2 Ops to prune. + if (NumLeftToConsider <= 1) + break; + auto CurNode = Worklist[i].first; + auto CurOpNumber = Worklist[i].second; + assert((OpWorkCount[CurOpNumber] > 0) && + "Node should not appear in worklist"); + switch (CurNode->getOpcode()) { + case ISD::EntryToken: + // Hitting EntryToken is the only way for the search to terminate without + // hitting + // another operand's search. Prevent us from marking this operand + // considered. + NumLeftToConsider++; + break; + case ISD::TokenFactor: + for (const SDValue &Op : CurNode->op_values()) + AddToWorklist(i, Op.getNode(), CurOpNumber); + break; + case ISD::CopyFromReg: + case ISD::CopyToReg: + AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber); + break; + default: + if (auto *MemNode = dyn_cast(CurNode)) + AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber); + break; + } + OpWorkCount[CurOpNumber]--; + if (OpWorkCount[CurOpNumber] == 0) + NumLeftToConsider--; + } + SDValue Result; // If we've changed things around then replace token factor. @@ -1641,15 +1715,22 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { // The entry token is the only possible outcome. Result = DAG.getEntryNode(); } else { - // New and improved token factor. - Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops); + if (DidPruneOps) { + SmallVector PrunedOps; + // + for (const SDValue &Op : Ops) { + if (SeenChains.count(Op.getNode()) == 0) + PrunedOps.push_back(Op); + } + Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, PrunedOps); + } else { + Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops); + } } - // Add users to worklist if AA is enabled, since it may introduce - // a lot of new chained token factors while removing memory deps. - bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA - : DAG.getSubtarget().useAA(); - return CombineTo(N, Result, UseAA /*add to worklist*/); + // Add users to worklist, since we may introduce a lot of new + // chained token factors while removing memory deps. + return CombineTo(N, Result, true /*add to worklist*/); } return Result; @@ -6792,6 +6873,9 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) { SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads); + // Simplify TF. 
+ AddToWorklist(NewChain.getNode()); + CombineTo(N, NewValue); // Replace uses of the original load (before extension) @@ -10947,7 +11031,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { dbgs() << "\n"); WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain); - + AddUsersToWorklist(Chain.getNode()); if (N->use_empty()) deleteAndRecombine(N); @@ -11000,7 +11084,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { StoreSDNode *PrevST = cast(Chain); if (PrevST->getBasePtr() == Ptr && PrevST->getValue().getValueType() == N->getValueType(0)) - return CombineTo(N, Chain.getOperand(1), Chain); + return CombineTo(N, PrevST->getOperand(1), Chain); } } @@ -11018,14 +11102,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { } } - bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA - : DAG.getSubtarget().useAA(); -#ifndef NDEBUG - if (CombinerAAOnlyFunc.getNumOccurrences() && - CombinerAAOnlyFunc != DAG.getMachineFunction().getName()) - UseAA = false; -#endif - if (UseAA && LD->isUnindexed()) { + if (LD->isUnindexed()) { // Walk up chain skipping non-aliasing memory nodes. SDValue BetterChain = FindBetterChain(N, Chain); @@ -11607,6 +11684,7 @@ bool DAGCombiner::SliceUpLoad(SDNode *N) { SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other, ArgChains); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain); + AddToWorklist(Chain.getNode()); return true; } @@ -12000,20 +12078,6 @@ bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode, return false; } -SDValue DAGCombiner::getMergedConstantVectorStore( - SelectionDAG &DAG, const SDLoc &SL, ArrayRef Stores, - SmallVectorImpl &Chains, EVT Ty) const { - SmallVector BuildVector; - - for (unsigned I = 0, E = Ty.getVectorNumElements(); I != E; ++I) { - StoreSDNode *St = cast(Stores[I].MemNode); - Chains.push_back(St->getChain()); - BuildVector.push_back(St->getValue()); - } - - return DAG.getBuildVector(Ty, SL, BuildVector); -} - bool DAGCombiner::MergeStoresOfConstantsOrVecElts( SmallVectorImpl &StoreNodes, EVT MemVT, unsigned NumStores, bool IsConstantSrc, bool UseVector) { @@ -12022,22 +12086,8 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( return false; int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8; - LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; - unsigned LatestNodeUsed = 0; - - for (unsigned i=0; i < NumStores; ++i) { - // Find a chain for the new wide-store operand. Notice that some - // of the store nodes that we found may not be selected for inclusion - // in the wide store. The chain we use needs to be the chain of the - // latest store node which is *used* and replaced by the wide store. - if (StoreNodes[i].SequenceNum < StoreNodes[LatestNodeUsed].SequenceNum) - LatestNodeUsed = i; - } - - SmallVector Chains; // The latest Node in the DAG. 
- LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode; SDLoc DL(StoreNodes[0].MemNode); SDValue StoredVal; @@ -12053,7 +12103,18 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( assert(TLI.isTypeLegal(Ty) && "Illegal vector store"); if (IsConstantSrc) { - StoredVal = getMergedConstantVectorStore(DAG, DL, StoreNodes, Chains, Ty); + SmallVector BuildVector; + for (unsigned I = 0, E = Ty.getVectorNumElements(); I != E; ++I) { + StoreSDNode *St = cast(StoreNodes[I].MemNode); + SDValue Val = St->getValue(); + if (MemVT.getScalarType().isInteger()) + if (auto *CFP = dyn_cast(St->getValue())) + Val = DAG.getConstant( + (uint32_t)CFP->getValueAPF().bitcastToAPInt().getZExtValue(), + SDLoc(CFP), MemVT); + BuildVector.push_back(Val); + } + StoredVal = DAG.getBuildVector(Ty, DL, BuildVector); } else { SmallVector Ops; for (unsigned i = 0; i < NumStores; ++i) { @@ -12063,7 +12124,6 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( if (Val.getValueType() != MemVT) return false; Ops.push_back(Val); - Chains.push_back(St->getChain()); } // Build the extracted vector elements back into a vector. @@ -12083,7 +12143,6 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( for (unsigned i = 0; i < NumStores; ++i) { unsigned Idx = IsLE ? (NumStores - 1 - i) : i; StoreSDNode *St = cast(StoreNodes[Idx].MemNode); - Chains.push_back(St->getChain()); SDValue Val = St->getValue(); StoreInt <<= ElementSizeBytes * 8; @@ -12101,54 +12160,36 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( StoredVal = DAG.getConstant(StoreInt, DL, StoreTy); } - assert(!Chains.empty()); + SmallVector Chains; + + // Gather all Chains we're inheriting. As generally all chains are + // equal, do minor check to remove obvious redundancies. + Chains.push_back(StoreNodes[0].MemNode->getChain()); + for (unsigned i = 1; i < NumStores; ++i) + if (StoreNodes[0].MemNode->getChain() != StoreNodes[i].MemNode->getChain()) + Chains.push_back(StoreNodes[i].MemNode->getChain()); + LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); SDValue NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(), FirstInChain->getPointerInfo(), FirstInChain->getAlignment()); - bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA - : DAG.getSubtarget().useAA(); - if (UseAA) { - // Replace all merged stores with the new store. - for (unsigned i = 0; i < NumStores; ++i) - CombineTo(StoreNodes[i].MemNode, NewStore); - } else { - // Replace the last store with the new store. - CombineTo(LatestOp, NewStore); - // Erase all other stores. - for (unsigned i = 0; i < NumStores; ++i) { - if (StoreNodes[i].MemNode == LatestOp) - continue; - StoreSDNode *St = cast(StoreNodes[i].MemNode); - // ReplaceAllUsesWith will replace all uses that existed when it was - // called, but graph optimizations may cause new ones to appear. For - // example, the case in pr14333 looks like - // - // St's chain -> St -> another store -> X - // - // And the only difference from St to the other store is the chain. - // When we change it's chain to be St's chain they become identical, - // get CSEed and the net result is that X is now a use of St. - // Since we know that St is redundant, just iterate. - while (!St->use_empty()) - DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain()); - deleteAndRecombine(St); - } - } + // Replace all merged stores with the new store. 
+ for (unsigned i = 0; i < NumStores; ++i) + CombineTo(StoreNodes[i].MemNode, NewStore); - StoreNodes.erase(StoreNodes.begin() + NumStores, StoreNodes.end()); + AddToWorklist(NewChain.getNode()); return true; } -void DAGCombiner::getStoreMergeAndAliasCandidates( - StoreSDNode* St, SmallVectorImpl &StoreNodes, - SmallVectorImpl &AliasLoadNodes) { +void DAGCombiner::getStoreMergeCandidates( + StoreSDNode *St, SmallVectorImpl &StoreNodes) { // This holds the base pointer, index, and the offset in bytes from the base // pointer. BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG); + EVT MemVT = St->getMemoryVT(); // We must have a base and an offset. if (!BasePtr.Base.getNode()) @@ -12158,104 +12199,70 @@ void DAGCombiner::getStoreMergeAndAliasCandidates( if (BasePtr.Base.isUndef()) return; - // Walk up the chain and look for nodes with offsets from the same - // base pointer. Stop when reaching an instruction with a different kind - // or instruction which has a different base pointer. - EVT MemVT = St->getMemoryVT(); - unsigned Seq = 0; - StoreSDNode *Index = St; - - - bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA - : DAG.getSubtarget().useAA(); - - if (UseAA) { - // Look at other users of the same chain. Stores on the same chain do not - // alias. If combiner-aa is enabled, non-aliasing stores are canonicalized - // to be on the same chain, so don't bother looking at adjacent chains. - - SDValue Chain = St->getChain(); - for (auto I = Chain->use_begin(), E = Chain->use_end(); I != E; ++I) { - if (StoreSDNode *OtherST = dyn_cast(*I)) { - if (I.getOperandNo() != 0) - continue; - - if (OtherST->isVolatile() || OtherST->isIndexed()) - continue; - - if (OtherST->getMemoryVT() != MemVT) - continue; - - BaseIndexOffset Ptr = BaseIndexOffset::match(OtherST->getBasePtr(), DAG); - - if (Ptr.equalBaseIndex(BasePtr)) - StoreNodes.push_back(MemOpLink(OtherST, Ptr.Offset, Seq++)); - } - } - - return; - } - - while (Index) { - // If the chain has more than one use, then we can't reorder the mem ops. - if (Index != St && !SDValue(Index, 0)->hasOneUse()) - break; - - // Find the base pointer and offset for this memory node. - BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr(), DAG); - - // Check that the base pointer is the same as the original one. - if (!Ptr.equalBaseIndex(BasePtr)) - break; - - // The memory operands must not be volatile. - if (Index->isVolatile() || Index->isIndexed()) - break; - - // No truncation. - if (Index->isTruncatingStore()) - break; + // We looking for a root node which is an ancestor to all mergable + // stores. We search up through a load, to our root and then down + // through all children. For instance we will find Store{1,2,3} if + // St is Store1, Store2. or Store3 where the root is not a load + // which always true for nonvolatile ops. TODO: Expand + // the search to find all valid candidates through multiple layers of loads. + // + // Root + // |-------|-------| + // Load Load Store3 + // | | + // Store1 Store2 + // + // FIXME: We should be able to climb and + // descend TokenFactors to find candidates as well. - // The stored memory type must be the same. - if (Index->getMemoryVT() != MemVT) - break; + SDNode *RootNode = (St->getChain()).getNode(); - // We do not allow under-aligned stores in order to prevent - // overriding stores. NOTE: this is a bad hack. Alignment SHOULD - // be irrelevant here; what MATTERS is that we not move memory - // operations that potentially overlap past each-other. 
- if (Index->getAlignment() < MemVT.getStoreSize()) - break; + // Set of Parents of Candidates + std::set CandidateParents; - // We found a potential memory operand to merge. - StoreNodes.push_back(MemOpLink(Index, Ptr.Offset, Seq++)); + if (LoadSDNode *Ldn = dyn_cast(RootNode)) { + RootNode = Ldn->getChain().getNode(); + for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I) + if (I.getOperandNo() == 0 && isa(*I)) // walk down chain + CandidateParents.insert(*I); + } else + CandidateParents.insert(RootNode); + + bool IsLoadSrc = isa(St->getValue()); + bool IsConstantSrc = isa(St->getValue()) || + isa(St->getValue()); + bool IsExtractVecSrc = + (St->getValue().getOpcode() == ISD::EXTRACT_VECTOR_ELT || + St->getValue().getOpcode() == ISD::EXTRACT_SUBVECTOR); + auto CorrectValueKind = [&](StoreSDNode *Other) -> bool { + if (IsLoadSrc) + return isa(Other->getValue()); + if (IsConstantSrc) + return (isa(Other->getValue()) || + isa(Other->getValue())); + if (IsExtractVecSrc) + return (Other->getValue().getOpcode() == ISD::EXTRACT_VECTOR_ELT || + Other->getValue().getOpcode() == ISD::EXTRACT_SUBVECTOR); + return false; + }; - // Find the next memory operand in the chain. If the next operand in the - // chain is a store then move up and continue the scan with the next - // memory operand. If the next operand is a load save it and use alias - // information to check if it interferes with anything. - SDNode *NextInChain = Index->getChain().getNode(); - while (1) { - if (StoreSDNode *STn = dyn_cast(NextInChain)) { - // We found a store node. Use it for the next iteration. - Index = STn; - break; - } else if (LoadSDNode *Ldn = dyn_cast(NextInChain)) { - if (Ldn->isVolatile()) { - Index = nullptr; - break; + // check all parents of mergable children + for (auto P = CandidateParents.begin(); P != CandidateParents.end(); ++P) + for (auto I = (*P)->use_begin(), E = (*P)->use_end(); I != E; ++I) + if (I.getOperandNo() == 0) + if (StoreSDNode *OtherST = dyn_cast(*I)) { + if (OtherST->isVolatile() || OtherST->isIndexed()) + continue; + // We can merge constant floats to equivalent integers + if (OtherST->getMemoryVT() != MemVT) + if (!(MemVT.isInteger() && MemVT.bitsEq(OtherST->getMemoryVT()) && + isa(OtherST->getValue()))) + continue; + BaseIndexOffset Ptr = + BaseIndexOffset::match(OtherST->getBasePtr(), DAG); + if (Ptr.equalBaseIndex(BasePtr) && CorrectValueKind(OtherST)) + StoreNodes.push_back(MemOpLink(OtherST, Ptr.Offset)); } - - // Save the load node for later. Continue the scan. - AliasLoadNodes.push_back(Ldn); - NextInChain = Ldn->getChain().getNode(); - continue; - } else { - Index = nullptr; - break; - } - } - } } // We need to check that merging these stores does not cause a loop @@ -12282,13 +12289,16 @@ bool DAGCombiner::checkMergeStoreCandidatesForDependencies( return true; } -bool DAGCombiner::MergeConsecutiveStores( - StoreSDNode* St, SmallVectorImpl &StoreNodes) { +bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { if (OptLevel == CodeGenOpt::None) return false; EVT MemVT = St->getMemoryVT(); int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8; + + if (MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits) + return false; + bool NoVectors = DAG.getMachineFunction().getFunction()->hasFnAttribute( Attribute::NoImplicitFloat); @@ -12317,145 +12327,136 @@ bool DAGCombiner::MergeConsecutiveStores( if (MemVT.isVector() && IsLoadSrc) return false; - // Only look at ends of store sequences. 
- SDValue Chain = SDValue(St, 0); - if (Chain->hasOneUse() && Chain->use_begin()->getOpcode() == ISD::STORE) - return false; - - // Save the LoadSDNodes that we find in the chain. - // We need to make sure that these nodes do not interfere with - // any of the store nodes. - SmallVector AliasLoadNodes; - - getStoreMergeAndAliasCandidates(St, StoreNodes, AliasLoadNodes); + SmallVector StoreNodes; + // Find potential store merge candidates by searching through chain sub-DAG + getStoreMergeCandidates(St, StoreNodes); // Check if there is anything to merge. if (StoreNodes.size() < 2) return false; - // only do dependence check in AA case - bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA - : DAG.getSubtarget().useAA(); - if (UseAA && !checkMergeStoreCandidatesForDependencies(StoreNodes)) + // Check that we can merge these candidates without causing a cycle + if (!checkMergeStoreCandidatesForDependencies(StoreNodes)) return false; // Sort the memory operands according to their distance from the - // base pointer. As a secondary criteria: make sure stores coming - // later in the code come first in the list. This is important for - // the non-UseAA case, because we're merging stores into the FINAL - // store along a chain which potentially contains aliasing stores. - // Thus, if there are multiple stores to the same address, the last - // one can be considered for merging but not the others. + // base pointer. std::sort(StoreNodes.begin(), StoreNodes.end(), [](MemOpLink LHS, MemOpLink RHS) { - return LHS.OffsetFromBase < RHS.OffsetFromBase || - (LHS.OffsetFromBase == RHS.OffsetFromBase && - LHS.SequenceNum < RHS.SequenceNum); - }); + return LHS.OffsetFromBase < RHS.OffsetFromBase; + }); // Scan the memory operations on the chain and find the first non-consecutive // store memory address. - unsigned LastConsecutiveStore = 0; + unsigned NumConsecutiveStores = 0; int64_t StartAddress = StoreNodes[0].OffsetFromBase; - for (unsigned i = 0, e = StoreNodes.size(); i < e; ++i) { - - // Check that the addresses are consecutive starting from the second - // element in the list of stores. - if (i > 0) { - int64_t CurrAddress = StoreNodes[i].OffsetFromBase; - if (CurrAddress - StartAddress != (ElementSizeBytes * i)) - break; - } - // Check if this store interferes with any of the loads that we found. - // If we find a load that alias with this store. Stop the sequence. - if (any_of(AliasLoadNodes, [&](LSBaseSDNode *Ldn) { - return isAlias(Ldn, StoreNodes[i].MemNode); - })) + // Check that the addresses are consecutive starting from the second + // element in the list of stores. + for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) { + int64_t CurrAddress = StoreNodes[i].OffsetFromBase; + if (CurrAddress - StartAddress != (ElementSizeBytes * i)) break; - - // Mark this node as useful. - LastConsecutiveStore = i; + NumConsecutiveStores = i + 1; } + if (NumConsecutiveStores < 2) + return false; + // The node with the lowest store address. - LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; - unsigned FirstStoreAS = FirstInChain->getAddressSpace(); - unsigned FirstStoreAlign = FirstInChain->getAlignment(); LLVMContext &Context = *DAG.getContext(); const DataLayout &DL = DAG.getDataLayout(); // Store the constants into memory as one consecutive store. 
if (IsConstantSrc) { - unsigned LastLegalType = 0; - unsigned LastLegalVectorType = 0; - bool NonZero = false; - for (unsigned i=0; i(StoreNodes[i].MemNode); - SDValue StoredVal = St->getValue(); - - if (ConstantSDNode *C = dyn_cast(StoredVal)) { - NonZero |= !C->isNullValue(); - } else if (ConstantFPSDNode *C = dyn_cast(StoredVal)) { - NonZero |= !C->getConstantFPValue()->isNullValue(); - } else { - // Non-constant. - break; - } + bool RV = false; + while (NumConsecutiveStores > 1) { + LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; + unsigned FirstStoreAS = FirstInChain->getAddressSpace(); + unsigned FirstStoreAlign = FirstInChain->getAlignment(); + unsigned LastLegalType = 0; + unsigned LastLegalVectorType = 0; + bool NonZero = false; + for (unsigned i = 0; i < NumConsecutiveStores; ++i) { + StoreSDNode *ST = cast(StoreNodes[i].MemNode); + SDValue StoredVal = ST->getValue(); + + if (ConstantSDNode *C = dyn_cast(StoredVal)) { + NonZero |= !C->isNullValue(); + } else if (ConstantFPSDNode *C = + dyn_cast(StoredVal)) { + NonZero |= !C->getConstantFPValue()->isNullValue(); + } else { + // Non-constant. + break; + } - // Find a legal type for the constant store. - unsigned SizeInBits = (i+1) * ElementSizeBytes * 8; - EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits); - bool IsFast; - if (TLI.isTypeLegal(StoreTy) && - TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, - FirstStoreAlign, &IsFast) && IsFast) { - LastLegalType = i+1; - // Or check whether a truncstore is legal. - } else if (TLI.getTypeAction(Context, StoreTy) == - TargetLowering::TypePromoteInteger) { - EVT LegalizedStoredValueTy = - TLI.getTypeToTransformTo(Context, StoredVal.getValueType()); - if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) && - TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy, - FirstStoreAS, FirstStoreAlign, &IsFast) && + // Find a legal type for the constant store. + unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8; + EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits); + bool IsFast = false; + if (TLI.isTypeLegal(StoreTy) && + TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, + FirstStoreAlign, &IsFast) && IsFast) { LastLegalType = i + 1; + // Or check whether a truncstore is legal. + } else if (TLI.getTypeAction(Context, StoreTy) == + TargetLowering::TypePromoteInteger) { + EVT LegalizedStoredValueTy = + TLI.getTypeToTransformTo(Context, StoredVal.getValueType()); + if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) && + TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy, + FirstStoreAS, FirstStoreAlign, &IsFast) && + IsFast) { + LastLegalType = i + 1; + } } - } - // We only use vectors if the constant is known to be zero or the target - // allows it and the function is not marked with the noimplicitfloat - // attribute. - if ((!NonZero || TLI.storeOfVectorConstantIsCheap(MemVT, i+1, - FirstStoreAS)) && - !NoVectors) { - // Find a legal type for the vector store. - EVT Ty = EVT::getVectorVT(Context, MemVT, i+1); - if (TLI.isTypeLegal(Ty) && - TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS, - FirstStoreAlign, &IsFast) && IsFast) - LastLegalVectorType = i + 1; + // We only use vectors if the constant is known to be zero or the target + // allows it and the function is not marked with the noimplicitfloat + // attribute. + if ((!NonZero || + TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) && + !NoVectors) { + // Find a legal type for the vector store. 
+ EVT Ty = EVT::getVectorVT(Context, MemVT, i + 1); + if (TLI.isTypeLegal(Ty) && TLI.canMergeStoresTo(Ty) && + TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS, + FirstStoreAlign, &IsFast) && + IsFast) + LastLegalVectorType = i + 1; + } } - } - // Check if we found a legal integer type to store. - if (LastLegalType == 0 && LastLegalVectorType == 0) - return false; + // Check if we found a legal integer type that creates a meaningful merge. + if (LastLegalType < 2 && LastLegalVectorType < 2) + break; - bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors; - unsigned NumElem = UseVector ? LastLegalVectorType : LastLegalType; + bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors; + unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType; - return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem, - true, UseVector); + bool Merged = MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem, + true, UseVector); + if (!Merged) + break; + // Remove merged stores for next iteration. + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); + RV = true; + NumConsecutiveStores -= NumElem; + } + return RV; } // When extracting multiple vector elements, try to store them // in one vector store rather than a sequence of scalar stores. if (IsExtractVecSrc) { + LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; + unsigned FirstStoreAS = FirstInChain->getAddressSpace(); + unsigned FirstStoreAlign = FirstInChain->getAlignment(); unsigned NumStoresToMerge = 0; bool IsVec = MemVT.isVector(); - for (unsigned i = 0; i < LastConsecutiveStore + 1; ++i) { + for (unsigned i = 0; i < NumConsecutiveStores; ++i) { StoreSDNode *St = cast(StoreNodes[i].MemNode); unsigned StoreValOpcode = St->getValue().getOpcode(); // This restriction could be loosened. @@ -12495,7 +12496,7 @@ bool DAGCombiner::MergeConsecutiveStores( // Find acceptable loads. Loads need to have the same chain (token factor), // must not be zext, volatile, indexed, and they must be consecutive. BaseIndexOffset LdBasePtr; - for (unsigned i=0; i(StoreNodes[i].MemNode); LoadSDNode *Ld = dyn_cast(St->getValue()); if (!Ld) break; @@ -12528,7 +12529,7 @@ bool DAGCombiner::MergeConsecutiveStores( } // We found a potential memory operand to merge. - LoadNodes.push_back(MemOpLink(Ld, LdPtr.Offset, 0)); + LoadNodes.push_back(MemOpLink(Ld, LdPtr.Offset)); } if (LoadNodes.size() < 2) @@ -12540,7 +12541,9 @@ bool DAGCombiner::MergeConsecutiveStores( if (LoadNodes.size() == 2 && TLI.hasPairedLoad(MemVT, RequiredAlignment) && St->getAlignment() >= RequiredAlignment) return false; - + LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; + unsigned FirstStoreAS = FirstInChain->getAddressSpace(); + unsigned FirstStoreAlign = FirstInChain->getAlignment(); LoadSDNode *FirstLoad = cast(LoadNodes[0].MemNode); unsigned FirstLoadAS = FirstLoad->getAddressSpace(); unsigned FirstLoadAlign = FirstLoad->getAlignment(); @@ -12609,30 +12612,19 @@ bool DAGCombiner::MergeConsecutiveStores( // We add +1 here because the LastXXX variables refer to location while // the NumElem refers to array/index size. - unsigned NumElem = std::min(LastConsecutiveStore, LastConsecutiveLoad) + 1; + unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1); NumElem = std::min(LastLegalType, NumElem); if (NumElem < 2) return false; - // Collect the chains from all merged stores. + // Collect the chains from all merged stores. Because the common case + // all chains are the same, check if we match the first Chain. 
SmallVector MergeStoreChains; MergeStoreChains.push_back(StoreNodes[0].MemNode->getChain()); - - // The latest Node in the DAG. - unsigned LatestNodeUsed = 0; - for (unsigned i=1; igetChain()); - } - - LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode; + for (unsigned i = 1; i < NumElem; ++i) + if (StoreNodes[0].MemNode->getChain() != StoreNodes[i].MemNode->getChain()) + MergeStoreChains.push_back(StoreNodes[i].MemNode->getChain()); // Find if it is better to use vectors or integers to load and store // to memory. @@ -12656,6 +12648,8 @@ bool DAGCombiner::MergeConsecutiveStores( SDValue NewStoreChain = DAG.getNode(ISD::TokenFactor, StoreDL, MVT::Other, MergeStoreChains); + AddToWorklist(NewStoreChain.getNode()); + SDValue NewStore = DAG.getStore(NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(), FirstInChain->getPointerInfo(), FirstStoreAlign); @@ -12667,25 +12661,9 @@ bool DAGCombiner::MergeConsecutiveStores( SDValue(NewLoad.getNode(), 1)); } - if (UseAA) { - // Replace the all stores with the new store. - for (unsigned i = 0; i < NumElem; ++i) - CombineTo(StoreNodes[i].MemNode, NewStore); - } else { - // Replace the last store with the new store. - CombineTo(LatestOp, NewStore); - // Erase all other stores. - for (unsigned i = 0; i < NumElem; ++i) { - // Remove all Store nodes. - if (StoreNodes[i].MemNode == LatestOp) - continue; - StoreSDNode *St = cast(StoreNodes[i].MemNode); - DAG.ReplaceAllUsesOfValueWith(SDValue(St, 0), St->getChain()); - deleteAndRecombine(St); - } - } - - StoreNodes.erase(StoreNodes.begin() + NumElem, StoreNodes.end()); + // Replace the all stores with the new store. + for (unsigned i = 0; i < NumElem; ++i) + CombineTo(StoreNodes[i].MemNode, NewStore); return true; } @@ -12842,19 +12820,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { if (SDValue NewST = TransformFPLoadStorePair(N)) return NewST; - bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA - : DAG.getSubtarget().useAA(); -#ifndef NDEBUG - if (CombinerAAOnlyFunc.getNumOccurrences() && - CombinerAAOnlyFunc != DAG.getMachineFunction().getName()) - UseAA = false; -#endif - if (UseAA && ST->isUnindexed()) { - // FIXME: We should do this even without AA enabled. AA will just allow - // FindBetterChain to work in more situations. The problem with this is that - // any combine that expects memory operations to be on consecutive chains - // first needs to be updated to look for users of the same chain. - + if (ST->isUnindexed()) { // Walk up chain skipping non-aliasing memory nodes, on this store and any // adjacent stores. if (findBetterNeighborChains(ST)) { @@ -12888,8 +12854,15 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { if (SimplifyDemandedBits( Value, APInt::getLowBitsSet(Value.getScalarValueSizeInBits(), - ST->getMemoryVT().getScalarSizeInBits()))) + ST->getMemoryVT().getScalarSizeInBits()))) { + // Re-visit the store if anything changed and the store hasn't been merged + // with another node (N is deleted) SimplifyDemandedBits will add Value's + // node back to the worklist if necessary, but we also need to re-visit + // the Store node itself. + if (N->getOpcode() != ISD::DELETED_NODE) + AddToWorklist(N); return SDValue(N, 0); + } } // If this is a load followed by a store to the same location, then the store @@ -12933,15 +12906,12 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // There can be multiple store sequences on the same chain. // Keep trying to merge store sequences until we are unable to do so // or until we merge the last store on the chain. 
- SmallVector StoreNodes; - bool Changed = MergeConsecutiveStores(ST, StoreNodes); + bool Changed = MergeConsecutiveStores(ST); if (!Changed) break; - - if (any_of(StoreNodes, - [ST](const MemOpLink &Link) { return Link.MemNode == ST; })) { - // ST has been merged and no longer exists. + // Return N as merge only uses CombineTo and no worklist clean + // up is necessary. + if (N->getOpcode() == ISD::DELETED_NODE || !isa(N)) return SDValue(N, 0); - } } } @@ -12950,7 +12920,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // Make sure to do this only after attempting to merge stores in order to // avoid changing the types of some subset of stores due to visit order, // preventing their merging. - if (isa(Value)) { + if (isa(ST->getValue())) { if (SDValue NewSt = replaceStoreOfFPConstant(ST)) return NewSt; } @@ -13887,6 +13857,35 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { if (ISD::allOperandsUndef(N)) return DAG.getUNDEF(VT); + // Check if we can express BUILD VECTOR via subvector extract. + if (!LegalTypes && (N->getNumOperands() > 1)) { + SDValue Op0 = N->getOperand(0); + auto checkElem = [&](SDValue Op) -> uint64_t { + if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) && + (Op0.getOperand(0) == Op.getOperand(0))) + if (auto CNode = dyn_cast(Op.getOperand(1))) + return CNode->getZExtValue(); + return -1; + }; + + int Offset = checkElem(Op0); + for (unsigned i = 0; i < N->getNumOperands(); ++i) { + if (Offset + i != checkElem(N->getOperand(i))) { + Offset = -1; + break; + } + } + + if ((Offset == 0) && + (Op0.getOperand(0).getValueType() == N->getValueType(0))) + return Op0.getOperand(0); + if ((Offset != -1) && + ((Offset % N->getValueType(0).getVectorNumElements()) == + 0)) // IDX must be multiple of output size. + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0), + Op0.getOperand(0), Op0.getOperand(1)); + } + if (SDValue V = reduceBuildVecExtToExtBuildVec(N)) return V; @@ -15983,7 +15982,7 @@ static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset, if (Base.getOpcode() == ISD::ADD) { if (ConstantSDNode *C = dyn_cast(Base.getOperand(1))) { Base = Base.getOperand(0); - Offset += C->getZExtValue(); + Offset += C->getSExtValue(); } } @@ -16180,6 +16179,12 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, ++Depth; break; + case ISD::CopyFromReg: + // Forward past CopyFromReg. + Chains.push_back(Chain.getOperand(0)); + ++Depth; + break; + default: // For all other instructions we will just have to take what we can get. Aliases.push_back(Chain); @@ -16208,6 +16213,18 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) { return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases); } +// This function tries to collect a bunch of potentially interesting +// nodes to improve the chains of, all at once. This might seem +// redundant, as this function gets called when visiting every store +// node, so why not let the work be done on each store as it's visited? +// +// I believe this is mainly important because MergeConsecutiveStores +// is unable to deal with merging stores of different sizes, so unless +// we improve the chains of all the potential candidates up-front +// before running MergeConsecutiveStores, it might only see some of +// the nodes that will eventually be candidates, and then not be able +// to go from a partially-merged state to the desired final +// fully-merged state. 
bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) { // This holds the base pointer, index, and the offset in bytes from the base // pointer. @@ -16243,10 +16260,8 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) { if (!Ptr.equalBaseIndex(BasePtr)) break; - // Find the next memory operand in the chain. If the next operand in the - // chain is a store then move up and continue the scan with the next - // memory operand. If the next operand is a load save it and use alias - // information to check if it interferes with anything. + // Walk up the chain to find the next store node, ignoring any + // intermediate loads. Any other kind of node will halt the loop. SDNode *NextInChain = Index->getChain().getNode(); while (true) { if (StoreSDNode *STn = dyn_cast(NextInChain)) { @@ -16265,9 +16280,14 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) { Index = nullptr; break; } - } + } // end while } + // At this point, ChainedStores lists all of the Store nodes + // reachable by iterating up through chain nodes matching the above + // conditions. For each such store identified, try to find an + // earlier chain to attach the store to which won't violate the + // required ordering. bool MadeChangeToSt = false; SmallVector, 8> BetterChains; diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp index 5f021a17612..518417dd11e 100644 --- a/lib/CodeGen/TargetLoweringBase.cpp +++ b/lib/CodeGen/TargetLoweringBase.cpp @@ -850,7 +850,7 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) { MinFunctionAlignment = 0; PrefFunctionAlignment = 0; PrefLoopAlignment = 0; - GatherAllAliasesMaxDepth = 6; + GatherAllAliasesMaxDepth = 18; MinStackArgumentAlignment = 1; // TODO: the default will be switched to 0 in the next commit, along // with the Target-specific changes necessary. diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 8a5b6e348b9..dba9946f3b2 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -9338,7 +9338,7 @@ static SDValue performSTORECombine(SDNode *N, return SDValue(); } - /// This function handles the log2-shuffle pattern produced by the +/// This function handles the log2-shuffle pattern produced by the /// LoopVectorizer for the across vector reduction. It consists of /// log2(NumVectorElements) steps and, in each step, 2^(s) elements /// are reduced, where s is an induction variable from 0 to diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index a20de3b5716..b1a954c5813 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -500,6 +500,11 @@ class InstrItineraryData; bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx, unsigned &Cost) const override; + bool canMergeStoresTo(EVT MemVT) const override { + // Do not merge to larger than i32. + return (MemVT.getSizeInBits() <= 32); + } + bool isCheapToSpeculateCttz() const override; bool isCheapToSpeculateCtlz() const override; diff --git a/test/CodeGen/AArch64/argument-blocks.ll b/test/CodeGen/AArch64/argument-blocks.ll index 3169abc2dcb..b5374ca8ced 100644 --- a/test/CodeGen/AArch64/argument-blocks.ll +++ b/test/CodeGen/AArch64/argument-blocks.ll @@ -59,10 +59,10 @@ define i64 @test_hfa_ignores_gprs([7 x float], [2 x float] %in, i64, i64 %res) { } ; [2 x float] should not be promoted to double by the Darwin varargs handling, -; but should go in an 8-byte aligned slot. 
+; but should go in an 8-byte aligned slot and can be merged as integer stores. define void @test_varargs_stackalign() { ; CHECK-LABEL: test_varargs_stackalign: -; CHECK-DARWINPCS: stp {{w[0-9]+}}, {{w[0-9]+}}, [sp, #16] +; CHECK-DARWINPCS: str {{x[0-9]+}}, [sp, #16] call void(...) @callee([3 x float] undef, [2 x float] [float 1.0, float 2.0]) ret void diff --git a/test/CodeGen/AArch64/arm64-abi.ll b/test/CodeGen/AArch64/arm64-abi.ll index fb52b1d99fc..6cf0ab35b9b 100644 --- a/test/CodeGen/AArch64/arm64-abi.ll +++ b/test/CodeGen/AArch64/arm64-abi.ll @@ -205,10 +205,7 @@ declare i32 @args_i32(i32, i32, i32, i32, i32, i32, i32, i32, i16 signext, i32, define i32 @test8(i32 %argc, i8** nocapture %argv) nounwind { entry: ; CHECK-LABEL: test8 -; CHECK: strb {{w[0-9]+}}, [sp, #3] -; CHECK: strb wzr, [sp, #2] -; CHECK: strb {{w[0-9]+}}, [sp, #1] -; CHECK: strb wzr, [sp] +; CHECK: str w8, [sp] ; CHECK: bl ; FAST-LABEL: test8 ; FAST: strb {{w[0-9]+}}, [sp] diff --git a/test/CodeGen/AArch64/arm64-memset-inline.ll b/test/CodeGen/AArch64/arm64-memset-inline.ll index 8f22f97ca08..384aaa8541d 100644 --- a/test/CodeGen/AArch64/arm64-memset-inline.ll +++ b/test/CodeGen/AArch64/arm64-memset-inline.ll @@ -13,8 +13,8 @@ define void @t2() nounwind ssp { entry: ; CHECK-LABEL: t2: ; CHECK: strh wzr, [sp, #32] -; CHECK: stp xzr, xzr, [sp, #16] -; CHECK: str xzr, [sp, #8] +; CHECK: stp xzr, xzr, [sp, #8] +; CHECK: str xzr, [sp, #24] %buf = alloca [26 x i8], align 1 %0 = getelementptr inbounds [26 x i8], [26 x i8]* %buf, i32 0, i32 0 call void @llvm.memset.p0i8.i32(i8* %0, i8 0, i32 26, i32 1, i1 false) diff --git a/test/CodeGen/AArch64/arm64-variadic-aapcs.ll b/test/CodeGen/AArch64/arm64-variadic-aapcs.ll index 16ddf690fe9..375877c5179 100644 --- a/test/CodeGen/AArch64/arm64-variadic-aapcs.ll +++ b/test/CodeGen/AArch64/arm64-variadic-aapcs.ll @@ -99,7 +99,7 @@ define void @test_nospare([8 x i64], [8 x float], ...) { ; __stack field should point just past them. define void @test_offsetstack([8 x i64], [2 x i64], [3 x float], ...) { ; CHECK-LABEL: test_offsetstack: -; CHECK: sub sp, sp, #80 +; CHECK: stp {{q[0-9]+}}, {{q[0-9]+}}, [sp, #-80]! ; CHECK: add [[STACK_TOP:x[0-9]+]], sp, #96 ; CHECK: add x[[VAR:[0-9]+]], {{x[0-9]+}}, :lo12:var ; CHECK: str [[STACK_TOP]], [x[[VAR]]] diff --git a/test/CodeGen/AArch64/merge-store.ll b/test/CodeGen/AArch64/merge-store.ll index 1d0196ad521..1d26e4a42b1 100644 --- a/test/CodeGen/AArch64/merge-store.ll +++ b/test/CodeGen/AArch64/merge-store.ll @@ -4,8 +4,7 @@ @g0 = external global <3 x float>, align 16 @g1 = external global <3 x float>, align 4 -; CHECK: ldr s[[R0:[0-9]+]], {{\[}}[[R1:x[0-9]+]]{{\]}}, #4 -; CHECK: ld1{{\.?s?}} { v[[R0]]{{\.?s?}} }[1], {{\[}}[[R1]]{{\]}} +; CHECK: ldr q[[R0:[0-9]+]], {{\[}}[[R1:x[0-9]+]], :lo12:g0 ; CHECK: str d[[R0]] define void @blam() { diff --git a/test/CodeGen/AArch64/vector_merge_dep_check.ll b/test/CodeGen/AArch64/vector_merge_dep_check.ll index 9220947e836..e4e64ef8c8d 100644 --- a/test/CodeGen/AArch64/vector_merge_dep_check.ll +++ b/test/CodeGen/AArch64/vector_merge_dep_check.ll @@ -1,5 +1,4 @@ -; RUN: llc --combiner-alias-analysis=false < %s | FileCheck %s -; RUN: llc --combiner-alias-analysis=true < %s | FileCheck %s +; RUN: llc < %s | FileCheck %s ; This test checks that we do not merge stores together which have ; dependencies through their non-chain operands (e.g. 
one store is the diff --git a/test/CodeGen/AMDGPU/debugger-insert-nops.ll b/test/CodeGen/AMDGPU/debugger-insert-nops.ll index 6638f4e2582..7be7d9486a4 100644 --- a/test/CodeGen/AMDGPU/debugger-insert-nops.ll +++ b/test/CodeGen/AMDGPU/debugger-insert-nops.ll @@ -1,13 +1,21 @@ -; RUN: llc -O0 -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=+amdgpu-debugger-insert-nops -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -O0 -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=+amdgpu-debugger-insert-nops -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK +; RUN: llc -O0 -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=+amdgpu-debugger-insert-nops -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECKNOP -; CHECK: test01.cl:2:{{[0-9]+}} -; CHECK-NEXT: s_nop 0 +; This test expects that we have one instance for each line in some order with "s_nop 0" instances after each. -; CHECK: test01.cl:3:{{[0-9]+}} -; CHECK-NEXT: s_nop 0 +; Check that each line appears at least once +; CHECK-DAG: test01.cl:2:3 +; CHECK-DAG: test01.cl:3:3 +; CHECK-DAG: test01.cl:4:3 -; CHECK: test01.cl:4:{{[0-9]+}} -; CHECK-NEXT: s_nop 0 + +; Check that each of each of the lines consists of the line output, followed by "s_nop 0" +; CHECKNOP: test01.cl:{{[234]}}:3 +; CHECKNOP-NEXT: s_nop 0 +; CHECKNOP: test01.cl:{{[234]}}:3 +; CHECKNOP-NEXT: s_nop 0 +; CHECKNOP: test01.cl:{{[234]}}:3 +; CHECKNOP-NEXT: s_nop 0 ; CHECK: test01.cl:5:{{[0-9]+}} ; CHECK-NEXT: s_nop 0 @@ -21,7 +29,7 @@ entry: call void @llvm.dbg.declare(metadata i32 addrspace(1)** %A.addr, metadata !17, metadata !18), !dbg !19 %0 = load i32 addrspace(1)*, i32 addrspace(1)** %A.addr, align 4, !dbg !20 %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %0, i32 0, !dbg !20 - store i32 1, i32 addrspace(1)* %arrayidx, align 4, !dbg !21 + store i32 1, i32 addrspace(1)* %arrayidx, align 4, !dbg !20 %1 = load i32 addrspace(1)*, i32 addrspace(1)** %A.addr, align 4, !dbg !22 %arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %1, i32 1, !dbg !22 store i32 2, i32 addrspace(1)* %arrayidx1, align 4, !dbg !23 diff --git a/test/CodeGen/AMDGPU/insert_vector_elt.ll b/test/CodeGen/AMDGPU/insert_vector_elt.ll index c05a6b907c4..97ef5ce9039 100644 --- a/test/CodeGen/AMDGPU/insert_vector_elt.ll +++ b/test/CodeGen/AMDGPU/insert_vector_elt.ll @@ -253,11 +253,9 @@ define void @dynamic_insertelement_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> %a ; GCN: buffer_load_ubyte v{{[0-9]+}}, off ; GCN: buffer_load_ubyte v{{[0-9]+}}, off -; GCN-DAG: buffer_store_byte v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:6 -; GCN-DAG: buffer_store_byte v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:5 ; GCN-DAG: buffer_store_byte v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:4 - -; GCN: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}} +; GCN-DAG: buffer_store_byte v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:5 +; GCN-DAG: buffer_store_byte v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:6 ; GCN-NO-TONGA: buffer_load_ubyte ; GCN-NO-TONGA: buffer_load_ubyte diff --git a/test/CodeGen/AMDGPU/merge-stores.ll b/test/CodeGen/AMDGPU/merge-stores.ll index 07104ebc8c9..fd2d8ee1189 100644 --- a/test/CodeGen/AMDGPU/merge-stores.ll +++ b/test/CodeGen/AMDGPU/merge-stores.ll @@ -1,8 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs -amdgpu-load-store-vectorizer=0 < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=GCN-NOAA %s -; RUN: llc -march=amdgcn -mcpu=bonaire 
-verify-machineinstrs -amdgpu-load-store-vectorizer=0 < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=GCN-NOAA %s - -; RUN: llc -march=amdgcn -verify-machineinstrs -combiner-alias-analysis -amdgpu-load-store-vectorizer=0 < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=GCN-AA %s -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -combiner-alias-analysis -amdgpu-load-store-vectorizer=0 < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=GCN-AA %s +; RUN: llc -march=amdgcn -verify-machineinstrs -amdgpu-load-store-vectorizer=0 < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=GCN-AA %s +; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -amdgpu-load-store-vectorizer=0 < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=GCN-AA %s ; This test is mostly to test DAG store merging, so disable the vectorizer. ; Run with devices with different unaligned load restrictions. @@ -150,12 +147,7 @@ define void @merge_global_store_4_constants_f32(float addrspace(1)* %out) #0 { } ; GCN-LABEL: {{^}}merge_global_store_4_constants_mixed_i32_f32: -; GCN-NOAA: buffer_store_dwordx4 v - -; GCN-AA: buffer_store_dwordx2 -; GCN-AA: buffer_store_dword v -; GCN-AA: buffer_store_dword v - +; GCN-AA: buffer_store_dwordx4 v ; GCN: s_endpgm define void @merge_global_store_4_constants_mixed_i32_f32(float addrspace(1)* %out) #0 { %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1 @@ -474,17 +466,9 @@ define void @merge_global_store_4_adjacent_loads_i8_natural_align(i8 addrspace(1 ret void } -; This works once AA is enabled on the subtarget ; GCN-LABEL: {{^}}merge_global_store_4_vector_elts_loads_v4i32: ; GCN: buffer_load_dwordx4 [[LOAD:v\[[0-9]+:[0-9]+\]]] - -; GCN-NOAA: buffer_store_dword v -; GCN-NOAA: buffer_store_dword v -; GCN-NOAA: buffer_store_dword v -; GCN-NOAA: buffer_store_dword v - -; GCN-AA: buffer_store_dwordx4 [[LOAD]] - +; GCN: buffer_store_dwordx4 [[LOAD]] ; GCN: s_endpgm define void @merge_global_store_4_vector_elts_loads_v4i32(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 { %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1 diff --git a/test/CodeGen/AMDGPU/private-element-size.ll b/test/CodeGen/AMDGPU/private-element-size.ll index eb487637ce9..9e75cc2bae4 100644 --- a/test/CodeGen/AMDGPU/private-element-size.ll +++ b/test/CodeGen/AMDGPU/private-element-size.ll @@ -32,10 +32,10 @@ ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:40{{$}} ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s9 offset:44{{$}} -; HSA-ELT4: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen{{$}} -; HSA-ELT4: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:4{{$}} -; HSA-ELT4: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:8{{$}} -; HSA-ELT4: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:12{{$}} +; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen{{$}} +; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:4{{$}} +; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:8{{$}} +; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:12{{$}} define void @private_elt_size_v4i32(<4 x i32> addrspace(1)* %out, i32 addrspace(1)* %index.array) #0 { entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -130,8 +130,8 @@ entry: ; HSA-ELT8: private_element_size = 2 ; HSA-ELT4: private_element_size 
= 1 -; HSA-ELTGE8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:16 -; HSA-ELTGE8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], s9 offset:24 +; HSA-ELTGE8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, {{off|v[0-9]}}, s[0:3], s9 offset:1 +; HSA-ELTGE8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, {{off|v[0-9]}}, s[0:3], s9 offset:2 ; HSA-ELTGE8: buffer_load_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], s9 offen diff --git a/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll b/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll index 4beefb047f2..c1d691fcff8 100644 --- a/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll +++ b/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll @@ -157,9 +157,8 @@ define void @reorder_global_load_local_store_global_load(i32 addrspace(1)* %out, ; FUNC-LABEL: @reorder_local_offsets ; CI: ds_read2_b32 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:100 offset1:102 -; CI: ds_write2_b32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} offset0:3 offset1:100 -; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:12 -; CI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:408 +; CI-DAG: ds_write2_b32 {{v[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:3 offset1:100 +; CI-DAG: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:408 ; CI: buffer_store_dword ; CI: s_endpgm define void @reorder_local_offsets(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* noalias nocapture readnone %gptr, i32 addrspace(3)* noalias nocapture %ptr0) #0 { @@ -181,12 +180,12 @@ define void @reorder_local_offsets(i32 addrspace(1)* nocapture %out, i32 addrspa } ; FUNC-LABEL: @reorder_global_offsets -; CI: buffer_load_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:400 -; CI: buffer_load_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:408 -; CI: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:12 -; CI: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:400 -; CI: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:408 -; CI: buffer_load_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:12 +; CI-DAG: buffer_load_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:400 +; CI-DAG: buffer_load_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:408 +; CI-DAG: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:12 +; CI-DAG: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:400 +; CI-DAG: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:408 +; CI: buffer_store_dword ; CI: s_endpgm define void @reorder_global_offsets(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* noalias nocapture readnone %gptr, i32 addrspace(1)* noalias nocapture %ptr0) #0 { %ptr1 = getelementptr inbounds i32, i32 addrspace(1)* %ptr0, i32 3 diff --git a/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll b/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll index 4a1341c4d6e..d2ff09a6200 100644 --- a/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll +++ b/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll @@ -12,7 +12,8 @@ define void @test_byval_8_bytes_alignment(i32 %i, ...) 
{ entry: ; CHECK: sub sp, sp, #12 ; CHECK: sub sp, sp, #4 -; CHECK: stmib sp, {r1, r2, r3} +; CHECK: add r0, sp, #4 +; CHECK: stm sp, {r0, r1, r2, r3} %g = alloca i8* %g1 = bitcast i8** %g to i8* call void @llvm.va_start(i8* %g1) diff --git a/test/CodeGen/ARM/alloc-no-stack-realign.ll b/test/CodeGen/ARM/alloc-no-stack-realign.ll index 7d37c83d748..0e077b3aee5 100644 --- a/test/CodeGen/ARM/alloc-no-stack-realign.ll +++ b/test/CodeGen/ARM/alloc-no-stack-realign.ll @@ -1,5 +1,4 @@ -; RUN: llc < %s -mtriple=armv7-apple-ios -O0 | FileCheck %s -check-prefix=NO-REALIGN -; RUN: llc < %s -mtriple=armv7-apple-ios -O0 | FileCheck %s -check-prefix=REALIGN +; RUN: llc < %s -mtriple=armv7-apple-ios -O0 | FileCheck %s ; rdar://12713765 ; When realign-stack is set to false, make sure we are not creating stack @@ -8,29 +7,31 @@ define void @test1(<16 x float>* noalias sret %agg.result) nounwind ssp "no-realign-stack" { entry: -; NO-REALIGN-LABEL: test1 -; NO-REALIGN: mov r[[R2:[0-9]+]], r[[R1:[0-9]+]] -; NO-REALIGN: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]! -; NO-REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; NO-REALIGN: add r[[R2:[0-9]+]], r[[R1]], #32 -; NO-REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; NO-REALIGN: add r[[R2:[0-9]+]], r[[R1]], #48 -; NO-REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] - -; NO-REALIGN: add r[[R2:[0-9]+]], r[[R1:[0-9]+]], #48 -; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; NO-REALIGN: add r[[R2:[0-9]+]], r[[R1]], #32 -; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; NO-REALIGN: mov r[[R3:[0-9]+]], r[[R1]] -; NO-REALIGN: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R3]]:128]! -; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R3]]:128] - -; NO-REALIGN: add r[[R2:[0-9]+]], r[[R0:0]], #48 -; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; NO-REALIGN: add r[[R2:[0-9]+]], r[[R0]], #32 -; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; NO-REALIGN: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R0]]:128]! -; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R0]]:128] +; CHECK-LABEL: test1 +; CHECK: ldr r[[R1:[0-9]+]], [pc, r1] +; CHECK: add r[[R2:[0-9]+]], r1, #48 +; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] +; CHECK: mov r[[R2:[0-9]+]], r[[R1]] +; CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]! +; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] +; CHECK: add r[[R1:[0-9]+]], r[[R1]], #32 +; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] +; CHECK: mov r[[R1:[0-9]+]], sp +; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] +; CHECK: add r[[R2:[0-9]+]], r[[R1]], #32 +; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] +; CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]! +; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] +; CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]! +; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] +; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] +; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] +; CHECK: add r[[R1:[0-9]+]], r0, #48 +; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] +; CHECK: add r[[R1:[0-9]+]], r0, #32 +; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] +; CHECK: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [r0:128]! 
+; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r0:128] %retval = alloca <16 x float>, align 16 %0 = load <16 x float>, <16 x float>* @T3_retval, align 16 store <16 x float> %0, <16 x float>* %retval @@ -41,32 +42,33 @@ entry: define void @test2(<16 x float>* noalias sret %agg.result) nounwind ssp { entry: -; REALIGN-LABEL: test2 -; REALIGN: bfc sp, #0, #6 -; REALIGN: mov r[[R2:[0-9]+]], r[[R1:[0-9]+]] -; REALIGN: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]! -; REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; REALIGN: add r[[R2:[0-9]+]], r[[R1]], #32 -; REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; REALIGN: add r[[R2:[0-9]+]], r[[R1]], #48 -; REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] +; CHECK: ldr r[[R1:[0-9]+]], [pc, r1] +; CHECK: add r[[R2:[0-9]+]], r[[R1]], #48 +; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] +; CHECK: mov r[[R2:[0-9]+]], r[[R1]] +; CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]! +; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] +; CHECK: add r[[R1:[0-9]+]], r[[R1]], #32 +; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] +; CHECK: mov r[[R1:[0-9]+]], sp +; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] +; CHECK: orr r[[R2:[0-9]+]], r[[R1]], #32 +; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] +; CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]! +; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] +; CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]! +; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] +; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] +; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] +; CHECK: add r[[R1:[0-9]+]], r0, #48 +; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] +; CHECK: add r[[R1:[0-9]+]], r0, #32 +; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] +; CHECK: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [r0:128]! +; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r0:128] -; REALIGN: orr r[[R2:[0-9]+]], r[[R1:[0-9]+]], #48 -; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; REALIGN: orr r[[R2:[0-9]+]], r[[R1]], #32 -; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; REALIGN: orr r[[R2:[0-9]+]], r[[R1]], #16 -; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] - -; REALIGN: add r[[R1:[0-9]+]], r[[R0:0]], #48 -; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] -; REALIGN: add r[[R1:[0-9]+]], r[[R0]], #32 -; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] -; REALIGN: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R0]]:128]! -; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R0]]:128] - %retval = alloca <16 x float>, align 16 +%retval = alloca <16 x float>, align 16 %0 = load <16 x float>, <16 x float>* @T3_retval, align 16 store <16 x float> %0, <16 x float>* %retval %1 = load <16 x float>, <16 x float>* %retval diff --git a/test/CodeGen/ARM/gpr-paired-spill.ll b/test/CodeGen/ARM/gpr-paired-spill.ll index ef3e5a54a2d..797b147d5d0 100644 --- a/test/CodeGen/ARM/gpr-paired-spill.ll +++ b/test/CodeGen/ARM/gpr-paired-spill.ll @@ -16,22 +16,22 @@ define void @foo(i64* %addr) { ; an LDMIA was created with both a FrameIndex and an offset, which ; is not allowed. 
-; CHECK-WITH-LDRD: strd {{r[0-9]+}}, {{r[0-9]+}}, [sp, #8] -; CHECK-WITH-LDRD: strd {{r[0-9]+}}, {{r[0-9]+}}, [sp] +; CHECK-WITH-LDRD-DAG: strd {{r[0-9]+}}, {{r[0-9]+}}, [sp, #8] +; CHECK-WITH-LDRD-DAG: strd {{r[0-9]+}}, {{r[0-9]+}}, [sp] -; CHECK-WITH-LDRD: ldrd {{r[0-9]+}}, {{r[0-9]+}}, [sp, #8] -; CHECK-WITH-LDRD: ldrd {{r[0-9]+}}, {{r[0-9]+}}, [sp] +; CHECK-WITH-LDRD-DAG: ldrd {{r[0-9]+}}, {{r[0-9]+}}, [sp, #8] +; CHECK-WITH-LDRD-DAG: ldrd {{r[0-9]+}}, {{r[0-9]+}}, [sp] ; We also want to ensure the register scavenger is working (i.e. an ; offset from sp can be generated), so we need two spills. -; CHECK-WITHOUT-LDRD: add [[ADDRREG:[a-z0-9]+]], sp, #{{[0-9]+}} -; CHECK-WITHOUT-LDRD: stm [[ADDRREG]], {r{{[0-9]+}}, r{{[0-9]+}}} -; CHECK-WITHOUT-LDRD: stm sp, {r{{[0-9]+}}, r{{[0-9]+}}} +; CHECK-WITHOUT-LDRD-DAG: add [[ADDRREG:[a-z0-9]+]], sp, #{{[0-9]+}} +; CHECK-WITHOUT-LDRD-DAG: stm [[ADDRREG]], {r{{[0-9]+}}, r{{[0-9]+}}} +; CHECK-WITHOUT-LDRD-DAG: stm sp, {r{{[0-9]+}}, r{{[0-9]+}}} ; In principle LLVM may have to recalculate the offset. At the moment ; it reuses the original though. -; CHECK-WITHOUT-LDRD: ldm [[ADDRREG]], {r{{[0-9]+}}, r{{[0-9]+}}} -; CHECK-WITHOUT-LDRD: ldm sp, {r{{[0-9]+}}, r{{[0-9]+}}} +; CHECK-WITHOUT-LDRD-DAG: ldm [[ADDRREG]], {r{{[0-9]+}}, r{{[0-9]+}}} +; CHECK-WITHOUT-LDRD-DAG: ldm sp, {r{{[0-9]+}}, r{{[0-9]+}}} store volatile i64 %val1, i64* %addr store volatile i64 %val2, i64* %addr diff --git a/test/CodeGen/ARM/ifcvt10.ll b/test/CodeGen/ARM/ifcvt10.ll index 5725a404c32..c7e18d35dbe 100644 --- a/test/CodeGen/ARM/ifcvt10.ll +++ b/test/CodeGen/ARM/ifcvt10.ll @@ -9,8 +9,6 @@ entry: ; CHECK-LABEL: t: ; CHECK: vpop {d8} ; CHECK-NOT: vpopne -; CHECK: pop {r7, pc} -; CHECK: vpop {d8} ; CHECK: pop {r7, pc} br i1 undef, label %if.else, label %if.then diff --git a/test/CodeGen/ARM/illegal-bitfield-loadstore.ll b/test/CodeGen/ARM/illegal-bitfield-loadstore.ll index 892f0982755..bfb2b95c256 100644 --- a/test/CodeGen/ARM/illegal-bitfield-loadstore.ll +++ b/test/CodeGen/ARM/illegal-bitfield-loadstore.ll @@ -6,9 +6,7 @@ define void @i24_or(i24* %a) { ; LE-LABEL: i24_or: ; LE: @ BB#0: ; LE-NEXT: ldrh r1, [r0] -; LE-NEXT: ldrb r2, [r0, #2] ; LE-NEXT: orr r1, r1, #384 -; LE-NEXT: strb r2, [r0, #2] ; LE-NEXT: strh r1, [r0] ; LE-NEXT: mov pc, lr ; @@ -31,21 +29,19 @@ define void @i24_or(i24* %a) { define void @i24_and_or(i24* %a) { ; LE-LABEL: i24_and_or: ; LE: @ BB#0: -; LE-NEXT: ldrb r2, [r0, #2] ; LE-NEXT: ldrh r1, [r0] -; LE-NEXT: strb r2, [r0, #2] ; LE-NEXT: mov r2, #16256 -; LE-NEXT: orr r1, r1, #384 ; LE-NEXT: orr r2, r2, #49152 +; LE-NEXT: orr r1, r1, #384 ; LE-NEXT: and r1, r1, r2 ; LE-NEXT: strh r1, [r0] ; LE-NEXT: mov pc, lr ; ; BE-LABEL: i24_and_or: ; BE: @ BB#0: +; BE-NEXT: mov r1, #128 +; BE-NEXT: strb r1, [r0, #2] ; BE-NEXT: ldrh r1, [r0] -; BE-NEXT: mov r2, #128 -; BE-NEXT: strb r2, [r0, #2] ; BE-NEXT: orr r1, r1, #1 ; BE-NEXT: strh r1, [r0] ; BE-NEXT: mov pc, lr @@ -59,9 +55,7 @@ define void @i24_and_or(i24* %a) { define void @i24_insert_bit(i24* %a, i1 zeroext %bit) { ; LE-LABEL: i24_insert_bit: ; LE: @ BB#0: -; LE-NEXT: ldrb r3, [r0, #2] ; LE-NEXT: ldrh r2, [r0] -; LE-NEXT: strb r3, [r0, #2] ; LE-NEXT: mov r3, #255 ; LE-NEXT: orr r3, r3, #57088 ; LE-NEXT: and r2, r2, r3 @@ -71,9 +65,7 @@ define void @i24_insert_bit(i24* %a, i1 zeroext %bit) { ; ; BE-LABEL: i24_insert_bit: ; BE: @ BB#0: -; BE-NEXT: ldrb r3, [r0, #2] ; BE-NEXT: ldrh r2, [r0] -; BE-NEXT: strb r3, [r0, #2] ; BE-NEXT: mov r3, #57088 ; BE-NEXT: orr r3, r3, #16711680 ; BE-NEXT: and r2, r3, r2, lsl #8 
@@ -93,14 +85,9 @@ define void @i24_insert_bit(i24* %a, i1 zeroext %bit) { define void @i56_or(i56* %a) { ; LE-LABEL: i56_or: ; LE: @ BB#0: -; LE-NEXT: mov r2, r0 -; LE-NEXT: ldr r12, [r0] -; LE-NEXT: ldrh r3, [r2, #4]! -; LE-NEXT: ldrb r1, [r2, #2] -; LE-NEXT: strb r1, [r2, #2] -; LE-NEXT: orr r1, r12, #384 +; LE-NEXT: ldr r1, [r0] +; LE-NEXT: orr r1, r1, #384 ; LE-NEXT: str r1, [r0] -; LE-NEXT: strh r3, [r2] ; LE-NEXT: mov pc, lr ; ; BE-LABEL: i56_or: @@ -128,36 +115,29 @@ define void @i56_or(i56* %a) { define void @i56_and_or(i56* %a) { ; LE-LABEL: i56_and_or: ; LE: @ BB#0: -; LE-NEXT: mov r2, r0 ; LE-NEXT: ldr r1, [r0] -; LE-NEXT: ldrh r12, [r2, #4]! ; LE-NEXT: orr r1, r1, #384 -; LE-NEXT: ldrb r3, [r2, #2] ; LE-NEXT: bic r1, r1, #127 -; LE-NEXT: strb r3, [r2, #2] ; LE-NEXT: str r1, [r0] -; LE-NEXT: strh r12, [r2] ; LE-NEXT: mov pc, lr ; ; BE-LABEL: i56_and_or: ; BE: @ BB#0: -; BE-NEXT: .save {r11, lr} -; BE-NEXT: push {r11, lr} -; BE-NEXT: mov r2, r0 -; BE-NEXT: ldr lr, [r0] +; BE-NEXT: mov r1, r0 ; BE-NEXT: mov r3, #128 -; BE-NEXT: ldrh r12, [r2, #4]! -; BE-NEXT: strb r3, [r2, #2] -; BE-NEXT: lsl r3, r12, #8 -; BE-NEXT: orr r3, r3, lr, lsl #24 -; BE-NEXT: orr r3, r3, #384 -; BE-NEXT: lsr r1, r3, #8 -; BE-NEXT: strh r1, [r2] -; BE-NEXT: bic r1, lr, #255 -; BE-NEXT: orr r1, r1, r3, lsr #24 +; BE-NEXT: ldrh r2, [r1, #4]! +; BE-NEXT: strb r3, [r1, #2] +; BE-NEXT: lsl r2, r2, #8 +; BE-NEXT: ldr r12, [r0] +; BE-NEXT: orr r2, r2, r12, lsl #24 +; BE-NEXT: orr r2, r2, #384 +; BE-NEXT: lsr r3, r2, #8 +; BE-NEXT: strh r3, [r1] +; BE-NEXT: bic r1, r12, #255 +; BE-NEXT: orr r1, r1, r2, lsr #24 ; BE-NEXT: str r1, [r0] -; BE-NEXT: pop {r11, lr} ; BE-NEXT: mov pc, lr + %b = load i56, i56* %a, align 1 %c = and i56 %b, -128 %d = or i56 %c, 384 @@ -168,35 +148,27 @@ define void @i56_and_or(i56* %a) { define void @i56_insert_bit(i56* %a, i1 zeroext %bit) { ; LE-LABEL: i56_insert_bit: ; LE: @ BB#0: -; LE-NEXT: .save {r11, lr} -; LE-NEXT: push {r11, lr} -; LE-NEXT: mov r3, r0 -; LE-NEXT: ldr lr, [r0] -; LE-NEXT: ldrh r12, [r3, #4]! -; LE-NEXT: ldrb r2, [r3, #2] -; LE-NEXT: strb r2, [r3, #2] -; LE-NEXT: bic r2, lr, #8192 +; LE-NEXT: ldr r2, [r0] +; LE-NEXT: bic r2, r2, #8192 ; LE-NEXT: orr r1, r2, r1, lsl #13 ; LE-NEXT: str r1, [r0] -; LE-NEXT: strh r12, [r3] -; LE-NEXT: pop {r11, lr} ; LE-NEXT: mov pc, lr ; ; BE-LABEL: i56_insert_bit: ; BE: @ BB#0: ; BE-NEXT: .save {r11, lr} ; BE-NEXT: push {r11, lr} -; BE-NEXT: mov r3, r0 +; BE-NEXT: mov r2, r0 +; BE-NEXT: ldrh r12, [r2, #4]! +; BE-NEXT: ldrb r3, [r2, #2] +; BE-NEXT: strb r3, [r2, #2] +; BE-NEXT: orr r12, r3, r12, lsl #8 ; BE-NEXT: ldr lr, [r0] -; BE-NEXT: ldrh r12, [r3, #4]! 
-; BE-NEXT: ldrb r2, [r3, #2] -; BE-NEXT: strb r2, [r3, #2] -; BE-NEXT: orr r2, r2, r12, lsl #8 -; BE-NEXT: orr r2, r2, lr, lsl #24 -; BE-NEXT: bic r2, r2, #8192 -; BE-NEXT: orr r1, r2, r1, lsl #13 -; BE-NEXT: lsr r2, r1, #8 -; BE-NEXT: strh r2, [r3] +; BE-NEXT: orr r3, r12, lr, lsl #24 +; BE-NEXT: bic r3, r3, #8192 +; BE-NEXT: orr r1, r3, r1, lsl #13 +; BE-NEXT: lsr r3, r1, #8 +; BE-NEXT: strh r3, [r2] ; BE-NEXT: bic r2, lr, #255 ; BE-NEXT: orr r1, r2, r1, lsr #24 ; BE-NEXT: str r1, [r0] diff --git a/test/CodeGen/ARM/static-addr-hoisting.ll b/test/CodeGen/ARM/static-addr-hoisting.ll index 3d47e02f965..683d607936b 100644 --- a/test/CodeGen/ARM/static-addr-hoisting.ll +++ b/test/CodeGen/ARM/static-addr-hoisting.ll @@ -6,9 +6,9 @@ define void @multiple_store() { ; CHECK: movs [[VAL:r[0-9]+]], #42 ; CHECK: movt r[[BASE1]], #15 -; CHECK: str [[VAL]], [r[[BASE1]]] -; CHECK: str [[VAL]], [r[[BASE1]], #24] -; CHECK: str.w [[VAL]], [r[[BASE1]], #42] +; CHECK-DAG: str [[VAL]], [r[[BASE1]]] +; CHECK-DAG: str [[VAL]], [r[[BASE1]], #24] +; CHECK-DAG: str.w [[VAL]], [r[[BASE1]], #42] ; CHECK: movw r[[BASE2:[0-9]+]], #20394 ; CHECK: movt r[[BASE2]], #18 diff --git a/test/CodeGen/BPF/undef.ll b/test/CodeGen/BPF/undef.ll index 541d81ea07b..de14bfde1ab 100644 --- a/test/CodeGen/BPF/undef.ll +++ b/test/CodeGen/BPF/undef.ll @@ -13,50 +13,55 @@ ; Function Attrs: nounwind uwtable define i32 @ebpf_filter(%struct.__sk_buff* nocapture readnone %ebpf_packet) #0 section "socket1" { +; CHECK: r2 = r10 +; CHECK: r2 += -2 +; CHECK: r1 = 0 +; CHECK: *(u16 *)(r2 + 6) = r1 +; CHECK: *(u16 *)(r2 + 4) = r1 +; CHECK: *(u16 *)(r2 + 2) = r1 +; CHECK: r2 = 6 +; CHECK: *(u8 *)(r10 - 7) = r2 +; CHECK: r2 = 5 +; CHECK: *(u8 *)(r10 - 8) = r2 +; CHECK: r2 = 7 +; CHECK: *(u8 *)(r10 - 6) = r2 +; CHECK: r2 = 8 +; CHECK: *(u8 *)(r10 - 5) = r2 +; CHECK: r2 = 9 +; CHECK: *(u8 *)(r10 - 4) = r2 +; CHECK: r2 = 10 +; CHECK: *(u8 *)(r10 - 3) = r2 +; CHECK: *(u16 *)(r10 + 24) = r1 +; CHECK: *(u16 *)(r10 + 22) = r1 +; CHECK: *(u16 *)(r10 + 20) = r1 +; CHECK: *(u16 *)(r10 + 18) = r1 +; CHECK: *(u16 *)(r10 + 16) = r1 +; CHECK: *(u16 *)(r10 + 14) = r1 +; CHECK: *(u16 *)(r10 + 12) = r1 +; CHECK: *(u16 *)(r10 + 10) = r1 +; CHECK: *(u16 *)(r10 + 8) = r1 +; CHECK: *(u16 *)(r10 + 6) = r1 +; CHECK: *(u16 *)(r10 - 2) = r1 +; CHECK: *(u16 *)(r10 + 26) = r1 +; CHECK: r2 = r10 +; CHECK: r2 += -8 +; CHECK: r1 = ll +; CHECK: call bpf_map_lookup_elem +; CHECK: exit %key = alloca %struct.routing_key_2, align 1 %1 = getelementptr inbounds %struct.routing_key_2, %struct.routing_key_2* %key, i64 0, i32 0, i64 0 -; CHECK: r1 = 5 -; CHECK: *(u8 *)(r10 - 8) = r1 store i8 5, i8* %1, align 1 %2 = getelementptr inbounds %struct.routing_key_2, %struct.routing_key_2* %key, i64 0, i32 0, i64 1 -; CHECK: r1 = 6 -; CHECK: *(u8 *)(r10 - 7) = r1 store i8 6, i8* %2, align 1 %3 = getelementptr inbounds %struct.routing_key_2, %struct.routing_key_2* %key, i64 0, i32 0, i64 2 -; CHECK: r1 = 7 -; CHECK: *(u8 *)(r10 - 6) = r1 store i8 7, i8* %3, align 1 %4 = getelementptr inbounds %struct.routing_key_2, %struct.routing_key_2* %key, i64 0, i32 0, i64 3 -; CHECK: r1 = 8 -; CHECK: *(u8 *)(r10 - 5) = r1 store i8 8, i8* %4, align 1 %5 = getelementptr inbounds %struct.routing_key_2, %struct.routing_key_2* %key, i64 0, i32 0, i64 4 -; CHECK: r1 = 9 -; CHECK: *(u8 *)(r10 - 4) = r1 store i8 9, i8* %5, align 1 %6 = getelementptr inbounds %struct.routing_key_2, %struct.routing_key_2* %key, i64 0, i32 0, i64 5 -; CHECK: r1 = 10 -; CHECK: *(u8 *)(r10 - 3) = r1 store i8 10, i8* %6, align 1 %7 = 
getelementptr inbounds %struct.routing_key_2, %struct.routing_key_2* %key, i64 1, i32 0, i64 0 -; CHECK: r1 = r10 -; CHECK: r1 += -2 -; CHECK: r2 = 0 -; CHECK: *(u16 *)(r1 + 6) = r2 -; CHECK: *(u16 *)(r1 + 4) = r2 -; CHECK: *(u16 *)(r1 + 2) = r2 -; CHECK: *(u16 *)(r10 + 24) = r2 -; CHECK: *(u16 *)(r10 + 22) = r2 -; CHECK: *(u16 *)(r10 + 20) = r2 -; CHECK: *(u16 *)(r10 + 18) = r2 -; CHECK: *(u16 *)(r10 + 16) = r2 -; CHECK: *(u16 *)(r10 + 14) = r2 -; CHECK: *(u16 *)(r10 + 12) = r2 -; CHECK: *(u16 *)(r10 + 10) = r2 -; CHECK: *(u16 *)(r10 + 8) = r2 -; CHECK: *(u16 *)(r10 + 6) = r2 -; CHECK: *(u16 *)(r10 - 2) = r2 -; CHECK: *(u16 *)(r10 + 26) = r2 call void @llvm.memset.p0i8.i64(i8* %7, i8 0, i64 30, i32 1, i1 false) %8 = call i32 (%struct.bpf_map_def*, %struct.routing_key_2*, ...) bitcast (i32 (...)* @bpf_map_lookup_elem to i32 (%struct.bpf_map_def*, %struct.routing_key_2*, ...)*)(%struct.bpf_map_def* nonnull @routing, %struct.routing_key_2* nonnull %key) #3 ret i32 undef diff --git a/test/CodeGen/MSP430/Inst16mm.ll b/test/CodeGen/MSP430/Inst16mm.ll index c75e1beb235..a48d8592c1a 100644 --- a/test/CodeGen/MSP430/Inst16mm.ll +++ b/test/CodeGen/MSP430/Inst16mm.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=msp430 -combiner-alias-analysis < %s | FileCheck %s +; RUN: llc -march=msp430 < %s | FileCheck %s target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8" target triple = "msp430-generic-generic" @foo = common global i16 0, align 2 diff --git a/test/CodeGen/Mips/cconv/arguments-float.ll b/test/CodeGen/Mips/cconv/arguments-float.ll index 7d32992ecb1..004f6d94749 100644 --- a/test/CodeGen/Mips/cconv/arguments-float.ll +++ b/test/CodeGen/Mips/cconv/arguments-float.ll @@ -63,39 +63,39 @@ entry: ; NEW-DAG: sd $5, 16([[R2]]) ; O32 has run out of argument registers and starts using the stack -; O32-DAG: lw [[R3:\$([0-9]+|gp)]], 24($sp) -; O32-DAG: lw [[R4:\$([0-9]+|gp)]], 28($sp) +; O32-DAG: lw [[R3:\$([0-9]+|gp)]], 16($sp) +; O32-DAG: lw [[R4:\$([0-9]+|gp)]], 20($sp) ; O32-DAG: sw [[R3]], 24([[R2]]) ; O32-DAG: sw [[R4]], 28([[R2]]) ; NEW-DAG: sd $6, 24([[R2]]) -; O32-DAG: lw [[R3:\$([0-9]+|gp)]], 32($sp) -; O32-DAG: lw [[R4:\$([0-9]+|gp)]], 36($sp) +; O32-DAG: lw [[R3:\$([0-9]+|gp)]], 24($sp) +; O32-DAG: lw [[R4:\$([0-9]+|gp)]], 28($sp) ; O32-DAG: sw [[R3]], 32([[R2]]) ; O32-DAG: sw [[R4]], 36([[R2]]) ; NEW-DAG: sd $7, 32([[R2]]) -; O32-DAG: lw [[R3:\$([0-9]+|gp)]], 40($sp) -; O32-DAG: lw [[R4:\$([0-9]+|gp)]], 44($sp) +; O32-DAG: lw [[R3:\$([0-9]+|gp)]], 32($sp) +; O32-DAG: lw [[R4:\$([0-9]+|gp)]], 36($sp) ; O32-DAG: sw [[R3]], 40([[R2]]) ; O32-DAG: sw [[R4]], 44([[R2]]) ; NEW-DAG: sd $8, 40([[R2]]) -; O32-DAG: lw [[R3:\$([0-9]+|gp)]], 48($sp) -; O32-DAG: lw [[R4:\$([0-9]+|gp)]], 52($sp) +; O32-DAG: lw [[R3:\$([0-9]+|gp)]], 40($sp) +; O32-DAG: lw [[R4:\$([0-9]+|gp)]], 44($sp) ; O32-DAG: sw [[R3]], 48([[R2]]) ; O32-DAG: sw [[R4]], 52([[R2]]) ; NEW-DAG: sd $9, 48([[R2]]) -; O32-DAG: lw [[R3:\$([0-9]+|gp)]], 56($sp) -; O32-DAG: lw [[R4:\$([0-9]+|gp)]], 60($sp) +; O32-DAG: lw [[R3:\$([0-9]+|gp)]], 48($sp) +; O32-DAG: lw [[R4:\$([0-9]+|gp)]], 52($sp) ; O32-DAG: sw [[R3]], 56([[R2]]) ; O32-DAG: sw [[R4]], 60([[R2]]) ; NEW-DAG: sd $10, 56([[R2]]) ; N32/N64 have run out of registers and starts using the stack too -; O32-DAG: lw [[R3:\$[0-9]+]], 64($sp) -; O32-DAG: lw [[R4:\$[0-9]+]], 68($sp) +; O32-DAG: lw [[R3:\$[0-9]+]], 56($sp) +; O32-DAG: lw [[R4:\$[0-9]+]], 60($sp) ; O32-DAG: sw [[R3]], 64([[R2]]) ; O32-DAG: sw [[R4]], 68([[R2]]) ; NEW-DAG: ld [[R3:\$[0-9]+]], 0($sp) diff --git 
a/test/CodeGen/Mips/cconv/arguments-varargs.ll b/test/CodeGen/Mips/cconv/arguments-varargs.ll index 785188b3c51..d662128945f 100644 --- a/test/CodeGen/Mips/cconv/arguments-varargs.ll +++ b/test/CodeGen/Mips/cconv/arguments-varargs.ll @@ -315,12 +315,11 @@ entry: ; Big-endian mode for N32/N64 must add an additional 4 to the offset due to byte ; order. ; O32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(dwords) -; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]]) +; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA_TMP2]]) ; O32-DAG: sw [[ARG1]], 8([[GV]]) -; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]]) -; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4 -; O32-DAG: sw [[VA2]], 0([[SP]]) -; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]]) +; O32-DAG: addiu [[VA3:\$[0-9]+]], [[VA2]], 4 +; O32-DAG: sw [[VA3]], 0([[SP]]) +; O32-DAG: lw [[ARG1:\$[0-9]+]], 4([[VA_TMP2]]) ; O32-DAG: sw [[ARG1]], 12([[GV]]) ; N32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(dwords) @@ -349,10 +348,9 @@ entry: ; Load the second argument from the variable portion and copy it to the global. ; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]]) ; O32-DAG: sw [[ARG2]], 16([[GV]]) -; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]]) -; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4 -; O32-DAG: sw [[VA2]], 0([[SP]]) -; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]]) +; O32-DAG: addiu [[VA3:\$[0-9]+]], [[VA2]], 4 +; O32-DAG: sw [[VA3]], 0([[SP]]) +; O32-DAG: lw [[ARG2:\$[0-9]+]], 4([[VA_TMP2]]) ; O32-DAG: sw [[ARG2]], 20([[GV]]) ; NEW-DAG: ld [[ARG2:\$[0-9]+]], 0([[VA2]]) @@ -678,12 +676,11 @@ entry: ; Big-endian mode for N32/N64 must add an additional 4 to the offset due to byte ; order. ; O32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(dwords) -; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]]) +; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA_TMP2]]) ; O32-DAG: sw [[ARG1]], 8([[GV]]) -; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]]) -; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4 -; O32-DAG: sw [[VA2]], 0([[SP]]) -; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]]) +; O32-DAG: addiu [[VA3:\$[0-9]+]], [[VA2]], 4 +; O32-DAG: sw [[VA3]], 0([[SP]]) +; O32-DAG: lw [[ARG1:\$[0-9]+]], 4([[VA_TMP2]]) ; O32-DAG: sw [[ARG1]], 12([[GV]]) ; N32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(dwords) @@ -712,10 +709,9 @@ entry: ; Load the second argument from the variable portion and copy it to the global. ; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]]) ; O32-DAG: sw [[ARG2]], 16([[GV]]) -; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]]) -; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4 +; O32-DAG: addiu [[VA3:\$[0-9]+]], [[VA2]], 4 ; O32-DAG: sw [[VA2]], 0([[SP]]) -; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]]) +; O32-DAG: lw [[ARG2:\$[0-9]+]], 4([[VA_TMP2]]) ; O32-DAG: sw [[ARG2]], 20([[GV]]) ; NEW-DAG: ld [[ARG2:\$[0-9]+]], 0([[VA2]]) @@ -1040,10 +1036,9 @@ entry: ; O32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(dwords) ; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]]) ; O32-DAG: sw [[ARG1]], 8([[GV]]) -; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]]) -; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4 -; O32-DAG: sw [[VA2]], 0([[SP]]) -; O32-DAG: lw [[ARG1:\$[0-9]+]], 0([[VA]]) +; O32-DAG: addiu [[VA3:\$[0-9]+]], [[VA2]], 4 +; O32-DAG: sw [[VA3]], 0([[SP]]) +; O32-DAG: lw [[ARG1:\$[0-9]+]], 4([[VA_TMP2]]) ; O32-DAG: sw [[ARG1]], 12([[GV]]) ; N32-DAG: addiu [[GV:\$[0-9]+]], ${{[0-9]+}}, %lo(dwords) @@ -1072,10 +1067,9 @@ entry: ; Load the second argument from the variable portion and copy it to the global. 
; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]]) ; O32-DAG: sw [[ARG2]], 16([[GV]]) -; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]]) -; O32-DAG: addiu [[VA2:\$[0-9]+]], [[VA]], 4 -; O32-DAG: sw [[VA2]], 0([[SP]]) -; O32-DAG: lw [[ARG2:\$[0-9]+]], 0([[VA]]) +; O32-DAG: addiu [[VA3:\$[0-9]+]], [[VA2]], 4 +; O32-DAG: sw [[VA3]], 0([[SP]]) +; O32-DAG: lw [[ARG2:\$[0-9]+]], 4([[VA_TMP2]]) ; O32-DAG: sw [[ARG2]], 20([[GV]]) ; NEW-DAG: ld [[ARG2:\$[0-9]+]], 0([[VA2]]) diff --git a/test/CodeGen/Mips/fastcc.ll b/test/CodeGen/Mips/fastcc.ll index 13abc20eb3e..fb1bc4d9a8a 100644 --- a/test/CodeGen/Mips/fastcc.ll +++ b/test/CodeGen/Mips/fastcc.ll @@ -132,20 +132,19 @@ entry: define internal fastcc void @callee0(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %a8, i32 %a9, i32 %a10, i32 %a11, i32 %a12, i32 %a13, i32 %a14, i32 %a15, i32 %a16) nounwind noinline { entry: ; CHECK: callee0 -; CHECK: sw $4 -; CHECK: sw $5 -; CHECK: sw $6 -; CHECK: sw $7 -; CHECK: sw $8 -; CHECK: sw $9 -; CHECK: sw $10 -; CHECK: sw $11 -; CHECK: sw $12 -; CHECK: sw $13 -; CHECK: sw $14 -; CHECK: sw $15 -; CHECK: sw $24 -; CHECK: sw $3 +; CHECK-DAG: sw $4 +; CHECK-DAG: sw $5 +; CHECK-DAG: sw $7 +; CHECK-DAG: sw $8 +; CHECK-DAG: sw $9 +; CHECK-DAG: sw $10 +; CHECK-DAG: sw $11 +; CHECK-DAG: sw $12 +; CHECK-DAG: sw $13 +; CHECK-DAG: sw $14 +; CHECK-DAG: sw $15 +; CHECK-DAG: sw $24 +; CHECK-DAG: sw $3 ; t6, t7 and t8 are reserved in NaCl and cannot be used for fastcc. ; CHECK-NACL-NOT: sw $14 @@ -223,27 +222,27 @@ entry: define internal fastcc void @callee1(float %a0, float %a1, float %a2, float %a3, float %a4, float %a5, float %a6, float %a7, float %a8, float %a9, float %a10, float %a11, float %a12, float %a13, float %a14, float %a15, float %a16, float %a17, float %a18, float %a19, float %a20) nounwind noinline { entry: -; CHECK: callee1 -; CHECK: swc1 $f0 -; CHECK: swc1 $f1 -; CHECK: swc1 $f2 -; CHECK: swc1 $f3 -; CHECK: swc1 $f4 -; CHECK: swc1 $f5 -; CHECK: swc1 $f6 -; CHECK: swc1 $f7 -; CHECK: swc1 $f8 -; CHECK: swc1 $f9 -; CHECK: swc1 $f10 -; CHECK: swc1 $f11 -; CHECK: swc1 $f12 -; CHECK: swc1 $f13 -; CHECK: swc1 $f14 -; CHECK: swc1 $f15 -; CHECK: swc1 $f16 -; CHECK: swc1 $f17 -; CHECK: swc1 $f18 -; CHECK: swc1 $f19 +; CHECK-LABEL: callee1: +; CHECK-DAG: swc1 $f0 +; CHECK-DAG: swc1 $f1 +; CHECK-DAG: swc1 $f2 +; CHECK-DAG: swc1 $f3 +; CHECK-DAG: swc1 $f4 +; CHECK-DAG: swc1 $f5 +; CHECK-DAG: swc1 $f6 +; CHECK-DAG: swc1 $f7 +; CHECK-DAG: swc1 $f8 +; CHECK-DAG: swc1 $f9 +; CHECK-DAG: swc1 $f10 +; CHECK-DAG: swc1 $f11 +; CHECK-DAG: swc1 $f12 +; CHECK-DAG: swc1 $f13 +; CHECK-DAG: swc1 $f14 +; CHECK-DAG: swc1 $f15 +; CHECK-DAG: swc1 $f16 +; CHECK-DAG: swc1 $f17 +; CHECK-DAG: swc1 $f18 +; CHECK-DAG: swc1 $f19 store float %a0, float* @gf0, align 4 store float %a1, float* @gf1, align 4 @@ -316,8 +315,6 @@ entry: ; NOODDSPREG-LABEL: callee2: -; NOODDSPREG: addiu $sp, $sp, -[[OFFSET:[0-9]+]] - ; Check that first 10 arguments are received in even float registers ; f0, f2, ... , f18. Check that 11th argument is received on stack. 
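; As an aside, a hypothetical reduction of the convention these checks
; cover (illustrative only; @sketch is an invented name, not part of
; fastcc.ll): under NOODDSPREG, the first ten float arguments occupy the
; even registers $f0, $f2, ..., $f18, and the eleventh arrives on the
; callee's stack, which is why the checks below expect a load from 0($sp).
;
;   define internal fastcc void @sketch(float %a0, float %a1, float %a2,
;                                       float %a3, float %a4, float %a5,
;                                       float %a6, float %a7, float %a8,
;                                       float %a9, float %a10) nounwind {
;   entry:
;     ; %a0..%a9 are received in $f0, $f2, ..., $f18 (even singles only)
;     ; %a10 is received at 0($sp)
;     ret void
;   }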
@@ -333,7 +330,7 @@ entry: ; NOODDSPREG-DAG: swc1 $f16, 32($[[R0]]) ; NOODDSPREG-DAG: swc1 $f18, 36($[[R0]]) -; NOODDSPREG-DAG: lwc1 $[[F0:f[0-9]*[02468]]], [[OFFSET]]($sp) +; NOODDSPREG-DAG: lwc1 $[[F0:f[0-9]*[02468]]], 0($sp) ; NOODDSPREG-DAG: swc1 $[[F0]], 40($[[R0]]) store float %a0, float* getelementptr ([11 x float], [11 x float]* @fa, i32 0, i32 0), align 4 @@ -397,7 +394,6 @@ entry: ; FP64-NOODDSPREG-LABEL: callee3: -; FP64-NOODDSPREG: addiu $sp, $sp, -[[OFFSET:[0-9]+]] ; Check that first 10 arguments are received in even float registers ; f0, f2, ... , f18. Check that 11th argument is received on stack. @@ -414,7 +410,7 @@ entry: ; FP64-NOODDSPREG-DAG: sdc1 $f16, 64($[[R0]]) ; FP64-NOODDSPREG-DAG: sdc1 $f18, 72($[[R0]]) -; FP64-NOODDSPREG-DAG: ldc1 $[[F0:f[0-9]*[02468]]], [[OFFSET]]($sp) +; FP64-NOODDSPREG-DAG: ldc1 $[[F0:f[0-9]*[02468]]], 0($sp) ; FP64-NOODDSPREG-DAG: sdc1 $[[F0]], 80($[[R0]]) store double %a0, double* getelementptr ([11 x double], [11 x double]* @da, i32 0, i32 0), align 8 diff --git a/test/CodeGen/Mips/load-store-left-right.ll b/test/CodeGen/Mips/load-store-left-right.ll index 3bd924a8120..6def55cf883 100644 --- a/test/CodeGen/Mips/load-store-left-right.ll +++ b/test/CodeGen/Mips/load-store-left-right.ll @@ -250,12 +250,18 @@ entry: ; MIPS64-EB: ld $[[PTR:[0-9]+]], %got_disp(struct_s0)( ; MIPS64R6: ld $[[PTR:[0-9]+]], %got_disp(struct_s0)( -; FIXME: We should be able to do better than this on MIPS32r6/MIPS64r6 since -; we have unaligned halfword load/store available -; ALL-DAG: lbu $[[R1:[0-9]+]], 0($[[PTR]]) -; ALL-DAG: sb $[[R1]], 2($[[PTR]]) -; ALL-DAG: lbu $[[R1:[0-9]+]], 1($[[PTR]]) -; ALL-DAG: sb $[[R1]], 3($[[PTR]]) +; MIPS32-DAG: lbu $[[R1:[0-9]+]], 0($[[PTR]]) +; MIPS32-DAG: sb $[[R1]], 2($[[PTR]]) +; MIPS32-DAG: lbu $[[R2:[0-9]+]], 1($[[PTR]]) +; MIPS32-DAG: sb $[[R2]], 3($[[PTR]]) + +; MIPS32R6: lhu $[[R1:[0-9]+]], 0($[[PTR]]) +; MIPS32R6: sh $[[R1]], 2($[[PTR]]) + +; MIPS64-DAG: lbu $[[R1:[0-9]+]], 0($[[PTR]]) +; MIPS64-DAG: sb $[[R1]], 2($[[PTR]]) +; MIPS64-DAG: lbu $[[R2:[0-9]+]], 1($[[PTR]]) +; MIPS64-DAG: sb $[[R2]], 3($[[PTR]]) %0 = load %struct.S0, %struct.S0* getelementptr inbounds (%struct.S0, %struct.S0* @struct_s0, i32 0), align 1 store %struct.S0 %0, %struct.S0* getelementptr inbounds (%struct.S0, %struct.S0* @struct_s0, i32 1), align 1 @@ -268,37 +274,54 @@ entry: ; MIPS32-EL: lw $[[PTR:[0-9]+]], %got(struct_s1)( ; MIPS32-EB: lw $[[PTR:[0-9]+]], %got(struct_s1)( -; MIPS32-DAG: lbu $[[R1:[0-9]+]], 0($[[PTR]]) -; MIPS32-DAG: sb $[[R1]], 4($[[PTR]]) -; MIPS32-DAG: lbu $[[R1:[0-9]+]], 1($[[PTR]]) -; MIPS32-DAG: sb $[[R1]], 5($[[PTR]]) -; MIPS32-DAG: lbu $[[R1:[0-9]+]], 2($[[PTR]]) -; MIPS32-DAG: sb $[[R1]], 6($[[PTR]]) -; MIPS32-DAG: lbu $[[R1:[0-9]+]], 3($[[PTR]]) -; MIPS32-DAG: sb $[[R1]], 7($[[PTR]]) +; MIPS32-EL-DAG: lwl $[[R1:[0-9]+]], 3($[[PTR]]) +; MIPS32-EL-DAG: lwr $[[R1]], 0($[[PTR]]) +; MIPS32-EL-DAG: swl $[[R1]], 7($[[PTR]]) +; MIPS32-EL-DAG: swr $[[R1]], 4($[[PTR]]) +; MIPS32-EB-DAG: lwl $[[R1:[0-9]+]], 0($[[PTR]]) +; MIPS32-EB-DAG: lwr $[[R1]], 3($[[PTR]]) +; MIPS32-EB-DAG: swl $[[R1]], 4($[[PTR]]) +; MIPS32-EB-DAG: swr $[[R1]], 7($[[PTR]]) + +; MIPS32-NOLEFTRIGHT-DAG: lbu $[[R1:[0-9]+]], 0($[[PTR]]) +; MIPS32-NOLEFTRIGHT-DAG: sb $[[R1]], 4($[[PTR]]) +; MIPS32-NOLEFTRIGHT-DAG: lbu $[[R1:[0-9]+]], 1($[[PTR]]) +; MIPS32-NOLEFTRIGHT-DAG: sb $[[R1]], 5($[[PTR]]) +; MIPS32-NOLEFTRIGHT-DAG: lbu $[[R1:[0-9]+]], 2($[[PTR]]) +; MIPS32-NOLEFTRIGHT-DAG: sb $[[R1]], 6($[[PTR]]) +; MIPS32-NOLEFTRIGHT-DAG: lbu $[[R1:[0-9]+]], 3($[[PTR]]) +; 
MIPS32-NOLEFTRIGHT-DAG: sb $[[R1]], 7($[[PTR]]) ; MIPS32R6: lw $[[PTR:[0-9]+]], %got(struct_s1)( -; MIPS32R6-DAG: lhu $[[R1:[0-9]+]], 0($[[PTR]]) -; MIPS32R6-DAG: sh $[[R1]], 4($[[PTR]]) -; MIPS32R6-DAG: lhu $[[R1:[0-9]+]], 2($[[PTR]]) -; MIPS32R6-DAG: sh $[[R1]], 6($[[PTR]]) +; MIPS32R6-DAG: lw $[[R1:[0-9]+]], 0($[[PTR]]) +; MIPS32R6-DAG: sw $[[R1]], 4($[[PTR]]) ; MIPS64-EL: ld $[[PTR:[0-9]+]], %got_disp(struct_s1)( ; MIPS64-EB: ld $[[PTR:[0-9]+]], %got_disp(struct_s1)( -; MIPS64-DAG: lbu $[[R1:[0-9]+]], 0($[[PTR]]) -; MIPS64-DAG: sb $[[R1]], 4($[[PTR]]) -; MIPS64-DAG: lbu $[[R1:[0-9]+]], 1($[[PTR]]) -; MIPS64-DAG: sb $[[R1]], 5($[[PTR]]) -; MIPS64-DAG: lbu $[[R1:[0-9]+]], 2($[[PTR]]) -; MIPS64-DAG: sb $[[R1]], 6($[[PTR]]) -; MIPS64-DAG: lbu $[[R1:[0-9]+]], 3($[[PTR]]) -; MIPS64-DAG: sb $[[R1]], 7($[[PTR]]) + +; MIPS64-EL-DAG: lwl $[[R1:[0-9]+]], 3($[[PTR]]) +; MIPS64-EL-DAG: lwr $[[R1]], 0($[[PTR]]) +; MIPS64-EL-DAG: swl $[[R1]], 7($[[PTR]]) +; MIPS64-EL-DAG: swr $[[R1]], 4($[[PTR]]) + +; MIPS64-EB-DAG: lwl $[[R1:[0-9]+]], 0($[[PTR]]) +; MIPS64-EB-DAG: lwr $[[R1]], 3($[[PTR]]) +; MIPS64-EB-DAG: swl $[[R1]], 4($[[PTR]]) +; MIPS64-EB-DAG: swr $[[R1]], 7($[[PTR]]) + + +; MIPS64-NOLEFTRIGHT-DAG: lbu $[[R1:[0-9]+]], 0($[[PTR]]) +; MIPS64-NOLEFTRIGHT-DAG: sb $[[R1]], 4($[[PTR]]) +; MIPS64-NOLEFTRIGHT-DAG: lbu $[[R1:[0-9]+]], 1($[[PTR]]) +; MIPS64-NOLEFTRIGHT-DAG: sb $[[R1]], 5($[[PTR]]) +; MIPS64-NOLEFTRIGHT-DAG: lbu $[[R1:[0-9]+]], 2($[[PTR]]) +; MIPS64-NOLEFTRIGHT-DAG: sb $[[R1]], 6($[[PTR]]) +; MIPS64-NOLEFTRIGHT-DAG: lbu $[[R1:[0-9]+]], 3($[[PTR]]) +; MIPS64-NOLEFTRIGHT-DAG: sb $[[R1]], 7($[[PTR]]) ; MIPS64R6: ld $[[PTR:[0-9]+]], %got_disp(struct_s1)( -; MIPS64R6-DAG: lhu $[[R1:[0-9]+]], 0($[[PTR]]) -; MIPS64R6-DAG: sh $[[R1]], 4($[[PTR]]) -; MIPS64R6-DAG: lhu $[[R1:[0-9]+]], 2($[[PTR]]) -; MIPS64R6-DAG: sh $[[R1]], 6($[[PTR]]) +; MIPS64R6-DAG: lw $[[R1:[0-9]+]], 0($[[PTR]]) +; MIPS64R6-DAG: sw $[[R1]], 4($[[PTR]]) %0 = load %struct.S1, %struct.S1* getelementptr inbounds (%struct.S1, %struct.S1* @struct_s1, i32 0), align 1 store %struct.S1 %0, %struct.S1* getelementptr inbounds (%struct.S1, %struct.S1* @struct_s1, i32 1), align 1 @@ -336,30 +359,21 @@ entry: ; MIPS32R6-DAG: sw $[[R1]], 12($[[PTR]]) ; MIPS64-EL: ld $[[PTR:[0-9]+]], %got_disp(struct_s2)( -; MIPS64-EL-DAG: lwl $[[R1:[0-9]+]], 3($[[PTR]]) -; MIPS64-EL-DAG: lwr $[[R1]], 0($[[PTR]]) -; MIPS64-EL-DAG: swl $[[R1]], 11($[[PTR]]) -; MIPS64-EL-DAG: swr $[[R1]], 8($[[PTR]]) -; MIPS64-EL-DAG: lwl $[[R1:[0-9]+]], 7($[[PTR]]) -; MIPS64-EL-DAG: lwr $[[R1]], 4($[[PTR]]) -; MIPS64-EL-DAG: swl $[[R1]], 15($[[PTR]]) -; MIPS64-EL-DAG: swr $[[R1]], 12($[[PTR]]) + +; MIPS64-EL-DAG: ldl $[[R1:[0-9]+]], 7($[[PTR]]) +; MIPS64-EL-DAG: ldr $[[R1]], 0($[[PTR]]) +; MIPS64-EL-DAG: sdl $[[R1]], 15($[[PTR]]) +; MIPS64-EL-DAG: sdr $[[R1]], 8($[[PTR]]) ; MIPS64-EB: ld $[[PTR:[0-9]+]], %got_disp(struct_s2)( -; MIPS64-EB-DAG: lwl $[[R1:[0-9]+]], 0($[[PTR]]) -; MIPS64-EB-DAG: lwr $[[R1]], 3($[[PTR]]) -; MIPS64-EB-DAG: swl $[[R1]], 8($[[PTR]]) -; MIPS64-EB-DAG: swr $[[R1]], 11($[[PTR]]) -; MIPS64-EB-DAG: lwl $[[R1:[0-9]+]], 4($[[PTR]]) -; MIPS64-EB-DAG: lwr $[[R1]], 7($[[PTR]]) -; MIPS64-EB-DAG: swl $[[R1]], 12($[[PTR]]) -; MIPS64-EB-DAG: swr $[[R1]], 15($[[PTR]]) +; MIPS64-EB-DAG: ldl $[[R1:[0-9]+]], 0($[[PTR]]) +; MIPS64-EB-DAG: ldr $[[R1]], 7($[[PTR]]) +; MIPS64-EB-DAG: sdl $[[R1]], 8($[[PTR]]) +; MIPS64-EB-DAG: sdr $[[R1]], 15($[[PTR]]) ; MIPS64R6: ld $[[PTR:[0-9]+]], %got_disp(struct_s2)( -; MIPS64R6-DAG: lw $[[R1:[0-9]+]], 0($[[PTR]]) -; MIPS64R6-DAG: sw 
$[[R1]], 8($[[PTR]]) -; MIPS64R6-DAG: lw $[[R1:[0-9]+]], 4($[[PTR]]) -; MIPS64R6-DAG: sw $[[R1]], 12($[[PTR]]) +; MIPS64R6-DAG: ld $[[R1:[0-9]+]], 0($[[PTR]]) +; MIPS64R6-DAG: sd $[[R1]], 8($[[PTR]]) %0 = load %struct.S2, %struct.S2* getelementptr inbounds (%struct.S2, %struct.S2* @struct_s2, i32 0), align 1 store %struct.S2 %0, %struct.S2* getelementptr inbounds (%struct.S2, %struct.S2* @struct_s2, i32 1), align 1 @@ -416,17 +430,17 @@ entry: ; MIPS64-EL-DAG: lwl $[[R1:[0-9]+]], 3($[[PTR]]) ; MIPS64-EL-DAG: lwr $[[R1]], 0($[[PTR]]) -; MIPS64-EB: ld $[[SPTR:[0-9]+]], %got_disp(arr)( -; MIPS64-EB-DAG: lwl $[[R1:[0-9]+]], 0($[[PTR]]) -; MIPS64-EB-DAG: lwr $[[R1]], 3($[[PTR]]) -; MIPS64-EB-DAG: dsll $[[R1]], $[[R1]], 32 +; MIPS64-EB: ld $[[SPTR:[0-9]+]], %got_disp(arr)( ; MIPS64-EB-DAG: lbu $[[R2:[0-9]+]], 5($[[PTR]]) ; MIPS64-EB-DAG: lbu $[[R3:[0-9]+]], 4($[[PTR]]) ; MIPS64-EB-DAG: dsll $[[T0:[0-9]+]], $[[R3]], 8 ; MIPS64-EB-DAG: or $[[T1:[0-9]+]], $[[T0]], $[[R2]] -; MIPS64-EB-DAG: dsll $[[T1]], $[[T1]], 16 -; MIPS64-EB-DAG: or $[[T3:[0-9]+]], $[[R1]], $[[T1]] ; MIPS64-EB-DAG: lbu $[[R4:[0-9]+]], 6($[[PTR]]) +; MIPS64-EB-DAG: dsll $[[T1]], $[[T1]], 16 +; MIPS64-EB-DAG: lwl $[[R1:[0-9]+]], 0($[[PTR]]) +; MIPS64-EB-DAG: lwr $[[R1]], 3($[[PTR]]) +; MIPS64-EB-DAG: dsll $[[R5:[0-9]+]], $[[R1]], 32 +; MIPS64-EB-DAG: or $[[T3:[0-9]+]], $[[R5]], $[[T1]] ; MIPS64-EB-DAG: dsll $[[T4:[0-9]+]], $[[R4]], 8 ; MIPS64-EB-DAG: or $4, $[[T3]], $[[T4]] diff --git a/test/CodeGen/Mips/micromips-li.ll b/test/CodeGen/Mips/micromips-li.ll index ac315f93825..997f4e9196a 100644 --- a/test/CodeGen/Mips/micromips-li.ll +++ b/test/CodeGen/Mips/micromips-li.ll @@ -13,6 +13,6 @@ entry: ret i32 0 } -; CHECK: li16 ${{[2-7]|16|17}}, 1 ; CHECK: addiu ${{[0-9]+}}, $zero, 2148 +; CHECK: li16 ${{[2-7]|16|17}}, 1 ; CHECK: ori ${{[0-9]+}}, $zero, 33332 diff --git a/test/CodeGen/Mips/mips64-f128-call.ll b/test/CodeGen/Mips/mips64-f128-call.ll index c59f25ef4af..19fa8fc7524 100644 --- a/test/CodeGen/Mips/mips64-f128-call.ll +++ b/test/CodeGen/Mips/mips64-f128-call.ll @@ -4,8 +4,8 @@ @gld1 = external global fp128 ; CHECK: foo0 -; CHECK: sdc1 $f12, %lo(gld0)(${{[0-9]+}}) -; CHECK: sdc1 $f13, 8(${{[0-9]+}}) +; CHECK-DAG: sdc1 $f12, %lo(gld0)(${{[0-9]+}}) +; CHECK-DAG: sdc1 $f13, 8(${{[0-9]+}}) define void @foo0(fp128 %a0) { entry: @@ -14,8 +14,8 @@ entry: } ; CHECK: foo1 -; CHECK: ldc1 $f12, %lo(gld0)(${{[0-9]+}}) -; CHECK: ldc1 $f13, 8(${{[0-9]+}}) +; CHECK-DAG: ldc1 $f12, %lo(gld0)(${{[0-9]+}}) +; CHECK-DAG: ldc1 $f13, 8(${{[0-9]+}}) define void @foo1() { entry: @@ -26,11 +26,11 @@ entry: declare void @foo2(fp128) + ; CHECK: foo3: -; CHECK: daddiu $[[R0:[0-9]+]], ${{[0-9]+}}, %hi(gld0) -; CHECK: dsll $[[R1:[0-9]+]], $[[R0]], 16 + +; CHECK: daddiu $[[R2:[0-9]+]], $[[R1:[0-9]+]], %lo(gld0) ; CHECK: sdc1 $f0, %lo(gld0)($[[R1]]) -; CHECK: daddiu $[[R2:[0-9]]], $[[R1]], %lo(gld0) ; CHECK: sdc1 $f2, 8($[[R2]]) ; CHECK: daddiu $[[R3:[0-9]+]], ${{[0-9]+}}, %hi(gld1) ; CHECK: dsll $[[R4:[0-9]+]], $[[R3]], 16 @@ -39,7 +39,6 @@ declare void @foo2(fp128) ; CHECK: ldc1 $f2, 8($[[R5]]) - define fp128 @foo3() { entry: %call = tail call fp128 @foo4() diff --git a/test/CodeGen/Mips/mips64-f128.ll b/test/CodeGen/Mips/mips64-f128.ll index 304ab8b90d3..237b4c5f1ee 100644 --- a/test/CodeGen/Mips/mips64-f128.ll +++ b/test/CodeGen/Mips/mips64-f128.ll @@ -577,10 +577,10 @@ entry: ; ALL-LABEL: store_LD_LD: ; ALL: ld $[[R0:[0-9]+]], %got_disp(gld1) -; ALL: ld $[[R1:[0-9]+]], 0($[[R0]]) ; ALL: ld $[[R2:[0-9]+]], 8($[[R0]]) ; ALL: ld $[[R3:[0-9]+]], 
%got_disp(gld0) ; ALL: sd $[[R2]], 8($[[R3]]) +; ALL: ld $[[R1:[0-9]+]], 0($[[R0]]) ; ALL: sd $[[R1]], 0($[[R3]]) define void @store_LD_LD() { diff --git a/test/CodeGen/Mips/mno-ldc1-sdc1.ll b/test/CodeGen/Mips/mno-ldc1-sdc1.ll index 9663138d4c8..0260afaa186 100644 --- a/test/CodeGen/Mips/mno-ldc1-sdc1.ll +++ b/test/CodeGen/Mips/mno-ldc1-sdc1.ll @@ -130,12 +130,12 @@ ; MM-MNO-PIC: addiu $[[R1:[0-9]+]], $[[R0]], %lo(_gp_disp) ; MM-MNO-PIC: addu $[[R2:[0-9]+]], $[[R1]], $25 ; MM-MNO-PIC: lw $[[R3:[0-9]+]], %got(g0)($[[R2]]) -; MM-MNO-PIC: lw16 $[[R4:[0-9]+]], 0($[[R3]]) -; MM-MNO-PIC: lw16 $[[R5:[0-9]+]], 4($[[R3]]) -; MM-MNO-LE-PIC: mtc1 $[[R4]], $f0 -; MM-MNO-LE-PIC: mthc1 $[[R5]], $f0 -; MM-MNO-BE-PIC: mtc1 $[[R5]], $f0 -; MM-MNO-BE-PIC: mthc1 $[[R4]], $f0 +; MM-MNO-PIC-DAG: lw16 $[[R4:[0-9]+]], 0($[[R3]]) +; MM-MNO-PIC-DAG: lw16 $[[R5:[0-9]+]], 4($[[R3]]) +; MM-MNO-LE-PIC-DAG: mtc1 $[[R4]], $f0 +; MM-MNO-LE-PIC-DAG: mthc1 $[[R5]], $f0 +; MM-MNO-BE-PIC-DAG: mtc1 $[[R5]], $f0 +; MM-MNO-BE-PIC-DAG: mthc1 $[[R4]], $f0 ; MM-STATIC-PIC: lui $[[R0:[0-9]+]], %hi(g0) ; MM-STATIC-PIC: ldc1 $f0, %lo(g0)($[[R0]]) @@ -214,13 +214,13 @@ entry: ; MM-MNO-PIC: lui $[[R0:[0-9]+]], %hi(_gp_disp) ; MM-MNO-PIC: addiu $[[R1:[0-9]+]], $[[R0]], %lo(_gp_disp) ; MM-MNO-PIC: addu $[[R2:[0-9]+]], $[[R1]], $25 -; MM-MNO-LE-PIC: mfc1 $[[R3:[0-9]+]], $f12 -; MM-MNO-BE-PIC: mfhc1 $[[R3:[0-9]+]], $f12 -; MM-MNO-PIC: lw $[[R4:[0-9]+]], %got(g0)($[[R2]]) -; MM-MNO-PIC: sw16 $[[R3]], 0($[[R4]]) -; MM-MNO-LE-PIC: mfhc1 $[[R5:[0-9]+]], $f12 -; MM-MNO-BE-PIC: mfc1 $[[R5:[0-9]+]], $f12 -; MM-MNO-PIC: sw16 $[[R5]], 4($[[R4]]) +; MM-MNO-LE-PIC-DAG: mfc1 $[[R3:[0-9]+]], $f12 +; MM-MNO-BE-PIC-DAG: mfhc1 $[[R3:[0-9]+]], $f12 +; MM-MNO-PIC-DAG: lw $[[R4:[0-9]+]], %got(g0)($[[R2]]) +; MM-MNO-PIC-DAG: sw16 $[[R3]], 0($[[R4]]) +; MM-MNO-LE-PIC-DAG: mfhc1 $[[R5:[0-9]+]], $f12 +; MM-MNO-BE-PIC-DAG: mfc1 $[[R5:[0-9]+]], $f12 +; MM-MNO-PIC-DAG: sw16 $[[R5]], 4($[[R4]]) ; MM-STATIC-PIC: lui $[[R0:[0-9]+]], %hi(g0) ; MM-STATIC-PIC: sdc1 $f12, %lo(g0)($[[R0]]) @@ -267,8 +267,8 @@ entry: ; MM-MNO-PIC: sll16 $[[R0:[0-9]+]], $5, 3 ; MM-MNO-PIC: addu16 $[[R1:[0-9]+]], $4, $[[R0]] -; MM-MNO-PIC: lw16 $[[R2:[0-9]+]], 0($[[R1]]) -; MM-MNO-PIC: lw16 $[[R3:[0-9]+]], 4($[[R1]]) +; MM-MNO-PIC-DAG: lw16 $[[R2:[0-9]+]], 0($[[R1]]) +; MM-MNO-PIC-DAG: lw16 $[[R3:[0-9]+]], 4($[[R1]]) ; MM-MNO-LE-PIC: mtc1 $[[R2]], $f0 ; MM-MNO-LE-PIC: mthc1 $[[R3]], $f0 ; MM-MNO-BE-PIC: mtc1 $[[R3]], $f0 @@ -313,14 +313,14 @@ entry: ; MM: addu16 $[[R1:[0-9]+]], $6, $[[R0]] ; MM: sdc1 $f12, 0($[[R1]]) -; MM-MNO-PIC: sll16 $[[R0:[0-9]+]], $7, 3 -; MM-MNO-PIC: addu16 $[[R1:[0-9]+]], $6, $[[R0]] -; MM-MNO-LE-PIC: mfc1 $[[R2:[0-9]+]], $f12 -; MM-MNO-BE-PIC: mfhc1 $[[R2:[0-9]+]], $f12 -; MM-MNO-PIC: sw16 $[[R2]], 0($[[R1]]) -; MM-MNO-LE-PIC: mfhc1 $[[R3:[0-9]+]], $f12 -; MM-MNO-BE-PIC: mfc1 $[[R3:[0-9]+]], $f12 -; MM-MNO-PIC: sw16 $[[R3]], 4($[[R1]]) +; MM-MNO-PIC: sll16 $[[R0:[0-9]+]], $7, 3 +; MM-MNO-PIC: addu16 $[[R1:[0-9]+]], $6, $[[R0]] +; MM-MNO-LE-PIC-DAG: mfc1 $[[R2:[0-9]+]], $f12 +; MM-MNO-BE-PIC-DAG: mfhc1 $[[R2:[0-9]+]], $f12 +; MM-MNO-PIC-DAG: sw16 $[[R2]], 0($[[R1]]) +; MM-MNO-LE-PIC-DAG: mfhc1 $[[R3:[0-9]+]], $f12 +; MM-MNO-BE-PIC-DAG: mfc1 $[[R3:[0-9]+]], $f12 +; MM-MNO-PIC-DAG: sw16 $[[R3]], 4($[[R1]]) ; MM-STATIC-PIC: sll16 $[[R0:[0-9]+]], $7, 3 ; MM-STATIC-PIC: addu16 $[[R1:[0-9]+]], $6, $[[R0]] diff --git a/test/CodeGen/Mips/msa/f16-llvm-ir.ll b/test/CodeGen/Mips/msa/f16-llvm-ir.ll index 9957d5be26e..ac69dc913c1 100644 --- a/test/CodeGen/Mips/msa/f16-llvm-ir.ll 
+++ b/test/CodeGen/Mips/msa/f16-llvm-ir.ll @@ -234,15 +234,15 @@ entry: ; MIPS32: insert.w $w[[W0]][1], $[[R1]] ; MIPS32: insert.w $w[[W0]][3], $[[R1]] -; MIPS64-N64: ld $[[R3:[0-9]+]], %got_disp(h) -; MIPS64-N32: lw $[[R3:[0-9]+]], %got_disp(h) -; MIPS64: dmfc1 $[[R1:[0-9]+]], $f[[F2]] -; MIPS64: fill.d $w[[W0:[0-9]+]], $[[R1]] +; MIPS64-N64-DAG: ld $[[R3:[0-9]+]], %got_disp(h) +; MIPS64-N32-DAG: lw $[[R3:[0-9]+]], %got_disp(h) +; MIPS64-DAG: dmfc1 $[[R1:[0-9]+]], $f[[F2]] +; MIPS64-DAG: fill.d $w[[W0:[0-9]+]], $[[R1]] -; ALL: fexdo.w $w[[W1:[0-9]+]], $w[[W0]], $w[[W0]] -; ALL: fexdo.h $w[[W2:[0-9]+]], $w[[W1]], $w[[W1]] +; ALL-DAG: fexdo.w $w[[W1:[0-9]+]], $w[[W0]], $w[[W0]] +; ALL-DAG: fexdo.h $w[[W2:[0-9]+]], $w[[W1]], $w[[W1]] -; MIPS32: lw $[[R3:[0-9]+]], %got(h) +; MIPS32-DAG: lw $[[R3:[0-9]+]], %got(h) ; ALL: copy_u.h $[[R2:[0-9]+]], $w[[W2]] ; ALL: sh $[[R2]], 0($[[R3]]) diff --git a/test/CodeGen/Mips/msa/i5_ld_st.ll b/test/CodeGen/Mips/msa/i5_ld_st.ll index c644d242a00..812c400d46e 100644 --- a/test/CodeGen/Mips/msa/i5_ld_st.ll +++ b/test/CodeGen/Mips/msa/i5_ld_st.ll @@ -336,8 +336,8 @@ entry: ; CHECK: llvm_mips_st_b_valid_range_tests: ; CHECK: ld.b -; CHECK: st.b [[R1:\$w[0-9]+]], -512( -; CHECK: st.b [[R1:\$w[0-9]+]], 511( +; CHECK-DAG: st.b [[R1:\$w[0-9]+]], -512( +; CHECK-DAG: st.b [[R1:\$w[0-9]+]], 511( ; CHECK: .size llvm_mips_st_b_valid_range_tests ; @@ -351,10 +351,10 @@ entry: } ; CHECK: llvm_mips_st_b_invalid_range_tests: -; CHECK: addiu $2, $1, -513 +; CHECK: addiu $2, $1, 512 ; CHECK: ld.b ; CHECK: st.b [[R1:\$w[0-9]+]], 0( -; CHECK: addiu $1, $1, 512 +; CHECK: addiu $1, $1, -513 ; CHECK: st.b [[R1:\$w[0-9]+]], 0( ; CHECK: .size llvm_mips_st_b_invalid_range_tests ; @@ -404,8 +404,8 @@ entry: ; CHECK: llvm_mips_st_h_valid_range_tests: ; CHECK: ld.h -; CHECK: st.h [[R1:\$w[0-9]+]], -1024( -; CHECK: st.h [[R1:\$w[0-9]+]], 1022( +; CHECK-DAG: st.h [[R1:\$w[0-9]+]], -1024( +; CHECK-DAG: st.h [[R1:\$w[0-9]+]], 1022( ; CHECK: .size llvm_mips_st_h_valid_range_tests ; @@ -419,10 +419,10 @@ entry: } ; CHECK: llvm_mips_st_h_invalid_range_tests: -; CHECK: addiu $2, $1, -1026 +; CHECK: addiu $2, $1, 1024 ; CHECK: ld.h ; CHECK: st.h [[R1:\$w[0-9]+]], 0( -; CHECK: addiu $1, $1, 1024 +; CHECK: addiu $1, $1, -1026 ; CHECK: st.h [[R1:\$w[0-9]+]], 0( ; CHECK: .size llvm_mips_st_h_invalid_range_tests ; @@ -472,8 +472,8 @@ entry: ; CHECK: llvm_mips_st_w_valid_range_tests: ; CHECK: ld.w -; CHECK: st.w [[R1:\$w[0-9]+]], -2048( -; CHECK: st.w [[R1:\$w[0-9]+]], 2044( +; CHECK-DAG: st.w [[R1:\$w[0-9]+]], -2048( +; CHECK-DAG: st.w [[R1:\$w[0-9]+]], 2044( ; CHECK: .size llvm_mips_st_w_valid_range_tests ; @@ -487,10 +487,10 @@ entry: } ; CHECK: llvm_mips_st_w_invalid_range_tests: -; CHECK: addiu $2, $1, -2052 +; CHECK: addiu $2, $1, 2048 ; CHECK: ld.w ; CHECK: st.w [[R1:\$w[0-9]+]], 0( -; CHECK: addiu $1, $1, 2048 +; CHECK: addiu $1, $1, -2052 ; CHECK: st.w [[R1:\$w[0-9]+]], 0( ; CHECK: .size llvm_mips_st_w_invalid_range_tests ; @@ -540,8 +540,8 @@ entry: ; CHECK: llvm_mips_st_d_valid_range_tests: ; CHECK: ld.d -; CHECK: st.d [[R1:\$w[0-9]+]], -4096( -; CHECK: st.d [[R1:\$w[0-9]+]], 4088( +; CHECK-DAG: st.d [[R1:\$w[0-9]+]], -4096( +; CHECK-DAG: st.d [[R1:\$w[0-9]+]], 4088( ; CHECK: .size llvm_mips_st_d_valid_range_tests ; @@ -555,10 +555,10 @@ entry: } ; CHECK: llvm_mips_st_d_invalid_range_tests: -; CHECK: addiu $2, $1, -4104 +; CHECK: addiu $2, $1, 4096 ; CHECK: ld.d ; CHECK: st.d [[R1:\$w[0-9]+]], 0( -; CHECK: addiu $1, $1, 4096 +; CHECK: addiu $1, $1, -4104 ; CHECK: st.d [[R1:\$w[0-9]+]], 0( 
; CHECK: .size llvm_mips_st_d_invalid_range_tests ; diff --git a/test/CodeGen/Mips/o32_cc_byval.ll b/test/CodeGen/Mips/o32_cc_byval.ll index 33431dba43c..eadf4abfc75 100644 --- a/test/CodeGen/Mips/o32_cc_byval.ll +++ b/test/CodeGen/Mips/o32_cc_byval.ll @@ -45,20 +45,18 @@ declare void @callee3(float, %struct.S3* byval, %struct.S1* byval) define void @f2(float %f, %struct.S1* nocapture byval %s1) nounwind { entry: ; CHECK: addiu $sp, $sp, -48 -; CHECK: sw $7, 60($sp) -; CHECK: sw $6, 56($sp) -; CHECK: lw $4, 80($sp) -; CHECK: ldc1 $f[[F0:[0-9]+]], 72($sp) -; CHECK: lw $[[R3:[0-9]+]], 64($sp) -; CHECK: lw $[[R4:[0-9]+]], 68($sp) -; CHECK: lw $[[R2:[0-9]+]], 60($sp) -; CHECK: lh $[[R1:[0-9]+]], 58($sp) -; CHECK: lb $[[R0:[0-9]+]], 56($sp) -; CHECK: sw $[[R0]], 32($sp) -; CHECK: sw $[[R1]], 28($sp) -; CHECK: sw $[[R2]], 24($sp) -; CHECK: sw $[[R4]], 20($sp) -; CHECK: sw $[[R3]], 16($sp) +; CHECK-DAG: sw $7, 60($sp) +; CHECK-DAG: sw $6, 56($sp) +; CHECK-DAG: ldc1 $f[[F0:[0-9]+]], 72($sp) +; CHECK-DAG: lw $[[R3:[0-9]+]], 64($sp) +; CHECK-DAG: lw $[[R4:[0-9]+]], 68($sp) +; CHECK-DAG: lh $[[R1:[0-9]+]], 58($sp) +; CHECK-DAG: lb $[[R0:[0-9]+]], 56($sp) +; CHECK-DAG: sw $[[R0]], 32($sp) +; CHECK-DAG: sw $[[R1]], 28($sp) +; CHECK-DAG: sw $[[R4]], 20($sp) +; CHECK-DAG: sw $[[R3]], 16($sp) +; CHECK-DAG: sw $7, 24($sp) ; CHECK: mfc1 $6, $f[[F0]] %i2 = getelementptr inbounds %struct.S1, %struct.S1* %s1, i32 0, i32 5 @@ -82,13 +80,11 @@ declare void @callee4(i32, double, i64, i32, i16 signext, i8 signext, float) define void @f3(%struct.S2* nocapture byval %s2) nounwind { entry: ; CHECK: addiu $sp, $sp, -48 -; CHECK: sw $7, 60($sp) -; CHECK: sw $6, 56($sp) -; CHECK: sw $5, 52($sp) -; CHECK: sw $4, 48($sp) -; CHECK: lw $4, 48($sp) -; CHECK: lw $[[R0:[0-9]+]], 60($sp) -; CHECK: sw $[[R0]], 24($sp) +; CHECK-DAG: sw $7, 60($sp) +; CHECK-DAG: sw $6, 56($sp) +; CHECK-DAG: sw $5, 52($sp) +; CHECK-DAG: sw $4, 48($sp) +; CHECK-DAG: sw $7, 24($sp) %arrayidx = getelementptr inbounds %struct.S2, %struct.S2* %s2, i32 0, i32 0, i32 0 %tmp = load i32, i32* %arrayidx, align 4 @@ -101,14 +97,14 @@ entry: define void @f4(float %f, %struct.S3* nocapture byval %s3, %struct.S1* nocapture byval %s1) nounwind { entry: ; CHECK: addiu $sp, $sp, -48 -; CHECK: sw $7, 60($sp) -; CHECK: sw $6, 56($sp) -; CHECK: sw $5, 52($sp) -; CHECK: lw $4, 60($sp) -; CHECK: lw $[[R1:[0-9]+]], 80($sp) -; CHECK: lb $[[R0:[0-9]+]], 52($sp) -; CHECK: sw $[[R0]], 32($sp) -; CHECK: sw $[[R1]], 24($sp) +; CHECK-DAG: sw $7, 60($sp) +; CHECK-DAG: sw $6, 56($sp) +; CHECK-DAG: sw $5, 52($sp) +; CHECK-DAG: lw $[[R1:[0-9]+]], 80($sp) +; CHECK-DAG: lb $[[R0:[0-9]+]], 52($sp) +; CHECK-DAG: sw $[[R0]], 32($sp) +; CHECK-DAG: sw $[[R1]], 24($sp) +; CHECK: move $4, $7 %i = getelementptr inbounds %struct.S1, %struct.S1* %s1, i32 0, i32 2 %tmp = load i32, i32* %i, align 4 diff --git a/test/CodeGen/Mips/o32_cc_vararg.ll b/test/CodeGen/Mips/o32_cc_vararg.ll index 1dc3a927d93..73aad48b73e 100644 --- a/test/CodeGen/Mips/o32_cc_vararg.ll +++ b/test/CodeGen/Mips/o32_cc_vararg.ll @@ -29,9 +29,9 @@ entry: ; CHECK-LABEL: va1: ; CHECK: addiu $sp, $sp, -16 +; CHECK: sw $5, 20($sp) ; CHECK: sw $7, 28($sp) ; CHECK: sw $6, 24($sp) -; CHECK: sw $5, 20($sp) ; CHECK: lw $2, 20($sp) } @@ -83,8 +83,8 @@ entry: ; CHECK-LABEL: va3: ; CHECK: addiu $sp, $sp, -16 -; CHECK: sw $7, 28($sp) ; CHECK: sw $6, 24($sp) +; CHECK: sw $7, 28($sp) ; CHECK: lw $2, 24($sp) } diff --git a/test/CodeGen/PowerPC/anon_aggr.ll b/test/CodeGen/PowerPC/anon_aggr.ll index f4e788849ec..9b32a8f55f3 100644 --- 
a/test/CodeGen/PowerPC/anon_aggr.ll +++ b/test/CodeGen/PowerPC/anon_aggr.ll @@ -60,33 +60,34 @@ equal: unequal: ret i8* %array2_ptr } - ; CHECK-LABEL: func2: -; CHECK: ld [[REG2:[0-9]+]], 72(1) -; CHECK: cmpld {{([0-9]+,)?}}4, [[REG2]] -; CHECK-DAG: std [[REG2]], -[[OFFSET1:[0-9]+]] +; CHECK: cmpld {{([0-9]+,)?}}4, 6 +; CHECK-DAG: std 6, 72(1) +; CHECK-DAG: std 5, 64(1) +; CHECK-DAG: std 6, -[[OFFSET1:[0-9]+]] ; CHECK-DAG: std 4, -[[OFFSET2:[0-9]+]] ; CHECK: ld 3, -[[OFFSET2]](1) ; CHECK: ld 3, -[[OFFSET1]](1) -; DARWIN32: _func2: -; DARWIN32: addi r[[REG1:[0-9]+]], r[[REGSP:[0-9]+]], 36 -; DARWIN32: lwz r[[REG2:[0-9]+]], 44(r[[REGSP]]) +; DARWIN32-LABEL: _func2 +; DARWIN32-DAG: addi r[[REG8:[0-9]+]], r[[REGSP:[0-9]+]], 36 +; DARWIN32-DAG: lwz r[[REG2:[0-9]+]], 44(r[[REGSP]]) ; DARWIN32: mr -; DARWIN32: mr r[[REG3:[0-9]+]], r[[REGA:[0-9]+]] -; DARWIN32: cmplw {{(cr[0-9]+,)?}}r[[REGA]], r[[REG2]] -; DARWIN32: stw r[[REG3]], -[[OFFSET1:[0-9]+]] -; DARWIN32: stw r[[REG2]], -[[OFFSET2:[0-9]+]] -; DARWIN32: lwz r3, -[[OFFSET1]] -; DARWIN32: lwz r3, -[[OFFSET2]] +; DARWIN32: mr r[[REG7:[0-9]+]], r5 +; DARWIN32-DAG: cmplw {{(cr[0-9]+,)?}}r5, r[[REG2]] +; DARWIN32-DAG: stw r[[REG7]], -[[OFFSET1:[0-9]+]] +; DARWIN32-DAG: stw r[[REG2]], -[[OFFSET2:[0-9]+]] +; DARWIN32-DAG: lwz r3, -[[OFFSET1]] +; DARWIN32-DAG: lwz r3, -[[OFFSET2]] + ; DARWIN64: _func2: ; DARWIN64: ld r[[REG2:[0-9]+]], 72(r1) ; DARWIN64: mr ; DARWIN64: mr r[[REG3:[0-9]+]], r[[REGA:[0-9]+]] ; DARWIN64: cmpld {{(cr[0-9]+,)?}}r[[REGA]], r[[REG2]] -; DARWIN64: std r[[REG3]], -[[OFFSET1:[0-9]+]] ; DARWIN64: std r[[REG2]], -[[OFFSET2:[0-9]+]] +; DARWIN64: std r[[REG3]], -[[OFFSET1:[0-9]+]] ; DARWIN64: ld r3, -[[OFFSET1]] ; DARWIN64: ld r3, -[[OFFSET2]] @@ -106,24 +107,24 @@ unequal: } ; CHECK-LABEL: func3: -; CHECK: ld [[REG3:[0-9]+]], 72(1) -; CHECK: ld [[REG4:[0-9]+]], 56(1) -; CHECK: cmpld {{([0-9]+,)?}}[[REG4]], [[REG3]] -; CHECK: std [[REG3]], -[[OFFSET1:[0-9]+]](1) -; CHECK: std [[REG4]], -[[OFFSET2:[0-9]+]](1) +; CHECK: cmpld {{([0-9]+,)?}}4, 6 +; CHECK-DAG: std 4, -[[OFFSET2:[0-9]+]](1) +; CHECK-DAG: std 6, -[[OFFSET1:[0-9]+]](1) ; CHECK: ld 3, -[[OFFSET2]](1) ; CHECK: ld 3, -[[OFFSET1]](1) -; DARWIN32: _func3: -; DARWIN32: addi r[[REG1:[0-9]+]], r[[REGSP:[0-9]+]], 36 -; DARWIN32: addi r[[REG2:[0-9]+]], r[[REGSP]], 24 -; DARWIN32: lwz r[[REG3:[0-9]+]], 44(r[[REGSP]]) -; DARWIN32: lwz r[[REG4:[0-9]+]], 32(r[[REGSP]]) -; DARWIN32: cmplw {{(cr[0-9]+,)?}}r[[REG4]], r[[REG3]] -; DARWIN32: stw r[[REG3]], -[[OFFSET1:[0-9]+]] -; DARWIN32: stw r[[REG4]], -[[OFFSET2:[0-9]+]] -; DARWIN32: lwz r3, -[[OFFSET2]] -; DARWIN32: lwz r3, -[[OFFSET1]] +; DARWIN32-LABEL: _func3: +; DARWIN32-DAG: stw r[[REG8:[0-9]+]], 44(r[[REGSP:[0-9]+]]) +; DARWIN32-DAG: stw r[[REG5:[0-9]+]], 32(r[[REGSP]]) +; DARWIN32-DAG: addi r[[REG5a:[0-9]+]], r[[REGSP:[0-9]+]], 36 +; DARWIN32-DAG: addi r[[REG8a:[0-9]+]], r[[REGSP]], 24 +; DARWIN32-DAG: lwz r[[REG5a:[0-9]+]], 44(r[[REGSP]]) +; DARWIN32-DAG: lwz r[[REG8a:[0-9]+]], 32(r[[REGSP]]) +; DARWIN32-DAG: cmplw {{(cr[0-9]+,)?}}r[[REG8a]], r[[REG5a]] +; DARWIN32-DAG: stw r[[REG5a]], -[[OFFSET1:[0-9]+]] +; DARWIN32-DAG: stw r[[REG8a]], -[[OFFSET2:[0-9]+]] +; DARWIN32-DAG: lwz r3, -[[OFFSET1:[0-9]+]] +; DARWIN32-DAG: lwz r3, -[[OFFSET2:[0-9]+]] ; DARWIN64: _func3: ; DARWIN64: ld r[[REG3:[0-9]+]], 72(r1) diff --git a/test/CodeGen/PowerPC/complex-return.ll b/test/CodeGen/PowerPC/complex-return.ll index f6097e65512..ec87a89b110 100644 --- a/test/CodeGen/PowerPC/complex-return.ll +++ b/test/CodeGen/PowerPC/complex-return.ll @@ 
 }

 ; CHECK-LABEL: foo:
-; CHECK: lfd 1
-; CHECK: lfd 2
-; CHECK: lfd 3
-; CHECK: lfd 4
+; CHECK-DAG: lfd 1
+; CHECK-DAG: lfd 2
+; CHECK-DAG: lfd 3
+; CHECK-DAG: lfd 4

 define { float, float } @oof() nounwind {
 entry:
@@ -50,6 +50,6 @@ entry:
 }

 ; CHECK-LABEL: oof:
-; CHECK: lfs 2
-; CHECK: lfs 1
+; CHECK-DAG: lfs 2
+; CHECK-DAG: lfs 1

diff --git a/test/CodeGen/PowerPC/jaggedstructs.ll b/test/CodeGen/PowerPC/jaggedstructs.ll
index b28b34d7814..6128316f45f 100644
--- a/test/CodeGen/PowerPC/jaggedstructs.ll
+++ b/test/CodeGen/PowerPC/jaggedstructs.ll
@@ -18,31 +18,31 @@ entry:
   ret void
 }

-; CHECK: std 6, 184(1)
-; CHECK: std 5, 176(1)
-; CHECK: std 4, 168(1)
-; CHECK: std 3, 160(1)
-; CHECK: lbz {{[0-9]+}}, 167(1)
-; CHECK: lhz {{[0-9]+}}, 165(1)
-; CHECK: stb {{[0-9]+}}, 55(1)
-; CHECK: sth {{[0-9]+}}, 53(1)
-; CHECK: lbz {{[0-9]+}}, 175(1)
-; CHECK: lwz {{[0-9]+}}, 171(1)
-; CHECK: stb {{[0-9]+}}, 63(1)
-; CHECK: stw {{[0-9]+}}, 59(1)
-; CHECK: lhz {{[0-9]+}}, 182(1)
-; CHECK: lwz {{[0-9]+}}, 178(1)
-; CHECK: sth {{[0-9]+}}, 70(1)
-; CHECK: stw {{[0-9]+}}, 66(1)
-; CHECK: lbz {{[0-9]+}}, 191(1)
-; CHECK: lhz {{[0-9]+}}, 189(1)
-; CHECK: lwz {{[0-9]+}}, 185(1)
-; CHECK: stb {{[0-9]+}}, 79(1)
-; CHECK: sth {{[0-9]+}}, 77(1)
-; CHECK: stw {{[0-9]+}}, 73(1)
-; CHECK: ld 6, 72(1)
-; CHECK: ld 5, 64(1)
-; CHECK: ld 4, 56(1)
-; CHECK: ld 3, 48(1)
+; CHECK-DAG: std 3, 160(1)
+; CHECK-DAG: std 6, 184(1)
+; CHECK-DAG: std 5, 176(1)
+; CHECK-DAG: std 4, 168(1)
+; CHECK-DAG: lbz {{[0-9]+}}, 167(1)
+; CHECK-DAG: lhz {{[0-9]+}}, 165(1)
+; CHECK-DAG: stb {{[0-9]+}}, 55(1)
+; CHECK-DAG: sth {{[0-9]+}}, 53(1)
+; CHECK-DAG: lbz {{[0-9]+}}, 175(1)
+; CHECK-DAG: lwz {{[0-9]+}}, 171(1)
+; CHECK-DAG: stb {{[0-9]+}}, 63(1)
+; CHECK-DAG: stw {{[0-9]+}}, 59(1)
+; CHECK-DAG: lhz {{[0-9]+}}, 182(1)
+; CHECK-DAG: lwz {{[0-9]+}}, 178(1)
+; CHECK-DAG: sth {{[0-9]+}}, 70(1)
+; CHECK-DAG: stw {{[0-9]+}}, 66(1)
+; CHECK-DAG: lbz {{[0-9]+}}, 191(1)
+; CHECK-DAG: lhz {{[0-9]+}}, 189(1)
+; CHECK-DAG: lwz {{[0-9]+}}, 185(1)
+; CHECK-DAG: stb {{[0-9]+}}, 79(1)
+; CHECK-DAG: sth {{[0-9]+}}, 77(1)
+; CHECK-DAG: stw {{[0-9]+}}, 73(1)
+; CHECK-DAG: ld 6, 72(1)
+; CHECK-DAG: ld 5, 64(1)
+; CHECK-DAG: ld 4, 56(1)
+; CHECK-DAG: ld 3, 48(1)

 declare void @check(%struct.S3* byval, %struct.S5* byval, %struct.S6* byval, %struct.S7* byval)
diff --git a/test/CodeGen/PowerPC/ppc64-align-long-double.ll b/test/CodeGen/PowerPC/ppc64-align-long-double.ll
index c3cccd5b293..d59dc64dcf8 100644
--- a/test/CodeGen/PowerPC/ppc64-align-long-double.ll
+++ b/test/CodeGen/PowerPC/ppc64-align-long-double.ll
@@ -1,6 +1,6 @@
 ; RUN: llc -verify-machineinstrs -mcpu=pwr7 -O0 -fast-isel=false -mattr=-vsx < %s | FileCheck %s
 ; RUN: llc -verify-machineinstrs -mcpu=pwr7 -O0 -fast-isel=false -mattr=+vsx < %s | FileCheck -check-prefix=CHECK-VSX %s
-; RUN: llc -verify-machineinstrs -mcpu=pwr9 -O0 -fast-isel=false -mattr=+vsx < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 -O0 -fast-isel=false -mattr=+vsx < %s | FileCheck -check-prefix=CHECK-P9 %s

 ; Verify internal alignment of long double in a struct. The double
 ; argument comes in in GPR3; GPR4 is skipped; GPRs 5 and 6 contain
@@ -19,19 +19,44 @@ entry:
   ret ppc_fp128 %0
 }

+; The additional stores arise because we forward the value along the
+; store->load->bitcast path, producing a store and a bitcast of the
+; same value. Since the target performs the bitcast through memory and
+; we no longer track the original address, the store must be done to a
+; fresh local stack slot.
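+;
+; To make that concrete, consider a reduced sketch in the same IR style
+; (hypothetical code for illustration only; %val, %slot, %cast and %d
+; are made-up names, not taken from this test):
+;
+;   store i64 %val, i64* %slot              ; value spilled to its slot
+;   %cast = bitcast i64* %slot to double*
+;   %d = load double, double* %cast         ; same bytes reread as f64
+;
+; After forwarding, %d is rebuilt from %val directly; because the
+; i64->f64 bitcast is lowered through memory, a fresh stack slot
+; (e.g. -16(1) below) receives the extra store/load pair.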
+
 ; CHECK-DAG: std 6, 72(1)
 ; CHECK-DAG: std 5, 64(1)
 ; CHECK-DAG: std 4, 56(1)
 ; CHECK-DAG: std 3, 48(1)
-; CHECK: lfd 1, 64(1)
-; CHECK: lfd 2, 72(1)
+
+; CHECK-DAG: std 5, -16(1)
+; CHECK-DAG: std 6, -8(1)
+; CHECK-DAG: lfd 1, -16(1)
+; CHECK-DAG: lfd 2, -8(1)
+
+; FIXMECHECK: lfd 1, 64(1)
+; FIXMECHECK: lfd 2, 72(1)

 ; CHECK-VSX-DAG: std 6, 72(1)
 ; CHECK-VSX-DAG: std 5, 64(1)
 ; CHECK-VSX-DAG: std 4, 56(1)
 ; CHECK-VSX-DAG: std 3, 48(1)
-; CHECK-VSX: li 3, 16
-; CHECK-VSX: addi 4, 1, 48
-; CHECK-VSX: lxsdx 1, 4, 3
-; CHECK-VSX: li 3, 24
-; CHECK-VSX: lxsdx 2, 4, 3
+; CHECK-VSX-DAG: std 5, -16(1)
+; CHECK-VSX-DAG: std 6, -8(1)
+; CHECK-VSX: addi 3, 1, -16
+; CHECK-VSX: lxsdx 1, 0, 3
+; CHECK-VSX: addi 3, 1, -8
+; CHECK-VSX: lxsdx 2, 0, 3
+
+; FIXME-VSX: addi 4, 1, 48
+; FIXME-VSX: lxsdx 1, 4, 3
+; FIXME-VSX: li 3, 24
+; FIXME-VSX: lxsdx 2, 4, 3
+
+; CHECK-P9: std 6, 72(1)
+; CHECK-P9: std 5, 64(1)
+; CHECK-P9: std 4, 56(1)
+; CHECK-P9: std 3, 48(1)
+; CHECK-P9: mtvsrd 1, 5
+; CHECK-P9: mtvsrd 2, 6
diff --git a/test/CodeGen/PowerPC/structsinmem.ll b/test/CodeGen/PowerPC/structsinmem.ll
index 3777f3ec5ba..01b0848e707 100644
--- a/test/CodeGen/PowerPC/structsinmem.ll
+++ b/test/CodeGen/PowerPC/structsinmem.ll
@@ -113,13 +113,13 @@ entry:
   %add13 = add nsw i32 %add11, %6
   ret i32 %add13

-; CHECK: lha {{[0-9]+}}, 126(1)
-; CHECK: lha {{[0-9]+}}, 132(1)
-; CHECK: lbz {{[0-9]+}}, 119(1)
-; CHECK: lwz {{[0-9]+}}, 140(1)
-; CHECK: lwz {{[0-9]+}}, 144(1)
-; CHECK: lwz {{[0-9]+}}, 152(1)
-; CHECK: lwz {{[0-9]+}}, 160(1)
+; CHECK-DAG: lha {{[0-9]+}}, 126(1)
+; CHECK-DAG: lha {{[0-9]+}}, 132(1)
+; CHECK-DAG: lbz {{[0-9]+}}, 119(1)
+; CHECK-DAG: lwz {{[0-9]+}}, 140(1)
+; CHECK-DAG: lwz {{[0-9]+}}, 144(1)
+; CHECK-DAG: lwz {{[0-9]+}}, 152(1)
+; CHECK-DAG: lwz {{[0-9]+}}, 160(1)
 }

 define i32 @caller2() nounwind {
@@ -205,11 +205,11 @@ entry:
   %add13 = add nsw i32 %add11, %6
   ret i32 %add13

-; CHECK: lha {{[0-9]+}}, 126(1)
-; CHECK: lha {{[0-9]+}}, 133(1)
-; CHECK: lbz {{[0-9]+}}, 119(1)
-; CHECK: lwz {{[0-9]+}}, 140(1)
-; CHECK: lwz {{[0-9]+}}, 147(1)
-; CHECK: lwz {{[0-9]+}}, 154(1)
-; CHECK: lwz {{[0-9]+}}, 161(1)
+; CHECK-DAG: lha {{[0-9]+}}, 126(1)
+; CHECK-DAG: lha {{[0-9]+}}, 133(1)
+; CHECK-DAG: lbz {{[0-9]+}}, 119(1)
+; CHECK-DAG: lwz {{[0-9]+}}, 140(1)
+; CHECK-DAG: lwz {{[0-9]+}}, 147(1)
+; CHECK-DAG: lwz {{[0-9]+}}, 154(1)
+; CHECK-DAG: lwz {{[0-9]+}}, 161(1)
 }
diff --git a/test/CodeGen/PowerPC/structsinregs.ll b/test/CodeGen/PowerPC/structsinregs.ll
index e27041dd4c8..54679f259e9 100644
--- a/test/CodeGen/PowerPC/structsinregs.ll
+++ b/test/CodeGen/PowerPC/structsinregs.ll
@@ -59,6 +59,7 @@ entry:
   %call = call i32 @callee1(%struct.s1* byval %p1, %struct.s2* byval %p2, %struct.s3* byval %p3, %struct.s4* byval %p4, %struct.s5* byval %p5, %struct.s6* byval %p6, %struct.s7* byval %p7)
   ret i32 %call

+; CHECK-LABEL: caller1
 ; CHECK: ld 9, 112(31)
 ; CHECK: ld 8, 120(31)
 ; CHECK: ld 7, 128(31)
@@ -97,20 +98,21 @@ entry:
   %add13 = add nsw i32 %add11, %6
   ret i32 %add13

-; CHECK: std 9, 96(1)
-; CHECK: std 8, 88(1)
-; CHECK: std 7, 80(1)
-; CHECK: stw 6, 76(1)
-; CHECK: stw 5, 68(1)
-; CHECK: sth 4, 62(1)
-; CHECK: stb 3, 55(1)
-; CHECK: lha {{[0-9]+}}, 62(1)
-; CHECK: lha {{[0-9]+}}, 68(1)
-; CHECK: lbz {{[0-9]+}}, 55(1)
-; CHECK: lwz {{[0-9]+}}, 76(1)
-; CHECK: lwz {{[0-9]+}}, 80(1)
-; CHECK: lwz {{[0-9]+}}, 88(1)
-; CHECK: lwz {{[0-9]+}}, 96(1)
+; CHECK-LABEL: callee1
+; CHECK-DAG: std 9, 96(1)
+; CHECK-DAG: std 8, 88(1)
+; CHECK-DAG: std 7, 80(1)
+; CHECK-DAG: stw 6, 76(1)
+; CHECK-DAG: stw 5, 68(1)
+; CHECK-DAG: sth 4, 62(1)
+; CHECK-DAG: stb 3, 55(1)
+; CHECK-DAG: lha {{[0-9]+}}, 62(1)
+; CHECK-DAG: lha {{[0-9]+}}, 68(1)
+; CHECK-DAG: lbz {{[0-9]+}}, 55(1)
+; CHECK-DAG: lwz {{[0-9]+}}, 76(1)
+; CHECK-DAG: lwz {{[0-9]+}}, 80(1)
+; CHECK-DAG: lwz {{[0-9]+}}, 88(1)
+; CHECK-DAG: lwz {{[0-9]+}}, 96(1)
 }

 define i32 @caller2() nounwind {
@@ -139,6 +141,7 @@ entry:
   %call = call i32 @callee2(%struct.t1* byval %p1, %struct.t2* byval %p2, %struct.t3* byval %p3, %struct.t4* byval %p4, %struct.t5* byval %p5, %struct.t6* byval %p6, %struct.t7* byval %p7)
   ret i32 %call

+; CHECK-LABEL: caller2
 ; CHECK: stb {{[0-9]+}}, 71(1)
 ; CHECK: sth {{[0-9]+}}, 69(1)
 ; CHECK: stb {{[0-9]+}}, 87(1)
@@ -184,18 +187,19 @@ entry:
   %add13 = add nsw i32 %add11, %6
   ret i32 %add13

-; CHECK: std 9, 96(1)
-; CHECK: std 8, 88(1)
-; CHECK: std 7, 80(1)
-; CHECK: stw 6, 76(1)
-; CHECK: std 5, 64(1)
-; CHECK: sth 4, 62(1)
-; CHECK: stb 3, 55(1)
-; CHECK: lha {{[0-9]+}}, 62(1)
-; CHECK: lha {{[0-9]+}}, 69(1)
-; CHECK: lbz {{[0-9]+}}, 55(1)
-; CHECK: lwz {{[0-9]+}}, 76(1)
-; CHECK: lwz {{[0-9]+}}, 83(1)
-; CHECK: lwz {{[0-9]+}}, 90(1)
-; CHECK: lwz {{[0-9]+}}, 97(1)
+; CHECK-LABEL: callee2
+; CHECK-DAG: std 9, 96(1)
+; CHECK-DAG: std 8, 88(1)
+; CHECK-DAG: std 7, 80(1)
+; CHECK-DAG: stw 6, 76(1)
+; CHECK-DAG: std 5, 64(1)
+; CHECK-DAG: sth 4, 62(1)
+; CHECK-DAG: stb 3, 55(1)
+; CHECK-DAG: lha {{[0-9]+}}, 62(1)
+; CHECK-DAG: lha {{[0-9]+}}, 69(1)
+; CHECK-DAG: lbz {{[0-9]+}}, 55(1)
+; CHECK-DAG: lwz {{[0-9]+}}, 76(1)
+; CHECK-DAG: lwz {{[0-9]+}}, 83(1)
+; CHECK-DAG: lwz {{[0-9]+}}, 90(1)
+; CHECK-DAG: lwz {{[0-9]+}}, 97(1)
 }
diff --git a/test/CodeGen/SystemZ/unaligned-01.ll b/test/CodeGen/SystemZ/unaligned-01.ll
index 94cad0e1743..2af1aa79a23 100644
--- a/test/CodeGen/SystemZ/unaligned-01.ll
+++ b/test/CodeGen/SystemZ/unaligned-01.ll
@@ -1,10 +1,7 @@
 ; Check that unaligned accesses are allowed in general. We check the
 ; few exceptions (like CRL) in their respective test files.
 ;
-; FIXME: -combiner-alias-analysis (the default for SystemZ) stops
-; f1 from being optimized.
-; RUN: llc < %s -mtriple=s390x-linux-gnu -combiner-alias-analysis=false \
-; RUN:   | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s

 ; Check that these four byte stores become a single word store.
 define void @f1(i8 *%ptr) {
diff --git a/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll b/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll
index 2f8e36b66b8..08349a31dfa 100644
--- a/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll
+++ b/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll
@@ -9,9 +9,9 @@
 define void @_Z19getClosestDiagonal3ii(%0* noalias sret, i32, i32) nounwind {
 ; CHECK: bl ___muldf3
-; CHECK: bl ___muldf3
 ; CHECK: beq LBB0
 ; CHECK: bl ___muldf3
+; CHECK: bl ___muldf3
;