summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHal Finkel <hfinkel@anl.gov>2015-02-25 21:36:59 +0000
committerHal Finkel <hfinkel@anl.gov>2015-02-25 21:36:59 +0000
commit7840990de8f102bf83ee297e74f225b109228512 (patch)
treeee115c8e4342e2b29c0348e033acbfe8564a77b3
parente9e16aa4a5fe2da7fb9621080cc9ef13a3604549 (diff)
[PowerPC] Make LDtocL and friends invariant loads
LDtocL, and other loads that roughly correspond to the TOC_ENTRY SDAG node, represent loads from the TOC, which is invariant. As a result, these loads can be hoisted out of loops, etc. In order to do this, we need to generate GOT-style MMOs for TOC_ENTRY, which requires treating it as a legitimate memory intrinsic node type. Once this is done, the MMO transfer is automatically handled for TableGen-driven instruction selection, and for nodes generated directly in PPCISelDAGToDAG, we need to transfer the MMOs manually. Also, we were not transferring MMOs associated with pre-increment loads, so do that too. Lastly, this fixes an exposed bug where R30 was not added as a defined operand of UpdateGBR. This problem was highlighted by an example (used to generate the test case) posted to llvmdev by Francois Pichet. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@230553 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/PowerPC/PPCISelDAGToDAG.cpp40
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.cpp36
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.h9
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.td3
-rw-r--r--test/CodeGen/PowerPC/ldtoc-inv.ll39
-rw-r--r--test/CodeGen/PowerPC/ppc64le-aggregates.ll40
-rw-r--r--test/CodeGen/PowerPC/tls-store2.ll5
-rw-r--r--test/CodeGen/PowerPC/vec-abi-align.ll32
8 files changed, 133 insertions, 71 deletions
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 0d553d32f31..b10e85437ba 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -223,6 +223,8 @@ private:
bool AllUsersSelectZero(SDNode *N);
void SwapAllSelectUsers(SDNode *N);
+
+ SDNode *transferMemOperands(SDNode *N, SDNode *Result);
};
}
@@ -315,7 +317,7 @@ SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
unsigned TempReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
BuildMI(FirstMBB, MBBI, dl,
- TII.get(PPC::UpdateGBR)).addReg(GlobalBaseReg)
+ TII.get(PPC::UpdateGBR), GlobalBaseReg)
.addReg(TempReg, RegState::Define).addReg(GlobalBaseReg);
MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
}
@@ -2342,6 +2344,14 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
return CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1));
}
+SDNode *PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
+ // Transfer memoperands.
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
+ cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
+ return Result;
+}
+
// Select - Convert the specified operand from a target-independent to a
// target-specific node if it hasn't already been changed.
@@ -2460,9 +2470,10 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
SDValue Chain = LD->getChain();
SDValue Base = LD->getBasePtr();
SDValue Ops[] = { Offset, Base, Chain };
- return CurDAG->getMachineNode(Opcode, dl, LD->getValueType(0),
- PPCLowering->getPointerTy(),
- MVT::Other, Ops);
+ return transferMemOperands(N, CurDAG->getMachineNode(Opcode, dl,
+ LD->getValueType(0),
+ PPCLowering->getPointerTy(),
+ MVT::Other, Ops));
} else {
unsigned Opcode;
bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
@@ -2497,9 +2508,10 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
SDValue Chain = LD->getChain();
SDValue Base = LD->getBasePtr();
SDValue Ops[] = { Base, Offset, Chain };
- return CurDAG->getMachineNode(Opcode, dl, LD->getValueType(0),
- PPCLowering->getPointerTy(),
- MVT::Other, Ops);
+ return transferMemOperands(N, CurDAG->getMachineNode(Opcode, dl,
+ LD->getValueType(0),
+ PPCLowering->getPointerTy(),
+ MVT::Other, Ops));
}
}
@@ -2851,8 +2863,8 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
"Only supported for 64-bit ABI and 32-bit SVR4");
if (PPCSubTarget->isSVR4ABI() && !PPCSubTarget->isPPC64()) {
SDValue GA = N->getOperand(0);
- return CurDAG->getMachineNode(PPC::LWZtoc, dl, MVT::i32, GA,
- N->getOperand(1));
+ return transferMemOperands(N, CurDAG->getMachineNode(PPC::LWZtoc, dl,
+ MVT::i32, GA, N->getOperand(1)));
}
// For medium and large code model, we generate two instructions as
@@ -2872,12 +2884,12 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
SDValue GA = N->getOperand(0);
SDValue TOCbase = N->getOperand(1);
SDNode *Tmp = CurDAG->getMachineNode(PPC::ADDIStocHA, dl, MVT::i64,
- TOCbase, GA);
+ TOCbase, GA);
if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA) ||
CModel == CodeModel::Large)
- return CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA,
- SDValue(Tmp, 0));
+ return transferMemOperands(N, CurDAG->getMachineNode(PPC::LDtocL, dl,
+ MVT::i64, GA, SDValue(Tmp, 0)));
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA)) {
const GlobalValue *GValue = G->getGlobal();
@@ -2885,8 +2897,8 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
(GValue->isDeclaration() || GValue->isWeakForLinker())) ||
GValue->isDeclaration() || GValue->hasCommonLinkage() ||
GValue->hasAvailableExternallyLinkage())
- return CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA,
- SDValue(Tmp, 0));
+ return transferMemOperands(N, CurDAG->getMachineNode(PPC::LDtocL, dl,
+ MVT::i64, GA, SDValue(Tmp, 0)));
}
return CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64,
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index f59cad50152..ae80fa3d761 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1821,6 +1821,19 @@ static void setUsesTOCBasePtr(SelectionDAG &DAG) {
setUsesTOCBasePtr(DAG.getMachineFunction());
}
+static SDValue getTOCEntry(SelectionDAG &DAG, SDLoc dl, bool Is64Bit,
+ SDValue GA) {
+ EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
+ SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT) :
+ DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
+
+ SDValue Ops[] = { GA, Reg };
+ return DAG.getMemIntrinsicNode(PPCISD::TOC_ENTRY, dl,
+ DAG.getVTList(VT, MVT::Other), Ops, VT,
+ MachinePointerInfo::getGOT(), 0, false, true,
+ false, 0);
+}
+
SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
SelectionDAG &DAG) const {
EVT PtrVT = Op.getValueType();
@@ -1832,8 +1845,7 @@ SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
setUsesTOCBasePtr(DAG);
SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0);
- return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(CP), MVT::i64, GA,
- DAG.getRegister(PPC::X2, MVT::i64));
+ return getTOCEntry(DAG, SDLoc(CP), true, GA);
}
unsigned MOHiFlag, MOLoFlag;
@@ -1843,9 +1855,7 @@ SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
if (isPIC && Subtarget.isSVR4ABI()) {
SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(),
PPCII::MO_PIC_FLAG);
- SDLoc DL(CP);
- return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i32, GA,
- DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT));
+ return getTOCEntry(DAG, SDLoc(CP), false, GA);
}
SDValue CPIHi =
@@ -1864,8 +1874,7 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
setUsesTOCBasePtr(DAG);
SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
- return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(JT), MVT::i64, GA,
- DAG.getRegister(PPC::X2, MVT::i64));
+ return getTOCEntry(DAG, SDLoc(JT), true, GA);
}
unsigned MOHiFlag, MOLoFlag;
@@ -1875,9 +1884,7 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
if (isPIC && Subtarget.isSVR4ABI()) {
SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
PPCII::MO_PIC_FLAG);
- SDLoc DL(GA);
- return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(JT), PtrVT, GA,
- DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT));
+ return getTOCEntry(DAG, SDLoc(GA), false, GA);
}
SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
@@ -1896,8 +1903,7 @@ SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
setUsesTOCBasePtr(DAG);
SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
- return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(BASDN), MVT::i64, GA,
- DAG.getRegister(PPC::X2, MVT::i64));
+ return getTOCEntry(DAG, SDLoc(BASDN), true, GA);
}
unsigned MOHiFlag, MOLoFlag;
@@ -2007,8 +2013,7 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
setUsesTOCBasePtr(DAG);
SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
- return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i64, GA,
- DAG.getRegister(PPC::X2, MVT::i64));
+ return getTOCEntry(DAG, DL, true, GA);
}
unsigned MOHiFlag, MOLoFlag;
@@ -2019,8 +2024,7 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
GSDN->getOffset(),
PPCII::MO_PIC_FLAG);
- return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i32, GA,
- DAG.getNode(PPCISD::GlobalBaseReg, DL, MVT::i32));
+ return getTOCEntry(DAG, DL, false, GA);
}
SDValue GAHi =
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index a2a824106b1..faa1e3f06ff 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -71,8 +71,6 @@ namespace llvm {
/// though these are usually folded into other nodes.
Hi, Lo,
- TOC_ENTRY,
-
/// The following two target-specific nodes are used for calls through
/// function pointers in the 64-bit SVR4 ABI.
@@ -337,7 +335,12 @@ namespace llvm {
/// QBRC, CHAIN = QVLFSb CHAIN, Ptr
/// The 4xf32 load used for v4i1 constants.
- QVLFSb
+ QVLFSb,
+
+ /// GPRC = TOC_ENTRY GA, TOC
+ /// Loads the entry for GA from the TOC, where the TOC base is given by
+ /// the last operand.
+ TOC_ENTRY
};
}
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index cee58a6bfab..1a045b1393d 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -119,7 +119,8 @@ def PPCfsel : SDNode<"PPCISD::FSEL",
def PPChi : SDNode<"PPCISD::Hi", SDTIntBinOp, []>;
def PPClo : SDNode<"PPCISD::Lo", SDTIntBinOp, []>;
-def PPCtoc_entry: SDNode<"PPCISD::TOC_ENTRY", SDTIntBinOp, [SDNPMayLoad]>;
+def PPCtoc_entry: SDNode<"PPCISD::TOC_ENTRY", SDTIntBinOp,
+ [SDNPMayLoad, SDNPMemOperand]>;
def PPCvmaddfp : SDNode<"PPCISD::VMADDFP", SDTFPTernaryOp, []>;
def PPCvnmsubfp : SDNode<"PPCISD::VNMSUBFP", SDTFPTernaryOp, []>;
diff --git a/test/CodeGen/PowerPC/ldtoc-inv.ll b/test/CodeGen/PowerPC/ldtoc-inv.ll
new file mode 100644
index 00000000000..550747c4695
--- /dev/null
+++ b/test/CodeGen/PowerPC/ldtoc-inv.ll
@@ -0,0 +1,39 @@
+; RUN: llc -mcpu=pwr7 < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@phasor = external constant [4096 x i32]
+
+; Function Attrs: nounwind
+define void @test(i32* nocapture %out, i32 zeroext %step_size) #0 {
+entry:
+ %shl = shl i32 %step_size, 2
+ %idxprom = zext i32 %shl to i64
+ br label %for.body
+
+; Make sure that the TOC load has been hoisted out of the loop.
+; CHECK-LABEL: @test
+; CHECK: ld {{[0-9]+}}, .LC{{[0-9]+}}@toc@l
+; CHECK: %for.body
+; CHECK: blr
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %0 = trunc i64 %indvars.iv to i32
+ %shl1 = shl i32 %0, %step_size
+ %idxprom2 = sext i32 %shl1 to i64
+ %arrayidx.sum = add nsw i64 %idxprom2, %idxprom
+ %arrayidx3 = getelementptr inbounds [4096 x i32]* @phasor, i64 0, i64 %arrayidx.sum
+ %1 = load i32* %arrayidx3, align 4
+ %arrayidx5 = getelementptr inbounds i32* %out, i64 %indvars.iv
+ store i32 %1, i32* %arrayidx5, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 4
+ %cmp = icmp slt i64 %indvars.iv.next, 1020
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+attributes #0 = { nounwind }
+
diff --git a/test/CodeGen/PowerPC/ppc64le-aggregates.ll b/test/CodeGen/PowerPC/ppc64le-aggregates.ll
index 4fe6f8db33f..3fce36ec23b 100644
--- a/test/CodeGen/PowerPC/ppc64le-aggregates.ll
+++ b/test/CodeGen/PowerPC/ppc64le-aggregates.ll
@@ -264,26 +264,26 @@ entry:
ret void
}
; CHECK-LABEL: @caller2
-; CHECK: ld [[REG:[0-9]+]], .LC
-; CHECK-DAG: lfs 1, 0([[REG]])
-; CHECK-DAG: lfs 2, 4([[REG]])
-; CHECK-DAG: lfs 3, 8([[REG]])
-; CHECK-DAG: lfs 4, 12([[REG]])
-; CHECK-DAG: lfs 5, 16([[REG]])
-; CHECK-DAG: lfs 6, 20([[REG]])
-; CHECK-DAG: lfs 7, 24([[REG]])
-; CHECK-DAG: lfs 8, 28([[REG]])
-; CHECK: ld [[REG:[0-9]+]], .LC
-; CHECK-DAG: lfs 9, 0([[REG]])
-; CHECK-DAG: lfs 10, 4([[REG]])
-; CHECK-DAG: lfs 11, 8([[REG]])
-; CHECK-DAG: lfs 12, 12([[REG]])
-; CHECK-DAG: lfs 13, 16([[REG]])
-; CHECK: ld [[REG:[0-9]+]], .LC
-; CHECK-DAG: lwz [[REG0:[0-9]+]], 0([[REG]])
-; CHECK-DAG: lwz [[REG1:[0-9]+]], 4([[REG]])
-; CHECK-DAG: sldi [[REG1]], [[REG1]], 32
-; CHECK-DAG: or 10, [[REG0]], [[REG1]]
+; CHECK: ld {{[0-9]+}}, .LC
+; CHECK-DAG: lfs 1, 0({{[0-9]+}})
+; CHECK-DAG: lfs 2, 4({{[0-9]+}})
+; CHECK-DAG: lfs 3, 8({{[0-9]+}})
+; CHECK-DAG: lfs 4, 12({{[0-9]+}})
+; CHECK-DAG: lfs 5, 16({{[0-9]+}})
+; CHECK-DAG: lfs 6, 20({{[0-9]+}})
+; CHECK-DAG: lfs 7, 24({{[0-9]+}})
+; CHECK-DAG: lfs 8, 28({{[0-9]+}})
+
+; CHECK-DAG: lfs 9, 0({{[0-9]+}})
+; CHECK-DAG: lfs 10, 4({{[0-9]+}})
+; CHECK-DAG: lfs 11, 8({{[0-9]+}})
+; CHECK-DAG: lfs 12, 12({{[0-9]+}})
+; CHECK-DAG: lfs 13, 16({{[0-9]+}})
+
+; CHECK-DAG: lwz [[REG0:[0-9]+]], 0({{[0-9]+}})
+; CHECK-DAG: lwz [[REG1:[0-9]+]], 4({{[0-9]+}})
+; CHECK-DAG: sldi [[REG2:[0-9]+]], [[REG1]], 32
+; CHECK-DAG: or 10, [[REG0]], [[REG2]]
; CHECK: bl test2
declare void @test2([8 x float], [5 x float], [2 x float])
diff --git a/test/CodeGen/PowerPC/tls-store2.ll b/test/CodeGen/PowerPC/tls-store2.ll
index a9c97b5e23e..e9aa17e8c0f 100644
--- a/test/CodeGen/PowerPC/tls-store2.ll
+++ b/test/CodeGen/PowerPC/tls-store2.ll
@@ -22,7 +22,10 @@ entry:
; CHECK: addi 3, {{[0-9]+}}, __once_callable@got@tlsgd@l
; CHECK: bl __tls_get_addr(__once_callable@tlsgd)
; CHECK-NEXT: nop
-; CHECK: std {{[0-9]+}}, 0(3)
+; FIXME: We could check here for 'std {{[0-9]+}}, 0(3)', but that no longer
+; works because, with new scheduling freedom, we create a copy of R3 based on the
+; initial scheduling, but don't coalesce it again after we move the instructions
+; so that the copy is no longer necessary.
; CHECK: addi 3, {{[0-9]+}}, __once_call@got@tlsgd@l
; CHECK: bl __tls_get_addr(__once_call@tlsgd)
; CHECK-NEXT: nop
diff --git a/test/CodeGen/PowerPC/vec-abi-align.ll b/test/CodeGen/PowerPC/vec-abi-align.ll
index 5075ff2b8c0..2ec57af3513 100644
--- a/test/CodeGen/PowerPC/vec-abi-align.ll
+++ b/test/CodeGen/PowerPC/vec-abi-align.ll
@@ -35,17 +35,17 @@ entry:
ret void
; CHECK-LABEL: @test2
-; CHECK: ld {{[0-9]+}}, 112(1)
-; CHECK: li [[REG16:[0-9]+]], 16
-; CHECK: addi [[REGB:[0-9]+]], 1, 112
-; CHECK: lvx 2, [[REGB]], [[REG16]]
+; CHECK-DAG: ld {{[0-9]+}}, 112(1)
+; CHECK-DAG: li [[REG16:[0-9]+]], 16
+; CHECK-DAG: addi [[REGB:[0-9]+]], 1, 112
+; CHECK-DAG: lvx 2, [[REGB]], [[REG16]]
; CHECK: blr
; CHECK-VSX-LABEL: @test2
-; CHECK-VSX: ld {{[0-9]+}}, 112(1)
-; CHECK-VSX: li [[REG16:[0-9]+]], 16
-; CHECK-VSX: addi [[REGB:[0-9]+]], 1, 112
-; CHECK-VSX: lxvw4x {{[0-9]+}}, [[REGB]], [[REG16]]
+; CHECK-VSX-DAG: ld {{[0-9]+}}, 112(1)
+; CHECK-VSX-DAG: li [[REG16:[0-9]+]], 16
+; CHECK-VSX-DAG: addi [[REGB:[0-9]+]], 1, 112
+; CHECK-VSX-DAG: lxvw4x {{[0-9]+}}, [[REGB]], [[REG16]]
; CHECK-VSX: blr
}
@@ -61,17 +61,17 @@ entry:
ret void
; CHECK-LABEL: @test3
-; CHECK: ld {{[0-9]+}}, 128(1)
-; CHECK: li [[REG16:[0-9]+]], 16
-; CHECK: addi [[REGB:[0-9]+]], 1, 128
-; CHECK: lvx 2, [[REGB]], [[REG16]]
+; CHECK-DAG: ld {{[0-9]+}}, 128(1)
+; CHECK-DAG: li [[REG16:[0-9]+]], 16
+; CHECK-DAG: addi [[REGB:[0-9]+]], 1, 128
+; CHECK-DAG: lvx 2, [[REGB]], [[REG16]]
; CHECK: blr
; CHECK-VSX-LABEL: @test3
-; CHECK-VSX: ld {{[0-9]+}}, 128(1)
-; CHECK-VSX: li [[REG16:[0-9]+]], 16
-; CHECK-VSX: addi [[REGB:[0-9]+]], 1, 128
-; CHECK-VSX: lxvw4x {{[0-9]+}}, [[REGB]], [[REG16]]
+; CHECK-VSX-DAG: ld {{[0-9]+}}, 128(1)
+; CHECK-VSX-DAG: li [[REG16:[0-9]+]], 16
+; CHECK-VSX-DAG: addi [[REGB:[0-9]+]], 1, 128
+; CHECK-VSX-DAG: lxvw4x {{[0-9]+}}, [[REGB]], [[REG16]]
; CHECK-VSX: blr
}