summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp 4
-rw-r--r-- lib/Target/PowerPC/PPCISelLowering.cpp 43
-rw-r--r-- lib/Target/PowerPC/PPCISelLowering.h 6
-rw-r--r-- lib/Target/PowerPC/PPCInstrInfo.td 12
-rw-r--r-- test/CodeGen/AArch64/atomic-ops-lse.ll 43
-rw-r--r-- test/CodeGen/ARM/atomic-cmpxchg.ll 3
-rw-r--r-- test/CodeGen/ARM/cmpxchg-O0.ll 6
-rw-r--r-- test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll 94
-rw-r--r-- test/CodeGen/PowerPC/atomics-regression.ll 40
9 files changed, 240 insertions, 11 deletions
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index bb1dc17b7a1..b566c232cbc 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -2965,12 +2965,12 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
case ISD::ZERO_EXTEND:
LHS = DAG.getNode(ISD::AssertZext, dl, OuterType, Res,
DAG.getValueType(AtomicType));
- RHS = DAG.getNode(ISD::ZERO_EXTEND, dl, OuterType, Node->getOperand(2));
+ RHS = DAG.getZeroExtendInReg(Node->getOperand(2), dl, AtomicType);
ExtRes = LHS;
break;
case ISD::ANY_EXTEND:
LHS = DAG.getZeroExtendInReg(Res, dl, AtomicType);
- RHS = DAG.getNode(ISD::ZERO_EXTEND, dl, OuterType, Node->getOperand(2));
+ RHS = DAG.getZeroExtendInReg(Node->getOperand(2), dl, AtomicType);
break;
default:
llvm_unreachable("Invalid atomic op extension");
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index f9de65fcb1d..f0e8b11a3d9 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -142,6 +142,9 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
+ // Sub-word ATOMIC_CMP_SWAP needs to ensure that the input is zero-extended.
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);
+
// PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
for (MVT VT : MVT::integer_valuetypes()) {
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
@@ -1154,6 +1157,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::Hi: return "PPCISD::Hi";
case PPCISD::Lo: return "PPCISD::Lo";
case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
+ case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8";
+ case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";
case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET";
case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
@@ -8834,6 +8839,42 @@ SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
return Op;
}
+// ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its compare input (operand 2)
+// since it is compared to an atomically loaded value (atomic loads zero-extend).
+SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
+ "Expecting an atomic compare-and-swap here.");
+ SDLoc dl(Op);
+ auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());
+ EVT MemVT = AtomicNode->getMemoryVT(); // Width of the memory access itself.
+ if (MemVT.getSizeInBits() >= 32) // Only sub-word accesses need the fix-up.
+ return Op; // Returned unchanged.
+
+ SDValue CmpOp = Op.getOperand(2); // The value compared against memory.
+ // If the bits above the memory width are already known zero, leave it alone.
+ auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());
+ if (DAG.MaskedValueIsZero(CmpOp, HighBits))
+ return Op; // Returned unchanged.
+
+ // Clear the high bits of the compare operand, keeping only the low MemVT bits.
+ unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
+ SDValue NewCmpOp =
+ DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,
+ DAG.getConstant(MaskVal, dl, MVT::i32));
+
+ // Copy every operand, then swap in the masked compare operand at index 2.
+ SmallVector<SDValue, 4> Ops;
+ for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
+ Ops.push_back(AtomicNode->getOperand(i));
+ Ops[2] = NewCmpOp;
+ MachineMemOperand *MMO = AtomicNode->getMemOperand(); // Reused for new node.
+ SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other); // i32 result plus chain.
+ auto NodeTy = // i8 picks the _8 node; any other sub-word type picks _16.
+ (MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;
+ return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
+}
+
SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
@@ -9325,6 +9366,8 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
return LowerREM(Op, DAG);
case ISD::BSWAP:
return LowerBSWAP(Op, DAG);
+ case ISD::ATOMIC_CMP_SWAP:
+ return LowerATOMIC_CMP_SWAP(Op, DAG);
}
}
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index b119e5b4a56..b3215a84829 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -430,6 +430,11 @@ namespace llvm {
/// The 4xf32 load used for v4i1 constants.
QVLFSb,
+ /// ATOMIC_CMP_SWAP - the exact same as the target-independent nodes
+ /// except they ensure that the compare input is zero-extended for
+ /// sub-word versions because the atomic loads zero-extend.
+ ATOMIC_CMP_SWAP_8, ATOMIC_CMP_SWAP_16,
+
/// GPRC = TOC_ENTRY GA, TOC
/// Loads the entry for GA from the TOC, where the TOC base is given by
/// the last operand.
@@ -955,6 +960,7 @@ namespace llvm {
SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerREM(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBSWAP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index a932d05b24e..43dcc4479cf 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -257,6 +257,13 @@ def PPCvcmp_o : SDNode<"PPCISD::VCMPo", SDT_PPCvcmp, [SDNPOutGlue]>;
def PPCcondbranch : SDNode<"PPCISD::COND_BRANCH", SDT_PPCcondbr,
[SDNPHasChain, SDNPOptInGlue]>;
+// PPC-specific atomic operations: sub-word (8/16-bit) compare-and-swap nodes.
+def PPCatomicCmpSwap_8 :
+ SDNode<"PPCISD::ATOMIC_CMP_SWAP_8", SDTAtomic3,
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
+def PPCatomicCmpSwap_16 :
+ SDNode<"PPCISD::ATOMIC_CMP_SWAP_16", SDTAtomic3,
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
def PPClbrx : SDNode<"PPCISD::LBRX", SDT_PPClbrx,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def PPCstbrx : SDNode<"PPCISD::STBRX", SDT_PPCstbrx,
@@ -1710,6 +1717,11 @@ let usesCustomInserter = 1 in {
}
}
+def : Pat<(PPCatomicCmpSwap_8 xoaddr:$ptr, i32:$old, i32:$new),
+ (ATOMIC_CMP_SWAP_I8 xoaddr:$ptr, i32:$old, i32:$new)>;
+def : Pat<(PPCatomicCmpSwap_16 xoaddr:$ptr, i32:$old, i32:$new),
+ (ATOMIC_CMP_SWAP_I16 xoaddr:$ptr, i32:$old, i32:$new)>;
+
// Instructions to support atomic operations
let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in {
def LBARX : XForm_1<31, 52, (outs gprc:$rD), (ins memrr:$src),
diff --git a/test/CodeGen/AArch64/atomic-ops-lse.ll b/test/CodeGen/AArch64/atomic-ops-lse.ll
index 49f716547b1..1a5cd2dc423 100644
--- a/test/CodeGen/AArch64/atomic-ops-lse.ll
+++ b/test/CodeGen/AArch64/atomic-ops-lse.ll
@@ -629,12 +629,27 @@ define i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind {
; CHECK-NOT: dmb
; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
+; CHECK-NEXT: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
+; CHECK-NEXT: casab w0, w1, [x[[ADDR]]]
+; CHECK-NEXT: ret
+
+ ret i8 %old
+}
+
+define i1 @test_atomic_cmpxchg_i8_1(i8 %wanted, i8 %new) nounwind {
+; CHECK-LABEL: test_atomic_cmpxchg_i8_1:
+ %pair = cmpxchg i8* @var8, i8 %wanted, i8 %new acquire acquire
+ %success = extractvalue { i8, i1 } %pair, 1
-; CHECK: casab w[[NEW:[0-9]+]], w[[OLD:[0-9]+]], [x[[ADDR]]]
; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
- ret i8 %old
+; CHECK: casab w[[NEW:[0-9]+]], w1, [x[[ADDR]]]
+; CHECK-NEXT: cmp w[[NEW]], w0, uxtb
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ret
+ ret i1 %success
}
define i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind {
@@ -644,12 +659,28 @@ define i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind {
; CHECK-NOT: dmb
; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
+; CHECK-NEXT: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
+; CHECK-NEXT: casah w0, w1, [x[[ADDR]]]
+; CHECK-NEXT: ret
+
+ ret i16 %old
+}
+
+define i1 @test_atomic_cmpxchg_i16_1(i16 %wanted, i16 %new) nounwind {
+; CHECK-LABEL: test_atomic_cmpxchg_i16_1:
+ %pair = cmpxchg i16* @var16, i16 %wanted, i16 %new acquire acquire
+ %success = extractvalue { i16, i1 } %pair, 1
-; CHECK: casah w0, w1, [x[[ADDR]]]
; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
+; CHECK-NEXT: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
- ret i16 %old
+; CHECK: casah w[[NEW:[0-9]+]], w1, [x[[ADDR]]]
+; CHECK-NEXT: cmp w[[NEW]], w0, uxth
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ret
+
+ ret i1 %success
}
define i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind {
diff --git a/test/CodeGen/ARM/atomic-cmpxchg.ll b/test/CodeGen/ARM/atomic-cmpxchg.ll
index a136e44fc19..fec11667708 100644
--- a/test/CodeGen/ARM/atomic-cmpxchg.ll
+++ b/test/CodeGen/ARM/atomic-cmpxchg.ll
@@ -49,9 +49,10 @@ entry:
; CHECK-THUMBV6: mov [[EXPECTED:r[0-9]+]], r1
; CHECK-THUMBV6-NEXT: bl __sync_val_compare_and_swap_1
; CHECK-THUMBV6-NEXT: mov [[RES:r[0-9]+]], r0
+; CHECK-THUMBV6-NEXT: uxtb [[EXPECTED_ZEXT:r[0-9]+]], [[EXPECTED]]
; CHECK-THUMBV6-NEXT: movs r0, #1
; CHECK-THUMBV6-NEXT: movs [[ZERO:r[0-9]+]], #0
-; CHECK-THUMBV6-NEXT: cmp [[RES]], [[EXPECTED]]
+; CHECK-THUMBV6-NEXT: cmp [[RES]], [[EXPECTED_ZEXT]]
; CHECK-THUMBV6-NEXT: beq [[END:.LBB[0-9_]+]]
; CHECK-THUMBV6-NEXT: mov r0, [[ZERO]]
; CHECK-THUMBV6-NEXT: [[END]]:
diff --git a/test/CodeGen/ARM/cmpxchg-O0.ll b/test/CodeGen/ARM/cmpxchg-O0.ll
index f8ad2bbbbe0..b49378d6702 100644
--- a/test/CodeGen/ARM/cmpxchg-O0.ll
+++ b/test/CodeGen/ARM/cmpxchg-O0.ll
@@ -17,7 +17,8 @@ define { i8, i1 } @test_cmpxchg_8(i8* %addr, i8 %desired, i8 %new) nounwind {
; CHECK: cmp{{(\.w)?}} [[STATUS]], #0
; CHECK: bne [[RETRY]]
; CHECK: [[DONE]]:
-; CHECK: cmp{{(\.w)?}} [[OLD]], [[DESIRED]]
+; CHECK: uxtb [[DESIRED_ZEXT:r[0-9]+]], [[DESIRED]]
+; CHECK: cmp{{(\.w)?}} [[OLD]], [[DESIRED_ZEXT]]
; CHECK: {{moveq|movweq}} {{r[0-9]+}}, #1
; CHECK: dmb ish
%res = cmpxchg i8* %addr, i8 %desired, i8 %new seq_cst monotonic
@@ -36,7 +37,8 @@ define { i16, i1 } @test_cmpxchg_16(i16* %addr, i16 %desired, i16 %new) nounwind
; CHECK: cmp{{(\.w)?}} [[STATUS]], #0
; CHECK: bne [[RETRY]]
; CHECK: [[DONE]]:
-; CHECK: cmp{{(\.w)?}} [[OLD]], [[DESIRED]]
+; CHECK: uxth [[DESIRED_ZEXT:r[0-9]+]], [[DESIRED]]
+; CHECK: cmp{{(\.w)?}} [[OLD]], [[DESIRED_ZEXT]]
; CHECK: {{moveq|movweq}} {{r[0-9]+}}, #1
; CHECK: dmb ish
%res = cmpxchg i16* %addr, i16 %desired, i16 %new seq_cst monotonic
diff --git a/test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll b/test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll
new file mode 100644
index 00000000000..093899690d0
--- /dev/null
+++ b/test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll
@@ -0,0 +1,94 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; Make sure that a negative value for the compare-and-swap is zero extended
+; from i8/i16 to i32 since it will be compared for equality.
+; RUN: llc -mtriple=powerpc64le-linux-gnu -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=powerpc64le-linux-gnu -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-P7
+
+@str = private unnamed_addr constant [46 x i8] c"FAILED: __atomic_compare_exchange_n() failed.\00"
+@str.1 = private unnamed_addr constant [59 x i8] c"FAILED: __atomic_compare_exchange_n() set the wrong value.\00"
+@str.2 = private unnamed_addr constant [7 x i8] c"PASSED\00"
+
+define signext i32 @main() {
+; CHECK-LABEL: main:
+; CHECK: li 3, -32477
+; CHECK: lis 12, 0
+; CHECK: li 6, 234
+; CHECK: sth 3, 46(1)
+; CHECK: ori 4, 12, 33059
+; CHECK: sync
+; CHECK: .LBB0_1: # %L.entry
+; CHECK: lharx 3, 0, 5
+; CHECK: cmpw 4, 3
+; CHECK: bne 0, .LBB0_3
+; CHECK: sthcx. 6, 0, 5
+; CHECK: bne 0, .LBB0_1
+; CHECK: b .LBB0_4
+; CHECK: .LBB0_3: # %L.entry
+; CHECK: sthcx. 3, 0, 5
+; CHECK: .LBB0_4: # %L.entry
+; CHECK: cmplwi 3, 33059
+; CHECK: lwsync
+; CHECK: lhz 3, 46(1)
+; CHECK: cmplwi 3, 234
+;
+; CHECK-P7-LABEL: main:
+; CHECK-P7: lis 4, 0
+; CHECK-P7: li 7, 0
+; CHECK-P7: li 3, -32477
+; CHECK-P7: sth 3, 46(1)
+; CHECK-P7: li 5, 234
+; CHECK-P7: ori 4, 4, 33059
+; CHECK-P7: rlwinm 3, 6, 3, 27, 27
+; CHECK-P7: ori 7, 7, 65535
+; CHECK-P7: sync
+; CHECK-P7: slw 8, 5, 3
+; CHECK-P7: slw 5, 7, 3
+; CHECK-P7: slw 9, 4, 3
+; CHECK-P7: and 7, 8, 5
+; CHECK-P7: rldicr 4, 6, 0, 61
+; CHECK-P7: and 8, 9, 5
+; CHECK-P7: .LBB0_1: # %L.entry
+; CHECK-P7: lwarx 9, 0, 4
+; CHECK-P7: and 6, 9, 5
+; CHECK-P7: cmpw 0, 6, 8
+; CHECK-P7: bne 0, .LBB0_3
+; CHECK-P7: andc 9, 9, 5
+; CHECK-P7: or 9, 9, 7
+; CHECK-P7: stwcx. 9, 0, 4
+; CHECK-P7: bne 0, .LBB0_1
+; CHECK-P7: b .LBB0_4
+; CHECK-P7: .LBB0_3: # %L.entry
+; CHECK-P7: stwcx. 9, 0, 4
+; CHECK-P7: .LBB0_4: # %L.entry
+; CHECK-P7: srw 3, 6, 3
+; CHECK-P7: lwsync
+; CHECK-P7: cmplwi 3, 33059
+; CHECK-P7: lhz 3, 46(1)
+; CHECK-P7: cmplwi 3, 234
+L.entry:
+ %value.addr = alloca i16, align 2
+ store i16 -32477, i16* %value.addr, align 2
+ %0 = cmpxchg i16* %value.addr, i16 -32477, i16 234 seq_cst seq_cst
+ %1 = extractvalue { i16, i1 } %0, 1
+ br i1 %1, label %L.B0000, label %L.B0003
+
+L.B0003: ; preds = %L.entry
+ %puts = call i32 @puts(i8* getelementptr inbounds ([46 x i8], [46 x i8]* @str, i64 0, i64 0))
+ ret i32 1
+
+L.B0000: ; preds = %L.entry
+ %2 = load i16, i16* %value.addr, align 2
+ %3 = icmp eq i16 %2, 234
+ br i1 %3, label %L.B0001, label %L.B0005
+
+L.B0005: ; preds = %L.B0000
+ %puts1 = call i32 @puts(i8* getelementptr inbounds ([59 x i8], [59 x i8]* @str.1, i64 0, i64 0))
+ ret i32 1
+
+L.B0001: ; preds = %L.B0000
+ %puts2 = call i32 @puts(i8* getelementptr inbounds ([7 x i8], [7 x i8]* @str.2, i64 0, i64 0))
+ ret i32 0
+}
+
+; Function Attrs: nounwind
+declare i32 @puts(i8* nocapture readonly) #0
diff --git a/test/CodeGen/PowerPC/atomics-regression.ll b/test/CodeGen/PowerPC/atomics-regression.ll
index 7079f6dd52e..daf55fc426d 100644
--- a/test/CodeGen/PowerPC/atomics-regression.ll
+++ b/test/CodeGen/PowerPC/atomics-regression.ll
@@ -404,6 +404,7 @@ define void @test39() {
define void @test40(i8* %ptr, i8 %cmp, i8 %val) {
; PPC64LE-LABEL: test40:
; PPC64LE: # %bb.0:
+; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31
; PPC64LE-NEXT: b .LBB40_2
; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB40_1:
@@ -423,6 +424,7 @@ define void @test40(i8* %ptr, i8 %cmp, i8 %val) {
define void @test41(i8* %ptr, i8 %cmp, i8 %val) {
; PPC64LE-LABEL: test41:
; PPC64LE: # %bb.0:
+; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31
; PPC64LE-NEXT: .LBB41_1:
; PPC64LE-NEXT: lbarx 6, 0, 3
; PPC64LE-NEXT: cmpw 4, 6
@@ -444,6 +446,7 @@ define void @test41(i8* %ptr, i8 %cmp, i8 %val) {
define void @test42(i8* %ptr, i8 %cmp, i8 %val) {
; PPC64LE-LABEL: test42:
; PPC64LE: # %bb.0:
+; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31
; PPC64LE-NEXT: .LBB42_1:
; PPC64LE-NEXT: lbarx 6, 0, 3
; PPC64LE-NEXT: cmpw 4, 6
@@ -465,6 +468,7 @@ define void @test42(i8* %ptr, i8 %cmp, i8 %val) {
define void @test43(i8* %ptr, i8 %cmp, i8 %val) {
; PPC64LE-LABEL: test43:
; PPC64LE: # %bb.0:
+; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: b .LBB43_2
; PPC64LE-NEXT: .p2align 5
@@ -485,6 +489,7 @@ define void @test43(i8* %ptr, i8 %cmp, i8 %val) {
define void @test44(i8* %ptr, i8 %cmp, i8 %val) {
; PPC64LE-LABEL: test44:
; PPC64LE: # %bb.0:
+; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: b .LBB44_2
; PPC64LE-NEXT: .p2align 5
@@ -505,6 +510,7 @@ define void @test44(i8* %ptr, i8 %cmp, i8 %val) {
define void @test45(i8* %ptr, i8 %cmp, i8 %val) {
; PPC64LE-LABEL: test45:
; PPC64LE: # %bb.0:
+; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: .LBB45_1:
; PPC64LE-NEXT: lbarx 6, 0, 3
@@ -527,6 +533,7 @@ define void @test45(i8* %ptr, i8 %cmp, i8 %val) {
define void @test46(i8* %ptr, i8 %cmp, i8 %val) {
; PPC64LE-LABEL: test46:
; PPC64LE: # %bb.0:
+; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: .LBB46_1:
; PPC64LE-NEXT: lbarx 6, 0, 3
@@ -549,6 +556,7 @@ define void @test46(i8* %ptr, i8 %cmp, i8 %val) {
define void @test47(i8* %ptr, i8 %cmp, i8 %val) {
; PPC64LE-LABEL: test47:
; PPC64LE: # %bb.0:
+; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31
; PPC64LE-NEXT: sync
; PPC64LE-NEXT: .LBB47_1:
; PPC64LE-NEXT: lbarx 6, 0, 3
@@ -571,6 +579,7 @@ define void @test47(i8* %ptr, i8 %cmp, i8 %val) {
define void @test48(i8* %ptr, i8 %cmp, i8 %val) {
; PPC64LE-LABEL: test48:
; PPC64LE: # %bb.0:
+; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31
; PPC64LE-NEXT: sync
; PPC64LE-NEXT: .LBB48_1:
; PPC64LE-NEXT: lbarx 6, 0, 3
@@ -593,6 +602,7 @@ define void @test48(i8* %ptr, i8 %cmp, i8 %val) {
define void @test49(i8* %ptr, i8 %cmp, i8 %val) {
; PPC64LE-LABEL: test49:
; PPC64LE: # %bb.0:
+; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31
; PPC64LE-NEXT: sync
; PPC64LE-NEXT: .LBB49_1:
; PPC64LE-NEXT: lbarx 6, 0, 3
@@ -615,6 +625,7 @@ define void @test49(i8* %ptr, i8 %cmp, i8 %val) {
define void @test50(i16* %ptr, i16 %cmp, i16 %val) {
; PPC64LE-LABEL: test50:
; PPC64LE: # %bb.0:
+; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31
; PPC64LE-NEXT: b .LBB50_2
; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB50_1:
@@ -634,6 +645,7 @@ define void @test50(i16* %ptr, i16 %cmp, i16 %val) {
define void @test51(i16* %ptr, i16 %cmp, i16 %val) {
; PPC64LE-LABEL: test51:
; PPC64LE: # %bb.0:
+; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31
; PPC64LE-NEXT: .LBB51_1:
; PPC64LE-NEXT: lharx 6, 0, 3
; PPC64LE-NEXT: cmpw 4, 6
@@ -655,6 +667,7 @@ define void @test51(i16* %ptr, i16 %cmp, i16 %val) {
define void @test52(i16* %ptr, i16 %cmp, i16 %val) {
; PPC64LE-LABEL: test52:
; PPC64LE: # %bb.0:
+; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31
; PPC64LE-NEXT: .LBB52_1:
; PPC64LE-NEXT: lharx 6, 0, 3
; PPC64LE-NEXT: cmpw 4, 6
@@ -676,6 +689,7 @@ define void @test52(i16* %ptr, i16 %cmp, i16 %val) {
define void @test53(i16* %ptr, i16 %cmp, i16 %val) {
; PPC64LE-LABEL: test53:
; PPC64LE: # %bb.0:
+; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: b .LBB53_2
; PPC64LE-NEXT: .p2align 5
@@ -696,6 +710,7 @@ define void @test53(i16* %ptr, i16 %cmp, i16 %val) {
define void @test54(i16* %ptr, i16 %cmp, i16 %val) {
; PPC64LE-LABEL: test54:
; PPC64LE: # %bb.0:
+; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: b .LBB54_2
; PPC64LE-NEXT: .p2align 5
@@ -716,6 +731,7 @@ define void @test54(i16* %ptr, i16 %cmp, i16 %val) {
define void @test55(i16* %ptr, i16 %cmp, i16 %val) {
; PPC64LE-LABEL: test55:
; PPC64LE: # %bb.0:
+; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: .LBB55_1:
; PPC64LE-NEXT: lharx 6, 0, 3
@@ -738,6 +754,7 @@ define void @test55(i16* %ptr, i16 %cmp, i16 %val) {
define void @test56(i16* %ptr, i16 %cmp, i16 %val) {
; PPC64LE-LABEL: test56:
; PPC64LE: # %bb.0:
+; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: .LBB56_1:
; PPC64LE-NEXT: lharx 6, 0, 3
@@ -760,6 +777,7 @@ define void @test56(i16* %ptr, i16 %cmp, i16 %val) {
define void @test57(i16* %ptr, i16 %cmp, i16 %val) {
; PPC64LE-LABEL: test57:
; PPC64LE: # %bb.0:
+; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31
; PPC64LE-NEXT: sync
; PPC64LE-NEXT: .LBB57_1:
; PPC64LE-NEXT: lharx 6, 0, 3
@@ -782,6 +800,7 @@ define void @test57(i16* %ptr, i16 %cmp, i16 %val) {
define void @test58(i16* %ptr, i16 %cmp, i16 %val) {
; PPC64LE-LABEL: test58:
; PPC64LE: # %bb.0:
+; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31
; PPC64LE-NEXT: sync
; PPC64LE-NEXT: .LBB58_1:
; PPC64LE-NEXT: lharx 6, 0, 3
@@ -804,6 +823,7 @@ define void @test58(i16* %ptr, i16 %cmp, i16 %val) {
define void @test59(i16* %ptr, i16 %cmp, i16 %val) {
; PPC64LE-LABEL: test59:
; PPC64LE: # %bb.0:
+; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31
; PPC64LE-NEXT: sync
; PPC64LE-NEXT: .LBB59_1:
; PPC64LE-NEXT: lharx 6, 0, 3
@@ -1248,6 +1268,7 @@ define void @test79(i64* %ptr, i64 %cmp, i64 %val) {
define void @test80(i8* %ptr, i8 %cmp, i8 %val) {
; PPC64LE-LABEL: test80:
; PPC64LE: # %bb.0:
+; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31
; PPC64LE-NEXT: b .LBB80_2
; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB80_1:
@@ -1267,6 +1288,7 @@ define void @test80(i8* %ptr, i8 %cmp, i8 %val) {
define void @test81(i8* %ptr, i8 %cmp, i8 %val) {
; PPC64LE-LABEL: test81:
; PPC64LE: # %bb.0:
+; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31
; PPC64LE-NEXT: .LBB81_1:
; PPC64LE-NEXT: lbarx 6, 0, 3
; PPC64LE-NEXT: cmpw 4, 6
@@ -1288,6 +1310,7 @@ define void @test81(i8* %ptr, i8 %cmp, i8 %val) {
define void @test82(i8* %ptr, i8 %cmp, i8 %val) {
; PPC64LE-LABEL: test82:
; PPC64LE: # %bb.0:
+; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31
; PPC64LE-NEXT: .LBB82_1:
; PPC64LE-NEXT: lbarx 6, 0, 3
; PPC64LE-NEXT: cmpw 4, 6
@@ -1309,6 +1332,7 @@ define void @test82(i8* %ptr, i8 %cmp, i8 %val) {
define void @test83(i8* %ptr, i8 %cmp, i8 %val) {
; PPC64LE-LABEL: test83:
; PPC64LE: # %bb.0:
+; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: b .LBB83_2
; PPC64LE-NEXT: .p2align 5
@@ -1329,6 +1353,7 @@ define void @test83(i8* %ptr, i8 %cmp, i8 %val) {
define void @test84(i8* %ptr, i8 %cmp, i8 %val) {
; PPC64LE-LABEL: test84:
; PPC64LE: # %bb.0:
+; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: b .LBB84_2
; PPC64LE-NEXT: .p2align 5
@@ -1349,6 +1374,7 @@ define void @test84(i8* %ptr, i8 %cmp, i8 %val) {
define void @test85(i8* %ptr, i8 %cmp, i8 %val) {
; PPC64LE-LABEL: test85:
; PPC64LE: # %bb.0:
+; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: .LBB85_1:
; PPC64LE-NEXT: lbarx 6, 0, 3
@@ -1371,6 +1397,7 @@ define void @test85(i8* %ptr, i8 %cmp, i8 %val) {
define void @test86(i8* %ptr, i8 %cmp, i8 %val) {
; PPC64LE-LABEL: test86:
; PPC64LE: # %bb.0:
+; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: .LBB86_1:
; PPC64LE-NEXT: lbarx 6, 0, 3
@@ -1393,6 +1420,7 @@ define void @test86(i8* %ptr, i8 %cmp, i8 %val) {
define void @test87(i8* %ptr, i8 %cmp, i8 %val) {
; PPC64LE-LABEL: test87:
; PPC64LE: # %bb.0:
+; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31
; PPC64LE-NEXT: sync
; PPC64LE-NEXT: .LBB87_1:
; PPC64LE-NEXT: lbarx 6, 0, 3
@@ -1415,6 +1443,7 @@ define void @test87(i8* %ptr, i8 %cmp, i8 %val) {
define void @test88(i8* %ptr, i8 %cmp, i8 %val) {
; PPC64LE-LABEL: test88:
; PPC64LE: # %bb.0:
+; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31
; PPC64LE-NEXT: sync
; PPC64LE-NEXT: .LBB88_1:
; PPC64LE-NEXT: lbarx 6, 0, 3
@@ -1437,6 +1466,7 @@ define void @test88(i8* %ptr, i8 %cmp, i8 %val) {
define void @test89(i8* %ptr, i8 %cmp, i8 %val) {
; PPC64LE-LABEL: test89:
; PPC64LE: # %bb.0:
+; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31
; PPC64LE-NEXT: sync
; PPC64LE-NEXT: .LBB89_1:
; PPC64LE-NEXT: lbarx 6, 0, 3
@@ -1459,6 +1489,7 @@ define void @test89(i8* %ptr, i8 %cmp, i8 %val) {
define void @test90(i16* %ptr, i16 %cmp, i16 %val) {
; PPC64LE-LABEL: test90:
; PPC64LE: # %bb.0:
+; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31
; PPC64LE-NEXT: b .LBB90_2
; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB90_1:
@@ -1478,6 +1509,7 @@ define void @test90(i16* %ptr, i16 %cmp, i16 %val) {
define void @test91(i16* %ptr, i16 %cmp, i16 %val) {
; PPC64LE-LABEL: test91:
; PPC64LE: # %bb.0:
+; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31
; PPC64LE-NEXT: .LBB91_1:
; PPC64LE-NEXT: lharx 6, 0, 3
; PPC64LE-NEXT: cmpw 4, 6
@@ -1499,6 +1531,7 @@ define void @test91(i16* %ptr, i16 %cmp, i16 %val) {
define void @test92(i16* %ptr, i16 %cmp, i16 %val) {
; PPC64LE-LABEL: test92:
; PPC64LE: # %bb.0:
+; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31
; PPC64LE-NEXT: .LBB92_1:
; PPC64LE-NEXT: lharx 6, 0, 3
; PPC64LE-NEXT: cmpw 4, 6
@@ -1520,6 +1553,7 @@ define void @test92(i16* %ptr, i16 %cmp, i16 %val) {
define void @test93(i16* %ptr, i16 %cmp, i16 %val) {
; PPC64LE-LABEL: test93:
; PPC64LE: # %bb.0:
+; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: b .LBB93_2
; PPC64LE-NEXT: .p2align 5
@@ -1540,6 +1574,7 @@ define void @test93(i16* %ptr, i16 %cmp, i16 %val) {
define void @test94(i16* %ptr, i16 %cmp, i16 %val) {
; PPC64LE-LABEL: test94:
; PPC64LE: # %bb.0:
+; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: b .LBB94_2
; PPC64LE-NEXT: .p2align 5
@@ -1560,6 +1595,7 @@ define void @test94(i16* %ptr, i16 %cmp, i16 %val) {
define void @test95(i16* %ptr, i16 %cmp, i16 %val) {
; PPC64LE-LABEL: test95:
; PPC64LE: # %bb.0:
+; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: .LBB95_1:
; PPC64LE-NEXT: lharx 6, 0, 3
@@ -1582,6 +1618,7 @@ define void @test95(i16* %ptr, i16 %cmp, i16 %val) {
define void @test96(i16* %ptr, i16 %cmp, i16 %val) {
; PPC64LE-LABEL: test96:
; PPC64LE: # %bb.0:
+; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: .LBB96_1:
; PPC64LE-NEXT: lharx 6, 0, 3
@@ -1604,6 +1641,7 @@ define void @test96(i16* %ptr, i16 %cmp, i16 %val) {
define void @test97(i16* %ptr, i16 %cmp, i16 %val) {
; PPC64LE-LABEL: test97:
; PPC64LE: # %bb.0:
+; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31
; PPC64LE-NEXT: sync
; PPC64LE-NEXT: .LBB97_1:
; PPC64LE-NEXT: lharx 6, 0, 3
@@ -1626,6 +1664,7 @@ define void @test97(i16* %ptr, i16 %cmp, i16 %val) {
define void @test98(i16* %ptr, i16 %cmp, i16 %val) {
; PPC64LE-LABEL: test98:
; PPC64LE: # %bb.0:
+; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31
; PPC64LE-NEXT: sync
; PPC64LE-NEXT: .LBB98_1:
; PPC64LE-NEXT: lharx 6, 0, 3
@@ -1648,6 +1687,7 @@ define void @test98(i16* %ptr, i16 %cmp, i16 %val) {
define void @test99(i16* %ptr, i16 %cmp, i16 %val) {
; PPC64LE-LABEL: test99:
; PPC64LE: # %bb.0:
+; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31
; PPC64LE-NEXT: sync
; PPC64LE-NEXT: .LBB99_1:
; PPC64LE-NEXT: lharx 6, 0, 3