summary | refs | log | tree | commit | diff
path: root/test/CodeGen/MIR
diff options
context:
space:
mode:
author: Konstantin Zhuravlyov <kzhuravl_dev@outlook.com> 2017-07-21 21:19:23 +0000
committer: Konstantin Zhuravlyov <kzhuravl_dev@outlook.com> 2017-07-21 21:19:23 +0000
commit: 4c49579c51fb50294a99376cf553e21b8d02e7be (patch)
tree: 9e50a7d6afe01ba60608e880ca228be801cc3e01 /test/CodeGen/MIR
parent: 019755e31837e605e2b926b377598b5a9c387b60 (diff)
AMDGPU: Implement memory model
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@308781 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGen/MIR')
-rw-r--r-- test/CodeGen/MIR/AMDGPU/memory-legalizer-atomic-insert-end.mir 122
1 files changed, 122 insertions, 0 deletions
diff --git a/test/CodeGen/MIR/AMDGPU/memory-legalizer-atomic-insert-end.mir b/test/CodeGen/MIR/AMDGPU/memory-legalizer-atomic-insert-end.mir
new file mode 100644
index 00000000000..d4ddfbe31b9
--- /dev/null
+++ b/test/CodeGen/MIR/AMDGPU/memory-legalizer-atomic-insert-end.mir
@@ -0,0 +1,122 @@
+# RUN: llc -march=amdgcn -mcpu=gfx803 -run-pass si-memory-legalizer %s -o - | FileCheck %s
+
+--- |
+ ; ModuleID = '<stdin>'
+ source_filename = "<stdin>"
+ target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
+
+ ; Function Attrs: nounwind readnone
+ declare i32 @llvm.amdgcn.workitem.id.x() #0
+
+ ; Function Attrs: nounwind
+ define amdgpu_kernel void @atomic_max_i32_noret(
+ i32 addrspace(1)* %out,  ; not referenced in the body
+ i32 addrspace(1)* addrspace(1)* %in,  ; array of pointers, indexed by %tid below
+ i32 addrspace(1)* %x,  ; not referenced in the body
+ i32 %y) #1 {  ; %y is the operand of the atomic max
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %idxprom = sext i32 %tid to i64
+ %tid.gep = getelementptr i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* %in, i64 %idxprom
+ %ptr = load volatile i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* %tid.gep  ; pointer stored at %in[%tid]
+ %xor = xor i32 %tid, 1
+ %cmp = icmp ne i32 %xor, 0  ; true exactly when %tid != 1
+ %1 = call { i1, i64 } @llvm.amdgcn.if(i1 %cmp)
+ %2 = extractvalue { i1, i64 } %1, 0  ; i1 element: used as the branch condition
+ %3 = extractvalue { i1, i64 } %1, 1  ; i64 element: passed to @llvm.amdgcn.end.cf in %exit
+ br i1 %2, label %atomic, label %exit
+
+ atomic: ; preds = %0
+ %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 100  ; element 100 of i32, i.e. byte offset 400
+ %ret = atomicrmw max i32 addrspace(1)* %gep, i32 %y seq_cst  ; %ret is never used afterwards
+ br label %exit
+
+ exit: ; preds = %atomic, %0
+ call void @llvm.amdgcn.end.cf(i64 %3)
+ ret void
+ }
+
+ declare { i1, i64 } @llvm.amdgcn.if(i1)
+
+ declare void @llvm.amdgcn.end.cf(i64)
+
+ ; Function Attrs: nounwind
+ declare void @llvm.stackprotector(i8*, i8**) #3
+
+ attributes #0 = { nounwind readnone "target-cpu"="tahiti" }
+ attributes #1 = { nounwind "target-cpu"="tahiti" }
+ attributes #2 = { readnone }
+ attributes #3 = { nounwind }
+
+...
+---
+
+# CHECK-LABEL: name: atomic_max_i32_noret
+
+# CHECK-LABEL: bb.1.atomic:
+# CHECK: BUFFER_ATOMIC_SMAX_ADDR64
+# CHECK-NEXT: S_WAITCNT 3952
+# CHECK-NEXT: BUFFER_WBINVL1_VOL
+
+name: atomic_max_i32_noret  # same name as the IR function defined in the embedded module
+alignment: 0
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+liveins:
+ - { reg: '%sgpr0_sgpr1' }
+ - { reg: '%vgpr0' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 0
+ adjustsStack: false
+ hasCalls: false
+ maxCallFrameSize: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+body: |
+ bb.0 (%ir-block.0):
+ successors: %bb.1.atomic(0x40000000), %bb.2.exit(0x40000000)
+ liveins: %vgpr0, %sgpr0_sgpr1
+
+ %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM %sgpr0_sgpr1, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+ %vgpr1 = V_ASHRREV_I32_e32 31, %vgpr0, implicit %exec
+ %vgpr1_vgpr2 = V_LSHL_B64 %vgpr0_vgpr1, 3, implicit %exec  ; shift left by 3, i.e. multiply the 64-bit index by 8
+ %sgpr7 = S_MOV_B32 61440  ; with %sgpr6, fills the upper half of the %sgpr4..7 tuple used by the buffer load
+ %sgpr6 = S_MOV_B32 0
+ S_WAITCNT 127
+ %vgpr1_vgpr2 = BUFFER_LOAD_DWORDX2_ADDR64 killed %vgpr1_vgpr2, %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 8 from %ir.tid.gep)  ; corresponds to the IR load of %ptr
+ %vgpr0 = V_XOR_B32_e32 1, killed %vgpr0, implicit %exec
+ V_CMP_NE_U32_e32 0, killed %vgpr0, implicit-def %vcc, implicit %exec  ; xor/compare pair appears to mirror the IR %xor/%cmp
+ %sgpr2_sgpr3 = S_AND_SAVEEXEC_B64 killed %vcc, implicit-def %exec, implicit-def %scc, implicit %exec
+ %sgpr2_sgpr3 = S_XOR_B64 %exec, killed %sgpr2_sgpr3, implicit-def dead %scc  ; %sgpr2_sgpr3 is consumed by the S_OR_B64 in bb.2.exit
+ SI_MASK_BRANCH %bb.2.exit, implicit %exec
+
+ bb.1.atomic:
+ successors: %bb.2.exit(0x80000000)
+ liveins: %sgpr4_sgpr5_sgpr6_sgpr7:0x0000000C, %sgpr0_sgpr1, %sgpr2_sgpr3, %vgpr1_vgpr2_vgpr3_vgpr4:0x00000003
+
+ %sgpr0 = S_LOAD_DWORD_IMM killed %sgpr0_sgpr1, 15, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(2)* undef`)  ; presumably the %y kernel argument -- TODO confirm
+ dead %vgpr0 = V_MOV_B32_e32 -1, implicit %exec  ; marked dead: %vgpr0 is redefined before any use
+ dead %vgpr0 = V_MOV_B32_e32 61440, implicit %exec  ; marked dead: %vgpr0 is redefined before any use
+ %sgpr4_sgpr5 = S_MOV_B64 0  ; overwrites the low half of the %sgpr4..7 tuple before the atomic uses it
+ S_WAITCNT 127
+ %vgpr0 = V_MOV_B32_e32 killed %sgpr0, implicit %exec, implicit %exec  ; NOTE(review): implicit %exec is listed twice here -- confirm intentional
+ S_WAITCNT 3952
+ BUFFER_ATOMIC_SMAX_ADDR64 killed %vgpr0, killed %vgpr1_vgpr2, killed %sgpr4_sgpr5_sgpr6_sgpr7, 0, 400, 0, implicit %exec :: (volatile load seq_cst 4 from %ir.gep)  ; last instruction of the block; 400 matches the IR gep (100 x i32). NOTE(review): memory operand says load only although the IR op is atomicrmw -- confirm
+
+ bb.2.exit:
+ liveins: %sgpr2_sgpr3
+
+ %exec = S_OR_B64 %exec, killed %sgpr2_sgpr3, implicit-def %scc  ; ORs the mask saved in bb.0 back into %exec
+ S_ENDPGM
+
+...
+