diff options
author | Konstantin Zhuravlyov <kzhuravl_dev@outlook.com> | 2017-07-21 21:19:23 +0000 |
---|---|---|
committer | Konstantin Zhuravlyov <kzhuravl_dev@outlook.com> | 2017-07-21 21:19:23 +0000 |
commit | 4c49579c51fb50294a99376cf553e21b8d02e7be (patch) | |
tree | 9e50a7d6afe01ba60608e880ca228be801cc3e01 /test/CodeGen/MIR | |
parent | 019755e31837e605e2b926b377598b5a9c387b60 (diff) |
AMDGPU: Implement memory model
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@308781 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGen/MIR')
-rw-r--r-- | test/CodeGen/MIR/AMDGPU/memory-legalizer-atomic-insert-end.mir | 122 |
1 file changed, 122 insertions, 0 deletions
diff --git a/test/CodeGen/MIR/AMDGPU/memory-legalizer-atomic-insert-end.mir b/test/CodeGen/MIR/AMDGPU/memory-legalizer-atomic-insert-end.mir new file mode 100644 index 00000000000..d4ddfbe31b9 --- /dev/null +++ b/test/CodeGen/MIR/AMDGPU/memory-legalizer-atomic-insert-end.mir @@ -0,0 +1,122 @@ +# RUN: llc -march=amdgcn -mcpu=gfx803 -run-pass si-memory-legalizer %s -o - | FileCheck %s + +--- | + ; ModuleID = '<stdin>' + source_filename = "<stdin>" + target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" + + ; Function Attrs: nounwind readnone + declare i32 @llvm.amdgcn.workitem.id.x() #0 + + ; Function Attrs: nounwind + define amdgpu_kernel void @atomic_max_i32_noret( + i32 addrspace(1)* %out, + i32 addrspace(1)* addrspace(1)* %in, + i32 addrspace(1)* %x, + i32 %y) #1 { + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %idxprom = sext i32 %tid to i64 + %tid.gep = getelementptr i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* %in, i64 %idxprom + %ptr = load volatile i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* %tid.gep + %xor = xor i32 %tid, 1 + %cmp = icmp ne i32 %xor, 0 + %1 = call { i1, i64 } @llvm.amdgcn.if(i1 %cmp) + %2 = extractvalue { i1, i64 } %1, 0 + %3 = extractvalue { i1, i64 } %1, 1 + br i1 %2, label %atomic, label %exit + + atomic: ; preds = %0 + %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 100 + %ret = atomicrmw max i32 addrspace(1)* %gep, i32 %y seq_cst + br label %exit + + exit: ; preds = %atomic, %0 + call void @llvm.amdgcn.end.cf(i64 %3) + ret void + } + + declare { i1, i64 } @llvm.amdgcn.if(i1) + + declare void @llvm.amdgcn.end.cf(i64) + + ; Function Attrs: nounwind + declare void @llvm.stackprotector(i8*, i8**) #3 + + attributes #0 = { nounwind readnone "target-cpu"="tahiti" } + attributes #1 = { nounwind "target-cpu"="tahiti" } + attributes #2 = { readnone } + attributes #3 = { nounwind } + +... 
+--- + +# CHECK-LABEL: name: atomic_max_i32_noret + +# CHECK-LABEL: bb.1.atomic: +# CHECK: BUFFER_ATOMIC_SMAX_ADDR64 +# CHECK-NEXT: S_WAITCNT 3952 +# CHECK-NEXT: BUFFER_WBINVL1_VOL + +name: atomic_max_i32_noret +alignment: 0 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +liveins: + - { reg: '%sgpr0_sgpr1' } + - { reg: '%vgpr0' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + maxCallFrameSize: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false +body: | + bb.0 (%ir-block.0): + successors: %bb.1.atomic(0x40000000), %bb.2.exit(0x40000000) + liveins: %vgpr0, %sgpr0_sgpr1 + + %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM %sgpr0_sgpr1, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`) + %vgpr1 = V_ASHRREV_I32_e32 31, %vgpr0, implicit %exec + %vgpr1_vgpr2 = V_LSHL_B64 %vgpr0_vgpr1, 3, implicit %exec + %sgpr7 = S_MOV_B32 61440 + %sgpr6 = S_MOV_B32 0 + S_WAITCNT 127 + %vgpr1_vgpr2 = BUFFER_LOAD_DWORDX2_ADDR64 killed %vgpr1_vgpr2, %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 8 from %ir.tid.gep) + %vgpr0 = V_XOR_B32_e32 1, killed %vgpr0, implicit %exec + V_CMP_NE_U32_e32 0, killed %vgpr0, implicit-def %vcc, implicit %exec + %sgpr2_sgpr3 = S_AND_SAVEEXEC_B64 killed %vcc, implicit-def %exec, implicit-def %scc, implicit %exec + %sgpr2_sgpr3 = S_XOR_B64 %exec, killed %sgpr2_sgpr3, implicit-def dead %scc + SI_MASK_BRANCH %bb.2.exit, implicit %exec + + bb.1.atomic: + successors: %bb.2.exit(0x80000000) + liveins: %sgpr4_sgpr5_sgpr6_sgpr7:0x0000000C, %sgpr0_sgpr1, %sgpr2_sgpr3, %vgpr1_vgpr2_vgpr3_vgpr4:0x00000003 + + %sgpr0 = S_LOAD_DWORD_IMM killed %sgpr0_sgpr1, 15, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(2)* undef`) + dead %vgpr0 = 
V_MOV_B32_e32 -1, implicit %exec + dead %vgpr0 = V_MOV_B32_e32 61440, implicit %exec + %sgpr4_sgpr5 = S_MOV_B64 0 + S_WAITCNT 127 + %vgpr0 = V_MOV_B32_e32 killed %sgpr0, implicit %exec, implicit %exec + S_WAITCNT 3952 + BUFFER_ATOMIC_SMAX_ADDR64 killed %vgpr0, killed %vgpr1_vgpr2, killed %sgpr4_sgpr5_sgpr6_sgpr7, 0, 400, 0, implicit %exec :: (volatile load seq_cst 4 from %ir.gep) + + bb.2.exit: + liveins: %sgpr2_sgpr3 + + %exec = S_OR_B64 %exec, killed %sgpr2_sgpr3, implicit-def %scc + S_ENDPGM + +... + |