diff options
author | Tom Stellard <thomas.stellard@amd.com> | 2016-10-27 23:05:31 +0000 |
---|---|---|
committer | Tom Stellard <thomas.stellard@amd.com> | 2016-10-27 23:05:31 +0000 |
commit | 5480a2423dc4807474a97f42818b4de0328ba217 (patch) | |
tree | 5bc67b736946e777e435edf0d1eaff62c6b2dda1 /test/CodeGen/MIR | |
parent | f95be0d2576f7518d9cc21bc6490548883a63d68 (diff) |
AMDGPU/SI: Handle hazard with > 8 byte VMEM stores
Reviewers: arsenm
Subscribers: kzhuravl, wdng, nhaehnle, yaxunl, tony-tye, llvm-commits
Differential Revision: https://reviews.llvm.org/D25577
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@285359 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGen/MIR')
-rw-r--r-- | test/CodeGen/MIR/AMDGPU/inserted-wait-states.mir | 79 |
1 files changed, 77 insertions, 2 deletions
```diff
diff --git a/test/CodeGen/MIR/AMDGPU/inserted-wait-states.mir b/test/CodeGen/MIR/AMDGPU/inserted-wait-states.mir
index f1694746693..8936e6fa073 100644
--- a/test/CodeGen/MIR/AMDGPU/inserted-wait-states.mir
+++ b/test/CodeGen/MIR/AMDGPU/inserted-wait-states.mir
@@ -1,11 +1,12 @@
 # RUN: llc -march=amdgcn -mcpu=tahiti -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN
-# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN
-# RUN: llc -march=amdgcn -mcpu=fiji -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN,VI
+# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN,CIVI
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN,CIVI,VI
 
 --- |
   define void @div_fmas() { ret void }
   define void @s_getreg() { ret void }
   define void @s_setreg() { ret void }
+  define void @vmem_gt_8dw_store() { ret void }
 ...
 ---
 # GCN-LABEL: name: div_fmas
@@ -159,3 +160,77 @@ body: |
     S_SETREG_B32 %sgpr1, 0
     S_ENDPGM
 ...
+
+...
+---
+# GCN-LABEL: name: vmem_gt_8dw_store
+
+# GCN-LABEL: bb.0:
+# GCN: BUFFER_STORE_DWORD_OFFSET
+# GCN-NEXT: V_MOV_B32
+# GCN: BUFFER_STORE_DWORDX3_OFFSET
+# CIVI: S_NOP
+# GCN-NEXT: V_MOV_B32
+# GCN: BUFFER_STORE_DWORDX4_OFFSET
+# GCN-NEXT: V_MOV_B32
+# GCN: BUFFER_STORE_DWORDX4_OFFSET
+# CIVI: S_NOP
+# GCN-NEXT: V_MOV_B32
+# GCN: BUFFER_STORE_FORMAT_XYZ_OFFSET
+# CIVI: S_NOP
+# GCN-NEXT: V_MOV_B32
+# GCN: BUFFER_STORE_FORMAT_XYZW_OFFSET
+# CIVI: S_NOP
+# GCN-NEXT: V_MOV_B32
+
+# GCN-LABEL: bb.1:
+# GCN: FLAT_STORE_DWORDX2
+# GCN-NEXT: V_MOV_B32
+# GCN: FLAT_STORE_DWORDX3
+# CIVI: S_NOP
+# GCN-NEXT: V_MOV_B32
+# GCN: FLAT_STORE_DWORDX4
+# CIVI: S_NOP
+# GCN-NEXT: V_MOV_B32
+# GCN: FLAT_ATOMIC_CMPSWAP_X2
+# CIVI: S_NOP
+# GCN-NEXT: V_MOV_B32
+# GCN: FLAT_ATOMIC_FCMPSWAP_X2
+# CIVI: S_NOP
+# GCN: V_MOV_B32
+
+name: vmem_gt_8dw_store
+
+body: |
+  bb.0:
+    successors: %bb.1
+    BUFFER_STORE_DWORD_OFFSET %vgpr3, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec
+    %vgpr3 = V_MOV_B32_e32 0, implicit %exec
+    BUFFER_STORE_DWORDX3_OFFSET %vgpr2_vgpr3_vgpr4, %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec
+    %vgpr3 = V_MOV_B32_e32 0, implicit %exec
+    BUFFER_STORE_DWORDX4_OFFSET %vgpr2_vgpr3_vgpr4_vgpr5, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec
+    %vgpr3 = V_MOV_B32_e32 0, implicit %exec
+    BUFFER_STORE_DWORDX4_OFFSET %vgpr2_vgpr3_vgpr4_vgpr5, %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec
+    %vgpr3 = V_MOV_B32_e32 0, implicit %exec
+    BUFFER_STORE_FORMAT_XYZ_OFFSET %vgpr2_vgpr3_vgpr4, %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec
+    %vgpr3 = V_MOV_B32_e32 0, implicit %exec
+    BUFFER_STORE_FORMAT_XYZW_OFFSET %vgpr2_vgpr3_vgpr4_vgpr5, %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec
+    %vgpr3 = V_MOV_B32_e32 0, implicit %exec
+    BUFFER_ATOMIC_CMPSWAP_X2_OFFSET %vgpr2_vgpr3_vgpr4_vgpr5, %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, implicit %exec
+    %vgpr3 = V_MOV_B32_e32 0, implicit %exec
+    S_BRANCH %bb.1
+
+  bb.1:
+    FLAT_STORE_DWORDX2 %vgpr0_vgpr1, %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
+    %vgpr3 = V_MOV_B32_e32 0, implicit %exec
+    FLAT_STORE_DWORDX3 %vgpr0_vgpr1, %vgpr2_vgpr3_vgpr4, 0, 0, 0, implicit %exec, implicit %flat_scr
+    %vgpr3 = V_MOV_B32_e32 0, implicit %exec
+    FLAT_STORE_DWORDX4 %vgpr0_vgpr1, %vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
+    %vgpr3 = V_MOV_B32_e32 0, implicit %exec
+    FLAT_ATOMIC_CMPSWAP_X2 %vgpr0_vgpr1, %vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, implicit %exec, implicit %flat_scr
+    %vgpr3 = V_MOV_B32_e32 0, implicit %exec
+    FLAT_ATOMIC_FCMPSWAP_X2 %vgpr0_vgpr1, %vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, implicit %exec, implicit %flat_scr
+    %vgpr3 = V_MOV_B32_e32 0, implicit %exec
+    S_ENDPGM
+
+...
```