summary | refs | log | tree | commit | diff
path: root/test/CodeGen/MIR
diff options
context:
space:
mode:
author: Tom Stellard <thomas.stellard@amd.com> 2016-10-27 23:05:31 +0000
committer: Tom Stellard <thomas.stellard@amd.com> 2016-10-27 23:05:31 +0000
commit: 5480a2423dc4807474a97f42818b4de0328ba217 (patch)
tree: 5bc67b736946e777e435edf0d1eaff62c6b2dda1 /test/CodeGen/MIR
parent: f95be0d2576f7518d9cc21bc6490548883a63d68 (diff)
AMDGPU/SI: Handle hazard with > 8 byte VMEM stores

Reviewers: arsenm

Subscribers: kzhuravl, wdng, nhaehnle, yaxunl, tony-tye, llvm-commits

Differential Revision: https://reviews.llvm.org/D25577

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@285359 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGen/MIR')
-rw-r--r-- test/CodeGen/MIR/AMDGPU/inserted-wait-states.mir | 79
1 files changed, 77 insertions, 2 deletions
diff --git a/test/CodeGen/MIR/AMDGPU/inserted-wait-states.mir b/test/CodeGen/MIR/AMDGPU/inserted-wait-states.mir
index f1694746693..8936e6fa073 100644
--- a/test/CodeGen/MIR/AMDGPU/inserted-wait-states.mir
+++ b/test/CodeGen/MIR/AMDGPU/inserted-wait-states.mir
@@ -1,11 +1,12 @@
# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN
-# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN
-# RUN: llc -march=amdgcn -mcpu=fiji -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN,VI
+# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN,CIVI
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN,CIVI,VI
--- |
define void @div_fmas() { ret void }
define void @s_getreg() { ret void }
define void @s_setreg() { ret void }
+ define void @vmem_gt_8dw_store() { ret void }
...
---
# GCN-LABEL: name: div_fmas
@@ -159,3 +160,77 @@ body: |
S_SETREG_B32 %sgpr1, 0
S_ENDPGM
...
+
+...
+---
+# GCN-LABEL: name: vmem_gt_8dw_store
+
+# GCN-LABEL: bb.0:
+# GCN: BUFFER_STORE_DWORD_OFFSET
+# GCN-NEXT: V_MOV_B32
+# GCN: BUFFER_STORE_DWORDX3_OFFSET
+# CIVI: S_NOP
+# GCN-NEXT: V_MOV_B32
+# GCN: BUFFER_STORE_DWORDX4_OFFSET
+# GCN-NEXT: V_MOV_B32
+# GCN: BUFFER_STORE_DWORDX4_OFFSET
+# CIVI: S_NOP
+# GCN-NEXT: V_MOV_B32
+# GCN: BUFFER_STORE_FORMAT_XYZ_OFFSET
+# CIVI: S_NOP
+# GCN-NEXT: V_MOV_B32
+# GCN: BUFFER_STORE_FORMAT_XYZW_OFFSET
+# CIVI: S_NOP
+# GCN-NEXT: V_MOV_B32
+
+# GCN-LABEL: bb.1:
+# GCN: FLAT_STORE_DWORDX2
+# GCN-NEXT: V_MOV_B32
+# GCN: FLAT_STORE_DWORDX3
+# CIVI: S_NOP
+# GCN-NEXT: V_MOV_B32
+# GCN: FLAT_STORE_DWORDX4
+# CIVI: S_NOP
+# GCN-NEXT: V_MOV_B32
+# GCN: FLAT_ATOMIC_CMPSWAP_X2
+# CIVI: S_NOP
+# GCN-NEXT: V_MOV_B32
+# GCN: FLAT_ATOMIC_FCMPSWAP_X2
+# CIVI: S_NOP
+# GCN: V_MOV_B32
+
+name: vmem_gt_8dw_store
+
+body: |
+ bb.0:
+ successors: %bb.1
+ BUFFER_STORE_DWORD_OFFSET %vgpr3, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec
+ %vgpr3 = V_MOV_B32_e32 0, implicit %exec
+ BUFFER_STORE_DWORDX3_OFFSET %vgpr2_vgpr3_vgpr4, %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec
+ %vgpr3 = V_MOV_B32_e32 0, implicit %exec
+ BUFFER_STORE_DWORDX4_OFFSET %vgpr2_vgpr3_vgpr4_vgpr5, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec
+ %vgpr3 = V_MOV_B32_e32 0, implicit %exec
+ BUFFER_STORE_DWORDX4_OFFSET %vgpr2_vgpr3_vgpr4_vgpr5, %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec
+ %vgpr3 = V_MOV_B32_e32 0, implicit %exec
+ BUFFER_STORE_FORMAT_XYZ_OFFSET %vgpr2_vgpr3_vgpr4, %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec
+ %vgpr3 = V_MOV_B32_e32 0, implicit %exec
+ BUFFER_STORE_FORMAT_XYZW_OFFSET %vgpr2_vgpr3_vgpr4_vgpr5, %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec
+ %vgpr3 = V_MOV_B32_e32 0, implicit %exec
+ BUFFER_ATOMIC_CMPSWAP_X2_OFFSET %vgpr2_vgpr3_vgpr4_vgpr5, %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, implicit %exec
+ %vgpr3 = V_MOV_B32_e32 0, implicit %exec
+ S_BRANCH %bb.1
+
+ bb.1:
+ FLAT_STORE_DWORDX2 %vgpr0_vgpr1, %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr
+ %vgpr3 = V_MOV_B32_e32 0, implicit %exec
+ FLAT_STORE_DWORDX3 %vgpr0_vgpr1, %vgpr2_vgpr3_vgpr4, 0, 0, 0, implicit %exec, implicit %flat_scr
+ %vgpr3 = V_MOV_B32_e32 0, implicit %exec
+ FLAT_STORE_DWORDX4 %vgpr0_vgpr1, %vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr
+ %vgpr3 = V_MOV_B32_e32 0, implicit %exec
+ FLAT_ATOMIC_CMPSWAP_X2 %vgpr0_vgpr1, %vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, implicit %exec, implicit %flat_scr
+ %vgpr3 = V_MOV_B32_e32 0, implicit %exec
+ FLAT_ATOMIC_FCMPSWAP_X2 %vgpr0_vgpr1, %vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, implicit %exec, implicit %flat_scr
+ %vgpr3 = V_MOV_B32_e32 0, implicit %exec
+ S_ENDPGM
+
+...