diff options
author    Tom Stellard <thomas.stellard@amd.com>  2016-02-12 17:57:54 +0000
committer Tom Stellard <thomas.stellard@amd.com>  2016-02-12 17:57:54 +0000
commit    abf168408a1ecc2a10ebc6ec0d816f1717f4eb9c (patch)
tree      025816eb762e18fa6056bdb72563d6aecf175173 /test
parent    3a0161ac775df27f6015540576ec769c7c68d035 (diff)
[AMDGPU] Assembler: Swap operands of flat_store instructions to match AMD assembler
Historically, AMD's internal sp3 assembler has used the flat_store* addr, data
operand format. To match existing code and to enable reuse, change the LLVM
definitions to match. Also update the MC and CodeGen tests.
Differential Revision: http://reviews.llvm.org/D16927
Patch by: Nikolay Haustov
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@260694 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test')
-rw-r--r-- | test/CodeGen/AMDGPU/ctlz.ll | 4 | ||||
-rw-r--r-- | test/CodeGen/AMDGPU/ctlz_zero_undef.ll | 2 | ||||
-rw-r--r-- | test/CodeGen/AMDGPU/flat-address-space.ll | 2 | ||||
-rw-r--r-- | test/CodeGen/AMDGPU/hsa.ll | 2 | ||||
-rw-r--r-- | test/CodeGen/AMDGPU/llvm.amdgcn.workgroup.id.ll | 6 | ||||
-rw-r--r-- | test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll | 6 | ||||
-rw-r--r-- | test/CodeGen/AMDGPU/lower-range-metadata-intrinsic-call.ll | 6 | ||||
-rw-r--r-- | test/CodeGen/AMDGPU/salu-to-valu.ll | 6 | ||||
-rw-r--r-- | test/CodeGen/AMDGPU/sint_to_fp.i64.ll | 2 | ||||
-rw-r--r-- | test/CodeGen/AMDGPU/sint_to_fp.ll | 2 | ||||
-rw-r--r-- | test/CodeGen/AMDGPU/sra.ll | 4 | ||||
-rw-r--r-- | test/CodeGen/AMDGPU/uint_to_fp.i64.ll | 2 | ||||
-rw-r--r-- | test/CodeGen/AMDGPU/uint_to_fp.ll | 2 | ||||
-rw-r--r-- | test/MC/AMDGPU/flat.s | 92 |
14 files changed, 69 insertions, 69 deletions
diff --git a/test/CodeGen/AMDGPU/ctlz.ll b/test/CodeGen/AMDGPU/ctlz.ll index baedf47eef0..e239ee26b9c 100644 --- a/test/CodeGen/AMDGPU/ctlz.ll +++ b/test/CodeGen/AMDGPU/ctlz.ll @@ -120,7 +120,7 @@ define void @v_ctlz_i8(i8 addrspace(1)* noalias %out, i8 addrspace(1)* noalias % ; SI-DAG: v_mov_b32_e32 [[VFFBH_HI:v[0-9]+]], [[FFBH_HI]] ; SI-DAG: v_cndmask_b32_e32 v[[CTLZ:[0-9]+]], [[VFFBH_HI]], [[VFFBH_LO]] ; SI-DAG: v_mov_b32_e32 v[[CTLZ_HI:[0-9]+]], 0{{$}} -; SI: {{buffer|flat}}_store_dwordx2 v{{\[}}[[CTLZ]]:[[CTLZ_HI]]{{\]}} +; SI: {{buffer|flat}}_store_dwordx2 {{.*}}v{{\[}}[[CTLZ]]:[[CTLZ_HI]]{{\]}} define void @s_ctlz_i64(i64 addrspace(1)* noalias %out, i64 %val) nounwind { %ctlz = call i64 @llvm.ctlz.i64(i64 %val, i1 false) store i64 %ctlz, i64 addrspace(1)* %out @@ -146,7 +146,7 @@ define void @s_ctlz_i64_trunc(i32 addrspace(1)* noalias %out, i64 %val) nounwind ; SI-DAG: v_cmp_eq_i32_e32 vcc, 0, [[OR]] ; SI-DAG: v_cndmask_b32_e64 v[[CLTZ_LO:[0-9]+]], v[[CTLZ:[0-9]+]], 64, vcc ; SI-DAG: v_mov_b32_e32 v[[CTLZ_HI:[0-9]+]], 0{{$}} -; SI: {{buffer|flat}}_store_dwordx2 v{{\[}}[[CLTZ_LO]]:[[CTLZ_HI]]{{\]}} +; SI: {{buffer|flat}}_store_dwordx2 {{.*}}v{{\[}}[[CLTZ_LO]]:[[CTLZ_HI]]{{\]}} define void @v_ctlz_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind { %tid = call i32 @llvm.r600.read.tidig.x() %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid diff --git a/test/CodeGen/AMDGPU/ctlz_zero_undef.ll b/test/CodeGen/AMDGPU/ctlz_zero_undef.ll index c1f84cd460c..16e1f2edd27 100644 --- a/test/CodeGen/AMDGPU/ctlz_zero_undef.ll +++ b/test/CodeGen/AMDGPU/ctlz_zero_undef.ll @@ -123,7 +123,7 @@ define void @s_ctlz_zero_undef_i64_trunc(i32 addrspace(1)* noalias %out, i64 %va ; SI-DAG: v_ffbh_u32_e32 [[FFBH_HI:v[0-9]+]], v[[HI]] ; SI-DAG: v_cndmask_b32_e64 v[[CTLZ:[0-9]+]], [[FFBH_HI]], [[FFBH_LO]] ; SI-DAG: v_mov_b32_e32 v[[CTLZ_HI:[0-9]+]], 0{{$}} -; SI: {{buffer|flat}}_store_dwordx2 v{{\[}}[[CTLZ]]:[[CTLZ_HI]]{{\]}} +; SI: 
{{buffer|flat}}_store_dwordx2 {{.*}}v{{\[}}[[CTLZ]]:[[CTLZ_HI]]{{\]}} define void @v_ctlz_zero_undef_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind { %tid = call i32 @llvm.r600.read.tidig.x() %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid diff --git a/test/CodeGen/AMDGPU/flat-address-space.ll b/test/CodeGen/AMDGPU/flat-address-space.ll index 34e36a3e37c..5ca57fd3d35 100644 --- a/test/CodeGen/AMDGPU/flat-address-space.ll +++ b/test/CodeGen/AMDGPU/flat-address-space.ll @@ -17,7 +17,7 @@ ; CHECK-DAG: v_mov_b32_e32 v[[DATA:[0-9]+]], s[[SDATA]] ; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], s[[LO_SREG]] ; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], s[[HI_SREG]] -; CHECK: flat_store_dword v[[DATA]], v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}} +; CHECK: flat_store_dword v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}, v[[DATA]] define void @store_flat_i32(i32 addrspace(1)* %gptr, i32 %x) #0 { %fptr = addrspacecast i32 addrspace(1)* %gptr to i32 addrspace(4)* store i32 %x, i32 addrspace(4)* %fptr, align 4 diff --git a/test/CodeGen/AMDGPU/hsa.ll b/test/CodeGen/AMDGPU/hsa.ll index 14c130a8190..073c1858244 100644 --- a/test/CodeGen/AMDGPU/hsa.ll +++ b/test/CodeGen/AMDGPU/hsa.ll @@ -51,7 +51,7 @@ ; On VI+ we also need to set MTYPE = 2 ; HSA-VI: s_mov_b32 s[[HI:[0-9]]], 0x1100f000 ; Make sure we generate flat store for HSA -; HSA: flat_store_dword v{{[0-9]+}} +; HSA: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} ; HSA: .Lfunc_end0: ; HSA: .size simple, .Lfunc_end0-simple diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.workgroup.id.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.workgroup.id.ll index f9385761931..c22eac7e271 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.workgroup.id.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.workgroup.id.ll @@ -25,7 +25,7 @@ declare i32 @llvm.amdgcn.workgroup.id.z() #0 ; HSA: v_mov_b32_e32 [[VCOPY:v[0-9]+]], s6{{$}} ; ALL-NOT: [[VCOPY]] -; ALL: {{buffer|flat}}_store_dword [[VCOPY]] +; ALL: {{buffer|flat}}_store_dword 
{{.*}}[[VCOPY]] ; HSA: COMPUTE_PGM_RSRC2:USER_SGPR: 6 ; ALL-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2 @@ -53,7 +53,7 @@ define void @test_workgroup_id_x(i32 addrspace(1)* %out) #1 { ; HSA: v_mov_b32_e32 [[VCOPY:v[0-9]+]], s7{{$}} ; ALL-NOT: [[VCOPY]] -; ALL: {{buffer|flat}}_store_dword [[VCOPY]] +; ALL: {{buffer|flat}}_store_dword {{.*}}[[VCOPY]] ; HSA: COMPUTE_PGM_RSRC2:USER_SGPR: 6 ; ALL-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2 @@ -89,7 +89,7 @@ define void @test_workgroup_id_y(i32 addrspace(1)* %out) #1 { ; HSA: v_mov_b32_e32 [[VCOPY:v[0-9]+]], s7{{$}} ; ALL-NOT: [[VCOPY]] -; ALL: {{buffer|flat}}_store_dword [[VCOPY]] +; ALL: {{buffer|flat}}_store_dword {{.*}}[[VCOPY]] ; HSA: COMPUTE_PGM_RSRC2:USER_SGPR: 6 ; ALL-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2 diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll index bf366fc4a93..28ef7b82ef8 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll @@ -15,7 +15,7 @@ declare i32 @llvm.amdgcn.workitem.id.z() #0 ; HSA: compute_pgm_rsrc2_tidig_comp_cnt = 0 ; ALL-NOT: v0 -; ALL: {{buffer|flat}}_store_dword v0 +; ALL: {{buffer|flat}}_store_dword {{.*}}v0 define void @test_workitem_id_x(i32 addrspace(1)* %out) #1 { %id = call i32 @llvm.amdgcn.workitem.id.x() store i32 %id, i32 addrspace(1)* %out @@ -30,7 +30,7 @@ define void @test_workitem_id_x(i32 addrspace(1)* %out) #1 { ; HSA: compute_pgm_rsrc2_tidig_comp_cnt = 1 ; ALL-NOT: v1 -; ALL: {{buffer|flat}}_store_dword v1 +; ALL: {{buffer|flat}}_store_dword {{.*}}v1 define void @test_workitem_id_y(i32 addrspace(1)* %out) #1 { %id = call i32 @llvm.amdgcn.workitem.id.y() store i32 %id, i32 addrspace(1)* %out @@ -45,7 +45,7 @@ define void @test_workitem_id_y(i32 addrspace(1)* %out) #1 { ; HSA: compute_pgm_rsrc2_tidig_comp_cnt = 2 ; ALL-NOT: v2 -; ALL: {{buffer|flat}}_store_dword v2 +; ALL: {{buffer|flat}}_store_dword {{.*}}v2 define void @test_workitem_id_z(i32 addrspace(1)* %out) #1 
{ %id = call i32 @llvm.amdgcn.workitem.id.z() store i32 %id, i32 addrspace(1)* %out diff --git a/test/CodeGen/AMDGPU/lower-range-metadata-intrinsic-call.ll b/test/CodeGen/AMDGPU/lower-range-metadata-intrinsic-call.ll index b6a9179a212..e1fad13e0b5 100644 --- a/test/CodeGen/AMDGPU/lower-range-metadata-intrinsic-call.ll +++ b/test/CodeGen/AMDGPU/lower-range-metadata-intrinsic-call.ll @@ -4,7 +4,7 @@ ; and can be eliminated ; CHECK-LABEL: {{^}}test_workitem_id_x_known_max_range: ; CHECK-NOT: v0 -; CHECK: {{flat|buffer}}_store_dword v0 +; CHECK: {{flat|buffer}}_store_dword {{.*}}v0 define void @test_workitem_id_x_known_max_range(i32 addrspace(1)* nocapture %out) #0 { entry: %id = tail call i32 @llvm.amdgcn.workitem.id.x(), !range !0 @@ -15,7 +15,7 @@ entry: ; CHECK-LABEL: {{^}}test_workitem_id_x_known_trunc_1_bit_range: ; CHECK: v_and_b32_e32 [[MASKED:v[0-9]+]], 0x1ff, v0 -; CHECK: {{flat|buffer}}_store_dword [[MASKED]] +; CHECK: {{flat|buffer}}_store_dword {{.*}}[[MASKED]] define void @test_workitem_id_x_known_trunc_1_bit_range(i32 addrspace(1)* nocapture %out) #0 { entry: %id = tail call i32 @llvm.amdgcn.workitem.id.x(), !range !0 @@ -27,7 +27,7 @@ entry: ; CHECK-LABEL: {{^}}test_workitem_id_x_known_max_range_m1: ; CHECK-NOT: v0 ; CHECK: v_and_b32_e32 [[MASKED:v[0-9]+]], 0xff, v0 -; CHECK: {{flat|buffer}}_store_dword [[MASKED]] +; CHECK: {{flat|buffer}}_store_dword {{.*}}[[MASKED]] define void @test_workitem_id_x_known_max_range_m1(i32 addrspace(1)* nocapture %out) #0 { entry: %id = tail call i32 @llvm.amdgcn.workitem.id.x(), !range !1 diff --git a/test/CodeGen/AMDGPU/salu-to-valu.ll b/test/CodeGen/AMDGPU/salu-to-valu.ll index 715ca2a4129..f92b53db2a8 100644 --- a/test/CodeGen/AMDGPU/salu-to-valu.ll +++ b/test/CodeGen/AMDGPU/salu-to-valu.ll @@ -56,7 +56,7 @@ done: ; preds = %loop ; FIXME: We should be using flat load for HSA. 
; GCN: buffer_load_dword [[OUT:v[0-9]+]] ; GCN-NOHSA: buffer_store_dword [[OUT]] -; GCN-HSA: flat_store_dword [[OUT]] +; GCN-HSA: flat_store_dword {{.*}}, [[OUT]] define void @smrd_valu(i32 addrspace(2)* addrspace(1)* %in, i32 %a, i32 %b, i32 addrspace(1)* %out) #1 { entry: %tmp = icmp ne i32 %a, 0 @@ -104,7 +104,7 @@ entry: ; GCN-NOHSA: v_add_i32_e32 ; GCN-NOHSA: buffer_store_dword ; GCN-HSA: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] -; GCN-HSA: flat_store_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] +; GCN-HSA: flat_store_dword v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} define void @smrd_valu_ci_offset(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %c) #1 { entry: %tmp = call i32 @llvm.amdgcn.workitem.id.x() @@ -248,7 +248,7 @@ entry: ; GCN-HSA: flat_load_dword [[MOVED:v[0-9]+]], v[{{[0-9+:[0-9]+}}] ; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, s{{[0-9]+}}, [[MOVED]] ; GCN-NOHSA: buffer_store_dword [[ADD]] -; GCN-HSA: flat_store_dword [[ADD]] +; GCN-HSA: flat_store_dword {{.*}}, [[ADD]] define void @smrd_valu2_salu_user(i32 addrspace(1)* %out, [8 x i32] addrspace(2)* %in, i32 %a) #1 { entry: %tmp = call i32 @llvm.amdgcn.workitem.id.x() diff --git a/test/CodeGen/AMDGPU/sint_to_fp.i64.ll b/test/CodeGen/AMDGPU/sint_to_fp.i64.ll index ad89ba5c7cb..1581ce2752e 100644 --- a/test/CodeGen/AMDGPU/sint_to_fp.i64.ll +++ b/test/CodeGen/AMDGPU/sint_to_fp.i64.ll @@ -26,7 +26,7 @@ define void @s_sint_to_fp_i64_to_f32(float addrspace(1)* %out, i64 %in) #0 { ; GCN: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}} ; GCN: v_cndmask_b32_e32 [[SIGN_SEL:v[0-9]+]], -; GCN: {{buffer|flat}}_store_dword [[SIGN_SEL]] +; GCN: {{buffer|flat}}_store_dword {{.*}}[[SIGN_SEL]] define void @v_sint_to_fp_i64_to_f32(float addrspace(1)* %out, i64 addrspace(1)* %in) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid diff --git a/test/CodeGen/AMDGPU/sint_to_fp.ll b/test/CodeGen/AMDGPU/sint_to_fp.ll index 851085c9535..75ffdd2cc85 100644 --- 
a/test/CodeGen/AMDGPU/sint_to_fp.ll +++ b/test/CodeGen/AMDGPU/sint_to_fp.ll @@ -103,7 +103,7 @@ define void @s_sint_to_fp_i1_f32_load(float addrspace(1)* %out, i1 %in) #0 { ; SI: v_and_b32_e32 {{v[0-9]+}}, 1, {{v[0-9]+}} ; SI: v_cmp_eq_i32 ; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1.0 -; SI: {{buffer|flat}}_store_dword [[RESULT]], +; SI: {{buffer|flat}}_store_dword {{.*}}[[RESULT]] ; SI: s_endpgm define void @v_sint_to_fp_i1_f32_load(float addrspace(1)* %out, i1 addrspace(1)* %in) #0 { %tid = call i32 @llvm.r600.read.tidig.x() diff --git a/test/CodeGen/AMDGPU/sra.ll b/test/CodeGen/AMDGPU/sra.ll index bf1de020219..37314d590c3 100644 --- a/test/CodeGen/AMDGPU/sra.ll +++ b/test/CodeGen/AMDGPU/sra.ll @@ -216,7 +216,7 @@ define void @s_ashr_32_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) { ; SI: buffer_load_dword v[[HI:[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 ; VI: flat_load_dword v[[HI:[0-9]+]] ; GCN: v_ashrrev_i32_e32 v[[SHIFT:[0-9]+]], 31, v[[HI]] -; GCN: {{buffer|flat}}_store_dwordx2 v{{\[}}[[HI]]:[[SHIFT]]{{\]}} +; GCN: {{buffer|flat}}_store_dwordx2 {{.*}}v{{\[}}[[HI]]:[[SHIFT]]{{\]}} define void @v_ashr_32_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { %tid = call i32 @llvm.r600.read.tidig.x() #0 %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid @@ -245,7 +245,7 @@ define void @s_ashr_63_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) { ; VI: flat_load_dword v[[HI:[0-9]+]] ; GCN: v_ashrrev_i32_e32 v[[SHIFT:[0-9]+]], 31, v[[HI]] ; GCN: v_mov_b32_e32 v[[COPY:[0-9]+]], v[[SHIFT]] -; GCN: {{buffer|flat}}_store_dwordx2 v{{\[}}[[SHIFT]]:[[COPY]]{{\]}} +; GCN: {{buffer|flat}}_store_dwordx2 {{.*}}v{{\[}}[[SHIFT]]:[[COPY]]{{\]}} define void @v_ashr_63_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { %tid = call i32 @llvm.r600.read.tidig.x() #0 %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid diff --git a/test/CodeGen/AMDGPU/uint_to_fp.i64.ll b/test/CodeGen/AMDGPU/uint_to_fp.i64.ll index 
3ab11442d5c..27c41e41a0e 100644 --- a/test/CodeGen/AMDGPU/uint_to_fp.i64.ll +++ b/test/CodeGen/AMDGPU/uint_to_fp.i64.ll @@ -22,7 +22,7 @@ define void @s_uint_to_fp_i64_to_f32(float addrspace(1)* %out, i64 %in) #0 { ; GCN-DAG: v_cmp_lt_u64 ; GCN: v_add_i32_e32 [[VR:v[0-9]+]] -; GCN: {{buffer|flat}}_store_dword [[VR]] +; GCN: {{buffer|flat}}_store_dword {{.*}}[[VR]] define void @v_uint_to_fp_i64_to_f32(float addrspace(1)* %out, i64 addrspace(1)* %in) #0 { %tid = call i32 @llvm.r600.read.tidig.x() %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid diff --git a/test/CodeGen/AMDGPU/uint_to_fp.ll b/test/CodeGen/AMDGPU/uint_to_fp.ll index a3343d1e2d9..0c3d54cf0d0 100644 --- a/test/CodeGen/AMDGPU/uint_to_fp.ll +++ b/test/CodeGen/AMDGPU/uint_to_fp.ll @@ -103,7 +103,7 @@ define void @s_uint_to_fp_i1_to_f32_load(float addrspace(1)* %out, i1 %in) #0 { ; SI: v_and_b32_e32 {{v[0-9]+}}, 1, {{v[0-9]+}} ; SI: v_cmp_eq_i32 ; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1.0 -; SI: {{buffer|flat}}_store_dword [[RESULT]], +; SI: {{buffer|flat}}_store_dword {{.*}}[[RESULT]] ; SI: s_endpgm define void @v_uint_to_fp_i1_f32_load(float addrspace(1)* %out, i1 addrspace(1)* %in) #0 { %tid = call i32 @llvm.r600.read.tidig.x() diff --git a/test/MC/AMDGPU/flat.s b/test/MC/AMDGPU/flat.s index 20e6c042733..a6b8cdf6351 100644 --- a/test/MC/AMDGPU/flat.s +++ b/test/MC/AMDGPU/flat.s @@ -95,69 +95,69 @@ flat_load_dword v1, v[3:4] tfe slc glc // CI: flat_load_dword v1, v[3:4] glc slc tfe ; encoding: [0x00,0x00,0x33,0xdc,0x03,0x00,0x80,0x01] // VI: flat_load_dword v1, v[3:4] glc slc tfe ; encoding: [0x00,0x00,0x53,0xdc,0x03,0x00,0x80,0x01] -flat_store_dword v1, v[3:4] +flat_store_dword v[3:4], v1 // NOSI: error: -// CIVI: flat_store_dword v1, v[3:4] ; encoding: [0x00,0x00,0x70,0xdc,0x03,0x01,0x00,0x00] +// CIVI: flat_store_dword v[3:4], v1 ; encoding: [0x00,0x00,0x70,0xdc,0x03,0x01,0x00,0x00] -flat_store_dword v1, v[3:4] glc +flat_store_dword v[3:4], v1 glc // NOSI: error: -// CIVI: 
flat_store_dword v1, v[3:4] glc ; encoding: [0x00,0x00,0x71,0xdc,0x03,0x01,0x00,0x00] +// CIVI: flat_store_dword v[3:4], v1 glc ; encoding: [0x00,0x00,0x71,0xdc,0x03,0x01,0x00,0x00] -flat_store_dword v1, v[3:4] glc slc +flat_store_dword v[3:4], v1 glc slc // NOSI: error: -// CIVI: flat_store_dword v1, v[3:4] glc slc ; encoding: [0x00,0x00,0x73,0xdc,0x03,0x01,0x00,0x00] +// CIVI: flat_store_dword v[3:4], v1 glc slc ; encoding: [0x00,0x00,0x73,0xdc,0x03,0x01,0x00,0x00] -flat_store_dword v1, v[3:4] glc tfe +flat_store_dword v[3:4], v1 glc tfe // NOSI: error: -// CIVI: flat_store_dword v1, v[3:4] glc tfe ; encoding: [0x00,0x00,0x71,0xdc,0x03,0x01,0x80,0x00] +// CIVI: flat_store_dword v[3:4], v1 glc tfe ; encoding: [0x00,0x00,0x71,0xdc,0x03,0x01,0x80,0x00] -flat_store_dword v1, v[3:4] glc slc tfe +flat_store_dword v[3:4], v1 glc slc tfe // NOSI: error: -// CIVI: flat_store_dword v1, v[3:4] glc slc tfe ; encoding: [0x00,0x00,0x73,0xdc,0x03,0x01,0x80,0x00] +// CIVI: flat_store_dword v[3:4], v1 glc slc tfe ; encoding: [0x00,0x00,0x73,0xdc,0x03,0x01,0x80,0x00] -flat_store_dword v1, v[3:4] glc tfe slc +flat_store_dword v[3:4], v1 glc tfe slc // NOSI: error: -// CIVI: flat_store_dword v1, v[3:4] glc slc tfe ; encoding: [0x00,0x00,0x73,0xdc,0x03,0x01,0x80,0x00] +// CIVI: flat_store_dword v[3:4], v1 glc slc tfe ; encoding: [0x00,0x00,0x73,0xdc,0x03,0x01,0x80,0x00] -flat_store_dword v1, v[3:4] slc +flat_store_dword v[3:4], v1 slc // NOSI: error: -// CIVI: flat_store_dword v1, v[3:4] slc ; encoding: [0x00,0x00,0x72,0xdc,0x03,0x01,0x00,0x00] +// CIVI: flat_store_dword v[3:4], v1 slc ; encoding: [0x00,0x00,0x72,0xdc,0x03,0x01,0x00,0x00] -flat_store_dword v1, v[3:4] slc glc +flat_store_dword v[3:4], v1 slc glc // NOSI: error: -// CIVI: flat_store_dword v1, v[3:4] glc slc ; encoding: [0x00,0x00,0x73,0xdc,0x03,0x01,0x00,0x00] +// CIVI: flat_store_dword v[3:4], v1 glc slc ; encoding: [0x00,0x00,0x73,0xdc,0x03,0x01,0x00,0x00] -flat_store_dword v1, v[3:4] slc tfe +flat_store_dword 
v[3:4], v1 slc tfe // NOSI: error: -// CIVI: flat_store_dword v1, v[3:4] slc tfe ; encoding: [0x00,0x00,0x72,0xdc,0x03,0x01,0x80,0x00] +// CIVI: flat_store_dword v[3:4], v1 slc tfe ; encoding: [0x00,0x00,0x72,0xdc,0x03,0x01,0x80,0x00] -flat_store_dword v1, v[3:4] slc glc tfe +flat_store_dword v[3:4], v1 slc glc tfe // NOSI: error: -// CIVI: flat_store_dword v1, v[3:4] glc slc tfe ; encoding: [0x00,0x00,0x73,0xdc,0x03,0x01,0x80,0x00] +// CIVI: flat_store_dword v[3:4], v1 glc slc tfe ; encoding: [0x00,0x00,0x73,0xdc,0x03,0x01,0x80,0x00] -flat_store_dword v1, v[3:4] slc tfe glc +flat_store_dword v[3:4], v1 slc tfe glc // NOSI: error: -// CIVI: flat_store_dword v1, v[3:4] glc slc tfe ; encoding: [0x00,0x00,0x73,0xdc,0x03,0x01,0x80,0x00] +// CIVI: flat_store_dword v[3:4], v1 glc slc tfe ; encoding: [0x00,0x00,0x73,0xdc,0x03,0x01,0x80,0x00] -flat_store_dword v1, v[3:4] tfe +flat_store_dword v[3:4], v1 tfe // NOSI: error: -// CIVI: flat_store_dword v1, v[3:4] tfe ; encoding: [0x00,0x00,0x70,0xdc,0x03,0x01,0x80,0x00] +// CIVI: flat_store_dword v[3:4], v1 tfe ; encoding: [0x00,0x00,0x70,0xdc,0x03,0x01,0x80,0x00] -flat_store_dword v1, v[3:4] tfe glc +flat_store_dword v[3:4], v1 tfe glc // NOSI: error: -// CIVI: flat_store_dword v1, v[3:4] glc tfe ; encoding: [0x00,0x00,0x71,0xdc,0x03,0x01,0x80,0x00] +// CIVI: flat_store_dword v[3:4], v1 glc tfe ; encoding: [0x00,0x00,0x71,0xdc,0x03,0x01,0x80,0x00] -flat_store_dword v1, v[3:4] tfe slc +flat_store_dword v[3:4], v1 tfe slc // NOSI: error: -// CIVI: flat_store_dword v1, v[3:4] slc tfe ; encoding: [0x00,0x00,0x72,0xdc,0x03,0x01,0x80,0x00] +// CIVI: flat_store_dword v[3:4], v1 slc tfe ; encoding: [0x00,0x00,0x72,0xdc,0x03,0x01,0x80,0x00] -flat_store_dword v1, v[3:4] tfe glc slc +flat_store_dword v[3:4], v1 tfe glc slc // NOSI: error: -// CIVI: flat_store_dword v1, v[3:4] glc slc tfe ; encoding: [0x00,0x00,0x73,0xdc,0x03,0x01,0x80,0x00] +// CIVI: flat_store_dword v[3:4], v1 glc slc tfe ; encoding: 
[0x00,0x00,0x73,0xdc,0x03,0x01,0x80,0x00] -flat_store_dword v1, v[3:4] tfe slc glc +flat_store_dword v[3:4], v1 tfe slc glc // NOSI: error: -// CIVI: flat_store_dword v1, v[3:4] glc slc tfe ; encoding: [0x00,0x00,0x73,0xdc,0x03,0x01,0x80,0x00] +// CIVI: flat_store_dword v[3:4], v1 glc slc tfe ; encoding: [0x00,0x00,0x73,0xdc,0x03,0x01,0x80,0x00] // FIXME: For atomic instructions, glc must be placed immediately following // the data regiser. These forms aren't currently supported: @@ -248,31 +248,31 @@ flat_load_dwordx3 v[5:7], v[3:4] // CI: flat_load_dwordx3 v[5:7], v[3:4] ; encoding: [0x00,0x00,0x3c,0xdc,0x03,0x00,0x00,0x05] // VI: flat_load_dwordx3 v[5:7], v[3:4] ; encoding: [0x00,0x00,0x58,0xdc,0x03,0x00,0x00,0x05] -flat_store_byte v1, v[3:4] +flat_store_byte v[3:4], v1 // NOSI: error: -// CIVI: flat_store_byte v1, v[3:4] ; encoding: [0x00,0x00,0x60,0xdc,0x03,0x01,0x00,0x00] +// CIVI: flat_store_byte v[3:4], v1 ; encoding: [0x00,0x00,0x60,0xdc,0x03,0x01,0x00,0x00] -flat_store_short v1, v[3:4] +flat_store_short v[3:4], v1 // NOSI: error: -// CIVI: flat_store_short v1, v[3:4] ; encoding: [0x00,0x00,0x68,0xdc,0x03,0x01,0x00,0x00] +// CIVI: flat_store_short v[3:4], v1 ; encoding: [0x00,0x00,0x68,0xdc,0x03,0x01,0x00,0x00] -flat_store_dword v1, v[3:4] +flat_store_dword v[3:4], v1 // NOSI: error: -// CIVI: flat_store_dword v1, v[3:4] ; encoding: [0x00,0x00,0x70,0xdc,0x03,0x01,0x00,0x00] +// CIVI: flat_store_dword v[3:4], v1 ; encoding: [0x00,0x00,0x70,0xdc,0x03,0x01,0x00,0x00] -flat_store_dwordx2 v[1:2], v[3:4] +flat_store_dwordx2 v[3:4], v[1:2] // NOSI: error: -// CIVI: flat_store_dwordx2 v[1:2], v[3:4] ; encoding: [0x00,0x00,0x74,0xdc,0x03,0x01,0x00,0x00] +// CIVI: flat_store_dwordx2 v[3:4], v[1:2] ; encoding: [0x00,0x00,0x74,0xdc,0x03,0x01,0x00,0x00] -flat_store_dwordx4 v[5:8], v[3:4] +flat_store_dwordx4 v[3:4], v[5:8] // NOSI: error: -// CI: flat_store_dwordx4 v[5:8], v[3:4] ; encoding: [0x00,0x00,0x78,0xdc,0x03,0x05,0x00,0x00] -// VI: flat_store_dwordx4 v[5:8], 
v[3:4] ; encoding: [0x00,0x00,0x7c,0xdc,0x03,0x05,0x00,0x00] +// CI: flat_store_dwordx4 v[3:4], v[5:8] ; encoding: [0x00,0x00,0x78,0xdc,0x03,0x05,0x00,0x00] +// VI: flat_store_dwordx4 v[3:4], v[5:8] ; encoding: [0x00,0x00,0x7c,0xdc,0x03,0x05,0x00,0x00] -flat_store_dwordx3 v[5:7], v[3:4] +flat_store_dwordx3 v[3:4], v[5:7] // NOSI: error: -// CI: flat_store_dwordx3 v[5:7], v[3:4] ; encoding: [0x00,0x00,0x7c,0xdc,0x03,0x05,0x00,0x00] -// VI: flat_store_dwordx3 v[5:7], v[3:4] ; encoding: [0x00,0x00,0x78,0xdc,0x03,0x05,0x00,0x00] +// CI: flat_store_dwordx3 v[3:4], v[5:7] ; encoding: [0x00,0x00,0x7c,0xdc,0x03,0x05,0x00,0x00] +// VI: flat_store_dwordx3 v[3:4], v[5:7] ; encoding: [0x00,0x00,0x78,0xdc,0x03,0x05,0x00,0x00] flat_atomic_swap v[3:4], v5 // NOSI: error: |