author    Matt Arsenault <Matthew.Arsenault@amd.com>    2017-11-13 05:11:54 +0000
committer Matt Arsenault <Matthew.Arsenault@amd.com>    2017-11-13 05:11:54 +0000
commit    3e3d015f524b45ee60a6ee27edff2a18aeeabd34 (patch)
tree      555650a57f73830f3d49e2f245c2d2fcd56eaf47 /test/CodeGen/AMDGPU
parent    d024fd1406dcfb597fc17337c2e31c746e84bb93 (diff)
AMDGPU: Fix multi-use shl/add combine
This was using a custom function that didn't handle the addressing modes
properly for private accesses. Use isLegalAddressingMode to avoid
duplicating this logic. Additionally, skip the combine if there is only
one use, since the standard combine will handle it.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@318013 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGen/AMDGPU')
-rw-r--r--  test/CodeGen/AMDGPU/shl_add_ptr.ll | 242
1 file changed, 173 insertions(+), 69 deletions(-)
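
The combine under test exploits the identity (x + c1) << c2 == (x << c2) + (c1 << c2):
when c1 << c2 fits in the memory instruction's immediate offset field, the add can be
folded into that offset even when the add has other users. As a reading aid for the new
tests in the diff below, here is a minimal sketch of the multi-use shape (modeled on the
shl_add_ptr_combine_2use_lds test added by this patch; the @sketch name is illustrative,
not part of the patch):

; One add with two shift users; each scaled address gets its own folded
; immediate: 4 << 3 = 32 and 4 << 4 = 64 become ds_write_b32 offsets.
define void @sketch(i32 %idx) {
  %idx.add = add nuw i32 %idx, 4
  %shl0 = shl i32 %idx.add, 3                      ; address scaled by 8
  %shl1 = shl i32 %idx.add, 4                      ; address scaled by 16
  %ptr0 = inttoptr i32 %shl0 to i32 addrspace(3)*  ; LDS (local) pointers
  %ptr1 = inttoptr i32 %shl1 to i32 addrspace(3)*
  store volatile i32 9, i32 addrspace(3)* %ptr0    ; -> ds_write_b32 ... offset:32
  store volatile i32 10, i32 addrspace(3)* %ptr1   ; -> ds_write_b32 ... offset:64
  ret void
}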
diff --git a/test/CodeGen/AMDGPU/shl_add_ptr.ll b/test/CodeGen/AMDGPU/shl_add_ptr.ll
index 9147eb58c6a..60fdab667b7 100644
--- a/test/CodeGen/AMDGPU/shl_add_ptr.ll
+++ b/test/CodeGen/AMDGPU/shl_add_ptr.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck -check-prefix=SI %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
; Test that doing a shift of a pointer with a constant add will be
; folded into the constant offset addressing mode even if the add has
@@ -15,10 +15,10 @@ declare i32 @llvm.amdgcn.workitem.id.x() #1
; Make sure the (add tid, 2) << 2 gets folded into the ds's offset as (tid << 2) + 8
-; SI-LABEL: {{^}}load_shl_base_lds_0:
-; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
-; SI: ds_read_b32 {{v[0-9]+}}, [[PTR]] offset:8
-; SI: s_endpgm
+; GCN-LABEL: {{^}}load_shl_base_lds_0:
+; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
+; GCN: ds_read_b32 {{v[0-9]+}}, [[PTR]] offset:8
+; GCN: s_endpgm
define amdgpu_kernel void @load_shl_base_lds_0(float addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%idx.0 = add nsw i32 %tid.x, 2
@@ -32,13 +32,13 @@ define amdgpu_kernel void @load_shl_base_lds_0(float addrspace(1)* %out, i32 add
; Make sure once the first use is folded into the addressing mode, the
; remaining add use goes through the normal shl + add constant fold.
-; SI-LABEL: {{^}}load_shl_base_lds_1:
-; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
-; SI: ds_read_b32 [[RESULT:v[0-9]+]], [[PTR]] offset:8
-; SI: v_add_i32_e32 [[ADDUSE:v[0-9]+]], vcc, 8, v{{[0-9]+}}
-; SI-DAG: buffer_store_dword [[RESULT]]
-; SI-DAG: buffer_store_dword [[ADDUSE]]
-; SI: s_endpgm
+; GCN-LABEL: {{^}}load_shl_base_lds_1:
+; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
+; GCN: ds_read_b32 [[RESULT:v[0-9]+]], [[PTR]] offset:8
+; GCN: v_add_i32_e32 [[ADDUSE:v[0-9]+]], vcc, 8, v{{[0-9]+}}
+; GCN-DAG: buffer_store_dword [[RESULT]]
+; GCN-DAG: buffer_store_dword [[ADDUSE]]
+; GCN: s_endpgm
define amdgpu_kernel void @load_shl_base_lds_1(float addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%idx.0 = add nsw i32 %tid.x, 2
@@ -52,9 +52,9 @@ define amdgpu_kernel void @load_shl_base_lds_1(float addrspace(1)* %out, i32 add
@maxlds = addrspace(3) global [65536 x i8] undef, align 4
-; SI-LABEL: {{^}}load_shl_base_lds_max_offset
-; SI: ds_read_u8 v{{[0-9]+}}, v{{[0-9]+}} offset:65535
-; SI: s_endpgm
+; GCN-LABEL: {{^}}load_shl_base_lds_max_offset
+; GCN: ds_read_u8 v{{[0-9]+}}, v{{[0-9]+}} offset:65535
+; GCN: s_endpgm
define amdgpu_kernel void @load_shl_base_lds_max_offset(i8 addrspace(1)* %out, i8 addrspace(3)* %lds, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%idx.0 = add nsw i32 %tid.x, 65535
@@ -68,11 +68,11 @@ define amdgpu_kernel void @load_shl_base_lds_max_offset(i8 addrspace(1)* %out, i
; The two globals are placed adjacent in memory, so the same base
; pointer can be used with an offset into the second one.
-; SI-LABEL: {{^}}load_shl_base_lds_2:
-; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
-; SI: s_mov_b32 m0, -1
-; SI-NEXT: ds_read2st64_b32 {{v\[[0-9]+:[0-9]+\]}}, [[PTR]] offset0:1 offset1:9
-; SI: s_endpgm
+; GCN-LABEL: {{^}}load_shl_base_lds_2:
+; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
+; GCN: s_mov_b32 m0, -1
+; GCN-NEXT: ds_read2st64_b32 {{v\[[0-9]+:[0-9]+\]}}, [[PTR]] offset0:1 offset1:9
+; GCN: s_endpgm
define amdgpu_kernel void @load_shl_base_lds_2(float addrspace(1)* %out) #0 {
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%idx.0 = add nsw i32 %tid.x, 64
@@ -85,10 +85,10 @@ define amdgpu_kernel void @load_shl_base_lds_2(float addrspace(1)* %out) #0 {
ret void
}
-; SI-LABEL: {{^}}store_shl_base_lds_0:
-; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
-; SI: ds_write_b32 [[PTR]], {{v[0-9]+}} offset:8
-; SI: s_endpgm
+; GCN-LABEL: {{^}}store_shl_base_lds_0:
+; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
+; GCN: ds_write_b32 [[PTR]], {{v[0-9]+}} offset:8
+; GCN: s_endpgm
define amdgpu_kernel void @store_shl_base_lds_0(float addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%idx.0 = add nsw i32 %tid.x, 2
@@ -115,10 +115,10 @@ define amdgpu_kernel void @store_shl_base_lds_0(float addrspace(1)* %out, i32 ad
; }
-; SI-LABEL: {{^}}atomic_cmpxchg_shl_base_lds_0:
-; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
-; SI: ds_cmpst_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}}, {{v[0-9]+}} offset:8
-; SI: s_endpgm
+; GCN-LABEL: {{^}}atomic_cmpxchg_shl_base_lds_0:
+; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
+; GCN: ds_cmpst_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}}, {{v[0-9]+}} offset:8
+; GCN: s_endpgm
define amdgpu_kernel void @atomic_cmpxchg_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use, i32 %swap) #0 {
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%idx.0 = add nsw i32 %tid.x, 2
@@ -130,10 +130,10 @@ define amdgpu_kernel void @atomic_cmpxchg_shl_base_lds_0(i32 addrspace(1)* %out,
ret void
}
-; SI-LABEL: {{^}}atomic_swap_shl_base_lds_0:
-; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
-; SI: ds_wrxchg_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
-; SI: s_endpgm
+; GCN-LABEL: {{^}}atomic_swap_shl_base_lds_0:
+; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
+; GCN: ds_wrxchg_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
+; GCN: s_endpgm
define amdgpu_kernel void @atomic_swap_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%idx.0 = add nsw i32 %tid.x, 2
@@ -144,10 +144,10 @@ define amdgpu_kernel void @atomic_swap_shl_base_lds_0(i32 addrspace(1)* %out, i3
ret void
}
-; SI-LABEL: {{^}}atomic_add_shl_base_lds_0:
-; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
-; SI: ds_add_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
-; SI: s_endpgm
+; GCN-LABEL: {{^}}atomic_add_shl_base_lds_0:
+; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
+; GCN: ds_add_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
+; GCN: s_endpgm
define amdgpu_kernel void @atomic_add_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%idx.0 = add nsw i32 %tid.x, 2
@@ -158,10 +158,10 @@ define amdgpu_kernel void @atomic_add_shl_base_lds_0(i32 addrspace(1)* %out, i32
ret void
}
-; SI-LABEL: {{^}}atomic_sub_shl_base_lds_0:
-; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
-; SI: ds_sub_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
-; SI: s_endpgm
+; GCN-LABEL: {{^}}atomic_sub_shl_base_lds_0:
+; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
+; GCN: ds_sub_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
+; GCN: s_endpgm
define amdgpu_kernel void @atomic_sub_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%idx.0 = add nsw i32 %tid.x, 2
@@ -172,10 +172,10 @@ define amdgpu_kernel void @atomic_sub_shl_base_lds_0(i32 addrspace(1)* %out, i32
ret void
}
-; SI-LABEL: {{^}}atomic_and_shl_base_lds_0:
-; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
-; SI: ds_and_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
-; SI: s_endpgm
+; GCN-LABEL: {{^}}atomic_and_shl_base_lds_0:
+; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
+; GCN: ds_and_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
+; GCN: s_endpgm
define amdgpu_kernel void @atomic_and_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%idx.0 = add nsw i32 %tid.x, 2
@@ -186,10 +186,10 @@ define amdgpu_kernel void @atomic_and_shl_base_lds_0(i32 addrspace(1)* %out, i32
ret void
}
-; SI-LABEL: {{^}}atomic_or_shl_base_lds_0:
-; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
-; SI: ds_or_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
-; SI: s_endpgm
+; GCN-LABEL: {{^}}atomic_or_shl_base_lds_0:
+; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
+; GCN: ds_or_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
+; GCN: s_endpgm
define amdgpu_kernel void @atomic_or_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%idx.0 = add nsw i32 %tid.x, 2
@@ -200,10 +200,10 @@ define amdgpu_kernel void @atomic_or_shl_base_lds_0(i32 addrspace(1)* %out, i32
ret void
}
-; SI-LABEL: {{^}}atomic_xor_shl_base_lds_0:
-; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
-; SI: ds_xor_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
-; SI: s_endpgm
+; GCN-LABEL: {{^}}atomic_xor_shl_base_lds_0:
+; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
+; GCN: ds_xor_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
+; GCN: s_endpgm
define amdgpu_kernel void @atomic_xor_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%idx.0 = add nsw i32 %tid.x, 2
@@ -224,10 +224,10 @@ define amdgpu_kernel void @atomic_xor_shl_base_lds_0(i32 addrspace(1)* %out, i32
; ret void
; }
-; SI-LABEL: {{^}}atomic_min_shl_base_lds_0:
-; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
-; SI: ds_min_rtn_i32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
-; SI: s_endpgm
+; GCN-LABEL: {{^}}atomic_min_shl_base_lds_0:
+; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
+; GCN: ds_min_rtn_i32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
+; GCN: s_endpgm
define amdgpu_kernel void @atomic_min_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%idx.0 = add nsw i32 %tid.x, 2
@@ -238,10 +238,10 @@ define amdgpu_kernel void @atomic_min_shl_base_lds_0(i32 addrspace(1)* %out, i32
ret void
}
-; SI-LABEL: {{^}}atomic_max_shl_base_lds_0:
-; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
-; SI: ds_max_rtn_i32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
-; SI: s_endpgm
+; GCN-LABEL: {{^}}atomic_max_shl_base_lds_0:
+; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
+; GCN: ds_max_rtn_i32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
+; GCN: s_endpgm
define amdgpu_kernel void @atomic_max_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%idx.0 = add nsw i32 %tid.x, 2
@@ -252,10 +252,10 @@ define amdgpu_kernel void @atomic_max_shl_base_lds_0(i32 addrspace(1)* %out, i32
ret void
}
-; SI-LABEL: {{^}}atomic_umin_shl_base_lds_0:
-; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
-; SI: ds_min_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
-; SI: s_endpgm
+; GCN-LABEL: {{^}}atomic_umin_shl_base_lds_0:
+; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
+; GCN: ds_min_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
+; GCN: s_endpgm
define amdgpu_kernel void @atomic_umin_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%idx.0 = add nsw i32 %tid.x, 2
@@ -266,10 +266,10 @@ define amdgpu_kernel void @atomic_umin_shl_base_lds_0(i32 addrspace(1)* %out, i3
ret void
}
-; SI-LABEL: {{^}}atomic_umax_shl_base_lds_0:
-; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
-; SI: ds_max_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
-; SI: s_endpgm
+; GCN-LABEL: {{^}}atomic_umax_shl_base_lds_0:
+; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
+; GCN: ds_max_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
+; GCN: s_endpgm
define amdgpu_kernel void @atomic_umax_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%idx.0 = add nsw i32 %tid.x, 2
@@ -280,5 +280,109 @@ define amdgpu_kernel void @atomic_umax_shl_base_lds_0(i32 addrspace(1)* %out, i3
ret void
}
+; GCN-LABEL: {{^}}shl_add_ptr_combine_2use_lds:
+; GCN: v_lshlrev_b32_e32 [[SCALE0:v[0-9]+]], 3, v0
+; GCN: ds_write_b32 [[SCALE0]], v{{[0-9]+}} offset:32
+
+; GCN: v_lshlrev_b32_e32 [[SCALE1:v[0-9]+]], 4, v0
+; GCN: ds_write_b32 [[SCALE1]], v{{[0-9]+}} offset:64
+define void @shl_add_ptr_combine_2use_lds(i32 %idx) #0 {
+ %idx.add = add nuw i32 %idx, 4
+ %shl0 = shl i32 %idx.add, 3
+ %shl1 = shl i32 %idx.add, 4
+ %ptr0 = inttoptr i32 %shl0 to i32 addrspace(3)*
+ %ptr1 = inttoptr i32 %shl1 to i32 addrspace(3)*
+ store volatile i32 9, i32 addrspace(3)* %ptr0
+ store volatile i32 10, i32 addrspace(3)* %ptr1
+ ret void
+}
+
+; GCN-LABEL: {{^}}shl_add_ptr_combine_2use_max_lds_offset:
+; GCN-DAG: v_lshlrev_b32_e32 [[SCALE0:v[0-9]+]], 3, v0
+; GCN-DAG: v_lshlrev_b32_e32 [[SCALE1:v[0-9]+]], 4, v0
+; GCN-DAG: ds_write_b32 [[SCALE0]], v{{[0-9]+}} offset:65528
+; GCN-DAG: v_add_i32_e32 [[ADD1:v[0-9]+]], vcc, 0x1fff0, [[SCALE1]]
+; GCN: ds_write_b32 [[ADD1]], v{{[0-9]+$}}
+define void @shl_add_ptr_combine_2use_max_lds_offset(i32 %idx) #0 {
+ %idx.add = add nuw i32 %idx, 8191
+ %shl0 = shl i32 %idx.add, 3
+ %shl1 = shl i32 %idx.add, 4
+ %ptr0 = inttoptr i32 %shl0 to i32 addrspace(3)*
+ %ptr1 = inttoptr i32 %shl1 to i32 addrspace(3)*
+ store volatile i32 9, i32 addrspace(3)* %ptr0
+ store volatile i32 10, i32 addrspace(3)* %ptr1
+ ret void
+}
+
+; GCN-LABEL: {{^}}shl_add_ptr_combine_2use_both_max_lds_offset:
+; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, 0x1000, v0
+; GCN-DAG: v_lshlrev_b32_e32 [[SCALE0:v[0-9]+]], 4, [[ADD]]
+; GCN-DAG: v_lshlrev_b32_e32 [[SCALE1:v[0-9]+]], 5, [[ADD]]
+; GCN-DAG: ds_write_b32 [[SCALE0]], v{{[0-9]+$}}
+; GCN: ds_write_b32 [[SCALE1]], v{{[0-9]+$}}
+define void @shl_add_ptr_combine_2use_both_max_lds_offset(i32 %idx) #0 {
+ %idx.add = add nuw i32 %idx, 4096
+ %shl0 = shl i32 %idx.add, 4
+ %shl1 = shl i32 %idx.add, 5
+ %ptr0 = inttoptr i32 %shl0 to i32 addrspace(3)*
+ %ptr1 = inttoptr i32 %shl1 to i32 addrspace(3)*
+ store volatile i32 9, i32 addrspace(3)* %ptr0
+ store volatile i32 10, i32 addrspace(3)* %ptr1
+ ret void
+}
+
+; GCN-LABEL: {{^}}shl_add_ptr_combine_2use_private:
+; GCN: v_lshlrev_b32_e32 [[SCALE0:v[0-9]+]], 2, v0
+; GCN: buffer_store_dword v{{[0-9]+}}, [[SCALE0]], s[0:3], s4 offen offset:16
+
+; GCN: v_lshlrev_b32_e32 [[SCALE1:v[0-9]+]], 3, v0
+; GCN: buffer_store_dword v{{[0-9]+}}, [[SCALE1]], s[0:3], s4 offen offset:32
+define void @shl_add_ptr_combine_2use_private(i16 zeroext %idx.arg) #0 {
+ %idx = zext i16 %idx.arg to i32
+ %idx.add = add nuw i32 %idx, 4
+ %shl0 = shl i32 %idx.add, 2
+ %shl1 = shl i32 %idx.add, 3
+ %ptr0 = inttoptr i32 %shl0 to i32*
+ %ptr1 = inttoptr i32 %shl1 to i32*
+ store volatile i32 9, i32* %ptr0
+ store volatile i32 10, i32* %ptr1
+ ret void
+}
+
+; GCN-LABEL: {{^}}shl_add_ptr_combine_2use_max_private_offset:
+; GCN-DAG: v_lshlrev_b32_e32 [[SCALE0:v[0-9]+]], 3, v0
+; GCN-DAG: v_lshlrev_b32_e32 [[SCALE1:v[0-9]+]], 4, v0
+; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[SCALE0]], s[0:3], s4 offen offset:4088
+; GCN-DAG: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, 0x1ff0, [[SCALE1]]
+; GCN: buffer_store_dword v{{[0-9]+}}, [[ADD]], s[0:3], s4 offen{{$}}
+define void @shl_add_ptr_combine_2use_max_private_offset(i16 zeroext %idx.arg) #0 {
+ %idx = zext i16 %idx.arg to i32
+ %idx.add = add nuw i32 %idx, 511
+ %shl0 = shl i32 %idx.add, 3
+ %shl1 = shl i32 %idx.add, 4
+ %ptr0 = inttoptr i32 %shl0 to i32*
+ %ptr1 = inttoptr i32 %shl1 to i32*
+ store volatile i32 9, i32* %ptr0
+ store volatile i32 10, i32* %ptr1
+ ret void
+}
+; GCN-LABEL: {{^}}shl_add_ptr_combine_2use_both_max_private_offset:
+; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, 0x100, v0
+; GCN-DAG: v_lshlrev_b32_e32 [[SCALE0:v[0-9]+]], 4, [[ADD]]
+; GCN-DAG: v_lshlrev_b32_e32 [[SCALE1:v[0-9]+]], 5, [[ADD]]
+; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[SCALE0]], s[0:3], s4 offen{{$}}
+; GCN: buffer_store_dword v{{[0-9]+}}, [[SCALE1]], s[0:3], s4 offen{{$}}
+define void @shl_add_ptr_combine_2use_both_max_private_offset(i16 zeroext %idx.arg) #0 {
+ %idx = zext i16 %idx.arg to i32
+ %idx.add = add nuw i32 %idx, 256
+ %shl0 = shl i32 %idx.add, 4
+ %shl1 = shl i32 %idx.add, 5
+ %ptr0 = inttoptr i32 %shl0 to i32*
+ %ptr1 = inttoptr i32 %shl1 to i32*
+ store volatile i32 9, i32* %ptr0
+ store volatile i32 10, i32* %ptr1
+ ret void
+}
+
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
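
For reference when reading the max-offset tests above: the fold is limited by the
width of each instruction's immediate offset field (16-bit unsigned for DS/LDS,
12-bit unsigned for buffer scratch accesses on these targets), which is why some
of the adds survive as explicit v_add_i32 instructions:

  (idx + 8191) << 3 -> offset 8191*8  = 65528            (fits the 16-bit DS field)
  (idx + 8191) << 4 -> offset 8191*16 = 131056 = 0x1fff0 (too big; explicit v_add)
  (idx +  511) << 3 -> offset  511*8  = 4088             (fits the 12-bit buffer field)
  (idx +  511) << 4 -> offset  511*16 = 8176   = 0x1ff0  (too big; explicit v_add)

In the "both_max" tests, even the smaller shift overflows the field (4096*16 = 65536
for LDS, 256*16 = 4096 for scratch), so neither use is folded and the add is kept.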