summaryrefslogtreecommitdiff
path: root/test/CodeGen/AMDGPU/promote-alloca-to-lds-select.ll
blob: ebeed0dd4435b1cdf7db4d07a63945237dd602b9 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -amdgpu-promote-alloca < %s | FileCheck %s

; CHECK-LABEL: @lds_promoted_alloca_select_invalid_pointer_operand(
; CHECK: %alloca = alloca i32
; CHECK: select i1 undef, i32* undef, i32* %alloca
define amdgpu_kernel void @lds_promoted_alloca_select_invalid_pointer_operand() #0 {
  %alloca = alloca i32, align 4
  %select = select i1 undef, i32* undef, i32* %alloca
  store i32 0, i32* %select, align 4
  ret void
}

; CHECK-LABEL: @lds_promote_alloca_select_two_derived_pointers(
; CHECK: [[ARRAYGEP:%[0-9]+]] = getelementptr inbounds [256 x [16 x i32]], [256 x [16 x i32]] addrspace(3)* @lds_promote_alloca_select_two_derived_pointers.alloca, i32 0, i32 %{{[0-9]+}}
; CHECK: %ptr0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* [[ARRAYGEP]], i32 0, i32 %a
; CHECK: %ptr1 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* [[ARRAYGEP]], i32 0, i32 %b
; CHECK: %select = select i1 undef, i32 addrspace(3)* %ptr0, i32 addrspace(3)* %ptr1
; CHECK: store i32 0, i32 addrspace(3)* %select, align 4
define amdgpu_kernel void @lds_promote_alloca_select_two_derived_pointers(i32 %a, i32 %b) #0 {
  %alloca = alloca [16 x i32], align 4
  %ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
  %ptr1 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %b
  %select = select i1 undef, i32* %ptr0, i32* %ptr1
  store i32 0, i32* %select, align 4
  ret void
}

; FIXME: This should be promotable but requires knowing that both will be promoted first.

; CHECK-LABEL: @lds_promote_alloca_select_two_allocas(
; CHECK: %alloca0 = alloca i32, i32 16, align 4
; CHECK: %alloca1 = alloca i32, i32 16, align 4
; CHECK: %ptr0 = getelementptr inbounds i32, i32* %alloca0, i32 %a
; CHECK: %ptr1 = getelementptr inbounds i32, i32* %alloca1, i32 %b
; CHECK: %select = select i1 undef, i32* %ptr0, i32* %ptr1
define amdgpu_kernel void @lds_promote_alloca_select_two_allocas(i32 %a, i32 %b) #0 {
  %alloca0 = alloca i32, i32 16, align 4
  %alloca1 = alloca i32, i32 16, align 4
  %ptr0 = getelementptr inbounds i32, i32* %alloca0, i32 %a
  %ptr1 = getelementptr inbounds i32, i32* %alloca1, i32 %b
  %select = select i1 undef, i32* %ptr0, i32* %ptr1
  store i32 0, i32* %select, align 4
  ret void
}

; TODO: Maybe this should be canonicalized to select on the constant and GEP after.
; CHECK-LABEL: @lds_promote_alloca_select_two_derived_constant_pointers(
; CHECK: [[ARRAYGEP:%[0-9]+]] = getelementptr inbounds [256 x [16 x i32]], [256 x [16 x i32]] addrspace(3)* @lds_promote_alloca_select_two_derived_constant_pointers.alloca, i32 0, i32 %{{[0-9]+}}
; CHECK: %ptr0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* [[ARRAYGEP]], i32 0, i32 1
; CHECK: %ptr1 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* [[ARRAYGEP]], i32 0, i32 3
; CHECK: %select = select i1 undef, i32 addrspace(3)* %ptr0, i32 addrspace(3)* %ptr1
; CHECK: store i32 0, i32 addrspace(3)* %select, align 4
define amdgpu_kernel void @lds_promote_alloca_select_two_derived_constant_pointers() #0 {
  %alloca = alloca [16 x i32], align 4
  %ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 1
  %ptr1 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 3
  %select = select i1 undef, i32* %ptr0, i32* %ptr1
  store i32 0, i32* %select, align 4
  ret void
}

; CHECK-LABEL: @lds_promoted_alloca_select_input_select(
; CHECK: getelementptr inbounds [256 x [16 x i32]], [256 x [16 x i32]] addrspace(3)* @lds_promoted_alloca_select_input_select.alloca, i32 0, i32 %{{[0-9]+}}
; CHECK: %ptr0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* %{{[0-9]+}}, i32 0, i32 %a
; CHECK: %ptr1 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* %{{[0-9]+}}, i32 0, i32 %b
; CHECK: %ptr2 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* %{{[0-9]+}}, i32 0, i32 %c
; CHECK: %select0 = select i1 undef, i32 addrspace(3)* %ptr0, i32 addrspace(3)* %ptr1
; CHECK: %select1 = select i1 undef, i32 addrspace(3)* %select0, i32 addrspace(3)* %ptr2
; CHECK: store i32 0, i32 addrspace(3)* %select1, align 4
define amdgpu_kernel void @lds_promoted_alloca_select_input_select(i32 %a, i32 %b, i32 %c) #0 {
  %alloca = alloca [16 x i32], align 4
  %ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
  %ptr1 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %b
  %ptr2 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %c
  %select0 = select i1 undef, i32* %ptr0, i32* %ptr1
  %select1 = select i1 undef, i32* %select0, i32* %ptr2
  store i32 0, i32* %select1, align 4
  ret void
}

define amdgpu_kernel void @lds_promoted_alloca_select_input_phi(i32 %a, i32 %b, i32 %c) #0 {
entry:
  %alloca = alloca [16 x i32], align 4
  %ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
  %ptr1 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %b
  store i32 0, i32* %ptr0
  br i1 undef, label %bb1, label %bb2

bb1:
  %ptr2 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %c
  %select0 = select i1 undef, i32* undef, i32* %ptr2
  store i32 0, i32* %ptr1
  br label %bb2

bb2:
  %phi.ptr = phi i32* [ %ptr0, %entry ], [ %select0, %bb1 ]
  %select1 = select i1 undef, i32* %phi.ptr, i32* %ptr1
  store i32 0, i32* %select1, align 4
  ret void
}

; CHECK-LABEL: @select_null_rhs(
; CHECK-NOT: alloca
; CHECK: select i1 %tmp2, double addrspace(3)* %{{[0-9]+}}, double addrspace(3)* null
define amdgpu_kernel void @select_null_rhs(double addrspace(1)* nocapture %arg, i32 %arg1) #1 {
bb:
  %tmp = alloca double, align 8
  store double 0.000000e+00, double* %tmp, align 8
  %tmp2 = icmp eq i32 %arg1, 0
  %tmp3 = select i1 %tmp2, double* %tmp, double* null
  store double 1.000000e+00, double* %tmp3, align 8
  %tmp4 = load double, double* %tmp, align 8
  store double %tmp4, double addrspace(1)* %arg
  ret void
}

; CHECK-LABEL: @select_null_lhs(
; CHECK-NOT: alloca
; CHECK: select i1 %tmp2, double addrspace(3)* null, double addrspace(3)* %{{[0-9]+}}
define amdgpu_kernel void @select_null_lhs(double addrspace(1)* nocapture %arg, i32 %arg1) #1 {
bb:
  %tmp = alloca double, align 8
  store double 0.000000e+00, double* %tmp, align 8
  %tmp2 = icmp eq i32 %arg1, 0
  %tmp3 = select i1 %tmp2, double* null, double* %tmp
  store double 1.000000e+00, double* %tmp3, align 8
  %tmp4 = load double, double* %tmp, align 8
  store double %tmp4, double addrspace(1)* %arg
  ret void
}

attributes #0 = { norecurse nounwind "amdgpu-waves-per-eu"="1,1" }
attributes #1 = { norecurse nounwind }