summary | refs | log | tree | commit | diff
path: root/test/CodeGen/NVPTX
diff options
context:
space:
mode:
author: Artem Belevich <tra@google.com> 2016-07-20 18:39:47 +0000
committer: Artem Belevich <tra@google.com> 2016-07-20 18:39:47 +0000
commit: 86d9bce72679d1dc79d4dd9a51900cb02518465b (patch)
tree: f60690696e6ada5f70d8941d7a3f3b1841854a7e /test/CodeGen/NVPTX
parent: 5f9c12405959836fb07b8bce02d4938e3d7ea712 (diff)
[NVPTX] Improve lowering of byval args of device functions.
Avoid unnecessary spills of byval arguments of device functions to local space on SASS level and subsequent pointer conversion to generic address space that follows. Instead, make a local copy in IR, provide a way to access arguments directly, and let LLVM optimize the copy away when possible.

Differential Review: https://reviews.llvm.org/D21421

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@276153 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGen/NVPTX')
-rw-r--r-- test/CodeGen/NVPTX/bug21465.ll 2
-rw-r--r-- test/CodeGen/NVPTX/lower-kernel-ptr-arg.ll 32
2 files changed, 26 insertions, 8 deletions
diff --git a/test/CodeGen/NVPTX/bug21465.ll b/test/CodeGen/NVPTX/bug21465.ll
index 2eae41f73a0..acd3cee9848 100644
--- a/test/CodeGen/NVPTX/bug21465.ll
+++ b/test/CodeGen/NVPTX/bug21465.ll
@@ -15,7 +15,7 @@ entry:
%b = getelementptr inbounds %struct.S, %struct.S* %input, i64 0, i32 1
%0 = load i32, i32* %b, align 4
; PTX-NOT: ld.param.u32 {{%r[0-9]+}}, [{{%rd[0-9]+}}]
-; PTX: ld.param.u32 [[value:%r[0-9]+]], [{{%rd[0-9]+}}+4]
+; PTX: ld.param.u32 [[value:%r[0-9]+]], [_Z11TakesStruct1SPi_param_0+4]
store i32 %0, i32* %output, align 4
; PTX-NEXT: st.global.u32 [{{%rd[0-9]+}}], [[value]]
ret void
diff --git a/test/CodeGen/NVPTX/lower-kernel-ptr-arg.ll b/test/CodeGen/NVPTX/lower-kernel-ptr-arg.ll
index 2fffa3eeac1..bdb1d3c546c 100644
--- a/test/CodeGen/NVPTX/lower-kernel-ptr-arg.ll
+++ b/test/CodeGen/NVPTX/lower-kernel-ptr-arg.ll
@@ -28,20 +28,38 @@ define void @kernel2(float addrspace(1)* %input, float addrspace(1)* %output) {
%struct.S = type { i32*, i32* }
-define void @ptr_in_byval(%struct.S* byval %input, i32* %output) {
-; CHECK-LABEL: .visible .entry ptr_in_byval(
-; CHECK: cvta.to.global.u64
-; CHECK: cvta.to.global.u64
+define void @ptr_in_byval_kernel(%struct.S* byval %input, i32* %output) {
+; CHECK-LABEL: .visible .entry ptr_in_byval_kernel(
+; CHECK: ld.param.u64 %[[optr:rd.*]], [ptr_in_byval_kernel_param_1]
+; CHECK: cvta.to.global.u64 %[[optr_g:.*]], %[[optr]];
+; CHECK: ld.param.u64 %[[iptr:rd.*]], [ptr_in_byval_kernel_param_0+8]
+; CHECK: cvta.to.global.u64 %[[iptr_g:.*]], %[[iptr]];
+ %b_ptr = getelementptr inbounds %struct.S, %struct.S* %input, i64 0, i32 1
+ %b = load i32*, i32** %b_ptr, align 4
+ %v = load i32, i32* %b, align 4
+; CHECK: ld.global.u32 %[[val:.*]], [%[[iptr_g]]]
+ store i32 %v, i32* %output, align 4
+; CHECK: st.global.u32 [%[[optr_g]]], %[[val]]
+ ret void
+}
+
+; Regular functions lower byval arguments differently. We need to make
+; sure that we're loading byval argument data using [symbol+offset].
+; There's also no assumption that all pointers within are in global space.
+define void @ptr_in_byval_func(%struct.S* byval %input, i32* %output) {
+; CHECK-LABEL: .visible .func ptr_in_byval_func(
+; CHECK: ld.param.u64 %[[optr:rd.*]], [ptr_in_byval_func_param_1]
+; CHECK: ld.param.u64 %[[iptr:rd.*]], [ptr_in_byval_func_param_0+8]
%b_ptr = getelementptr inbounds %struct.S, %struct.S* %input, i64 0, i32 1
%b = load i32*, i32** %b_ptr, align 4
%v = load i32, i32* %b, align 4
-; CHECK: ld.global.u32
+; CHECK: ld.u32 %[[val:.*]], [%[[iptr]]]
store i32 %v, i32* %output, align 4
-; CHECK: st.global.u32
+; CHECK: st.u32 [%[[optr]]], %[[val]]
ret void
}
!nvvm.annotations = !{!0, !1, !2}
!0 = !{void (float*, float*)* @kernel, !"kernel", i32 1}
!1 = !{void (float addrspace(1)*, float addrspace(1)*)* @kernel2, !"kernel", i32 1}
-!2 = !{void (%struct.S*, i32*)* @ptr_in_byval, !"kernel", i32 1}
+!2 = !{void (%struct.S*, i32*)* @ptr_in_byval_kernel, !"kernel", i32 1}