diff options
author | Artem Belevich <tra@google.com> | 2016-07-20 18:39:47 +0000 |
---|---|---|
committer | Artem Belevich <tra@google.com> | 2016-07-20 18:39:47 +0000 |
commit | 86d9bce72679d1dc79d4dd9a51900cb02518465b (patch) | |
tree | f60690696e6ada5f70d8941d7a3f3b1841854a7e /test/CodeGen/NVPTX | |
parent | 5f9c12405959836fb07b8bce02d4938e3d7ea712 (diff) |
[NVPTX] Improve lowering of byval args of device functions.
Avoid unnecessary spills of byval arguments of device functions to
local space at the SASS level and the subsequent pointer conversion to
the generic address space. Instead, make a local copy in IR, provide
a way to access arguments directly, and let LLVM optimize the copy away
when possible.
Differential Review: https://reviews.llvm.org/D21421
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@276153 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGen/NVPTX')
-rw-r--r-- | test/CodeGen/NVPTX/bug21465.ll | 2 | ||||
-rw-r--r-- | test/CodeGen/NVPTX/lower-kernel-ptr-arg.ll | 32 |
2 files changed, 26 insertions, 8 deletions
diff --git a/test/CodeGen/NVPTX/bug21465.ll b/test/CodeGen/NVPTX/bug21465.ll index 2eae41f73a0..acd3cee9848 100644 --- a/test/CodeGen/NVPTX/bug21465.ll +++ b/test/CodeGen/NVPTX/bug21465.ll @@ -15,7 +15,7 @@ entry: %b = getelementptr inbounds %struct.S, %struct.S* %input, i64 0, i32 1 %0 = load i32, i32* %b, align 4 ; PTX-NOT: ld.param.u32 {{%r[0-9]+}}, [{{%rd[0-9]+}}] -; PTX: ld.param.u32 [[value:%r[0-9]+]], [{{%rd[0-9]+}}+4] +; PTX: ld.param.u32 [[value:%r[0-9]+]], [_Z11TakesStruct1SPi_param_0+4] store i32 %0, i32* %output, align 4 ; PTX-NEXT: st.global.u32 [{{%rd[0-9]+}}], [[value]] ret void diff --git a/test/CodeGen/NVPTX/lower-kernel-ptr-arg.ll b/test/CodeGen/NVPTX/lower-kernel-ptr-arg.ll index 2fffa3eeac1..bdb1d3c546c 100644 --- a/test/CodeGen/NVPTX/lower-kernel-ptr-arg.ll +++ b/test/CodeGen/NVPTX/lower-kernel-ptr-arg.ll @@ -28,20 +28,38 @@ define void @kernel2(float addrspace(1)* %input, float addrspace(1)* %output) { %struct.S = type { i32*, i32* } -define void @ptr_in_byval(%struct.S* byval %input, i32* %output) { -; CHECK-LABEL: .visible .entry ptr_in_byval( -; CHECK: cvta.to.global.u64 -; CHECK: cvta.to.global.u64 +define void @ptr_in_byval_kernel(%struct.S* byval %input, i32* %output) { +; CHECK-LABEL: .visible .entry ptr_in_byval_kernel( +; CHECK: ld.param.u64 %[[optr:rd.*]], [ptr_in_byval_kernel_param_1] +; CHECK: cvta.to.global.u64 %[[optr_g:.*]], %[[optr]]; +; CHECK: ld.param.u64 %[[iptr:rd.*]], [ptr_in_byval_kernel_param_0+8] +; CHECK: cvta.to.global.u64 %[[iptr_g:.*]], %[[iptr]]; + %b_ptr = getelementptr inbounds %struct.S, %struct.S* %input, i64 0, i32 1 + %b = load i32*, i32** %b_ptr, align 4 + %v = load i32, i32* %b, align 4 +; CHECK: ld.global.u32 %[[val:.*]], [%[[iptr_g]]] + store i32 %v, i32* %output, align 4 +; CHECK: st.global.u32 [%[[optr_g]]], %[[val]] + ret void +} + +; Regular functions lower byval arguments differently. We need to make +; sure that we're loading byval argument data using [symbol+offset]. 
+; There's also no assumption that all pointers within are in global space. +define void @ptr_in_byval_func(%struct.S* byval %input, i32* %output) { +; CHECK-LABEL: .visible .func ptr_in_byval_func( +; CHECK: ld.param.u64 %[[optr:rd.*]], [ptr_in_byval_func_param_1] +; CHECK: ld.param.u64 %[[iptr:rd.*]], [ptr_in_byval_func_param_0+8] %b_ptr = getelementptr inbounds %struct.S, %struct.S* %input, i64 0, i32 1 %b = load i32*, i32** %b_ptr, align 4 %v = load i32, i32* %b, align 4 -; CHECK: ld.global.u32 +; CHECK: ld.u32 %[[val:.*]], [%[[iptr]]] store i32 %v, i32* %output, align 4 -; CHECK: st.global.u32 +; CHECK: st.u32 [%[[optr]]], %[[val]] ret void } !nvvm.annotations = !{!0, !1, !2} !0 = !{void (float*, float*)* @kernel, !"kernel", i32 1} !1 = !{void (float addrspace(1)*, float addrspace(1)*)* @kernel2, !"kernel", i32 1} -!2 = !{void (%struct.S*, i32*)* @ptr_in_byval, !"kernel", i32 1} +!2 = !{void (%struct.S*, i32*)* @ptr_in_byval_kernel, !"kernel", i32 1} |