diff options
author | Michael Kuperstein <mkuper@google.com> | 2017-07-06 22:18:54 +0000 |
---|---|---|
committer | Michael Kuperstein <mkuper@google.com> | 2017-07-06 22:18:54 +0000 |
commit | 1803a9f23426a722e25b5c4c6e55b790b5fbe930 (patch) | |
tree | 20fc675737cc9944a02e927cbf26a343a6bf194d /test/CodeGen/NVPTX | |
parent | 513399718a222409a3e840938a01063019d33af3 (diff) |
[NVPTX] Add lowering of i128 params.
This patch adds support for lowering i128 params. The changes are quite trivial:
i128 is handled as a "special case" of an integer type. With this patch, we lower
i128 params the same way as aggregates of size 16 bytes: .param .b8 _ [16].
Currently, NVPTX can't deal with 128-bit integers:
* in some cases because of failed assertions like
ValVTs.size() == OutVals.size() && "Bad return value decomposition"
* in other cases because it emits PTX with .i128 or .u128 types (which are not valid [1])
[1] http://docs.nvidia.com/cuda/parallel-thread-execution/index.html#fundamental-types
Differential Revision: https://reviews.llvm.org/D34555
Patch by: Denys Zariaiev (denys.zariaiev@gmail.com)
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@307326 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGen/NVPTX')
-rw-r--r-- | test/CodeGen/NVPTX/i128-global.ll | 7 | ||||
-rw-r--r-- | test/CodeGen/NVPTX/i128-param.ll | 58 | ||||
-rw-r--r-- | test/CodeGen/NVPTX/i128-retval.ll | 28 |
3 files changed, 93 insertions, 0 deletions
diff --git a/test/CodeGen/NVPTX/i128-global.ll b/test/CodeGen/NVPTX/i128-global.ll new file mode 100644 index 00000000000..cd9609295f5 --- /dev/null +++ b/test/CodeGen/NVPTX/i128-global.ll @@ -0,0 +1,7 @@ +; RUN: llc < %s -O0 -march=nvptx64 -mcpu=sm_20 | FileCheck %s + +; CHECK: .visible .global .align 16 .b8 G1[16] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; +@G1 = global i128 1 + +; CHECK: .visible .global .align 16 .b8 G2[16]; +@G2 = global i128 0
\ No newline at end of file diff --git a/test/CodeGen/NVPTX/i128-param.ll b/test/CodeGen/NVPTX/i128-param.ll new file mode 100644 index 00000000000..7cb603546ae --- /dev/null +++ b/test/CodeGen/NVPTX/i128-param.ll @@ -0,0 +1,58 @@ +; RUN: llc < %s -O0 -march=nvptx -mcpu=sm_20 | FileCheck %s + +; CHECK-LABEL: .visible .func callee( +; CHECK-NEXT: .param .align 16 .b8 callee_param_0[16], +; CHECK-NEXT: .param .align 16 .b8 callee_param_1[16], +define void @callee(i128, i128, i128*) { + ; CHECK-DAG: ld.param.v2.u64 {%[[REG0:rd[0-9]+]], %[[REG1:rd[0-9]+]]}, [callee_param_0]; + ; CHECK-DAG: ld.param.v2.u64 {%[[REG2:rd[0-9]+]], %[[REG3:rd[0-9]+]]}, [callee_param_1]; + + ; CHECK: mul.lo.s64 %[[REG4:rd[0-9]+]], %[[REG0]], %[[REG3]]; + ; CHECK-NEXT: mul.hi.u64 %[[REG5:rd[0-9]+]], %[[REG0]], %[[REG2]]; + ; CHECK-NEXT: add.s64 %[[REG6:rd[0-9]+]], %[[REG5]], %[[REG4]]; + ; CHECK-NEXT: mul.lo.s64 %[[REG7:rd[0-9]+]], %[[REG1]], %[[REG2]]; + ; CHECK-NEXT: add.s64 %[[REG8:rd[0-9]+]], %[[REG6]], %[[REG7]]; + ; CHECK-NEXT: mul.lo.s64 %[[REG9:rd[0-9]+]], %[[REG0]], %[[REG2]]; + %a = mul i128 %0, %1 + + store i128 %a, i128* %2 + ret void +} + +; CHECK-LABEL: .visible .entry caller_kernel( +; CHECK-NEXT: .param .align 16 .b8 caller_kernel_param_0[16], +; CHECK-NEXT: .param .align 16 .b8 caller_kernel_param_1[16], +define ptx_kernel void @caller_kernel(i128, i128, i128*) { +start: + ; CHECK-DAG: ld.param.v2.u64 {%[[REG0:rd[0-9]+]], %[[REG1:rd[0-9]+]]}, [caller_kernel_param_0]; + ; CHECK-DAG: ld.param.v2.u64 {%[[REG2:rd[0-9]+]], %[[REG3:rd[0-9]+]]}, [caller_kernel_param_1]; + + ; CHECK: { // callseq [[CALLSEQ_ID:[0-9]]], 0 + ; CHECK: .param .align 16 .b8 param0[16]; + ; CHECK-NEXT: st.param.v2.b64 [param0+0], {%[[REG0]], %[[REG1]]} + ; CHECK: .param .align 16 .b8 param1[16]; + ; CHECK-NEXT: st.param.v2.b64 [param1+0], {%[[REG2]], %[[REG3]]} + ; CHECK: } // callseq [[CALLSEQ_ID]] + call void @callee(i128 %0, i128 %1, i128* %2) + + ret void +} + +; CHECK-LABEL: .visible .func caller_func( 
+; CHECK-NEXT: .param .align 16 .b8 caller_func_param_0[16], +; CHECK-NEXT: .param .align 16 .b8 caller_func_param_1[16], +define void @caller_func(i128, i128, i128*) { +start: + ; CHECK-DAG: ld.param.v2.u64 {%[[REG0:rd[0-9]+]], %[[REG1:rd[0-9]+]]}, [caller_func_param_0] + ; CHECK-DAG: ld.param.v2.u64 {%[[REG2:rd[0-9]+]], %[[REG3:rd[0-9]+]]}, [caller_func_param_1] + + ; CHECK: { // callseq [[CALLSEQ_ID:[0-9]]], 0 + ; CHECK: .param .align 16 .b8 param0[16]; + ; CHECK: st.param.v2.b64 [param0+0], {%[[REG0]], %[[REG1]]} + ; CHECK: .param .align 16 .b8 param1[16]; + ; CHECK: st.param.v2.b64 [param1+0], {%[[REG2]], %[[REG3]]} + ; CHECK: } // callseq [[CALLSEQ_ID]] + call void @callee(i128 %0, i128 %1, i128* %2) + + ret void +} diff --git a/test/CodeGen/NVPTX/i128-retval.ll b/test/CodeGen/NVPTX/i128-retval.ll new file mode 100644 index 00000000000..015b0199d83 --- /dev/null +++ b/test/CodeGen/NVPTX/i128-retval.ll @@ -0,0 +1,28 @@ +; RUN: llc < %s -O0 -march=nvptx64 -mcpu=sm_20 | FileCheck %s + +; CHECK-LABEL: .visible .func (.param .align 16 .b8 func_retval0[16]) callee( +define i128 @callee(i128) { + ; CHECK: ld.param.v2.u64 {%[[REG0:rd[0-9]+]], %[[REG1:rd[0-9]+]]}, [callee_param_0]; + ; CHECK: st.param.v2.b64 [func_retval0+0], {%[[REG0]], %[[REG1]]} + ret i128 %0 +} + +; CHECK-LABEL: .visible .func caller( +define void @caller(i128, i128*) { +start: + ; CHECK-DAG: ld.param.v2.u64 {%[[REG0:rd[0-9]+]], %[[REG1:rd[0-9]+]]}, [caller_param_0]; + ; CHECK-DAG: ld.param.u64 %[[OUT:rd[0-9]+]], [caller_param_1]; + + ; CHECK: { // callseq 0, 0 + ; CHECK: .param .align 16 .b8 retval0[16]; + ; CHECK: call.uni (retval0), + ; CHECK: ld.param.v2.b64 {%[[REG2:rd[0-9]+]], %[[REG3:rd[0-9]+]]}, [retval0+0]; + ; CHECK: } // callseq 0 + %a = call i128 @callee(i128 %0) + + ; CHECK-DAG: st.u64 [%[[OUT]]], %[[REG2]]; + ; CHECK-DAG: st.u64 [%[[OUT]]+8], %[[REG3]]; + store i128 %a, i128* %1 + + ret void +} |