summary | refs | log | tree | commit | diff
path: root/test/CodeGen/NVPTX
diff options
context:
space:
mode:
author Michael Kuperstein <mkuper@google.com> 2017-07-06 22:18:54 +0000
committer Michael Kuperstein <mkuper@google.com> 2017-07-06 22:18:54 +0000
commit 1803a9f23426a722e25b5c4c6e55b790b5fbe930 (patch)
tree 20fc675737cc9944a02e927cbf26a343a6bf194d /test/CodeGen/NVPTX
parent 513399718a222409a3e840938a01063019d33af3 (diff)
[NVPTX] Add lowering of i128 params.
The patch adds support of i128 params lowering. The changes are quite trivial to
support i128 as a "special case" of integer type. With this patch, we lower i128
params the same way as aggregates of size 16 bytes: .param .b8 _ [16].

Currently, NVPTX can't deal with the 128 bit integers:
* in some cases because of failed assertions like
  ValVTs.size() == OutVals.size() && "Bad return value decomposition"
* in other cases emitting PTX with .i128 or .u128 types (which are not valid [1])

[1] http://docs.nvidia.com/cuda/parallel-thread-execution/index.html#fundamental-types

Differential Revision: https://reviews.llvm.org/D34555

Patch by: Denys Zariaiev (denys.zariaiev@gmail.com)

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@307326 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGen/NVPTX')
-rw-r--r-- test/CodeGen/NVPTX/i128-global.ll | 7
-rw-r--r-- test/CodeGen/NVPTX/i128-param.ll | 58
-rw-r--r-- test/CodeGen/NVPTX/i128-retval.ll | 28
3 files changed, 93 insertions, 0 deletions
diff --git a/test/CodeGen/NVPTX/i128-global.ll b/test/CodeGen/NVPTX/i128-global.ll
new file mode 100644
index 00000000000..cd9609295f5
--- /dev/null
+++ b/test/CodeGen/NVPTX/i128-global.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -O0 -march=nvptx64 -mcpu=sm_20 | FileCheck %s
+
+; CHECK: .visible .global .align 16 .b8 G1[16] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+@G1 = global i128 1
+
+; CHECK: .visible .global .align 16 .b8 G2[16];
+@G2 = global i128 0
\ No newline at end of file
diff --git a/test/CodeGen/NVPTX/i128-param.ll b/test/CodeGen/NVPTX/i128-param.ll
new file mode 100644
index 00000000000..7cb603546ae
--- /dev/null
+++ b/test/CodeGen/NVPTX/i128-param.ll
@@ -0,0 +1,58 @@
+; RUN: llc < %s -O0 -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+; CHECK-LABEL: .visible .func callee(
+; CHECK-NEXT: .param .align 16 .b8 callee_param_0[16],
+; CHECK-NEXT: .param .align 16 .b8 callee_param_1[16],
+define void @callee(i128, i128, i128*) {
+ ; CHECK-DAG: ld.param.v2.u64 {%[[REG0:rd[0-9]+]], %[[REG1:rd[0-9]+]]}, [callee_param_0];
+ ; CHECK-DAG: ld.param.v2.u64 {%[[REG2:rd[0-9]+]], %[[REG3:rd[0-9]+]]}, [callee_param_1];
+
+ ; CHECK: mul.lo.s64 %[[REG4:rd[0-9]+]], %[[REG0]], %[[REG3]];
+ ; CHECK-NEXT: mul.hi.u64 %[[REG5:rd[0-9]+]], %[[REG0]], %[[REG2]];
+ ; CHECK-NEXT: add.s64 %[[REG6:rd[0-9]+]], %[[REG5]], %[[REG4]];
+ ; CHECK-NEXT: mul.lo.s64 %[[REG7:rd[0-9]+]], %[[REG1]], %[[REG2]];
+ ; CHECK-NEXT: add.s64 %[[REG8:rd[0-9]+]], %[[REG6]], %[[REG7]];
+ ; CHECK-NEXT: mul.lo.s64 %[[REG9:rd[0-9]+]], %[[REG0]], %[[REG2]];
+ %a = mul i128 %0, %1
+
+ store i128 %a, i128* %2
+ ret void
+}
+
+; CHECK-LABEL: .visible .entry caller_kernel(
+; CHECK-NEXT: .param .align 16 .b8 caller_kernel_param_0[16],
+; CHECK-NEXT: .param .align 16 .b8 caller_kernel_param_1[16],
+define ptx_kernel void @caller_kernel(i128, i128, i128*) {
+start:
+ ; CHECK-DAG: ld.param.v2.u64 {%[[REG0:rd[0-9]+]], %[[REG1:rd[0-9]+]]}, [caller_kernel_param_0];
+ ; CHECK-DAG: ld.param.v2.u64 {%[[REG2:rd[0-9]+]], %[[REG3:rd[0-9]+]]}, [caller_kernel_param_1];
+
+ ; CHECK: { // callseq [[CALLSEQ_ID:[0-9]]], 0
+ ; CHECK: .param .align 16 .b8 param0[16];
+ ; CHECK-NEXT: st.param.v2.b64 [param0+0], {%[[REG0]], %[[REG1]]}
+ ; CHECK: .param .align 16 .b8 param1[16];
+ ; CHECK-NEXT: st.param.v2.b64 [param1+0], {%[[REG2]], %[[REG3]]}
+ ; CHECK: } // callseq [[CALLSEQ_ID]]
+ call void @callee(i128 %0, i128 %1, i128* %2)
+
+ ret void
+}
+
+; CHECK-LABEL: .visible .func caller_func(
+; CHECK-NEXT: .param .align 16 .b8 caller_func_param_0[16],
+; CHECK-NEXT: .param .align 16 .b8 caller_func_param_1[16],
+define void @caller_func(i128, i128, i128*) {
+start:
+ ; CHECK-DAG: ld.param.v2.u64 {%[[REG0:rd[0-9]+]], %[[REG1:rd[0-9]+]]}, [caller_func_param_0]
+ ; CHECK-DAG: ld.param.v2.u64 {%[[REG2:rd[0-9]+]], %[[REG3:rd[0-9]+]]}, [caller_func_param_1]
+
+ ; CHECK: { // callseq [[CALLSEQ_ID:[0-9]]], 0
+ ; CHECK: .param .align 16 .b8 param0[16];
+ ; CHECK: st.param.v2.b64 [param0+0], {%[[REG0]], %[[REG1]]}
+ ; CHECK: .param .align 16 .b8 param1[16];
+ ; CHECK: st.param.v2.b64 [param1+0], {%[[REG2]], %[[REG3]]}
+ ; CHECK: } // callseq [[CALLSEQ_ID]]
+ call void @callee(i128 %0, i128 %1, i128* %2)
+
+ ret void
+}
diff --git a/test/CodeGen/NVPTX/i128-retval.ll b/test/CodeGen/NVPTX/i128-retval.ll
new file mode 100644
index 00000000000..015b0199d83
--- /dev/null
+++ b/test/CodeGen/NVPTX/i128-retval.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -O0 -march=nvptx64 -mcpu=sm_20 | FileCheck %s
+
+; CHECK-LABEL: .visible .func (.param .align 16 .b8 func_retval0[16]) callee(
+define i128 @callee(i128) {
+ ; CHECK: ld.param.v2.u64 {%[[REG0:rd[0-9]+]], %[[REG1:rd[0-9]+]]}, [callee_param_0];
+ ; CHECK: st.param.v2.b64 [func_retval0+0], {%[[REG0]], %[[REG1]]}
+ ret i128 %0
+}
+
+; CHECK-LABEL: .visible .func caller(
+define void @caller(i128, i128*) {
+start:
+ ; CHECK-DAG: ld.param.v2.u64 {%[[REG0:rd[0-9]+]], %[[REG1:rd[0-9]+]]}, [caller_param_0];
+ ; CHECK-DAG: ld.param.u64 %[[OUT:rd[0-9]+]], [caller_param_1];
+
+ ; CHECK: { // callseq 0, 0
+ ; CHECK: .param .align 16 .b8 retval0[16];
+ ; CHECK: call.uni (retval0),
+ ; CHECK: ld.param.v2.b64 {%[[REG2:rd[0-9]+]], %[[REG3:rd[0-9]+]]}, [retval0+0];
+ ; CHECK: } // callseq 0
+ %a = call i128 @callee(i128 %0)
+
+ ; CHECK-DAG: st.u64 [%[[OUT]]], %[[REG2]];
+ ; CHECK-DAG: st.u64 [%[[OUT]]+8], %[[REG3]];
+ store i128 %a, i128* %1
+
+ ret void
+}