diff options
author | Artem Belevich <tra@google.com> | 2015-09-10 18:24:23 +0000 |
---|---|---|
committer | Artem Belevich <tra@google.com> | 2015-09-10 18:24:23 +0000 |
commit | 4298cae65607c6ff2dae34675b47e7ac880bc6fc (patch) | |
tree | b045fc448509404277162dc30318c1374167df60 /test/CodeGenCUDA | |
parent | 80ba331d315f32305dbc43baeb229e9f44952992 (diff) |
[CUDA] Postprocess bitcode linked in during device-side CUDA compilation.
Link in and internalize the symbols we need from supplied bitcode library.
Differential Revision: http://reviews.llvm.org/D11664
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@247317 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGenCUDA')
-rw-r--r-- | test/CodeGenCUDA/Inputs/device-code.ll | 38 | ||||
-rw-r--r-- | test/CodeGenCUDA/link-device-bitcode.cu | 56 |
2 files changed, 94 insertions, 0 deletions
diff --git a/test/CodeGenCUDA/Inputs/device-code.ll b/test/CodeGenCUDA/Inputs/device-code.ll new file mode 100644 index 0000000000..5943a000c1 --- /dev/null +++ b/test/CodeGenCUDA/Inputs/device-code.ll @@ -0,0 +1,38 @@ +; Simple bit of IR to mimic CUDA's libdevice. We want to be +; able to link with it and we need to make sure all __nvvm_reflect +; calls are eliminated by the time PTX has been produced. + +target triple = "nvptx-unknown-cuda" + +declare i32 @__nvvm_reflect(i8*) + +@"$str" = private addrspace(1) constant [8 x i8] c"USE_MUL\00" + +define void @unused_subfunc(float %a) { + ret void +} + +define void @used_subfunc(float %a) { + ret void +} + +define float @_Z17device_mul_or_addff(float %a, float %b) { + %reflect = call i32 @__nvvm_reflect(i8* addrspacecast (i8 addrspace(1)* getelementptr inbounds ([8 x i8], [8 x i8] addrspace(1)* @"$str", i32 0, i32 0) to i8*)) + %cmp = icmp ne i32 %reflect, 0 + br i1 %cmp, label %use_mul, label %use_add + +use_mul: + %ret1 = fmul float %a, %b + br label %exit + +use_add: + %ret2 = fadd float %a, %b + br label %exit + +exit: + %ret = phi float [%ret1, %use_mul], [%ret2, %use_add] + + call void @used_subfunc(float %ret) + + ret float %ret +} diff --git a/test/CodeGenCUDA/link-device-bitcode.cu b/test/CodeGenCUDA/link-device-bitcode.cu new file mode 100644 index 0000000000..45e5bcff99 --- /dev/null +++ b/test/CodeGenCUDA/link-device-bitcode.cu @@ -0,0 +1,56 @@ +// Test for linking with CUDA's libdevice as outlined in +// http://llvm.org/docs/NVPTXUsage.html#linking-with-libdevice +// +// REQUIRES: nvptx-registered-target +// +// Prepare bitcode file to link with +// RUN: %clang_cc1 -triple nvptx-unknown-cuda -emit-llvm-bc -o %t.bc \ +// RUN: %S/Inputs/device-code.ll +// +// Make sure function in device-code gets linked in and internalized. +// RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \ +// RUN: -mlink-bitcode-file %t.bc -fcuda-uses-libdevice -emit-llvm \ +// RUN: -disable-llvm-passes -o - %s \ +// RUN: | FileCheck %s -check-prefix CHECK-IR +// +// Make sure function in device-code gets linked but is not internalized +// without -fcuda-uses-libdevice +// RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \ +// RUN: -mlink-bitcode-file %t.bc -emit-llvm \ +// RUN: -disable-llvm-passes -o - %s \ +// RUN: | FileCheck %s -check-prefix CHECK-IR-NLD +// +// Make sure NVVMReflect pass is enabled in NVPTX back-end. +// RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \ +// RUN: -mlink-bitcode-file %t.bc -fcuda-uses-libdevice -S -o /dev/null %s \ +// RUN: -backend-option -debug-pass=Structure 2>&1 \ +// RUN: | FileCheck %s -check-prefix CHECK-REFLECT + +#include "Inputs/cuda.h" + +__device__ float device_mul_or_add(float a, float b); +extern "C" __device__ double __nv_sin(double x); +extern "C" __device__ double __nv_exp(double x); + +// CHECK-IR-LABEL: define void @_Z26should_not_be_internalizedPf( +// CHECK-PTX-LABEL: .visible .func _Z26should_not_be_internalizedPf( +__device__ void should_not_be_internalized(float *data) {} + +// Make sure kernel call has not been internalized. +// CHECK-IR-LABEL: define void @_Z6kernelPfS_ +// CHECK-PTX-LABEL: .visible .entry _Z6kernelPfS_( +__global__ __attribute__((used)) void kernel(float *out, float *in) { + *out = device_mul_or_add(in[0], in[1]); + *out += __nv_exp(__nv_sin(*out)); + should_not_be_internalized(out); +} + +// Make sure device_mul_or_add() is present in IR, is internal and +// calls __nvvm_reflect(). +// CHECK-IR-LABEL: define internal float @_Z17device_mul_or_addff( +// CHECK-IR-NLD-LABEL: define float @_Z17device_mul_or_addff( +// CHECK-IR: call i32 @__nvvm_reflect +// CHECK-IR: ret float + +// Verify that NVVMReflect pass is among the passes run by NVPTX back-end. +// CHECK-REFLECT: Replace occurrences of __nvvm_reflect() calls with 0/1 |