[cuda] Include GPU binary into host object file and generate init/deinit code.

- Added the -fcuda-include-gpubinary option to incorporate the results of device-side compilation into the host-side one.
- Generate code to register GPU binaries and their associated kernels with the CUDA runtime, and to clean up on exit.
- Added a test case for init/deinit code generation.

Differential Revision: http://reviews.llvm.org/D9507

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@236765 91177308-0d34-0410-b5e6-96231b3b80d8
author: Artem Belevich <tra@google.com> 2015-05-07 19:34:16 +0000
committer: Artem Belevich <tra@google.com> 2015-05-07 19:34:16 +0000
commit: 1508f392a41ea676ff4307facc42fba0c2be833a (patch)
tree: 4c18445c90737d251790be9ffdbc8a7c000714cc /test/CodeGenCUDA
parent: 73bd7cd7e4f9bf99bcc1bea87038f1266cdda548 (diff)
1 files changed, 40 insertions, 1 deletions
diff --git a/test/CodeGenCUDA/device-stub.cu b/test/CodeGenCUDA/device-stub.cu
index ed94d10848..99b6493cbc 100644
--- a/test/CodeGenCUDA/device-stub.cu
+++ b/test/CodeGenCUDA/device-stub.cu
@@ -1,7 +1,21 @@
-// RUN: %clang_cc1 -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -emit-llvm %s -fcuda-include-gpubinary %s -o - | FileCheck %s
 
 #include "Inputs/cuda.h"
 
+// Make sure that all parts of GPU code init/cleanup are there:
+// * constant unnamed string with the kernel name
+// CHECK: private unnamed_addr constant{{.*}}kernelfunc{{.*}}\00", align 1
+// * constant unnamed string with GPU binary
+// CHECK: private unnamed_addr constant{{.*}}\00"
+// * constant struct that wraps GPU binary
+// CHECK: @__cuda_fatbin_wrapper = internal constant { i32, i32, i8*, i8* } 
+// CHECK:       { i32 1180844977, i32 1, {{.*}}, i64 0, i64 0), i8* null }
+// * variable to save GPU binary handle after initialization
+// CHECK: @__cuda_gpubin_handle = internal global i8** null
+// * Make sure our constructor/destructor was added to global ctor/dtor list.
+// CHECK: @llvm.global_ctors = appending global {{.*}}@__cuda_module_ctor
+// CHECK: @llvm.global_dtors = appending global {{.*}}@__cuda_module_dtor
+
 // Test that we build the correct number of calls to cudaSetupArgument followed
 // by a call to cudaLaunch.
 
@@ -11,3 +25,28 @@
 // CHECK: call{{.*}}cudaSetupArgument
 // CHECK: call{{.*}}cudaLaunch
 __global__ void kernelfunc(int i, int j, int k) {}
+
+// Test that the launch sequence is cudaConfigureCall, then a call to kernelfunc.
+// CHECK: define{{.*}}hostfunc
+// CHECK: call{{.*}}cudaConfigureCall
+// CHECK: call{{.*}}kernelfunc
+void hostfunc(void) { kernelfunc<<<1, 1>>>(1, 1, 1); }
+
+// Test that we've built a function to register kernels
+// CHECK: define internal void @__cuda_register_kernels
+// CHECK: call{{.*}}cudaRegisterFunction(i8** %0, {{.*}}kernelfunc
+
+// Test that we've built a constructor..
+// CHECK: define internal void @__cuda_module_ctor
+//   .. that calls __cudaRegisterFatBinary(&__cuda_fatbin_wrapper)
+// CHECK: call{{.*}}cudaRegisterFatBinary{{.*}}__cuda_fatbin_wrapper
+//   .. stores return value in __cuda_gpubin_handle
+// CHECK-NEXT: store{{.*}}__cuda_gpubin_handle
+//   .. and then calls __cuda_register_kernels
+// CHECK-NEXT: call void @__cuda_register_kernels
+
+// Test that we've created destructor.
+// CHECK: define internal void @__cuda_module_dtor
+// CHECK: load{{.*}}__cuda_gpubin_handle
+// CHECK-NEXT: call void @__cudaUnregisterFatBinary
+
author	Artem Belevich <tra@google.com>	2015-05-07 19:34:16 +0000
committer	Artem Belevich <tra@google.com>	2015-05-07 19:34:16 +0000
commit	1508f392a41ea676ff4307facc42fba0c2be833a (patch)
tree	4c18445c90737d251790be9ffdbc8a7c000714cc /test/CodeGenCUDA
parent	73bd7cd7e4f9bf99bcc1bea87038f1266cdda548 (diff)