diff options
| field | value | date |
|---|---|---|
| author | Justin Lebar <jlebar@google.com> | 2017-01-15 16:54:35 +0000 |
| committer | Justin Lebar <jlebar@google.com> | 2017-01-15 16:54:35 +0000 |
| commit | 352f7fdadc58bcc8d9abcca7c766e8e92c28e522 (patch) | |
| tree | 4e9d1b1453b8b22ccb54b45d95d28a38b2c27949 /test/CodeGen/NVPTX | |
| parent | 50520f329ad3e2e150a67e82ac8725b167daeee3 (diff) | |
[NVPTX] Let there be One True Way to set NVVMReflect params.
Summary:
Previously there were three ways to inform the NVVMReflect pass whether
you wanted to flush denormals to zero:
* An LLVM command-line option
* Parameters to the NVVMReflect constructor
* Metadata on the module itself.
This change removes the first two, leaving only the third.
The motivation for this change, aside from simplifying things, is that
we want LLVM to be aware of whether it's operating in FTZ mode, so other
passes can use this information. Ideally we'd have a target-generic
piece of metadata on the module. This change moves us in that
direction.
Reviewers: tra
Subscribers: jholewinski, llvm-commits
Differential Revision: https://reviews.llvm.org/D28700
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@292068 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGen/NVPTX')
-rw-r--r-- | test/CodeGen/NVPTX/nvvm-reflect.ll | 65 |
1 files changed, 36 insertions, 29 deletions
```diff
diff --git a/test/CodeGen/NVPTX/nvvm-reflect.ll b/test/CodeGen/NVPTX/nvvm-reflect.ll
index 8c75dfc30a5..165597d6baf 100644
--- a/test/CodeGen/NVPTX/nvvm-reflect.ll
+++ b/test/CodeGen/NVPTX/nvvm-reflect.ll
@@ -1,30 +1,38 @@
-; RUN: opt < %s -S -nvvm-reflect -nvvm-reflect-list USE_MUL=0 -O2 | FileCheck %s --check-prefix=USE_MUL_0
-; RUN: opt < %s -S -nvvm-reflect -nvvm-reflect-list USE_MUL=1 -O2 | FileCheck %s --check-prefix=USE_MUL_1
+; We run nvvm-reflect (and then optimize) this module twice, once with metadata
+; that enables FTZ, and again with metadata that disables it.
 
-@str = private unnamed_addr addrspace(4) constant [8 x i8] c"USE_MUL\00"
+; RUN: cat %s > %t.noftz
+; RUN: echo '!0 = !{i32 4, !"nvvm-reflect-ftz", i32 0}' >> %t.noftz
+; RUN: opt %t.noftz -S -nvvm-reflect -O2 \
+; RUN: | FileCheck %s --check-prefix=USE_FTZ_0 --check-prefix=CHECK
+
+; RUN: cat %s > %t.ftz
+; RUN: echo '!0 = !{i32 4, !"nvvm-reflect-ftz", i32 1}' >> %t.ftz
+; RUN: opt %t.ftz -S -nvvm-reflect -O2 \
+; RUN: | FileCheck %s --check-prefix=USE_FTZ_1 --check-prefix=CHECK
+
+@str = private unnamed_addr addrspace(4) constant [11 x i8] c"__CUDA_FTZ\00"
 
 declare i32 @__nvvm_reflect(i8*)
 declare i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8(i8 addrspace(4)*)
 
+; CHECK-LABEL: @foo
 define float @foo(float %a, float %b) {
-; USE_MUL_0: define float @foo
-; USE_MUL_0-NOT: call i32 @__nvvm_reflect
-; USE_MUL_1: define float @foo
-; USE_MUL_1-NOT: call i32 @__nvvm_reflect
-  %ptr = tail call i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8(i8 addrspace(4)* getelementptr inbounds ([8 x i8], [8 x i8] addrspace(4)* @str, i32 0, i32 0))
+; CHECK-NOT: call i32 @__nvvm_reflect
+  %ptr = tail call i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8(i8 addrspace(4)* getelementptr inbounds ([11 x i8], [11 x i8] addrspace(4)* @str, i32 0, i32 0))
   %reflect = tail call i32 @__nvvm_reflect(i8* %ptr)
   %cmp = icmp ugt i32 %reflect, 0
   br i1 %cmp, label %use_mul, label %use_add
 
 use_mul:
-; USE_MUL_1: fmul float %a, %b
-; USE_MUL_0-NOT: fadd float %a, %b
+; USE_FTZ_1: fmul float %a, %b
+; USE_FTZ_0-NOT: fadd float %a, %b
   %ret1 = fmul float %a, %b
   br label %exit
 
 use_add:
-; USE_MUL_0: fadd float %a, %b
-; USE_MUL_1-NOT: fmul float %a, %b
+; USE_FTZ_0: fadd float %a, %b
+; USE_FTZ_1-NOT: fmul float %a, %b
   %ret2 = fadd float %a, %b
   br label %exit
 
@@ -35,14 +43,12 @@ exit:
 
 declare i32 @llvm.nvvm.reflect.p0i8(i8*)
 
-; USE_MUL_0: define i32 @intrinsic
-; USE_MUL_1: define i32 @intrinsic
+; CHECK-LABEL: define i32 @intrinsic
 define i32 @intrinsic() {
-; USE_MUL_0-NOT: call i32 @llvm.nvvm.reflect
-; USE_MUL_0: ret i32 0
-; USE_MUL_1-NOT: call i32 @llvm.nvvm.reflect
-; USE_MUL_1: ret i32 1
-  %ptr = tail call i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8(i8 addrspace(4)* getelementptr inbounds ([8 x i8], [8 x i8] addrspace(4)* @str, i32 0, i32 0))
+; CHECK-NOT: call i32 @llvm.nvvm.reflect
+; USE_FTZ_0: ret i32 0
+; USE_FTZ_1: ret i32 1
+  %ptr = tail call i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8(i8 addrspace(4)* getelementptr inbounds ([11 x i8], [11 x i8] addrspace(4)* @str, i32 0, i32 0))
   %reflect = tail call i32 @llvm.nvvm.reflect.p0i8(i8* %ptr)
   ret i32 %reflect
 }
@@ -50,26 +56,24 @@ define i32 @intrinsic() {
 ; CUDA-7.0 passes __nvvm_reflect argument slightly differently.
 ; Verify that it works, too
 
-@"$str" = private addrspace(1) constant [8 x i8] c"USE_MUL\00"
+@"$str" = private addrspace(1) constant [11 x i8] c"__CUDA_FTZ\00"
 
+; CHECK-LABEL: @bar
 define float @bar(float %a, float %b) {
-; USE_MUL_0: define float @bar
-; USE_MUL_0-NOT: call i32 @__nvvm_reflect
-; USE_MUL_1: define float @bar
-; USE_MUL_1-NOT: call i32 @__nvvm_reflect
-  %reflect = call i32 @__nvvm_reflect(i8* addrspacecast (i8 addrspace(1)* getelementptr inbounds ([8 x i8], [8 x i8] addrspace(1)* @"$str", i32 0, i32 0) to i8*))
+; CHECK-NOT: call i32 @__nvvm_reflect
+  %reflect = call i32 @__nvvm_reflect(i8* addrspacecast (i8 addrspace(1)* getelementptr inbounds ([11 x i8], [11 x i8] addrspace(1)* @"$str", i32 0, i32 0) to i8*))
   %cmp = icmp ne i32 %reflect, 0
   br i1 %cmp, label %use_mul, label %use_add
 
 use_mul:
-; USE_MUL_1: fmul float %a, %b
-; USE_MUL_0-NOT: fadd float %a, %b
+; USE_FTZ_1: fmul float %a, %b
+; USE_FTZ_0-NOT: fadd float %a, %b
   %ret1 = fmul float %a, %b
   br label %exit
 
 use_add:
-; USE_MUL_0: fadd float %a, %b
-; USE_MUL_1-NOT: fmul float %a, %b
+; USE_FTZ_0: fadd float %a, %b
+; USE_FTZ_1-NOT: fmul float %a, %b
   %ret2 = fadd float %a, %b
   br label %exit
 
@@ -77,3 +81,6 @@ exit:
   %ret = phi float [%ret1, %use_mul], [%ret2, %use_add]
   ret float %ret
 }
+
+!llvm.module.flags = !{!0}
+; A module flag is added to the end of this file by the RUN lines at the top.
```