diff options
author | Justin Lebar <jlebar@google.com> | 2016-09-11 01:39:04 +0000 |
---|---|---|
committer | Justin Lebar <jlebar@google.com> | 2016-09-11 01:39:04 +0000 |
commit | 877859e49f26e19f89b85cb0fe0c106d02cc6543 (patch) | |
tree | d81d3271f8a0bd5ca08b53c29c05d843ae50f0fc /test/CodeGen/NVPTX | |
parent | c71d5b41efd662fc262220af694474b4a7ebad81 (diff) |
[NVPTX] Use ldg for explicitly invariant loads.
Summary:
With this change (plus some changes to prevent !invariant from being
clobbered within llvm), clang will be able to model the __ldg CUDA
builtin as an invariant load, rather than as a target-specific llvm
intrinsic. This will let the optimizer play with these loads --
specifically, we should be able to vectorize them in the load-store
vectorizer.
Reviewers: tra
Subscribers: jholewinski, hfinkel, llvm-commits, chandlerc
Differential Revision: https://reviews.llvm.org/D23477
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@281152 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGen/NVPTX')
-rw-r--r-- | test/CodeGen/NVPTX/ldg-invariant.ll | 27 |
1 file changed, 27 insertions, 0 deletions
diff --git a/test/CodeGen/NVPTX/ldg-invariant.ll b/test/CodeGen/NVPTX/ldg-invariant.ll
new file mode 100644
index 00000000000..40dad1f1769
--- /dev/null
+++ b/test/CodeGen/NVPTX/ldg-invariant.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 | FileCheck %s
+
+; Check that invariant loads from the global addrspace are lowered to
+; ld.global.nc.
+
+; CHECK-LABEL: @ld_global
+define i32 @ld_global(i32 addrspace(1)* %ptr) {
+; CHECK: ld.global.nc.{{[a-z]}}32
+  %a = load i32, i32 addrspace(1)* %ptr, !invariant.load !0
+  ret i32 %a
+}
+
+; CHECK-LABEL: @ld_not_invariant
+define i32 @ld_not_invariant(i32 addrspace(1)* %ptr) {
+; CHECK: ld.global.{{[a-z]}}32
+  %a = load i32, i32 addrspace(1)* %ptr
+  ret i32 %a
+}
+
+; CHECK-LABEL: @ld_not_global_addrspace
+define i32 @ld_not_global_addrspace(i32 addrspace(0)* %ptr) {
+; CHECK: ld.{{[a-z]}}32
+  %a = load i32, i32 addrspace(0)* %ptr
+  ret i32 %a
+}
+
+!0 = !{}