summary | refs | log | tree | commit | diff
path: root/test/CodeGen/NVPTX
diff options
context:
space:
mode:
author: Justin Lebar <jlebar@google.com>, 2016-09-11 01:39:04 +0000
committer: Justin Lebar <jlebar@google.com>, 2016-09-11 01:39:04 +0000
commit: 877859e49f26e19f89b85cb0fe0c106d02cc6543 (patch)
tree: d81d3271f8a0bd5ca08b53c29c05d843ae50f0fc /test/CodeGen/NVPTX
parent: c71d5b41efd662fc262220af694474b4a7ebad81 (diff)
[NVPTX] Use ldg for explicitly invariant loads.
Summary: With this change (plus some changes to prevent !invariant from being clobbered within llvm), clang will be able to model the __ldg CUDA builtin as an invariant load, rather than as a target-specific llvm intrinsic. This will let the optimizer play with these loads -- specifically, we should be able to vectorize them in the load-store vectorizer.

Reviewers: tra

Subscribers: jholewinski, hfinkel, llvm-commits, chandlerc

Differential Revision: https://reviews.llvm.org/D23477

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@281152 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGen/NVPTX')
-rw-r--r-- test/CodeGen/NVPTX/ldg-invariant.ll | 27
1 files changed, 27 insertions, 0 deletions
diff --git a/test/CodeGen/NVPTX/ldg-invariant.ll b/test/CodeGen/NVPTX/ldg-invariant.ll
new file mode 100644
index 00000000000..40dad1f1769
--- /dev/null
+++ b/test/CodeGen/NVPTX/ldg-invariant.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 | FileCheck %s
+
+; Check that invariant loads from the global addrspace are lowered to
+; ld.global.nc.
+
+; CHECK-LABEL: @ld_global
+define i32 @ld_global(i32 addrspace(1)* %ptr) {
+; CHECK: ld.global.nc.{{[a-z]}}32
+ %a = load i32, i32 addrspace(1)* %ptr, !invariant.load !0
+ ret i32 %a
+}
+
+; CHECK-LABEL: @ld_not_invariant
+define i32 @ld_not_invariant(i32 addrspace(1)* %ptr) {
+; CHECK: ld.global.{{[a-z]}}32
+ %a = load i32, i32 addrspace(1)* %ptr
+ ret i32 %a
+}
+
+; CHECK-LABEL: @ld_not_global_addrspace
+define i32 @ld_not_global_addrspace(i32 addrspace(0)* %ptr) {
+; CHECK: ld.{{[a-z]}}32
+ %a = load i32, i32 addrspace(0)* %ptr
+ ret i32 %a
+}
+
+!0 = !{}