summaryrefslogtreecommitdiff
path: root/lib/Target/NVPTX
diff options
context:
space:
mode:
authorArtem Belevich <tra@google.com>2017-09-21 18:44:49 +0000
committerArtem Belevich <tra@google.com>2017-09-21 18:44:49 +0000
commitc02a4f5a57b8786e77949bba6c6383cd068d2105 (patch)
treee0cc6a28894f34e1224fdd5daf482e069e525a36 /lib/Target/NVPTX
parent84148478f0f79ad695d82601769f7e353815fcd4 (diff)
[NVPTX] Implemented bar.warp.sync, barrier.sync, and vote{.sync} instructions/intrinsics/builtins.
Differential Revision: https://reviews.llvm.org/D38148 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@313898 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/NVPTX')
-rw-r--r--lib/Target/NVPTX/NVPTXInstrInfo.td3
-rw-r--r--lib/Target/NVPTX/NVPTXIntrinsics.td63
2 files changed, 66 insertions, 0 deletions
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.td b/lib/Target/NVPTX/NVPTXInstrInfo.td
index ff5095e257f..0a4fb0ed33f 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -155,6 +155,9 @@ def noHWROT32 : Predicate<"!Subtarget->hasHWROT32()">;
def true : Predicate<"true">;
def hasPTX31 : Predicate<"Subtarget->getPTXVersion() >= 31">;
+// Holds when the subtarget reports a PTX version of at least 60
+// (presumably PTX ISA 6.0 -- confirm against getPTXVersion()'s encoding).
+def hasPTX60 : Predicate<"Subtarget->getPTXVersion() >= 60">;
+
+// Holds when the subtarget reports an SM version of at least 30.
+def hasSM30 : Predicate<"Subtarget->getSmVersion() >= 30">;
def useFP16Math: Predicate<"Subtarget->allowFP16Math()">;
diff --git a/lib/Target/NVPTX/NVPTXIntrinsics.td b/lib/Target/NVPTX/NVPTXIntrinsics.td
index 44c3db65111..baa008f5494 100644
--- a/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -71,6 +71,38 @@ def INT_BARRIER0_OR : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
def INT_BAR_SYNC : NVPTXInst<(outs), (ins i32imm:$i), "bar.sync \t$i;",
[(int_nvvm_bar_sync imm:$i)]>;
+// bar.warp.sync with a single 32-bit operand, selected from the
+// int_nvvm_bar_warp_sync intrinsic. The _I form matches an immediate
+// argument; it has no outputs. Only selectable when both hasPTX60 and
+// hasSM30 hold.
+def INT_BAR_WARP_SYNC_I : NVPTXInst<(outs), (ins i32imm:$i), "bar.warp.sync \t$i;",
+ [(int_nvvm_bar_warp_sync imm:$i)]>,
+ Requires<[hasPTX60, hasSM30]>;
+// Same instruction and predicates; the _R form takes the operand from an
+// Int32Regs register instead of an immediate.
+def INT_BAR_WARP_SYNC_R : NVPTXInst<(outs), (ins Int32Regs:$i), "bar.warp.sync \t$i;",
+ [(int_nvvm_bar_warp_sync Int32Regs:$i)]>,
+ Requires<[hasPTX60, hasSM30]>;
+
+// One-operand barrier.sync, selected from the int_nvvm_barrier_sync
+// intrinsic. The _I form matches an immediate argument; it has no outputs.
+// Only selectable when both hasPTX60 and hasSM30 hold.
+def INT_BARRIER_SYNC_I : NVPTXInst<(outs), (ins i32imm:$i), "barrier.sync \t$i;",
+ [(int_nvvm_barrier_sync imm:$i)]>,
+ Requires<[hasPTX60, hasSM30]>;
+// Same instruction and predicates; the _R form takes the operand from an
+// Int32Regs register instead of an immediate.
+def INT_BARRIER_SYNC_R : NVPTXInst<(outs), (ins Int32Regs:$i), "barrier.sync \t$i;",
+ [(int_nvvm_barrier_sync Int32Regs:$i)]>,
+ Requires<[hasPTX60, hasSM30]>;
+
+// Two-operand barrier.sync ($id, $cnt), selected from the
+// int_nvvm_barrier_sync_cnt intrinsic. The four defs below cover every
+// register/immediate combination of the two operands. In the name suffix the
+// first letter describes $id and the second describes $cnt:
+//   R = Int32Regs register, I = i32imm immediate.
+// All four print the same assembly string and require hasPTX60 and hasSM30.
+
+// $id in a register, $cnt in a register.
+def INT_BARRIER_SYNC_CNT_RR : NVPTXInst<(outs), (ins Int32Regs:$id, Int32Regs:$cnt),
+ "barrier.sync \t$id, $cnt;",
+ [(int_nvvm_barrier_sync_cnt Int32Regs:$id, Int32Regs:$cnt)]>,
+ Requires<[hasPTX60, hasSM30]>;
+// $id in a register, $cnt as an immediate.
+def INT_BARRIER_SYNC_CNT_RI : NVPTXInst<(outs), (ins Int32Regs:$id, i32imm:$cnt),
+ "barrier.sync \t$id, $cnt;",
+ [(int_nvvm_barrier_sync_cnt Int32Regs:$id, imm:$cnt)]>,
+ Requires<[hasPTX60, hasSM30]>;
+// $id as an immediate, $cnt in a register.
+def INT_BARRIER_SYNC_CNT_IR : NVPTXInst<(outs), (ins i32imm:$id, Int32Regs:$cnt),
+ "barrier.sync \t$id, $cnt;",
+ [(int_nvvm_barrier_sync_cnt imm:$id, Int32Regs:$cnt)]>,
+ Requires<[hasPTX60, hasSM30]>;
+// $id as an immediate, $cnt as an immediate.
+def INT_BARRIER_SYNC_CNT_II : NVPTXInst<(outs), (ins i32imm:$id, i32imm:$cnt),
+ "barrier.sync \t$id, $cnt;",
+ [(int_nvvm_barrier_sync_cnt imm:$id, imm:$cnt)]>,
+ Requires<[hasPTX60, hasSM30]>;
+
+
// shfl.{up,down,bfly,idx}.b32
multiclass SHFL<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
// The last two parameters to shfl can be regs or imms. ptxas is smart
@@ -184,6 +216,37 @@ defm INT_SHFL_SYNC_BFLY_F32 : SHFL_SYNC<Float32Regs, "bfly", int_nvvm_shfl_sync_
defm INT_SHFL_SYNC_IDX_I32 : SHFL_SYNC<Int32Regs, "idx", int_nvvm_shfl_sync_idx_i32>;
defm INT_SHFL_SYNC_IDX_F32 : SHFL_SYNC<Float32Regs, "idx", int_nvvm_shfl_sync_idx_f32>;
+
+// vote.{all,any,uni,ballot}
+//
+// Defines one anonymous instruction that prints "vote.<mode>" and is selected
+// from the intrinsic IntOp.
+//   regclass - register class of the result ($dest).
+//   mode     - text appended after "vote." in the assembly string.
+//   IntOp    - intrinsic matched by the selection pattern; it takes a single
+//              Int1Regs predicate operand ($pred).
+// NOTE(review): these non-.sync forms are gated on the same hasPTX60/hasSM30
+// predicates as the vote.sync forms -- confirm that this restriction is
+// intended rather than the older PTX/SM levels at which plain vote exists.
+multiclass VOTE<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
+ def : NVPTXInst<(outs regclass:$dest), (ins Int1Regs:$pred),
+ "vote." # mode # " \t$dest, $pred;",
+ [(set regclass:$dest, (IntOp Int1Regs:$pred))]>,
+ Requires<[hasPTX60, hasSM30]>;
+}
+
+// Instantiations of VOTE. The all/any/uni modes produce a predicate
+// (Int1Regs) result; ballot produces a 32-bit (Int32Regs) result.
+defm VOTE_ALL : VOTE<Int1Regs, "all.pred", int_nvvm_vote_all>;
+defm VOTE_ANY : VOTE<Int1Regs, "any.pred", int_nvvm_vote_any>;
+defm VOTE_UNI : VOTE<Int1Regs, "uni.pred", int_nvvm_vote_uni>;
+defm VOTE_BALLOT : VOTE<Int32Regs, "ballot.b32", int_nvvm_vote_ballot>;
+
+// vote.sync.{all,any,uni,ballot}
+//
+// Defines two instructions that print "vote.sync.<mode>" and are selected
+// from the intrinsic IntOp, which takes ($mask, $pred) in that order:
+//   i - $mask is an i32 immediate.
+//   r - $mask comes from an Int32Regs register.
+// Note the operand order differs between pattern and assembly: $mask is the
+// first intrinsic argument but is printed last ("$dest, $pred, $mask").
+//   regclass - register class of the result ($dest).
+//   mode     - text appended after "vote.sync." in the assembly string.
+// Both variants require hasPTX60 and hasSM30.
+multiclass VOTE_SYNC<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
+ // Immediate-mask variant.
+ def i : NVPTXInst<(outs regclass:$dest), (ins i32imm:$mask, Int1Regs:$pred),
+ "vote.sync." # mode # " \t$dest, $pred, $mask;",
+ [(set regclass:$dest, (IntOp imm:$mask, Int1Regs:$pred))]>,
+ Requires<[hasPTX60, hasSM30]>;
+ // Register-mask variant.
+ def r : NVPTXInst<(outs regclass:$dest), (ins Int32Regs:$mask, Int1Regs:$pred),
+ "vote.sync." # mode #" \t$dest, $pred, $mask;",
+ [(set regclass:$dest, (IntOp Int32Regs:$mask, Int1Regs:$pred))]>,
+ Requires<[hasPTX60, hasSM30]>;
+}
+
+// Instantiations of VOTE_SYNC; each defm yields an immediate-mask (i) and a
+// register-mask (r) variant. The all/any/uni modes produce a predicate
+// (Int1Regs) result; ballot produces a 32-bit (Int32Regs) result.
+defm VOTE_SYNC_ALL : VOTE_SYNC<Int1Regs, "all.pred", int_nvvm_vote_all_sync>;
+defm VOTE_SYNC_ANY : VOTE_SYNC<Int1Regs, "any.pred", int_nvvm_vote_any_sync>;
+defm VOTE_SYNC_UNI : VOTE_SYNC<Int1Regs, "uni.pred", int_nvvm_vote_uni_sync>;
+defm VOTE_SYNC_BALLOT : VOTE_SYNC<Int32Regs, "ballot.b32", int_nvvm_vote_ballot_sync>;
+
} // isConvergent = 1
//-----------------------------------