diff options
| | | |
|---|---|---|
| author | Artem Belevich <tra@google.com> | 2017-09-21 18:44:49 +0000 |
| committer | Artem Belevich <tra@google.com> | 2017-09-21 18:44:49 +0000 |
| commit | c02a4f5a57b8786e77949bba6c6383cd068d2105 (patch) | |
| tree | e0cc6a28894f34e1224fdd5daf482e069e525a36 /lib/Target/NVPTX | |
| parent | 84148478f0f79ad695d82601769f7e353815fcd4 (diff) | |
[NVPTX] Implemented bar.warp.sync, barrier.sync, and vote{.sync} instructions/intrinsics/builtins.

Differential Revision: https://reviews.llvm.org/D38148

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@313898 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/NVPTX')
-rw-r--r-- | lib/Target/NVPTX/NVPTXInstrInfo.td | 3 |
-rw-r--r-- | lib/Target/NVPTX/NVPTXIntrinsics.td | 63 |
2 files changed, 66 insertions, 0 deletions
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.td b/lib/Target/NVPTX/NVPTXInstrInfo.td index ff5095e257f..0a4fb0ed33f 100644 --- a/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -155,6 +155,9 @@ def noHWROT32 : Predicate<"!Subtarget->hasHWROT32()">; def true : Predicate<"true">; def hasPTX31 : Predicate<"Subtarget->getPTXVersion() >= 31">; +def hasPTX60 : Predicate<"Subtarget->getPTXVersion() >= 60">; + +def hasSM30 : Predicate<"Subtarget->getSmVersion() >= 30">; def useFP16Math: Predicate<"Subtarget->allowFP16Math()">; diff --git a/lib/Target/NVPTX/NVPTXIntrinsics.td b/lib/Target/NVPTX/NVPTXIntrinsics.td index 44c3db65111..baa008f5494 100644 --- a/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -71,6 +71,38 @@ def INT_BARRIER0_OR : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred), def INT_BAR_SYNC : NVPTXInst<(outs), (ins i32imm:$i), "bar.sync \t$i;", [(int_nvvm_bar_sync imm:$i)]>; +def INT_BAR_WARP_SYNC_I : NVPTXInst<(outs), (ins i32imm:$i), "bar.warp.sync \t$i;", + [(int_nvvm_bar_warp_sync imm:$i)]>, + Requires<[hasPTX60, hasSM30]>; +def INT_BAR_WARP_SYNC_R : NVPTXInst<(outs), (ins Int32Regs:$i), "bar.warp.sync \t$i;", + [(int_nvvm_bar_warp_sync Int32Regs:$i)]>, + Requires<[hasPTX60, hasSM30]>; + +def INT_BARRIER_SYNC_I : NVPTXInst<(outs), (ins i32imm:$i), "barrier.sync \t$i;", + [(int_nvvm_barrier_sync imm:$i)]>, + Requires<[hasPTX60, hasSM30]>; +def INT_BARRIER_SYNC_R : NVPTXInst<(outs), (ins Int32Regs:$i), "barrier.sync \t$i;", + [(int_nvvm_barrier_sync Int32Regs:$i)]>, + Requires<[hasPTX60, hasSM30]>; + +def INT_BARRIER_SYNC_CNT_RR : NVPTXInst<(outs), (ins Int32Regs:$id, Int32Regs:$cnt), + "barrier.sync \t$id, $cnt;", + [(int_nvvm_barrier_sync_cnt Int32Regs:$id, Int32Regs:$cnt)]>, + Requires<[hasPTX60, hasSM30]>; +def INT_BARRIER_SYNC_CNT_RI : NVPTXInst<(outs), (ins Int32Regs:$id, i32imm:$cnt), + "barrier.sync \t$id, $cnt;", + [(int_nvvm_barrier_sync_cnt Int32Regs:$id, imm:$cnt)]>, + 
Requires<[hasPTX60, hasSM30]>; +def INT_BARRIER_SYNC_CNT_IR : NVPTXInst<(outs), (ins i32imm:$id, Int32Regs:$cnt), + "barrier.sync \t$id, $cnt;", + [(int_nvvm_barrier_sync_cnt imm:$id, Int32Regs:$cnt)]>, + Requires<[hasPTX60, hasSM30]>; +def INT_BARRIER_SYNC_CNT_II : NVPTXInst<(outs), (ins i32imm:$id, i32imm:$cnt), + "barrier.sync \t$id, $cnt;", + [(int_nvvm_barrier_sync_cnt imm:$id, imm:$cnt)]>, + Requires<[hasPTX60, hasSM30]>; + + // shfl.{up,down,bfly,idx}.b32 multiclass SHFL<NVPTXRegClass regclass, string mode, Intrinsic IntOp> { // The last two parameters to shfl can be regs or imms. ptxas is smart @@ -184,6 +216,37 @@ defm INT_SHFL_SYNC_BFLY_F32 : SHFL_SYNC<Float32Regs, "bfly", int_nvvm_shfl_sync_ defm INT_SHFL_SYNC_IDX_I32 : SHFL_SYNC<Int32Regs, "idx", int_nvvm_shfl_sync_idx_i32>; defm INT_SHFL_SYNC_IDX_F32 : SHFL_SYNC<Float32Regs, "idx", int_nvvm_shfl_sync_idx_f32>; + +// vote.{all,any,uni,ballot} +multiclass VOTE<NVPTXRegClass regclass, string mode, Intrinsic IntOp> { + def : NVPTXInst<(outs regclass:$dest), (ins Int1Regs:$pred), + "vote." # mode # " \t$dest, $pred;", + [(set regclass:$dest, (IntOp Int1Regs:$pred))]>, + Requires<[hasPTX60, hasSM30]>; +} + +defm VOTE_ALL : VOTE<Int1Regs, "all.pred", int_nvvm_vote_all>; +defm VOTE_ANY : VOTE<Int1Regs, "any.pred", int_nvvm_vote_any>; +defm VOTE_UNI : VOTE<Int1Regs, "uni.pred", int_nvvm_vote_uni>; +defm VOTE_BALLOT : VOTE<Int32Regs, "ballot.b32", int_nvvm_vote_ballot>; + +// vote.sync.{all,any,uni,ballot} +multiclass VOTE_SYNC<NVPTXRegClass regclass, string mode, Intrinsic IntOp> { + def i : NVPTXInst<(outs regclass:$dest), (ins i32imm:$mask, Int1Regs:$pred), + "vote.sync." # mode # " \t$dest, $pred, $mask;", + [(set regclass:$dest, (IntOp imm:$mask, Int1Regs:$pred))]>, + Requires<[hasPTX60, hasSM30]>; + def r : NVPTXInst<(outs regclass:$dest), (ins Int32Regs:$mask, Int1Regs:$pred), + "vote.sync." 
# mode #" \t$dest, $pred, $mask;", + [(set regclass:$dest, (IntOp Int32Regs:$mask, Int1Regs:$pred))]>, + Requires<[hasPTX60, hasSM30]>; +} + +defm VOTE_SYNC_ALL : VOTE_SYNC<Int1Regs, "all.pred", int_nvvm_vote_all_sync>; +defm VOTE_SYNC_ANY : VOTE_SYNC<Int1Regs, "any.pred", int_nvvm_vote_any_sync>; +defm VOTE_SYNC_UNI : VOTE_SYNC<Int1Regs, "uni.pred", int_nvvm_vote_uni_sync>; +defm VOTE_SYNC_BALLOT : VOTE_SYNC<Int32Regs, "ballot.b32", int_nvvm_vote_ballot_sync>; + } // isConvergent = 1 //----------------------------------- |