diff options
author | Coby Tayree <coby.tayree@intel.com> | 2017-11-26 09:36:41 +0000 |
---|---|---|
committer | Coby Tayree <coby.tayree@intel.com> | 2017-11-26 09:36:41 +0000 |
commit | a897faafb78649a4c5ad32911f2a7e7174f9cc93 (patch) | |
tree | c1de337a47a16f5eb7000b720d6aace47ad943fd /lib | |
parent | 0d7d3a3c3255f9bafe37485d33262aa10ef8f29f (diff) |
[x86][icelake]GFNI
Galois field arithmetic (GF(2^8)) instructions:
gf2p8affineinvqb
gf2p8affineqb
gf2p8mulb
Differential Revision: https://reviews.llvm.org/D40373
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@318993 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Support/Host.cpp | 1 | ||||
-rw-r--r-- | lib/Target/X86/X86.td | 7 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 3 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelLowering.h | 3 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrAVX512.td | 52 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrFragmentsSIMD.td | 5 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrInfo.td | 1 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrSSE.td | 79 | ||||
-rw-r--r-- | lib/Target/X86/X86IntrinsicsInfo.h | 22 | ||||
-rw-r--r-- | lib/Target/X86/X86Subtarget.cpp | 1 | ||||
-rw-r--r-- | lib/Target/X86/X86Subtarget.h | 4 |
11 files changed, 175 insertions, 3 deletions
diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp index 31f86eb3fec..3ce636ffcda 100644 --- a/lib/Support/Host.cpp +++ b/lib/Support/Host.cpp @@ -1217,6 +1217,7 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) { Features["avx512vbmi"] = HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save; Features["pku"] = HasLeaf7 && ((ECX >> 4) & 1); Features["avx512vbmi2"] = HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save; + Features["gfni"] = HasLeaf7 && ((ECX >> 8) & 1); Features["vaes"] = HasLeaf7 && ((ECX >> 9) & 1) && HasAVXSave; Features["vpclmulqdq"] = HasLeaf7 && ((ECX >> 10) & 1) && HasAVXSave; Features["avx512vnni"] = HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save; diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index 56a6d57c195..8c1136341de 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -169,6 +169,9 @@ def FeatureBITALG : SubtargetFeature<"avx512bitalg", "HasBITALG", "true", def FeaturePCLMUL : SubtargetFeature<"pclmul", "HasPCLMUL", "true", "Enable packed carry-less multiplication instructions", [FeatureSSE2]>; +def FeatureGFNI : SubtargetFeature<"gfni", "HasGFNI", "true", + "Enable Galois Field Arithmetic Instructions", + [FeatureSSE2]>; def FeatureVPCLMULQDQ : SubtargetFeature<"vpclmulqdq", "HasVPCLMULQDQ", "true", "Enable vpclmulqdq instructions", [FeatureAVX, FeaturePCLMUL]>; @@ -698,8 +701,8 @@ def ICLFeatures : ProcessorFeatures<CNLFeatures.Value, [ FeatureVBMI2, FeatureVNNI, FeatureVPCLMULQDQ, - FeatureVPOPCNTDQ - // TODO: Add GFNI when it is implemented. 
+ FeatureVPOPCNTDQ, + FeatureGFNI ]>; class IcelakeProc<string Name> : ProcModel<Name, SkylakeServerModel, diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 2163efd30aa..892c7e24abd 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -25254,6 +25254,9 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::VPDPWSSD: return "X86ISD::VPDPWSSD"; case X86ISD::VPDPWSSDS: return "X86ISD::VPDPWSSDS"; case X86ISD::VPSHUFBITQMB: return "X86ISD::VPSHUFBITQMB"; + case X86ISD::GF2P8MULB: return "X86ISD::GF2P8MULB"; + case X86ISD::GF2P8AFFINEQB: return "X86ISD::GF2P8AFFINEQB"; + case X86ISD::GF2P8AFFINEINVQB: return "X86ISD::GF2P8AFFINEINVQB"; } return nullptr; } diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 61b03be52a9..90830f4d5d1 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -587,6 +587,9 @@ namespace llvm { // Conversions between float and half-float. CVTPS2PH, CVTPH2PS, CVTPH2PS_RND, + // Galois Field Arithmetic Instructions + GF2P8AFFINEINVQB, GF2P8AFFINEQB, GF2P8MULB, + // LWP insert record. 
LWPINS, diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 626ad00933c..1f2e7197ba7 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -10242,3 +10242,55 @@ multiclass VPSHUFBITQMB_common<AVX512VLVectorVTInfo VTI> { defm VPSHUFBITQMB : VPSHUFBITQMB_common<avx512vl_i8_info>; +//===----------------------------------------------------------------------===// +// GFNI +//===----------------------------------------------------------------------===// + +multiclass GF2P8MULB_avx512_common<bits<8> Op, string OpStr, SDNode OpNode> { + let Predicates = [HasGFNI, HasAVX512, HasBWI] in + defm Z : avx512_binop_rm<Op, OpStr, OpNode, v64i8_info, + SSE_INTALU_ITINS_P, 1>, EVEX_V512; + let Predicates = [HasGFNI, HasVLX, HasBWI] in { + defm Z256 : avx512_binop_rm<Op, OpStr, OpNode, v32i8x_info, + SSE_INTALU_ITINS_P, 1>, EVEX_V256; + defm Z128 : avx512_binop_rm<Op, OpStr, OpNode, v16i8x_info, + SSE_INTALU_ITINS_P, 1>, EVEX_V128; + } +} + +defm GF2P8MULB : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb>, + EVEX_CD8<8, CD8VF>, T8PD; + +multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode, + X86VectorVTInfo VTI, + X86VectorVTInfo BcstVTI> + : avx512_3Op_rm_imm8<Op, OpStr, OpNode, VTI, VTI> { + let ExeDomain = VTI.ExeDomain in + defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst), + (ins VTI.RC:$src1, VTI.ScalarMemOp:$src2, u8imm:$src3), + OpStr, "$src3, ${src2}"##BcstVTI.BroadcastStr##", $src1", + "$src1, ${src2}"##BcstVTI.BroadcastStr##", $src3", + (OpNode (VTI.VT VTI.RC:$src1), + (bitconvert (BcstVTI.VT (X86VBroadcast (loadi64 addr:$src2)))), + (i8 imm:$src3))>, EVEX_B; +} + +multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode> { + let Predicates = [HasGFNI, HasAVX512, HasBWI] in + defm Z : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, v64i8_info, + v8i64_info>, EVEX_V512; + let Predicates = [HasGFNI, HasVLX, HasBWI] in { + defm Z256 : 
GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, v32i8x_info, + v4i64x_info>, EVEX_V256; + defm Z128 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, v16i8x_info, + v2i64x_info>, EVEX_V128; + } +} + +defm GF2P8AFFINEINVQB : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb", + X86GF2P8affineinvqb>, + EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base; +defm GF2P8AFFINEQB : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb", + X86GF2P8affineqb>, + EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base; + diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index b013d66a21d..cb27fcce349 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -672,6 +672,11 @@ def X86vfproundRnd: SDNode<"X86ISD::VFPROUND_RND", def X86cvt2mask : SDNode<"X86ISD::CVT2MASK", SDTIntTruncOp>; +// galois field arithmetic +def X86GF2P8affineinvqb : SDNode<"X86ISD::GF2P8AFFINEINVQB", SDTBlend>; +def X86GF2P8affineqb : SDNode<"X86ISD::GF2P8AFFINEQB", SDTBlend>; +def X86GF2P8mulb : SDNode<"X86ISD::GF2P8MULB", SDTIntBinOp>; + //===----------------------------------------------------------------------===// // SSE Complex Patterns //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 97d3e6dfb44..a790d1a4141 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -848,6 +848,7 @@ def HasPCLMUL : Predicate<"Subtarget->hasPCLMUL()">; def NoVLX_Or_NoVPCLMULQDQ : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasVPCLMULQDQ()">; def HasVPCLMULQDQ : Predicate<"Subtarget->hasVPCLMULQDQ()">; +def HasGFNI : Predicate<"Subtarget->hasGFNI()">; def HasFMA : Predicate<"Subtarget->hasFMA()">; def HasFMA4 : Predicate<"Subtarget->hasFMA4()">; def NoFMA4 : Predicate<"!Subtarget->hasFMA4()">; diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index dc52f867dd5..03da8c3665d 
100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -8466,3 +8466,82 @@ def : Pat<(xor FR128:$src1, FR128:$src2), (COPY_TO_REGCLASS (XORPSrr (COPY_TO_REGCLASS FR128:$src1, VR128), (COPY_TO_REGCLASS FR128:$src2, VR128)), FR128)>; + +//===----------------------------------------------------------------------===// +// GFNI instructions +//===----------------------------------------------------------------------===// + +multiclass GF2P8MULB_rm<string OpcodeStr, ValueType OpVT, + RegisterClass RC, PatFrag MemOpFrag, + X86MemOperand X86MemOp, bit Is2Addr = 0> { + let ExeDomain = SSEPackedInt, + AsmString = !if(Is2Addr, + OpcodeStr##"\t{$src2, $dst|$dst, $src2}", + OpcodeStr##"\t{$src2, $src1, $dst|$dst, $src1, $src2}") in { + let isCommutable = 1 in + def rr : PDI<0xCF, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), "", + [(set RC:$dst, (OpVT (X86GF2P8mulb RC:$src1, RC:$src2)))], + SSE_INTALU_ITINS_P.rr>, + Sched<[SSE_INTALU_ITINS_P.Sched]>, T8PD; + + def rm : PDI<0xCF, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, X86MemOp:$src2), "", + [(set RC:$dst, (OpVT (X86GF2P8mulb RC:$src1, + (bitconvert (MemOpFrag addr:$src2)))))], + SSE_INTALU_ITINS_P.rm>, + Sched<[SSE_INTALU_ITINS_P.Sched.Folded, ReadAfterLd]>, T8PD; + } +} + +multiclass GF2P8AFFINE_rmi<bits<8> Op, string OpStr, ValueType OpVT, + SDNode OpNode, RegisterClass RC, PatFrag MemOpFrag, + X86MemOperand X86MemOp, bit Is2Addr = 0> { + let AsmString = !if(Is2Addr, + OpStr##"\t{$src3, $src2, $dst|$dst, $src2, $src3}", + OpStr##"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}") in { + def rri : Ii8<Op, MRMSrcReg, (outs RC:$dst), + (ins RC:$src1, RC:$src2, u8imm:$src3), "", + [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, imm:$src3)))], + SSE_INTALU_ITINS_P.rr, SSEPackedInt>, + Sched<[WriteVecALU]>; + def rmi : Ii8<Op, MRMSrcMem, (outs RC:$dst), + (ins RC:$src1, X86MemOp:$src2, u8imm:$src3), "", + [(set RC:$dst, (OpVT (OpNode RC:$src1, + (bitconvert (MemOpFrag addr:$src2)), + 
imm:$src3)))], + SSE_INTALU_ITINS_P.rm, SSEPackedInt>, + Sched<[WriteVecALU.Folded, ReadAfterLd]>; + } +} + +multiclass GF2P8AFFINE_common<bits<8> Op, string OpStr, SDNode OpNode> { + let Constraints = "$src1 = $dst", + Predicates = [HasGFNI, UseSSE2] in + defm NAME : GF2P8AFFINE_rmi<Op, OpStr, v16i8, OpNode, + VR128, loadv2i64, i128mem, 1>; + let Predicates = [HasGFNI, HasAVX, NoVLX_Or_NoBWI] in { + defm V##NAME : GF2P8AFFINE_rmi<Op, "v"##OpStr, v16i8, OpNode, VR128, + loadv2i64, i128mem>, VEX_4V, VEX_W; + defm V##NAME##Y : GF2P8AFFINE_rmi<Op, "v"##OpStr, v32i8, OpNode, VR256, + loadv4i64, i256mem>, VEX_4V, VEX_L, VEX_W; + } +} + +// GF2P8MULB +let Constraints = "$src1 = $dst", + Predicates = [HasGFNI, UseSSE2] in +defm GF2P8MULB : GF2P8MULB_rm<"gf2p8mulb", v16i8, VR128, memopv2i64, + i128mem, 1>; +let Predicates = [HasGFNI, HasAVX, NoVLX_Or_NoBWI] in { + defm VGF2P8MULB : GF2P8MULB_rm<"vgf2p8mulb", v16i8, VR128, loadv2i64, + i128mem>, VEX_4V; + defm VGF2P8MULBY : GF2P8MULB_rm<"vgf2p8mulb", v32i8, VR256, loadv4i64, + i256mem>, VEX_4V, VEX_L; +} +// GF2P8AFFINEINVQB, GF2P8AFFINEQB +let isCommutable = 0 in { + defm GF2P8AFFINEINVQB : GF2P8AFFINE_common<0xCF, "gf2p8affineinvqb", + X86GF2P8affineinvqb>, TAPD; + defm GF2P8AFFINEQB : GF2P8AFFINE_common<0xCE, "gf2p8affineqb", + X86GF2P8affineqb>, TAPD; +} + diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h index 598994d07ad..fae0889950b 100644 --- a/lib/Target/X86/X86IntrinsicsInfo.h +++ b/lib/Target/X86/X86IntrinsicsInfo.h @@ -1170,7 +1170,7 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_mask_vpdpwssds_256, FMA_OP_MASK, X86ISD::VPDPWSSDS, 0), X86_INTRINSIC_DATA(avx512_mask_vpdpwssds_512, FMA_OP_MASK, X86ISD::VPDPWSSDS, 0), - X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_128, VPERM_3OP_MASK, + X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_128, VPERM_3OP_MASK, X86ISD::VPERMIV3, 0), X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_256, VPERM_3OP_MASK, 
X86ISD::VPERMIV3, 0), @@ -1700,6 +1700,26 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(vcvtph2ps_256, INTR_TYPE_1OP, X86ISD::CVTPH2PS, 0), X86_INTRINSIC_DATA(vcvtps2ph_128, INTR_TYPE_2OP, X86ISD::CVTPS2PH, 0), X86_INTRINSIC_DATA(vcvtps2ph_256, INTR_TYPE_2OP, X86ISD::CVTPS2PH, 0), + + X86_INTRINSIC_DATA(vgf2p8affineinvqb_128, INTR_TYPE_3OP, + X86ISD::GF2P8AFFINEINVQB, 0), + X86_INTRINSIC_DATA(vgf2p8affineinvqb_256, INTR_TYPE_3OP, + X86ISD::GF2P8AFFINEINVQB, 0), + X86_INTRINSIC_DATA(vgf2p8affineinvqb_512, INTR_TYPE_3OP, + X86ISD::GF2P8AFFINEINVQB, 0), + X86_INTRINSIC_DATA(vgf2p8affineqb_128, INTR_TYPE_3OP, + X86ISD::GF2P8AFFINEQB, 0), + X86_INTRINSIC_DATA(vgf2p8affineqb_256, INTR_TYPE_3OP, + X86ISD::GF2P8AFFINEQB, 0), + X86_INTRINSIC_DATA(vgf2p8affineqb_512, INTR_TYPE_3OP, + X86ISD::GF2P8AFFINEQB, 0), + X86_INTRINSIC_DATA(vgf2p8mulb_128, INTR_TYPE_2OP, + X86ISD::GF2P8MULB, 0), + X86_INTRINSIC_DATA(vgf2p8mulb_256, INTR_TYPE_2OP, + X86ISD::GF2P8MULB, 0), + X86_INTRINSIC_DATA(vgf2p8mulb_512, INTR_TYPE_2OP, + X86ISD::GF2P8MULB, 0), + X86_INTRINSIC_DATA(xop_vpcomb, INTR_TYPE_3OP, X86ISD::VPCOM, 0), X86_INTRINSIC_DATA(xop_vpcomd, INTR_TYPE_3OP, X86ISD::VPCOM, 0), X86_INTRINSIC_DATA(xop_vpcomq, INTR_TYPE_3OP, X86ISD::VPCOM, 0), diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 0f995404618..72c08e21799 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -299,6 +299,7 @@ void X86Subtarget::initializeEnvironment() { HasXSAVES = false; HasPCLMUL = false; HasVPCLMULQDQ = false; + HasGFNI = false; HasFMA = false; HasFMA4 = false; HasXOP = false; diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 50e1a742a0f..740b9ddba09 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -128,6 +128,9 @@ protected: bool HasPCLMUL; bool HasVPCLMULQDQ; + /// Target has Galois Field Arithmetic instructions + bool HasGFNI; + /// Target has 
3-operand fused multiply-add bool HasFMA; @@ -480,6 +483,7 @@ public: bool hasXSAVES() const { return HasXSAVES; } bool hasPCLMUL() const { return HasPCLMUL; } bool hasVPCLMULQDQ() const { return HasVPCLMULQDQ; } + bool hasGFNI() const { return HasGFNI; } // Prefer FMA4 to FMA - its better for commutation/memory folding and // has equal or better performance on all supported targets. bool hasFMA() const { return HasFMA; } |