summaryrefslogtreecommitdiff
path: root/lib/Target/X86/X86InstrFMA.td
diff options
context:
space:
mode:
authorCraig Topper <craig.topper@gmail.com>2012-08-20 06:21:25 +0000
committerCraig Topper <craig.topper@gmail.com>2012-08-20 06:21:25 +0000
commit75d8ad461f1ab7542123086d51e61530a5229d9a (patch)
tree286e01419f71b122ad131d21c51b53ea217b36a6 /lib/Target/X86/X86InstrFMA.td
parentf4eb22a01c20bbd03b3b23fb894c8c0dc1383c5f (diff)
Remove FMA3 intrinsic instructions in favor of patterns.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@162194 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/X86/X86InstrFMA.td')
-rw-r--r--lib/Target/X86/X86InstrFMA.td216
1 files changed, 146 insertions, 70 deletions
diff --git a/lib/Target/X86/X86InstrFMA.td b/lib/Target/X86/X86InstrFMA.td
index 408f87f5053..43d8fd66be6 100644
--- a/lib/Target/X86/X86InstrFMA.td
+++ b/lib/Target/X86/X86InstrFMA.td
@@ -42,15 +42,7 @@ let neverHasSideEffects = 1 in {
// Intrinsic for 213 pattern
multiclass fma3p_rm_int<bits<8> opc, string OpcodeStr,
PatFrag MemFrag128, PatFrag MemFrag256,
- Intrinsic Int128, Intrinsic Int256, SDNode Op213,
- ValueType OpVT128, ValueType OpVT256> {
- def r_Int : FMA3<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, VR128:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR128:$dst, (Int128 VR128:$src2, VR128:$src1,
- VR128:$src3))]>;
-
+ SDNode Op213, ValueType OpVT128, ValueType OpVT256> {
def r : FMA3<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
@@ -58,13 +50,6 @@ multiclass fma3p_rm_int<bits<8> opc, string OpcodeStr,
[(set VR128:$dst, (OpVT128 (Op213 VR128:$src2,
VR128:$src1, VR128:$src3)))]>;
- def m_Int : FMA3<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, f128mem:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR128:$dst, (Int128 VR128:$src2, VR128:$src1,
- (MemFrag128 addr:$src3)))]>;
-
def m : FMA3<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, f128mem:$src3),
!strconcat(OpcodeStr,
@@ -72,14 +57,6 @@ multiclass fma3p_rm_int<bits<8> opc, string OpcodeStr,
[(set VR128:$dst, (OpVT128 (Op213 VR128:$src2, VR128:$src1,
(MemFrag128 addr:$src3))))]>;
-
- def rY_Int : FMA3<opc, MRMSrcReg, (outs VR256:$dst),
- (ins VR256:$src1, VR256:$src2, VR256:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR256:$dst, (Int256 VR256:$src2, VR256:$src1,
- VR256:$src3))]>;
-
def rY : FMA3<opc, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, VR256:$src3),
!strconcat(OpcodeStr,
@@ -87,13 +64,6 @@ multiclass fma3p_rm_int<bits<8> opc, string OpcodeStr,
[(set VR256:$dst, (OpVT256 (Op213 VR256:$src2, VR256:$src1,
VR256:$src3)))]>;
- def mY_Int : FMA3<opc, MRMSrcMem, (outs VR256:$dst),
- (ins VR256:$src1, VR256:$src2, f256mem:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR256:$dst, (Int256 VR256:$src2, VR256:$src1,
- (MemFrag256 addr:$src3)))]>;
-
def mY : FMA3<opc, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, f256mem:$src3),
!strconcat(OpcodeStr,
@@ -107,11 +77,10 @@ multiclass fma3p_rm_int<bits<8> opc, string OpcodeStr,
multiclass fma3p_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
string OpcodeStr, string PackTy,
PatFrag MemFrag128, PatFrag MemFrag256,
- Intrinsic Int128, Intrinsic Int256, SDNode Op,
- ValueType OpTy128, ValueType OpTy256> {
+ SDNode Op, ValueType OpTy128, ValueType OpTy256> {
defm r213 : fma3p_rm_int <opc213, !strconcat(OpcodeStr,
!strconcat("213", PackTy)), MemFrag128, MemFrag256,
- Int128, Int256, Op, OpTy128, OpTy256>;
+ Op, OpTy128, OpTy256>;
defm r132 : fma3p_rm <opc132,
!strconcat(OpcodeStr, !strconcat("132", PackTy))>;
defm r231 : fma3p_rm <opc231,
@@ -121,69 +90,176 @@ multiclass fma3p_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
// Fused Multiply-Add
let ExeDomain = SSEPackedSingle in {
defm VFMADDPS : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "ps", memopv4f32,
- memopv8f32, int_x86_fma_vfmadd_ps,
- int_x86_fma_vfmadd_ps_256, X86Fmadd,
- v4f32, v8f32>;
+ memopv8f32, X86Fmadd, v4f32, v8f32>;
defm VFMSUBPS : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "ps", memopv4f32,
- memopv8f32, int_x86_fma_vfmsub_ps,
- int_x86_fma_vfmsub_ps_256, X86Fmsub,
- v4f32, v8f32>;
+ memopv8f32, X86Fmsub, v4f32, v8f32>;
defm VFMADDSUBPS : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "ps",
- memopv4f32, memopv8f32,
- int_x86_fma_vfmaddsub_ps,
- int_x86_fma_vfmaddsub_ps_256, X86Fmaddsub,
+ memopv4f32, memopv8f32, X86Fmaddsub,
v4f32, v8f32>;
defm VFMSUBADDPS : fma3p_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "ps",
- memopv4f32, memopv8f32,
- int_x86_fma_vfmsubadd_ps,
- int_x86_fma_vfmsubadd_ps_256, X86Fmsubadd,
+ memopv4f32, memopv8f32, X86Fmsubadd,
v4f32, v8f32>;
}
let ExeDomain = SSEPackedDouble in {
defm VFMADDPD : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "pd", memopv2f64,
- memopv4f64, int_x86_fma_vfmadd_pd,
- int_x86_fma_vfmadd_pd_256, X86Fmadd, v2f64,
- v4f64>, VEX_W;
+ memopv4f64, X86Fmadd, v2f64, v4f64>, VEX_W;
defm VFMSUBPD : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "pd", memopv2f64,
- memopv4f64, int_x86_fma_vfmsub_pd,
- int_x86_fma_vfmsub_pd_256, X86Fmsub, v2f64,
- v4f64>, VEX_W;
+ memopv4f64, X86Fmsub, v2f64, v4f64>, VEX_W;
defm VFMADDSUBPD : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "pd",
- memopv2f64, memopv4f64,
- int_x86_fma_vfmaddsub_pd,
- int_x86_fma_vfmaddsub_pd_256, X86Fmaddsub,
+ memopv2f64, memopv4f64, X86Fmaddsub,
v2f64, v4f64>, VEX_W;
defm VFMSUBADDPD : fma3p_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "pd",
- memopv2f64, memopv4f64,
- int_x86_fma_vfmsubadd_pd,
- int_x86_fma_vfmsubadd_pd_256, X86Fmsubadd,
+ memopv2f64, memopv4f64, X86Fmsubadd,
v2f64, v4f64>, VEX_W;
}
// Fused Negative Multiply-Add
let ExeDomain = SSEPackedSingle in {
defm VFNMADDPS : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "ps", memopv4f32,
- memopv8f32, int_x86_fma_vfnmadd_ps,
- int_x86_fma_vfnmadd_ps_256, X86Fnmadd, v4f32,
- v8f32>;
+ memopv8f32, X86Fnmadd, v4f32, v8f32>;
defm VFNMSUBPS : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "ps", memopv4f32,
- memopv8f32, int_x86_fma_vfnmsub_ps,
- int_x86_fma_vfnmsub_ps_256, X86Fnmsub, v4f32,
- v8f32>;
+ memopv8f32, X86Fnmsub, v4f32, v8f32>;
}
let ExeDomain = SSEPackedDouble in {
defm VFNMADDPD : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "pd", memopv2f64,
- memopv4f64, int_x86_fma_vfnmadd_pd,
- int_x86_fma_vfnmadd_pd_256, X86Fnmadd, v2f64,
- v4f64>, VEX_W;
+ memopv4f64, X86Fnmadd, v2f64, v4f64>, VEX_W;
defm VFNMSUBPD : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "pd",
- memopv2f64,
- memopv4f64, int_x86_fma_vfnmsub_pd,
- int_x86_fma_vfnmsub_pd_256, X86Fnmsub, v2f64,
+ memopv2f64, memopv4f64, X86Fnmsub, v2f64,
v4f64>, VEX_W;
}
+let Predicates = [HasFMA] in {
+ def : Pat<(int_x86_fma_vfmadd_ps VR128:$src2, VR128:$src1, VR128:$src3),
+ (VFMADDPSr213r VR128:$src1, VR128:$src2, VR128:$src3)>;
+ def : Pat<(int_x86_fma_vfmadd_ps VR128:$src2, VR128:$src1,
+ (memopv4f32 addr:$src3)),
+ (VFMADDPSr213m VR128:$src1, VR128:$src2, addr:$src3)>;
+ def : Pat<(int_x86_fma_vfmsub_ps VR128:$src2, VR128:$src1, VR128:$src3),
+ (VFMSUBPSr213r VR128:$src1, VR128:$src2, VR128:$src3)>;
+ def : Pat<(int_x86_fma_vfmsub_ps VR128:$src2, VR128:$src1,
+ (memopv4f32 addr:$src3)),
+ (VFMSUBPSr213m VR128:$src1, VR128:$src2, addr:$src3)>;
+ def : Pat<(int_x86_fma_vfmaddsub_ps VR128:$src2, VR128:$src1, VR128:$src3),
+ (VFMADDSUBPSr213r VR128:$src1, VR128:$src2, VR128:$src3)>;
+ def : Pat<(int_x86_fma_vfmaddsub_ps VR128:$src2, VR128:$src1,
+ (memopv4f32 addr:$src3)),
+ (VFMADDSUBPSr213m VR128:$src1, VR128:$src2, addr:$src3)>;
+ def : Pat<(int_x86_fma_vfmsubadd_ps VR128:$src2, VR128:$src1, VR128:$src3),
+ (VFMSUBADDPSr213r VR128:$src1, VR128:$src2, VR128:$src3)>;
+ def : Pat<(int_x86_fma_vfmsubadd_ps VR128:$src2, VR128:$src1,
+ (memopv4f32 addr:$src3)),
+ (VFMSUBADDPSr213m VR128:$src1, VR128:$src2, addr:$src3)>;
+
+ def : Pat<(int_x86_fma_vfmadd_ps_256 VR256:$src2, VR256:$src1, VR256:$src3),
+ (VFMADDPSr213rY VR256:$src1, VR256:$src2, VR256:$src3)>;
+ def : Pat<(int_x86_fma_vfmadd_ps_256 VR256:$src2, VR256:$src1,
+ (memopv8f32 addr:$src3)),
+ (VFMADDPSr213mY VR256:$src1, VR256:$src2, addr:$src3)>;
+ def : Pat<(int_x86_fma_vfmsub_ps_256 VR256:$src2, VR256:$src1, VR256:$src3),
+ (VFMSUBPSr213rY VR256:$src1, VR256:$src2, VR256:$src3)>;
+ def : Pat<(int_x86_fma_vfmsub_ps_256 VR256:$src2, VR256:$src1,
+ (memopv8f32 addr:$src3)),
+ (VFMSUBPSr213mY VR256:$src1, VR256:$src2, addr:$src3)>;
+ def : Pat<(int_x86_fma_vfmaddsub_ps_256 VR256:$src2, VR256:$src1, VR256:$src3),
+ (VFMADDSUBPSr213rY VR256:$src1, VR256:$src2, VR256:$src3)>;
+ def : Pat<(int_x86_fma_vfmaddsub_ps_256 VR256:$src2, VR256:$src1,
+ (memopv8f32 addr:$src3)),
+ (VFMADDSUBPSr213mY VR256:$src1, VR256:$src2, addr:$src3)>;
+ def : Pat<(int_x86_fma_vfmsubadd_ps_256 VR256:$src2, VR256:$src1, VR256:$src3),
+ (VFMSUBADDPSr213rY VR256:$src1, VR256:$src2, VR256:$src3)>;
+ def : Pat<(int_x86_fma_vfmsubadd_ps_256 VR256:$src2, VR256:$src1,
+ (memopv8f32 addr:$src3)),
+ (VFMSUBADDPSr213mY VR256:$src1, VR256:$src2, addr:$src3)>;
+
+ def : Pat<(int_x86_fma_vfmadd_pd VR128:$src2, VR128:$src1, VR128:$src3),
+ (VFMADDPDr213r VR128:$src1, VR128:$src2, VR128:$src3)>;
+ def : Pat<(int_x86_fma_vfmadd_pd VR128:$src2, VR128:$src1,
+ (memopv2f64 addr:$src3)),
+ (VFMADDPDr213m VR128:$src1, VR128:$src2, addr:$src3)>;
+ def : Pat<(int_x86_fma_vfmsub_pd VR128:$src2, VR128:$src1, VR128:$src3),
+ (VFMSUBPDr213r VR128:$src1, VR128:$src2, VR128:$src3)>;
+ def : Pat<(int_x86_fma_vfmsub_pd VR128:$src2, VR128:$src1,
+ (memopv2f64 addr:$src3)),
+ (VFMSUBPDr213m VR128:$src1, VR128:$src2, addr:$src3)>;
+ def : Pat<(int_x86_fma_vfmaddsub_pd VR128:$src2, VR128:$src1, VR128:$src3),
+ (VFMADDSUBPDr213r VR128:$src1, VR128:$src2, VR128:$src3)>;
+ def : Pat<(int_x86_fma_vfmaddsub_pd VR128:$src2, VR128:$src1,
+ (memopv2f64 addr:$src3)),
+ (VFMADDSUBPDr213m VR128:$src1, VR128:$src2, addr:$src3)>;
+ def : Pat<(int_x86_fma_vfmsubadd_pd VR128:$src2, VR128:$src1, VR128:$src3),
+ (VFMSUBADDPDr213r VR128:$src1, VR128:$src2, VR128:$src3)>;
+ def : Pat<(int_x86_fma_vfmsubadd_pd VR128:$src2, VR128:$src1,
+ (memopv2f64 addr:$src3)),
+ (VFMSUBADDPDr213m VR128:$src1, VR128:$src2, addr:$src3)>;
+
+ def : Pat<(int_x86_fma_vfmadd_pd_256 VR256:$src2, VR256:$src1, VR256:$src3),
+ (VFMADDPDr213rY VR256:$src1, VR256:$src2, VR256:$src3)>;
+ def : Pat<(int_x86_fma_vfmadd_pd_256 VR256:$src2, VR256:$src1,
+ (memopv4f64 addr:$src3)),
+ (VFMADDPDr213mY VR256:$src1, VR256:$src2, addr:$src3)>;
+ def : Pat<(int_x86_fma_vfmsub_pd_256 VR256:$src2, VR256:$src1, VR256:$src3),
+ (VFMSUBPDr213rY VR256:$src1, VR256:$src2, VR256:$src3)>;
+ def : Pat<(int_x86_fma_vfmsub_pd_256 VR256:$src2, VR256:$src1,
+ (memopv4f64 addr:$src3)),
+ (VFMSUBPDr213mY VR256:$src1, VR256:$src2, addr:$src3)>;
+ def : Pat<(int_x86_fma_vfmaddsub_pd_256 VR256:$src2, VR256:$src1, VR256:$src3),
+ (VFMADDSUBPDr213rY VR256:$src1, VR256:$src2, VR256:$src3)>;
+ def : Pat<(int_x86_fma_vfmaddsub_pd_256 VR256:$src2, VR256:$src1,
+ (memopv4f64 addr:$src3)),
+ (VFMADDSUBPDr213mY VR256:$src1, VR256:$src2, addr:$src3)>;
+ def : Pat<(int_x86_fma_vfmsubadd_pd_256 VR256:$src2, VR256:$src1, VR256:$src3),
+ (VFMSUBADDPDr213rY VR256:$src1, VR256:$src2, VR256:$src3)>;
+ def : Pat<(int_x86_fma_vfmsubadd_pd_256 VR256:$src2, VR256:$src1,
+ (memopv4f64 addr:$src3)),
+ (VFMSUBADDPDr213mY VR256:$src1, VR256:$src2, addr:$src3)>;
+
+ def : Pat<(int_x86_fma_vfnmadd_ps VR128:$src2, VR128:$src1, VR128:$src3),
+ (VFNMADDPSr213r VR128:$src1, VR128:$src2, VR128:$src3)>;
+ def : Pat<(int_x86_fma_vfnmadd_ps VR128:$src2, VR128:$src1,
+ (memopv4f32 addr:$src3)),
+ (VFNMADDPSr213m VR128:$src1, VR128:$src2, addr:$src3)>;
+ def : Pat<(int_x86_fma_vfnmsub_ps VR128:$src2, VR128:$src1, VR128:$src3),
+ (VFNMSUBPSr213r VR128:$src1, VR128:$src2, VR128:$src3)>;
+ def : Pat<(int_x86_fma_vfnmsub_ps VR128:$src2, VR128:$src1,
+ (memopv4f32 addr:$src3)),
+ (VFNMSUBPSr213m VR128:$src1, VR128:$src2, addr:$src3)>;
+
+ def : Pat<(int_x86_fma_vfnmadd_ps_256 VR256:$src2, VR256:$src1, VR256:$src3),
+ (VFNMADDPSr213rY VR256:$src1, VR256:$src2, VR256:$src3)>;
+ def : Pat<(int_x86_fma_vfnmadd_ps_256 VR256:$src2, VR256:$src1,
+ (memopv8f32 addr:$src3)),
+ (VFNMADDPSr213mY VR256:$src1, VR256:$src2, addr:$src3)>;
+ def : Pat<(int_x86_fma_vfnmsub_ps_256 VR256:$src2, VR256:$src1, VR256:$src3),
+ (VFNMSUBPSr213rY VR256:$src1, VR256:$src2, VR256:$src3)>;
+ def : Pat<(int_x86_fma_vfnmsub_ps_256 VR256:$src2, VR256:$src1,
+ (memopv8f32 addr:$src3)),
+ (VFNMSUBPSr213mY VR256:$src1, VR256:$src2, addr:$src3)>;
+
+ def : Pat<(int_x86_fma_vfnmadd_pd VR128:$src2, VR128:$src1, VR128:$src3),
+ (VFNMADDPDr213r VR128:$src1, VR128:$src2, VR128:$src3)>;
+ def : Pat<(int_x86_fma_vfnmadd_pd VR128:$src2, VR128:$src1,
+ (memopv2f64 addr:$src3)),
+ (VFNMADDPDr213m VR128:$src1, VR128:$src2, addr:$src3)>;
+ def : Pat<(int_x86_fma_vfnmsub_pd VR128:$src2, VR128:$src1, VR128:$src3),
+ (VFNMSUBPDr213r VR128:$src1, VR128:$src2, VR128:$src3)>;
+ def : Pat<(int_x86_fma_vfnmsub_pd VR128:$src2, VR128:$src1,
+ (memopv2f64 addr:$src3)),
+ (VFNMSUBPDr213m VR128:$src1, VR128:$src2, addr:$src3)>;
+
+ def : Pat<(int_x86_fma_vfnmadd_pd_256 VR256:$src2, VR256:$src1, VR256:$src3),
+ (VFNMADDPDr213rY VR256:$src1, VR256:$src2, VR256:$src3)>;
+ def : Pat<(int_x86_fma_vfnmadd_pd_256 VR256:$src2, VR256:$src1,
+ (memopv4f64 addr:$src3)),
+ (VFNMADDPDr213mY VR256:$src1, VR256:$src2, addr:$src3)>;
+ def : Pat<(int_x86_fma_vfnmsub_pd_256 VR256:$src2, VR256:$src1, VR256:$src3),
+ (VFNMSUBPDr213rY VR256:$src1, VR256:$src2, VR256:$src3)>;
+ def : Pat<(int_x86_fma_vfnmsub_pd_256 VR256:$src2, VR256:$src1,
+ (memopv4f64 addr:$src3)),
+ (VFNMSUBPDr213mY VR256:$src1, VR256:$src2, addr:$src3)>;
+
+} // Predicates = [HasFMA]
+
let Constraints = "$src1 = $dst" in {
multiclass fma3s_rm<bits<8> opc, string OpcodeStr, X86MemOperand x86memop,
RegisterClass RC> {