summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorSam Parker <sam.parker@arm.com>2016-08-02 12:44:27 +0000
committerSam Parker <sam.parker@arm.com>2016-08-02 12:44:27 +0000
commitd18c8aae0886a8b5628a53d22e31c03262223be3 (patch)
treeed4796e14a57cce5adfebbda26a3e2027a88f038 /lib
parent02c28617506659b17fa09052a63a3e4c9606db5e (diff)
[ARM] Improve smul* and smla* isel for Thumb2
Added (sra (shl x, 16), 16) to the sext_16_node PatLeaf for ARM to simplify some pattern matching. This has allowed several patterns for smul* and smla* to be removed as well as making it easier to add the matching for the corresponding instructions for Thumb2 targets. Also added two Pat classes that are predicated on Thumb2 with the hasDSP flag and UseMulOps flags. Updated the smul codegen test with the wider range of patterns plus the ThumbV6 and ThumbV6T2 targets. Differential Revision: https://reviews.llvm.org/D22908 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@277450 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r--lib/Target/ARM/ARMInstrFormats.td6
-rw-r--r--lib/Target/ARM/ARMInstrInfo.td41
-rw-r--r--lib/Target/ARM/ARMInstrThumb2.td19
3 files changed, 41 insertions, 25 deletions
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index 37a83f70a1f..25f92df7e11 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -999,6 +999,12 @@ class VFPPat<dag pattern, dag result> : Pat<pattern, result> {
class VFPNoNEONPat<dag pattern, dag result> : Pat<pattern, result> {
list<Predicate> Predicates = [HasVFP2, DontUseNEONForFP];
}
+class Thumb2DSPPat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsThumb2, HasDSP];
+}
+class Thumb2DSPMulPat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsThumb2, UseMulOps, HasDSP];
+}
//===----------------------------------------------------------------------===//
// Thumb Instruction Format Definitions.
//
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index c9735f3ec27..b7abc48fe08 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -358,7 +358,23 @@ def imm16_31 : ImmLeaf<i32, [{
// sext_16_node predicate - True if the SDNode is sign-extended 16 or more bits.
def sext_16_node : PatLeaf<(i32 GPR:$a), [{
- return CurDAG->ComputeNumSignBits(SDValue(N,0)) >= 17;
+ if (CurDAG->ComputeNumSignBits(SDValue(N,0)) >= 17)
+ return true;
+
+ if (N->getOpcode() != ISD::SRA)
+ return false;
+ if (N->getOperand(0).getOpcode() != ISD::SHL)
+ return false;
+
+ auto *ShiftVal = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (!ShiftVal || ShiftVal->getZExtValue() != 16)
+ return false;
+
+ ShiftVal = dyn_cast<ConstantSDNode>(N->getOperand(0)->getOperand(1));
+ if (!ShiftVal || ShiftVal->getZExtValue() != 16)
+ return false;
+
+ return true;
}]>;
/// Split a 32-bit immediate into two 16 bit parts.
@@ -5492,45 +5508,22 @@ def : ARMPat<(extloadi8 addrmodepc:$addr), (PICLDRB addrmodepc:$addr)>;
def : ARMPat<(extloadi16 addrmodepc:$addr), (PICLDRH addrmodepc:$addr)>;
// smul* and smla*
-def : ARMV5TEPat<(mul (sra (shl GPR:$a, (i32 16)), (i32 16)),
- (sra (shl GPR:$b, (i32 16)), (i32 16))),
- (SMULBB GPR:$a, GPR:$b)>;
def : ARMV5TEPat<(mul sext_16_node:$a, sext_16_node:$b),
(SMULBB GPR:$a, GPR:$b)>;
-def : ARMV5TEPat<(mul (sra (shl GPR:$a, (i32 16)), (i32 16)),
- (sra GPR:$b, (i32 16))),
- (SMULBT GPR:$a, GPR:$b)>;
def : ARMV5TEPat<(mul sext_16_node:$a, (sra GPR:$b, (i32 16))),
(SMULBT GPR:$a, GPR:$b)>;
-def : ARMV5TEPat<(mul (sra GPR:$a, (i32 16)),
- (sra (shl GPR:$b, (i32 16)), (i32 16))),
- (SMULTB GPR:$a, GPR:$b)>;
def : ARMV5TEPat<(mul (sra GPR:$a, (i32 16)), sext_16_node:$b),
(SMULTB GPR:$a, GPR:$b)>;
-
-def : ARMV5MOPat<(add GPR:$acc,
- (mul (sra (shl GPR:$a, (i32 16)), (i32 16)),
- (sra (shl GPR:$b, (i32 16)), (i32 16)))),
- (SMLABB GPR:$a, GPR:$b, GPR:$acc)>;
def : ARMV5MOPat<(add GPR:$acc,
(mul sext_16_node:$a, sext_16_node:$b)),
(SMLABB GPR:$a, GPR:$b, GPR:$acc)>;
def : ARMV5MOPat<(add GPR:$acc,
- (mul (sra (shl GPR:$a, (i32 16)), (i32 16)),
- (sra GPR:$b, (i32 16)))),
- (SMLABT GPR:$a, GPR:$b, GPR:$acc)>;
-def : ARMV5MOPat<(add GPR:$acc,
(mul sext_16_node:$a, (sra GPR:$b, (i32 16)))),
(SMLABT GPR:$a, GPR:$b, GPR:$acc)>;
def : ARMV5MOPat<(add GPR:$acc,
- (mul (sra GPR:$a, (i32 16)),
- (sra (shl GPR:$b, (i32 16)), (i32 16)))),
- (SMLATB GPR:$a, GPR:$b, GPR:$acc)>;
-def : ARMV5MOPat<(add GPR:$acc,
(mul (sra GPR:$a, (i32 16)), sext_16_node:$b)),
(SMLATB GPR:$a, GPR:$b, GPR:$acc)>;
-
// Pre-v7 uses MCR for synchronization barriers.
def : ARMPat<(ARMMemBarrierMCR GPR:$zero), (MCR 15, 0, GPR:$zero, 7, 10, 5)>,
Requires<[IsARM, HasV6]>;
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 8f59a5b8cfc..d908b8a906b 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -2640,7 +2640,15 @@ def t2SMULTT : T2ThreeRegSMUL<0b001, 0b11, "smultt",
def t2SMULWB : T2ThreeRegSMUL<0b011, 0b00, "smulwb", []>;
def t2SMULWT : T2ThreeRegSMUL<0b011, 0b01, "smulwt", []>;
-class T2FourRegSMLA<bits<3> op22_20, bits<2> op5_4, string opc, list<dag> pattern>
+def : Thumb2DSPPat<(mul sext_16_node:$Rm, sext_16_node:$Rn),
+ (t2SMULBB rGPR:$Rm, rGPR:$Rn)>;
+def : Thumb2DSPPat<(mul sext_16_node:$Rn, (sra rGPR:$Rm, (i32 16))),
+ (t2SMULBT rGPR:$Rn, rGPR:$Rm)>;
+def : Thumb2DSPPat<(mul (sra rGPR:$Rn, (i32 16)), sext_16_node:$Rm),
+ (t2SMULTB rGPR:$Rn, rGPR:$Rm)>;
+
+class T2FourRegSMLA<bits<3> op22_20, bits<2> op5_4, string opc,
+ list<dag> pattern>
: T2FourReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMUL16,
opc, "\t$Rd, $Rn, $Rm, $Ra", pattern>,
Requires<[IsThumb2, HasDSP, UseMulOps]> {
@@ -2667,6 +2675,15 @@ def t2SMLATT : T2FourRegSMLA<0b001, 0b11, "smlatt",
def t2SMLAWB : T2FourRegSMLA<0b011, 0b00, "smlawb", []>;
def t2SMLAWT : T2FourRegSMLA<0b011, 0b01, "smlawt", []>;
+def : Thumb2DSPMulPat<(add rGPR:$Ra, (mul sext_16_node:$Rn, sext_16_node:$Rm)),
+ (t2SMLABB rGPR:$Rn, rGPR:$Rm, rGPR:$Ra)>;
+def : Thumb2DSPMulPat<(add rGPR:$Ra,
+ (mul sext_16_node:$Rn, (sra rGPR:$Rm, (i32 16)))),
+ (t2SMLABT rGPR:$Rn, rGPR:$Rm, rGPR:$Ra)>;
+def : Thumb2DSPMulPat<(add rGPR:$Ra,
+ (mul (sra rGPR:$Rn, (i32 16)), sext_16_node:$Rm)),
+ (t2SMLATB rGPR:$Rn, rGPR:$Rm, rGPR:$Ra)>;
+
class T2SMLAL<bits<3> op22_20, bits<4> op7_4, string opc, list<dag> pattern>
: T2FourReg_mac<1, op22_20, op7_4,
(outs rGPR:$Ra, rGPR:$Rd),