[SystemZ, TableGen] Fix shift count handling

The DAG combiner logic to simplify AND masks in shift counts is invalid. While it is true that the SystemZ shift instructions ignore all but the low 6 bits of the shift count, it is still invalid to simplify the AND masks while the DAG still uses the standard shift operators (which are *not* defined to match the SystemZ instruction behavior). Instead, this patch performs equivalent operations during instruction selection. For completely removing the AND, this now happens via additional DAG match patterns implemented by a multi-alternative PatFrags. For simplifying a 32-bit AND to a 16-bit AND, the existing DAG patterns were already mostly OK, they just needed an output XForm to actually truncate the immediate value. Unfortunately, the latter change also exposed a bug in TableGen: it seems XForms are currently only handled correctly for direct operands of the outermost operation node. This patch also fixes that bug by simply recurring through the whole pattern. This should be NFC for all other targets. Differential Revision: https://reviews.llvm.org/D50096 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@338521 91177308-0d34-0410-b5e6-96231b3b80d8
author: Ulrich Weigand <ulrich.weigand@de.ibm.com> 2018-08-01 11:57:58 +0000
committer: Ulrich Weigand <ulrich.weigand@de.ibm.com> 2018-08-01 11:57:58 +0000
commit: afca5c23cfc340f2af5fa913d28c1398456dc8ab (patch)
tree: 512332f201281aabc7ee575fd05f16a81461aec2 /lib/Target/SystemZ
parent: c6fbe17b98748ca0556ff61ca7cf3a3e15674a36 (diff)
5 files changed, 35 insertions, 103 deletions
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index 1ad0e964c1e..e76fa71dacd 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -527,10 +527,6 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
   setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
   setTargetDAGCombine(ISD::FP_ROUND);
   setTargetDAGCombine(ISD::BSWAP);
-  setTargetDAGCombine(ISD::SHL);
-  setTargetDAGCombine(ISD::SRA);
-  setTargetDAGCombine(ISD::SRL);
-  setTargetDAGCombine(ISD::ROTL);
 
   // Handle intrinsics.
   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
@@ -5524,76 +5520,6 @@ SDValue SystemZTargetLowering::combineBSWAP(
   return SDValue();
 }
 
-SDValue SystemZTargetLowering::combineSHIFTROT(
-    SDNode *N, DAGCombinerInfo &DCI) const {
-
-  SelectionDAG &DAG = DCI.DAG;
-
-  // Shift/rotate instructions only use the last 6 bits of the second operand
-  // register. If the second operand is the result of an AND with an immediate
-  // value that has its last 6 bits set, we can safely remove the AND operation.
-  //
-  // If the AND operation doesn't have the last 6 bits set, we can't remove it
-  // entirely, but we can still truncate it to a 16-bit value. This prevents
-  // us from ending up with a NILL with a signed operand, which will cause the
-  // instruction printer to abort.
-  SDValue N1 = N->getOperand(1);
-  if (N1.getOpcode() == ISD::AND) {
-    SDValue AndMaskOp = N1->getOperand(1);
-    auto *AndMask = dyn_cast<ConstantSDNode>(AndMaskOp);
-
-    // The AND mask is constant
-    if (AndMask) {
-      auto AmtVal = AndMask->getZExtValue();
-
-      // Bottom 6 bits are set
-      if ((AmtVal & 0x3f) == 0x3f) {
-        SDValue AndOp = N1->getOperand(0);
-
-        // This is the only use, so remove the node
-        if (N1.hasOneUse()) {
-          // Combine the AND away
-          DCI.CombineTo(N1.getNode(), AndOp);
-
-          // Return N so it isn't rechecked
-          return SDValue(N, 0);
-
-        // The node will be reused, so create a new node for this one use
-        } else {
-          SDValue Replace = DAG.getNode(N->getOpcode(), SDLoc(N),
-                                        N->getValueType(0), N->getOperand(0),
-                                        AndOp);
-          DCI.AddToWorklist(Replace.getNode());
-
-          return Replace;
-        }
-
-      // We can't remove the AND, but we can use NILL here (normally we would
-      // use NILF). Only keep the last 16 bits of the mask. The actual
-      // transformation will be handled by .td definitions.
-      } else if (AmtVal >> 16 != 0) {
-        SDValue AndOp = N1->getOperand(0);
-
-        auto NewMask = DAG.getConstant(AndMask->getZExtValue() & 0x0000ffff,
-                                       SDLoc(AndMaskOp),
-                                       AndMaskOp.getValueType());
-
-        auto NewAnd = DAG.getNode(N1.getOpcode(), SDLoc(N1), N1.getValueType(),
-                                  AndOp, NewMask);
-
-        SDValue Replace = DAG.getNode(N->getOpcode(), SDLoc(N),
-                                      N->getValueType(0), N->getOperand(0),
-                                      NewAnd);
-        DCI.AddToWorklist(Replace.getNode());
-
-        return Replace;
-      }
-    }
-  }
-
-  return SDValue();
-}
-
 static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) {
   // We have a SELECT_CCMASK or BR_CCMASK comparing the condition code
   // set by the CCReg instruction using the CCValid / CCMask masks,
@@ -5752,10 +5678,6 @@ SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
   case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI);
   case ISD::FP_ROUND:           return combineFP_ROUND(N, DCI);
   case ISD::BSWAP:              return combineBSWAP(N, DCI);
-  case ISD::SHL:
-  case ISD::SRA:
-  case ISD::SRL:
-  case ISD::ROTL:               return combineSHIFTROT(N, DCI);
   case SystemZISD::BR_CCMASK:   return combineBR_CCMASK(N, DCI);
   case SystemZISD::SELECT_CCMASK: return combineSELECT_CCMASK(N, DCI);
   case SystemZISD::GET_CCMASK:  return combineGET_CCMASK(N, DCI);
diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h
index 0ca93a38a01..267e31a8521 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/lib/Target/SystemZ/SystemZISelLowering.h
@@ -602,7 +602,6 @@ private:
   SDValue combineJOIN_DWORDS(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue combineFP_ROUND(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue combineBSWAP(SDNode *N, DAGCombinerInfo &DCI) const;
-  SDValue combineSHIFTROT(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue combineBR_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue combineSELECT_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue combineGET_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const;
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td
index 9d731226995..bb5b7aae883 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -1352,8 +1352,8 @@ def : Pat<(z_udivrem GR64:$src1, (i64 (load bdxaddr20only:$src2))),
 //===----------------------------------------------------------------------===//
 
 // Logical shift left.
-defm SLL : BinaryRSAndK<"sll", 0x89, 0xEBDF, shl, GR32>;
-def SLLG : BinaryRSY<"sllg", 0xEB0D, shl, GR64>;
+defm SLL : BinaryRSAndK<"sll", 0x89, 0xEBDF, shiftop<shl>, GR32>;
+def SLLG : BinaryRSY<"sllg", 0xEB0D, shiftop<shl>, GR64>;
 def SLDL : BinaryRS<"sldl", 0x8D, null_frag, GR128>;
 
 // Arithmetic shift left.
@@ -1364,20 +1364,20 @@ let Defs = [CC] in {
 }
 
 // Logical shift right.
-defm SRL : BinaryRSAndK<"srl", 0x88, 0xEBDE, srl, GR32>;
-def SRLG : BinaryRSY<"srlg", 0xEB0C, srl, GR64>;
+defm SRL : BinaryRSAndK<"srl", 0x88, 0xEBDE, shiftop<srl>, GR32>;
+def SRLG : BinaryRSY<"srlg", 0xEB0C, shiftop<srl>, GR64>;
 def SRDL : BinaryRS<"srdl", 0x8C, null_frag, GR128>;
 
 // Arithmetic shift right.
 let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in {
-  defm SRA : BinaryRSAndK<"sra", 0x8A, 0xEBDC, sra, GR32>;
-  def SRAG : BinaryRSY<"srag", 0xEB0A, sra, GR64>;
+  defm SRA : BinaryRSAndK<"sra", 0x8A, 0xEBDC, shiftop<sra>, GR32>;
+  def SRAG : BinaryRSY<"srag", 0xEB0A, shiftop<sra>, GR64>;
   def SRDA : BinaryRS<"srda", 0x8E, null_frag, GR128>;
 }
 
 // Rotate left.
-def RLL  : BinaryRSY<"rll",  0xEB1D, rotl, GR32>;
-def RLLG : BinaryRSY<"rllg", 0xEB1C, rotl, GR64>;
+def RLL  : BinaryRSY<"rll",  0xEB1D, shiftop<rotl>, GR32>;
+def RLLG : BinaryRSY<"rllg", 0xEB1C, shiftop<rotl>, GR64>;
 
 // Rotate second operand left and inserted selected bits into first operand.
 // These can act like 32-bit operands provided that the constant start and
@@ -2162,29 +2162,29 @@ def : Pat<(and (xor GR64:$x, (i64 -1)), GR64:$y),
 // Complexity is added so that we match this before we match NILF on the AND
 // operation alone.
 let AddedComplexity = 4 in {
-  def : Pat<(shl GR32:$val, (and GR32:$shift, uimm32:$imm)),
-            (SLL GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+  def : Pat<(shl GR32:$val, (and GR32:$shift, imm32zx16trunc:$imm)),
+            (SLL GR32:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>;
 
-  def : Pat<(sra GR32:$val, (and GR32:$shift, uimm32:$imm)),
-            (SRA GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+  def : Pat<(sra GR32:$val, (and GR32:$shift, imm32zx16trunc:$imm)),
+            (SRA GR32:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>;
 
-  def : Pat<(srl GR32:$val, (and GR32:$shift, uimm32:$imm)),
-            (SRL GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+  def : Pat<(srl GR32:$val, (and GR32:$shift, imm32zx16trunc:$imm)),
+            (SRL GR32:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>;
 
-  def : Pat<(shl GR64:$val, (and GR32:$shift, uimm32:$imm)),
-            (SLLG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+  def : Pat<(shl GR64:$val, (and GR32:$shift, imm32zx16trunc:$imm)),
+            (SLLG GR64:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>;
 
-  def : Pat<(sra GR64:$val, (and GR32:$shift, uimm32:$imm)),
-            (SRAG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+  def : Pat<(sra GR64:$val, (and GR32:$shift, imm32zx16trunc:$imm)),
+            (SRAG GR64:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>;
 
-  def : Pat<(srl GR64:$val, (and GR32:$shift, uimm32:$imm)),
-            (SRLG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+  def : Pat<(srl GR64:$val, (and GR32:$shift, imm32zx16trunc:$imm)),
+            (SRLG GR64:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>;
 
-  def : Pat<(rotl GR32:$val, (and GR32:$shift, uimm32:$imm)),
-            (RLL GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+  def : Pat<(rotl GR32:$val, (and GR32:$shift, imm32zx16trunc:$imm)),
+            (RLL GR32:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>;
 
-  def : Pat<(rotl GR64:$val, (and GR32:$shift, uimm32:$imm)),
-            (RLLG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+  def : Pat<(rotl GR64:$val, (and GR32:$shift, imm32zx16trunc:$imm)),
+            (RLLG GR64:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>;
 }
 
 // Peepholes for turning scalar operations into block operations.
diff --git a/lib/Target/SystemZ/SystemZOperands.td b/lib/Target/SystemZ/SystemZOperands.td
index da682cb4e5a..7bf32bf19a4 100644
--- a/lib/Target/SystemZ/SystemZOperands.td
+++ b/lib/Target/SystemZ/SystemZOperands.td
@@ -357,6 +357,7 @@ def imm32zx16 : Immediate<i32, [{
 }], UIMM16, "U16Imm">;
 
 def imm32sx16trunc : Immediate<i32, [{}], SIMM16, "S16Imm">;
+def imm32zx16trunc : Immediate<i32, [{}], UIMM16, "U16Imm">;
 
 // Full 32-bit immediates.  we need both signed and unsigned versions
 // because the assembler is picky.  E.g. AFI requires signed operands
diff --git a/lib/Target/SystemZ/SystemZOperators.td b/lib/Target/SystemZ/SystemZOperators.td
index 3cfe23aec41..5103867e2d9 100644
--- a/lib/Target/SystemZ/SystemZOperators.td
+++ b/lib/Target/SystemZ/SystemZOperators.td
@@ -697,6 +697,16 @@ class storei<SDPatternOperator operator, SDPatternOperator store = store>
   : PatFrag<(ops node:$addr),
             (store (operator), node:$addr)>;
 
+// Create a shift operator that optionally ignores an AND of the
+// shift count with an immediate if the bottom 6 bits are all set.
+def imm32bottom6set : PatLeaf<(i32 imm), [{
+  return (N->getZExtValue() & 0x3f) == 0x3f;
+}]>;
+class shiftop<SDPatternOperator operator>
+  : PatFrags<(ops node:$val, node:$count),
+             [(operator node:$val, node:$count),
+              (operator node:$val, (and node:$count, imm32bottom6set))]>;
+
 // Vector representation of all-zeros and all-ones.
 def z_vzero : PatFrag<(ops), (bitconvert (v16i8 (z_byte_mask (i32 0))))>;
 def z_vones : PatFrag<(ops), (bitconvert (v16i8 (z_byte_mask (i32 65535))))>;
author	Ulrich Weigand <ulrich.weigand@de.ibm.com>	2018-08-01 11:57:58 +0000
committer	Ulrich Weigand <ulrich.weigand@de.ibm.com>	2018-08-01 11:57:58 +0000
commit	afca5c23cfc340f2af5fa913d28c1398456dc8ab (patch)
tree	512332f201281aabc7ee575fd05f16a81461aec2 /lib/Target/SystemZ
parent	c6fbe17b98748ca0556ff61ca7cf3a3e15674a36 (diff)