author     Ulrich Weigand <ulrich.weigand@de.ibm.com>   2018-04-30 17:54:28 +0000
committer  Ulrich Weigand <ulrich.weigand@de.ibm.com>   2018-04-30 17:54:28 +0000
commit     815278b2cd8c9afce0f84879773a4c4a31645b64 (patch)
tree       3ac448c1aee1d61b38959daf93c643cceba9357d /lib/Target/SystemZ
parent     488cbd2beb113428aceb1c0e3cde4aef84a1c85a (diff)
[SystemZ] Handle SADDO et.al. and ADD/SUBCARRY
This provides an optimized implementation of SADDO/SSUBO/UADDO/USUBO as well as ADDCARRY/SUBCARRY on top of the new CC implementation.

In particular, multi-word arithmetic now uses UADDO/ADDCARRY instead of the old ADDC/ADDE logic, which means we no longer need to use "glue" links for those instructions. This also allows making full use of the memory-based instructions like ALSI, which previously couldn't be recognized due to limitations in the DAG matcher.

Also, the llvm.sadd.with.overflow et al. intrinsics now expand directly to the ADD instructions plus a check for a CC value of 3 (overflow).

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@331203 91177308-0d34-0410-b5e6-96231b3b80d8
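For illustration, a minimal LLVM IR sketch of the kind of code this affects (the function name, constants, and control flow here are hypothetical, not taken from the commit); with this change the backend can select the intrinsic to a single add instruction followed by a branch on the overflow condition (CC 3):

declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32)

define i32 @sample(i32 %a, i32 %b) {
entry:
  ; The intrinsic returns the sum and an overflow flag as a struct.
  %t   = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
  %val = extractvalue { i32, i1 } %t, 0
  %ovf = extractvalue { i32, i1 } %t, 1
  br i1 %ovf, label %overflow, label %cont

cont:                                   ; no overflow: use the sum
  ret i32 %val

overflow:                               ; arbitrary overflow handling
  ret i32 2147483647
}
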
Diffstat (limited to 'lib/Target/SystemZ')
-rw-r--r--  lib/Target/SystemZ/SystemZ.h                 16
-rw-r--r--  lib/Target/SystemZ/SystemZISelDAGToDAG.cpp  172
-rw-r--r--  lib/Target/SystemZ/SystemZISelLowering.cpp  163
-rw-r--r--  lib/Target/SystemZ/SystemZISelLowering.h     16
-rw-r--r--  lib/Target/SystemZ/SystemZInstrInfo.cpp      30
-rw-r--r--  lib/Target/SystemZ/SystemZInstrInfo.td      231
-rw-r--r--  lib/Target/SystemZ/SystemZOperands.td        42
-rw-r--r--  lib/Target/SystemZ/SystemZOperators.td       21
8 files changed, 614 insertions, 77 deletions
diff --git a/lib/Target/SystemZ/SystemZ.h b/lib/Target/SystemZ/SystemZ.h
index 9a8e508e411..fdbde3d8dbc 100644
--- a/lib/Target/SystemZ/SystemZ.h
+++ b/lib/Target/SystemZ/SystemZ.h
@@ -47,6 +47,22 @@ const unsigned CCMASK_CMP_O = CCMASK_ANY ^ CCMASK_CMP_UO;
const unsigned CCMASK_ICMP = CCMASK_0 | CCMASK_1 | CCMASK_2;
const unsigned CCMASK_FCMP = CCMASK_0 | CCMASK_1 | CCMASK_2 | CCMASK_3;
+// Condition-code mask assignments for arithmetical operations.
+const unsigned CCMASK_ARITH_EQ = CCMASK_0;
+const unsigned CCMASK_ARITH_LT = CCMASK_1;
+const unsigned CCMASK_ARITH_GT = CCMASK_2;
+const unsigned CCMASK_ARITH_OVERFLOW = CCMASK_3;
+const unsigned CCMASK_ARITH = CCMASK_ANY;
+
+// Condition-code mask assignments for logical operations.
+const unsigned CCMASK_LOGICAL_ZERO = CCMASK_0 | CCMASK_2;
+const unsigned CCMASK_LOGICAL_NONZERO = CCMASK_1 | CCMASK_2;
+const unsigned CCMASK_LOGICAL_CARRY = CCMASK_2 | CCMASK_3;
+const unsigned CCMASK_LOGICAL_NOCARRY = CCMASK_0 | CCMASK_1;
+const unsigned CCMASK_LOGICAL_BORROW = CCMASK_LOGICAL_NOCARRY;
+const unsigned CCMASK_LOGICAL_NOBORROW = CCMASK_LOGICAL_CARRY;
+const unsigned CCMASK_LOGICAL = CCMASK_ANY;
+
// Condition-code mask assignments for CS.
const unsigned CCMASK_CS_EQ = CCMASK_0;
const unsigned CCMASK_CS_NE = CCMASK_1;
diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
index 26af3f4ebcc..3e13cf249a5 100644
--- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -310,6 +310,11 @@ class SystemZDAGToDAGISel : public SelectionDAGISel {
// Try to use scatter instruction Opcode to implement store Store.
bool tryScatter(StoreSDNode *Store, unsigned Opcode);
+ // Change a chain of {load; op; store} of the same value into a simple op
+ // through memory of that value, if the uses of the modified value and its
+ // address are suitable.
+ bool tryFoldLoadStoreIntoMemOperand(SDNode *Node);
+
// Return true if Load and Store are loads and stores of the same size
// and are guaranteed not to overlap. Such operations can be implemented
// using block (SS-format) instructions.
@@ -1196,6 +1201,171 @@ bool SystemZDAGToDAGISel::tryScatter(StoreSDNode *Store, unsigned Opcode) {
return true;
}
+// Check whether or not the chain ending in StoreNode is suitable for doing
+// the {load; op; store} to modify transformation.
+static bool isFusableLoadOpStorePattern(StoreSDNode *StoreNode,
+ SDValue StoredVal, SelectionDAG *CurDAG,
+ LoadSDNode *&LoadNode,
+ SDValue &InputChain) {
+ // Is the stored value result 0 of the operation?
+ if (StoredVal.getResNo() != 0)
+ return false;
+
+ // Is the store the only user of the operation result?
+ if (!StoredVal.getNode()->hasNUsesOfValue(1, 0))
+ return false;
+
+ // Is the store non-extending and non-indexed?
+ if (!ISD::isNormalStore(StoreNode) || StoreNode->isNonTemporal())
+ return false;
+
+ SDValue Load = StoredVal->getOperand(0);
+ // Is the stored value a non-extending and non-indexed load?
+ if (!ISD::isNormalLoad(Load.getNode()))
+ return false;
+
+ // Return LoadNode by reference.
+ LoadNode = cast<LoadSDNode>(Load);
+
+ // Is store the only read of the loaded value?
+ if (!Load.hasOneUse())
+ return false;
+
+ // Is the address of the store the same as the load?
+ if (LoadNode->getBasePtr() != StoreNode->getBasePtr() ||
+ LoadNode->getOffset() != StoreNode->getOffset())
+ return false;
+
+ // Check if the chain is produced by the load or is a TokenFactor with
+ // the load output chain as an operand. Return InputChain by reference.
+ SDValue Chain = StoreNode->getChain();
+
+ bool ChainCheck = false;
+ if (Chain == Load.getValue(1)) {
+ ChainCheck = true;
+ InputChain = LoadNode->getChain();
+ } else if (Chain.getOpcode() == ISD::TokenFactor) {
+ SmallVector<SDValue, 4> ChainOps;
+ for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i) {
+ SDValue Op = Chain.getOperand(i);
+ if (Op == Load.getValue(1)) {
+ ChainCheck = true;
+ // Drop Load, but keep its chain. No cycle check necessary.
+ ChainOps.push_back(Load.getOperand(0));
+ continue;
+ }
+
+ // Make sure using Op as part of the chain would not cause a cycle here.
+ // In theory, we could check whether the chain node is a predecessor of
+ // the load. But that can be very expensive. Instead visit the uses and
+ // make sure they all have smaller node id than the load.
+ int LoadId = LoadNode->getNodeId();
+ for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
+ UE = UI->use_end(); UI != UE; ++UI) {
+ if (UI.getUse().getResNo() != 0)
+ continue;
+ if (UI->getNodeId() > LoadId)
+ return false;
+ }
+
+ ChainOps.push_back(Op);
+ }
+
+ if (ChainCheck)
+ // Make a new TokenFactor with all the other input chains except
+ // for the load.
+ InputChain = CurDAG->getNode(ISD::TokenFactor, SDLoc(Chain),
+ MVT::Other, ChainOps);
+ }
+ if (!ChainCheck)
+ return false;
+
+ return true;
+}
+
+// Change a chain of {load; op; store} of the same value into a simple op
+// through memory of that value, if the uses of the modified value and its
+// address are suitable.
+//
+// The tablegen memory operand patterns are currently not able to match
+// the case where the CC result of the original operation is used.
+//
+// See the equivalent routine in X86ISelDAGToDAG for further comments.
+bool SystemZDAGToDAGISel::tryFoldLoadStoreIntoMemOperand(SDNode *Node) {
+ StoreSDNode *StoreNode = cast<StoreSDNode>(Node);
+ SDValue StoredVal = StoreNode->getOperand(1);
+ unsigned Opc = StoredVal->getOpcode();
+ SDLoc DL(StoreNode);
+
+ // Before we try to select anything, make sure this is memory operand size
+ // and opcode we can handle. Note that this must match the code below that
+ // actually lowers the opcodes.
+ EVT MemVT = StoreNode->getMemoryVT();
+ unsigned NewOpc = 0;
+ bool NegateOperand = false;
+ switch (Opc) {
+ default:
+ return false;
+ case SystemZISD::SSUBO:
+ NegateOperand = true;
+ /* fall through */
+ case SystemZISD::SADDO:
+ if (MemVT == MVT::i32)
+ NewOpc = SystemZ::ASI;
+ else if (MemVT == MVT::i64)
+ NewOpc = SystemZ::AGSI;
+ else
+ return false;
+ break;
+ case SystemZISD::USUBO:
+ NegateOperand = true;
+ /* fall through */
+ case SystemZISD::UADDO:
+ if (MemVT == MVT::i32)
+ NewOpc = SystemZ::ALSI;
+ else if (MemVT == MVT::i64)
+ NewOpc = SystemZ::ALGSI;
+ else
+ return false;
+ break;
+ }
+
+ LoadSDNode *LoadNode = nullptr;
+ SDValue InputChain;
+ if (!isFusableLoadOpStorePattern(StoreNode, StoredVal, CurDAG, LoadNode,
+ InputChain))
+ return false;
+
+ SDValue Operand = StoredVal.getOperand(1);
+ auto *OperandC = dyn_cast<ConstantSDNode>(Operand);
+ if (!OperandC)
+ return false;
+ auto OperandV = OperandC->getAPIntValue();
+ if (NegateOperand)
+ OperandV = -OperandV;
+ if (OperandV.getMinSignedBits() > 8)
+ return false;
+ Operand = CurDAG->getTargetConstant(OperandV, DL, MemVT);
+
+ SDValue Base, Disp;
+ if (!selectBDAddr20Only(StoreNode->getBasePtr(), Base, Disp))
+ return false;
+
+ SDValue Ops[] = { Base, Disp, Operand, InputChain };
+ MachineSDNode *Result =
+ CurDAG->getMachineNode(NewOpc, DL, MVT::i32, MVT::Other, Ops);
+
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(2);
+ MemOp[0] = StoreNode->getMemOperand();
+ MemOp[1] = LoadNode->getMemOperand();
+ Result->setMemRefs(MemOp, MemOp + 2);
+
+ ReplaceUses(SDValue(StoreNode, 0), SDValue(Result, 1));
+ ReplaceUses(SDValue(StoredVal.getNode(), 1), SDValue(Result, 0));
+ CurDAG->RemoveDeadNode(Node);
+ return true;
+}
+
bool SystemZDAGToDAGISel::canUseBlockOperation(StoreSDNode *Store,
LoadSDNode *Load) const {
// Check that the two memory operands have the same size.
@@ -1358,6 +1528,8 @@ void SystemZDAGToDAGISel::Select(SDNode *Node) {
}
case ISD::STORE: {
+ if (tryFoldLoadStoreIntoMemOperand(Node))
+ return;
auto *Store = cast<StoreSDNode>(Node);
unsigned ElemBitSize = Store->getValue().getValueSizeInBits();
if (ElemBitSize == 32) {
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index ab314e1f311..55e82d62cef 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -164,6 +164,18 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SDIVREM, VT, Custom);
setOperationAction(ISD::UDIVREM, VT, Custom);
+ // Support addition/subtraction with overflow.
+ setOperationAction(ISD::SADDO, VT, Custom);
+ setOperationAction(ISD::SSUBO, VT, Custom);
+
+ // Support addition/subtraction with carry.
+ setOperationAction(ISD::UADDO, VT, Custom);
+ setOperationAction(ISD::USUBO, VT, Custom);
+
+ // Support carry in as value rather than glue.
+ setOperationAction(ISD::ADDCARRY, VT, Custom);
+ setOperationAction(ISD::SUBCARRY, VT, Custom);
+
// Lower ATOMIC_LOAD and ATOMIC_STORE into normal volatile loads and
// stores, putting a serialization instruction after the stores.
setOperationAction(ISD::ATOMIC_LOAD, VT, Custom);
@@ -3204,6 +3216,99 @@ SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
MVT::i64, HighOp, Low32);
}
+// Lower SADDO/SSUBO/UADDO/USUBO nodes.
+SDValue SystemZTargetLowering::lowerXALUO(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDNode *N = Op.getNode();
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ SDLoc DL(N);
+ unsigned BaseOp = 0;
+ unsigned CCValid = 0;
+ unsigned CCMask = 0;
+
+ switch (Op.getOpcode()) {
+ default: llvm_unreachable("Unknown instruction!");
+ case ISD::SADDO:
+ BaseOp = SystemZISD::SADDO;
+ CCValid = SystemZ::CCMASK_ARITH;
+ CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
+ break;
+ case ISD::SSUBO:
+ BaseOp = SystemZISD::SSUBO;
+ CCValid = SystemZ::CCMASK_ARITH;
+ CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
+ break;
+ case ISD::UADDO:
+ BaseOp = SystemZISD::UADDO;
+ CCValid = SystemZ::CCMASK_LOGICAL;
+ CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
+ break;
+ case ISD::USUBO:
+ BaseOp = SystemZISD::USUBO;
+ CCValid = SystemZ::CCMASK_LOGICAL;
+ CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
+ break;
+ }
+
+ SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
+ SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);
+
+ SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
+ if (N->getValueType(1) == MVT::i1)
+ SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
+
+ return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
+}
+
+// Lower ADDCARRY/SUBCARRY nodes.
+SDValue SystemZTargetLowering::lowerADDSUBCARRY(SDValue Op,
+ SelectionDAG &DAG) const {
+
+ SDNode *N = Op.getNode();
+ MVT VT = N->getSimpleValueType(0);
+
+ // Let legalize expand this if it isn't a legal type yet.
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
+ return SDValue();
+
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ SDValue Carry = Op.getOperand(2);
+ SDLoc DL(N);
+ unsigned BaseOp = 0;
+ unsigned CCValid = 0;
+ unsigned CCMask = 0;
+
+ switch (Op.getOpcode()) {
+ default: llvm_unreachable("Unknown instruction!");
+ case ISD::ADDCARRY:
+ BaseOp = SystemZISD::ADDCARRY;
+ CCValid = SystemZ::CCMASK_LOGICAL;
+ CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
+ break;
+ case ISD::SUBCARRY:
+ BaseOp = SystemZISD::SUBCARRY;
+ CCValid = SystemZ::CCMASK_LOGICAL;
+ CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
+ break;
+ }
+
+ // Set the condition code from the carry flag.
+ Carry = DAG.getNode(SystemZISD::GET_CCMASK, DL, MVT::i32, Carry,
+ DAG.getConstant(CCValid, DL, MVT::i32),
+ DAG.getConstant(CCMask, DL, MVT::i32));
+
+ SDVTList VTs = DAG.getVTList(VT, MVT::i32);
+ SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS, Carry);
+
+ SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
+ if (N->getValueType(1) == MVT::i1)
+ SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
+
+ return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
+}
+
SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
@@ -4693,6 +4798,14 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
return lowerSDIVREM(Op, DAG);
case ISD::UDIVREM:
return lowerUDIVREM(Op, DAG);
+ case ISD::SADDO:
+ case ISD::SSUBO:
+ case ISD::UADDO:
+ case ISD::USUBO:
+ return lowerXALUO(Op, DAG);
+ case ISD::ADDCARRY:
+ case ISD::SUBCARRY:
+ return lowerADDSUBCARRY(Op, DAG);
case ISD::OR:
return lowerOR(Op, DAG);
case ISD::CTPOP:
@@ -4871,6 +4984,13 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
OPCODE(UMUL_LOHI);
OPCODE(SDIVREM);
OPCODE(UDIVREM);
+ OPCODE(SADDO);
+ OPCODE(SSUBO);
+ OPCODE(UADDO);
+ OPCODE(USUBO);
+ OPCODE(ADDCARRY);
+ OPCODE(SUBCARRY);
+ OPCODE(GET_CCMASK);
OPCODE(MVC);
OPCODE(MVC_LOOP);
OPCODE(NC);
@@ -5560,6 +5680,48 @@ SDValue SystemZTargetLowering::combineSELECT_CCMASK(
return SDValue();
}
+
+SDValue SystemZTargetLowering::combineGET_CCMASK(
+ SDNode *N, DAGCombinerInfo &DCI) const {
+
+ // Optimize away GET_CCMASK (SELECT_CCMASK) if the CC masks are compatible
+ auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2));
+ if (!CCValid || !CCMask)
+ return SDValue();
+ int CCValidVal = CCValid->getZExtValue();
+ int CCMaskVal = CCMask->getZExtValue();
+
+ SDValue Select = N->getOperand(0);
+ if (Select->getOpcode() != SystemZISD::SELECT_CCMASK)
+ return SDValue();
+
+ auto *SelectCCValid = dyn_cast<ConstantSDNode>(Select->getOperand(2));
+ auto *SelectCCMask = dyn_cast<ConstantSDNode>(Select->getOperand(3));
+ if (!SelectCCValid || !SelectCCMask)
+ return SDValue();
+ int SelectCCValidVal = SelectCCValid->getZExtValue();
+ int SelectCCMaskVal = SelectCCMask->getZExtValue();
+
+ auto *TrueVal = dyn_cast<ConstantSDNode>(Select->getOperand(0));
+ auto *FalseVal = dyn_cast<ConstantSDNode>(Select->getOperand(1));
+ if (!TrueVal || !FalseVal)
+ return SDValue();
+ if (TrueVal->getZExtValue() != 0 && FalseVal->getZExtValue() == 0)
+ ;
+ else if (TrueVal->getZExtValue() == 0 && FalseVal->getZExtValue() != 0)
+ SelectCCMaskVal ^= SelectCCValidVal;
+ else
+ return SDValue();
+
+ if (SelectCCValidVal & ~CCValidVal)
+ return SDValue();
+ if (SelectCCMaskVal != (CCMaskVal & SelectCCValidVal))
+ return SDValue();
+
+ return Select->getOperand(4);
+}
+
SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
switch(N->getOpcode()) {
@@ -5580,6 +5742,7 @@ SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::ROTL: return combineSHIFTROT(N, DCI);
case SystemZISD::BR_CCMASK: return combineBR_CCMASK(N, DCI);
case SystemZISD::SELECT_CCMASK: return combineSELECT_CCMASK(N, DCI);
+ case SystemZISD::GET_CCMASK: return combineGET_CCMASK(N, DCI);
}
return SDValue();
diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h
index 16a8291aba6..0ca93a38a01 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/lib/Target/SystemZ/SystemZISelLowering.h
@@ -93,6 +93,19 @@ enum NodeType : unsigned {
SDIVREM,
UDIVREM,
+ // Add/subtract with overflow/carry. These have the same operands as
+ // the corresponding standard operations, except with the carry flag
+ // replaced by a condition code value.
+ SADDO, SSUBO, UADDO, USUBO, ADDCARRY, SUBCARRY,
+
+ // Set the condition code from a boolean value in operand 0.
+ // Operand 1 is a mask of all condition-code values that may result from this
+ // operation, operand 2 is a mask of condition-code values that may result
+ // if the boolean is true.
+ // Note that this operation is always optimized away, we will never
+ // generate any code for it.
+ GET_CCMASK,
+
// Use a series of MVCs to copy bytes from one memory location to another.
// The operands are:
// - the target address
@@ -548,6 +561,8 @@ private:
SDValue lowerUMUL_LOHI(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerSDIVREM(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerXALUO(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerADDSUBCARRY(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerOR(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerCTPOP(SDValue Op, SelectionDAG &DAG) const;
@@ -590,6 +605,7 @@ private:
SDValue combineSHIFTROT(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineBR_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineSELECT_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue combineGET_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const;
// If the last instruction before MBBI in MBB was some form of COMPARE,
// try to replace it with a COMPARE AND BRANCH just before MBBI.
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp
index c12d02b1793..f094cf18dfd 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -1191,6 +1191,36 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
return BuiltMI;
}
+ if ((Opcode == SystemZ::ALFI && OpNum == 0 &&
+ isInt<8>((int32_t)MI.getOperand(2).getImm())) ||
+ (Opcode == SystemZ::ALGFI && OpNum == 0 &&
+ isInt<8>((int64_t)MI.getOperand(2).getImm()))) {
+ // AL(G)FI %reg, CONST -> AL(G)SI %mem, CONST
+ Opcode = (Opcode == SystemZ::ALFI ? SystemZ::ALSI : SystemZ::ALGSI);
+ MachineInstr *BuiltMI =
+ BuildMI(*InsertPt->getParent(), InsertPt, MI.getDebugLoc(), get(Opcode))
+ .addFrameIndex(FrameIndex)
+ .addImm(0)
+ .addImm((int8_t)MI.getOperand(2).getImm());
+ transferDeadCC(&MI, BuiltMI);
+ return BuiltMI;
+ }
+
+ if ((Opcode == SystemZ::SLFI && OpNum == 0 &&
+ isInt<8>((int32_t)-MI.getOperand(2).getImm())) ||
+ (Opcode == SystemZ::SLGFI && OpNum == 0 &&
+ isInt<8>((int64_t)-MI.getOperand(2).getImm()))) {
+ // SL(G)FI %reg, CONST -> AL(G)SI %mem, -CONST
+ Opcode = (Opcode == SystemZ::SLFI ? SystemZ::ALSI : SystemZ::ALGSI);
+ MachineInstr *BuiltMI =
+ BuildMI(*InsertPt->getParent(), InsertPt, MI.getDebugLoc(), get(Opcode))
+ .addFrameIndex(FrameIndex)
+ .addImm(0)
+ .addImm((int8_t)-MI.getOperand(2).getImm());
+ transferDeadCC(&MI, BuiltMI);
+ return BuiltMI;
+ }
+
if (Opcode == SystemZ::LGDR || Opcode == SystemZ::LDGR) {
bool Op0IsGPR = (Opcode == SystemZ::LGDR);
bool Op1IsGPR = (Opcode == SystemZ::LDGR);
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td
index b5dd1c66335..5ac0c43fb3f 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -891,12 +891,12 @@ def : Pat<(or (zext32 GR32:$src), imm64hf32:$imm),
// Addition
//===----------------------------------------------------------------------===//
-// Plain addition.
+// Addition producing a signed overflow flag.
let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0x8 in {
// Addition of a register.
let isCommutable = 1 in {
- defm AR : BinaryRRAndK<"ar", 0x1A, 0xB9F8, add, GR32, GR32>;
- defm AGR : BinaryRREAndK<"agr", 0xB908, 0xB9E8, add, GR64, GR64>;
+ defm AR : BinaryRRAndK<"ar", 0x1A, 0xB9F8, z_saddo, GR32, GR32>;
+ defm AGR : BinaryRREAndK<"agr", 0xB908, 0xB9E8, z_saddo, GR64, GR64>;
}
def AGFR : BinaryRRE<"agfr", 0xB918, null_frag, GR64, GR32>;
@@ -907,38 +907,38 @@ let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0x8 in {
Requires<[FeatureHighWord]>;
// Addition of signed 16-bit immediates.
- defm AHIMux : BinaryRIAndKPseudo<"ahimux", add, GRX32, imm32sx16>;
- defm AHI : BinaryRIAndK<"ahi", 0xA7A, 0xECD8, add, GR32, imm32sx16>;
- defm AGHI : BinaryRIAndK<"aghi", 0xA7B, 0xECD9, add, GR64, imm64sx16>;
+ defm AHIMux : BinaryRIAndKPseudo<"ahimux", z_saddo, GRX32, imm32sx16>;
+ defm AHI : BinaryRIAndK<"ahi", 0xA7A, 0xECD8, z_saddo, GR32, imm32sx16>;
+ defm AGHI : BinaryRIAndK<"aghi", 0xA7B, 0xECD9, z_saddo, GR64, imm64sx16>;
// Addition of signed 32-bit immediates.
- def AFIMux : BinaryRIPseudo<add, GRX32, simm32>,
+ def AFIMux : BinaryRIPseudo<z_saddo, GRX32, simm32>,
Requires<[FeatureHighWord]>;
- def AFI : BinaryRIL<"afi", 0xC29, add, GR32, simm32>;
- def AIH : BinaryRIL<"aih", 0xCC8, add, GRH32, simm32>,
+ def AFI : BinaryRIL<"afi", 0xC29, z_saddo, GR32, simm32>;
+ def AIH : BinaryRIL<"aih", 0xCC8, z_saddo, GRH32, simm32>,
Requires<[FeatureHighWord]>;
- def AGFI : BinaryRIL<"agfi", 0xC28, add, GR64, imm64sx32>;
+ def AGFI : BinaryRIL<"agfi", 0xC28, z_saddo, GR64, imm64sx32>;
// Addition of memory.
- defm AH : BinaryRXPair<"ah", 0x4A, 0xE37A, add, GR32, asextloadi16, 2>;
- defm A : BinaryRXPair<"a", 0x5A, 0xE35A, add, GR32, load, 4>;
- def AGH : BinaryRXY<"agh", 0xE338, add, GR64, asextloadi16, 2>,
+ defm AH : BinaryRXPair<"ah", 0x4A, 0xE37A, z_saddo, GR32, asextloadi16, 2>;
+ defm A : BinaryRXPair<"a", 0x5A, 0xE35A, z_saddo, GR32, load, 4>;
+ def AGH : BinaryRXY<"agh", 0xE338, z_saddo, GR64, asextloadi16, 2>,
Requires<[FeatureMiscellaneousExtensions2]>;
- def AGF : BinaryRXY<"agf", 0xE318, add, GR64, asextloadi32, 4>;
- def AG : BinaryRXY<"ag", 0xE308, add, GR64, load, 8>;
+ def AGF : BinaryRXY<"agf", 0xE318, z_saddo, GR64, asextloadi32, 4>;
+ def AG : BinaryRXY<"ag", 0xE308, z_saddo, GR64, load, 8>;
// Addition to memory.
- def ASI : BinarySIY<"asi", 0xEB6A, add, imm32sx8>;
- def AGSI : BinarySIY<"agsi", 0xEB7A, add, imm64sx8>;
+ def ASI : BinarySIY<"asi", 0xEB6A, null_frag, imm32sx8>;
+ def AGSI : BinarySIY<"agsi", 0xEB7A, null_frag, imm64sx8>;
}
-defm : SXB<add, GR64, AGFR>;
+defm : SXB<z_saddo, GR64, AGFR>;
// Addition producing a carry.
let Defs = [CC] in {
// Addition of a register.
let isCommutable = 1 in {
- defm ALR : BinaryRRAndK<"alr", 0x1E, 0xB9FA, addc, GR32, GR32>;
- defm ALGR : BinaryRREAndK<"algr", 0xB90A, 0xB9EA, addc, GR64, GR64>;
+ defm ALR : BinaryRRAndK<"alr", 0x1E, 0xB9FA, z_uaddo, GR32, GR32>;
+ defm ALGR : BinaryRREAndK<"algr", 0xB90A, 0xB9EA, z_uaddo, GR64, GR64>;
}
def ALGFR : BinaryRRE<"algfr", 0xB91A, null_frag, GR64, GR32>;
@@ -949,56 +949,104 @@ let Defs = [CC] in {
Requires<[FeatureHighWord]>;
// Addition of signed 16-bit immediates.
- def ALHSIK : BinaryRIE<"alhsik", 0xECDA, addc, GR32, imm32sx16>,
+ def ALHSIK : BinaryRIE<"alhsik", 0xECDA, z_uaddo, GR32, imm32sx16>,
Requires<[FeatureDistinctOps]>;
- def ALGHSIK : BinaryRIE<"alghsik", 0xECDB, addc, GR64, imm64sx16>,
+ def ALGHSIK : BinaryRIE<"alghsik", 0xECDB, z_uaddo, GR64, imm64sx16>,
Requires<[FeatureDistinctOps]>;
// Addition of unsigned 32-bit immediates.
- def ALFI : BinaryRIL<"alfi", 0xC2B, addc, GR32, uimm32>;
- def ALGFI : BinaryRIL<"algfi", 0xC2A, addc, GR64, imm64zx32>;
+ def ALFI : BinaryRIL<"alfi", 0xC2B, z_uaddo, GR32, uimm32>;
+ def ALGFI : BinaryRIL<"algfi", 0xC2A, z_uaddo, GR64, imm64zx32>;
// Addition of signed 32-bit immediates.
def ALSIH : BinaryRIL<"alsih", 0xCCA, null_frag, GRH32, simm32>,
Requires<[FeatureHighWord]>;
// Addition of memory.
- defm AL : BinaryRXPair<"al", 0x5E, 0xE35E, addc, GR32, load, 4>;
- def ALGF : BinaryRXY<"algf", 0xE31A, addc, GR64, azextloadi32, 4>;
- def ALG : BinaryRXY<"alg", 0xE30A, addc, GR64, load, 8>;
+ defm AL : BinaryRXPair<"al", 0x5E, 0xE35E, z_uaddo, GR32, load, 4>;
+ def ALGF : BinaryRXY<"algf", 0xE31A, z_uaddo, GR64, azextloadi32, 4>;
+ def ALG : BinaryRXY<"alg", 0xE30A, z_uaddo, GR64, load, 8>;
// Addition to memory.
def ALSI : BinarySIY<"alsi", 0xEB6E, null_frag, imm32sx8>;
def ALGSI : BinarySIY<"algsi", 0xEB7E, null_frag, imm64sx8>;
}
-defm : ZXB<addc, GR64, ALGFR>;
+defm : ZXB<z_uaddo, GR64, ALGFR>;
// Addition producing and using a carry.
let Defs = [CC], Uses = [CC] in {
// Addition of a register.
- def ALCR : BinaryRRE<"alcr", 0xB998, adde, GR32, GR32>;
- def ALCGR : BinaryRRE<"alcgr", 0xB988, adde, GR64, GR64>;
+ def ALCR : BinaryRRE<"alcr", 0xB998, z_addcarry, GR32, GR32>;
+ def ALCGR : BinaryRRE<"alcgr", 0xB988, z_addcarry, GR64, GR64>;
// Addition of memory.
- def ALC : BinaryRXY<"alc", 0xE398, adde, GR32, load, 4>;
- def ALCG : BinaryRXY<"alcg", 0xE388, adde, GR64, load, 8>;
+ def ALC : BinaryRXY<"alc", 0xE398, z_addcarry, GR32, load, 4>;
+ def ALCG : BinaryRXY<"alcg", 0xE388, z_addcarry, GR64, load, 8>;
}
// Addition that does not modify the condition code.
def ALSIHN : BinaryRIL<"alsihn", 0xCCB, null_frag, GRH32, simm32>,
Requires<[FeatureHighWord]>;
+// Map plain addition to either arithmetic or logical operation.
+
+def : Pat<(add GR32:$src1, GR32:$src2),
+ (AR GR32:$src1, GR32:$src2)>;
+def : Pat<(add GR64:$src1, GR64:$src2),
+ (AGR GR64:$src1, GR64:$src2)>;
+defm : SXB<add, GR64, AGFR>;
+defm : ZXB<add, GR64, ALGFR>;
+
+def : Pat<(add GRX32:$src1, imm32sx16:$src2),
+ (AHIMux GRX32:$src1, imm32sx16:$src2)>, Requires<[FeatureHighWord]>;
+def : Pat<(add GR32:$src1, imm32sx16:$src2),
+ (AHI GR32:$src1, imm32sx16:$src2)>;
+def : Pat<(add GR64:$src1, imm64sx16:$src2),
+ (AGHI GR64:$src1, imm64sx16:$src2)>;
+def : Pat<(add GRX32:$src1, simm32:$src2),
+ (AFIMux GRX32:$src1, simm32:$src2)>, Requires<[FeatureHighWord]>;
+def : Pat<(add GR32:$src1, simm32:$src2),
+ (AFI GR32:$src1, simm32:$src2)>;
+def : Pat<(add GRH32:$src1, simm32:$src2),
+ (AIH GRH32:$src1, simm32:$src2)>, Requires<[FeatureHighWord]>;
+def : Pat<(add GR64:$src1, imm64sx32:$src2),
+ (AGFI GR64:$src1, imm64sx32:$src2)>;
+def : Pat<(add GR64:$src1, imm64zx32:$src2),
+ (ALGFI GR64:$src1, imm64zx32:$src2)>;
+
+def : Pat<(add GR32:$src1, (asextloadi16 bdxaddr12pair:$addr)),
+ (AH GR32:$src1, bdxaddr12pair:$addr)>;
+def : Pat<(add GR32:$src1, (asextloadi16 bdxaddr20pair:$addr)),
+ (AHY GR32:$src1, bdxaddr20pair:$addr)>;
+def : Pat<(add GR32:$src1, (load bdxaddr12pair:$addr)),
+ (A GR32:$src1, bdxaddr12pair:$addr)>;
+def : Pat<(add GR32:$src1, (load bdxaddr20pair:$addr)),
+ (AY GR32:$src1, bdxaddr20pair:$addr)>;
+def : Pat<(add GR64:$src1, (asextloadi16 bdxaddr20only:$addr)),
+ (AGH GR64:$src1, bdxaddr20only:$addr)>,
+ Requires<[FeatureMiscellaneousExtensions2]>;
+def : Pat<(add GR64:$src1, (asextloadi32 bdxaddr20only:$addr)),
+ (AGF GR64:$src1, bdxaddr20only:$addr)>;
+def : Pat<(add GR64:$src1, (azextloadi32 bdxaddr20only:$addr)),
+ (ALGF GR64:$src1, bdxaddr20only:$addr)>;
+def : Pat<(add GR64:$src1, (load bdxaddr20only:$addr)),
+ (AG GR64:$src1, bdxaddr20only:$addr)>;
+
+def : Pat<(store (add (load bdaddr20only:$addr), imm32sx8:$src2), bdaddr20only:$addr),
+ (ASI bdaddr20only:$addr, imm32sx8:$src2)>;
+def : Pat<(store (add (load bdaddr20only:$addr), imm64sx8:$src2), bdaddr20only:$addr),
+ (AGSI bdaddr20only:$addr, imm64sx8:$src2)>;
+
//===----------------------------------------------------------------------===//
// Subtraction
//===----------------------------------------------------------------------===//
-// Plain subtraction. Although immediate forms exist, we use the
-// add-immediate instruction instead.
+// Subtraction producing a signed overflow flag.
let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0x8 in {
// Subtraction of a register.
- defm SR : BinaryRRAndK<"sr", 0x1B, 0xB9F9, sub, GR32, GR32>;
+ defm SR : BinaryRRAndK<"sr", 0x1B, 0xB9F9, z_ssubo, GR32, GR32>;
def SGFR : BinaryRRE<"sgfr", 0xB919, null_frag, GR64, GR32>;
- defm SGR : BinaryRREAndK<"sgr", 0xB909, 0xB9E9, sub, GR64, GR64>;
+ defm SGR : BinaryRREAndK<"sgr", 0xB909, 0xB9E9, z_ssubo, GR64, GR64>;
// Subtraction from a high register.
def SHHHR : BinaryRRFa<"shhhr", 0xB9C9, null_frag, GRH32, GRH32, GRH32>,
@@ -1007,21 +1055,39 @@ let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0x8 in {
Requires<[FeatureHighWord]>;
// Subtraction of memory.
- defm SH : BinaryRXPair<"sh", 0x4B, 0xE37B, sub, GR32, asextloadi16, 2>;
- defm S : BinaryRXPair<"s", 0x5B, 0xE35B, sub, GR32, load, 4>;
- def SGH : BinaryRXY<"sgh", 0xE339, sub, GR64, asextloadi16, 2>,
+ defm SH : BinaryRXPair<"sh", 0x4B, 0xE37B, z_ssubo, GR32, asextloadi16, 2>;
+ defm S : BinaryRXPair<"s", 0x5B, 0xE35B, z_ssubo, GR32, load, 4>;
+ def SGH : BinaryRXY<"sgh", 0xE339, z_ssubo, GR64, asextloadi16, 2>,
Requires<[FeatureMiscellaneousExtensions2]>;
- def SGF : BinaryRXY<"sgf", 0xE319, sub, GR64, asextloadi32, 4>;
- def SG : BinaryRXY<"sg", 0xE309, sub, GR64, load, 8>;
+ def SGF : BinaryRXY<"sgf", 0xE319, z_ssubo, GR64, asextloadi32, 4>;
+ def SG : BinaryRXY<"sg", 0xE309, z_ssubo, GR64, load, 8>;
+}
+defm : SXB<z_ssubo, GR64, SGFR>;
+
+// Subtracting an immediate is the same as adding the negated immediate.
+let AddedComplexity = 1 in {
+ def : Pat<(z_ssubo GR32:$src1, imm32sx16n:$src2),
+ (AHIMux GR32:$src1, imm32sx16n:$src2)>,
+ Requires<[FeatureHighWord]>;
+ def : Pat<(z_ssubo GR32:$src1, simm32n:$src2),
+ (AFIMux GR32:$src1, simm32n:$src2)>,
+ Requires<[FeatureHighWord]>;
+ def : Pat<(z_ssubo GR32:$src1, imm32sx16n:$src2),
+ (AHI GR32:$src1, imm32sx16n:$src2)>;
+ def : Pat<(z_ssubo GR32:$src1, simm32n:$src2),
+ (AFI GR32:$src1, simm32n:$src2)>;
+ def : Pat<(z_ssubo GR64:$src1, imm64sx16n:$src2),
+ (AGHI GR64:$src1, imm64sx16n:$src2)>;
+ def : Pat<(z_ssubo GR64:$src1, imm64sx32n:$src2),
+ (AGFI GR64:$src1, imm64sx32n:$src2)>;
}
-defm : SXB<sub, GR64, SGFR>;
// Subtraction producing a carry.
let Defs = [CC] in {
// Subtraction of a register.
- defm SLR : BinaryRRAndK<"slr", 0x1F, 0xB9FB, subc, GR32, GR32>;
+ defm SLR : BinaryRRAndK<"slr", 0x1F, 0xB9FB, z_usubo, GR32, GR32>;
def SLGFR : BinaryRRE<"slgfr", 0xB91B, null_frag, GR64, GR32>;
- defm SLGR : BinaryRREAndK<"slgr", 0xB90B, 0xB9EB, subc, GR64, GR64>;
+ defm SLGR : BinaryRREAndK<"slgr", 0xB90B, 0xB9EB, z_usubo, GR64, GR64>;
// Subtraction from a high register.
def SLHHHR : BinaryRRFa<"slhhhr", 0xB9CB, null_frag, GRH32, GRH32, GRH32>,
@@ -1029,29 +1095,68 @@ let Defs = [CC] in {
def SLHHLR : BinaryRRFa<"slhhlr", 0xB9DB, null_frag, GRH32, GRH32, GR32>,
Requires<[FeatureHighWord]>;
- // Subtraction of unsigned 32-bit immediates. These don't match
- // subc because we prefer addc for constants.
- def SLFI : BinaryRIL<"slfi", 0xC25, null_frag, GR32, uimm32>;
- def SLGFI : BinaryRIL<"slgfi", 0xC24, null_frag, GR64, imm64zx32>;
+ // Subtraction of unsigned 32-bit immediates.
+ def SLFI : BinaryRIL<"slfi", 0xC25, z_usubo, GR32, uimm32>;
+ def SLGFI : BinaryRIL<"slgfi", 0xC24, z_usubo, GR64, imm64zx32>;
// Subtraction of memory.
- defm SL : BinaryRXPair<"sl", 0x5F, 0xE35F, subc, GR32, load, 4>;
- def SLGF : BinaryRXY<"slgf", 0xE31B, subc, GR64, azextloadi32, 4>;
- def SLG : BinaryRXY<"slg", 0xE30B, subc, GR64, load, 8>;
+ defm SL : BinaryRXPair<"sl", 0x5F, 0xE35F, z_usubo, GR32, load, 4>;
+ def SLGF : BinaryRXY<"slgf", 0xE31B, z_usubo, GR64, azextloadi32, 4>;
+ def SLG : BinaryRXY<"slg", 0xE30B, z_usubo, GR64, load, 8>;
+}
+defm : ZXB<z_usubo, GR64, SLGFR>;
+
+// Subtracting an immediate is the same as adding the negated immediate.
+let AddedComplexity = 1 in {
+ def : Pat<(z_usubo GR32:$src1, imm32sx16n:$src2),
+ (ALHSIK GR32:$src1, imm32sx16n:$src2)>,
+ Requires<[FeatureDistinctOps]>;
+ def : Pat<(z_usubo GR64:$src1, imm64sx16n:$src2),
+ (ALGHSIK GR64:$src1, imm64sx16n:$src2)>,
+ Requires<[FeatureDistinctOps]>;
}
-defm : ZXB<subc, GR64, SLGFR>;
// Subtraction producing and using a carry.
let Defs = [CC], Uses = [CC] in {
// Subtraction of a register.
- def SLBR : BinaryRRE<"slbr", 0xB999, sube, GR32, GR32>;
- def SLBGR : BinaryRRE<"slbgr", 0xB989, sube, GR64, GR64>;
+ def SLBR : BinaryRRE<"slbr", 0xB999, z_subcarry, GR32, GR32>;
+ def SLBGR : BinaryRRE<"slbgr", 0xB989, z_subcarry, GR64, GR64>;
// Subtraction of memory.
- def SLB : BinaryRXY<"slb", 0xE399, sube, GR32, load, 4>;
- def SLBG : BinaryRXY<"slbg", 0xE389, sube, GR64, load, 8>;
+ def SLB : BinaryRXY<"slb", 0xE399, z_subcarry, GR32, load, 4>;
+ def SLBG : BinaryRXY<"slbg", 0xE389, z_subcarry, GR64, load, 8>;
}
+// Map plain subtraction to either arithmetic or logical operation.
+
+def : Pat<(sub GR32:$src1, GR32:$src2),
+ (SR GR32:$src1, GR32:$src2)>;
+def : Pat<(sub GR64:$src1, GR64:$src2),
+ (SGR GR64:$src1, GR64:$src2)>;
+defm : SXB<sub, GR64, SGFR>;
+defm : ZXB<sub, GR64, SLGFR>;
+
+def : Pat<(add GR64:$src1, imm64zx32n:$src2),
+ (SLGFI GR64:$src1, imm64zx32n:$src2)>;
+
+def : Pat<(sub GR32:$src1, (asextloadi16 bdxaddr12pair:$addr)),
+ (SH GR32:$src1, bdxaddr12pair:$addr)>;
+def : Pat<(sub GR32:$src1, (asextloadi16 bdxaddr20pair:$addr)),
+ (SHY GR32:$src1, bdxaddr20pair:$addr)>;
+def : Pat<(sub GR32:$src1, (load bdxaddr12pair:$addr)),
+ (S GR32:$src1, bdxaddr12pair:$addr)>;
+def : Pat<(sub GR32:$src1, (load bdxaddr20pair:$addr)),
+ (SY GR32:$src1, bdxaddr20pair:$addr)>;
+def : Pat<(sub GR64:$src1, (asextloadi16 bdxaddr20only:$addr)),
+ (SGH GR64:$src1, bdxaddr20only:$addr)>,
+ Requires<[FeatureMiscellaneousExtensions2]>;
+def : Pat<(sub GR64:$src1, (asextloadi32 bdxaddr20only:$addr)),
+ (SGF GR64:$src1, bdxaddr20only:$addr)>;
+def : Pat<(sub GR64:$src1, (azextloadi32 bdxaddr20only:$addr)),
+ (SLGF GR64:$src1, bdxaddr20only:$addr)>;
+def : Pat<(sub GR64:$src1, (load bdxaddr20only:$addr)),
+ (SG GR64:$src1, bdxaddr20only:$addr)>;
+
//===----------------------------------------------------------------------===//
// AND
//===----------------------------------------------------------------------===//
@@ -2119,20 +2224,6 @@ let isCodeGenOnly = 1, hasSideEffects = 1 in {
// Peepholes.
//===----------------------------------------------------------------------===//
-// Use AL* for GR64 additions of unsigned 32-bit values.
-defm : ZXB<add, GR64, ALGFR>;
-def : Pat<(add GR64:$src1, imm64zx32:$src2),
- (ALGFI GR64:$src1, imm64zx32:$src2)>;
-def : Pat<(add GR64:$src1, (azextloadi32 bdxaddr20only:$addr)),
- (ALGF GR64:$src1, bdxaddr20only:$addr)>;
-
-// Use SL* for GR64 subtractions of unsigned 32-bit values.
-defm : ZXB<sub, GR64, SLGFR>;
-def : Pat<(add GR64:$src1, imm64zx32n:$src2),
- (SLGFI GR64:$src1, imm64zx32n:$src2)>;
-def : Pat<(sub GR64:$src1, (azextloadi32 bdxaddr20only:$addr)),
- (SLGF GR64:$src1, bdxaddr20only:$addr)>;
-
// Avoid generating 2 XOR instructions. (xor (and x, y), y) is
// equivalent to (and (xor x, -1), y)
def : Pat<(and (xor GR64:$x, (i64 -1)), GR64:$y),
diff --git a/lib/Target/SystemZ/SystemZOperands.td b/lib/Target/SystemZ/SystemZOperands.td
index 8171eae6460..da682cb4e5a 100644
--- a/lib/Target/SystemZ/SystemZOperands.td
+++ b/lib/Target/SystemZ/SystemZOperands.td
@@ -219,6 +219,12 @@ def SIMM16 : SDNodeXForm<imm, [{
MVT::i64);
}]>;
+// Negate and then truncate an immediate to a 16-bit signed quantity.
+def NEGSIMM16 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(int16_t(-N->getZExtValue()), SDLoc(N),
+ MVT::i64);
+}]>;
+
// Truncate an immediate to a 16-bit unsigned quantity.
def UIMM16 : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(uint16_t(N->getZExtValue()), SDLoc(N),
@@ -231,24 +237,30 @@ def SIMM32 : SDNodeXForm<imm, [{
MVT::i64);
}]>;
+// Negate and then truncate an immediate to a 32-bit signed quantity.
+def NEGSIMM32 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(int32_t(-N->getZExtValue()), SDLoc(N),
+ MVT::i64);
+}]>;
+
// Truncate an immediate to a 32-bit unsigned quantity.
def UIMM32 : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(uint32_t(N->getZExtValue()), SDLoc(N),
MVT::i64);
}]>;
+// Negate and then truncate an immediate to a 32-bit unsigned quantity.
+def NEGUIMM32 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(uint32_t(-N->getZExtValue()), SDLoc(N),
+ MVT::i64);
+}]>;
+
// Truncate an immediate to a 48-bit unsigned quantity.
def UIMM48 : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(uint64_t(N->getZExtValue()) & 0xffffffffffff,
SDLoc(N), MVT::i64);
}]>;
-// Negate and then truncate an immediate to a 32-bit unsigned quantity.
-def NEGIMM32 : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(uint32_t(-N->getZExtValue()), SDLoc(N),
- MVT::i64);
-}]>;
-
//===----------------------------------------------------------------------===//
// Immediate asm operands.
//===----------------------------------------------------------------------===//
@@ -336,6 +348,10 @@ def imm32sx16 : Immediate<i32, [{
return isInt<16>(N->getSExtValue());
}], SIMM16, "S16Imm">;
+def imm32sx16n : Immediate<i32, [{
+ return isInt<16>(-N->getSExtValue());
+}], NEGSIMM16, "S16Imm">;
+
def imm32zx16 : Immediate<i32, [{
return isUInt<16>(N->getZExtValue());
}], UIMM16, "U16Imm">;
@@ -348,6 +364,10 @@ def imm32sx16trunc : Immediate<i32, [{}], SIMM16, "S16Imm">;
def simm32 : Immediate<i32, [{}], SIMM32, "S32Imm">;
def uimm32 : Immediate<i32, [{}], UIMM32, "U32Imm">;
+def simm32n : Immediate<i32, [{
+ return isInt<32>(-N->getSExtValue());
+}], NEGSIMM32, "S32Imm">;
+
def imm32 : ImmLeaf<i32, [{}]>;
//===----------------------------------------------------------------------===//
@@ -423,6 +443,10 @@ def imm64sx16 : Immediate<i64, [{
return isInt<16>(N->getSExtValue());
}], SIMM16, "S16Imm">;
+def imm64sx16n : Immediate<i64, [{
+ return isInt<16>(-N->getSExtValue());
+}], NEGSIMM16, "S16Imm">;
+
def imm64zx16 : Immediate<i64, [{
return isUInt<16>(N->getZExtValue());
}], UIMM16, "U16Imm">;
@@ -431,13 +455,17 @@ def imm64sx32 : Immediate<i64, [{
return isInt<32>(N->getSExtValue());
}], SIMM32, "S32Imm">;
+def imm64sx32n : Immediate<i64, [{
+ return isInt<32>(-N->getSExtValue());
+}], NEGSIMM32, "S32Imm">;
+
def imm64zx32 : Immediate<i64, [{
return isUInt<32>(N->getZExtValue());
}], UIMM32, "U32Imm">;
def imm64zx32n : Immediate<i64, [{
return isUInt<32>(-N->getSExtValue());
-}], NEGIMM32, "U32Imm">;
+}], NEGUIMM32, "U32Imm">;
def imm64zx48 : Immediate<i64, [{
return isUInt<64>(N->getZExtValue());
diff --git a/lib/Target/SystemZ/SystemZOperators.td b/lib/Target/SystemZ/SystemZOperators.td
index 6bec1e0200b..355f289a0f5 100644
--- a/lib/Target/SystemZ/SystemZOperators.td
+++ b/lib/Target/SystemZ/SystemZOperators.td
@@ -45,6 +45,17 @@ def SDT_ZGR128Binary : SDTypeProfile<1, 2,
[SDTCisVT<0, untyped>,
SDTCisInt<1>,
SDTCisInt<2>]>;
+def SDT_ZBinaryWithFlags : SDTypeProfile<2, 2,
+ [SDTCisInt<0>,
+ SDTCisVT<1, i32>,
+ SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>]>;
+def SDT_ZBinaryWithCarry : SDTypeProfile<2, 3,
+ [SDTCisInt<0>,
+ SDTCisVT<1, i32>,
+ SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>,
+ SDTCisVT<4, i32>]>;
def SDT_ZAtomicLoadBinaryW : SDTypeProfile<1, 5,
[SDTCisVT<0, i32>,
SDTCisPtrTy<1>,
@@ -262,6 +273,12 @@ def z_smul_lohi : SDNode<"SystemZISD::SMUL_LOHI", SDT_ZGR128Binary>;
def z_umul_lohi : SDNode<"SystemZISD::UMUL_LOHI", SDT_ZGR128Binary>;
def z_sdivrem : SDNode<"SystemZISD::SDIVREM", SDT_ZGR128Binary>;
def z_udivrem : SDNode<"SystemZISD::UDIVREM", SDT_ZGR128Binary>;
+def z_saddo : SDNode<"SystemZISD::SADDO", SDT_ZBinaryWithFlags>;
+def z_ssubo : SDNode<"SystemZISD::SSUBO", SDT_ZBinaryWithFlags>;
+def z_uaddo : SDNode<"SystemZISD::UADDO", SDT_ZBinaryWithFlags>;
+def z_usubo : SDNode<"SystemZISD::USUBO", SDT_ZBinaryWithFlags>;
+def z_addcarry_1 : SDNode<"SystemZISD::ADDCARRY", SDT_ZBinaryWithCarry>;
+def z_subcarry_1 : SDNode<"SystemZISD::SUBCARRY", SDT_ZBinaryWithCarry>;
def z_membarrier : SDNode<"SystemZISD::MEMBARRIER", SDTNone,
[SDNPHasChain, SDNPSideEffect]>;
@@ -432,6 +449,10 @@ def z_select_ccmask
(z_select_ccmask_1 node:$true, node:$false,
node:$valid, node:$mask, CC)>;
def z_ipm : PatFrag<(ops), (z_ipm_1 CC)>;
+def z_addcarry : PatFrag<(ops node:$lhs, node:$rhs),
+ (z_addcarry_1 node:$lhs, node:$rhs, CC)>;
+def z_subcarry : PatFrag<(ops node:$lhs, node:$rhs),
+ (z_subcarry_1 node:$lhs, node:$rhs, CC)>;
// Signed and unsigned comparisons.
def z_scmp : PatFrag<(ops node:$a, node:$b), (z_icmp node:$a, node:$b, imm), [{