summaryrefslogtreecommitdiff
path: root/lib/Target/PowerPC/PPCMIPeephole.cpp
diff options
context:
space:
mode:
authorHiroshi Inoue <inouehrs@jp.ibm.com>2017-10-16 04:12:57 +0000
committerHiroshi Inoue <inouehrs@jp.ibm.com>2017-10-16 04:12:57 +0000
commita7d4828a91f5e9f6a77369ace1e9f37804b8ffad (patch)
tree1ea22f5d7fd1daf2421adb94e28f06de4b63b2ec /lib/Target/PowerPC/PPCMIPeephole.cpp
parent4175d2c7f05866447ea2960c3de78a5af1556c67 (diff)
[PowerPC] Eliminate sign- and zero-extensions if already sign- or zero-extended
This patch enables redundant sign- and zero-extension elimination in the PowerPC MI Peephole pass. If the input value of a sign- or zero-extension is known to be already sign- or zero-extended, the operation is redundant and can be eliminated. One common case is sign-extensions for a method parameter or for a method return value; they must be sign- or zero-extended as defined in the PPC ELF ABI. For example, for the following simple code, two extsw instructions are generated: one before the invocation of int_func and one before the return. With this patch, both extsw instructions are eliminated. void int_func(int); void ii_test(int a) { if (a & 1) return int_func(a); } Such redundant sign- or zero-extensions are quite common in many programs; e.g. I observed about 60,000 occurrences of the elimination while compiling LLVM+Clang. Differential Revision: https://reviews.llvm.org/D31319 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@315888 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/PowerPC/PPCMIPeephole.cpp')
-rw-r--r--lib/Target/PowerPC/PPCMIPeephole.cpp216
1 files changed, 216 insertions, 0 deletions
diff --git a/lib/Target/PowerPC/PPCMIPeephole.cpp b/lib/Target/PowerPC/PPCMIPeephole.cpp
index 8e7e067a21e..d135287a845 100644
--- a/lib/Target/PowerPC/PPCMIPeephole.cpp
+++ b/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -29,14 +29,27 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
+#include "llvm/ADT/Statistic.h"
#include "MCTargetDesc/PPCPredicates.h"
using namespace llvm;
#define DEBUG_TYPE "ppc-mi-peepholes"
+STATISTIC(NumEliminatedSExt, "Number of eliminated sign-extensions"); // Incremented by the EXTSH*/EXTSW* peephole cases.
+STATISTIC(NumEliminatedZExt, "Number of eliminated zero-extensions"); // Incremented by the RLDICL peephole case.
STATISTIC(NumOptADDLIs, "Number of optimized ADD instruction fed by LI");
+static cl::opt<bool> // Hidden command-line switch; initialized to true.
+ EnableSExtElimination("ppc-eliminate-signext",
+ cl::desc("enable elimination of sign-extensions"),
+ cl::init(true), cl::Hidden); // When false, the EXTSH*/EXTSW* peephole cases break out immediately.
+
+static cl::opt<bool> // Hidden command-line switch; initialized to true.
+ EnableZExtElimination("ppc-eliminate-zeroext",
+ cl::desc("enable elimination of zero-extensions"),
+ cl::init(true), cl::Hidden); // When false, the RLDICL peephole case breaks out immediately.
+
namespace llvm {
void initializePPCMIPeepholePass(PassRegistry&);
}
@@ -110,6 +123,59 @@ static MachineInstr *getVRegDefOrNull(MachineOperand *Op,
return MRI->getVRegDef(Reg);
}
+// Returns how many of the most significant bits of MI's result are known
+// to be zero, counted on a 64-bit scale (for example, 48 for a
+// zero-extending halfword load). Returns 0 when nothing is known.
+static unsigned
+getKnownLeadingZeroCount(MachineInstr *MI, const PPCInstrInfo *TII) {
+  switch (MI->getOpcode()) {
+  case PPC::RLDICL:
+  case PPC::RLDICLo:
+  case PPC::RLDCL:
+  case PPC::RLDCLo:
+    // The immediate in operand 3 is used directly as the count.
+    return MI->getOperand(3).getImm();
+
+  case PPC::RLDIC:
+  case PPC::RLDICo: {
+    // Operand 3 is only trusted when it does not exceed 63 minus operand 2;
+    // otherwise fall through to the generic query below.
+    const auto Op2 = MI->getOperand(2).getImm();
+    const auto Op3 = MI->getOperand(3).getImm();
+    if (Op3 <= 63 - Op2)
+      return Op3;
+    break;
+  }
+
+  case PPC::RLWINM:
+  case PPC::RLWINMo:
+  case PPC::RLWNM:
+  case PPC::RLWNMo:
+  case PPC::RLWINM8:
+  case PPC::RLWNM8: {
+    // Only usable when operand 3 does not exceed operand 4; the upper
+    // 32 bits are then counted as zero in addition to operand 3.
+    const auto Op3 = MI->getOperand(3).getImm();
+    const auto Op4 = MI->getOperand(4).getImm();
+    if (Op3 <= Op4)
+      return 32 + Op3;
+    break;
+  }
+
+  case PPC::ANDIo: {
+    // The 16-bit immediate mask bounds the result: 48 upper bits plus
+    // however many leading zeros the mask itself has.
+    uint16_t Mask16 = MI->getOperand(2).getImm();
+    return 48 + countLeadingZeros(Mask16);
+  }
+
+  case PPC::CNTLZW:
+  case PPC::CNTLZWo:
+  case PPC::CNTTZW:
+  case PPC::CNTTZWo:
+  case PPC::CNTLZW8:
+  case PPC::CNTTZW8:
+    // The result ranges from 0 to 32, so it fits in the low 6 bits.
+    return 58;
+
+  case PPC::CNTLZD:
+  case PPC::CNTLZDo:
+  case PPC::CNTTZD:
+  case PPC::CNTTZDo:
+    // The result ranges from 0 to 64, so it fits in the low 7 bits.
+    return 57;
+
+  case PPC::LHZ:
+  case PPC::LHZX:
+  case PPC::LHZ8:
+  case PPC::LHZX8:
+  case PPC::LHZU:
+  case PPC::LHZUX:
+  case PPC::LHZU8:
+  case PPC::LHZUX8:
+    return 48;
+
+  case PPC::LBZ:
+  case PPC::LBZX:
+  case PPC::LBZ8:
+  case PPC::LBZX8:
+  case PPC::LBZU:
+  case PPC::LBZUX:
+  case PPC::LBZU8:
+  case PPC::LBZUX8:
+    return 56;
+
+  default:
+    break;
+  }
+
+  // No opcode-specific answer: fall back to the target's generic query.
+  return TII->isZeroExtended(*MI) ? 32 : 0;
+}
+
// Perform peephole optimizations.
bool PPCMIPeephole::simplifyCode(void) {
bool Simplified = false;
@@ -367,6 +433,156 @@ bool PPCMIPeephole::simplifyCode(void) {
}
break;
}
+ case PPC::EXTSH:
+ case PPC::EXTSH8:
+ case PPC::EXTSH8_32_64: {
+ if (!EnableSExtElimination) break;
+ unsigned NarrowReg = MI.getOperand(1).getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(NarrowReg))
+ break;
+
+ MachineInstr *SrcMI = MRI->getVRegDef(NarrowReg);
+ // If we've used a zero-extending load that we will sign-extend,
+ // just do a sign-extending load.
+ if (SrcMI->getOpcode() == PPC::LHZ ||
+ SrcMI->getOpcode() == PPC::LHZX) {
+ if (!MRI->hasOneNonDBGUse(SrcMI->getOperand(0).getReg()))
+ break;
+ auto is64Bit = [] (unsigned Opcode) {
+ return Opcode == PPC::EXTSH8;
+ };
+ auto isXForm = [] (unsigned Opcode) {
+ return Opcode == PPC::LHZX;
+ };
+ auto getSextLoadOp = [] (bool is64Bit, bool isXForm) {
+ if (is64Bit)
+ if (isXForm) return PPC::LHAX8;
+ else return PPC::LHA8;
+ else
+ if (isXForm) return PPC::LHAX;
+ else return PPC::LHA;
+ };
+ unsigned Opc = getSextLoadOp(is64Bit(MI.getOpcode()),
+ isXForm(SrcMI->getOpcode()));
+ DEBUG(dbgs() << "Zero-extending load\n");
+ DEBUG(SrcMI->dump());
+ DEBUG(dbgs() << "and sign-extension\n");
+ DEBUG(MI.dump());
+ DEBUG(dbgs() << "are merged into sign-extending load\n");
+ SrcMI->setDesc(TII->get(Opc));
+ SrcMI->getOperand(0).setReg(MI.getOperand(0).getReg());
+ ToErase = &MI;
+ Simplified = true;
+ NumEliminatedSExt++;
+ }
+ break;
+ }
+ case PPC::EXTSW:
+ case PPC::EXTSW_32:
+ case PPC::EXTSW_32_64: {
+ if (!EnableSExtElimination) break;
+ unsigned NarrowReg = MI.getOperand(1).getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(NarrowReg))
+ break;
+
+ MachineInstr *SrcMI = MRI->getVRegDef(NarrowReg);
+ // If we've used a zero-extending load that we will sign-extend,
+ // just do a sign-extending load.
+ if (SrcMI->getOpcode() == PPC::LWZ ||
+ SrcMI->getOpcode() == PPC::LWZX) {
+ if (!MRI->hasOneNonDBGUse(SrcMI->getOperand(0).getReg()))
+ break;
+ auto is64Bit = [] (unsigned Opcode) {
+ return Opcode == PPC::EXTSW || Opcode == PPC::EXTSW_32_64;
+ };
+ auto isXForm = [] (unsigned Opcode) {
+ return Opcode == PPC::LWZX;
+ };
+ auto getSextLoadOp = [] (bool is64Bit, bool isXForm) {
+ if (is64Bit)
+ if (isXForm) return PPC::LWAX;
+ else return PPC::LWA;
+ else
+ if (isXForm) return PPC::LWAX_32;
+ else return PPC::LWA_32;
+ };
+ unsigned Opc = getSextLoadOp(is64Bit(MI.getOpcode()),
+ isXForm(SrcMI->getOpcode()));
+ DEBUG(dbgs() << "Zero-extending load\n");
+ DEBUG(SrcMI->dump());
+ DEBUG(dbgs() << "and sign-extension\n");
+ DEBUG(MI.dump());
+ DEBUG(dbgs() << "are merged into sign-extending load\n");
+ SrcMI->setDesc(TII->get(Opc));
+ SrcMI->getOperand(0).setReg(MI.getOperand(0).getReg());
+ ToErase = &MI;
+ Simplified = true;
+ NumEliminatedSExt++;
+ } else if (MI.getOpcode() == PPC::EXTSW_32_64 &&
+ TII->isSignExtended(*SrcMI)) {
+ // We can eliminate EXTSW if the input is known to be already
+ // sign-extended.
+ DEBUG(dbgs() << "Removing redundant sign-extension\n");
+ unsigned TmpReg =
+ MF->getRegInfo().createVirtualRegister(&PPC::G8RCRegClass);
+ BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::IMPLICIT_DEF),
+ TmpReg);
+ BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::INSERT_SUBREG),
+ MI.getOperand(0).getReg())
+ .addReg(TmpReg)
+ .addReg(NarrowReg)
+ .addImm(PPC::sub_32);
+ ToErase = &MI;
+ Simplified = true;
+ NumEliminatedSExt++;
+ }
+ break;
+ }
+ case PPC::RLDICL: {
+ // We can eliminate RLDICL (e.g. for zero-extension)
+ // if all bits to clear are already zero in the input.
+ // This code assumes the following code sequence for zero-extension.
+ // %vreg6<def> = COPY %vreg5:sub_32; (optional)
+ // %vreg8<def> = IMPLICIT_DEF;
+ // %vreg7<def,tied1> = INSERT_SUBREG %vreg8<tied0>, %vreg6, sub_32;
+ if (!EnableZExtElimination) break;
+
+ if (MI.getOperand(2).getImm() != 0)
+ break;
+
+ unsigned SrcReg = MI.getOperand(1).getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
+ break;
+
+ MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
+ if (!(SrcMI && SrcMI->getOpcode() == PPC::INSERT_SUBREG &&
+ SrcMI->getOperand(0).isReg() && SrcMI->getOperand(1).isReg()))
+ break;
+
+ MachineInstr *ImpDefMI, *SubRegMI;
+ ImpDefMI = MRI->getVRegDef(SrcMI->getOperand(1).getReg());
+ SubRegMI = MRI->getVRegDef(SrcMI->getOperand(2).getReg());
+ if (ImpDefMI->getOpcode() != PPC::IMPLICIT_DEF) break;
+
+ SrcMI = SubRegMI;
+ if (SubRegMI->getOpcode() == PPC::COPY) {
+ unsigned CopyReg = SubRegMI->getOperand(1).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(CopyReg))
+ SrcMI = MRI->getVRegDef(CopyReg);
+ }
+
+ unsigned KnownZeroCount = getKnownLeadingZeroCount(SrcMI, TII);
+ if (MI.getOperand(3).getImm() <= KnownZeroCount) {
+ DEBUG(dbgs() << "Removing redundant zero-extension\n");
+ BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
+ MI.getOperand(0).getReg())
+ .addReg(SrcReg);
+ ToErase = &MI;
+ Simplified = true;
+ NumEliminatedZExt++;
+ }
+ break;
+ }
// TODO: Any instruction that has an immediate form fed only by a PHI
// whose operands are all load immediate can be folded away. We currently