summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/CodeGen/PeepholeOptimizer.cpp22
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.cpp16
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.h3
-rw-r--r--test/CodeGen/PowerPC/coalesce-ext.ll17
4 files changed, 55 insertions, 3 deletions
diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp
index a762ed7c51d..b16fa79cd66 100644
--- a/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/lib/CodeGen/PeepholeOptimizer.cpp
@@ -156,6 +156,14 @@ optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
if (!DstRC)
return false;
+ // The ext instr may be operating on a sub-register of SrcReg as well.
+ // PPC::EXTSW is a 32 -> 64-bit sign extension, but it reads a 64-bit
+ // register.
+ // If UseSrcSubIdx is Set, SubIdx also applies to SrcReg, and only uses of
+ // SrcReg:SubIdx should be replaced.
+ bool UseSrcSubIdx = TM->getRegisterInfo()->
+ getSubClassWithSubReg(MRI->getRegClass(SrcReg), SubIdx) != 0;
+
// The source has other uses. See if we can replace the other uses with use of
// the result of the extension.
SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs;
@@ -184,6 +192,10 @@ optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
continue;
}
+ // Only accept uses of SrcReg:SubIdx.
+ if (UseSrcSubIdx && UseMO.getSubReg() != SubIdx)
+ continue;
+
// It's an error to translate this:
//
// %reg1025 = <sext> %reg1024
@@ -259,10 +271,14 @@ optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
}
unsigned NewVR = MRI->createVirtualRegister(RC);
- BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(),
- TII->get(TargetOpcode::COPY), NewVR)
+ MachineInstr *Copy = BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), NewVR)
.addReg(DstReg, 0, SubIdx);
-
+ // SubIdx applies to both SrcReg and DstReg when UseSrcSubIdx is set.
+ if (UseSrcSubIdx) {
+ Copy->getOperand(0).setSubReg(SubIdx);
+ Copy->getOperand(0).setIsUndef();
+ }
UseMO->setReg(NewVR);
++NumReuse;
Changed = true;
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index 28b3bc1596e..47f09dca77d 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -79,6 +79,22 @@ ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetPostRAHazardRecognizer(
return new PPCScoreboardHazardRecognizer(II, DAG);
}
+
+// Detect 32 -> 64-bit extensions where we may reuse the low sub-register.
+bool PPCInstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SubIdx) const {
+ switch (MI.getOpcode()) {
+ default: return false;
+ case PPC::EXTSW:
+ case PPC::EXTSW_32_64:
+ SrcReg = MI.getOperand(1).getReg();
+ DstReg = MI.getOperand(0).getReg();
+ SubIdx = PPC::sub_32;
+ return true;
+ }
+}
+
unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const {
switch (MI->getOpcode()) {
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
index 7d49aa129e3..374213ea435 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -92,6 +92,9 @@ public:
CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
const ScheduleDAG *DAG) const;
+ bool isCoalescableExtInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SubIdx) const;
unsigned isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const;
unsigned isStoreToStackSlot(const MachineInstr *MI,
diff --git a/test/CodeGen/PowerPC/coalesce-ext.ll b/test/CodeGen/PowerPC/coalesce-ext.ll
new file mode 100644
index 00000000000..08a2cc773fd
--- /dev/null
+++ b/test/CodeGen/PowerPC/coalesce-ext.ll
@@ -0,0 +1,17 @@
+; RUN: llc -march=ppc64 < %s | FileCheck %s
+; Check that the peephole optimizer knows about sext and zext instructions.
+; CHECK: test1sext
+define i32 @test1sext(i64 %A, i64 %B, i32* %P, i64 *%P2) nounwind {
+ %C = add i64 %A, %B
+ ; CHECK: add [[SUM:r[0-9]+]], r3, r4
+ %D = trunc i64 %C to i32
+ %E = shl i64 %C, 32
+ %F = ashr i64 %E, 32
+ ; CHECK: extsw [[EXT:r[0-9]+]], [[SUM]]
+ store volatile i64 %F, i64 *%P2
+ ; CHECK: std [[EXT]]
+ store volatile i32 %D, i32* %P
+ ; Reuse low bits of extended register, don't extend live range of SUM.
+ ; CHECK: stw [[EXT]]
+ ret i32 %D
+}