summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJonas Paulsson <paulsson@linux.vnet.ibm.com>2017-09-29 14:31:39 +0000
committerJonas Paulsson <paulsson@linux.vnet.ibm.com>2017-09-29 14:31:39 +0000
commite2ff20cbface83819e447eef1345ddaa9bfec441 (patch)
treeafb341e3ec2e33ad65aebc6827c37602b1f0d111
parent1be91bb29f0fc97083ee5eeee701f1a5a9388e50 (diff)
[SystemZ] implement shouldCoalesce()
Implement shouldCoalesce() to help regalloc avoid running out of GR128 registers. If a COPY involving a subreg of a GR128 is coalesced, the live range of the GR128 virtual register will be extended. If this happens where there are enough phys-reg clobbers present, regalloc will run out of registers (if there is not a single GR128 allocatable register available). This patch tries to allow coalescing only when it can prove that this will be safe by checking the (local) interval in question. Review: Ulrich Weigand, Quentin Colombet https://reviews.llvm.org/D37899 https://bugs.llvm.org/show_bug.cgi?id=34610 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@314516 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--include/llvm/Target/TargetRegisterInfo.h4
-rw-r--r--lib/CodeGen/RegisterCoalescer.cpp2
-rw-r--r--lib/Target/AMDGPU/SIRegisterInfo.cpp3
-rw-r--r--lib/Target/AMDGPU/SIRegisterInfo.h4
-rw-r--r--lib/Target/ARM/ARMBaseRegisterInfo.cpp3
-rw-r--r--lib/Target/ARM/ARMBaseRegisterInfo.h5
-rw-r--r--lib/Target/SystemZ/SystemZRegisterInfo.cpp67
-rw-r--r--lib/Target/SystemZ/SystemZRegisterInfo.h12
-rw-r--r--test/CodeGen/SystemZ/regalloc-GR128.ll18
9 files changed, 112 insertions, 6 deletions
diff --git a/include/llvm/Target/TargetRegisterInfo.h b/include/llvm/Target/TargetRegisterInfo.h
index afa6a89a890..b2f6f991ae5 100644
--- a/include/llvm/Target/TargetRegisterInfo.h
+++ b/include/llvm/Target/TargetRegisterInfo.h
@@ -40,6 +40,7 @@ class MachineFunction;
class MachineInstr;
class RegScavenger;
class VirtRegMap;
+class LiveIntervals;
class TargetRegisterClass {
public:
@@ -959,7 +960,8 @@ public:
unsigned SubReg,
const TargetRegisterClass *DstRC,
unsigned DstSubReg,
- const TargetRegisterClass *NewRC) const
+ const TargetRegisterClass *NewRC,
+ LiveIntervals &LIS) const
{ return true; }
//===--------------------------------------------------------------------===//
diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp
index 255d17078a1..152149ac9a7 100644
--- a/lib/CodeGen/RegisterCoalescer.cpp
+++ b/lib/CodeGen/RegisterCoalescer.cpp
@@ -1583,7 +1583,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
std::swap(SrcRC, DstRC);
}
if (!TRI->shouldCoalesce(CopyMI, SrcRC, SrcIdx, DstRC, DstIdx,
- CP.getNewRC())) {
+ CP.getNewRC(), *LIS)) {
DEBUG(dbgs() << "\tSubtarget bailed on coalescing.\n");
return false;
}
diff --git a/lib/Target/AMDGPU/SIRegisterInfo.cpp b/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 7c73f92eed2..a367bd7e129 100644
--- a/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -1474,7 +1474,8 @@ bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI,
unsigned SubReg,
const TargetRegisterClass *DstRC,
unsigned DstSubReg,
- const TargetRegisterClass *NewRC) const {
+ const TargetRegisterClass *NewRC,
+ LiveIntervals &LIS) const {
unsigned SrcSize = getRegSizeInBits(*SrcRC);
unsigned DstSize = getRegSizeInBits(*DstRC);
unsigned NewSize = getRegSizeInBits(*NewRC);
diff --git a/lib/Target/AMDGPU/SIRegisterInfo.h b/lib/Target/AMDGPU/SIRegisterInfo.h
index 65655b79c21..bf814b6974a 100644
--- a/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -22,6 +22,7 @@
namespace llvm {
+class LiveIntervals;
class MachineRegisterInfo;
class SISubtarget;
class SIMachineFunctionInfo;
@@ -212,7 +213,8 @@ public:
unsigned SubReg,
const TargetRegisterClass *DstRC,
unsigned DstSubReg,
- const TargetRegisterClass *NewRC) const override;
+ const TargetRegisterClass *NewRC,
+ LiveIntervals &LIS) const override;
unsigned getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const override;
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 17269268112..bf39aebaf44 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -807,7 +807,8 @@ bool ARMBaseRegisterInfo::shouldCoalesce(MachineInstr *MI,
unsigned SubReg,
const TargetRegisterClass *DstRC,
unsigned DstSubReg,
- const TargetRegisterClass *NewRC) const {
+ const TargetRegisterClass *NewRC,
+ LiveIntervals &LIS) const {
auto MBB = MI->getParent();
auto MF = MBB->getParent();
const MachineRegisterInfo &MRI = MF->getRegInfo();
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h
index 2e91d9d4be2..a8e947184ea 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -27,6 +27,8 @@
namespace llvm {
+class LiveIntervals;
+
/// Register allocation hints.
namespace ARMRI {
@@ -204,7 +206,8 @@ public:
unsigned SubReg,
const TargetRegisterClass *DstRC,
unsigned DstSubReg,
- const TargetRegisterClass *NewRC) const override;
+ const TargetRegisterClass *NewRC,
+ LiveIntervals &LIS) const override;
};
} // end namespace llvm
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
index d14a0fb0b0b..05f93ce5162 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -10,6 +10,7 @@
#include "SystemZRegisterInfo.h"
#include "SystemZInstrInfo.h"
#include "SystemZSubtarget.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetFrameLowering.h"
@@ -152,6 +153,72 @@ SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
MI->getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
}
+bool SystemZRegisterInfo::shouldCoalesce(MachineInstr *MI,
+ const TargetRegisterClass *SrcRC,
+ unsigned SubReg,
+ const TargetRegisterClass *DstRC,
+ unsigned DstSubReg,
+ const TargetRegisterClass *NewRC,
+ LiveIntervals &LIS) const {
+ assert (MI->isCopy() && "Only expecting COPY instructions");
+ // Returning true permits the coalescer to join this COPY; false vetoes it.
+ // Coalesce anything which is not a COPY involving a subreg to/from GR128.
+ if (!(NewRC->hasSuperClassEq(&SystemZ::GR128BitRegClass) &&
+ (getRegSizeInBits(*SrcRC) <= 64 || getRegSizeInBits(*DstRC) <= 64)))
+ return true;
+
+ // Allow coalescing of a GR128 subreg COPY only if the live ranges are small
+ // and local to one MBB without too many interfering registers. Otherwise
+ // regalloc may run out of registers.
+ // Operand 1 is the wide (GR128) side when SrcRC is 128 bits, else operand 0.
+ unsigned WideOpNo = (getRegSizeInBits(*SrcRC) == 128 ? 1 : 0);
+ unsigned GR128Reg = MI->getOperand(WideOpNo).getReg();
+ unsigned GRNarReg = MI->getOperand((WideOpNo == 1) ? 0 : 1).getReg();
+ LiveInterval &IntGR128 = LIS.getInterval(GR128Reg);
+ LiveInterval &IntGRNar = LIS.getInterval(GRNarReg);
+
+ // Check that the two virtual registers are local to MBB.
+ MachineBasicBlock *MBB = MI->getParent();
+ if (LIS.isLiveInToMBB(IntGR128, MBB) || LIS.isLiveOutOfMBB(IntGR128, MBB) ||
+ LIS.isLiveInToMBB(IntGRNar, MBB) || LIS.isLiveOutOfMBB(IntGRNar, MBB))
+ return false;
+ // Find the first and last MIs of the registers: the region runs from the
+ // start of the operand 1 interval to the end of the operand 0 interval.
+ MachineInstr *FirstMI = nullptr, *LastMI = nullptr;
+ if (WideOpNo == 1) {
+ FirstMI = LIS.getInstructionFromIndex(IntGR128.beginIndex());
+ LastMI = LIS.getInstructionFromIndex(IntGRNar.endIndex());
+ } else {
+ FirstMI = LIS.getInstructionFromIndex(IntGRNar.beginIndex());
+ LastMI = LIS.getInstructionFromIndex(IntGR128.endIndex());
+ }
+ assert (FirstMI && LastMI && "No instruction from index?");
+ // Check if coalescing seems safe by finding the set of clobbered physreg
+ // pairs in the region. Every physreg operand (use or def) in [FirstMI, LastMI]
+ // marks the first of its super-registers (itself included) that is in NewRC.
+ BitVector PhysClobbered(getNumRegs());
+ MachineBasicBlock::iterator MII = FirstMI, MEE = LastMI;
+ MEE++;
+ for (; MII != MEE; ++MII) {
+ for (const MachineOperand &MO : MII->operands())
+ if (MO.isReg() && isPhysicalRegister(MO.getReg())) {
+ for (MCSuperRegIterator SI(MO.getReg(), this, true/*IncludeSelf*/);
+ SI.isValid(); ++SI)
+ if (NewRC->contains(*SI)) {
+ PhysClobbered.set(*SI);
+ break;
+ }
+ }
+ }
+ // Demand an arbitrary margin of free regs: veto when fewer than
+ // DemandedFreeGR128 registers of NewRC are left unmarked in the region.
+ unsigned const DemandedFreeGR128 = 3;
+ if (PhysClobbered.count() > (NewRC->getNumRegs() - DemandedFreeGR128))
+ return false;
+ // NOTE(review): unsigned math; presumably wraps if NewRC has < 3 regs - confirm.
+ return true;
+}
+
unsigned
SystemZRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
const SystemZFrameLowering *TFI = getFrameLowering(MF);
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.h b/lib/Target/SystemZ/SystemZRegisterInfo.h
index e41c06c98af..8b690e6da9f 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.h
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.h
@@ -18,6 +18,8 @@
namespace llvm {
+class LiveIntervals;
+
namespace SystemZ {
// Return the subreg to use for referring to the even and odd registers
// in a GR128 pair. Is32Bit says whether we want a GR32 or GR64.
@@ -59,6 +61,16 @@ public:
void eliminateFrameIndex(MachineBasicBlock::iterator MI,
int SPAdj, unsigned FIOperandNum,
RegScavenger *RS) const override;
+ /// Target hook queried by the register coalescer for the COPY \p MI.
+ /// \brief SrcRC and DstRC will be morphed into NewRC if this returns true.
+ bool shouldCoalesce(MachineInstr *MI,
+ const TargetRegisterClass *SrcRC,
+ unsigned SubReg,
+ const TargetRegisterClass *DstRC,
+ unsigned DstSubReg,
+ const TargetRegisterClass *NewRC,
+ LiveIntervals &LIS) const override;
+
unsigned getFrameRegister(const MachineFunction &MF) const override;
};
diff --git a/test/CodeGen/SystemZ/regalloc-GR128.ll b/test/CodeGen/SystemZ/regalloc-GR128.ll
new file mode 100644
index 00000000000..e84e23613d9
--- /dev/null
+++ b/test/CodeGen/SystemZ/regalloc-GR128.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 -O3 -o /dev/null
+;
+; Test that regalloc does not run out of registers
+
+; This test will include a GR128 virtual reg; the asm below pins r0-r14.
+define void @test0(i64 %dividend, i64 %divisor) {
+ %rem = urem i64 %dividend, %divisor
+ call void asm sideeffect "", "{r0},{r1},{r2},{r3},{r4},{r5},{r6},{r7},{r8},{r9},{r10},{r11},{r12},{r13},{r14}"(i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 %rem)
+ ret void
+}
+
+; This test will include an ADDR128 virtual reg; the asm below pins r2-r14.
+define i64 @test1(i64 %dividend, i64 %divisor) {
+%rem = urem i64 %dividend, %divisor
+call void asm sideeffect "", "{r2},{r3},{r4},{r5},{r6},{r7},{r8},{r9},{r10},{r11},{r12},{r13},{r14}"(i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 %rem)
+%ret = add i64 %rem, 1
+ret i64 %ret
+}