summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/llvm/CodeGen/MachineOutliner.h14
-rw-r--r--lib/Target/AArch64/AArch64InstrInfo.cpp243
-rw-r--r--lib/Target/AArch64/AArch64InstrInfo.h4
-rw-r--r--test/CodeGen/AArch64/machine-outliner-regsave.mir112
-rw-r--r--test/CodeGen/AArch64/machine-outliner.ll12
-rw-r--r--test/CodeGen/AArch64/machine-outliner.mir6
6 files changed, 298 insertions, 93 deletions
diff --git a/include/llvm/CodeGen/MachineOutliner.h b/include/llvm/CodeGen/MachineOutliner.h
index 4249a99a891..95bfc24b57f 100644
--- a/include/llvm/CodeGen/MachineOutliner.h
+++ b/include/llvm/CodeGen/MachineOutliner.h
@@ -19,6 +19,7 @@
#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
namespace llvm {
namespace outliner {
@@ -74,6 +75,13 @@ public:
/// cost model information.
LiveRegUnits LRU;
+ /// Contains the accumulated register liveness information for the
+ /// instructions in this \p Candidate.
+ ///
+ /// This is optionally used by the target to determine which registers have
+ /// been used across the sequence.
+ LiveRegUnits UsedInSequence;
+
/// Return the number of instructions in this Candidate.
unsigned getLength() const { return Len; }
@@ -137,6 +145,12 @@ public:
// outlining candidate.
std::for_each(MBB->rbegin(), (MachineBasicBlock::reverse_iterator)front(),
[this](MachineInstr &MI) { LRU.stepBackward(MI); });
+
+ // Walk over the sequence itself and figure out which registers were used
+ // in the sequence.
+ UsedInSequence.init(TRI);
+ std::for_each(front(), std::next(back()),
+ [this](MachineInstr &MI) { UsedInSequence.accumulate(MI); });
}
};
diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp
index b571c4207c9..5e59fa570ed 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -4851,75 +4851,92 @@ AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const {
return makeArrayRef(TargetFlags);
}
- /// Constants defining how certain sequences should be outlined.
- /// This encompasses how an outlined function should be called, and what kind of
- /// frame should be emitted for that outlined function.
- ///
- /// \p MachineOutlinerDefault implies that the function should be called with
- /// a save and restore of LR to the stack.
- ///
- /// That is,
- ///
- /// I1 Save LR OUTLINED_FUNCTION:
- /// I2 --> BL OUTLINED_FUNCTION I1
- /// I3 Restore LR I2
- /// I3
- /// RET
- ///
- /// * Call construction overhead: 3 (save + BL + restore)
- /// * Frame construction overhead: 1 (ret)
- /// * Requires stack fixups? Yes
- ///
- /// \p MachineOutlinerTailCall implies that the function is being created from
- /// a sequence of instructions ending in a return.
- ///
- /// That is,
- ///
- /// I1 OUTLINED_FUNCTION:
- /// I2 --> B OUTLINED_FUNCTION I1
- /// RET I2
- /// RET
- ///
- /// * Call construction overhead: 1 (B)
- /// * Frame construction overhead: 0 (Return included in sequence)
- /// * Requires stack fixups? No
- ///
- /// \p MachineOutlinerNoLRSave implies that the function should be called using
- /// a BL instruction, but doesn't require LR to be saved and restored. This
- /// happens when LR is known to be dead.
- ///
- /// That is,
- ///
- /// I1 OUTLINED_FUNCTION:
- /// I2 --> BL OUTLINED_FUNCTION I1
- /// I3 I2
- /// I3
- /// RET
- ///
- /// * Call construction overhead: 1 (BL)
- /// * Frame construction overhead: 1 (RET)
- /// * Requires stack fixups? No
- ///
- /// \p MachineOutlinerThunk implies that the function is being created from
- /// a sequence of instructions ending in a call. The outlined function is
- /// called with a BL instruction, and the outlined function tail-calls the
- /// original call destination.
- ///
- /// That is,
- ///
- /// I1 OUTLINED_FUNCTION:
- /// I2 --> BL OUTLINED_FUNCTION I1
- /// BL f I2
- /// B f
- /// * Call construction overhead: 1 (BL)
- /// * Frame construction overhead: 0
- /// * Requires stack fixups? No
- ///
+/// Constants defining how certain sequences should be outlined.
+/// This encompasses how an outlined function should be called, and what kind of
+/// frame should be emitted for that outlined function.
+///
+/// \p MachineOutlinerDefault implies that the function should be called with
+/// a save and restore of LR to the stack.
+///
+/// That is,
+///
+/// I1 Save LR OUTLINED_FUNCTION:
+/// I2 --> BL OUTLINED_FUNCTION I1
+/// I3 Restore LR I2
+/// I3
+/// RET
+///
+/// * Call construction overhead: 3 (save + BL + restore)
+/// * Frame construction overhead: 1 (ret)
+/// * Requires stack fixups? Yes
+///
+/// \p MachineOutlinerTailCall implies that the function is being created from
+/// a sequence of instructions ending in a return.
+///
+/// That is,
+///
+/// I1 OUTLINED_FUNCTION:
+/// I2 --> B OUTLINED_FUNCTION I1
+/// RET I2
+/// RET
+///
+/// * Call construction overhead: 1 (B)
+/// * Frame construction overhead: 0 (Return included in sequence)
+/// * Requires stack fixups? No
+///
+/// \p MachineOutlinerNoLRSave implies that the function should be called using
+/// a BL instruction, but doesn't require LR to be saved and restored. This
+/// happens when LR is known to be dead.
+///
+/// That is,
+///
+/// I1 OUTLINED_FUNCTION:
+/// I2 --> BL OUTLINED_FUNCTION I1
+/// I3 I2
+/// I3
+/// RET
+///
+/// * Call construction overhead: 1 (BL)
+/// * Frame construction overhead: 1 (RET)
+/// * Requires stack fixups? No
+///
+/// \p MachineOutlinerThunk implies that the function is being created from
+/// a sequence of instructions ending in a call. The outlined function is
+/// called with a BL instruction, and the outlined function tail-calls the
+/// original call destination.
+///
+/// That is,
+///
+/// I1 OUTLINED_FUNCTION:
+/// I2 --> BL OUTLINED_FUNCTION I1
+/// BL f I2
+/// B f
+/// * Call construction overhead: 1 (BL)
+/// * Frame construction overhead: 0
+/// * Requires stack fixups? No
+///
+/// \p MachineOutlinerRegSave implies that the function should be called with a
+/// save and restore of LR to an available register. This allows us to avoid
+/// stack fixups. Note that this outlining variant is compatible with the
+/// NoLRSave case.
+///
+/// That is,
+///
+/// I1 Save LR OUTLINED_FUNCTION:
+/// I2 --> BL OUTLINED_FUNCTION I1
+/// I3 Restore LR I2
+/// I3
+/// RET
+///
+/// * Call construction overhead: 3 (save + BL + restore)
+/// * Frame construction overhead: 1 (ret)
+/// * Requires stack fixups? No
enum MachineOutlinerClass {
MachineOutlinerDefault, /// Emit a save, restore, call, and return.
MachineOutlinerTailCall, /// Only emit a branch.
MachineOutlinerNoLRSave, /// Emit a call and return.
MachineOutlinerThunk, /// Emit a call and tail-call.
+ MachineOutlinerRegSave /// Same as default, but save to a register.
};
enum MachineOutlinerMBBFlags {
@@ -4927,6 +4944,27 @@ enum MachineOutlinerMBBFlags {
HasCalls = 0x4
};
+unsigned
+AArch64InstrInfo::findRegisterToSaveLRTo(const outliner::Candidate &C) const {
+ MachineFunction *MF = C.getMF();
+ const AArch64RegisterInfo *ARI = static_cast<const AArch64RegisterInfo *>(
+ MF->getSubtarget().getRegisterInfo());
+
+ // Check if there is an available register across the sequence that we can
+ // use.
+ for (unsigned Reg : AArch64::GPR64RegClass) {
+ if (!ARI->isReservedReg(*MF, Reg) &&
+ Reg != AArch64::LR && // LR is not reserved, but don't use it.
+ Reg != AArch64::X16 && // X16 is not guaranteed to be preserved.
+ Reg != AArch64::X17 && // Ditto for X17.
+ C.LRU.available(Reg) && C.UsedInSequence.available(Reg))
+ return Reg;
+ }
+
+ // No suitable register. Return 0.
+ return 0u;
+}
+
outliner::OutlinedFunction
AArch64InstrInfo::getOutliningCandidateInfo(
std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
@@ -5015,11 +5053,27 @@ AArch64InstrInfo::getOutliningCandidateInfo(
SetCandidateCallInfo(MachineOutlinerNoLRSave, 4);
}
- // LR is live, so we need to save it to the stack.
+ // LR is live, so we need to save it. Decide whether it should be saved to
+ // the stack, or if it can be saved to a register.
else {
- FrameID = MachineOutlinerDefault;
- NumBytesToCreateFrame = 4;
- SetCandidateCallInfo(MachineOutlinerDefault, 12);
+ if (std::all_of(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
+ [this](outliner::Candidate &C) {
+ return findRegisterToSaveLRTo(C);
+ })) {
+ // Every candidate has an unused general-purpose register for the save.
+ // We can save LR to a register.
+ FrameID = MachineOutlinerRegSave;
+ NumBytesToCreateFrame = 4;
+ SetCandidateCallInfo(MachineOutlinerRegSave, 12);
+ }
+
+ else {
+ // At least one candidate does not have an unused general-purpose
+ // register. We must save LR to the stack.
+ FrameID = MachineOutlinerDefault;
+ NumBytesToCreateFrame = 4;
+ SetCandidateCallInfo(MachineOutlinerDefault, 12);
+ }
}
// Check if the range contains a call. These require a save + restore of the
@@ -5424,7 +5478,7 @@ void AArch64InstrInfo::buildOutlinedFrame(
MBB.insert(MBB.end(), ret);
// Did we have to modify the stack by saving the link register?
- if (OF.FrameConstructionID == MachineOutlinerNoLRSave)
+ if (OF.FrameConstructionID != MachineOutlinerDefault)
return;
// We modified the stack.
@@ -5457,13 +5511,41 @@ MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
// We want to return the spot where we inserted the call.
MachineBasicBlock::iterator CallPt;
- // We have a default call. Save the link register.
- MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
- .addReg(AArch64::SP, RegState::Define)
- .addReg(AArch64::LR)
- .addReg(AArch64::SP)
- .addImm(-16);
- It = MBB.insert(It, STRXpre);
+ // Instructions for saving and restoring LR around the call instruction we're
+ // going to insert.
+ MachineInstr *Save;
+ MachineInstr *Restore;
+ // Can we save to a register?
+ if (C.CallConstructionID == MachineOutlinerRegSave) {
+ // FIXME: This logic should be sunk into a target-specific interface so that
+ // we don't have to recompute the register.
+ unsigned Reg = findRegisterToSaveLRTo(C);
+ assert(Reg != 0 && "No callee-saved register available?");
+
+ // Save and restore LR from that register.
+ Save = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), Reg)
+ .addReg(AArch64::XZR)
+ .addReg(AArch64::LR)
+ .addImm(0);
+ Restore = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), AArch64::LR)
+ .addReg(AArch64::XZR)
+ .addReg(Reg)
+ .addImm(0);
+ } else {
+ // We have the default case. Save and restore from SP.
+ Save = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
+ .addReg(AArch64::SP, RegState::Define)
+ .addReg(AArch64::LR)
+ .addReg(AArch64::SP)
+ .addImm(-16);
+ Restore = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
+ .addReg(AArch64::SP, RegState::Define)
+ .addReg(AArch64::LR, RegState::Define)
+ .addReg(AArch64::SP)
+ .addImm(16);
+ }
+
+ It = MBB.insert(It, Save);
It++;
// Insert the call.
@@ -5472,14 +5554,7 @@ MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
CallPt = It;
It++;
- // Restore the link register.
- MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
- .addReg(AArch64::SP, RegState::Define)
- .addReg(AArch64::LR, RegState::Define)
- .addReg(AArch64::SP)
- .addImm(16);
- It = MBB.insert(It, LDRXpost);
-
+ It = MBB.insert(It, Restore);
return CallPt;
}
diff --git a/lib/Target/AArch64/AArch64InstrInfo.h b/lib/Target/AArch64/AArch64InstrInfo.h
index 98f43d4775f..11882e238b7 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/lib/Target/AArch64/AArch64InstrInfo.h
@@ -272,6 +272,10 @@ private:
ArrayRef<MachineOperand> Cond) const;
bool substituteCmpToZero(MachineInstr &CmpInstr, unsigned SrcReg,
const MachineRegisterInfo *MRI) const;
+
+ /// Returns an unused general-purpose register which can be used for
+ /// constructing an outlined call if one exists. Returns 0 otherwise.
+ unsigned findRegisterToSaveLRTo(const outliner::Candidate &C) const;
};
/// emitFrameOffset - Emit instructions as needed to set DestReg to SrcReg
diff --git a/test/CodeGen/AArch64/machine-outliner-regsave.mir b/test/CodeGen/AArch64/machine-outliner-regsave.mir
new file mode 100644
index 00000000000..6d00bd39cde
--- /dev/null
+++ b/test/CodeGen/AArch64/machine-outliner-regsave.mir
@@ -0,0 +1,112 @@
+# RUN: llc -mtriple=aarch64-apple-darwin -run-pass=prologepilog \
+# RUN: -run-pass=machine-outliner -verify-machineinstrs %s -o - | FileCheck %s
+# Check that we save LR to a callee-saved register when possible.
+# foo() should use a callee-saved register. However, bar() should not.
+--- |
+
+ define void @foo() #0 {
+ ret void
+ }
+
+ define void @bar() #0 {
+ ret void
+ }
+
+ attributes #0 = { minsize noinline noredzone "no-frame-pointer-elim"="true" }
+...
+---
+# Make sure that when we outline and a register is available, we
+# use it to save + restore LR instead of SP.
+# CHECK: name: foo
+# CHECK-DAG: bb.0
+# CHECK-DAG: $x[[REG:[0-9]+]] = ORRXrs $xzr, $lr, 0
+# CHECK-NEXT: BL
+# CHECK-NEXT: $lr = ORRXrs $xzr, $x[[REG]], 0
+# CHECK-DAG: bb.1
+# CHECK-DAG: $x[[REG]] = ORRXrs $xzr, $lr, 0
+# CHECK-NEXT: BL
+# CHECK-NEXT: $lr = ORRXrs $xzr, $x[[REG]], 0
+# CHECK-DAG: bb.2
+# CHECK-DAG: $x[[REG]] = ORRXrs $xzr, $lr, 0
+# CHECK-NEXT: BL
+# CHECK-NEXT: $lr = ORRXrs $xzr, $x[[REG]], 0
+name: foo
+tracksRegLiveness: true
+fixedStack:
+body: |
+ bb.0:
+ liveins: $lr, $w9
+ $x25 = ORRXri $xzr, 1
+ $w9 = ORRWri $wzr, 1
+ $w9 = ORRWri $wzr, 1
+ $w9 = ORRWri $wzr, 1
+ $w9 = ORRWri $wzr, 1
+ $w9 = ORRWri $wzr, 1
+ $w9 = ORRWri $wzr, 2
+ bb.1:
+ liveins: $lr, $w9
+ $w9 = ORRWri $wzr, 1
+ $w9 = ORRWri $wzr, 1
+ $w9 = ORRWri $wzr, 1
+ $w9 = ORRWri $wzr, 1
+ $w9 = ORRWri $wzr, 1
+ $w9 = ORRWri $wzr, 2
+ bb.2:
+ liveins: $lr, $w9
+ $w9 = ORRWri $wzr, 1
+ $w9 = ORRWri $wzr, 1
+ $w9 = ORRWri $wzr, 1
+ $w9 = ORRWri $wzr, 1
+ $w9 = ORRWri $wzr, 1
+ $w9 = ORRWri $wzr, 2
+ RET undef $lr
+
+...
+---
+# Convoluted case that shows that we'll still save to the stack when there are
+# no appropriate registers available.
+# The live-in lists do not contain x16 or x17 since including them would cause
+# nothing to be outlined.
+# They also deliberately don't contain x18 to show that on Darwin we won't store
+# to that.
+# CHECK-LABEL: name: bar
+# CHECK: early-clobber $sp = STRXpre $lr, $sp, -16
+# CHECK-NEXT: BL
+# CHECK-DAG: early-clobber $sp, $lr = LDRXpost $sp, 16
+# CHECK: early-clobber $sp = STRXpre $lr, $sp, -16
+# CHECK-NEXT: BL
+# CHECK-DAG: early-clobber $sp, $lr = LDRXpost $sp, 16
+# CHECK: early-clobber $sp = STRXpre $lr, $sp, -16
+# CHECK-NEXT: BL
+# CHECK-NEXT: early-clobber $sp, $lr = LDRXpost $sp, 16
+name: bar
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $lr, $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x19, $x20, $x21, $x22, $x23, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28
+ $w10 = ORRWri $wzr, 1
+ $w10 = ORRWri $wzr, 1
+ $w10 = ORRWri $wzr, 1
+ $w10 = ORRWri $wzr, 1
+ $w10 = ORRWri $wzr, 1
+ $w12 = ORRWri $wzr, 2
+ bb.1:
+ liveins: $lr, $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x19, $x20, $x21, $x22, $x23, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28
+ $w10 = ORRWri $wzr, 1
+ $w10 = ORRWri $wzr, 1
+ $w10 = ORRWri $wzr, 1
+ $w10 = ORRWri $wzr, 1
+ $w10 = ORRWri $wzr, 1
+ $w12 = ORRWri $wzr, 2
+ bb.2:
+ liveins: $lr, $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x19, $x20, $x21, $x22, $x23, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28
+ $w10 = ORRWri $wzr, 1
+ $w10 = ORRWri $wzr, 1
+ $w10 = ORRWri $wzr, 1
+ $w10 = ORRWri $wzr, 1
+ $w10 = ORRWri $wzr, 1
+ $w12 = ORRWri $wzr, 2
+ bb.3:
+ liveins: $lr, $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x19, $x20, $x21, $x22, $x23, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28
+ RET undef $lr
+
diff --git a/test/CodeGen/AArch64/machine-outliner.ll b/test/CodeGen/AArch64/machine-outliner.ll
index 1b45409b799..9d922c27f88 100644
--- a/test/CodeGen/AArch64/machine-outliner.ll
+++ b/test/CodeGen/AArch64/machine-outliner.ll
@@ -82,17 +82,17 @@ define void @dog() #0 {
; CHECK: .p2align 2
; CHECK-NEXT: [[OUTLINED]]:
; CHECK: orr w8, wzr, #0x1
-; CHECK-NEXT: str w8, [sp, #44]
+; CHECK-NEXT: str w8, [sp, #28]
; CHECK-NEXT: orr w8, wzr, #0x2
-; CHECK-NEXT: str w8, [sp, #40]
+; CHECK-NEXT: str w8, [sp, #24]
; CHECK-NEXT: orr w8, wzr, #0x3
-; CHECK-NEXT: str w8, [sp, #36]
+; CHECK-NEXT: str w8, [sp, #20]
; CHECK-NEXT: orr w8, wzr, #0x4
-; CHECK-NEXT: str w8, [sp, #32]
+; CHECK-NEXT: str w8, [sp, #16]
; CHECK-NEXT: mov w8, #5
-; CHECK-NEXT: str w8, [sp, #28]
+; CHECK-NEXT: str w8, [sp, #12]
; CHECK-NEXT: orr w8, wzr, #0x6
-; CHECK-NEXT: str w8, [sp, #24]
+; CHECK-NEXT: str w8, [sp, #8]
; CHECK-NEXT: ret
attributes #0 = { noredzone "target-cpu"="cyclone" }
diff --git a/test/CodeGen/AArch64/machine-outliner.mir b/test/CodeGen/AArch64/machine-outliner.mir
index 024bee47075..bd1abdccd44 100644
--- a/test/CodeGen/AArch64/machine-outliner.mir
+++ b/test/CodeGen/AArch64/machine-outliner.mir
@@ -28,19 +28,19 @@
# CHECK-LABEL: name: main
# CHECK: BL @OUTLINED_FUNCTION_[[F0:[0-9]+]]
-# CHECK-NEXT: early-clobber $sp, $lr = LDRXpost $sp, 16
+# CHECK-NEXT: $lr = ORRXrs $xzr, $x[[REG:[0-9]+]], 0
# CHECK-NEXT: $x16 = ADDXri $sp, 48, 0
# CHECK-NEXT: STRHHroW $w16, $x9, $w30, 1, 1
# CHECK-NEXT: $lr = ORRXri $xzr, 1
# CHECK: BL @OUTLINED_FUNCTION_[[F0]]
-# CHECK-NEXT: early-clobber $sp, $lr = LDRXpost $sp, 16
+# CHECK-NEXT: $lr = ORRXrs $xzr, $x[[REG]], 0
# CHECK-NEXT: $x16 = ADDXri $sp, 48, 0
# CHECK-NEXT: STRHHroW $w16, $x9, $w30, 1, 1
# CHECK-NEXT: $lr = ORRXri $xzr, 1
# CHECK: BL @OUTLINED_FUNCTION_[[F0]]
-# CHECK-NEXT: early-clobber $sp, $lr = LDRXpost $sp, 16
+# CHECK-NEXT: $lr = ORRXrs $xzr, $x[[REG]], 0
# CHECK-NEXT: $x16 = ADDXri $sp, 48, 0
# CHECK-NEXT: STRHHroW $w16, $x9, $w30, 1, 1
# CHECK-NEXT: $lr = ORRXri $xzr, 1