summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTony Jiang <jtony@ca.ibm.com>2017-07-11 16:42:20 +0000
committerTony Jiang <jtony@ca.ibm.com>2017-07-11 16:42:20 +0000
commitf6179755b35936e58a2098f0ddc3ce7113385448 (patch)
tree5fc38c186ec12072247ac05d979a8126ca87fe1b
parent609a5df2257e4da2336f0aa162e03fd8429fc4cd (diff)
[PPC] Fix two bugs in frame lowering.
1. The program storage region of the red zone available to compilers is 288 bytes rather than 224 bytes. 2. The formula for aligning a negative number should be y = x & ~(n-1) rather than y = (x + (n-1)) & ~(n-1). Differential Revision: https://reviews.llvm.org/D34337 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@307672 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/PowerPC/PPCFrameLowering.cpp36
-rw-r--r--lib/Target/PowerPC/PPCSubtarget.h7
-rw-r--r--test/CodeGen/PowerPC/ppc-redzone-alignment-bug.ll32
-rw-r--r--test/CodeGen/PowerPC/svr4-redzone.ll6
-rw-r--r--test/CodeGen/PowerPC/tailcall1-64.ll6
5 files changed, 66 insertions, 21 deletions
diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp
index c2c115cb6da..b49c3345a17 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -435,22 +435,19 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
- // If we are a leaf function, and use up to 224 bytes of stack space,
- // don't have a frame pointer, calls, or dynamic alloca then we do not need
- // to adjust the stack pointer (we fit in the Red Zone).
- // The 32-bit SVR4 ABI has no Red Zone. However, it can still generate
- // stackless code if all local vars are reg-allocated.
- bool DisableRedZone = MF.getFunction()->hasFnAttribute(Attribute::NoRedZone);
unsigned LR = RegInfo->getRARegister();
- if (!DisableRedZone &&
- (Subtarget.isPPC64() || // 32-bit SVR4, no stack-
- !Subtarget.isSVR4ABI() || // allocated locals.
- FrameSize == 0) &&
- FrameSize <= 224 && // Fits in red zone.
- !MFI.hasVarSizedObjects() && // No dynamic alloca.
- !MFI.adjustsStack() && // No calls.
- !MustSaveLR(MF, LR) &&
- !RegInfo->hasBasePointer(MF)) { // No special alignment.
+ bool DisableRedZone = MF.getFunction()->hasFnAttribute(Attribute::NoRedZone);
+ bool CanUseRedZone = !MFI.hasVarSizedObjects() && // No dynamic alloca.
+ !MFI.adjustsStack() && // No calls.
+ !MustSaveLR(MF, LR) && // No need to save LR.
+ !RegInfo->hasBasePointer(MF); // No special alignment.
+
+ // Note: for PPC32 SVR4ABI (Non-DarwinABI), we can still generate stackless
+ // code if all local vars are reg-allocated.
+ bool FitsInRedZone = FrameSize <= Subtarget.getRedZoneSize();
+
+ // Check whether we can skip adjusting the stack pointer (by using red zone)
+ if (!DisableRedZone && CanUseRedZone && FitsInRedZone) {
// No need for frame
if (UpdateMF)
MFI.setStackSize(0);
@@ -1869,8 +1866,13 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
}
if (HasVRSaveArea) {
- // Insert alignment padding, we need 16-byte alignment.
- LowerBound = (LowerBound - 15) & ~(15);
+ // Insert alignment padding, we need 16-byte alignment. Note: for a positive
+ // number the alignment formula is: y = (x + (n-1)) & (~(n-1)). But since
+ // we are using a negative number here (the stack grows downward), we should
+ // use the formula: y = x & (~(n-1)), where x is the size before aligning, n
+ // is the alignment size (n = 16 here) and y is the size after aligning.
+ assert(LowerBound <= 0 && "Expect LowerBound have a non-positive value!");
+ LowerBound &= ~(15);
for (unsigned i = 0, e = VRegs.size(); i != e; ++i) {
int FI = VRegs[i].getFrameIdx();
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
index 5a97f595ad8..90d11f46a38 100644
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -272,6 +272,13 @@ public:
return 16;
}
+
+ // Returns the red zone size in bytes: 224 for DarwinABI, 288 for PPC64
+ // SVR4ABI, and 0 for PPC32 SVR4ABI (Non-DarwinABI), which has no red zone.
+ unsigned getRedZoneSize() const {
+ return isDarwinABI() ? 224 : (isPPC64() ? 288 : 0);
+ }
+
bool hasHTM() const { return HasHTM; }
bool hasFusion() const { return HasFusion; }
bool hasFloat128() const { return HasFloat128; }
diff --git a/test/CodeGen/PowerPC/ppc-redzone-alignment-bug.ll b/test/CodeGen/PowerPC/ppc-redzone-alignment-bug.ll
new file mode 100644
index 00000000000..87b45beeab7
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc-redzone-alignment-bug.ll
@@ -0,0 +1,32 @@
+; Note: the formula for aligning a negative number should be y = x & ~(n-1) rather than y = (x + (n-1)) & ~(n-1).
+; After patch https://reviews.llvm.org/D34337, we could save 16 bytes in the best case.
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s -check-prefix=CHECK-BE
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s -check-prefix=CHECK-LE
+
+define signext i32 @bar(i32 signext %ii) {
+entry:
+ %0 = tail call i32 asm sideeffect "add $0, $1, $2\0A", "=r,r,r,~{f14},~{r15},~{v20}"(i32 %ii, i32 10)
+ ret i32 %0
+; Before the fix by patch D34337:
+; stdu 1, -544(1)
+; std 15, 264(1)
+; stfd 14, 400(1)
+; stdu 1, -560(1)
+; std 15, 280(1)
+; stfd 14, 416(1)
+
+; After the fix by patch D34337:
+; CHECK-LE: stdu 1, -528(1)
+; CHECK-LE:std 15, 248(1)
+; CHECK-LE:stfd 14, 384(1)
+; CHECK-BE: stdu 1, -544(1)
+; CHECK-BE:std 15, 264(1)
+; CHECK-BE:stfd 14, 400(1)
+}
+
+define signext i32 @foo() {
+entry:
+ %call = tail call signext i32 @bar(i32 signext 5)
+ ret i32 %call
+}
+
diff --git a/test/CodeGen/PowerPC/svr4-redzone.ll b/test/CodeGen/PowerPC/svr4-redzone.ll
index 7bb6cc180c9..26c4410ded6 100644
--- a/test/CodeGen/PowerPC/svr4-redzone.ll
+++ b/test/CodeGen/PowerPC/svr4-redzone.ll
@@ -29,11 +29,11 @@ entry:
define i8* @bigstack() nounwind {
entry:
- %0 = alloca i8, i32 230
+ %0 = alloca i8, i32 290
ret i8* %0
}
; PPC32-LABEL: bigstack:
-; PPC32: stwu 1, -240(1)
+; PPC32: stwu 1, -304(1)
; PPC64-LABEL: bigstack:
-; PPC64: stdu 1, -288(1)
+; PPC64: stdu 1, -352(1)
diff --git a/test/CodeGen/PowerPC/tailcall1-64.ll b/test/CodeGen/PowerPC/tailcall1-64.ll
index 3dc2672556e..21d6046a30c 100644
--- a/test/CodeGen/PowerPC/tailcall1-64.ll
+++ b/test/CodeGen/PowerPC/tailcall1-64.ll
@@ -1,4 +1,5 @@
; RUN: llc -relocation-model=static -verify-machineinstrs < %s -march=ppc64 -tailcallopt | grep TC_RETURNd8
+; RUN: llc -relocation-model=static -verify-machineinstrs -march=ppc64 < %s | FileCheck %s
define fastcc i32 @tailcallee(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
entry:
ret i32 %a3
@@ -6,6 +7,9 @@ entry:
define fastcc i32 @tailcaller(i32 %in1, i32 %in2) {
entry:
- %tmp11 = tail call fastcc i32 @tailcallee( i32 %in1, i32 %in2, i32 %in1, i32 %in2 ) ; <i32> [#uses=1]
+ %tmp11 = tail call fastcc i32 @tailcallee( i32 %in1, i32 %in2, i32 %in1, i32 %in2 )
ret i32 %tmp11
+; CHECK-LABEL: tailcaller
+; CHECK-NOT: stdu
+; CHECK: b tailcallee
}