diff options
-rw-r--r-- | lib/Target/PowerPC/PPCInstrInfo.cpp | 33 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCInstrInfo.h | 4 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCScheduleP7.td | 4 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCScheduleP8.td | 4 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCVSXFMAMutate.cpp | 10 | ||||
-rw-r--r-- | test/CodeGen/PowerPC/ppc-crbits-onoff.ll | 4 | ||||
-rw-r--r-- | test/CodeGen/PowerPC/ppc64-fastcc-fast-isel.ll | 6 | ||||
-rw-r--r-- | test/CodeGen/PowerPC/ppc64-fastcc.ll | 10 | ||||
-rw-r--r-- | test/CodeGen/PowerPC/sjlj.ll | 8 | ||||
-rw-r--r-- | test/CodeGen/PowerPC/tls-store2.ll | 4 | ||||
-rw-r--r-- | test/CodeGen/PowerPC/vsx-fma-m.ll | 30 | ||||
-rw-r--r-- | test/CodeGen/PowerPC/vsx-fma-sp.ll | 13 |
12 files changed, 96 insertions, 34 deletions
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index 696a83860e5..bf6e4029640 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -57,6 +57,10 @@ static cl::opt<bool> VSXSelfCopyCrash("crash-on-ppc-vsx-self-copy", cl::desc("Causes the backend to crash instead of generating a nop VSX copy"), cl::Hidden); +static cl::opt<bool> +UseOldLatencyCalc("ppc-old-latency-calc", cl::Hidden, + cl::desc("Use the old (incorrect) instruction latency calculation")); + // Pin the vtable to this file. void PPCInstrInfo::anchor() {} @@ -103,6 +107,35 @@ PPCInstrInfo::CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, return new ScoreboardHazardRecognizer(II, DAG); } +unsigned PPCInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, + const MachineInstr *MI, + unsigned *PredCost) const { + if (!ItinData || UseOldLatencyCalc) + return PPCGenInstrInfo::getInstrLatency(ItinData, MI, PredCost); + + // The default implementation of getInstrLatency calls getStageLatency, but + // getStageLatency does not do the right thing for us. While we have + // itineraries, most cores are fully pipelined, and so the itineraries only + // express the first part of the pipeline, not every stage. Instead, we need + // to use the listed output operand cycle numbers (taking the maximum over + // all explicit register defs that have a non-negative cycle). 
+ + unsigned Latency = 1; + unsigned DefClass = MI->getDesc().getSchedClass(); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef() || MO.isImplicit()) + continue; + + int Cycle = ItinData->getOperandCycle(DefClass, i); + if (Cycle < 0) + continue; + + Latency = std::max(Latency, (unsigned) Cycle); + } + + return Latency; +} int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, const MachineInstr *DefMI, unsigned DefIdx, diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h index e2d6346aa53..40badae644d 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.h +++ b/lib/Target/PowerPC/PPCInstrInfo.h @@ -95,6 +95,10 @@ public: CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const override; + unsigned getInstrLatency(const InstrItineraryData *ItinData, + const MachineInstr *MI, + unsigned *PredCost = nullptr) const override; + int getOperandLatency(const InstrItineraryData *ItinData, const MachineInstr *DefMI, unsigned DefIdx, const MachineInstr *UseMI, diff --git a/lib/Target/PowerPC/PPCScheduleP7.td b/lib/Target/PowerPC/PPCScheduleP7.td index 635d154d10b..267f5672618 100644 --- a/lib/Target/PowerPC/PPCScheduleP7.td +++ b/lib/Target/PowerPC/PPCScheduleP7.td @@ -315,6 +315,10 @@ def P7Itineraries : ProcessorItineraries< P7_DU3, P7_DU4], 0>, InstrStage<1, [P7_VS1, P7_VS2]>], [5, 1, 1]>, + InstrItinData<IIC_FPAddSub , [InstrStage<1, [P7_DU1, P7_DU2, + P7_DU3, P7_DU4], 0>, + InstrStage<1, [P7_VS1, P7_VS2]>], + [5, 1, 1]>, InstrItinData<IIC_FPCompare , [InstrStage<1, [P7_DU1, P7_DU2, P7_DU3, P7_DU4], 0>, InstrStage<1, [P7_VS1, P7_VS2]>], diff --git a/lib/Target/PowerPC/PPCScheduleP8.td b/lib/Target/PowerPC/PPCScheduleP8.td index 020739baec3..69e6d05c660 100644 --- a/lib/Target/PowerPC/PPCScheduleP8.td +++ b/lib/Target/PowerPC/PPCScheduleP8.td @@ -323,6 +323,10 @@ def P8Itineraries : ProcessorItineraries< 
P8_DU4, P8_DU5, P8_DU6], 0>, InstrStage<1, [P8_FPU1, P8_FPU2]>], [5, 1, 1]>, + InstrItinData<IIC_FPAddSub , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3, + P8_DU4, P8_DU5, P8_DU6], 0>, + InstrStage<1, [P8_FPU1, P8_FPU2]>], + [5, 1, 1]>, InstrItinData<IIC_FPCompare , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3, P8_DU4, P8_DU5, P8_DU6], 0>, InstrStage<1, [P8_FPU1, P8_FPU2]>], diff --git a/lib/Target/PowerPC/PPCVSXFMAMutate.cpp b/lib/Target/PowerPC/PPCVSXFMAMutate.cpp index f352fa647ac..58d3c3d3fa2 100644 --- a/lib/Target/PowerPC/PPCVSXFMAMutate.cpp +++ b/lib/Target/PowerPC/PPCVSXFMAMutate.cpp @@ -136,6 +136,16 @@ protected: // source of the copy, it must still be live here. We can't use // interval testing for a physical register, so as long as we're // walking the MIs we may as well test liveness here. + // + // FIXME: There is a case that occurs in practice, like this: + // %vreg9<def> = COPY %F1; VSSRC:%vreg9 + // ... + // %vreg6<def> = COPY %vreg9; VSSRC:%vreg6,%vreg9 + // %vreg7<def> = COPY %vreg9; VSSRC:%vreg7,%vreg9 + // %vreg9<def,tied1> = XSMADDASP %vreg9<tied0>, %vreg1, %vreg4; VSSRC: + // %vreg6<def,tied1> = XSMADDASP %vreg6<tied0>, %vreg1, %vreg2; VSSRC: + // %vreg7<def,tied1> = XSMADDASP %vreg7<tied0>, %vreg1, %vreg3; VSSRC: + // which prevents an otherwise-profitable transformation. 
bool OtherUsers = false, KillsAddendSrc = false; for (auto J = std::prev(I), JE = MachineBasicBlock::iterator(AddendMI); J != JE; --J) { diff --git a/test/CodeGen/PowerPC/ppc-crbits-onoff.ll b/test/CodeGen/PowerPC/ppc-crbits-onoff.ll index 88648df5fa3..c69f30017d8 100644 --- a/test/CodeGen/PowerPC/ppc-crbits-onoff.ll +++ b/test/CodeGen/PowerPC/ppc-crbits-onoff.ll @@ -15,8 +15,8 @@ entry: ; CHECK-DAG: cmplwi {{[0-9]+}}, 3, 0 ; CHECK-DAG: li [[REG2:[0-9]+]], 1 ; CHECK-DAG: cntlzw [[REG3:[0-9]+]], -; CHECK: isel 3, 0, [[REG2]] -; CHECK: and 3, 3, [[REG3]] +; CHECK: isel [[REG4:[0-9]+]], 0, [[REG2]] +; CHECK: and 3, [[REG4]], [[REG3]] ; CHECK: blr } diff --git a/test/CodeGen/PowerPC/ppc64-fastcc-fast-isel.ll b/test/CodeGen/PowerPC/ppc64-fastcc-fast-isel.ll index f90519836c2..92d6d556738 100644 --- a/test/CodeGen/PowerPC/ppc64-fastcc-fast-isel.ll +++ b/test/CodeGen/PowerPC/ppc64-fastcc-fast-isel.ll @@ -35,7 +35,7 @@ define fastcc double @f2(i64 %g1, double %f1, i64 %g2, double %f2, i64 %g3, doub } define void @cg2(i64 %v) #0 { - tail call fastcc i64 @g1(i64 0, double 0.0, i64 %v, double 0.0, i64 0, double 0.0, i64 0, double 0.0) + call fastcc i64 @g1(i64 0, double 0.0, i64 %v, double 0.0, i64 0, double 0.0, i64 0, double 0.0) ret void ; CHECK-LABEL: @cg2 @@ -44,11 +44,11 @@ define void @cg2(i64 %v) #0 { } define void @cf2(double %v) #0 { - tail call fastcc i64 @g1(i64 0, double 0.0, i64 0, double %v, i64 0, double 0.0, i64 0, double 0.0) + call fastcc i64 @g1(i64 0, double 0.0, i64 0, double %v, i64 0, double 0.0, i64 0, double 0.0) ret void ; CHECK-LABEL: @cf2 -; CHECK: mr 2, 1 +; CHECK: fmr 2, 1 ; CHECK: blr } diff --git a/test/CodeGen/PowerPC/ppc64-fastcc.ll b/test/CodeGen/PowerPC/ppc64-fastcc.ll index bb1365a3b67..69e15d104da 100644 --- a/test/CodeGen/PowerPC/ppc64-fastcc.ll +++ b/test/CodeGen/PowerPC/ppc64-fastcc.ll @@ -521,8 +521,9 @@ define void @cv13(<4 x i32> %v) #0 { ret void ; CHECK-LABEL: @cv13 -; CHECK: li [[REG1:[0-9]+]], 96 -; CHECK: stvx 2, 1, [[REG1]] 
+; CHECK-DAG: li [[REG1:[0-9]+]], 96 +; CHECK-DAG: vor [[REG2:[0-9]+]], 2, 2 +; CHECK: stvx [[REG2]], 1, [[REG1]] ; CHECK: blr } @@ -531,8 +532,9 @@ define void @cv14(<4 x i32> %v) #0 { ret void ; CHECK-LABEL: @cv14 -; CHECK: li [[REG1:[0-9]+]], 128 -; CHECK: stvx 2, 1, [[REG1]] +; CHECK-DAG: li [[REG1:[0-9]+]], 128 +; CHECK-DAG: vor [[REG2:[0-9]+]], 2, 2 +; CHECK: stvx [[REG2]], 1, [[REG1]] ; CHECK: blr } diff --git a/test/CodeGen/PowerPC/sjlj.ll b/test/CodeGen/PowerPC/sjlj.ll index 62403e71196..dcbdd69d5d5 100644 --- a/test/CodeGen/PowerPC/sjlj.ll +++ b/test/CodeGen/PowerPC/sjlj.ll @@ -18,10 +18,10 @@ entry: ; CHECK: addi [[REG]], [[REG]], env_sigill@toc@l ; CHECK: ld 31, 0([[REG]]) ; CHECK: ld [[REG2:[0-9]+]], 8([[REG]]) -; CHECK: ld 1, 16([[REG]]) -; CHECK: mtctr [[REG2]] -; CHECK: ld 30, 32([[REG]]) -; CHECK: ld 2, 24([[REG]]) +; CHECK-DAG: ld 1, 16([[REG]]) +; CHECK-DAG: mtctr [[REG2]] +; CHECK-DAG: ld 30, 32([[REG]]) +; CHECK-DAG: ld 2, 24([[REG]]) ; CHECK: bctr return: ; No predecessors! diff --git a/test/CodeGen/PowerPC/tls-store2.ll b/test/CodeGen/PowerPC/tls-store2.ll index e9aa17e8c0f..649508637f4 100644 --- a/test/CodeGen/PowerPC/tls-store2.ll +++ b/test/CodeGen/PowerPC/tls-store2.ll @@ -29,6 +29,8 @@ entry: ; CHECK: addi 3, {{[0-9]+}}, __once_call@got@tlsgd@l ; CHECK: bl __tls_get_addr(__once_call@tlsgd) ; CHECK-NEXT: nop -; CHECK: std {{[0-9]+}}, 0(3) +; FIXME: We don't really need the copy here either, we could move the store up. 
+; CHECK: mr [[REG1:[0-9]+]], 3 +; CHECK: std {{[0-9]+}}, 0([[REG1]]) declare void @__once_call_impl() diff --git a/test/CodeGen/PowerPC/vsx-fma-m.ll b/test/CodeGen/PowerPC/vsx-fma-m.ll index d85927396e3..4f556b6b79c 100644 --- a/test/CodeGen/PowerPC/vsx-fma-m.ll +++ b/test/CodeGen/PowerPC/vsx-fma-m.ll @@ -49,12 +49,13 @@ entry: ; CHECK-LABEL: @test2 ; CHECK-DAG: li [[C1:[0-9]+]], 8 ; CHECK-DAG: li [[C2:[0-9]+]], 16 -; CHECK-DAG: xsmaddmdp 3, 2, 1 -; CHECK-DAG: xsmaddmdp 4, 2, 1 -; CHECK-DAG: xsmaddadp 1, 2, 5 -; CHECK-DAG: stxsdx 3, 0, 8 -; CHECK-DAG: stxsdx 4, 8, [[C1]] -; CHECK-DAG: stxsdx 1, 8, [[C2]] +; FIXME: We no longer get this because of copy ordering at the MI level. +; CHECX-DAG: xsmaddmdp 3, 2, 1 +; CHECX-DAG: xsmaddmdp 4, 2, 1 +; CHECX-DAG: xsmaddadp 1, 2, 5 +; CHECX-DAG: stxsdx 3, 0, 8 +; CHECX-DAG: stxsdx 4, 8, [[C1]] +; CHECX-DAG: stxsdx 1, 8, [[C2]] ; CHECK: blr ; CHECK-FISL-LABEL: @test2 @@ -213,14 +214,15 @@ entry: ret void ; CHECK-LABEL: @testv2 -; CHECK-DAG: xvmaddmdp 36, 35, 34 -; CHECK-DAG: xvmaddmdp 37, 35, 34 -; CHECK-DAG: li [[C1:[0-9]+]], 16 -; CHECK-DAG: li [[C2:[0-9]+]], 32 -; CHECK-DAG: xvmaddadp 34, 35, 38 -; CHECK-DAG: stxvd2x 36, 0, 3 -; CHECK-DAG: stxvd2x 37, 3, [[C1:[0-9]+]] -; CHECK-DAG: stxvd2x 34, 3, [[C2:[0-9]+]] +; FIXME: We currently don't get this because of copy ordering on the MI level. 
+; CHECX-DAG: xvmaddmdp 36, 35, 34 +; CHECX-DAG: xvmaddmdp 37, 35, 34 +; CHECX-DAG: li [[C1:[0-9]+]], 16 +; CHECX-DAG: li [[C2:[0-9]+]], 32 +; CHECX-DAG: xvmaddadp 34, 35, 38 +; CHECX-DAG: stxvd2x 36, 0, 3 +; CHECX-DAG: stxvd2x 37, 3, [[C1:[0-9]+]] +; CHECX-DAG: stxvd2x 34, 3, [[C2:[0-9]+]] ; CHECK: blr ; CHECK-FISL-LABEL: @testv2 diff --git a/test/CodeGen/PowerPC/vsx-fma-sp.ll b/test/CodeGen/PowerPC/vsx-fma-sp.ll index 1c3e457f92c..b4dd2e1627c 100644 --- a/test/CodeGen/PowerPC/vsx-fma-sp.ll +++ b/test/CodeGen/PowerPC/vsx-fma-sp.ll @@ -42,12 +42,13 @@ entry: ; CHECK-LABEL: @test2sp ; CHECK-DAG: li [[C1:[0-9]+]], 4 ; CHECK-DAG: li [[C2:[0-9]+]], 8 -; CHECK-DAG: xsmaddmsp 3, 2, 1 -; CHECK-DAG: xsmaddmsp 4, 2, 1 -; CHECK-DAG: xsmaddasp 1, 2, 5 -; CHECK-DAG: stxsspx 3, 0, 8 -; CHECK-DAG: stxsspx 4, 8, [[C1]] -; CHECK-DAG: stxsspx 1, 8, [[C2]] +; FIXME: We now miss this because of copy ordering at the MI level. +; CHECX-DAG: xsmaddmsp 3, 2, 1 +; CHECX-DAG: xsmaddmsp 4, 2, 1 +; CHECX-DAG: xsmaddasp 1, 2, 5 +; CHECX-DAG: stxsspx 3, 0, 8 +; CHECX-DAG: stxsspx 4, 8, [[C1]] +; CHECX-DAG: stxsspx 1, 8, [[C2]] ; CHECK: blr ; CHECK-FISL-LABEL: @test2sp |