summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/Target/X86/X86InstrInfo.td16
-rwxr-xr-xlib/Target/X86/X86SchedBroadwell.td9
-rw-r--r--lib/Target/X86/X86SchedHaswell.td9
-rw-r--r--lib/Target/X86/X86SchedSandyBridge.td9
-rw-r--r--lib/Target/X86/X86SchedSkylakeClient.td9
-rwxr-xr-xlib/Target/X86/X86SchedSkylakeServer.td9
-rw-r--r--lib/Target/X86/X86Schedule.td1
-rw-r--r--lib/Target/X86/X86ScheduleAtom.td1
-rw-r--r--lib/Target/X86/X86ScheduleBtVer2.td1
-rw-r--r--lib/Target/X86/X86ScheduleSLM.td1
-rw-r--r--lib/Target/X86/X86ScheduleZnver1.td1
-rw-r--r--utils/TableGen/CodeGenSchedule.cpp92
-rw-r--r--utils/TableGen/CodeGenSchedule.h2
13 files changed, 112 insertions, 48 deletions
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 7509b312c10..bc7afd32d49 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -1750,7 +1750,7 @@ def LAHF : I<0x9F, RawFrm, (outs), (ins), "lahf", []>, // AH = flags
// Bit tests instructions: BT, BTS, BTR, BTC.
let Defs = [EFLAGS] in {
-let SchedRW = [WriteALU] in {
+let SchedRW = [WriteBitTest] in {
def BT16rr : I<0xA3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
"bt{w}\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86bt GR16:$src1, GR16:$src2))]>,
@@ -1783,7 +1783,7 @@ let mayLoad = 1, hasSideEffects = 0, SchedRW = [WriteALULd] in {
[]>, TB, NotMemoryFoldable;
}
-let SchedRW = [WriteALU] in {
+let SchedRW = [WriteBitTest] in {
def BT16ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR16:$src1, i16i8imm:$src2),
"bt{w}\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86bt GR16:$src1, i16immSExt8:$src2))]>,
@@ -1818,7 +1818,7 @@ def BT64mi8 : RIi8<0xBA, MRM4m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
} // SchedRW
let hasSideEffects = 0 in {
-let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in {
+let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in {
def BTC16rr : I<0xBB, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
"btc{w}\t{$src2, $src1|$src1, $src2}", []>,
OpSize16, TB, NotMemoryFoldable;
@@ -1842,7 +1842,7 @@ def BTC64mr : RI<0xBB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
NotMemoryFoldable;
}
-let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in {
+let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in {
def BTC16ri8 : Ii8<0xBA, MRM7r, (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
"btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize16, TB;
def BTC32ri8 : Ii8<0xBA, MRM7r, (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
@@ -1861,7 +1861,7 @@ def BTC64mi8 : RIi8<0xBA, MRM7m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
Requires<[In64BitMode]>;
}
-let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in {
+let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in {
def BTR16rr : I<0xB3, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
"btr{w}\t{$src2, $src1|$src1, $src2}", []>,
OpSize16, TB, NotMemoryFoldable;
@@ -1885,7 +1885,7 @@ def BTR64mr : RI<0xB3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
NotMemoryFoldable;
}
-let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in {
+let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in {
def BTR16ri8 : Ii8<0xBA, MRM6r, (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
"btr{w}\t{$src2, $src1|$src1, $src2}", []>,
OpSize16, TB;
@@ -1908,7 +1908,7 @@ def BTR64mi8 : RIi8<0xBA, MRM6m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
Requires<[In64BitMode]>;
}
-let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in {
+let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in {
def BTS16rr : I<0xAB, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
"bts{w}\t{$src2, $src1|$src1, $src2}", []>,
OpSize16, TB, NotMemoryFoldable;
@@ -1932,7 +1932,7 @@ def BTS64mr : RI<0xAB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
NotMemoryFoldable;
}
-let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in {
+let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in {
def BTS16ri8 : Ii8<0xBA, MRM5r, (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
"bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize16, TB;
def BTS32ri8 : Ii8<0xBA, MRM5r, (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
diff --git a/lib/Target/X86/X86SchedBroadwell.td b/lib/Target/X86/X86SchedBroadwell.td
index 3aeb2da2448..a1945c668d8 100755
--- a/lib/Target/X86/X86SchedBroadwell.td
+++ b/lib/Target/X86/X86SchedBroadwell.td
@@ -137,6 +137,7 @@ def : WriteRes<WriteSETCCStore, [BWPort06,BWPort4,BWPort237]> {
let NumMicroOps = 3;
}
def : WriteRes<WriteLAHFSAHF, [BWPort06]>;
+def : WriteRes<WriteBitTest,[BWPort06]>; // Bit Test instrs
// Bit counts.
defm : BWWriteResPair<WriteBSF, [BWPort1], 3>;
@@ -603,14 +604,6 @@ def BWWriteResGroup6 : SchedWriteRes<[BWPort06]> {
let ResourceCycles = [1];
}
def: InstRW<[BWWriteResGroup6], (instrs CDQ, CQO)>;
-def: InstRW<[BWWriteResGroup6], (instregex "BT(16|32|64)ri8",
- "BT(16|32|64)rr",
- "BTC(16|32|64)ri8",
- "BTC(16|32|64)rr",
- "BTR(16|32|64)ri8",
- "BTR(16|32|64)rr",
- "BTS(16|32|64)ri8",
- "BTS(16|32|64)rr")>;
def BWWriteResGroup7 : SchedWriteRes<[BWPort15]> {
let Latency = 1;
diff --git a/lib/Target/X86/X86SchedHaswell.td b/lib/Target/X86/X86SchedHaswell.td
index 824e9e28a61..9abff9c704e 100644
--- a/lib/Target/X86/X86SchedHaswell.td
+++ b/lib/Target/X86/X86SchedHaswell.td
@@ -150,6 +150,7 @@ def : WriteRes<WriteSETCCStore, [HWPort06,HWPort4,HWPort237]> {
let NumMicroOps = 3;
}
def : WriteRes<WriteLAHFSAHF, [HWPort06]>;
+def : WriteRes<WriteBitTest,[HWPort06]>;
// This is for simple LEAs with one or two input operands.
// The complex ones can only execute on port 1, and they require two cycles on
@@ -895,14 +896,6 @@ def HWWriteResGroup7 : SchedWriteRes<[HWPort06]> {
let ResourceCycles = [1];
}
def: InstRW<[HWWriteResGroup7], (instrs CDQ, CQO)>;
-def: InstRW<[HWWriteResGroup7], (instregex "BT(16|32|64)ri8",
- "BT(16|32|64)rr",
- "BTC(16|32|64)ri8",
- "BTC(16|32|64)rr",
- "BTR(16|32|64)ri8",
- "BTR(16|32|64)rr",
- "BTS(16|32|64)ri8",
- "BTS(16|32|64)rr")>;
def HWWriteResGroup8 : SchedWriteRes<[HWPort15]> {
let Latency = 1;
diff --git a/lib/Target/X86/X86SchedSandyBridge.td b/lib/Target/X86/X86SchedSandyBridge.td
index d43c4e3cb0f..060da449a05 100644
--- a/lib/Target/X86/X86SchedSandyBridge.td
+++ b/lib/Target/X86/X86SchedSandyBridge.td
@@ -145,6 +145,7 @@ def : WriteRes<WriteSETCCStore, [SBPort05,SBPort4,SBPort23]> {
let NumMicroOps = 3;
}
def : WriteRes<WriteLAHFSAHF, [SBPort05]>;
+def : WriteRes<WriteBitTest,[SBPort05]>;
// This is for simple LEAs with one or two input operands.
// The complex ones can only execute on port 1, and they require two cycles on
@@ -570,14 +571,6 @@ def SBWriteResGroup4 : SchedWriteRes<[SBPort05]> {
let ResourceCycles = [1];
}
def: InstRW<[SBWriteResGroup4], (instrs CDQ, CQO)>;
-def: InstRW<[SBWriteResGroup4], (instregex "BT(16|32|64)ri8",
- "BT(16|32|64)rr",
- "BTC(16|32|64)ri8",
- "BTC(16|32|64)rr",
- "BTR(16|32|64)ri8",
- "BTR(16|32|64)rr",
- "BTS(16|32|64)ri8",
- "BTS(16|32|64)rr")>;
def SBWriteResGroup5 : SchedWriteRes<[SBPort15]> {
let Latency = 1;
diff --git a/lib/Target/X86/X86SchedSkylakeClient.td b/lib/Target/X86/X86SchedSkylakeClient.td
index 53d8e63152a..f64f3816a99 100644
--- a/lib/Target/X86/X86SchedSkylakeClient.td
+++ b/lib/Target/X86/X86SchedSkylakeClient.td
@@ -136,6 +136,7 @@ def : WriteRes<WriteSETCCStore, [SKLPort06,SKLPort4,SKLPort237]> {
let NumMicroOps = 3;
}
def : WriteRes<WriteLAHFSAHF, [SKLPort06]>;
+def : WriteRes<WriteBitTest,[SKLPort06]>; //
// Bit counts.
defm : SKLWriteResPair<WriteBSF, [SKLPort1], 3>;
@@ -605,14 +606,6 @@ def SKLWriteResGroup7 : SchedWriteRes<[SKLPort06]> {
let ResourceCycles = [1];
}
def: InstRW<[SKLWriteResGroup7], (instrs CDQ, CQO, CLAC, STAC)>;
-def: InstRW<[SKLWriteResGroup7], (instregex "BT(16|32|64)ri8",
- "BT(16|32|64)rr",
- "BTC(16|32|64)ri8",
- "BTC(16|32|64)rr",
- "BTR(16|32|64)ri8",
- "BTR(16|32|64)rr",
- "BTS(16|32|64)ri8",
- "BTS(16|32|64)rr")>;
def SKLWriteResGroup8 : SchedWriteRes<[SKLPort15]> {
let Latency = 1;
diff --git a/lib/Target/X86/X86SchedSkylakeServer.td b/lib/Target/X86/X86SchedSkylakeServer.td
index 129fc2e7f46..0e2615e3755 100755
--- a/lib/Target/X86/X86SchedSkylakeServer.td
+++ b/lib/Target/X86/X86SchedSkylakeServer.td
@@ -136,6 +136,7 @@ def : WriteRes<WriteSETCCStore, [SKXPort06,SKXPort4,SKXPort237]> {
let NumMicroOps = 3;
}
def : WriteRes<WriteLAHFSAHF, [SKXPort06]>;
+def : WriteRes<WriteBitTest,[SKXPort06]>; //
// Integer shifts and rotates.
defm : SKXWriteResPair<WriteShift, [SKXPort06], 1>;
@@ -618,14 +619,6 @@ def SKXWriteResGroup7 : SchedWriteRes<[SKXPort06]> {
let ResourceCycles = [1];
}
def: InstRW<[SKXWriteResGroup7], (instrs CDQ, CQO, CLAC, STAC)>;
-def: InstRW<[SKXWriteResGroup7], (instregex "BT(16|32|64)ri8",
- "BT(16|32|64)rr",
- "BTC(16|32|64)ri8",
- "BTC(16|32|64)rr",
- "BTR(16|32|64)ri8",
- "BTR(16|32|64)rr",
- "BTS(16|32|64)ri8",
- "BTS(16|32|64)rr")>;
def SKXWriteResGroup8 : SchedWriteRes<[SKXPort15]> {
let Latency = 1;
diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td
index 44079bf566d..d1ec8c66d20 100644
--- a/lib/Target/X86/X86Schedule.td
+++ b/lib/Target/X86/X86Schedule.td
@@ -142,6 +142,7 @@ def WriteFCMOV : SchedWrite; // X87 conditional move.
def WriteSETCC : SchedWrite; // Set register based on condition code.
def WriteSETCCStore : SchedWrite;
def WriteLAHFSAHF : SchedWrite; // Load/Store flags in AH.
+def WriteBitTest : SchedWrite; // Bit Test - TODO add memory folding support
// Integer shifts and rotates.
defm WriteShift : X86SchedWritePair;
diff --git a/lib/Target/X86/X86ScheduleAtom.td b/lib/Target/X86/X86ScheduleAtom.td
index 2880d47ce2d..86cb0aaa507 100644
--- a/lib/Target/X86/X86ScheduleAtom.td
+++ b/lib/Target/X86/X86ScheduleAtom.td
@@ -108,6 +108,7 @@ def : WriteRes<WriteLAHFSAHF, [AtomPort01]> {
let Latency = 2;
let ResourceCycles = [2];
}
+def : WriteRes<WriteBitTest,[AtomPort01]>;
defm : X86WriteResUnsupported<WriteIMulH>;
diff --git a/lib/Target/X86/X86ScheduleBtVer2.td b/lib/Target/X86/X86ScheduleBtVer2.td
index 44687e31984..499e3454c03 100644
--- a/lib/Target/X86/X86ScheduleBtVer2.td
+++ b/lib/Target/X86/X86ScheduleBtVer2.td
@@ -188,6 +188,7 @@ defm : X86WriteRes<WriteFCMOV, [JFPU0, JFPA], 3, [1,1], 1>; // x87 conditional m
def : WriteRes<WriteSETCC, [JALU01]>; // Setcc.
def : WriteRes<WriteSETCCStore, [JALU01,JSAGU]>;
def : WriteRes<WriteLAHFSAHF, [JALU01]>;
+def : WriteRes<WriteBitTest,[JALU01]>;
// This is for simple LEAs with one or two input operands.
def : WriteRes<WriteLEA, [JALU01]>;
diff --git a/lib/Target/X86/X86ScheduleSLM.td b/lib/Target/X86/X86ScheduleSLM.td
index 9107e9fdba2..d45136c479e 100644
--- a/lib/Target/X86/X86ScheduleSLM.td
+++ b/lib/Target/X86/X86ScheduleSLM.td
@@ -120,6 +120,7 @@ def : WriteRes<WriteSETCCStore, [SLM_IEC_RSV01, SLM_MEC_RSV]> {
let ResourceCycles = [2,1];
}
def : WriteRes<WriteLAHFSAHF, [SLM_IEC_RSV01]>;
+def : WriteRes<WriteBitTest,[SLM_IEC_RSV01]>;
// This is for simple LEAs with one or two input operands.
// The complex ones can only execute on port 1, and they require two cycles on
diff --git a/lib/Target/X86/X86ScheduleZnver1.td b/lib/Target/X86/X86ScheduleZnver1.td
index 5c8ed43b2a5..8c13dc13128 100644
--- a/lib/Target/X86/X86ScheduleZnver1.td
+++ b/lib/Target/X86/X86ScheduleZnver1.td
@@ -198,6 +198,7 @@ defm : ZnWriteResPair<WriteCMOV2, [ZnALU], 1>;
def : WriteRes<WriteSETCC, [ZnALU]>;
def : WriteRes<WriteSETCCStore, [ZnALU, ZnAGU]>;
defm : X86WriteRes<WriteLAHFSAHF, [ZnALU], 2, [1], 2>;
+def : WriteRes<WriteBitTest,[ZnALU]>;
// Bit counts.
defm : ZnWriteResPair<WriteBSF, [ZnALU], 3>;
diff --git a/utils/TableGen/CodeGenSchedule.cpp b/utils/TableGen/CodeGenSchedule.cpp
index 9331fadf409..ebfdd36577a 100644
--- a/utils/TableGen/CodeGenSchedule.cpp
+++ b/utils/TableGen/CodeGenSchedule.cpp
@@ -21,6 +21,7 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Regex.h"
#include "llvm/Support/raw_ostream.h"
@@ -33,6 +34,16 @@ using namespace llvm;
#define DEBUG_TYPE "subtarget-emitter"
+#ifdef EXPENSIVE_CHECKS
+// FIXME: TableGen is failed iff EXPENSIVE_CHECKS defined
+static constexpr bool OptCheckSchedClasses = true;
+#else
+// FIXME: the default value should be false
+static cl::opt<bool> OptCheckSchedClasses(
+ "check-sched-class-table", cl::init(true), cl::Hidden,
+ cl::desc("Check sched class table on different types of inconsistencies"));
+#endif
+
#ifndef NDEBUG
static void dumpIdxVec(ArrayRef<unsigned> V) {
for (unsigned Idx : V)
@@ -223,6 +234,7 @@ CodeGenSchedModels::CodeGenSchedModels(RecordKeeper &RK,
collectOptionalProcessorInfo();
checkCompleteness();
+ checkSchedClasses();
}
void CodeGenSchedModels::collectRetireControlUnits() {
@@ -699,6 +711,86 @@ void CodeGenSchedModels::collectSchedClasses() {
}
}
+void CodeGenSchedModels::checkSchedClasses() {
+ if (!OptCheckSchedClasses)
+ return;
+
+ std::string str;
+ raw_string_ostream OS(str);
+
+ // Check each instruction for each model to see if its overridden too often.
+ // Iff YES it's a candidate for more fine-grained Sched Class.
+ for (const CodeGenInstruction *Inst : Target.getInstructionsByEnumValue()) {
+ StringRef InstName = Inst->TheDef->getName();
+ unsigned SCIdx = getSchedClassIdx(*Inst);
+ if (!SCIdx)
+ continue;
+ CodeGenSchedClass &SC = getSchedClass(SCIdx);
+ if (SC.Writes.empty())
+ continue;
+ const RecVec &RWDefs = SchedClasses[SCIdx].InstRWs;
+ if (RWDefs.empty())
+ continue;
+ // FIXME: what should be threshold here?
+ if (RWDefs.size() > (ProcModels.size() / 2)) {
+ // FIXME: this dump hangs the execution !!!
+ // SC.dump(&Target.getSchedModels());
+ OS << "SchedRW machine model for inst '" << InstName << "' (";
+ for (auto I : SC.Writes)
+ OS << " " << SchedWrites[I].Name;
+ for (auto I : SC.Reads)
+ OS << " " << SchedReads[I].Name;
+ OS << " ) should be updated /improvedbecause it's overriden " << RWDefs.size()
+ << " times out of " << ProcModels.size() << " models:\n\t";
+ for (Record *RWDef : RWDefs)
+ OS << " " << getProcModel(RWDef->getValueAsDef("SchedModel")).ModelName;
+ PrintWarning(OS.str());
+ str.clear();
+ }
+
+ // TODO: here we should check latency/uop in SC vs. RWDef. Maybe we
+ // should do it iff RWDefs.size() == 1 only.
+ // Iff latency/uop are the same then warn about unnecessary redefine.
+ if (RWDefs.size()) {
+ for (Record *RWDef : RWDefs) {
+ IdxVec Writes;
+ IdxVec Reads;
+ findRWs(RWDef->getValueAsListOfDefs("OperandReadWrites"), Writes,
+ Reads);
+
+ if ((Writes.size() == SC.Writes.size()) &&
+ (Reads.size() == SC.Reads.size())) {
+ // TODO: do we need sorting Write & Reads?
+ for (unsigned I = 0, S = SC.Writes.size(); I < S; I++) {
+ auto SCSchedW = SchedWrites[SC.Writes[I]];
+ auto SchedW = SchedWrites[Writes[I]];
+ if (!SCSchedW.TheDef || !SchedW.TheDef)
+ continue;
+ const RecordVal *R = SCSchedW.TheDef->getValue("Latency");
+ // FIXME: We should deal with default Latency here
+ if (!R || !R->getValue())
+ continue;
+ auto SCLat = SCSchedW.TheDef->getValueAsInt("Latency");
+ auto SCuOp = SCSchedW.TheDef->getValueAsInt("NumMicroOps");
+ auto Lat = SchedW.TheDef->getValueAsInt("Latency");
+ auto uOp = SchedW.TheDef->getValueAsInt("NumMicroOps");
+ if ((SCLat == Lat) && (SCuOp == uOp))
+ OS << "Overridden verion of inst '" << InstName
+ << "' has the same latency & uOp values as the original one "
+ "for model '"
+ << getProcModel(RWDef->getValueAsDef("SchedModel")).ModelName
+ << "'\n";
+ }
+ if (!str.empty()) {
+ PrintWarning(OS.str());
+ str.clear();
+ }
+ }
+ }
+ }
+ }
+}
+
// Get the SchedClass index for an instruction.
unsigned
CodeGenSchedModels::getSchedClassIdx(const CodeGenInstruction &Inst) const {
diff --git a/utils/TableGen/CodeGenSchedule.h b/utils/TableGen/CodeGenSchedule.h
index 07c11596ade..ce53d67bd81 100644
--- a/utils/TableGen/CodeGenSchedule.h
+++ b/utils/TableGen/CodeGenSchedule.h
@@ -443,6 +443,8 @@ private:
void collectSchedClasses();
+ void checkSchedClasses();
+
void collectRetireControlUnits();
void collectRegisterFiles();