summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/llvm/Support/AArch64TargetParser.def2
-rw-r--r--lib/Target/AArch64/AArch64.td17
-rw-r--r--lib/Target/AArch64/AArch64SchedXGene.td2372
-rw-r--r--lib/Target/AArch64/AArch64Subtarget.cpp3
-rw-r--r--lib/Target/AArch64/AArch64Subtarget.h3
5 files changed, 2396 insertions, 1 deletions
diff --git a/include/llvm/Support/AArch64TargetParser.def b/include/llvm/Support/AArch64TargetParser.def
index 30c7924ea5f..3b23defd9b6 100644
--- a/include/llvm/Support/AArch64TargetParser.def
+++ b/include/llvm/Support/AArch64TargetParser.def
@@ -98,6 +98,8 @@ AARCH64_CPU_NAME("thunderxt81", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_CRC | AArch64::AEK_PROFILE))
AARCH64_CPU_NAME("thunderxt83", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_CRC | AArch64::AEK_PROFILE))
+AARCH64_CPU_NAME("xgene", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
+ (AArch64::AEK_SIMD | AArch64::AEK_CRC | AArch64::AEK_CRYPTO))
// Invalid CPU
AARCH64_CPU_NAME("invalid", INVALID, FK_INVALID, true, AArch64::AEK_INVALID)
#undef AARCH64_CPU_NAME
diff --git a/lib/Target/AArch64/AArch64.td b/lib/Target/AArch64/AArch64.td
index 75fb937de9b..6d8bf02e075 100644
--- a/lib/Target/AArch64/AArch64.td
+++ b/lib/Target/AArch64/AArch64.td
@@ -196,6 +196,7 @@ include "AArch64SchedKryo.td"
include "AArch64SchedM1.td"
include "AArch64SchedThunderX.td"
include "AArch64SchedThunderX2T99.td"
+include "AArch64SchedXGene.td"
def ProcA35 : SubtargetFeature<"a35", "ARMProcFamily", "CortexA35",
"Cortex-A35 ARM processors", [
@@ -429,6 +430,21 @@ def ProcThunderXT83 : SubtargetFeature<"thunderxt83", "ARMProcFamily",
FeaturePredictableSelectIsExpensive,
FeatureNEON]>;
+def ProcXGene : SubtargetFeature<"xgene", "ARMProcFamily", "XGene",
+ "X-Gene", [ // Features enabled together with the xgene processor family.
+ FeatureBalanceFPOps,
+ FeatureCRC,
+ FeatureCrypto,
+ FeatureCustomCheapAsMoveHandling,
+ FeatureFPARMv8,
+ FeatureFullFP16, // NOTE(review): "xgene" is ARMV8A with no FP16 ext in the .def; confirm.
+ FeatureNEON,
+ FeaturePerfMon,
+ FeaturePostRAScheduler,
+ FeatureUseAA
+ ]>;
+
+
def : ProcessorModel<"generic", NoSchedModel, [
FeatureFPARMv8,
FeatureFuseAES,
@@ -460,6 +476,7 @@ def : ProcessorModel<"thunderxt81", ThunderXT8XModel, [ProcThunderXT81]>;
def : ProcessorModel<"thunderxt83", ThunderXT8XModel, [ProcThunderXT83]>;
// Cavium ThunderX2T9X Processors. Formerly Broadcom Vulcan.
def : ProcessorModel<"thunderx2t99", ThunderX2T99Model, [ProcThunderX2T99]>;
+def : ProcessorModel<"xgene", XGeneModel, [ProcXGene]>; // X-Gene; XGeneModel comes from AArch64SchedXGene.td
//===----------------------------------------------------------------------===//
// Assembly parser
diff --git a/lib/Target/AArch64/AArch64SchedXGene.td b/lib/Target/AArch64/AArch64SchedXGene.td
new file mode 100644
index 00000000000..772451382f7
--- /dev/null
+++ b/lib/Target/AArch64/AArch64SchedXGene.td
@@ -0,0 +1,2372 @@
+//==- AArch64SchedXGene.td - X-Gene Scheduling Definitions -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for the ARM X-Gene processors.
+//
+//===----------------------------------------------------------------------===//
+
+// ===---------------------------------------------------------------------===//
+// The following definitions describe the simpler per-operand machine model.
+// This works with MachineScheduler. See MCSchedModel.h for details.
+
+// X-Gene machine model for scheduling and other instruction cost heuristics.
+def XGeneModel : SchedMachineModel {
+ let MicroOpBufferSize = 64;// Value of 64 confirmed by APM
+ let IssueWidth = 4; // 4 micro-ops are dispatched per cycle.
+ let LoadLatency = 5; // Optimistic load latency assuming bypass.
+ // This is overridden by OperandCycles if the
+ // Itineraries are queried instead.
+ let MispredictPenalty = 64;// Determined by experiments
+
+ // Enable partial & runtime unrolling. The magic number is chosen based on
+ // experiments and benchmarking data.
+ // Tried with 8, 12, 16, 24; 12 seems to be the best for CoreMark: any value
+ // but twelve gives at least -2% (DO NOT CHANGE, I guess).
+ let LoopMicroOpBufferSize = 4; // NOTE(review): 4, not the 12 cited above - confirm. TODO: try with high values such as 50
+ let CompleteModel = 1; // All instructions are expected to have sched info.
+ list<Predicate> UnsupportedFeatures = [HasSVE]; // SVE is not covered here.
+}
+
+//===----------------------------------------------------------------------===//
+// Define each kind of processor resource and number available.
+
+// A BufferSize of 16 gave good results in both CoreMark and SPEC (e.g. gobmk),
+// so keep it at 16 for every unit below except the integer ALUs, which use 20.
+def XGeneUnitB : ProcResource<1> { let BufferSize = 16; } // Branch
+def XGeneUnitLd : ProcResource<1> { let BufferSize = 16; } // Load
+def XGeneUnitSt : ProcResource<1> { let BufferSize = 16; } // Store
+def XGeneUnitIXn : ProcResource<2> { let BufferSize = 20; } // Int ALU (2 units)
+def XGeneUnitFSU : ProcResource<1> { let BufferSize = 16; } // Float ALU
+def XGeneUnitFDiv : ProcResource<1> { let BufferSize = 16; } // Float Division. NOTE(review): no write in this part of the file uses it - confirm.
+
+// On this machine there are two arithmetic units, but only one of them (IXB)
+// can run all instructions; the other runs only a subset. This is modeled with
+// a third, dummy resource used as a "lock": instructions restricted to IXB also
+// consume the lock, which limits the scheduling of those instructions.
+// NOTE(review): the TODO to try a lower BufferSize (10) looks already applied.
+def XGeneLockIXB : ProcResource<1> { let BufferSize = 10; } // Int ALU B lock
+
+// On this machine, int division and multiplication ops can each be issued only
+// once every two cycles (independently of one another); these locks are used
+// to model this particularity.
+def XGeneLockDiv : ProcResource<1> { let BufferSize = 16; } // Int Division lock
+def XGeneLockMul : ProcResource<1> { let BufferSize = 16; } // Int Multiply lock
+
+// On this machine, sqrt and div instructions cannot be issued at the same time
+// (the restriction covers both together), so one lock is shared by the two.
+def XGeneLockFInst : ProcResource<1> { let BufferSize = 16; } // Float div/sqrt lock
+
+//===----------------------------------------------------------------------===//
+// Subtarget-specific SchedWrite types which both map the ProcResources and
+// set the latency.
+
+let SchedModel = XGeneModel in {
+
+// ALU instructions which can run on both ALU units
+def : WriteRes<WriteI, []>; // NOTE(review): the resource lists here are empty;
+def : WriteRes<WriteImm, []>; // presumably the units are assigned by overrides
+def : WriteRes<WriteIEReg, []>; // elsewhere in the file - confirm.
+// ALU instructions which are restricted to IXB
+def : WriteRes<WriteISReg, []>;
+def : WriteRes<WriteIS, []>;
+def : WriteRes<WriteExtr, []>;
+
+// MAC (multiply-accumulate) instructions can run only on IXB
+def : WriteRes<WriteIM32, []>;
+def : WriteRes<WriteIM64, []>;
+
+// DIV instructions can run only on IXB
+def : WriteRes<WriteID32, []>;
+def : WriteRes<WriteID64, []>;
+
+// Load
+def : WriteRes<WriteLD, []>;
+def : WriteRes<WriteLDIdx, []>;
+def : WriteRes<WriteLDHi, []>;
+
+// Pre/post indexing is accounted for in each individual instruction
+def : WriteRes<WriteAdr, []>;
+
+// Store
+def : WriteRes<WriteST, []>;
+def : WriteRes<WriteSTP, []>;
+def : WriteRes<WriteSTIdx, []>;
+def : WriteRes<WriteSTX, []>;
+
+// WriteAtomic - not supported (marked Unsupported for this model)
+def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
+
+// Branch, system, barrier and hint writes: branch unit, always no latency
+def : WriteRes<WriteBr, [XGeneUnitB]> { let Latency = 0; }
+def : WriteRes<WriteBrReg, [XGeneUnitB]> { let Latency = 0; }
+def : WriteRes<WriteSys, [XGeneUnitB]> { let Latency = 0; }
+def : WriteRes<WriteBarrier, [XGeneUnitB]> { let Latency = 0; }
+def : WriteRes<WriteHint, [XGeneUnitB]> { let Latency = 0; }
+
+// FP ALU and vector (WriteV) writes; resource lists are empty, as above
+def : WriteRes<WriteF, []>;
+def : WriteRes<WriteFCmp, []>;
+def : WriteRes<WriteFCvt, []>;
+def : WriteRes<WriteFCopy, []>;
+def : WriteRes<WriteFImm, []>;
+def : WriteRes<WriteV, []>;
+
+// FP Mul, Div (Sqrt presumably shares WriteFDiv - TODO confirm)
+def : WriteRes<WriteFMul, []>;
+def : WriteRes<WriteFDiv, []>;
+
+//---
+// AdvSIMD Data Processing (Scalar FP); all entries are single micro-ops on FSU
+//---
+def XGeneWriteF1Asm : SchedWriteRes<[XGeneUnitFSU]> {
+ let Latency = 5;
+ let ResourceCycles = [1];
+ let NumMicroOps = 1; }
+def XGeneWriteF1Adre : SchedWriteRes<[XGeneUnitFSU]> {
+ let Latency = 3;
+ let ResourceCycles = [1];
+ let NumMicroOps = 1; }
+def XGeneWriteF1Asre : SchedWriteRes<[XGeneUnitFSU]> {
+ let Latency = 5;
+ let ResourceCycles = [1];
+ let NumMicroOps = 1; }
+def XGeneWriteF1Falu : SchedWriteRes<[XGeneUnitFSU]> {
+ let Latency = 5;
+ let ResourceCycles = [1];
+ let NumMicroOps = 1; }
+def XGeneWriteF1Fcmp : SchedWriteRes<[XGeneUnitFSU]> {
+ let Latency = 10;
+ let ResourceCycles = [1];
+ let NumMicroOps = 1; }
+def XGeneWriteF1Fcvt : SchedWriteRes<[XGeneUnitFSU]> {
+ let Latency = 5;
+ let ResourceCycles = [1];
+ let NumMicroOps = 1; }
+def XGeneWriteF1Fdivs : SchedWriteRes<[XGeneUnitFSU, XGeneLockFInst]> {
+ let Latency = 22;
+ let ResourceCycles = [8, 22]; // FSU for 8 cycles; div/sqrt lock for all 22
+ let NumMicroOps = 1; }
+def XGeneWriteF1Fdivd : SchedWriteRes<[XGeneUnitFSU, XGeneLockFInst]> {
+ let Latency = 28;
+ let ResourceCycles = [11, 28]; // FSU for 11 cycles; div/sqrt lock for all 28
+ let NumMicroOps = 1; }
+def XGeneWriteF1Fhcvt : SchedWriteRes<[XGeneUnitFSU]> {
+ let Latency = 3;
+ let ResourceCycles = [1];
+ let NumMicroOps = 1; }
+def XGeneWriteF1Fmov : SchedWriteRes<[XGeneUnitFSU]> {
+ let Latency = 2;
+ let ResourceCycles = [1];
+ let NumMicroOps = 1; }
+def XGeneWriteF1Fsel : SchedWriteRes<[XGeneUnitFSU]> {
+ let Latency = 3;
+ let ResourceCycles = [1];
+ let NumMicroOps = 1; }
+def XGeneWriteF1Fsqrs : SchedWriteRes<[XGeneUnitFSU, XGeneLockFInst]> {
+ let Latency = 22;
+ let ResourceCycles = [8, 22]; // FSU for 8 cycles; div/sqrt lock for all 22
+ let NumMicroOps = 1; }
+def XGeneWriteF1Fsqrd : SchedWriteRes<[XGeneUnitFSU, XGeneLockFInst]> {
+ let Latency = 38;
+ let ResourceCycles = [17, 38]; // FSU for 17 cycles; div/sqrt lock for all 38
+ let NumMicroOps = 1; }
+// Instructions with store ops are extra-special because the chip will be using
+// data bypass; latencies are measured from when the registers become available
+// and are as follows:
+// for int, 1 for the address register, -1 for the data register
+// for float, 4 for the address register, 1 for the data register
+// for complex, 4 for the address register, 2 for the data register
+// The latencies below are taken from data register availability.
+def XGeneWriteF1St1Lf : SchedWriteRes<[XGeneUnitLd, XGeneUnitSt]> {
+ let Latency = 9;
+ let ResourceCycles = [1, 1];
+ let NumMicroOps = 2; }
+def XGeneWriteF1Sf1Ld : SchedWriteRes<[XGeneUnitLd, XGeneUnitSt]> {
+ let Latency = 6;
+ let ResourceCycles = [1, 1];
+ let NumMicroOps = 2; }
+def XGeneWriteF1Fcvt1Sf1Ld : SchedWriteRes<[XGeneUnitFSU, XGeneUnitLd, XGeneUnitSt]> {
+ let Latency = 11; // 6 (Sf + Ld, as above) + 5 (Fcvt)
+ let ResourceCycles = [1, 1, 1];
+ let NumMicroOps = 3; }
+def XGeneWriteF1St1Lf1Falu : SchedWriteRes<[XGeneUnitFSU, XGeneUnitLd, XGeneUnitSt]> {
+ let Latency = 14; // 9 (St + Lf, as above) + 5 (Falu)
+ let ResourceCycles = [1, 1, 1];
+ let NumMicroOps = 3; }
+
+//---
+// Load instructions
+// NOTE: the way load latencies are calculated here is as follows:
+// biggest load first: 5 (int - Ld), 10 (float - Lf) or 11 (complex - Lc)
+// arithmetic is parallelized with the loads, so it does not affect latency
+// 1 for each other load - since they are pipelined, the only thing which
+// further contributes to the latency is the issue time
+//---
+
+// Integer loads (Ld: latency 5 for the first load, +1 per additional load)
+def XGeneWriteLD1Ld : SchedWriteRes<[XGeneUnitLd]> {
+ let Latency = 5;
+ let ResourceCycles = [1];
+ let NumMicroOps = 1; }
+def XGeneWriteLD1LdLd : SchedWriteRes<[XGeneUnitLd]> {
+ let Latency = 6;
+ let ResourceCycles = [2];
+ let NumMicroOps = 2; }
+def XGeneWriteLD1LdAlu : SchedWriteRes<[XGeneUnitLd, XGeneUnitIXn]> {
+ let Latency = 5;
+ let ResourceCycles = [1, 1];
+ let NumMicroOps = 2; }
+def XGeneWriteLD1LdLdAlu : SchedWriteRes<[XGeneUnitLd, XGeneUnitIXn]> {
+ let Latency = 6;
+ let ResourceCycles = [2, 1];
+ let NumMicroOps = 3; }
+def XGeneWriteLD1Ld1Sbfm : SchedWriteRes<[XGeneUnitLd, XGeneUnitIXn]> {
+ let Latency = 6; // 5 (load) + 1 (sbfm)
+ let ResourceCycles = [1, 1];
+ let NumMicroOps = 2; }
+def XGeneWriteLD1Ld1SbfmAlu : SchedWriteRes<[XGeneUnitLd, XGeneUnitIXn]> {
+ let Latency = 6;
+ let ResourceCycles = [1, 2];
+ let NumMicroOps = 3; }
+def XGeneWriteLD1Ld1LdSbfm1Sbfm : SchedWriteRes<[XGeneUnitLd, XGeneUnitIXn]> {
+ let Latency = 7;
+ let ResourceCycles = [2, 2];
+ let NumMicroOps = 4; }
+def XGeneWriteLD1Ld1LdSbfm1SbfmAlu : SchedWriteRes<[XGeneUnitLd, XGeneUnitIXn]> {
+ let Latency = 7;
+ let ResourceCycles = [2, 3];
+ let NumMicroOps = 5; }
+
+// Float/SIMD loads (1LfLf and 1LfLfAlu already covered by vector loads)
+def XGeneWriteLD1LfAlu1Lf : SchedWriteRes<[XGeneUnitLd, XGeneUnitIXn]> {
+ let Latency = 11; // 10 (first Lf) + 1 for the second load
+ let ResourceCycles = [2, 1];
+ let NumMicroOps = 3; }
+def XGeneWriteLD1LfLfAlu1LfLf : SchedWriteRes<[XGeneUnitLd, XGeneUnitIXn]> {
+ let Latency = 13; // 10 (first Lf) + 1 for each of the 3 other loads
+ let ResourceCycles = [4, 1];
+ let NumMicroOps = 5; }
+def XGeneWriteLD1LfLfAlu1LfLfAlu: SchedWriteRes<[XGeneUnitLd, XGeneUnitIXn]> {
+ let Latency = 13; // same loads as above; the extra ALU op adds no latency
+ let ResourceCycles = [4, 2];
+ let NumMicroOps = 6; }
+
+// Vector loads: complex (Lc) loads, latency 11 + 1 per additional load
+def XGeneWriteLD1Lc : SchedWriteRes<[XGeneUnitLd]> {
+ let Latency = 11;
+ let ResourceCycles = [1];
+ let NumMicroOps = 1; }
+def XGeneWriteLD1LcAlu : SchedWriteRes<[XGeneUnitLd, XGeneUnitIXn]> {
+ let Latency = 11;
+ let ResourceCycles = [1, 1];
+ let NumMicroOps = 2; }
+def XGeneWriteLD1LcLc : SchedWriteRes<[XGeneUnitLd]> {
+ let Latency = 12;
+ let ResourceCycles = [2];
+ let NumMicroOps = 2; }
+def XGeneWriteLD1LcLcAlu : SchedWriteRes<[XGeneUnitLd, XGeneUnitIXn]> {
+ let Latency = 12;
+ let ResourceCycles = [2, 1];
+ let NumMicroOps = 3; }
+def XGeneWriteLD1X3Lc : SchedWriteRes<[XGeneUnitLd]> {
+ let Latency = 13;
+ let ResourceCycles = [3];
+ let NumMicroOps = 3; }
+def XGeneWriteLD1X3LcAlu : SchedWriteRes<[XGeneUnitLd, XGeneUnitIXn]> {
+ let Latency = 13;
+ let ResourceCycles = [3, 1];
+ let NumMicroOps = 4; }
+def XGeneWriteLD1X4Lc : SchedWriteRes<[XGeneUnitLd]> {
+ let Latency = 14;
+ let ResourceCycles = [4];
+ let NumMicroOps = 4; }
+def XGeneWriteLD1X4LcAlu : SchedWriteRes<[XGeneUnitLd, XGeneUnitIXn]> {
+ let Latency = 14;
+ let ResourceCycles = [4, 1];
+ let NumMicroOps = 5; }
+def XGeneWriteLD1X6Lc : SchedWriteRes<[XGeneUnitLd]> {
+ let Latency = 16;
+ let ResourceCycles = [6];
+ let NumMicroOps = 6; }
+def XGeneWriteLD1X6LcAlu : SchedWriteRes<[XGeneUnitLd, XGeneUnitIXn]> {
+ let Latency = 16;
+ let ResourceCycles = [6, 1];
+ let NumMicroOps = 7; }
+def XGeneWriteLD1X8Lc : SchedWriteRes<[XGeneUnitLd]> {
+ let Latency = 18;
+ let ResourceCycles = [8];
+ let NumMicroOps = 8; }
+def XGeneWriteLD1X8LcAlu : SchedWriteRes<[XGeneUnitLd, XGeneUnitIXn]> {
+ let Latency = 18;
+ let ResourceCycles = [8, 1];
+ let NumMicroOps = 9; }
+def XGeneWriteLD1Lf : SchedWriteRes<[XGeneUnitLd]> { // Float (Lf) loads: 10 + 1 per additional load
+ let Latency = 10;
+ let ResourceCycles = [1];
+ let NumMicroOps = 1; }
+def XGeneWriteLD1LfAlu : SchedWriteRes<[XGeneUnitLd, XGeneUnitIXn]> {
+ let Latency = 10;
+ let ResourceCycles = [1, 1];
+ let NumMicroOps = 2; }
+def XGeneWriteLD1LfLf : SchedWriteRes<[XGeneUnitLd]> {
+ let Latency = 11;
+ let ResourceCycles = [2];
+ let NumMicroOps = 2; }
+def XGeneWriteLD1LfLfAlu : SchedWriteRes<[XGeneUnitLd, XGeneUnitIXn]> {
+ let Latency = 11;
+ let ResourceCycles = [2, 1];
+ let NumMicroOps = 3; }
+def XGeneWriteLD1X3Lf : SchedWriteRes<[XGeneUnitLd]> {
+ let Latency = 12;
+ let ResourceCycles = [3];
+ let NumMicroOps = 3; }
+def XGeneWriteLD1X3LfAlu : SchedWriteRes<[XGeneUnitLd, XGeneUnitIXn]> {
+ let Latency = 12;
+ let ResourceCycles = [3, 1];
+ let NumMicroOps = 4; }
+def XGeneWriteLD1X4Lf : SchedWriteRes<[XGeneUnitLd]> {
+ let Latency = 13;
+ let ResourceCycles = [4];
+ let NumMicroOps = 4; }
+def XGeneWriteLD1X4LfAlu : SchedWriteRes<[XGeneUnitLd, XGeneUnitIXn]> {
+ let Latency = 13;
+ let ResourceCycles = [4, 1];
+ let NumMicroOps = 5; }
+def XGeneWriteLD1X6Lf : SchedWriteRes<[XGeneUnitLd]> {
+ let Latency = 15;
+ let ResourceCycles = [6];
+ let NumMicroOps = 6; }
+def XGeneWriteLD1X6LfAlu : SchedWriteRes<[XGeneUnitLd, XGeneUnitIXn]> {
+ let Latency = 15;
+ let ResourceCycles = [6, 1];
+ let NumMicroOps = 7; }
+def XGeneWriteLD1X8Lf : SchedWriteRes<[XGeneUnitLd]> {
+ let Latency = 17;
+ let ResourceCycles = [8];
+ let NumMicroOps = 8; }
+def XGeneWriteLD1X8LfAlu : SchedWriteRes<[XGeneUnitLd, XGeneUnitIXn]> {
+ let Latency = 17;
+ let ResourceCycles = [8, 1];
+ let NumMicroOps = 9; }
+def XGeneWriteLD1Lf1Asi : SchedWriteRes<[XGeneUnitLd, XGeneUnitFSU]> { // Lf loads followed by Asi ops on FSU
+ let Latency = 13; // 10 (Lf) + 3 (Asi)
+ let ResourceCycles = [1, 1];
+ let NumMicroOps = 2; }
+def XGeneWriteLD1LfAlu1Asi : SchedWriteRes<[XGeneUnitLd, XGeneUnitIXn, XGeneUnitFSU]> {
+ let Latency = 13;
+ let ResourceCycles = [1, 1, 1];
+ let NumMicroOps = 3; }
+def XGeneWriteLD1LfLf1AsiAsi : SchedWriteRes<[XGeneUnitLd, XGeneUnitFSU]> {
+ let Latency = 17; // 11 (two Lf) + 2 x 3 (Asi)
+ let ResourceCycles = [2, 2];
+ let NumMicroOps = 4; }
+def XGeneWriteLD1LfLfAlu1AsiAsi : SchedWriteRes<[XGeneUnitLd, XGeneUnitIXn, XGeneUnitFSU]> {
+ let Latency = 17;
+ let ResourceCycles = [2, 1, 2];
+ let NumMicroOps = 5; }
+def XGeneWriteLD1X3Lf1X3Asi : SchedWriteRes<[XGeneUnitLd, XGeneUnitFSU]> {
+ let Latency = 21; // 12 (three Lf) + 3 x 3 (Asi)
+ let ResourceCycles = [3, 3];
+ let NumMicroOps = 6; }
+def XGeneWriteLD1X3LfAlu1X3Asi : SchedWriteRes<[XGeneUnitLd, XGeneUnitIXn, XGeneUnitFSU]> {
+ let Latency = 21;
+ let ResourceCycles = [3, 1, 3];
+ let NumMicroOps = 7; }
+def XGeneWriteLD1X4Lf1X4Asi : SchedWriteRes<[XGeneUnitLd, XGeneUnitFSU]> {
+ let Latency = 25; // 13 (four Lf) + 4 x 3 (Asi)
+ let ResourceCycles = [4, 4];
+ let NumMicroOps = 8; }
+def XGeneWriteLD1X4LfAlu1X4Asi : SchedWriteRes<[XGeneUnitLd, XGeneUnitIXn, XGeneUnitFSU]> {
+ let Latency = 25;
+ let ResourceCycles = [4, 1, 4];
+ let NumMicroOps = 9; }
+
+//---
+// Store instructions
+// NOTE: Stores generally have a latency of zero; that value is used for all
+// the plain stores below (variants that also contain ALU micro-ops use 1 or 2).
+// If a dependent load follows the store, the latencies are different; the
+// definitions below do not cover these special cases.
+//---
+
+// Integer stores (St); the Alu variants add one integer-ALU micro-op (latency 1)
+def XGeneWriteST1St : SchedWriteRes<[XGeneUnitSt]> {
+ let Latency = 0;
+ let ResourceCycles = [1];
+ let NumMicroOps = 1; }
+def XGeneWriteST1StAlu : SchedWriteRes<[XGeneUnitSt, XGeneUnitIXn]> {
+ let Latency = 1;
+ let ResourceCycles = [1, 1];
+ let NumMicroOps = 2; }
+def XGeneWriteST1StSt : SchedWriteRes<[XGeneUnitSt]> {
+ let Latency = 0;
+ let ResourceCycles = [2];
+ let NumMicroOps = 2; }
+def XGeneWriteST1StStAlu : SchedWriteRes<[XGeneUnitSt, XGeneUnitIXn]> {
+ let Latency = 1;
+ let ResourceCycles = [2, 1];
+ let NumMicroOps = 3; }
+
+// Float/SIMD stores (1SfSf and 1SfSfAlu already covered by vector stores)
+def XGeneWriteST1SfAlu1Sf : SchedWriteRes<[XGeneUnitSt, XGeneUnitIXn]> {
+ let Latency = 1;
+ let ResourceCycles = [2, 1];
+ let NumMicroOps = 3; }
+def XGeneWriteST1SfSfAlu1SfSf : SchedWriteRes<[XGeneUnitSt, XGeneUnitIXn]> {
+ let Latency = 1;
+ let ResourceCycles = [4, 1];
+ let NumMicroOps = 5; }
+def XGeneWriteST1SfSfAlu1SfSfAlu: SchedWriteRes<[XGeneUnitSt, XGeneUnitIXn]> {
+ let Latency = 2; // two ALU micro-ops, 1 cycle each
+ let ResourceCycles = [4, 2];
+ let NumMicroOps = 6; }
+
+// Vector stores: Sc stores (presumably "complex", by analogy with Lc - confirm)
+def XGeneWriteST1Sc : SchedWriteRes<[XGeneUnitSt]> {
+ let Latency = 0;
+ let ResourceCycles = [1];
+ let NumMicroOps = 1; }
+def XGeneWriteST1ScAlu : SchedWriteRes<[XGeneUnitSt, XGeneUnitIXn]> {
+ let Latency = 1;
+ let ResourceCycles = [1, 1];
+ let NumMicroOps = 2; }
+def XGeneWriteST1ScSc : SchedWriteRes<[XGeneUnitSt]> {
+ let Latency = 0;
+ let ResourceCycles = [2];
+ let NumMicroOps = 2; }
+def XGeneWriteST1ScScAlu : SchedWriteRes<[XGeneUnitSt, XGeneUnitIXn]> {
+ let Latency = 1;
+ let ResourceCycles = [2, 1];
+ let NumMicroOps = 3; }
+def XGeneWriteST1X3Sc : SchedWriteRes<[XGeneUnitSt]> {
+ let Latency = 0;
+ let ResourceCycles = [3];
+ let NumMicroOps = 3; }
+def XGeneWriteST1X3ScAlu : SchedWriteRes<[XGeneUnitSt, XGeneUnitIXn]> {
+ let Latency = 1;
+ let ResourceCycles = [3, 1];
+ let NumMicroOps = 4; }
+def XGeneWriteST1X4Sc : SchedWriteRes<[XGeneUnitSt]> {
+ let Latency = 0;
+ let ResourceCycles = [4];
+ let NumMicroOps = 4; }
+def XGeneWriteST1X4ScAlu : SchedWriteRes<[XGeneUnitSt, XGeneUnitIXn]> {
+ let Latency = 1;
+ let ResourceCycles = [4, 1];
+ let NumMicroOps = 5; }
+def XGeneWriteST1X6Sc : SchedWriteRes<[XGeneUnitSt]> {
+ let Latency = 0;
+ let ResourceCycles = [6];
+ let NumMicroOps = 6; }
+def XGeneWriteST1X6ScAlu : SchedWriteRes<[XGeneUnitSt, XGeneUnitIXn]> {
+ let Latency = 1;
+ let ResourceCycles = [6, 1];
+ let NumMicroOps = 7; }
+def XGeneWriteST1X8Sc : SchedWriteRes<[XGeneUnitSt]> {
+ let Latency = 0;
+ let ResourceCycles = [8];
+ let NumMicroOps = 8; }
+def XGeneWriteST1X8ScAlu : SchedWriteRes<[XGeneUnitSt, XGeneUnitIXn]> {
+ let Latency = 1;
+ let ResourceCycles = [8, 1];
+ let NumMicroOps = 9; }
+def XGeneWriteST1Sf : SchedWriteRes<[XGeneUnitSt]> { // Float (Sf) stores; same shape as the Sc entries
+ let Latency = 0;
+ let ResourceCycles = [1];
+ let NumMicroOps = 1; }
+def XGeneWriteST1SfAlu : SchedWriteRes<[XGeneUnitSt, XGeneUnitIXn]> {
+ let Latency = 1;
+ let ResourceCycles = [1, 1];
+ let NumMicroOps = 2; }
+def XGeneWriteST1SfSf : SchedWriteRes<[XGeneUnitSt]> {
+ let Latency = 0;
+ let ResourceCycles = [2];
+ let NumMicroOps = 2; }
+def XGeneWriteST1SfSfAlu : SchedWriteRes<[XGeneUnitSt, XGeneUnitIXn]> {
+ let Latency = 1;
+ let ResourceCycles = [2, 1];
+ let NumMicroOps = 3; }
+def XGeneWriteST1X3Sf : SchedWriteRes<[XGeneUnitSt]> {
+ let Latency = 0;
+ let ResourceCycles = [3];
+ let NumMicroOps = 3; }
+def XGeneWriteST1X3SfAlu : SchedWriteRes<[XGeneUnitSt, XGeneUnitIXn]> {
+ let Latency = 1;
+ let ResourceCycles = [3, 1];
+ let NumMicroOps = 4; }
+def XGeneWriteST1X4Sf : SchedWriteRes<[XGeneUnitSt]> {
+ let Latency = 0;
+ let ResourceCycles = [4];
+ let NumMicroOps = 4; }
+def XGeneWriteST1X4SfAlu : SchedWriteRes<[XGeneUnitSt, XGeneUnitIXn]> {
+ let Latency = 1;
+ let ResourceCycles = [4, 1];
+ let NumMicroOps = 5; }
+def XGeneWriteST1X6Sf : SchedWriteRes<[XGeneUnitSt]> {
+ let Latency = 0;
+ let ResourceCycles = [6];
+ let NumMicroOps = 6; }
+def XGeneWriteST1X6SfAlu : SchedWriteRes<[XGeneUnitSt, XGeneUnitIXn]> {
+ let Latency = 1;
+ let ResourceCycles = [6, 1];
+ let NumMicroOps = 7; }
+def XGeneWriteST1X8Sf : SchedWriteRes<[XGeneUnitSt]> {
+ let Latency = 0;
+ let ResourceCycles = [8];
+ let NumMicroOps = 8; }
+def XGeneWriteST1X8SfAlu : SchedWriteRes<[XGeneUnitSt, XGeneUnitIXn]> {
+ let Latency = 1;
+ let ResourceCycles = [8, 1];
+ let NumMicroOps = 9; }
+
+//---
+// Integer Data Processing; entries that list XGeneLockIXB are restricted to IXB
+//---
+def XGeneWriteI1Sbfm1Alu : SchedWriteRes<[XGeneUnitIXn]> {
+ let Latency = 2;
+ let ResourceCycles = [2];
+ let NumMicroOps = 2; }
+def XGeneWriteI1Alb1Alu : SchedWriteRes<[XGeneUnitIXn, XGeneLockIXB]> {
+ let Latency = 3; // 2 (Alb) + 1 (Alu)
+ let ResourceCycles = [2, 1];
+ let NumMicroOps = 2; }
+def XGeneWriteI1Sbfm : SchedWriteRes<[XGeneUnitIXn]> {
+ let Latency = 1;
+ let ResourceCycles = [1];
+ let NumMicroOps = 1; }
+def XGeneWriteI1Car: SchedWriteRes<[XGeneUnitIXn, XGeneLockIXB]> {
+ let Latency = 1;
+ let ResourceCycles = [1, 1];
+ let NumMicroOps = 1; }
+def XGeneWriteI1Set: SchedWriteRes<[XGeneUnitIXn, XGeneLockIXB]> {
+ let Latency = 1;
+ let ResourceCycles = [1, 1];
+ let NumMicroOps = 1; }
+def XGeneWriteI1Sbfm1Set : SchedWriteRes<[XGeneUnitIXn, XGeneLockIXB]> {
+ let Latency = 2;
+ let ResourceCycles = [2, 1];
+ let NumMicroOps = 2; }
+def XGeneWriteI1Alu : SchedWriteRes<[XGeneUnitIXn]> {
+ let Latency = 1;
+ let ResourceCycles = [1];
+ let NumMicroOps = 1; }
+def XGeneWriteI1Mlw1Alu : SchedWriteRes<[XGeneUnitIXn, XGeneLockIXB, XGeneLockMul]> {
+ let Latency = 5; // 4 (Mlw) + 1 (Alu)
+ let ResourceCycles = [2, 1, 2];
+ let NumMicroOps = 2; }
+def XGeneWriteI1Mlx1Alu : SchedWriteRes<[XGeneUnitIXn, XGeneLockIXB, XGeneLockMul]> {
+ let Latency = 6; // 5 (Mlx) + 1 (Alu)
+ let ResourceCycles = [2, 1, 2];
+ let NumMicroOps = 2; }
+def XGeneWriteI1Mlw : SchedWriteRes<[XGeneUnitIXn, XGeneLockIXB, XGeneLockMul]> {
+ let Latency = 4;
+ let ResourceCycles = [1, 1, 2]; // multiply lock held for 2 cycles
+ let NumMicroOps = 1; }
+def XGeneWriteI1Mlx : SchedWriteRes<[XGeneUnitIXn, XGeneLockIXB, XGeneLockMul]> {
+ let Latency = 5;
+ let ResourceCycles = [1, 1, 2]; // multiply lock held for 2 cycles
+ let NumMicroOps = 1; }
+def XGeneWriteI1Div : SchedWriteRes<[XGeneUnitIXn, XGeneLockIXB, XGeneLockDiv]> {
+ let Latency = 26;
+ let ResourceCycles = [1, 1, 26]; // NOTE(review): div lock held 26 cycles, but its comment says one issue per 2 cycles - confirm.
+ let NumMicroOps = 1; }
+def XGeneWriteI1Alb : SchedWriteRes<[XGeneUnitIXn, XGeneLockIXB]> {
+ let Latency = 2;
+ let ResourceCycles = [1, 1];
+ let NumMicroOps = 1; }
+
+//---
+// AdvSIMD Data Processing (Vector Integer)
+//--- (multi-op entries use the sum of the per-op latencies; all run on FSU)
+def XGeneWriteVI1Asa : SchedWriteRes<[XGeneUnitFSU]> {
+ let Latency = 3;
+ let ResourceCycles = [1];
+ let NumMicroOps = 1; }
+def XGeneWriteVI1Ass : SchedWriteRes<[XGeneUnitFSU]> {
+ let Latency = 3;
+ let ResourceCycles = [1];
+ let NumMicroOps = 1; }
+def XGeneWriteVI1Asl : SchedWriteRes<[XGeneUnitFSU]> {
+ let Latency = 2;
+ let ResourceCycles = [1];
+ let NumMicroOps = 1; }
+def XGeneWriteVI1Asm : SchedWriteRes<[XGeneUnitFSU]> {
+ let Latency = 5;
+ let ResourceCycles = [1];
+ let NumMicroOps = 1; }
+def XGeneWriteVI1AsaAsa : SchedWriteRes<[XGeneUnitFSU]> {
+ let Latency = 6;
+ let ResourceCycles = [2];
+ let NumMicroOps = 2; }
+def XGeneWriteVI1AssAss : SchedWriteRes<[XGeneUnitFSU]> {
+ let Latency = 6;
+ let ResourceCycles = [2];
+ let NumMicroOps = 2; }
+def XGeneWriteVI1AslAsl : SchedWriteRes<[XGeneUnitFSU]> {
+ let Latency = 4;
+ let ResourceCycles = [2];
+ let NumMicroOps = 2; }
+def XGeneWriteVI1AsmAsm : SchedWriteRes<[XGeneUnitFSU]> {
+ let Latency = 10;
+ let ResourceCycles = [2];
+ let NumMicroOps = 2; }
+def XGeneWriteVI1ApolApol : SchedWriteRes<[XGeneUnitFSU]> {
+ let Latency = 6;
+ let ResourceCycles = [2];
+ let NumMicroOps = 2; }
+def XGeneWriteVI1AsaAsa1Asa : SchedWriteRes<[XGeneUnitFSU]> {
+ let Latency = 9;
+ let ResourceCycles = [3];
+ let NumMicroOps = 3; }
+def XGeneWriteVI1AsaAsa1Ass : SchedWriteRes<[XGeneUnitFSU]> {
+ let Latency = 9;
+ let ResourceCycles = [3];
+ let NumMicroOps = 3; }
+def XGeneWriteVI1AsaAsa1AsaAsa : SchedWriteRes<[XGeneUnitFSU]> {
+ let Latency = 12;
+ let ResourceCycles = [4];
+ let NumMicroOps = 4; }
+def XGeneWriteVI1AssAss1AsaAsa : SchedWriteRes<[XGeneUnitFSU]> {
+ let Latency = 12;
+ let ResourceCycles = [4];
+ let NumMicroOps = 4; }
+def XGeneWriteVI1AsaAsa2Asa : SchedWriteRes<[XGeneUnitFSU]> {
+ let Latency = 12;
+ let ResourceCycles = [4];
+ let NumMicroOps = 4; }
+def XGeneWriteVI1Adre : SchedWriteRes<[XGeneUnitFSU]> {
+ let Latency = 3;
+ let ResourceCycles = [1];
+ let NumMicroOps = 1; }
+def XGeneWriteVI1Asre : SchedWriteRes<[XGeneUnitFSU]> {
+ let Latency = 5;
+ let ResourceCycles = [1];
+ let NumMicroOps = 1; }
+def XGeneWriteVI1Asl1Asl : SchedWriteRes<[XGeneUnitFSU]> {
+ let Latency = 4;
+ let ResourceCycles = [2];
+ let NumMicroOps = 2; }
+def XGeneWriteVI1Ass1Asa : SchedWriteRes<[XGeneUnitFSU]> {
+ let Latency = 6;
+ let ResourceCycles = [2];
+ let NumMicroOps = 2; }
+def XGeneWriteVI1Ass1Asi : SchedWriteRes<[XGeneUnitFSU]> {
+ let Latency = 6;
+ let ResourceCycles = [2];
+ let NumMicroOps = 2; }
+def XGeneWriteVI1Ass1Ass : SchedWriteRes<[XGeneUnitFSU]> {
+ let Latency = 6;
+ let ResourceCycles = [2];
+ let NumMicroOps = 2; }
+def XGeneWriteVI1Fmov : SchedWriteRes<[XGeneUnitFSU]> {
+ let Latency = 2;
+ let ResourceCycles = [1];
+ let NumMicroOps = 1; }
+// Instructions with store ops are extra-special because the chip will be using
+// data bypass; latencies are measured from when the registers become available
+// and are as follows:
+// for int, 1 for the address register, -1 for the data register
+// for float, 4 for the address register, 1 for the data register
+// for complex, 4 for the address register, 2 for the data register
+// The latencies below are taken from data register availability.
+def XGeneWriteVI1St1Lf : SchedWriteRes<[XGeneUnitSt, XGeneUnitLd]> {
+ let Latency = 9;
+ let ResourceCycles = [1, 1];
+ let NumMicroOps = 2; }
+def XGeneWriteVI1St1Lf1Asi : SchedWriteRes<[XGeneUnitSt, XGeneUnitLd, XGeneUnitFSU]> {
+ let Latency = 12; // 9 (St + Lf) + assumed Asi latency 3
+ let ResourceCycles = [1, 1, 1];
+ let NumMicroOps = 3; }
+def XGeneWriteVI1St1Lf1Falu : SchedWriteRes<[XGeneUnitSt, XGeneUnitLd, XGeneUnitFSU]> {
+ let Latency = 14; // 9 (St + Lf) + assumed Falu latency 5
+ let ResourceCycles = [1, 1, 1];
+ let NumMicroOps = 3; }
+def XGeneWriteVI1Sf1Ld : SchedWriteRes<[XGeneUnitSt, XGeneUnitLd]> {
+ let Latency = 6;
+ let ResourceCycles = [1, 1];
+ let NumMicroOps = 2; }
+def XGeneWriteVI1Sf1Ld1Sbfm : SchedWriteRes<[XGeneUnitSt, XGeneUnitLd, XGeneUnitIXn]> {
+ let Latency = 7; // 6 (Sf + Ld) + assumed Sbfm latency 1
+ let ResourceCycles = [1, 1, 1];
+ let NumMicroOps = 3; }
+def XGeneWriteVI1Sf1Ld1Ubfm : SchedWriteRes<[XGeneUnitSt, XGeneUnitLd, XGeneUnitIXn]> {
+ let Latency = 7; // 6 (Sf + Ld) + assumed Ubfm latency 1
+ let ResourceCycles = [1, 1, 1];
+ let NumMicroOps = 3; }
+def XGeneWriteVI2Asa : SchedWriteRes<[XGeneUnitFSU]> { // Multi-op entries: latency = ops x per-op latency (Asa 3, Asl 2)
+ let Latency = 6;
+ let ResourceCycles = [2];
+ let NumMicroOps = 2; }
+def XGeneWriteVI2AsaAsa : SchedWriteRes<[XGeneUnitFSU]> {
+ let Latency = 12;
+ let ResourceCycles = [4];
+ let NumMicroOps = 4; }
+def XGeneWriteVI3Asa : SchedWriteRes<[XGeneUnitFSU]> {
+ let Latency = 9;
+ let ResourceCycles = [3];
+ let NumMicroOps = 3; }
+def XGeneWriteVI4Asa : SchedWriteRes<[XGeneUnitFSU]> {
+ let Latency = 12;
+ let ResourceCycles = [4];
+ let NumMicroOps = 4; }
+def XGeneWriteVI2Asl : SchedWriteRes<[XGeneUnitFSU]> {
+ let Latency = 4;
+ let ResourceCycles = [2];
+ let NumMicroOps = 2; }
+def XGeneWriteVI2AslAsl : SchedWriteRes<[XGeneUnitFSU]> {
+ let Latency = 8;
+ let ResourceCycles = [4];
+ let NumMicroOps = 4; }
+def XGeneWriteVI4Asl : SchedWriteRes<[XGeneUnitFSU]> {
+ let Latency = 8;
+ let ResourceCycles = [4];
+ let NumMicroOps = 4; }
+def XGeneWriteVI4AslAsl : SchedWriteRes<[XGeneUnitFSU]> {
+ let Latency = 16;
+ let ResourceCycles = [8];
+ let NumMicroOps = 8; }
+def XGeneWriteVI6Asl : SchedWriteRes<[XGeneUnitFSU]> {
+ let Latency = 12;
+ let ResourceCycles = [6];
+ let NumMicroOps = 6; }
+def XGeneWriteVI6AslAsl : SchedWriteRes<[XGeneUnitFSU]> {
+ let Latency = 24;
+ let ResourceCycles = [12];
+ let NumMicroOps = 12; }
+def XGeneWriteVI8Asl : SchedWriteRes<[XGeneUnitFSU]> {
+ let Latency = 16;
+ let ResourceCycles = [8];
+ let NumMicroOps = 8; }
+def XGeneWriteVI8AslAsl : SchedWriteRes<[XGeneUnitFSU]> {
+ let Latency = 32;
+ let ResourceCycles = [16];
+ let NumMicroOps = 16; }
+
+//---
+// AdvSIMD Data Processing (Vector FP)
+//--- (two-op entries double the latency and cycles of the single-op entry)
+def XGeneWriteVF1Asm : SchedWriteRes<[XGeneUnitFSU]> {
+ let Latency = 5;
+ let ResourceCycles = [1];
+ let NumMicroOps = 1; }
+def XGeneWriteVF1AsmAsm : SchedWriteRes<[XGeneUnitFSU]> {
+ let Latency = 10;
+ let ResourceCycles = [2];
+ let NumMicroOps = 2; }
+def XGeneWriteVF1Falu : SchedWriteRes<[XGeneUnitFSU]> {
+ let Latency = 5; // assumed Falu latency of 5
+ let ResourceCycles = [1];
+ let NumMicroOps = 1; }
+def XGeneWriteVF1FaluFalu : SchedWriteRes<[XGeneUnitFSU]> {
+ let Latency = 10;
+ let ResourceCycles = [2];
+ let NumMicroOps = 2; }
+def XGeneWriteVF1Fcvt : SchedWriteRes<[XGeneUnitFSU]> {
+ let Latency = 5; // assumed Fcvt latency of 5
+ let ResourceCycles = [1];
+ let NumMicroOps = 1; }
+def XGeneWriteVF1FcvtFcvt : SchedWriteRes<[XGeneUnitFSU]> {
+ let Latency = 10;
+ let ResourceCycles = [2];
+ let NumMicroOps = 2; }
+def XGeneWriteVF1Fdivd : SchedWriteRes<[XGeneUnitFSU, XGeneLockFInst]> {
+ let Latency = 28;
+ let ResourceCycles = [11, 28]; // FSU for 11 cycles; div/sqrt lock for all 28
+ let NumMicroOps = 1; }
+def XGeneWriteVF1FdivdFdivd : SchedWriteRes<[XGeneUnitFSU, XGeneLockFInst]> {
+ let Latency = 56;
+ let ResourceCycles = [22, 56]; // twice the single Fdivd entry
+ let NumMicroOps = 2; }
+def XGeneWriteVF1Fhcvt : SchedWriteRes<[XGeneUnitFSU]> {
+ let Latency = 3;
+ let ResourceCycles = [1];
+ let NumMicroOps = 1; }
+def XGeneWriteVF1Fmov : SchedWriteRes<[XGeneUnitFSU]> {
+ let Latency = 2;
+ let ResourceCycles = [1];
+ let NumMicroOps = 1; }
+def XGeneWriteVF1FmovFmov : SchedWriteRes<[XGeneUnitFSU]> {
+ let Latency = 4;
+ let ResourceCycles = [2];
+ let NumMicroOps = 2; }
+def XGeneWriteVF1Fsel : SchedWriteRes<[XGeneUnitFSU]> {
+ let Latency = 3;
+ let ResourceCycles = [1];
+ let NumMicroOps = 1; }
+def XGeneWriteVF1FselFsel : SchedWriteRes<[XGeneUnitFSU]> {
+ let Latency = 6;
+ let ResourceCycles = [2];
+ let NumMicroOps = 2; }
+def XGeneWriteVF1Fsqrd : SchedWriteRes<[XGeneUnitFSU, XGeneLockFInst]> {
+ let Latency = 38;
+ let ResourceCycles = [17, 38]; // FSU for 17 cycles; div/sqrt lock for all 38
+ let NumMicroOps = 1; }
+def XGeneWriteVF1Adre : SchedWriteRes<[XGeneUnitFSU]> {
+ let Latency = 3;
+ let ResourceCycles = [1];
+ let NumMicroOps = 1; }
+def XGeneWriteVF1Asre : SchedWriteRes<[XGeneUnitFSU]> {
+ let Latency = 5;
+ let ResourceCycles = [1];
+ let NumMicroOps = 1; }
+def XGeneWriteVF2Asa : SchedWriteRes<[XGeneUnitFSU]> {
+ let Latency = 6;
+ let ResourceCycles = [2];
+ let NumMicroOps = 2; }
+
+//---
+// Read Advances
+// No forwarding for these reads
+// Each listed read gets an anonymous ReadAdvance record with 0 cycles.
+foreach XGeneUnforwardedRead = [ReadI, ReadIEReg, ReadISReg, ReadIM, ReadIMA,
+                                ReadID, ReadExtrHi, ReadAdrBase, ReadVLD] in
+ def : ReadAdvance<XGeneUnforwardedRead, 0>;
+
+// Arithmetic instructions which set the state flag introduce one more cycle of
+// latency when the flag is required by a conditional
+// Write with an empty processor-resource list; it is the producer that the
+// read advance below is keyed on.
+def XGeneWriteISFlags : SchedWriteRes<[]>;
+// Read advance of -1 cycle, restricted to values written by
+// XGeneWriteISFlags.
+def XGeneReadISFlags : SchedReadAdvance<-1, [XGeneWriteISFlags]>;
+// Variant with a single NoSchedPred entry that resolves to XGeneReadISFlags.
+def XGeneReadISFlagsVar : SchedReadVariant<[
+ SchedVar<NoSchedPred, [XGeneReadISFlags]>]>;
+// NOTE(review): the alias is attached to ReadISReg, which also receives
+// ReadAdvance<ReadISReg, 0> earlier in this file - confirm both are intended
+// and that ReadISReg is the read meant for flag consumers.
+def : SchedAlias<ReadISReg, XGeneReadISFlagsVar>;
+
+// Store instructions introduce extra latency cycles when the registers are
+// used in a dependent load as such:
+// Store type Address register Data register
+// Integer 1 -1
+// Float 4 1
+// Complex 4 2
+// Resource-less writes that mark integer (STI) and float (STF) stores; the
+// read advances below are keyed on them.
+def XGeneWriteSTI : SchedWriteRes<[]>;
+def XGeneWriteSTF : SchedWriteRes<[]>;
+
+// Scalar loads
+// Advance of -1 after an integer store and -4 after a float store.
+def XGeneReadLDSTI : SchedReadAdvance<-1, [XGeneWriteSTI]>;
+def XGeneReadLDSTF : SchedReadAdvance<-4, [XGeneWriteSTF]>;
+// NOTE(review): both variants use NoSchedPred - confirm how TableGen picks
+// between them and that the second one is reachable.
+def XGeneReadLDVar : SchedReadVariant<[
+ SchedVar<NoSchedPred, [XGeneReadLDSTI]>,
+ SchedVar<NoSchedPred, [XGeneReadLDSTF]>]>;
+// ReadAdrBase also receives ReadAdvance<ReadAdrBase, 0> earlier in this file.
+def : SchedAlias<ReadAdrBase, XGeneReadLDVar>;
+
+// Vector loads are affected by the same latencies as regular loads when it
+// comes to preceding stores using the same registers
+// Same -1 / -4 advances as the scalar reads, under vector-specific names.
+def XGeneReadVLDSTI : SchedReadAdvance<-1, [XGeneWriteSTI]>;
+def XGeneReadVLDSTF : SchedReadAdvance<-4, [XGeneWriteSTF]>;
+// NOTE(review): both variants use NoSchedPred here as well - see above.
+def XGeneReadVLDVar : SchedReadVariant<[
+ SchedVar<NoSchedPred, [XGeneReadVLDSTI]>,
+ SchedVar<NoSchedPred, [XGeneReadVLDSTF]>]>;
+// ReadVLD also receives ReadAdvance<ReadVLD, 0> earlier in this file.
+def : SchedAlias<ReadVLD, XGeneReadVLDVar>;
+
+//---
+// Grouping instructions with similar requirements in groups with specific names
+// Naming scheme
+// XGeneWrite[GROUP][opList]
+// GROUP can be an instruction group, eg. LD, ST, ALU etc
+// opList is a list of ops in the format [k1][Op1][k2][Op2]...[kN][OpN], for
+// example: load op + load op + arithmetic op -> 1Ld1Ld1Alu
+// if the ops are independent, they will be grouped together under the same
+// op identifier: load op + load op & arithmetic op -> 1Ld1LdAlu
+// load op & load op + load op -> 1LdLd1Ld // just an example
+// if the ops are identical, they can be grouped as such:
+// * within a group: prepend with X<n>, where n is how many times the sequence
+// is repeated: 1LdLdLdLd -> 1X4Ld
+// * multiple groups can be put together as such: 1LdLd1LdLd -> 2LdLd
+// Group names used for this machine model (with usual latencies)
+// Nop / Nop (latency 0)
+// Br BU Branch (latency 0)
+// Alu IXn Arithmetic/logical op (latency 1)
+// Sbfm IXn Sbfm (latency 1 or 2)
+// Ubfm IXn Ubfm (latency 1 or 2)
+// Alb IXB Arithmetic/logical op on IXB (latency 2)
+// bfm, extr, shift/rotate, SIMD are mapped to Alb
+// Car IXB Carry (latency 1)
+// Set IXB Flag setting (latency 1)
+// Div IXB Integer division (latency 7, 10, 14, 18, 26 etc.)
+// Mlw IXB Integer 32bit multiplication (latency 4)
+// Mlx IXB Integer 64bit multiplication (latency 5)
+// Asa FSU ASIMD arithmetic (latency 3)
+// Asi FSU ASIMD insert (latency 3, also depends on previous destination)
+// Asl FSU ASIMD logical (latency 2 or 3 in MA or FP stores)
+// Ass FSU ASIMD shift (latency 3)
+// Asm FSU ASIMD multiply (latency 5)
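+// Adre FSU ASIMD sre/dre (latency 5)
+// Apol FSU ASIMD polymul (latency 3)
+// Asre FSU ASIMD sre/dre (latency 3)
+// NOTE(review): XGeneWriteVF1Adre is defined with latency 3 and
+// XGeneWriteVF1Asre with latency 5, the opposite of the two entries
+// above; confirm which side is swapped.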
+// Falu FSU Floating point arithmetic (latency 5, 6, 7)
+// Fcmp FSU Floating point compare (latency 10, 11, 19)
+// Fcvt FSU Floating point convert (latency 5, 6, 7)
+// Fdivs FSU Single precision division (latency 22, 24 +1 +2)
+// Fdivd FSU Double precision division (latency 24, 28 +1 +2)
+// Fhcvt FSU Floating point half convert (latency 3)
+// Fmov FSU Floating point move (latency 2 or 3 in MA or FP stores)
+// Fmul FSU ASIMD multiply (latency 5)
+// Fsel FSU Floating point select (latency 3)
+// Fsqrs FSU Single precision sqrt (latency 22, 24 +1 +2)
+// Fsqrd FSU Double precision sqrt (latency 24, 38 +1 +2) TODO: 38?
+// Lc LD Complex load (latency 11)
+// Ld LD Integer load (latency 5)
+// Lf LD Floating point load (latency 10)
+// Sc ST Complex store (latency 0)
+// St ST Integer store (latency 0)
+// Sf ST Floating point store (latency 0)
+//---
+
+//---
+// AdvSIMD Data Processing (Scalar FP)
+// * NOTE: in the arm64 instruction model of llvm, the scalar floating point
+// * instructions are defined as vector instructions with one element v1i64
+// * We will stay consistent with this model and put the one-element vector
+// * instructions in the scalar group
+// Floating-point immediate:
+// 1Fmov: FMOV (immediate)
+// Floating-point data-processing:
+// 1Fmov: FMOV (register)
+// 1Fmov: FABS, FNEG (1 source)
+// 1Fsqrs: FSQRT (1 source single precision)
+// 1Fsqrd: FSQRT (1 source double precision)
+// 1Falu: FMUL, FADD, FSUB, FNMUL (2 source)
+// 1Fdivs: FDIV (2 source single precision)
+// 1Fdivd: FDIV (2 source double precision)
+// 1Fsel: FMAX, FMIN, FMAXNM, FMINNM (2 source)
+// 1Falu: FMADD, FMSUB, FNMADD, FNMSUB (3 source)
+// Floating-point compare:
+// 1Fcmp: FCMP, FCMPE (all)
+// Floating-point convert:
+// 1Fcvt: FRINTN, FRINTP, FRINTM, FRINTZ, FRINTA, FRINTX, FRINTI (all)
+// 1Falu: FCVT (1 source single to double or double to single)
+// 1Fhcvt: FCVT (1 source to or from half precision)
+// Floating-point conditional:
+// 1Fcmp: FCCMP, FCCMPE (compare)
+// 1Fsel: FCSEL (select)
+// Floating-point<->integer conversions:
+// 1Fcvt1Sf1Ld: FCVTNS, FCVTAS, FCVTPS, FCVTMS (integer)
+// 1Fcvt1Sf1Ld: FCVTNU, FCVTAU, FCVTPU, FCVTMU (integer)
+// 1Fcvt1Sf1Ld: FCVTZS, FCVTZU (integer)
+// 1St1Lf1Falu: SCVTF, UCVTF (integer)
+// 1Fmov: FMOV (general register to FP register with Rn=XZR, WZR)
+// 1St1Lf: FMOV (general register to FP register with other Rn)
+// 1Sf1Ld: FMOV (from FP register to general register)
+// Floating-point<->fixed-point conversions:
+// 1Fcvt1Sf1Ld: FCVTZS, FCVTZU (fixed-point)
+// 1St1Lf1Falu: SCVTF, UCVTF (fixed-point)
+// AdvSIMD scalar three same:
+// 1Falu: FMULX, FRECPS, FRSQRTS, FABD (three same)
+// 1Fsel: FCMEQ, FCMGE, FCMGT, FACGE, FACGT (three same)
+// AdvSIMD two-reg misc:
+// 1Falu: FCVTXN (two reg)
+// 1Falu: SCVTF, UCVTF (integer)
+// 1Fcvt: FCVTNS, FCVTMS, FCVTAS, FCVTPS (two reg)
+// 1Fcvt: FCVTNU, FCVTMU, FCVTAU, FCVTPU (two reg)
+// 1Fcvt: FCVTZS, FCVTZU (integer)
+// 1Fsel: FCMGT, FCMEQ, FCMLT, FCMGE, FCMLE (zero)
+// 1Adre: FRECPE, FRECPX (two reg)
+// 1Asre: FRSQRTE (two reg)
+// AdvSIMD scalar pairwise:
+// 1Falu: FADDP (pair)
+// 1Fsel: FMAXP, FMINP, FMAXNMP, FMINNMP (pair)
+// AdvSIMD scalar x indexed element
+// * NOTE: These seem to be bundled with vector elements in the arm model, we will
+// * model scalar elements here and vector elements in the vector section where
+// * they rightfully belong
+// 1Asm: FMUL, FMULX, FMLA, FMLS (by element)
+// AdvSIMD scalar shift by immediate:
+// 1Falu: SCVTF, UCVTF (fixed-point)
+// 1Fcvt: FCVTZS, FCVTZU (fixed-point)
+//---
+
+// The COPY pseudo-instruction uses the generic WriteI write.
+def : InstRW<[WriteI], (instrs COPY)>;
+
+// Floating-point immediate: FMOV (immediate).
+def : InstRW<[XGeneWriteF1Fmov], (instregex "FMOV(D|H|S)i$")>;
+
+// Floating-point data-processing, 1 source.
+def : InstRW<[XGeneWriteF1Fmov], (instregex "FMOV(D|H|S)r$")>;
+def : InstRW<[XGeneWriteF1Fmov], (instregex "FABS(D|H|S)r$")>;
+def : InstRW<[XGeneWriteF1Fmov], (instregex "FNEG(D|H|S)r$")>;
+// Half-precision FSQRT is grouped with single precision.
+def : InstRW<[XGeneWriteF1Fsqrs], (instregex "FSQRT(H|S)r$")>;
+def : InstRW<[XGeneWriteF1Fsqrd], (instregex "FSQRTDr$")>;
+// Floating-point data-processing, 2 source.
+def : InstRW<[XGeneWriteF1Falu], (instregex "FMUL(D|H|S)rr$")>;
+def : InstRW<[XGeneWriteF1Falu], (instregex "FADD(D|H|S)rr$")>;
+def : InstRW<[XGeneWriteF1Falu], (instregex "FSUB(D|H|S)rr$")>;
+def : InstRW<[XGeneWriteF1Falu], (instregex "FNMUL(D|H|S)rr$")>;
+// Half-precision FDIV is grouped with single precision.
+def : InstRW<[XGeneWriteF1Fdivs], (instregex "FDIV(H|S)rr$")>;
+def : InstRW<[XGeneWriteF1Fdivd], (instregex "FDIVDrr$")>;
+def : InstRW<[XGeneWriteF1Fsel], (instregex "FMAX(D|H|S)rr$")>;
+def : InstRW<[XGeneWriteF1Fsel], (instregex "FMIN(D|H|S)rr$")>;
+def : InstRW<[XGeneWriteF1Fsel], (instregex "FMAXNM(D|H|S)rr$")>;
+def : InstRW<[XGeneWriteF1Fsel], (instregex "FMINNM(D|H|S)rr$")>;
+// Floating-point data-processing, 3 source.
+def : InstRW<[XGeneWriteF1Falu], (instregex "FMADD(D|H|S)rrr$")>;
+def : InstRW<[XGeneWriteF1Falu], (instregex "FMSUB(D|H|S)rrr$")>;
+def : InstRW<[XGeneWriteF1Falu], (instregex "FNMADD(D|H|S)rrr$")>;
+def : InstRW<[XGeneWriteF1Falu], (instregex "FNMSUB(D|H|S)rrr$")>;
+
+// Floating-point compare (register and immediate forms).
+def : InstRW<[XGeneWriteF1Fcmp], (instregex "FCMP(D|H|S)r(r|i)$")>;
+def : InstRW<[XGeneWriteF1Fcmp], (instregex "FCMPE(D|H|S)r(r|i)$")>;
+
+// Floating-point convert: rounding, single<->double, to/from half.
+def : InstRW<[XGeneWriteF1Fcvt], (instregex "FRINT(N|P|M|Z|A|X|I)(D|H|S)r$")>;
+def : InstRW<[XGeneWriteF1Falu], (instregex "FCVT(SD|DS)r$")>;
+def : InstRW<[XGeneWriteF1Fhcvt], (instregex "FCVT(HS|HD|SH|DH)r$")>;
+
+// Floating-point conditional compare and select.
+def : InstRW<[XGeneWriteF1Fcmp], (instregex "FCCMP(D|H|S)rr$")>;
+def : InstRW<[XGeneWriteF1Fcmp], (instregex "FCCMPE(D|H|S)rr$")>;
+def : InstRW<[XGeneWriteF1Fsel], (instregex "FCSEL(D|H|S)rrr$")>;
+
+// Floating-point <-> integer conversions.
+// FP -> integer (FCVT{N,M,A,P,Z}{S,U}) uses the 1Fcvt1Sf1Ld group.
+def : InstRW<[XGeneWriteF1Fcvt1Sf1Ld], (instregex "FCVTN(S|U)U(W|X)(D|H|S)r$")>;
+def : InstRW<[XGeneWriteF1Fcvt1Sf1Ld], (instregex "FCVTM(S|U)U(W|X)(D|H|S)r$")>;
+def : InstRW<[XGeneWriteF1Fcvt1Sf1Ld], (instregex "FCVTA(S|U)U(W|X)(D|H|S)r$")>;
+def : InstRW<[XGeneWriteF1Fcvt1Sf1Ld], (instregex "FCVTP(S|U)U(W|X)(D|H|S)r$")>;
+def : InstRW<[XGeneWriteF1Fcvt1Sf1Ld], (instregex "FCVTZ(S|U)U(W|X)(D|H|S)r$")>;
+// Integer -> FP: both the signed (SCVTF) and unsigned (UCVTF) opcodes belong
+// to the 1St1Lf1Falu group, matching the fixed-point SCVTFS/UCVTFS entries.
+def : InstRW<[XGeneWriteF1St1Lf1Falu], (instregex "(S|U)CVTFU(W|X)(D|H|S)ri$")>;
+// FMOV between general and FP registers; the zeroing forms are plain 1Fmov.
+def : InstRW<[XGeneWriteF1Fmov], (instregex "FMOV(D|S)0$")>;
+def : InstRW<[XGeneWriteF1St1Lf], (instregex "FMOV(WH|XH|WS|XD|XDHigh)r$")>;
+def : InstRW<[XGeneWriteF1Sf1Ld], (instregex "FMOV(HW|HX|SW|DX|DXHigh)r$")>;
+
+// Floating-point <-> fixed-point conversions.
+def : InstRW<[XGeneWriteF1Fcvt1Sf1Ld], (instregex "FCVTZSS(W|X)(D|H|S)ri$")>;
+def : InstRW<[XGeneWriteF1Fcvt1Sf1Ld], (instregex "FCVTZUS(W|X)(D|H|S)ri$")>;
+def : InstRW<[XGeneWriteF1St1Lf1Falu], (instregex "SCVTFS(W|X)(D|H|S)ri$")>;
+def : InstRW<[XGeneWriteF1St1Lf1Falu], (instregex "UCVTFS(W|X)(D|H|S)ri$")>;
+
+// AdvSIMD scalar three same.
+def : InstRW<[XGeneWriteF1Falu], (instregex "FMULX(16|32|64)$")>;
+def : InstRW<[XGeneWriteF1Falu], (instregex "FRECPS(16|32|64)$")>;
+def : InstRW<[XGeneWriteF1Falu], (instregex "FRSQRTS(16|32|64)$")>;
+def : InstRW<[XGeneWriteF1Falu], (instregex "FABD(16|32|64)$")>;
+def : InstRW<[XGeneWriteF1Fsel], (instregex "FCM(EQ|GE|GT)(16|32|64)$")>;
+def : InstRW<[XGeneWriteF1Fsel], (instregex "FAC(GE|GT)(16|32|64)$")>;
+
+// AdvSIMD scalar two-reg misc.
+def : InstRW<[XGeneWriteF1Falu], (instregex "FCVTXNv1i64$")>;
+// Integer -> FP: covers both SCVTF and UCVTF, as listed for the 1Falu group
+// and as done for the shift-by-immediate SCVTF/UCVTF entries.
+def : InstRW<[XGeneWriteF1Falu], (instregex "(S|U)CVTFv1(i16|i32|i64)$")>;
+def : InstRW<[XGeneWriteF1Fcvt], (instregex "FCVT(N|M|A|P)Sv1(f16|i32|i64)$")>;
+def : InstRW<[XGeneWriteF1Fcvt], (instregex "FCVT(N|M|A|P)Uv1(f16|i32|i64)$")>;
+def : InstRW<[XGeneWriteF1Fcvt], (instregex "FCVTZ(S|U)v1(f16|i32|i64)$")>;
+// Compares against zero (rz suffix).
+def : InstRW<[XGeneWriteF1Fsel], (instregex "FCMGTv1(i16|i32|i64)rz$")>;
+def : InstRW<[XGeneWriteF1Fsel], (instregex "FCMEQv1(i16|i32|i64)rz$")>;
+def : InstRW<[XGeneWriteF1Fsel], (instregex "FCMLTv1(i16|i32|i64)rz$")>;
+def : InstRW<[XGeneWriteF1Fsel], (instregex "FCMGEv1(i16|i32|i64)rz$")>;
+def : InstRW<[XGeneWriteF1Fsel], (instregex "FCMLEv1(i16|i32|i64)rz$")>;
+// Reciprocal and reciprocal-square-root estimates.
+def : InstRW<[XGeneWriteF1Adre], (instregex "FRECP(E|X)v1(f16|i32|i64)$")>;
+def : InstRW<[XGeneWriteF1Asre], (instregex "FRSQRTEv1(f16|i32|i64)$")>;
+
+// AdvSIMD scalar pairwise.
+def : InstRW<[XGeneWriteF1Falu], (instregex "FADDPv2(i16|i32|i64)p$")>;
+def : InstRW<[XGeneWriteF1Fsel], (instregex "FMAXPv2(i16|i32|i64)p$")>;
+def : InstRW<[XGeneWriteF1Fsel], (instregex "FMINPv2(i16|i32|i64)p$")>;
+def : InstRW<[XGeneWriteF1Fsel], (instregex "FMAXNMPv2(i16|i32|i64)p$")>;
+def : InstRW<[XGeneWriteF1Fsel], (instregex "FMINNMPv2(i16|i32|i64)p$")>;
+
+// AdvSIMD scalar x indexed element (v1 forms only).
+def : InstRW<[XGeneWriteF1Asm], (instregex "FMULv1(i16|i32|i64)_indexed$")>;
+def : InstRW<[XGeneWriteF1Asm], (instregex "FMULXv1(i16|i32|i64)_indexed$")>;
+def : InstRW<[XGeneWriteF1Asm], (instregex "FML(A|S)v1(i16|i32|i64)_indexed$")>;
+
+// AdvSIMD scalar shift by immediate; only the s and d suffixes are matched.
+// TODO: maybe add |h| to these groups as well? it would make sense
+def : InstRW<[XGeneWriteF1Falu], (instregex "SCVTF(s|d)$")>;
+def : InstRW<[XGeneWriteF1Falu], (instregex "UCVTF(s|d)$")>;
+def : InstRW<[XGeneWriteF1Fcvt], (instregex "FCVTZ(S|U)(s|d)$")>;
+
+//---
+// Load instructions (38 groups in total)
+// 1Ld: LDR (literal)
+// 1Ld: LDURB, LDURH, LDUR (unscaled immediate)
+// 1Ld: LDRB, LDRH, LDR (register offset)
+// 1Ld: LDRB, LDRH, LDR (unsigned immediate)
+// 1LdLd: LDP (offset)
+// 1LdAlu LDRB, LDRH, LDR (immediate post-indexed)
+// 1LdAlu LDRB, LDRH, LDR (immediate pre-indexed)
+// 1LdLdAlu LDP (post-indexed)
+// 1LdLdAlu LDP (pre-indexed)
+// 1Ld1Sbfm LDRSW (literal)
+// 1Ld1Sbfm LDURSB, LDURSH, LDURSW (unscaled immediate)
+// 1Ld1Sbfm LDRSB, LDRSH, LDRSW (register offset)
+// 1Ld1Sbfm LDRSB, LDRSH, LDRSW (unsigned immediate)
+// 1Ld1SbfmAlu LDRSB, LDRSH, LDRSW (immediate post-indexed)
+// 1Ld1SbfmAlu LDRSB, LDRSH, LDRSW (immediate pre-indexed)
+// 1Ld1LdSbfm1Sbfm LDPSW (offset)
+// 1Ld1LdSbfm1SbfmAlu LDPSW (post-indexed)
+// 1Ld1LdSbfm1SbfmAlu LDPSW (pre-indexed)
+//---
+// 1Ld: literal, unscaled immediate, register offset, unsigned immediate.
+// NOTE(review): the literal and unsigned-immediate patterns also match the
+// B/H/S/D/Q register forms, placing them in the integer Ld group - confirm.
+def : InstRW<[XGeneWriteLD1Ld], (instregex "LDR(W|X|S|D|Q)l$")>;
+def : InstRW<[XGeneWriteLD1Ld], (instregex "LDUR(X|W|HH|BB)i$")>;
+def : InstRW<[XGeneWriteLD1Ld], (instregex "LDR(BB|HH|W|X)ro(X|W)$")>;
+def : InstRW<[XGeneWriteLD1Ld], (instregex "LDR(BB|HH|W|X|B|H|S|D|Q)ui$")>;
+
+// 1LdLd: LDP (offset).
+def : InstRW<[XGeneWriteLD1LdLd], (instregex "LDP(W|X)i$")>;
+
+// 1LdAlu: post- and pre-indexed single loads.
+def : InstRW<[XGeneWriteLD1LdAlu], (instregex "LDR(BB|HH|W|X)post$")>;
+def : InstRW<[XGeneWriteLD1LdAlu], (instregex "LDR(BB|HH|W|X)pre$")>;
+
+// 1LdLdAlu: post- and pre-indexed LDP.
+def : InstRW<[XGeneWriteLD1LdLdAlu], (instregex "LDP(W|X)post$")>;
+def : InstRW<[XGeneWriteLD1LdLdAlu], (instregex "LDP(W|X)pre$")>;
+
+// 1Ld1Sbfm: sign-extending loads.
+def : InstRW<[XGeneWriteLD1Ld1Sbfm], (instregex "LDRSWl$")>;
+def : InstRW<[XGeneWriteLD1Ld1Sbfm], (instregex "LDURS(BW|BX|HW|HX|W)i$")>;
+def : InstRW<[XGeneWriteLD1Ld1Sbfm], (instregex "LDRS(BW|BX|HW|HX|W)ro(X|W)$")>;
+def : InstRW<[XGeneWriteLD1Ld1Sbfm], (instregex "LDRS(BW|BX|HW|HX|W)ui$")>;
+
+// 1Ld1SbfmAlu: post- and pre-indexed sign-extending loads.
+def : InstRW<[XGeneWriteLD1Ld1SbfmAlu], (instregex "LDRS(BW|BX|HW|HX|W)post$")>;
+def : InstRW<[XGeneWriteLD1Ld1SbfmAlu], (instregex "LDRS(BW|BX|HW|HX|W)pre$")>;
+
+// LDPSW: offset, then post- and pre-indexed.
+def : InstRW<[XGeneWriteLD1Ld1LdSbfm1Sbfm], (instregex "LDPSWi$")>;
+def : InstRW<[XGeneWriteLD1Ld1LdSbfm1SbfmAlu], (instregex "LDPSWpost$")>;
+def : InstRW<[XGeneWriteLD1Ld1LdSbfm1SbfmAlu], (instregex "LDPSWpre$")>;
+
+// For read advance - all integer load ops
+// Each entry lists only the two store-dependent reads (XGeneReadLDSTI,
+// XGeneReadLDSTF) and no write.
+// NOTE(review): the patterns repeat those of the integer-load InstRW entries
+// above - confirm TableGen accepts a second InstRW for the same opcodes and
+// that it does not replace the write resources assigned there.
+def : InstRW<[XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDR(W|X|S|D|Q)l$")>;
+def : InstRW<[XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDUR(X|W|HH|BB)i$")>;
+def : InstRW<[XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDR(BB|HH|W|X)ro(X|W)$")>;
+def : InstRW<[XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDR(BB|HH|W|X|B|H|S|D|Q)ui$")>;
+def : InstRW<[XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDP(W|X)i$")>;
+def : InstRW<[XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDR(BB|HH|W|X)post$")>;
+def : InstRW<[XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDR(BB|HH|W|X)pre$")>;
+def : InstRW<[XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDP(W|X)post$")>;
+def : InstRW<[XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDP(W|X)pre$")>;
+def : InstRW<[XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDRSWl$")>;
+def : InstRW<[XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDURS(BW|BX|HW|HX|W)i$")>;
+def : InstRW<[XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDRS(BW|BX|HW|HX|W)ro(X|W)$")>;
+def : InstRW<[XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDRS(BW|BX|HW|HX|W)ui$")>;
+def : InstRW<[XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDRS(BW|BX|HW|HX|W)post$")>;
+def : InstRW<[XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDRS(BW|BX|HW|HX|W)pre$")>;
+def : InstRW<[XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDPSWi$")>;
+def : InstRW<[XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDPSWpost$")>;
+def : InstRW<[XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDPSWpre$")>;
+
+//---
+// Load instructions - 64-bit FP/SIMD (8 groups in total)
+// 1Lf: LDUR (literal, unscaled immediate, unsigned immediate)
+// 1LfAlu: LDR (immediate post-indexed)
+// 1LfAlu: LDR (immediate pre-indexed)
+// 1LfAlu: LDR (register offset)
+// 1LfLf: LDP (offset)
+// 1LfLfAlu: LDP (post-indexed, pre-indexed)
+//---
+// for LDUR, the llvm arm64 model only defines the unscaled immediates
+// B/H/S/D register loads: unscaled immediate, post/pre-indexed, register
+// offset, then the D/S pair forms.
+def : InstRW<[XGeneWriteLD1Lf], (instregex "LDUR(B|H|S|D)i$")>;
+def : InstRW<[XGeneWriteLD1LfAlu], (instregex "LDR(B|H|S|D)post$")>;
+def : InstRW<[XGeneWriteLD1LfAlu], (instregex "LDR(B|H|S|D)pre$")>;
+def : InstRW<[XGeneWriteLD1LfAlu], (instregex "LDR(B|H|S|D)ro(X|W)$")>;
+def : InstRW<[XGeneWriteLD1LfLf], (instregex "LDP(D|S)i$")>;
+def : InstRW<[XGeneWriteLD1LfLfAlu], (instregex "LDP(D|S)post$")>;
+def : InstRW<[XGeneWriteLD1LfLfAlu], (instregex "LDP(D|S)pre$")>;
+
+//---
+// Load instructions - 128-bit FP/SIMD (8 groups in total)
+// 1LfLf: LDUR (literal, unscaled immediate, unsigned immediate)
+// 1LfLfAlu: LDR (immediate post-indexed)
+// 1LfLfAlu: LDR (immediate pre-indexed)
+// 1LfAlu1Lf: LDR (register offset)
+// 1LfLfAlu1LfLf: LDP (offset)
+// 1LfLfAlu1LfLfAlu: LDP (post-indexed, pre-indexed)
+//---
+// for LDUR, the llvm arm64 model only defines the unscaled immediates
+// Q register loads: unscaled immediate, post/pre-indexed, register offset,
+// then the Q pair forms.
+def : InstRW<[XGeneWriteLD1LfLf], (instregex "LDURQi$")>;
+def : InstRW<[XGeneWriteLD1LfLfAlu], (instregex "LDRQpost$")>;
+def : InstRW<[XGeneWriteLD1LfLfAlu], (instregex "LDRQpre$")>;
+def : InstRW<[XGeneWriteLD1LfAlu1Lf], (instregex "LDRQro(X|W)$")>;
+def : InstRW<[XGeneWriteLD1LfLfAlu1LfLf], (instregex "LDPQi$")>;
+def : InstRW<[XGeneWriteLD1LfLfAlu1LfLfAlu], (instregex "LDPQpost$")>;
+def : InstRW<[XGeneWriteLD1LfLfAlu1LfLfAlu], (instregex "LDPQpre$")>;
+
+// For read advance - all float load ops
+// Lists only the two store-dependent reads; the patterns cover the same
+// B/H/S/D/Q opcodes as the 64-bit and 128-bit FP/SIMD load entries above.
+// NOTE(review): confirm a second InstRW for the same opcodes is accepted.
+def : InstRW<[XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDUR(B|H|S|D|Q)i$")>;
+def : InstRW<[XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDR(B|H|S|D|Q)post$")>;
+def : InstRW<[XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDR(B|H|S|D|Q)pre$")>;
+def : InstRW<[XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDR(B|H|S|D|Q)ro(X|W)$")>;
+def : InstRW<[XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDP(D|S|Q)i$")>;
+def : InstRW<[XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDP(D|S|Q)post$")>;
+def : InstRW<[XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDP(D|S|Q)pre$")>;
+
+//---
+// Vector Load (66 groups in total)
+// 1Lc: LD1 (one register 2S/4H/8B)
+// 1LcAlu: LD1 (one register 2S/4H/8B pre/post indexed)
+// 1LcLc: LD1 (one register 4S/8H/16B)
+// 1LcLc: LD1 (two registers 2S/4H/8B)
+// 1LcLc: LD2 (two registers 2S/4H/8B)
+// 1LcLcAlu: LD1 (one register 4S/8H/16B pre/post indexed)
+// 1LcLcAlu: LD1 (two registers 2S/4H/8B pre/post indexed)
+// 1LcLcAlu: LD2 (two registers 2S/4H/8B pre/post indexed)
+// 1X3Lc: LD1 (three registers 2S/4H/8B)
+// 1X3Lc: LD3 (three registers 2S/4H/8B)
+// 1X3LcAlu: LD1 (three registers 2S/4H/8B pre/post indexed)
+// 1X3LcAlu: LD3 (three registers 2S/4H/8B pre/post indexed)
+// 1X4Lc: LD1 (two registers 4S/8H/16B)
+// 1X4Lc: LD1 (four registers 2S/4H/8B)
+// 1X4Lc: LD2 (two registers 4S/8H/16B)
+// 1X4Lc: LD4 (four registers 2S/4H/8B)
+// 1X4LcAlu: LD1 (two registers 4S/8H/16B pre/post indexed)
+// 1X4LcAlu: LD1 (four registers 2S/4H/8B pre/post indexed)
+// 1X4LcAlu: LD2 (two registers 4S/8H/16B pre/post indexed)
+// 1X4LcAlu: LD4 (four registers 2S/4H/8B pre/post indexed)
+// 1X6Lc: LD1 (three registers 4S/8H/16B)
+// 1X6Lc: LD3 (three registers 4S/8H/16B)
+// 1X6LcAlu: LD1 (three registers 4S/8H/16B pre/post indexed)
+// 1X6LcAlu: LD3 (three registers 4S/8H/16B pre/post indexed)
+// 1X8Lc: LD1 (four registers 4S/8H/16B)
+// 1X8Lc: LD4 (four registers 4S/8H/16B)
+// 1X8LcAlu: LD1 (four registers 4S/8H/16B pre/post indexed)
+// 1X8LcAlu: LD4 (four registers 4S/8H/16B pre/post indexed)
+// 1Lf: LD1 (one register 1D)
+// 1Lf: LD1R (other)
+// 1LfAlu: LD1 (one register 1D pre/post indexed)
+// 1LfAlu: LD1R (pre/post indexed)
+// 1LfLf: LD1 (one register 2D)
+// 1LfLf: LD1 (two registers 1D)
+// 1LfLf: LD2R (other)
+// 1LfLfAlu: LD1 (one register 2D pre/post indexed)
+// 1LfLfAlu: LD1 (two registers 1D pre/post indexed)
+// 1LfLfAlu: LD2R (pre/post indexed)
+// 1X3Lf: LD1 (three registers 1D)
+// 1X3Lf: LD3R (other)
+// 1X3LfAlu: LD1 (three registers 1D pre/post indexed)
+// 1X3LfAlu: LD3R (pre/post indexed)
+// 1X4Lf: LD1 (two registers 2D)
+// 1X4Lf: LD1 (four registers 1D)
+// 1X4Lf: LD2 (two registers 2D)
+// 1X4Lf: LD4R (other)
+// 1X4LfAlu: LD1 (two registers 2D pre/post indexed)
+// 1X4LfAlu: LD1 (four registers 1D pre/post indexed)
+// 1X4LfAlu: LD2 (two registers 2D pre/post indexed)
+// 1X4LfAlu: LD4R (pre/post indexed)
+// 1X6Lf: LD1 (three registers 2D)
+// 1X6Lf: LD3 (three registers 2D)
+// 1X6LfAlu: LD1 (three registers 2D pre/post indexed)
+// 1X6LfAlu: LD3 (three registers 2D pre/post indexed)
+// 1X8Lf: LD1 (four registers 2D)
+// 1X8Lf: LD4 (four registers 2D)
+// 1X8LfAlu: LD1 (four registers 2D pre/post indexed)
+// 1X8LfAlu: LD4 (four registers 2D pre/post indexed)
+// 1Lf1Asi: LD1 (one register)
+// 1LfAlu1Asi: LD1 (one register pre/post indexed)
+// 1LfLf1AsiAsi: LD2 (two registers)
+// 1LfLfAlu1AsiAsi: LD2 (two registers pre/post indexed)
+// 1X3Lf1X3Asi: LD3 (three registers)
+// 1X3LfAlu1X3Asi: LD3 (three registers pre/post indexed)
+// 1X4Lf1X4Asi: LD4 (four registers)
+// 1X4LfAlu1X4Asi: LD4 (four registers pre/post indexed)
+//---
+// Complex-load (Lc) groups for the 2S/4H/8B and 4S/8H/16B arrangements.
+// The _POST patterns map to the matching ...Alu write.
+// 1Lc / 1LcAlu
+def : InstRW<[XGeneWriteLD1Lc], (instregex "LD1Onev(2s|4h|8b)$")>;
+def : InstRW<[XGeneWriteLD1LcAlu], (instregex "LD1Onev(2s|4h|8b)_POST$")>;
+
+// 1LcLc / 1LcLcAlu
+def : InstRW<[XGeneWriteLD1LcLc], (instregex "LD1Onev(4s|8h|16b)$")>;
+def : InstRW<[XGeneWriteLD1LcLc], (instregex "LD1Twov(2s|4h|8b)$")>;
+def : InstRW<[XGeneWriteLD1LcLc], (instregex "LD2Twov(2s|4h|8b)$")>;
+def : InstRW<[XGeneWriteLD1LcLcAlu], (instregex "LD1Onev(4s|8h|16b)_POST$")>;
+def : InstRW<[XGeneWriteLD1LcLcAlu], (instregex "LD1Twov(2s|4h|8b)_POST$")>;
+def : InstRW<[XGeneWriteLD1LcLcAlu], (instregex "LD2Twov(2s|4h|8b)_POST$")>;
+
+// 1X3Lc / 1X3LcAlu
+def : InstRW<[XGeneWriteLD1X3Lc], (instregex "LD1Threev(2s|4h|8b)$")>;
+def : InstRW<[XGeneWriteLD1X3Lc], (instregex "LD3Threev(2s|4h|8b)$")>;
+def : InstRW<[XGeneWriteLD1X3LcAlu], (instregex "LD1Threev(2s|4h|8b)_POST$")>;
+def : InstRW<[XGeneWriteLD1X3LcAlu], (instregex "LD3Threev(2s|4h|8b)_POST$")>;
+
+// 1X4Lc / 1X4LcAlu
+def : InstRW<[XGeneWriteLD1X4Lc], (instregex "LD1Twov(4s|8h|16b)$")>;
+def : InstRW<[XGeneWriteLD1X4Lc], (instregex "LD1Fourv(2s|4h|8b)$")>;
+def : InstRW<[XGeneWriteLD1X4Lc], (instregex "LD2Twov(4s|8h|16b)$")>;
+def : InstRW<[XGeneWriteLD1X4Lc], (instregex "LD4Fourv(2s|4h|8b)$")>;
+def : InstRW<[XGeneWriteLD1X4LcAlu], (instregex "LD1Twov(4s|8h|16b)_POST$")>;
+def : InstRW<[XGeneWriteLD1X4LcAlu], (instregex "LD1Fourv(2s|4h|8b)_POST$")>;
+def : InstRW<[XGeneWriteLD1X4LcAlu], (instregex "LD2Twov(4s|8h|16b)_POST$")>;
+def : InstRW<[XGeneWriteLD1X4LcAlu], (instregex "LD4Fourv(2s|4h|8b)_POST$")>;
+
+// 1X6Lc / 1X6LcAlu
+def : InstRW<[XGeneWriteLD1X6Lc], (instregex "LD1Threev(4s|8h|16b)$")>;
+def : InstRW<[XGeneWriteLD1X6Lc], (instregex "LD3Threev(4s|8h|16b)$")>;
+def : InstRW<[XGeneWriteLD1X6LcAlu], (instregex "LD1Threev(4s|8h|16b)_POST$")>;
+def : InstRW<[XGeneWriteLD1X6LcAlu], (instregex "LD3Threev(4s|8h|16b)_POST$")>;
+
+// 1X8Lc / 1X8LcAlu
+def : InstRW<[XGeneWriteLD1X8Lc], (instregex "LD1Fourv(4s|8h|16b)$")>;
+def : InstRW<[XGeneWriteLD1X8Lc], (instregex "LD4Fourv(4s|8h|16b)$")>;
+def : InstRW<[XGeneWriteLD1X8LcAlu], (instregex "LD1Fourv(4s|8h|16b)_POST$")>;
+def : InstRW<[XGeneWriteLD1X8LcAlu], (instregex "LD4Fourv(4s|8h|16b)_POST$")>;
+
+// Floating-point-load (Lf) groups: the 1D/2D arrangements and the
+// LD1R..LD4R opcodes. The _POST patterns map to the matching ...Alu write.
+// 1Lf / 1LfAlu
+def : InstRW<[XGeneWriteLD1Lf], (instregex "LD1Onev1d$")>;
+def : InstRW<[XGeneWriteLD1Lf], (instregex "LD1Rv(2s|4h|8b)$")>;
+def : InstRW<[XGeneWriteLD1Lf], (instregex "LD1Rv(4s|8h|16b)$")>;
+def : InstRW<[XGeneWriteLD1Lf], (instregex "LD1Rv(1d|2d)$")>;
+def : InstRW<[XGeneWriteLD1LfAlu], (instregex "LD1Onev1d_POST$")>;
+def : InstRW<[XGeneWriteLD1LfAlu], (instregex "LD1Rv(2s|4h|8b)_POST$")>;
+def : InstRW<[XGeneWriteLD1LfAlu], (instregex "LD1Rv(4s|8h|16b)_POST$")>;
+def : InstRW<[XGeneWriteLD1LfAlu], (instregex "LD1Rv(1d|2d)_POST$")>;
+
+// 1LfLf / 1LfLfAlu
+def : InstRW<[XGeneWriteLD1LfLf], (instregex "LD1Onev2d$")>;
+def : InstRW<[XGeneWriteLD1LfLf], (instregex "LD1Twov1d$")>;
+def : InstRW<[XGeneWriteLD1LfLf], (instregex "LD2Rv(2s|4h|8b)$")>;
+def : InstRW<[XGeneWriteLD1LfLf], (instregex "LD2Rv(4s|8h|16b)$")>;
+def : InstRW<[XGeneWriteLD1LfLf], (instregex "LD2Rv(1d|2d)$")>;
+def : InstRW<[XGeneWriteLD1LfLfAlu], (instregex "LD1Onev2d_POST$")>;
+def : InstRW<[XGeneWriteLD1LfLfAlu], (instregex "LD1Twov1d_POST$")>;
+def : InstRW<[XGeneWriteLD1LfLfAlu], (instregex "LD2Rv(2s|4h|8b)_POST$")>;
+def : InstRW<[XGeneWriteLD1LfLfAlu], (instregex "LD2Rv(4s|8h|16b)_POST$")>;
+def : InstRW<[XGeneWriteLD1LfLfAlu], (instregex "LD2Rv(1d|2d)_POST$")>;
+
+// 1X3Lf / 1X3LfAlu
+def : InstRW<[XGeneWriteLD1X3Lf], (instregex "LD1Threev1d$")>;
+def : InstRW<[XGeneWriteLD1X3Lf], (instregex "LD3Rv(2s|4h|8b)$")>;
+def : InstRW<[XGeneWriteLD1X3Lf], (instregex "LD3Rv(4s|8h|16b)$")>;
+def : InstRW<[XGeneWriteLD1X3Lf], (instregex "LD3Rv(1d|2d)$")>;
+def : InstRW<[XGeneWriteLD1X3LfAlu], (instregex "LD1Threev1d_POST$")>;
+def : InstRW<[XGeneWriteLD1X3LfAlu], (instregex "LD3Rv(2s|4h|8b)_POST$")>;
+def : InstRW<[XGeneWriteLD1X3LfAlu], (instregex "LD3Rv(4s|8h|16b)_POST$")>;
+def : InstRW<[XGeneWriteLD1X3LfAlu], (instregex "LD3Rv(1d|2d)_POST$")>;
+
+// 1X4Lf / 1X4LfAlu
+def : InstRW<[XGeneWriteLD1X4Lf], (instregex "LD1Twov2d$")>;
+def : InstRW<[XGeneWriteLD1X4Lf], (instregex "LD1Fourv1d$")>;
+def : InstRW<[XGeneWriteLD1X4Lf], (instregex "LD2Twov2d$")>;
+def : InstRW<[XGeneWriteLD1X4Lf], (instregex "LD4Rv(2s|4h|8b)$")>;
+def : InstRW<[XGeneWriteLD1X4Lf], (instregex "LD4Rv(4s|8h|16b)$")>;
+def : InstRW<[XGeneWriteLD1X4Lf], (instregex "LD4Rv(1d|2d)$")>;
+def : InstRW<[XGeneWriteLD1X4LfAlu], (instregex "LD1Twov2d_POST$")>;
+def : InstRW<[XGeneWriteLD1X4LfAlu], (instregex "LD1Fourv1d_POST$")>;
+def : InstRW<[XGeneWriteLD1X4LfAlu], (instregex "LD2Twov2d_POST$")>;
+def : InstRW<[XGeneWriteLD1X4LfAlu], (instregex "LD4Rv(2s|4h|8b)_POST$")>;
+def : InstRW<[XGeneWriteLD1X4LfAlu], (instregex "LD4Rv(4s|8h|16b)_POST$")>;
+def : InstRW<[XGeneWriteLD1X4LfAlu], (instregex "LD4Rv(1d|2d)_POST$")>;
+
+// 1X6Lf / 1X6LfAlu
+def : InstRW<[XGeneWriteLD1X6Lf], (instregex "LD1Threev2d$")>;
+def : InstRW<[XGeneWriteLD1X6Lf], (instregex "LD3Threev2d$")>;
+def : InstRW<[XGeneWriteLD1X6LfAlu], (instregex "LD1Threev2d_POST$")>;
+def : InstRW<[XGeneWriteLD1X6LfAlu], (instregex "LD3Threev2d_POST$")>;
+
+// 1X8Lf / 1X8LfAlu
+def : InstRW<[XGeneWriteLD1X8Lf], (instregex "LD1Fourv2d$")>;
+def : InstRW<[XGeneWriteLD1X8Lf], (instregex "LD4Fourv2d$")>;
+def : InstRW<[XGeneWriteLD1X8LfAlu], (instregex "LD1Fourv2d_POST$")>;
+def : InstRW<[XGeneWriteLD1X8LfAlu], (instregex "LD4Fourv2d_POST$")>;
+
+// Lf + Asi groups: the LDn opcodes named by element size (i8/i16/i32/i64),
+// presumably the single-lane loads - confirm against the instruction defs.
+def : InstRW<[XGeneWriteLD1Lf1Asi], (instregex "LD1(i8|i16|i32|i64)$")>;
+def : InstRW<[XGeneWriteLD1LfAlu1Asi], (instregex "LD1(i8|i16|i32|i64)_POST$")>;
+def : InstRW<[XGeneWriteLD1LfLf1AsiAsi], (instregex "LD2(i8|i16|i32|i64)$")>;
+def : InstRW<[XGeneWriteLD1LfLfAlu1AsiAsi], (instregex "LD2(i8|i16|i32|i64)_POST$")>;
+def : InstRW<[XGeneWriteLD1X3Lf1X3Asi], (instregex "LD3(i8|i16|i32|i64)$")>;
+def : InstRW<[XGeneWriteLD1X3LfAlu1X3Asi], (instregex "LD3(i8|i16|i32|i64)_POST$")>;
+def : InstRW<[XGeneWriteLD1X4Lf1X4Asi], (instregex "LD4(i8|i16|i32|i64)$")>;
+def : InstRW<[XGeneWriteLD1X4LfAlu1X4Asi], (instregex "LD4(i8|i16|i32|i64)_POST$")>;
+
+// All vector loads for Read Advance
+// The pattern matches every opcode whose name starts with LD1, LD2, LD3 or
+// LD4, so it overlaps all vector-load entries above.
+// NOTE(review): this lists the scalar XGeneReadLDSTI/XGeneReadLDSTF reads
+// although XGeneReadVLDSTI/XGeneReadVLDSTF are defined for vector loads
+// (same advance values) - confirm which pair is intended.
+def : InstRW<[XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD(1|2|3|4).*$")>;
+
+//---
+// Store instructions (18 groups in total)
+// 1St: STURB, STURH, STUR (unscaled immediate)
+// 1St: STRB, STRH, STR (register offset)
+// 1St: STRB, STRH, STR (unsigned immediate)
+// 1StAlu: STRB, STRH, STR (immediate post-indexed)
+// 1StAlu: STRB, STRH, STR (immediate pre-indexed)
+// 1StSt: STP (offset)
+// 1StStAlu: STP (post-indexed)
+// 1StStAlu: STP (pre-indexed)
+//---
+// 1St: unscaled immediate, register offset, unsigned immediate.
+def : InstRW<[XGeneWriteST1St], (instregex "STUR(BB|HH|W|X)i$")>;
+def : InstRW<[XGeneWriteST1St], (instregex "STR(BB|HH|W|X)ro(W|X)$")>;
+def : InstRW<[XGeneWriteST1St], (instregex "STR(X|W|HH|BB)ui$")>;
+
+// 1StAlu: post- and pre-indexed single stores.
+def : InstRW<[XGeneWriteST1StAlu], (instregex "STR(W|X|BB|HH)post$")>;
+def : InstRW<[XGeneWriteST1StAlu], (instregex "STR(W|X|BB|HH)pre$")>;
+
+// 1StSt: STP (offset).
+def : InstRW<[XGeneWriteST1StSt], (instregex "STP(W|X)i$")>;
+
+// 1StStAlu: post- and pre-indexed STP.
+def : InstRW<[XGeneWriteST1StStAlu], (instregex "STP(W|X)post$")>;
+def : InstRW<[XGeneWriteST1StStAlu], (instregex "STP(W|X)pre$")>;
+
+// Scalar int stores for Read Advance
+// Tags the integer stores with XGeneWriteSTI, the write that XGeneReadLDSTI
+// and XGeneReadVLDSTI are keyed on.
+// NOTE(review): the patterns repeat those of the integer-store InstRW entries
+// above - confirm a second InstRW for the same opcodes is accepted and does
+// not replace the XGeneWriteST* resources assigned there.
+def : InstRW<[XGeneWriteSTI], (instregex "STUR(BB|HH|W|X)i$")>;
+def : InstRW<[XGeneWriteSTI], (instregex "STR(BB|HH|W|X)ro(W|X)$")>;
+def : InstRW<[XGeneWriteSTI], (instregex "STR(X|W|HH|BB)ui$")>;
+def : InstRW<[XGeneWriteSTI], (instregex "STR(W|X|BB|HH)post$")>;
+def : InstRW<[XGeneWriteSTI], (instregex "STR(W|X|BB|HH)pre$")>;
+def : InstRW<[XGeneWriteSTI], (instregex "STP(W|X)i$")>;
+def : InstRW<[XGeneWriteSTI], (instregex "STP(W|X)post$")>;
+def : InstRW<[XGeneWriteSTI], (instregex "STP(W|X)pre$")>;
+
+//---
+// Store instructions - 64-bit FP/SIMD (8 groups in total)
+// 1Sf: STUR (literal, unscaled immediate, unsigned immediate)
+// 1SfAlu: STR (immediate post-indexed)
+// 1SfAlu: STR (immediate pre-indexed)
+// 1SfAlu: STR (register offset)
+// 1SfSf: STP (offset)
+// 1SfSfAlu: STP (post-indexed, pre-indexed)
+//---
+// for STUR, the llvm arm64 model only defines the unscaled immediates
+// B/H/S/D register stores: unscaled immediate, post/pre-indexed, register
+// offset, then the D/S pair forms.
+def : InstRW<[XGeneWriteST1Sf], (instregex "STUR(B|H|S|D)i$")>;
+def : InstRW<[XGeneWriteST1SfAlu], (instregex "STR(B|H|S|D)post$")>;
+def : InstRW<[XGeneWriteST1SfAlu], (instregex "STR(B|H|S|D)pre$")>;
+def : InstRW<[XGeneWriteST1SfAlu], (instregex "STR(B|H|S|D)ro(X|W)$")>;
+def : InstRW<[XGeneWriteST1SfSf], (instregex "STP(D|S)i$")>;
+def : InstRW<[XGeneWriteST1SfSfAlu], (instregex "STP(D|S)post$")>;
+def : InstRW<[XGeneWriteST1SfSfAlu], (instregex "STP(D|S)pre$")>;
+
+//---
+// Store instructions - 128-bit FP/SIMD (8 groups in total)
+// 1SfSf: STUR (literal, unscaled immediate, unsigned immediate)
+// 1SfSfAlu: STR (immediate post-indexed)
+// 1SfSfAlu: STR (immediate pre-indexed)
+// 1SfAlu1Sf: STR (register offset)
+// 1SfSfAlu1SfSf: STP (offset)
+// 1SfSfAlu1SfSfAlu: STP (post-indexed, pre-indexed)
+//---
+// for STUR, the llvm arm64 model only defines the unscaled immediates
+// Q register stores: unscaled immediate, post/pre-indexed, register offset,
+// then the Q pair forms.
+def : InstRW<[XGeneWriteST1SfSf], (instregex "STURQi$")>;
+def : InstRW<[XGeneWriteST1SfSfAlu], (instregex "STRQpost$")>;
+def : InstRW<[XGeneWriteST1SfSfAlu], (instregex "STRQpre$")>;
+def : InstRW<[XGeneWriteST1SfAlu1Sf], (instregex "STRQro(X|W)$")>;
+def : InstRW<[XGeneWriteST1SfSfAlu1SfSf], (instregex "STPQi$")>;
+def : InstRW<[XGeneWriteST1SfSfAlu1SfSfAlu], (instregex "STPQpost$")>;
+def : InstRW<[XGeneWriteST1SfSfAlu1SfSfAlu], (instregex "STPQpre$")>;
+
+// Scalar float stores for Read Advance
+// Tags the B/H/S/D/Q stores with XGeneWriteSTF, the write that
+// XGeneReadLDSTF and XGeneReadVLDSTF are keyed on.
+// NOTE(review): the patterns repeat those of the FP/SIMD store InstRW entries
+// above - confirm a second InstRW for the same opcodes is accepted.
+def : InstRW<[XGeneWriteSTF], (instregex "STUR(B|H|S|D|Q)i$")>;
+def : InstRW<[XGeneWriteSTF], (instregex "STR(B|H|S|D|Q)post$")>;
+def : InstRW<[XGeneWriteSTF], (instregex "STR(B|H|S|D|Q)pre$")>;
+def : InstRW<[XGeneWriteSTF], (instregex "STR(B|H|S|D|Q)ro(X|W)$")>;
+def : InstRW<[XGeneWriteSTF], (instregex "STP(D|S|Q)i$")>;
+def : InstRW<[XGeneWriteSTF], (instregex "STP(D|S|Q)post$")>;
+def : InstRW<[XGeneWriteSTF], (instregex "STP(D|S|Q)pre$")>;
+
+//---
+// Vector Store (66 groups in total); indexed forms are matched as _POST opcodes
+// Multiple Structures (ST<n>Onev/Twov/Threev/Fourv opcodes)
+// 1Sc: ST1 (one register 2S/4H/8B)
+// 1ScAlu: ST1 (one register 2S/4H/8B pre/post indexed)
+// 1ScSc: ST1 (one register 4S/8H/16B)
+// 1ScSc: ST1 (two registers 2S/4H/8B)
+// 1ScSc: ST2 (two registers 2S/4H/8B)
+// 1ScScAlu: ST1 (one register 4S/8H/16B pre/post indexed)
+// 1ScScAlu: ST1 (two registers 2S/4H/8B pre/post indexed)
+// 1ScScAlu: ST2 (two registers 2S/4H/8B pre/post indexed)
+// 1X3Sc: ST1 (three registers 2S/4H/8B)
+// 1X3Sc: ST3 (three registers 2S/4H/8B)
+// 1X3ScAlu: ST1 (three registers 2S/4H/8B pre/post indexed)
+// 1X3ScAlu: ST3 (three registers 2S/4H/8B pre/post indexed)
+// 1X4Sc: ST1 (two registers 4S/8H/16B)
+// 1X4Sc: ST1 (four registers 2S/4H/8B)
+// 1X4Sc: ST2 (two registers 4S/8H/16B)
+// 1X4Sc: ST4 (four registers 2S/4H/8B)
+// 1X4ScAlu: ST1 (two registers 4S/8H/16B pre/post indexed)
+// 1X4ScAlu: ST1 (four registers 2S/4H/8B pre/post indexed)
+// 1X4ScAlu: ST2 (two registers 4S/8H/16B pre/post indexed)
+// 1X4ScAlu: ST4 (four registers 2S/4H/8B pre/post indexed)
+// 1X6Sc: ST1 (three registers 4S/8H/16B)
+// 1X6Sc: ST3 (three registers 4S/8H/16B)
+// 1X6ScAlu: ST1 (three registers 4S/8H/16B pre/post indexed)
+// 1X6ScAlu: ST3 (three registers 4S/8H/16B pre/post indexed)
+// 1X8Sc: ST1 (four registers 4S/8H/16B)
+// 1X8Sc: ST4 (four registers 4S/8H/16B)
+// 1X8ScAlu: ST1 (four registers 4S/8H/16B pre/post indexed)
+// 1X8ScAlu: ST4 (four registers 4S/8H/16B pre/post indexed)
+// 1Sf: ST1 (one register 1D)
+// 1SfAlu: ST1 (one register 1D pre/post indexed)
+// 1SfSf: ST1 (one register 2D)
+// 1SfSf: ST1 (two registers 1D)
+// 1SfSfAlu: ST1 (one register 2D pre/post indexed)
+// 1SfSfAlu: ST1 (two registers 1D pre/post indexed)
+// 1X3Sf: ST1 (three registers 1D)
+// 1X3SfAlu: ST1 (three registers 1D pre/post indexed)
+// 1X4Sf: ST1 (two registers 2D)
+// 1X4Sf: ST1 (four registers 1D)
+// 1X4Sf: ST2 (two registers 2D)
+// 1X4SfAlu: ST1 (two registers 2D pre/post indexed)
+// 1X4SfAlu: ST1 (four registers 1D pre/post indexed)
+// 1X4SfAlu: ST2 (two registers 2D pre/post indexed)
+// 1X6Sf: ST1 (three registers 2D)
+// 1X6Sf: ST3 (three registers 2D)
+// 1X6SfAlu: ST1 (three registers 2D pre/post indexed)
+// 1X6SfAlu: ST3 (three registers 2D pre/post indexed)
+// 1X8Sf: ST1 (four registers 2D)
+// 1X8Sf: ST4 (four registers 2D)
+// 1X8SfAlu: ST1 (four registers 2D pre/post indexed)
+// 1X8SfAlu: ST4 (four registers 2D pre/post indexed)
+// Single Structure (ST1..ST4 opcodes with an i8/i16/i32/i64 suffix)
+// 1Sf: ST1 (one register)
+// 1SfAlu: ST1 (one register pre/post indexed)
+// 1SfSf: ST2 (two registers)
+// 1SfSfAlu: ST2 (two registers pre/post indexed)
+// 1X3Sf: ST3 (three registers)
+// 1X3SfAlu: ST3 (three registers pre/post indexed)
+// 1X4Sf: ST4 (four registers)
+// 1X4SfAlu: ST4 (four registers pre/post indexed)
+//---
+def : InstRW<[XGeneWriteST1Sc], (instregex "ST1Onev(2s|4h|8b)$")>;
+def : InstRW<[XGeneWriteST1ScAlu], (instregex "ST1Onev(2s|4h|8b)_POST$")>;
+
+def : InstRW<[XGeneWriteST1ScSc], (instregex "ST1Onev(4s|8h|16b)$")>;
+def : InstRW<[XGeneWriteST1ScSc], (instregex "ST1Twov(2s|4h|8b)$")>;
+def : InstRW<[XGeneWriteST1ScSc], (instregex "ST2Twov(2s|4h|8b)$")>;
+def : InstRW<[XGeneWriteST1ScScAlu], (instregex "ST1Onev(4s|8h|16b)_POST$")>;
+def : InstRW<[XGeneWriteST1ScScAlu], (instregex "ST1Twov(2s|4h|8b)_POST$")>;
+def : InstRW<[XGeneWriteST1ScScAlu], (instregex "ST2Twov(2s|4h|8b)_POST$")>;
+
+def : InstRW<[XGeneWriteST1X3Sc], (instregex "ST1Threev(2s|4h|8b)$")>;
+def : InstRW<[XGeneWriteST1X3Sc], (instregex "ST3Threev(2s|4h|8b)$")>;
+def : InstRW<[XGeneWriteST1X3ScAlu], (instregex "ST1Threev(2s|4h|8b)_POST$")>;
+def : InstRW<[XGeneWriteST1X3ScAlu], (instregex "ST3Threev(2s|4h|8b)_POST$")>;
+
+def : InstRW<[XGeneWriteST1X4Sc], (instregex "ST1Twov(4s|8h|16b)$")>;
+def : InstRW<[XGeneWriteST1X4Sc], (instregex "ST1Fourv(2s|4h|8b)$")>;
+def : InstRW<[XGeneWriteST1X4Sc], (instregex "ST2Twov(4s|8h|16b)$")>;
+def : InstRW<[XGeneWriteST1X4Sc], (instregex "ST4Fourv(2s|4h|8b)$")>;
+def : InstRW<[XGeneWriteST1X4ScAlu], (instregex "ST1Twov(4s|8h|16b)_POST$")>;
+def : InstRW<[XGeneWriteST1X4ScAlu], (instregex "ST1Fourv(2s|4h|8b)_POST$")>;
+def : InstRW<[XGeneWriteST1X4ScAlu], (instregex "ST2Twov(4s|8h|16b)_POST$")>;
+def : InstRW<[XGeneWriteST1X4ScAlu], (instregex "ST4Fourv(2s|4h|8b)_POST$")>;
+
+def : InstRW<[XGeneWriteST1X6Sc], (instregex "ST1Threev(4s|8h|16b)$")>;
+def : InstRW<[XGeneWriteST1X6Sc], (instregex "ST3Threev(4s|8h|16b)$")>;
+def : InstRW<[XGeneWriteST1X6ScAlu], (instregex "ST1Threev(4s|8h|16b)_POST$")>;
+def : InstRW<[XGeneWriteST1X6ScAlu], (instregex "ST3Threev(4s|8h|16b)_POST$")>;
+
+def : InstRW<[XGeneWriteST1X8Sc], (instregex "ST1Fourv(4s|8h|16b)$")>;
+def : InstRW<[XGeneWriteST1X8Sc], (instregex "ST4Fourv(4s|8h|16b)$")>;
+def : InstRW<[XGeneWriteST1X8ScAlu], (instregex "ST1Fourv(4s|8h|16b)_POST$")>;
+def : InstRW<[XGeneWriteST1X8ScAlu], (instregex "ST4Fourv(4s|8h|16b)_POST$")>;
+
+def : InstRW<[XGeneWriteST1Sf], (instregex "ST1Onev1d$")>;
+def : InstRW<[XGeneWriteST1SfAlu], (instregex "ST1Onev1d_POST$")>;
+
+def : InstRW<[XGeneWriteST1SfSf], (instregex "ST1Onev2d$")>;
+def : InstRW<[XGeneWriteST1SfSf], (instregex "ST1Twov1d$")>;
+def : InstRW<[XGeneWriteST1SfSfAlu], (instregex "ST1Onev2d_POST$")>;
+def : InstRW<[XGeneWriteST1SfSfAlu], (instregex "ST1Twov1d_POST$")>;
+
+def : InstRW<[XGeneWriteST1X3Sf], (instregex "ST1Threev1d$")>;
+def : InstRW<[XGeneWriteST1X3SfAlu], (instregex "ST1Threev1d_POST$")>;
+
+def : InstRW<[XGeneWriteST1X4Sf], (instregex "ST1Twov2d$")>;
+def : InstRW<[XGeneWriteST1X4Sf], (instregex "ST1Fourv1d$")>;
+def : InstRW<[XGeneWriteST1X4Sf], (instregex "ST2Twov2d$")>;
+def : InstRW<[XGeneWriteST1X4SfAlu], (instregex "ST1Twov2d_POST$")>;
+def : InstRW<[XGeneWriteST1X4SfAlu], (instregex "ST1Fourv1d_POST$")>;
+def : InstRW<[XGeneWriteST1X4SfAlu], (instregex "ST2Twov2d_POST$")>;
+
+def : InstRW<[XGeneWriteST1X6Sf], (instregex "ST1Threev2d$")>;
+def : InstRW<[XGeneWriteST1X6Sf], (instregex "ST3Threev2d$")>;
+def : InstRW<[XGeneWriteST1X6SfAlu], (instregex "ST1Threev2d_POST$")>;
+def : InstRW<[XGeneWriteST1X6SfAlu], (instregex "ST3Threev2d_POST$")>;
+
+def : InstRW<[XGeneWriteST1X8Sf], (instregex "ST1Fourv2d$")>;
+def : InstRW<[XGeneWriteST1X8Sf], (instregex "ST4Fourv2d$")>;
+def : InstRW<[XGeneWriteST1X8SfAlu], (instregex "ST1Fourv2d_POST$")>;
+def : InstRW<[XGeneWriteST1X8SfAlu], (instregex "ST4Fourv2d_POST$")>;
+
+def : InstRW<[XGeneWriteST1Sf], (instregex "ST1(i8|i16|i32|i64)$")>;
+def : InstRW<[XGeneWriteST1SfAlu], (instregex "ST1(i8|i16|i32|i64)_POST$")>;
+def : InstRW<[XGeneWriteST1SfSf], (instregex "ST2(i8|i16|i32|i64)$")>;
+def : InstRW<[XGeneWriteST1SfSfAlu], (instregex "ST2(i8|i16|i32|i64)_POST$")>;
+def : InstRW<[XGeneWriteST1X3Sf], (instregex "ST3(i8|i16|i32|i64)$")>;
+def : InstRW<[XGeneWriteST1X3SfAlu], (instregex "ST3(i8|i16|i32|i64)_POST$")>;
+def : InstRW<[XGeneWriteST1X4Sf], (instregex "ST4(i8|i16|i32|i64)$")>;
+def : InstRW<[XGeneWriteST1X4SfAlu], (instregex "ST4(i8|i16|i32|i64)_POST$")>;
+
+// All vector stores go in the "Store Float" group for read advance. NOTE(review): "ST.*" also matches STR/STP/STUR opcodes - confirm intended
+def : InstRW<[XGeneWriteSTF], (instregex "ST.*$")>;
+
+//---
+// Data Processing Register
+// 1Sbfm1Alu: LSL, LSR, ASR (shifted register)
+// 1Sbfm1Alu: LSLV, LSRV, ASRV, RORV: shift/rotate op.
+// 1Sbfm1Alu: UXTW, UXTX (shifted register)
+// 1Alb1Alu: ROR (shifted register)
+// 1Sbfm: SBFM
+// 1Car: ADC (Add/subtract (with carry): carry op.)
+// 1Set: CCMP (Conditional compare (register): logical op., produces flag)
+// 1Alu: CSEL (Conditional select: arithmetic op.)
+// 1Mlw1Alu: MADD, SMADDL, UMADDL with other Ra (32bit)
+// 1Mlx1Alu: MADD, SMADDL, UMADDL with other Ra (64bit)
+// 1Mlw1Alu: MSUB, SMSUBL, UMSUBL (32bit)
+// 1Mlx1Alu: MSUB, SMSUBL, UMSUBL (64bit)
+// 1Mlx: UMULH, SMULH (64bit)
+// 1Div: UDIV, SDIV
+// 1Alb: REV, REV32, REV64
+// 1Alu: BFM
+// 1Alu: MRS, MSR
+//---
+def : InstRW<[XGeneWriteI1Sbfm1Alu], (instregex "LS(L|R)V(W|X)r$")>;
+def : InstRW<[XGeneWriteI1Sbfm1Alu], (instregex "ASRV(W|X)r$")>;
+def : InstRW<[XGeneWriteI1Sbfm1Alu], (instregex "UBFM(W|X)ri$")>;
+// (the UBFM entry above is presumably listed for the UXTW alias named in the header - confirm)
+def : InstRW<[XGeneWriteI1Alb1Alu], (instregex "RORV(W|X)r$")>;
+def : InstRW<[XGeneWriteI1Sbfm], (instregex "SBFM(W|X)ri$")>;
+
+def : InstRW<[XGeneWriteI1Car], (instregex "ADC(W|X)r$")>;
+def : InstRW<[XGeneWriteI1Car], (instregex "ADCS(W|X)r$")>;
+def : InstRW<[XGeneWriteI1Set], (instregex "CCMP(W|X)r$")>;
+def : InstRW<[XGeneWriteI1Set], (instregex "CCMP(W|X)i$")>;
+def : InstRW<[XGeneWriteI1Alu], (instregex "CSEL(W|X)r$")>;
+
+def : InstRW<[XGeneWriteI1Mlw1Alu], (instregex "MADD(W)rrr$")>;
+def : InstRW<[XGeneWriteI1Mlx1Alu], (instregex "MADD(X)rrr$")>;
+def : InstRW<[XGeneWriteI1Mlw1Alu], (instregex "(S|U)MADDLrrr$")>;
+def : InstRW<[XGeneWriteI1Mlw1Alu], (instregex "MSUB(W)rrr$")>;
+def : InstRW<[XGeneWriteI1Mlx1Alu], (instregex "MSUB(X)rrr$")>;
+def : InstRW<[XGeneWriteI1Mlw1Alu], (instregex "(S|U)MSUBLrrr$")>;
+
+def : InstRW<[XGeneWriteI1Mlx], (instregex "(S|U)MULHrr$")>;
+
+def : InstRW<[XGeneWriteI1Div], (instregex "SDIVWr$")>;
+def : InstRW<[XGeneWriteI1Div], (instregex "SDIVXr$")>;
+def : InstRW<[XGeneWriteI1Div], (instregex "UDIV(W|X)r$")>;
+
+def : InstRW<[XGeneWriteI1Alb], (instregex "REV(W|X)r$")>;
+def : InstRW<[XGeneWriteI1Alb], (instregex "REV16(W|X)r$")>;
+def : InstRW<[XGeneWriteI1Alb], (instregex "REV32Xr$")>;
+
+def : InstRW<[XGeneWriteI1Alu], (instregex "BFM(W|X)ri$")>;
+
+def : InstRW<[XGeneWriteI1Alu], (instregex "BLR$")>; // NOTE(review): BLR is not in the header list above - confirm group
+def : InstRW<[XGeneWriteI1Alu], (instregex "MRS$")>;
+def : InstRW<[XGeneWriteI1Alu], (instregex "MSR$")>;
+
+// Taken from the list of missing instructions
+def : InstRW<[XGeneWriteI1Alu], (instregex "ADD(W|X)r(r|i)$")>;
+def : InstRW<[XGeneWriteI1Alb1Alu], (instregex "ADD(W|X)rx$")>;
+def : InstRW<[XGeneWriteI1Set], (instregex "ADDS(W|X)r(r|i)$")>;
+def : InstRW<[XGeneWriteI1Alb1Alu], (instregex "ADDS(W|X)rx$")>;
+def : InstRW<[XGeneWriteI1Alu], (instregex "SUB(W|X)r(r|i)$")>;
+def : InstRW<[XGeneWriteI1Alb1Alu], (instregex "SUB(W|X)rx$")>;
+def : InstRW<[XGeneWriteI1Set], (instregex "SUBS(W|X)r(r|i)$")>;
+def : InstRW<[XGeneWriteI1Alb1Alu], (instregex "SUBS(W|X)rx$")>;
+def : InstRW<[XGeneWriteI1Alu], (instregex "AND(W|X)r(r|i)$")>;
+def : InstRW<[XGeneWriteI1Set], (instregex "ANDS(W|X)r(r|i)$")>;
+def : InstRW<[XGeneWriteI1Alu], (instregex "BIC(W|X)rr$")>;
+def : InstRW<[XGeneWriteI1Set], (instregex "BICS(W|X)rr$")>;
+
+def : InstRW<[XGeneWriteI1Alu], (instregex "EON(W|X)rr$")>;
+def : InstRW<[XGeneWriteI1Alu], (instregex "EOR(W|X)r(r|i)$")>;
+def : InstRW<[XGeneWriteI1Alu], (instregex "ORN(W|X)rr$")>;
+def : InstRW<[XGeneWriteI1Alu], (instregex "ORR(W|X)r(r|i)$")>;
+
+def : InstRW<[XGeneWriteI1Alu], (instregex "CLS(W|X)r$")>;
+def : InstRW<[XGeneWriteI1Alu], (instregex "CLZ(W|X)r$")>;
+def : InstRW<[XGeneWriteI1Alu], (instregex "RBIT(W|X)r$")>;
+def : InstRW<[XGeneWriteI1Alu], (instregex "CSINC(W|X)r$")>;
+def : InstRW<[XGeneWriteI1Alu], (instregex "CSINV(W|X)r$")>;
+def : InstRW<[XGeneWriteI1Alu], (instregex "CSNEG(W|X)r$")>;
+
+def : InstRW<[XGeneWriteI1Car], (instregex "SBC(W|X)r$")>;
+def : InstRW<[XGeneWriteI1Car], (instregex "SBCS(W|X)r$")>;
+
+def : InstRW<[XGeneWriteI1Alb], (instregex "EXTR(W|X)rri$")>;
+def : InstRW<[XGeneWriteI1Alu], (instregex "MOV(N|Z|K)(W|X)i$")>;
+
+def : InstRW<[XGeneWriteI1Alu], (instregex "ADR$")>;
+def : InstRW<[XGeneWriteI1Alu], (instregex "ADRP$")>;
+
+def : InstRW<[XGeneWriteI1Set], (instregex "CCMN(W|X)(r|i)$")>;
+
+def : InstRW<[XGeneWriteI1Alu], (instregex "TBZ(W|X)$")>;
+def : InstRW<[XGeneWriteI1Alu], (instregex "TBNZ(W|X)$")>;
+def : InstRW<[XGeneWriteI1Alu], (instregex "CBZ(W|X)$")>;
+def : InstRW<[XGeneWriteI1Alu], (instregex "CBNZ(W|X)$")>;
+
+// Shifted-register (rs) forms of the arithmetic and logical instructions
+def : InstRW<[XGeneWriteI1Sbfm1Alu], (instregex "ADD(W|X)rs$")>;
+def : InstRW<[XGeneWriteI1Sbfm1Set], (instregex "ADDS(W|X)rs$")>;
+def : InstRW<[XGeneWriteI1Sbfm1Alu], (instregex "SUB(W|X)rs$")>;
+def : InstRW<[XGeneWriteI1Sbfm1Set], (instregex "SUBS(W|X)rs$")>;
+def : InstRW<[XGeneWriteI1Sbfm1Alu], (instregex "AND(W|X)rs$")>;
+def : InstRW<[XGeneWriteI1Sbfm1Set], (instregex "ANDS(W|X)rs$")>;
+def : InstRW<[XGeneWriteI1Sbfm1Alu], (instregex "BIC(W|X)rs$")>;
+def : InstRW<[XGeneWriteI1Sbfm1Set], (instregex "BICS(W|X)rs$")>;
+
+def : InstRW<[XGeneWriteI1Sbfm1Alu], (instregex "EON(W|X)rs$")>;
+def : InstRW<[XGeneWriteI1Sbfm1Alu], (instregex "EOR(W|X)rs$")>;
+def : InstRW<[XGeneWriteI1Sbfm1Alu], (instregex "ORN(W|X)rs$")>;
+def : InstRW<[XGeneWriteI1Sbfm1Alu], (instregex "ORR(W|X)rs$")>;
+
+def : InstRW<[XGeneWriteISFlags], (instregex "ADCS(W|X)r$")>; // flag-setting forms, tagged again with XGeneWriteISFlags
+def : InstRW<[XGeneWriteISFlags], (instregex "ADDS(W|X)r(r|i)$")>;
+def : InstRW<[XGeneWriteISFlags], (instregex "ADDS(W|X)rx$")>;
+def : InstRW<[XGeneWriteISFlags], (instregex "SUBS(W|X)r(r|i)$")>;
+def : InstRW<[XGeneWriteISFlags], (instregex "SUBS(W|X)rx$")>;
+def : InstRW<[XGeneWriteISFlags], (instregex "ANDS(W|X)r(r|i)$")>;
+def : InstRW<[XGeneWriteISFlags], (instregex "BICS(W|X)rr$")>;
+def : InstRW<[XGeneWriteISFlags], (instregex "SBCS(W|X)r$")>;
+
+def : InstRW<[XGeneReadISFlags], (instregex "CCMP(W|X)r$")>; // instructions tagged with XGeneReadISFlags
+def : InstRW<[XGeneReadISFlags], (instregex "CCMP(W|X)i$")>;
+def : InstRW<[XGeneReadISFlags], (instregex "CSEL(W|X)r$")>;
+def : InstRW<[XGeneReadISFlags], (instregex "CLS(W|X)r$")>; // NOTE(review): CLS/CLZ do not read NZCV architecturally - confirm
+def : InstRW<[XGeneReadISFlags], (instregex "CLZ(W|X)r$")>;
+def : InstRW<[XGeneReadISFlags], (instregex "CSINC(W|X)r$")>;
+def : InstRW<[XGeneReadISFlags], (instregex "CSINV(W|X)r$")>;
+def : InstRW<[XGeneReadISFlags], (instregex "CSNEG(W|X)r$")>;
+def : InstRW<[XGeneReadISFlags], (instregex "CCMN(W|X)(r|i)$")>;
+def : InstRW<[XGeneReadISFlags], (instregex "CBZ(W|X)$")>; // NOTE(review): CBZ/CBNZ test a register, not NZCV - confirm
+def : InstRW<[XGeneReadISFlags], (instregex "CBNZ(W|X)$")>;
+
+// TODO: STLX et al.
+
+//---
+// AdvSIMD Data Processing (Vector Integer)
+// Three same vector instructions
+// 1Asa: ADD, SUB, ADDP (Q=0)
+// 1Asa: SHADD, SQADD, SRHADD, SHSUB, SQSUB (Q=0)
+// 1Asa: UHADD, UQADD, URHADD, UHSUB, UQSUB (Q=0)
+// 1Asa: CMGT, CMGE, CMTST, CMHI, CMHS, CMEQ (Q=0 register)
+// 1Asa: SMAX, SMIN, SABD, SMAXP, SMINP (Q=0)
+// 1Asa: UMAX, UMIN, UABD, UMAXP, UMINP (Q=0)
+// 1Ass: SSHL, SQSHL, SRSHL, SQRSHL (Q=0)
+// 1Ass: USHL, UQSHL, URSHL, UQRSHL (Q=0)
+// 1Asm: MUL, MLA, MLS, SQDMULH, SQRDMULH (Q=0)
+// 1Asl: AND, BIC, ORR, ORN, EOR, BSL, BIT, BIF (Q=0)
+// TODO: PMUL may belong to the Apol group instead of Asl; confirm and move it if so
+// 1Asl: PMUL (Q=0)
+// 1AsaAsa: ADD, SUB, ADDP (Q=1)
+// 1AsaAsa: SHADD, SQADD, SRHADD, SHSUB, SQSUB (Q=1)
+// 1AsaAsa: UHADD, UQADD, URHADD, UHSUB, UQSUB (Q=1)
+// 1AsaAsa: CMGT, CMGE, CMTST, CMHI, CMHS, CMEQ (Q=1 register)
+// 1AsaAsa: SMAX, SMIN, SABD, SMAXP, SMINP (Q=1)
+// 1AsaAsa: UMAX, UMIN, UABD, UMAXP, UMINP (Q=1)
+// 1AssAss: SSHL, SQSHL, SRSHL, SQRSHL (Q=1)
+// 1AssAss: USHL, UQSHL, URSHL, UQRSHL (Q=1)
+// 1AsmAsm: MUL, MLA, MLS, SQDMULH, SQRDMULH (Q=1)
+// 1AslAsl: AND, BIC, ORR, ORN, EOR, BSL, BIT, BIF (Q=1)
+// 1AslAsl: PMUL (Q=1)
+// 2Asa: SABA/UABA (Q=0)
+// 2AsaAsa: SABA/UABA (Q=1) -- NOTE(review): no InstRW in this section matches SABA/UABA
+//---
+def : InstRW<[XGeneWriteVI1Asa], (instregex "ADDv(8i8|4i16|2i32)$")>;
+def : InstRW<[XGeneWriteVI1Asa], (instregex "SUBv(8i8|4i16|2i32)$")>;
+def : InstRW<[XGeneWriteVI1Asa], (instregex "ADDPv(8i8|4i16|2i32)$")>;
+def : InstRW<[XGeneWriteVI1Asa], (instregex "SHADDv(8i8|4i16|2i32)$")>;
+def : InstRW<[XGeneWriteVI1Asa], (instregex "SQADDv(8i8|4i16|2i32)$")>;
+def : InstRW<[XGeneWriteVI1Asa], (instregex "SRHADDv(8i8|4i16|2i32)$")>;
+def : InstRW<[XGeneWriteVI1Asa], (instregex "SHSUBv(8i8|4i16|2i32)$")>;
+def : InstRW<[XGeneWriteVI1Asa], (instregex "SQSUBv(8i8|4i16|2i32)$")>;
+def : InstRW<[XGeneWriteVI1Asa], (instregex "UHADDv(8i8|4i16|2i32)$")>;
+def : InstRW<[XGeneWriteVI1Asa], (instregex "UQADDv(8i8|4i16|2i32)$")>;
+def : InstRW<[XGeneWriteVI1Asa], (instregex "URHADDv(8i8|4i16|2i32)$")>;
+def : InstRW<[XGeneWriteVI1Asa], (instregex "UHSUBv(8i8|4i16|2i32)$")>;
+def : InstRW<[XGeneWriteVI1Asa], (instregex "UQSUBv(8i8|4i16|2i32)$")>;
+def : InstRW<[XGeneWriteVI1Asa], (instregex "CM(GT|GE|EQ)v(8i8|4i16|2i32)$")>;
+def : InstRW<[XGeneWriteVI1Asa], (instregex "CM(TST|HI|HS)v(8i8|4i16|2i32)$")>;
+def : InstRW<[XGeneWriteVI1Asa], (instregex "SMAXv(8i8|4i16|2i32)$")>;
+def : InstRW<[XGeneWriteVI1Asa], (instregex "SMINv(8i8|4i16|2i32)$")>;
+def : InstRW<[XGeneWriteVI1Asa], (instregex "SABDv(8i8|4i16|2i32)$")>;
+def : InstRW<[XGeneWriteVI1Asa], (instregex "SMAXPv(8i8|4i16|2i32)$")>;
+def : InstRW<[XGeneWriteVI1Asa], (instregex "SMINPv(8i8|4i16|2i32)$")>;
+def : InstRW<[XGeneWriteVI1Asa], (instregex "UMAXv(8i8|4i16|2i32)$")>;
+def : InstRW<[XGeneWriteVI1Asa], (instregex "UMINv(8i8|4i16|2i32)$")>;
+def : InstRW<[XGeneWriteVI1Asa], (instregex "UABDv(8i8|4i16|2i32)$")>;
+def : InstRW<[XGeneWriteVI1Asa], (instregex "UMAXPv(8i8|4i16|2i32)$")>;
+def : InstRW<[XGeneWriteVI1Asa], (instregex "UMINPv(8i8|4i16|2i32)$")>;
+def : InstRW<[XGeneWriteVI1Ass], (instregex "SSHLv(8i8|4i16|2i32)$")>;
+def : InstRW<[XGeneWriteVI1Ass], (instregex "SQSHLv(8i8|4i16|2i32)$")>;
+def : InstRW<[XGeneWriteVI1Ass], (instregex "SRSHLv(8i8|4i16|2i32)$")>;
+def : InstRW<[XGeneWriteVI1Ass], (instregex "SQRSHLv(8i8|4i16|2i32)$")>;
+def : InstRW<[XGeneWriteVI1Ass], (instregex "USHLv(8i8|4i16|2i32)$")>;
+def : InstRW<[XGeneWriteVI1Ass], (instregex "UQSHLv(8i8|4i16|2i32)$")>;
+def : InstRW<[XGeneWriteVI1Ass], (instregex "URSHLv(8i8|4i16|2i32)$")>;
+def : InstRW<[XGeneWriteVI1Ass], (instregex "UQRSHLv(8i8|4i16|2i32)$")>;
+def : InstRW<[XGeneWriteVI1Asm], (instregex "MULv(8i8|4i16|2i32)$")>;
+def : InstRW<[XGeneWriteVI1Asm], (instregex "MLAv(8i8|4i16|2i32)$")>;
+def : InstRW<[XGeneWriteVI1Asm], (instregex "MLSv(8i8|4i16|2i32)$")>;
+def : InstRW<[XGeneWriteVI1Asm], (instregex "SQDMULHv(4i16|2i32)$")>;
+def : InstRW<[XGeneWriteVI1Asm], (instregex "SQRDMULHv(4i16|2i32)$")>;
+def : InstRW<[XGeneWriteVI1Asl], (instregex "ANDv(8i8)$")>;
+def : InstRW<[XGeneWriteVI1Asl], (instregex "BICv(8i8)$")>;
+def : InstRW<[XGeneWriteVI1Asl], (instregex "ORRv(8i8)$")>;
+def : InstRW<[XGeneWriteVI1Asl], (instregex "ORNv(8i8)$")>;
+def : InstRW<[XGeneWriteVI1Asl], (instregex "EORv(8i8)$")>;
+def : InstRW<[XGeneWriteVI1Asl], (instregex "BSLv(8i8)$")>;
+def : InstRW<[XGeneWriteVI1Asl], (instregex "BITv(8i8)$")>;
+def : InstRW<[XGeneWriteVI1Asl], (instregex "BIFv(8i8)$")>;
+def : InstRW<[XGeneWriteVI1Asl], (instregex "PMULv(8i8)$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "ADDv(16i8|8i16|4i32|2i64)$")>; // Q=1 (128-bit) three-same forms start here
+def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "SUBv(16i8|8i16|4i32|2i64)$")>; // 2i64 included, matching ADD/SQSUB/UQSUB
+def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "ADDPv(16i8|8i16|4i32|2i64)$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "SHADDv(16i8|8i16|4i32)$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "SQADDv(16i8|8i16|4i32|2i64)$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "SRHADDv(16i8|8i16|4i32)$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "SHSUBv(16i8|8i16|4i32)$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "SQSUBv(16i8|8i16|4i32|2i64)$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "UHADDv(16i8|8i16|4i32)$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "UQADDv(16i8|8i16|4i32|2i64)$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "URHADDv(16i8|8i16|4i32)$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "UHSUBv(16i8|8i16|4i32)$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "UQSUBv(16i8|8i16|4i32|2i64)$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "CM(GT|GE|EQ)v(16i8|8i16|4i32|2i64)$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "CM(TST|HI|HS)v(16i8|8i16|4i32|2i64)$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "SMAXv(16i8|8i16|4i32)$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "SMINv(16i8|8i16|4i32)$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "SABDv(16i8|8i16|4i32)$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "SMAXPv(16i8|8i16|4i32)$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "SMINPv(16i8|8i16|4i32)$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "UMAXv(16i8|8i16|4i32)$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "UMINv(16i8|8i16|4i32)$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "UABDv(16i8|8i16|4i32)$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "UMAXPv(16i8|8i16|4i32)$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "UMINPv(16i8|8i16|4i32)$")>;
+def : InstRW<[XGeneWriteVI1AssAss], (instregex "SSHLv(16i8|8i16|4i32|2i64)$")>;
+def : InstRW<[XGeneWriteVI1AssAss], (instregex "SQSHLv(16i8|8i16|4i32|2i64)$")>;
+def : InstRW<[XGeneWriteVI1AssAss], (instregex "SRSHLv(16i8|8i16|4i32|2i64)$")>;
+def : InstRW<[XGeneWriteVI1AssAss], (instregex "SQRSHLv(16i8|8i16|4i32|2i64)$")>;
+def : InstRW<[XGeneWriteVI1AssAss], (instregex "USHLv(16i8|8i16|4i32|2i64)$")>;
+def : InstRW<[XGeneWriteVI1AssAss], (instregex "UQSHLv(16i8|8i16|4i32|2i64)$")>;
+def : InstRW<[XGeneWriteVI1AssAss], (instregex "URSHLv(16i8|8i16|4i32|2i64)$")>;
+def : InstRW<[XGeneWriteVI1AssAss], (instregex "UQRSHLv(16i8|8i16|4i32|2i64)$")>;
+def : InstRW<[XGeneWriteVI1AsmAsm], (instregex "MULv(16i8|8i16|4i32)$")>;
+def : InstRW<[XGeneWriteVI1AsmAsm], (instregex "MLAv(16i8|8i16|4i32)$")>;
+def : InstRW<[XGeneWriteVI1AsmAsm], (instregex "MLSv(16i8|8i16|4i32)$")>;
+def : InstRW<[XGeneWriteVI1AsmAsm], (instregex "SQDMULHv(8i16|4i32)$")>;
+def : InstRW<[XGeneWriteVI1AsmAsm], (instregex "SQRDMULHv(8i16|4i32)$")>;
+def : InstRW<[XGeneWriteVI1AslAsl], (instregex "ANDv(16i8)$")>;
+def : InstRW<[XGeneWriteVI1AslAsl], (instregex "BICv(16i8)$")>;
+def : InstRW<[XGeneWriteVI1AslAsl], (instregex "ORRv(16i8)$")>;
+def : InstRW<[XGeneWriteVI1AslAsl], (instregex "ORNv(16i8)$")>;
+def : InstRW<[XGeneWriteVI1AslAsl], (instregex "EORv(16i8)$")>;
+def : InstRW<[XGeneWriteVI1AslAsl], (instregex "BSLv(16i8)$")>;
+def : InstRW<[XGeneWriteVI1AslAsl], (instregex "BITv(16i8)$")>;
+def : InstRW<[XGeneWriteVI1AslAsl], (instregex "BIFv(16i8)$")>;
+def : InstRW<[XGeneWriteVI1AslAsl], (instregex "PMULv(16i8)$")>;
+
+//---
+// AdvSIMD Data Processing (Vector Integer)
+// Three different vector instructions
+// 1AsaAsa: SADDL, SSUBL, SABDL (Q=0, Q=1)
+// 1AsaAsa: UADDL, USUBL, UABDL (Q=0, Q=1)
+// 1AsaAsa: SADDW, SSUBW (Q=0, Q=1)
+// 1AsaAsa: UADDW, USUBW (Q=0, Q=1)
+// 1AsmAsm: SMLAL, SMLSL, SMULL (Q=0, Q=1)
+// 1AsmAsm: SQDMLAL, SQDMLSL, SQDMULL (Q=0, Q=1)
+// 1AsmAsm: UMLAL, UMLSL, UMULL (Q=0, Q=1)
+// 1AsmAsm: UQDMLAL, UQDMLSL, UQDMULL (Q=0, Q=1)
+// NOTE: the UQDM... forms are not modelled in LLVM, so no InstRW is given for them
+// 1ApolApol: PMULL (Q=0, Q=1)
+// 1AsaAsa1Ass: ADDHN, SUBHN, RADDHN, RSUBHN (Q=0, Q=1)
+// 2AsaAsa: SABAL, UABAL (Q=0, Q=1)
+//---
+def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "SADDLv(8i8|16i8)_v8i16$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "SADDLv(4i16|8i16)_v4i32$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "SADDLv(2i32|4i32)_v2i64$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "SSUBLv(8i8|16i8)_v8i16$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "SSUBLv(4i16|8i16)_v4i32$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "SSUBLv(2i32|4i32)_v2i64$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "SABDLv(8i8|16i8)_v8i16$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "SABDLv(4i16|8i16)_v4i32$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "SABDLv(2i32|4i32)_v2i64$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "UADDLv(8i8|16i8)_v8i16$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "UADDLv(4i16|8i16)_v4i32$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "UADDLv(2i32|4i32)_v2i64$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "USUBLv(8i8|16i8)_v8i16$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "USUBLv(4i16|8i16)_v4i32$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "USUBLv(2i32|4i32)_v2i64$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "UABDLv(8i8|16i8)_v8i16$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "UABDLv(4i16|8i16)_v4i32$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "UABDLv(2i32|4i32)_v2i64$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "SADDWv(8i8|16i8)_v8i16$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "SADDWv(4i16|8i16)_v4i32$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "SADDWv(2i32|4i32)_v2i64$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "SSUBWv(8i8|16i8)_v8i16$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "SSUBWv(4i16|8i16)_v4i32$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "SSUBWv(2i32|4i32)_v2i64$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "UADDWv(8i8|16i8)_v8i16$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "UADDWv(4i16|8i16)_v4i32$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "UADDWv(2i32|4i32)_v2i64$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "USUBWv(8i8|16i8)_v8i16$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "USUBWv(4i16|8i16)_v4i32$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "USUBWv(2i32|4i32)_v2i64$")>;
+def : InstRW<[XGeneWriteVI1AsmAsm], (instregex "SML(A|S)Lv(8i8|16i8)_v8i16$")>;
+def : InstRW<[XGeneWriteVI1AsmAsm], (instregex "SML(A|S)Lv(4i16|8i16)_v4i32$")>;
+def : InstRW<[XGeneWriteVI1AsmAsm], (instregex "SML(A|S)Lv(2i32|4i32)_v2i64$")>;
+def : InstRW<[XGeneWriteVI1AsmAsm], (instregex "SMULLv(8i8|16i8)_v8i16$")>;
+def : InstRW<[XGeneWriteVI1AsmAsm], (instregex "SMULLv(4i16|8i16)_v4i32$")>;
+def : InstRW<[XGeneWriteVI1AsmAsm], (instregex "SMULLv(2i32|4i32)_v2i64$")>;
+def : InstRW<[XGeneWriteVI1AsmAsm], (instregex "SQDML(A|S)Lv(4i16|8i16)_v4i32$")>;
+def : InstRW<[XGeneWriteVI1AsmAsm], (instregex "SQDML(A|S)Lv(2i32|4i32)_v2i64$")>;
+def : InstRW<[XGeneWriteVI1AsmAsm], (instregex "SQDMULLv(4i16|8i16)_v4i32$")>;
+def : InstRW<[XGeneWriteVI1AsmAsm], (instregex "SQDMULLv(2i32|4i32)_v2i64$")>;
+def : InstRW<[XGeneWriteVI1AsmAsm], (instregex "UML(A|S)Lv(8i8|16i8)_v8i16$")>;
+def : InstRW<[XGeneWriteVI1AsmAsm], (instregex "UML(A|S)Lv(4i16|8i16)_v4i32$")>;
+def : InstRW<[XGeneWriteVI1AsmAsm], (instregex "UML(A|S)Lv(2i32|4i32)_v2i64$")>;
+def : InstRW<[XGeneWriteVI1AsmAsm], (instregex "UMULLv(8i8|16i8)_v8i16$")>;
+def : InstRW<[XGeneWriteVI1AsmAsm], (instregex "UMULLv(4i16|8i16)_v4i32$")>;
+def : InstRW<[XGeneWriteVI1AsmAsm], (instregex "UMULLv(2i32|4i32)_v2i64$")>;
+def : InstRW<[XGeneWriteVI1ApolApol], (instregex "PMULLv(8i8|16i8|1i64|2i64)$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa1Ass], (instregex "ADDHNv2i64_v(2i32|4i32)$")>; // each regex lists the Q=0 result width, then the Q=1 one
+def : InstRW<[XGeneWriteVI1AsaAsa1Ass], (instregex "ADDHNv4i32_v(4i16|8i16)$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa1Ass], (instregex "ADDHNv8i16_v(8i8|16i8)$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa1Ass], (instregex "SUBHNv2i64_v(2i32|4i32)$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa1Ass], (instregex "SUBHNv4i32_v(4i16|8i16)$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa1Ass], (instregex "SUBHNv8i16_v(8i8|16i8)$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa1Ass], (instregex "RADDHNv2i64_v(2i32|4i32)$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa1Ass], (instregex "RADDHNv4i32_v(4i16|8i16)$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa1Ass], (instregex "RADDHNv8i16_v(8i8|16i8)$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa1Ass], (instregex "RSUBHNv2i64_v(2i32|4i32)$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa1Ass], (instregex "RSUBHNv4i32_v(4i16|8i16)$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa1Ass], (instregex "RSUBHNv8i16_v(8i8|16i8)$")>;
+def : InstRW<[XGeneWriteVI2AsaAsa], (instregex "SABALv(8i8|16i8)_v8i16$")>; // SABAL/UABAL are in the 2AsaAsa group per the section header
+def : InstRW<[XGeneWriteVI2AsaAsa], (instregex "SABALv(4i16|8i16)_v4i32$")>;
+def : InstRW<[XGeneWriteVI2AsaAsa], (instregex "SABALv(2i32|4i32)_v2i64$")>;
+def : InstRW<[XGeneWriteVI2AsaAsa], (instregex "UABALv(8i8|16i8)_v8i16$")>;
+def : InstRW<[XGeneWriteVI2AsaAsa], (instregex "UABALv(4i16|8i16)_v4i32$")>;
+def : InstRW<[XGeneWriteVI2AsaAsa], (instregex "UABALv(2i32|4i32)_v2i64$")>;
+
+//---
+// AdvSIMD Data Processing (Vector Integer)
+// AdvSIMD two-reg misc
+// 1Asa: SADDLP, SUQADD, SQABS, SQNEG (Q=0)
+// 1Asa: UADDLP, USQADD, ABS, NEG (Q=0)
+// 1Asa: CMGT, CMEQ, CMLT, CMGE, CMLE (zero Q=0)
+// 1Asl: CLS, CLZ, CNT, NOT, RBIT (Q=0)
+// 1Ass: REV64, REV32, REV16 (Q=0)
+// 1Ass: XTN, SQXTN, UQXTN, SQXTUN, SHLL (Q=0, Q=1) -- NOTE(review): no InstRW in this section matches these
+// 1AsaAsa: SADDLP, SUQADD, SQABS, SQNEG (Q=1)
+// 1AsaAsa: UADDLP, USQADD, ABS, NEG (Q=1)
+// 1AsaAsa: CMGT, CMEQ, CMLT, CMGE, CMLE (zero Q=1)
+// 1AslAsl: CLS, CLZ, CNT, NOT, RBIT (Q=1)
+// 1AssAss: REV64, REV32, REV16 (Q=1)
+// 1Adre: URECPE (Q=0, Q=1)
+// 1Asre: URSQRTE (Q=0, Q=1)
+// 2Asa: SADALP, UADALP (Q=0)
+// 2AsaAsa: SADALP, UADALP (Q=1)
+//---
+def : InstRW<[XGeneWriteVI1Asa], (instregex "SADDLPv(8i8_v4i16)$")>;
+def : InstRW<[XGeneWriteVI1Asa], (instregex "SADDLPv(4i16_v2i32)$")>;
+def : InstRW<[XGeneWriteVI1Asa], (instregex "SADDLPv(2i32_v1i64)$")>;
+def : InstRW<[XGeneWriteVI1Asa], (instregex "SUQADDv(8i8|4i16|2i32)$")>;
+def : InstRW<[XGeneWriteVI1Asa], (instregex "SQABSv(8i8|4i16|2i32)$")>;
+def : InstRW<[XGeneWriteVI1Asa], (instregex "SQNEGv(8i8|4i16|2i32)$")>;
+def : InstRW<[XGeneWriteVI1Asa], (instregex "UADDLPv(8i8_v4i16)$")>;
+def : InstRW<[XGeneWriteVI1Asa], (instregex "UADDLPv(4i16_v2i32)$")>;
+def : InstRW<[XGeneWriteVI1Asa], (instregex "UADDLPv(2i32_v1i64)$")>;
+def : InstRW<[XGeneWriteVI1Asa], (instregex "USQADDv(8i8|4i16|2i32)$")>;
+def : InstRW<[XGeneWriteVI1Asa], (instregex "ABSv(8i8|4i16|2i32)$")>;
+def : InstRW<[XGeneWriteVI1Asa], (instregex "NEGv(8i8|4i16|2i32)$")>;
+def : InstRW<[XGeneWriteVI1Asa], (instregex "CM(GT|EQ|LT)v(8i8|4i16|2i32)rz$")>;
+def : InstRW<[XGeneWriteVI1Asa], (instregex "CM(GE|LE)v(8i8|4i16|2i32)rz$")>;
+def : InstRW<[XGeneWriteVI1Asl], (instregex "CL(S|Z)v(8i8|4i16|2i32)$")>;
+def : InstRW<[XGeneWriteVI1Asl], (instregex "CNTv8i8$")>;
+def : InstRW<[XGeneWriteVI1Asl], (instregex "NOTv8i8$")>;
+def : InstRW<[XGeneWriteVI1Asl], (instregex "RBITv8i8$")>;
+def : InstRW<[XGeneWriteVI1Ass], (instregex "REV16v8i8$")>;
+def : InstRW<[XGeneWriteVI1Ass], (instregex "REV32v(8i8|4i16)$")>;
+def : InstRW<[XGeneWriteVI1Ass], (instregex "REV64v(8i8|4i16|2i32)$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "SADDLPv(16i8_v8i16)$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "SADDLPv(8i16_v4i32)$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "SADDLPv(4i32_v2i64)$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "SUQADDv(16i8|8i16|4i32|2i64)$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "SQABSv(16i8|8i16|4i32|2i64)$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "SQNEGv(16i8|8i16|4i32|2i64)$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "UADDLPv(16i8_v8i16)$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "UADDLPv(8i16_v4i32)$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "UADDLPv(4i32_v2i64)$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "USQADDv(16i8|8i16|4i32|2i64)$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "ABSv(16i8|8i16|4i32|2i64)$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "NEGv(16i8|8i16|4i32|2i64)$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "CM(GT|EQ|LT)v(16i8|8i16|4i32|2i64)rz$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "CM(GE|LE)v(16i8|8i16|4i32|2i64)rz$")>;
+def : InstRW<[XGeneWriteVI1AslAsl], (instregex "CL(S|Z)v(16i8|8i16|4i32)$")>;
+def : InstRW<[XGeneWriteVI1AslAsl], (instregex "CNTv16i8$")>;
+def : InstRW<[XGeneWriteVI1AslAsl], (instregex "NOTv16i8$")>;
+def : InstRW<[XGeneWriteVI1AslAsl], (instregex "RBITv16i8$")>;
+def : InstRW<[XGeneWriteVI1AssAss], (instregex "REV16v16i8$")>;
+def : InstRW<[XGeneWriteVI1AssAss], (instregex "REV32v(16i8|8i16)$")>;
+def : InstRW<[XGeneWriteVI1AssAss], (instregex "REV64v(16i8|8i16|4i32)$")>;
+def : InstRW<[XGeneWriteVI1Adre], (instregex "URECPEv(2i32|4i32)$")>;
+def : InstRW<[XGeneWriteVI1Asre], (instregex "URSQRTEv(2i32|4i32)$")>;
+def : InstRW<[XGeneWriteVI2Asa], (instregex "SADALPv(8i8_v4i16)$")>;
+def : InstRW<[XGeneWriteVI2Asa], (instregex "SADALPv(4i16_v2i32)$")>;
+def : InstRW<[XGeneWriteVI2Asa], (instregex "SADALPv(2i32_v1i64)$")>;
+def : InstRW<[XGeneWriteVI2Asa], (instregex "UADALPv(8i8_v4i16)$")>;
+def : InstRW<[XGeneWriteVI2Asa], (instregex "UADALPv(4i16_v2i32)$")>;
+def : InstRW<[XGeneWriteVI2Asa], (instregex "UADALPv(2i32_v1i64)$")>;
+def : InstRW<[XGeneWriteVI2AsaAsa], (instregex "SADALPv(16i8_v8i16)$")>;
+def : InstRW<[XGeneWriteVI2AsaAsa], (instregex "SADALPv(8i16_v4i32)$")>;
+def : InstRW<[XGeneWriteVI2AsaAsa], (instregex "SADALPv(4i32_v2i64)$")>;
+def : InstRW<[XGeneWriteVI2AsaAsa], (instregex "UADALPv(16i8_v8i16)$")>;
+def : InstRW<[XGeneWriteVI2AsaAsa], (instregex "UADALPv(8i16_v4i32)$")>;
+def : InstRW<[XGeneWriteVI2AsaAsa], (instregex "UADALPv(4i32_v2i64)$")>;
+
+//---
+// AdvSIMD Data Processing (Vector Integer)
+// AdvSIMD across lanes
+// 1AsaAsa1Asa: SADDLV, UADDLV (Q=1 size=10)
+// 1AsaAsa2Asa: SADDLV, UADDLV (Q=1 size=00)
+// 1AsaAsa2Asa: SADDLV, UADDLV (Q=1 size=01)
+// 2Asa: SADDLV, UADDLV (Q=0 size=01)
+// 2Asa: ADDV, SMAXV, SMINV, UMAXV, UMINV (Q=1 size=10)
+// 2Asa: ADDV, SMAXV, SMINV, UMAXV, UMINV (Q=0 size=01)
+// 3Asa: SADDLV, UADDLV (Q=0 size=00)
+// 3Asa: ADDV, SMAXV, SMINV, UMAXV, UMINV (Q=0 size=00)
+// 3Asa: ADDV, SMAXV, SMINV, UMAXV, UMINV (Q=1 size=01)
+// 4Asa: ADDV, SMAXV, SMINV, UMAXV, UMINV (Q=1 size=00)
+//---
+def : InstRW<[XGeneWriteVI1AsaAsa1Asa], (instregex "SADDLVv(4i32)v$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa2Asa], (instregex "SADDLVv(16i8)v$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa2Asa], (instregex "SADDLVv(8i16)v$")>;
+def : InstRW<[XGeneWriteVI2Asa], (instregex "SADDLVv(4i16)v$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa1Asa], (instregex "UADDLVv(4i32)v$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa2Asa], (instregex "UADDLVv(16i8)v$")>;
+def : InstRW<[XGeneWriteVI1AsaAsa2Asa], (instregex "UADDLVv(8i16)v$")>;
+def : InstRW<[XGeneWriteVI2Asa], (instregex "UADDLVv(4i16)v$")>;
+def : InstRW<[XGeneWriteVI2Asa], (instregex "ADDVv(4i16|4i32)v$")>;
+def : InstRW<[XGeneWriteVI2Asa], (instregex "SMAXVv(4i16|4i32)v$")>;
+def : InstRW<[XGeneWriteVI2Asa], (instregex "SMINVv(4i16|4i32)v$")>;
+def : InstRW<[XGeneWriteVI2Asa], (instregex "UMAXVv(4i16|4i32)v$")>;
+def : InstRW<[XGeneWriteVI2Asa], (instregex "UMINVv(4i16|4i32)v$")>;
+def : InstRW<[XGeneWriteVI3Asa], (instregex "SADDLVv(8i8)v$")>;
+def : InstRW<[XGeneWriteVI3Asa], (instregex "UADDLVv(8i8)v$")>;
+def : InstRW<[XGeneWriteVI3Asa], (instregex "ADDVv(8i8|8i16)v$")>;
+def : InstRW<[XGeneWriteVI3Asa], (instregex "SMAXVv(8i8|8i16)v$")>;
+def : InstRW<[XGeneWriteVI3Asa], (instregex "SMINVv(8i8|8i16)v$")>;
+def : InstRW<[XGeneWriteVI3Asa], (instregex "UMAXVv(8i8|8i16)v$")>;
+def : InstRW<[XGeneWriteVI3Asa], (instregex "UMINVv(8i8|8i16)v$")>;
+def : InstRW<[XGeneWriteVI4Asa], (instregex "ADDVv(16i8)v$")>;
+def : InstRW<[XGeneWriteVI4Asa], (instregex "SMAXVv(16i8)v$")>;
+def : InstRW<[XGeneWriteVI4Asa], (instregex "SMINVv(16i8)v$")>;
+def : InstRW<[XGeneWriteVI4Asa], (instregex "UMAXVv(16i8)v$")>;
+def : InstRW<[XGeneWriteVI4Asa], (instregex "UMINVv(16i8)v$")>;
+
+//---
+// AdvSIMD Data Processing (Vector Integer)
+// AdvSIMD copy
+// 1Ass: DUP (element Q=0)
+// 1AslAsl: DUP (element Q=1 size=x1000) -- NOTE(review): the InstRW below uses XGeneWriteVI1Asl1Asl, not 1AslAsl; confirm
+// 1Ass1Ass: DUP (element Q=1 size=other)
+// 1St1Lf: DUP (general register) (Q=0)
+// 1St1Lf1Falu: DUP (general register) (Q=1)
+// 1Fmov: INS (element imm5=01000)
+// 1Ass1Asi: INS (element imm5=other)
+// 1St1Lf: INS (general register imm5=01000)
+// 1St1Lf1Asi: INS (general register imm5=other)
+// 1Sf1Ld1Sbfm: SMOV (all)
+// 1Sf1Ld: UMOV (imm5=xxx00, i.e. both the 32-bit x0100 and the 64-bit x1000 element forms)
+// 1Sf1Ld1Ubfm: UMOV (imm5=other, i.e. the 8-bit and 16-bit element forms)
+//---
+def : InstRW<[XGeneWriteVI1Ass], (instregex "DUPv(2i32|4i16|8i8)lane$")>;
+def : InstRW<[XGeneWriteVI1Asl1Asl], (instregex "DUPv(2i64)lane$")>;
+def : InstRW<[XGeneWriteVI1Ass1Ass], (instregex "DUPv(4i32|8i16|16i8)lane$")>;
+def : InstRW<[XGeneWriteVI1St1Lf], (instregex "DUPv(8i8|4i16|2i32)gpr$")>;
+def : InstRW<[XGeneWriteVI1St1Lf1Falu], (instregex "DUPv(16i8|8i16|4i32|2i64)gpr$")>;
+def : InstRW<[XGeneWriteVI1Fmov], (instregex "INSv(i64)lane$")>;
+def : InstRW<[XGeneWriteVI1Ass1Asi], (instregex "INSv(i8|i16|i32)lane$")>;
+def : InstRW<[XGeneWriteVI1St1Lf], (instregex "INSv(i64)gpr$")>;
+def : InstRW<[XGeneWriteVI1St1Lf1Asi], (instregex "INSv(i8|i16|i32)gpr$")>;
+def : InstRW<[XGeneWriteVI1Sf1Ld1Sbfm], (instregex "SMOVv(i8|i16)to32$")>;
+def : InstRW<[XGeneWriteVI1Sf1Ld1Sbfm], (instregex "SMOVv(i8|i16|i32)to64$")>;
+def : InstRW<[XGeneWriteVI1Sf1Ld], (instregex "UMOVv(i32|i64)$")>;
+def : InstRW<[XGeneWriteVI1Sf1Ld1Ubfm], (instregex "UMOVv(i8|i16)$")>;
+
+//---
+// AdvSIMD Data Processing (Vector Integer)
+// AdvSIMD vector x indexed element
+// 1Asm: MUL, SQDMULH, SQRDMULH, MLA, MLS (by element Q=0)
+// 1AsmAsm: SMULL, SMLAL, SMLSL (by element)
+// 1AsmAsm: UMULL, UMLAL, UMLSL (by element)
+// 1AsmAsm: SQDMULL, SQDMLAL, SQDMLSL (by element)
+// 1AsmAsm: MUL, SQDMULH, SQRDMULH, MLA, MLS (by element Q=1)
+//---
+def : InstRW<[XGeneWriteVI1Asm], (instregex "MULv(4i16|2i32)_indexed$")>;
+def : InstRW<[XGeneWriteVI1Asm], (instregex "ML(A|S)v(4i16|2i32)_indexed$")>;
+def : InstRW<[XGeneWriteVI1Asm], (instregex "SQDMULHv(4i16|2i32)_indexed$")>;
+def : InstRW<[XGeneWriteVI1Asm], (instregex "SQRDMULHv(4i16|2i32)_indexed$")>;
+def : InstRW<[XGeneWriteVI1AsmAsm], (instregex "SMULLv(4i16|8i16|2i32|4i32)_indexed$")>;
+def : InstRW<[XGeneWriteVI1AsmAsm], (instregex "SML(A|S)Lv(4i16|8i16|2i32|4i32)_indexed$")>;
+def : InstRW<[XGeneWriteVI1AsmAsm], (instregex "UMULLv(4i16|8i16|2i32|4i32)_indexed$")>;
+def : InstRW<[XGeneWriteVI1AsmAsm], (instregex "UML(A|S)Lv(4i16|8i16|2i32|4i32)_indexed$")>;
+def : InstRW<[XGeneWriteVI1AsmAsm], (instregex "SQDMULLv(4i16|2i32)_indexed$")>;
+def : InstRW<[XGeneWriteVI1AsmAsm], (instregex "SQDMULLv(8i16|4i32|1i32|1i64)_indexed$")>;
+def : InstRW<[XGeneWriteVI1AsmAsm], (instregex "SQDML(A|S)Lv(4i16|2i32)_indexed$")>;
+def : InstRW<[XGeneWriteVI1AsmAsm], (instregex "SQDML(A|S)Lv(8i16|4i32|1i32|1i64)_indexed$")>;
+def : InstRW<[XGeneWriteVI1AsmAsm], (instregex "MULv(8i16|4i32)_indexed$")>;
+def : InstRW<[XGeneWriteVI1AsmAsm], (instregex "ML(A|S)v(8i16|4i32)_indexed$")>;
+def : InstRW<[XGeneWriteVI1AsmAsm], (instregex "SQDMULHv(8i16|4i32|1i16|1i32)_indexed$")>;
+def : InstRW<[XGeneWriteVI1AsmAsm], (instregex "SQRDMULHv(8i16|4i32|1i16|1i32)_indexed$")>;
+
+//---
+// AdvSIMD Data Processing (Vector Integer)
+// AdvSIMD shift by immediate
+// 1Ass: SSHR, SRSHR (Q=0)
+// 1Ass: USHR, URSHR (Q=0)
+// 1Ass: SHL, SQSHL, SQSHLU (Q=0)
+// 1Ass: UQSHL (Q=0)
+// 1Ass: SRI, SLI (Q=0)
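+// 1Ass: SQRSHRN, SQSHRUN, SQRSHRUN (Q=0, Q=1) -- NOTE(review): the InstRWs below use 1AssAss; confirm which is right
+// 1Ass: UQSHRN, UQRSHRN (Q=0, Q=1) -- NOTE(review): the InstRWs below use 1AssAss; confirm which is right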
+// 1AssAss: SSHR, SRSHR (Q=1)
+// 1AssAss: USHR, URSHR (Q=1)
+// 1AssAss: SHL, SQSHL, SQSHLU (Q=1)
+// 1AssAss: UQSHL (Q=1)
+// 1AssAss: SRI, SLI (Q=1)
+// 1AssAss: SHRN, RSHRN, SQSHRN (Q=0, Q=1)
+// 1AssAss: SSHLL, USHLL (Q=0, Q=1)
+// 1Ass1Asa: SSRA, SRSRA (Q=0)
+// 1Ass1Asa: USRA, URSRA (Q=0)
+// 1AssAss1AsaAsa: SSRA, SRSRA (Q=1)
+// 1AssAss1AsaAsa: USRA, URSRA (Q=1)
+//---
+def : InstRW<[XGeneWriteVI1Ass], (instregex "SSHRv(8i8|4i16|2i32)_shift$")>;
+def : InstRW<[XGeneWriteVI1Ass], (instregex "SRSHRv(8i8|4i16|2i32)_shift$")>;
+def : InstRW<[XGeneWriteVI1Ass], (instregex "USHRv(8i8|4i16|2i32)_shift$")>;
+def : InstRW<[XGeneWriteVI1Ass], (instregex "URSHRv(8i8|4i16|2i32)_shift$")>;
+def : InstRW<[XGeneWriteVI1Ass], (instregex "SHLv(8i8|4i16|2i32)_shift$")>;
+def : InstRW<[XGeneWriteVI1Ass], (instregex "SQSHLv(8i8|4i16|2i32)_shift$")>;
+def : InstRW<[XGeneWriteVI1Ass], (instregex "UQSHLv(8i8|4i16|2i32)_shift$")>;
+def : InstRW<[XGeneWriteVI1Ass], (instregex "SQSHLUv(8i8|4i16|2i32)_shift$")>;
+def : InstRW<[XGeneWriteVI1Ass], (instregex "SRIv(8i8|4i16|2i32)_shift$")>;
+def : InstRW<[XGeneWriteVI1Ass], (instregex "SLIv(8i8|4i16|2i32)_shift$")>;
+def : InstRW<[XGeneWriteVI1AssAss], (instregex "SQRSHRNv(8i8|4i16|2i32)_shift$")>;
+def : InstRW<[XGeneWriteVI1AssAss], (instregex "SQRSHRNv(16i8|8i16|4i32)_shift$")>;
+def : InstRW<[XGeneWriteVI1AssAss], (instregex "SQSHRUNv(8i8|4i16|2i32)_shift$")>;
+def : InstRW<[XGeneWriteVI1AssAss], (instregex "SQSHRUNv(16i8|8i16|4i32)_shift$")>;
+def : InstRW<[XGeneWriteVI1AssAss], (instregex "SQRSHRUNv(8i8|4i16|2i32)_shift$")>;
+def : InstRW<[XGeneWriteVI1AssAss], (instregex "SQRSHRUNv(16i8|8i16|4i32)_shift$")>;
+def : InstRW<[XGeneWriteVI1AssAss], (instregex "UQSHRNv(8i8|4i16|2i32)_shift$")>;
+def : InstRW<[XGeneWriteVI1AssAss], (instregex "UQSHRNv(16i8|8i16|4i32)_shift$")>;
+def : InstRW<[XGeneWriteVI1AssAss], (instregex "UQRSHRNv(8i8|4i16|2i32)_shift$")>;
+def : InstRW<[XGeneWriteVI1AssAss], (instregex "UQRSHRNv(16i8|8i16|4i32)_shift$")>;
+def : InstRW<[XGeneWriteVI1AssAss], (instregex "SSHRv(16i8|8i16|4i32|2i64)_shift$")>;
+def : InstRW<[XGeneWriteVI1AssAss], (instregex "SRSHRv(16i8|8i16|4i32|2i64)_shift$")>;
+def : InstRW<[XGeneWriteVI1AssAss], (instregex "USHRv(16i8|8i16|4i32|2i64)_shift$")>;
+def : InstRW<[XGeneWriteVI1AssAss], (instregex "URSHRv(16i8|8i16|4i32|2i64)_shift$")>;
+def : InstRW<[XGeneWriteVI1AssAss], (instregex "SHLv(16i8|8i16|4i32|2i64)_shift$")>;
+def : InstRW<[XGeneWriteVI1AssAss], (instregex "SQSHLv(16i8|8i16|4i32|2i64)_shift$")>;
+def : InstRW<[XGeneWriteVI1AssAss], (instregex "UQSHLv(16i8|8i16|4i32|2i64)_shift$")>;
+def : InstRW<[XGeneWriteVI1AssAss], (instregex "SQSHLUv(16i8|8i16|4i32|2i64)_shift$")>;
+def : InstRW<[XGeneWriteVI1AssAss], (instregex "SRIv(16i8|8i16|4i32|2i64)_shift$")>;
+def : InstRW<[XGeneWriteVI1AssAss], (instregex "SLIv(16i8|8i16|4i32|2i64)_shift$")>;
+def : InstRW<[XGeneWriteVI1AssAss], (instregex "SHRNv(8i8|4i16|2i32)_shift$")>;
+def : InstRW<[XGeneWriteVI1AssAss], (instregex "SHRNv(16i8|8i16|4i32)_shift$")>;
+def : InstRW<[XGeneWriteVI1AssAss], (instregex "RSHRNv(8i8|4i16|2i32)_shift$")>;
+def : InstRW<[XGeneWriteVI1AssAss], (instregex "RSHRNv(16i8|8i16|4i32)_shift$")>;
+def : InstRW<[XGeneWriteVI1AssAss], (instregex "SQSHRNv(8i8|4i16|2i32)_shift$")>;
+def : InstRW<[XGeneWriteVI1AssAss], (instregex "SQSHRNv(16i8|8i16|4i32)_shift$")>;
+def : InstRW<[XGeneWriteVI1AssAss], (instregex "SSHLLv(8i8|4i16|2i32)_shift$")>;
+def : InstRW<[XGeneWriteVI1AssAss], (instregex "SSHLLv(16i8|8i16|4i32)_shift$")>;
+def : InstRW<[XGeneWriteVI1AssAss], (instregex "USHLLv(8i8|4i16|2i32)_shift$")>;
+def : InstRW<[XGeneWriteVI1AssAss], (instregex "USHLLv(16i8|8i16|4i32)_shift$")>;
+def : InstRW<[XGeneWriteVI1Ass1Asa], (instregex "SSRAv(8i8|4i16|2i32)_shift$")>;
+def : InstRW<[XGeneWriteVI1Ass1Asa], (instregex "SRSRAv(8i8|4i16|2i32)_shift$")>;
+def : InstRW<[XGeneWriteVI1Ass1Asa], (instregex "USRAv(8i8|4i16|2i32)_shift$")>;
+def : InstRW<[XGeneWriteVI1Ass1Asa], (instregex "URSRAv(8i8|4i16|2i32)_shift$")>;
+def : InstRW<[XGeneWriteVI1AssAss1AsaAsa], (instregex "SSRAv(16i8|8i16|4i32|2i64)_shift$")>;
+def : InstRW<[XGeneWriteVI1AssAss1AsaAsa], (instregex "SRSRAv(16i8|8i16|4i32|2i64)_shift$")>;
+def : InstRW<[XGeneWriteVI1AssAss1AsaAsa], (instregex "USRAv(16i8|8i16|4i32|2i64)_shift$")>;
+def : InstRW<[XGeneWriteVI1AssAss1AsaAsa], (instregex "URSRAv(16i8|8i16|4i32|2i64)_shift$")>;
+
+//---
+// AdvSIMD Data Processing (Vector Integer)
+// AdvSIMD modified immediate
+// 1Asl: MOVI, MVNI, ORR, BIC, FMOV (Q=0)
+// 1AslAsl: MOVI, MVNI, ORR, BIC, FMOV (Q=1)
+//---
+def : InstRW<[XGeneWriteVI1Asl], (instregex "MOVIv(4i16|2i32)$")>;
+def : InstRW<[XGeneWriteVI1Asl], (instregex "MVNIv(4i16|2i32)$")>;
+def : InstRW<[XGeneWriteVI1Asl], (instregex "ORRv(4i16|2i32)$")>;
+def : InstRW<[XGeneWriteVI1Asl], (instregex "BICv(4i16|2i32)$")>;
+// TODO: not sure about FMOVs. NOTE(review): only the f16/f32 "_ns" forms are matched (Q=0 here, Q=1 below); no 2f64 pattern is listed -- confirm.
+def : InstRW<[XGeneWriteVI1Asl], (instregex "FMOVv(4f16|2f32)_ns$")>;
+def : InstRW<[XGeneWriteVI1AslAsl], (instregex "MOVIv(8i16|4i32)$")>;
+def : InstRW<[XGeneWriteVI1AslAsl], (instregex "MVNIv(8i16|4i32)$")>;
+def : InstRW<[XGeneWriteVI1AslAsl], (instregex "ORRv(8i16|4i32)$")>;
+def : InstRW<[XGeneWriteVI1AslAsl], (instregex "BICv(8i16|4i32)$")>;
+def : InstRW<[XGeneWriteVI1AslAsl], (instregex "FMOVv(8f16|4f32)_ns$")>;
+
+//---
+// AdvSIMD Data Processing (Vector Integer)
+// AdvSIMD TBL, TBX
+// All ops depend on Vm.
+// The first TBX op depends on Vd.
+// The first TBL/TBX ops depend on the low half of Vn, Vn+1, ... in turn, while the last TBL/TBX ops depend on the high half of Vn, Vn+1, ... in turn.
+// 2Asl: TBL/TBX (single register table Q=0)
+// 4Asl: TBL/TBX (two register table Q=0)
+// 6Asl: TBL/TBX (three register table Q=0)
+// 8Asl: TBL/TBX (four register table Q=0)
+// 2AslAsl: TBL/TBX (single register table Q=1)
+// 4AslAsl: TBL/TBX (two register table Q=1)
+// 6AslAsl: TBL/TBX (three register table Q=1)
+// 8AslAsl: TBL/TBX (four register table Q=1)
+//---
+def : InstRW<[XGeneWriteVI2Asl], (instregex "TB(L|X)v(8i8)One$")>;
+def : InstRW<[XGeneWriteVI4Asl], (instregex "TB(L|X)v(8i8)Two$")>;
+def : InstRW<[XGeneWriteVI6Asl], (instregex "TB(L|X)v(8i8)Three$")>;
+def : InstRW<[XGeneWriteVI8Asl], (instregex "TB(L|X)v(8i8)Four$")>;
+def : InstRW<[XGeneWriteVI2AslAsl], (instregex "TB(L|X)v(16i8)One$")>;
+def : InstRW<[XGeneWriteVI4AslAsl], (instregex "TB(L|X)v(16i8)Two$")>;
+def : InstRW<[XGeneWriteVI6AslAsl], (instregex "TB(L|X)v(16i8)Three$")>;
+def : InstRW<[XGeneWriteVI8AslAsl], (instregex "TB(L|X)v(16i8)Four$")>;
+
+//---
+// AdvSIMD Data Processing (Vector Integer)
+// AdvSIMD ZIP/UZP/TRN
+// 1Ass: ZIP1/ZIP2/UZP1/UZP2 (Q=0)
+// 1AslAsl: ZIP1/ZIP2/UZP1/UZP2 (Q=1, size=11, i.e. the 2i64 arrangement)
+// 1AssAss: ZIP1/ZIP2/UZP1/UZP2 (Q=1, size=other, i.e. 16i8/8i16/4i32)
+// 1AslAsl: TRN1/TRN2 (size=11, i.e. the 2i64 arrangement)
+// 1Ass: TRN1/TRN2 (size=other Q=0)
+// 1AssAss: TRN1/TRN2 (size=other Q=1)
+//---
+def : InstRW<[XGeneWriteVI1Ass], (instregex "ZIP(1|2)v(8i8|4i16|2i32)$")>;
+def : InstRW<[XGeneWriteVI1Ass], (instregex "UZP(1|2)v(8i8|4i16|2i32)$")>;
+def : InstRW<[XGeneWriteVI1Ass], (instregex "TRN(1|2)v(8i8|4i16|2i32)$")>;
+def : InstRW<[XGeneWriteVI1AslAsl], (instregex "ZIP(1|2)v(2i64)$")>;
+def : InstRW<[XGeneWriteVI1AslAsl], (instregex "UZP(1|2)v(2i64)$")>;
+def : InstRW<[XGeneWriteVI1AslAsl], (instregex "TRN(1|2)v(2i64)$")>;
+def : InstRW<[XGeneWriteVI1AssAss], (instregex "ZIP(1|2)v(16i8|8i16|4i32)$")>;
+def : InstRW<[XGeneWriteVI1AssAss], (instregex "UZP(1|2)v(16i8|8i16|4i32)$")>;
+def : InstRW<[XGeneWriteVI1AssAss], (instregex "TRN(1|2)v(16i8|8i16|4i32)$")>;
+
+//def : InstRW<[XGeneWriteVI1AslAsl], (instregex "^ZIP(1|2)_(PPP|ZZZ)_(B|D|H|S)$")>;
+
+
+//---
+// AdvSIMD Data Processing (Vector Integer)
+// AdvSIMD EXT:
+// 1Ass: EXT (Q=0)
+// 1AssAss: EXT (Q=1)
+//---
+def : InstRW<[XGeneWriteVI1Ass], (instregex "EXTv(8i8)$")>;
+def : InstRW<[XGeneWriteVI1AssAss], (instregex "EXTv(16i8)$")>;
+
+//---
+// AdvSIMD Data Processing (Vector FP)
+// AdvSIMD three same:
+// 1Falu: FADD, FSUB, FMULX, FMLA, FMLS, FADDP (Q=0)
+// 1Falu: FRECPS, FRSQRTS (Q=0)
+// 1Falu: FABD (Q=0)
+// 1Fdivd: FDIV (Q=0)
+// 1Fsel: FMAX, FMAXNM, FMAXP, FMAXNMP (Q=0)
+// 1Fsel: FMIN, FMINNM, FMINP, FMINNMP (Q=0)
+// 1Fsel: FCMEQ, FCMGE, FCMGT, FACGE, FACGT (Q=0)
+// 1FaluFalu: FADD, FSUB, FMULX, FMLA, FMLS, FADDP (Q=1)
+// 1FaluFalu: FRECPS, FRSQRTS (Q=1)
+// 1FaluFalu: FABD (Q=1)
+// 1FdivdFdivd: FDIV (Q=1)
+// 1FselFsel: FMAX, FMAXNM, FMAXP, FMAXNMP (Q=1)
+// 1FselFsel: FMIN, FMINNM, FMINP, FMINNMP (Q=1)
+// 1FselFsel: FCMEQ, FCMGE, FCMGT, FACGE, FACGT (Q=1)
+//---
+def : InstRW<[XGeneWriteVF1Falu], (instregex "FADDv(4f16|2f32)$")>;
+def : InstRW<[XGeneWriteVF1Falu], (instregex "FSUBv(4f16|2f32)$")>;
+def : InstRW<[XGeneWriteVF1Falu], (instregex "FMULXv(4f16|2f32)$")>;
+def : InstRW<[XGeneWriteVF1Falu], (instregex "FADDPv(4f16|2f32)$")>;
+def : InstRW<[XGeneWriteVF1Falu], (instregex "FML(A|S)v(4f16|2f32)$")>;
+def : InstRW<[XGeneWriteVF1Falu], (instregex "FRECPSv(4f16|2f32)$")>;
+def : InstRW<[XGeneWriteVF1Falu], (instregex "FRSQRTSv(4f16|2f32)$")>;
+def : InstRW<[XGeneWriteVF1Falu], (instregex "FABDv(4f16|2f32)$")>;
+def : InstRW<[XGeneWriteVF1Fdivd], (instregex "FDIVv(4f16|2f32)$")>;
+def : InstRW<[XGeneWriteVF1Fsel], (instregex "FMAXv(4f16|2f32)$")>;
+def : InstRW<[XGeneWriteVF1Fsel], (instregex "FMAXNMv(4f16|2f32)$")>;
+def : InstRW<[XGeneWriteVF1Fsel], (instregex "FMAXPv(4f16|2f32)$")>;
+def : InstRW<[XGeneWriteVF1Fsel], (instregex "FMAXNMPv(4f16|2f32)$")>;
+def : InstRW<[XGeneWriteVF1Fsel], (instregex "FMINv(4f16|2f32)$")>;
+def : InstRW<[XGeneWriteVF1Fsel], (instregex "FMINNMv(4f16|2f32)$")>;
+def : InstRW<[XGeneWriteVF1Fsel], (instregex "FMINPv(4f16|2f32)$")>;
+def : InstRW<[XGeneWriteVF1Fsel], (instregex "FMINNMPv(4f16|2f32)$")>;
+def : InstRW<[XGeneWriteVF1Fsel], (instregex "FCM(EQ|GE|GT)v(4f16|2f32)$")>;
+def : InstRW<[XGeneWriteVF1Fsel], (instregex "FAC(GE|GT)v(4f16|2f32)$")>;
+def : InstRW<[XGeneWriteVF1FaluFalu], (instregex "FADDv(8f16|4f32|2f64)$")>;
+def : InstRW<[XGeneWriteVF1FaluFalu], (instregex "FSUBv(8f16|4f32|2f64)$")>;
+def : InstRW<[XGeneWriteVF1FaluFalu], (instregex "FMULXv(8f16|4f32|2f64)$")>;
+def : InstRW<[XGeneWriteVF1FaluFalu], (instregex "FADDPv(8f16|4f32|2f64)$")>;
+def : InstRW<[XGeneWriteVF1FaluFalu], (instregex "FML(A|S)v(8f16|4f32|2f64)$")>;
+def : InstRW<[XGeneWriteVF1FaluFalu], (instregex "FRECPSv(8f16|4f32|2f64)$")>;
+def : InstRW<[XGeneWriteVF1FaluFalu], (instregex "FRSQRTSv(8f16|4f32|2f64)$")>;
+def : InstRW<[XGeneWriteVF1FaluFalu], (instregex "FABDv(8f16|4f32|2f64)$")>;
+def : InstRW<[XGeneWriteVF1FdivdFdivd], (instregex "FDIVv(8f16|4f32|2f64)$")>;
+def : InstRW<[XGeneWriteVF1FselFsel], (instregex "FMAXv(8f16|4f32|2f64)$")>;
+def : InstRW<[XGeneWriteVF1FselFsel], (instregex "FMAXNMv(8f16|4f32|2f64)$")>;
+def : InstRW<[XGeneWriteVF1FselFsel], (instregex "FMAXPv(8f16|4f32|2f64)$")>;
+def : InstRW<[XGeneWriteVF1FselFsel], (instregex "FMAXNMPv(8f16|4f32|2f64)$")>;
+def : InstRW<[XGeneWriteVF1FselFsel], (instregex "FMINv(8f16|4f32|2f64)$")>;
+def : InstRW<[XGeneWriteVF1FselFsel], (instregex "FMINNMv(8f16|4f32|2f64)$")>;
+def : InstRW<[XGeneWriteVF1FselFsel], (instregex "FMINPv(8f16|4f32|2f64)$")>;
+def : InstRW<[XGeneWriteVF1FselFsel], (instregex "FMINNMPv(8f16|4f32|2f64)$")>;
+def : InstRW<[XGeneWriteVF1FselFsel], (instregex "FCM(EQ|GE|GT)v(8f16|4f32|2f64)$")>;
+def : InstRW<[XGeneWriteVF1FselFsel], (instregex "FAC(GE|GT)v(8f16|4f32|2f64)$")>;
+
+//---
+// AdvSIMD Data Processing (Vector FP)
+// AdvSIMD two-reg misc:
+// 1Fhcvt: FCVTN, FCVTL (size=0)
+// NOTE: FCVTL is missing from the documentation, but we will include it here
+// 1Falu: FCVTN, FCVTL (size=1)
+// 1Falu: FCVTXN (all)
+// 1Fcvt: FRINTN, FRINTM, FRINTA, FRINTP (Q=0)
+// 1Fcvt: FRINTZ, FRINTX, FRINTI (Q=0)
+// 1Fcvt: FCVTNS, FCVTMS, FCVTAS, FCVTPS (Q=0)
+// 1Fcvt: FCVTNU, FCVTMU, FCVTAU, FCVTPU (Q=0)
+// 1Fcvt: FCVTZS, FCVTZU (integer Q=0)
+// 1Falu: SCVTF, UCVTF (integer Q=0)
+// 1Fsel: FCMGT, FCMEQ, FCMLT (zero Q=0)
+// 1Fsel: FCMGE, FCMLE (zero Q=0)
+// 1Fmov: FABS, FNEG (Q=0)
+// 1FcvtFcvt: FRINTN, FRINTM, FRINTA, FRINTP (Q=1)
+// 1FcvtFcvt: FRINTZ, FRINTX, FRINTI (Q=1)
+// 1FcvtFcvt: FCVTNS, FCVTMS, FCVTAS, FCVTPS (Q=1)
+// 1FcvtFcvt: FCVTNU, FCVTMU, FCVTAU, FCVTPU (Q=1)
+// 1FcvtFcvt: FCVTZS, FCVTZU (integer Q=1)
+// 1FaluFalu: SCVTF, UCVTF (integer Q=1)
+// 1FselFsel: FCMGT, FCMEQ, FCMLT (zero Q=1)
+// 1FselFsel: FCMGE, FCMLE (zero Q=1)
+// 1FmovFmov: FABS, FNEG (Q=1)
+// 1Adre: FRECPE (all)
+// 1Asre: FRSQRTE (all)
+// 1Fsqrd: FSQRT (all)
+//---
+def : InstRW<[XGeneWriteVF1Fhcvt], (instregex "FCVT(N|L)v(4i16|8i16)$")>;
+def : InstRW<[XGeneWriteVF1Falu], (instregex "FCVT(N|L)v(2i32|4i32)$")>;
+def : InstRW<[XGeneWriteVF1Falu], (instregex "FCVTXNv(2f32|4f32)$")>;
+def : InstRW<[XGeneWriteVF1Fcvt], (instregex "FRINT(N|M|A|P)v(4f16|2f32)$")>;
+def : InstRW<[XGeneWriteVF1Fcvt], (instregex "FRINT(Z|X|I)v(4f16|2f32)$")>;
+def : InstRW<[XGeneWriteVF1Fcvt], (instregex "FCVT(N|M|A|P|Z)Sv(4f16|2f32)$")>;
+def : InstRW<[XGeneWriteVF1Fcvt], (instregex "FCVT(N|M|A|P|Z)Uv(4f16|2f32)$")>;
+def : InstRW<[XGeneWriteVF1Falu], (instregex "SCVTFv(4f16|2f32)$")>;
+def : InstRW<[XGeneWriteVF1Falu], (instregex "UCVTFv(4f16|2f32)$")>;
+def : InstRW<[XGeneWriteVF1Fsel], (instregex "FCM(GT|EQ|LT)v(4i16|2i32)rz$")>;
+def : InstRW<[XGeneWriteVF1Fsel], (instregex "FCM(GE|LE)v(4i16|2i32)rz$")>;
+def : InstRW<[XGeneWriteVF1Fmov], (instregex "FABSv(4f16|2f32)$")>;
+def : InstRW<[XGeneWriteVF1Fmov], (instregex "FNEGv(4f16|2f32)$")>;
+def : InstRW<[XGeneWriteVF1FcvtFcvt], (instregex "FRINT(N|M|A|P)v(8f16|4f32|2f64)$")>;
+def : InstRW<[XGeneWriteVF1FcvtFcvt], (instregex "FRINT(Z|X|I)v(8f16|4f32|2f64)$")>;
+def : InstRW<[XGeneWriteVF1FcvtFcvt], (instregex "FCVT(N|M|A|P|Z)Sv(8f16|4f32|2f64)$")>;
+def : InstRW<[XGeneWriteVF1FcvtFcvt], (instregex "FCVT(N|M|A|P|Z)Uv(8f16|4f32|2f64)$")>;
+def : InstRW<[XGeneWriteVF1FaluFalu], (instregex "SCVTFv(8f16|4f32|2f64)$")>;
+def : InstRW<[XGeneWriteVF1FaluFalu], (instregex "UCVTFv(8f16|4f32|2f64)$")>;
+def : InstRW<[XGeneWriteVF1FselFsel], (instregex "FCM(GT|EQ|LT)v(8i16|4i32|2i64)rz$")>;
+def : InstRW<[XGeneWriteVF1FselFsel], (instregex "FCM(GE|LE)v(8i16|4i32|2i64)rz$")>;
+def : InstRW<[XGeneWriteVF1FmovFmov], (instregex "FABSv(8f16|4f32|2f64)$")>;
+def : InstRW<[XGeneWriteVF1FmovFmov], (instregex "FNEGv(8f16|4f32|2f64)$")>;
+def : InstRW<[XGeneWriteVF1Adre], (instregex "FRECPEv(4f16|2f32)$")>;
+def : InstRW<[XGeneWriteVF1Adre], (instregex "FRECPEv(8f16|4f32|2f64)$")>;
+def : InstRW<[XGeneWriteVF1Asre], (instregex "FRSQRTEv(4f16|2f32)$")>;
+def : InstRW<[XGeneWriteVF1Asre], (instregex "FRSQRTEv(8f16|4f32|2f64)$")>;
+def : InstRW<[XGeneWriteVF1Fsqrd], (instregex "FSQRTv(4f16|2f32)$")>;
+def : InstRW<[XGeneWriteVF1Fsqrd], (instregex "FSQRTv(8f16|4f32|2f64)$")>;
+
+//---
+// AdvSIMD Data Processing (Vector FP)
+// AdvSIMD across lanes:
+// 2Asa: FMAXV, FMINV, FMAXNMV, FMINNMV
+// AdvSIMD vector x indexed element:
+// 1Asm: FMUL, FMULX, FMLA, FMLS (by element Q=0)
+// 1AsmAsm: FMUL, FMULX, FMLA, FMLS (by element Q=1)
+// AdvSIMD shift by immediate:
+// 1Falu: SCVTF, UCVTF (fixed-point Q=0)
+// 1Fcvt: FCVTZS, FCVTZU (fixed-point Q=0)
+// 1FaluFalu: SCVTF, UCVTF (fixed-point Q=1)
+// 1FcvtFcvt: FCVTZS, FCVTZU (fixed-point Q=1)
+//---
+def : InstRW<[XGeneWriteVF2Asa], (instregex "FMAXVv(4i16|8i16|4i32)v$")>;
+def : InstRW<[XGeneWriteVF2Asa], (instregex "FMINVv(4i16|8i16|4i32)v$")>;
+def : InstRW<[XGeneWriteVF2Asa], (instregex "FMAXNMVv(4i16|8i16|4i32)v$")>;
+def : InstRW<[XGeneWriteVF2Asa], (instregex "FMINNMVv(4i16|8i16|4i32)v$")>;
+
+def : InstRW<[XGeneWriteVF1Asm], (instregex "FMULv(4i16|2i32)_indexed$")>;
+def : InstRW<[XGeneWriteVF1Asm], (instregex "FMULXv(4i16|2i32)_indexed$")>;
+def : InstRW<[XGeneWriteVF1Asm], (instregex "FML(A|S)v(4i16|2i32)_indexed$")>;
+def : InstRW<[XGeneWriteVF1AsmAsm], (instregex "FMULv(8i16|4i32|2i64)_indexed$")>;
+def : InstRW<[XGeneWriteVF1AsmAsm], (instregex "FMULXv(8i16|4i32|2i64)_indexed$")>;
+def : InstRW<[XGeneWriteVF1AsmAsm], (instregex "FML(A|S)v(8i16|4i32|2i64)_indexed$")>;
+
+def : InstRW<[XGeneWriteVF1Falu], (instregex "SCVTFv(4i16|2i32)_shift$")>;
+def : InstRW<[XGeneWriteVF1Falu], (instregex "UCVTFv(4i16|2i32)_shift$")>;
+def : InstRW<[XGeneWriteVF1Fcvt], (instregex "FCVTZ(S|U)v(4i16|2i32)_shift$")>;
+def : InstRW<[XGeneWriteVF1FaluFalu], (instregex "SCVTFv(8i16|4i32|2i64)_shift$")>;
+def : InstRW<[XGeneWriteVF1FaluFalu], (instregex "UCVTFv(8i16|4i32|2i64)_shift$")>;
+def : InstRW<[XGeneWriteVF1FcvtFcvt], (instregex "FCVTZ(S|U)v(8i16|4i32|2i64)_shift$")>;
+
+}
diff --git a/lib/Target/AArch64/AArch64Subtarget.cpp b/lib/Target/AArch64/AArch64Subtarget.cpp
index 39b76443438..ca248a6bf6a 100644
--- a/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -137,6 +137,9 @@ void AArch64Subtarget::initializeProperties() {
case CortexA75:
PrefFunctionAlignment = 4;
break;
+ case XGene:
+ MaxInterleaveFactor = 4;
+ break;
case Others: break;
}
}
diff --git a/lib/Target/AArch64/AArch64Subtarget.h b/lib/Target/AArch64/AArch64Subtarget.h
index 9245b2f396b..613556c5ba7 100644
--- a/lib/Target/AArch64/AArch64Subtarget.h
+++ b/lib/Target/AArch64/AArch64Subtarget.h
@@ -55,7 +55,8 @@ public:
ThunderX,
ThunderXT81,
ThunderXT83,
- ThunderXT88
+ ThunderXT88,
+ XGene
};
protected: