From ab852275b953675094b9f5c6f8c32c4bedd1568f Mon Sep 17 00:00:00 2001 From: Martin Elshuber Date: Tue, 30 Jan 2018 20:55:36 +0100 Subject: Added XGene model --- include/llvm/Support/AArch64TargetParser.def | 2 + lib/Target/AArch64/AArch64.td | 17 + lib/Target/AArch64/AArch64SchedXGene.td | 2372 ++++++++++++++++++++++++++ lib/Target/AArch64/AArch64Subtarget.cpp | 3 + lib/Target/AArch64/AArch64Subtarget.h | 3 +- 5 files changed, 2396 insertions(+), 1 deletion(-) create mode 100644 lib/Target/AArch64/AArch64SchedXGene.td diff --git a/include/llvm/Support/AArch64TargetParser.def b/include/llvm/Support/AArch64TargetParser.def index 6772e5f9b73..a7b0d346f76 100644 --- a/include/llvm/Support/AArch64TargetParser.def +++ b/include/llvm/Support/AArch64TargetParser.def @@ -109,6 +109,8 @@ AARCH64_CPU_NAME("thunderxt81", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, (AArch64::AEK_CRC | AArch64::AEK_PROFILE)) AARCH64_CPU_NAME("thunderxt83", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, (AArch64::AEK_CRC | AArch64::AEK_PROFILE)) +AARCH64_CPU_NAME("xgene", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, + (AArch64::AEK_SIMD | AArch64::AEK_CRC | AArch64::AEK_CRYPTO)) // Invalid CPU AARCH64_CPU_NAME("invalid", INVALID, FK_INVALID, true, AArch64::AEK_INVALID) #undef AARCH64_CPU_NAME diff --git a/lib/Target/AArch64/AArch64.td b/lib/Target/AArch64/AArch64.td index a69d38144c7..21b236ecf78 100644 --- a/lib/Target/AArch64/AArch64.td +++ b/lib/Target/AArch64/AArch64.td @@ -247,6 +247,7 @@ include "AArch64SchedExynosM1.td" include "AArch64SchedExynosM3.td" include "AArch64SchedThunderX.td" include "AArch64SchedThunderX2T99.td" +include "AArch64SchedXGene.td" def ProcA35 : SubtargetFeature<"a35", "ARMProcFamily", "CortexA35", "Cortex-A35 ARM processors", [ @@ -497,6 +498,21 @@ def ProcThunderXT83 : SubtargetFeature<"thunderxt83", "ARMProcFamily", FeaturePredictableSelectIsExpensive, FeatureNEON]>; +def ProcXGene : SubtargetFeature<"xgene", "ARMProcFamily", "XGene", + "X-Gene", [ + FeatureBalanceFPOps, + 
FeatureCRC, + FeatureCrypto, + FeatureCustomCheapAsMoveHandling, + FeatureFPARMv8, + FeatureFullFP16, + FeatureNEON, + FeaturePerfMon, + FeaturePostRAScheduler, + FeatureUseAA + ]>; + + def : ProcessorModel<"generic", NoSchedModel, [ FeatureFPARMv8, FeatureFuseAES, @@ -529,6 +545,7 @@ def : ProcessorModel<"thunderxt81", ThunderXT8XModel, [ProcThunderXT81]>; def : ProcessorModel<"thunderxt83", ThunderXT8XModel, [ProcThunderXT83]>; // Cavium ThunderX2T9X Processors. Formerly Broadcom Vulcan. def : ProcessorModel<"thunderx2t99", ThunderX2T99Model, [ProcThunderX2T99]>; +def : ProcessorModel<"xgene", XGeneModel, [ProcXGene]>; //===----------------------------------------------------------------------===// // Assembly parser diff --git a/lib/Target/AArch64/AArch64SchedXGene.td b/lib/Target/AArch64/AArch64SchedXGene.td new file mode 100644 index 00000000000..772451382f7 --- /dev/null +++ b/lib/Target/AArch64/AArch64SchedXGene.td @@ -0,0 +1,2372 @@ +//==- AArch64SchedXGene.td - X-Gene Scheduling Definitions -*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the itinerary class data for the ARM XGene processors. +// +//===----------------------------------------------------------------------===// + +// ===---------------------------------------------------------------------===// +// The following definitions describe the simpler per-operand machine model. +// This works with MachineScheduler. See MCSchedModel.h for details. + +// X-Gene machine model for scheduling and other instruction cost heuristics. +def XGeneModel : SchedMachineModel { + let MicroOpBufferSize = 64;// Value of 64 confirmed by APM + let IssueWidth = 4; // 4 micro-ops are dispatched per cycle. + let LoadLatency = 5; // Optimistic load latency assuming bypass. 
+ // This is overridden by OperandCycles if the + // Itineraries are queried instead. + let MispredictPenalty = 64;// Determined by experiments + + // Enable partial & runtime unrolling. The magic number is chosen based on + // experiments and benchmarking data. + // Tried with 8, 12, 16, 24; 12 seems to be the best for CoreMark + // coremark: any value but twelve gives at least -2% (DO NOT CHANGE, I guess) + let LoopMicroOpBufferSize = 4; // NOTE(review): the comment above favours 12 but the value is 4 - confirm. TODO: try with high values such as 50 + let CompleteModel = 1; + list UnsupportedFeatures = [HasSVE]; +} + +//===----------------------------------------------------------------------===// +// Define each kind of processor resource and number available. + +// I think this should be locked at 16; good results both in coremark and spec +// A buffer size of 16 seems suitable according to spec/gobmk +def XGeneUnitB : ProcResource<1> { let BufferSize = 16; } // Branch +def XGeneUnitLd : ProcResource<1> { let BufferSize = 16; } // Load +def XGeneUnitSt : ProcResource<1> { let BufferSize = 16; } // Store +def XGeneUnitIXn : ProcResource<2> { let BufferSize = 20; } // Int ALU +def XGeneUnitFSU : ProcResource<1> { let BufferSize = 16; } // Float ALU +def XGeneUnitFDiv : ProcResource<1> { let BufferSize = 16; } // Float Division + +// On this machine there are two arithmetic units, but only one of them can run +// all instructions; the other unit can run a subset of the instructions; we are +// trying to achieve this by defining a third dummy unit to be used as a "lock" +// the lock limits the scheduling of the restricted instructions +// TODO: try lower BufferSize (10) for IXB to create back-pressure +def XGeneLockIXB : ProcResource<1> { let BufferSize = 10; } // Int ALU B lock + +// On this machine, int division and multiplication ops can be issued only +// once every two cycles (for each separately), we are using these locks +// to model this particularity +def XGeneLockDiv : ProcResource<1> { let BufferSize = 16; } // Int Division 
+def XGeneLockMul : ProcResource<1> { let BufferSize = 16; } // Int Multipl + +// On this machine, the sqrt and div instructions cannot be issued at the same +// time (for both together), therefore we are using this lock for these two +def XGeneLockFInst : ProcResource<1> { let BufferSize = 16; } // Float lock + +//===----------------------------------------------------------------------===// +// Subtarget-specific SchedWrite types which both map the ProcResources and +// set the latency. + +let SchedModel = XGeneModel in { + +// ALU instructions which can run on both ALU units +def : WriteRes; +def : WriteRes; +def : WriteRes; +// ALU instructions which are restricted to IXB +def : WriteRes; +def : WriteRes; +def : WriteRes; + +// MAC instructions can run only on IXB +def : WriteRes; +def : WriteRes; + +// DIV instructions can run only on IXB +def : WriteRes; +def : WriteRes; + +// Load +def : WriteRes; +def : WriteRes; +def : WriteRes; + +// Pre/post indexing gonna be accounted for each individual instructions +def : WriteRes; + +// Store +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; + +// WriteAtomic - not supported +def : WriteRes { let Unsupported = 1; } + +// Branch - always no latency +def : WriteRes { let Latency = 0; } +def : WriteRes { let Latency = 0; } +def : WriteRes { let Latency = 0; } +def : WriteRes { let Latency = 0; } +def : WriteRes { let Latency = 0; } + +// FP ALU +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; + +// FP Mul, Div, Sqrt +def : WriteRes; +def : WriteRes; + +//--- +// AdvSIMD Data Processing (Scalar FP) +//--- +def XGeneWriteF1Asm : SchedWriteRes<[XGeneUnitFSU]> { + let Latency = 5; + let ResourceCycles = [1]; + let NumMicroOps = 1; } +def XGeneWriteF1Adre : SchedWriteRes<[XGeneUnitFSU]> { + let Latency = 3; + let ResourceCycles = [1]; + let NumMicroOps = 1; } +def XGeneWriteF1Asre : SchedWriteRes<[XGeneUnitFSU]> { + let Latency = 5; + let ResourceCycles = 
[1]; + let NumMicroOps = 1; } +def XGeneWriteF1Falu : SchedWriteRes<[XGeneUnitFSU]> { + let Latency = 5; + let ResourceCycles = [1]; + let NumMicroOps = 1; } +def XGeneWriteF1Fcmp : SchedWriteRes<[XGeneUnitFSU]> { + let Latency = 10; + let ResourceCycles = [1]; + let NumMicroOps = 1; } +def XGeneWriteF1Fcvt : SchedWriteRes<[XGeneUnitFSU]> { + let Latency = 5; + let ResourceCycles = [1]; + let NumMicroOps = 1; } +def XGeneWriteF1Fdivs : SchedWriteRes<[XGeneUnitFSU, XGeneLockFInst]> { + let Latency = 22; + let ResourceCycles = [8, 22]; + let NumMicroOps = 1; } +def XGeneWriteF1Fdivd : SchedWriteRes<[XGeneUnitFSU, XGeneLockFInst]> { + let Latency = 28; + let ResourceCycles = [11, 28]; + let NumMicroOps = 1; } +def XGeneWriteF1Fhcvt : SchedWriteRes<[XGeneUnitFSU]> { + let Latency = 3; + let ResourceCycles = [1]; + let NumMicroOps = 1; } +def XGeneWriteF1Fmov : SchedWriteRes<[XGeneUnitFSU]> { + let Latency = 2; + let ResourceCycles = [1]; + let NumMicroOps = 1; } +def XGeneWriteF1Fsel : SchedWriteRes<[XGeneUnitFSU]> { + let Latency = 3; + let ResourceCycles = [1]; + let NumMicroOps = 1; } +def XGeneWriteF1Fsqrs : SchedWriteRes<[XGeneUnitFSU, XGeneLockFInst]> { + let Latency = 22; + let ResourceCycles = [8, 22]; + let NumMicroOps = 1; } +def XGeneWriteF1Fsqrd : SchedWriteRes<[XGeneUnitFSU, XGeneLockFInst]> { + let Latency = 38; + let ResourceCycles = [17, 38]; + let NumMicroOps = 1; } +// instructions with store ops are extra-special because the chip will be using +// data bypass; latencies are measured since the registers become available and +// are as following: +// for int, 1 for the address register, -1 for the data register +// for float, 4 for the address register, 1 for the data register +// for complex, 4 for the address register, 2 for the data register +// we assumed latencies from data register availability +def XGeneWriteF1St1Lf : SchedWriteRes<[XGeneUnitLd, XGeneUnitSt]> { + let Latency = 9; + let ResourceCycles = [1, 1]; + let NumMicroOps = 2; } +def 
XGeneWriteF1Sf1Ld : SchedWriteRes<[XGeneUnitLd, XGeneUnitSt]> { + let Latency = 6; + let ResourceCycles = [1, 1]; + let NumMicroOps = 2; } +def XGeneWriteF1Fcvt1Sf1Ld : SchedWriteRes<[XGeneUnitFSU, XGeneUnitLd, XGeneUnitSt]> { + let Latency = 11; + let ResourceCycles = [1, 1, 1]; + let NumMicroOps = 3; } +def XGeneWriteF1St1Lf1Falu : SchedWriteRes<[XGeneUnitFSU, XGeneUnitLd, XGeneUnitSt]> { + let Latency = 14; + let ResourceCycles = [1, 1, 1]; + let NumMicroOps = 3; } + +//--- +// Load instructions +// NOTE: the way load latencies are calculated here is as follows: +// biggest load first: 5 (int - Ld), 10 (float - Lf) or 11 (complex - Lc) +// arithmetic is parallelized with the loads, so it does not affect latency +// 1 for each other load - since they are pipelined, the only thing which +// further contributes to the latency is the issue time +//--- + +// Integer loads +def XGeneWriteLD1Ld : SchedWriteRes<[XGeneUnitLd]> { + let Latency = 5; + let ResourceCycles = [1]; + let NumMicroOps = 1; } +def XGeneWriteLD1LdLd : SchedWriteRes<[XGeneUnitLd]> { + let Latency = 6; + let ResourceCycles = [2]; + let NumMicroOps = 2; } +def XGeneWriteLD1LdAlu : SchedWriteRes<[XGeneUnitLd, XGeneUnitIXn]> { + let Latency = 5; + let ResourceCycles = [1, 1]; + let NumMicroOps = 2; } +def XGeneWriteLD1LdLdAlu : SchedWriteRes<[XGeneUnitLd, XGeneUnitIXn]> { + let Latency = 6; + let ResourceCycles = [2, 1]; + let NumMicroOps = 3; } +def XGeneWriteLD1Ld1Sbfm : SchedWriteRes<[XGeneUnitLd, XGeneUnitIXn]> { + let Latency = 6; + let ResourceCycles = [1, 1]; + let NumMicroOps = 2; } +def XGeneWriteLD1Ld1SbfmAlu : SchedWriteRes<[XGeneUnitLd, XGeneUnitIXn]> { + let Latency = 6; + let ResourceCycles = [1, 2]; + let NumMicroOps = 3; } +def XGeneWriteLD1Ld1LdSbfm1Sbfm : SchedWriteRes<[XGeneUnitLd, XGeneUnitIXn]> { + let Latency = 7; + let ResourceCycles = [2, 2]; + let NumMicroOps = 4; } +def XGeneWriteLD1Ld1LdSbfm1SbfmAlu : SchedWriteRes<[XGeneUnitLd, XGeneUnitIXn]> { + let Latency = 7; + let 
ResourceCycles = [2, 3]; + let NumMicroOps = 5; } + +// Float/SIMD loads (1LfLf and 1LfLfAlu already covered by vector loads) +def XGeneWriteLD1LfAlu1Lf : SchedWriteRes<[XGeneUnitLd, XGeneUnitIXn]> { + let Latency = 11; + let ResourceCycles = [2, 1]; + let NumMicroOps = 3; } +def XGeneWriteLD1LfLfAlu1LfLf : SchedWriteRes<[XGeneUnitLd, XGeneUnitIXn]> { + let Latency = 13; + let ResourceCycles = [4, 1]; + let NumMicroOps = 5; } +def XGeneWriteLD1LfLfAlu1LfLfAlu: SchedWriteRes<[XGeneUnitLd, XGeneUnitIXn]> { + let Latency = 13; + let ResourceCycles = [4, 2]; + let NumMicroOps = 6; } + +// Vector loads +def XGeneWriteLD1Lc : SchedWriteRes<[XGeneUnitLd]> { + let Latency = 11; + let ResourceCycles = [1]; + let NumMicroOps = 1; } +def XGeneWriteLD1LcAlu : SchedWriteRes<[XGeneUnitLd, XGeneUnitIXn]> { + let Latency = 11; + let ResourceCycles = [1, 1]; + let NumMicroOps = 2; } +def XGeneWriteLD1LcLc : SchedWriteRes<[XGeneUnitLd]> { + let Latency = 12; + let ResourceCycles = [2]; + let NumMicroOps = 2; } +def XGeneWriteLD1LcLcAlu : SchedWriteRes<[XGeneUnitLd, XGeneUnitIXn]> { + let Latency = 12; + let ResourceCycles = [2, 1]; + let NumMicroOps = 3; } +def XGeneWriteLD1X3Lc : SchedWriteRes<[XGeneUnitLd]> { + let Latency = 13; + let ResourceCycles = [3]; + let NumMicroOps = 3; } +def XGeneWriteLD1X3LcAlu : SchedWriteRes<[XGeneUnitLd, XGeneUnitIXn]> { + let Latency = 13; + let ResourceCycles = [3, 1]; + let NumMicroOps = 4; } +def XGeneWriteLD1X4Lc : SchedWriteRes<[XGeneUnitLd]> { + let Latency = 14; + let ResourceCycles = [4]; + let NumMicroOps = 4; } +def XGeneWriteLD1X4LcAlu : SchedWriteRes<[XGeneUnitLd, XGeneUnitIXn]> { + let Latency = 14; + let ResourceCycles = [4, 1]; + let NumMicroOps = 5; } +def XGeneWriteLD1X6Lc : SchedWriteRes<[XGeneUnitLd]> { + let Latency = 16; + let ResourceCycles = [6]; + let NumMicroOps = 6; } +def XGeneWriteLD1X6LcAlu : SchedWriteRes<[XGeneUnitLd, XGeneUnitIXn]> { + let Latency = 16; + let ResourceCycles = [6, 1]; + let NumMicroOps = 7; } +def 
XGeneWriteLD1X8Lc : SchedWriteRes<[XGeneUnitLd]> { + let Latency = 18; + let ResourceCycles = [8]; + let NumMicroOps = 8; } +def XGeneWriteLD1X8LcAlu : SchedWriteRes<[XGeneUnitLd, XGeneUnitIXn]> { + let Latency = 18; + let ResourceCycles = [8, 1]; + let NumMicroOps = 9; } +def XGeneWriteLD1Lf : SchedWriteRes<[XGeneUnitLd]> { + let Latency = 10; + let ResourceCycles = [1]; + let NumMicroOps = 1; } +def XGeneWriteLD1LfAlu : SchedWriteRes<[XGeneUnitLd, XGeneUnitIXn]> { + let Latency = 10; + let ResourceCycles = [1, 1]; + let NumMicroOps = 2; } +def XGeneWriteLD1LfLf : SchedWriteRes<[XGeneUnitLd]> { + let Latency = 11; + let ResourceCycles = [2]; + let NumMicroOps = 2; } +def XGeneWriteLD1LfLfAlu : SchedWriteRes<[XGeneUnitLd, XGeneUnitIXn]> { + let Latency = 11; + let ResourceCycles = [2, 1]; + let NumMicroOps = 3; } +def XGeneWriteLD1X3Lf : SchedWriteRes<[XGeneUnitLd]> { + let Latency = 12; + let ResourceCycles = [3]; + let NumMicroOps = 3; } +def XGeneWriteLD1X3LfAlu : SchedWriteRes<[XGeneUnitLd, XGeneUnitIXn]> { + let Latency = 12; + let ResourceCycles = [3, 1]; + let NumMicroOps = 4; } +def XGeneWriteLD1X4Lf : SchedWriteRes<[XGeneUnitLd]> { + let Latency = 13; + let ResourceCycles = [4]; + let NumMicroOps = 4; } +def XGeneWriteLD1X4LfAlu : SchedWriteRes<[XGeneUnitLd, XGeneUnitIXn]> { + let Latency = 13; + let ResourceCycles = [4, 1]; + let NumMicroOps = 5; } +def XGeneWriteLD1X6Lf : SchedWriteRes<[XGeneUnitLd]> { + let Latency = 15; + let ResourceCycles = [6]; + let NumMicroOps = 6; } +def XGeneWriteLD1X6LfAlu : SchedWriteRes<[XGeneUnitLd, XGeneUnitIXn]> { + let Latency = 15; + let ResourceCycles = [6, 1]; + let NumMicroOps = 7; } +def XGeneWriteLD1X8Lf : SchedWriteRes<[XGeneUnitLd]> { + let Latency = 17; + let ResourceCycles = [8]; + let NumMicroOps = 8; } +def XGeneWriteLD1X8LfAlu : SchedWriteRes<[XGeneUnitLd, XGeneUnitIXn]> { + let Latency = 17; + let ResourceCycles = [8, 1]; + let NumMicroOps = 9; } +def XGeneWriteLD1Lf1Asi : SchedWriteRes<[XGeneUnitLd, 
XGeneUnitFSU]> { + let Latency = 13; + let ResourceCycles = [1, 1]; + let NumMicroOps = 2; } +def XGeneWriteLD1LfAlu1Asi : SchedWriteRes<[XGeneUnitLd, XGeneUnitIXn, XGeneUnitFSU]> { + let Latency = 13; + let ResourceCycles = [1, 1, 1]; + let NumMicroOps = 3; } +def XGeneWriteLD1LfLf1AsiAsi : SchedWriteRes<[XGeneUnitLd, XGeneUnitFSU]> { + let Latency = 17; + let ResourceCycles = [2, 2]; + let NumMicroOps = 4; } +def XGeneWriteLD1LfLfAlu1AsiAsi : SchedWriteRes<[XGeneUnitLd, XGeneUnitIXn, XGeneUnitFSU]> { + let Latency = 17; + let ResourceCycles = [2, 1, 2]; + let NumMicroOps = 5; } +def XGeneWriteLD1X3Lf1X3Asi : SchedWriteRes<[XGeneUnitLd, XGeneUnitFSU]> { + let Latency = 21; + let ResourceCycles = [3, 3]; + let NumMicroOps = 6; } +def XGeneWriteLD1X3LfAlu1X3Asi : SchedWriteRes<[XGeneUnitLd, XGeneUnitIXn, XGeneUnitFSU]> { + let Latency = 21; + let ResourceCycles = [3, 1, 3]; + let NumMicroOps = 7; } +def XGeneWriteLD1X4Lf1X4Asi : SchedWriteRes<[XGeneUnitLd, XGeneUnitFSU]> { + let Latency = 25; + let ResourceCycles = [4, 4]; + let NumMicroOps = 8; } +def XGeneWriteLD1X4LfAlu1X4Asi : SchedWriteRes<[XGeneUnitLd, XGeneUnitIXn, XGeneUnitFSU]> { + let Latency = 25; + let ResourceCycles = [4, 1, 4]; + let NumMicroOps = 9; } + +//--- +// Store instructions +// NOTE: Stores generally have a latency of zero - this value was considered for +// all the stores below; on the other hand, if there is a dependent load +// following the store, the latencies have different values; +// the current model will not cover these special cases +//--- + +// Integer stores +def XGeneWriteST1St : SchedWriteRes<[XGeneUnitSt]> { + let Latency = 0; + let ResourceCycles = [1]; + let NumMicroOps = 1; } +def XGeneWriteST1StAlu : SchedWriteRes<[XGeneUnitSt, XGeneUnitIXn]> { + let Latency = 1; + let ResourceCycles = [1, 1]; + let NumMicroOps = 2; } +def XGeneWriteST1StSt : SchedWriteRes<[XGeneUnitSt]> { + let Latency = 0; + let ResourceCycles = [2]; + let NumMicroOps = 2; } +def XGeneWriteST1StStAlu : 
SchedWriteRes<[XGeneUnitSt, XGeneUnitIXn]> { + let Latency = 1; + let ResourceCycles = [2, 1]; + let NumMicroOps = 3; } + +// Float/SIMD stores (1SfSf and 1SfSfAlu already covered by vector stores) +def XGeneWriteST1SfAlu1Sf : SchedWriteRes<[XGeneUnitSt, XGeneUnitIXn]> { + let Latency = 1; + let ResourceCycles = [2, 1]; + let NumMicroOps = 3; } +def XGeneWriteST1SfSfAlu1SfSf : SchedWriteRes<[XGeneUnitSt, XGeneUnitIXn]> { + let Latency = 1; + let ResourceCycles = [4, 1]; + let NumMicroOps = 5; } +def XGeneWriteST1SfSfAlu1SfSfAlu: SchedWriteRes<[XGeneUnitSt, XGeneUnitIXn]> { + let Latency = 2; + let ResourceCycles = [4, 2]; + let NumMicroOps = 6; } + +// Vector stores +def XGeneWriteST1Sc : SchedWriteRes<[XGeneUnitSt]> { + let Latency = 0; + let ResourceCycles = [1]; + let NumMicroOps = 1; } +def XGeneWriteST1ScAlu : SchedWriteRes<[XGeneUnitSt, XGeneUnitIXn]> { + let Latency = 1; + let ResourceCycles = [1, 1]; + let NumMicroOps = 2; } +def XGeneWriteST1ScSc : SchedWriteRes<[XGeneUnitSt]> { + let Latency = 0; + let ResourceCycles = [2]; + let NumMicroOps = 2; } +def XGeneWriteST1ScScAlu : SchedWriteRes<[XGeneUnitSt, XGeneUnitIXn]> { + let Latency = 1; + let ResourceCycles = [2, 1]; + let NumMicroOps = 3; } +def XGeneWriteST1X3Sc : SchedWriteRes<[XGeneUnitSt]> { + let Latency = 0; + let ResourceCycles = [3]; + let NumMicroOps = 3; } +def XGeneWriteST1X3ScAlu : SchedWriteRes<[XGeneUnitSt, XGeneUnitIXn]> { + let Latency = 1; + let ResourceCycles = [3, 1]; + let NumMicroOps = 4; } +def XGeneWriteST1X4Sc : SchedWriteRes<[XGeneUnitSt]> { + let Latency = 0; + let ResourceCycles = [4]; + let NumMicroOps = 4; } +def XGeneWriteST1X4ScAlu : SchedWriteRes<[XGeneUnitSt, XGeneUnitIXn]> { + let Latency = 1; + let ResourceCycles = [4, 1]; + let NumMicroOps = 5; } +def XGeneWriteST1X6Sc : SchedWriteRes<[XGeneUnitSt]> { + let Latency = 0; + let ResourceCycles = [6]; + let NumMicroOps = 6; } +def XGeneWriteST1X6ScAlu : SchedWriteRes<[XGeneUnitSt, XGeneUnitIXn]> { + let Latency = 1; + 
let ResourceCycles = [6, 1]; + let NumMicroOps = 7; } +def XGeneWriteST1X8Sc : SchedWriteRes<[XGeneUnitSt]> { + let Latency = 0; + let ResourceCycles = [8]; + let NumMicroOps = 8; } +def XGeneWriteST1X8ScAlu : SchedWriteRes<[XGeneUnitSt, XGeneUnitIXn]> { + let Latency = 1; + let ResourceCycles = [8, 1]; + let NumMicroOps = 9; } +def XGeneWriteST1Sf : SchedWriteRes<[XGeneUnitSt]> { + let Latency = 0; + let ResourceCycles = [1]; + let NumMicroOps = 1; } +def XGeneWriteST1SfAlu : SchedWriteRes<[XGeneUnitSt, XGeneUnitIXn]> { + let Latency = 1; + let ResourceCycles = [1, 1]; + let NumMicroOps = 2; } +def XGeneWriteST1SfSf : SchedWriteRes<[XGeneUnitSt]> { + let Latency = 0; + let ResourceCycles = [2]; + let NumMicroOps = 2; } +def XGeneWriteST1SfSfAlu : SchedWriteRes<[XGeneUnitSt, XGeneUnitIXn]> { + let Latency = 1; + let ResourceCycles = [2, 1]; + let NumMicroOps = 3; } +def XGeneWriteST1X3Sf : SchedWriteRes<[XGeneUnitSt]> { + let Latency = 0; + let ResourceCycles = [3]; + let NumMicroOps = 3; } +def XGeneWriteST1X3SfAlu : SchedWriteRes<[XGeneUnitSt, XGeneUnitIXn]> { + let Latency = 1; + let ResourceCycles = [3, 1]; + let NumMicroOps = 4; } +def XGeneWriteST1X4Sf : SchedWriteRes<[XGeneUnitSt]> { + let Latency = 0; + let ResourceCycles = [4]; + let NumMicroOps = 4; } +def XGeneWriteST1X4SfAlu : SchedWriteRes<[XGeneUnitSt, XGeneUnitIXn]> { + let Latency = 1; + let ResourceCycles = [4, 1]; + let NumMicroOps = 5; } +def XGeneWriteST1X6Sf : SchedWriteRes<[XGeneUnitSt]> { + let Latency = 0; + let ResourceCycles = [6]; + let NumMicroOps = 6; } +def XGeneWriteST1X6SfAlu : SchedWriteRes<[XGeneUnitSt, XGeneUnitIXn]> { + let Latency = 1; + let ResourceCycles = [6, 1]; + let NumMicroOps = 7; } +def XGeneWriteST1X8Sf : SchedWriteRes<[XGeneUnitSt]> { + let Latency = 0; + let ResourceCycles = [8]; + let NumMicroOps = 8; } +def XGeneWriteST1X8SfAlu : SchedWriteRes<[XGeneUnitSt, XGeneUnitIXn]> { + let Latency = 1; + let ResourceCycles = [8, 1]; + let NumMicroOps = 9; } + +//--- +// 
Integer Data Processing +//--- +def XGeneWriteI1Sbfm1Alu : SchedWriteRes<[XGeneUnitIXn]> { + let Latency = 2; + let ResourceCycles = [2]; + let NumMicroOps = 2; } +def XGeneWriteI1Alb1Alu : SchedWriteRes<[XGeneUnitIXn, XGeneLockIXB]> { + let Latency = 3; + let ResourceCycles = [2, 1]; + let NumMicroOps = 2; } +def XGeneWriteI1Sbfm : SchedWriteRes<[XGeneUnitIXn]> { + let Latency = 1; + let ResourceCycles = [1]; + let NumMicroOps = 1; } +def XGeneWriteI1Car: SchedWriteRes<[XGeneUnitIXn, XGeneLockIXB]> { + let Latency = 1; + let ResourceCycles = [1, 1]; + let NumMicroOps = 1; } +def XGeneWriteI1Set: SchedWriteRes<[XGeneUnitIXn, XGeneLockIXB]> { + let Latency = 1; + let ResourceCycles = [1, 1]; + let NumMicroOps = 1; } +def XGeneWriteI1Sbfm1Set : SchedWriteRes<[XGeneUnitIXn, XGeneLockIXB]> { + let Latency = 2; + let ResourceCycles = [2, 1]; + let NumMicroOps = 2; } +def XGeneWriteI1Alu : SchedWriteRes<[XGeneUnitIXn]> { + let Latency = 1; + let ResourceCycles = [1]; + let NumMicroOps = 1; } +def XGeneWriteI1Mlw1Alu : SchedWriteRes<[XGeneUnitIXn, XGeneLockIXB, XGeneLockMul]> { + let Latency = 5; + let ResourceCycles = [2, 1, 2]; + let NumMicroOps = 2; } +def XGeneWriteI1Mlx1Alu : SchedWriteRes<[XGeneUnitIXn, XGeneLockIXB, XGeneLockMul]> { + let Latency = 6; + let ResourceCycles = [2, 1, 2]; + let NumMicroOps = 2; } +def XGeneWriteI1Mlw : SchedWriteRes<[XGeneUnitIXn, XGeneLockIXB, XGeneLockMul]> { + let Latency = 4; + let ResourceCycles = [1, 1, 2]; + let NumMicroOps = 1; } +def XGeneWriteI1Mlx : SchedWriteRes<[XGeneUnitIXn, XGeneLockIXB, XGeneLockMul]> { + let Latency = 5; + let ResourceCycles = [1, 1, 2]; + let NumMicroOps = 1; } +def XGeneWriteI1Div : SchedWriteRes<[XGeneUnitIXn, XGeneLockIXB, XGeneLockDiv]> { + let Latency = 26; + let ResourceCycles = [1, 1, 26]; + let NumMicroOps = 1; } +def XGeneWriteI1Alb : SchedWriteRes<[XGeneUnitIXn, XGeneLockIXB]> { + let Latency = 2; + let ResourceCycles = [1, 1]; + let NumMicroOps = 1; } + +//--- +// AdvSIMD Data Processing 
(Vector Integer) +//--- +def XGeneWriteVI1Asa : SchedWriteRes<[XGeneUnitFSU]> { + let Latency = 3; + let ResourceCycles = [1]; + let NumMicroOps = 1; } +def XGeneWriteVI1Ass : SchedWriteRes<[XGeneUnitFSU]> { + let Latency = 3; + let ResourceCycles = [1]; + let NumMicroOps = 1; } +def XGeneWriteVI1Asl : SchedWriteRes<[XGeneUnitFSU]> { + let Latency = 2; + let ResourceCycles = [1]; + let NumMicroOps = 1; } +def XGeneWriteVI1Asm : SchedWriteRes<[XGeneUnitFSU]> { + let Latency = 5; + let ResourceCycles = [1]; + let NumMicroOps = 1; } +def XGeneWriteVI1AsaAsa : SchedWriteRes<[XGeneUnitFSU]> { + let Latency = 6; + let ResourceCycles = [2]; + let NumMicroOps = 2; } +def XGeneWriteVI1AssAss : SchedWriteRes<[XGeneUnitFSU]> { + let Latency = 6; + let ResourceCycles = [2]; + let NumMicroOps = 2; } +def XGeneWriteVI1AslAsl : SchedWriteRes<[XGeneUnitFSU]> { + let Latency = 4; + let ResourceCycles = [2]; + let NumMicroOps = 2; } +def XGeneWriteVI1AsmAsm : SchedWriteRes<[XGeneUnitFSU]> { + let Latency = 10; + let ResourceCycles = [2]; + let NumMicroOps = 2; } +def XGeneWriteVI1ApolApol : SchedWriteRes<[XGeneUnitFSU]> { + let Latency = 6; + let ResourceCycles = [2]; + let NumMicroOps = 2; } +def XGeneWriteVI1AsaAsa1Asa : SchedWriteRes<[XGeneUnitFSU]> { + let Latency = 9; + let ResourceCycles = [3]; + let NumMicroOps = 3; } +def XGeneWriteVI1AsaAsa1Ass : SchedWriteRes<[XGeneUnitFSU]> { + let Latency = 9; + let ResourceCycles = [3]; + let NumMicroOps = 3; } +def XGeneWriteVI1AsaAsa1AsaAsa : SchedWriteRes<[XGeneUnitFSU]> { + let Latency = 12; + let ResourceCycles = [4]; + let NumMicroOps = 4; } +def XGeneWriteVI1AssAss1AsaAsa : SchedWriteRes<[XGeneUnitFSU]> { + let Latency = 12; + let ResourceCycles = [4]; + let NumMicroOps = 4; } +def XGeneWriteVI1AsaAsa2Asa : SchedWriteRes<[XGeneUnitFSU]> { + let Latency = 12; + let ResourceCycles = [4]; + let NumMicroOps = 4; } +def XGeneWriteVI1Adre : SchedWriteRes<[XGeneUnitFSU]> { + let Latency = 3; + let ResourceCycles = [1]; + let NumMicroOps 
= 1; } +def XGeneWriteVI1Asre : SchedWriteRes<[XGeneUnitFSU]> { + let Latency = 5; + let ResourceCycles = [1]; + let NumMicroOps = 1; } +def XGeneWriteVI1Asl1Asl : SchedWriteRes<[XGeneUnitFSU]> { + let Latency = 4; + let ResourceCycles = [2]; + let NumMicroOps = 2; } +def XGeneWriteVI1Ass1Asa : SchedWriteRes<[XGeneUnitFSU]> { + let Latency = 6; + let ResourceCycles = [2]; + let NumMicroOps = 2; } +def XGeneWriteVI1Ass1Asi : SchedWriteRes<[XGeneUnitFSU]> { + let Latency = 6; + let ResourceCycles = [2]; + let NumMicroOps = 2; } +def XGeneWriteVI1Ass1Ass : SchedWriteRes<[XGeneUnitFSU]> { + let Latency = 6; + let ResourceCycles = [2]; + let NumMicroOps = 2; } +def XGeneWriteVI1Fmov : SchedWriteRes<[XGeneUnitFSU]> { + let Latency = 2; + let ResourceCycles = [1]; + let NumMicroOps = 1; } +// instructions with store ops are extra-special because the chip will be using +// data bypass; latencies are measured since the registers become available and +// are as following: +// for int, 1 for the address register, -1 for the data register +// for float, 4 for the address register, 1 for the data register +// for complex, 4 for the address register, 2 for the data register +// we assumed latencies from data register availability +def XGeneWriteVI1St1Lf : SchedWriteRes<[XGeneUnitSt, XGeneUnitLd]> { + let Latency = 9; + let ResourceCycles = [1, 1]; + let NumMicroOps = 2; } +def XGeneWriteVI1St1Lf1Asi : SchedWriteRes<[XGeneUnitSt, XGeneUnitLd, XGeneUnitFSU]> { + let Latency = 12; // assumed Asi latency 3 + let ResourceCycles = [1, 1, 1]; + let NumMicroOps = 3; } +def XGeneWriteVI1St1Lf1Falu : SchedWriteRes<[XGeneUnitSt, XGeneUnitLd, XGeneUnitFSU]> { + let Latency = 14; // assumed Falu latency 5 + let ResourceCycles = [1, 1, 1]; + let NumMicroOps = 3; } +def XGeneWriteVI1Sf1Ld : SchedWriteRes<[XGeneUnitSt, XGeneUnitLd]> { + let Latency = 6; + let ResourceCycles = [1, 1]; + let NumMicroOps = 2; } +def XGeneWriteVI1Sf1Ld1Sbfm : SchedWriteRes<[XGeneUnitSt, XGeneUnitLd, XGeneUnitIXn]> 
{ + let Latency = 7; // assumed Sbfm latency 1 + let ResourceCycles = [1, 1, 1]; + let NumMicroOps = 3; } +def XGeneWriteVI1Sf1Ld1Ubfm : SchedWriteRes<[XGeneUnitSt, XGeneUnitLd, XGeneUnitIXn]> { + let Latency = 7; // assumed Ubfm latency 1 + let ResourceCycles = [1, 1, 1]; + let NumMicroOps = 3; } +def XGeneWriteVI2Asa : SchedWriteRes<[XGeneUnitFSU]> { + let Latency = 6; + let ResourceCycles = [2]; + let NumMicroOps = 2; } +def XGeneWriteVI2AsaAsa : SchedWriteRes<[XGeneUnitFSU]> { + let Latency = 12; + let ResourceCycles = [4]; + let NumMicroOps = 4; } +def XGeneWriteVI3Asa : SchedWriteRes<[XGeneUnitFSU]> { + let Latency = 9; + let ResourceCycles = [3]; + let NumMicroOps = 3; } +def XGeneWriteVI4Asa : SchedWriteRes<[XGeneUnitFSU]> { + let Latency = 12; + let ResourceCycles = [4]; + let NumMicroOps = 4; } +def XGeneWriteVI2Asl : SchedWriteRes<[XGeneUnitFSU]> { + let Latency = 4; + let ResourceCycles = [2]; + let NumMicroOps = 2; } +def XGeneWriteVI2AslAsl : SchedWriteRes<[XGeneUnitFSU]> { + let Latency = 8; + let ResourceCycles = [4]; + let NumMicroOps = 4; } +def XGeneWriteVI4Asl : SchedWriteRes<[XGeneUnitFSU]> { + let Latency = 8; + let ResourceCycles = [4]; + let NumMicroOps = 4; } +def XGeneWriteVI4AslAsl : SchedWriteRes<[XGeneUnitFSU]> { + let Latency = 16; + let ResourceCycles = [8]; + let NumMicroOps = 8; } +def XGeneWriteVI6Asl : SchedWriteRes<[XGeneUnitFSU]> { + let Latency = 12; + let ResourceCycles = [6]; + let NumMicroOps = 6; } +def XGeneWriteVI6AslAsl : SchedWriteRes<[XGeneUnitFSU]> { + let Latency = 24; + let ResourceCycles = [12]; + let NumMicroOps = 12; } +def XGeneWriteVI8Asl : SchedWriteRes<[XGeneUnitFSU]> { + let Latency = 16; + let ResourceCycles = [8]; + let NumMicroOps = 8; } +def XGeneWriteVI8AslAsl : SchedWriteRes<[XGeneUnitFSU]> { + let Latency = 32; + let ResourceCycles = [16]; + let NumMicroOps = 16; } + +//--- +// AdvSIMD Data Processing (Vector FP) +//--- +def XGeneWriteVF1Asm : SchedWriteRes<[XGeneUnitFSU]> { + let Latency = 5; + let 
ResourceCycles = [1]; + let NumMicroOps = 1; } +def XGeneWriteVF1AsmAsm : SchedWriteRes<[XGeneUnitFSU]> { + let Latency = 10; + let ResourceCycles = [2]; + let NumMicroOps = 2; } +def XGeneWriteVF1Falu : SchedWriteRes<[XGeneUnitFSU]> { + let Latency = 5; // assumed Falu latency of 5 + let ResourceCycles = [1]; + let NumMicroOps = 1; } +def XGeneWriteVF1FaluFalu : SchedWriteRes<[XGeneUnitFSU]> { + let Latency = 10; + let ResourceCycles = [2]; + let NumMicroOps = 2; } +def XGeneWriteVF1Fcvt : SchedWriteRes<[XGeneUnitFSU]> { + let Latency = 5; // assumed Fcvt latency of 5 + let ResourceCycles = [1]; + let NumMicroOps = 1; } +def XGeneWriteVF1FcvtFcvt : SchedWriteRes<[XGeneUnitFSU]> { + let Latency = 10; + let ResourceCycles = [2]; + let NumMicroOps = 2; } +def XGeneWriteVF1Fdivd : SchedWriteRes<[XGeneUnitFSU, XGeneLockFInst]> { + let Latency = 28; + let ResourceCycles = [11, 28]; + let NumMicroOps = 1; } +def XGeneWriteVF1FdivdFdivd : SchedWriteRes<[XGeneUnitFSU, XGeneLockFInst]> { + let Latency = 56; + let ResourceCycles = [22, 56]; + let NumMicroOps = 2; } +def XGeneWriteVF1Fhcvt : SchedWriteRes<[XGeneUnitFSU]> { + let Latency = 3; + let ResourceCycles = [1]; + let NumMicroOps = 1; } +def XGeneWriteVF1Fmov : SchedWriteRes<[XGeneUnitFSU]> { + let Latency = 2; + let ResourceCycles = [1]; + let NumMicroOps = 1; } +def XGeneWriteVF1FmovFmov : SchedWriteRes<[XGeneUnitFSU]> { + let Latency = 4; + let ResourceCycles = [2]; + let NumMicroOps = 2; } +def XGeneWriteVF1Fsel : SchedWriteRes<[XGeneUnitFSU]> { + let Latency = 3; + let ResourceCycles = [1]; + let NumMicroOps = 1; } +def XGeneWriteVF1FselFsel : SchedWriteRes<[XGeneUnitFSU]> { + let Latency = 6; + let ResourceCycles = [2]; + let NumMicroOps = 2; } +def XGeneWriteVF1Fsqrd : SchedWriteRes<[XGeneUnitFSU, XGeneLockFInst]> { + let Latency = 38; + let ResourceCycles = [17, 38]; + let NumMicroOps = 1; } +def XGeneWriteVF1Adre : SchedWriteRes<[XGeneUnitFSU]> { + let Latency = 3; + let ResourceCycles = [1]; + let NumMicroOps 
= 1; } +def XGeneWriteVF1Asre : SchedWriteRes<[XGeneUnitFSU]> { + let Latency = 5; + let ResourceCycles = [1]; + let NumMicroOps = 1; } +def XGeneWriteVF2Asa : SchedWriteRes<[XGeneUnitFSU]> { + let Latency = 6; + let ResourceCycles = [2]; + let NumMicroOps = 2; } + +//--- +// Read Advances +// No forwarding for these reads +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; + +// Arithmetic instructions which set the state flag introduce one more cycle of +// latency when the flag is required by a conditional +def XGeneWriteISFlags : SchedWriteRes<[]>; +def XGeneReadISFlags : SchedReadAdvance<-1, [XGeneWriteISFlags]>; +def XGeneReadISFlagsVar : SchedReadVariant<[ + SchedVar]>; +def : SchedAlias; + +// Store instructions introduce extra latency cycles when the registers are +// used in a dependent load as such: +// Store type Address register Data register +// Integer 1 -1 +// Float 4 1 +// Complex 4 2 +def XGeneWriteSTI : SchedWriteRes<[]>; +def XGeneWriteSTF : SchedWriteRes<[]>; + +// Scalar loads +def XGeneReadLDSTI : SchedReadAdvance<-1, [XGeneWriteSTI]>; +def XGeneReadLDSTF : SchedReadAdvance<-4, [XGeneWriteSTF]>; +def XGeneReadLDVar : SchedReadVariant<[ + SchedVar, + SchedVar]>; +def : SchedAlias; + +// Vector loads are affected by the same latencies as regular loads when it +// comes to preceding stores using the same registers +def XGeneReadVLDSTI : SchedReadAdvance<-1, [XGeneWriteSTI]>; +def XGeneReadVLDSTF : SchedReadAdvance<-4, [XGeneWriteSTF]>; +def XGeneReadVLDVar : SchedReadVariant<[ + SchedVar, + SchedVar]>; +def : SchedAlias; + +//--- +// Grouping instructions with similar requirements in groups with specific names +// Naming scheme +// XGeneWrite[GROUP][opList] +// GROUP can be an instruction group, eg. 
LD, ST, ALU etc +// opList is a list of ops in the format [k1][Op1][k2][Op2]...[kN][OpN], for +// example: load op + load op + arithmetic op -> 1Ld1Ld1Alu +// if the ops are independent, they will be grouped together under the same +// op identifier: load op + load op & arithmetic op -> 1Ld1LdAlu +// load op & load op + load op -> 1LdLd1Ld // just an example +// if the ops are identical, they can be grouped as such: +// * within a group: prepend with Xn, where n is how many times the sequence +// is repeated: 1LdLdLdLd -> 1X4Ld +// * multiple groups can be put together as such: 1LdLd1LdLd -> 2LdLd +// Group names used for this machine model (with usual latencies) +// Nop / Nop (latency 0) +// Br BU Branch (latency 0) +// Alu IXn Arithmetic/logical op (latency 1) +// Sbfm IXn Sbfm (latency 1 or 2) +// Ubfm IXn Ubfm (latency 1 or 2) +// Alb IXB Arithmetic/logical op on IXB (latency 2) +// bfm, extr, shift/rotate, SIMD are mapped to Alb +// Car IXB Carry (latency 1) +// Set IXB Flag setting (latency 1) +// Div IXB Integer division (latency 7, 10, 14, 18, 26 etc.) 
+// Mlw IXB Integer 32bit multiplication (latency 4) +// Mlx IXB Integer 64bit multiplication (latency 5) +// Asa FSU ASIMD arithmetic (latency 3) +// Asi FSU ASIMD insert (latency 3, also depends on previous destination) +// Asl FSU ASIMD logical (latency 2 or 3 in MA or FP stores) +// Ass FSU ASIMD shift (latency 3) +// Asm FSU ASIMD multiply (latency 5) +// Adre FSU ASIMD sre/dre (latency 5) NOTE(review): XGeneWriteVF1Adre is defined with latency 3 - confirm +// Apol FSU ASIMD polymul (latency 3) +// Asre FSU ASIMD sre/dre (latency 3) NOTE(review): XGeneWriteVF1Asre is defined with latency 5 - confirm +// Falu FSU Floating point arithmetic (latency 5, 6, 7) +// Fcmp FSU Floating point compare (latency 10, 11, 19) +// Fcvt FSU Floating point convert (latency 5, 6, 7) +// Fdivs FSU Single precision division (latency 22, 24 +1 +2) +// Fdivd FSU Double precision division (latency 24, 28 +1 +2) +// Fhcvt FSU Floating point half convert (latency 3) +// Fmov FSU Floating point move (latency 2 or 3 in MA or FP stores) +// Fmul FSU ASIMD multiply (latency 5) +// Fsel FSU Floating point select (latency 3) +// Fsqrs FSU Single precision sqrt (latency 22, 24 +1 +2) +// Fsqrd FSU Double precision sqrt (latency 24, 38 +1 +2) TODO: 38? 
+// Lc LD Complex load (latency 11) +// Ld LD Integer load (latency 5) +// Lf LD Floating point load (latency 10) +// Sc ST Complex store (latency 0) +// St ST Integer store (latency 0) +// Sf ST Floating point store (latency 0) +//--- + +//--- +// AdvSIMD Data Processing (Scalar FP) +// * NOTE: in the arm64 instruction model of llvm, the scalar floating point +// * instructions are defined as vector instructions with one element v1i64 +// * We will stay consistent with this model and put the one-element vector +// * instructions in the scalar group +// Floating-point immediate: +// 1Fmov: FMOV (immediate) +// Floating-point data-processing: +// 1Fmov: FMOV (register) +// 1Fmov: FABS, FNEG (1 source) +// 1Fsqrs: FSQRT (1 source single precision) +// 1Fsqrd: FSQRT (1 source double precision) +// 1Falu: FMUL, FADD, FSUB, FNMUL (2 source) +// 1Fdivs: FDIV (2 source single precision) +// 1Fdivd: FDIV (2 source double precision) +// 1Fsel: FMAX, FMIN, FMAXNM, FMINNM (2 source) +// 1Falu: FMADD, FMSUB, FNMADD, FNMSUB (3 source) +// Floating-point compare: +// 1Fcmp: FCMP, FCMPE (all) +// Floating-point convert: +// 1Fcvt: FRINTN, FRINTP, FRINTM, FRINTZ, FRINTA, FRINTX, FRINTI (all) +// 1Falu: FCVT (1 source single to double or double to single) +// 1Fhcvt: FCVT (1 source to or from half precision) +// Floating-point conditional: +// 1Fcmp: FCCMP, FCCMPE (compare) +// 1Fsel: FCSEL (select) +// Floating-point<->integer conversions: +// 1Fcvt1Sf1Ld: FCVTNS, FCVTAS, FCVTPS, FCVTMS (integer) +// 1Fcvt1Sf1Ld: FCVTNU, FCVTAU, FCVTPU, FCVTMU (integer) +// 1Fcvt1Sf1Ld: FCVTZS, FCVTZU (integer) +// 1St1Lf1Falu: SCVTF, UCVTF (integer) +// 1Fmov: FMOV (general register to FP register with Rn=XZR, WZR) +// 1St1Lf: FMOV (general register to FP register with other Rn) +// 1Sf1Ld: FMOV (from FP register to general register) +// Floating-point<->fixed-point conversions: +// 1Fcvt1Sf1Ld: FCVTZS, FCVTZU (fixed-point) +// 1St1Lf1Falu: SCVTF, UCVTF (fixed-point) +// AdvSIMD scalar three 
same:
+// 1Falu: FMULX, FRECPS, FRSQRTS, FABD (three same)
+// 1Fsel: FCMEQ, FCMGE, FCMGT, FACGE, FACGT (three same)
+// AdvSIMD two-reg misc:
+// 1Falu: FCVTXN (two reg)
+// 1Falu: SCVTF, UCVTF (integer)
+// 1Fcvt: FCVTNS, FCVTMS, FCVTAS, FCVTPS (two reg)
+// 1Fcvt: FCVTNU, FCVTMU, FCVTAU, FCVTPU (two reg)
+// 1Fcvt: FCVTZS, FCVTZU (integer)
+// 1Fsel: FCMGT, FCMEQ, FCMLT, FCMGE, FCMLE (zero)
+// 1Adre: FRECPE, FRECPX (two reg)
+// 1Asre: FRSQRTE (two reg)
+// AdvSIMD scalar pairwise:
+// 1Falu: FADDP (pair)
+// 1Fsel: FMAXP, FMINP, FMAXNMP, FMINNMP (pair)
+// AdvSIMD scalar x indexed element
+// * NOTE: These seem to be bundled with vector elements in the arm model, we will
+// * model scalar elements here and vector elements in the vector section where
+// * they rightfully belong
+// 1Asm: FMUL, FMULX, FMLA, FMLS (by element)
+// AdvSIMD scalar shift by immediate:
+// 1Falu: SCVTF, UCVTF (fixed-point)
+// 1Fcvt: FCVTZS, FCVTZU (fixed-point)
+//---
+
+// COPY uses the generic WriteI write rather than an X-Gene specific group.
+def : InstRW<[WriteI], (instrs COPY)>;
+
+// Floating-point immediate
+def : InstRW<[XGeneWriteF1Fmov], (instregex "FMOV(D|H|S)i$")>;
+
+// Floating-point data-processing (1 and 2 source).
+// Half-precision FSQRT/FDIV share the single-precision groups (Fsqrs/Fdivs).
+def : InstRW<[XGeneWriteF1Fmov], (instregex "FMOV(D|H|S)r$")>;
+def : InstRW<[XGeneWriteF1Fmov], (instregex "FABS(D|H|S)r$")>;
+def : InstRW<[XGeneWriteF1Fmov], (instregex "FNEG(D|H|S)r$")>;
+def : InstRW<[XGeneWriteF1Fsqrs], (instregex "FSQRT(H|S)r$")>;
+def : InstRW<[XGeneWriteF1Fsqrd], (instregex "FSQRTDr$")>;
+def : InstRW<[XGeneWriteF1Falu], (instregex "FMUL(D|H|S)rr$")>;
+def : InstRW<[XGeneWriteF1Falu], (instregex "FADD(D|H|S)rr$")>;
+def : InstRW<[XGeneWriteF1Falu], (instregex "FSUB(D|H|S)rr$")>;
+def : InstRW<[XGeneWriteF1Falu], (instregex "FNMUL(D|H|S)rr$")>;
+def : InstRW<[XGeneWriteF1Fdivs], (instregex "FDIV(H|S)rr$")>;
+def : InstRW<[XGeneWriteF1Fdivd], (instregex "FDIVDrr$")>;
+def : InstRW<[XGeneWriteF1Fsel], (instregex "FMAX(D|H|S)rr$")>;
+def : InstRW<[XGeneWriteF1Fsel], (instregex "FMIN(D|H|S)rr$")>;
+def : InstRW<[XGeneWriteF1Fsel], (instregex "FMAXNM(D|H|S)rr$")>;
+def :
InstRW<[XGeneWriteF1Fsel], (instregex "FMINNM(D|H|S)rr$")>;
+// Floating-point data-processing (3 source)
+def : InstRW<[XGeneWriteF1Falu], (instregex "FMADD(D|H|S)rrr$")>;
+def : InstRW<[XGeneWriteF1Falu], (instregex "FMSUB(D|H|S)rrr$")>;
+def : InstRW<[XGeneWriteF1Falu], (instregex "FNMADD(D|H|S)rrr$")>;
+def : InstRW<[XGeneWriteF1Falu], (instregex "FNMSUB(D|H|S)rrr$")>;
+
+// Floating-point compare
+def : InstRW<[XGeneWriteF1Fcmp], (instregex "FCMP(D|H|S)r(r|i)$")>;
+def : InstRW<[XGeneWriteF1Fcmp], (instregex "FCMPE(D|H|S)r(r|i)$")>;
+
+// Floating-point convert (rounding, precision change, half precision)
+def : InstRW<[XGeneWriteF1Fcvt], (instregex "FRINT(N|P|M|Z|A|X|I)(D|H|S)r$")>;
+def : InstRW<[XGeneWriteF1Falu], (instregex "FCVT(SD|DS)r$")>;
+def : InstRW<[XGeneWriteF1Fhcvt], (instregex "FCVT(HS|HD|SH|DH)r$")>;
+
+// Floating-point conditional compare / select
+def : InstRW<[XGeneWriteF1Fcmp], (instregex "FCCMP(D|H|S)rr$")>;
+def : InstRW<[XGeneWriteF1Fcmp], (instregex "FCCMPE(D|H|S)rr$")>;
+def : InstRW<[XGeneWriteF1Fsel], (instregex "FCSEL(D|H|S)rrr$")>;
+
+// Floating-point<->integer conversions
+def : InstRW<[XGeneWriteF1Fcvt1Sf1Ld], (instregex "FCVTN(S|U)U(W|X)(D|H|S)r$")>;
+def : InstRW<[XGeneWriteF1Fcvt1Sf1Ld], (instregex "FCVTM(S|U)U(W|X)(D|H|S)r$")>;
+def : InstRW<[XGeneWriteF1Fcvt1Sf1Ld], (instregex "FCVTA(S|U)U(W|X)(D|H|S)r$")>;
+def : InstRW<[XGeneWriteF1Fcvt1Sf1Ld], (instregex "FCVTP(S|U)U(W|X)(D|H|S)r$")>;
+def : InstRW<[XGeneWriteF1Fcvt1Sf1Ld], (instregex "FCVTZ(S|U)U(W|X)(D|H|S)r$")>;
+// The group list names both SCVTF and UCVTF (integer) as 1St1Lf1Falu; the
+// unsigned form was previously unmapped, unlike the fixed-point pair below.
+def : InstRW<[XGeneWriteF1St1Lf1Falu], (instregex "SCVTFU(W|X)(D|H|S)ri$")>;
+def : InstRW<[XGeneWriteF1St1Lf1Falu], (instregex "UCVTFU(W|X)(D|H|S)ri$")>;
+def : InstRW<[XGeneWriteF1Fmov], (instregex "FMOV(D|S)0$")>;
+def : InstRW<[XGeneWriteF1St1Lf], (instregex "FMOV(WH|XH|WS|XD|XDHigh)r$")>;
+def : InstRW<[XGeneWriteF1Sf1Ld], (instregex "FMOV(HW|HX|SW|DX|DXHigh)r$")>;
+
+// Floating-point<->fixed-point conversions
+def : InstRW<[XGeneWriteF1Fcvt1Sf1Ld], (instregex "FCVTZSS(W|X)(D|H|S)ri$")>;
+def : InstRW<[XGeneWriteF1Fcvt1Sf1Ld], (instregex "FCVTZUS(W|X)(D|H|S)ri$")>;
+def : InstRW<[XGeneWriteF1St1Lf1Falu], (instregex "SCVTFS(W|X)(D|H|S)ri$")>;
+def : InstRW<[XGeneWriteF1St1Lf1Falu], (instregex "UCVTFS(W|X)(D|H|S)ri$")>;
+
+// AdvSIMD scalar three same
+def : InstRW<[XGeneWriteF1Falu], (instregex "FMULX(16|32|64)$")>;
+def :
InstRW<[XGeneWriteF1Falu], (instregex "FRECPS(16|32|64)$")>;
+def : InstRW<[XGeneWriteF1Falu], (instregex "FRSQRTS(16|32|64)$")>;
+def : InstRW<[XGeneWriteF1Falu], (instregex "FABD(16|32|64)$")>;
+def : InstRW<[XGeneWriteF1Fsel], (instregex "FCM(EQ|GE|GT)(16|32|64)$")>;
+def : InstRW<[XGeneWriteF1Fsel], (instregex "FAC(GE|GT)(16|32|64)$")>;
+
+// AdvSIMD scalar two-reg misc
+def : InstRW<[XGeneWriteF1Falu], (instregex "FCVTXNv1i64$")>;
+// The group list names both SCVTF and UCVTF (integer) as 1Falu; the unsigned
+// form was previously unmapped.
+def : InstRW<[XGeneWriteF1Falu], (instregex "SCVTFv1(i16|i32|i64)$")>;
+def : InstRW<[XGeneWriteF1Falu], (instregex "UCVTFv1(i16|i32|i64)$")>;
+def : InstRW<[XGeneWriteF1Fcvt], (instregex "FCVT(N|M|A|P)Sv1(f16|i32|i64)$")>;
+def : InstRW<[XGeneWriteF1Fcvt], (instregex "FCVT(N|M|A|P)Uv1(f16|i32|i64)$")>;
+def : InstRW<[XGeneWriteF1Fcvt], (instregex "FCVTZ(S|U)v1(f16|i32|i64)$")>;
+def : InstRW<[XGeneWriteF1Fsel], (instregex "FCMGTv1(i16|i32|i64)rz$")>;
+def : InstRW<[XGeneWriteF1Fsel], (instregex "FCMEQv1(i16|i32|i64)rz$")>;
+def : InstRW<[XGeneWriteF1Fsel], (instregex "FCMLTv1(i16|i32|i64)rz$")>;
+def : InstRW<[XGeneWriteF1Fsel], (instregex "FCMGEv1(i16|i32|i64)rz$")>;
+def : InstRW<[XGeneWriteF1Fsel], (instregex "FCMLEv1(i16|i32|i64)rz$")>;
+def : InstRW<[XGeneWriteF1Adre], (instregex "FRECP(E|X)v1(f16|i32|i64)$")>;
+def : InstRW<[XGeneWriteF1Asre], (instregex "FRSQRTEv1(f16|i32|i64)$")>;
+
+// AdvSIMD scalar pairwise
+def : InstRW<[XGeneWriteF1Falu], (instregex "FADDPv2(i16|i32|i64)p$")>;
+def : InstRW<[XGeneWriteF1Fsel], (instregex "FMAXPv2(i16|i32|i64)p$")>;
+def : InstRW<[XGeneWriteF1Fsel], (instregex "FMINPv2(i16|i32|i64)p$")>;
+def : InstRW<[XGeneWriteF1Fsel], (instregex "FMAXNMPv2(i16|i32|i64)p$")>;
+def : InstRW<[XGeneWriteF1Fsel], (instregex "FMINNMPv2(i16|i32|i64)p$")>;
+
+// AdvSIMD scalar x indexed element
+def : InstRW<[XGeneWriteF1Asm], (instregex "FMULv1(i16|i32|i64)_indexed$")>;
+def : InstRW<[XGeneWriteF1Asm], (instregex "FMULXv1(i16|i32|i64)_indexed$")>;
+def : InstRW<[XGeneWriteF1Asm], (instregex "FML(A|S)v1(i16|i32|i64)_indexed$")>;
+
+// TODO: maybe add |h| to these groups as well?
it would make sense
+// AdvSIMD scalar shift by immediate (fixed-point conversions)
+def : InstRW<[XGeneWriteF1Falu], (instregex "SCVTF(s|d)$")>;
+def : InstRW<[XGeneWriteF1Falu], (instregex "UCVTF(s|d)$")>;
+def : InstRW<[XGeneWriteF1Fcvt], (instregex "FCVTZ(S|U)(s|d)$")>;
+
+//---
+// Load instructions (38 groups in total)
+// 1Ld: LDR (literal)
+// 1Ld: LDURB, LDURH, LDUR (unscaled immediate)
+// 1Ld: LDRB, LDRH, LDR (register offset)
+// 1Ld: LDRB, LDRH, LDR (unsigned immediate)
+// 1LdLd: LDP (offset)
+// 1LdAlu LDRB, LDRH, LDR (immediate post-indexed)
+// 1LdAlu LDRB, LDRH, LDR (immediate pre-indexed)
+// 1LdLdAlu LDP (post-indexed)
+// 1LdLdAlu LDP (pre-indexed)
+// 1Ld1Sbfm LDRSW (literal)
+// 1Ld1Sbfm LDURSB, LDURSH, LDURSW (unscaled immediate)
+// 1Ld1Sbfm LDRSB, LDRSH, LDRSW (register offset)
+// 1Ld1Sbfm LDRSB, LDRSH, LDRSW (unsigned immediate)
+// 1Ld1SbfmAlu LDRSB, LDRSH, LDRSW (immediate post-indexed)
+// 1Ld1SbfmAlu LDRSB, LDRSH, LDRSW (immediate pre-indexed)
+// 1Ld1LdSbfm1Sbfm LDPSW (offset)
+// 1Ld1LdSbfm1SbfmAlu LDPSW (post-indexed)
+// 1Ld1LdSbfm1SbfmAlu LDPSW (pre-indexed)
+//---
+// NOTE(review): the literal (S|D|Q) and unsigned-immediate (B|H|S|D|Q) forms
+// matched below are FP/SIMD register loads, yet they are placed in the
+// integer 1Ld group here, while the FP/SIMD load sections list
+// "literal, unscaled immediate, unsigned immediate" under 1Lf / 1LfLf.
+// Confirm whether these forms should use the Lf groups instead.
+def : InstRW<[XGeneWriteLD1Ld], (instregex "LDR(W|X|S|D|Q)l$")>;
+def : InstRW<[XGeneWriteLD1Ld], (instregex "LDUR(X|W|HH|BB)i$")>;
+def : InstRW<[XGeneWriteLD1Ld], (instregex "LDR(BB|HH|W|X)ro(X|W)$")>;
+def : InstRW<[XGeneWriteLD1Ld], (instregex "LDR(BB|HH|W|X|B|H|S|D|Q)ui$")>;
+
+def : InstRW<[XGeneWriteLD1LdLd], (instregex "LDP(W|X)i$")>;
+
+def : InstRW<[XGeneWriteLD1LdAlu], (instregex "LDR(BB|HH|W|X)post$")>;
+def : InstRW<[XGeneWriteLD1LdAlu], (instregex "LDR(BB|HH|W|X)pre$")>;
+
+def : InstRW<[XGeneWriteLD1LdLdAlu], (instregex "LDP(W|X)post$")>;
+def : InstRW<[XGeneWriteLD1LdLdAlu], (instregex "LDP(W|X)pre$")>;
+
+// Sign-extending loads: load op followed by an Sbfm op.
+def : InstRW<[XGeneWriteLD1Ld1Sbfm], (instregex "LDRSWl$")>;
+def : InstRW<[XGeneWriteLD1Ld1Sbfm], (instregex "LDURS(BW|BX|HW|HX|W)i$")>;
+def : InstRW<[XGeneWriteLD1Ld1Sbfm], (instregex "LDRS(BW|BX|HW|HX|W)ro(X|W)$")>;
+def : InstRW<[XGeneWriteLD1Ld1Sbfm], (instregex "LDRS(BW|BX|HW|HX|W)ui$")>;
+
+def :
InstRW<[XGeneWriteLD1Ld1SbfmAlu], (instregex "LDRS(BW|BX|HW|HX|W)post$")>;
+def : InstRW<[XGeneWriteLD1Ld1SbfmAlu], (instregex "LDRS(BW|BX|HW|HX|W)pre$")>;
+
+def : InstRW<[XGeneWriteLD1Ld1LdSbfm1Sbfm], (instregex "LDPSWi$")>;
+def : InstRW<[XGeneWriteLD1Ld1LdSbfm1SbfmAlu], (instregex "LDPSWpost$")>;
+def : InstRW<[XGeneWriteLD1Ld1LdSbfm1SbfmAlu], (instregex "LDPSWpre$")>;
+
+// For read advance - all integer load ops
+// NOTE(review): every regex below repeats one already used by a write mapping
+// above, so each load is matched by two InstRW records. Confirm that TableGen
+// combines them as intended instead of rejecting or ignoring the overlap.
+def : InstRW<[XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDR(W|X|S|D|Q)l$")>;
+def : InstRW<[XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDUR(X|W|HH|BB)i$")>;
+def : InstRW<[XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDR(BB|HH|W|X)ro(X|W)$")>;
+def : InstRW<[XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDR(BB|HH|W|X|B|H|S|D|Q)ui$")>;
+def : InstRW<[XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDP(W|X)i$")>;
+def : InstRW<[XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDR(BB|HH|W|X)post$")>;
+def : InstRW<[XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDR(BB|HH|W|X)pre$")>;
+def : InstRW<[XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDP(W|X)post$")>;
+def : InstRW<[XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDP(W|X)pre$")>;
+def : InstRW<[XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDRSWl$")>;
+def : InstRW<[XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDURS(BW|BX|HW|HX|W)i$")>;
+def : InstRW<[XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDRS(BW|BX|HW|HX|W)ro(X|W)$")>;
+def : InstRW<[XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDRS(BW|BX|HW|HX|W)ui$")>;
+def : InstRW<[XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDRS(BW|BX|HW|HX|W)post$")>;
+def : InstRW<[XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDRS(BW|BX|HW|HX|W)pre$")>;
+def : InstRW<[XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDPSWi$")>;
+def : InstRW<[XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDPSWpost$")>;
+def : InstRW<[XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDPSWpre$")>;
+
+//---
+// Load instructions - 64-bit FP/SIMD (8 groups in total)
+// 1Lf: LDUR (literal, unscaled
immediate, unsigned immediate)
+// 1LfAlu: LDR (immediate post-indexed)
+// 1LfAlu: LDR (immediate pre-indexed)
+// 1LfAlu: LDR (register offset)
+// 1LfLf: LDP (offset)
+// 1LfLfAlu: LDP (post-indexed, pre-indexed)
+//---
+// for LDUR, the llvm arm64 model only defines the unscaled immediates
+// B/H/S/D register forms share one mapping per addressing mode.
+def : InstRW<[XGeneWriteLD1Lf], (instregex "LDUR(B|H|S|D)i$")>;
+def : InstRW<[XGeneWriteLD1LfAlu], (instregex "LDR(B|H|S|D)post$")>;
+def : InstRW<[XGeneWriteLD1LfAlu], (instregex "LDR(B|H|S|D)pre$")>;
+def : InstRW<[XGeneWriteLD1LfAlu], (instregex "LDR(B|H|S|D)ro(X|W)$")>;
+def : InstRW<[XGeneWriteLD1LfLf], (instregex "LDP(D|S)i$")>;
+def : InstRW<[XGeneWriteLD1LfLfAlu], (instregex "LDP(D|S)post$")>;
+def : InstRW<[XGeneWriteLD1LfLfAlu], (instregex "LDP(D|S)pre$")>;
+
+//---
+// Load instructions - 128-bit FP/SIMD (8 groups in total)
+// 1LfLf: LDUR (literal, unscaled immediate, unsigned immediate)
+// 1LfLfAlu: LDR (immediate post-indexed)
+// 1LfLfAlu: LDR (immediate pre-indexed)
+// 1LfAlu1Lf: LDR (register offset)
+// 1LfLfAlu1LfLf: LDP (offset)
+// 1LfLfAlu1LfLfAlu: LDP (post-indexed, pre-indexed)
+//---
+// for LDUR, the llvm arm64 model only defines the unscaled immediates
+// Q-register forms use groups with twice as many Lf ops as the 64-bit forms.
+def : InstRW<[XGeneWriteLD1LfLf], (instregex "LDURQi$")>;
+def : InstRW<[XGeneWriteLD1LfLfAlu], (instregex "LDRQpost$")>;
+def : InstRW<[XGeneWriteLD1LfLfAlu], (instregex "LDRQpre$")>;
+def : InstRW<[XGeneWriteLD1LfAlu1Lf], (instregex "LDRQro(X|W)$")>;
+def : InstRW<[XGeneWriteLD1LfLfAlu1LfLf], (instregex "LDPQi$")>;
+def : InstRW<[XGeneWriteLD1LfLfAlu1LfLfAlu], (instregex "LDPQpost$")>;
+def : InstRW<[XGeneWriteLD1LfLfAlu1LfLfAlu], (instregex "LDPQpre$")>;
+
+// For read advance - all float load ops
+// NOTE(review): these regexes overlap the FP/SIMD write mappings above, so
+// each load is matched by two InstRW records - confirm this is accepted.
+def : InstRW<[XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDUR(B|H|S|D|Q)i$")>;
+def : InstRW<[XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDR(B|H|S|D|Q)post$")>;
+def : InstRW<[XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDR(B|H|S|D|Q)pre$")>;
+def : InstRW<[XGeneReadLDSTI, XGeneReadLDSTF],
(instregex "LDR(B|H|S|D|Q)ro(X|W)$")>;
+// Pair loads of S, D and Q registers get the same two read advances.
+def : InstRW<[XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDP(D|S|Q)i$")>;
+def : InstRW<[XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDP(D|S|Q)post$")>;
+def : InstRW<[XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDP(D|S|Q)pre$")>;
+
+//---
+// Vector Load (66 groups in total)
+// 1Lc: LD1 (one register 2S/4H/8B)
+// 1LcAlu: LD1 (one register 2S/4H/8B pre/post indexed)
+// 1LcLc: LD1 (one register 4S/8H/16B)
+// 1LcLc: LD1 (two registers 2S/4H/8B)
+// 1LcLc: LD2 (two registers 2S/4H/8B)
+// 1LcLcAlu: LD1 (one register 4S/8H/16B pre/post indexed)
+// 1LcLcAlu: LD1 (two registers 2S/4H/8B pre/post indexed)
+// 1LcLcAlu: LD2 (two registers 2S/4H/8B pre/post indexed)
+// 1X3Lc: LD1 (three registers 2S/4H/8B)
+// 1X3Lc: LD3 (three registers 2S/4H/8B)
+// 1X3LcAlu: LD1 (three registers 2S/4H/8B pre/post indexed)
+// 1X3LcAlu: LD3 (three registers 2S/4H/8B pre/post indexed)
+// 1X4Lc: LD1 (two registers 4S/8H/16B)
+// 1X4Lc: LD1 (four registers 2S/4H/8B)
+// 1X4Lc: LD2 (two registers 4S/8H/16B)
+// 1X4Lc: LD4 (four registers 2S/4H/8B)
+// 1X4LcAlu: LD1 (two registers 4S/8H/16B pre/post indexed)
+// 1X4LcAlu: LD1 (four registers 2S/4H/8B pre/post indexed)
+// 1X4LcAlu: LD2 (two registers 4S/8H/16B pre/post indexed)
+// 1X4LcAlu: LD4 (four registers 2S/4H/8B pre/post indexed)
+// 1X6Lc: LD1 (three registers 4S/8H/16B)
+// 1X6Lc: LD3 (three registers 4S/8H/16B)
+// 1X6LcAlu: LD1 (three registers 4S/8H/16B pre/post indexed)
+// 1X6LcAlu: LD3 (three registers 4S/8H/16B pre/post indexed)
+// 1X8Lc: LD1 (four registers 4S/8H/16B)
+// 1X8Lc: LD4 (four registers 4S/8H/16B)
+// 1X8LcAlu: LD1 (four registers 4S/8H/16B pre/post indexed)
+// 1X8LcAlu: LD4 (four registers 4S/8H/16B pre/post indexed)
+// 1Lf: LD1 (one register 1D)
+// 1Lf: LD1R (other)
+// 1LfAlu: LD1 (one register 1D pre/post indexed)
+// 1LfAlu: LD1R (pre/post indexed)
+// 1LfLf: LD1 (one register 2D)
+// 1LfLf: LD1 (two registers 1D)
+// 1LfLf: LD2R (other)
+// 1LfLfAlu: LD1
(one register 2D pre/post indexed)
+// 1LfLfAlu: LD1 (two registers 1D pre/post indexed)
+// 1LfLfAlu: LD2R (pre/post indexed)
+// 1X3Lf: LD1 (three registers 1D)
+// 1X3Lf: LD3R (other)
+// 1X3LfAlu: LD1 (three registers 1D pre/post indexed)
+// 1X3LfAlu: LD3R (pre/post indexed)
+// 1X4Lf: LD1 (two registers 2D)
+// 1X4Lf: LD1 (four registers 1D)
+// 1X4Lf: LD2 (two registers 2D)
+// 1X4Lf: LD4R (other)
+// 1X4LfAlu: LD1 (two registers 2D pre/post indexed)
+// 1X4LfAlu: LD1 (four registers 1D pre/post indexed)
+// 1X4LfAlu: LD2 (two registers 2D pre/post indexed)
+// 1X4LfAlu: LD4R (pre/post indexed)
+// 1X6Lf: LD1 (three registers 2D)
+// 1X6Lf: LD3 (three registers 2D)
+// 1X6LfAlu: LD1 (three registers 2D pre/post indexed)
+// 1X6LfAlu: LD3 (three registers 2D pre/post indexed)
+// 1X8Lf: LD1 (four registers 2D)
+// 1X8Lf: LD4 (four registers 2D)
+// 1X8LfAlu: LD1 (four registers 2D pre/post indexed)
+// 1X8LfAlu: LD4 (four registers 2D pre/post indexed)
+// 1Lf1Asi: LD1 (one register)
+// 1LfAlu1Asi: LD1 (one register pre/post indexed)
+// 1LfLf1AsiAsi: LD2 (two registers)
+// 1LfLfAlu1AsiAsi: LD2 (two registers pre/post indexed)
+// 1X3Lf1X3Asi: LD3 (three registers)
+// 1X3LfAlu1X3Asi: LD3 (three registers pre/post indexed)
+// 1X4Lf1X4Asi: LD4 (four registers)
+// 1X4LfAlu1X4Asi: LD4 (four registers pre/post indexed)
+//---
+// Multiple-structure loads with S/H/B elements use the Lc (complex load)
+// groups; the _POST forms use the matching group with an extra Alu op.
+def : InstRW<[XGeneWriteLD1Lc], (instregex "LD1Onev(2s|4h|8b)$")>;
+def : InstRW<[XGeneWriteLD1LcAlu], (instregex "LD1Onev(2s|4h|8b)_POST$")>;
+
+def : InstRW<[XGeneWriteLD1LcLc], (instregex "LD1Onev(4s|8h|16b)$")>;
+def : InstRW<[XGeneWriteLD1LcLc], (instregex "LD1Twov(2s|4h|8b)$")>;
+def : InstRW<[XGeneWriteLD1LcLc], (instregex "LD2Twov(2s|4h|8b)$")>;
+def : InstRW<[XGeneWriteLD1LcLcAlu], (instregex "LD1Onev(4s|8h|16b)_POST$")>;
+def : InstRW<[XGeneWriteLD1LcLcAlu], (instregex "LD1Twov(2s|4h|8b)_POST$")>;
+def : InstRW<[XGeneWriteLD1LcLcAlu], (instregex "LD2Twov(2s|4h|8b)_POST$")>;
+
+def : InstRW<[XGeneWriteLD1X3Lc], (instregex "LD1Threev(2s|4h|8b)$")>;
+def : InstRW<[XGeneWriteLD1X3Lc], (instregex "LD3Threev(2s|4h|8b)$")>;
+def : InstRW<[XGeneWriteLD1X3LcAlu], (instregex "LD1Threev(2s|4h|8b)_POST$")>;
+def : InstRW<[XGeneWriteLD1X3LcAlu], (instregex "LD3Threev(2s|4h|8b)_POST$")>;
+
+def : InstRW<[XGeneWriteLD1X4Lc], (instregex "LD1Twov(4s|8h|16b)$")>;
+def : InstRW<[XGeneWriteLD1X4Lc], (instregex "LD1Fourv(2s|4h|8b)$")>;
+def : InstRW<[XGeneWriteLD1X4Lc], (instregex "LD2Twov(4s|8h|16b)$")>;
+def : InstRW<[XGeneWriteLD1X4Lc], (instregex "LD4Fourv(2s|4h|8b)$")>;
+def : InstRW<[XGeneWriteLD1X4LcAlu], (instregex "LD1Twov(4s|8h|16b)_POST$")>;
+def : InstRW<[XGeneWriteLD1X4LcAlu], (instregex "LD1Fourv(2s|4h|8b)_POST$")>;
+def : InstRW<[XGeneWriteLD1X4LcAlu], (instregex "LD2Twov(4s|8h|16b)_POST$")>;
+def : InstRW<[XGeneWriteLD1X4LcAlu], (instregex "LD4Fourv(2s|4h|8b)_POST$")>;
+
+def : InstRW<[XGeneWriteLD1X6Lc], (instregex "LD1Threev(4s|8h|16b)$")>;
+def : InstRW<[XGeneWriteLD1X6Lc], (instregex "LD3Threev(4s|8h|16b)$")>;
+def : InstRW<[XGeneWriteLD1X6LcAlu], (instregex "LD1Threev(4s|8h|16b)_POST$")>;
+def : InstRW<[XGeneWriteLD1X6LcAlu], (instregex "LD3Threev(4s|8h|16b)_POST$")>;
+
+def : InstRW<[XGeneWriteLD1X8Lc], (instregex "LD1Fourv(4s|8h|16b)$")>;
+def : InstRW<[XGeneWriteLD1X8Lc], (instregex "LD4Fourv(4s|8h|16b)$")>;
+def : InstRW<[XGeneWriteLD1X8LcAlu], (instregex "LD1Fourv(4s|8h|16b)_POST$")>;
+def : InstRW<[XGeneWriteLD1X8LcAlu], (instregex "LD4Fourv(4s|8h|16b)_POST$")>;
+
+// 1D/2D multiple-structure loads and all load-and-replicate (LDnR) forms use
+// the Lf (floating point load) groups.
+def : InstRW<[XGeneWriteLD1Lf], (instregex "LD1Onev1d$")>;
+def : InstRW<[XGeneWriteLD1Lf], (instregex "LD1Rv(2s|4h|8b)$")>;
+def : InstRW<[XGeneWriteLD1Lf], (instregex "LD1Rv(4s|8h|16b)$")>;
+def : InstRW<[XGeneWriteLD1Lf], (instregex "LD1Rv(1d|2d)$")>;
+def : InstRW<[XGeneWriteLD1LfAlu], (instregex "LD1Onev1d_POST$")>;
+def : InstRW<[XGeneWriteLD1LfAlu], (instregex "LD1Rv(2s|4h|8b)_POST$")>;
+def : InstRW<[XGeneWriteLD1LfAlu], (instregex "LD1Rv(4s|8h|16b)_POST$")>;
+def : InstRW<[XGeneWriteLD1LfAlu], (instregex "LD1Rv(1d|2d)_POST$")>;
+
+def : InstRW<[XGeneWriteLD1LfLf], (instregex "LD1Onev2d$")>;
+def : InstRW<[XGeneWriteLD1LfLf], (instregex "LD1Twov1d$")>;
+def : InstRW<[XGeneWriteLD1LfLf], (instregex "LD2Rv(2s|4h|8b)$")>;
+def : InstRW<[XGeneWriteLD1LfLf], (instregex "LD2Rv(4s|8h|16b)$")>;
+def : InstRW<[XGeneWriteLD1LfLf], (instregex "LD2Rv(1d|2d)$")>;
+def : InstRW<[XGeneWriteLD1LfLfAlu], (instregex "LD1Onev2d_POST$")>;
+def : InstRW<[XGeneWriteLD1LfLfAlu], (instregex "LD1Twov1d_POST$")>;
+def : InstRW<[XGeneWriteLD1LfLfAlu], (instregex "LD2Rv(2s|4h|8b)_POST$")>;
+def : InstRW<[XGeneWriteLD1LfLfAlu], (instregex "LD2Rv(4s|8h|16b)_POST$")>;
+def : InstRW<[XGeneWriteLD1LfLfAlu], (instregex "LD2Rv(1d|2d)_POST$")>;
+
+def : InstRW<[XGeneWriteLD1X3Lf], (instregex "LD1Threev1d$")>;
+def : InstRW<[XGeneWriteLD1X3Lf], (instregex "LD3Rv(2s|4h|8b)$")>;
+def : InstRW<[XGeneWriteLD1X3Lf], (instregex "LD3Rv(4s|8h|16b)$")>;
+def : InstRW<[XGeneWriteLD1X3Lf], (instregex "LD3Rv(1d|2d)$")>;
+def : InstRW<[XGeneWriteLD1X3LfAlu], (instregex "LD1Threev1d_POST$")>;
+def : InstRW<[XGeneWriteLD1X3LfAlu], (instregex "LD3Rv(2s|4h|8b)_POST$")>;
+def : InstRW<[XGeneWriteLD1X3LfAlu], (instregex "LD3Rv(4s|8h|16b)_POST$")>;
+def : InstRW<[XGeneWriteLD1X3LfAlu], (instregex "LD3Rv(1d|2d)_POST$")>;
+
+def : InstRW<[XGeneWriteLD1X4Lf], (instregex "LD1Twov2d$")>;
+def : InstRW<[XGeneWriteLD1X4Lf], (instregex "LD1Fourv1d$")>;
+def : InstRW<[XGeneWriteLD1X4Lf], (instregex "LD2Twov2d$")>;
+def : InstRW<[XGeneWriteLD1X4Lf], (instregex "LD4Rv(2s|4h|8b)$")>;
+def : InstRW<[XGeneWriteLD1X4Lf], (instregex "LD4Rv(4s|8h|16b)$")>;
+def : InstRW<[XGeneWriteLD1X4Lf], (instregex "LD4Rv(1d|2d)$")>;
+def : InstRW<[XGeneWriteLD1X4LfAlu], (instregex "LD1Twov2d_POST$")>;
+def : InstRW<[XGeneWriteLD1X4LfAlu], (instregex "LD1Fourv1d_POST$")>;
+def : InstRW<[XGeneWriteLD1X4LfAlu], (instregex "LD2Twov2d_POST$")>;
+def : InstRW<[XGeneWriteLD1X4LfAlu], (instregex "LD4Rv(2s|4h|8b)_POST$")>;
+def : InstRW<[XGeneWriteLD1X4LfAlu],
(instregex "LD4Rv(4s|8h|16b)_POST$")>;
+def : InstRW<[XGeneWriteLD1X4LfAlu], (instregex "LD4Rv(1d|2d)_POST$")>;
+
+def : InstRW<[XGeneWriteLD1X6Lf], (instregex "LD1Threev2d$")>;
+def : InstRW<[XGeneWriteLD1X6Lf], (instregex "LD3Threev2d$")>;
+def : InstRW<[XGeneWriteLD1X6LfAlu], (instregex "LD1Threev2d_POST$")>;
+def : InstRW<[XGeneWriteLD1X6LfAlu], (instregex "LD3Threev2d_POST$")>;
+
+def : InstRW<[XGeneWriteLD1X8Lf], (instregex "LD1Fourv2d$")>;
+def : InstRW<[XGeneWriteLD1X8Lf], (instregex "LD4Fourv2d$")>;
+def : InstRW<[XGeneWriteLD1X8LfAlu], (instregex "LD1Fourv2d_POST$")>;
+def : InstRW<[XGeneWriteLD1X8LfAlu], (instregex "LD4Fourv2d_POST$")>;
+
+// Single-structure (lane) loads: Lf load op(s) followed by Asi insert op(s).
+def : InstRW<[XGeneWriteLD1Lf1Asi], (instregex "LD1(i8|i16|i32|i64)$")>;
+def : InstRW<[XGeneWriteLD1LfAlu1Asi], (instregex "LD1(i8|i16|i32|i64)_POST$")>;
+def : InstRW<[XGeneWriteLD1LfLf1AsiAsi], (instregex "LD2(i8|i16|i32|i64)$")>;
+def : InstRW<[XGeneWriteLD1LfLfAlu1AsiAsi], (instregex "LD2(i8|i16|i32|i64)_POST$")>;
+def : InstRW<[XGeneWriteLD1X3Lf1X3Asi], (instregex "LD3(i8|i16|i32|i64)$")>;
+def : InstRW<[XGeneWriteLD1X3LfAlu1X3Asi], (instregex "LD3(i8|i16|i32|i64)_POST$")>;
+def : InstRW<[XGeneWriteLD1X4Lf1X4Asi], (instregex "LD4(i8|i16|i32|i64)$")>;
+def : InstRW<[XGeneWriteLD1X4LfAlu1X4Asi], (instregex "LD4(i8|i16|i32|i64)_POST$")>;
+
+//All vector loads for Read Advance
+// NOTE(review): this record uses the scalar reads XGeneReadLDSTI/XGeneReadLDSTF
+// although XGeneReadVLDSTI/XGeneReadVLDSTF are defined for vector loads with
+// the same advances - confirm which pair is intended. The regex also overlaps
+// every LD1-LD4 write mapping above.
+def : InstRW<[XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD(1|2|3|4).*$")>;
+
+//---
+// Store instructions (18 groups in total)
+// 1St: STURB, STURH, STUR (unscaled immediate)
+// 1St: STRB, STRH, STR (register offset)
+// 1St: STRB, STRH, STR (unsigned immediate)
+// 1StAlu: STRB, STRH, STR (immediate post-indexed)
+// 1StAlu: STRB, STRH, STR (immediate pre-indexed)
+// 1StSt: STP (offset)
+// 1StStAlu: STP (post-indexed)
+// 1StStAlu: STP (pre-indexed)
+//---
+def : InstRW<[XGeneWriteST1St], (instregex "STUR(BB|HH|W|X)i$")>;
+def : InstRW<[XGeneWriteST1St], (instregex "STR(BB|HH|W|X)ro(W|X)$")>;
+def : InstRW<[XGeneWriteST1St],
(instregex "STR(X|W|HH|BB)ui$")>;
+
+def : InstRW<[XGeneWriteST1StAlu], (instregex "STR(W|X|BB|HH)post$")>;
+def : InstRW<[XGeneWriteST1StAlu], (instregex "STR(W|X|BB|HH)pre$")>;
+
+def : InstRW<[XGeneWriteST1StSt], (instregex "STP(W|X)i$")>;
+
+def : InstRW<[XGeneWriteST1StStAlu], (instregex "STP(W|X)post$")>;
+def : InstRW<[XGeneWriteST1StStAlu], (instregex "STP(W|X)pre$")>;
+
+// Scalar int stores for Read Advance
+// Tags integer stores with the XGeneWriteSTI marker write that the load read
+// advances key on.
+// NOTE(review): these regexes repeat the ones used by the store write mappings
+// above, so each store is matched by two InstRW records - confirm this is
+// accepted by TableGen.
+def : InstRW<[XGeneWriteSTI], (instregex "STUR(BB|HH|W|X)i$")>;
+def : InstRW<[XGeneWriteSTI], (instregex "STR(BB|HH|W|X)ro(W|X)$")>;
+def : InstRW<[XGeneWriteSTI], (instregex "STR(X|W|HH|BB)ui$")>;
+def : InstRW<[XGeneWriteSTI], (instregex "STR(W|X|BB|HH)post$")>;
+def : InstRW<[XGeneWriteSTI], (instregex "STR(W|X|BB|HH)pre$")>;
+def : InstRW<[XGeneWriteSTI], (instregex "STP(W|X)i$")>;
+def : InstRW<[XGeneWriteSTI], (instregex "STP(W|X)post$")>;
+def : InstRW<[XGeneWriteSTI], (instregex "STP(W|X)pre$")>;
+
+//---
+// Store instructions - 64-bit FP/SIMD (8 groups in total)
+// 1Sf: STUR (literal, unscaled immediate, unsigned immediate)
+// 1SfAlu: STR (immediate post-indexed)
+// 1SfAlu: STR (immediate pre-indexed)
+// 1SfAlu: STR (register offset)
+// 1SfSf: STP (offset)
+// 1SfSfAlu: STP (post-indexed, pre-indexed)
+//---
+// for STUR, the llvm arm64 model only defines the unscaled immediates
+def : InstRW<[XGeneWriteST1Sf], (instregex "STUR(B|H|S|D)i$")>;
+def : InstRW<[XGeneWriteST1SfAlu], (instregex "STR(B|H|S|D)post$")>;
+def : InstRW<[XGeneWriteST1SfAlu], (instregex "STR(B|H|S|D)pre$")>;
+def : InstRW<[XGeneWriteST1SfAlu], (instregex "STR(B|H|S|D)ro(X|W)$")>;
+def : InstRW<[XGeneWriteST1SfSf], (instregex "STP(D|S)i$")>;
+def : InstRW<[XGeneWriteST1SfSfAlu], (instregex "STP(D|S)post$")>;
+def : InstRW<[XGeneWriteST1SfSfAlu], (instregex "STP(D|S)pre$")>;
+
+//---
+// Store instructions - 128-bit FP/SIMD (8 groups in total)
+// 1SfSf: STUR (literal, unscaled immediate, unsigned immediate)
+// 1SfSfAlu: STR (immediate post-indexed)
+// 1SfSfAlu:
STR (immediate pre-indexed)
+// 1SfAlu1Sf: STR (register offset)
+// 1SfSfAlu1SfSf: STP (offset)
+// 1SfSfAlu1SfSfAlu: STP (post-indexed, pre-indexed)
+//---
+// for STUR, the llvm arm64 model only defines the unscaled immediates
+// Q-register forms use groups with twice as many Sf ops as the 64-bit forms.
+def : InstRW<[XGeneWriteST1SfSf], (instregex "STURQi$")>;
+def : InstRW<[XGeneWriteST1SfSfAlu], (instregex "STRQpost$")>;
+def : InstRW<[XGeneWriteST1SfSfAlu], (instregex "STRQpre$")>;
+def : InstRW<[XGeneWriteST1SfAlu1Sf], (instregex "STRQro(X|W)$")>;
+def : InstRW<[XGeneWriteST1SfSfAlu1SfSf], (instregex "STPQi$")>;
+def : InstRW<[XGeneWriteST1SfSfAlu1SfSfAlu], (instregex "STPQpost$")>;
+def : InstRW<[XGeneWriteST1SfSfAlu1SfSfAlu], (instregex "STPQpre$")>;
+
+// Scalar float stores for Read Advance
+// Tags FP/SIMD stores with the XGeneWriteSTF marker write.
+// NOTE(review): these regexes overlap the FP store write mappings above -
+// confirm that TableGen accepts the duplicate InstRW matches.
+def : InstRW<[XGeneWriteSTF], (instregex "STUR(B|H|S|D|Q)i$")>;
+def : InstRW<[XGeneWriteSTF], (instregex "STR(B|H|S|D|Q)post$")>;
+def : InstRW<[XGeneWriteSTF], (instregex "STR(B|H|S|D|Q)pre$")>;
+def : InstRW<[XGeneWriteSTF], (instregex "STR(B|H|S|D|Q)ro(X|W)$")>;
+def : InstRW<[XGeneWriteSTF], (instregex "STP(D|S|Q)i$")>;
+def : InstRW<[XGeneWriteSTF], (instregex "STP(D|S|Q)post$")>;
+def : InstRW<[XGeneWriteSTF], (instregex "STP(D|S|Q)pre$")>;
+
+//---
+// Vector Store (66 groups in total)
+// Multiple Structures
+// 1Sc: ST1 (one register 2S/4H/8B)
+// 1ScAlu: ST1 (one register 2S/4H/8B pre/post indexed)
+// 1ScSc: ST1 (one register 4S/8H/16B)
+// 1ScSc: ST1 (two registers 2S/4H/8B)
+// 1ScSc: ST2 (two registers 2S/4H/8B)
+// 1ScScAlu: ST1 (one register 4S/8H/16B pre/post indexed)
+// 1ScScAlu: ST1 (two registers 2S/4H/8B pre/post indexed)
+// 1ScScAlu: ST2 (two registers 2S/4H/8B pre/post indexed)
+// 1X3Sc: ST1 (three registers 2S/4H/8B)
+// 1X3Sc: ST3 (three registers 2S/4H/8B)
+// 1X3ScAlu: ST1 (three registers 2S/4H/8B pre/post indexed)
+// 1X3ScAlu: ST3 (three registers 2S/4H/8B pre/post indexed)
+// 1X4Sc: ST1 (two registers 4S/8H/16B)
+// 1X4Sc: ST1 (four registers 2S/4H/8B)
+// 1X4Sc: ST2 (two registers 4S/8H/16B)
+// 1X4Sc: ST4
(four registers 2S/4H/8B) +// 1X4ScAlu: ST1 (two registers 4S/8H/16B pre/post indexed) +// 1X4ScAlu: ST1 (four registers 2S/4H/8B pre/post indexed) +// 1X4ScAlu: ST2 (two registers 4S/8H/16B pre/post indexed) +// 1X4ScAlu: ST4 (four registers 2S/4H/8B pre/post indexed) +// 1X6Sc: ST1 (three registers 4S/8H/16B) +// 1X6Sc: ST3 (three registers 4S/8H/16B) +// 1X6ScAlu: ST1 (three registers 4S/8H/16B pre/post indexed) +// 1X6ScAlu: ST3 (three registers 4S/8H/16B pre/post indexed) +// 1X8Sc: ST1 (four registers 4S/8H/16B) +// 1X8Sc: ST4 (four registers 4S/8H/16B) +// 1X8ScAlu: ST1 (four registers 4S/8H/16B pre/post indexed) +// 1X8ScAlu: ST4 (four registers 4S/8H/16B pre/post indexed) +// 1Sf: ST1 (one register 1D) +// 1SfAlu: ST1 (one register 1D pre/post indexed) +// 1SfSf: ST1 (one register 2D) +// 1SfSf: ST1 (two registers 1D) +// 1SfSfAlu: ST1 (one register 2D pre/post indexed) +// 1SfSfAlu: ST1 (two registers 1D pre/post indexed) +// 1X3Sf: ST1 (three registers 1D) +// 1X3SfAlu: ST1 (three registers 1D pre/post indexed) +// 1X4Sf: ST1 (two registers 2D) +// 1X4Sf: ST1 (four registers 1D) +// 1X4Sf: ST2 (two registers 2D) +// 1X4SfAlu: ST1 (two registers 2D pre/post indexed) +// 1X4SfAlu: ST1 (four registers 1D pre/post indexed) +// 1X4SfAlu: ST2 (two registers 2D pre/post indexed) +// 1X6Sf: ST1 (three registers 2D) +// 1X6Sf: ST3 (three registers 2D) +// 1X6SfAlu: ST1 (three registers 2D pre/post indexed) +// 1X6SfAlu: ST3 (three registers 2D pre/post indexed) +// 1X8Sf: ST1 (four registers 2D) +// 1X8Sf: ST4 (four registers 2D) +// 1X8SfAlu: ST1 (four registers 2D pre/post indexed) +// 1X8SfAlu: ST4 (four registers 2D pre/post indexed) +// Single Structure +// 1Sf: ST1 (one register) +// 1SfAlu: ST1 (one register pre/post indexed) +// 1SfSf: ST2 (two registers) +// 1SfSfAlu: ST2 (two registers pre/post indexed) +// 1X3Sf: ST3 (three registers) +// 1X3SfAlu: ST3 (three registers pre/post indexed) +// 1X4Sf: ST4 (four registers) +// 1X4SfAlu: ST4 (four registers 
pre/post indexed) +//--- +def : InstRW<[XGeneWriteST1Sc], (instregex "ST1Onev(2s|4h|8b)$")>; +def : InstRW<[XGeneWriteST1ScAlu], (instregex "ST1Onev(2s|4h|8b)_POST$")>; + +def : InstRW<[XGeneWriteST1ScSc], (instregex "ST1Onev(4s|8h|16b)$")>; +def : InstRW<[XGeneWriteST1ScSc], (instregex "ST1Twov(2s|4h|8b)$")>; +def : InstRW<[XGeneWriteST1ScSc], (instregex "ST2Twov(2s|4h|8b)$")>; +def : InstRW<[XGeneWriteST1ScScAlu], (instregex "ST1Onev(4s|8h|16b)_POST$")>; +def : InstRW<[XGeneWriteST1ScScAlu], (instregex "ST1Twov(2s|4h|8b)_POST$")>; +def : InstRW<[XGeneWriteST1ScScAlu], (instregex "ST2Twov(2s|4h|8b)_POST$")>; + +def : InstRW<[XGeneWriteST1X3Sc], (instregex "ST1Threev(2s|4h|8b)$")>; +def : InstRW<[XGeneWriteST1X3Sc], (instregex "ST3Threev(2s|4h|8b)$")>; +def : InstRW<[XGeneWriteST1X3ScAlu], (instregex "ST1Threev(2s|4h|8b)_POST$")>; +def : InstRW<[XGeneWriteST1X3ScAlu], (instregex "ST3Threev(2s|4h|8b)_POST$")>; + +def : InstRW<[XGeneWriteST1X4Sc], (instregex "ST1Twov(4s|8h|16b)$")>; +def : InstRW<[XGeneWriteST1X4Sc], (instregex "ST1Fourv(2s|4h|8b)$")>; +def : InstRW<[XGeneWriteST1X4Sc], (instregex "ST2Twov(4s|8h|16b)$")>; +def : InstRW<[XGeneWriteST1X4Sc], (instregex "ST4Fourv(2s|4h|8b)$")>; +def : InstRW<[XGeneWriteST1X4ScAlu], (instregex "ST1Twov(4s|8h|16b)_POST$")>; +def : InstRW<[XGeneWriteST1X4ScAlu], (instregex "ST1Fourv(2s|4h|8b)_POST$")>; +def : InstRW<[XGeneWriteST1X4ScAlu], (instregex "ST2Twov(4s|8h|16b)_POST$")>; +def : InstRW<[XGeneWriteST1X4ScAlu], (instregex "ST4Fourv(2s|4h|8b)_POST$")>; + +def : InstRW<[XGeneWriteST1X6Sc], (instregex "ST1Threev(4s|8h|16b)$")>; +def : InstRW<[XGeneWriteST1X6Sc], (instregex "ST3Threev(4s|8h|16b)$")>; +def : InstRW<[XGeneWriteST1X6ScAlu], (instregex "ST1Threev(4s|8h|16b)_POST$")>; +def : InstRW<[XGeneWriteST1X6ScAlu], (instregex "ST3Threev(4s|8h|16b)_POST$")>; + +def : InstRW<[XGeneWriteST1X8Sc], (instregex "ST1Fourv(4s|8h|16b)$")>; +def : InstRW<[XGeneWriteST1X8Sc], (instregex "ST4Fourv(4s|8h|16b)$")>; +def : 
InstRW<[XGeneWriteST1X8ScAlu], (instregex "ST1Fourv(4s|8h|16b)_POST$")>; +def : InstRW<[XGeneWriteST1X8ScAlu], (instregex "ST4Fourv(4s|8h|16b)_POST$")>; + +def : InstRW<[XGeneWriteST1Sf], (instregex "ST1Onev1d$")>; +def : InstRW<[XGeneWriteST1SfAlu], (instregex "ST1Onev1d_POST$")>; + +def : InstRW<[XGeneWriteST1SfSf], (instregex "ST1Onev2d$")>; +def : InstRW<[XGeneWriteST1SfSf], (instregex "ST1Twov1d$")>; +def : InstRW<[XGeneWriteST1SfSfAlu], (instregex "ST1Onev2d_POST$")>; +def : InstRW<[XGeneWriteST1SfSfAlu], (instregex "ST1Twov1d_POST$")>; + +def : InstRW<[XGeneWriteST1X3Sf], (instregex "ST1Threev1d$")>; +def : InstRW<[XGeneWriteST1X3SfAlu], (instregex "ST1Threev1d_POST$")>; + +def : InstRW<[XGeneWriteST1X4Sf], (instregex "ST1Twov2d$")>; +def : InstRW<[XGeneWriteST1X4Sf], (instregex "ST1Fourv1d$")>; +def : InstRW<[XGeneWriteST1X4Sf], (instregex "ST2Twov2d$")>; +def : InstRW<[XGeneWriteST1X4SfAlu], (instregex "ST1Twov2d_POST$")>; +def : InstRW<[XGeneWriteST1X4SfAlu], (instregex "ST1Fourv1d_POST$")>; +def : InstRW<[XGeneWriteST1X4SfAlu], (instregex "ST2Twov2d_POST$")>; + +def : InstRW<[XGeneWriteST1X6Sf], (instregex "ST1Threev2d$")>; +def : InstRW<[XGeneWriteST1X6Sf], (instregex "ST3Threev2d$")>; +def : InstRW<[XGeneWriteST1X6SfAlu], (instregex "ST1Threev2d_POST$")>; +def : InstRW<[XGeneWriteST1X6SfAlu], (instregex "ST3Threev2d_POST$")>; + +def : InstRW<[XGeneWriteST1X8Sf], (instregex "ST1Fourv2d$")>; +def : InstRW<[XGeneWriteST1X8Sf], (instregex "ST4Fourv2d$")>; +def : InstRW<[XGeneWriteST1X8SfAlu], (instregex "ST1Fourv2d_POST$")>; +def : InstRW<[XGeneWriteST1X8SfAlu], (instregex "ST4Fourv2d_POST$")>; + +def : InstRW<[XGeneWriteST1Sf], (instregex "ST1(i8|i16|i32|i64)$")>; +def : InstRW<[XGeneWriteST1SfAlu], (instregex "ST1(i8|i16|i32|i64)_POST$")>; +def : InstRW<[XGeneWriteST1SfSf], (instregex "ST2(i8|i16|i32|i64)$")>; +def : InstRW<[XGeneWriteST1SfSfAlu], (instregex "ST2(i8|i16|i32|i64)_POST$")>; +def : InstRW<[XGeneWriteST1X3Sf], (instregex 
"ST3(i8|i16|i32|i64)$")>; +def : InstRW<[XGeneWriteST1X3SfAlu], (instregex "ST3(i8|i16|i32|i64)_POST$")>; +def : InstRW<[XGeneWriteST1X4Sf], (instregex "ST4(i8|i16|i32|i64)$")>; +def : InstRW<[XGeneWriteST1X4SfAlu], (instregex "ST4(i8|i16|i32|i64)_POST$")>; + +// All vector stores for read advance, they go in the "Store Float" group +def : InstRW<[XGeneWriteSTF], (instregex "ST.*$")>; + +//--- +// Data Processing Register +// 1Sbfm1Alu: LSL, LSR, ASR (shifted register) +// 1Sbfm1Alu: LSLV, LSRV, ASRV, RORV: shift/rotate op. +// 1Sbfm1Alu: UXTW, UXTX (shifted register) +// 1Alb1Alu: ROR (shifted register) +// 1Sbfm SBFM +// 1Car: ADC (Add/subtract (with carry): carry op.) +// 1Set: CCMP (Conditional compare (register): logical op., produces flag) +// 1Alu: CSEL Conditional select: arithmetic op. +// 1Mlw1Alu MADD, SMADDL, UMADDL with other Ra (32bit) +// 1Mlx1Alu MADD, SMADDL, UMADDL with other Ra (64bit) +// 1Mlw1Alu MSUB, SMSUBL, UMSUBL (32bit) +// 1Mlx1Alu MSUB, SMSUBL, UMSUBL (64bit) +// 1Mlx UMULH, SMULH (64bit) +// 1Div UDIV, SDIV +// 1Alb REV, REV32, REV64 +// 1Alu BFM +// 1Alu MRS, MSR +//--- +def : InstRW<[XGeneWriteI1Sbfm1Alu], (instregex "LS(L|R)V(W|X)r$")>; +def : InstRW<[XGeneWriteI1Sbfm1Alu], (instregex "ASRV(W|X)r$")>; +def : InstRW<[XGeneWriteI1Sbfm1Alu], (instregex "UBFM(W|X)ri$")>; + // alias of UXTW +def : InstRW<[XGeneWriteI1Alb1Alu], (instregex "RORV(W|X)r$")>; +def : InstRW<[XGeneWriteI1Sbfm], (instregex "SBFM(W|X)ri$")>; + +def : InstRW<[XGeneWriteI1Car], (instregex "ADC(W|X)r$")>; +def : InstRW<[XGeneWriteI1Car], (instregex "ADCS(W|X)r$")>; +def : InstRW<[XGeneWriteI1Set], (instregex "CCMP(W|X)r$")>; +def : InstRW<[XGeneWriteI1Set], (instregex "CCMP(W|X)i$")>; +def : InstRW<[XGeneWriteI1Alu], (instregex "CSEL(W|X)r$")>; + +def : InstRW<[XGeneWriteI1Mlw1Alu], (instregex "MADD(W)rrr$")>; +def : InstRW<[XGeneWriteI1Mlx1Alu], (instregex "MADD(X)rrr$")>; +def : InstRW<[XGeneWriteI1Mlw1Alu], (instregex "(S|U)MADDLrrr$")>; +def : 
InstRW<[XGeneWriteI1Mlw1Alu], (instregex "MSUB(W)rrr$")>; +def : InstRW<[XGeneWriteI1Mlx1Alu], (instregex "MSUB(X)rrr$")>; +def : InstRW<[XGeneWriteI1Mlw1Alu], (instregex "(S|U)MSUBLrrr$")>; + +def : InstRW<[XGeneWriteI1Mlx], (instregex "(S|U)MULHrr$")>; + +def : InstRW<[XGeneWriteI1Div], (instregex "SDIVWr$")>; +def : InstRW<[XGeneWriteI1Div], (instregex "SDIVXr$")>; +def : InstRW<[XGeneWriteI1Div], (instregex "UDIV(W|X)r$")>; + +def : InstRW<[XGeneWriteI1Alb], (instregex "REV(W|X)r$")>; +def : InstRW<[XGeneWriteI1Alb], (instregex "REV16(W|X)r$")>; +def : InstRW<[XGeneWriteI1Alb], (instregex "REV32Xr$")>; + +def : InstRW<[XGeneWriteI1Alu], (instregex "BFM(W|X)ri$")>; + +def : InstRW<[XGeneWriteI1Alu], (instregex "BLR$")>; +def : InstRW<[XGeneWriteI1Alu], (instregex "MRS$")>; +def : InstRW<[XGeneWriteI1Alu], (instregex "MSR$")>; + +// Taken from list of missing instructions +def : InstRW<[XGeneWriteI1Alu], (instregex "ADD(W|X)r(r|i)$")>; +def : InstRW<[XGeneWriteI1Alb1Alu], (instregex "ADD(W|X)rx$")>; +def : InstRW<[XGeneWriteI1Set], (instregex "ADDS(W|X)r(r|i)$")>; +def : InstRW<[XGeneWriteI1Alb1Alu], (instregex "ADDS(W|X)rx$")>; +def : InstRW<[XGeneWriteI1Alu], (instregex "SUB(W|X)r(r|i)$")>; +def : InstRW<[XGeneWriteI1Alb1Alu], (instregex "SUB(W|X)rx$")>; +def : InstRW<[XGeneWriteI1Set], (instregex "SUBS(W|X)r(r|i)$")>; +def : InstRW<[XGeneWriteI1Alb1Alu], (instregex "SUBS(W|X)rx$")>; +def : InstRW<[XGeneWriteI1Alu], (instregex "AND(W|X)r(r|i)$")>; +def : InstRW<[XGeneWriteI1Set], (instregex "ANDS(W|X)r(r|i)$")>; +def : InstRW<[XGeneWriteI1Alu], (instregex "BIC(W|X)rr$")>; +def : InstRW<[XGeneWriteI1Set], (instregex "BICS(W|X)rr$")>; + +def : InstRW<[XGeneWriteI1Alu], (instregex "EON(W|X)rr$")>; +def : InstRW<[XGeneWriteI1Alu], (instregex "EOR(W|X)r(r|i)$")>; +def : InstRW<[XGeneWriteI1Alu], (instregex "ORN(W|X)rr$")>; +def : InstRW<[XGeneWriteI1Alu], (instregex "ORR(W|X)r(r|i)$")>; + +def : InstRW<[XGeneWriteI1Alu], (instregex "CLS(W|X)r$")>; +def : 
InstRW<[XGeneWriteI1Alu], (instregex "CLZ(W|X)r$")>; +def : InstRW<[XGeneWriteI1Alu], (instregex "RBIT(W|X)r$")>; +def : InstRW<[XGeneWriteI1Alu], (instregex "CSINC(W|X)r$")>; +def : InstRW<[XGeneWriteI1Alu], (instregex "CSINV(W|X)r$")>; +def : InstRW<[XGeneWriteI1Alu], (instregex "CSNEG(W|X)r$")>; + +def : InstRW<[XGeneWriteI1Car], (instregex "SBC(W|X)r$")>; +def : InstRW<[XGeneWriteI1Car], (instregex "SBCS(W|X)r$")>; + +def : InstRW<[XGeneWriteI1Alb], (instregex "EXTR(W|X)rri$")>; +def : InstRW<[XGeneWriteI1Alu], (instregex "MOV(N|Z|K)(W|X)i$")>; + +def : InstRW<[XGeneWriteI1Alu], (instregex "ADR$")>; +def : InstRW<[XGeneWriteI1Alu], (instregex "ADRP$")>; + +def : InstRW<[XGeneWriteI1Set], (instregex "CCMN(W|X)(r|i)$")>; + +def : InstRW<[XGeneWriteI1Alu], (instregex "TBZ(W|X)$")>; +def : InstRW<[XGeneWriteI1Alu], (instregex "TBNZ(W|X)$")>; +def : InstRW<[XGeneWriteI1Alu], (instregex "CBZ(W|X)$")>; +def : InstRW<[XGeneWriteI1Alu], (instregex "CBNZ(W|X)$")>; + +// shifted +def : InstRW<[XGeneWriteI1Sbfm1Alu], (instregex "ADD(W|X)rs$")>; +def : InstRW<[XGeneWriteI1Sbfm1Set], (instregex "ADDS(W|X)rs$")>; +def : InstRW<[XGeneWriteI1Sbfm1Alu], (instregex "SUB(W|X)rs$")>; +def : InstRW<[XGeneWriteI1Sbfm1Set], (instregex "SUBS(W|X)rs$")>; +def : InstRW<[XGeneWriteI1Sbfm1Alu], (instregex "AND(W|X)rs$")>; +def : InstRW<[XGeneWriteI1Sbfm1Set], (instregex "ANDS(W|X)rs$")>; +def : InstRW<[XGeneWriteI1Sbfm1Alu], (instregex "BIC(W|X)rs$")>; +def : InstRW<[XGeneWriteI1Sbfm1Set], (instregex "BICS(W|X)rs$")>; + +def : InstRW<[XGeneWriteI1Sbfm1Alu], (instregex "EON(W|X)rs$")>; +def : InstRW<[XGeneWriteI1Sbfm1Alu], (instregex "EOR(W|X)rs$")>; +def : InstRW<[XGeneWriteI1Sbfm1Alu], (instregex "ORN(W|X)rs$")>; +def : InstRW<[XGeneWriteI1Sbfm1Alu], (instregex "ORR(W|X)rs$")>; + +def : InstRW<[XGeneWriteISFlags], (instregex "ADCS(W|X)r$")>; +def : InstRW<[XGeneWriteISFlags], (instregex "ADDS(W|X)r(r|i)$")>; +def : InstRW<[XGeneWriteISFlags], (instregex "ADDS(W|X)rx$")>; +def : 
InstRW<[XGeneWriteISFlags], (instregex "SUBS(W|X)r(r|i)$")>; +def : InstRW<[XGeneWriteISFlags], (instregex "SUBS(W|X)rx$")>; +def : InstRW<[XGeneWriteISFlags], (instregex "ANDS(W|X)r(r|i)$")>; +def : InstRW<[XGeneWriteISFlags], (instregex "BICS(W|X)rr$")>; +def : InstRW<[XGeneWriteISFlags], (instregex "SBCS(W|X)r$")>; + +def : InstRW<[XGeneReadISFlags], (instregex "CCMP(W|X)r$")>; +def : InstRW<[XGeneReadISFlags], (instregex "CCMP(W|X)i$")>; +def : InstRW<[XGeneReadISFlags], (instregex "CSEL(W|X)r$")>; +def : InstRW<[XGeneReadISFlags], (instregex "CLS(W|X)r$")>; +def : InstRW<[XGeneReadISFlags], (instregex "CLZ(W|X)r$")>; +def : InstRW<[XGeneReadISFlags], (instregex "CSINC(W|X)r$")>; +def : InstRW<[XGeneReadISFlags], (instregex "CSINV(W|X)r$")>; +def : InstRW<[XGeneReadISFlags], (instregex "CSNEG(W|X)r$")>; +def : InstRW<[XGeneReadISFlags], (instregex "CCMN(W|X)(r|i)$")>; +def : InstRW<[XGeneReadISFlags], (instregex "CBZ(W|X)$")>; +def : InstRW<[XGeneReadISFlags], (instregex "CBNZ(W|X)$")>; + +// TODO: STLX et al. + +//--- +// AdvSIMD Data Processing (Vector Integer) +// Three same vector instructions +// 1Asa: ADD, SUB, ADDP (Q=0) +// 1Asa: SHADD, SQADD, SRHADD, SHSUB, SQSUB (Q=0) +// 1Asa: UHADD, UQADD, URHADD, UHSUB, UQSUB (Q=0) +// 1Asa: CMGT, CMGE, CMTST, CMHI, CMHS, CMEQ (Q=0 register) +// 1Asa: SMAX, SMIN, SABD, SMAXP, SMINP (Q=0) +// 1Asa: UMAX, UMIN, UABD, UMAXP, UMINP (Q=0) +// 1Ass: SSHL, SQSHL, SRSHL, SQRSHL (Q=0) +// 1Ass: USHL, UQSHL, URSHL, UQRSHL (Q=0) +// 1Asm: MUL, MLA, MLS, SQDMULH, SQRDMULH (Q=0) +// 1Asl: AND, BIC, ORR, ORN, EOR, BSL, BIT, BIF (Q=0) +// TODO: maybe PMUL uses Apol? 
That would make more sense I think +// 1Asl: PMUL (Q=0) +// 1AsaAsa: ADD, SUB, ADDP (Q=1) +// 1AsaAsa: SHADD, SQADD, SRHADD, SHSUB, SQSUB (Q=1) +// 1AsaAsa: UHADD, UQADD, URHADD, UHSUB, UQSUB (Q=1) +// 1AsaAsa: CMGT, CMGE, CMTST, CMHI, CMHS, CMEQ (Q=1 register) +// 1AsaAsa: SMAX, SMIN, SABD, SMAXP, SMINP (Q=1) +// 1AsaAsa: UMAX, UMIN, UABD, UMAXP, UMINP (Q=1) +// 1AssAss: SSHL, SQSHL, SRSHL, SQRSHL (Q=1) +// 1AssAss: USHL, UQSHL, URSHL, UQRSHL (Q=1) +// 1AsmAsm: MUL, MLA, MLS, SQDMULH, SQRDMULH (Q=1) +// 1AslAsl: AND, BIC, ORR, ORN, EOR, BSL, BIT, BIF (Q=1) +// 1AslAsl: PMUL (Q=1) +// 2Asa: SABA/UABA (Q=0) +// 2AsaAsa: SABA/UABA (Q=1) +//--- +def : InstRW<[XGeneWriteVI1Asa], (instregex "ADDv(8i8|4i16|2i32)$")>; +def : InstRW<[XGeneWriteVI1Asa], (instregex "SUBv(8i8|4i16|2i32)$")>; +def : InstRW<[XGeneWriteVI1Asa], (instregex "ADDPv(8i8|4i16|2i32)$")>; +def : InstRW<[XGeneWriteVI1Asa], (instregex "SHADDv(8i8|4i16|2i32)$")>; +def : InstRW<[XGeneWriteVI1Asa], (instregex "SQADDv(8i8|4i16|2i32)$")>; +def : InstRW<[XGeneWriteVI1Asa], (instregex "SRHADDv(8i8|4i16|2i32)$")>; +def : InstRW<[XGeneWriteVI1Asa], (instregex "SHSUBv(8i8|4i16|2i32)$")>; +def : InstRW<[XGeneWriteVI1Asa], (instregex "SQSUBv(8i8|4i16|2i32)$")>; +def : InstRW<[XGeneWriteVI1Asa], (instregex "UHADDv(8i8|4i16|2i32)$")>; +def : InstRW<[XGeneWriteVI1Asa], (instregex "UQADDv(8i8|4i16|2i32)$")>; +def : InstRW<[XGeneWriteVI1Asa], (instregex "URHADDv(8i8|4i16|2i32)$")>; +def : InstRW<[XGeneWriteVI1Asa], (instregex "UHSUBv(8i8|4i16|2i32)$")>; +def : InstRW<[XGeneWriteVI1Asa], (instregex "UQSUBv(8i8|4i16|2i32)$")>; +def : InstRW<[XGeneWriteVI1Asa], (instregex "CM(GT|GE|EQ)v(8i8|4i16|2i32)$")>; +def : InstRW<[XGeneWriteVI1Asa], (instregex "CM(TST|HI|HS)v(8i8|4i16|2i32)$")>; +def : InstRW<[XGeneWriteVI1Asa], (instregex "SMAXv(8i8|4i16|2i32)$")>; +def : InstRW<[XGeneWriteVI1Asa], (instregex "SMINv(8i8|4i16|2i32)$")>; +def : InstRW<[XGeneWriteVI1Asa], (instregex "SABDv(8i8|4i16|2i32)$")>; +def : 
InstRW<[XGeneWriteVI1Asa], (instregex "SMAXPv(8i8|4i16|2i32)$")>; +def : InstRW<[XGeneWriteVI1Asa], (instregex "SMINPv(8i8|4i16|2i32)$")>; +def : InstRW<[XGeneWriteVI1Asa], (instregex "UMAXv(8i8|4i16|2i32)$")>; +def : InstRW<[XGeneWriteVI1Asa], (instregex "UMINv(8i8|4i16|2i32)$")>; +def : InstRW<[XGeneWriteVI1Asa], (instregex "UABDv(8i8|4i16|2i32)$")>; +def : InstRW<[XGeneWriteVI1Asa], (instregex "UMAXPv(8i8|4i16|2i32)$")>; +def : InstRW<[XGeneWriteVI1Asa], (instregex "UMINPv(8i8|4i16|2i32)$")>; +def : InstRW<[XGeneWriteVI1Ass], (instregex "SSHLv(8i8|4i16|2i32)$")>; +def : InstRW<[XGeneWriteVI1Ass], (instregex "SQSHLv(8i8|4i16|2i32)$")>; +def : InstRW<[XGeneWriteVI1Ass], (instregex "SRSHLv(8i8|4i16|2i32)$")>; +def : InstRW<[XGeneWriteVI1Ass], (instregex "SQRSHLv(8i8|4i16|2i32)$")>; +def : InstRW<[XGeneWriteVI1Ass], (instregex "USHLv(8i8|4i16|2i32)$")>; +def : InstRW<[XGeneWriteVI1Ass], (instregex "UQSHLv(8i8|4i16|2i32)$")>; +def : InstRW<[XGeneWriteVI1Ass], (instregex "URSHLv(8i8|4i16|2i32)$")>; +def : InstRW<[XGeneWriteVI1Ass], (instregex "UQRSHLv(8i8|4i16|2i32)$")>; +def : InstRW<[XGeneWriteVI1Asm], (instregex "MULv(8i8|4i16|2i32)$")>; +def : InstRW<[XGeneWriteVI1Asm], (instregex "MLAv(8i8|4i16|2i32)$")>; +def : InstRW<[XGeneWriteVI1Asm], (instregex "MLSv(8i8|4i16|2i32)$")>; +def : InstRW<[XGeneWriteVI1Asm], (instregex "SQDMULHv(4i16|2i32)$")>; +def : InstRW<[XGeneWriteVI1Asm], (instregex "SQRDMULHv(4i16|2i32)$")>; +def : InstRW<[XGeneWriteVI1Asl], (instregex "ANDv(8i8)$")>; +def : InstRW<[XGeneWriteVI1Asl], (instregex "BICv(8i8)$")>; +def : InstRW<[XGeneWriteVI1Asl], (instregex "ORRv(8i8)$")>; +def : InstRW<[XGeneWriteVI1Asl], (instregex "ORNv(8i8)$")>; +def : InstRW<[XGeneWriteVI1Asl], (instregex "EORv(8i8)$")>; +def : InstRW<[XGeneWriteVI1Asl], (instregex "BSLv(8i8)$")>; +def : InstRW<[XGeneWriteVI1Asl], (instregex "BITv(8i8)$")>; +def : InstRW<[XGeneWriteVI1Asl], (instregex "BIFv(8i8)$")>; +def : InstRW<[XGeneWriteVI1Asl], (instregex "PMULv(8i8)$")>; +def : 
InstRW<[XGeneWriteVI1AsaAsa], (instregex "ADDv(16i8|8i16|4i32|2i64)$")>; +def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "SUBv(16i8|8i16|4i32|2i64)$")>; +def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "ADDPv(16i8|8i16|4i32|2i64)$")>; +def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "SHADDv(16i8|8i16|4i32)$")>; +def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "SQADDv(16i8|8i16|4i32|2i64)$")>; +def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "SRHADDv(16i8|8i16|4i32)$")>; +def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "SHSUBv(16i8|8i16|4i32)$")>; +def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "SQSUBv(16i8|8i16|4i32|2i64)$")>; +def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "UHADDv(16i8|8i16|4i32)$")>; +def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "UQADDv(16i8|8i16|4i32|2i64)$")>; +def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "URHADDv(16i8|8i16|4i32)$")>; +def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "UHSUBv(16i8|8i16|4i32)$")>; +def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "UQSUBv(16i8|8i16|4i32|2i64)$")>; +def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "CM(GT|GE|EQ)v(16i8|8i16|4i32|2i64)$")>; +def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "CM(TST|HI|HS)v(16i8|8i16|4i32|2i64)$")>; +def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "SMAXv(16i8|8i16|4i32)$")>; +def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "SMINv(16i8|8i16|4i32)$")>; +def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "SABDv(16i8|8i16|4i32)$")>; +def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "SMAXPv(16i8|8i16|4i32)$")>; +def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "SMINPv(16i8|8i16|4i32)$")>; +def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "UMAXv(16i8|8i16|4i32)$")>; +def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "UMINv(16i8|8i16|4i32)$")>; +def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "UABDv(16i8|8i16|4i32)$")>; +def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "UMAXPv(16i8|8i16|4i32)$")>; +def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "UMINPv(16i8|8i16|4i32)$")>; +def : 
InstRW<[XGeneWriteVI1AssAss], (instregex "SSHLv(16i8|8i16|4i32|2i64)$")>; +def : InstRW<[XGeneWriteVI1AssAss], (instregex "SQSHLv(16i8|8i16|4i32|2i64)$")>; +def : InstRW<[XGeneWriteVI1AssAss], (instregex "SRSHLv(16i8|8i16|4i32|2i64)$")>; +def : InstRW<[XGeneWriteVI1AssAss], (instregex "SQRSHLv(16i8|8i16|4i32|2i64)$")>; +def : InstRW<[XGeneWriteVI1AssAss], (instregex "USHLv(16i8|8i16|4i32|2i64)$")>; +def : InstRW<[XGeneWriteVI1AssAss], (instregex "UQSHLv(16i8|8i16|4i32|2i64)$")>; +def : InstRW<[XGeneWriteVI1AssAss], (instregex "URSHLv(16i8|8i16|4i32|2i64)$")>; +def : InstRW<[XGeneWriteVI1AssAss], (instregex "UQRSHLv(16i8|8i16|4i32|2i64)$")>; +def : InstRW<[XGeneWriteVI1AsmAsm], (instregex "MULv(16i8|8i16|4i32)$")>; +def : InstRW<[XGeneWriteVI1AsmAsm], (instregex "MLAv(16i8|8i16|4i32)$")>; +def : InstRW<[XGeneWriteVI1AsmAsm], (instregex "MLSv(16i8|8i16|4i32)$")>; +def : InstRW<[XGeneWriteVI1AsmAsm], (instregex "SQDMULHv(8i16|4i32)$")>; +def : InstRW<[XGeneWriteVI1AsmAsm], (instregex "SQRDMULHv(8i16|4i32)$")>; +def : InstRW<[XGeneWriteVI1AslAsl], (instregex "ANDv(16i8)$")>; +def : InstRW<[XGeneWriteVI1AslAsl], (instregex "BICv(16i8)$")>; +def : InstRW<[XGeneWriteVI1AslAsl], (instregex "ORRv(16i8)$")>; +def : InstRW<[XGeneWriteVI1AslAsl], (instregex "ORNv(16i8)$")>; +def : InstRW<[XGeneWriteVI1AslAsl], (instregex "EORv(16i8)$")>; +def : InstRW<[XGeneWriteVI1AslAsl], (instregex "BSLv(16i8)$")>; +def : InstRW<[XGeneWriteVI1AslAsl], (instregex "BITv(16i8)$")>; +def : InstRW<[XGeneWriteVI1AslAsl], (instregex "BIFv(16i8)$")>; +def : InstRW<[XGeneWriteVI1AslAsl], (instregex "PMULv(16i8)$")>; + +//--- +// AdvSIMD Data Processing (Vector Integer) +// Three different vector instructions +// 1AsaAsa: SADDL, SSUBL, SABDL (Q=0, Q=1) +// 1AsaAsa: UADDL, USUBL, UABDL (Q=0, Q=1) +// 1AsaAsa: SADDW, SSUBW (Q=0, Q=1) +// 1AsaAsa: UADDW, USUBW (Q=0, Q=1) +// 1AsmAsm: SMLAL, SMLSL, SMULL (Q=0, Q=1) +// 1AsmAsm: SQDMLAL, SQDMLSL, SQDMULL (Q=0, Q=1) +// 1AsmAsm: UMLAL, UMLSL, UMULL (Q=0, 
Q=1) +// 1AsmAsm: UQDMLAL, UQDMLSL, UQDMULL (Q=0, Q=1) +// NOTE: UQDM... are not modelled in llvm +// 1ApolApol: PMULL (Q=0, Q=1) +// 1AsaAsa1Ass: ADDHN, SUBHN, RADDHN, RSUBHN (Q=0, Q=1) +// 2AsaAsa: SABAL, UABAL (Q=0, Q=1) +//--- +def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "SADDLv(8i8|16i8)_v8i16$")>; +def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "SADDLv(4i16|8i16)_v4i32$")>; +def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "SADDLv(2i32|4i32)_v2i64$")>; +def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "SSUBLv(8i8|16i8)_v8i16$")>; +def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "SSUBLv(4i16|8i16)_v4i32$")>; +def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "SSUBLv(2i32|4i32)_v2i64$")>; +def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "SABDLv(8i8|16i8)_v8i16$")>; +def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "SABDLv(4i16|8i16)_v4i32$")>; +def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "SABDLv(2i32|4i32)_v2i64$")>; +def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "UADDLv(8i8|16i8)_v8i16$")>; +def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "UADDLv(4i16|8i16)_v4i32$")>; +def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "UADDLv(2i32|4i32)_v2i64$")>; +def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "USUBLv(8i8|16i8)_v8i16$")>; +def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "USUBLv(4i16|8i16)_v4i32$")>; +def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "USUBLv(2i32|4i32)_v2i64$")>; +def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "UABDLv(8i8|16i8)_v8i16$")>; +def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "UABDLv(4i16|8i16)_v4i32$")>; +def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "UABDLv(2i32|4i32)_v2i64$")>; +def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "SADDWv(8i8|16i8)_v8i16$")>; +def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "SADDWv(4i16|8i16)_v4i32$")>; +def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "SADDWv(2i32|4i32)_v2i64$")>; +def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "SSUBWv(8i8|16i8)_v8i16$")>; +def : InstRW<[XGeneWriteVI1AsaAsa], (instregex 
"SSUBWv(4i16|8i16)_v4i32$")>; +def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "SSUBWv(2i32|4i32)_v2i64$")>; +def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "UADDWv(8i8|16i8)_v8i16$")>; +def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "UADDWv(4i16|8i16)_v4i32$")>; +def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "UADDWv(2i32|4i32)_v2i64$")>; +def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "USUBWv(8i8|16i8)_v8i16$")>; +def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "USUBWv(4i16|8i16)_v4i32$")>; +def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "USUBWv(2i32|4i32)_v2i64$")>; +def : InstRW<[XGeneWriteVI1AsmAsm], (instregex "SML(A|S)Lv(8i8|16i8)_v8i16$")>; +def : InstRW<[XGeneWriteVI1AsmAsm], (instregex "SML(A|S)Lv(4i16|8i16)_v4i32$")>; +def : InstRW<[XGeneWriteVI1AsmAsm], (instregex "SML(A|S)Lv(2i32|4i32)_v2i64$")>; +def : InstRW<[XGeneWriteVI1AsmAsm], (instregex "SMULLv(8i8|16i8)_v8i16$")>; +def : InstRW<[XGeneWriteVI1AsmAsm], (instregex "SMULLv(4i16|8i16)_v4i32$")>; +def : InstRW<[XGeneWriteVI1AsmAsm], (instregex "SMULLv(2i32|4i32)_v2i64$")>; +def : InstRW<[XGeneWriteVI1AsmAsm], (instregex "SQDML(A|S)Lv(4i16|8i16)_v4i32$")>; +def : InstRW<[XGeneWriteVI1AsmAsm], (instregex "SQDML(A|S)Lv(2i32|4i32)_v2i64$")>; +def : InstRW<[XGeneWriteVI1AsmAsm], (instregex "SQDMULLv(4i16|8i16)_v4i32$")>; +def : InstRW<[XGeneWriteVI1AsmAsm], (instregex "SQDMULLv(2i32|4i32)_v2i64$")>; +def : InstRW<[XGeneWriteVI1AsmAsm], (instregex "UML(A|S)Lv(8i8|16i8)_v8i16$")>; +def : InstRW<[XGeneWriteVI1AsmAsm], (instregex "UML(A|S)Lv(4i16|8i16)_v4i32$")>; +def : InstRW<[XGeneWriteVI1AsmAsm], (instregex "UML(A|S)Lv(2i32|4i32)_v2i64$")>; +def : InstRW<[XGeneWriteVI1AsmAsm], (instregex "UMULLv(8i8|16i8)_v8i16$")>; +def : InstRW<[XGeneWriteVI1AsmAsm], (instregex "UMULLv(4i16|8i16)_v4i32$")>; +def : InstRW<[XGeneWriteVI1AsmAsm], (instregex "UMULLv(2i32|4i32)_v2i64$")>; +def : InstRW<[XGeneWriteVI1ApolApol], (instregex "PMULLv(8i8|16i8|1i64|2i64)$")>; +def : InstRW<[XGeneWriteVI1AsaAsa1Ass], (instregex 
"ADDHNv2i64_v(2i32|4i32)$")>; +def : InstRW<[XGeneWriteVI1AsaAsa1Ass], (instregex "ADDHNv4i32_v(4i16|8i16)$")>; +def : InstRW<[XGeneWriteVI1AsaAsa1Ass], (instregex "ADDHNv8i16_v(8i8|16i8)$")>; +def : InstRW<[XGeneWriteVI1AsaAsa1Ass], (instregex "SUBHNv2i64_v(2i32|4i32)$")>; +def : InstRW<[XGeneWriteVI1AsaAsa1Ass], (instregex "SUBHNv4i32_v(4i16|8i16)$")>; +def : InstRW<[XGeneWriteVI1AsaAsa1Ass], (instregex "SUBHNv8i16_v(8i8|16i8)$")>; +def : InstRW<[XGeneWriteVI1AsaAsa1Ass], (instregex "RADDHNv2i64_v(2i32|4i32)$")>; +def : InstRW<[XGeneWriteVI1AsaAsa1Ass], (instregex "RADDHNv4i32_v(4i16|8i16)$")>; +def : InstRW<[XGeneWriteVI1AsaAsa1Ass], (instregex "RADDHNv8i16_v(8i8|16i8)$")>; +def : InstRW<[XGeneWriteVI1AsaAsa1Ass], (instregex "RSUBHNv2i64_v(2i32|4i32)$")>; +def : InstRW<[XGeneWriteVI1AsaAsa1Ass], (instregex "RSUBHNv4i32_v(4i16|8i16)$")>; +def : InstRW<[XGeneWriteVI1AsaAsa1Ass], (instregex "RSUBHNv8i16_v(8i8|16i8)$")>; +def : InstRW<[XGeneWriteVI2AsaAsa], (instregex "SABALv(8i8|16i8)_v8i16$")>; +def : InstRW<[XGeneWriteVI2AsaAsa], (instregex "SABALv(4i16|8i16)_v4i32$")>; +def : InstRW<[XGeneWriteVI2AsaAsa], (instregex "SABALv(2i32|4i32)_v2i64$")>; +def : InstRW<[XGeneWriteVI2AsaAsa], (instregex "UABALv(8i8|16i8)_v8i16$")>; +def : InstRW<[XGeneWriteVI2AsaAsa], (instregex "UABALv(4i16|8i16)_v4i32$")>; +def : InstRW<[XGeneWriteVI2AsaAsa], (instregex "UABALv(2i32|4i32)_v2i64$")>; + +//--- +// AdvSIMD Data Processing (Vector Integer) +// AdvSIMD two-reg misc +// 1Asa: SADDLP, SUQADD, SQABS, SQNEG (Q=0) +// 1Asa: UADDLP, USQADD, ABS, NEG (Q=0) +// 1Asa: CMGT, CMEQ, CMLT, CMGE, CMLE (zero Q=0) +// 1Asl: CLS, CLZ, CNT, NOT, RBIT (Q=0) +// 1Ass: REV64, REV32, REV16 (Q=0) +// 1Ass: XTN, SQXTN, UQXTN, SQXTUN, SHLL (Q=0, Q=1) +// 1AsaAsa: SADDLP, SUQADD, SQABS, SQNEG (Q=1) +// 1AsaAsa: UADDLP, USQADD, ABS, NEG (Q=1) +// 1AsaAsa: CMGT, CMEQ, CMLT, CMGE, CMLE (zero Q=1) +// 1AslAsl: CLS, CLZ, CNT, NOT, RBIT (Q=1) +// 1AssAss: REV64, REV32, REV16 (Q=1) +// 1Adre: URECPE (Q=0, Q=1) +// 1Asre: URSQRTE (Q=0, Q=1) +// 2Asa: SADALP, UADALP 
(Q=0) +// 2AsaAsa: SADALP, UADALP (Q=1) +//--- +def : InstRW<[XGeneWriteVI1Asa], (instregex "SADDLPv(8i8_v4i16)$")>; +def : InstRW<[XGeneWriteVI1Asa], (instregex "SADDLPv(4i16_v2i32)$")>; +def : InstRW<[XGeneWriteVI1Asa], (instregex "SADDLPv(2i32_v1i64)$")>; +def : InstRW<[XGeneWriteVI1Asa], (instregex "SUQADDv(8i8|4i16|2i32)$")>; +def : InstRW<[XGeneWriteVI1Asa], (instregex "SQABSv(8i8|4i16|2i32)$")>; +def : InstRW<[XGeneWriteVI1Asa], (instregex "SQNEGv(8i8|4i16|2i32)$")>; +def : InstRW<[XGeneWriteVI1Asa], (instregex "UADDLPv(8i8_v4i16)$")>; +def : InstRW<[XGeneWriteVI1Asa], (instregex "UADDLPv(4i16_v2i32)$")>; +def : InstRW<[XGeneWriteVI1Asa], (instregex "UADDLPv(2i32_v1i64)$")>; +def : InstRW<[XGeneWriteVI1Asa], (instregex "USQADDv(8i8|4i16|2i32)$")>; +def : InstRW<[XGeneWriteVI1Asa], (instregex "ABSv(8i8|4i16|2i32)$")>; +def : InstRW<[XGeneWriteVI1Asa], (instregex "NEGv(8i8|4i16|2i32)$")>; +def : InstRW<[XGeneWriteVI1Asa], (instregex "CM(GT|EQ|LT)v(8i8|4i16|2i32)rz$")>; +def : InstRW<[XGeneWriteVI1Asa], (instregex "CM(GE|LE)v(8i8|4i16|2i32)rz$")>; +def : InstRW<[XGeneWriteVI1Asl], (instregex "CL(S|Z)v(8i8|4i16|2i32)$")>; +def : InstRW<[XGeneWriteVI1Asl], (instregex "CNTv8i8$")>; +def : InstRW<[XGeneWriteVI1Asl], (instregex "NOTv8i8$")>; +def : InstRW<[XGeneWriteVI1Asl], (instregex "RBITv8i8$")>; +def : InstRW<[XGeneWriteVI1Ass], (instregex "REV16v8i8$")>; +def : InstRW<[XGeneWriteVI1Ass], (instregex "REV32v(8i8|4i16)$")>; +def : InstRW<[XGeneWriteVI1Ass], (instregex "REV64v(8i8|4i16|2i32)$")>; +def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "SADDLPv(16i8_v8i16)$")>; +def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "SADDLPv(8i16_v4i32)$")>; +def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "SADDLPv(4i32_v2i64)$")>; +def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "SUQADDv(16i8|8i16|4i32|2i64)$")>; +def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "SQABSv(16i8|8i16|4i32|2i64)$")>; +def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "SQNEGv(16i8|8i16|4i32|2i64)$")>; +def : 
InstRW<[XGeneWriteVI1AsaAsa], (instregex "UADDLPv(16i8_v8i16)$")>; +def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "UADDLPv(8i16_v4i32)$")>; +def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "UADDLPv(4i32_v2i64)$")>; +def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "USQADDv(16i8|8i16|4i32|2i64)$")>; +def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "ABSv(16i8|8i16|4i32|2i64)$")>; +def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "NEGv(16i8|8i16|4i32|2i64)$")>; +def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "CM(GT|EQ|LT)v(16i8|8i16|4i32|2i64)rz$")>; +def : InstRW<[XGeneWriteVI1AsaAsa], (instregex "CM(GE|LE)v(16i8|8i16|4i32|2i64)rz$")>; +def : InstRW<[XGeneWriteVI1AslAsl], (instregex "CL(S|Z)v(16i8|8i16|4i32)$")>; +def : InstRW<[XGeneWriteVI1AslAsl], (instregex "CNTv16i8$")>; +def : InstRW<[XGeneWriteVI1AslAsl], (instregex "NOTv16i8$")>; +def : InstRW<[XGeneWriteVI1AslAsl], (instregex "RBITv16i8$")>; +def : InstRW<[XGeneWriteVI1AssAss], (instregex "REV16v16i8$")>; +def : InstRW<[XGeneWriteVI1AssAss], (instregex "REV32v(16i8|8i16)$")>; +def : InstRW<[XGeneWriteVI1AssAss], (instregex "REV64v(16i8|8i16|4i32)$")>; +def : InstRW<[XGeneWriteVI1Adre], (instregex "URECPEv(2i32|4i32)$")>; +def : InstRW<[XGeneWriteVI1Asre], (instregex "URSQRTEv(2i32|4i32)$")>; +def : InstRW<[XGeneWriteVI2Asa], (instregex "SADALPv(8i8_v4i16)$")>; +def : InstRW<[XGeneWriteVI2Asa], (instregex "SADALPv(4i16_v2i32)$")>; +def : InstRW<[XGeneWriteVI2Asa], (instregex "SADALPv(2i32_v1i64)$")>; +def : InstRW<[XGeneWriteVI2Asa], (instregex "UADALPv(8i8_v4i16)$")>; +def : InstRW<[XGeneWriteVI2Asa], (instregex "UADALPv(4i16_v2i32)$")>; +def : InstRW<[XGeneWriteVI2Asa], (instregex "UADALPv(2i32_v1i64)$")>; +def : InstRW<[XGeneWriteVI2AsaAsa], (instregex "SADALPv(16i8_v8i16)$")>; +def : InstRW<[XGeneWriteVI2AsaAsa], (instregex "SADALPv(8i16_v4i32)$")>; +def : InstRW<[XGeneWriteVI2AsaAsa], (instregex "SADALPv(4i32_v2i64)$")>; +def : InstRW<[XGeneWriteVI2AsaAsa], (instregex "UADALPv(16i8_v8i16)$")>; +def : 
InstRW<[XGeneWriteVI2AsaAsa], (instregex "UADALPv(8i16_v4i32)$")>; +def : InstRW<[XGeneWriteVI2AsaAsa], (instregex "UADALPv(4i32_v2i64)$")>; + +//--- +// AdvSIMD Data Processing (Vector Integer) +// AdvSIMD across lanes +// 1AsaAsa1Asa: SADDLV, UADDLV (Q=1 size=10) +// 1AsaAsa2Asa: SADDLV, UADDLV (Q=1 size=00) +// 1AsaAsa2Asa: SADDLV, UADDLV (Q=1 size=01) +// 2Asa: SADDLV, UADDLV (Q=0 size=01) +// 2Asa: ADDV, SMAXV, SMINV, UMAXV, UMINV (Q=1 size=10) +// 2Asa: ADDV, SMAXV, SMINV, UMAXV, UMINV (Q=0 size=01) +// 3Asa: SADDLV, UADDLV (Q=0 size=00) +// 3Asa: ADDV, SMAXV, SMINV, UMAXV, UMINV (Q=0 size=00) +// 3Asa: ADDV, SMAXV, SMINV, UMAXV, UMINV (Q=1 size=01) +// 4Asa: ADDV, SMAXV, SMINV, UMAXV, UMINV (Q=1 size=00) +//--- +def : InstRW<[XGeneWriteVI1AsaAsa1Asa], (instregex "SADDLVv(4i32)v$")>; +def : InstRW<[XGeneWriteVI1AsaAsa2Asa], (instregex "SADDLVv(16i8)v$")>; +def : InstRW<[XGeneWriteVI1AsaAsa2Asa], (instregex "SADDLVv(8i16)v$")>; +def : InstRW<[XGeneWriteVI2Asa], (instregex "SADDLVv(4i16)v$")>; +def : InstRW<[XGeneWriteVI1AsaAsa1Asa], (instregex "UADDLVv(4i32)v$")>; +def : InstRW<[XGeneWriteVI1AsaAsa2Asa], (instregex "UADDLVv(16i8)v$")>; +def : InstRW<[XGeneWriteVI1AsaAsa2Asa], (instregex "UADDLVv(8i16)v$")>; +def : InstRW<[XGeneWriteVI2Asa], (instregex "UADDLVv(4i16)v$")>; +def : InstRW<[XGeneWriteVI2Asa], (instregex "ADDVv(4i16|4i32)v$")>; +def : InstRW<[XGeneWriteVI2Asa], (instregex "SMAXVv(4i16|4i32)v$")>; +def : InstRW<[XGeneWriteVI2Asa], (instregex "SMINVv(4i16|4i32)v$")>; +def : InstRW<[XGeneWriteVI2Asa], (instregex "UMAXVv(4i16|4i32)v$")>; +def : InstRW<[XGeneWriteVI2Asa], (instregex "UMINVv(4i16|4i32)v$")>; +def : InstRW<[XGeneWriteVI3Asa], (instregex "SADDLVv(8i8)v$")>; +def : InstRW<[XGeneWriteVI3Asa], (instregex "UADDLVv(8i8)v$")>; +def : InstRW<[XGeneWriteVI3Asa], (instregex "ADDVv(8i8|8i16)v$")>; +def : InstRW<[XGeneWriteVI3Asa], (instregex "SMAXVv(8i8|8i16)v$")>; +def : InstRW<[XGeneWriteVI3Asa], (instregex "SMINVv(8i8|8i16)v$")>; +def : 
InstRW<[XGeneWriteVI3Asa], (instregex "UMAXVv(8i8|8i16)v$")>; +def : InstRW<[XGeneWriteVI3Asa], (instregex "UMINVv(8i8|8i16)v$")>; +def : InstRW<[XGeneWriteVI4Asa], (instregex "ADDVv(16i8)v$")>; +def : InstRW<[XGeneWriteVI4Asa], (instregex "SMAXVv(16i8)v$")>; +def : InstRW<[XGeneWriteVI4Asa], (instregex "SMINVv(16i8)v$")>; +def : InstRW<[XGeneWriteVI4Asa], (instregex "UMAXVv(16i8)v$")>; +def : InstRW<[XGeneWriteVI4Asa], (instregex "UMINVv(16i8)v$")>; + +//--- +// AdvSIMD Data Processing (Vector Integer) +// AdvSIMD copy +// 1Ass: DUP (element Q=0) +// 1AslAsl: DUP (element Q=1 size=x1000) +// 1Ass1Ass: DUP (element Q=1 size=other) +// 1St1Lf: DUP (general register) (Q=0) +// 1St1Lf1Falu: DUP (general register) (Q=1) +// 1Fmov: INS (element imm5=01000) +// 1Ass1Asi: INS (element imm5=other) +// 1St1Lf: INS (general register imm5=01000) +// 1St1Lf1Asi: INS (general register imm5=other) +// 1Sf1Ld1Sbfm: SMOV (all) +// 1Sf1Ld: UMOV (imm5=xxx00) +// 1Sf1Ld1Ubfm: UMOV (imm5=other) +//--- +def : InstRW<[XGeneWriteVI1Ass], (instregex "DUPv(2i32|4i16|8i8)lane$")>; +def : InstRW<[XGeneWriteVI1Asl1Asl], (instregex "DUPv(2i64)lane$")>; +def : InstRW<[XGeneWriteVI1Ass1Ass], (instregex "DUPv(4i32|8i16|16i8)lane$")>; +def : InstRW<[XGeneWriteVI1St1Lf], (instregex "DUPv(8i8|4i16|2i32)gpr$")>; +def : InstRW<[XGeneWriteVI1St1Lf1Falu], (instregex "DUPv(16i8|8i16|4i32|2i64)gpr$")>; +def : InstRW<[XGeneWriteVI1Fmov], (instregex "INSv(i64)lane$")>; +def : InstRW<[XGeneWriteVI1Ass1Asi], (instregex "INSv(i8|i16|i32)lane$")>; +def : InstRW<[XGeneWriteVI1St1Lf], (instregex "INSv(i64)gpr$")>; +def : InstRW<[XGeneWriteVI1St1Lf1Asi], (instregex "INSv(i8|i16|i32)gpr$")>; +def : InstRW<[XGeneWriteVI1Sf1Ld1Sbfm], (instregex "SMOVv(i8|i16)to32$")>; +def : InstRW<[XGeneWriteVI1Sf1Ld1Sbfm], (instregex "SMOVv(i8|i16|i32)to64$")>; +def : InstRW<[XGeneWriteVI1Sf1Ld], (instregex "UMOVvi64$")>; +def : InstRW<[XGeneWriteVI1Sf1Ld1Ubfm], (instregex "UMOVv(i8|i16|i32)$")>; + +//--- +// AdvSIMD Data 
Processing (Vector Integer) +// AdvSIMD vector x indexed element (the v1i16/v1i32/v1i64 names matched below share the 1AsmAsm write -- NOTE(review): presumably the scalar by-element forms; confirm) +// 1Asm: MUL, SQDMULH, SQRDMULH, MLA, MLS (by element Q=0) +// 1AsmAsm: SMULL, SMLAL, SMLSL (by element) +// 1AsmAsm: UMULL, UMLAL, UMLSL (by element) +// 1AsmAsm: SQDMULL, SQDMLAL, SQDMLSL (by element) +// 1AsmAsm: MUL, SQDMULH, SQRDMULH, MLA, MLS (by element Q=1) +//--- +def : InstRW<[XGeneWriteVI1Asm], (instregex "MULv(4i16|2i32)_indexed$")>; +def : InstRW<[XGeneWriteVI1Asm], (instregex "ML(A|S)v(4i16|2i32)_indexed$")>; +def : InstRW<[XGeneWriteVI1Asm], (instregex "SQDMULHv(4i16|2i32)_indexed$")>; +def : InstRW<[XGeneWriteVI1Asm], (instregex "SQRDMULHv(4i16|2i32)_indexed$")>; +def : InstRW<[XGeneWriteVI1AsmAsm], (instregex "SMULLv(4i16|8i16|2i32|4i32)_indexed$")>; +def : InstRW<[XGeneWriteVI1AsmAsm], (instregex "SML(A|S)Lv(4i16|8i16|2i32|4i32)_indexed$")>; +def : InstRW<[XGeneWriteVI1AsmAsm], (instregex "UMULLv(4i16|8i16|2i32|4i32)_indexed$")>; +def : InstRW<[XGeneWriteVI1AsmAsm], (instregex "UML(A|S)Lv(4i16|8i16|2i32|4i32)_indexed$")>; +def : InstRW<[XGeneWriteVI1AsmAsm], (instregex "SQDMULLv(4i16|2i32)_indexed$")>; +def : InstRW<[XGeneWriteVI1AsmAsm], (instregex "SQDMULLv(8i16|4i32|1i32|1i64)_indexed$")>; +def : InstRW<[XGeneWriteVI1AsmAsm], (instregex "SQDML(A|S)Lv(4i16|2i32)_indexed$")>; +def : InstRW<[XGeneWriteVI1AsmAsm], (instregex "SQDML(A|S)Lv(8i16|4i32|1i32|1i64)_indexed$")>; +def : InstRW<[XGeneWriteVI1AsmAsm], (instregex "MULv(8i16|4i32)_indexed$")>; +def : InstRW<[XGeneWriteVI1AsmAsm], (instregex "ML(A|S)v(8i16|4i32)_indexed$")>; +def : InstRW<[XGeneWriteVI1AsmAsm], (instregex "SQDMULHv(8i16|4i32|1i16|1i32)_indexed$")>; +def : InstRW<[XGeneWriteVI1AsmAsm], (instregex "SQRDMULHv(8i16|4i32|1i16|1i32)_indexed$")>; + +//--- +// AdvSIMD Data Processing (Vector Integer) +// AdvSIMD shift by immediate +// 1Ass: SSHR, SRSHR (Q=0) +// 1Ass: USHR, URSHR (Q=0) +// 1Ass: SHL, SQSHL, SQSHLU (Q=0) +// 1Ass: UQSHL (Q=0) +// 1Ass: SRI, SLI (Q=0) +// 1Ass: SQRSHRN, SQSHRUN, SQRSHRUN (Q=0, Q=1) +// 
1Ass: UQSHRN, UQRSHRN (Q=0, Q=1) -- NOTE(review): the defs below give SQRSHRN/SQSHRUN/SQRSHRUN/UQSHRN/UQRSHRN the write XGeneWriteVI1AssAss, not 1Ass as this table states; confirm which one is intended +// 1AssAss: SSHR, SRSHR (Q=1) +// 1AssAss: USHR, URSHR (Q=1) +// 1AssAss: SHL, SQSHL, SQSHLU (Q=1) +// 1AssAss: UQSHL (Q=1) +// 1AssAss: SRI, SLI (Q=1) +// 1AssAss: SHRN, RSHRN, SQSHRN (Q=0, Q=1) +// 1AssAss: SSHLL, USHLL (Q=0, Q=1) +// 1Ass1Asa: SSRA, SRSRA (Q=0) +// 1Ass1Asa: USRA, URSRA (Q=0) +// 1AssAss1AsaAsa: SSRA, SRSRA (Q=1) +// 1AssAss1AsaAsa: USRA, URSRA (Q=1) +//--- +def : InstRW<[XGeneWriteVI1Ass], (instregex "SSHRv(8i8|4i16|2i32)_shift$")>; +def : InstRW<[XGeneWriteVI1Ass], (instregex "SRSHRv(8i8|4i16|2i32)_shift$")>; +def : InstRW<[XGeneWriteVI1Ass], (instregex "USHRv(8i8|4i16|2i32)_shift$")>; +def : InstRW<[XGeneWriteVI1Ass], (instregex "URSHRv(8i8|4i16|2i32)_shift$")>; +def : InstRW<[XGeneWriteVI1Ass], (instregex "SHLv(8i8|4i16|2i32)_shift$")>; +def : InstRW<[XGeneWriteVI1Ass], (instregex "SQSHLv(8i8|4i16|2i32)_shift$")>; +def : InstRW<[XGeneWriteVI1Ass], (instregex "UQSHLv(8i8|4i16|2i32)_shift$")>; +def : InstRW<[XGeneWriteVI1Ass], (instregex "SQSHLUv(8i8|4i16|2i32)_shift$")>; +def : InstRW<[XGeneWriteVI1Ass], (instregex "SRIv(8i8|4i16|2i32)_shift$")>; +def : InstRW<[XGeneWriteVI1Ass], (instregex "SLIv(8i8|4i16|2i32)_shift$")>; +def : InstRW<[XGeneWriteVI1AssAss], (instregex "SQRSHRNv(8i8|4i16|2i32)_shift$")>; +def : InstRW<[XGeneWriteVI1AssAss], (instregex "SQRSHRNv(16i8|8i16|4i32)_shift$")>; +def : InstRW<[XGeneWriteVI1AssAss], (instregex "SQSHRUNv(8i8|4i16|2i32)_shift$")>; +def : InstRW<[XGeneWriteVI1AssAss], (instregex "SQSHRUNv(16i8|8i16|4i32)_shift$")>; +def : InstRW<[XGeneWriteVI1AssAss], (instregex "SQRSHRUNv(8i8|4i16|2i32)_shift$")>; +def : InstRW<[XGeneWriteVI1AssAss], (instregex "SQRSHRUNv(16i8|8i16|4i32)_shift$")>; +def : InstRW<[XGeneWriteVI1AssAss], (instregex "UQSHRNv(8i8|4i16|2i32)_shift$")>; +def : InstRW<[XGeneWriteVI1AssAss], (instregex "UQSHRNv(16i8|8i16|4i32)_shift$")>; +def : InstRW<[XGeneWriteVI1AssAss], (instregex "UQRSHRNv(8i8|4i16|2i32)_shift$")>; +def : InstRW<[XGeneWriteVI1AssAss],
(instregex "UQRSHRNv(16i8|8i16|4i32)_shift$")>; +def : InstRW<[XGeneWriteVI1AssAss], (instregex "SSHRv(16i8|8i16|4i32|2i64)_shift$")>; +def : InstRW<[XGeneWriteVI1AssAss], (instregex "SRSHRv(16i8|8i16|4i32|2i64)_shift$")>; +def : InstRW<[XGeneWriteVI1AssAss], (instregex "USHRv(16i8|8i16|4i32|2i64)_shift$")>; +def : InstRW<[XGeneWriteVI1AssAss], (instregex "URSHRv(16i8|8i16|4i32|2i64)_shift$")>; +def : InstRW<[XGeneWriteVI1AssAss], (instregex "SHLv(16i8|8i16|4i32|2i64)_shift$")>; +def : InstRW<[XGeneWriteVI1AssAss], (instregex "SQSHLv(16i8|8i16|4i32|2i64)_shift$")>; +def : InstRW<[XGeneWriteVI1AssAss], (instregex "UQSHLv(16i8|8i16|4i32|2i64)_shift$")>; +def : InstRW<[XGeneWriteVI1AssAss], (instregex "SQSHLUv(16i8|8i16|4i32|2i64)_shift$")>; +def : InstRW<[XGeneWriteVI1AssAss], (instregex "SRIv(16i8|8i16|4i32|2i64)_shift$")>; +def : InstRW<[XGeneWriteVI1AssAss], (instregex "SLIv(16i8|8i16|4i32|2i64)_shift$")>; +def : InstRW<[XGeneWriteVI1AssAss], (instregex "SHRNv(8i8|4i16|2i32)_shift$")>; +def : InstRW<[XGeneWriteVI1AssAss], (instregex "SHRNv(16i8|8i16|4i32)_shift$")>; +def : InstRW<[XGeneWriteVI1AssAss], (instregex "RSHRNv(8i8|4i16|2i32)_shift$")>; +def : InstRW<[XGeneWriteVI1AssAss], (instregex "RSHRNv(16i8|8i16|4i32)_shift$")>; +def : InstRW<[XGeneWriteVI1AssAss], (instregex "SQSHRNv(8i8|4i16|2i32)_shift$")>; +def : InstRW<[XGeneWriteVI1AssAss], (instregex "SQSHRNv(16i8|8i16|4i32)_shift$")>; +def : InstRW<[XGeneWriteVI1AssAss], (instregex "SSHLLv(8i8|4i16|2i32)_shift$")>; +def : InstRW<[XGeneWriteVI1AssAss], (instregex "SSHLLv(16i8|8i16|4i32)_shift$")>; +def : InstRW<[XGeneWriteVI1AssAss], (instregex "USHLLv(8i8|4i16|2i32)_shift$")>; +def : InstRW<[XGeneWriteVI1AssAss], (instregex "USHLLv(16i8|8i16|4i32)_shift$")>; +def : InstRW<[XGeneWriteVI1Ass1Asa], (instregex "SSRAv(8i8|4i16|2i32)_shift$")>; +def : InstRW<[XGeneWriteVI1Ass1Asa], (instregex "SRSRAv(8i8|4i16|2i32)_shift$")>; +def : InstRW<[XGeneWriteVI1Ass1Asa], (instregex "USRAv(8i8|4i16|2i32)_shift$")>; +def :
InstRW<[XGeneWriteVI1Ass1Asa], (instregex "URSRAv(8i8|4i16|2i32)_shift$")>; +def : InstRW<[XGeneWriteVI1AssAss1AsaAsa], (instregex "SSRAv(16i8|8i16|4i32|2i64)_shift$")>; +def : InstRW<[XGeneWriteVI1AssAss1AsaAsa], (instregex "SRSRAv(16i8|8i16|4i32|2i64)_shift$")>; +def : InstRW<[XGeneWriteVI1AssAss1AsaAsa], (instregex "USRAv(16i8|8i16|4i32|2i64)_shift$")>; +def : InstRW<[XGeneWriteVI1AssAss1AsaAsa], (instregex "URSRAv(16i8|8i16|4i32|2i64)_shift$")>; + +//--- +// AdvSIMD Data Processing (Vector Integer) +// AdvSIMD modified immediate (the regexes below match only the v4i16/v2i32 and v8i16/v4i32 opcode names for MOVI, MVNI, ORR and BIC) +// 1Asl: MOVI, MVNI, ORR, BIC, FMOV (Q=0) +// 1AslAsl: MOVI, MVNI, ORR, BIC, FMOV (Q=1) +//--- +def : InstRW<[XGeneWriteVI1Asl], (instregex "MOVIv(4i16|2i32)$")>; +def : InstRW<[XGeneWriteVI1Asl], (instregex "MVNIv(4i16|2i32)$")>; +def : InstRW<[XGeneWriteVI1Asl], (instregex "ORRv(4i16|2i32)$")>; +def : InstRW<[XGeneWriteVI1Asl], (instregex "BICv(4i16|2i32)$")>; +// TODO: not sure about FMOVs (NOTE(review): only the 4f16/2f32 and 8f16/4f32 "_ns" names are matched here; a 2f64 form, if one exists, would not be covered) +def : InstRW<[XGeneWriteVI1Asl], (instregex "FMOVv(4f16|2f32)_ns$")>; +def : InstRW<[XGeneWriteVI1AslAsl], (instregex "MOVIv(8i16|4i32)$")>; +def : InstRW<[XGeneWriteVI1AslAsl], (instregex "MVNIv(8i16|4i32)$")>; +def : InstRW<[XGeneWriteVI1AslAsl], (instregex "ORRv(8i16|4i32)$")>; +def : InstRW<[XGeneWriteVI1AslAsl], (instregex "BICv(8i16|4i32)$")>; +def : InstRW<[XGeneWriteVI1AslAsl], (instregex "FMOVv(8f16|4f32)_ns$")>; + +//--- +// AdvSIMD Data Processing (Vector Integer) +// AdvSIMD TBL, TBX +// All ops depend on Vm. +// The first TBX op depends on Vd. +// The first TBL/TBX ops depend on the low half of Vn, Vn+1, ... in turn, while the last TBL/TBX ops depend on the high half of Vn, Vn+1, ... in turn.
+// 2Asl: TBL/TBX (single register table Q=0) +// 4Asl: TBL/TBX (two register table Q=0) +// 6Asl: TBL/TBX (three register table Q=0) +// 8Asl: TBL/TBX (four register table Q=0) +// 2AslAsl: TBL/TBX (single register table Q=1) +// 4AslAsl: TBL/TBX (two register table Q=1) +// 6AslAsl: TBL/TBX (three register table Q=1) +// 8AslAsl: TBL/TBX (four register table Q=1) +//--- +def : InstRW<[XGeneWriteVI2Asl], (instregex "TB(L|X)v(8i8)One$")>; +def : InstRW<[XGeneWriteVI4Asl], (instregex "TB(L|X)v(8i8)Two$")>; +def : InstRW<[XGeneWriteVI6Asl], (instregex "TB(L|X)v(8i8)Three$")>; +def : InstRW<[XGeneWriteVI8Asl], (instregex "TB(L|X)v(8i8)Four$")>; +def : InstRW<[XGeneWriteVI2AslAsl], (instregex "TB(L|X)v(16i8)One$")>; +def : InstRW<[XGeneWriteVI4AslAsl], (instregex "TB(L|X)v(16i8)Two$")>; +def : InstRW<[XGeneWriteVI6AslAsl], (instregex "TB(L|X)v(16i8)Three$")>; +def : InstRW<[XGeneWriteVI8AslAsl], (instregex "TB(L|X)v(16i8)Four$")>; + +//--- +// AdvSIMD Data Processing (Vector Integer) +// AdvSIMD ZIP/UZP/TRN (size=11 selects 64-bit elements, i.e. the v2i64 opcodes; the other Q=1 arrangements are 16i8, 8i16 and 4i32) +// 1Ass: ZIP1/ZIP2/UZP1/UZP2 (Q=0) +// 1AslAsl: ZIP1/ZIP2/UZP1/UZP2 (Q=1, size=11) +// 1AssAss: ZIP1/ZIP2/UZP1/UZP2 (Q=1, size=other) +// 1AslAsl: TRN1/TRN2 (size=11) +// 1Ass: TRN1/TRN2 (size=other Q=0) +// 1AssAss: TRN1/TRN2 (size=other Q=1) +//--- +def : InstRW<[XGeneWriteVI1Ass], (instregex "ZIP(1|2)v(8i8|4i16|2i32)$")>; +def : InstRW<[XGeneWriteVI1Ass], (instregex "UZP(1|2)v(8i8|4i16|2i32)$")>; +def : InstRW<[XGeneWriteVI1Ass], (instregex "TRN(1|2)v(8i8|4i16|2i32)$")>; +def : InstRW<[XGeneWriteVI1AslAsl], (instregex "ZIP(1|2)v(2i64)$")>; +def : InstRW<[XGeneWriteVI1AslAsl], (instregex "UZP(1|2)v(2i64)$")>; +def : InstRW<[XGeneWriteVI1AslAsl], (instregex "TRN(1|2)v(2i64)$")>; +def : InstRW<[XGeneWriteVI1AssAss], (instregex "ZIP(1|2)v(16i8|8i16|4i32)$")>; +def : InstRW<[XGeneWriteVI1AssAss], (instregex "UZP(1|2)v(16i8|8i16|4i32)$")>; +def : InstRW<[XGeneWriteVI1AssAss], (instregex "TRN(1|2)v(16i8|8i16|4i32)$")>; + +//def : InstRW<[XGeneWriteVI1AslAsl],
(instregex "^ZIP(1|2)_(PPP|ZZZ)_(B|D|H|S)$")>; + + +//--- +// AdvSIMD Data Processing (Vector Integer) +// AdvSIMD EXT: +// 1Ass: EXT (Q=0) +// 1AssAss: EXT (Q=1) +//--- +def : InstRW<[XGeneWriteVI1Ass], (instregex "EXTv(8i8)$")>; +def : InstRW<[XGeneWriteVI1AssAss], (instregex "EXTv(16i8)$")>; + +//--- +// AdvSIMD Data Processing (Vector FP) +// AdvSIMD three same: +// 1Falu: FADD, FSUB, FMULX, FMLA, FMLS, FADDP (Q=0) -- NOTE(review): plain FMUL is not listed in this table and no regex in this section matches an FMULv<arrangement> name; confirm it is handled elsewhere +// 1Falu: FRECPS, FRSQRTS (Q=0) +// 1Falu: FABD (Q=0) +// 1Fdivd: FDIV (Q=0) +// 1Fsel: FMAX, FMAXNM, FMAXP, FMAXNMP (Q=0) +// 1Fsel: FMIN, FMINNM, FMINP, FMINNMP (Q=0) +// 1Fsel: FCMEQ, FCMGE, FCMGT, FACGE, FACGT (Q=0) +// 1FaluFalu: FADD, FSUB, FMULX, FMLA, FMLS, FADDP (Q=1) +// 1FaluFalu: FRECPS, FRSQRTS (Q=1) +// 1FaluFalu: FABD (Q=1) +// 1FdivdFdivd: FDIV (Q=1) +// 1FselFsel: FMAX, FMAXNM, FMAXP, FMAXNMP (Q=1) +// 1FselFsel: FMIN, FMINNM, FMINP, FMINNMP (Q=1) +// 1FselFsel: FCMEQ, FCMGE, FCMGT, FACGE, FACGT (Q=1) +//--- +def : InstRW<[XGeneWriteVF1Falu], (instregex "FADDv(4f16|2f32)$")>; +def : InstRW<[XGeneWriteVF1Falu], (instregex "FSUBv(4f16|2f32)$")>; +def : InstRW<[XGeneWriteVF1Falu], (instregex "FMULXv(4f16|2f32)$")>; +def : InstRW<[XGeneWriteVF1Falu], (instregex "FADDPv(4f16|2f32)$")>; +def : InstRW<[XGeneWriteVF1Falu], (instregex "FML(A|S)v(4f16|2f32)$")>; +def : InstRW<[XGeneWriteVF1Falu], (instregex "FRECPSv(4f16|2f32)$")>; +def : InstRW<[XGeneWriteVF1Falu], (instregex "FRSQRTSv(4f16|2f32)$")>; +def : InstRW<[XGeneWriteVF1Falu], (instregex "FABDv(4f16|2f32)$")>; +def : InstRW<[XGeneWriteVF1Fdivd], (instregex "FDIVv(4f16|2f32)$")>; +def : InstRW<[XGeneWriteVF1Fsel], (instregex "FMAXv(4f16|2f32)$")>; +def : InstRW<[XGeneWriteVF1Fsel], (instregex "FMAXNMv(4f16|2f32)$")>; +def : InstRW<[XGeneWriteVF1Fsel], (instregex "FMAXPv(4f16|2f32)$")>; +def : InstRW<[XGeneWriteVF1Fsel], (instregex "FMAXNMPv(4f16|2f32)$")>; +def : InstRW<[XGeneWriteVF1Fsel], (instregex "FMINv(4f16|2f32)$")>; +def : InstRW<[XGeneWriteVF1Fsel], (instregex
"FMINNMv(4f16|2f32)$")>; +def : InstRW<[XGeneWriteVF1Fsel], (instregex "FMINPv(4f16|2f32)$")>; +def : InstRW<[XGeneWriteVF1Fsel], (instregex "FMINNMPv(4f16|2f32)$")>; +def : InstRW<[XGeneWriteVF1Fsel], (instregex "FCM(EQ|GE|GT)v(4f16|2f32)$")>; +def : InstRW<[XGeneWriteVF1Fsel], (instregex "FAC(GE|GT)v(4f16|2f32)$")>; +def : InstRW<[XGeneWriteVF1FaluFalu], (instregex "FADDv(8f16|4f32|2f64)$")>; +def : InstRW<[XGeneWriteVF1FaluFalu], (instregex "FSUBv(8f16|4f32|2f64)$")>; +def : InstRW<[XGeneWriteVF1FaluFalu], (instregex "FMULXv(8f16|4f32|2f64)$")>; +def : InstRW<[XGeneWriteVF1FaluFalu], (instregex "FADDPv(8f16|4f32|2f64)$")>; +def : InstRW<[XGeneWriteVF1FaluFalu], (instregex "FML(A|S)v(8f16|4f32|2f64)$")>; +def : InstRW<[XGeneWriteVF1FaluFalu], (instregex "FRECPSv(8f16|4f32|2f64)$")>; +def : InstRW<[XGeneWriteVF1FaluFalu], (instregex "FRSQRTSv(8f16|4f32|2f64)$")>; +def : InstRW<[XGeneWriteVF1FaluFalu], (instregex "FABDv(8f16|4f32|2f64)$")>; +def : InstRW<[XGeneWriteVF1FdivdFdivd], (instregex "FDIVv(8f16|4f32|2f64)$")>; +def : InstRW<[XGeneWriteVF1FselFsel], (instregex "FMAXv(8f16|4f32|2f64)$")>; +def : InstRW<[XGeneWriteVF1FselFsel], (instregex "FMAXNMv(8f16|4f32|2f64)$")>; +def : InstRW<[XGeneWriteVF1FselFsel], (instregex "FMAXPv(8f16|4f32|2f64)$")>; +def : InstRW<[XGeneWriteVF1FselFsel], (instregex "FMAXNMPv(8f16|4f32|2f64)$")>; +def : InstRW<[XGeneWriteVF1FselFsel], (instregex "FMINv(8f16|4f32|2f64)$")>; +def : InstRW<[XGeneWriteVF1FselFsel], (instregex "FMINNMv(8f16|4f32|2f64)$")>; +def : InstRW<[XGeneWriteVF1FselFsel], (instregex "FMINPv(8f16|4f32|2f64)$")>; +def : InstRW<[XGeneWriteVF1FselFsel], (instregex "FMINNMPv(8f16|4f32|2f64)$")>; +def : InstRW<[XGeneWriteVF1FselFsel], (instregex "FCM(EQ|GE|GT)v(8f16|4f32|2f64)$")>; +def : InstRW<[XGeneWriteVF1FselFsel], (instregex "FAC(GE|GT)v(8f16|4f32|2f64)$")>; + +//--- +// AdvSIMD Data Processing (Vector FP) +// AdvSIMD two-reg misc: +// 1Fhcvt: FCVTN, FCVTL (size=0) +// NOTE: FCVTL is missing from the
documentation, but we will include it here +// 1Falu: FCVTN, FCVTL (size=1; matched below as the 2i32/4i32 opcode names, while size=0 is matched as 4i16/8i16) +// 1Falu: FCVTXN (all) +// 1Fcvt: FRINTN, FRINTM, FRINTA, FRINTP (Q=0) +// 1Fcvt: FRINTZ, FRINTX, FRINTI (Q=0) +// 1Fcvt: FCVTNS, FCVTMS, FCVTAS, FCVTPS (Q=0) +// 1Fcvt: FCVTNU, FCVTMU, FCVTAU, FCVTPU (Q=0) +// 1Fcvt: FCVTZS, FCVTZU (integer Q=0) +// 1Falu: SCVTF, UCVTF (integer Q=0) +// 1Fsel: FCMGT, FCMEQ, FCMLT (zero Q=0) +// 1Fsel: FCMGE, FCMLE (zero Q=0) +// 1Fmov: FABS, FNEG (Q=0) +// 1FcvtFcvt: FRINTN, FRINTM, FRINTA, FRINTP (Q=1) +// 1FcvtFcvt: FRINTZ, FRINTX, FRINTI (Q=1) +// 1FcvtFcvt: FCVTNS, FCVTMS, FCVTAS, FCVTPS (Q=1) +// 1FcvtFcvt: FCVTNU, FCVTMU, FCVTAU, FCVTPU (Q=1) +// 1FcvtFcvt: FCVTZS, FCVTZU (integer Q=1) +// 1FaluFalu: SCVTF, UCVTF (integer Q=1) +// 1FselFsel: FCMGT, FCMEQ, FCMLT (zero Q=1) +// 1FselFsel: FCMGE, FCMLE (zero Q=1) +// 1FmovFmov: FABS, FNEG (Q=1) +// 1Adre: FRECPE (all) +// 1Asre: FRSQRTE (all) +// 1Fsqrd: FSQRT (all) +//--- +def : InstRW<[XGeneWriteVF1Fhcvt], (instregex "FCVT(N|L)v(4i16|8i16)$")>; +def : InstRW<[XGeneWriteVF1Falu], (instregex "FCVT(N|L)v(2i32|4i32)$")>; +def : InstRW<[XGeneWriteVF1Falu], (instregex "FCVTXNv(2f32|4f32)$")>; +def : InstRW<[XGeneWriteVF1Fcvt], (instregex "FRINT(N|M|A|P)v(4f16|2f32)$")>; +def : InstRW<[XGeneWriteVF1Fcvt], (instregex "FRINT(Z|X|I)v(4f16|2f32)$")>; +def : InstRW<[XGeneWriteVF1Fcvt], (instregex "FCVT(N|M|A|P|Z)Sv(4f16|2f32)$")>; +def : InstRW<[XGeneWriteVF1Fcvt], (instregex "FCVT(N|M|A|P|Z)Uv(4f16|2f32)$")>; +def : InstRW<[XGeneWriteVF1Falu], (instregex "SCVTFv(4f16|2f32)$")>; +def : InstRW<[XGeneWriteVF1Falu], (instregex "UCVTFv(4f16|2f32)$")>; +def : InstRW<[XGeneWriteVF1Fsel], (instregex "FCM(GT|EQ|LT)v(4i16|2i32)rz$")>; +def : InstRW<[XGeneWriteVF1Fsel], (instregex "FCM(GE|LE)v(4i16|2i32)rz$")>; +def : InstRW<[XGeneWriteVF1Fmov], (instregex "FABSv(4f16|2f32)$")>; +def : InstRW<[XGeneWriteVF1Fmov], (instregex "FNEGv(4f16|2f32)$")>; +def : InstRW<[XGeneWriteVF1FcvtFcvt], (instregex
"FRINT(N|M|A|P)v(8f16|4f32|2f64)$")>; +def : InstRW<[XGeneWriteVF1FcvtFcvt], (instregex "FRINT(Z|X|I)v(8f16|4f32|2f64)$")>; +def : InstRW<[XGeneWriteVF1FcvtFcvt], (instregex "FCVT(N|M|A|P|Z)Sv(8f16|4f32|2f64)$")>; +def : InstRW<[XGeneWriteVF1FcvtFcvt], (instregex "FCVT(N|M|A|P|Z)Uv(8f16|4f32|2f64)$")>; +def : InstRW<[XGeneWriteVF1FaluFalu], (instregex "SCVTFv(8f16|4f32|2f64)$")>; +def : InstRW<[XGeneWriteVF1FaluFalu], (instregex "UCVTFv(8f16|4f32|2f64)$")>; +def : InstRW<[XGeneWriteVF1FselFsel], (instregex "FCM(GT|EQ|LT)v(8i16|4i32|2i64)rz$")>; +def : InstRW<[XGeneWriteVF1FselFsel], (instregex "FCM(GE|LE)v(8i16|4i32|2i64)rz$")>; +def : InstRW<[XGeneWriteVF1FmovFmov], (instregex "FABSv(8f16|4f32|2f64)$")>; +def : InstRW<[XGeneWriteVF1FmovFmov], (instregex "FNEGv(8f16|4f32|2f64)$")>; +def : InstRW<[XGeneWriteVF1Adre], (instregex "FRECPEv(4f16|2f32)$")>; +def : InstRW<[XGeneWriteVF1Adre], (instregex "FRECPEv(8f16|4f32|2f64)$")>; +def : InstRW<[XGeneWriteVF1Asre], (instregex "FRSQRTEv(4f16|2f32)$")>; +def : InstRW<[XGeneWriteVF1Asre], (instregex "FRSQRTEv(8f16|4f32|2f64)$")>; +def : InstRW<[XGeneWriteVF1Fsqrd], (instregex "FSQRTv(4f16|2f32)$")>; +def : InstRW<[XGeneWriteVF1Fsqrd], (instregex "FSQRTv(8f16|4f32|2f64)$")>; + +//--- +// AdvSIMD Data Processing (Vector FP) +// AdvSIMD across lanes: +// 2Asa: FMAXV, FMINV, FMAXNMV, FMINNMV (every arrangement matched below -- 4i16, 8i16, 4i32 -- gets this same write) +// AdvSIMD vector x indexed element: +// 1Asm: FMUL, FMULX, FMLA, FMLS (by element Q=0) +// 1AsmAsm: FMUL, FMULX, FMLA, FMLS (by element Q=1) +// AdvSIMD shift by immediate: +// 1Falu: SCVTF, UCVTF (fixed-point Q=0) +// 1Fcvt: FCVTZS, FCVTZU (fixed-point Q=0) +// 1FaluFalu: SCVTF, UCVTF (fixed-point Q=1) +// 1FcvtFcvt: FCVTZS, FCVTZU (fixed-point Q=1) +//--- +def : InstRW<[XGeneWriteVF2Asa], (instregex "FMAXVv(4i16|8i16|4i32)v$")>; +def : InstRW<[XGeneWriteVF2Asa], (instregex "FMINVv(4i16|8i16|4i32)v$")>; +def : InstRW<[XGeneWriteVF2Asa], (instregex "FMAXNMVv(4i16|8i16|4i32)v$")>; +def : InstRW<[XGeneWriteVF2Asa], (instregex
"FMINNMVv(4i16|8i16|4i32)v$")>; + +def : InstRW<[XGeneWriteVF1Asm], (instregex "FMULv(4i16|2i32)_indexed$")>; +def : InstRW<[XGeneWriteVF1Asm], (instregex "FMULXv(4i16|2i32)_indexed$")>; +def : InstRW<[XGeneWriteVF1Asm], (instregex "FML(A|S)v(4i16|2i32)_indexed$")>; +def : InstRW<[XGeneWriteVF1AsmAsm], (instregex "FMULv(8i16|4i32|2i64)_indexed$")>; +def : InstRW<[XGeneWriteVF1AsmAsm], (instregex "FMULXv(8i16|4i32|2i64)_indexed$")>; +def : InstRW<[XGeneWriteVF1AsmAsm], (instregex "FML(A|S)v(8i16|4i32|2i64)_indexed$")>; + +def : InstRW<[XGeneWriteVF1Falu], (instregex "SCVTFv(4i16|2i32)_shift$")>; +def : InstRW<[XGeneWriteVF1Falu], (instregex "UCVTFv(4i16|2i32)_shift$")>; +def : InstRW<[XGeneWriteVF1Fcvt], (instregex "FCVTZ(S|U)v(4i16|2i32)_shift$")>; +def : InstRW<[XGeneWriteVF1FaluFalu], (instregex "SCVTFv(8i16|4i32|2i64)_shift$")>; +def : InstRW<[XGeneWriteVF1FaluFalu], (instregex "UCVTFv(8i16|4i32|2i64)_shift$")>; +def : InstRW<[XGeneWriteVF1FcvtFcvt], (instregex "FCVTZ(S|U)v(8i16|4i32|2i64)_shift$")>; + +} diff --git a/lib/Target/AArch64/AArch64Subtarget.cpp b/lib/Target/AArch64/AArch64Subtarget.cpp index 04bb90d30d6..8e384261163 100644 --- a/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/lib/Target/AArch64/AArch64Subtarget.cpp @@ -144,6 +144,9 @@ void AArch64Subtarget::initializeProperties() { case CortexA75: PrefFunctionAlignment = 4; break; + case XGene: + MaxInterleaveFactor = 4; + break; case Others: break; } } diff --git a/lib/Target/AArch64/AArch64Subtarget.h b/lib/Target/AArch64/AArch64Subtarget.h index 5af4c0dd9c1..c17da7c445a 100644 --- a/lib/Target/AArch64/AArch64Subtarget.h +++ b/lib/Target/AArch64/AArch64Subtarget.h @@ -56,7 +56,8 @@ public: ThunderX, ThunderXT81, ThunderXT83, - ThunderXT88 + ThunderXT88, + XGene }; protected: -- cgit v1.2.3