//==- AArch64SchedXGene.td - X-Gene Scheduling Definitions -*- tablegen -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the itinerary class data for the ARM XGene processors.
//
//===----------------------------------------------------------------------===//

// ===---------------------------------------------------------------------===//
// The following definitions describe the simpler per-operand machine model.
// This works with MachineScheduler. See MCSchedModel.h for details.

// X-Gene machine model for scheduling and other instruction cost heuristics.
def XGeneModel : SchedMachineModel {
  let MicroOpBufferSize = 64; // Value of 64 confirmed by APM
  let IssueWidth = 4;         // 4 micro-ops are dispatched per cycle.
  let LoadLatency = 5;        // Optimistic load latency assuming bypass.
                              // This is overridden by OperandCycles if the
                              // Itineraries are queried instead.
  let MispredictPenalty = 64; // Determined by experiments

  // Enable partial & runtime unrolling. The magic number is chosen based on
  // experiments and benchmarking data.
  // Tried with 8, 12, 16, 24; 12 seems to be the best for CoreMark
  // coremark: any value but twelve gives at least -2% (DO NOT CHANGE, I guess)
  // NOTE(review): the value set below is 4, not the 12 described in the notes
  // above; confirm which one is intended before tuning this further.
  let LoopMicroOpBufferSize = 4; // TODO: try with high values such as 50

  let CompleteModel = 1;

  // SVE is listed as an unsupported feature for this model. The field needs an
  // explicit element type: a bare `list` is not a valid TableGen type.
  list<Predicate> UnsupportedFeatures = [HasSVE];
}

//===----------------------------------------------------------------------===//
// Define each kind of processor resource and number available.
// Buffer sizes: 16 looks like the value to lock in; it gave good results in
// both coremark and spec, and spec/gobmk in particular favours it.

// Branch
def XGeneUnitB : ProcResource<1> {
  let BufferSize = 16;
}

// Load
def XGeneUnitLd : ProcResource<1> {
  let BufferSize = 16;
}

// Store
def XGeneUnitSt : ProcResource<1> {
  let BufferSize = 16;
}

// Int ALU
def XGeneUnitIXn : ProcResource<2> {
  let BufferSize = 20;
}

// Float ALU
def XGeneUnitFSU : ProcResource<1> {
  let BufferSize = 16;
}

// Float Division
def XGeneUnitFDiv : ProcResource<1> {
  let BufferSize = 16;
}

// The machine has two arithmetic units, but only one of them can execute every
// instruction; the other handles just a subset. This is modelled with a third,
// dummy unit acting as a "lock": instructions restricted to the full-featured
// unit also consume the lock, which limits how they can be scheduled.
// TODO: try lower BufferSize (10) for IXB to create back-pressure

// Int ALU B lock
def XGeneLockIXB : ProcResource<1> {
  let BufferSize = 10;
}

// Integer division and integer multiplication can each be issued only once
// every two cycles (independently of one another); the two locks below model
// that restriction.

// Int Division
def XGeneLockDiv : ProcResource<1> {
  let BufferSize = 16;
}

// Int Multipl
def XGeneLockMul : ProcResource<1> {
  let BufferSize = 16;
}

// Floating-point sqrt and div cannot be issued at the same time (the limit
// applies to both together), so they share this single lock.

// Float lock
def XGeneLockFInst : ProcResource<1> {
  let BufferSize = 16;
}

//===----------------------------------------------------------------------===//
// Subtarget-specific SchedWrite types which both map the ProcResources and
// set the latency.
let SchedModel = XGeneModel in { // ALU instructions which can run on both ALU units def : WriteRes; def : WriteRes; def : WriteRes; // ALU instructions which are restricted to IXB def : WriteRes; def : WriteRes; def : WriteRes; // MAC instructions can run only on IXB def : WriteRes; def : WriteRes; // DIV instructions can run only on IXB def : WriteRes; def : WriteRes; // Load def : WriteRes; def : WriteRes; def : WriteRes; // Pre/post indexing gonna be accounted for each individual instructions def : WriteRes; // Store def : WriteRes; def : WriteRes; def : WriteRes; def : WriteRes; // WriteAtomic - not supported def : WriteRes { let Unsupported = 1; } // Branch - always no latency def : WriteRes { let Latency = 0; } def : WriteRes { let Latency = 0; } def : WriteRes { let Latency = 0; } def : WriteRes { let Latency = 0; } def : WriteRes { let Latency = 0; } // FP ALU def : WriteRes; def : WriteRes; def : WriteRes; def : WriteRes; def : WriteRes; def : WriteRes; // FP Mul, Div, Sqrt def : WriteRes; def : WriteRes; //--- // AdvSIMD Data Processing (Scalar FP) //--- def XGeneWriteF1Asm : SchedWriteRes<[XGeneUnitFSU]> { let Latency = 5; let ResourceCycles = [1]; let NumMicroOps = 1; } def XGeneWriteF1Adre : SchedWriteRes<[XGeneUnitFSU]> { let Latency = 3; let ResourceCycles = [1]; let NumMicroOps = 1; } def XGeneWriteF1Asre : SchedWriteRes<[XGeneUnitFSU]> { let Latency = 5; let ResourceCycles = [1]; let NumMicroOps = 1; } def XGeneWriteF1Falu : SchedWriteRes<[XGeneUnitFSU]> { let Latency = 5; let ResourceCycles = [1]; let NumMicroOps = 1; } def XGeneWriteF1Fcmp : SchedWriteRes<[XGeneUnitFSU]> { let Latency = 10; let ResourceCycles = [1]; let NumMicroOps = 1; } def XGeneWriteF1Fcvt : SchedWriteRes<[XGeneUnitFSU]> { let Latency = 5; let ResourceCycles = [1]; let NumMicroOps = 1; } def XGeneWriteF1Fdivs : SchedWriteRes<[XGeneUnitFSU, XGeneLockFInst]> { let Latency = 22; let ResourceCycles = [8, 22]; let NumMicroOps = 1; } def XGeneWriteF1Fdivd : 
SchedWriteRes<[XGeneUnitFSU, XGeneLockFInst]> { let Latency = 28; let ResourceCycles = [11, 28]; let NumMicroOps = 1; } def XGeneWriteF1Fhcvt : SchedWriteRes<[XGeneUnitFSU]> { let Latency = 3; let ResourceCycles = [1]; let NumMicroOps = 1; } def XGeneWriteF1Fmov : SchedWriteRes<[XGeneUnitFSU]> { let Latency = 2; let ResourceCycles = [1]; let NumMicroOps = 1; } def XGeneWriteF1Fsel : SchedWriteRes<[XGeneUnitFSU]> { let Latency = 3; let ResourceCycles = [1]; let NumMicroOps = 1; } def XGeneWriteF1Fsqrs : SchedWriteRes<[XGeneUnitFSU, XGeneLockFInst]> { let Latency = 22; let ResourceCycles = [8, 22]; let NumMicroOps = 1; } def XGeneWriteF1Fsqrd : SchedWriteRes<[XGeneUnitFSU, XGeneLockFInst]> { let Latency = 38; let ResourceCycles = [17, 38]; let NumMicroOps = 1; } // instructions with store ops are extra-special because the chip will be using // data bypass; latencies are measured since the registers become available and // are as following: // for int, 1 for the address register, -1 for the data register // for float, 4 for the address register, 1 for the data register // for complex, 4 for the address register, 2 for the data register // we assumed latencies from data register availability def XGeneWriteF1St1Lf : SchedWriteRes<[XGeneUnitLd, XGeneUnitSt]> { let Latency = 9; let ResourceCycles = [1, 1]; let NumMicroOps = 2; } def XGeneWriteF1Sf1Ld : SchedWriteRes<[XGeneUnitLd, XGeneUnitSt]> { let Latency = 6; let ResourceCycles = [1, 1]; let NumMicroOps = 2; } def XGeneWriteF1Fcvt1Sf1Ld : SchedWriteRes<[XGeneUnitFSU, XGeneUnitLd, XGeneUnitSt]> { let Latency = 11; let ResourceCycles = [1, 1, 1]; let NumMicroOps = 3; } def XGeneWriteF1St1Lf1Falu : SchedWriteRes<[XGeneUnitFSU, XGeneUnitLd, XGeneUnitSt]> { let Latency = 14; let ResourceCycles = [1, 1, 1]; let NumMicroOps = 3; } //--- // Load instructions // NOTE: the way load latencies are calculated here is as follows: // biggest load first: 5 (int - Ld), 10 (float - Lf) or 11 (complex - Lc) // arithmethics is 
parallelized with the loads, so it does not affect latency // 1 for each other load - since they are pipelined, the only thing which // further contributes to the latency is the issue time //--- // Integer loads def XGeneWriteLD1Ld : SchedWriteRes<[XGeneUnitLd]> { let Latency = 5; let ResourceCycles = [1]; let NumMicroOps = 1; } def XGeneWriteLD1LdLd : SchedWriteRes<[XGeneUnitLd]> { let Latency = 6; let ResourceCycles = [2]; let NumMicroOps = 2; } def XGeneWriteLD1LdAlu : SchedWriteRes<[XGeneUnitLd, XGeneUnitIXn]> { let Latency = 5; let ResourceCycles = [1, 1]; let NumMicroOps = 2; } def XGeneWriteLD1LdLdAlu : SchedWriteRes<[XGeneUnitLd, XGeneUnitIXn]> { let Latency = 6; let ResourceCycles = [2, 1]; let NumMicroOps = 3; } def XGeneWriteLD1Ld1Sbfm : SchedWriteRes<[XGeneUnitLd, XGeneUnitIXn]> { let Latency = 6; let ResourceCycles = [1, 1]; let NumMicroOps = 2; } def XGeneWriteLD1Ld1SbfmAlu : SchedWriteRes<[XGeneUnitLd, XGeneUnitIXn]> { let Latency = 6; let ResourceCycles = [1, 2]; let NumMicroOps = 3; } def XGeneWriteLD1Ld1LdSbfm1Sbfm : SchedWriteRes<[XGeneUnitLd, XGeneUnitIXn]> { let Latency = 7; let ResourceCycles = [2, 2]; let NumMicroOps = 4; } def XGeneWriteLD1Ld1LdSbfm1SbfmAlu : SchedWriteRes<[XGeneUnitLd, XGeneUnitIXn]> { let Latency = 7; let ResourceCycles = [2, 3]; let NumMicroOps = 5; } // Float/SIMD loads (1LfLf and 1LfLfAlu already covered by vector loads) def XGeneWriteLD1LfAlu1Lf : SchedWriteRes<[XGeneUnitLd, XGeneUnitIXn]> { let Latency = 11; let ResourceCycles = [2, 1]; let NumMicroOps = 3; } def XGeneWriteLD1LfLfAlu1LfLf : SchedWriteRes<[XGeneUnitLd, XGeneUnitIXn]> { let Latency = 13; let ResourceCycles = [4, 1]; let NumMicroOps = 5; } def XGeneWriteLD1LfLfAlu1LfLfAlu: SchedWriteRes<[XGeneUnitLd, XGeneUnitIXn]> { let Latency = 13; let ResourceCycles = [4, 2]; let NumMicroOps = 6; } // Vector loads def XGeneWriteLD1Lc : SchedWriteRes<[XGeneUnitLd]> { let Latency = 11; let ResourceCycles = [1]; let NumMicroOps = 1; } def XGeneWriteLD1LcAlu : 
SchedWriteRes<[XGeneUnitLd, XGeneUnitIXn]> { let Latency = 11; let ResourceCycles = [1, 1]; let NumMicroOps = 2; } def XGeneWriteLD1LcLc : SchedWriteRes<[XGeneUnitLd]> { let Latency = 12; let ResourceCycles = [2]; let NumMicroOps = 2; } def XGeneWriteLD1LcLcAlu : SchedWriteRes<[XGeneUnitLd, XGeneUnitIXn]> { let Latency = 12; let ResourceCycles = [2, 1]; let NumMicroOps = 3; } def XGeneWriteLD1X3Lc : SchedWriteRes<[XGeneUnitLd]> { let Latency = 13; let ResourceCycles = [3]; let NumMicroOps = 3; } def XGeneWriteLD1X3LcAlu : SchedWriteRes<[XGeneUnitLd, XGeneUnitIXn]> { let Latency = 13; let ResourceCycles = [3, 1]; let NumMicroOps = 4; } def XGeneWriteLD1X4Lc : SchedWriteRes<[XGeneUnitLd]> { let Latency = 14; let ResourceCycles = [4]; let NumMicroOps = 4; } def XGeneWriteLD1X4LcAlu : SchedWriteRes<[XGeneUnitLd, XGeneUnitIXn]> { let Latency = 14; let ResourceCycles = [4, 1]; let NumMicroOps = 5; } def XGeneWriteLD1X6Lc : SchedWriteRes<[XGeneUnitLd]> { let Latency = 16; let ResourceCycles = [6]; let NumMicroOps = 6; } def XGeneWriteLD1X6LcAlu : SchedWriteRes<[XGeneUnitLd, XGeneUnitIXn]> { let Latency = 16; let ResourceCycles = [6, 1]; let NumMicroOps = 7; } def XGeneWriteLD1X8Lc : SchedWriteRes<[XGeneUnitLd]> { let Latency = 18; let ResourceCycles = [8]; let NumMicroOps = 8; } def XGeneWriteLD1X8LcAlu : SchedWriteRes<[XGeneUnitLd, XGeneUnitIXn]> { let Latency = 18; let ResourceCycles = [8, 1]; let NumMicroOps = 9; } def XGeneWriteLD1Lf : SchedWriteRes<[XGeneUnitLd]> { let Latency = 10; let ResourceCycles = [1]; let NumMicroOps = 1; } def XGeneWriteLD1LfAlu : SchedWriteRes<[XGeneUnitLd, XGeneUnitIXn]> { let Latency = 10; let ResourceCycles = [1, 1]; let NumMicroOps = 2; } def XGeneWriteLD1LfLf : SchedWriteRes<[XGeneUnitLd]> { let Latency = 11; let ResourceCycles = [2]; let NumMicroOps = 2; } def XGeneWriteLD1LfLfAlu : SchedWriteRes<[XGeneUnitLd, XGeneUnitIXn]> { let Latency = 11; let ResourceCycles = [2, 1]; let NumMicroOps = 3; } def XGeneWriteLD1X3Lf : 
SchedWriteRes<[XGeneUnitLd]> { let Latency = 12; let ResourceCycles = [3]; let NumMicroOps = 3; } def XGeneWriteLD1X3LfAlu : SchedWriteRes<[XGeneUnitLd, XGeneUnitIXn]> { let Latency = 12; let ResourceCycles = [3, 1]; let NumMicroOps = 4; } def XGeneWriteLD1X4Lf : SchedWriteRes<[XGeneUnitLd]> { let Latency = 13; let ResourceCycles = [4]; let NumMicroOps = 4; } def XGeneWriteLD1X4LfAlu : SchedWriteRes<[XGeneUnitLd, XGeneUnitIXn]> { let Latency = 13; let ResourceCycles = [4, 1]; let NumMicroOps = 5; } def XGeneWriteLD1X6Lf : SchedWriteRes<[XGeneUnitLd]> { let Latency = 15; let ResourceCycles = [6]; let NumMicroOps = 6; } def XGeneWriteLD1X6LfAlu : SchedWriteRes<[XGeneUnitLd, XGeneUnitIXn]> { let Latency = 15; let ResourceCycles = [6, 1]; let NumMicroOps = 7; } def XGeneWriteLD1X8Lf : SchedWriteRes<[XGeneUnitLd]> { let Latency = 17; let ResourceCycles = [8]; let NumMicroOps = 8; } def XGeneWriteLD1X8LfAlu : SchedWriteRes<[XGeneUnitLd, XGeneUnitIXn]> { let Latency = 17; let ResourceCycles = [8, 1]; let NumMicroOps = 9; } def XGeneWriteLD1Lf1Asi : SchedWriteRes<[XGeneUnitLd, XGeneUnitFSU]> { let Latency = 13; let ResourceCycles = [1, 1]; let NumMicroOps = 2; } def XGeneWriteLD1LfAlu1Asi : SchedWriteRes<[XGeneUnitLd, XGeneUnitIXn, XGeneUnitFSU]> { let Latency = 13; let ResourceCycles = [1, 1, 1]; let NumMicroOps = 3; } def XGeneWriteLD1LfLf1AsiAsi : SchedWriteRes<[XGeneUnitLd, XGeneUnitFSU]> { let Latency = 17; let ResourceCycles = [2, 2]; let NumMicroOps = 4; } def XGeneWriteLD1LfLfAlu1AsiAsi : SchedWriteRes<[XGeneUnitLd, XGeneUnitIXn, XGeneUnitFSU]> { let Latency = 17; let ResourceCycles = [2, 1, 2]; let NumMicroOps = 5; } def XGeneWriteLD1X3Lf1X3Asi : SchedWriteRes<[XGeneUnitLd, XGeneUnitFSU]> { let Latency = 21; let ResourceCycles = [3, 3]; let NumMicroOps = 6; } def XGeneWriteLD1X3LfAlu1X3Asi : SchedWriteRes<[XGeneUnitLd, XGeneUnitIXn, XGeneUnitFSU]> { let Latency = 21; let ResourceCycles = [3, 1, 3]; let NumMicroOps = 7; } def XGeneWriteLD1X4Lf1X4Asi : 
SchedWriteRes<[XGeneUnitLd, XGeneUnitFSU]> { let Latency = 25; let ResourceCycles = [4, 4]; let NumMicroOps = 8; } def XGeneWriteLD1X4LfAlu1X4Asi : SchedWriteRes<[XGeneUnitLd, XGeneUnitIXn, XGeneUnitFSU]> { let Latency = 25; let ResourceCycles = [4, 1, 4]; let NumMicroOps = 9; } //--- // Store instructions // NOTE: Stores generally have a latency of zero - this value was considered for // all the stores below; on the other hand, if there is a dependent load // following the store, the latencies have different values; // the current model will not cover these special cases //--- // Integer stores def XGeneWriteST1St : SchedWriteRes<[XGeneUnitSt]> { let Latency = 0; let ResourceCycles = [1]; let NumMicroOps = 1; } def XGeneWriteST1StAlu : SchedWriteRes<[XGeneUnitSt, XGeneUnitIXn]> { let Latency = 1; let ResourceCycles = [1, 1]; let NumMicroOps = 2; } def XGeneWriteST1StSt : SchedWriteRes<[XGeneUnitSt]> { let Latency = 0; let ResourceCycles = [2]; let NumMicroOps = 2; } def XGeneWriteST1StStAlu : SchedWriteRes<[XGeneUnitSt, XGeneUnitIXn]> { let Latency = 1; let ResourceCycles = [2, 1]; let NumMicroOps = 3; } // Float/SIMD stores (1LfLf and 1LfLfAlu already covered by vector stores) def XGeneWriteST1SfAlu1Sf : SchedWriteRes<[XGeneUnitSt, XGeneUnitIXn]> { let Latency = 1; let ResourceCycles = [2, 1]; let NumMicroOps = 3; } def XGeneWriteST1SfSfAlu1SfSf : SchedWriteRes<[XGeneUnitSt, XGeneUnitIXn]> { let Latency = 1; let ResourceCycles = [4, 1]; let NumMicroOps = 5; } def XGeneWriteST1SfSfAlu1SfSfAlu: SchedWriteRes<[XGeneUnitSt, XGeneUnitIXn]> { let Latency = 2; let ResourceCycles = [4, 2]; let NumMicroOps = 6; } // Vector stores def XGeneWriteST1Sc : SchedWriteRes<[XGeneUnitSt]> { let Latency = 0; let ResourceCycles = [1]; let NumMicroOps = 1; } def XGeneWriteST1ScAlu : SchedWriteRes<[XGeneUnitSt, XGeneUnitIXn]> { let Latency = 1; let ResourceCycles = [1, 1]; let NumMicroOps = 2; } def XGeneWriteST1ScSc : SchedWriteRes<[XGeneUnitSt]> { let Latency = 0; let ResourceCycles 
= [2]; let NumMicroOps = 2; } def XGeneWriteST1ScScAlu : SchedWriteRes<[XGeneUnitSt, XGeneUnitIXn]> { let Latency = 1; let ResourceCycles = [2, 1]; let NumMicroOps = 3; } def XGeneWriteST1X3Sc : SchedWriteRes<[XGeneUnitSt]> { let Latency = 0; let ResourceCycles = [3]; let NumMicroOps = 3; } def XGeneWriteST1X3ScAlu : SchedWriteRes<[XGeneUnitSt, XGeneUnitIXn]> { let Latency = 1; let ResourceCycles = [3, 1]; let NumMicroOps = 4; } def XGeneWriteST1X4Sc : SchedWriteRes<[XGeneUnitSt]> { let Latency = 0; let ResourceCycles = [4]; let NumMicroOps = 4; } def XGeneWriteST1X4ScAlu : SchedWriteRes<[XGeneUnitSt, XGeneUnitIXn]> { let Latency = 1; let ResourceCycles = [4, 1]; let NumMicroOps = 5; } def XGeneWriteST1X6Sc : SchedWriteRes<[XGeneUnitSt]> { let Latency = 0; let ResourceCycles = [6]; let NumMicroOps = 6; } def XGeneWriteST1X6ScAlu : SchedWriteRes<[XGeneUnitSt, XGeneUnitIXn]> { let Latency = 1; let ResourceCycles = [6, 1]; let NumMicroOps = 7; } def XGeneWriteST1X8Sc : SchedWriteRes<[XGeneUnitSt]> { let Latency = 0; let ResourceCycles = [8]; let NumMicroOps = 8; } def XGeneWriteST1X8ScAlu : SchedWriteRes<[XGeneUnitSt, XGeneUnitIXn]> { let Latency = 1; let ResourceCycles = [8, 1]; let NumMicroOps = 9; } def XGeneWriteST1Sf : SchedWriteRes<[XGeneUnitSt]> { let Latency = 0; let ResourceCycles = [1]; let NumMicroOps = 1; } def XGeneWriteST1SfAlu : SchedWriteRes<[XGeneUnitSt, XGeneUnitIXn]> { let Latency = 1; let ResourceCycles = [1, 1]; let NumMicroOps = 2; } def XGeneWriteST1SfSf : SchedWriteRes<[XGeneUnitSt]> { let Latency = 0; let ResourceCycles = [2]; let NumMicroOps = 2; } def XGeneWriteST1SfSfAlu : SchedWriteRes<[XGeneUnitSt, XGeneUnitIXn]> { let Latency = 1; let ResourceCycles = [2, 1]; let NumMicroOps = 3; } def XGeneWriteST1X3Sf : SchedWriteRes<[XGeneUnitSt]> { let Latency = 0; let ResourceCycles = [3]; let NumMicroOps = 3; } def XGeneWriteST1X3SfAlu : SchedWriteRes<[XGeneUnitSt, XGeneUnitIXn]> { let Latency = 1; let ResourceCycles = [3, 1]; let NumMicroOps = 4; 
} def XGeneWriteST1X4Sf : SchedWriteRes<[XGeneUnitSt]> { let Latency = 0; let ResourceCycles = [4]; let NumMicroOps = 4; } def XGeneWriteST1X4SfAlu : SchedWriteRes<[XGeneUnitSt, XGeneUnitIXn]> { let Latency = 1; let ResourceCycles = [4, 1]; let NumMicroOps = 5; } def XGeneWriteST1X6Sf : SchedWriteRes<[XGeneUnitSt]> { let Latency = 0; let ResourceCycles = [6]; let NumMicroOps = 6; } def XGeneWriteST1X6SfAlu : SchedWriteRes<[XGeneUnitSt, XGeneUnitIXn]> { let Latency = 1; let ResourceCycles = [6, 1]; let NumMicroOps = 7; } def XGeneWriteST1X8Sf : SchedWriteRes<[XGeneUnitSt]> { let Latency = 0; let ResourceCycles = [8]; let NumMicroOps = 8; } def XGeneWriteST1X8SfAlu : SchedWriteRes<[XGeneUnitSt, XGeneUnitIXn]> { let Latency = 1; let ResourceCycles = [8, 1]; let NumMicroOps = 9; } //--- // Integer Data Processing //--- def XGeneWriteI1Sbfm1Alu : SchedWriteRes<[XGeneUnitIXn]> { let Latency = 2; let ResourceCycles = [2]; let NumMicroOps = 2; } def XGeneWriteI1Alb1Alu : SchedWriteRes<[XGeneUnitIXn, XGeneLockIXB]> { let Latency = 3; let ResourceCycles = [2, 1]; let NumMicroOps = 2; } def XGeneWriteI1Sbfm : SchedWriteRes<[XGeneUnitIXn]> { let Latency = 1; let ResourceCycles = [1]; let NumMicroOps = 1; } def XGeneWriteI1Car: SchedWriteRes<[XGeneUnitIXn, XGeneLockIXB]> { let Latency = 1; let ResourceCycles = [1, 1]; let NumMicroOps = 1; } def XGeneWriteI1Set: SchedWriteRes<[XGeneUnitIXn, XGeneLockIXB]> { let Latency = 1; let ResourceCycles = [1, 1]; let NumMicroOps = 1; } def XGeneWriteI1Sbfm1Set : SchedWriteRes<[XGeneUnitIXn, XGeneLockIXB]> { let Latency = 2; let ResourceCycles = [2, 1]; let NumMicroOps = 2; } def XGeneWriteI1Alu : SchedWriteRes<[XGeneUnitIXn]> { let Latency = 1; let ResourceCycles = [1]; let NumMicroOps = 1; } def XGeneWriteI1Mlw1Alu : SchedWriteRes<[XGeneUnitIXn, XGeneLockIXB, XGeneLockMul]> { let Latency = 5; let ResourceCycles = [2, 1, 2]; let NumMicroOps = 2; } def XGeneWriteI1Mlx1Alu : SchedWriteRes<[XGeneUnitIXn, XGeneLockIXB, XGeneLockMul]> { let 
Latency = 6; let ResourceCycles = [2, 1, 2]; let NumMicroOps = 2; } def XGeneWriteI1Mlw : SchedWriteRes<[XGeneUnitIXn, XGeneLockIXB, XGeneLockMul]> { let Latency = 4; let ResourceCycles = [1, 1, 2]; let NumMicroOps = 1; } def XGeneWriteI1Mlx : SchedWriteRes<[XGeneUnitIXn, XGeneLockIXB, XGeneLockMul]> { let Latency = 5; let ResourceCycles = [1, 1, 2]; let NumMicroOps = 1; } def XGeneWriteI1Div : SchedWriteRes<[XGeneUnitIXn, XGeneLockIXB, XGeneLockDiv]> { let Latency = 26; let ResourceCycles = [1, 1, 26]; let NumMicroOps = 1; } def XGeneWriteI1Alb : SchedWriteRes<[XGeneUnitIXn, XGeneLockIXB]> { let Latency = 2; let ResourceCycles = [1, 1]; let NumMicroOps = 1; } //--- // AdvSIMD Data Processing (Vector Integer) //--- def XGeneWriteVI1Asa : SchedWriteRes<[XGeneUnitFSU]> { let Latency = 3; let ResourceCycles = [1]; let NumMicroOps = 1; } def XGeneWriteVI1Ass : SchedWriteRes<[XGeneUnitFSU]> { let Latency = 3; let ResourceCycles = [1]; let NumMicroOps = 1; } def XGeneWriteVI1Asl : SchedWriteRes<[XGeneUnitFSU]> { let Latency = 2; let ResourceCycles = [1]; let NumMicroOps = 1; } def XGeneWriteVI1Asm : SchedWriteRes<[XGeneUnitFSU]> { let Latency = 5; let ResourceCycles = [1]; let NumMicroOps = 1; } def XGeneWriteVI1AsaAsa : SchedWriteRes<[XGeneUnitFSU]> { let Latency = 6; let ResourceCycles = [2]; let NumMicroOps = 2; } def XGeneWriteVI1AssAss : SchedWriteRes<[XGeneUnitFSU]> { let Latency = 6; let ResourceCycles = [2]; let NumMicroOps = 2; } def XGeneWriteVI1AslAsl : SchedWriteRes<[XGeneUnitFSU]> { let Latency = 4; let ResourceCycles = [2]; let NumMicroOps = 2; } def XGeneWriteVI1AsmAsm : SchedWriteRes<[XGeneUnitFSU]> { let Latency = 10; let ResourceCycles = [2]; let NumMicroOps = 2; } def XGeneWriteVI1ApolApol : SchedWriteRes<[XGeneUnitFSU]> { let Latency = 6; let ResourceCycles = [2]; let NumMicroOps = 2; } def XGeneWriteVI1AsaAsa1Asa : SchedWriteRes<[XGeneUnitFSU]> { let Latency = 9; let ResourceCycles = [3]; let NumMicroOps = 3; } def XGeneWriteVI1AsaAsa1Ass : 
SchedWriteRes<[XGeneUnitFSU]> { let Latency = 9; let ResourceCycles = [3]; let NumMicroOps = 3; } def XGeneWriteVI1AsaAsa1AsaAsa : SchedWriteRes<[XGeneUnitFSU]> { let Latency = 12; let ResourceCycles = [4]; let NumMicroOps = 4; } def XGeneWriteVI1AssAss1AsaAsa : SchedWriteRes<[XGeneUnitFSU]> { let Latency = 12; let ResourceCycles = [4]; let NumMicroOps = 4; } def XGeneWriteVI1AsaAsa2Asa : SchedWriteRes<[XGeneUnitFSU]> { let Latency = 12; let ResourceCycles = [4]; let NumMicroOps = 4; } def XGeneWriteVI1Adre : SchedWriteRes<[XGeneUnitFSU]> { let Latency = 3; let ResourceCycles = [1]; let NumMicroOps = 1; } def XGeneWriteVI1Asre : SchedWriteRes<[XGeneUnitFSU]> { let Latency = 5; let ResourceCycles = [1]; let NumMicroOps = 1; } def XGeneWriteVI1Asl1Asl : SchedWriteRes<[XGeneUnitFSU]> { let Latency = 4; let ResourceCycles = [2]; let NumMicroOps = 2; } def XGeneWriteVI1Ass1Asa : SchedWriteRes<[XGeneUnitFSU]> { let Latency = 6; let ResourceCycles = [2]; let NumMicroOps = 2; } def XGeneWriteVI1Ass1Asi : SchedWriteRes<[XGeneUnitFSU]> { let Latency = 6; let ResourceCycles = [2]; let NumMicroOps = 2; } def XGeneWriteVI1Ass1Ass : SchedWriteRes<[XGeneUnitFSU]> { let Latency = 6; let ResourceCycles = [2]; let NumMicroOps = 2; } def XGeneWriteVI1Fmov : SchedWriteRes<[XGeneUnitFSU]> { let Latency = 2; let ResourceCycles = [1]; let NumMicroOps = 1; } // instructions with store ops are extra-special because the chip will be using // data bypass; latencies are measured since the registers become available and // are as following: // for int, 1 for the address register, -1 for the data register // for float, 4 for the address register, 1 for the data register // for complex, 4 for the address register, 2 for the data register // we assumed latencies from data register availability def XGeneWriteVI1St1Lf : SchedWriteRes<[XGeneUnitSt, XGeneUnitLd]> { let Latency = 9; let ResourceCycles = [1, 1]; let NumMicroOps = 2; } def XGeneWriteVI1St1Lf1Asi : SchedWriteRes<[XGeneUnitSt, 
XGeneUnitLd, XGeneUnitFSU]> { let Latency = 12; // assumed Asi latency 3 let ResourceCycles = [1, 1, 1]; let NumMicroOps = 3; } def XGeneWriteVI1St1Lf1Falu : SchedWriteRes<[XGeneUnitSt, XGeneUnitLd, XGeneUnitFSU]> { let Latency = 14; // assumed Falu latency 5 let ResourceCycles = [1, 1, 1]; let NumMicroOps = 3; } def XGeneWriteVI1Sf1Ld : SchedWriteRes<[XGeneUnitSt, XGeneUnitLd]> { let Latency = 6; let ResourceCycles = [1, 1]; let NumMicroOps = 2; } def XGeneWriteVI1Sf1Ld1Sbfm : SchedWriteRes<[XGeneUnitSt, XGeneUnitLd, XGeneUnitIXn]> { let Latency = 7; // assumed Sbfm latency 1 let ResourceCycles = [1, 1, 1]; let NumMicroOps = 3; } def XGeneWriteVI1Sf1Ld1Ubfm : SchedWriteRes<[XGeneUnitSt, XGeneUnitLd, XGeneUnitIXn]> { let Latency = 7; // assumed Ubfm latency 1 let ResourceCycles = [1, 1, 1]; let NumMicroOps = 3; } def XGeneWriteVI2Asa : SchedWriteRes<[XGeneUnitFSU]> { let Latency = 6; let ResourceCycles = [2]; let NumMicroOps = 2; } def XGeneWriteVI2AsaAsa : SchedWriteRes<[XGeneUnitFSU]> { let Latency = 12; let ResourceCycles = [4]; let NumMicroOps = 4; } def XGeneWriteVI3Asa : SchedWriteRes<[XGeneUnitFSU]> { let Latency = 9; let ResourceCycles = [3]; let NumMicroOps = 3; } def XGeneWriteVI4Asa : SchedWriteRes<[XGeneUnitFSU]> { let Latency = 12; let ResourceCycles = [4]; let NumMicroOps = 4; } def XGeneWriteVI2Asl : SchedWriteRes<[XGeneUnitFSU]> { let Latency = 4; let ResourceCycles = [2]; let NumMicroOps = 2; } def XGeneWriteVI2AslAsl : SchedWriteRes<[XGeneUnitFSU]> { let Latency = 8; let ResourceCycles = [4]; let NumMicroOps = 4; } def XGeneWriteVI4Asl : SchedWriteRes<[XGeneUnitFSU]> { let Latency = 8; let ResourceCycles = [4]; let NumMicroOps = 4; } def XGeneWriteVI4AslAsl : SchedWriteRes<[XGeneUnitFSU]> { let Latency = 16; let ResourceCycles = [8]; let NumMicroOps = 8; } def XGeneWriteVI6Asl : SchedWriteRes<[XGeneUnitFSU]> { let Latency = 12; let ResourceCycles = [6]; let NumMicroOps = 6; } def XGeneWriteVI6AslAsl : SchedWriteRes<[XGeneUnitFSU]> { let Latency = 
24; let ResourceCycles = [12]; let NumMicroOps = 12; } def XGeneWriteVI8Asl : SchedWriteRes<[XGeneUnitFSU]> { let Latency = 16; let ResourceCycles = [8]; let NumMicroOps = 8; } def XGeneWriteVI8AslAsl : SchedWriteRes<[XGeneUnitFSU]> { let Latency = 32; let ResourceCycles = [16]; let NumMicroOps = 16; } //--- // AdvSIMD Data Processing (Vector FP) //--- def XGeneWriteVF1Asm : SchedWriteRes<[XGeneUnitFSU]> { let Latency = 5; let ResourceCycles = [1]; let NumMicroOps = 1; } def XGeneWriteVF1AsmAsm : SchedWriteRes<[XGeneUnitFSU]> { let Latency = 10; let ResourceCycles = [2]; let NumMicroOps = 2; } def XGeneWriteVF1Falu : SchedWriteRes<[XGeneUnitFSU]> { let Latency = 5; // assumed Falu latency of 5 let ResourceCycles = [1]; let NumMicroOps = 1; } def XGeneWriteVF1FaluFalu : SchedWriteRes<[XGeneUnitFSU]> { let Latency = 10; let ResourceCycles = [2]; let NumMicroOps = 2; } def XGeneWriteVF1Fcvt : SchedWriteRes<[XGeneUnitFSU]> { let Latency = 5; // assumed Fcvt latency of 5 let ResourceCycles = [1]; let NumMicroOps = 1; } def XGeneWriteVF1FcvtFcvt : SchedWriteRes<[XGeneUnitFSU]> { let Latency = 10; let ResourceCycles = [2]; let NumMicroOps = 2; } def XGeneWriteVF1Fdivd : SchedWriteRes<[XGeneUnitFSU, XGeneLockFInst]> { let Latency = 28; let ResourceCycles = [11, 28]; let NumMicroOps = 1; } def XGeneWriteVF1FdivdFdivd : SchedWriteRes<[XGeneUnitFSU, XGeneLockFInst]> { let Latency = 56; let ResourceCycles = [22, 56]; let NumMicroOps = 2; } def XGeneWriteVF1Fhcvt : SchedWriteRes<[XGeneUnitFSU]> { let Latency = 3; let ResourceCycles = [1]; let NumMicroOps = 1; } def XGeneWriteVF1Fmov : SchedWriteRes<[XGeneUnitFSU]> { let Latency = 2; let ResourceCycles = [1]; let NumMicroOps = 1; } def XGeneWriteVF1FmovFmov : SchedWriteRes<[XGeneUnitFSU]> { let Latency = 4; let ResourceCycles = [2]; let NumMicroOps = 2; } def XGeneWriteVF1Fsel : SchedWriteRes<[XGeneUnitFSU]> { let Latency = 3; let ResourceCycles = [1]; let NumMicroOps = 1; } def XGeneWriteVF1FselFsel : 
SchedWriteRes<[XGeneUnitFSU]> { let Latency = 6; let ResourceCycles = [2]; let NumMicroOps = 2; } def XGeneWriteVF1Fsqrd : SchedWriteRes<[XGeneUnitFSU, XGeneLockFInst]> { let Latency = 38; let ResourceCycles = [17, 38]; let NumMicroOps = 1; } def XGeneWriteVF1Adre : SchedWriteRes<[XGeneUnitFSU]> { let Latency = 3; let ResourceCycles = [1]; let NumMicroOps = 1; } def XGeneWriteVF1Asre : SchedWriteRes<[XGeneUnitFSU]> { let Latency = 5; let ResourceCycles = [1]; let NumMicroOps = 1; } def XGeneWriteVF2Asa : SchedWriteRes<[XGeneUnitFSU]> { let Latency = 6; let ResourceCycles = [2]; let NumMicroOps = 2; } //--- // Read Advances // No forwarding for these reads def : ReadAdvance; def : ReadAdvance; def : ReadAdvance; def : ReadAdvance; def : ReadAdvance; def : ReadAdvance; def : ReadAdvance; def : ReadAdvance; def : ReadAdvance; // Arithmetic instructions which set the state flag introduce one more cycle of // latency when the flag is required by a conditional def XGeneWriteISFlags : SchedWriteRes<[]>; def XGeneReadISFlags : SchedReadAdvance<-1, [XGeneWriteISFlags]>; def XGeneReadISFlagsVar : SchedReadVariant<[ SchedVar]>; def : SchedAlias; // Store instructions introduce extra latency cycles when the registers are // used in a dependent load as such: // Store type Address register Data register // Integer 1 -1 // Float 4 1 // Complex 4 2 def XGeneWriteSTI : SchedWriteRes<[]>; def XGeneWriteSTF : SchedWriteRes<[]>; // Scalar loads def XGeneReadLDSTI : SchedReadAdvance<-1, [XGeneWriteSTI]>; def XGeneReadLDSTF : SchedReadAdvance<-4, [XGeneWriteSTF]>; def XGeneReadLDVar : SchedReadVariant<[ SchedVar, SchedVar]>; def : SchedAlias; // Vector loads are affected by the same latencies as regular loads when it // comes to preceding stores using the same registers def XGeneReadVLDSTI : SchedReadAdvance<-1, [XGeneWriteSTI]>; def XGeneReadVLDSTF : SchedReadAdvance<-4, [XGeneWriteSTF]>; def XGeneReadVLDVar : SchedReadVariant<[ SchedVar, SchedVar]>; def : SchedAlias; //--- // Grouping 
// instructions with similar requirements in groups with specific names
// Naming scheme
//   XGeneWrite[GROUP][opList]
//   GROUP can be an instruction group, e.g. LD, ST, ALU etc.
//   opList is a list of ops in the format [k1][Op1][k2][Op2]...[kN][OpN], for
//   example: load op + load op + arithmetic op -> 1Ld1Ld1Alu
//   if the ops are independent, they will be grouped together under the same
//   op identifier: load op + load op & arithmetic op -> 1Ld1LdAlu
//                  load op & load op + load op -> 1LdLd1Ld // just an example
//   if the ops are identical, they can be grouped as such:
//   * within a group: prepend with X<n>, where n is how many times the
//     sequence is repeated: 1LdLdLdLd -> 1X4Ld
//   * multiple groups can be put together as such: 1LdLd1LdLd -> 2LdLd
// Group names used for this machine model (with usual latencies)
//   Nop    /    Nop (latency 0)
//   Br     BU   Branch (latency 0)
//   Alu    IXn  Arithmetic/logical op (latency 1)
//   Sbfm   IXn  Sbfm (latency 1 or 2)
//   Ubfm   IXn  Ubfm (latency 1 or 2)
//   Alb    IXB  Arithmetic/logical op on IXB (latency 2)
//               bfm, extr, shift/rotate, SIMD are mapped to Alb
//   Car    IXB  Carry (latency 1)
//   Set    IXB  Flag setting (latency 1)
//   Div    IXB  Integer division (latency 7, 10, 14, 18, 26 etc.)
//   Mlw    IXB  Integer 32bit multiplication (latency 4)
//   Mlx    IXB  Integer 64bit multiplication (latency 5)
//   Asa    FSU  ASIMD arithmetic (latency 3)
//   Asi    FSU  ASIMD insert (latency 3, also depends on previous destination)
//   Asl    FSU  ASIMD logical (latency 2 or 3 in MA or FP stores)
//   Ass    FSU  ASIMD shift (latency 3)
//   Asm    FSU  ASIMD multiply (latency 5)
//   Adre   FSU  ASIMD sre/dre (latency 3)
//   Apol   FSU  ASIMD polymul (latency 3)
//   Asre   FSU  ASIMD sre/dre (latency 5)
//   Falu   FSU  Floating point arithmetic (latency 5, 6, 7)
//   Fcmp   FSU  Floating point compare (latency 10, 11, 19)
//   Fcvt   FSU  Floating point convert (latency 5, 6, 7)
//   Fdivs  FSU  Single precision division (latency 22, 24 +1 +2)
//   Fdivd  FSU  Double precision division (latency 24, 28 +1 +2)
//   Fhcvt  FSU  Floating point half convert (latency 3)
//   Fmov   FSU  Floating point move (latency 2 or 3 in MA or FP stores)
//   Fmul   FSU  ASIMD multiply (latency 5)
//   Fsel   FSU  Floating point select (latency 3)
//   Fsqrs  FSU  Single precision sqrt (latency 22, 24 +1 +2)
//   Fsqrd  FSU  Double precision sqrt (latency 24, 38 +1 +2) TODO: 38?
// Lc     LD    Complex load (latency 11)
// Ld     LD    Integer load (latency 5)
// Lf     LD    Floating point load (latency 10)
// Sc     ST    Complex store (latency 0)
// St     ST    Integer store (latency 0)
// Sf     ST    Floating point store (latency 0)

//---
//---
// AdvSIMD Data Processing (Scalar FP)
// * NOTE: in the arm64 instruction model of llvm, the scalar floating point
// * instructions are defined as vector instructions with one element v1i64
// * We will stay consistent with this model and put the one-element vector
// * instructions in the scalar group
// Floating-point immediate:
//   1Fmov: FMOV (immediate)
// Floating-point data-processing:
//   1Fmov: FMOV (register)
//   1Fmov: FABS, FNEG (1 source)
//   1Fsqrs: FSQRT (1 source single precision)
//   1Fsqrd: FSQRT (1 source double precision)
//   1Falu: FMUL, FADD, FSUB, FNMUL (2 source)
//   1Fdivs: FDIV (2 source single precision)
//   1Fdivd: FDIV (2 source double precision)
//   1Fsel: FMAX, FMIN, FMAXNM, FMINNM (2 source)
//   1Falu: FMADD, FMSUB, FNMADD, FNMSUB (3 source)
// Floating-point compare:
//   1Fcmp: FCMP, FCMPE (all)
// Floating-point convert:
//   1Fcvt: FRINTN, FRINTP, FRINTM, FRINTZ, FRINTA, FRINTX, FRINTI (all)
//   1Falu: FCVT (1 source single to double or double to single)
//   1Fhcvt: FCVT (1 source to or from half precision)
// Floating-point conditional:
//   1Fcmp: FCCMP, FCCMPE (compare)
//   1Fsel: FCSEL (select)
// Floating-point<->integer conversions:
//   1Fcvt1Sf1Ld: FCVTNS, FCVTAS, FCVTPS, FCVTMS (integer)
//   1Fcvt1Sf1Ld: FCVTNU, FCVTAU, FCVTPU, FCVTMU (integer)
//   1Fcvt1Sf1Ld: FCVTZS, FCVTZU (integer)
//   1St1Lf1Falu: SCVTF, UCVTF (integer)
//   1Fmov: FMOV (general register to FP register with Rn=XZR, WZR)
//   1St1Lf: FMOV (general register to FP register with other Rn)
//   1Sf1Ld: FMOV (from FP register to general register)
// Floating-point<->fixed-point conversions:
//   1Fcvt1Sf1Ld: FCVTZS, FCVTZU (fixed-point)
//   1St1Lf1Falu: SCVTF, UCVTF (fixed-point)
// AdvSIMD scalar three same:
//   1Falu: FMULX, FRECPS, FRSQRTS, FABD (three same)
//   1Fsel: FCMEQ, FCMGE, FCMGT, FACGE, FACGT (three same)
// AdvSIMD two-reg misc:
//   1Falu: FCVTXN (two reg)
//   1Falu: SCVTF, UCVTF (integer)
//   1Fcvt: FCVTNS, FCVTMS, FCVTAS, FCVTPS (two reg)
//   1Fcvt: FCVTNU, FCVTMU, FCVTAU, FCVTPU (two reg)
//   1Fcvt: FCVTZS, FCVTZU (integer)
//   1Fsel: FCMGT, FCMEQ, FCMLT, FCMGE, FCMLE (zero)
//   1Adre: FRECPE, FRECPX (two reg)
//   1Asre: FRSQRTE (two reg)
// AdvSIMD scalar pairwise:
//   1Falu: FADDP (pair)
//   1Fsel: FMAXP, FMINP, FMAXNMP, FMINNMP (pair)
// AdvSIMD scalar x indexed element
// * NOTE: These seem to be bundled with vector elements in the arm model, we will
// * model scalar elements here and vector elements in the vector section where
// * they rightfully belong
//   1Asm: FMUL, FMULX, FMLA, FMLS (by element)
// AdvSIMD scalar shift by immediate:
//   1Falu: SCVTF, UCVTF (fixed-point)
//   1Fcvt: FCVTZS, FCVTZU (fixed-point)
//---
def : InstRW<[WriteI], (instrs COPY)>;

// Floating-point immediate / data-processing (1 source)
def : InstRW<[XGeneWriteF1Fmov], (instregex "FMOV(D|H|S)i$")>;
def : InstRW<[XGeneWriteF1Fmov], (instregex "FMOV(D|H|S)r$")>;
def : InstRW<[XGeneWriteF1Fmov], (instregex "FABS(D|H|S)r$")>;
def : InstRW<[XGeneWriteF1Fmov], (instregex "FNEG(D|H|S)r$")>;
def : InstRW<[XGeneWriteF1Fsqrs], (instregex "FSQRT(H|S)r$")>;
def : InstRW<[XGeneWriteF1Fsqrd], (instregex "FSQRTDr$")>;

// Floating-point data-processing (2 source)
def : InstRW<[XGeneWriteF1Falu], (instregex "FMUL(D|H|S)rr$")>;
def : InstRW<[XGeneWriteF1Falu], (instregex "FADD(D|H|S)rr$")>;
def : InstRW<[XGeneWriteF1Falu], (instregex "FSUB(D|H|S)rr$")>;
def : InstRW<[XGeneWriteF1Falu], (instregex "FNMUL(D|H|S)rr$")>;
def : InstRW<[XGeneWriteF1Fdivs], (instregex "FDIV(H|S)rr$")>;
def : InstRW<[XGeneWriteF1Fdivd], (instregex "FDIVDrr$")>;
def : InstRW<[XGeneWriteF1Fsel], (instregex "FMAX(D|H|S)rr$")>;
def : InstRW<[XGeneWriteF1Fsel], (instregex "FMIN(D|H|S)rr$")>;
def : InstRW<[XGeneWriteF1Fsel], (instregex "FMAXNM(D|H|S)rr$")>;
def : InstRW<[XGeneWriteF1Fsel], (instregex "FMINNM(D|H|S)rr$")>;

// Floating-point data-processing (3 source)
def : InstRW<[XGeneWriteF1Falu], (instregex "FMADD(D|H|S)rrr$")>;
def : InstRW<[XGeneWriteF1Falu], (instregex "FMSUB(D|H|S)rrr$")>;
def : InstRW<[XGeneWriteF1Falu], (instregex "FNMADD(D|H|S)rrr$")>;
def : InstRW<[XGeneWriteF1Falu], (instregex "FNMSUB(D|H|S)rrr$")>;

// Floating-point compare / convert / conditional
def : InstRW<[XGeneWriteF1Fcmp], (instregex "FCMP(D|H|S)r(r|i)$")>;
def : InstRW<[XGeneWriteF1Fcmp], (instregex "FCMPE(D|H|S)r(r|i)$")>;
def : InstRW<[XGeneWriteF1Fcvt], (instregex "FRINT(N|P|M|Z|A|X|I)(D|H|S)r$")>;
def : InstRW<[XGeneWriteF1Falu], (instregex "FCVT(SD|DS)r$")>;
def : InstRW<[XGeneWriteF1Fhcvt], (instregex "FCVT(HS|HD|SH|DH)r$")>;
def : InstRW<[XGeneWriteF1Fcmp], (instregex "FCCMP(D|H|S)rr$")>;
def : InstRW<[XGeneWriteF1Fcmp], (instregex "FCCMPE(D|H|S)rr$")>;
def : InstRW<[XGeneWriteF1Fsel], (instregex "FCSEL(D|H|S)rrr$")>;

// Floating-point<->integer conversions
def : InstRW<[XGeneWriteF1Fcvt1Sf1Ld], (instregex "FCVTN(S|U)U(W|X)(D|H|S)r$")>;
def : InstRW<[XGeneWriteF1Fcvt1Sf1Ld], (instregex "FCVTM(S|U)U(W|X)(D|H|S)r$")>;
def : InstRW<[XGeneWriteF1Fcvt1Sf1Ld], (instregex "FCVTA(S|U)U(W|X)(D|H|S)r$")>;
def : InstRW<[XGeneWriteF1Fcvt1Sf1Ld], (instregex "FCVTP(S|U)U(W|X)(D|H|S)r$")>;
def : InstRW<[XGeneWriteF1Fcvt1Sf1Ld], (instregex "FCVTZ(S|U)U(W|X)(D|H|S)r$")>;
// Both signed and unsigned integer->FP conversions use the 1St1Lf1Falu
// group listed in the table above; instregex is a prefix match, so the
// SCVTF pattern does not cover the UCVTF opcodes.
def : InstRW<[XGeneWriteF1St1Lf1Falu], (instregex "SCVTFU(W|X)(D|H|S)ri$")>;
def : InstRW<[XGeneWriteF1St1Lf1Falu], (instregex "UCVTFU(W|X)(D|H|S)ri$")>;
def : InstRW<[XGeneWriteF1Fmov], (instregex "FMOV(D|S)0$")>;
def : InstRW<[XGeneWriteF1St1Lf], (instregex "FMOV(WH|XH|WS|XD|XDHigh)r$")>;
def : InstRW<[XGeneWriteF1Sf1Ld], (instregex "FMOV(HW|HX|SW|DX|DXHigh)r$")>;

// Floating-point<->fixed-point conversions
def : InstRW<[XGeneWriteF1Fcvt1Sf1Ld], (instregex "FCVTZSS(W|X)(D|H|S)ri$")>;
def : InstRW<[XGeneWriteF1Fcvt1Sf1Ld], (instregex "FCVTZUS(W|X)(D|H|S)ri$")>;
def : InstRW<[XGeneWriteF1St1Lf1Falu], (instregex "SCVTFS(W|X)(D|H|S)ri$")>;
def : InstRW<[XGeneWriteF1St1Lf1Falu], (instregex "UCVTFS(W|X)(D|H|S)ri$")>;

// AdvSIMD scalar three same
def : InstRW<[XGeneWriteF1Falu], (instregex "FMULX(16|32|64)$")>;
def : InstRW<[XGeneWriteF1Falu], (instregex "FRECPS(16|32|64)$")>;
def : InstRW<[XGeneWriteF1Falu], (instregex "FRSQRTS(16|32|64)$")>;
def : InstRW<[XGeneWriteF1Falu], (instregex "FABD(16|32|64)$")>;
def : InstRW<[XGeneWriteF1Fsel], (instregex "FCM(EQ|GE|GT)(16|32|64)$")>;
def : InstRW<[XGeneWriteF1Fsel], (instregex "FAC(GE|GT)(16|32|64)$")>;

// AdvSIMD two-reg misc
def : InstRW<[XGeneWriteF1Falu], (instregex "FCVTXNv1i64$")>;
// SCVTF and UCVTF (integer) are both listed under 1Falu in the table above.
def : InstRW<[XGeneWriteF1Falu], (instregex "SCVTFv1(i16|i32|i64)$")>;
def : InstRW<[XGeneWriteF1Falu], (instregex "UCVTFv1(i16|i32|i64)$")>;
def : InstRW<[XGeneWriteF1Fcvt], (instregex "FCVT(N|M|A|P)Sv1(f16|i32|i64)$")>;
def : InstRW<[XGeneWriteF1Fcvt], (instregex "FCVT(N|M|A|P)Uv1(f16|i32|i64)$")>;
def : InstRW<[XGeneWriteF1Fcvt], (instregex "FCVTZ(S|U)v1(f16|i32|i64)$")>;
def : InstRW<[XGeneWriteF1Fsel], (instregex "FCMGTv1(i16|i32|i64)rz$")>;
def : InstRW<[XGeneWriteF1Fsel], (instregex "FCMEQv1(i16|i32|i64)rz$")>;
def : InstRW<[XGeneWriteF1Fsel], (instregex "FCMLTv1(i16|i32|i64)rz$")>;
def : InstRW<[XGeneWriteF1Fsel], (instregex "FCMGEv1(i16|i32|i64)rz$")>;
def : InstRW<[XGeneWriteF1Fsel], (instregex "FCMLEv1(i16|i32|i64)rz$")>;
def : InstRW<[XGeneWriteF1Adre], (instregex "FRECP(E|X)v1(f16|i32|i64)$")>;
def : InstRW<[XGeneWriteF1Asre], (instregex "FRSQRTEv1(f16|i32|i64)$")>;

// AdvSIMD scalar pairwise
def : InstRW<[XGeneWriteF1Falu], (instregex "FADDPv2(i16|i32|i64)p$")>;
def : InstRW<[XGeneWriteF1Fsel], (instregex "FMAXPv2(i16|i32|i64)p$")>;
def : InstRW<[XGeneWriteF1Fsel], (instregex "FMINPv2(i16|i32|i64)p$")>;
def : InstRW<[XGeneWriteF1Fsel], (instregex "FMAXNMPv2(i16|i32|i64)p$")>;
def : InstRW<[XGeneWriteF1Fsel], (instregex "FMINNMPv2(i16|i32|i64)p$")>;

// AdvSIMD scalar x indexed element
def : InstRW<[XGeneWriteF1Asm], (instregex "FMULv1(i16|i32|i64)_indexed$")>;
def : InstRW<[XGeneWriteF1Asm], (instregex "FMULXv1(i16|i32|i64)_indexed$")>;
def : InstRW<[XGeneWriteF1Asm], (instregex "FML(A|S)v1(i16|i32|i64)_indexed$")>;

// AdvSIMD scalar shift by immediate
// TODO: maybe add |h| to these groups as well?
it would make sense def : InstRW<[XGeneWriteF1Falu], (instregex "SCVTF(s|d)$")>; def : InstRW<[XGeneWriteF1Falu], (instregex "UCVTF(s|d)$")>; def : InstRW<[XGeneWriteF1Fcvt], (instregex "FCVTZ(S|U)(s|d)$")>; //--- // Load instructions (38 groups in total) // 1Ld: LDR (literal) // 1Ld: LDURB, LDURH, LDUR (unscaled immediate) // 1Ld: LDRB, LDRH, LDR (register offset) // 1Ld: LDRB, LDRH, LDR (unsigned immediate) // 1LdLd: LDP (offset) // 1LdAlu LDRB, LDRH, LDR (immediate post-indexed) // 1LdAlu LDRB, LDRH, LDR (immediate pre-indexed) // 1LdLdAlu LDP (post-indexed) // 1LdLdAlu LDP (pre-indexed) // 1Ld1Sbfm LDRSW (literal) // 1Ld1Sbfm LDURSB, LDURSH, LDURSW (unscaled immediate) // 1Ld1Sbfm LDRSB, LDRSH, LDRSW (register offset) // 1Ld1Sbfm LDRSB, LDRSH, LDRSW (unsigned immediate) // 1Ld1SbfmAlu LDRSB, LDRSH, LDRSW (immediate post-indexed) // 1Ld1SbfmAlu LDRSB, LDRSH, LDRSW (immediate pre-indexed) // 1Ld1LdSbfm1Sbfm LDPSW (offset) // 1Ld1LdSbfm1SbfmAlu LDPSW (post-indexed) // 1Ld1LdSbfm1SbfmAlu LDPSW (pre-indexed) //--- def : InstRW<[XGeneWriteLD1Ld, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDR(W|X|S|D|Q)l$")>; def : InstRW<[XGeneWriteLD1Ld, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDUR(X|W|HH|BB)i$")>; def : InstRW<[XGeneWriteLD1Ld, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDR(BB|HH|W|X)ro(X|W)$")>; def : InstRW<[XGeneWriteLD1Ld, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDR(BB|HH|W|X|B|H|S|D|Q)ui$")>; def : InstRW<[XGeneWriteLD1LdLd, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDP(W|X)i$")>; def : InstRW<[XGeneWriteLD1LdAlu, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDR(BB|HH|W|X)post$")>; def : InstRW<[XGeneWriteLD1LdAlu, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDR(BB|HH|W|X)pre$")>; def : InstRW<[XGeneWriteLD1LdLdAlu, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDP(W|X)post$")>; def : InstRW<[XGeneWriteLD1LdLdAlu, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDP(W|X)pre$")>; def : InstRW<[XGeneWriteLD1Ld1Sbfm, XGeneReadLDSTI, XGeneReadLDSTF], 
(instregex "LDRSWl$")>; def : InstRW<[XGeneWriteLD1Ld1Sbfm, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDURS(BW|BX|HW|HX|W)i$")>; def : InstRW<[XGeneWriteLD1Ld1Sbfm, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDRS(BW|BX|HW|HX|W)ro(X|W)$")>; def : InstRW<[XGeneWriteLD1Ld1Sbfm, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDRS(BW|BX|HW|HX|W)ui$")>; def : InstRW<[XGeneWriteLD1Ld1SbfmAlu, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDRS(BW|BX|HW|HX|W)post$")>; def : InstRW<[XGeneWriteLD1Ld1SbfmAlu, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDRS(BW|BX|HW|HX|W)pre$")>; def : InstRW<[XGeneWriteLD1Ld1LdSbfm1Sbfm, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDPSWi$")>; def : InstRW<[XGeneWriteLD1Ld1LdSbfm1SbfmAlu, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDPSWpost$")>; def : InstRW<[XGeneWriteLD1Ld1LdSbfm1SbfmAlu, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDPSWpre$")>; //--- // Load instructions - 64-bit FP/SIMD (8 groups in total) // 1Lf: LDUR (literal, unscaled immediate, unsigned immediate) // 1LfAlu: LDR (immediate post-indexed) // 1LfAlu: LDR (immediate pre-indexed) // 1LfAlu: LDR (register offset) // 1LfLf: LDP (offset) // 1LfLfAlu: LDP (post-indexed, pre-indexed) //--- // for LDUR, the llvm arm64 model only defines the unscaled immediates def : InstRW<[XGeneWriteLD1Lf, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDUR(B|H|S|D)i$")>; def : InstRW<[XGeneWriteLD1LfAlu, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDR(B|H|S|D)post$")>; def : InstRW<[XGeneWriteLD1LfAlu, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDR(B|H|S|D)pre$")>; def : InstRW<[XGeneWriteLD1LfAlu, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDR(B|H|S|D)ro(X|W)$")>; def : InstRW<[XGeneWriteLD1LfLf, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDP(D|S)i$")>; def : InstRW<[XGeneWriteLD1LfLfAlu, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDP(D|S)post$")>; def : InstRW<[XGeneWriteLD1LfLfAlu, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDP(D|S)pre$")>; //--- // Load instructions - 
128-bit FP/SIMD (8 groups in total) // 1LfLf: LDUR (literal, unscaled immediate, unsigned immediate) // 1LfLfAlu: LDR (immediate post-indexed) // 1LfLfAlu: LDR (immediate pre-indexed) // 1LfAlu1Lf: LDR (register offset) // 1LfLfAlu1LfLf: LDP (offset) // 1LfLfAlu1LfLfAlu: LDP (post-indexed, pre-indexed) //--- // for LDUR, the llvm arm64 model only defines the unscaled immediates def : InstRW<[XGeneWriteLD1LfLf, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDURQi$")>; def : InstRW<[XGeneWriteLD1LfLfAlu, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDRQpost$")>; def : InstRW<[XGeneWriteLD1LfLfAlu, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDRQpre$")>; def : InstRW<[XGeneWriteLD1LfAlu1Lf, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDRQro(X|W)$")>; def : InstRW<[XGeneWriteLD1LfLfAlu1LfLf, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDPQi$")>; def : InstRW<[XGeneWriteLD1LfLfAlu1LfLfAlu, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDPQpost$")>; def : InstRW<[XGeneWriteLD1LfLfAlu1LfLfAlu, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LDPQpre$")>; //--- // Vector Load (66 groups in total) // 1Lc: LD1 (one register 2S/4H/8B) // 1LcAlu: LD1 (one register 2S/4H/8B pre/post indexed) // 1LcLc: LD1 (one register 4S/8H/16B) // 1LcLc: LD1 (two registers 2S/4H/8B) // 1LcLc: LD2 (two registers 2S/4H/8B) // 1LcLcAlu: LD1 (one register 4S/8H/16B pre/post indexed) // 1LcLcAlu: LD1 (two registers 2S/4H/8B pre/post indexed) // 1LcLcAlu: LD2 (two registers 2S/4H/8B pre/post indexed) // 1X3Lc: LD1 (three registers 2S/4H/8B) // 1X3Lc: LD3 (three registers 2S/4H/8B) // 1X3LcAlu: LD1 (three registers 2S/4H/8B pre/post indexed) // 1X3LcAlu: LD3 (three registers 2S/4H/8B pre/post indexed) // 1X4Lc: LD1 (two registers 4S/8H/16B) // 1X4Lc: LD1 (four registers 2S/4H/8B) // 1X4Lc: LD2 (two registers 4S/8H/16B) // 1X4Lc: LD4 (four registers 2S/4H/8B) // 1X4LcAlu: LD1 (two registers 4S/8H/16B pre/post indexed) // 1X4LcAlu: LD1 (four registers 2S/4H/8B pre/post indexed) // 1X4LcAlu: LD2 (two 
registers 4S/8H/16B pre/post indexed) // 1X4LcAlu: LD4 (four registers 2S/4H/8B pre/post indexed) // 1X6Lc: LD1 (three registers 4S/8H/16B) // 1X6Lc: LD3 (three registers 4S/8H/16B) // 1X6LcAlu: LD1 (three registers 4S/8H/16B pre/post indexed) // 1X6LcAlu: LD3 (three registers 4S/8H/16B pre/post indexed) // 1X8Lc: LD1 (four registers 4S/8H/16B) // 1X8Lc: LD4 (four registers 4S/8H/16B) // 1X8LcAlu: LD1 (four registers 4S/8H/16B pre/post indexed) // 1X8LcAlu: LD4 (four registers 4S/8H/16B pre/post indexed) // 1Lf: LD1 (one register 1D) // 1Lf: LD1R (other) // 1LfAlu: LD1 (one register 1D pre/post indexed) // 1LfAlu: LD1R (pre/post indexed) // 1LfLf: LD1 (one register 2D) // 1LfLf: LD1 (two registers 1D) // 1LfLf: LD2R (other) // 1LfLfAlu: LD1 (one register 2D pre/post indexed) // 1LfLfAlu: LD1 (two registers 1D pre/post indexed) // 1LfLfAlu: LD2R (pre/post indexed) // 1X3Lf: LD1 (three registers 1D) // 1X3Lf: LD3R (other) // 1X3LfAlu: LD1 (three registers 1D pre/post indexed) // 1X3LfAlu: LD3R (pre/post indexed) // 1X4Lf: LD1 (two registers 2D) // 1X4Lf: LD1 (four registers 1D) // 1X4Lf: LD2 (two registers 2D) // 1X4Lf: LD4R (other) // 1X4LfAlu: LD1 (two registers 2D pre/post indexed) // 1X4LfAlu: LD1 (four registers 1D pre/post indexed) // 1X4LfAlu: LD2 (two registers 2D pre/post indexed) // 1X4LfAlu: LD4R (pre/post indexed) // 1X6Lf: LD1 (three registers 2D) // 1X6Lf: LD3 (three registers 2D) // 1X6LfAlu: LD1 (three registers 2D pre/post indexed) // 1X6LfAlu: LD3 (three registers 2D pre/post indexed) // 1X8Lf: LD1 (four registers 2D) // 1X8Lf: LD4 (four registers 2D) // 1X8LfAlu: LD1 (four registers 2D pre/post indexed) // 1X8LfAlu: LD4 (four registers 2D pre/post indexed) // 1Lf1Asi: LD1 (one register) // 1LfAlu1Asi: LD1 (one register pre/post indexed) // 1LfLf1AsiAsi: LD2 (two registers) // 1LfLfAlu1AsiAsi: LD2 (two registers pre/post indexed) // 1X3Lf1X3Asi: LD3 (three registers) // 1X3LfAlu1X3Asi: LD3 (three registers pre/post indexed) // 1X4Lf1X4Asi: LD4
(four registers) // 1X4LfAlu1X4Asi: LD4 (four registers pre/post indexed) //--- def : InstRW<[XGeneWriteLD1Lc, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD1Onev(2s|4h|8b)$")>; def : InstRW<[XGeneWriteLD1LcAlu, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD1Onev(2s|4h|8b)_POST$")>; def : InstRW<[XGeneWriteLD1LcLc, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD1Onev(4s|8h|16b)$")>; def : InstRW<[XGeneWriteLD1LcLc, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD1Twov(2s|4h|8b)$")>; def : InstRW<[XGeneWriteLD1LcLc, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD2Twov(2s|4h|8b)$")>; def : InstRW<[XGeneWriteLD1LcLcAlu, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD1Onev(4s|8h|16b)_POST$")>; def : InstRW<[XGeneWriteLD1LcLcAlu, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD1Twov(2s|4h|8b)_POST$")>; def : InstRW<[XGeneWriteLD1LcLcAlu, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD2Twov(2s|4h|8b)_POST$")>; def : InstRW<[XGeneWriteLD1X3Lc, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD1Threev(2s|4h|8b)$")>; def : InstRW<[XGeneWriteLD1X3Lc, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD3Threev(2s|4h|8b)$")>; def : InstRW<[XGeneWriteLD1X3LcAlu, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD1Threev(2s|4h|8b)_POST$")>; def : InstRW<[XGeneWriteLD1X3LcAlu, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD3Threev(2s|4h|8b)_POST$")>; def : InstRW<[XGeneWriteLD1X4Lc, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD1Twov(4s|8h|16b)$")>; def : InstRW<[XGeneWriteLD1X4Lc, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD1Fourv(2s|4h|8b)$")>; def : InstRW<[XGeneWriteLD1X4Lc, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD2Twov(4s|8h|16b)$")>; def : InstRW<[XGeneWriteLD1X4Lc, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD4Fourv(2s|4h|8b)$")>; def : InstRW<[XGeneWriteLD1X4LcAlu, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD1Twov(4s|8h|16b)_POST$")>; def : InstRW<[XGeneWriteLD1X4LcAlu, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD1Fourv(2s|4h|8b)_POST$")>; def : 
InstRW<[XGeneWriteLD1X4LcAlu, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD2Twov(4s|8h|16b)_POST$")>; def : InstRW<[XGeneWriteLD1X4LcAlu, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD4Fourv(2s|4h|8b)_POST$")>; def : InstRW<[XGeneWriteLD1X6Lc, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD1Threev(4s|8h|16b)$")>; def : InstRW<[XGeneWriteLD1X6Lc, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD3Threev(4s|8h|16b)$")>; def : InstRW<[XGeneWriteLD1X6LcAlu, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD1Threev(4s|8h|16b)_POST$")>; def : InstRW<[XGeneWriteLD1X6LcAlu, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD3Threev(4s|8h|16b)_POST$")>; def : InstRW<[XGeneWriteLD1X8Lc, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD1Fourv(4s|8h|16b)$")>; def : InstRW<[XGeneWriteLD1X8Lc, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD4Fourv(4s|8h|16b)$")>; def : InstRW<[XGeneWriteLD1X8LcAlu, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD1Fourv(4s|8h|16b)_POST$")>; def : InstRW<[XGeneWriteLD1X8LcAlu, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD4Fourv(4s|8h|16b)_POST$")>; def : InstRW<[XGeneWriteLD1Lf, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD1Onev1d$")>; def : InstRW<[XGeneWriteLD1Lf, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD1Rv(2s|4h|8b)$")>; def : InstRW<[XGeneWriteLD1Lf, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD1Rv(4s|8h|16b)$")>; def : InstRW<[XGeneWriteLD1Lf, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD1Rv(1d|2d)$")>; def : InstRW<[XGeneWriteLD1LfAlu, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD1Onev1d_POST$")>; def : InstRW<[XGeneWriteLD1LfAlu, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD1Rv(2s|4h|8b)_POST$")>; def : InstRW<[XGeneWriteLD1LfAlu, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD1Rv(4s|8h|16b)_POST$")>; def : InstRW<[XGeneWriteLD1LfAlu, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD1Rv(1d|2d)_POST$")>; def : InstRW<[XGeneWriteLD1LfLf, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD1Onev2d$")>; def : InstRW<[XGeneWriteLD1LfLf, 
XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD1Twov1d$")>; def : InstRW<[XGeneWriteLD1LfLf, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD2Rv(2s|4h|8b)$")>; def : InstRW<[XGeneWriteLD1LfLf, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD2Rv(4s|8h|16b)$")>; def : InstRW<[XGeneWriteLD1LfLf, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD2Rv(1d|2d)$")>; def : InstRW<[XGeneWriteLD1LfLfAlu, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD1Onev2d_POST$")>; def : InstRW<[XGeneWriteLD1LfLfAlu, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD1Twov1d_POST$")>; def : InstRW<[XGeneWriteLD1LfLfAlu, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD2Rv(2s|4h|8b)_POST$")>; def : InstRW<[XGeneWriteLD1LfLfAlu, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD2Rv(4s|8h|16b)_POST$")>; def : InstRW<[XGeneWriteLD1LfLfAlu, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD2Rv(1d|2d)_POST$")>; def : InstRW<[XGeneWriteLD1X3Lf, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD1Threev1d$")>; def : InstRW<[XGeneWriteLD1X3Lf, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD3Rv(2s|4h|8b)$")>; def : InstRW<[XGeneWriteLD1X3Lf, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD3Rv(4s|8h|16b)$")>; def : InstRW<[XGeneWriteLD1X3Lf, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD3Rv(1d|2d)$")>; def : InstRW<[XGeneWriteLD1X3LfAlu, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD1Threev1d_POST$")>; def : InstRW<[XGeneWriteLD1X3LfAlu, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD3Rv(2s|4h|8b)_POST$")>; def : InstRW<[XGeneWriteLD1X3LfAlu, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD3Rv(4s|8h|16b)_POST$")>; def : InstRW<[XGeneWriteLD1X3LfAlu, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD3Rv(1d|2d)_POST$")>; def : InstRW<[XGeneWriteLD1X4Lf, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD1Twov2d$")>; def : InstRW<[XGeneWriteLD1X4Lf, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD1Fourv1d$")>; def : InstRW<[XGeneWriteLD1X4Lf, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD2Twov2d$")>; def : 
InstRW<[XGeneWriteLD1X4Lf, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD4Rv(2s|4h|8b)$")>; def : InstRW<[XGeneWriteLD1X4Lf, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD4Rv(4s|8h|16b)$")>; def : InstRW<[XGeneWriteLD1X4Lf, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD4Rv(1d|2d)$")>; def : InstRW<[XGeneWriteLD1X4LfAlu, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD1Twov2d_POST$")>; def : InstRW<[XGeneWriteLD1X4LfAlu, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD1Fourv1d_POST$")>; def : InstRW<[XGeneWriteLD1X4LfAlu, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD2Twov2d_POST$")>; def : InstRW<[XGeneWriteLD1X4LfAlu, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD4Rv(2s|4h|8b)_POST$")>; def : InstRW<[XGeneWriteLD1X4LfAlu, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD4Rv(4s|8h|16b)_POST$")>; def : InstRW<[XGeneWriteLD1X4LfAlu, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD4Rv(1d|2d)_POST$")>; def : InstRW<[XGeneWriteLD1X6Lf, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD1Threev2d$")>; def : InstRW<[XGeneWriteLD1X6Lf, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD3Threev2d$")>; def : InstRW<[XGeneWriteLD1X6LfAlu, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD1Threev2d_POST$")>; def : InstRW<[XGeneWriteLD1X6LfAlu, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD3Threev2d_POST$")>; def : InstRW<[XGeneWriteLD1X8Lf, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD1Fourv2d$")>; def : InstRW<[XGeneWriteLD1X8Lf, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD4Fourv2d$")>; def : InstRW<[XGeneWriteLD1X8LfAlu, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD1Fourv2d_POST$")>; def : InstRW<[XGeneWriteLD1X8LfAlu, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD4Fourv2d_POST$")>; def : InstRW<[XGeneWriteLD1Lf1Asi, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD1(i8|i16|i32|i64)$")>; def : InstRW<[XGeneWriteLD1LfAlu1Asi, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD1(i8|i16|i32|i64)_POST$")>; def : InstRW<[XGeneWriteLD1LfLf1AsiAsi, XGeneReadLDSTI, XGeneReadLDSTF], 
(instregex "LD2(i8|i16|i32|i64)$")>; def : InstRW<[XGeneWriteLD1LfLfAlu1AsiAsi, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD2(i8|i16|i32|i64)_POST$")>; def : InstRW<[XGeneWriteLD1X3Lf1X3Asi, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD3(i8|i16|i32|i64)$")>; def : InstRW<[XGeneWriteLD1X3LfAlu1X3Asi, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD3(i8|i16|i32|i64)_POST$")>; def : InstRW<[XGeneWriteLD1X4Lf1X4Asi, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD4(i8|i16|i32|i64)$")>; def : InstRW<[XGeneWriteLD1X4LfAlu1X4Asi, XGeneReadLDSTI, XGeneReadLDSTF], (instregex "LD4(i8|i16|i32|i64)_POST$")>; //--- // Store instructions (18 groups in total) // 1St: STURB, STURH, STUR (unscaled immediate) // 1St: STRB, STRH, STR (register offset) // 1St: STRB, STRH, STR (unsigned immediate) // 1StAlu: STRB, STRH, STR (immediate post-indexed) // 1StAlu: STRB, STRH, STR (immediate pre-indexed) // 1StSt: STP (offset) // 1StStAlu: STP (post-indexed) // 1StStAlu: STP (pre-indexed) //--- def : InstRW<[XGeneWriteST1St, XGeneWriteSTI], (instregex "STUR(BB|HH|W|X)i$")>; def : InstRW<[XGeneWriteST1St, XGeneWriteSTI], (instregex "STR(BB|HH|W|X)ro(W|X)$")>; def : InstRW<[XGeneWriteST1St, XGeneWriteSTI], (instregex "STR(X|W|HH|BB)ui$")>; def : InstRW<[XGeneWriteST1StAlu, XGeneWriteSTI], (instregex "STR(W|X|BB|HH)post$")>; def : InstRW<[XGeneWriteST1StAlu, XGeneWriteSTI], (instregex "STR(W|X|BB|HH)pre$")>; def : InstRW<[XGeneWriteST1StSt, XGeneWriteSTI], (instregex "STP(W|X)i$")>; def : InstRW<[XGeneWriteST1StStAlu, XGeneWriteSTI], (instregex "STP(W|X)post$")>; def : InstRW<[XGeneWriteST1StStAlu, XGeneWriteSTI], (instregex "STP(W|X)pre$")>; //--- // Store instructions - 64-bit FP/SIMD (8 groups in total) // 1Sf: STUR (literal, unscaled immediate, unsigned immediate) // 1SfAlu: STR (immediate post-indexed) // 1SfAlu: STR (immediate pre-indexed) // 1SfAlu: STR (register offset) // 1SfSf: STP (offset) // 1SfSfAlu: STP (post-indexed, pre-indexed) //--- // for STUR, the llvm arm64 model only 
defines the unscaled immediates def : InstRW<[XGeneWriteST1Sf, XGeneWriteSTF], (instregex "STUR(B|H|S|D)i$")>; def : InstRW<[XGeneWriteST1SfAlu, XGeneWriteSTF], (instregex "STR(B|H|S|D)post$")>; def : InstRW<[XGeneWriteST1SfAlu, XGeneWriteSTF], (instregex "STR(B|H|S|D)pre$")>; def : InstRW<[XGeneWriteST1SfAlu, XGeneWriteSTF], (instregex "STR(B|H|S|D)ro(X|W)$")>; def : InstRW<[XGeneWriteST1SfSf, XGeneWriteSTF], (instregex "STP(D|S)i$")>; def : InstRW<[XGeneWriteST1SfSfAlu, XGeneWriteSTF], (instregex "STP(D|S)post$")>; def : InstRW<[XGeneWriteST1SfSfAlu, XGeneWriteSTF], (instregex "STP(D|S)pre$")>; //--- // Store instructions - 128-bit FP/SIMD (8 groups in total) // 1SfSf: STUR (literal, unscaled immediate, unsigned immediate) // 1SfSfAlu: STR (immediate post-indexed) // 1SfSfAlu: STR (immediate pre-indexed) // 1SfAlu1Sf: STR (register offset) // 1SfSfAlu1SfSf: STP (offset) // 1SfSfAlu1SfSfAlu: STP (post-indexed, pre-indexed) //--- // for STUR, the llvm arm64 model only defines the unscaled immediates def : InstRW<[XGeneWriteST1SfSf, XGeneWriteSTF], (instregex "STURQi$")>; def : InstRW<[XGeneWriteST1SfSfAlu, XGeneWriteSTF], (instregex "STRQpost$")>; def : InstRW<[XGeneWriteST1SfSfAlu, XGeneWriteSTF], (instregex "STRQpre$")>; def : InstRW<[XGeneWriteST1SfAlu1Sf, XGeneWriteSTF], (instregex "STRQro(X|W)$")>; def : InstRW<[XGeneWriteST1SfSfAlu1SfSf, XGeneWriteSTF], (instregex "STPQi$")>; def : InstRW<[XGeneWriteST1SfSfAlu1SfSfAlu, XGeneWriteSTF], (instregex "STPQpost$")>; def : InstRW<[XGeneWriteST1SfSfAlu1SfSfAlu, XGeneWriteSTF], (instregex "STPQpre$")>; //--- // Vector Store (66 groups in total) // Multiple Structures // 1Sc: ST1 (one register 2S/4H/8B) // 1ScAlu: ST1 (one register 2S/4H/8B pre/post indexed) // 1ScSc: ST1 (one register 4S/8H/16B) // 1ScSc: ST1 (two registers 2S/4H/8B) // 1ScSc: ST2 (two registers 2S/4H/8B) // 1ScScAlu: ST1 (one register 4S/8H/16B pre/post indexed) // 1ScScAlu: ST1 (two registers 2S/4H/8B pre/post indexed) // 1ScScAlu: ST2 (two 
registers 2S/4H/8B pre/post indexed) // 1X3Sc: ST1 (three registers 2S/4H/8B) // 1X3Sc: ST3 (three registers 2S/4H/8B) // 1X3ScAlu: ST1 (three registers 2S/4H/8B pre/post indexed) // 1X3ScAlu: ST3 (three registers 2S/4H/8B pre/post indexed) // 1X4Sc: ST1 (two registers 4S/8H/16B) // 1X4Sc: ST1 (four registers 2S/4H/8B) // 1X4Sc: ST2 (two registers 4S/8H/16B) // 1X4Sc: ST4 (four registers 2S/4H/8B) // 1X4ScAlu: ST1 (two registers 4S/8H/16B pre/post indexed) // 1X4ScAlu: ST1 (four registers 2S/4H/8B pre/post indexed) // 1X4ScAlu: ST2 (two registers 4S/8H/16B pre/post indexed) // 1X4ScAlu: ST4 (four registers 2S/4H/8B pre/post indexed) // 1X6Sc: ST1 (three registers 4S/8H/16B) // 1X6Sc: ST3 (three registers 4S/8H/16B) // 1X6ScAlu: ST1 (three registers 4S/8H/16B pre/post indexed) // 1X6ScAlu: ST3 (three registers 4S/8H/16B pre/post indexed) // 1X8Sc: ST1 (four registers 4S/8H/16B) // 1X8Sc: ST4 (four registers 4S/8H/16B) // 1X8ScAlu: ST1 (four registers 4S/8H/16B pre/post indexed) // 1X8ScAlu: ST4 (four registers 4S/8H/16B pre/post indexed) // 1Sf: ST1 (one register 1D) // 1SfAlu: ST1 (one register 1D pre/post indexed) // 1SfSf: ST1 (one register 2D) // 1SfSf: ST1 (two registers 1D) // 1SfSfAlu: ST1 (one register 2D pre/post indexed) // 1SfSfAlu: ST1 (two registers 1D pre/post indexed) // 1X3Sf: ST1 (three registers 1D) // 1X3SfAlu: ST1 (three registers 1D pre/post indexed) // 1X4Sf: ST1 (two registers 2D) // 1X4Sf: ST1 (four registers 1D) // 1X4Sf: ST2 (two registers 2D) // 1X4SfAlu: ST1 (two registers 2D pre/post indexed) // 1X4SfAlu: ST1 (four registers 1D pre/post indexed) // 1X4SfAlu: ST2 (two registers 2D pre/post indexed) // 1X6Sf: ST1 (three registers 2D) // 1X6Sf: ST3 (three registers 2D) // 1X6SfAlu: ST1 (three registers 2D pre/post indexed) // 1X6SfAlu: ST3 (three registers 2D pre/post indexed) // 1X8Sf: ST1 (four registers 2D) // 1X8Sf: ST4 (four registers 2D) // 1X8SfAlu: ST1 (four registers 2D pre/post indexed) // 1X8SfAlu: ST4 (four registers 2D pre/post 
indexed) // Single Structure // 1Sf: ST1 (one register) // 1SfAlu: ST1 (one register pre/post indexed) // 1SfSf: ST2 (two registers) // 1SfSfAlu: ST2 (two registers pre/post indexed) // 1X3Sf: ST3 (three registers) // 1X3SfAlu: ST3 (three registers pre/post indexed) // 1X4Sf: ST4 (four registers) // 1X4SfAlu: ST4 (four registers pre/post indexed) //--- def : InstRW<[XGeneWriteST1Sc, XGeneWriteSTF], (instregex "ST1Onev(2s|4h|8b)$")>; def : InstRW<[XGeneWriteST1ScAlu, XGeneWriteSTF], (instregex "ST1Onev(2s|4h|8b)_POST$")>; def : InstRW<[XGeneWriteST1ScSc, XGeneWriteSTF], (instregex "ST1Onev(4s|8h|16b)$")>; def : InstRW<[XGeneWriteST1ScSc, XGeneWriteSTF], (instregex "ST1Twov(2s|4h|8b)$")>; def : InstRW<[XGeneWriteST1ScSc, XGeneWriteSTF], (instregex "ST2Twov(2s|4h|8b)$")>; def : InstRW<[XGeneWriteST1ScScAlu, XGeneWriteSTF], (instregex "ST1Onev(4s|8h|16b)_POST$")>; def : InstRW<[XGeneWriteST1ScScAlu, XGeneWriteSTF], (instregex "ST1Twov(2s|4h|8b)_POST$")>; def : InstRW<[XGeneWriteST1ScScAlu, XGeneWriteSTF], (instregex "ST2Twov(2s|4h|8b)_POST$")>; def : InstRW<[XGeneWriteST1X3Sc, XGeneWriteSTF], (instregex "ST1Threev(2s|4h|8b)$")>; def : InstRW<[XGeneWriteST1X3Sc, XGeneWriteSTF], (instregex "ST3Threev(2s|4h|8b)$")>; def : InstRW<[XGeneWriteST1X3ScAlu, XGeneWriteSTF], (instregex "ST1Threev(2s|4h|8b)_POST$")>; def : InstRW<[XGeneWriteST1X3ScAlu, XGeneWriteSTF], (instregex "ST3Threev(2s|4h|8b)_POST$")>; def : InstRW<[XGeneWriteST1X4Sc, XGeneWriteSTF], (instregex "ST1Twov(4s|8h|16b)$")>; def : InstRW<[XGeneWriteST1X4Sc, XGeneWriteSTF], (instregex "ST1Fourv(2s|4h|8b)$")>; def : InstRW<[XGeneWriteST1X4Sc, XGeneWriteSTF], (instregex "ST2Twov(4s|8h|16b)$")>; def : InstRW<[XGeneWriteST1X4Sc, XGeneWriteSTF], (instregex "ST4Fourv(2s|4h|8b)$")>; def : InstRW<[XGeneWriteST1X4ScAlu, XGeneWriteSTF], (instregex "ST1Twov(4s|8h|16b)_POST$")>; def : InstRW<[XGeneWriteST1X4ScAlu, XGeneWriteSTF], (instregex "ST1Fourv(2s|4h|8b)_POST$")>; def : InstRW<[XGeneWriteST1X4ScAlu, XGeneWriteSTF], 
(instregex "ST2Twov(4s|8h|16b)_POST$")>; def : InstRW<[XGeneWriteST1X4ScAlu, XGeneWriteSTF], (instregex "ST4Fourv(2s|4h|8b)_POST$")>; def : InstRW<[XGeneWriteST1X6Sc, XGeneWriteSTF], (instregex "ST1Threev(4s|8h|16b)$")>; def : InstRW<[XGeneWriteST1X6Sc, XGeneWriteSTF], (instregex "ST3Threev(4s|8h|16b)$")>; def : InstRW<[XGeneWriteST1X6ScAlu, XGeneWriteSTF], (instregex "ST1Threev(4s|8h|16b)_POST$")>; def : InstRW<[XGeneWriteST1X6ScAlu, XGeneWriteSTF], (instregex "ST3Threev(4s|8h|16b)_POST$")>; def : InstRW<[XGeneWriteST1X8Sc, XGeneWriteSTF], (instregex "ST1Fourv(4s|8h|16b)$")>; def : InstRW<[XGeneWriteST1X8Sc, XGeneWriteSTF], (instregex "ST4Fourv(4s|8h|16b)$")>; def : InstRW<[XGeneWriteST1X8ScAlu, XGeneWriteSTF], (instregex "ST1Fourv(4s|8h|16b)_POST$")>; def : InstRW<[XGeneWriteST1X8ScAlu, XGeneWriteSTF], (instregex "ST4Fourv(4s|8h|16b)_POST$")>; def : InstRW<[XGeneWriteST1Sf, XGeneWriteSTF], (instregex "ST1Onev1d$")>; def : InstRW<[XGeneWriteST1SfAlu, XGeneWriteSTF], (instregex "ST1Onev1d_POST$")>; def : InstRW<[XGeneWriteST1SfSf, XGeneWriteSTF], (instregex "ST1Onev2d$")>; def : InstRW<[XGeneWriteST1SfSf, XGeneWriteSTF], (instregex "ST1Twov1d$")>; def : InstRW<[XGeneWriteST1SfSfAlu, XGeneWriteSTF], (instregex "ST1Onev2d_POST$")>; def : InstRW<[XGeneWriteST1SfSfAlu, XGeneWriteSTF], (instregex "ST1Twov1d_POST$")>; def : InstRW<[XGeneWriteST1X3Sf, XGeneWriteSTF], (instregex "ST1Threev1d$")>; def : InstRW<[XGeneWriteST1X3SfAlu, XGeneWriteSTF], (instregex "ST1Threev1d_POST$")>; def : InstRW<[XGeneWriteST1X4Sf, XGeneWriteSTF], (instregex "ST1Twov2d$")>; def : InstRW<[XGeneWriteST1X4Sf, XGeneWriteSTF], (instregex "ST1Fourv1d$")>; def : InstRW<[XGeneWriteST1X4Sf, XGeneWriteSTF], (instregex "ST2Twov2d$")>; def : InstRW<[XGeneWriteST1X4SfAlu, XGeneWriteSTF], (instregex "ST1Twov2d_POST$")>; def : InstRW<[XGeneWriteST1X4SfAlu, XGeneWriteSTF], (instregex "ST1Fourv1d_POST$")>; def : InstRW<[XGeneWriteST1X4SfAlu, XGeneWriteSTF], (instregex "ST2Twov2d_POST$")>; def : 
InstRW<[XGeneWriteST1X6Sf, XGeneWriteSTF], (instregex "ST1Threev2d$")>; def : InstRW<[XGeneWriteST1X6Sf, XGeneWriteSTF], (instregex "ST3Threev2d$")>; def : InstRW<[XGeneWriteST1X6SfAlu, XGeneWriteSTF], (instregex "ST1Threev2d_POST$")>; def : InstRW<[XGeneWriteST1X6SfAlu, XGeneWriteSTF], (instregex "ST3Threev2d_POST$")>; def : InstRW<[XGeneWriteST1X8Sf, XGeneWriteSTF], (instregex "ST1Fourv2d$")>; def : InstRW<[XGeneWriteST1X8Sf, XGeneWriteSTF], (instregex "ST4Fourv2d$")>; def : InstRW<[XGeneWriteST1X8SfAlu, XGeneWriteSTF], (instregex "ST1Fourv2d_POST$")>; def : InstRW<[XGeneWriteST1X8SfAlu, XGeneWriteSTF], (instregex "ST4Fourv2d_POST$")>; def : InstRW<[XGeneWriteST1Sf, XGeneWriteSTF], (instregex "ST1(i8|i16|i32|i64)$")>; def : InstRW<[XGeneWriteST1SfAlu, XGeneWriteSTF], (instregex "ST1(i8|i16|i32|i64)_POST$")>; def : InstRW<[XGeneWriteST1SfSf, XGeneWriteSTF], (instregex "ST2(i8|i16|i32|i64)$")>; def : InstRW<[XGeneWriteST1SfSfAlu, XGeneWriteSTF], (instregex "ST2(i8|i16|i32|i64)_POST$")>; def : InstRW<[XGeneWriteST1X3Sf, XGeneWriteSTF], (instregex "ST3(i8|i16|i32|i64)$")>; def : InstRW<[XGeneWriteST1X3SfAlu, XGeneWriteSTF], (instregex "ST3(i8|i16|i32|i64)_POST$")>; def : InstRW<[XGeneWriteST1X4Sf, XGeneWriteSTF], (instregex "ST4(i8|i16|i32|i64)$")>; def : InstRW<[XGeneWriteST1X4SfAlu, XGeneWriteSTF], (instregex "ST4(i8|i16|i32|i64)_POST$")>; //--- // Data Processing Register // 1Sbfm1Alu: LSL, LSR, ASR (shifted register) // 1Sbfm1Alu: LSLV, LSRV, ASRV, RORV: shift/rotate op. // 1Sbfm1Alu: UXTW, UXTX (shifted register) // 1Alb1Alu: ROR (shifted register) // 1Sbfm SBFM // 1Car: ADC (Add/subtract (with carry): carry op.) // 1Set: CCMP (Conditional compare (register): logical op., produces flag) // 1Alu: CSEL Conditional select: arithmetic op. 
// 1Mlw1Alu: MADD, SMADDL, UMADDL with other Ra (32bit)
// 1Mlx1Alu: MADD, SMADDL, UMADDL with other Ra (64bit)
// 1Mlw1Alu: MSUB, SMSUBL, UMSUBL (32bit)
// 1Mlx1Alu: MSUB, SMSUBL, UMSUBL (64bit)
// 1Mlx: UMULH, SMULH (64bit)
// 1Div: UDIV, SDIV
// 1Alb: REV, REV32, REV64
// 1Alu: BFM
// 1Alu: MRS, MSR
//---

// Variable shifts and bitfield moves.
def : InstRW<[XGeneWriteI1Sbfm1Alu],
             (instregex "LS(L|R)V(W|X)r$",
                        "ASRV(W|X)r$",
                        "UBFM(W|X)ri$")>; // UBFM: alias of UXTW
def : InstRW<[XGeneWriteI1Alb1Alu],
             (instregex "RORV(W|X)r$")>;
def : InstRW<[XGeneWriteI1Sbfm],
             (instregex "SBFM(W|X)ri$")>;

// Add with carry.
def : InstRW<[XGeneWriteI1Car],
             (instregex "ADC(W|X)r$")>;
def : InstRW<[XGeneWriteI1Car, XGeneWriteISFlags],
             (instregex "ADCS(W|X)r$")>;

// Conditional compare and conditional select.
def : InstRW<[XGeneWriteI1Set, XGeneReadISFlags],
             (instregex "CCMP(W|X)r$",
                        "CCMP(W|X)i$")>;
def : InstRW<[XGeneWriteI1Alu, XGeneReadISFlags],
             (instregex "CSEL(W|X)r$")>;

// Multiply-add / multiply-subtract and high multiply.
def : InstRW<[XGeneWriteI1Mlw1Alu],
             (instregex "MADD(W)rrr$",
                        "(S|U)MADDLrrr$",
                        "MSUB(W)rrr$",
                        "(S|U)MSUBLrrr$")>;
def : InstRW<[XGeneWriteI1Mlx1Alu],
             (instregex "MADD(X)rrr$",
                        "MSUB(X)rrr$")>;
def : InstRW<[XGeneWriteI1Mlx],
             (instregex "(S|U)MULHrr$")>;

// Integer division.
def : InstRW<[XGeneWriteI1Div],
             (instregex "SDIVWr$",
                        "SDIVXr$",
                        "UDIV(W|X)r$")>;

// Byte reversal.
def : InstRW<[XGeneWriteI1Alb],
             (instregex "REV(W|X)r$",
                        "REV16(W|X)r$",
                        "REV32Xr$")>;

// BFM, BLR and system register moves.
def : InstRW<[XGeneWriteI1Alu],
             (instregex "BFM(W|X)ri$",
                        "BLR$",
                        "MRS$",
                        "MSR$")>;

// Taken from list of missing instructions
def : InstRW<[XGeneWriteI1Alu],
             (instregex "ADD(W|X)r(r|i)$",
                        "SUB(W|X)r(r|i)$",
                        "AND(W|X)r(r|i)$",
                        "BIC(W|X)rr$",
                        "EON(W|X)rr$",
                        "EOR(W|X)r(r|i)$",
                        "ORN(W|X)rr$",
                        "ORR(W|X)r(r|i)$",
                        "RBIT(W|X)r$",
                        "MOV(N|Z|K)(W|X)i$",
                        "ADR$")>;
def : InstRW<[XGeneWriteI1Alb1Alu],
             (instregex "ADD(W|X)rx$",
                        "SUB(W|X)rx$")>;
def : InstRW<[XGeneWriteI1Set, XGeneWriteISFlags],
             (instregex "ADDS(W|X)r(r|i)$",
                        "SUBS(W|X)r(r|i)$",
                        "ANDS(W|X)r(r|i)$",
                        "BICS(W|X)rr$")>;
def : InstRW<[XGeneWriteI1Alb1Alu, XGeneWriteISFlags],
             (instregex "ADDS(W|X)rx$",
                        "SUBS(W|X)rx$")>;
// NOTE(review): CLS/CLZ carry XGeneReadISFlags like the conditional selects,
// while RBIT above does not -- confirm this is intended.
def : InstRW<[XGeneWriteI1Alu, XGeneReadISFlags],
             (instregex "CLS(W|X)r$",
                        "CLZ(W|X)r$",
                        "CSINC(W|X)r$",
                        "CSINV(W|X)r$",
                        "CSNEG(W|X)r$")>;
def : InstRW<[XGeneWriteI1Car],
             (instregex "SBC(W|X)r$")>;
def : InstRW<[XGeneWriteI1Car, XGeneWriteISFlags],
             (instregex "SBCS(W|X)r$")>;
def : InstRW<[XGeneWriteI1Alb],
             (instregex "EXTR(W|X)rri$")>;
def :
InstRW<[XGeneWriteI1Alu], (instregex "ADRP$")>;
def : InstRW<[XGeneWriteI1Set, XGeneReadISFlags],
             (instregex "CCMN(W|X)(r|i)$")>;
def : InstRW<[XGeneWriteI1Alu],
             (instregex "TBZ(W|X)$",
                        "TBNZ(W|X)$")>;
def : InstRW<[XGeneWriteI1Alu, XGeneReadISFlags],
             (instregex "CBZ(W|X)$",
                        "CBNZ(W|X)$")>;

// shifted
def : InstRW<[XGeneWriteI1Sbfm1Alu],
             (instregex "ADD(W|X)rs$",
                        "SUB(W|X)rs$",
                        "AND(W|X)rs$",
                        "BIC(W|X)rs$",
                        "EON(W|X)rs$",
                        "EOR(W|X)rs$",
                        "ORN(W|X)rs$",
                        "ORR(W|X)rs$")>;
def : InstRW<[XGeneWriteI1Sbfm1Set],
             (instregex "ADDS(W|X)rs$",
                        "SUBS(W|X)rs$",
                        "ANDS(W|X)rs$",
                        "BICS(W|X)rs$")>;

// TODO: STLX et al.

//---
// AdvSIMD Data Processing (Vector Integer)
// Three same vector instructions
// 1Asa: ADD, SUB, ADDP (Q=0)
// 1Asa: SHADD, SQADD, SRHADD, SHSUB, SQSUB (Q=0)
// 1Asa: UHADD, UQADD, URHADD, UHSUB, UQSUB (Q=0)
// 1Asa: CMGT, CMGE, CMTST, CMHI, CMHS, CMEQ (Q=0 register)
// 1Asa: SMAX, SMIN, SABD, SMAXP, SMINP (Q=0)
// 1Asa: UMAX, UMIN, UABD, UMAXP, UMINP (Q=0)
// 1Ass: SSHL, SQSHL, SRSHL, SQRSHL (Q=0)
// 1Ass: USHL, UQSHL, URSHL, UQRSHL (Q=0)
// 1Asm: MUL, MLA, MLS, SQDMULH, SQRDMULH (Q=0)
// 1Asl: AND, BIC, ORR, ORN, EOR, BSL, BIT, BIF (Q=0)
// TODO: maybe PMUL uses Apol? That would make more sense I think
// 1Asl: PMUL (Q=0)
// 1AsaAsa: ADD, SUB, ADDP (Q=1)
// 1AsaAsa: SHADD, SQADD, SRHADD, SHSUB, SQSUB (Q=1)
// 1AsaAsa: UHADD, UQADD, URHADD, UHSUB, UQSUB (Q=1)
// 1AsaAsa: CMGT, CMGE, CMTST, CMHI, CMHS, CMEQ (Q=1 register)
// 1AsaAsa: SMAX, SMIN, SABD, SMAXP, SMINP (Q=1)
// 1AsaAsa: UMAX, UMIN, UABD, UMAXP, UMINP (Q=1)
// 1AssAss: SSHL, SQSHL, SRSHL, SQRSHL (Q=1)
// 1AssAss: USHL, UQSHL, URSHL, UQRSHL (Q=1)
// 1AsmAsm: MUL, MLA, MLS, SQDMULH, SQRDMULH (Q=1)
// 1AslAsl: AND, BIC, ORR, ORN, EOR, BSL, BIT, BIF (Q=1)
// 1AslAsl: PMUL (Q=1)
// 2Asa: SABA/UABA (Q=0)
// 2AsaAsa: SABA/UABA (Q=1)
// NOTE(review): no InstRW for the SABA/UABA rows is visible in this part of
// the file -- confirm they are covered elsewhere.
//---

// Q=0: arithmetic, compare, min/max, absolute difference.
def : InstRW<[XGeneWriteVI1Asa],
             (instregex "ADDv(8i8|4i16|2i32)$",
                        "SUBv(8i8|4i16|2i32)$",
                        "ADDPv(8i8|4i16|2i32)$",
                        "SHADDv(8i8|4i16|2i32)$",
                        "SQADDv(8i8|4i16|2i32)$",
                        "SRHADDv(8i8|4i16|2i32)$",
                        "SHSUBv(8i8|4i16|2i32)$",
                        "SQSUBv(8i8|4i16|2i32)$",
                        "UHADDv(8i8|4i16|2i32)$",
                        "UQADDv(8i8|4i16|2i32)$",
                        "URHADDv(8i8|4i16|2i32)$",
                        "UHSUBv(8i8|4i16|2i32)$",
                        "UQSUBv(8i8|4i16|2i32)$",
                        "CM(GT|GE|EQ)v(8i8|4i16|2i32)$",
                        "CM(TST|HI|HS)v(8i8|4i16|2i32)$",
                        "SMAXv(8i8|4i16|2i32)$",
                        "SMINv(8i8|4i16|2i32)$",
                        "SABDv(8i8|4i16|2i32)$",
                        "SMAXPv(8i8|4i16|2i32)$",
                        "SMINPv(8i8|4i16|2i32)$",
                        "UMAXv(8i8|4i16|2i32)$",
                        "UMINv(8i8|4i16|2i32)$",
                        "UABDv(8i8|4i16|2i32)$",
                        "UMAXPv(8i8|4i16|2i32)$",
                        "UMINPv(8i8|4i16|2i32)$")>;

// Q=0: register shifts.
def : InstRW<[XGeneWriteVI1Ass],
             (instregex "SSHLv(8i8|4i16|2i32)$",
                        "SQSHLv(8i8|4i16|2i32)$",
                        "SRSHLv(8i8|4i16|2i32)$",
                        "SQRSHLv(8i8|4i16|2i32)$",
                        "USHLv(8i8|4i16|2i32)$",
                        "UQSHLv(8i8|4i16|2i32)$",
                        "URSHLv(8i8|4i16|2i32)$",
                        "UQRSHLv(8i8|4i16|2i32)$")>;

// Q=0: multiplies.
def : InstRW<[XGeneWriteVI1Asm],
             (instregex "MULv(8i8|4i16|2i32)$",
                        "MLAv(8i8|4i16|2i32)$",
                        "MLSv(8i8|4i16|2i32)$",
                        "SQDMULHv(4i16|2i32)$",
                        "SQRDMULHv(4i16|2i32)$")>;

// Q=0: logical ops and PMUL.
def : InstRW<[XGeneWriteVI1Asl],
             (instregex "ANDv(8i8)$",
                        "BICv(8i8)$",
                        "ORRv(8i8)$",
                        "ORNv(8i8)$",
                        "EORv(8i8)$",
                        "BSLv(8i8)$",
                        "BITv(8i8)$",
                        "BIFv(8i8)$",
                        "PMULv(8i8)$")>;

// Q=1: arithmetic, compare, min/max, absolute difference.
// SUB, like ADD, also has a 2i64 arrangement, so it is listed here too.
def : InstRW<[XGeneWriteVI1AsaAsa],
             (instregex "ADDv(16i8|8i16|4i32|2i64)$",
                        "SUBv(16i8|8i16|4i32|2i64)$",
                        "ADDPv(16i8|8i16|4i32|2i64)$",
                        "SHADDv(16i8|8i16|4i32)$",
                        "SQADDv(16i8|8i16|4i32|2i64)$",
                        "SRHADDv(16i8|8i16|4i32)$",
                        "SHSUBv(16i8|8i16|4i32)$",
                        "SQSUBv(16i8|8i16|4i32|2i64)$",
                        "UHADDv(16i8|8i16|4i32)$",
                        "UQADDv(16i8|8i16|4i32|2i64)$",
                        "URHADDv(16i8|8i16|4i32)$",
                        "UHSUBv(16i8|8i16|4i32)$",
                        "UQSUBv(16i8|8i16|4i32|2i64)$",
                        "CM(GT|GE|EQ)v(16i8|8i16|4i32|2i64)$",
                        "CM(TST|HI|HS)v(16i8|8i16|4i32|2i64)$",
                        "SMAXv(16i8|8i16|4i32)$",
                        "SMINv(16i8|8i16|4i32)$",
                        "SABDv(16i8|8i16|4i32)$",
                        "SMAXPv(16i8|8i16|4i32)$",
                        "SMINPv(16i8|8i16|4i32)$",
                        "UMAXv(16i8|8i16|4i32)$",
                        "UMINv(16i8|8i16|4i32)$",
                        "UABDv(16i8|8i16|4i32)$",
                        "UMAXPv(16i8|8i16|4i32)$",
                        "UMINPv(16i8|8i16|4i32)$")>;

// Q=1: register shifts (continued by the records that follow).
def : InstRW<[XGeneWriteVI1AssAss],
             (instregex "SSHLv(16i8|8i16|4i32|2i64)$")>;
def :
InstRW<[XGeneWriteVI1AssAss], (instregex "SQSHLv(16i8|8i16|4i32|2i64)$")>;
def : InstRW<[XGeneWriteVI1AssAss],
             (instregex "SRSHLv(16i8|8i16|4i32|2i64)$",
                        "SQRSHLv(16i8|8i16|4i32|2i64)$",
                        "USHLv(16i8|8i16|4i32|2i64)$",
                        "UQSHLv(16i8|8i16|4i32|2i64)$",
                        "URSHLv(16i8|8i16|4i32|2i64)$",
                        "UQRSHLv(16i8|8i16|4i32|2i64)$")>;

// Q=1: multiplies.
def : InstRW<[XGeneWriteVI1AsmAsm],
             (instregex "MULv(16i8|8i16|4i32)$",
                        "MLAv(16i8|8i16|4i32)$",
                        "MLSv(16i8|8i16|4i32)$",
                        "SQDMULHv(8i16|4i32)$",
                        "SQRDMULHv(8i16|4i32)$")>;

// Q=1: logical ops and PMUL.
def : InstRW<[XGeneWriteVI1AslAsl],
             (instregex "ANDv(16i8)$",
                        "BICv(16i8)$",
                        "ORRv(16i8)$",
                        "ORNv(16i8)$",
                        "EORv(16i8)$",
                        "BSLv(16i8)$",
                        "BITv(16i8)$",
                        "BIFv(16i8)$",
                        "PMULv(16i8)$")>;

//---
// AdvSIMD Data Processing (Vector Integer)
// Three different vector instructions
// 1AsaAsa: SADDL, SSUBL, SABDL (Q=0, Q=1)
// 1AsaAsa: UADDL, USUBL, UABDL (Q=0, Q=1)
// 1AsaAsa: SADDW, SSUBW (Q=0, Q=1)
// 1AsaAsa: UADDW, USUBW (Q=0, Q=1)
// 1AsmAsm: SMLAL, SMLSL, SMULL (Q=0, Q=1)
// 1AsmAsm: SQDMLAL, SQDMLSL, SQDMULL (Q=0, Q=1)
// 1AsmAsm: UMLAL, UMLSL, UMULL (Q=0, Q=1)
// 1AsmAsm: UQDMLAL, UQDMLSL, UQDMULL (Q=0, Q=1)
// NOTE: UQDM... are not modelled in llvm
// 1ApolApol: PMULL (Q=0, Q=1)
// 1AsaAsa1Ass: ADDHN, SUBHN, RADDHN, RSUBHN (Q=0, Q=1)
// 2AsaAsa: SABAL, UABAL (Q=0, Q=1)
//---

// Long and wide add / subtract / absolute difference.
def : InstRW<[XGeneWriteVI1AsaAsa],
             (instregex "SADDLv(8i8|16i8)_v8i16$",
                        "SADDLv(4i16|8i16)_v4i32$",
                        "SADDLv(2i32|4i32)_v2i64$",
                        "SSUBLv(8i8|16i8)_v8i16$",
                        "SSUBLv(4i16|8i16)_v4i32$",
                        "SSUBLv(2i32|4i32)_v2i64$",
                        "SABDLv(8i8|16i8)_v8i16$",
                        "SABDLv(4i16|8i16)_v4i32$",
                        "SABDLv(2i32|4i32)_v2i64$",
                        "UADDLv(8i8|16i8)_v8i16$",
                        "UADDLv(4i16|8i16)_v4i32$",
                        "UADDLv(2i32|4i32)_v2i64$",
                        "USUBLv(8i8|16i8)_v8i16$",
                        "USUBLv(4i16|8i16)_v4i32$",
                        "USUBLv(2i32|4i32)_v2i64$",
                        "UABDLv(8i8|16i8)_v8i16$",
                        "UABDLv(4i16|8i16)_v4i32$",
                        "UABDLv(2i32|4i32)_v2i64$",
                        "SADDWv(8i8|16i8)_v8i16$",
                        "SADDWv(4i16|8i16)_v4i32$",
                        "SADDWv(2i32|4i32)_v2i64$",
                        "SSUBWv(8i8|16i8)_v8i16$",
                        "SSUBWv(4i16|8i16)_v4i32$",
                        "SSUBWv(2i32|4i32)_v2i64$",
                        "UADDWv(8i8|16i8)_v8i16$",
                        "UADDWv(4i16|8i16)_v4i32$",
                        "UADDWv(2i32|4i32)_v2i64$",
                        "USUBWv(8i8|16i8)_v8i16$",
                        "USUBWv(4i16|8i16)_v4i32$",
                        "USUBWv(2i32|4i32)_v2i64$")>;

// Long multiplies and multiply-accumulates.
def : InstRW<[XGeneWriteVI1AsmAsm],
             (instregex "SML(A|S)Lv(8i8|16i8)_v8i16$",
                        "SML(A|S)Lv(4i16|8i16)_v4i32$",
                        "SML(A|S)Lv(2i32|4i32)_v2i64$",
                        "SMULLv(8i8|16i8)_v8i16$",
                        "SMULLv(4i16|8i16)_v4i32$",
                        "SMULLv(2i32|4i32)_v2i64$",
                        "SQDML(A|S)Lv(4i16|8i16)_v4i32$",
                        "SQDML(A|S)Lv(2i32|4i32)_v2i64$",
                        "SQDMULLv(4i16|8i16)_v4i32$",
                        "SQDMULLv(2i32|4i32)_v2i64$",
                        "UML(A|S)Lv(8i8|16i8)_v8i16$",
                        "UML(A|S)Lv(4i16|8i16)_v4i32$",
                        "UML(A|S)Lv(2i32|4i32)_v2i64$",
                        "UMULLv(8i8|16i8)_v8i16$",
                        "UMULLv(4i16|8i16)_v4i32$",
                        "UMULLv(2i32|4i32)_v2i64$")>;

// Polynomial long multiply.
def : InstRW<[XGeneWriteVI1ApolApol],
             (instregex "PMULLv(8i8|16i8|1i64|2i64)$")>;

// Narrowing high-half add (the remaining records follow).
def : InstRW<[XGeneWriteVI1AsaAsa1Ass],
             (instregex "ADDHNv(2i64_v2i32)$",
                        "ADDHNv(4i32_v4i16)$")>;
def : InstRW<[XGeneWriteVI1AsaAsa1Ass],
(instregex "ADDHNv(8i16_v8i8)$")>;
def : InstRW<[XGeneWriteVI1AsaAsa1Ass],
             (instregex "SUBHNv(2i64_v2i32)$",
                        "SUBHNv(4i32_v4i16)$",
                        "SUBHNv(8i16_v8i8)$",
                        "RADDHNv(2i64_v2i32)$",
                        "RADDHNv(4i32_v4i16)$",
                        "RADDHNv(8i16_v8i8)$",
                        "RSUBHNv(2i64_v2i32)$",
                        "RSUBHNv(4i32_v4i16)$",
                        "RSUBHNv(8i16_v8i8)$")>;

// Long absolute-difference accumulate. The "three different" table lists
// SABAL/UABAL as 2AsaAsa, so these use the 2AsaAsa write.
def : InstRW<[XGeneWriteVI2AsaAsa],
             (instregex "SABALv(8i8|16i8)_v8i16$",
                        "SABALv(4i16|8i16)_v4i32$",
                        "SABALv(2i32|4i32)_v2i64$",
                        "UABALv(8i8|16i8)_v8i16$",
                        "UABALv(4i16|8i16)_v4i32$",
                        "UABALv(2i32|4i32)_v2i64$")>;

//---
// AdvSIMD Data Processing (Vector Integer)
// AdvSIMD two-reg misc
// 1Asa: SADDLP, SUQADD, SQABS, SQNEG (Q=0)
// 1Asa: UADDLP, USQADD, ABS, NEG (Q=0)
// 1Asa: CMGT, CMEQ, CMLT, CMGE, CMLE (zero Q=0)
// 1Asl: CLS, CLZ, CNT, NOT, RBIT (Q=0)
// 1Ass: REV64, REV32, REV16 (Q=0)
// 1Ass: XTN, SQXTN, UQXTN, SQXTUN, SHLL (Q=0, Q=1)
// 1AsaAsa: SADDLP, SUQADD, SQABS, SQNEG (Q=1)
// 1AsaAsa: UADDLP, USQADD, ABS, NEG (Q=1)
// 1AsaAsa: CMGT, CMEQ, CMLT, CMGE, CMLE (zero Q=1)
// 1AslAsl: CLS, CLZ, CNT, NOT, RBIT (Q=1)
// 1AssAss: REV64, REV32, REV16 (Q=1)
// 1Adre: URECPE (Q=0, Q=1)
// 1Asre: URSQRTE (Q=0, Q=1)
// 2Asa: SADALP, UADALP (Q=0)
// 2AsaAsa: SADALP, UADALP (Q=1)
//---

// Q=0: pairwise long add, saturating abs/neg, compare against zero.
def : InstRW<[XGeneWriteVI1Asa],
             (instregex "SADDLPv(8i8_v4i16)$",
                        "SADDLPv(4i16_v2i32)$",
                        "SADDLPv(2i32_v1i64)$",
                        "SUQADDv(8i8|4i16|2i32)$",
                        "SQABSv(8i8|4i16|2i32)$",
                        "SQNEGv(8i8|4i16|2i32)$",
                        "UADDLPv(8i8_v4i16)$",
                        "UADDLPv(4i16_v2i32)$",
                        "UADDLPv(2i32_v1i64)$",
                        "USQADDv(8i8|4i16|2i32)$",
                        "ABSv(8i8|4i16|2i32)$",
                        "NEGv(8i8|4i16|2i32)$",
                        "CM(GT|EQ|LT)v(8i8|4i16|2i32)rz$",
                        "CM(GE|LE)v(8i8|4i16|2i32)rz$")>;

// Q=0: bit counting and bitwise ops.
def : InstRW<[XGeneWriteVI1Asl],
             (instregex "CL(S|Z)v(8i8|4i16|2i32)$",
                        "CNTv8i8$",
                        "NOTv8i8$",
                        "RBITv8i8$")>;

// Q=0: element reversal.
def : InstRW<[XGeneWriteVI1Ass],
             (instregex "REV16v8i8$",
                        "REV32v(8i8|4i16)$",
                        "REV64v(8i8|4i16|2i32)$")>;

// Q=1: pairwise long add, saturating abs/neg, compare against zero.
def : InstRW<[XGeneWriteVI1AsaAsa],
             (instregex "SADDLPv(16i8_v8i16)$",
                        "SADDLPv(8i16_v4i32)$",
                        "SADDLPv(4i32_v2i64)$",
                        "SUQADDv(16i8|8i16|4i32|2i64)$",
                        "SQABSv(16i8|8i16|4i32|2i64)$",
                        "SQNEGv(16i8|8i16|4i32|2i64)$",
                        "UADDLPv(16i8_v8i16)$",
                        "UADDLPv(8i16_v4i32)$",
                        "UADDLPv(4i32_v2i64)$",
                        "USQADDv(16i8|8i16|4i32|2i64)$",
                        "ABSv(16i8|8i16|4i32|2i64)$",
                        "NEGv(16i8|8i16|4i32|2i64)$",
                        "CM(GT|EQ|LT)v(16i8|8i16|4i32|2i64)rz$",
                        "CM(GE|LE)v(16i8|8i16|4i32|2i64)rz$")>;

// Q=1: bit counting and bitwise ops.
def : InstRW<[XGeneWriteVI1AslAsl],
             (instregex "CL(S|Z)v(16i8|8i16|4i32)$",
                        "CNTv16i8$",
                        "NOTv16i8$",
                        "RBITv16i8$")>;

// Q=1: element reversal.
def : InstRW<[XGeneWriteVI1AssAss],
             (instregex "REV16v16i8$",
                        "REV32v(16i8|8i16)$",
                        "REV64v(16i8|8i16|4i32)$")>;

// Reciprocal and reciprocal square-root estimates.
def : InstRW<[XGeneWriteVI1Adre],
             (instregex "URECPEv(2i32|4i32)$")>;
def : InstRW<[XGeneWriteVI1Asre],
             (instregex "URSQRTEv(2i32|4i32)$")>;

// Pairwise long add and accumulate.
def : InstRW<[XGeneWriteVI2Asa],
             (instregex "SADALPv(8i8_v4i16)$",
                        "SADALPv(4i16_v2i32)$",
                        "SADALPv(2i32_v1i64)$",
                        "UADALPv(8i8_v4i16)$",
                        "UADALPv(4i16_v2i32)$",
                        "UADALPv(2i32_v1i64)$")>;
def : InstRW<[XGeneWriteVI2AsaAsa],
             (instregex "SADALPv(16i8_v8i16)$",
                        "SADALPv(8i16_v4i32)$",
                        "SADALPv(4i32_v2i64)$",
                        "UADALPv(16i8_v8i16)$",
                        "UADALPv(8i16_v4i32)$",
                        "UADALPv(4i32_v2i64)$")>;

//---
// AdvSIMD Data Processing (Vector Integer)
// AdvSIMD across lanes
// 1AsaAsa1Asa: SADDLV, UADDLV (Q=1 size=10)
// 1AsaAsa2Asa: SADDLV, UADDLV (Q=1 size=00)
// 1AsaAsa2Asa: SADDLV, UADDLV (Q=1 size=01)
// 2Asa: SADDLV, UADDLV (Q=0 size=01)
// 2Asa: ADDV, SMAXV, SMINV, UMAXV, UMINV (Q=1 size=10)
// 2Asa: ADDV, SMAXV, SMINV, UMAXV, UMINV (Q=0 size=01)
// 3Asa: SADDLV, UADDLV (Q=0 size=00)
// 3Asa: ADDV, SMAXV, SMINV, UMAXV, UMINV (Q=0 size=00)
// 3Asa: ADDV, SMAXV, SMINV, UMAXV, UMINV (Q=1 size=01)
// 4Asa: ADDV, SMAXV, SMINV, UMAXV, UMINV (Q=1 size=00)
//---
def : InstRW<[XGeneWriteVI1AsaAsa1Asa],
             (instregex "SADDLVv(4i32)v$",
                        "UADDLVv(4i32)v$")>;
def : InstRW<[XGeneWriteVI1AsaAsa2Asa],
             (instregex "SADDLVv(16i8)v$",
                        "SADDLVv(8i16)v$",
                        "UADDLVv(16i8)v$",
                        "UADDLVv(8i16)v$")>;
def : InstRW<[XGeneWriteVI2Asa],
             (instregex "SADDLVv(4i16)v$",
                        "UADDLVv(4i16)v$",
                        "ADDVv(4i16|4i32)v$",
                        "SMAXVv(4i16|4i32)v$",
                        "SMINVv(4i16|4i32)v$",
                        "UMAXVv(4i16|4i32)v$",
                        "UMINVv(4i16|4i32)v$")>;
def : InstRW<[XGeneWriteVI3Asa],
             (instregex "SADDLVv(8i8)v$",
                        "UADDLVv(8i8)v$",
                        "ADDVv(8i8|8i16)v$",
                        "SMAXVv(8i8|8i16)v$",
                        "SMINVv(8i8|8i16)v$",
                        "UMAXVv(8i8|8i16)v$",
                        "UMINVv(8i8|8i16)v$")>;
def : InstRW<[XGeneWriteVI4Asa],
             (instregex "ADDVv(16i8)v$",
                        "SMAXVv(16i8)v$")>;
def :
InstRW<[XGeneWriteVI4Asa], (instregex "SMINVv(16i8)v$")>;
def : InstRW<[XGeneWriteVI4Asa],
             (instregex "UMAXVv(16i8)v$",
                        "UMINVv(16i8)v$")>;

//---
// AdvSIMD Data Processing (Vector Integer)
// AdvSIMD copy
// 1Ass: DUP (element Q=0)
// 1AslAsl: DUP (element Q=1 size=x1000)
// 1Ass1Ass: DUP (element Q=1 size=other)
// 1St1Lf: DUP (general register) (Q=0)
// 1St1Lf1Falu: DUP (general register) (Q=1)
// 1Fmov: INS (element imm5=01000)
// 1Ass1Asi: INS (element imm5=other)
// 1St1Lf: INS (general register imm5=01000)
// 1St1Lf1Asi: INS (general register imm5=other)
// 1Sf1Ld1Sbfm: SMOV (all)
// 1Sf1Ld: UMOV (imm5=xxx00)
// 1Sf1Ld1Ubfm: UMOV (imm5=other)
//---

// DUP from a vector lane.
def : InstRW<[XGeneWriteVI1Ass],
             (instregex "DUPv(2i32|4i16|8i8)lane$")>;
// NOTE(review): the table above names this class 1AslAsl, the record uses
// 1Asl1Asl -- confirm which one is intended.
def : InstRW<[XGeneWriteVI1Asl1Asl],
             (instregex "DUPv(2i64)lane$")>;
def : InstRW<[XGeneWriteVI1Ass1Ass],
             (instregex "DUPv(4i32|8i16|16i8)lane$")>;

// DUP from a general register.
def : InstRW<[XGeneWriteVI1St1Lf],
             (instregex "DUPv(8i8|4i16|2i32)gpr$")>;
def : InstRW<[XGeneWriteVI1St1Lf1Falu],
             (instregex "DUPv(16i8|8i16|4i32|2i64)gpr$")>;

// INS from a vector lane or a general register.
def : InstRW<[XGeneWriteVI1Fmov],
             (instregex "INSv(i64)lane$")>;
def : InstRW<[XGeneWriteVI1Ass1Asi],
             (instregex "INSv(i8|i16|i32)lane$")>;
def : InstRW<[XGeneWriteVI1St1Lf],
             (instregex "INSv(i64)gpr$")>;
def : InstRW<[XGeneWriteVI1St1Lf1Asi],
             (instregex "INSv(i8|i16|i32)gpr$")>;

// SMOV / UMOV to a general register.
def : InstRW<[XGeneWriteVI1Sf1Ld1Sbfm],
             (instregex "SMOVv(i8|i16)to32$",
                        "SMOVv(i8|i16|i32)to64$")>;
def : InstRW<[XGeneWriteVI1Sf1Ld],
             (instregex "UMOVvi64$")>;
def : InstRW<[XGeneWriteVI1Sf1Ld1Ubfm],
             (instregex "UMOVv(i8|i16|i32)$")>;

//---
// AdvSIMD Data Processing (Vector Integer)
// AdvSIMD vector x indexed element
// 1Asm: MUL, SQDMULH, SQRDMULH, MLA, MLS (by element Q=0)
// 1AsmAsm: SMULL, SMLAL, SMLSL (by element)
// 1AsmAsm: UMULL, UMLAL, UMLSL (by element)
// 1AsmAsm: SQDMULL, SQDMLAL, SQDMLSL (by element)
// 1AsmAsm: MUL, SQDMULH, SQRDMULH, MLA, MLS (by element Q=1)
//---
def : InstRW<[XGeneWriteVI1Asm],
             (instregex "MULv(4i16|2i32)_indexed$",
                        "ML(A|S)v(4i16|2i32)_indexed$",
                        "SQDMULHv(4i16|2i32)_indexed$",
                        "SQRDMULHv(4i16|2i32)_indexed$")>;
def : InstRW<[XGeneWriteVI1AsmAsm],
             (instregex "SMULLv(4i16|8i16|2i32|4i32)_indexed$",
                        "SML(A|S)Lv(4i16|8i16|2i32|4i32)_indexed$",
                        "UMULLv(4i16|8i16|2i32|4i32)_indexed$",
                        "UML(A|S)Lv(4i16|8i16|2i32|4i32)_indexed$",
                        "SQDMULLv(4i16|2i32)_indexed$",
                        "SQDMULLv(8i16|4i32|1i32|1i64)_indexed$",
                        "SQDML(A|S)Lv(4i16|2i32)_indexed$",
                        "SQDML(A|S)Lv(8i16|4i32|1i32|1i64)_indexed$",
                        "MULv(8i16|4i32)_indexed$",
                        "ML(A|S)v(8i16|4i32)_indexed$",
                        "SQDMULHv(8i16|4i32|1i16|1i32)_indexed$",
                        "SQRDMULHv(8i16|4i32|1i16|1i32)_indexed$")>;

//---
// AdvSIMD Data Processing (Vector Integer)
// AdvSIMD shift by immediate
// 1Ass: SSHR, SRSHR (Q=0)
// 1Ass: USHR, URSHR (Q=0)
// 1Ass: SHL, SQSHL, SQSHLU (Q=0)
// 1Ass: UQSHL (Q=0)
// 1Ass: SRI, SLI (Q=0)
// 1Ass: SQRSHRN, SQSHRUN, SQRSHRUN (Q=0, Q=1)
// 1Ass: UQSHRN, UQRSHRN (Q=0, Q=1)
// 1AssAss: SSHR, SRSHR (Q=1)
// 1AssAss: USHR, URSHR (Q=1)
// 1AssAss: SHL, SQSHL, SQSHLU (Q=1)
// 1AssAss: UQSHL (Q=1)
// 1AssAss: SRI, SLI (Q=1)
// 1AssAss: SHRN, RSHRN, SQSHRN (Q=0, Q=1)
// 1AssAss: SSHLL, USHLL (Q=0, Q=1)
// 1Ass1Asa: SSRA, SRSRA (Q=0)
// 1Ass1Asa: USRA, URSRA (Q=0)
// 1AssAss1AsaAsa: SSRA, SRSRA (Q=1)
// 1AssAss1AsaAsa: USRA, URSRA (Q=1)
//---

// Q=0 shifts and shift-inserts.
def : InstRW<[XGeneWriteVI1Ass],
             (instregex "SSHRv(8i8|4i16|2i32)_shift$",
                        "SRSHRv(8i8|4i16|2i32)_shift$",
                        "USHRv(8i8|4i16|2i32)_shift$",
                        "URSHRv(8i8|4i16|2i32)_shift$",
                        "SHLv(8i8|4i16|2i32)_shift$",
                        "SQSHLv(8i8|4i16|2i32)_shift$",
                        "UQSHLv(8i8|4i16|2i32)_shift$",
                        "SQSHLUv(8i8|4i16|2i32)_shift$",
                        "SRIv(8i8|4i16|2i32)_shift$",
                        "SLIv(8i8|4i16|2i32)_shift$")>;

// Saturating narrowing shifts.
// NOTE(review): the table above lists these rows as 1Ass, the records use
// 1AssAss -- confirm which one is intended.
def : InstRW<[XGeneWriteVI1AssAss],
             (instregex "SQRSHRNv(8i8|4i16|2i32)_shift$",
                        "SQRSHRNv(16i8|8i16|4i32)_shift$",
                        "SQSHRUNv(8i8|4i16|2i32)_shift$",
                        "SQSHRUNv(16i8|8i16|4i32)_shift$",
                        "SQRSHRUNv(8i8|4i16|2i32)_shift$",
                        "SQRSHRUNv(16i8|8i16|4i32)_shift$",
                        "UQSHRNv(8i8|4i16|2i32)_shift$",
                        "UQSHRNv(16i8|8i16|4i32)_shift$",
                        "UQRSHRNv(8i8|4i16|2i32)_shift$",
                        "UQRSHRNv(16i8|8i16|4i32)_shift$")>;

// Q=1 shifts and shift-inserts.
def : InstRW<[XGeneWriteVI1AssAss],
             (instregex "SSHRv(16i8|8i16|4i32|2i64)_shift$",
                        "SRSHRv(16i8|8i16|4i32|2i64)_shift$",
                        "USHRv(16i8|8i16|4i32|2i64)_shift$",
                        "URSHRv(16i8|8i16|4i32|2i64)_shift$",
                        "SHLv(16i8|8i16|4i32|2i64)_shift$",
                        "SQSHLv(16i8|8i16|4i32|2i64)_shift$",
                        "UQSHLv(16i8|8i16|4i32|2i64)_shift$",
                        "SQSHLUv(16i8|8i16|4i32|2i64)_shift$",
                        "SRIv(16i8|8i16|4i32|2i64)_shift$",
                        "SLIv(16i8|8i16|4i32|2i64)_shift$")>;

// Narrowing shifts (SHRN, RSHRN, SQSHRN) and long shifts (SSHLL, USHLL).
def : InstRW<[XGeneWriteVI1AssAss],
             (instregex "SHRNv(8i8|4i16|2i32)_shift$",
                        "SHRNv(16i8|8i16|4i32)_shift$",
                        "RSHRNv(8i8|4i16|2i32)_shift$",
                        "RSHRNv(16i8|8i16|4i32)_shift$",
                        "SQSHRNv(8i8|4i16|2i32)_shift$",
                        "SQSHRNv(16i8|8i16|4i32)_shift$",
                        "SSHLLv(8i8|4i16|2i32)_shift$",
                        "SSHLLv(16i8|8i16|4i32)_shift$",
                        "USHLLv(8i8|4i16|2i32)_shift$",
                        "USHLLv(16i8|8i16|4i32)_shift$")>;

// Shift right and accumulate.
def : InstRW<[XGeneWriteVI1Ass1Asa],
             (instregex "SSRAv(8i8|4i16|2i32)_shift$",
                        "SRSRAv(8i8|4i16|2i32)_shift$",
                        "USRAv(8i8|4i16|2i32)_shift$",
                        "URSRAv(8i8|4i16|2i32)_shift$")>;
def : InstRW<[XGeneWriteVI1AssAss1AsaAsa],
             (instregex "SSRAv(16i8|8i16|4i32|2i64)_shift$",
                        "SRSRAv(16i8|8i16|4i32|2i64)_shift$",
                        "USRAv(16i8|8i16|4i32|2i64)_shift$",
                        "URSRAv(16i8|8i16|4i32|2i64)_shift$")>;

//---
// AdvSIMD Data Processing (Vector Integer)
// AdvSIMD modified immediate
// 1Asl: MOVI, MVNI, ORR, BIC, FMOV (Q=0)
// 1AslAsl: MOVI, MVNI, ORR, BIC, FMOV (Q=1)
//---
def : InstRW<[XGeneWriteVI1Asl],
             (instregex "MOVIv(4i16|2i32)$",
                        "MVNIv(4i16|2i32)$",
                        "ORRv(4i16|2i32)$",
                        "BICv(4i16|2i32)$")>;
// TODO: not sure about FMOVs
def : InstRW<[XGeneWriteVI1Asl],
             (instregex "FMOVv(4f16|2f32)_ns$")>;
def : InstRW<[XGeneWriteVI1AslAsl],
             (instregex "MOVIv(8i16|4i32)$",
                        "MVNIv(8i16|4i32)$",
                        "ORRv(8i16|4i32)$",
                        "BICv(8i16|4i32)$",
                        "FMOVv(8f16|4f32)_ns$")>;

//---
// AdvSIMD Data Processing (Vector Integer)
// AdvSIMD TBL, TBX
// All ops depend on Vm.
// The first TBX op depends on Vd.
// The first TBL/TBX ops depend on the low half of Vn, Vn+1, ... in turn,
// while the last TBL/TBX ops depend on the high half of Vn, Vn+1, ... in turn.
// 2Asl: TBL/TBX (single register table Q=0)
// 4Asl: TBL/TBX (two register table Q=0)
// 6Asl: TBL/TBX (three register table Q=0)
// 8Asl: TBL/TBX (four register table Q=0)
// 2AslAsl: TBL/TBX (single register table Q=1)
// 4AslAsl: TBL/TBX (two register table Q=1)
// 6AslAsl: TBL/TBX (three register table Q=1)
// 8AslAsl: TBL/TBX (four register table Q=1)
//---
// Each InstRW below binds the opcodes whose names match its instregex to the
// listed SchedWrite.
def : InstRW<[XGeneWriteVI2Asl],
             (instregex "TB(L|X)v(8i8)One$")>;
def : InstRW<[XGeneWriteVI4Asl],
             (instregex "TB(L|X)v(8i8)Two$")>;
def : InstRW<[XGeneWriteVI6Asl],
             (instregex "TB(L|X)v(8i8)Three$")>;
def : InstRW<[XGeneWriteVI8Asl],
             (instregex "TB(L|X)v(8i8)Four$")>;

def : InstRW<[XGeneWriteVI2AslAsl],
             (instregex "TB(L|X)v(16i8)One$")>;
def : InstRW<[XGeneWriteVI4AslAsl],
             (instregex "TB(L|X)v(16i8)Two$")>;
def : InstRW<[XGeneWriteVI6AslAsl],
             (instregex "TB(L|X)v(16i8)Three$")>;
def : InstRW<[XGeneWriteVI8AslAsl],
             (instregex "TB(L|X)v(16i8)Four$")>;

//---
// AdvSIMD Data Processing (Vector Integer)
// AdvSIMD ZIP/UZP/TRN
// 1Ass: ZIP1/ZIP2/UZP1/UZP2 (Q=0)
// 1AslAsl: ZIP1/ZIP2/UZP1/UZP2 (Q=1, size=11)
// 1AssAss: ZIP1/ZIP2/UZP1/UZP2 (Q=1, size=other)
// 1AslAsl: TRN1/TRN2 (size=11)
// 1Ass: TRN1/TRN2 (size=other Q=0)
// 1AssAss: TRN1/TRN2 (size=other Q=1)
//
// NOTE: size=11 selects 64-bit elements, i.e. the v2i64 forms, so those are
// the ones bound to 1AslAsl; every other Q=1 arrangement is bound to 1AssAss.
//---
def : InstRW<[XGeneWriteVI1Ass],
             (instregex "ZIP(1|2)v(8i8|4i16|2i32)$")>;
def : InstRW<[XGeneWriteVI1Ass],
             (instregex "UZP(1|2)v(8i8|4i16|2i32)$")>;
def : InstRW<[XGeneWriteVI1Ass],
             (instregex "TRN(1|2)v(8i8|4i16|2i32)$")>;

def : InstRW<[XGeneWriteVI1AslAsl],
             (instregex "ZIP(1|2)v(2i64)$")>;
def : InstRW<[XGeneWriteVI1AslAsl],
             (instregex "UZP(1|2)v(2i64)$")>;
def : InstRW<[XGeneWriteVI1AslAsl],
             (instregex "TRN(1|2)v(2i64)$")>;

def : InstRW<[XGeneWriteVI1AssAss],
             (instregex "ZIP(1|2)v(16i8|8i16|4i32)$")>;
def : InstRW<[XGeneWriteVI1AssAss],
             (instregex "UZP(1|2)v(16i8|8i16|4i32)$")>;
def : InstRW<[XGeneWriteVI1AssAss],
             (instregex "TRN(1|2)v(16i8|8i16|4i32)$")>;

//---
// AdvSIMD Data Processing (Vector Integer)
// AdvSIMD EXT:
// 1Ass: EXT (Q=0)
// 1AssAss: EXT (Q=1)
//---
// Each InstRW below binds the opcodes whose names match its instregex to the
// listed SchedWrite.
def : InstRW<[XGeneWriteVI1Ass],
             (instregex "EXTv(8i8)$")>;
def : InstRW<[XGeneWriteVI1AssAss],
             (instregex "EXTv(16i8)$")>;

//---
// AdvSIMD Data Processing (Vector FP)
// AdvSIMD three same:
// 1Falu: FADD, FSUB, FMULX, FMLA, FMLS, FADDP (Q=0)
// 1Falu: FRECPS, FRSQRTS (Q=0)
// 1Falu: FABD (Q=0)
// 1Fdivd: FDIV (Q=0)
// 1Fsel: FMAX, FMAXNM, FMAXP, FMAXNMP (Q=0)
// 1Fsel: FMIN, FMINNM, FMINP, FMINNMP (Q=0)
// 1Fsel: FCMEQ, FCMGE, FCMGT, FACGE, FACGT (Q=0)
// 1FaluFalu: FADD, FSUB, FMULX, FMLA, FMLS, FADDP (Q=1)
// 1FaluFalu: FRECPS, FRSQRTS (Q=1)
// 1FaluFalu: FABD (Q=1)
// 1FdivdFdivd: FDIV (Q=1)
// 1FselFsel: FMAX, FMAXNM, FMAXP, FMAXNMP (Q=1)
// 1FselFsel: FMIN, FMINNM, FMINP, FMINNMP (Q=1)
// 1FselFsel: FCMEQ, FCMGE, FCMGT, FACGE, FACGT (Q=1)
//---
// Q=0 forms.
def : InstRW<[XGeneWriteVF1Falu],
             (instregex "FADDv(4f16|2f32)$")>;
def : InstRW<[XGeneWriteVF1Falu],
             (instregex "FSUBv(4f16|2f32)$")>;
def : InstRW<[XGeneWriteVF1Falu],
             (instregex "FMULXv(4f16|2f32)$")>;
def : InstRW<[XGeneWriteVF1Falu],
             (instregex "FADDPv(4f16|2f32)$")>;
def : InstRW<[XGeneWriteVF1Falu],
             (instregex "FML(A|S)v(4f16|2f32)$")>;
def : InstRW<[XGeneWriteVF1Falu],
             (instregex "FRECPSv(4f16|2f32)$")>;
def : InstRW<[XGeneWriteVF1Falu],
             (instregex "FRSQRTSv(4f16|2f32)$")>;
def : InstRW<[XGeneWriteVF1Falu],
             (instregex "FABDv(4f16|2f32)$")>;
def : InstRW<[XGeneWriteVF1Fdivd],
             (instregex "FDIVv(4f16|2f32)$")>;
def : InstRW<[XGeneWriteVF1Fsel],
             (instregex "FMAXv(4f16|2f32)$")>;
def : InstRW<[XGeneWriteVF1Fsel],
             (instregex "FMAXNMv(4f16|2f32)$")>;
def : InstRW<[XGeneWriteVF1Fsel],
             (instregex "FMAXPv(4f16|2f32)$")>;
def : InstRW<[XGeneWriteVF1Fsel],
             (instregex "FMAXNMPv(4f16|2f32)$")>;
def : InstRW<[XGeneWriteVF1Fsel],
             (instregex "FMINv(4f16|2f32)$")>;
def : InstRW<[XGeneWriteVF1Fsel],
             (instregex "FMINNMv(4f16|2f32)$")>;
def : InstRW<[XGeneWriteVF1Fsel],
             (instregex "FMINPv(4f16|2f32)$")>;
def : InstRW<[XGeneWriteVF1Fsel],
             (instregex "FMINNMPv(4f16|2f32)$")>;
def : InstRW<[XGeneWriteVF1Fsel],
             (instregex "FCM(EQ|GE|GT)v(4f16|2f32)$")>;
def : InstRW<[XGeneWriteVF1Fsel],
             (instregex "FAC(GE|GT)v(4f16|2f32)$")>;

// Q=1 forms.
def : InstRW<[XGeneWriteVF1FaluFalu],
             (instregex "FADDv(8f16|4f32|2f64)$")>;
def : InstRW<[XGeneWriteVF1FaluFalu],
             (instregex "FSUBv(8f16|4f32|2f64)$")>;
def : InstRW<[XGeneWriteVF1FaluFalu],
             (instregex "FMULXv(8f16|4f32|2f64)$")>;
def : InstRW<[XGeneWriteVF1FaluFalu],
             (instregex "FADDPv(8f16|4f32|2f64)$")>;
def : InstRW<[XGeneWriteVF1FaluFalu],
             (instregex "FML(A|S)v(8f16|4f32|2f64)$")>;
def : InstRW<[XGeneWriteVF1FaluFalu],
             (instregex "FRECPSv(8f16|4f32|2f64)$")>;
def : InstRW<[XGeneWriteVF1FaluFalu],
             (instregex "FRSQRTSv(8f16|4f32|2f64)$")>;
def : InstRW<[XGeneWriteVF1FaluFalu],
             (instregex "FABDv(8f16|4f32|2f64)$")>;
def : InstRW<[XGeneWriteVF1FdivdFdivd],
             (instregex "FDIVv(8f16|4f32|2f64)$")>;
def : InstRW<[XGeneWriteVF1FselFsel],
             (instregex "FMAXv(8f16|4f32|2f64)$")>;
def : InstRW<[XGeneWriteVF1FselFsel],
             (instregex "FMAXNMv(8f16|4f32|2f64)$")>;
def : InstRW<[XGeneWriteVF1FselFsel],
             (instregex "FMAXPv(8f16|4f32|2f64)$")>;
def : InstRW<[XGeneWriteVF1FselFsel],
             (instregex "FMAXNMPv(8f16|4f32|2f64)$")>;
def : InstRW<[XGeneWriteVF1FselFsel],
             (instregex "FMINv(8f16|4f32|2f64)$")>;
def : InstRW<[XGeneWriteVF1FselFsel],
             (instregex "FMINNMv(8f16|4f32|2f64)$")>;
def : InstRW<[XGeneWriteVF1FselFsel],
             (instregex "FMINPv(8f16|4f32|2f64)$")>;
def : InstRW<[XGeneWriteVF1FselFsel],
             (instregex "FMINNMPv(8f16|4f32|2f64)$")>;
def : InstRW<[XGeneWriteVF1FselFsel],
             (instregex "FCM(EQ|GE|GT)v(8f16|4f32|2f64)$")>;
def : InstRW<[XGeneWriteVF1FselFsel],
             (instregex "FAC(GE|GT)v(8f16|4f32|2f64)$")>;

//---
// AdvSIMD Data Processing (Vector FP)
// AdvSIMD two-reg misc:
// 1Fhcvt: FCVTN, FCVTL (size=0)
// NOTE: FCVTL is missing from the documentation, but we will include it here
// 1Falu: FCVTN, FCVTL (size=1)
// 1Falu: FCVTXN (all)
// 1Fcvt: FRINTN, FRINTM, FRINTA, FRINTP (Q=0)
// 1Fcvt: FRINTZ, FRINTX, FRINTI (Q=0)
// 1Fcvt: FCVTNS, FCVTMS,
//        FCVTAS, FCVTPS (Q=0)
// 1Fcvt: FCVTNU, FCVTMU, FCVTAU, FCVTPU (Q=0)
// 1Fcvt: FCVTZS, FCVTZU (integer Q=0)
// 1Falu: SCVTF, UCVTF (integer Q=0)
// 1Fsel: FCMGT, FCMEQ, FCMLT (zero Q=0)
// 1Fsel: FCMGE, FCMLE (zero Q=0)
// 1Fmov: FABS, FNEG (Q=0)
// 1FcvtFcvt: FRINTN, FRINTM, FRINTA, FRINTP (Q=1)
// 1FcvtFcvt: FRINTZ, FRINTX, FRINTI (Q=1)
// 1FcvtFcvt: FCVTNS, FCVTMS, FCVTAS, FCVTPS (Q=1)
// 1FcvtFcvt: FCVTNU, FCVTMU, FCVTAU, FCVTPU (Q=1)
// 1FcvtFcvt: FCVTZS, FCVTZU (integer Q=1)
// 1FaluFalu: SCVTF, UCVTF (integer Q=1)
// 1FselFsel: FCMGT, FCMEQ, FCMLT (zero Q=1)
// 1FselFsel: FCMGE, FCMLE (zero Q=1)
// 1FmovFmov: FABS, FNEG (Q=1)
// 1Adre: FRECPE (all)
// 1Asre: FRSQRTE (all)
// 1Fsqrd: FSQRT (all)
//---
// Each InstRW below binds the opcodes whose names match its instregex to the
// listed SchedWrite.
def : InstRW<[XGeneWriteVF1Fhcvt],
             (instregex "FCVT(N|L)v(4i16|8i16)$")>;
def : InstRW<[XGeneWriteVF1Falu],
             (instregex "FCVT(N|L)v(2i32|4i32)$")>;
def : InstRW<[XGeneWriteVF1Falu],
             (instregex "FCVTXNv(2f32|4f32)$")>;

// Q=0 forms.
def : InstRW<[XGeneWriteVF1Fcvt],
             (instregex "FRINT(N|M|A|P)v(4f16|2f32)$")>;
def : InstRW<[XGeneWriteVF1Fcvt],
             (instregex "FRINT(Z|X|I)v(4f16|2f32)$")>;
def : InstRW<[XGeneWriteVF1Fcvt],
             (instregex "FCVT(N|M|A|P|Z)Sv(4f16|2f32)$")>;
def : InstRW<[XGeneWriteVF1Fcvt],
             (instregex "FCVT(N|M|A|P|Z)Uv(4f16|2f32)$")>;
def : InstRW<[XGeneWriteVF1Falu],
             (instregex "SCVTFv(4f16|2f32)$")>;
def : InstRW<[XGeneWriteVF1Falu],
             (instregex "UCVTFv(4f16|2f32)$")>;
def : InstRW<[XGeneWriteVF1Fsel],
             (instregex "FCM(GT|EQ|LT)v(4i16|2i32)rz$")>;
def : InstRW<[XGeneWriteVF1Fsel],
             (instregex "FCM(GE|LE)v(4i16|2i32)rz$")>;
def : InstRW<[XGeneWriteVF1Fmov],
             (instregex "FABSv(4f16|2f32)$")>;
def : InstRW<[XGeneWriteVF1Fmov],
             (instregex "FNEGv(4f16|2f32)$")>;

// Q=1 forms.
def : InstRW<[XGeneWriteVF1FcvtFcvt],
             (instregex "FRINT(N|M|A|P)v(8f16|4f32|2f64)$")>;
def : InstRW<[XGeneWriteVF1FcvtFcvt],
             (instregex "FRINT(Z|X|I)v(8f16|4f32|2f64)$")>;
def : InstRW<[XGeneWriteVF1FcvtFcvt],
             (instregex "FCVT(N|M|A|P|Z)Sv(8f16|4f32|2f64)$")>;
def : InstRW<[XGeneWriteVF1FcvtFcvt],
             (instregex "FCVT(N|M|A|P|Z)Uv(8f16|4f32|2f64)$")>;
def : InstRW<[XGeneWriteVF1FaluFalu],
             (instregex "SCVTFv(8f16|4f32|2f64)$")>;
def : InstRW<[XGeneWriteVF1FaluFalu],
             (instregex "UCVTFv(8f16|4f32|2f64)$")>;
def : InstRW<[XGeneWriteVF1FselFsel],
             (instregex "FCM(GT|EQ|LT)v(8i16|4i32|2i64)rz$")>;
def : InstRW<[XGeneWriteVF1FselFsel],
             (instregex "FCM(GE|LE)v(8i16|4i32|2i64)rz$")>;
def : InstRW<[XGeneWriteVF1FmovFmov],
             (instregex "FABSv(8f16|4f32|2f64)$")>;
def : InstRW<[XGeneWriteVF1FmovFmov],
             (instregex "FNEGv(8f16|4f32|2f64)$")>;

// Same SchedWrite for every arrangement ("all" in the list above).
def : InstRW<[XGeneWriteVF1Adre],
             (instregex "FRECPEv(4f16|2f32)$")>;
def : InstRW<[XGeneWriteVF1Adre],
             (instregex "FRECPEv(8f16|4f32|2f64)$")>;
def : InstRW<[XGeneWriteVF1Asre],
             (instregex "FRSQRTEv(4f16|2f32)$")>;
def : InstRW<[XGeneWriteVF1Asre],
             (instregex "FRSQRTEv(8f16|4f32|2f64)$")>;
def : InstRW<[XGeneWriteVF1Fsqrd],
             (instregex "FSQRTv(4f16|2f32)$")>;
def : InstRW<[XGeneWriteVF1Fsqrd],
             (instregex "FSQRTv(8f16|4f32|2f64)$")>;

//---
// AdvSIMD Data Processing (Vector FP)
// AdvSIMD across lanes:
// 2Asa: FMAXV, FMINV, FMAXNMV, FMINNMV
// AdvSIMD vector x indexed element:
// 1Asm: FMUL, FMULX, FMLA, FMLS (by element Q=0)
// 1AsmAsm: FMUL, FMULX, FMLA, FMLS (by element Q=1)
// AdvSIMD shift by immediate:
// 1Falu: SCVTF, UCVTF (fixed-point Q=0)
// 1Fcvt: FCVTZS, FCVTZU (fixed-point Q=0)
// 1FaluFalu: SCVTF, UCVTF (fixed-point Q=1)
// 1FcvtFcvt: FCVTZS, FCVTZU (fixed-point Q=1)
//---
// Across lanes.
def : InstRW<[XGeneWriteVF2Asa],
             (instregex "FMAXVv(4i16|8i16|4i32)v$")>;
def : InstRW<[XGeneWriteVF2Asa],
             (instregex "FMINVv(4i16|8i16|4i32)v$")>;
def : InstRW<[XGeneWriteVF2Asa],
             (instregex "FMAXNMVv(4i16|8i16|4i32)v$")>;
def : InstRW<[XGeneWriteVF2Asa],
             (instregex "FMINNMVv(4i16|8i16|4i32)v$")>;

// Vector x indexed element.
def : InstRW<[XGeneWriteVF1Asm],
             (instregex "FMULv(4i16|2i32)_indexed$")>;
def : InstRW<[XGeneWriteVF1Asm],
             (instregex "FMULXv(4i16|2i32)_indexed$")>;
def : InstRW<[XGeneWriteVF1Asm],
             (instregex "FML(A|S)v(4i16|2i32)_indexed$")>;
def : InstRW<[XGeneWriteVF1AsmAsm],
             (instregex "FMULv(8i16|4i32|2i64)_indexed$")>;
def : InstRW<[XGeneWriteVF1AsmAsm],
             (instregex "FMULXv(8i16|4i32|2i64)_indexed$")>;
def : InstRW<[XGeneWriteVF1AsmAsm],
             (instregex "FML(A|S)v(8i16|4i32|2i64)_indexed$")>;

// Shift by immediate (fixed-point conversions).
def : InstRW<[XGeneWriteVF1Falu],
             (instregex "SCVTFv(4i16|2i32)_shift$")>;
def : InstRW<[XGeneWriteVF1Falu],
             (instregex "UCVTFv(4i16|2i32)_shift$")>;
def : InstRW<[XGeneWriteVF1Fcvt],
             (instregex "FCVTZ(S|U)v(4i16|2i32)_shift$")>;
def : InstRW<[XGeneWriteVF1FaluFalu],
             (instregex "SCVTFv(8i16|4i32|2i64)_shift$")>;
def : InstRW<[XGeneWriteVF1FaluFalu],
             (instregex "UCVTFv(8i16|4i32|2i64)_shift$")>;
def : InstRW<[XGeneWriteVF1FcvtFcvt],
             (instregex "FCVTZ(S|U)v(8i16|4i32|2i64)_shift$")>;
}