path: root/gcc/config/aarch64/falkor-tag-collision-avoidance.c
/* Tag Collision Avoidance pass for Falkor.
   Copyright (C) 2018-2019 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#define IN_TARGET_CODE 1

#include "config.h"
#define INCLUDE_LIST
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "tree-pass.h"
#include "aarch64-protos.h"
#include "hash-map.h"
#include "cfgloop.h"
#include "cfgrtl.h"
#include "rtl-iter.h"
#include "df.h"
#include "memmodel.h"
#include "optabs.h"
#include "regs.h"
#include "recog.h"
#include "regrename.h"
#include "print-rtl.h"

/* The Falkor hardware prefetching system uses the encoding of the registers
   and offsets of loads to decide which of the multiple hardware prefetchers to
   assign the load to.  This has the positive effect of accelerating prefetches
   when all related loads with uniform strides are assigned to the same
   prefetcher unit.  The down side is that because of the way the assignment
   works, multiple unrelated loads may end up on the same prefetch unit, thus
   causing the unit to bounce between different sets of addresses and never
   train correctly.  The point of this pass is to avoid such collisions so that
   unrelated loads are spread out to different prefetchers.  It also makes a
   rudimentary attempt to ensure that related loads with the same tags don't
   get moved out unnecessarily.

   Perhaps a future enhancement would be to make a more concerted attempt to
   get related loads under the same tag.  See the memcpy/memset implementation
   for falkor in glibc to understand the kind of impact this can have on
   falkor.

   The assignment of loads is based on a tag that is computed from the encoding
   of the first destination register (the only destination in case of LDR), the
   base register and the offset (either the register or the immediate value, as
   encoded in the instruction).  This is what the 14 bit tag looks like:

   |<- 6 bits ->|<- 4b ->|<- 4b ->|
   --------------------------------
   |  OFFSET    |  SRC   |  DST   |
   --------------------------------

   For all cases, the SRC and DST are the 4 LSB of the encoding of the register
   in the instruction.  Offset computation is more involved and is as follows:

   - For register offset addressing: 4 LSB of the offset register with the MSB
     of the 6 bits set to 1.

   - For immediate offset: 4 LSB of the encoded immediate offset.  The encoding
     depends on the width of the load and is expressed as multiples of the
     width.

   - For loads with update: 4 LSB of the offset.  The encoding here is the
     exact number by which the base is offset and incremented.

   Based on the above it is clear that registers 0 and 16 will result in
   collisions, 1 and 17 and so on.  This pass detects such collisions within a
   def/use chain of the source register in a loop and tries to resolve the
   collision by renaming one of the destination registers.  */
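
/* As an illustration (the arithmetic follows the tag () computation further
   down in this file rather than any hardware documentation), consider two
   hypothetical 8-byte loads in the same loop whose common base register x2
   is an induction variable:

     ldr x1,  [x2]          base x2, dest x1,  immediate offset 0
     ldr x17, [x2, #24]     base x2, dest x17, immediate offset 24

   For a non-writeback immediate load the offset bits are
   (offset / mode_size) >> 2, which is 0 for both loads (0 / 8 = 0 and
   24 / 8 = 3, 3 >> 2 = 0).  The DST nibble is the low 4 bits of the register
   number, so x1 and x17 both contribute 0x1, and the SRC nibble is 0x2 for
   both.  The two loads therefore get the same tag 0x021, end up in the same
   bucket of the tag map, and one of them becomes a candidate for having its
   destination register renamed.  */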

/* Get the destination part of the tag.  */
#define TAG_GET_DEST(__tag) ((__tag) & 0xf)

/* Get the tag with the destination part updated.  */
#define TAG_UPDATE_DEST(__tag, __dest) (((__tag) & ~0xf) | (__dest & 0xf))

#define MAX_PREFETCH_STRIDE 2048

/* The instruction information structure.  This is used to cache information
   about the INSN that we derive when traversing through all of the insns in
   loops.  */
class tag_insn_info
{
public:
  rtx_insn *insn;
  rtx dest;
  rtx base;
  rtx offset;
  bool writeback;
  bool ldp;

  tag_insn_info (rtx_insn *i, rtx d, rtx b, rtx o, bool w, bool p)
    : insn (i), dest (d), base (b), offset (o), writeback (w), ldp (p)
  {}

  /* Compute the tag based on BASE, DEST and OFFSET of the load.  */
  unsigned tag ()
    {
      unsigned int_offset = 0;
      rtx offset = this->offset;
      unsigned dest = REGNO (this->dest);
      unsigned base = REGNO (this->base);
      machine_mode dest_mode = GET_MODE (this->dest);

      /* Falkor does not support SVE; GET_LOAD_INFO ensures that the
	 destination mode is constant here.  */
      unsigned dest_mode_size = GET_MODE_SIZE (dest_mode).to_constant ();

      /* For loads of larger than 16 bytes, the DEST part of the tag is 0.  */
      if ((dest_mode_size << this->ldp) > 16)
	dest = 0;

      if (offset && REG_P (offset))
	int_offset = (1 << 5) | REGNO (offset);
      else if (offset && CONST_INT_P (offset))
	{
	  int_offset = INTVAL (offset);
	  int_offset /= dest_mode_size;
	  if (!this->writeback)
	    int_offset >>= 2;
	}
      return ((dest & 0xf)
	      | ((base & 0xf) << 4)
	      | ((int_offset & 0x3f) << 8));
    }
};

/* Hash map to traverse and process instructions with colliding tags.  */
typedef hash_map <rtx, auto_vec <tag_insn_info *> > tag_map_t;

/* Vector of instructions with colliding tags.  */
typedef auto_vec <tag_insn_info *> insn_info_list_t;

/* Pair of instruction information and unavailable register set to pass to
   CHECK_COLLIDING_TAGS.  */
typedef std::pair <tag_insn_info *, HARD_REG_SET *> arg_pair_t;


/* Callback to free all tag_insn_info objects.  */
bool
free_insn_info (const rtx &t ATTRIBUTE_UNUSED, insn_info_list_t *v,
		void *arg ATTRIBUTE_UNUSED)
{
  while (v->length () > 0)
    delete v->pop ();

  return true;
}


/* Add REG and all of its tag aliases (the registers whose numbers share the
   same 4 LSBs, i.e. REG + 16, REG + 32 and REG + 48) to the unavailable
   register set.  REG is the smallest register number of the alias group.
   UNAVAILABLE is the hard register set to which the register numbers are
   added and MODE is the mode in which the registers would have been used.  */
static void
ignore_all_aliases (HARD_REG_SET *unavailable, machine_mode mode, unsigned reg)
{
  add_to_hard_reg_set (unavailable, mode, reg);
  add_to_hard_reg_set (unavailable, mode, reg + 16);
  add_to_hard_reg_set (unavailable, mode, reg + 32);
  add_to_hard_reg_set (unavailable, mode, reg + 48);
}


/* Callback to check which destination registers are unavailable to us for
   renaming because of the base and offset colliding.  This is a callback that
   gets called for every name-value pair (T, V) in the TAG_MAP.  The ARG is an
   std::pair of the tag_insn_info of the original insn and the hard register
   set UNAVAILABLE that is used to record hard register numbers that cannot be
   used for the renaming.  This always returns true since we want to traverse
   through the entire TAG_MAP.  */
bool
check_colliding_tags (const rtx &t, const insn_info_list_t &v, arg_pair_t *arg)
{
  HARD_REG_SET *unavailable = arg->second;
  unsigned orig_tag = arg->first->tag ();
  unsigned tag = INTVAL (t);
  machine_mode mode = GET_MODE (arg->first->dest);

  /* Can't collide with emptiness.  */
  if (v.length () == 0)
    return true;

  /* Drop all aliased destination registers that result in the same
     tag.  It is not necessary to drop all of them but we do anyway
     because it is quicker than checking ranges.  */
  if (TAG_UPDATE_DEST (tag, 0) == TAG_UPDATE_DEST (orig_tag, 0))
    ignore_all_aliases (unavailable, mode, TAG_GET_DEST (tag));

  return true;
}


/* Initialize and build a set of hard register numbers UNAVAILABLE to avoid for
   renaming.  INSN_INFO is the original insn, TAG_MAP is the map of the list of
   insns indexed by their tags, HEAD is the def/use chain head of the
   destination register of the original insn.  The routine returns the super
   class of register classes that may be used during the renaming.  */
static enum reg_class
init_unavailable (tag_insn_info *insn_info, tag_map_t &tag_map, du_head_p head,
		  HARD_REG_SET *unavailable)
{
  unsigned dest = head->regno;
  enum reg_class super_class = NO_REGS;
  machine_mode mode = GET_MODE (insn_info->dest);

  CLEAR_HARD_REG_SET (*unavailable);

  for (struct du_chain *tmp = head->first; tmp; tmp = tmp->next_use)
    {
      if (DEBUG_INSN_P (tmp->insn))
	continue;

      IOR_COMPL_HARD_REG_SET (*unavailable, reg_class_contents[tmp->cl]);
      super_class = reg_class_superunion[(int) super_class][(int) tmp->cl];
    }

  for (unsigned i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (fixed_regs[i] || global_regs[i])
      add_to_hard_reg_set (unavailable, mode, i);

  arg_pair_t arg = arg_pair_t (insn_info, unavailable);

  /* Exclude all registers that would lead to collisions with other loads.  */
  tag_map.traverse <arg_pair_t *, check_colliding_tags> (&arg);

  /* Finally, also ignore all aliases of the current reg.  */
  ignore_all_aliases (unavailable, mode, dest & 0xf);

  return super_class;
}


/* Find a suitable and available register and rename the chain of occurrences
   of the register defined in the def/use chain headed by HEAD in which the
   insn in INSN_INFO exists.  INSN_INFO and TAG_MAP are used to determine which
   registers are unavailable due to a potential collision caused by the rename.
   The routine returns the new register number in case of a successful rename
   or -1 to indicate failure.  */
static int
rename_chain (tag_insn_info *insn_info, tag_map_t &tag_map, du_head_p head)
{
  unsigned dest_regno = head->regno;

  if (head->cannot_rename || head->renamed)
    return -1;

  HARD_REG_SET unavailable;

  enum reg_class super_class = init_unavailable (insn_info, tag_map, head,
						 &unavailable);

  unsigned new_regno = find_rename_reg (head, super_class, &unavailable,
					dest_regno, false);

  /* Attempt to rename as long as regrename doesn't just throw the same
     register at us.  */
  if (new_regno != dest_regno && regrename_do_replace (head, new_regno))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	  fprintf (dump_file, "\tInsn %d: Renamed %d to %d\n",
		   INSN_UID (insn_info->insn), dest_regno, new_regno);

      return new_regno;
    }

  return -1;
}


/* Return true if REGNO is not safe to rename.  */
static bool
unsafe_rename_p (unsigned regno)
{
  /* Avoid renaming registers used for argument passing and return value.  In
     future we could be a little less conservative and walk through the basic
     blocks to see if there are any call or syscall sites.  */
  if (regno <= R8_REGNUM
      || (regno >= V0_REGNUM && regno < V8_REGNUM))
    return true;

  /* Don't attempt to rename registers that may have specific meanings.  */
  switch (regno)
    {
    case LR_REGNUM:
    case HARD_FRAME_POINTER_REGNUM:
    case FRAME_POINTER_REGNUM:
    case STACK_POINTER_REGNUM:
      return true;
    }

  return false;
}


/* Go through the def/use chains for the register and find the chain for this
   insn to rename.  The function returns the hard register number in case of a
   successful rename and -1 otherwise.  */
static int
rename_dest (tag_insn_info *insn_info, tag_map_t &tag_map)
{
  struct du_chain *chain = NULL;
  du_head_p head = NULL;
  int i;

  unsigned dest_regno = REGNO (insn_info->dest);

  if (unsafe_rename_p (dest_regno))
    return -1;

  /* Search for the chain in which this instruction is (one of) the roots.  */
  rtx_insn *insn = insn_info->insn;
  operand_rr_info *dest_op_info = insn_rr[INSN_UID (insn)].op_info;

  for (i = 0; i < dest_op_info->n_chains; i++)
    {
      /* The register tracked by this chain does not match the
	 destination register of insn.  */
      if (dest_op_info->heads[i]->regno != dest_regno)
	continue;

      head = dest_op_info->heads[i];
      /* The chain was merged in another, find the new head.  */
      if (!head->first)
	head = regrename_chain_from_id (head->id);

      for (chain = head->first; chain; chain = chain->next_use)
	/* Found the insn in the chain, so try renaming the register in this
	   chain.  */
	if (chain->insn == insn)
	  return rename_chain (insn_info, tag_map, head);
    }

  return -1;
}


/* Flag to track if the map has changed.  */
static bool map_changed = false;

/* The actual reallocation logic.  For each vector of collisions V, try to
   resolve the collision by attempting to rename the destination register of
   all but one of the loads.  This is a callback that is invoked for each
   name-value pair (T, V) in TAG_MAP.  The function returns true whenever the
   map remains unchanged and false as soon as the map is modified, which halts
   the traversal so that AVOID_COLLISIONS can restart it on the updated map.  */
bool
avoid_collisions_1 (const rtx &t, insn_info_list_t *v, tag_map_t *tag_map)
{
  /* It takes at least two loads to cause a tag collision; with fewer, leave
     this entry unchanged.  */
  if (v->length () < 2)
    return true;

  tag_insn_info *vec_start = v->pop ();
  tag_insn_info *insn_info = vec_start;

  /* Try to rename at least one register to reduce the collision.  If we
     iterate all the way through, we end up dropping one of the loads from the
     list.  This is fine because we want at most one element to ensure that a
     subsequent rename attempt does not end up worsening the collision.  */
  do
    {
      int new_regno;

      if ((new_regno = rename_dest (insn_info, *tag_map)) != -1)
	{
	  rtx new_tag = GEN_INT (TAG_UPDATE_DEST (INTVAL (t), new_regno));

	  tag_map->get_or_insert (new_tag).safe_push (insn_info);
	  df_set_regs_ever_live (new_regno, true);
	  map_changed = true;
	  return false;
	}

      v->safe_insert (0, insn_info);
      insn_info = v->pop ();
    }
  while (insn_info != vec_start);

  if (dump_file)
    fprintf (dump_file, "\t>> Failed to rename destination in insn %d\n\t>>",
	     INSN_UID (insn_info->insn));

  /* Drop the last element and move on to the next tag.  */
  delete insn_info;
  return true;
}


/* For each set of colliding loads, attempt to rename destination registers so
   that the collision is avoided.  We repeatedly traverse through TAG_MAP using
   AVOID_COLLISIONS_1 trying to rename registers until a full traversal results
   in no change in the map.  */
static void
avoid_collisions (tag_map_t &tag_map)
{
  do
    {
      map_changed = false;
      tag_map.traverse <tag_map_t *, avoid_collisions_1> (&tag_map);
    }
  while (map_changed);
}



/* Find the use-def chain in which INSN exists and then see if there is a
   definition inside the loop and outside it.  We use this as a simple
   approximation to determine whether the base register is an IV.  The basic
   idea is to find INSN in the use-def chains for its base register and find
   all definitions that reach it.  Of all these definitions, there should be at
   least one definition that is a simple addition of a constant value, either
   as a binary operation or a pre or post update.

   The function returns true if the base register is estimated to be an IV.  */
static bool
iv_p (rtx_insn *insn, rtx reg, struct loop *loop)
{
  df_ref ause;
  unsigned regno = REGNO (reg);

  /* Ignore loads from the stack.  */
  if (regno == SP_REGNUM)
    return false;

  for (ause = DF_REG_USE_CHAIN (regno); ause; ause = DF_REF_NEXT_REG (ause))
    {
      if (!DF_REF_INSN_INFO (ause)
	  || !NONDEBUG_INSN_P (DF_REF_INSN (ause)))
	continue;

      if (insn != DF_REF_INSN (ause))
	continue;

      struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
      df_ref def_rec;

      FOR_EACH_INSN_INFO_DEF (def_rec, insn_info)
	{
	  rtx_insn *insn = DF_REF_INSN (def_rec);
	  basic_block bb = BLOCK_FOR_INSN (insn);

	  if (dominated_by_p (CDI_DOMINATORS, bb, loop->header)
	      && bb->loop_father == loop)
	    {
	      if (recog_memoized (insn) < 0)
		continue;

	      rtx pat = PATTERN (insn);

	      /* Prefetch or clobber; unlikely to be a constant stride.  The
		 falkor software prefetcher tuning is pretty conservative, so
		 its presence indicates that the access pattern is probably
		 strided but most likely with an unknown stride size or a
		 stride size that is quite large.  */
	      if (GET_CODE (pat) != SET)
		continue;

	      rtx x = SET_SRC (pat);
	      if (GET_CODE (x) == ZERO_EXTRACT
		  || GET_CODE (x) == ZERO_EXTEND
		  || GET_CODE (x) == SIGN_EXTEND)
		x = XEXP (x, 0);

	      /* Loading the value from memory; unlikely to be a constant
		 stride.  */
	      if (MEM_P (x))
		continue;

	      /* An increment or decrement by a constant MODE_SIZE amount or
		 the result of a binary expression is likely to be an IV.  */
	      if (GET_CODE (x) == POST_INC
		  || GET_CODE (x) == POST_DEC
		  || GET_CODE (x) == PRE_INC
		  || GET_CODE (x) == PRE_DEC)
		return true;
	      else if (BINARY_P (x)
		       && (CONST_INT_P (XEXP (x, 0))
			   || CONST_INT_P (XEXP (x, 1))))
		{
		  rtx stride = (CONST_INT_P (XEXP (x, 0))
				? XEXP (x, 0) : XEXP (x, 1));

		  /* Don't bother with very long strides because the prefetcher
		     is unable to train on them anyway.  */
		  if (INTVAL (stride) < MAX_PREFETCH_STRIDE)
		    return true;
		}
	    }
	}
      return false;
    }
  return false;
}


/* Return true if SRC, the source of a load in INSN, is a strided memory
   access in LOOP, false otherwise.  If it is, set BASE and OFFSET.  Also, if
   this is a pre/post increment load, set PRE_POST to true.  LOAD_PAIR
   indicates that INSN loads a register pair; for post-increment/decrement
   forms the implicit offset is doubled accordingly.  */
static bool
valid_src_p (rtx src, rtx_insn *insn, struct loop *loop, bool *pre_post,
	     rtx *base, rtx *offset, bool load_pair)
{
  subrtx_var_iterator::array_type array;
  rtx x = NULL_RTX;

  FOR_EACH_SUBRTX_VAR (iter, array, src, NONCONST)
    if (MEM_P (*iter))
      {
	x = *iter;
	break;
      }

  if (!x)
    return false;

  struct aarch64_address_info addr;
  machine_mode mode = GET_MODE (x);

  if (!aarch64_classify_address (&addr, XEXP (x, 0), mode, true))
    return false;

  unsigned regno = REGNO (addr.base);
  if (global_regs[regno] || fixed_regs[regno])
    return false;

  if (addr.type == ADDRESS_REG_WB)
    {
      unsigned code = GET_CODE (XEXP (x, 0));

      *pre_post = true;
      *base = addr.base;

      if (code == PRE_MODIFY || code == POST_MODIFY)
	*offset = addr.offset;
      else
	{
	  /* Writeback is only supported for fixed-width modes.  */
	  unsigned int_offset = GET_MODE_SIZE (mode).to_constant ();

	  /* For post-incremented load pairs we would increment the base twice
	     over, so make that adjustment.  */
	  if (load_pair && (code == POST_INC || code == POST_DEC))
	    int_offset *= 2;

	  *offset = GEN_INT (int_offset);
	}
      return true;
    }
  else if (addr.type == ADDRESS_REG_IMM || addr.type == ADDRESS_REG_REG)
    {
      /* Check if the load is strided.  */
      if (!iv_p (insn, addr.base, loop))
	return false;

      *base = addr.base;
      *offset = addr.offset;
      return true;
    }

  return false;
}


/* Return true if INSN is a strided load in LOOP.  If it is a strided load, set
   the DEST, BASE and OFFSET.  Also, if this is a pre/post increment load, set
   PRE_POST to true.

   The routine does checks on the destination of the insn and depends on
   VALID_SRC_P to check the source and fill in the BASE and OFFSET.  */
static bool
get_load_info (rtx_insn *insn, struct loop *loop, rtx *dest, rtx *base,
	       rtx *offset, bool *pre_post, bool *ldp)
{
  if (!INSN_P (insn) || recog_memoized (insn) < 0)
    return false;

  rtx pat = PATTERN (insn);
  unsigned code = GET_CODE (pat);
  bool load_pair = (code == PARALLEL);

  /* For a load pair we need only the first base and destination
     registers.  We however need to ensure that our pre/post increment
     offset is doubled; we do that in VALID_SRC_P.  */
  if (load_pair)
    {
      pat = XVECEXP (pat, 0, 0);
      code = GET_CODE (pat);
    }

  if (code != SET)
    return false;

  rtx dest_rtx = SET_DEST (pat);

  if (!REG_P (dest_rtx))
    return false;

  unsigned regno = REGNO (dest_rtx);
  machine_mode mode = GET_MODE (dest_rtx);
  machine_mode inner_mode = GET_MODE_INNER (mode);

  /* Falkor does not support SVE vectors.  */
  if (!GET_MODE_SIZE (mode).is_constant ())
    return false;

  /* Ignore vector struct or lane loads.  */
  if (GET_MODE_SIZE (mode).to_constant ()
      != GET_MODE_SIZE (inner_mode).to_constant ())
    return false;

  /* The largest width we want to bother with is a load of a pair of
     quad-words.  */
  if ((GET_MODE_SIZE (mode).to_constant () << load_pair)
      > GET_MODE_SIZE (OImode))
    return false;

  /* Ignore loads into the stack pointer because it is unlikely to be a
     stream.  */
  if (regno == SP_REGNUM)
    return false;

  if (valid_src_p (SET_SRC (pat), insn, loop, pre_post, base, offset,
		   load_pair))
    {
      *dest = dest_rtx;
      *ldp = load_pair;

      return true;
    }

  return false;
}


/* Return whether INSN and CAND are in the same def/use chain.  */
static bool
in_same_chain (rtx_insn *insn, rtx_insn *cand, unsigned regno)
{
  struct du_chain *chain = NULL;
  du_head_p head = NULL;
  int i;

  /* Search for the chain in which this instruction is (one of) the roots.  */
  operand_rr_info *op_info = insn_rr[INSN_UID (insn)].op_info;

  for (i = 0; i < op_info->n_chains; i++)
    {
      /* The register tracked by this chain does not match the
	 dest register of insn.  */
      if (op_info->heads[i]->regno != regno)
	continue;

      head = op_info->heads[i];
      /* The chain was merged in another, find the new head.  */
      if (!head->first)
	head = regrename_chain_from_id (head->id);

      bool found_insn = false, found_cand = false;

      for (chain = head->first; chain; chain = chain->next_use)
	{
	  rtx *loc = &SET_DEST (PATTERN (chain->insn));

	  if (chain->loc != loc)
	    continue;

	  if (chain->insn == insn)
	    found_insn = true;

	  if (chain->insn == cand)
	    found_cand = true;

	  if (found_insn && found_cand)
	    return true;
	}
    }

  return false;
}


/* Callback function to traverse the tag map and drop loads that have the same
   destination and are in the same def/use chain.  The routine always returns
   true to allow traversal through all of TAG_MAP.  */
bool
single_dest_per_chain (const rtx &t ATTRIBUTE_UNUSED, insn_info_list_t *v,
		       void *arg ATTRIBUTE_UNUSED)
{
  for (int i = v->length () - 1; i >= 1; i--)
    {
      tag_insn_info *insn_info = (*v)[i];

      for (int j = v->length () - 2; j >= 0; j--)
	{
	  /* Filter out destinations in the same chain.  */
	  if (in_same_chain (insn_info->insn, (*v)[j]->insn,
			     REGNO (insn_info->dest)))
	    {
	      v->ordered_remove (j);
	      i = v->length ();
	      break;
	    }
	}
    }

  return true;
}


/* Callback invoked for each name-value pair (T, INSN_INFO) to dump the insn
   list INSN_INFO for tag T.  */
bool
dump_insn_list (const rtx &t, const insn_info_list_t &insn_info,
		void *unused ATTRIBUTE_UNUSED)
{
  gcc_assert (dump_file);
  fprintf (dump_file, "Tag 0x%lx ::\n", INTVAL (t));

  for (unsigned i = 0; i < insn_info.length (); i++)
    dump_insn_slim (dump_file, insn_info[i]->insn);

  fprintf (dump_file, "\n");

  return true;
}


/* Record all loads in LOOP into TAG_MAP indexed by the falkor hardware
   prefetcher memory tags.  */
static void
record_loads (tag_map_t &tag_map, struct loop *loop)
{
  rtx_insn *insn;
  basic_block *body, bb;

  body = get_loop_body (loop);

  for (unsigned i = 0; i < loop->num_nodes; i++)
    {
      bb = body[i];
      FOR_BB_INSNS (bb, insn)
	{
	  rtx base = NULL_RTX;
	  rtx dest = NULL_RTX;
	  rtx offset = NULL_RTX;
	  bool writeback = false;
	  bool ldp = false;

	  if (!INSN_P (insn) || DEBUG_INSN_P (insn))
	    continue;

	  if (get_load_info (insn, loop, &dest, &base, &offset, &writeback,
			     &ldp))
	    {
	      tag_insn_info *i = new tag_insn_info (insn, dest, base, offset,
						    writeback, ldp);
	      rtx tag = GEN_INT (i->tag ());
	      tag_map.get_or_insert (tag).safe_push (i);
	    }
	}
    }

  if (dump_file)
    {
      fprintf (dump_file, "Loop %d: Tag map generated.\n", loop->num);
      tag_map.traverse <void *, dump_insn_list> (NULL);
    }

  /* Try to reduce the dataset before launching into the rename attempt.  Drop
     destinations in the same collision chain that appear in the same def/use
     chain, all as defs.  These chains will move together in a rename so
     there's no point in keeping both in there.  */
  tag_map.traverse <void *, single_dest_per_chain> (NULL);
}


/* Tag collision avoidance pass for Falkor.  The pass runs in two phases for
   each loop; the first phase collects all loads that we consider as
   interesting for renaming into a tag-indexed map of lists.  The second phase
   renames the destination register of the loads in an attempt to spread out
   the loads into different tags.  */
void
execute_tag_collision_avoidance ()
{
  struct loop *loop;

  df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
  df_chain_add_problem (DF_UD_CHAIN);
  df_compute_regs_ever_live (true);
  df_note_add_problem ();
  df_analyze ();
  df_set_flags (DF_DEFER_INSN_RESCAN);

  regrename_init (true);
  regrename_analyze (NULL);

  compute_bb_for_insn ();
  calculate_dominance_info (CDI_DOMINATORS);
  loop_optimizer_init (AVOID_CFG_MODIFICATIONS);

  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      tag_map_t tag_map (512);

      record_loads (tag_map, loop);
      avoid_collisions (tag_map);
      if (dump_file)
	{
	  fprintf (dump_file, "Loop %d: Completed rename.\n", loop->num);
	  tag_map.traverse <void *, dump_insn_list> (NULL);
	}
      tag_map.traverse <void *, free_insn_info> (NULL);
    }

  loop_optimizer_finalize ();
  free_dominance_info (CDI_DOMINATORS);
  regrename_finish ();
}


const pass_data pass_data_tag_collision_avoidance =
{
  RTL_PASS, /* type */
  "tag_collision_avoidance", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};


class pass_tag_collision_avoidance : public rtl_opt_pass
{
public:
  pass_tag_collision_avoidance (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_tag_collision_avoidance, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return ((aarch64_tune_params.extra_tuning_flags
	       & AARCH64_EXTRA_TUNE_RENAME_LOAD_REGS)
	      && optimize >= 2);
    }
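
  /* Note: the gate above restricts the pass to tunings that set
     AARCH64_EXTRA_TUNE_RENAME_LOAD_REGS (e.g. when tuning for Falkor, such
     as with -mcpu=falkor) and to compilations at -O2 or higher.  */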

  virtual unsigned int execute (function *)
    {
      execute_tag_collision_avoidance ();
      return 0;
    }

}; // class pass_tag_collision_avoidance


/* Create a new pass instance.  */
rtl_opt_pass *
make_pass_tag_collision_avoidance (gcc::context *ctxt)
{
  return new pass_tag_collision_avoidance (ctxt);
}
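
/* Note: the factory above is referenced from aarch64-passes.def, which places
   the pass in the RTL pipeline.  As an illustrative sketch only (the exact
   placement is defined in that file, not here), the registration looks
   something like:

     INSERT_PASS_AFTER (pass_machine_reorg, 1, pass_tag_collision_avoidance);  */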