From 7d7873efde34ad5dd826db0250710e77f9a2a047 Mon Sep 17 00:00:00 2001
From: Geoff Berry <gberry@codeaurora.org>
Date: Fri, 14 Jul 2017 21:44:12 +0000
Subject: [AArch64][Falkor] Avoid HW prefetcher tag collisions (step 1)

Summary:
This patch is the first step in reducing HW prefetcher instruction tag
collisions in inner loops for Falkor.  It adds a pass that annotates IR
loads with metadata to indicate that they are known to be strided loads,
and adds a target lowering hook that translates this metadata to a
target-specific MachineMemOperand flag.

A follow on change will use this MachineMemOperand flag to re-write
instructions to reduce tag collisions.

Reviewers: mcrosier, t.p.northover

Subscribers: aemerson, rengolin, mgorny, javed.absar, kristof.beyls, llvm-commits

Differential Revision: https://reviews.llvm.org/D34963

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@308059 91177308-0d34-0410-b5e6-96231b3b80d8
---
 test/CodeGen/AArch64/falkor-hwpf.ll             | 106 ++++++++++++++++++++++++
 test/CodeGen/MIR/AArch64/target-memoperands.mir |   4 +
 2 files changed, 110 insertions(+)
 create mode 100644 test/CodeGen/AArch64/falkor-hwpf.ll

(limited to 'test/CodeGen')

diff --git a/test/CodeGen/AArch64/falkor-hwpf.ll b/test/CodeGen/AArch64/falkor-hwpf.ll
new file mode 100644
index 00000000000..bbe7febe397
--- /dev/null
+++ b/test/CodeGen/AArch64/falkor-hwpf.ll
@@ -0,0 +1,106 @@
+; RUN: opt < %s -S -falkor-hwpf-fix -mtriple aarch64 -mcpu=falkor | FileCheck %s
+; RUN: opt < %s -S -falkor-hwpf-fix -mtriple aarch64 -mcpu=cortex-a57 | FileCheck %s --check-prefix=NOHWPF
+
+; Check that strided access metadata is added to loads in inner loops when compiling for Falkor.
+
+; CHECK-LABEL: @hwpf1(
+; CHECK: load i32, i32* %gep, !falkor.strided.access !0
+; CHECK: load i32, i32* %gep2, !falkor.strided.access !0
+
+; NOHWPF-LABEL: @hwpf1(
+; NOHWPF: load i32, i32* %gep{{$}}
+; NOHWPF: load i32, i32* %gep2{{$}}
+define void @hwpf1(i32* %p, i32* %p2) {
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry ], [ %inc, %loop ]
+
+  %gep = getelementptr inbounds i32, i32* %p, i32 %iv
+  %load = load i32, i32* %gep
+
+  %gep2 = getelementptr inbounds i32, i32* %p2, i32 %iv
+  %load2 = load i32, i32* %gep2
+
+  %inc = add i32 %iv, 1
+  %exitcnd = icmp uge i32 %inc, 1024
+  br i1 %exitcnd, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+; Check that outer loop strided load isn't marked.
+; CHECK-LABEL: @hwpf2(
+; CHECK: load i32, i32* %gep, !falkor.strided.access !0
+; CHECK: load i32, i32* %gep2{{$}}
+
+; NOHWPF-LABEL: @hwpf2(
+; NOHWPF: load i32, i32* %gep{{$}}
+; NOHWPF: load i32, i32* %gep2{{$}}
+define void @hwpf2(i32* %p) {
+entry:
+  br label %loop1
+
+loop1:
+  %iv1 = phi i32 [ 0, %entry ], [ %inc1, %loop1.latch ]
+  %outer.sum = phi i32 [ 0, %entry ], [ %sum, %loop1.latch ]
+  br label %loop2.header
+
+loop2.header:
+  br label %loop2
+
+loop2:
+  %iv2 = phi i32 [ 0, %loop2.header ], [ %inc2, %loop2 ]
+  %sum = phi i32 [ %outer.sum, %loop2.header ], [ %sum.inc, %loop2 ]
+  %gep = getelementptr inbounds i32, i32* %p, i32 %iv2
+  %load = load i32, i32* %gep
+  %sum.inc = add i32 %sum, %load
+  %inc2 = add i32 %iv2, 1
+  %exitcnd2 = icmp uge i32 %inc2, 1024
+  br i1 %exitcnd2, label %exit2, label %loop2
+
+exit2:
+  %gep2 = getelementptr inbounds i32, i32* %p, i32 %iv1
+  %load2 = load i32, i32* %gep2
+  br label %loop1.latch
+
+loop1.latch:
+  %inc1 = add i32 %iv1, 1
+  %exitcnd1 = icmp uge i32 %inc1, 1024
+  br i1 %exitcnd2, label %exit, label %loop1
+
+exit:
+  ret void
+}
+
+
+; Check that non-strided load isn't marked.
+; CHECK-LABEL: @hwpf3(
+; CHECK: load i32, i32* %gep, !falkor.strided.access !0
+; CHECK: load i32, i32* %gep2{{$}}
+
+; NOHWPF-LABEL: @hwpf3(
+; NOHWPF: load i32, i32* %gep{{$}}
+; NOHWPF: load i32, i32* %gep2{{$}}
+define void @hwpf3(i32* %p, i32* %p2) {
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry ], [ %inc, %loop ]
+
+  %gep = getelementptr inbounds i32, i32* %p, i32 %iv
+  %load = load i32, i32* %gep
+
+  %gep2 = getelementptr inbounds i32, i32* %p2, i32 %load
+  %load2 = load i32, i32* %gep2
+
+  %inc = add i32 %iv, 1
+  %exitcnd = icmp uge i32 %inc, 1024
+  br i1 %exitcnd, label %exit, label %loop
+
+exit:
+  ret void
+}
diff --git a/test/CodeGen/MIR/AArch64/target-memoperands.mir b/test/CodeGen/MIR/AArch64/target-memoperands.mir
index f853b551e09..c71302d97e2 100644
--- a/test/CodeGen/MIR/AArch64/target-memoperands.mir
+++ b/test/CodeGen/MIR/AArch64/target-memoperands.mir
@@ -10,13 +10,17 @@
 ---
 # CHECK-LABEL: name: target_memoperands
 # CHECK: %1(s64) = G_LOAD %0(p0) :: ("aarch64-suppress-pair" load 8)
+# CHECK: %2(s32) = G_LOAD %0(p0) :: ("aarch64-strided-access" load 4)
 # CHECK: G_STORE %1(s64), %0(p0) :: ("aarch64-suppress-pair" store 8)
+# CHECK: G_STORE %2(s32), %0(p0) :: ("aarch64-strided-access" store 4)
 name:            target_memoperands
 body: |
   bb.0:
 
     %0:_(p0) = COPY %x0
     %1:_(s64) = G_LOAD %0(p0) :: ("aarch64-suppress-pair" load 8)
+    %2:_(s32) = G_LOAD %0(p0) :: ("aarch64-strided-access" load 4)
     G_STORE %1(s64), %0(p0) :: ("aarch64-suppress-pair" store 8)
+    G_STORE %2(s32), %0(p0) :: ("aarch64-strided-access" store 4)
     RET_ReallyLR
 ...
-- 
cgit v1.2.3