From 7d7873efde34ad5dd826db0250710e77f9a2a047 Mon Sep 17 00:00:00 2001 From: Geoff Berry Date: Fri, 14 Jul 2017 21:44:12 +0000 Subject: [AArch64][Falkor] Avoid HW prefetcher tag collisions (step 1) Summary: This patch is the first step in reducing HW prefetcher instruction tag collisions in inner loops for Falkor. It adds a pass that annotates IR loads with metadata to indicate that they are known to be strided loads, and adds a target lowering hook that translates this metadata to a target-specific MachineMemOperand flag. A follow-on change will use this MachineMemOperand flag to re-write instructions to reduce tag collisions. Reviewers: mcrosier, t.p.northover Subscribers: aemerson, rengolin, mgorny, javed.absar, kristof.beyls, llvm-commits Differential Revision: https://reviews.llvm.org/D34963 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@308059 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGen/AArch64/falkor-hwpf.ll | 106 ++++++++++++++++++++++++ test/CodeGen/MIR/AArch64/target-memoperands.mir | 4 + 2 files changed, 110 insertions(+) create mode 100644 test/CodeGen/AArch64/falkor-hwpf.ll (limited to 'test/CodeGen') diff --git a/test/CodeGen/AArch64/falkor-hwpf.ll b/test/CodeGen/AArch64/falkor-hwpf.ll new file mode 100644 index 00000000000..bbe7febe397 --- /dev/null +++ b/test/CodeGen/AArch64/falkor-hwpf.ll @@ -0,0 +1,106 @@ +; RUN: opt < %s -S -falkor-hwpf-fix -mtriple aarch64 -mcpu=falkor | FileCheck %s +; RUN: opt < %s -S -falkor-hwpf-fix -mtriple aarch64 -mcpu=cortex-a57 | FileCheck %s --check-prefix=NOHWPF + +; Check that strided access metadata is added to loads in inner loops when compiling for Falkor. 
+ +; CHECK-LABEL: @hwpf1( +; CHECK: load i32, i32* %gep, !falkor.strided.access !0 +; CHECK: load i32, i32* %gep2, !falkor.strided.access !0 + +; NOHWPF-LABEL: @hwpf1( +; NOHWPF: load i32, i32* %gep{{$}} +; NOHWPF: load i32, i32* %gep2{{$}} +define void @hwpf1(i32* %p, i32* %p2) { +entry: + br label %loop + +loop: + %iv = phi i32 [ 0, %entry ], [ %inc, %loop ] + + %gep = getelementptr inbounds i32, i32* %p, i32 %iv + %load = load i32, i32* %gep + + %gep2 = getelementptr inbounds i32, i32* %p2, i32 %iv + %load2 = load i32, i32* %gep2 + + %inc = add i32 %iv, 1 + %exitcnd = icmp uge i32 %inc, 1024 + br i1 %exitcnd, label %exit, label %loop + +exit: + ret void +} + +; Check that the inner loop strided load is marked but the outer loop strided load isn't. +; CHECK-LABEL: @hwpf2( +; CHECK: load i32, i32* %gep, !falkor.strided.access !0 +; CHECK: load i32, i32* %gep2{{$}} + +; NOHWPF-LABEL: @hwpf2( +; NOHWPF: load i32, i32* %gep{{$}} +; NOHWPF: load i32, i32* %gep2{{$}} +define void @hwpf2(i32* %p) { +entry: + br label %loop1 + +loop1: + %iv1 = phi i32 [ 0, %entry ], [ %inc1, %loop1.latch ] + %outer.sum = phi i32 [ 0, %entry ], [ %sum, %loop1.latch ] + br label %loop2.header + +loop2.header: + br label %loop2 + +loop2: + %iv2 = phi i32 [ 0, %loop2.header ], [ %inc2, %loop2 ] + %sum = phi i32 [ %outer.sum, %loop2.header ], [ %sum.inc, %loop2 ] + %gep = getelementptr inbounds i32, i32* %p, i32 %iv2 + %load = load i32, i32* %gep + %sum.inc = add i32 %sum, %load + %inc2 = add i32 %iv2, 1 + %exitcnd2 = icmp uge i32 %inc2, 1024 + br i1 %exitcnd2, label %exit2, label %loop2 + +exit2: + %gep2 = getelementptr inbounds i32, i32* %p, i32 %iv1 + %load2 = load i32, i32* %gep2 + br label %loop1.latch + +loop1.latch: + %inc1 = add i32 %iv1, 1 + %exitcnd1 = icmp uge i32 %inc1, 1024 + br i1 %exitcnd1, label %exit, label %loop1 ; outer loop exits on its own counter %iv1 + +exit: + ret void +} + + +; Check that non-strided load isn't marked. 
+; CHECK-LABEL: @hwpf3( +; CHECK: load i32, i32* %gep, !falkor.strided.access !0 +; CHECK: load i32, i32* %gep2{{$}} + +; NOHWPF-LABEL: @hwpf3( +; NOHWPF: load i32, i32* %gep{{$}} +; NOHWPF: load i32, i32* %gep2{{$}} +define void @hwpf3(i32* %p, i32* %p2) { +entry: + br label %loop + +loop: + %iv = phi i32 [ 0, %entry ], [ %inc, %loop ] + + %gep = getelementptr inbounds i32, i32* %p, i32 %iv ; indexed by the induction variable %iv + %load = load i32, i32* %gep + + %gep2 = getelementptr inbounds i32, i32* %p2, i32 %load ; indexed by the value just loaded, not by %iv + %load2 = load i32, i32* %gep2 + + %inc = add i32 %iv, 1 + %exitcnd = icmp uge i32 %inc, 1024 + br i1 %exitcnd, label %exit, label %loop + +exit: + ret void +} diff --git a/test/CodeGen/MIR/AArch64/target-memoperands.mir b/test/CodeGen/MIR/AArch64/target-memoperands.mir index f853b551e09..c71302d97e2 100644 --- a/test/CodeGen/MIR/AArch64/target-memoperands.mir +++ b/test/CodeGen/MIR/AArch64/target-memoperands.mir @@ -10,13 +10,17 @@ --- # CHECK-LABEL: name: target_memoperands # CHECK: %1(s64) = G_LOAD %0(p0) :: ("aarch64-suppress-pair" load 8) +# CHECK: %2(s32) = G_LOAD %0(p0) :: ("aarch64-strided-access" load 4) # CHECK: G_STORE %1(s64), %0(p0) :: ("aarch64-suppress-pair" store 8) +# CHECK: G_STORE %2(s32), %0(p0) :: ("aarch64-strided-access" store 4) name: target_memoperands body: | bb.0: %0:_(p0) = COPY %x0 %1:_(s64) = G_LOAD %0(p0) :: ("aarch64-suppress-pair" load 8) + %2:_(s32) = G_LOAD %0(p0) :: ("aarch64-strided-access" load 4) G_STORE %1(s64), %0(p0) :: ("aarch64-suppress-pair" store 8) + G_STORE %2(s32), %0(p0) :: ("aarch64-strided-access" store 4) RET_ReallyLR ... -- cgit v1.2.3