diff options
author | Hal Finkel <hfinkel@anl.gov> | 2015-02-25 21:36:59 +0000 |
---|---|---|
committer | Hal Finkel <hfinkel@anl.gov> | 2015-02-25 21:36:59 +0000 |
commit | 7840990de8f102bf83ee297e74f225b109228512 (patch) | |
tree | ee115c8e4342e2b29c0348e033acbfe8564a77b3 /test | |
parent | e9e16aa4a5fe2da7fb9621080cc9ef13a3604549 (diff) |
[PowerPC] Make LDtocL and friends invariant loads
LDtocL, and other loads that roughly correspond to the TOC_ENTRY SDAG node,
represent loads from the TOC, which is invariant. As a result, these loads can
be hoisted out of loops, etc. In order to do this, we need to generate
GOT-style MMOs for TOC_ENTRY, which requires treating it as a legitimate memory
intrinsic node type. Once this is done, the MMO transfer is handled automatically
for TableGen-driven instruction selection; for nodes generated directly in
PPCISelDAGToDAG, however, we need to transfer the MMOs manually.
Also, we were not transferring MMOs associated with pre-increment loads, so this
change does that too.
Lastly, this fixes an exposed bug where R30 was not added as a defined operand of
UpdateGBR.
This problem was highlighted by an example (used to generate the test case)
posted to llvmdev by Francois Pichet.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@230553 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test')
-rw-r--r-- | test/CodeGen/PowerPC/ldtoc-inv.ll | 39 | ||||
-rw-r--r-- | test/CodeGen/PowerPC/ppc64le-aggregates.ll | 40 | ||||
-rw-r--r-- | test/CodeGen/PowerPC/tls-store2.ll | 5 | ||||
-rw-r--r-- | test/CodeGen/PowerPC/vec-abi-align.ll | 32 |
4 files changed, 79 insertions, 37 deletions
diff --git a/test/CodeGen/PowerPC/ldtoc-inv.ll b/test/CodeGen/PowerPC/ldtoc-inv.ll new file mode 100644 index 00000000000..550747c4695 --- /dev/null +++ b/test/CodeGen/PowerPC/ldtoc-inv.ll @@ -0,0 +1,39 @@ +; RUN: llc -mcpu=pwr7 < %s | FileCheck %s +target datalayout = "E-m:e-i64:64-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +@phasor = external constant [4096 x i32] + +; Function Attrs: nounwind +define void @test(i32* nocapture %out, i32 zeroext %step_size) #0 { +entry: + %shl = shl i32 %step_size, 2 + %idxprom = zext i32 %shl to i64 + br label %for.body + +; Make sure that the TOC load has been hoisted out of the loop. +; CHECK-LABEL: @test +; CHECK: ld {{[0-9]+}}, .LC{{[0-9]+}}@toc@l +; CHECK: %for.body +; CHECK: blr + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %0 = trunc i64 %indvars.iv to i32 + %shl1 = shl i32 %0, %step_size + %idxprom2 = sext i32 %shl1 to i64 + %arrayidx.sum = add nsw i64 %idxprom2, %idxprom + %arrayidx3 = getelementptr inbounds [4096 x i32]* @phasor, i64 0, i64 %arrayidx.sum + %1 = load i32* %arrayidx3, align 4 + %arrayidx5 = getelementptr inbounds i32* %out, i64 %indvars.iv + store i32 %1, i32* %arrayidx5, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 4 + %cmp = icmp slt i64 %indvars.iv.next, 1020 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +} + +attributes #0 = { nounwind } + diff --git a/test/CodeGen/PowerPC/ppc64le-aggregates.ll b/test/CodeGen/PowerPC/ppc64le-aggregates.ll index 4fe6f8db33f..3fce36ec23b 100644 --- a/test/CodeGen/PowerPC/ppc64le-aggregates.ll +++ b/test/CodeGen/PowerPC/ppc64le-aggregates.ll @@ -264,26 +264,26 @@ entry: ret void } ; CHECK-LABEL: @caller2 -; CHECK: ld [[REG:[0-9]+]], .LC -; CHECK-DAG: lfs 1, 0([[REG]]) -; CHECK-DAG: lfs 2, 4([[REG]]) -; CHECK-DAG: lfs 3, 8([[REG]]) -; CHECK-DAG: lfs 4, 12([[REG]]) -; CHECK-DAG: lfs 5, 16([[REG]]) -; CHECK-DAG: lfs 6, 20([[REG]]) -; 
CHECK-DAG: lfs 7, 24([[REG]]) -; CHECK-DAG: lfs 8, 28([[REG]]) -; CHECK: ld [[REG:[0-9]+]], .LC -; CHECK-DAG: lfs 9, 0([[REG]]) -; CHECK-DAG: lfs 10, 4([[REG]]) -; CHECK-DAG: lfs 11, 8([[REG]]) -; CHECK-DAG: lfs 12, 12([[REG]]) -; CHECK-DAG: lfs 13, 16([[REG]]) -; CHECK: ld [[REG:[0-9]+]], .LC -; CHECK-DAG: lwz [[REG0:[0-9]+]], 0([[REG]]) -; CHECK-DAG: lwz [[REG1:[0-9]+]], 4([[REG]]) -; CHECK-DAG: sldi [[REG1]], [[REG1]], 32 -; CHECK-DAG: or 10, [[REG0]], [[REG1]] +; CHECK: ld {{[0-9]+}}, .LC +; CHECK-DAG: lfs 1, 0({{[0-9]+}}) +; CHECK-DAG: lfs 2, 4({{[0-9]+}}) +; CHECK-DAG: lfs 3, 8({{[0-9]+}}) +; CHECK-DAG: lfs 4, 12({{[0-9]+}}) +; CHECK-DAG: lfs 5, 16({{[0-9]+}}) +; CHECK-DAG: lfs 6, 20({{[0-9]+}}) +; CHECK-DAG: lfs 7, 24({{[0-9]+}}) +; CHECK-DAG: lfs 8, 28({{[0-9]+}}) + +; CHECK-DAG: lfs 9, 0({{[0-9]+}}) +; CHECK-DAG: lfs 10, 4({{[0-9]+}}) +; CHECK-DAG: lfs 11, 8({{[0-9]+}}) +; CHECK-DAG: lfs 12, 12({{[0-9]+}}) +; CHECK-DAG: lfs 13, 16({{[0-9]+}}) + +; CHECK-DAG: lwz [[REG0:[0-9]+]], 0({{[0-9]+}}) +; CHECK-DAG: lwz [[REG1:[0-9]+]], 4({{[0-9]+}}) +; CHECK-DAG: sldi [[REG2:[0-9]+]], [[REG1]], 32 +; CHECK-DAG: or 10, [[REG0]], [[REG2]] ; CHECK: bl test2 declare void @test2([8 x float], [5 x float], [2 x float]) diff --git a/test/CodeGen/PowerPC/tls-store2.ll b/test/CodeGen/PowerPC/tls-store2.ll index a9c97b5e23e..e9aa17e8c0f 100644 --- a/test/CodeGen/PowerPC/tls-store2.ll +++ b/test/CodeGen/PowerPC/tls-store2.ll @@ -22,7 +22,10 @@ entry: ; CHECK: addi 3, {{[0-9]+}}, __once_callable@got@tlsgd@l ; CHECK: bl __tls_get_addr(__once_callable@tlsgd) ; CHECK-NEXT: nop -; CHECK: std {{[0-9]+}}, 0(3) +; FIXME: We could check here for 'std {{[0-9]+}}, 0(3)', but that no longer +; works because, with new scheduling freedom, we create a copy of R3 based on the +; initial scheduling, but don't coalesce it again after we move the instructions +; so that the copy is no longer necessary. 
; CHECK: addi 3, {{[0-9]+}}, __once_call@got@tlsgd@l ; CHECK: bl __tls_get_addr(__once_call@tlsgd) ; CHECK-NEXT: nop diff --git a/test/CodeGen/PowerPC/vec-abi-align.ll b/test/CodeGen/PowerPC/vec-abi-align.ll index 5075ff2b8c0..2ec57af3513 100644 --- a/test/CodeGen/PowerPC/vec-abi-align.ll +++ b/test/CodeGen/PowerPC/vec-abi-align.ll @@ -35,17 +35,17 @@ entry: ret void ; CHECK-LABEL: @test2 -; CHECK: ld {{[0-9]+}}, 112(1) -; CHECK: li [[REG16:[0-9]+]], 16 -; CHECK: addi [[REGB:[0-9]+]], 1, 112 -; CHECK: lvx 2, [[REGB]], [[REG16]] +; CHECK-DAG: ld {{[0-9]+}}, 112(1) +; CHECK-DAG: li [[REG16:[0-9]+]], 16 +; CHECK-DAG: addi [[REGB:[0-9]+]], 1, 112 +; CHECK-DAG: lvx 2, [[REGB]], [[REG16]] ; CHECK: blr ; CHECK-VSX-LABEL: @test2 -; CHECK-VSX: ld {{[0-9]+}}, 112(1) -; CHECK-VSX: li [[REG16:[0-9]+]], 16 -; CHECK-VSX: addi [[REGB:[0-9]+]], 1, 112 -; CHECK-VSX: lxvw4x {{[0-9]+}}, [[REGB]], [[REG16]] +; CHECK-VSX-DAG: ld {{[0-9]+}}, 112(1) +; CHECK-VSX-DAG: li [[REG16:[0-9]+]], 16 +; CHECK-VSX-DAG: addi [[REGB:[0-9]+]], 1, 112 +; CHECK-VSX-DAG: lxvw4x {{[0-9]+}}, [[REGB]], [[REG16]] ; CHECK-VSX: blr } @@ -61,17 +61,17 @@ entry: ret void ; CHECK-LABEL: @test3 -; CHECK: ld {{[0-9]+}}, 128(1) -; CHECK: li [[REG16:[0-9]+]], 16 -; CHECK: addi [[REGB:[0-9]+]], 1, 128 -; CHECK: lvx 2, [[REGB]], [[REG16]] +; CHECK-DAG: ld {{[0-9]+}}, 128(1) +; CHECK-DAG: li [[REG16:[0-9]+]], 16 +; CHECK-DAG: addi [[REGB:[0-9]+]], 1, 128 +; CHECK-DAG: lvx 2, [[REGB]], [[REG16]] ; CHECK: blr ; CHECK-VSX-LABEL: @test3 -; CHECK-VSX: ld {{[0-9]+}}, 128(1) -; CHECK-VSX: li [[REG16:[0-9]+]], 16 -; CHECK-VSX: addi [[REGB:[0-9]+]], 1, 128 -; CHECK-VSX: lxvw4x {{[0-9]+}}, [[REGB]], [[REG16]] +; CHECK-VSX-DAG: ld {{[0-9]+}}, 128(1) +; CHECK-VSX-DAG: li [[REG16:[0-9]+]], 16 +; CHECK-VSX-DAG: addi [[REGB:[0-9]+]], 1, 128 +; CHECK-VSX-DAG: lxvw4x {{[0-9]+}}, [[REGB]], [[REG16]] ; CHECK-VSX: blr } |