diff options
author | Krzysztof Parzyszek <kparzysz@codeaurora.org> | 2017-10-11 15:51:44 +0000 |
---|---|---|
committer | Krzysztof Parzyszek <kparzysz@codeaurora.org> | 2017-10-11 15:51:44 +0000 |
commit | 9f806a04386a6ec08cc976286b9bf998438fc803 (patch) | |
tree | 1d244908c7d1e4440f6a5d2d0a86a97b3ecf3131 /test/CodeGen/Hexagon | |
parent | 143ffebf675fe0d90b55d2226c6f690fc4b7ed4b (diff) |
[Pipeliner] Improve serialization order for post-increments
The pipeliner is generating a serial sequence that causes poor
register allocation when a post-increment instruction appears
prior to the use of the post-increment register. This occurs when
there is a circular set of dependences involved with a sequence
of instructions in the same cycle. In this case, every
serialization of the parallel semantics causes an additional
register to be allocated.
This patch fixes the problem by changing the instructions so that
the result of the post-increment instruction is used by the
subsequent instruction, which enables the register allocator to
make a better decision and not require another register.
Patch by Brendon Cahoon.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@315466 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGen/Hexagon')
-rw-r--r-- | test/CodeGen/Hexagon/swp-order-copies.ll | 37 |
1 files changed, 37 insertions, 0 deletions
diff --git a/test/CodeGen/Hexagon/swp-order-copies.ll b/test/CodeGen/Hexagon/swp-order-copies.ll new file mode 100644 index 00000000000..5de0717654f --- /dev/null +++ b/test/CodeGen/Hexagon/swp-order-copies.ll @@ -0,0 +1,37 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s + +; Test that the instruction ordering code in the pipeliner fixes up dependences +; between post-increment register definitions and uses so that the register +; allocator does not allocate an additional register. The following test case +; should generate a single packet. + +; CHECK: loop0(.LBB0_[[LOOP:.]], +; CHECK: .LBB0_[[LOOP]]: +; CHECK: { +; CHECK-NOT: { +; CHECK: :endloop0 + +define void @test(i64* nocapture %v1, i64 %v2, i32 %len) local_unnamed_addr #0 { +entry: + %cmp7 = icmp sgt i32 %len, 0 + br i1 %cmp7, label %for.body, label %for.end + +for.body: + %arrayidx.phi = phi i64* [ %arrayidx.inc, %for.body ], [ %v1, %entry ] + %i.08 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %0 = load i64, i64* %arrayidx.phi, align 8 + %1 = tail call i64 @llvm.hexagon.M2.mmpyul.rs1(i64 %0, i64 %v2) + store i64 %1, i64* %arrayidx.phi, align 8 + %inc = add nuw nsw i32 %i.08, 1 + %exitcond = icmp eq i32 %inc, %len + %arrayidx.inc = getelementptr i64, i64* %arrayidx.phi, i32 1 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +declare i64 @llvm.hexagon.M2.mmpyul.rs1(i64, i64) #1 + +attributes #0 = { nounwind "target-cpu"="hexagonv60" } +attributes #1 = { nounwind readnone } |