[Pipeliner] Improve serialization order for post-increments

The pipeliner is generating a serial sequence that causes poor register allocation when a post-increment instruction appears prior to the use of the post-increment register. This occurs when there is a circular set of dependences involved with a sequence of instructions in the same cycle. In this case, every serialization of the parallel semantics causes an additional register to be allocated. This patch fixes the problem by changing the instructions so that the result of the post-increment instruction is used by the subsequent instruction, which enables the register allocator to make a better decision and not require another register. Patch by Brendon Cahoon. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@315466 91177308-0d34-0410-b5e6-96231b3b80d8
author: Krzysztof Parzyszek <kparzysz@codeaurora.org> 2017-10-11 15:51:44 +0000
committer: Krzysztof Parzyszek <kparzysz@codeaurora.org> 2017-10-11 15:51:44 +0000
commit: 9f806a04386a6ec08cc976286b9bf998438fc803 (patch)
tree: 1d244908c7d1e4440f6a5d2d0a86a97b3ecf3131 /test/CodeGen/Hexagon
parent: 143ffebf675fe0d90b55d2226c6f690fc4b7ed4b (diff)
1 file changed, 37 insertions, 0 deletions
diff --git a/test/CodeGen/Hexagon/swp-order-copies.ll b/test/CodeGen/Hexagon/swp-order-copies.ll
new file mode 100644
index 00000000000..5de0717654f
--- /dev/null
+++ b/test/CodeGen/Hexagon/swp-order-copies.ll
@@ -0,0 +1,37 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+
+; Test that the instruction ordering code in the pipeliner fixes up dependences
+; between post-increment register definitions and uses so that the register
+; allocator does not allocate an additional register. The loop body in the
+; following test case should be emitted as a single packet.
+
+; CHECK: loop0(.LBB0_[[LOOP:.]],
+; CHECK: .LBB0_[[LOOP]]:
+; CHECK: {
+; CHECK-NOT: {
+; CHECK: :endloop0
+
+define void @test(i64* nocapture %v1, i64 %v2, i32 %len) local_unnamed_addr #0 { ; in-place update of %len i64 elements starting at %v1
+entry:
+  %cmp7 = icmp sgt i32 %len, 0 ; run the loop only when %len > 0 (signed)
+  br i1 %cmp7, label %for.body, label %for.end
+
+for.body:
+  %arrayidx.phi = phi i64* [ %arrayidx.inc, %for.body ], [ %v1, %entry ] ; current element pointer: %v1 on entry, bumped pointer afterwards
+  %i.08 = phi i32 [ %inc, %for.body ], [ 0, %entry ] ; iteration counter, starts at 0
+  %0 = load i64, i64* %arrayidx.phi, align 8 ; read the current element
+  %1 = tail call i64 @llvm.hexagon.M2.mmpyul.rs1(i64 %0, i64 %v2) ; Hexagon intrinsic applied to the loaded element and %v2
+  store i64 %1, i64* %arrayidx.phi, align 8 ; write the result back to the same address that was loaded
+  %inc = add nuw nsw i32 %i.08, 1
+  %exitcond = icmp eq i32 %inc, %len ; leave the loop once %len iterations have run
+  %arrayidx.inc = getelementptr i64, i64* %arrayidx.phi, i32 1 ; advance by one i64; presumably the pointer bump the header comment calls the post-increment - confirm against llc output
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+declare i64 @llvm.hexagon.M2.mmpyul.rs1(i64, i64) #1 ; intrinsic called from the loop body of @test
+
+attributes #0 = { nounwind "target-cpu"="hexagonv60" } ; attribute group attached to @test
+attributes #1 = { nounwind readnone } ; attribute group attached to the intrinsic declaration
author	Krzysztof Parzyszek <kparzysz@codeaurora.org>	2017-10-11 15:51:44 +0000
committer	Krzysztof Parzyszek <kparzysz@codeaurora.org>	2017-10-11 15:51:44 +0000
commit	9f806a04386a6ec08cc976286b9bf998438fc803 (patch)
tree	1d244908c7d1e4440f6a5d2d0a86a97b3ecf3131 /test/CodeGen/Hexagon
parent	143ffebf675fe0d90b55d2226c6f690fc4b7ed4b (diff)