[ARM][AArch64][DAG] Reenable post-legalize store merge

Reenable post-legalize store merging with constant merging computation, and add a corresponding test case. * Properly truncate store merge constants. * Disable merging of truncated floating-point stores. * Ensure merges of constant stores into a single vector are constructed from legal elements. Reviewers: eastig, efriedma Reviewed By: eastig Subscribers: spatel, rengolin, aemerson, javed.absar, kristof.beyls, hiraditya, llvm-commits Differential Revision: https://reviews.llvm.org/D40701 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@319899 91177308-0d34-0410-b5e6-96231b3b80d8
author: Nirav Dave <niravd@google.com> 2017-12-06 15:30:13 +0000
committer: Nirav Dave <niravd@google.com> 2017-12-06 15:30:13 +0000
commit: 256a26609b9e6977a0a24d5b8cc0cdfc386eb677 (patch)
tree: 80acffc678f5915ea4e00bafd1e8c0ba518dcd3c /test/CodeGen/AArch64
parent: e780c515773ad78af5d0a3f30a56440347911941 (diff)
6 files changed, 69 insertions, 26 deletions
diff --git a/test/CodeGen/AArch64/arm64-complex-ret.ll b/test/CodeGen/AArch64/arm64-complex-ret.ll
index 250edac553c..b4a38544ca1 100644
--- a/test/CodeGen/AArch64/arm64-complex-ret.ll
+++ b/test/CodeGen/AArch64/arm64-complex-ret.ll
@@ -2,6 +2,7 @@
 
 define { i192, i192, i21, i192 } @foo(i192) {
 ; CHECK-LABEL: foo:
-; CHECK: stp xzr, xzr, [x8]
+; CHECK-DAG: str xzr, [x8, #16]
+; CHECK-DAG: str q0, [x8]
   ret { i192, i192, i21, i192 } {i192 0, i192 1, i21 2, i192 3}
 }
diff --git a/test/CodeGen/AArch64/arm64-narrow-st-merge.ll b/test/CodeGen/AArch64/arm64-narrow-st-merge.ll
index ec7c227e169..b48f3b46cb4 100644
--- a/test/CodeGen/AArch64/arm64-narrow-st-merge.ll
+++ b/test/CodeGen/AArch64/arm64-narrow-st-merge.ll
@@ -19,7 +19,7 @@ entry:
 }
 
 ; CHECK-LABEL: Strh_zero_4
-; CHECK: stp wzr, wzr
+; CHECK: str xzr
 ; CHECK-STRICT-LABEL: Strh_zero_4
 ; CHECK-STRICT: strh wzr
 ; CHECK-STRICT: strh wzr
@@ -137,7 +137,7 @@ entry:
 }
 
 ; CHECK-LABEL: Sturh_zero_4
-; CHECK: stp wzr, wzr
+; CHECK: stur xzr
 ; CHECK-STRICT-LABEL: Sturh_zero_4
 ; CHECK-STRICT: sturh wzr
 ; CHECK-STRICT: sturh wzr
diff --git a/test/CodeGen/AArch64/arm64-storebytesmerge.ll b/test/CodeGen/AArch64/arm64-storebytesmerge.ll
new file mode 100644
index 00000000000..fb06131242d
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-storebytesmerge.ll
@@ -0,0 +1,46 @@
+; RUN: llc  -mtriple=aarch64-linux-gnu -enable-misched=false < %s | FileCheck %s
+
+;target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+;target triple = "aarch64--linux-gnu"
+
+
+; CHECK-LABEL: test
+; CHECK: str     x30, [sp, #-16]!
+; CHECK: adrp    x8, q   
+; CHECK: ldr     x8, [x8, :lo12:q]
+; CHECK: stp     xzr, xzr, [x8] 
+; CHECK: bl f
+
+@q = external unnamed_addr global i16*, align 8
+
+; Function Attrs: nounwind
+define void @test() local_unnamed_addr #0 {
+entry:
+  br label %for.body453.i
+
+for.body453.i:                                    ; preds = %for.body453.i, %entry
+  br i1 undef, label %for.body453.i, label %for.end705.i
+
+for.end705.i:                                     ; preds = %for.body453.i
+  %0 = load i16*, i16** @q, align 8
+  %1 = getelementptr inbounds i16, i16* %0, i64 0
+  %2 = bitcast i16* %1 to <2 x i16>*
+  store <2 x i16> zeroinitializer, <2 x i16>* %2, align 2
+  %3 = getelementptr i16, i16* %1, i64 2
+  %4 = bitcast i16* %3 to <2 x i16>*
+  store <2 x i16> zeroinitializer, <2 x i16>* %4, align 2
+  %5 = getelementptr i16, i16* %1, i64 4
+  %6 = bitcast i16* %5 to <2 x i16>*
+  store <2 x i16> zeroinitializer, <2 x i16>* %6, align 2
+  %7 = getelementptr i16, i16* %1, i64 6
+  %8 = bitcast i16* %7 to <2 x i16>*
+  store <2 x i16> zeroinitializer, <2 x i16>* %8, align 2
+  call void @f() #2
+  unreachable
+}
+
+declare void @f() local_unnamed_addr #1
+
+attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a57" "target-features"="+crc,+crypto,+fp-armv8,+neon" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a57" "target-features"="+crc,+crypto,+fp-armv8,+neon" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #2 = { nounwind }
diff --git a/test/CodeGen/AArch64/arm64-variadic-aapcs.ll b/test/CodeGen/AArch64/arm64-variadic-aapcs.ll
index c6c7a65e2c1..0f8f4c5d4a4 100644
--- a/test/CodeGen/AArch64/arm64-variadic-aapcs.ll
+++ b/test/CodeGen/AArch64/arm64-variadic-aapcs.ll
@@ -32,11 +32,9 @@ define void @test_simple(i32 %n, ...) {
 ; CHECK: add [[VR_TOP:x[0-9]+]], [[VR_TOPTMP]], #128
 ; CHECK: str [[VR_TOP]], [x[[VA_LIST]], #16]
 
-; CHECK: mov [[GR_OFFS:w[0-9]+]], #-56
-; CHECK: str [[GR_OFFS]], [x[[VA_LIST]], #24]
-
-; CHECK: orr [[VR_OFFS:w[0-9]+]], wzr, #0xffffff80
-; CHECK: str [[VR_OFFS]], [x[[VA_LIST]], #28]
+; CHECK: mov     [[GRVR:x[0-9]+]], #-545460846720
+; CHECK: movk    [[GRVR]], #65480
+; CHECK: str     [[GRVR]], [x[[VA_LIST]], #24]
 
   %addr = bitcast %va_list* @var to i8*
   call void @llvm.va_start(i8* %addr)
@@ -70,11 +68,9 @@ define void @test_fewargs(i32 %n, i32 %n1, i32 %n2, float %m, ...) {
 ; CHECK: add [[VR_TOP:x[0-9]+]], [[VR_TOPTMP]], #112
 ; CHECK: str [[VR_TOP]], [x[[VA_LIST]], #16]
 
-; CHECK: mov [[GR_OFFS:w[0-9]+]], #-40
-; CHECK: str [[GR_OFFS]], [x[[VA_LIST]], #24]
-
-; CHECK: mov [[VR_OFFS:w[0-9]+]], #-11
-; CHECK: str [[VR_OFFS]], [x[[VA_LIST]], #28]
+; CHECK: mov  [[GRVR_OFFS:x[0-9]+]], #-40
+; CHECK: movk [[GRVR_OFFS]], #65424, lsl #32
+; CHECK: str  [[GRVR_OFFS]], [x[[VA_LIST]], #24]
 
   %addr = bitcast %va_list* @var to i8*
   call void @llvm.va_start(i8* %addr)
diff --git a/test/CodeGen/AArch64/tailcall-explicit-sret.ll b/test/CodeGen/AArch64/tailcall-explicit-sret.ll
index c1579336189..b60958b5a25 100644
--- a/test/CodeGen/AArch64/tailcall-explicit-sret.ll
+++ b/test/CodeGen/AArch64/tailcall-explicit-sret.ll
@@ -35,7 +35,7 @@ define void @test_tailcall_explicit_sret_alloca_unused() #0 {
 }
 
 ; CHECK-LABEL: _test_tailcall_explicit_sret_alloca_dummyusers:
-; CHECK: ldr [[PTRLOAD1:x[0-9]+]], [x0]
+; CHECK: ldr [[PTRLOAD1:q[0-9]+]], [x0]
 ; CHECK: str [[PTRLOAD1]], [sp]
 ; CHECK: mov  x8, sp
 ; CHECK-NEXT: bl _test_explicit_sret
@@ -64,8 +64,8 @@ define void @test_tailcall_explicit_sret_gep(i1024* %ptr) #0 {
 ; CHECK: mov  x[[CALLERX8NUM:[0-9]+]], x8
 ; CHECK: mov  x8, sp
 ; CHECK-NEXT: bl _test_explicit_sret
-; CHECK-NEXT: ldr [[CALLERSRET1:x[0-9]+]], [sp]
-; CHECK: str [[CALLERSRET1:x[0-9]+]], [x[[CALLERX8NUM]]]
+; CHECK-NEXT: ldr [[CALLERSRET1:q[0-9]+]], [sp]
+; CHECK: str [[CALLERSRET1:q[0-9]+]], [x[[CALLERX8NUM]]]
 ; CHECK: ret
 define i1024 @test_tailcall_explicit_sret_alloca_returned() #0 {
   %l = alloca i1024, align 8
@@ -79,8 +79,8 @@ define i1024 @test_tailcall_explicit_sret_alloca_returned() #0 {
 ; CHECK-DAG: mov  [[FPTR:x[0-9]+]], x0
 ; CHECK: mov  x0, sp
 ; CHECK-NEXT: blr [[FPTR]]
-; CHECK-NEXT: ldr [[CALLERSRET1:x[0-9]+]], [sp]
-; CHECK: str [[CALLERSRET1:x[0-9]+]], [x[[CALLERX8NUM]]]
+; CHECK: ldr [[CALLERSRET1:q[0-9]+]], [sp]
+; CHECK: str [[CALLERSRET1:q[0-9]+]], [x[[CALLERX8NUM]]]
 ; CHECK: ret
 define void @test_indirect_tailcall_explicit_sret_nosret_arg(i1024* sret %arg, void (i1024*)* %f) #0 {
   %l = alloca i1024, align 8
@@ -94,8 +94,8 @@ define void @test_indirect_tailcall_explicit_sret_nosret_arg(i1024* sret %arg, v
 ; CHECK: mov  x[[CALLERX8NUM:[0-9]+]], x8
 ; CHECK: mov  x8, sp
 ; CHECK-NEXT: blr x0
-; CHECK-NEXT: ldr [[CALLERSRET1:x[0-9]+]], [sp]
-; CHECK: str [[CALLERSRET1:x[0-9]+]], [x[[CALLERX8NUM]]]
+; CHECK: ldr [[CALLERSRET1:q[0-9]+]], [sp]
+; CHECK: str [[CALLERSRET1:q[0-9]+]], [x[[CALLERX8NUM]]]
 ; CHECK: ret
 define void @test_indirect_tailcall_explicit_sret_(i1024* sret %arg, i1024 ()* %f) #0 {
   %ret = tail call i1024 %f()
diff --git a/test/CodeGen/AArch64/tailcall-implicit-sret.ll b/test/CodeGen/AArch64/tailcall-implicit-sret.ll
index 10c4ba4c31d..f449a7e0658 100644
--- a/test/CodeGen/AArch64/tailcall-implicit-sret.ll
+++ b/test/CodeGen/AArch64/tailcall-implicit-sret.ll
@@ -11,8 +11,8 @@ declare i1024 @test_sret() #0
 ; CHECK: mov  x[[CALLERX8NUM:[0-9]+]], x8
 ; CHECK: mov  x8, sp
 ; CHECK-NEXT: bl _test_sret
-; CHECK-NEXT: ldr [[CALLERSRET1:x[0-9]+]], [sp]
-; CHECK: str [[CALLERSRET1:x[0-9]+]], [x[[CALLERX8NUM]]]
+; CHECK: ldr [[CALLERSRET1:q[0-9]+]], [sp]
+; CHECK: str [[CALLERSRET1:q[0-9]+]], [x[[CALLERX8NUM]]]
 ; CHECK: ret
 define i1024 @test_call_sret() #0 {
   %a = call i1024 @test_sret()
@@ -23,8 +23,8 @@ define i1024 @test_call_sret() #0 {
 ; CHECK: mov  x[[CALLERX8NUM:[0-9]+]], x8
 ; CHECK: mov  x8, sp
 ; CHECK-NEXT: bl _test_sret
-; CHECK-NEXT: ldr [[CALLERSRET1:x[0-9]+]], [sp]
-; CHECK: str [[CALLERSRET1:x[0-9]+]], [x[[CALLERX8NUM]]]
+; CHECK: ldr [[CALLERSRET1:q[0-9]+]], [sp]
+; CHECK: str [[CALLERSRET1:q[0-9]+]], [x[[CALLERX8NUM]]]
 ; CHECK: ret
 define i1024 @test_tailcall_sret() #0 {
   %a = tail call i1024 @test_sret()
@@ -35,8 +35,8 @@ define i1024 @test_tailcall_sret() #0 {
 ; CHECK: mov  x[[CALLERX8NUM:[0-9]+]], x8
 ; CHECK: mov  x8, sp
 ; CHECK-NEXT: blr x0
-; CHECK-NEXT: ldr [[CALLERSRET1:x[0-9]+]], [sp]
-; CHECK: str [[CALLERSRET1:x[0-9]+]], [x[[CALLERX8NUM]]]
+; CHECK: ldr [[CALLERSRET1:q[0-9]+]], [sp]
+; CHECK: str [[CALLERSRET1:q[0-9]+]], [x[[CALLERX8NUM]]]
 ; CHECK: ret
 define i1024 @test_indirect_tailcall_sret(i1024 ()* %f) #0 {
   %a = tail call i1024 %f()
author	Nirav Dave <niravd@google.com>	2017-12-06 15:30:13 +0000
committer	Nirav Dave <niravd@google.com>	2017-12-06 15:30:13 +0000
commit	256a26609b9e6977a0a24d5b8cc0cdfc386eb677 (patch)
tree	80acffc678f5915ea4e00bafd1e8c0ba518dcd3c /test/CodeGen/AArch64
parent	e780c515773ad78af5d0a3f30a56440347911941 (diff)