diff options
author | Ulrich Weigand <ulrich.weigand@de.ibm.com> | 2017-07-17 17:42:48 +0000 |
---|---|---|
committer | Ulrich Weigand <ulrich.weigand@de.ibm.com> | 2017-07-17 17:42:48 +0000 |
commit | b5bf1de320a5a6676636c51e2934f10855e4c1d0 (patch) | |
tree | eca13c29a6978f58103ff6634c04ebc9974db259 /test/CodeGen/SystemZ | |
parent | 8ab242ce954d3dfec4d07bb436aee26e38d48553 (diff) |
[SystemZ] Add support for IBM z14 processor (2/3)
This adds support for the new 32-bit vector float instructions of z14.
This includes:
- Enabling the instructions for the assembler/disassembler.
- CodeGen for the instructions, including new LLVM intrinsics.
- Scheduler description support for the instructions.
- Update to the vector cost function calculations.
In general, CodeGen support for the new v4f32 instructions closely
matches support for the existing v2f64 instructions.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@308195 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGen/SystemZ')
27 files changed, 1523 insertions, 71 deletions
diff --git a/test/CodeGen/SystemZ/fp-abs-03.ll b/test/CodeGen/SystemZ/fp-abs-03.ll new file mode 100644 index 00000000000..ccb69642a2c --- /dev/null +++ b/test/CodeGen/SystemZ/fp-abs-03.ll @@ -0,0 +1,40 @@ +; Test floating-point absolute on z14. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +; Test f32. +declare float @llvm.fabs.f32(float %f) +define float @f1(float %f) { +; CHECK-LABEL: f1: +; CHECK: lpdfr %f0, %f0 +; CHECK: br %r14 + %res = call float @llvm.fabs.f32(float %f) + ret float %res +} + +; Test f64. +declare double @llvm.fabs.f64(double %f) +define double @f2(double %f) { +; CHECK-LABEL: f2: +; CHECK: lpdfr %f0, %f0 +; CHECK: br %r14 + %res = call double @llvm.fabs.f64(double %f) + ret double %res +} + +; Test f128. With the loads and stores, a pure absolute would probably +; be better implemented using an NI on the upper byte. Do some extra +; processing so that using FPRs is unequivocally better. +declare fp128 @llvm.fabs.f128(fp128 %f) +define void @f3(fp128 *%ptr, fp128 *%ptr2) { +; CHECK-LABEL: f3: +; CHECK: lpxbr +; CHECK: dxbr +; CHECK: br %r14 + %orig = load fp128 , fp128 *%ptr + %abs = call fp128 @llvm.fabs.f128(fp128 %orig) + %op2 = load fp128 , fp128 *%ptr2 + %res = fdiv fp128 %abs, %op2 + store fp128 %res, fp128 *%ptr + ret void +} diff --git a/test/CodeGen/SystemZ/fp-abs-04.ll b/test/CodeGen/SystemZ/fp-abs-04.ll new file mode 100644 index 00000000000..59064795b98 --- /dev/null +++ b/test/CodeGen/SystemZ/fp-abs-04.ll @@ -0,0 +1,43 @@ +; Test negated floating-point absolute on z14. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +; Test f32. +declare float @llvm.fabs.f32(float %f) +define float @f1(float %f) { +; CHECK-LABEL: f1: +; CHECK: lndfr %f0, %f0 +; CHECK: br %r14 + %abs = call float @llvm.fabs.f32(float %f) + %res = fsub float -0.0, %abs + ret float %res +} + +; Test f64. 
+declare double @llvm.fabs.f64(double %f) +define double @f2(double %f) { +; CHECK-LABEL: f2: +; CHECK: lndfr %f0, %f0 +; CHECK: br %r14 + %abs = call double @llvm.fabs.f64(double %f) + %res = fsub double -0.0, %abs + ret double %res +} + +; Test f128. With the loads and stores, a pure negative-absolute would +; probably be better implemented using an OI on the upper byte. Do some +; extra processing so that using FPRs is unequivocally better. +declare fp128 @llvm.fabs.f128(fp128 %f) +define void @f3(fp128 *%ptr, fp128 *%ptr2) { +; CHECK-LABEL: f3: +; CHECK: lnxbr +; CHECK: dxbr +; CHECK: br %r14 + %orig = load fp128 , fp128 *%ptr + %abs = call fp128 @llvm.fabs.f128(fp128 %orig) + %negabs = fsub fp128 0xL00000000000000008000000000000000, %abs + %op2 = load fp128 , fp128 *%ptr2 + %res = fdiv fp128 %negabs, %op2 + store fp128 %res, fp128 *%ptr + ret void +} diff --git a/test/CodeGen/SystemZ/fp-add-01.ll b/test/CodeGen/SystemZ/fp-add-01.ll index 5b0ed0513a3..219607d628d 100644 --- a/test/CodeGen/SystemZ/fp-add-01.ll +++ b/test/CodeGen/SystemZ/fp-add-01.ll @@ -1,6 +1,8 @@ ; Test 32-bit floating-point addition. 
; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \ +; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s declare float @foo() @@ -76,7 +78,7 @@ define float @f6(float %f1, float *%base, i64 %index) { define float @f7(float *%ptr0) { ; CHECK-LABEL: f7: ; CHECK: brasl %r14, foo@PLT -; CHECK: aeb %f0, 16{{[04]}}(%r15) +; CHECK-SCALAR: aeb %f0, 16{{[04]}}(%r15) ; CHECK: br %r14 %ptr1 = getelementptr float, float *%ptr0, i64 2 %ptr2 = getelementptr float, float *%ptr0, i64 4 diff --git a/test/CodeGen/SystemZ/fp-cmp-01.ll b/test/CodeGen/SystemZ/fp-cmp-01.ll index 075c7aa3dd8..146b16bc695 100644 --- a/test/CodeGen/SystemZ/fp-cmp-01.ll +++ b/test/CodeGen/SystemZ/fp-cmp-01.ll @@ -1,7 +1,10 @@ ; Test 32-bit floating-point comparison. The tests assume a z10 implementation ; of select, using conditional branches rather than LOCGR. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \ +; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 \ +; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s declare float @foo() @@ -9,8 +12,9 @@ declare float @foo() define i64 @f1(i64 %a, i64 %b, float %f1, float %f2) { ; CHECK-LABEL: f1: ; CHECK: cebr %f0, %f2 -; CHECK-NEXT: ber %r14 -; CHECK: lgr %r2, %r3 +; CHECK-SCALAR-NEXT: ber %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrne %r2, %r3 ; CHECK: br %r14 %cond = fcmp oeq float %f1, %f2 %res = select i1 %cond, i64 %a, i64 %b @@ -21,8 +25,9 @@ define i64 @f1(i64 %a, i64 %b, float %f1, float %f2) { define i64 @f2(i64 %a, i64 %b, float %f1, float *%ptr) { ; CHECK-LABEL: f2: ; CHECK: ceb %f0, 0(%r4) -; CHECK-NEXT: ber %r14 -; CHECK: lgr %r2, %r3 +; CHECK-SCALAR-NEXT: ber %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrne 
%r2, %r3 ; CHECK: br %r14 %f2 = load float , float *%ptr %cond = fcmp oeq float %f1, %f2 @@ -34,8 +39,9 @@ define i64 @f2(i64 %a, i64 %b, float %f1, float *%ptr) { define i64 @f3(i64 %a, i64 %b, float %f1, float *%base) { ; CHECK-LABEL: f3: ; CHECK: ceb %f0, 4092(%r4) -; CHECK-NEXT: ber %r14 -; CHECK: lgr %r2, %r3 +; CHECK-SCALAR-NEXT: ber %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrne %r2, %r3 ; CHECK: br %r14 %ptr = getelementptr float, float *%base, i64 1023 %f2 = load float , float *%ptr @@ -50,8 +56,9 @@ define i64 @f4(i64 %a, i64 %b, float %f1, float *%base) { ; CHECK-LABEL: f4: ; CHECK: aghi %r4, 4096 ; CHECK: ceb %f0, 0(%r4) -; CHECK-NEXT: ber %r14 -; CHECK: lgr %r2, %r3 +; CHECK-SCALAR-NEXT: ber %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrne %r2, %r3 ; CHECK: br %r14 %ptr = getelementptr float, float *%base, i64 1024 %f2 = load float , float *%ptr @@ -65,8 +72,9 @@ define i64 @f5(i64 %a, i64 %b, float %f1, float *%base) { ; CHECK-LABEL: f5: ; CHECK: aghi %r4, -4 ; CHECK: ceb %f0, 0(%r4) -; CHECK-NEXT: ber %r14 -; CHECK: lgr %r2, %r3 +; CHECK-SCALAR-NEXT: ber %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrne %r2, %r3 ; CHECK: br %r14 %ptr = getelementptr float, float *%base, i64 -1 %f2 = load float , float *%ptr @@ -80,8 +88,9 @@ define i64 @f6(i64 %a, i64 %b, float %f1, float *%base, i64 %index) { ; CHECK-LABEL: f6: ; CHECK: sllg %r1, %r5, 2 ; CHECK: ceb %f0, 400(%r1,%r4) -; CHECK-NEXT: ber %r14 -; CHECK: lgr %r2, %r3 +; CHECK-SCALAR-NEXT: ber %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrne %r2, %r3 ; CHECK: br %r14 %ptr1 = getelementptr float, float *%base, i64 %index %ptr2 = getelementptr float, float *%ptr1, i64 100 @@ -95,7 +104,7 @@ define i64 @f6(i64 %a, i64 %b, float %f1, float *%base, i64 %index) { define float @f7(float *%ptr0) { ; CHECK-LABEL: f7: ; CHECK: brasl %r14, foo@PLT -; CHECK: ceb {{%f[0-9]+}}, 16{{[04]}}(%r15) +; CHECK-SCALAR: ceb {{%f[0-9]+}}, 16{{[04]}}(%r15) ; CHECK: 
br %r14 %ptr1 = getelementptr float, float *%ptr0, i64 2 %ptr2 = getelementptr float, float *%ptr0, i64 4 @@ -153,8 +162,9 @@ define float @f7(float *%ptr0) { define i64 @f8(i64 %a, i64 %b, float %f) { ; CHECK-LABEL: f8: ; CHECK: ltebr %f0, %f0 -; CHECK-NEXT: ber %r14 -; CHECK: lgr %r2, %r3 +; CHECK-SCALAR-NEXT: ber %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrne %r2, %r3 ; CHECK: br %r14 %cond = fcmp oeq float %f, 0.0 %res = select i1 %cond, i64 %a, i64 %b @@ -166,8 +176,9 @@ define i64 @f8(i64 %a, i64 %b, float %f) { define i64 @f9(i64 %a, i64 %b, float %f2, float *%ptr) { ; CHECK-LABEL: f9: ; CHECK: ceb %f0, 0(%r4) -; CHECK-NEXT: ber %r14 -; CHECK: lgr %r2, %r3 +; CHECK-SCALAR-NEXT: ber %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrne %r2, %r3 ; CHECK: br %r14 %f1 = load float , float *%ptr %cond = fcmp oeq float %f1, %f2 @@ -179,8 +190,9 @@ define i64 @f9(i64 %a, i64 %b, float %f2, float *%ptr) { define i64 @f10(i64 %a, i64 %b, float %f2, float *%ptr) { ; CHECK-LABEL: f10: ; CHECK: ceb %f0, 0(%r4) -; CHECK-NEXT: blhr %r14 -; CHECK: lgr %r2, %r3 +; CHECK-SCALAR-NEXT: blhr %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrnlh %r2, %r3 ; CHECK: br %r14 %f1 = load float , float *%ptr %cond = fcmp one float %f1, %f2 @@ -192,8 +204,9 @@ define i64 @f10(i64 %a, i64 %b, float %f2, float *%ptr) { define i64 @f11(i64 %a, i64 %b, float %f2, float *%ptr) { ; CHECK-LABEL: f11: ; CHECK: ceb %f0, 0(%r4) -; CHECK-NEXT: bhr %r14 -; CHECK: lgr %r2, %r3 +; CHECK-SCALAR-NEXT: bhr %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrnh %r2, %r3 ; CHECK: br %r14 %f1 = load float , float *%ptr %cond = fcmp olt float %f1, %f2 @@ -205,8 +218,9 @@ define i64 @f11(i64 %a, i64 %b, float %f2, float *%ptr) { define i64 @f12(i64 %a, i64 %b, float %f2, float *%ptr) { ; CHECK-LABEL: f12: ; CHECK: ceb %f0, 0(%r4) -; CHECK-NEXT: bher %r14 -; CHECK: lgr %r2, %r3 +; CHECK-SCALAR-NEXT: bher %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; 
CHECK-VECTOR-NEXT: locgrnhe %r2, %r3 ; CHECK: br %r14 %f1 = load float , float *%ptr %cond = fcmp ole float %f1, %f2 @@ -218,8 +232,9 @@ define i64 @f12(i64 %a, i64 %b, float %f2, float *%ptr) { define i64 @f13(i64 %a, i64 %b, float %f2, float *%ptr) { ; CHECK-LABEL: f13: ; CHECK: ceb %f0, 0(%r4) -; CHECK-NEXT: bler %r14 -; CHECK: lgr %r2, %r3 +; CHECK-SCALAR-NEXT: bler %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrnle %r2, %r3 ; CHECK: br %r14 %f1 = load float , float *%ptr %cond = fcmp oge float %f1, %f2 @@ -231,8 +246,9 @@ define i64 @f13(i64 %a, i64 %b, float %f2, float *%ptr) { define i64 @f14(i64 %a, i64 %b, float %f2, float *%ptr) { ; CHECK-LABEL: f14: ; CHECK: ceb %f0, 0(%r4) -; CHECK-NEXT: blr %r14 -; CHECK: lgr %r2, %r3 +; CHECK-SCALAR-NEXT: blr %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrnl %r2, %r3 ; CHECK: br %r14 %f1 = load float , float *%ptr %cond = fcmp ogt float %f1, %f2 @@ -244,8 +260,9 @@ define i64 @f14(i64 %a, i64 %b, float %f2, float *%ptr) { define i64 @f15(i64 %a, i64 %b, float %f2, float *%ptr) { ; CHECK-LABEL: f15: ; CHECK: ceb %f0, 0(%r4) -; CHECK-NEXT: bnlhr %r14 -; CHECK: lgr %r2, %r3 +; CHECK-SCALAR-NEXT: bnlhr %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrlh %r2, %r3 ; CHECK: br %r14 %f1 = load float , float *%ptr %cond = fcmp ueq float %f1, %f2 @@ -257,8 +274,9 @@ define i64 @f15(i64 %a, i64 %b, float %f2, float *%ptr) { define i64 @f16(i64 %a, i64 %b, float %f2, float *%ptr) { ; CHECK-LABEL: f16: ; CHECK: ceb %f0, 0(%r4) -; CHECK-NEXT: bner %r14 -; CHECK: lgr %r2, %r3 +; CHECK-SCALAR-NEXT: bner %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgre %r2, %r3 ; CHECK: br %r14 %f1 = load float , float *%ptr %cond = fcmp une float %f1, %f2 @@ -270,8 +288,9 @@ define i64 @f16(i64 %a, i64 %b, float %f2, float *%ptr) { define i64 @f17(i64 %a, i64 %b, float %f2, float *%ptr) { ; CHECK-LABEL: f17: ; CHECK: ceb %f0, 0(%r4) -; CHECK-NEXT: bnler %r14 -; CHECK: lgr %r2, %r3 +; 
CHECK-SCALAR-NEXT: bnler %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrle %r2, %r3 ; CHECK: br %r14 %f1 = load float , float *%ptr %cond = fcmp ult float %f1, %f2 @@ -283,8 +302,9 @@ define i64 @f17(i64 %a, i64 %b, float %f2, float *%ptr) { define i64 @f18(i64 %a, i64 %b, float %f2, float *%ptr) { ; CHECK-LABEL: f18: ; CHECK: ceb %f0, 0(%r4) -; CHECK-NEXT: bnlr %r14 -; CHECK: lgr %r2, %r3 +; CHECK-SCALAR-NEXT: bnlr %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrl %r2, %r3 ; CHECK: br %r14 %f1 = load float , float *%ptr %cond = fcmp ule float %f1, %f2 @@ -296,8 +316,9 @@ define i64 @f18(i64 %a, i64 %b, float %f2, float *%ptr) { define i64 @f19(i64 %a, i64 %b, float %f2, float *%ptr) { ; CHECK-LABEL: f19: ; CHECK: ceb %f0, 0(%r4) -; CHECK-NEXT: bnhr %r14 -; CHECK: lgr %r2, %r3 +; CHECK-SCALAR-NEXT: bnhr %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrh %r2, %r3 ; CHECK: br %r14 %f1 = load float , float *%ptr %cond = fcmp uge float %f1, %f2 @@ -309,8 +330,9 @@ define i64 @f19(i64 %a, i64 %b, float %f2, float *%ptr) { define i64 @f20(i64 %a, i64 %b, float %f2, float *%ptr) { ; CHECK-LABEL: f20: ; CHECK: ceb %f0, 0(%r4) -; CHECK-NEXT: bnher %r14 -; CHECK: lgr %r2, %r3 +; CHECK-SCALAR-NEXT: bnher %r14 +; CHECK-SCALAR: lgr %r2, %r3 +; CHECK-VECTOR-NEXT: locgrhe %r2, %r3 ; CHECK: br %r14 %f1 = load float , float *%ptr %cond = fcmp ugt float %f1, %f2 diff --git a/test/CodeGen/SystemZ/fp-div-01.ll b/test/CodeGen/SystemZ/fp-div-01.ll index 0791e8db93f..ee514dc474e 100644 --- a/test/CodeGen/SystemZ/fp-div-01.ll +++ b/test/CodeGen/SystemZ/fp-div-01.ll @@ -1,6 +1,8 @@ ; Test 32-bit floating-point division. 
; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \ +; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s declare float @foo() @@ -76,7 +78,7 @@ define float @f6(float %f1, float *%base, i64 %index) { define float @f7(float *%ptr0) { ; CHECK-LABEL: f7: ; CHECK: brasl %r14, foo@PLT -; CHECK: deb %f0, 16{{[04]}}(%r15) +; CHECK-SCALAR: deb %f0, 16{{[04]}}(%r15) ; CHECK: br %r14 %ptr1 = getelementptr float, float *%ptr0, i64 2 %ptr2 = getelementptr float, float *%ptr0, i64 4 diff --git a/test/CodeGen/SystemZ/fp-mul-01.ll b/test/CodeGen/SystemZ/fp-mul-01.ll index 3b72d25e0b5..126567b218a 100644 --- a/test/CodeGen/SystemZ/fp-mul-01.ll +++ b/test/CodeGen/SystemZ/fp-mul-01.ll @@ -1,6 +1,8 @@ ; Test multiplication of two f32s, producing an f32 result. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \ +; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s declare float @foo() @@ -76,7 +78,7 @@ define float @f6(float %f1, float *%base, i64 %index) { define float @f7(float *%ptr0) { ; CHECK-LABEL: f7: ; CHECK: brasl %r14, foo@PLT -; CHECK: meeb %f0, 16{{[04]}}(%r15) +; CHECK-SCALAR: meeb %f0, 16{{[04]}}(%r15) ; CHECK: br %r14 %ptr1 = getelementptr float, float *%ptr0, i64 2 %ptr2 = getelementptr float, float *%ptr0, i64 4 diff --git a/test/CodeGen/SystemZ/fp-mul-06.ll b/test/CodeGen/SystemZ/fp-mul-06.ll index 896fafecbda..581e44eeaa2 100644 --- a/test/CodeGen/SystemZ/fp-mul-06.ll +++ b/test/CodeGen/SystemZ/fp-mul-06.ll @@ -1,11 +1,15 @@ -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \ +; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 \ +; RUN: | FileCheck 
-check-prefix=CHECK -check-prefix=CHECK-VECTOR %s declare float @llvm.fma.f32(float %f1, float %f2, float %f3) define float @f1(float %f1, float %f2, float %acc) { ; CHECK-LABEL: f1: -; CHECK: maebr %f4, %f0, %f2 -; CHECK: ler %f0, %f4 +; CHECK-SCALAR: maebr %f4, %f0, %f2 +; CHECK-SCALAR: ler %f0, %f4 +; CHECK-VECTOR: wfmasb %f0, %f0, %f2, %f4 ; CHECK: br %r14 %res = call float @llvm.fma.f32 (float %f1, float %f2, float %acc) ret float %res @@ -14,7 +18,8 @@ define float @f1(float %f1, float %f2, float %acc) { define float @f2(float %f1, float *%ptr, float %acc) { ; CHECK-LABEL: f2: ; CHECK: maeb %f2, %f0, 0(%r2) -; CHECK: ler %f0, %f2 +; CHECK-SCALAR: ler %f0, %f2 +; CHECK-VECTOR: ldr %f0, %f2 ; CHECK: br %r14 %f2 = load float , float *%ptr %res = call float @llvm.fma.f32 (float %f1, float %f2, float %acc) @@ -24,7 +29,8 @@ define float @f2(float %f1, float *%ptr, float %acc) { define float @f3(float %f1, float *%base, float %acc) { ; CHECK-LABEL: f3: ; CHECK: maeb %f2, %f0, 4092(%r2) -; CHECK: ler %f0, %f2 +; CHECK-SCALAR: ler %f0, %f2 +; CHECK-VECTOR: ldr %f0, %f2 ; CHECK: br %r14 %ptr = getelementptr float, float *%base, i64 1023 %f2 = load float , float *%ptr @@ -39,7 +45,8 @@ define float @f4(float %f1, float *%base, float %acc) { ; CHECK-LABEL: f4: ; CHECK: aghi %r2, 4096 ; CHECK: maeb %f2, %f0, 0(%r2) -; CHECK: ler %f0, %f2 +; CHECK-SCALAR: ler %f0, %f2 +; CHECK-VECTOR: ldr %f0, %f2 ; CHECK: br %r14 %ptr = getelementptr float, float *%base, i64 1024 %f2 = load float , float *%ptr @@ -54,7 +61,8 @@ define float @f5(float %f1, float *%base, float %acc) { ; CHECK-LABEL: f5: ; CHECK: aghi %r2, -4 ; CHECK: maeb %f2, %f0, 0(%r2) -; CHECK: ler %f0, %f2 +; CHECK-SCALAR: ler %f0, %f2 +; CHECK-VECTOR: ldr %f0, %f2 ; CHECK: br %r14 %ptr = getelementptr float, float *%base, i64 -1 %f2 = load float , float *%ptr @@ -66,7 +74,8 @@ define float @f6(float %f1, float *%base, i64 %index, float %acc) { ; CHECK-LABEL: f6: ; CHECK: sllg %r1, %r3, 2 ; CHECK: maeb %f2, %f0, 
0(%r1,%r2) -; CHECK: ler %f0, %f2 +; CHECK-SCALAR: ler %f0, %f2 +; CHECK-VECTOR: ldr %f0, %f2 ; CHECK: br %r14 %ptr = getelementptr float, float *%base, i64 %index %f2 = load float , float *%ptr @@ -78,7 +87,8 @@ define float @f7(float %f1, float *%base, i64 %index, float %acc) { ; CHECK-LABEL: f7: ; CHECK: sllg %r1, %r3, 2 ; CHECK: maeb %f2, %f0, 4092({{%r1,%r2|%r2,%r1}}) -; CHECK: ler %f0, %f2 +; CHECK-SCALAR: ler %f0, %f2 +; CHECK-VECTOR: ldr %f0, %f2 ; CHECK: br %r14 %index2 = add i64 %index, 1023 %ptr = getelementptr float, float *%base, i64 %index2 @@ -92,7 +102,8 @@ define float @f8(float %f1, float *%base, i64 %index, float %acc) { ; CHECK: sllg %r1, %r3, 2 ; CHECK: lay %r1, 4096({{%r1,%r2|%r2,%r1}}) ; CHECK: maeb %f2, %f0, 0(%r1) -; CHECK: ler %f0, %f2 +; CHECK-SCALAR: ler %f0, %f2 +; CHECK-VECTOR: ldr %f0, %f2 ; CHECK: br %r14 %index2 = add i64 %index, 1024 %ptr = getelementptr float, float *%base, i64 %index2 diff --git a/test/CodeGen/SystemZ/fp-mul-08.ll b/test/CodeGen/SystemZ/fp-mul-08.ll index 5e5538bfacc..5b1f9b96c08 100644 --- a/test/CodeGen/SystemZ/fp-mul-08.ll +++ b/test/CodeGen/SystemZ/fp-mul-08.ll @@ -1,11 +1,15 @@ -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \ +; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 \ +; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s declare float @llvm.fma.f32(float %f1, float %f2, float %f3) define float @f1(float %f1, float %f2, float %acc) { ; CHECK-LABEL: f1: -; CHECK: msebr %f4, %f0, %f2 -; CHECK: ler %f0, %f4 +; CHECK-SCALAR: msebr %f4, %f0, %f2 +; CHECK-SCALAR: ler %f0, %f4 +; CHECK-VECTOR: wfmssb %f0, %f0, %f2, %f4 ; CHECK: br %r14 %negacc = fsub float -0.0, %acc %res = call float @llvm.fma.f32 (float %f1, float %f2, float %negacc) @@ -15,7 +19,8 @@ define float @f1(float %f1, float %f2, float %acc) { define float @f2(float %f1, float *%ptr, float %acc) { 
; CHECK-LABEL: f2: ; CHECK: mseb %f2, %f0, 0(%r2) -; CHECK: ler %f0, %f2 +; CHECK-SCALAR: ler %f0, %f2 +; CHECK-VECTOR: ldr %f0, %f2 ; CHECK: br %r14 %f2 = load float , float *%ptr %negacc = fsub float -0.0, %acc @@ -26,7 +31,8 @@ define float @f2(float %f1, float *%ptr, float %acc) { define float @f3(float %f1, float *%base, float %acc) { ; CHECK-LABEL: f3: ; CHECK: mseb %f2, %f0, 4092(%r2) -; CHECK: ler %f0, %f2 +; CHECK-SCALAR: ler %f0, %f2 +; CHECK-VECTOR: ldr %f0, %f2 ; CHECK: br %r14 %ptr = getelementptr float, float *%base, i64 1023 %f2 = load float , float *%ptr @@ -42,7 +48,8 @@ define float @f4(float %f1, float *%base, float %acc) { ; CHECK-LABEL: f4: ; CHECK: aghi %r2, 4096 ; CHECK: mseb %f2, %f0, 0(%r2) -; CHECK: ler %f0, %f2 +; CHECK-SCALAR: ler %f0, %f2 +; CHECK-VECTOR: ldr %f0, %f2 ; CHECK: br %r14 %ptr = getelementptr float, float *%base, i64 1024 %f2 = load float , float *%ptr @@ -58,7 +65,8 @@ define float @f5(float %f1, float *%base, float %acc) { ; CHECK-LABEL: f5: ; CHECK: aghi %r2, -4 ; CHECK: mseb %f2, %f0, 0(%r2) -; CHECK: ler %f0, %f2 +; CHECK-SCALAR: ler %f0, %f2 +; CHECK-VECTOR: ldr %f0, %f2 ; CHECK: br %r14 %ptr = getelementptr float, float *%base, i64 -1 %f2 = load float , float *%ptr @@ -71,7 +79,8 @@ define float @f6(float %f1, float *%base, i64 %index, float %acc) { ; CHECK-LABEL: f6: ; CHECK: sllg %r1, %r3, 2 ; CHECK: mseb %f2, %f0, 0(%r1,%r2) -; CHECK: ler %f0, %f2 +; CHECK-SCALAR: ler %f0, %f2 +; CHECK-VECTOR: ldr %f0, %f2 ; CHECK: br %r14 %ptr = getelementptr float, float *%base, i64 %index %f2 = load float , float *%ptr @@ -84,7 +93,8 @@ define float @f7(float %f1, float *%base, i64 %index, float %acc) { ; CHECK-LABEL: f7: ; CHECK: sllg %r1, %r3, 2 ; CHECK: mseb %f2, %f0, 4092({{%r1,%r2|%r2,%r1}}) -; CHECK: ler %f0, %f2 +; CHECK-SCALAR: ler %f0, %f2 +; CHECK-VECTOR: ldr %f0, %f2 ; CHECK: br %r14 %index2 = add i64 %index, 1023 %ptr = getelementptr float, float *%base, i64 %index2 @@ -99,7 +109,8 @@ define float @f8(float %f1, 
float *%base, i64 %index, float %acc) { ; CHECK: sllg %r1, %r3, 2 ; CHECK: lay %r1, 4096({{%r1,%r2|%r2,%r1}}) ; CHECK: mseb %f2, %f0, 0(%r1) -; CHECK: ler %f0, %f2 +; CHECK-SCALAR: ler %f0, %f2 +; CHECK-VECTOR: ldr %f0, %f2 ; CHECK: br %r14 %index2 = add i64 %index, 1024 %ptr = getelementptr float, float *%base, i64 %index2 diff --git a/test/CodeGen/SystemZ/fp-mul-10.ll b/test/CodeGen/SystemZ/fp-mul-10.ll index 977e5c60e3a..c23a6a202ad 100644 --- a/test/CodeGen/SystemZ/fp-mul-10.ll +++ b/test/CodeGen/SystemZ/fp-mul-10.ll @@ -1,6 +1,7 @@ ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s declare double @llvm.fma.f64(double %f1, double %f2, double %f3) +declare float @llvm.fma.f32(float %f1, float %f2, float %f3) define double @f1(double %f1, double %f2, double %acc) { ; CHECK-LABEL: f1: @@ -21,3 +22,22 @@ define double @f2(double %f1, double %f2, double %acc) { ret double %negres } +define float @f3(float %f1, float %f2, float %acc) { +; CHECK-LABEL: f3: +; CHECK: wfnmasb %f0, %f0, %f2, %f4 +; CHECK: br %r14 + %res = call float @llvm.fma.f32 (float %f1, float %f2, float %acc) + %negres = fsub float -0.0, %res + ret float %negres +} + +define float @f4(float %f1, float %f2, float %acc) { +; CHECK-LABEL: f4: +; CHECK: wfnmssb %f0, %f0, %f2, %f4 +; CHECK: br %r14 + %negacc = fsub float -0.0, %acc + %res = call float @llvm.fma.f32 (float %f1, float %f2, float %negacc) + %negres = fsub float -0.0, %res + ret float %negres +} + diff --git a/test/CodeGen/SystemZ/fp-neg-02.ll b/test/CodeGen/SystemZ/fp-neg-02.ll new file mode 100644 index 00000000000..c904d19947b --- /dev/null +++ b/test/CodeGen/SystemZ/fp-neg-02.ll @@ -0,0 +1,38 @@ +; Test floating-point negation on z14. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +; Test f32. +define float @f1(float %f) { +; CHECK-LABEL: f1: +; CHECK: lcdfr %f0, %f0 +; CHECK: br %r14 + %res = fsub float -0.0, %f + ret float %res +} + +; Test f64. 
+define double @f2(double %f) { +; CHECK-LABEL: f2: +; CHECK: lcdfr %f0, %f0 +; CHECK: br %r14 + %res = fsub double -0.0, %f + ret double %res +} + +; Test f128. With the loads and stores, a pure negation would probably +; be better implemented using an XI on the upper byte. Do some extra +; processing so that using FPRs is unequivocally better. +define void @f3(fp128 *%ptr, fp128 *%ptr2) { +; CHECK-LABEL: f3: +; CHECK: lcxbr +; CHECK: dxbr +; CHECK: br %r14 + %orig = load fp128 , fp128 *%ptr + %negzero = fpext float -0.0 to fp128 + %neg = fsub fp128 0xL00000000000000008000000000000000, %orig + %op2 = load fp128 , fp128 *%ptr2 + %res = fdiv fp128 %neg, %op2 + store fp128 %res, fp128 *%ptr + ret void +} diff --git a/test/CodeGen/SystemZ/fp-round-03.ll b/test/CodeGen/SystemZ/fp-round-03.ll new file mode 100644 index 00000000000..1a8296357bc --- /dev/null +++ b/test/CodeGen/SystemZ/fp-round-03.ll @@ -0,0 +1,195 @@ +; Test rounding functions for z14 and above. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +; Test rint for f32. +declare float @llvm.rint.f32(float %f) +define float @f1(float %f) { +; CHECK-LABEL: f1: +; CHECK: fiebra %f0, 0, %f0, 0 +; CHECK: br %r14 + %res = call float @llvm.rint.f32(float %f) + ret float %res +} + +; Test rint for f64. +declare double @llvm.rint.f64(double %f) +define double @f2(double %f) { +; CHECK-LABEL: f2: +; CHECK: fidbra %f0, 0, %f0, 0 +; CHECK: br %r14 + %res = call double @llvm.rint.f64(double %f) + ret double %res +} + +; Test rint for f128. +declare fp128 @llvm.rint.f128(fp128 %f) +define void @f3(fp128 *%ptr) { +; CHECK-LABEL: f3: +; CHECK: fixbr %f0, 0, %f0 +; CHECK: br %r14 + %src = load fp128 , fp128 *%ptr + %res = call fp128 @llvm.rint.f128(fp128 %src) + store fp128 %res, fp128 *%ptr + ret void +} + +; Test nearbyint for f32. 
+declare float @llvm.nearbyint.f32(float %f) +define float @f4(float %f) { +; CHECK-LABEL: f4: +; CHECK: fiebra %f0, 0, %f0, 4 +; CHECK: br %r14 + %res = call float @llvm.nearbyint.f32(float %f) + ret float %res +} + +; Test nearbyint for f64. +declare double @llvm.nearbyint.f64(double %f) +define double @f5(double %f) { +; CHECK-LABEL: f5: +; CHECK: fidbra %f0, 0, %f0, 4 +; CHECK: br %r14 + %res = call double @llvm.nearbyint.f64(double %f) + ret double %res +} + +; Test nearbyint for f128. +declare fp128 @llvm.nearbyint.f128(fp128 %f) +define void @f6(fp128 *%ptr) { +; CHECK-LABEL: f6: +; CHECK: fixbra %f0, 0, %f0, 4 +; CHECK: br %r14 + %src = load fp128 , fp128 *%ptr + %res = call fp128 @llvm.nearbyint.f128(fp128 %src) + store fp128 %res, fp128 *%ptr + ret void +} + +; Test floor for f32. +declare float @llvm.floor.f32(float %f) +define float @f7(float %f) { +; CHECK-LABEL: f7: +; CHECK: fiebra %f0, 7, %f0, 4 +; CHECK: br %r14 + %res = call float @llvm.floor.f32(float %f) + ret float %res +} + +; Test floor for f64. +declare double @llvm.floor.f64(double %f) +define double @f8(double %f) { +; CHECK-LABEL: f8: +; CHECK: fidbra %f0, 7, %f0, 4 +; CHECK: br %r14 + %res = call double @llvm.floor.f64(double %f) + ret double %res +} + +; Test floor for f128. +declare fp128 @llvm.floor.f128(fp128 %f) +define void @f9(fp128 *%ptr) { +; CHECK-LABEL: f9: +; CHECK: fixbra %f0, 7, %f0, 4 +; CHECK: br %r14 + %src = load fp128 , fp128 *%ptr + %res = call fp128 @llvm.floor.f128(fp128 %src) + store fp128 %res, fp128 *%ptr + ret void +} + +; Test ceil for f32. +declare float @llvm.ceil.f32(float %f) +define float @f10(float %f) { +; CHECK-LABEL: f10: +; CHECK: fiebra %f0, 6, %f0, 4 +; CHECK: br %r14 + %res = call float @llvm.ceil.f32(float %f) + ret float %res +} + +; Test ceil for f64. 
+declare double @llvm.ceil.f64(double %f) +define double @f11(double %f) { +; CHECK-LABEL: f11: +; CHECK: fidbra %f0, 6, %f0, 4 +; CHECK: br %r14 + %res = call double @llvm.ceil.f64(double %f) + ret double %res +} + +; Test ceil for f128. +declare fp128 @llvm.ceil.f128(fp128 %f) +define void @f12(fp128 *%ptr) { +; CHECK-LABEL: f12: +; CHECK: fixbra %f0, 6, %f0, 4 +; CHECK: br %r14 + %src = load fp128 , fp128 *%ptr + %res = call fp128 @llvm.ceil.f128(fp128 %src) + store fp128 %res, fp128 *%ptr + ret void +} + +; Test trunc for f32. +declare float @llvm.trunc.f32(float %f) +define float @f13(float %f) { +; CHECK-LABEL: f13: +; CHECK: fiebra %f0, 5, %f0, 4 +; CHECK: br %r14 + %res = call float @llvm.trunc.f32(float %f) + ret float %res +} + +; Test trunc for f64. +declare double @llvm.trunc.f64(double %f) +define double @f14(double %f) { +; CHECK-LABEL: f14: +; CHECK: fidbra %f0, 5, %f0, 4 +; CHECK: br %r14 + %res = call double @llvm.trunc.f64(double %f) + ret double %res +} + +; Test trunc for f128. +declare fp128 @llvm.trunc.f128(fp128 %f) +define void @f15(fp128 *%ptr) { +; CHECK-LABEL: f15: +; CHECK: fixbra %f0, 5, %f0, 4 +; CHECK: br %r14 + %src = load fp128 , fp128 *%ptr + %res = call fp128 @llvm.trunc.f128(fp128 %src) + store fp128 %res, fp128 *%ptr + ret void +} + +; Test round for f32. +declare float @llvm.round.f32(float %f) +define float @f16(float %f) { +; CHECK-LABEL: f16: +; CHECK: fiebra %f0, 1, %f0, 4 +; CHECK: br %r14 + %res = call float @llvm.round.f32(float %f) + ret float %res +} + +; Test round for f64. +declare double @llvm.round.f64(double %f) +define double @f17(double %f) { +; CHECK-LABEL: f17: +; CHECK: fidbra %f0, 1, %f0, 4 +; CHECK: br %r14 + %res = call double @llvm.round.f64(double %f) + ret double %res +} + +; Test round for f128. 
+declare fp128 @llvm.round.f128(fp128 %f) +define void @f18(fp128 *%ptr) { +; CHECK-LABEL: f18: +; CHECK: fixbra %f0, 1, %f0, 4 +; CHECK: br %r14 + %src = load fp128 , fp128 *%ptr + %res = call fp128 @llvm.round.f128(fp128 %src) + store fp128 %res, fp128 *%ptr + ret void +} diff --git a/test/CodeGen/SystemZ/fp-sqrt-01.ll b/test/CodeGen/SystemZ/fp-sqrt-01.ll index 3680207e7f2..85a46bc2d7f 100644 --- a/test/CodeGen/SystemZ/fp-sqrt-01.ll +++ b/test/CodeGen/SystemZ/fp-sqrt-01.ll @@ -1,6 +1,8 @@ ; Test 32-bit square root. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \ +; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s declare float @llvm.sqrt.f32(float) declare float @sqrtf(float) @@ -77,7 +79,7 @@ define float @f6(float *%base, i64 %index) { ; to use SQEB if possible. define void @f7(float *%ptr) { ; CHECK-LABEL: f7: -; CHECK: sqeb {{%f[0-9]+}}, 16{{[04]}}(%r15) +; CHECK-SCALAR: sqeb {{%f[0-9]+}}, 16{{[04]}}(%r15) ; CHECK: br %r14 %val0 = load volatile float , float *%ptr %val1 = load volatile float , float *%ptr @@ -160,7 +162,7 @@ define float @f8(float %dummy, float %val) { ; CHECK: sqebr %f0, %f2 ; CHECK: cebr %f0, %f0 ; CHECK: bnor %r14 -; CHECK: ler %f0, %f2 +; CHECK: {{ler|ldr}} %f0, %f2 ; CHECK: jg sqrtf@PLT %res = tail call float @sqrtf(float %val) ret float %res diff --git a/test/CodeGen/SystemZ/fp-sub-01.ll b/test/CodeGen/SystemZ/fp-sub-01.ll index f4185ca3108..41f72e1810e 100644 --- a/test/CodeGen/SystemZ/fp-sub-01.ll +++ b/test/CodeGen/SystemZ/fp-sub-01.ll @@ -1,6 +1,8 @@ ; Test 32-bit floating-point subtraction. 
; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \ +; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s declare float @foo() @@ -76,7 +78,7 @@ define float @f6(float %f1, float *%base, i64 %index) { define float @f7(float *%ptr0) { ; CHECK-LABEL: f7: ; CHECK: brasl %r14, foo@PLT -; CHECK: seb %f0, 16{{[04]}}(%r15) +; CHECK-SCALAR: seb %f0, 16{{[04]}}(%r15) ; CHECK: br %r14 %ptr1 = getelementptr float, float *%ptr0, i64 2 %ptr2 = getelementptr float, float *%ptr0, i64 4 diff --git a/test/CodeGen/SystemZ/vec-abs-06.ll b/test/CodeGen/SystemZ/vec-abs-06.ll new file mode 100644 index 00000000000..8eee1d9d250 --- /dev/null +++ b/test/CodeGen/SystemZ/vec-abs-06.ll @@ -0,0 +1,47 @@ +; Test f32 and v4f32 absolute on z14. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +declare float @llvm.fabs.f32(float) +declare <4 x float> @llvm.fabs.v4f32(<4 x float>) + +; Test a plain absolute. +define <4 x float> @f1(<4 x float> %val) { +; CHECK-LABEL: f1: +; CHECK: vflpsb %v24, %v24 +; CHECK: br %r14 + %ret = call <4 x float> @llvm.fabs.v4f32(<4 x float> %val) + ret <4 x float> %ret +} + +; Test a negative absolute. +define <4 x float> @f2(<4 x float> %val) { +; CHECK-LABEL: f2: +; CHECK: vflnsb %v24, %v24 +; CHECK: br %r14 + %abs = call <4 x float> @llvm.fabs.v4f32(<4 x float> %val) + %ret = fsub <4 x float> <float -0.0, float -0.0, + float -0.0, float -0.0>, %abs + ret <4 x float> %ret +} + +; Test an f32 absolute that uses vector registers. +define float @f3(<4 x float> %val) { +; CHECK-LABEL: f3: +; CHECK: wflpsb %f0, %v24 +; CHECK: br %r14 + %scalar = extractelement <4 x float> %val, i32 0 + %ret = call float @llvm.fabs.f32(float %scalar) + ret float %ret +} + +; Test an f32 negative absolute that uses vector registers. 
+define float @f4(<4 x float> %val) { +; CHECK-LABEL: f4: +; CHECK: wflnsb %f0, %v24 +; CHECK: br %r14 + %scalar = extractelement <4 x float> %val, i32 0 + %abs = call float @llvm.fabs.f32(float %scalar) + %ret = fsub float -0.0, %abs + ret float %ret +} diff --git a/test/CodeGen/SystemZ/vec-add-02.ll b/test/CodeGen/SystemZ/vec-add-02.ll new file mode 100644 index 00000000000..97a9b84a063 --- /dev/null +++ b/test/CodeGen/SystemZ/vec-add-02.ll @@ -0,0 +1,24 @@ +; Test vector addition on z14. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +; Test a v4f32 addition. +define <4 x float> @f1(<4 x float> %dummy, <4 x float> %val1, + <4 x float> %val2) { +; CHECK-LABEL: f1: +; CHECK: vfasb %v24, %v26, %v28 +; CHECK: br %r14 + %ret = fadd <4 x float> %val1, %val2 + ret <4 x float> %ret +} + +; Test an f32 addition that uses vector registers. +define float @f2(<4 x float> %val1, <4 x float> %val2) { +; CHECK-LABEL: f2: +; CHECK: wfasb %f0, %v24, %v26 +; CHECK: br %r14 + %scalar1 = extractelement <4 x float> %val1, i32 0 + %scalar2 = extractelement <4 x float> %val2, i32 0 + %ret = fadd float %scalar1, %scalar2 + ret float %ret +} diff --git a/test/CodeGen/SystemZ/vec-cmp-07.ll b/test/CodeGen/SystemZ/vec-cmp-07.ll new file mode 100644 index 00000000000..f272ba4bd75 --- /dev/null +++ b/test/CodeGen/SystemZ/vec-cmp-07.ll @@ -0,0 +1,349 @@ +; Test f32 and v4f32 comparisons on z14. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +; Test oeq. +define <4 x i32> @f1(<4 x i32> %dummy, <4 x float> %val1, <4 x float> %val2) { +; CHECK-LABEL: f1: +; CHECK: vfcesb %v24, %v26, %v28 +; CHECK-NEXT: br %r14 + %cmp = fcmp oeq <4 x float> %val1, %val2 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test one. 
+define <4 x i32> @f2(<4 x i32> %dummy, <4 x float> %val1, <4 x float> %val2) { +; CHECK-LABEL: f2: +; CHECK-DAG: vfchsb [[REG1:%v[0-9]+]], %v28, %v26 +; CHECK-DAG: vfchsb [[REG2:%v[0-9]+]], %v26, %v28 +; CHECK: vo %v24, [[REG1]], [[REG2]] +; CHECK-NEXT: br %r14 + %cmp = fcmp one <4 x float> %val1, %val2 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test ogt. +define <4 x i32> @f3(<4 x i32> %dummy, <4 x float> %val1, <4 x float> %val2) { +; CHECK-LABEL: f3: +; CHECK: vfchsb %v24, %v26, %v28 +; CHECK-NEXT: br %r14 + %cmp = fcmp ogt <4 x float> %val1, %val2 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test oge. +define <4 x i32> @f4(<4 x i32> %dummy, <4 x float> %val1, <4 x float> %val2) { +; CHECK-LABEL: f4: +; CHECK: vfchesb %v24, %v26, %v28 +; CHECK-NEXT: br %r14 + %cmp = fcmp oge <4 x float> %val1, %val2 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test ole. +define <4 x i32> @f5(<4 x i32> %dummy, <4 x float> %val1, <4 x float> %val2) { +; CHECK-LABEL: f5: +; CHECK: vfchesb %v24, %v28, %v26 +; CHECK-NEXT: br %r14 + %cmp = fcmp ole <4 x float> %val1, %val2 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test olt. +define <4 x i32> @f6(<4 x i32> %dummy, <4 x float> %val1, <4 x float> %val2) { +; CHECK-LABEL: f6: +; CHECK: vfchsb %v24, %v28, %v26 +; CHECK-NEXT: br %r14 + %cmp = fcmp olt <4 x float> %val1, %val2 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test ueq. +define <4 x i32> @f7(<4 x i32> %dummy, <4 x float> %val1, <4 x float> %val2) { +; CHECK-LABEL: f7: +; CHECK-DAG: vfchsb [[REG1:%v[0-9]+]], %v28, %v26 +; CHECK-DAG: vfchsb [[REG2:%v[0-9]+]], %v26, %v28 +; CHECK: vno %v24, [[REG1]], [[REG2]] +; CHECK-NEXT: br %r14 + %cmp = fcmp ueq <4 x float> %val1, %val2 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test une. 
+define <4 x i32> @f8(<4 x i32> %dummy, <4 x float> %val1, <4 x float> %val2) { +; CHECK-LABEL: f8: +; CHECK: vfcesb [[REG:%v[0-9]+]], %v26, %v28 +; CHECK-NEXT: vno %v24, [[REG]], [[REG]] +; CHECK-NEXT: br %r14 + %cmp = fcmp une <4 x float> %val1, %val2 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test ugt. +define <4 x i32> @f9(<4 x i32> %dummy, <4 x float> %val1, <4 x float> %val2) { +; CHECK-LABEL: f9: +; CHECK: vfchesb [[REG:%v[0-9]+]], %v28, %v26 +; CHECK-NEXT: vno %v24, [[REG]], [[REG]] +; CHECK-NEXT: br %r14 + %cmp = fcmp ugt <4 x float> %val1, %val2 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test uge. +define <4 x i32> @f10(<4 x i32> %dummy, <4 x float> %val1, + <4 x float> %val2) { +; CHECK-LABEL: f10: +; CHECK: vfchsb [[REG:%v[0-9]+]], %v28, %v26 +; CHECK-NEXT: vno %v24, [[REG]], [[REG]] +; CHECK-NEXT: br %r14 + %cmp = fcmp uge <4 x float> %val1, %val2 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test ule. +define <4 x i32> @f11(<4 x i32> %dummy, <4 x float> %val1, + <4 x float> %val2) { +; CHECK-LABEL: f11: +; CHECK: vfchsb [[REG:%v[0-9]+]], %v26, %v28 +; CHECK-NEXT: vno %v24, [[REG]], [[REG]] +; CHECK-NEXT: br %r14 + %cmp = fcmp ule <4 x float> %val1, %val2 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test ult. +define <4 x i32> @f12(<4 x i32> %dummy, <4 x float> %val1, + <4 x float> %val2) { +; CHECK-LABEL: f12: +; CHECK: vfchesb [[REG:%v[0-9]+]], %v26, %v28 +; CHECK-NEXT: vno %v24, [[REG]], [[REG]] +; CHECK-NEXT: br %r14 + %cmp = fcmp ult <4 x float> %val1, %val2 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test ord. 
+define <4 x i32> @f13(<4 x i32> %dummy, <4 x float> %val1, + <4 x float> %val2) { +; CHECK-LABEL: f13: +; CHECK-DAG: vfchsb [[REG1:%v[0-9]+]], %v28, %v26 +; CHECK-DAG: vfchesb [[REG2:%v[0-9]+]], %v26, %v28 +; CHECK: vo %v24, [[REG1]], [[REG2]] +; CHECK-NEXT: br %r14 + %cmp = fcmp ord <4 x float> %val1, %val2 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test uno. +define <4 x i32> @f14(<4 x i32> %dummy, <4 x float> %val1, + <4 x float> %val2) { +; CHECK-LABEL: f14: +; CHECK-DAG: vfchsb [[REG1:%v[0-9]+]], %v28, %v26 +; CHECK-DAG: vfchesb [[REG2:%v[0-9]+]], %v26, %v28 +; CHECK: vno %v24, [[REG1]], [[REG2]] +; CHECK-NEXT: br %r14 + %cmp = fcmp uno <4 x float> %val1, %val2 + %ret = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %ret +} + +; Test oeq selects. +define <4 x float> @f15(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) { +; CHECK-LABEL: f15: +; CHECK: vfcesb [[REG:%v[0-9]+]], %v24, %v26 +; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = fcmp oeq <4 x float> %val1, %val2 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test one selects. +define <4 x float> @f16(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) { +; CHECK-LABEL: f16: +; CHECK-DAG: vfchsb [[REG1:%v[0-9]+]], %v26, %v24 +; CHECK-DAG: vfchsb [[REG2:%v[0-9]+]], %v24, %v26 +; CHECK: vo [[REG:%v[0-9]+]], [[REG1]], [[REG2]] +; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = fcmp one <4 x float> %val1, %val2 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test ogt selects. 
+define <4 x float> @f17(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) { +; CHECK-LABEL: f17: +; CHECK: vfchsb [[REG:%v[0-9]+]], %v24, %v26 +; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = fcmp ogt <4 x float> %val1, %val2 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test oge selects. +define <4 x float> @f18(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) { +; CHECK-LABEL: f18: +; CHECK: vfchesb [[REG:%v[0-9]+]], %v24, %v26 +; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = fcmp oge <4 x float> %val1, %val2 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test ole selects. +define <4 x float> @f19(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) { +; CHECK-LABEL: f19: +; CHECK: vfchesb [[REG:%v[0-9]+]], %v26, %v24 +; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = fcmp ole <4 x float> %val1, %val2 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test olt selects. +define <4 x float> @f20(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) { +; CHECK-LABEL: f20: +; CHECK: vfchsb [[REG:%v[0-9]+]], %v26, %v24 +; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = fcmp olt <4 x float> %val1, %val2 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test ueq selects. 
+define <4 x float> @f21(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) { +; CHECK-LABEL: f21: +; CHECK-DAG: vfchsb [[REG1:%v[0-9]+]], %v26, %v24 +; CHECK-DAG: vfchsb [[REG2:%v[0-9]+]], %v24, %v26 +; CHECK: vo [[REG:%v[0-9]+]], [[REG1]], [[REG2]] +; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = fcmp ueq <4 x float> %val1, %val2 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test une selects. +define <4 x float> @f22(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) { +; CHECK-LABEL: f22: +; CHECK: vfcesb [[REG:%v[0-9]+]], %v24, %v26 +; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = fcmp une <4 x float> %val1, %val2 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test ugt selects. +define <4 x float> @f23(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) { +; CHECK-LABEL: f23: +; CHECK: vfchesb [[REG:%v[0-9]+]], %v26, %v24 +; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = fcmp ugt <4 x float> %val1, %val2 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test uge selects. +define <4 x float> @f24(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) { +; CHECK-LABEL: f24: +; CHECK: vfchsb [[REG:%v[0-9]+]], %v26, %v24 +; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = fcmp uge <4 x float> %val1, %val2 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test ule selects. 
+define <4 x float> @f25(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) { +; CHECK-LABEL: f25: +; CHECK: vfchsb [[REG:%v[0-9]+]], %v24, %v26 +; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = fcmp ule <4 x float> %val1, %val2 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test ult selects. +define <4 x float> @f26(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) { +; CHECK-LABEL: f26: +; CHECK: vfchesb [[REG:%v[0-9]+]], %v24, %v26 +; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = fcmp ult <4 x float> %val1, %val2 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test ord selects. +define <4 x float> @f27(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) { +; CHECK-LABEL: f27: +; CHECK-DAG: vfchsb [[REG1:%v[0-9]+]], %v26, %v24 +; CHECK-DAG: vfchesb [[REG2:%v[0-9]+]], %v24, %v26 +; CHECK: vo [[REG:%v[0-9]+]], [[REG1]], [[REG2]] +; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = fcmp ord <4 x float> %val1, %val2 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test uno selects. +define <4 x float> @f28(<4 x float> %val1, <4 x float> %val2, + <4 x float> %val3, <4 x float> %val4) { +; CHECK-LABEL: f28: +; CHECK-DAG: vfchsb [[REG1:%v[0-9]+]], %v26, %v24 +; CHECK-DAG: vfchesb [[REG2:%v[0-9]+]], %v24, %v26 +; CHECK: vo [[REG:%v[0-9]+]], [[REG1]], [[REG2]] +; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] +; CHECK-NEXT: br %r14 + %cmp = fcmp uno <4 x float> %val1, %val2 + %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 + ret <4 x float> %ret +} + +; Test an f32 comparison that uses vector registers. 
+define i64 @f29(i64 %a, i64 %b, float %f1, <4 x float> %vec) { +; CHECK-LABEL: f29: +; CHECK: wfcsb %f0, %v24 +; CHECK-NEXT: locgrne %r2, %r3 +; CHECK: br %r14 + %f2 = extractelement <4 x float> %vec, i32 0 + %cond = fcmp oeq float %f1, %f2 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} diff --git a/test/CodeGen/SystemZ/vec-div-02.ll b/test/CodeGen/SystemZ/vec-div-02.ll new file mode 100644 index 00000000000..74e3b5148ad --- /dev/null +++ b/test/CodeGen/SystemZ/vec-div-02.ll @@ -0,0 +1,24 @@ +; Test vector division on z14. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +; Test a v4f32 division. +define <4 x float> @f1(<4 x float> %dummy, <4 x float> %val1, + <4 x float> %val2) { +; CHECK-LABEL: f1: +; CHECK: vfdsb %v24, %v26, %v28 +; CHECK: br %r14 + %ret = fdiv <4 x float> %val1, %val2 + ret <4 x float> %ret +} + +; Test an f32 division that uses vector registers. +define float @f2(<4 x float> %val1, <4 x float> %val2) { +; CHECK-LABEL: f2: +; CHECK: wfdsb %f0, %v24, %v26 +; CHECK: br %r14 + %scalar1 = extractelement <4 x float> %val1, i32 0 + %scalar2 = extractelement <4 x float> %val2, i32 0 + %ret = fdiv float %scalar1, %scalar2 + ret float %ret +} diff --git a/test/CodeGen/SystemZ/vec-intrinsics-02.ll b/test/CodeGen/SystemZ/vec-intrinsics-02.ll index 27ee83fc774..84c6a078403 100644 --- a/test/CodeGen/SystemZ/vec-intrinsics-02.ll +++ b/test/CodeGen/SystemZ/vec-intrinsics-02.ll @@ -6,8 +6,17 @@ declare <2 x i64> @llvm.s390.vbperm(<16 x i8>, <16 x i8>) declare <16 x i8> @llvm.s390.vmslg(<2 x i64>, <2 x i64>, <16 x i8>, i32) declare <16 x i8> @llvm.s390.vlrl(i32, i8 *) declare void @llvm.s390.vstrl(<16 x i8>, i32, i8 *) + +declare {<4 x i32>, i32} @llvm.s390.vfcesbs(<4 x float>, <4 x float>) +declare {<4 x i32>, i32} @llvm.s390.vfchsbs(<4 x float>, <4 x float>) +declare {<4 x i32>, i32} @llvm.s390.vfchesbs(<4 x float>, <4 x float>) +declare {<4 x i32>, i32} @llvm.s390.vftcisb(<4 x float>, i32) +declare <4 x float> 
@llvm.s390.vfisb(<4 x float>, i32, i32) + declare <2 x double> @llvm.s390.vfmaxdb(<2 x double>, <2 x double>, i32) declare <2 x double> @llvm.s390.vfmindb(<2 x double>, <2 x double>, i32) +declare <4 x float> @llvm.s390.vfmaxsb(<4 x float>, <4 x float>, i32) +declare <4 x float> @llvm.s390.vfminsb(<4 x float>, <4 x float>, i32) ; VBPERM. define <2 x i64> @test_vbperm(<16 x i8> %a, <16 x i8> %b) { @@ -192,6 +201,208 @@ define void @test_vstrl4(<16 x i8> %vec, i8 *%base, i64 %index) { ret void } +; VFCESBS with no processing of the result. +define i32 @test_vfcesbs(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: test_vfcesbs: +; CHECK: vfcesbs {{%v[0-9]+}}, %v24, %v26 +; CHECK: ipm %r2 +; CHECK: srl %r2, 28 +; CHECK: br %r14 + %call = call {<4 x i32>, i32} @llvm.s390.vfcesbs(<4 x float> %a, + <4 x float> %b) + %res = extractvalue {<4 x i32>, i32} %call, 1 + ret i32 %res +} + +; VFCESBS, returning 1 if any elements are equal (CC != 3). +define i32 @test_vfcesbs_any_bool(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: test_vfcesbs_any_bool: +; CHECK: vfcesbs {{%v[0-9]+}}, %v24, %v26 +; CHECK: ipm %r2 +; CHECK: afi %r2, -536870912 +; CHECK: srl %r2, 31 +; CHECK: br %r14 + %call = call {<4 x i32>, i32} @llvm.s390.vfcesbs(<4 x float> %a, + <4 x float> %b) + %res = extractvalue {<4 x i32>, i32} %call, 1 + %cmp = icmp ne i32 %res, 3 + %ext = zext i1 %cmp to i32 + ret i32 %ext +} + +; VFCESBS, storing to %ptr if any elements are equal. 
+define <4 x i32> @test_vfcesbs_any_store(<4 x float> %a, <4 x float> %b, + i32 *%ptr) { +; CHECK-LABEL: test_vfcesbs_any_store: +; CHECK-NOT: %r +; CHECK: vfcesbs %v24, %v24, %v26 +; CHECK-NEXT: {{bor|bnler}} %r14 +; CHECK: mvhi 0(%r2), 0 +; CHECK: br %r14 + %call = call {<4 x i32>, i32} @llvm.s390.vfcesbs(<4 x float> %a, + <4 x float> %b) + %res = extractvalue {<4 x i32>, i32} %call, 0 + %cc = extractvalue {<4 x i32>, i32} %call, 1 + %cmp = icmp ule i32 %cc, 2 + br i1 %cmp, label %store, label %exit + +store: + store i32 0, i32 *%ptr + br label %exit + +exit: + ret <4 x i32> %res +} + +; VFCHSBS with no processing of the result. +define i32 @test_vfchsbs(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: test_vfchsbs: +; CHECK: vfchsbs {{%v[0-9]+}}, %v24, %v26 +; CHECK: ipm %r2 +; CHECK: srl %r2, 28 +; CHECK: br %r14 + %call = call {<4 x i32>, i32} @llvm.s390.vfchsbs(<4 x float> %a, + <4 x float> %b) + %res = extractvalue {<4 x i32>, i32} %call, 1 + ret i32 %res +} + +; VFCHSBS, returning 1 if not all elements are higher. +define i32 @test_vfchsbs_notall_bool(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: test_vfchsbs_notall_bool: +; CHECK: vfchsbs {{%v[0-9]+}}, %v24, %v26 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: risblg %r2, [[REG]], 31, 159, 36 +; CHECK: br %r14 + %call = call {<4 x i32>, i32} @llvm.s390.vfchsbs(<4 x float> %a, + <4 x float> %b) + %res = extractvalue {<4 x i32>, i32} %call, 1 + %cmp = icmp sge i32 %res, 1 + %ext = zext i1 %cmp to i32 + ret i32 %ext +} + +; VFCHSBS, storing to %ptr if not all elements are higher. 
+define <4 x i32> @test_vfchsbs_notall_store(<4 x float> %a, <4 x float> %b, + i32 *%ptr) { +; CHECK-LABEL: test_vfchsbs_notall_store: +; CHECK-NOT: %r +; CHECK: vfchsbs %v24, %v24, %v26 +; CHECK-NEXT: {{bher|ber}} %r14 +; CHECK: mvhi 0(%r2), 0 +; CHECK: br %r14 + %call = call {<4 x i32>, i32} @llvm.s390.vfchsbs(<4 x float> %a, + <4 x float> %b) + %res = extractvalue {<4 x i32>, i32} %call, 0 + %cc = extractvalue {<4 x i32>, i32} %call, 1 + %cmp = icmp ugt i32 %cc, 0 + br i1 %cmp, label %store, label %exit + +store: + store i32 0, i32 *%ptr + br label %exit + +exit: + ret <4 x i32> %res +} + +; VFCHESBS with no processing of the result. +define i32 @test_vfchesbs(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: test_vfchesbs: +; CHECK: vfchesbs {{%v[0-9]+}}, %v24, %v26 +; CHECK: ipm %r2 +; CHECK: srl %r2, 28 +; CHECK: br %r14 + %call = call {<4 x i32>, i32} @llvm.s390.vfchesbs(<4 x float> %a, + <4 x float> %b) + %res = extractvalue {<4 x i32>, i32} %call, 1 + ret i32 %res +} + +; VFCHESBS, returning 1 if neither element is higher or equal. +define i32 @test_vfchesbs_none_bool(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: test_vfchesbs_none_bool: +; CHECK: vfchesbs {{%v[0-9]+}}, %v24, %v26 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: risblg %r2, [[REG]], 31, 159, 35 +; CHECK: br %r14 + %call = call {<4 x i32>, i32} @llvm.s390.vfchesbs(<4 x float> %a, + <4 x float> %b) + %res = extractvalue {<4 x i32>, i32} %call, 1 + %cmp = icmp eq i32 %res, 3 + %ext = zext i1 %cmp to i32 + ret i32 %ext +} + +; VFCHESBS, storing to %ptr if neither element is higher or equal. 
+define <4 x i32> @test_vfchesbs_none_store(<4 x float> %a, <4 x float> %b, + i32 *%ptr) { +; CHECK-LABEL: test_vfchesbs_none_store: +; CHECK-NOT: %r +; CHECK: vfchesbs %v24, %v24, %v26 +; CHECK-NEXT: {{bnor|bler}} %r14 +; CHECK: mvhi 0(%r2), 0 +; CHECK: br %r14 + %call = call {<4 x i32>, i32} @llvm.s390.vfchesbs(<4 x float> %a, + <4 x float> %b) + %res = extractvalue {<4 x i32>, i32} %call, 0 + %cc = extractvalue {<4 x i32>, i32} %call, 1 + %cmp = icmp uge i32 %cc, 3 + br i1 %cmp, label %store, label %exit + +store: + store i32 0, i32 *%ptr + br label %exit + +exit: + ret <4 x i32> %res +} + +; VFTCISB with the lowest useful class selector and no processing of the result. +define i32 @test_vftcisb(<4 x float> %a) { +; CHECK-LABEL: test_vftcisb: +; CHECK: vftcisb {{%v[0-9]+}}, %v24, 1 +; CHECK: ipm %r2 +; CHECK: srl %r2, 28 +; CHECK: br %r14 + %call = call {<4 x i32>, i32} @llvm.s390.vftcisb(<4 x float> %a, i32 1) + %res = extractvalue {<4 x i32>, i32} %call, 1 + ret i32 %res +} + +; VFTCISB with the highest useful class selector, returning 1 if all elements +; have the right class (CC == 0). +define i32 @test_vftcisb_all_bool(<4 x float> %a) { +; CHECK-LABEL: test_vftcisb_all_bool: +; CHECK: vftcisb {{%v[0-9]+}}, %v24, 4094 +; CHECK: afi %r2, -268435456 +; CHECK: srl %r2, 31 +; CHECK: br %r14 + %call = call {<4 x i32>, i32} @llvm.s390.vftcisb(<4 x float> %a, i32 4094) + %res = extractvalue {<4 x i32>, i32} %call, 1 + %cmp = icmp eq i32 %res, 0 + %ext = zext i1 %cmp to i32 + ret i32 %ext +} + +; VFISB with a rounding mode not usable via standard intrinsics. +define <4 x float> @test_vfisb_0_4(<4 x float> %a) { +; CHECK-LABEL: test_vfisb_0_4: +; CHECK: vfisb %v24, %v24, 0, 4 +; CHECK: br %r14 + %res = call <4 x float> @llvm.s390.vfisb(<4 x float> %a, i32 0, i32 4) + ret <4 x float> %res +} + +; VFISB with IEEE-inexact exception suppressed. 
+define <4 x float> @test_vfisb_4_0(<4 x float> %a) { +; CHECK-LABEL: test_vfisb_4_0: +; CHECK: vfisb %v24, %v24, 4, 0 +; CHECK: br %r14 + %res = call <4 x float> @llvm.s390.vfisb(<4 x float> %a, i32 4, i32 0) + ret <4 x float> %res +} + ; VFMAXDB. define <2 x double> @test_vfmaxdb(<2 x double> %a, <2 x double> %b) { ; CHECK-LABEL: test_vfmaxdb: @@ -210,3 +421,21 @@ define <2 x double> @test_vfmindb(<2 x double> %a, <2 x double> %b) { ret <2 x double> %res } +; VFMAXSB. +define <4 x float> @test_vfmaxsb(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: test_vfmaxsb: +; CHECK: vfmaxsb %v24, %v24, %v26, 4 +; CHECK: br %r14 + %res = call <4 x float> @llvm.s390.vfmaxsb(<4 x float> %a, <4 x float> %b, i32 4) + ret <4 x float> %res +} + +; VFMINSB. +define <4 x float> @test_vfminsb(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: test_vfminsb: +; CHECK: vfminsb %v24, %v24, %v26, 4 +; CHECK: br %r14 + %res = call <4 x float> @llvm.s390.vfminsb(<4 x float> %a, <4 x float> %b, i32 4) + ret <4 x float> %res +} + diff --git a/test/CodeGen/SystemZ/vec-max-05.ll b/test/CodeGen/SystemZ/vec-max-05.ll index 44efac76423..47400b8c66b 100644 --- a/test/CodeGen/SystemZ/vec-max-05.ll +++ b/test/CodeGen/SystemZ/vec-max-05.ll @@ -6,6 +6,10 @@ declare double @fmax(double, double) declare double @llvm.maxnum.f64(double, double) declare <2 x double> @llvm.maxnum.v2f64(<2 x double>, <2 x double>) +declare float @fmaxf(float, float) +declare float @llvm.maxnum.f32(float, float) +declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>) + ; Test the fmax library function. define double @f1(double %dummy, double %val1, double %val2) { ; CHECK-LABEL: f1: @@ -56,3 +60,53 @@ define <2 x double> @f5(<2 x double> %dummy, <2 x double> %val1, ret <2 x double> %ret } +; Test the fmaxf library function. 
+define float @f11(float %dummy, float %val1, float %val2) { +; CHECK-LABEL: f11: +; CHECK: wfmaxsb %f0, %f2, %f4, 4 +; CHECK: br %r14 + %ret = call float @fmaxf(float %val1, float %val2) readnone + ret float %ret +} + +; Test the f32 maxnum intrinsic. +define float @f12(float %dummy, float %val1, float %val2) { +; CHECK-LABEL: f12: +; CHECK: wfmaxsb %f0, %f2, %f4, 4 +; CHECK: br %r14 + %ret = call float @llvm.maxnum.f32(float %val1, float %val2) + ret float %ret +} + +; Test a f32 constant compare/select resulting in maxnum. +define float @f13(float %dummy, float %val) { +; CHECK-LABEL: f13: +; CHECK: lzer [[REG:%f[0-9]+]] +; CHECK: wfmaxsb %f0, %f2, [[REG]], 4 +; CHECK: br %r14 + %cmp = fcmp ogt float %val, 0.0 + %ret = select i1 %cmp, float %val, float 0.0 + ret float %ret +} + +; Test a f32 constant compare/select resulting in maxnan. +define float @f14(float %dummy, float %val) { +; CHECK-LABEL: f14: +; CHECK: lzer [[REG:%f[0-9]+]] +; CHECK: wfmaxsb %f0, %f2, [[REG]], 1 +; CHECK: br %r14 + %cmp = fcmp ugt float %val, 0.0 + %ret = select i1 %cmp, float %val, float 0.0 + ret float %ret +} + +; Test the v4f32 maxnum intrinsic. +define <4 x float> @f15(<4 x float> %dummy, <4 x float> %val1, + <4 x float> %val2) { +; CHECK-LABEL: f15: +; CHECK: vfmaxsb %v24, %v26, %v28, 4 +; CHECK: br %r14 + %ret = call <4 x float> @llvm.maxnum.v4f32(<4 x float> %val1, <4 x float> %val2) + ret <4 x float> %ret +} + diff --git a/test/CodeGen/SystemZ/vec-min-05.ll b/test/CodeGen/SystemZ/vec-min-05.ll index c2d8726addf..b84ea6b6b4f 100644 --- a/test/CodeGen/SystemZ/vec-min-05.ll +++ b/test/CodeGen/SystemZ/vec-min-05.ll @@ -6,6 +6,10 @@ declare double @fmin(double, double) declare double @llvm.minnum.f64(double, double) declare <2 x double> @llvm.minnum.v2f64(<2 x double>, <2 x double>) +declare float @fminf(float, float) +declare float @llvm.minnum.f32(float, float) +declare <4 x float> @llvm.minnum.v4f32(<4 x float>, <4 x float>) + ; Test the fmin library function. 
define double @f1(double %dummy, double %val1, double %val2) { ; CHECK-LABEL: f1: @@ -56,3 +60,53 @@ define <2 x double> @f5(<2 x double> %dummy, <2 x double> %val1, ret <2 x double> %ret } +; Test the fminf library function. +define float @f11(float %dummy, float %val1, float %val2) { +; CHECK-LABEL: f11: +; CHECK: wfminsb %f0, %f2, %f4, 4 +; CHECK: br %r14 + %ret = call float @fminf(float %val1, float %val2) readnone + ret float %ret +} + +; Test the f32 minnum intrinsic. +define float @f12(float %dummy, float %val1, float %val2) { +; CHECK-LABEL: f12: +; CHECK: wfminsb %f0, %f2, %f4, 4 +; CHECK: br %r14 + %ret = call float @llvm.minnum.f32(float %val1, float %val2) + ret float %ret +} + +; Test a f32 constant compare/select resulting in minnum. +define float @f13(float %dummy, float %val) { +; CHECK-LABEL: f13: +; CHECK: lzer [[REG:%f[0-9]+]] +; CHECK: wfminsb %f0, %f2, [[REG]], 4 +; CHECK: br %r14 + %cmp = fcmp olt float %val, 0.0 + %ret = select i1 %cmp, float %val, float 0.0 + ret float %ret +} + +; Test a f32 constant compare/select resulting in minnan. +define float @f14(float %dummy, float %val) { +; CHECK-LABEL: f14: +; CHECK: lzer [[REG:%f[0-9]+]] +; CHECK: wfminsb %f0, %f2, [[REG]], 1 +; CHECK: br %r14 + %cmp = fcmp ult float %val, 0.0 + %ret = select i1 %cmp, float %val, float 0.0 + ret float %ret +} + +; Test the v4f32 minnum intrinsic. +define <4 x float> @f15(<4 x float> %dummy, <4 x float> %val1, + <4 x float> %val2) { +; CHECK-LABEL: f15: +; CHECK: vfminsb %v24, %v26, %v28, 4 +; CHECK: br %r14 + %ret = call <4 x float> @llvm.minnum.v4f32(<4 x float> %val1, <4 x float> %val2) + ret <4 x float> %ret +} + diff --git a/test/CodeGen/SystemZ/vec-mul-03.ll b/test/CodeGen/SystemZ/vec-mul-03.ll new file mode 100644 index 00000000000..3733db9fb33 --- /dev/null +++ b/test/CodeGen/SystemZ/vec-mul-03.ll @@ -0,0 +1,24 @@ +; Test vector multiplication on z14. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +; Test a v4f32 multiplication. 
+define <4 x float> @f1(<4 x float> %dummy, <4 x float> %val1, + <4 x float> %val2) { +; CHECK-LABEL: f1: +; CHECK: vfmsb %v24, %v26, %v28 +; CHECK: br %r14 + %ret = fmul <4 x float> %val1, %val2 + ret <4 x float> %ret +} + +; Test an f32 multiplication that uses vector registers. +define float @f2(<4 x float> %val1, <4 x float> %val2) { +; CHECK-LABEL: f2: +; CHECK: wfmsb %f0, %v24, %v26 +; CHECK: br %r14 + %scalar1 = extractelement <4 x float> %val1, i32 0 + %scalar2 = extractelement <4 x float> %val2, i32 0 + %ret = fmul float %scalar1, %scalar2 + ret float %ret +} diff --git a/test/CodeGen/SystemZ/vec-mul-04.ll b/test/CodeGen/SystemZ/vec-mul-04.ll new file mode 100644 index 00000000000..d96f0b6a745 --- /dev/null +++ b/test/CodeGen/SystemZ/vec-mul-04.ll @@ -0,0 +1,31 @@ +; Test vector multiply-and-add on z14. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) + +; Test a v4f32 multiply-and-add. +define <4 x float> @f1(<4 x float> %dummy, <4 x float> %val1, + <4 x float> %val2, <4 x float> %val3) { +; CHECK-LABEL: f1: +; CHECK: vfmasb %v24, %v26, %v28, %v30 +; CHECK: br %r14 + %ret = call <4 x float> @llvm.fma.v4f32 (<4 x float> %val1, + <4 x float> %val2, + <4 x float> %val3) + ret <4 x float> %ret +} + +; Test a v4f32 multiply-and-subtract. 
+define <4 x float> @f2(<4 x float> %dummy, <4 x float> %val1, + <4 x float> %val2, <4 x float> %val3) { +; CHECK-LABEL: f2: +; CHECK: vfmssb %v24, %v26, %v28, %v30 +; CHECK: br %r14 + %negval3 = fsub <4 x float> <float -0.0, float -0.0, + float -0.0, float -0.0>, %val3 + %ret = call <4 x float> @llvm.fma.v4f32 (<4 x float> %val1, + <4 x float> %val2, + <4 x float> %negval3) + ret <4 x float> %ret +} diff --git a/test/CodeGen/SystemZ/vec-mul-05.ll b/test/CodeGen/SystemZ/vec-mul-05.ll index c05437d4923..90a1f7a7efd 100644 --- a/test/CodeGen/SystemZ/vec-mul-05.ll +++ b/test/CodeGen/SystemZ/vec-mul-05.ll @@ -3,6 +3,7 @@ ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) +declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) ; Test a v2f64 negative multiply-and-add. define <2 x double> @f1(<2 x double> %dummy, <2 x double> %val1, @@ -30,3 +31,33 @@ define <2 x double> @f2(<2 x double> %dummy, <2 x double> %val1, %negret = fsub <2 x double> <double -0.0, double -0.0>, %ret ret <2 x double> %negret } + +; Test a v4f32 negative multiply-and-add. +define <4 x float> @f3(<4 x float> %dummy, <4 x float> %val1, + <4 x float> %val2, <4 x float> %val3) { +; CHECK-LABEL: f3: +; CHECK: vfnmasb %v24, %v26, %v28, %v30 +; CHECK: br %r14 + %ret = call <4 x float> @llvm.fma.v4f32 (<4 x float> %val1, + <4 x float> %val2, + <4 x float> %val3) + %negret = fsub <4 x float> <float -0.0, float -0.0, + float -0.0, float -0.0>, %ret + ret <4 x float> %negret +} + +; Test a v4f32 negative multiply-and-subtract. 
+define <4 x float> @f4(<4 x float> %dummy, <4 x float> %val1, + <4 x float> %val2, <4 x float> %val3) { +; CHECK-LABEL: f4: +; CHECK: vfnmssb %v24, %v26, %v28, %v30 +; CHECK: br %r14 + %negval3 = fsub <4 x float> <float -0.0, float -0.0, + float -0.0, float -0.0>, %val3 + %ret = call <4 x float> @llvm.fma.v4f32 (<4 x float> %val1, + <4 x float> %val2, + <4 x float> %negval3) + %negret = fsub <4 x float> <float -0.0, float -0.0, + float -0.0, float -0.0>, %ret + ret <4 x float> %negret +} diff --git a/test/CodeGen/SystemZ/vec-neg-02.ll b/test/CodeGen/SystemZ/vec-neg-02.ll new file mode 100644 index 00000000000..07ce037542f --- /dev/null +++ b/test/CodeGen/SystemZ/vec-neg-02.ll @@ -0,0 +1,23 @@ +; Test vector negation on z14. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +; Test a v4f32 negation. +define <4 x float> @f1(<4 x float> %dummy, <4 x float> %val) { +; CHECK-LABEL: f1: +; CHECK: vflcsb %v24, %v26 +; CHECK: br %r14 + %ret = fsub <4 x float> <float -0.0, float -0.0, + float -0.0, float -0.0>, %val + ret <4 x float> %ret +} + +; Test an f32 negation that uses vector registers. +define float @f2(<4 x float> %val) { +; CHECK-LABEL: f2: +; CHECK: wflcsb %f0, %v24 +; CHECK: br %r14 + %scalar = extractelement <4 x float> %val, i32 0 + %ret = fsub float -0.0, %scalar + ret float %ret +} diff --git a/test/CodeGen/SystemZ/vec-round-02.ll b/test/CodeGen/SystemZ/vec-round-02.ll new file mode 100644 index 00000000000..bcd66ea803d --- /dev/null +++ b/test/CodeGen/SystemZ/vec-round-02.ll @@ -0,0 +1,118 @@ +; Test v4f32 rounding on z14. 
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +declare float @llvm.rint.f32(float) +declare float @llvm.nearbyint.f32(float) +declare float @llvm.floor.f32(float) +declare float @llvm.ceil.f32(float) +declare float @llvm.trunc.f32(float) +declare float @llvm.round.f32(float) +declare <4 x float> @llvm.rint.v4f32(<4 x float>) +declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>) +declare <4 x float> @llvm.floor.v4f32(<4 x float>) +declare <4 x float> @llvm.ceil.v4f32(<4 x float>) +declare <4 x float> @llvm.trunc.v4f32(<4 x float>) +declare <4 x float> @llvm.round.v4f32(<4 x float>) + +define <4 x float> @f1(<4 x float> %val) { +; CHECK-LABEL: f1: +; CHECK: vfisb %v24, %v24, 0, 0 +; CHECK: br %r14 + %res = call <4 x float> @llvm.rint.v4f32(<4 x float> %val) + ret <4 x float> %res +} + +define <4 x float> @f2(<4 x float> %val) { +; CHECK-LABEL: f2: +; CHECK: vfisb %v24, %v24, 4, 0 +; CHECK: br %r14 + %res = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %val) + ret <4 x float> %res +} + +define <4 x float> @f3(<4 x float> %val) { +; CHECK-LABEL: f3: +; CHECK: vfisb %v24, %v24, 4, 7 +; CHECK: br %r14 + %res = call <4 x float> @llvm.floor.v4f32(<4 x float> %val) + ret <4 x float> %res +} + +define <4 x float> @f4(<4 x float> %val) { +; CHECK-LABEL: f4: +; CHECK: vfisb %v24, %v24, 4, 6 +; CHECK: br %r14 + %res = call <4 x float> @llvm.ceil.v4f32(<4 x float> %val) + ret <4 x float> %res +} + +define <4 x float> @f5(<4 x float> %val) { +; CHECK-LABEL: f5: +; CHECK: vfisb %v24, %v24, 4, 5 +; CHECK: br %r14 + %res = call <4 x float> @llvm.trunc.v4f32(<4 x float> %val) + ret <4 x float> %res +} + +define <4 x float> @f6(<4 x float> %val) { +; CHECK-LABEL: f6: +; CHECK: vfisb %v24, %v24, 4, 1 +; CHECK: br %r14 + %res = call <4 x float> @llvm.round.v4f32(<4 x float> %val) + ret <4 x float> %res +} + +define float @f7(<4 x float> %val) { +; CHECK-LABEL: f7: +; CHECK: wfisb %f0, %v24, 0, 0 +; CHECK: br %r14 + %scalar = extractelement <4 x float> 
%val, i32 0 + %res = call float @llvm.rint.f32(float %scalar) + ret float %res +} + +define float @f8(<4 x float> %val) { +; CHECK-LABEL: f8: +; CHECK: wfisb %f0, %v24, 4, 0 +; CHECK: br %r14 + %scalar = extractelement <4 x float> %val, i32 0 + %res = call float @llvm.nearbyint.f32(float %scalar) + ret float %res +} + +define float @f9(<4 x float> %val) { +; CHECK-LABEL: f9: +; CHECK: wfisb %f0, %v24, 4, 7 +; CHECK: br %r14 + %scalar = extractelement <4 x float> %val, i32 0 + %res = call float @llvm.floor.f32(float %scalar) + ret float %res +} + +define float @f10(<4 x float> %val) { +; CHECK-LABEL: f10: +; CHECK: wfisb %f0, %v24, 4, 6 +; CHECK: br %r14 + %scalar = extractelement <4 x float> %val, i32 0 + %res = call float @llvm.ceil.f32(float %scalar) + ret float %res +} + +define float @f11(<4 x float> %val) { +; CHECK-LABEL: f11: +; CHECK: wfisb %f0, %v24, 4, 5 +; CHECK: br %r14 + %scalar = extractelement <4 x float> %val, i32 0 + %res = call float @llvm.trunc.f32(float %scalar) + ret float %res +} + +define float @f12(<4 x float> %val) { +; CHECK-LABEL: f12: +; CHECK: wfisb %f0, %v24, 4, 1 +; CHECK: br %r14 + %scalar = extractelement <4 x float> %val, i32 0 + %res = call float @llvm.round.f32(float %scalar) + ret float %res +} diff --git a/test/CodeGen/SystemZ/vec-sqrt-02.ll b/test/CodeGen/SystemZ/vec-sqrt-02.ll new file mode 100644 index 00000000000..6970d9db669 --- /dev/null +++ b/test/CodeGen/SystemZ/vec-sqrt-02.ll @@ -0,0 +1,23 @@ +; Test f32 and v4f32 square root (llvm.sqrt.f32 / llvm.sqrt.v4f32) on z14. 
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +declare float @llvm.sqrt.f32(float) +declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) + +define <4 x float> @f1(<4 x float> %val) { +; CHECK-LABEL: f1: +; CHECK: vfsqsb %v24, %v24 +; CHECK: br %r14 + %ret = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %val) + ret <4 x float> %ret +} + +define float @f2(<4 x float> %val) { +; CHECK-LABEL: f2: +; CHECK: wfsqsb %f0, %v24 +; CHECK: br %r14 + %scalar = extractelement <4 x float> %val, i32 0 + %ret = call float @llvm.sqrt.f32(float %scalar) + ret float %ret +} diff --git a/test/CodeGen/SystemZ/vec-sub-02.ll b/test/CodeGen/SystemZ/vec-sub-02.ll new file mode 100644 index 00000000000..83c76b5d4aa --- /dev/null +++ b/test/CodeGen/SystemZ/vec-sub-02.ll @@ -0,0 +1,31 @@ +; Test vector subtraction on z14. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +; Test a v4f32 subtraction. +define <4 x float> @f6(<4 x float> %dummy, <4 x float> %val1, + <4 x float> %val2) { +; CHECK-LABEL: f6: +; CHECK: vfssb %v24, %v26, %v28 +; CHECK: br %r14 + %ret = fsub <4 x float> %val1, %val2 + ret <4 x float> %ret +} + +; Test an f32 subtraction that uses vector registers. +define float @f7(<4 x float> %val1, <4 x float> %val2) { +; CHECK-LABEL: f7: +; CHECK: wfssb %f0, %v24, %v26 +; CHECK: br %r14 + %scalar1 = extractelement <4 x float> %val1, i32 0 + %scalar2 = extractelement <4 x float> %val2, i32 0 + %ret = fsub float %scalar1, %scalar2 + ret float %ret +} + +; Test a v2f32 subtraction, which gets promoted to v4f32. +define <2 x float> @f14(<2 x float> %val1, <2 x float> %val2) { +; No particular output is expected for this function; it only has to compile. + %ret = fsub <2 x float> %val1, %val2 + ret <2 x float> %ret +} |