author     Ulrich Weigand <ulrich.weigand@de.ibm.com>    2017-07-17 17:41:11 +0000
committer  Ulrich Weigand <ulrich.weigand@de.ibm.com>    2017-07-17 17:41:11 +0000
commit     8ab242ce954d3dfec4d07bb436aee26e38d48553 (patch)
tree       eb830d37e7a03904678f369c7131a9d894d5b71c /test/CodeGen/SystemZ
parent     02972a4b38d953bb0af26108e8285f6f407a7256 (diff)
[SystemZ] Add support for IBM z14 processor (1/3)
This patch series adds support for the IBM z14 processor. This part includes:
- Basic support for the new processor and its features.
- Support for new instructions (except vector 32-bit float and 128-bit float).
- CodeGen for new instructions, including new LLVM intrinsics.
- Scheduler description for the new processor.
- Detection of z14 as host processor.
Support for the new 32-bit vector float and 128-bit vector float
instructions is provided by separate patches.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@308194 91177308-0d34-0410-b5e6-96231b3b80d8
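As a quick orientation (a minimal sketch, not part of the patch itself), the new processor is selected with -mcpu=z14. The sketch below is modeled directly on the MSRKC check in int-mul-11.ll further down; the @mul name is illustrative, and the leading %dummy argument is the same trick the test uses to force the three-operand form:

; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
; A plain i32 multiply, expected to select the new three-operand MSRKC on z14.
define i32 @mul(i32 %dummy, i32 %a, i32 %b) {
; CHECK-LABEL: mul:
; CHECK: msrkc %r2, %r3, %r4
; CHECK: br %r14
  %res = mul i32 %a, %b
  ret i32 %res
}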
Diffstat (limited to 'test/CodeGen/SystemZ')
-rw-r--r--  test/CodeGen/SystemZ/branch-11.ll                                                  |  56
-rw-r--r--  test/CodeGen/SystemZ/fp-mul-10.ll                                                  |  23
-rw-r--r--  test/CodeGen/SystemZ/int-add-17.ll                                                 |  95
-rw-r--r--  test/CodeGen/SystemZ/int-mul-09.ll                                                 |  95
-rw-r--r--  test/CodeGen/SystemZ/int-mul-10.ll                                                 | 165
-rw-r--r--  test/CodeGen/SystemZ/int-mul-11.ll                                                 |  32
-rw-r--r--  test/CodeGen/SystemZ/int-sub-10.ll                                                 |  95
-rw-r--r--  test/CodeGen/SystemZ/vec-and-04.ll                                                 |  47
-rw-r--r--  test/CodeGen/SystemZ/vec-ctpop-02.ll                                               |  45
-rw-r--r--  test/CodeGen/SystemZ/vec-intrinsics-01.ll (renamed from vec-intrinsics.ll)         |   0
-rw-r--r--  test/CodeGen/SystemZ/vec-intrinsics-02.ll                                          | 212
-rw-r--r--  test/CodeGen/SystemZ/vec-max-05.ll                                                 |  58
-rw-r--r--  test/CodeGen/SystemZ/vec-min-05.ll                                                 |  58
-rw-r--r--  test/CodeGen/SystemZ/vec-move-18.ll                                                |  24
-rw-r--r--  test/CodeGen/SystemZ/vec-mul-05.ll                                                 |  32
-rw-r--r--  test/CodeGen/SystemZ/vec-or-03.ll                                                  |  91
-rw-r--r--  test/CodeGen/SystemZ/vec-xor-02.ll                                                 |  47
17 files changed, 1175 insertions(+), 0 deletions(-)
diff --git a/test/CodeGen/SystemZ/branch-11.ll b/test/CodeGen/SystemZ/branch-11.ll
new file mode 100644
index 00000000000..ce7b3ef267b
--- /dev/null
+++ b/test/CodeGen/SystemZ/branch-11.ll
@@ -0,0 +1,56 @@
+; Test indirect jumps on z14.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+define i32 @f1(i32 %x, i32 %y, i32 %op) {
+; CHECK-LABEL: f1:
+; CHECK: ahi %r4, -1
+; CHECK: clibh %r4, 5, 0(%r14)
+; CHECK: llgfr [[OP64:%r[0-5]]], %r4
+; CHECK: sllg [[INDEX:%r[1-5]]], [[OP64]], 3
+; CHECK: larl [[BASE:%r[1-5]]]
+; CHECK: bi 0([[BASE]],[[INDEX]])
+entry:
+  switch i32 %op, label %exit [
+    i32 1, label %b.add
+    i32 2, label %b.sub
+    i32 3, label %b.and
+    i32 4, label %b.or
+    i32 5, label %b.xor
+    i32 6, label %b.mul
+  ]
+
+b.add:
+  %add = add i32 %x, %y
+  br label %exit
+
+b.sub:
+  %sub = sub i32 %x, %y
+  br label %exit
+
+b.and:
+  %and = and i32 %x, %y
+  br label %exit
+
+b.or:
+  %or = or i32 %x, %y
+  br label %exit
+
+b.xor:
+  %xor = xor i32 %x, %y
+  br label %exit
+
+b.mul:
+  %mul = mul i32 %x, %y
+  br label %exit
+
+exit:
+  %res = phi i32 [ %x, %entry ],
+                 [ %add, %b.add ],
+                 [ %sub, %b.sub ],
+                 [ %and, %b.and ],
+                 [ %or, %b.or ],
+                 [ %xor, %b.xor ],
+                 [ %mul, %b.mul ]
+  ret i32 %res
+}
diff --git a/test/CodeGen/SystemZ/fp-mul-10.ll b/test/CodeGen/SystemZ/fp-mul-10.ll
new file mode 100644
index 00000000000..977e5c60e3a
--- /dev/null
+++ b/test/CodeGen/SystemZ/fp-mul-10.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+declare double @llvm.fma.f64(double %f1, double %f2, double %f3)
+
+define double @f1(double %f1, double %f2, double %acc) {
+; CHECK-LABEL: f1:
+; CHECK: wfnmadb %f0, %f0, %f2, %f4
+; CHECK: br %r14
+  %res = call double @llvm.fma.f64 (double %f1, double %f2, double %acc)
+  %negres = fsub double -0.0, %res
+  ret double %negres
+}
+
+define double @f2(double %f1, double %f2, double %acc) {
+; CHECK-LABEL: f2:
+; CHECK: wfnmsdb %f0, %f0, %f2, %f4
+; CHECK: br %r14
+  %negacc = fsub double -0.0, %acc
+  %res = call double @llvm.fma.f64 (double %f1, double %f2, double %negacc)
+  %negres = fsub double -0.0, %res
+  ret double %negres
+}
+
diff --git a/test/CodeGen/SystemZ/int-add-17.ll b/test/CodeGen/SystemZ/int-add-17.ll
new file mode 100644
index 00000000000..fd245871c65
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-add-17.ll
@@ -0,0 +1,95 @@
+; Test additions between an i64 and a sign-extended i16 on z14.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+declare i64 @foo()
+
+; Check AGH with no displacement.
+define i64 @f1(i64 %a, i16 *%src) {
+; CHECK-LABEL: f1:
+; CHECK: agh %r2, 0(%r3)
+; CHECK: br %r14
+  %b = load i16, i16 *%src
+  %bext = sext i16 %b to i64
+  %add = add i64 %a, %bext
+  ret i64 %add
+}
+
+; Check the high end of the aligned AGH range.
+define i64 @f2(i64 %a, i16 *%src) {
+; CHECK-LABEL: f2:
+; CHECK: agh %r2, 524286(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i16, i16 *%src, i64 262143
+  %b = load i16, i16 *%ptr
+  %bext = sext i16 %b to i64
+  %add = add i64 %a, %bext
+  ret i64 %add
+}
+
+; Check the next word up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i64 @f3(i64 %a, i16 *%src) {
+; CHECK-LABEL: f3:
+; CHECK: agfi %r3, 524288
+; CHECK: agh %r2, 0(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i16, i16 *%src, i64 262144
+  %b = load i16, i16 *%ptr
+  %bext = sext i16 %b to i64
+  %add = add i64 %a, %bext
+  ret i64 %add
+}
+
+; Check the high end of the negative aligned AGH range.
+define i64 @f4(i64 %a, i16 *%src) {
+; CHECK-LABEL: f4:
+; CHECK: agh %r2, -2(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i16, i16 *%src, i64 -1
+  %b = load i16, i16 *%ptr
+  %bext = sext i16 %b to i64
+  %add = add i64 %a, %bext
+  ret i64 %add
+}
+
+; Check the low end of the AGH range.
+define i64 @f5(i64 %a, i16 *%src) {
+; CHECK-LABEL: f5:
+; CHECK: agh %r2, -524288(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i16, i16 *%src, i64 -262144
+  %b = load i16, i16 *%ptr
+  %bext = sext i16 %b to i64
+  %add = add i64 %a, %bext
+  ret i64 %add
+}
+
+; Check the next word down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i64 @f6(i64 %a, i16 *%src) {
+; CHECK-LABEL: f6:
+; CHECK: agfi %r3, -524290
+; CHECK: agh %r2, 0(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i16, i16 *%src, i64 -262145
+  %b = load i16, i16 *%ptr
+  %bext = sext i16 %b to i64
+  %add = add i64 %a, %bext
+  ret i64 %add
+}
+
+; Check that AGH allows an index.
+define i64 @f7(i64 %a, i64 %src, i64 %index) {
+; CHECK-LABEL: f7:
+; CHECK: agh %r2, 524284({{%r4,%r3|%r3,%r4}})
+; CHECK: br %r14
+  %add1 = add i64 %src, %index
+  %add2 = add i64 %add1, 524284
+  %ptr = inttoptr i64 %add2 to i16 *
+  %b = load i16, i16 *%ptr
+  %bext = sext i16 %b to i64
+  %add = add i64 %a, %bext
+  ret i64 %add
+}
+
diff --git a/test/CodeGen/SystemZ/int-mul-09.ll b/test/CodeGen/SystemZ/int-mul-09.ll
new file mode 100644
index 00000000000..3e384e72db5
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-mul-09.ll
@@ -0,0 +1,95 @@
+; Test multiplications between an i64 and a sign-extended i16 on z14.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+declare i64 @foo()
+
+; Check MGH with no displacement.
+define i64 @f1(i64 %a, i16 *%src) {
+; CHECK-LABEL: f1:
+; CHECK: mgh %r2, 0(%r3)
+; CHECK: br %r14
+  %b = load i16, i16 *%src
+  %bext = sext i16 %b to i64
+  %mul = mul i64 %a, %bext
+  ret i64 %mul
+}
+
+; Check the high end of the aligned MGH range.
+define i64 @f2(i64 %a, i16 *%src) {
+; CHECK-LABEL: f2:
+; CHECK: mgh %r2, 524286(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i16, i16 *%src, i64 262143
+  %b = load i16, i16 *%ptr
+  %bext = sext i16 %b to i64
+  %mul = mul i64 %a, %bext
+  ret i64 %mul
+}
+
+; Check the next word up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i64 @f3(i64 %a, i16 *%src) {
+; CHECK-LABEL: f3:
+; CHECK: agfi %r3, 524288
+; CHECK: mgh %r2, 0(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i16, i16 *%src, i64 262144
+  %b = load i16, i16 *%ptr
+  %bext = sext i16 %b to i64
+  %mul = mul i64 %a, %bext
+  ret i64 %mul
+}
+
+; Check the high end of the negative aligned MGH range.
+define i64 @f4(i64 %a, i16 *%src) {
+; CHECK-LABEL: f4:
+; CHECK: mgh %r2, -2(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i16, i16 *%src, i64 -1
+  %b = load i16, i16 *%ptr
+  %bext = sext i16 %b to i64
+  %mul = mul i64 %a, %bext
+  ret i64 %mul
+}
+
+; Check the low end of the MGH range.
+define i64 @f5(i64 %a, i16 *%src) {
+; CHECK-LABEL: f5:
+; CHECK: mgh %r2, -524288(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i16, i16 *%src, i64 -262144
+  %b = load i16, i16 *%ptr
+  %bext = sext i16 %b to i64
+  %mul = mul i64 %a, %bext
+  ret i64 %mul
+}
+
+; Check the next word down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i64 @f6(i64 %a, i16 *%src) {
+; CHECK-LABEL: f6:
+; CHECK: agfi %r3, -524290
+; CHECK: mgh %r2, 0(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i16, i16 *%src, i64 -262145
+  %b = load i16, i16 *%ptr
+  %bext = sext i16 %b to i64
+  %mul = mul i64 %a, %bext
+  ret i64 %mul
+}
+
+; Check that MGH allows an index.
+define i64 @f7(i64 %a, i64 %src, i64 %index) {
+; CHECK-LABEL: f7:
+; CHECK: mgh %r2, 524284({{%r4,%r3|%r3,%r4}})
+; CHECK: br %r14
+  %add1 = add i64 %src, %index
+  %add2 = add i64 %add1, 524284
+  %ptr = inttoptr i64 %add2 to i16 *
+  %b = load i16, i16 *%ptr
+  %bext = sext i16 %b to i64
+  %mul = mul i64 %a, %bext
+  ret i64 %mul
+}
+
diff --git a/test/CodeGen/SystemZ/int-mul-10.ll b/test/CodeGen/SystemZ/int-mul-10.ll
new file mode 100644
index 00000000000..a4d80af36a3
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-mul-10.ll
@@ -0,0 +1,165 @@
+; Test signed high-part i64->i128 multiplications on z14.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+declare i64 @foo()
+
+; Check sign-extended multiplication in which only the high part is used.
+define i64 @f1(i64 %dummy, i64 %a, i64 %b) {
+; CHECK-LABEL: f1:
+; CHECK-NOT: {{%r[234]}}
+; CHECK: mgrk %r2, %r3, %r4
+; CHECK: br %r14
+  %ax = sext i64 %a to i128
+  %bx = sext i64 %b to i128
+  %mulx = mul i128 %ax, %bx
+  %highx = lshr i128 %mulx, 64
+  %high = trunc i128 %highx to i64
+  ret i64 %high
+}
+
+; Check sign-extended multiplication in which only part of the high half
+; is used.
+define i64 @f2(i64 %dummy, i64 %a, i64 %b) {
+; CHECK-LABEL: f2:
+; CHECK-NOT: {{%r[234]}}
+; CHECK: mgrk [[REG:%r[0-9]+]], %r3, %r4
+; CHECK: srlg %r2, [[REG]], 3
+; CHECK: br %r14
+  %ax = sext i64 %a to i128
+  %bx = sext i64 %b to i128
+  %mulx = mul i128 %ax, %bx
+  %highx = lshr i128 %mulx, 67
+  %high = trunc i128 %highx to i64
+  ret i64 %high
+}
+
+; Check sign-extended multiplication in which the result is split into
+; high and low halves.
+define i64 @f3(i64 %dummy, i64 %a, i64 %b) {
+; CHECK-LABEL: f3:
+; CHECK-NOT: {{%r[234]}}
+; CHECK: mgrk %r2, %r3, %r4
+; CHECK: ogr %r2, %r3
+; CHECK: br %r14
+  %ax = sext i64 %a to i128
+  %bx = sext i64 %b to i128
+  %mulx = mul i128 %ax, %bx
+  %highx = lshr i128 %mulx, 64
+  %high = trunc i128 %highx to i64
+  %low = trunc i128 %mulx to i64
+  %or = or i64 %high, %low
+  ret i64 %or
+}
+
+; Check MG with no displacement.
+define i64 @f4(i64 %dummy, i64 %a, i64 *%src) {
+; CHECK-LABEL: f4:
+; CHECK-NOT: {{%r[234]}}
+; CHECK: mg %r2, 0(%r4)
+; CHECK: br %r14
+  %b = load i64 , i64 *%src
+  %ax = sext i64 %a to i128
+  %bx = sext i64 %b to i128
+  %mulx = mul i128 %ax, %bx
+  %highx = lshr i128 %mulx, 64
+  %high = trunc i128 %highx to i64
+  ret i64 %high
+}
+
+; Check the high end of the aligned MG range.
+define i64 @f5(i64 %dummy, i64 %a, i64 *%src) {
+; CHECK-LABEL: f5:
+; CHECK: mg %r2, 524280(%r4)
+; CHECK: br %r14
+  %ptr = getelementptr i64, i64 *%src, i64 65535
+  %b = load i64 , i64 *%ptr
+  %ax = sext i64 %a to i128
+  %bx = sext i64 %b to i128
+  %mulx = mul i128 %ax, %bx
+  %highx = lshr i128 %mulx, 64
+  %high = trunc i128 %highx to i64
+  ret i64 %high
+}
+
+; Check the next doubleword up, which requires separate address logic.
+; Other sequences besides this one would be OK.
+define i64 @f6(i64 %dummy, i64 %a, i64 *%src) {
+; CHECK-LABEL: f6:
+; CHECK: agfi %r4, 524288
+; CHECK: mg %r2, 0(%r4)
+; CHECK: br %r14
+  %ptr = getelementptr i64, i64 *%src, i64 65536
+  %b = load i64 , i64 *%ptr
+  %ax = sext i64 %a to i128
+  %bx = sext i64 %b to i128
+  %mulx = mul i128 %ax, %bx
+  %highx = lshr i128 %mulx, 64
+  %high = trunc i128 %highx to i64
+  ret i64 %high
+}
+
+; Check the high end of the negative aligned MG range.
+define i64 @f7(i64 %dummy, i64 %a, i64 *%src) {
+; CHECK-LABEL: f7:
+; CHECK: mg %r2, -8(%r4)
+; CHECK: br %r14
+  %ptr = getelementptr i64, i64 *%src, i64 -1
+  %b = load i64 , i64 *%ptr
+  %ax = sext i64 %a to i128
+  %bx = sext i64 %b to i128
+  %mulx = mul i128 %ax, %bx
+  %highx = lshr i128 %mulx, 64
+  %high = trunc i128 %highx to i64
+  ret i64 %high
+}
+
+; Check the low end of the MG range.
+define i64 @f8(i64 %dummy, i64 %a, i64 *%src) {
+; CHECK-LABEL: f8:
+; CHECK: mg %r2, -524288(%r4)
+; CHECK: br %r14
+  %ptr = getelementptr i64, i64 *%src, i64 -65536
+  %b = load i64 , i64 *%ptr
+  %ax = sext i64 %a to i128
+  %bx = sext i64 %b to i128
+  %mulx = mul i128 %ax, %bx
+  %highx = lshr i128 %mulx, 64
+  %high = trunc i128 %highx to i64
+  ret i64 %high
+}
+
+; Check the next doubleword down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i64 @f9(i64 *%dest, i64 %a, i64 *%src) {
+; CHECK-LABEL: f9:
+; CHECK: agfi %r4, -524296
+; CHECK: mg %r2, 0(%r4)
+; CHECK: br %r14
+  %ptr = getelementptr i64, i64 *%src, i64 -65537
+  %b = load i64 , i64 *%ptr
+  %ax = sext i64 %a to i128
+  %bx = sext i64 %b to i128
+  %mulx = mul i128 %ax, %bx
+  %highx = lshr i128 %mulx, 64
+  %high = trunc i128 %highx to i64
+  ret i64 %high
+}
+
+; Check that MG allows an index.
+define i64 @f10(i64 *%dest, i64 %a, i64 %src, i64 %index) {
+; CHECK-LABEL: f10:
+; CHECK: mg %r2, 524287(%r5,%r4)
+; CHECK: br %r14
+  %add1 = add i64 %src, %index
+  %add2 = add i64 %add1, 524287
+  %ptr = inttoptr i64 %add2 to i64 *
+  %b = load i64 , i64 *%ptr
+  %ax = sext i64 %a to i128
+  %bx = sext i64 %b to i128
+  %mulx = mul i128 %ax, %bx
+  %highx = lshr i128 %mulx, 64
+  %high = trunc i128 %highx to i64
+  ret i64 %high
+}
+
diff --git a/test/CodeGen/SystemZ/int-mul-11.ll b/test/CodeGen/SystemZ/int-mul-11.ll
new file mode 100644
index 00000000000..f2625198251
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-mul-11.ll
@@ -0,0 +1,32 @@
+; Test three-operand multiplication instructions on z14.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+; Check MSRKC.
+define i32 @f1(i32 %dummy, i32 %a, i32 %b) {
+; CHECK-LABEL: f1:
+; CHECK: msrkc %r2, %r3, %r4
+; CHECK: br %r14
+  %mul = mul i32 %a, %b
+  ret i32 %mul
+}
+
+; Check MSGRKC.
+define i64 @f2(i64 %dummy, i64 %a, i64 %b) {
+; CHECK-LABEL: f2:
+; CHECK: msgrkc %r2, %r3, %r4
+; CHECK: br %r14
+  %mul = mul i64 %a, %b
+  ret i64 %mul
+}
+
+; Verify that we still use MSGFR for i32->i64 multiplies.
+define i64 @f3(i64 %a, i32 %b) {
+; CHECK-LABEL: f3:
+; CHECK: msgfr %r2, %r3
+; CHECK: br %r14
+  %bext = sext i32 %b to i64
+  %mul = mul i64 %a, %bext
+  ret i64 %mul
+}
+
diff --git a/test/CodeGen/SystemZ/int-sub-10.ll b/test/CodeGen/SystemZ/int-sub-10.ll
new file mode 100644
index 00000000000..bf6638575e5
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-sub-10.ll
@@ -0,0 +1,95 @@
+; Test subtractions of a sign-extended i16 from an i64 on z14.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+declare i64 @foo()
+
+; Check SGH with no displacement.
+define i64 @f1(i64 %a, i16 *%src) {
+; CHECK-LABEL: f1:
+; CHECK: sgh %r2, 0(%r3)
+; CHECK: br %r14
+  %b = load i16, i16 *%src
+  %bext = sext i16 %b to i64
+  %sub = sub i64 %a, %bext
+  ret i64 %sub
+}
+
+; Check the high end of the aligned SGH range.
+define i64 @f2(i64 %a, i16 *%src) {
+; CHECK-LABEL: f2:
+; CHECK: sgh %r2, 524286(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i16, i16 *%src, i64 262143
+  %b = load i16, i16 *%ptr
+  %bext = sext i16 %b to i64
+  %sub = sub i64 %a, %bext
+  ret i64 %sub
+}
+
+; Check the next word up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i64 @f3(i64 %a, i16 *%src) {
+; CHECK-LABEL: f3:
+; CHECK: agfi %r3, 524288
+; CHECK: sgh %r2, 0(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i16, i16 *%src, i64 262144
+  %b = load i16, i16 *%ptr
+  %bext = sext i16 %b to i64
+  %sub = sub i64 %a, %bext
+  ret i64 %sub
+}
+
+; Check the high end of the negative aligned SGH range.
+define i64 @f4(i64 %a, i16 *%src) {
+; CHECK-LABEL: f4:
+; CHECK: sgh %r2, -2(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i16, i16 *%src, i64 -1
+  %b = load i16, i16 *%ptr
+  %bext = sext i16 %b to i64
+  %sub = sub i64 %a, %bext
+  ret i64 %sub
+}
+
+; Check the low end of the SGH range.
+define i64 @f5(i64 %a, i16 *%src) {
+; CHECK-LABEL: f5:
+; CHECK: sgh %r2, -524288(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i16, i16 *%src, i64 -262144
+  %b = load i16, i16 *%ptr
+  %bext = sext i16 %b to i64
+  %sub = sub i64 %a, %bext
+  ret i64 %sub
+}
+
+; Check the next word down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i64 @f6(i64 %a, i16 *%src) {
+; CHECK-LABEL: f6:
+; CHECK: agfi %r3, -524290
+; CHECK: sgh %r2, 0(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i16, i16 *%src, i64 -262145
+  %b = load i16, i16 *%ptr
+  %bext = sext i16 %b to i64
+  %sub = sub i64 %a, %bext
+  ret i64 %sub
+}
+
+; Check that SGH allows an index.
+define i64 @f7(i64 %a, i64 %src, i64 %index) {
+; CHECK-LABEL: f7:
+; CHECK: sgh %r2, 524284({{%r4,%r3|%r3,%r4}})
+; CHECK: br %r14
+  %add1 = add i64 %src, %index
+  %add2 = add i64 %add1, 524284
+  %ptr = inttoptr i64 %add2 to i16 *
+  %b = load i16, i16 *%ptr
+  %bext = sext i16 %b to i64
+  %sub = sub i64 %a, %bext
+  ret i64 %sub
+}
+
diff --git a/test/CodeGen/SystemZ/vec-and-04.ll b/test/CodeGen/SystemZ/vec-and-04.ll
new file mode 100644
index 00000000000..e9355beb429
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-and-04.ll
@@ -0,0 +1,47 @@
+; Test vector NAND on z14.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+; Test a v16i8 NAND.
+define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
+; CHECK-LABEL: f1:
+; CHECK: vnn %v24, %v26, %v28
+; CHECK: br %r14
+  %ret = and <16 x i8> %val1, %val2
+  %not = xor <16 x i8> %ret, <i8 -1, i8 -1, i8 -1, i8 -1,
+                              i8 -1, i8 -1, i8 -1, i8 -1,
+                              i8 -1, i8 -1, i8 -1, i8 -1,
+                              i8 -1, i8 -1, i8 -1, i8 -1>
+  ret <16 x i8> %not
+}
+
+; Test a v8i16 NAND.
+define <8 x i16> @f2(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
+; CHECK-LABEL: f2:
+; CHECK: vnn %v24, %v26, %v28
+; CHECK: br %r14
+  %ret = and <8 x i16> %val1, %val2
+  %not = xor <8 x i16> %ret, <i16 -1, i16 -1, i16 -1, i16 -1,
+                              i16 -1, i16 -1, i16 -1, i16 -1>
+  ret <8 x i16> %not
+}
+
+; Test a v4i32 NAND.
+define <4 x i32> @f3(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
+; CHECK-LABEL: f3:
+; CHECK: vnn %v24, %v26, %v28
+; CHECK: br %r14
+  %ret = and <4 x i32> %val1, %val2
+  %not = xor <4 x i32> %ret, <i32 -1, i32 -1, i32 -1, i32 -1>
+  ret <4 x i32> %not
+}
+
+; Test a v2i64 NAND.
+define <2 x i64> @f4(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
+; CHECK-LABEL: f4:
+; CHECK: vnn %v24, %v26, %v28
+; CHECK: br %r14
+  %ret = and <2 x i64> %val1, %val2
+  %not = xor <2 x i64> %ret, <i64 -1, i64 -1>
+  ret <2 x i64> %not
+}
diff --git a/test/CodeGen/SystemZ/vec-ctpop-02.ll b/test/CodeGen/SystemZ/vec-ctpop-02.ll
new file mode 100644
index 00000000000..ee50e88d043
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-ctpop-02.ll
@@ -0,0 +1,45 @@
+; Test vector population-count instruction on z14.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
+declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
+declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
+declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
+
+define <16 x i8> @f1(<16 x i8> %a) {
+; CHECK-LABEL: f1:
+; CHECK: vpopctb %v24, %v24
+; CHECK: br %r14
+
+  %popcnt = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
+  ret <16 x i8> %popcnt
+}
+
+define <8 x i16> @f2(<8 x i16> %a) {
+; CHECK-LABEL: f2:
+; CHECK: vpopcth %v24, %v24
+; CHECK: br %r14
+
+  %popcnt = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
+  ret <8 x i16> %popcnt
+}
+
+define <4 x i32> @f3(<4 x i32> %a) {
+; CHECK-LABEL: f3:
+; CHECK: vpopctf %v24, %v24
+; CHECK: br %r14
+
+  %popcnt = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
+  ret <4 x i32> %popcnt
+}
+
+define <2 x i64> @f4(<2 x i64> %a) {
+; CHECK-LABEL: f4:
+; CHECK: vpopctg %v24, %v24
+; CHECK: br %r14
+
+  %popcnt = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
+  ret <2 x i64> %popcnt
+}
+
diff --git a/test/CodeGen/SystemZ/vec-intrinsics.ll b/test/CodeGen/SystemZ/vec-intrinsics-01.ll
index 6f5eb0691aa..6f5eb0691aa 100644
--- a/test/CodeGen/SystemZ/vec-intrinsics.ll
+++ b/test/CodeGen/SystemZ/vec-intrinsics-01.ll
diff --git a/test/CodeGen/SystemZ/vec-intrinsics-02.ll b/test/CodeGen/SystemZ/vec-intrinsics-02.ll
new file mode 100644
index 00000000000..27ee83fc774
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-intrinsics-02.ll
@@ -0,0 +1,212 @@
+; Test vector intrinsics added with z14.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+declare <2 x i64> @llvm.s390.vbperm(<16 x i8>, <16 x i8>)
+declare <16 x i8> @llvm.s390.vmslg(<2 x i64>, <2 x i64>, <16 x i8>, i32)
+declare <16 x i8> @llvm.s390.vlrl(i32, i8 *)
+declare void @llvm.s390.vstrl(<16 x i8>, i32, i8 *)
+declare <2 x double> @llvm.s390.vfmaxdb(<2 x double>, <2 x double>, i32)
+declare <2 x double> @llvm.s390.vfmindb(<2 x double>, <2 x double>, i32)
+
+; VBPERM.
+define <2 x i64> @test_vbperm(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vbperm:
+; CHECK: vbperm %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <2 x i64> @llvm.s390.vbperm(<16 x i8> %a, <16 x i8> %b)
+  ret <2 x i64> %res
+}
+
+; VMSLG with no shifts.
+define <16 x i8> @test_vmslg1(<2 x i64> %a, <2 x i64> %b, <16 x i8> %c) {
+; CHECK-LABEL: test_vmslg1:
+; CHECK: vmslg %v24, %v24, %v26, %v28, 0
+; CHECK: br %r14
+  %res = call <16 x i8> @llvm.s390.vmslg(<2 x i64> %a, <2 x i64> %b, <16 x i8> %c, i32 0)
+  ret <16 x i8> %res
+}
+
+; VMSLG with both shifts.
+define <16 x i8> @test_vmslg2(<2 x i64> %a, <2 x i64> %b, <16 x i8> %c) {
+; CHECK-LABEL: test_vmslg2:
+; CHECK: vmslg %v24, %v24, %v26, %v28, 12
+; CHECK: br %r14
+  %res = call <16 x i8> @llvm.s390.vmslg(<2 x i64> %a, <2 x i64> %b, <16 x i8> %c, i32 12)
+  ret <16 x i8> %res
+}
+
+; VLRLR with the lowest in-range displacement.
+define <16 x i8> @test_vlrlr1(i8 *%ptr, i32 %length) {
+; CHECK-LABEL: test_vlrlr1:
+; CHECK: vlrlr %v24, %r3, 0(%r2)
+; CHECK: br %r14
+  %res = call <16 x i8> @llvm.s390.vlrl(i32 %length, i8 *%ptr)
+  ret <16 x i8> %res
+}
+
+; VLRLR with the highest in-range displacement.
+define <16 x i8> @test_vlrlr2(i8 *%base, i32 %length) {
+; CHECK-LABEL: test_vlrlr2:
+; CHECK: vlrlr %v24, %r3, 4095(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i8, i8 *%base, i64 4095
+  %res = call <16 x i8> @llvm.s390.vlrl(i32 %length, i8 *%ptr)
+  ret <16 x i8> %res
+}
+
+; VLRLR with an out-of-range displacement.
+define <16 x i8> @test_vlrlr3(i8 *%base, i32 %length) {
+; CHECK-LABEL: test_vlrlr3:
+; CHECK: vlrlr %v24, %r3, 0({{%r[1-5]}})
+; CHECK: br %r14
+  %ptr = getelementptr i8, i8 *%base, i64 4096
+  %res = call <16 x i8> @llvm.s390.vlrl(i32 %length, i8 *%ptr)
+  ret <16 x i8> %res
+}
+
+; Check that VLRLR doesn't allow an index.
+define <16 x i8> @test_vlrlr4(i8 *%base, i64 %index, i32 %length) {
+; CHECK-LABEL: test_vlrlr4:
+; CHECK: vlrlr %v24, %r4, 0({{%r[1-5]}})
+; CHECK: br %r14
+  %ptr = getelementptr i8, i8 *%base, i64 %index
+  %res = call <16 x i8> @llvm.s390.vlrl(i32 %length, i8 *%ptr)
+  ret <16 x i8> %res
+}
+
+; VLRL with the lowest in-range displacement.
+define <16 x i8> @test_vlrl1(i8 *%ptr) {
+; CHECK-LABEL: test_vlrl1:
+; CHECK: vlrl %v24, 0(%r2), 0
+; CHECK: br %r14
+  %res = call <16 x i8> @llvm.s390.vlrl(i32 0, i8 *%ptr)
+  ret <16 x i8> %res
+}
+
+; VLRL with the highest in-range displacement.
+define <16 x i8> @test_vlrl2(i8 *%base) {
+; CHECK-LABEL: test_vlrl2:
+; CHECK: vlrl %v24, 4095(%r2), 0
+; CHECK: br %r14
+  %ptr = getelementptr i8, i8 *%base, i64 4095
+  %res = call <16 x i8> @llvm.s390.vlrl(i32 0, i8 *%ptr)
+  ret <16 x i8> %res
+}
+
+; VLRL with an out-of-range displacement.
+define <16 x i8> @test_vlrl3(i8 *%base) {
+; CHECK-LABEL: test_vlrl3:
+; CHECK: vlrl %v24, 0({{%r[1-5]}}), 0
+; CHECK: br %r14
+  %ptr = getelementptr i8, i8 *%base, i64 4096
+  %res = call <16 x i8> @llvm.s390.vlrl(i32 0, i8 *%ptr)
+  ret <16 x i8> %res
+}
+
+; Check that VLRL doesn't allow an index.
+define <16 x i8> @test_vlrl4(i8 *%base, i64 %index) {
+; CHECK-LABEL: test_vlrl4:
+; CHECK: vlrl %v24, 0({{%r[1-5]}}), 0
+; CHECK: br %r14
+  %ptr = getelementptr i8, i8 *%base, i64 %index
+  %res = call <16 x i8> @llvm.s390.vlrl(i32 0, i8 *%ptr)
+  ret <16 x i8> %res
+}
+
+; VSTRLR with the lowest in-range displacement.
+define void @test_vstrlr1(<16 x i8> %vec, i8 *%ptr, i32 %length) {
+; CHECK-LABEL: test_vstrlr1:
+; CHECK: vstrlr %v24, %r3, 0(%r2)
+; CHECK: br %r14
+  call void @llvm.s390.vstrl(<16 x i8> %vec, i32 %length, i8 *%ptr)
+  ret void
+}
+
+; VSTRLR with the highest in-range displacement.
+define void @test_vstrlr2(<16 x i8> %vec, i8 *%base, i32 %length) {
+; CHECK-LABEL: test_vstrlr2:
+; CHECK: vstrlr %v24, %r3, 4095(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i8, i8 *%base, i64 4095
+  call void @llvm.s390.vstrl(<16 x i8> %vec, i32 %length, i8 *%ptr)
+  ret void
+}
+
+; VSTRLR with an out-of-range displacement.
+define void @test_vstrlr3(<16 x i8> %vec, i8 *%base, i32 %length) {
+; CHECK-LABEL: test_vstrlr3:
+; CHECK: vstrlr %v24, %r3, 0({{%r[1-5]}})
+; CHECK: br %r14
+  %ptr = getelementptr i8, i8 *%base, i64 4096
+  call void @llvm.s390.vstrl(<16 x i8> %vec, i32 %length, i8 *%ptr)
+  ret void
+}
+
+; Check that VSTRLR doesn't allow an index.
+define void @test_vstrlr4(<16 x i8> %vec, i8 *%base, i64 %index, i32 %length) {
+; CHECK-LABEL: test_vstrlr4:
+; CHECK: vstrlr %v24, %r4, 0({{%r[1-5]}})
+; CHECK: br %r14
+  %ptr = getelementptr i8, i8 *%base, i64 %index
+  call void @llvm.s390.vstrl(<16 x i8> %vec, i32 %length, i8 *%ptr)
+  ret void
+}
+
+; VSTRL with the lowest in-range displacement.
+define void @test_vstrl1(<16 x i8> %vec, i8 *%ptr) {
+; CHECK-LABEL: test_vstrl1:
+; CHECK: vstrl %v24, 0(%r2), 8
+; CHECK: br %r14
+  call void @llvm.s390.vstrl(<16 x i8> %vec, i32 8, i8 *%ptr)
+  ret void
+}
+
+; VSTRL with the highest in-range displacement.
+define void @test_vstrl2(<16 x i8> %vec, i8 *%base) {
+; CHECK-LABEL: test_vstrl2:
+; CHECK: vstrl %v24, 4095(%r2), 8
+; CHECK: br %r14
+  %ptr = getelementptr i8, i8 *%base, i64 4095
+  call void @llvm.s390.vstrl(<16 x i8> %vec, i32 8, i8 *%ptr)
+  ret void
+}
+
+; VSTRL with an out-of-range displacement.
+define void @test_vstrl3(<16 x i8> %vec, i8 *%base) {
+; CHECK-LABEL: test_vstrl3:
+; CHECK: vstrl %v24, 0({{%r[1-5]}}), 8
+; CHECK: br %r14
+  %ptr = getelementptr i8, i8 *%base, i64 4096
+  call void @llvm.s390.vstrl(<16 x i8> %vec, i32 8, i8 *%ptr)
+  ret void
+}
+
+; Check that VSTRL doesn't allow an index.
+define void @test_vstrl4(<16 x i8> %vec, i8 *%base, i64 %index) {
+; CHECK-LABEL: test_vstrl4:
+; CHECK: vstrl %v24, 0({{%r[1-5]}}), 8
+; CHECK: br %r14
+  %ptr = getelementptr i8, i8 *%base, i64 %index
+  call void @llvm.s390.vstrl(<16 x i8> %vec, i32 8, i8 *%ptr)
+  ret void
+}
+
+; VFMAXDB.
+define <2 x double> @test_vfmaxdb(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: test_vfmaxdb:
+; CHECK: vfmaxdb %v24, %v24, %v26, 4
+; CHECK: br %r14
+  %res = call <2 x double> @llvm.s390.vfmaxdb(<2 x double> %a, <2 x double> %b, i32 4)
+  ret <2 x double> %res
+}
+
+; VFMINDB.
+define <2 x double> @test_vfmindb(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: test_vfmindb:
+; CHECK: vfmindb %v24, %v24, %v26, 4
+; CHECK: br %r14
+  %res = call <2 x double> @llvm.s390.vfmindb(<2 x double> %a, <2 x double> %b, i32 4)
+  ret <2 x double> %res
+}
+
diff --git a/test/CodeGen/SystemZ/vec-max-05.ll b/test/CodeGen/SystemZ/vec-max-05.ll
new file mode 100644
index 00000000000..44efac76423
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-max-05.ll
@@ -0,0 +1,58 @@
+; Test vector maximum on z14.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+declare double @fmax(double, double)
+declare double @llvm.maxnum.f64(double, double)
+declare <2 x double> @llvm.maxnum.v2f64(<2 x double>, <2 x double>)
+
+; Test the fmax library function.
+define double @f1(double %dummy, double %val1, double %val2) {
+; CHECK-LABEL: f1:
+; CHECK: wfmaxdb %f0, %f2, %f4, 4
+; CHECK: br %r14
+  %ret = call double @fmax(double %val1, double %val2) readnone
+  ret double %ret
+}
+
+; Test the f64 maxnum intrinsic.
+define double @f2(double %dummy, double %val1, double %val2) {
+; CHECK-LABEL: f2:
+; CHECK: wfmaxdb %f0, %f2, %f4, 4
+; CHECK: br %r14
+  %ret = call double @llvm.maxnum.f64(double %val1, double %val2)
+  ret double %ret
+}
+
+; Test a f64 constant compare/select resulting in maxnum.
+define double @f3(double %dummy, double %val) {
+; CHECK-LABEL: f3:
+; CHECK: lzdr [[REG:%f[0-9]+]]
+; CHECK: wfmaxdb %f0, %f2, [[REG]], 4
+; CHECK: br %r14
+  %cmp = fcmp ogt double %val, 0.0
+  %ret = select i1 %cmp, double %val, double 0.0
+  ret double %ret
+}
+
+; Test a f64 constant compare/select resulting in maxnan.
+define double @f4(double %dummy, double %val) {
+; CHECK-LABEL: f4:
+; CHECK: lzdr [[REG:%f[0-9]+]]
+; CHECK: wfmaxdb %f0, %f2, [[REG]], 1
+; CHECK: br %r14
+  %cmp = fcmp ugt double %val, 0.0
+  %ret = select i1 %cmp, double %val, double 0.0
+  ret double %ret
+}
+
+; Test the v2f64 maxnum intrinsic.
+define <2 x double> @f5(<2 x double> %dummy, <2 x double> %val1,
+                        <2 x double> %val2) {
+; CHECK-LABEL: f5:
+; CHECK: vfmaxdb %v24, %v26, %v28, 4
+; CHECK: br %r14
+  %ret = call <2 x double> @llvm.maxnum.v2f64(<2 x double> %val1, <2 x double> %val2)
+  ret <2 x double> %ret
+}
+
diff --git a/test/CodeGen/SystemZ/vec-min-05.ll b/test/CodeGen/SystemZ/vec-min-05.ll
new file mode 100644
index 00000000000..c2d8726addf
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-min-05.ll
@@ -0,0 +1,58 @@
+; Test vector minimum on z14.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+declare double @fmin(double, double)
+declare double @llvm.minnum.f64(double, double)
+declare <2 x double> @llvm.minnum.v2f64(<2 x double>, <2 x double>)
+
+; Test the fmin library function.
+define double @f1(double %dummy, double %val1, double %val2) {
+; CHECK-LABEL: f1:
+; CHECK: wfmindb %f0, %f2, %f4, 4
+; CHECK: br %r14
+  %ret = call double @fmin(double %val1, double %val2) readnone
+  ret double %ret
+}
+
+; Test the f64 minnum intrinsic.
+define double @f2(double %dummy, double %val1, double %val2) {
+; CHECK-LABEL: f2:
+; CHECK: wfmindb %f0, %f2, %f4, 4
+; CHECK: br %r14
+  %ret = call double @llvm.minnum.f64(double %val1, double %val2)
+  ret double %ret
+}
+
+; Test a f64 constant compare/select resulting in minnum.
+define double @f3(double %dummy, double %val) {
+; CHECK-LABEL: f3:
+; CHECK: lzdr [[REG:%f[0-9]+]]
+; CHECK: wfmindb %f0, %f2, [[REG]], 4
+; CHECK: br %r14
+  %cmp = fcmp olt double %val, 0.0
+  %ret = select i1 %cmp, double %val, double 0.0
+  ret double %ret
+}
+
+; Test a f64 constant compare/select resulting in minnan.
+define double @f4(double %dummy, double %val) {
+; CHECK-LABEL: f4:
+; CHECK: lzdr [[REG:%f[0-9]+]]
+; CHECK: wfmindb %f0, %f2, [[REG]], 1
+; CHECK: br %r14
+  %cmp = fcmp ult double %val, 0.0
+  %ret = select i1 %cmp, double %val, double 0.0
+  ret double %ret
+}
+
+; Test the v2f64 minnum intrinsic.
+define <2 x double> @f5(<2 x double> %dummy, <2 x double> %val1,
+                        <2 x double> %val2) {
+; CHECK-LABEL: f5:
+; CHECK: vfmindb %v24, %v26, %v28, 4
+; CHECK: br %r14
+  %ret = call <2 x double> @llvm.minnum.v2f64(<2 x double> %val1, <2 x double> %val2)
+  ret <2 x double> %ret
+}
+
diff --git a/test/CodeGen/SystemZ/vec-move-18.ll b/test/CodeGen/SystemZ/vec-move-18.ll
new file mode 100644
index 00000000000..5d3d09d83ef
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-move-18.ll
@@ -0,0 +1,24 @@
+; Test insertions of memory values into 0 on z14.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+; Test VLLEZLF.
+define <4 x i32> @f1(i32 *%ptr) {
+; CHECK-LABEL: f1:
+; CHECK: vllezlf %v24, 0(%r2)
+; CHECK: br %r14
+  %val = load i32, i32 *%ptr
+  %ret = insertelement <4 x i32> zeroinitializer, i32 %val, i32 0
+  ret <4 x i32> %ret
+}
+
+; Test VLLEZLF with a float.
+define <4 x float> @f2(float *%ptr) {
+; CHECK-LABEL: f2:
+; CHECK: vllezlf %v24, 0(%r2)
+; CHECK: br %r14
+  %val = load float, float *%ptr
+  %ret = insertelement <4 x float> zeroinitializer, float %val, i32 0
+  ret <4 x float> %ret
+}
+
diff --git a/test/CodeGen/SystemZ/vec-mul-05.ll b/test/CodeGen/SystemZ/vec-mul-05.ll
new file mode 100644
index 00000000000..c05437d4923
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-mul-05.ll
@@ -0,0 +1,32 @@
+; Test vector negative multiply-and-add on z14.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)
+
+; Test a v2f64 negative multiply-and-add.
+define <2 x double> @f1(<2 x double> %dummy, <2 x double> %val1,
+                        <2 x double> %val2, <2 x double> %val3) {
+; CHECK-LABEL: f1:
+; CHECK: vfnmadb %v24, %v26, %v28, %v30
+; CHECK: br %r14
+  %ret = call <2 x double> @llvm.fma.v2f64 (<2 x double> %val1,
+                                            <2 x double> %val2,
+                                            <2 x double> %val3)
+  %negret = fsub <2 x double> <double -0.0, double -0.0>, %ret
+  ret <2 x double> %negret
+}
+
+; Test a v2f64 negative multiply-and-subtract.
+define <2 x double> @f2(<2 x double> %dummy, <2 x double> %val1,
+                        <2 x double> %val2, <2 x double> %val3) {
+; CHECK-LABEL: f2:
+; CHECK: vfnmsdb %v24, %v26, %v28, %v30
+; CHECK: br %r14
+  %negval3 = fsub <2 x double> <double -0.0, double -0.0>, %val3
+  %ret = call <2 x double> @llvm.fma.v2f64 (<2 x double> %val1,
+                                            <2 x double> %val2,
+                                            <2 x double> %negval3)
+  %negret = fsub <2 x double> <double -0.0, double -0.0>, %ret
+  ret <2 x double> %negret
+}
diff --git a/test/CodeGen/SystemZ/vec-or-03.ll b/test/CodeGen/SystemZ/vec-or-03.ll
new file mode 100644
index 00000000000..010629d880d
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-or-03.ll
@@ -0,0 +1,91 @@
+; Test vector OR-NOT on z14.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+; Test a v16i8 OR-NOT.
+define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
+; CHECK-LABEL: f1:
+; CHECK: voc %v24, %v26, %v28
+; CHECK: br %r14
+  %not = xor <16 x i8> %val2, <i8 -1, i8 -1, i8 -1, i8 -1,
+                               i8 -1, i8 -1, i8 -1, i8 -1,
+                               i8 -1, i8 -1, i8 -1, i8 -1,
+                               i8 -1, i8 -1, i8 -1, i8 -1>
+  %ret = or <16 x i8> %val1, %not
+  ret <16 x i8> %ret
+}
+
+; ...and again with the reverse.
+define <16 x i8> @f2(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
+; CHECK-LABEL: f2:
+; CHECK: voc %v24, %v28, %v26
+; CHECK: br %r14
+  %not = xor <16 x i8> %val1, <i8 -1, i8 -1, i8 -1, i8 -1,
+                               i8 -1, i8 -1, i8 -1, i8 -1,
+                               i8 -1, i8 -1, i8 -1, i8 -1,
+                               i8 -1, i8 -1, i8 -1, i8 -1>
+  %ret = or <16 x i8> %not, %val2
+  ret <16 x i8> %ret
+}
+
+; Test a v8i16 OR-NOT.
+define <8 x i16> @f3(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
+; CHECK-LABEL: f3:
+; CHECK: voc %v24, %v26, %v28
+; CHECK: br %r14
+  %not = xor <8 x i16> %val2, <i16 -1, i16 -1, i16 -1, i16 -1,
+                               i16 -1, i16 -1, i16 -1, i16 -1>
+  %ret = or <8 x i16> %val1, %not
+  ret <8 x i16> %ret
+}
+
+; ...and again with the reverse.
+define <8 x i16> @f4(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
+; CHECK-LABEL: f4:
+; CHECK: voc %v24, %v28, %v26
+; CHECK: br %r14
+  %not = xor <8 x i16> %val1, <i16 -1, i16 -1, i16 -1, i16 -1,
+                               i16 -1, i16 -1, i16 -1, i16 -1>
+  %ret = or <8 x i16> %not, %val2
+  ret <8 x i16> %ret
+}
+
+; Test a v4i32 OR-NOT.
+define <4 x i32> @f5(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
+; CHECK-LABEL: f5:
+; CHECK: voc %v24, %v26, %v28
+; CHECK: br %r14
+  %not = xor <4 x i32> %val2, <i32 -1, i32 -1, i32 -1, i32 -1>
+  %ret = or <4 x i32> %val1, %not
+  ret <4 x i32> %ret
+}
+
+; ...and again with the reverse.
+define <4 x i32> @f6(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
+; CHECK-LABEL: f6:
+; CHECK: voc %v24, %v28, %v26
+; CHECK: br %r14
+  %not = xor <4 x i32> %val1, <i32 -1, i32 -1, i32 -1, i32 -1>
+  %ret = or <4 x i32> %not, %val2
+  ret <4 x i32> %ret
+}
+
+; Test a v2i64 OR-NOT.
+define <2 x i64> @f7(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
+; CHECK-LABEL: f7:
+; CHECK: voc %v24, %v26, %v28
+; CHECK: br %r14
+  %not = xor <2 x i64> %val2, <i64 -1, i64 -1>
+  %ret = or <2 x i64> %val1, %not
+  ret <2 x i64> %ret
+}
+
+; ...and again with the reverse.
+define <2 x i64> @f8(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
+; CHECK-LABEL: f8:
+; CHECK: voc %v24, %v28, %v26
+; CHECK: br %r14
+  %not = xor <2 x i64> %val1, <i64 -1, i64 -1>
+  %ret = or <2 x i64> %not, %val2
+  ret <2 x i64> %ret
+}
diff --git a/test/CodeGen/SystemZ/vec-xor-02.ll b/test/CodeGen/SystemZ/vec-xor-02.ll
new file mode 100644
index 00000000000..b4b5a96ba25
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-xor-02.ll
@@ -0,0 +1,47 @@
+; Test vector NOT-XOR on z14.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+; Test a v16i8 NOT-XOR.
+define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
+; CHECK-LABEL: f1:
+; CHECK: vnx %v24, %v26, %v28
+; CHECK: br %r14
+  %ret = xor <16 x i8> %val1, %val2
+  %not = xor <16 x i8> %ret, <i8 -1, i8 -1, i8 -1, i8 -1,
+                              i8 -1, i8 -1, i8 -1, i8 -1,
+                              i8 -1, i8 -1, i8 -1, i8 -1,
+                              i8 -1, i8 -1, i8 -1, i8 -1>
+  ret <16 x i8> %not
+}
+
+; Test a v8i16 NOT-XOR.
+define <8 x i16> @f2(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
+; CHECK-LABEL: f2:
+; CHECK: vnx %v24, %v26, %v28
+; CHECK: br %r14
+  %ret = xor <8 x i16> %val1, %val2
+  %not = xor <8 x i16> %ret, <i16 -1, i16 -1, i16 -1, i16 -1,
+                              i16 -1, i16 -1, i16 -1, i16 -1>
+  ret <8 x i16> %not
+}
+
+; Test a v4i32 NOT-XOR.
+define <4 x i32> @f3(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
+; CHECK-LABEL: f3:
+; CHECK: vnx %v24, %v26, %v28
+; CHECK: br %r14
+  %ret = xor <4 x i32> %val1, %val2
+  %not = xor <4 x i32> %ret, <i32 -1, i32 -1, i32 -1, i32 -1>
+  ret <4 x i32> %not
+}
+
+; Test a v2i64 NOT-XOR.
+define <2 x i64> @f4(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
+; CHECK-LABEL: f4:
+; CHECK: vnx %v24, %v26, %v28
+; CHECK: br %r14
+  %ret = xor <2 x i64> %val1, %val2
+  %not = xor <2 x i64> %ret, <i64 -1, i64 -1>
+  ret <2 x i64> %not
+}