summaryrefslogtreecommitdiff
path: root/test/CodeGen/SystemZ
diff options
context:
space:
mode:
authorUlrich Weigand <ulrich.weigand@de.ibm.com>2017-07-17 17:41:11 +0000
committerUlrich Weigand <ulrich.weigand@de.ibm.com>2017-07-17 17:41:11 +0000
commit8ab242ce954d3dfec4d07bb436aee26e38d48553 (patch)
treeeb830d37e7a03904678f369c7131a9d894d5b71c /test/CodeGen/SystemZ
parent02972a4b38d953bb0af26108e8285f6f407a7256 (diff)
[SystemZ] Add support for IBM z14 processor (1/3)
This patch series adds support for the IBM z14 processor. This part includes: - Basic support for the new processor and its features. - Support for new instructions (except vector 32-bit float and 128-bit float). - CodeGen for new instructions, including new LLVM intrinsics. - Scheduler description for the new processor. - Detection of z14 as host processor. Support for the new 32-bit vector float and 128-bit vector float instructions is provided by separate patches. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@308194 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGen/SystemZ')
-rw-r--r--test/CodeGen/SystemZ/branch-11.ll56
-rw-r--r--test/CodeGen/SystemZ/fp-mul-10.ll23
-rw-r--r--test/CodeGen/SystemZ/int-add-17.ll95
-rw-r--r--test/CodeGen/SystemZ/int-mul-09.ll95
-rw-r--r--test/CodeGen/SystemZ/int-mul-10.ll165
-rw-r--r--test/CodeGen/SystemZ/int-mul-11.ll32
-rw-r--r--test/CodeGen/SystemZ/int-sub-10.ll95
-rw-r--r--test/CodeGen/SystemZ/vec-and-04.ll47
-rw-r--r--test/CodeGen/SystemZ/vec-ctpop-02.ll45
-rw-r--r--test/CodeGen/SystemZ/vec-intrinsics-01.ll (renamed from test/CodeGen/SystemZ/vec-intrinsics.ll)0
-rw-r--r--test/CodeGen/SystemZ/vec-intrinsics-02.ll212
-rw-r--r--test/CodeGen/SystemZ/vec-max-05.ll58
-rw-r--r--test/CodeGen/SystemZ/vec-min-05.ll58
-rw-r--r--test/CodeGen/SystemZ/vec-move-18.ll24
-rw-r--r--test/CodeGen/SystemZ/vec-mul-05.ll32
-rw-r--r--test/CodeGen/SystemZ/vec-or-03.ll91
-rw-r--r--test/CodeGen/SystemZ/vec-xor-02.ll47
17 files changed, 1175 insertions, 0 deletions
diff --git a/test/CodeGen/SystemZ/branch-11.ll b/test/CodeGen/SystemZ/branch-11.ll
new file mode 100644
index 00000000000..ce7b3ef267b
--- /dev/null
+++ b/test/CodeGen/SystemZ/branch-11.ll
@@ -0,0 +1,56 @@
+; Test indirect jumps on z14.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+define i32 @f1(i32 %x, i32 %y, i32 %op) {
+; CHECK-LABEL: f1:
+; CHECK: ahi %r4, -1
+; CHECK: clibh %r4, 5, 0(%r14)
+; CHECK: llgfr [[OP64:%r[0-5]]], %r4
+; CHECK: sllg [[INDEX:%r[1-5]]], [[OP64]], 3
+; CHECK: larl [[BASE:%r[1-5]]]
+; CHECK: bi 0([[BASE]],[[INDEX]])
+entry:
+ switch i32 %op, label %exit [
+ i32 1, label %b.add
+ i32 2, label %b.sub
+ i32 3, label %b.and
+ i32 4, label %b.or
+ i32 5, label %b.xor
+ i32 6, label %b.mul
+ ]
+
+b.add:
+ %add = add i32 %x, %y
+ br label %exit
+
+b.sub:
+ %sub = sub i32 %x, %y
+ br label %exit
+
+b.and:
+ %and = and i32 %x, %y
+ br label %exit
+
+b.or:
+ %or = or i32 %x, %y
+ br label %exit
+
+b.xor:
+ %xor = xor i32 %x, %y
+ br label %exit
+
+b.mul:
+ %mul = mul i32 %x, %y
+ br label %exit
+
+exit:
+ %res = phi i32 [ %x, %entry ],
+ [ %add, %b.add ],
+ [ %sub, %b.sub ],
+ [ %and, %b.and ],
+ [ %or, %b.or ],
+ [ %xor, %b.xor ],
+ [ %mul, %b.mul ]
+ ret i32 %res
+}
diff --git a/test/CodeGen/SystemZ/fp-mul-10.ll b/test/CodeGen/SystemZ/fp-mul-10.ll
new file mode 100644
index 00000000000..977e5c60e3a
--- /dev/null
+++ b/test/CodeGen/SystemZ/fp-mul-10.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+declare double @llvm.fma.f64(double %f1, double %f2, double %f3)
+
+define double @f1(double %f1, double %f2, double %acc) {
+; CHECK-LABEL: f1:
+; CHECK: wfnmadb %f0, %f0, %f2, %f4
+; CHECK: br %r14
+ %res = call double @llvm.fma.f64 (double %f1, double %f2, double %acc)
+ %negres = fsub double -0.0, %res
+ ret double %negres
+}
+
+define double @f2(double %f1, double %f2, double %acc) {
+; CHECK-LABEL: f2:
+; CHECK: wfnmsdb %f0, %f0, %f2, %f4
+; CHECK: br %r14
+ %negacc = fsub double -0.0, %acc
+ %res = call double @llvm.fma.f64 (double %f1, double %f2, double %negacc)
+ %negres = fsub double -0.0, %res
+ ret double %negres
+}
+
diff --git a/test/CodeGen/SystemZ/int-add-17.ll b/test/CodeGen/SystemZ/int-add-17.ll
new file mode 100644
index 00000000000..fd245871c65
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-add-17.ll
@@ -0,0 +1,95 @@
+; Test additions between an i64 and a sign-extended i16 on z14.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+declare i64 @foo()
+
+; Check AGH with no displacement.
+define i64 @f1(i64 %a, i16 *%src) {
+; CHECK-LABEL: f1:
+; CHECK: agh %r2, 0(%r3)
+; CHECK: br %r14
+ %b = load i16, i16 *%src
+ %bext = sext i16 %b to i64
+ %add = add i64 %a, %bext
+ ret i64 %add
+}
+
+; Check the high end of the aligned AGH range.
+define i64 @f2(i64 %a, i16 *%src) {
+; CHECK-LABEL: f2:
+; CHECK: agh %r2, 524286(%r3)
+; CHECK: br %r14
+ %ptr = getelementptr i16, i16 *%src, i64 262143
+ %b = load i16, i16 *%ptr
+ %bext = sext i16 %b to i64
+ %add = add i64 %a, %bext
+ ret i64 %add
+}
+
+; Check the next word up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i64 @f3(i64 %a, i16 *%src) {
+; CHECK-LABEL: f3:
+; CHECK: agfi %r3, 524288
+; CHECK: agh %r2, 0(%r3)
+; CHECK: br %r14
+ %ptr = getelementptr i16, i16 *%src, i64 262144
+ %b = load i16, i16 *%ptr
+ %bext = sext i16 %b to i64
+ %add = add i64 %a, %bext
+ ret i64 %add
+}
+
+; Check the high end of the negative aligned AGH range.
+define i64 @f4(i64 %a, i16 *%src) {
+; CHECK-LABEL: f4:
+; CHECK: agh %r2, -2(%r3)
+; CHECK: br %r14
+ %ptr = getelementptr i16, i16 *%src, i64 -1
+ %b = load i16, i16 *%ptr
+ %bext = sext i16 %b to i64
+ %add = add i64 %a, %bext
+ ret i64 %add
+}
+
+; Check the low end of the AGH range.
+define i64 @f5(i64 %a, i16 *%src) {
+; CHECK-LABEL: f5:
+; CHECK: agh %r2, -524288(%r3)
+; CHECK: br %r14
+ %ptr = getelementptr i16, i16 *%src, i64 -262144
+ %b = load i16, i16 *%ptr
+ %bext = sext i16 %b to i64
+ %add = add i64 %a, %bext
+ ret i64 %add
+}
+
+; Check the next word down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i64 @f6(i64 %a, i16 *%src) {
+; CHECK-LABEL: f6:
+; CHECK: agfi %r3, -524290
+; CHECK: agh %r2, 0(%r3)
+; CHECK: br %r14
+ %ptr = getelementptr i16, i16 *%src, i64 -262145
+ %b = load i16, i16 *%ptr
+ %bext = sext i16 %b to i64
+ %add = add i64 %a, %bext
+ ret i64 %add
+}
+
+; Check that AGH allows an index.
+define i64 @f7(i64 %a, i64 %src, i64 %index) {
+; CHECK-LABEL: f7:
+; CHECK: agh %r2, 524284({{%r4,%r3|%r3,%r4}})
+; CHECK: br %r14
+ %add1 = add i64 %src, %index
+ %add2 = add i64 %add1, 524284
+ %ptr = inttoptr i64 %add2 to i16 *
+ %b = load i16, i16 *%ptr
+ %bext = sext i16 %b to i64
+ %add = add i64 %a, %bext
+ ret i64 %add
+}
+
diff --git a/test/CodeGen/SystemZ/int-mul-09.ll b/test/CodeGen/SystemZ/int-mul-09.ll
new file mode 100644
index 00000000000..3e384e72db5
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-mul-09.ll
@@ -0,0 +1,95 @@
+; Test multiplications between an i64 and a sign-extended i16 on z14.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+declare i64 @foo()
+
+; Check MGH with no displacement.
+define i64 @f1(i64 %a, i16 *%src) {
+; CHECK-LABEL: f1:
+; CHECK: mgh %r2, 0(%r3)
+; CHECK: br %r14
+ %b = load i16, i16 *%src
+ %bext = sext i16 %b to i64
+ %mul = mul i64 %a, %bext
+ ret i64 %mul
+}
+
+; Check the high end of the aligned MGH range.
+define i64 @f2(i64 %a, i16 *%src) {
+; CHECK-LABEL: f2:
+; CHECK: mgh %r2, 524286(%r3)
+; CHECK: br %r14
+ %ptr = getelementptr i16, i16 *%src, i64 262143
+ %b = load i16, i16 *%ptr
+ %bext = sext i16 %b to i64
+ %mul = mul i64 %a, %bext
+ ret i64 %mul
+}
+
+; Check the next word up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i64 @f3(i64 %a, i16 *%src) {
+; CHECK-LABEL: f3:
+; CHECK: agfi %r3, 524288
+; CHECK: mgh %r2, 0(%r3)
+; CHECK: br %r14
+ %ptr = getelementptr i16, i16 *%src, i64 262144
+ %b = load i16, i16 *%ptr
+ %bext = sext i16 %b to i64
+ %mul = mul i64 %a, %bext
+ ret i64 %mul
+}
+
+; Check the high end of the negative aligned MGH range.
+define i64 @f4(i64 %a, i16 *%src) {
+; CHECK-LABEL: f4:
+; CHECK: mgh %r2, -2(%r3)
+; CHECK: br %r14
+ %ptr = getelementptr i16, i16 *%src, i64 -1
+ %b = load i16, i16 *%ptr
+ %bext = sext i16 %b to i64
+ %mul = mul i64 %a, %bext
+ ret i64 %mul
+}
+
+; Check the low end of the MGH range.
+define i64 @f5(i64 %a, i16 *%src) {
+; CHECK-LABEL: f5:
+; CHECK: mgh %r2, -524288(%r3)
+; CHECK: br %r14
+ %ptr = getelementptr i16, i16 *%src, i64 -262144
+ %b = load i16, i16 *%ptr
+ %bext = sext i16 %b to i64
+ %mul = mul i64 %a, %bext
+ ret i64 %mul
+}
+
+; Check the next word down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i64 @f6(i64 %a, i16 *%src) {
+; CHECK-LABEL: f6:
+; CHECK: agfi %r3, -524290
+; CHECK: mgh %r2, 0(%r3)
+; CHECK: br %r14
+ %ptr = getelementptr i16, i16 *%src, i64 -262145
+ %b = load i16, i16 *%ptr
+ %bext = sext i16 %b to i64
+ %mul = mul i64 %a, %bext
+ ret i64 %mul
+}
+
+; Check that MGH allows an index.
+define i64 @f7(i64 %a, i64 %src, i64 %index) {
+; CHECK-LABEL: f7:
+; CHECK: mgh %r2, 524284({{%r4,%r3|%r3,%r4}})
+; CHECK: br %r14
+ %add1 = add i64 %src, %index
+ %add2 = add i64 %add1, 524284
+ %ptr = inttoptr i64 %add2 to i16 *
+ %b = load i16, i16 *%ptr
+ %bext = sext i16 %b to i64
+ %mul = mul i64 %a, %bext
+ ret i64 %mul
+}
+
diff --git a/test/CodeGen/SystemZ/int-mul-10.ll b/test/CodeGen/SystemZ/int-mul-10.ll
new file mode 100644
index 00000000000..a4d80af36a3
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-mul-10.ll
@@ -0,0 +1,165 @@
+; Test signed high-part i64->i128 multiplications on z14.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+declare i64 @foo()
+
+; Check sign-extended multiplication in which only the high part is used.
+define i64 @f1(i64 %dummy, i64 %a, i64 %b) {
+; CHECK-LABEL: f1:
+; CHECK-NOT: {{%r[234]}}
+; CHECK: mgrk %r2, %r3, %r4
+; CHECK: br %r14
+ %ax = sext i64 %a to i128
+ %bx = sext i64 %b to i128
+ %mulx = mul i128 %ax, %bx
+ %highx = lshr i128 %mulx, 64
+ %high = trunc i128 %highx to i64
+ ret i64 %high
+}
+
+; Check sign-extended multiplication in which only part of the high half
+; is used.
+define i64 @f2(i64 %dummy, i64 %a, i64 %b) {
+; CHECK-LABEL: f2:
+; CHECK-NOT: {{%r[234]}}
+; CHECK: mgrk [[REG:%r[0-9]+]], %r3, %r4
+; CHECK: srlg %r2, [[REG]], 3
+; CHECK: br %r14
+ %ax = sext i64 %a to i128
+ %bx = sext i64 %b to i128
+ %mulx = mul i128 %ax, %bx
+ %highx = lshr i128 %mulx, 67
+ %high = trunc i128 %highx to i64
+ ret i64 %high
+}
+
+; Check sign-extended multiplication in which the result is split into
+; high and low halves.
+define i64 @f3(i64 %dummy, i64 %a, i64 %b) {
+; CHECK-LABEL: f3:
+; CHECK-NOT: {{%r[234]}}
+; CHECK: mgrk %r2, %r3, %r4
+; CHECK: ogr %r2, %r3
+; CHECK: br %r14
+ %ax = sext i64 %a to i128
+ %bx = sext i64 %b to i128
+ %mulx = mul i128 %ax, %bx
+ %highx = lshr i128 %mulx, 64
+ %high = trunc i128 %highx to i64
+ %low = trunc i128 %mulx to i64
+ %or = or i64 %high, %low
+ ret i64 %or
+}
+
+; Check MG with no displacement.
+define i64 @f4(i64 %dummy, i64 %a, i64 *%src) {
+; CHECK-LABEL: f4:
+; CHECK-NOT: {{%r[234]}}
+; CHECK: mg %r2, 0(%r4)
+; CHECK: br %r14
+ %b = load i64 , i64 *%src
+ %ax = sext i64 %a to i128
+ %bx = sext i64 %b to i128
+ %mulx = mul i128 %ax, %bx
+ %highx = lshr i128 %mulx, 64
+ %high = trunc i128 %highx to i64
+ ret i64 %high
+}
+
+; Check the high end of the aligned MG range.
+define i64 @f5(i64 %dummy, i64 %a, i64 *%src) {
+; CHECK-LABEL: f5:
+; CHECK: mg %r2, 524280(%r4)
+; CHECK: br %r14
+ %ptr = getelementptr i64, i64 *%src, i64 65535
+ %b = load i64 , i64 *%ptr
+ %ax = sext i64 %a to i128
+ %bx = sext i64 %b to i128
+ %mulx = mul i128 %ax, %bx
+ %highx = lshr i128 %mulx, 64
+ %high = trunc i128 %highx to i64
+ ret i64 %high
+}
+
+; Check the next doubleword up, which requires separate address logic.
+; Other sequences besides this one would be OK.
+define i64 @f6(i64 %dummy, i64 %a, i64 *%src) {
+; CHECK-LABEL: f6:
+; CHECK: agfi %r4, 524288
+; CHECK: mg %r2, 0(%r4)
+; CHECK: br %r14
+ %ptr = getelementptr i64, i64 *%src, i64 65536
+ %b = load i64 , i64 *%ptr
+ %ax = sext i64 %a to i128
+ %bx = sext i64 %b to i128
+ %mulx = mul i128 %ax, %bx
+ %highx = lshr i128 %mulx, 64
+ %high = trunc i128 %highx to i64
+ ret i64 %high
+}
+
+; Check the high end of the negative aligned MG range.
+define i64 @f7(i64 %dummy, i64 %a, i64 *%src) {
+; CHECK-LABEL: f7:
+; CHECK: mg %r2, -8(%r4)
+; CHECK: br %r14
+ %ptr = getelementptr i64, i64 *%src, i64 -1
+ %b = load i64 , i64 *%ptr
+ %ax = sext i64 %a to i128
+ %bx = sext i64 %b to i128
+ %mulx = mul i128 %ax, %bx
+ %highx = lshr i128 %mulx, 64
+ %high = trunc i128 %highx to i64
+ ret i64 %high
+}
+
+; Check the low end of the MG range.
+define i64 @f8(i64 %dummy, i64 %a, i64 *%src) {
+; CHECK-LABEL: f8:
+; CHECK: mg %r2, -524288(%r4)
+; CHECK: br %r14
+ %ptr = getelementptr i64, i64 *%src, i64 -65536
+ %b = load i64 , i64 *%ptr
+ %ax = sext i64 %a to i128
+ %bx = sext i64 %b to i128
+ %mulx = mul i128 %ax, %bx
+ %highx = lshr i128 %mulx, 64
+ %high = trunc i128 %highx to i64
+ ret i64 %high
+}
+
+; Check the next doubleword down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i64 @f9(i64 *%dest, i64 %a, i64 *%src) {
+; CHECK-LABEL: f9:
+; CHECK: agfi %r4, -524296
+; CHECK: mg %r2, 0(%r4)
+; CHECK: br %r14
+ %ptr = getelementptr i64, i64 *%src, i64 -65537
+ %b = load i64 , i64 *%ptr
+ %ax = sext i64 %a to i128
+ %bx = sext i64 %b to i128
+ %mulx = mul i128 %ax, %bx
+ %highx = lshr i128 %mulx, 64
+ %high = trunc i128 %highx to i64
+ ret i64 %high
+}
+
+; Check that MG allows an index.
+define i64 @f10(i64 *%dest, i64 %a, i64 %src, i64 %index) {
+; CHECK-LABEL: f10:
+; CHECK: mg %r2, 524287(%r5,%r4)
+; CHECK: br %r14
+ %add1 = add i64 %src, %index
+ %add2 = add i64 %add1, 524287
+ %ptr = inttoptr i64 %add2 to i64 *
+ %b = load i64 , i64 *%ptr
+ %ax = sext i64 %a to i128
+ %bx = sext i64 %b to i128
+ %mulx = mul i128 %ax, %bx
+ %highx = lshr i128 %mulx, 64
+ %high = trunc i128 %highx to i64
+ ret i64 %high
+}
+
diff --git a/test/CodeGen/SystemZ/int-mul-11.ll b/test/CodeGen/SystemZ/int-mul-11.ll
new file mode 100644
index 00000000000..f2625198251
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-mul-11.ll
@@ -0,0 +1,32 @@
+; Test three-operand multiplication instructions on z14.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+; Check MSRKC.
+define i32 @f1(i32 %dummy, i32 %a, i32 %b) {
+; CHECK-LABEL: f1:
+; CHECK: msrkc %r2, %r3, %r4
+; CHECK: br %r14
+ %mul = mul i32 %a, %b
+ ret i32 %mul
+}
+
+; Check MSGRKC.
+define i64 @f2(i64 %dummy, i64 %a, i64 %b) {
+; CHECK-LABEL: f2:
+; CHECK: msgrkc %r2, %r3, %r4
+; CHECK: br %r14
+ %mul = mul i64 %a, %b
+ ret i64 %mul
+}
+
+; Verify that we still use MSGFR for i32->i64 multiplies.
+define i64 @f3(i64 %a, i32 %b) {
+; CHECK-LABEL: f3:
+; CHECK: msgfr %r2, %r3
+; CHECK: br %r14
+ %bext = sext i32 %b to i64
+ %mul = mul i64 %a, %bext
+ ret i64 %mul
+}
+
diff --git a/test/CodeGen/SystemZ/int-sub-10.ll b/test/CodeGen/SystemZ/int-sub-10.ll
new file mode 100644
index 00000000000..bf6638575e5
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-sub-10.ll
@@ -0,0 +1,95 @@
+; Test subtractions of a sign-extended i16 from an i64 on z14.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+declare i64 @foo()
+
+; Check SGH with no displacement.
+define i64 @f1(i64 %a, i16 *%src) {
+; CHECK-LABEL: f1:
+; CHECK: sgh %r2, 0(%r3)
+; CHECK: br %r14
+ %b = load i16, i16 *%src
+ %bext = sext i16 %b to i64
+ %sub = sub i64 %a, %bext
+ ret i64 %sub
+}
+
+; Check the high end of the aligned SGH range.
+define i64 @f2(i64 %a, i16 *%src) {
+; CHECK-LABEL: f2:
+; CHECK: sgh %r2, 524286(%r3)
+; CHECK: br %r14
+ %ptr = getelementptr i16, i16 *%src, i64 262143
+ %b = load i16, i16 *%ptr
+ %bext = sext i16 %b to i64
+ %sub = sub i64 %a, %bext
+ ret i64 %sub
+}
+
+; Check the next word up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i64 @f3(i64 %a, i16 *%src) {
+; CHECK-LABEL: f3:
+; CHECK: agfi %r3, 524288
+; CHECK: sgh %r2, 0(%r3)
+; CHECK: br %r14
+ %ptr = getelementptr i16, i16 *%src, i64 262144
+ %b = load i16, i16 *%ptr
+ %bext = sext i16 %b to i64
+ %sub = sub i64 %a, %bext
+ ret i64 %sub
+}
+
+; Check the high end of the negative aligned SGH range.
+define i64 @f4(i64 %a, i16 *%src) {
+; CHECK-LABEL: f4:
+; CHECK: sgh %r2, -2(%r3)
+; CHECK: br %r14
+ %ptr = getelementptr i16, i16 *%src, i64 -1
+ %b = load i16, i16 *%ptr
+ %bext = sext i16 %b to i64
+ %sub = sub i64 %a, %bext
+ ret i64 %sub
+}
+
+; Check the low end of the SGH range.
+define i64 @f5(i64 %a, i16 *%src) {
+; CHECK-LABEL: f5:
+; CHECK: sgh %r2, -524288(%r3)
+; CHECK: br %r14
+ %ptr = getelementptr i16, i16 *%src, i64 -262144
+ %b = load i16, i16 *%ptr
+ %bext = sext i16 %b to i64
+ %sub = sub i64 %a, %bext
+ ret i64 %sub
+}
+
+; Check the next word down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i64 @f6(i64 %a, i16 *%src) {
+; CHECK-LABEL: f6:
+; CHECK: agfi %r3, -524290
+; CHECK: sgh %r2, 0(%r3)
+; CHECK: br %r14
+ %ptr = getelementptr i16, i16 *%src, i64 -262145
+ %b = load i16, i16 *%ptr
+ %bext = sext i16 %b to i64
+ %sub = sub i64 %a, %bext
+ ret i64 %sub
+}
+
+; Check that SGH allows an index.
+define i64 @f7(i64 %a, i64 %src, i64 %index) {
+; CHECK-LABEL: f7:
+; CHECK: sgh %r2, 524284({{%r4,%r3|%r3,%r4}})
+; CHECK: br %r14
+ %add1 = add i64 %src, %index
+ %add2 = add i64 %add1, 524284
+ %ptr = inttoptr i64 %add2 to i16 *
+ %b = load i16, i16 *%ptr
+ %bext = sext i16 %b to i64
+ %sub = sub i64 %a, %bext
+ ret i64 %sub
+}
+
diff --git a/test/CodeGen/SystemZ/vec-and-04.ll b/test/CodeGen/SystemZ/vec-and-04.ll
new file mode 100644
index 00000000000..e9355beb429
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-and-04.ll
@@ -0,0 +1,47 @@
+; Test vector NAND on z14.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+; Test a v16i8 NAND.
+define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
+; CHECK-LABEL: f1:
+; CHECK: vnn %v24, %v26, %v28
+; CHECK: br %r14
+ %ret = and <16 x i8> %val1, %val2
+ %not = xor <16 x i8> %ret, <i8 -1, i8 -1, i8 -1, i8 -1,
+ i8 -1, i8 -1, i8 -1, i8 -1,
+ i8 -1, i8 -1, i8 -1, i8 -1,
+ i8 -1, i8 -1, i8 -1, i8 -1>
+ ret <16 x i8> %not
+}
+
+; Test a v8i16 NAND.
+define <8 x i16> @f2(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
+; CHECK-LABEL: f2:
+; CHECK: vnn %v24, %v26, %v28
+; CHECK: br %r14
+ %ret = and <8 x i16> %val1, %val2
+ %not = xor <8 x i16> %ret, <i16 -1, i16 -1, i16 -1, i16 -1,
+ i16 -1, i16 -1, i16 -1, i16 -1>
+ ret <8 x i16> %not
+}
+
+; Test a v4i32 NAND.
+define <4 x i32> @f3(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
+; CHECK-LABEL: f3:
+; CHECK: vnn %v24, %v26, %v28
+; CHECK: br %r14
+ %ret = and <4 x i32> %val1, %val2
+ %not = xor <4 x i32> %ret, <i32 -1, i32 -1, i32 -1, i32 -1>
+ ret <4 x i32> %not
+}
+
+; Test a v2i64 NAND.
+define <2 x i64> @f4(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
+; CHECK-LABEL: f4:
+; CHECK: vnn %v24, %v26, %v28
+; CHECK: br %r14
+ %ret = and <2 x i64> %val1, %val2
+ %not = xor <2 x i64> %ret, <i64 -1, i64 -1>
+ ret <2 x i64> %not
+}
diff --git a/test/CodeGen/SystemZ/vec-ctpop-02.ll b/test/CodeGen/SystemZ/vec-ctpop-02.ll
new file mode 100644
index 00000000000..ee50e88d043
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-ctpop-02.ll
@@ -0,0 +1,45 @@
+; Test vector population-count instruction on z14.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
+declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
+declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
+declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
+
+define <16 x i8> @f1(<16 x i8> %a) {
+; CHECK-LABEL: f1:
+; CHECK: vpopctb %v24, %v24
+; CHECK: br %r14
+
+ %popcnt = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
+ ret <16 x i8> %popcnt
+}
+
+define <8 x i16> @f2(<8 x i16> %a) {
+; CHECK-LABEL: f2:
+; CHECK: vpopcth %v24, %v24
+; CHECK: br %r14
+
+ %popcnt = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
+ ret <8 x i16> %popcnt
+}
+
+define <4 x i32> @f3(<4 x i32> %a) {
+; CHECK-LABEL: f3:
+; CHECK: vpopctf %v24, %v24
+; CHECK: br %r14
+
+ %popcnt = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
+ ret <4 x i32> %popcnt
+}
+
+define <2 x i64> @f4(<2 x i64> %a) {
+; CHECK-LABEL: f4:
+; CHECK: vpopctg %v24, %v24
+; CHECK: br %r14
+
+ %popcnt = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
+ ret <2 x i64> %popcnt
+}
+
diff --git a/test/CodeGen/SystemZ/vec-intrinsics.ll b/test/CodeGen/SystemZ/vec-intrinsics-01.ll
index 6f5eb0691aa..6f5eb0691aa 100644
--- a/test/CodeGen/SystemZ/vec-intrinsics.ll
+++ b/test/CodeGen/SystemZ/vec-intrinsics-01.ll
diff --git a/test/CodeGen/SystemZ/vec-intrinsics-02.ll b/test/CodeGen/SystemZ/vec-intrinsics-02.ll
new file mode 100644
index 00000000000..27ee83fc774
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-intrinsics-02.ll
@@ -0,0 +1,212 @@
+; Test vector intrinsics added with z14.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+declare <2 x i64> @llvm.s390.vbperm(<16 x i8>, <16 x i8>)
+declare <16 x i8> @llvm.s390.vmslg(<2 x i64>, <2 x i64>, <16 x i8>, i32)
+declare <16 x i8> @llvm.s390.vlrl(i32, i8 *)
+declare void @llvm.s390.vstrl(<16 x i8>, i32, i8 *)
+declare <2 x double> @llvm.s390.vfmaxdb(<2 x double>, <2 x double>, i32)
+declare <2 x double> @llvm.s390.vfmindb(<2 x double>, <2 x double>, i32)
+
+; VBPERM.
+define <2 x i64> @test_vbperm(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vbperm:
+; CHECK: vbperm %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <2 x i64> @llvm.s390.vbperm(<16 x i8> %a, <16 x i8> %b)
+ ret <2 x i64> %res
+}
+
+; VMSLG with no shifts.
+define <16 x i8> @test_vmslg1(<2 x i64> %a, <2 x i64> %b, <16 x i8> %c) {
+; CHECK-LABEL: test_vmslg1:
+; CHECK: vmslg %v24, %v24, %v26, %v28, 0
+; CHECK: br %r14
+ %res = call <16 x i8> @llvm.s390.vmslg(<2 x i64> %a, <2 x i64> %b, <16 x i8> %c, i32 0)
+ ret <16 x i8> %res
+}
+
+; VMSLG with both shifts.
+define <16 x i8> @test_vmslg2(<2 x i64> %a, <2 x i64> %b, <16 x i8> %c) {
+; CHECK-LABEL: test_vmslg2:
+; CHECK: vmslg %v24, %v24, %v26, %v28, 12
+; CHECK: br %r14
+ %res = call <16 x i8> @llvm.s390.vmslg(<2 x i64> %a, <2 x i64> %b, <16 x i8> %c, i32 12)
+ ret <16 x i8> %res
+}
+
+; VLRLR with the lowest in-range displacement.
+define <16 x i8> @test_vlrlr1(i8 *%ptr, i32 %length) {
+; CHECK-LABEL: test_vlrlr1:
+; CHECK: vlrlr %v24, %r3, 0(%r2)
+; CHECK: br %r14
+ %res = call <16 x i8> @llvm.s390.vlrl(i32 %length, i8 *%ptr)
+ ret <16 x i8> %res
+}
+
+; VLRLR with the highest in-range displacement.
+define <16 x i8> @test_vlrlr2(i8 *%base, i32 %length) {
+; CHECK-LABEL: test_vlrlr2:
+; CHECK: vlrlr %v24, %r3, 4095(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr i8, i8 *%base, i64 4095
+ %res = call <16 x i8> @llvm.s390.vlrl(i32 %length, i8 *%ptr)
+ ret <16 x i8> %res
+}
+
+; VLRLR with an out-of-range displacement.
+define <16 x i8> @test_vlrlr3(i8 *%base, i32 %length) {
+; CHECK-LABEL: test_vlrlr3:
+; CHECK: vlrlr %v24, %r3, 0({{%r[1-5]}})
+; CHECK: br %r14
+ %ptr = getelementptr i8, i8 *%base, i64 4096
+ %res = call <16 x i8> @llvm.s390.vlrl(i32 %length, i8 *%ptr)
+ ret <16 x i8> %res
+}
+
+; Check that VLRLR doesn't allow an index.
+define <16 x i8> @test_vlrlr4(i8 *%base, i64 %index, i32 %length) {
+; CHECK-LABEL: test_vlrlr4:
+; CHECK: vlrlr %v24, %r4, 0({{%r[1-5]}})
+; CHECK: br %r14
+ %ptr = getelementptr i8, i8 *%base, i64 %index
+ %res = call <16 x i8> @llvm.s390.vlrl(i32 %length, i8 *%ptr)
+ ret <16 x i8> %res
+}
+
+; VLRL with the lowest in-range displacement.
+define <16 x i8> @test_vlrl1(i8 *%ptr) {
+; CHECK-LABEL: test_vlrl1:
+; CHECK: vlrl %v24, 0(%r2), 0
+; CHECK: br %r14
+ %res = call <16 x i8> @llvm.s390.vlrl(i32 0, i8 *%ptr)
+ ret <16 x i8> %res
+}
+
+; VLRL with the highest in-range displacement.
+define <16 x i8> @test_vlrl2(i8 *%base) {
+; CHECK-LABEL: test_vlrl2:
+; CHECK: vlrl %v24, 4095(%r2), 0
+; CHECK: br %r14
+ %ptr = getelementptr i8, i8 *%base, i64 4095
+ %res = call <16 x i8> @llvm.s390.vlrl(i32 0, i8 *%ptr)
+ ret <16 x i8> %res
+}
+
+; VLRL with an out-of-range displacement.
+define <16 x i8> @test_vlrl3(i8 *%base) {
+; CHECK-LABEL: test_vlrl3:
+; CHECK: vlrl %v24, 0({{%r[1-5]}}), 0
+; CHECK: br %r14
+ %ptr = getelementptr i8, i8 *%base, i64 4096
+ %res = call <16 x i8> @llvm.s390.vlrl(i32 0, i8 *%ptr)
+ ret <16 x i8> %res
+}
+
+; Check that VLRL doesn't allow an index.
+define <16 x i8> @test_vlrl4(i8 *%base, i64 %index) {
+; CHECK-LABEL: test_vlrl4:
+; CHECK: vlrl %v24, 0({{%r[1-5]}}), 0
+; CHECK: br %r14
+ %ptr = getelementptr i8, i8 *%base, i64 %index
+ %res = call <16 x i8> @llvm.s390.vlrl(i32 0, i8 *%ptr)
+ ret <16 x i8> %res
+}
+
+; VSTRLR with the lowest in-range displacement.
+define void @test_vstrlr1(<16 x i8> %vec, i8 *%ptr, i32 %length) {
+; CHECK-LABEL: test_vstrlr1:
+; CHECK: vstrlr %v24, %r3, 0(%r2)
+; CHECK: br %r14
+ call void @llvm.s390.vstrl(<16 x i8> %vec, i32 %length, i8 *%ptr)
+ ret void
+}
+
+; VSTRLR with the highest in-range displacement.
+define void @test_vstrlr2(<16 x i8> %vec, i8 *%base, i32 %length) {
+; CHECK-LABEL: test_vstrlr2:
+; CHECK: vstrlr %v24, %r3, 4095(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr i8, i8 *%base, i64 4095
+ call void @llvm.s390.vstrl(<16 x i8> %vec, i32 %length, i8 *%ptr)
+ ret void
+}
+
+; VSTRLR with an out-of-range displacement.
+define void @test_vstrlr3(<16 x i8> %vec, i8 *%base, i32 %length) {
+; CHECK-LABEL: test_vstrlr3:
+; CHECK: vstrlr %v24, %r3, 0({{%r[1-5]}})
+; CHECK: br %r14
+ %ptr = getelementptr i8, i8 *%base, i64 4096
+ call void @llvm.s390.vstrl(<16 x i8> %vec, i32 %length, i8 *%ptr)
+ ret void
+}
+
+; Check that VSTRLR doesn't allow an index.
+define void @test_vstrlr4(<16 x i8> %vec, i8 *%base, i64 %index, i32 %length) {
+; CHECK-LABEL: test_vstrlr4:
+; CHECK: vstrlr %v24, %r4, 0({{%r[1-5]}})
+; CHECK: br %r14
+ %ptr = getelementptr i8, i8 *%base, i64 %index
+ call void @llvm.s390.vstrl(<16 x i8> %vec, i32 %length, i8 *%ptr)
+ ret void
+}
+
+; VSTRL with the lowest in-range displacement.
+define void @test_vstrl1(<16 x i8> %vec, i8 *%ptr) {
+; CHECK-LABEL: test_vstrl1:
+; CHECK: vstrl %v24, 0(%r2), 8
+; CHECK: br %r14
+ call void @llvm.s390.vstrl(<16 x i8> %vec, i32 8, i8 *%ptr)
+ ret void
+}
+
+; VSTRL with the highest in-range displacement.
+define void @test_vstrl2(<16 x i8> %vec, i8 *%base) {
+; CHECK-LABEL: test_vstrl2:
+; CHECK: vstrl %v24, 4095(%r2), 8
+; CHECK: br %r14
+ %ptr = getelementptr i8, i8 *%base, i64 4095
+ call void @llvm.s390.vstrl(<16 x i8> %vec, i32 8, i8 *%ptr)
+ ret void
+}
+
+; VSTRL with an out-of-range displacement.
+define void @test_vstrl3(<16 x i8> %vec, i8 *%base) {
+; CHECK-LABEL: test_vstrl3:
+; CHECK: vstrl %v24, 0({{%r[1-5]}}), 8
+; CHECK: br %r14
+ %ptr = getelementptr i8, i8 *%base, i64 4096
+ call void @llvm.s390.vstrl(<16 x i8> %vec, i32 8, i8 *%ptr)
+ ret void
+}
+
+; Check that VSTRL doesn't allow an index.
+define void @test_vstrl4(<16 x i8> %vec, i8 *%base, i64 %index) {
+; CHECK-LABEL: test_vstrl4:
+; CHECK: vstrl %v24, 0({{%r[1-5]}}), 8
+; CHECK: br %r14
+ %ptr = getelementptr i8, i8 *%base, i64 %index
+ call void @llvm.s390.vstrl(<16 x i8> %vec, i32 8, i8 *%ptr)
+ ret void
+}
+
+; VFMAXDB.
+define <2 x double> @test_vfmaxdb(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: test_vfmaxdb:
+; CHECK: vfmaxdb %v24, %v24, %v26, 4
+; CHECK: br %r14
+ %res = call <2 x double> @llvm.s390.vfmaxdb(<2 x double> %a, <2 x double> %b, i32 4)
+ ret <2 x double> %res
+}
+
+; VFMINDB.
+define <2 x double> @test_vfmindb(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: test_vfmindb:
+; CHECK: vfmindb %v24, %v24, %v26, 4
+; CHECK: br %r14
+ %res = call <2 x double> @llvm.s390.vfmindb(<2 x double> %a, <2 x double> %b, i32 4)
+ ret <2 x double> %res
+}
+
diff --git a/test/CodeGen/SystemZ/vec-max-05.ll b/test/CodeGen/SystemZ/vec-max-05.ll
new file mode 100644
index 00000000000..44efac76423
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-max-05.ll
@@ -0,0 +1,58 @@
+; Test vector maximum on z14.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+declare double @fmax(double, double)
+declare double @llvm.maxnum.f64(double, double)
+declare <2 x double> @llvm.maxnum.v2f64(<2 x double>, <2 x double>)
+
+; Test the fmax library function.
+define double @f1(double %dummy, double %val1, double %val2) {
+; CHECK-LABEL: f1:
+; CHECK: wfmaxdb %f0, %f2, %f4, 4
+; CHECK: br %r14
+ %ret = call double @fmax(double %val1, double %val2) readnone
+ ret double %ret
+}
+
+; Test the f64 maxnum intrinsic.
+define double @f2(double %dummy, double %val1, double %val2) {
+; CHECK-LABEL: f2:
+; CHECK: wfmaxdb %f0, %f2, %f4, 4
+; CHECK: br %r14
+ %ret = call double @llvm.maxnum.f64(double %val1, double %val2)
+ ret double %ret
+}
+
+; Test a f64 constant compare/select resulting in maxnum.
+define double @f3(double %dummy, double %val) {
+; CHECK-LABEL: f3:
+; CHECK: lzdr [[REG:%f[0-9]+]]
+; CHECK: wfmaxdb %f0, %f2, [[REG]], 4
+; CHECK: br %r14
+ %cmp = fcmp ogt double %val, 0.0
+ %ret = select i1 %cmp, double %val, double 0.0
+ ret double %ret
+}
+
+; Test a f64 constant compare/select resulting in maxnan.
+define double @f4(double %dummy, double %val) {
+; CHECK-LABEL: f4:
+; CHECK: lzdr [[REG:%f[0-9]+]]
+; CHECK: wfmaxdb %f0, %f2, [[REG]], 1
+; CHECK: br %r14
+ %cmp = fcmp ugt double %val, 0.0
+ %ret = select i1 %cmp, double %val, double 0.0
+ ret double %ret
+}
+
+; Test the v2f64 maxnum intrinsic.
+define <2 x double> @f5(<2 x double> %dummy, <2 x double> %val1,
+ <2 x double> %val2) {
+; CHECK-LABEL: f5:
+; CHECK: vfmaxdb %v24, %v26, %v28, 4
+; CHECK: br %r14
+ %ret = call <2 x double> @llvm.maxnum.v2f64(<2 x double> %val1, <2 x double> %val2)
+ ret <2 x double> %ret
+}
+
diff --git a/test/CodeGen/SystemZ/vec-min-05.ll b/test/CodeGen/SystemZ/vec-min-05.ll
new file mode 100644
index 00000000000..c2d8726addf
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-min-05.ll
@@ -0,0 +1,58 @@
+; Test vector minimum on z14.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+declare double @fmin(double, double)
+declare double @llvm.minnum.f64(double, double)
+declare <2 x double> @llvm.minnum.v2f64(<2 x double>, <2 x double>)
+
+; Test the fmin library function.
+define double @f1(double %dummy, double %val1, double %val2) {
+; CHECK-LABEL: f1:
+; CHECK: wfmindb %f0, %f2, %f4, 4
+; CHECK: br %r14
+ %ret = call double @fmin(double %val1, double %val2) readnone
+ ret double %ret
+}
+
+; Test the f64 minnum intrinsic.
+define double @f2(double %dummy, double %val1, double %val2) {
+; CHECK-LABEL: f2:
+; CHECK: wfmindb %f0, %f2, %f4, 4
+; CHECK: br %r14
+ %ret = call double @llvm.minnum.f64(double %val1, double %val2)
+ ret double %ret
+}
+
+; Test a f64 constant compare/select resulting in minnum.
+define double @f3(double %dummy, double %val) {
+; CHECK-LABEL: f3:
+; CHECK: lzdr [[REG:%f[0-9]+]]
+; CHECK: wfmindb %f0, %f2, [[REG]], 4
+; CHECK: br %r14
+ %cmp = fcmp olt double %val, 0.0
+ %ret = select i1 %cmp, double %val, double 0.0
+ ret double %ret
+}
+
+; Test a f64 constant compare/select resulting in minnan.
+define double @f4(double %dummy, double %val) {
+; CHECK-LABEL: f4:
+; CHECK: lzdr [[REG:%f[0-9]+]]
+; CHECK: wfmindb %f0, %f2, [[REG]], 1
+; CHECK: br %r14
+ %cmp = fcmp ult double %val, 0.0
+ %ret = select i1 %cmp, double %val, double 0.0
+ ret double %ret
+}
+
+; Test the v2f64 minnum intrinsic.
+define <2 x double> @f5(<2 x double> %dummy, <2 x double> %val1,
+ <2 x double> %val2) {
+; CHECK-LABEL: f5:
+; CHECK: vfmindb %v24, %v26, %v28, 4
+; CHECK: br %r14
+ %ret = call <2 x double> @llvm.minnum.v2f64(<2 x double> %val1, <2 x double> %val2)
+ ret <2 x double> %ret
+}
+
diff --git a/test/CodeGen/SystemZ/vec-move-18.ll b/test/CodeGen/SystemZ/vec-move-18.ll
new file mode 100644
index 00000000000..5d3d09d83ef
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-move-18.ll
@@ -0,0 +1,24 @@
+; Test insertions of memory values into 0 on z14.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+; Test VLLEZLF.
+define <4 x i32> @f1(i32 *%ptr) {
+; CHECK-LABEL: f1:
+; CHECK: vllezlf %v24, 0(%r2)
+; CHECK: br %r14
+ %val = load i32, i32 *%ptr
+ %ret = insertelement <4 x i32> zeroinitializer, i32 %val, i32 0
+ ret <4 x i32> %ret
+}
+
+; Test VLLEZLF with a float.
+define <4 x float> @f2(float *%ptr) {
+; CHECK-LABEL: f2:
+; CHECK: vllezlf %v24, 0(%r2)
+; CHECK: br %r14
+ %val = load float, float *%ptr
+ %ret = insertelement <4 x float> zeroinitializer, float %val, i32 0
+ ret <4 x float> %ret
+}
+
diff --git a/test/CodeGen/SystemZ/vec-mul-05.ll b/test/CodeGen/SystemZ/vec-mul-05.ll
new file mode 100644
index 00000000000..c05437d4923
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-mul-05.ll
@@ -0,0 +1,32 @@
+; Test vector negative multiply-and-add on z14.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)
+
+; Test a v2f64 negative multiply-and-add.
+define <2 x double> @f1(<2 x double> %dummy, <2 x double> %val1,
+ <2 x double> %val2, <2 x double> %val3) {
+; CHECK-LABEL: f1:
+; CHECK: vfnmadb %v24, %v26, %v28, %v30
+; CHECK: br %r14
+ %ret = call <2 x double> @llvm.fma.v2f64 (<2 x double> %val1,
+ <2 x double> %val2,
+ <2 x double> %val3)
+ %negret = fsub <2 x double> <double -0.0, double -0.0>, %ret
+ ret <2 x double> %negret
+}
+
+; Test a v2f64 negative multiply-and-subtract.
+define <2 x double> @f2(<2 x double> %dummy, <2 x double> %val1,
+ <2 x double> %val2, <2 x double> %val3) {
+; CHECK-LABEL: f2:
+; CHECK: vfnmsdb %v24, %v26, %v28, %v30
+; CHECK: br %r14
+ %negval3 = fsub <2 x double> <double -0.0, double -0.0>, %val3
+ %ret = call <2 x double> @llvm.fma.v2f64 (<2 x double> %val1,
+ <2 x double> %val2,
+ <2 x double> %negval3)
+ %negret = fsub <2 x double> <double -0.0, double -0.0>, %ret
+ ret <2 x double> %negret
+}
diff --git a/test/CodeGen/SystemZ/vec-or-03.ll b/test/CodeGen/SystemZ/vec-or-03.ll
new file mode 100644
index 00000000000..010629d880d
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-or-03.ll
@@ -0,0 +1,91 @@
+; Test vector OR-NOT on z14.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+; Test a v16i8 OR-NOT.
+define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
+; CHECK-LABEL: f1:
+; CHECK: voc %v24, %v26, %v28
+; CHECK: br %r14
+ %not = xor <16 x i8> %val2, <i8 -1, i8 -1, i8 -1, i8 -1,
+ i8 -1, i8 -1, i8 -1, i8 -1,
+ i8 -1, i8 -1, i8 -1, i8 -1,
+ i8 -1, i8 -1, i8 -1, i8 -1>
+ %ret = or <16 x i8> %val1, %not
+ ret <16 x i8> %ret
+}
+
+; ...and again with the reverse.
+define <16 x i8> @f2(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
+; CHECK-LABEL: f2:
+; CHECK: voc %v24, %v28, %v26
+; CHECK: br %r14
+ %not = xor <16 x i8> %val1, <i8 -1, i8 -1, i8 -1, i8 -1,
+ i8 -1, i8 -1, i8 -1, i8 -1,
+ i8 -1, i8 -1, i8 -1, i8 -1,
+ i8 -1, i8 -1, i8 -1, i8 -1>
+ %ret = or <16 x i8> %not, %val2
+ ret <16 x i8> %ret
+}
+
+; Test a v8i16 OR-NOT.
+define <8 x i16> @f3(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
+; CHECK-LABEL: f3:
+; CHECK: voc %v24, %v26, %v28
+; CHECK: br %r14
+ %not = xor <8 x i16> %val2, <i16 -1, i16 -1, i16 -1, i16 -1,
+ i16 -1, i16 -1, i16 -1, i16 -1>
+ %ret = or <8 x i16> %val1, %not
+ ret <8 x i16> %ret
+}
+
+; ...and again with the reverse.
+define <8 x i16> @f4(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
+; CHECK-LABEL: f4:
+; CHECK: voc %v24, %v28, %v26
+; CHECK: br %r14
+ %not = xor <8 x i16> %val1, <i16 -1, i16 -1, i16 -1, i16 -1,
+ i16 -1, i16 -1, i16 -1, i16 -1>
+ %ret = or <8 x i16> %not, %val2
+ ret <8 x i16> %ret
+}
+
+; Test a v4i32 OR-NOT.
+define <4 x i32> @f5(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
+; CHECK-LABEL: f5:
+; CHECK: voc %v24, %v26, %v28
+; CHECK: br %r14
+ %not = xor <4 x i32> %val2, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %ret = or <4 x i32> %val1, %not
+ ret <4 x i32> %ret
+}
+
+; ...and again with the reverse.
+define <4 x i32> @f6(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
+; CHECK-LABEL: f6:
+; CHECK: voc %v24, %v28, %v26
+; CHECK: br %r14
+ %not = xor <4 x i32> %val1, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %ret = or <4 x i32> %not, %val2
+ ret <4 x i32> %ret
+}
+
+; Test a v2i64 OR-NOT.
+define <2 x i64> @f7(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
+; CHECK-LABEL: f7:
+; CHECK: voc %v24, %v26, %v28
+; CHECK: br %r14
+ %not = xor <2 x i64> %val2, <i64 -1, i64 -1>
+ %ret = or <2 x i64> %val1, %not
+ ret <2 x i64> %ret
+}
+
+; ...and again with the reverse.
+define <2 x i64> @f8(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
+; CHECK-LABEL: f8:
+; CHECK: voc %v24, %v28, %v26
+; CHECK: br %r14
+ %not = xor <2 x i64> %val1, <i64 -1, i64 -1>
+ %ret = or <2 x i64> %not, %val2
+ ret <2 x i64> %ret
+}
diff --git a/test/CodeGen/SystemZ/vec-xor-02.ll b/test/CodeGen/SystemZ/vec-xor-02.ll
new file mode 100644
index 00000000000..b4b5a96ba25
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-xor-02.ll
@@ -0,0 +1,47 @@
+; Test vector NOT-XOR on z14.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+; Test a v16i8 NOT-XOR.
+define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
+; CHECK-LABEL: f1:
+; CHECK: vnx %v24, %v26, %v28
+; CHECK: br %r14
+ %ret = xor <16 x i8> %val1, %val2
+ %not = xor <16 x i8> %ret, <i8 -1, i8 -1, i8 -1, i8 -1,
+ i8 -1, i8 -1, i8 -1, i8 -1,
+ i8 -1, i8 -1, i8 -1, i8 -1,
+ i8 -1, i8 -1, i8 -1, i8 -1>
+ ret <16 x i8> %not
+}
+
+; Test a v8i16 NOT-XOR.
+define <8 x i16> @f2(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
+; CHECK-LABEL: f2:
+; CHECK: vnx %v24, %v26, %v28
+; CHECK: br %r14
+ %ret = xor <8 x i16> %val1, %val2
+ %not = xor <8 x i16> %ret, <i16 -1, i16 -1, i16 -1, i16 -1,
+ i16 -1, i16 -1, i16 -1, i16 -1>
+ ret <8 x i16> %not
+}
+
+; Test a v4i32 NOT-XOR.
+define <4 x i32> @f3(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
+; CHECK-LABEL: f3:
+; CHECK: vnx %v24, %v26, %v28
+; CHECK: br %r14
+ %ret = xor <4 x i32> %val1, %val2
+ %not = xor <4 x i32> %ret, <i32 -1, i32 -1, i32 -1, i32 -1>
+ ret <4 x i32> %not
+}
+
+; Test a v2i64 NOT-XOR.
+define <2 x i64> @f4(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
+; CHECK-LABEL: f4:
+; CHECK: vnx %v24, %v26, %v28
+; CHECK: br %r14
+ %ret = xor <2 x i64> %val1, %val2
+ %not = xor <2 x i64> %ret, <i64 -1, i64 -1>
+ ret <2 x i64> %not
+}