diff options
author | Nemanja Ivanovic <nemanja.i.ibm@gmail.com> | 2016-09-22 09:52:19 +0000 |
---|---|---|
committer | Nemanja Ivanovic <nemanja.i.ibm@gmail.com> | 2016-09-22 09:52:19 +0000 |
commit | a941fe247ef597fbc9f449adb8bc02f5a9e32863 (patch) | |
tree | 82b06a913ddadb0590e41b1df90fff5f139ac1b5 /test | |
parent | 5b1a39068cd1fcf0668f9d818faf77b7a8ac7608 (diff) |
[Power9] Add exploitation of non-permuting memory ops
This patch corresponds to review:
https://reviews.llvm.org/D19825
The new lxvx/stxvx instructions do not require swaps to line the elements
up correctly. In order to select them over the lxvd2x/lxvw4x instructions, which
require swaps, the patterns for the old instructions have a predicate that
ensures they won't be selected on Power9 and newer CPUs.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@282143 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test')
-rw-r--r-- | test/CodeGen/PowerPC/lxvw4x-bug.ll | 13 | ||||
-rw-r--r-- | test/CodeGen/PowerPC/p9-xxinsertw-xxextractuw.ll | 8 | ||||
-rw-r--r-- | test/CodeGen/PowerPC/ppc64-i128-abi.ll | 51 | ||||
-rw-r--r-- | test/CodeGen/PowerPC/swaps-le-1.ll | 44 | ||||
-rw-r--r-- | test/CodeGen/PowerPC/swaps-le-6.ll | 24 | ||||
-rw-r--r-- | test/CodeGen/PowerPC/vsx-ldst-builtin-le.ll | 35 | ||||
-rw-r--r-- | test/CodeGen/PowerPC/vsx-ldst.ll | 16 | ||||
-rw-r--r-- | test/CodeGen/PowerPC/vsx-p9.ll | 146 | ||||
-rw-r--r-- | test/CodeGen/PowerPC/vsx_insert_extract_le.ll | 29 | ||||
-rw-r--r-- | test/CodeGen/PowerPC/vsx_shuffle_le.ll | 80 |
10 files changed, 425 insertions, 21 deletions
diff --git a/test/CodeGen/PowerPC/lxvw4x-bug.ll b/test/CodeGen/PowerPC/lxvw4x-bug.ll index 9390819d560..33185671d40 100644 --- a/test/CodeGen/PowerPC/lxvw4x-bug.ll +++ b/test/CodeGen/PowerPC/lxvw4x-bug.ll @@ -1,4 +1,13 @@ -; RUN: llc -verify-machineinstrs -O0 -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -O0 -mcpu=pwr8 \ +; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s + +; RUN: llc -verify-machineinstrs -O0 -mcpu=pwr9 \ +; RUN: -mtriple=powerpc64le-unknown-unknown < %s \ +; RUN: | FileCheck %s --check-prefix=CHECK-P9 --implicit-check-not xxswapd + +; RUN: llc -verify-machineinstrs -O0 -mcpu=pwr9 -mattr=-power9-vector \ +; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s + ; Function Attrs: nounwind define void @test() { entry: @@ -17,6 +26,8 @@ entry: ; CHECK: lwa [[REG0:[0-9]+]], ; CHECK: lxvd2x [[REG1:[0-9]+]], {{[0-9]+}}, [[REG0]] ; CHECK: xxswapd [[REG1]], [[REG1]] +; CHECK-P9: lwa [[REG0:[0-9]+]], +; CHECK-P9: lxvx [[REG1:[0-9]+]], {{[0-9]+}}, [[REG0]] store <4 x i32> %4, <4 x i32>* %j, align 16 ret void } diff --git a/test/CodeGen/PowerPC/p9-xxinsertw-xxextractuw.ll b/test/CodeGen/PowerPC/p9-xxinsertw-xxextractuw.ll index fa2844b8d55..8b4a76757c3 100644 --- a/test/CodeGen/PowerPC/p9-xxinsertw-xxextractuw.ll +++ b/test/CodeGen/PowerPC/p9-xxinsertw-xxextractuw.ll @@ -972,10 +972,10 @@ define <4 x float> @insertVarF(<4 x float> %a, float %f, i32 %el) { entry: ; CHECK-LABEL: insertVarF ; CHECK: stxsspx 1, -; CHECK: lxvd2x +; CHECK: lxvx ; CHECK-BE-LABEL: insertVarF ; CHECK-BE: stxsspx 1, -; CHECK-BE: lxvw4x +; CHECK-BE: lxvx %vecins = insertelement <4 x float> %a, float %f, i32 %el ret <4 x float> %vecins } @@ -983,10 +983,10 @@ define <4 x i32> @insertVarI(<4 x i32> %a, i32 %i, i32 %el) { entry: ; CHECK-LABEL: insertVarI ; CHECK: stwx -; CHECK: lxvd2x +; CHECK: lxvx ; CHECK-BE-LABEL: insertVarI ; CHECK-BE: stwx -; CHECK-BE: lxvw4x +; CHECK-BE: lxvx %vecins = insertelement <4 x 
i32> %a, i32 %i, i32 %el ret <4 x i32> %vecins } diff --git a/test/CodeGen/PowerPC/ppc64-i128-abi.ll b/test/CodeGen/PowerPC/ppc64-i128-abi.ll index 7dbe710832b..8d5a8cdf3a3 100644 --- a/test/CodeGen/PowerPC/ppc64-i128-abi.ll +++ b/test/CodeGen/PowerPC/ppc64-i128-abi.ll @@ -1,9 +1,32 @@ -; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s -check-prefix=CHECK-LE -; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s -check-prefix=CHECK-BE -; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-NOVSX -; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-NOVSX -; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-BE-NOVSX -; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-LE-NOVSX +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=pwr8 < %s | FileCheck %s -check-prefix=CHECK-LE + +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -mcpu=pwr8 < %s | FileCheck %s -check-prefix=CHECK-BE + +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-NOVSX + +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-NOVSX + +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-BE-NOVSX + +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s 
-check-prefix=CHECK-LE-NOVSX + +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=pwr9 < %s | FileCheck %s -check-prefix=CHECK-P9 \ +; RUN: --implicit-check-not xxswapd + +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=pwr9 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-NOVSX \ +; RUN: --implicit-check-not xxswapd + +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=pwr9 -mattr=-power9-vector < %s | FileCheck %s \ +; RUN: -check-prefix=CHECK-LE @x = common global <1 x i128> zeroinitializer, align 16 @y = common global <1 x i128> zeroinitializer, align 16 @@ -31,6 +54,11 @@ define <1 x i128> @v1i128_increment_by_one(<1 x i128> %a) nounwind { ; CHECK-LE: vadduqm 2, 2, 3 ; CHECK-LE: blr +; CHECK-P9-LABEL: @v1i128_increment_by_one +; CHECK-P9: lxvx +; CHECK-P9: vadduqm 2, 2, 3 +; CHECK-P9: blr + ; CHECK-BE-LABEL: @v1i128_increment_by_one ; CHECK-BE: lxvd2x 35, {{[0-9]+}}, {{[0-9]+}} ; CHECK-BE-NOT: xxswapd @@ -171,6 +199,11 @@ define <1 x i128> @call_v1i128_increment_by_one() nounwind { ; CHECK-LE: bl v1i128_increment_by_one ; CHECK-LE: blr +; CHECK-P9-LABEL: @call_v1i128_increment_by_one +; CHECK-P9: lxvx +; CHECK-P9: bl v1i128_increment_by_one +; CHECK-P9: blr + ; CHECK-BE-LABEL: @call_v1i128_increment_by_one ; CHECK-BE: lxvw4x 34, {{[0-9]+}}, {{[0-9]+}} ; CHECK-BE-NOT: xxswapd 34, {{[0-9]+}} @@ -198,6 +231,12 @@ define <1 x i128> @call_v1i128_increment_by_val() nounwind { ; CHECK-LE: bl v1i128_increment_by_val ; CHECK-LE: blr +; CHECK-P9-LABEL: @call_v1i128_increment_by_val +; CHECK-P9-DAG: lxvx 34 +; CHECK-P9-DAG: lxvx 35 +; CHECK-P9: bl v1i128_increment_by_val +; CHECK-P9: blr + ; CHECK-BE-LABEL: @call_v1i128_increment_by_val diff --git a/test/CodeGen/PowerPC/swaps-le-1.ll b/test/CodeGen/PowerPC/swaps-le-1.ll index ec53017ae60..cb83bf262b4 100644 --- a/test/CodeGen/PowerPC/swaps-le-1.ll +++ b/test/CodeGen/PowerPC/swaps-le-1.ll @@ -1,5 +1,17 @@ 
-; RUN: llc -verify-machineinstrs -O3 -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s -; RUN: llc -verify-machineinstrs -O3 -mcpu=pwr8 -disable-ppc-vsx-swap-removal -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck -check-prefix=NOOPTSWAP %s +; RUN: llc -verify-machineinstrs -O3 -mcpu=pwr8 \ +; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s + +; RUN: llc -verify-machineinstrs -O3 -mcpu=pwr8 -disable-ppc-vsx-swap-removal \ +; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck \ +; RUN: -check-prefix=NOOPTSWAP %s + +; RUN: llc -O3 -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -verify-machineinstrs -ppc-vsr-nums-as-vr < %s | FileCheck \ +; RUN: -check-prefix=CHECK-P9 --implicit-check-not xxswapd %s + +; RUN: llc -O3 -mcpu=pwr9 -disable-ppc-vsx-swap-removal -mattr=-power9-vector \ +; RUN: -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu < %s \ +; RUN: | FileCheck -check-prefix=NOOPTSWAP %s ; This test was generated from the following source: ; @@ -98,6 +110,7 @@ for.end: ; CHECK-LABEL: @foo ; CHECK-NOT: xxpermdi ; CHECK-NOT: xxswapd +; CHECK-P9-NOT: xxpermdi ; CHECK: lxvd2x ; CHECK: lxvd2x @@ -127,7 +140,6 @@ for.end: ; CHECK: vmuluwm ; CHECK: stxvd2x - ; NOOPTSWAP-LABEL: @foo ; NOOPTSWAP: lxvd2x @@ -145,3 +157,29 @@ for.end: ; NOOPTSWAP-DAG: stxvd2x ; NOOPTSWAP: stxvd2x +; CHECK-P9-LABEL: @foo +; CHECK-P9: lxvx +; CHECK-P9: lxvx +; CHECK-P9: lxvx +; CHECK-P9: lxvx +; CHECK-P9: lxvx +; CHECK-P9: lxvx +; CHECK-P9: lxvx +; CHECK-P9: lxvx +; CHECK-P9: lxvx +; CHECK-P9-DAG: lxvx +; CHECK-P9-DAG: lxvx +; CHECK-P9-DAG: lxvx +; CHECK-P9-DAG: vadduwm +; CHECK-P9-DAG: vadduwm +; CHECK-P9-DAG: vadduwm +; CHECK-P9-DAG: vadduwm +; CHECK-P9: vmuluwm +; CHECK-P9: vmuluwm +; CHECK-P9: vmuluwm +; CHECK-P9-DAG: vmuluwm +; CHECK-P9-DAG: stxvx +; CHECK-P9: stxvx +; CHECK-P9: stxvx +; CHECK-P9: stxvx + diff --git a/test/CodeGen/PowerPC/swaps-le-6.ll b/test/CodeGen/PowerPC/swaps-le-6.ll index 
0d39536e5f3..da6605e494f 100644 --- a/test/CodeGen/PowerPC/swaps-le-6.ll +++ b/test/CodeGen/PowerPC/swaps-le-6.ll @@ -1,4 +1,12 @@ -; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu -O3 < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mcpu=pwr8 \ +; RUN: -mtriple=powerpc64le-unknown-linux-gnu -O3 < %s | FileCheck %s + +; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu -O3 \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK-P9 \ +; RUN: --implicit-check-not xxswapd + +; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu -O3 \ +; RUN: -verify-machineinstrs -mattr=-power9-vector < %s | FileCheck %s ; These tests verify that VSX swap optimization works when loading a scalar ; into a vector register. @@ -24,6 +32,13 @@ entry: ; CHECK: xxpermdi [[REG5:[0-9]+]], [[REG4]], [[REG1]], 1 ; CHECK: stxvd2x [[REG5]] +; CHECK-P9-LABEL: @bar0 +; CHECK-P9-DAG: lxvx [[REG1:[0-9]+]] +; CHECK-P9-DAG: lxsdx [[REG2:[0-9]+]] +; CHECK-P9: xxspltd [[REG4:[0-9]+]], [[REG2]], 0 +; CHECK-P9: xxpermdi [[REG5:[0-9]+]], [[REG4]], [[REG1]], 1 +; CHECK-P9: stxvx [[REG5]] + define void @bar1() { entry: %0 = load <2 x double>, <2 x double>* @x, align 16 @@ -40,3 +55,10 @@ entry: ; CHECK: xxmrghd [[REG5:[0-9]+]], [[REG1]], [[REG4]] ; CHECK: stxvd2x [[REG5]] +; CHECK-P9-LABEL: @bar1 +; CHECK-P9-DAG: lxvx [[REG1:[0-9]+]] +; CHECK-P9-DAG: lxsdx [[REG2:[0-9]+]] +; CHECK-P9: xxspltd [[REG4:[0-9]+]], [[REG2]], 0 +; CHECK-P9: xxmrghd [[REG5:[0-9]+]], [[REG1]], [[REG4]] +; CHECK-P9: stxvx [[REG5]] + diff --git a/test/CodeGen/PowerPC/vsx-ldst-builtin-le.ll b/test/CodeGen/PowerPC/vsx-ldst-builtin-le.ll index c65f2a17eb0..acedc260633 100644 --- a/test/CodeGen/PowerPC/vsx-ldst-builtin-le.ll +++ b/test/CodeGen/PowerPC/vsx-ldst-builtin-le.ll @@ -1,4 +1,12 @@ -; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mattr=+vsx -O2 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mattr=+vsx -O2 
\ +; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s + +; RUN: llc -verify-machineinstrs -mcpu=pwr9 -O2 \ +; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s \ +; RUN: --check-prefix=CHECK-P9 + +; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mattr=-power9-vector -O2 \ +; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s @vf = global <4 x float> <float -1.500000e+00, float 2.500000e+00, float -3.500000e+00, float 4.500000e+00>, align 16 @vd = global <2 x double> <double 3.500000e+00, double -7.500000e+00>, align 16 @@ -16,59 +24,84 @@ define void @test1() { entry: ; CHECK-LABEL: test1 +; CHECK-P9-LABEL: test1 ; CHECK: lxvd2x +; CHECK-P9-DAG: lxvx %0 = call <4 x i32> @llvm.ppc.vsx.lxvw4x(i8* bitcast (<4 x i32>* @vsi to i8*)) ; CHECK: stxvd2x +; CHECK-P9-DAG: stxvx store <4 x i32> %0, <4 x i32>* @res_vsi, align 16 ; CHECK: lxvd2x +; CHECK-P9-DAG: lxvx %1 = call <4 x i32> @llvm.ppc.vsx.lxvw4x(i8* bitcast (<4 x i32>* @vui to i8*)) ; CHECK: stxvd2x +; CHECK-P9-DAG: stxvx store <4 x i32> %1, <4 x i32>* @res_vui, align 16 ; CHECK: lxvd2x +; CHECK-P9-DAG: lxvx %2 = call <4 x i32> @llvm.ppc.vsx.lxvw4x(i8* bitcast (<4 x float>* @vf to i8*)) %3 = bitcast <4 x i32> %2 to <4 x float> ; CHECK: stxvd2x +; CHECK-P9-DAG: stxvx store <4 x float> %3, <4 x float>* @res_vf, align 16 ; CHECK: lxvd2x +; CHECK-P9-DAG: lxvx %4 = call <2 x double> @llvm.ppc.vsx.lxvd2x(i8* bitcast (<2 x i64>* @vsll to i8*)) %5 = bitcast <2 x double> %4 to <2 x i64> ; CHECK: stxvd2x +; CHECK-P9-DAG: stxvx store <2 x i64> %5, <2 x i64>* @res_vsll, align 16 ; CHECK: lxvd2x +; CHECK-P9-DAG: lxvx %6 = call <2 x double> @llvm.ppc.vsx.lxvd2x(i8* bitcast (<2 x i64>* @vull to i8*)) %7 = bitcast <2 x double> %6 to <2 x i64> ; CHECK: stxvd2x +; CHECK-P9-DAG: stxvx store <2 x i64> %7, <2 x i64>* @res_vull, align 16 ; CHECK: lxvd2x +; CHECK-P9-DAG: lxvx %8 = call <2 x double> @llvm.ppc.vsx.lxvd2x(i8* bitcast (<2 x double>* @vd to i8*)) ; CHECK: stxvd2x +; CHECK-P9-DAG: stxvx 
store <2 x double> %8, <2 x double>* @res_vd, align 16 ; CHECK: lxvd2x +; CHECK-P9-DAG: lxvx %9 = load <4 x i32>, <4 x i32>* @vsi, align 16 ; CHECK: stxvd2x +; CHECK-P9-DAG: stxvx call void @llvm.ppc.vsx.stxvw4x(<4 x i32> %9, i8* bitcast (<4 x i32>* @res_vsi to i8*)) ; CHECK: lxvd2x +; CHECK-P9-DAG: lxvx %10 = load <4 x i32>, <4 x i32>* @vui, align 16 ; CHECK: stxvd2x +; CHECK-P9-DAG: stxvx call void @llvm.ppc.vsx.stxvw4x(<4 x i32> %10, i8* bitcast (<4 x i32>* @res_vui to i8*)) ; CHECK: lxvd2x +; CHECK-P9-DAG: lxvx %11 = load <4 x float>, <4 x float>* @vf, align 16 %12 = bitcast <4 x float> %11 to <4 x i32> ; CHECK: stxvd2x +; CHECK-P9-DAG: stxvx call void @llvm.ppc.vsx.stxvw4x(<4 x i32> %12, i8* bitcast (<4 x float>* @res_vf to i8*)) ; CHECK: lxvd2x +; CHECK-P9-DAG: lxvx %13 = load <2 x i64>, <2 x i64>* @vsll, align 16 %14 = bitcast <2 x i64> %13 to <2 x double> ; CHECK: stxvd2x +; CHECK-P9-DAG: stxvx call void @llvm.ppc.vsx.stxvd2x(<2 x double> %14, i8* bitcast (<2 x i64>* @res_vsll to i8*)) ; CHECK: lxvd2x +; CHECK-P9-DAG: lxvx %15 = load <2 x i64>, <2 x i64>* @vull, align 16 %16 = bitcast <2 x i64> %15 to <2 x double> ; CHECK: stxvd2x +; CHECK-P9-DAG: stxvx call void @llvm.ppc.vsx.stxvd2x(<2 x double> %16, i8* bitcast (<2 x i64>* @res_vull to i8*)) ; CHECK: lxvd2x +; CHECK-P9-DAG: lxvx %17 = load <2 x double>, <2 x double>* @vd, align 16 ; CHECK: stxvd2x +; CHECK-P9-DAG: stxvx call void @llvm.ppc.vsx.stxvd2x(<2 x double> %17, i8* bitcast (<2 x double>* @res_vd to i8*)) ret void } diff --git a/test/CodeGen/PowerPC/vsx-ldst.ll b/test/CodeGen/PowerPC/vsx-ldst.ll index 3c06a3fdc34..a146182de99 100644 --- a/test/CodeGen/PowerPC/vsx-ldst.ll +++ b/test/CodeGen/PowerPC/vsx-ldst.ll @@ -1,18 +1,28 @@ -; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mattr=+vsx -O2 -mtriple=powerpc64-unknown-linux-gnu < %s > %t +; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mattr=+vsx -O2 \ +; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s > %t ; RUN: grep lxvw4x < %t | count 3 ; RUN: grep 
lxvd2x < %t | count 3 ; RUN: grep stxvw4x < %t | count 3 ; RUN: grep stxvd2x < %t | count 3 -; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mattr=+vsx -O0 -fast-isel=1 -mtriple=powerpc64-unknown-linux-gnu < %s > %t + +; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mattr=+vsx -O0 -fast-isel=1 \ +; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s > %t ; RUN: grep lxvw4x < %t | count 3 ; RUN: grep lxvd2x < %t | count 3 ; RUN: grep stxvw4x < %t | count 3 ; RUN: grep stxvd2x < %t | count 3 -; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mattr=+vsx -O2 -mtriple=powerpc64le-unknown-linux-gnu < %s > %t +; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mattr=+vsx -O2 \ +; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s > %t ; RUN: grep lxvd2x < %t | count 6 ; RUN: grep stxvd2x < %t | count 6 +; RUN: llc -verify-machineinstrs -mcpu=pwr9 -O2 \ +; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s > %t +; RUN: grep lxvx < %t | count 6 +; RUN: grep stxvx < %t | count 6 + + @vsi = global <4 x i32> <i32 -1, i32 2, i32 -3, i32 4>, align 16 @vui = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16 @vf = global <4 x float> <float -1.500000e+00, float 2.500000e+00, float -3.500000e+00, float 4.500000e+00>, align 16 diff --git a/test/CodeGen/PowerPC/vsx-p9.ll b/test/CodeGen/PowerPC/vsx-p9.ll new file mode 100644 index 00000000000..4b2dc77b439 --- /dev/null +++ b/test/CodeGen/PowerPC/vsx-p9.ll @@ -0,0 +1,146 @@ +; RUN: llc -verify-machineinstrs -mcpu=pwr9 \ +; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s + +; RUN: llc -verify-machineinstrs -mcpu=pwr9 \ +; RUN: -mtriple=powerpc64-unknown-unknown < %s | FileCheck %s + +@uca = global <16 x i8> zeroinitializer, align 16 +@ucb = global <16 x i8> zeroinitializer, align 16 +@sca = global <16 x i8> zeroinitializer, align 16 +@scb = global <16 x i8> zeroinitializer, align 16 +@usa = global <8 x i16> zeroinitializer, align 16 +@usb = global <8 x i16> zeroinitializer, align 16 +@ssa = global <8 x i16> zeroinitializer, align 16 +@ssb = 
global <8 x i16> zeroinitializer, align 16 +@uia = global <4 x i32> zeroinitializer, align 16 +@uib = global <4 x i32> zeroinitializer, align 16 +@sia = global <4 x i32> zeroinitializer, align 16 +@sib = global <4 x i32> zeroinitializer, align 16 +@ulla = global <2 x i64> zeroinitializer, align 16 +@ullb = global <2 x i64> zeroinitializer, align 16 +@slla = global <2 x i64> zeroinitializer, align 16 +@sllb = global <2 x i64> zeroinitializer, align 16 +@uxa = global <1 x i128> zeroinitializer, align 16 +@uxb = global <1 x i128> zeroinitializer, align 16 +@sxa = global <1 x i128> zeroinitializer, align 16 +@sxb = global <1 x i128> zeroinitializer, align 16 +@vfa = global <4 x float> zeroinitializer, align 16 +@vfb = global <4 x float> zeroinitializer, align 16 +@vda = global <2 x double> zeroinitializer, align 16 +@vdb = global <2 x double> zeroinitializer, align 16 + +define void @_Z4testv() { +entry: +; CHECK-LABEL: @_Z4testv + %0 = load <16 x i8>, <16 x i8>* @uca, align 16 + %1 = load <16 x i8>, <16 x i8>* @ucb, align 16 + %add.i = add <16 x i8> %1, %0 + tail call void (...) @sink(<16 x i8> %add.i) +; CHECK: lxvx 34, 0, 3 +; CHECK: lxvx 35, 0, 4 +; CHECK: vaddubm 2, 3, 2 +; CHECK: stxvx 34, +; CHECK: bl sink + %2 = load <16 x i8>, <16 x i8>* @sca, align 16 + %3 = load <16 x i8>, <16 x i8>* @scb, align 16 + %add.i22 = add <16 x i8> %3, %2 + tail call void (...) @sink(<16 x i8> %add.i22) +; CHECK: lxvx 34, 0, 3 +; CHECK: lxvx 35, 0, 4 +; CHECK: vaddubm 2, 3, 2 +; CHECK: stxvx 34, +; CHECK: bl sink + %4 = load <8 x i16>, <8 x i16>* @usa, align 16 + %5 = load <8 x i16>, <8 x i16>* @usb, align 16 + %add.i21 = add <8 x i16> %5, %4 + tail call void (...) @sink(<8 x i16> %add.i21) +; CHECK: lxvx 34, 0, 3 +; CHECK: lxvx 35, 0, 4 +; CHECK: vadduhm 2, 3, 2 +; CHECK: stxvx 34, +; CHECK: bl sink + %6 = load <8 x i16>, <8 x i16>* @ssa, align 16 + %7 = load <8 x i16>, <8 x i16>* @ssb, align 16 + %add.i20 = add <8 x i16> %7, %6 + tail call void (...) 
@sink(<8 x i16> %add.i20) +; CHECK: lxvx 34, 0, 3 +; CHECK: lxvx 35, 0, 4 +; CHECK: vadduhm 2, 3, 2 +; CHECK: stxvx 34, +; CHECK: bl sink + %8 = load <4 x i32>, <4 x i32>* @uia, align 16 + %9 = load <4 x i32>, <4 x i32>* @uib, align 16 + %add.i19 = add <4 x i32> %9, %8 + tail call void (...) @sink(<4 x i32> %add.i19) +; CHECK: lxvx 34, 0, 3 +; CHECK: lxvx 35, 0, 4 +; CHECK: vadduwm 2, 3, 2 +; CHECK: stxvx 34, +; CHECK: bl sink + %10 = load <4 x i32>, <4 x i32>* @sia, align 16 + %11 = load <4 x i32>, <4 x i32>* @sib, align 16 + %add.i18 = add <4 x i32> %11, %10 + tail call void (...) @sink(<4 x i32> %add.i18) +; CHECK: lxvx 34, 0, 3 +; CHECK: lxvx 35, 0, 4 +; CHECK: vadduwm 2, 3, 2 +; CHECK: stxvx 34, +; CHECK: bl sink + %12 = load <2 x i64>, <2 x i64>* @ulla, align 16 + %13 = load <2 x i64>, <2 x i64>* @ullb, align 16 + %add.i17 = add <2 x i64> %13, %12 + tail call void (...) @sink(<2 x i64> %add.i17) +; CHECK: lxvx 34, 0, 3 +; CHECK: lxvx 35, 0, 4 +; CHECK: vaddudm 2, 3, 2 +; CHECK: stxvx 34, +; CHECK: bl sink + %14 = load <2 x i64>, <2 x i64>* @slla, align 16 + %15 = load <2 x i64>, <2 x i64>* @sllb, align 16 + %add.i16 = add <2 x i64> %15, %14 + tail call void (...) @sink(<2 x i64> %add.i16) +; CHECK: lxvx 34, 0, 3 +; CHECK: lxvx 35, 0, 4 +; CHECK: vaddudm 2, 3, 2 +; CHECK: stxvx 34, +; CHECK: bl sink + %16 = load <1 x i128>, <1 x i128>* @uxa, align 16 + %17 = load <1 x i128>, <1 x i128>* @uxb, align 16 + %add.i15 = add <1 x i128> %17, %16 + tail call void (...) @sink(<1 x i128> %add.i15) +; CHECK: lxvx 34, 0, 3 +; CHECK: lxvx 35, 0, 4 +; CHECK: vadduqm 2, 3, 2 +; CHECK: stxvx 34, +; CHECK: bl sink + %18 = load <1 x i128>, <1 x i128>* @sxa, align 16 + %19 = load <1 x i128>, <1 x i128>* @sxb, align 16 + %add.i14 = add <1 x i128> %19, %18 + tail call void (...) 
@sink(<1 x i128> %add.i14) +; CHECK: lxvx 34, 0, 3 +; CHECK: lxvx 35, 0, 4 +; CHECK: vadduqm 2, 3, 2 +; CHECK: stxvx 34, +; CHECK: bl sink + %20 = load <4 x float>, <4 x float>* @vfa, align 16 + %21 = load <4 x float>, <4 x float>* @vfb, align 16 + %add.i13 = fadd <4 x float> %20, %21 + tail call void (...) @sink(<4 x float> %add.i13) +; CHECK: lxvx 0, 0, 3 +; CHECK: lxvx 1, 0, 4 +; CHECK: xvaddsp 34, 0, 1 +; CHECK: stxvx 34, +; CHECK: bl sink + %22 = load <2 x double>, <2 x double>* @vda, align 16 + %23 = load <2 x double>, <2 x double>* @vdb, align 16 + %add.i12 = fadd <2 x double> %22, %23 + tail call void (...) @sink(<2 x double> %add.i12) +; CHECK: lxvx 0, 0, 3 +; CHECK: lxvx 1, 0, 4 +; CHECK: xvadddp 0, 0, 1 +; CHECK: stxvx 0, +; CHECK: bl sink + ret void +} + +declare void @sink(...) diff --git a/test/CodeGen/PowerPC/vsx_insert_extract_le.ll b/test/CodeGen/PowerPC/vsx_insert_extract_le.ll index 45c3c742e8d..53878883033 100644 --- a/test/CodeGen/PowerPC/vsx_insert_extract_le.ll +++ b/test/CodeGen/PowerPC/vsx_insert_extract_le.ll @@ -1,4 +1,12 @@ -; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mattr=+vsx -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mattr=+vsx \ +; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s + +; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mattr=-power9-vector \ +; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s + +; RUN: llc -verify-machineinstrs -mcpu=pwr9 \ +; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s \ +; RUN: --check-prefix=CHECK-P9 --implicit-check-not xxswapd define <2 x double> @testi0(<2 x double>* %p1, double* %p2) { %v = load <2 x double>, <2 x double>* %p1 @@ -12,6 +20,12 @@ define <2 x double> @testi0(<2 x double>* %p1, double* %p2) { ; CHECK: xxswapd 0, 0 ; CHECK: xxspltd 1, 1, 0 ; CHECK: xxpermdi 34, 0, 1, 1 + +; CHECK-P9-LABEL: testi0 +; CHECK-P9: lxsdx 0, 0, 4 +; CHECK-P9: lxvx 1, 0, 3 +; CHECK-P9: xxspltd 0, 0, 0 
+; CHECK-P9: xxpermdi 34, 1, 0, 1 } define <2 x double> @testi1(<2 x double>* %p1, double* %p2) { @@ -26,6 +40,12 @@ define <2 x double> @testi1(<2 x double>* %p1, double* %p2) { ; CHECK: xxswapd 0, 0 ; CHECK: xxspltd 1, 1, 0 ; CHECK: xxmrgld 34, 1, 0 + +; CHECK-P9-LABEL: testi1 +; CHECK-P9: lxsdx 0, 0, 4 +; CHECK-P9: lxvx 1, 0, 3 +; CHECK-P9: xxspltd 0, 0, 0 +; CHECK-P9: xxmrgld 34, 0, 1 } define double @teste0(<2 x double>* %p1) { @@ -35,6 +55,9 @@ define double @teste0(<2 x double>* %p1) { ; CHECK-LABEL: teste0 ; CHECK: lxvd2x 1, 0, 3 + +; CHECK-P9-LABEL: teste0 +; CHECK-P9: lxsdx 1, 0, 3 } define double @teste1(<2 x double>* %p1) { @@ -45,4 +68,8 @@ define double @teste1(<2 x double>* %p1) { ; CHECK-LABEL: teste1 ; CHECK: lxvd2x 0, 0, 3 ; CHECK: xxswapd 1, 0 + +; CHECK-P9-LABEL: teste1 +; CHECK-P9: li 4, 8 +; CHECK-P9: lxsdx 1, 3, 4 } diff --git a/test/CodeGen/PowerPC/vsx_shuffle_le.ll b/test/CodeGen/PowerPC/vsx_shuffle_le.ll index 6bdf695db92..3bf24adfdd9 100644 --- a/test/CodeGen/PowerPC/vsx_shuffle_le.ll +++ b/test/CodeGen/PowerPC/vsx_shuffle_le.ll @@ -1,4 +1,12 @@ -; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mattr=+vsx -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mattr=+vsx \ +; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s + +; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mattr=-power9-vector \ +; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s + +; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mattr=+vsx \ +; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s \ +; RUN: --check-prefix=CHECK-P9 --implicit-check-not xxswapd define <2 x double> @test00(<2 x double>* %p1, <2 x double>* %p2) { %v1 = load <2 x double>, <2 x double>* %p1 @@ -9,6 +17,10 @@ define <2 x double> @test00(<2 x double>* %p1, <2 x double>* %p2) { ; CHECK-LABEL: test00 ; CHECK: lxvd2x 0, 0, 3 ; CHECK: xxspltd 34, 0, 0 + +; CHECK-P9-LABEL: test00 +; CHECK-P9: lxvx 0, 0, 3 +; CHECK-P9: 
xxspltd 34, 0, 1 } define <2 x double> @test01(<2 x double>* %p1, <2 x double>* %p2) { @@ -20,6 +32,9 @@ define <2 x double> @test01(<2 x double>* %p1, <2 x double>* %p2) { ; CHECK-LABEL: test01 ; CHECK: lxvd2x 0, 0, 3 ; CHECK: xxswapd 34, 0 + +; CHECK-P9-LABEL: test01 +; CHECK-P9: lxvx 34, 0, 3 } define <2 x double> @test02(<2 x double>* %p1, <2 x double>* %p2) { @@ -34,6 +49,11 @@ define <2 x double> @test02(<2 x double>* %p1, <2 x double>* %p2) { ; CHECK: xxswapd 0, 0 ; CHECK: xxswapd 1, 1 ; CHECK: xxmrgld 34, 1, 0 + +; CHECK-P9-LABEL: @test02 +; CHECK-P9: lxvx 0, 0, 3 +; CHECK-P9: lxvx 1, 0, 4 +; CHECK-P9: xxmrgld 34, 1, 0 } define <2 x double> @test03(<2 x double>* %p1, <2 x double>* %p2) { @@ -48,6 +68,11 @@ define <2 x double> @test03(<2 x double>* %p1, <2 x double>* %p2) { ; CHECK: xxswapd 0, 0 ; CHECK: xxswapd 1, 1 ; CHECK: xxpermdi 34, 1, 0, 1 + +; CHECK-P9-LABEL: @test03 +; CHECK-P9: lxvx 0, 0, 3 +; CHECK-P9: lxvx 1, 0, 4 +; CHECK-P9: xxpermdi 34, 1, 0, 1 } define <2 x double> @test10(<2 x double>* %p1, <2 x double>* %p2) { @@ -58,6 +83,10 @@ define <2 x double> @test10(<2 x double>* %p1, <2 x double>* %p2) { ; CHECK-LABEL: @test10 ; CHECK: lxvd2x 34, 0, 3 + +; CHECK-P9-LABEL: @test10 +; CHECK-P9: lxvx 0, 0, 3 +; CHECK-P9: xxswapd 34, 0 } define <2 x double> @test11(<2 x double>* %p1, <2 x double>* %p2) { @@ -69,6 +98,10 @@ define <2 x double> @test11(<2 x double>* %p1, <2 x double>* %p2) { ; CHECK-LABEL: @test11 ; CHECK: lxvd2x 0, 0, 3 ; CHECK: xxspltd 34, 0, 1 + +; CHECK-P9-LABEL: @test11 +; CHECK-P9: lxvx 0, 0, 3 +; CHECK-P9: xxspltd 34, 0, 0 } define <2 x double> @test12(<2 x double>* %p1, <2 x double>* %p2) { @@ -83,6 +116,11 @@ define <2 x double> @test12(<2 x double>* %p1, <2 x double>* %p2) { ; CHECK: xxswapd 0, 0 ; CHECK: xxswapd 1, 1 ; CHECK: xxpermdi 34, 1, 0, 2 + +; CHECK-P9-LABEL: @test12 +; CHECK-P9: lxvx 0, 0, 3 +; CHECK-P9: lxvx 1, 0, 4 +; CHECK-P9: xxpermdi 34, 1, 0, 2 } define <2 x double> @test13(<2 x double>* %p1, <2 x double>* %p2) { 
@@ -97,6 +135,11 @@ define <2 x double> @test13(<2 x double>* %p1, <2 x double>* %p2) { ; CHECK: xxswapd 0, 0 ; CHECK: xxswapd 1, 1 ; CHECK: xxmrghd 34, 1, 0 + +; CHECK-P9-LABEL: @test13 +; CHECK-P9: lxvx 0, 0, 3 +; CHECK-P9: lxvx 1, 0, 4 +; CHECK-P9: xxmrghd 34, 1, 0 } define <2 x double> @test20(<2 x double>* %p1, <2 x double>* %p2) { @@ -111,6 +154,11 @@ define <2 x double> @test20(<2 x double>* %p1, <2 x double>* %p2) { ; CHECK: xxswapd 0, 0 ; CHECK: xxswapd 1, 1 ; CHECK: xxmrgld 34, 0, 1 + +; CHECK-P9-LABEL: @test20 +; CHECK-P9: lxvx 0, 0, 3 +; CHECK-P9: lxvx 1, 0, 4 +; CHECK-P9: xxmrgld 34, 0, 1 } define <2 x double> @test21(<2 x double>* %p1, <2 x double>* %p2) { @@ -125,6 +173,11 @@ define <2 x double> @test21(<2 x double>* %p1, <2 x double>* %p2) { ; CHECK: xxswapd 0, 0 ; CHECK: xxswapd 1, 1 ; CHECK: xxpermdi 34, 0, 1, 1 + +; CHECK-P9-LABEL: @test21 +; CHECK-P9: lxvx 0, 0, 3 +; CHECK-P9: lxvx 1, 0, 4 +; CHECK-P9: xxpermdi 34, 0, 1, 1 } define <2 x double> @test22(<2 x double>* %p1, <2 x double>* %p2) { @@ -136,6 +189,10 @@ define <2 x double> @test22(<2 x double>* %p1, <2 x double>* %p2) { ; CHECK-LABEL: @test22 ; CHECK: lxvd2x 0, 0, 4 ; CHECK: xxspltd 34, 0, 0 + +; CHECK-P9-LABEL: @test22 +; CHECK-P9: lxvx 0, 0, 4 +; CHECK-P9: xxspltd 34, 0, 1 } define <2 x double> @test23(<2 x double>* %p1, <2 x double>* %p2) { @@ -147,6 +204,9 @@ define <2 x double> @test23(<2 x double>* %p1, <2 x double>* %p2) { ; CHECK-LABEL: @test23 ; CHECK: lxvd2x 0, 0, 4 ; CHECK: xxswapd 34, 0 + +; CHECK-P9-LABEL: @test23 +; CHECK-P9: lxvx 34, 0, 4 } define <2 x double> @test30(<2 x double>* %p1, <2 x double>* %p2) { @@ -161,6 +221,11 @@ define <2 x double> @test30(<2 x double>* %p1, <2 x double>* %p2) { ; CHECK: xxswapd 0, 0 ; CHECK: xxswapd 1, 1 ; CHECK: xxpermdi 34, 0, 1, 2 + +; CHECK-P9-LABEL: @test30 +; CHECK-P9: lxvx 0, 0, 3 +; CHECK-P9: lxvx 1, 0, 4 +; CHECK-P9: xxpermdi 34, 0, 1, 2 } define <2 x double> @test31(<2 x double>* %p1, <2 x double>* %p2) { @@ -175,6 +240,11 @@ 
define <2 x double> @test31(<2 x double>* %p1, <2 x double>* %p2) { ; CHECK: xxswapd 0, 0 ; CHECK: xxswapd 1, 1 ; CHECK: xxmrghd 34, 0, 1 + +; CHECK-P9-LABEL: @test31 +; CHECK-P9: lxvx 0, 0, 3 +; CHECK-P9: lxvx 1, 0, 4 +; CHECK-P9: xxmrghd 34, 0, 1 } define <2 x double> @test32(<2 x double>* %p1, <2 x double>* %p2) { @@ -185,6 +255,10 @@ define <2 x double> @test32(<2 x double>* %p1, <2 x double>* %p2) { ; CHECK-LABEL: @test32 ; CHECK: lxvd2x 34, 0, 4 + +; CHECK-P9-LABEL: @test32 +; CHECK-P9: lxvx 0, 0, 4 +; CHECK-P9: xxswapd 34, 0 } define <2 x double> @test33(<2 x double>* %p1, <2 x double>* %p2) { @@ -196,4 +270,8 @@ define <2 x double> @test33(<2 x double>* %p1, <2 x double>* %p2) { ; CHECK-LABEL: @test33 ; CHECK: lxvd2x 0, 0, 4 ; CHECK: xxspltd 34, 0, 1 + +; CHECK-P9-LABEL: @test33 +; CHECK-P9: lxvx 0, 0, 4 +; CHECK-P9: xxspltd 34, 0, 0 } |