[InstCombine] canonicalize shifty abs(): ashr+add+xor --> cmp+neg+sel

We want to do this for 2 reasons: 1. Value tracking does not recognize the ashr variant, so it would fail to match for cases like D39766. 2. DAGCombiner does better at producing optimal codegen when we have the cmp+sel pattern. More detail about what happens in the backend: 1. DAGCombiner has a generic transform for all targets to convert the scalar cmp+sel variant of abs into the shift variant. That is the opposite of this IR canonicalization. 2. DAGCombiner has a generic transform for all targets to convert the vector cmp+sel variant of abs into either an ABS node or the shift variant. That is again the opposite of this IR canonicalization. 3. DAGCombiner has a generic transform for all targets to convert the exact shift variants produced by #1 or #2 into an ISD::ABS node. Note: It would be an efficiency improvement if we had #1 go directly to an ABS node when that's legal/custom. 4. The pattern matching above is incomplete, so it is possible to escape the intended/optimal codegen in a variety of ways. a. For #2, the vector path is missing the case for setlt with a '1' constant. b. For #3, we are missing a match for commuted versions of the shift variants. 5. Therefore, this IR canonicalization can only help get us to the optimal codegen. The version of cmp+sel produced by this patch will be recognized in the DAG and converted to an ABS node when possible or the shift sequence when not. 6. In the following examples with this patch applied, we may get conditional moves rather than the shift produced by the generic DAGCombiner transforms. The conditional move is created using a target-specific decision for any given target. Whether it is optimal or not for a particular subtarget may be up for debate. define i32 @abs_shifty(i32 %x) { %signbit = ashr i32 %x, 31 %add = add i32 %signbit, %x %abs = xor i32 %signbit, %add ret i32 %abs } define i32 @abs_cmpsubsel(i32 %x) { %cmp = icmp slt i32 %x, zeroinitializer %sub = sub i32 zeroinitializer, %x %abs = select i1 %cmp, i32 %sub, i32 %x ret i32 %abs } define <4 x i32> @abs_shifty_vec(<4 x i32> %x) { %signbit = ashr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31> %add = add <4 x i32> %signbit, %x %abs = xor <4 x i32> %signbit, %add ret <4 x i32> %abs } define <4 x i32> @abs_cmpsubsel_vec(<4 x i32> %x) { %cmp = icmp slt <4 x i32> %x, zeroinitializer %sub = sub <4 x i32> zeroinitializer, %x %abs = select <4 x i1> %cmp, <4 x i32> %sub, <4 x i32> %x ret <4 x i32> %abs } > $ ./opt -instcombine shiftyabs.ll -S | ./llc -o - -mtriple=x86_64 -mattr=avx > abs_shifty: > movl %edi, %eax > negl %eax > cmovll %edi, %eax > retq > > abs_cmpsubsel: > movl %edi, %eax > negl %eax > cmovll %edi, %eax > retq > > abs_shifty_vec: > vpabsd %xmm0, %xmm0 > retq > > abs_cmpsubsel_vec: > vpabsd %xmm0, %xmm0 > retq > > $ ./opt -instcombine shiftyabs.ll -S | ./llc -o - -mtriple=aarch64 > abs_shifty: > cmp w0, #0 // =0 > cneg w0, w0, mi > ret > > abs_cmpsubsel: > cmp w0, #0 // =0 > cneg w0, w0, mi > ret > > abs_shifty_vec: > abs v0.4s, v0.4s > ret > > abs_cmpsubsel_vec: > abs v0.4s, v0.4s > ret > > $ ./opt -instcombine shiftyabs.ll -S | ./llc -o - -mtriple=powerpc64le > abs_shifty: > srawi 4, 3, 31 > add 3, 3, 4 > xor 3, 3, 4 > blr > > abs_cmpsubsel: > srawi 4, 3, 31 > add 3, 3, 4 > xor 3, 3, 4 > blr > > abs_shifty_vec: > vspltisw 3, -16 > vspltisw 4, 15 > vsubuwm 3, 4, 3 > vsraw 3, 2, 3 > vadduwm 2, 2, 3 > xxlxor 34, 34, 35 > blr > > abs_cmpsubsel_vec: > vspltisw 3, -16 > vspltisw 4, 15 > vsubuwm 3, 4, 3 > vsraw 3, 2, 3 > vadduwm 2, 2, 3 > xxlxor 34, 34, 35 > blr > Differential Revision: https://reviews.llvm.org/D40984 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@320921 91177308-0d34-0410-b5e6-96231b3b80d8
author: Sanjay Patel <spatel@rotateright.com> 2017-12-16 16:41:17 +0000
committer: Sanjay Patel <spatel@rotateright.com> 2017-12-16 16:41:17 +0000
commit: ccf39286236e37797cec77097ec50a06f4654b3f (patch)
tree: b3d92af61f13b5b4ce8d57b30374f0b822674baa /test/Transforms
parent: 09a3b1266be4774bcce735be3b95fd90e3d8477a (diff)
1 files changed, 12 insertions, 12 deletions
diff --git a/test/Transforms/InstCombine/abs-1.ll b/test/Transforms/InstCombine/abs-1.ll
index de782f7248f..5cd5868d67a 100644
--- a/test/Transforms/InstCombine/abs-1.ll
+++ b/test/Transforms/InstCombine/abs-1.ll
@@ -48,9 +48,9 @@ define i64 @test_llabs(i64 %x) {
 
 define i8 @shifty_abs_commute0(i8 %x) {
 ; CHECK-LABEL: @shifty_abs_commute0(
-; CHECK-NEXT:    [[SIGNBIT:%.*]] = ashr i8 %x, 7
-; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[SIGNBIT]], %x
-; CHECK-NEXT:    [[ABS:%.*]] = xor i8 [[ADD]], [[SIGNBIT]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i8 %x, 0
+; CHECK-NEXT:    [[TMP2:%.*]] = sub i8 0, %x
+; CHECK-NEXT:    [[ABS:%.*]] = select i1 [[TMP1]], i8 [[TMP2]], i8 %x
 ; CHECK-NEXT:    ret i8 [[ABS]]
 ;
   %signbit = ashr i8 %x, 7
@@ -61,9 +61,9 @@ define i8 @shifty_abs_commute0(i8 %x) {
 
 define <2 x i8> @shifty_abs_commute1(<2 x i8> %x) {
 ; CHECK-LABEL: @shifty_abs_commute1(
-; CHECK-NEXT:    [[SIGNBIT:%.*]] = ashr <2 x i8> %x, <i8 7, i8 7>
-; CHECK-NEXT:    [[ADD:%.*]] = add <2 x i8> [[SIGNBIT]], %x
-; CHECK-NEXT:    [[ABS:%.*]] = xor <2 x i8> [[SIGNBIT]], [[ADD]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <2 x i8> %x, zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = sub <2 x i8> zeroinitializer, %x
+; CHECK-NEXT:    [[ABS:%.*]] = select <2 x i1> [[TMP1]], <2 x i8> [[TMP2]], <2 x i8> %x
 ; CHECK-NEXT:    ret <2 x i8> [[ABS]]
 ;
   %signbit = ashr <2 x i8> %x, <i8 7, i8 7>
@@ -75,9 +75,9 @@ define <2 x i8> @shifty_abs_commute1(<2 x i8> %x) {
 define <2 x i8> @shifty_abs_commute2(<2 x i8> %x) {
 ; CHECK-LABEL: @shifty_abs_commute2(
 ; CHECK-NEXT:    [[Y:%.*]] = mul <2 x i8> %x, <i8 3, i8 3>
-; CHECK-NEXT:    [[SIGNBIT:%.*]] = ashr <2 x i8> [[Y]], <i8 7, i8 7>
-; CHECK-NEXT:    [[ADD:%.*]] = add <2 x i8> [[Y]], [[SIGNBIT]]
-; CHECK-NEXT:    [[ABS:%.*]] = xor <2 x i8> [[SIGNBIT]], [[ADD]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <2 x i8> [[Y]], zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = sub <2 x i8> zeroinitializer, [[Y]]
+; CHECK-NEXT:    [[ABS:%.*]] = select <2 x i1> [[TMP1]], <2 x i8> [[TMP2]], <2 x i8> [[Y]]
 ; CHECK-NEXT:    ret <2 x i8> [[ABS]]
 ;
   %y = mul <2 x i8> %x, <i8 3, i8 3>   ; extra op to thwart complexity-based canonicalization
@@ -90,9 +90,9 @@ define <2 x i8> @shifty_abs_commute2(<2 x i8> %x) {
 define i8 @shifty_abs_commute3(i8 %x) {
 ; CHECK-LABEL: @shifty_abs_commute3(
 ; CHECK-NEXT:    [[Y:%.*]] = mul i8 %x, 3
-; CHECK-NEXT:    [[SIGNBIT:%.*]] = ashr i8 [[Y]], 7
-; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[Y]], [[SIGNBIT]]
-; CHECK-NEXT:    [[ABS:%.*]] = xor i8 [[ADD]], [[SIGNBIT]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i8 [[Y]], 0
+; CHECK-NEXT:    [[TMP2:%.*]] = sub i8 0, [[Y]]
+; CHECK-NEXT:    [[ABS:%.*]] = select i1 [[TMP1]], i8 [[TMP2]], i8 [[Y]]
 ; CHECK-NEXT:    ret i8 [[ABS]]
 ;
   %y = mul i8 %x, 3                    ; extra op to thwart complexity-based canonicalization
author	Sanjay Patel <spatel@rotateright.com>	2017-12-16 16:41:17 +0000
committer	Sanjay Patel <spatel@rotateright.com>	2017-12-16 16:41:17 +0000
commit	ccf39286236e37797cec77097ec50a06f4654b3f (patch)
tree	b3d92af61f13b5b4ce8d57b30374f0b822674baa /test/Transforms
parent	09a3b1266be4774bcce735be3b95fd90e3d8477a (diff)