summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHans Wennborg <hans@hanshq.net>2018-08-16 09:48:15 +0000
committerHans Wennborg <hans@hanshq.net>2018-08-16 09:48:15 +0000
commit8684e0b4d359c6c19b4a83a148619b092d69784a (patch)
tree31dd451b0e6300d51eccc89c1f7c5d44e0e28cd8
parentef7c5cc79d1dcf5575b0cbe915f8c7869940e487 (diff)
Merging r339166:
------------------------------------------------------------------------ r339166 | abataev | 2018-08-07 21:21:05 +0200 (Tue, 07 Aug 2018) | 12 lines [SLP] Fix insert point for reused extract instructions. Summary: Reworked the previously committed patch to insert shuffles for reused extract element instructions in the correct position. Previous logic was incorrect, and might lead to the crash with PHIs and EH instructions. Reviewers: efriedma, javed.absar Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D50143 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_70@339853 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Transforms/Vectorize/SLPVectorizer.cpp8
-rw-r--r--test/Transforms/SLPVectorizer/AArch64/PR38339.ll95
2 files changed, 96 insertions, 7 deletions
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 5c2efe885e2..32df6d58157 100644
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -3109,14 +3109,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
}
if (NeedToShuffleReuses) {
// TODO: Merge this shuffle with the ReorderShuffleMask.
- if (!E->ReorderIndices.empty())
+ if (E->ReorderIndices.empty())
Builder.SetInsertPoint(VL0);
- else if (auto *I = dyn_cast<Instruction>(V))
- Builder.SetInsertPoint(I->getParent(),
- std::next(I->getIterator()));
- else
- Builder.SetInsertPoint(&F->getEntryBlock(),
- F->getEntryBlock().getFirstInsertionPt());
V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
E->ReuseShuffleIndices, "shuffle");
}
diff --git a/test/Transforms/SLPVectorizer/AArch64/PR38339.ll b/test/Transforms/SLPVectorizer/AArch64/PR38339.ll
index 1ab4a13260e..1a981a32804 100644
--- a/test/Transforms/SLPVectorizer/AArch64/PR38339.ll
+++ b/test/Transforms/SLPVectorizer/AArch64/PR38339.ll
@@ -27,3 +27,98 @@ define void @f1(<2 x i16> %x, i16* %a) {
store i16 %t2, i16* %ptr3
ret void
}
+
+define void @f2(<2 x i16> %x, i16* %a) {
+; CHECK-LABEL: @f2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[CONT:%.*]]
+; CHECK: cont:
+; CHECK-NEXT: [[XX:%.*]] = phi <2 x i16> [ [[X:%.*]], [[ENTRY:%.*]] ], [ undef, [[CONT]] ]
+; CHECK-NEXT: [[AA:%.*]] = phi i16* [ [[A:%.*]], [[ENTRY]] ], [ undef, [[CONT]] ]
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i16> [[XX]], <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 1, i32 0>
+; CHECK-NEXT: [[PTR0:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 0
+; CHECK-NEXT: [[PTR1:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 1
+; CHECK-NEXT: [[PTR2:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 2
+; CHECK-NEXT: [[PTR3:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 3
+; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x i16> [[SHUFFLE]], i32 0
+; CHECK-NEXT: store i16 [[TMP0]], i16* [[A]]
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[PTR0]] to <4 x i16>*
+; CHECK-NEXT: store <4 x i16> [[SHUFFLE]], <4 x i16>* [[TMP1]], align 2
+; CHECK-NEXT: [[A_VAL:%.*]] = load i16, i16* [[A]], align 2
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i16 [[A_VAL]], 0
+; CHECK-NEXT: br i1 [[CMP]], label [[CONT]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %cont
+
+cont: ; preds = %entry, %cont
+ %xx = phi <2 x i16> [ %x, %entry ], [ undef, %cont ]
+ %aa = phi i16* [ %a, %entry ], [ undef, %cont ]
+ %t2 = extractelement <2 x i16> %xx, i32 0
+ %t3 = extractelement <2 x i16> %xx, i32 1
+ %ptr0 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 0
+ %ptr1 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 1
+ %ptr2 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 2
+ %ptr3 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 3
+ store i16 %t2, i16* %a
+ store i16 %t2, i16* %ptr0
+ store i16 %t3, i16* %ptr1
+ store i16 %t3, i16* %ptr2
+ store i16 %t2, i16* %ptr3
+ %a_val = load i16, i16* %a, align 2
+ %cmp = icmp eq i16 %a_val, 0
+ br i1 %cmp, label %cont, label %exit
+
+exit: ; preds = %cont
+ ret void
+}
+
+define void @f3(<2 x i16> %x, i16* %a) {
+; CHECK-LABEL: @f3(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[CONT:%.*]]
+; CHECK: cont:
+; CHECK-NEXT: [[XX:%.*]] = phi <2 x i16> [ [[X:%.*]], [[ENTRY:%.*]] ], [ undef, [[CONT]] ]
+; CHECK-NEXT: [[AA:%.*]] = phi i16* [ [[A:%.*]], [[ENTRY]] ], [ undef, [[CONT]] ]
+; CHECK-NEXT: [[REORDER_SHUFFLE:%.*]] = shufflevector <2 x i16> [[XX]], <2 x i16> undef, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i16> [[REORDER_SHUFFLE]], <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 1, i32 0>
+; CHECK-NEXT: [[PTR0:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 0
+; CHECK-NEXT: [[PTR1:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 1
+; CHECK-NEXT: [[PTR2:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 2
+; CHECK-NEXT: [[PTR3:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 3
+; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x i16> [[SHUFFLE]], i32 0
+; CHECK-NEXT: store i16 [[TMP0]], i16* [[A]]
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[PTR0]] to <4 x i16>*
+; CHECK-NEXT: store <4 x i16> [[SHUFFLE]], <4 x i16>* [[TMP1]], align 2
+; CHECK-NEXT: [[A_VAL:%.*]] = load i16, i16* [[A]], align 2
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i16 [[A_VAL]], 0
+; CHECK-NEXT: br i1 [[CMP]], label [[CONT]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %cont
+
+cont: ; preds = %entry, %cont
+ %xx = phi <2 x i16> [ %x, %entry ], [ undef, %cont ]
+ %aa = phi i16* [ %a, %entry ], [ undef, %cont ]
+ %t2 = extractelement <2 x i16> %xx, i32 0
+ %t3 = extractelement <2 x i16> %xx, i32 1
+ %ptr0 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 0
+ %ptr1 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 1
+ %ptr2 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 2
+ %ptr3 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 3
+ store i16 %t3, i16* %a
+ store i16 %t3, i16* %ptr0
+ store i16 %t2, i16* %ptr1
+ store i16 %t2, i16* %ptr2
+ store i16 %t3, i16* %ptr3
+ %a_val = load i16, i16* %a, align 2
+ %cmp = icmp eq i16 %a_val, 0
+ br i1 %cmp, label %cont, label %exit
+
+exit: ; preds = %cont
+ ret void
+}