summaryrefslogtreecommitdiff
path: root/lib/Transforms/Vectorize/LoopVectorize.cpp
diff options
context:
space:
mode:
authorAlon Kom <alon.kom@intel.com>2017-09-14 07:40:02 +0000
committerAlon Kom <alon.kom@intel.com>2017-09-14 07:40:02 +0000
commitdde48e1948271381d5b8c4a4ffeb630289305914 (patch)
treeb169f4cd9fee426322a1b2cb21ace97404387dae /lib/Transforms/Vectorize/LoopVectorize.cpp
parenteca1741cf54867b974c87277833af43d77ea9953 (diff)
[LV] Fix maximum legal VF calculation
This patch fixes PR34283, which exposed that the computation of the maximum legal width for vectorization was wrong: it relied on MaxInterleaveFactor to obtain the maximum stride used in the loop, but not all strided accesses in the loop have an interleave group associated with them. Instead of recording the maximum stride in the loop, which can be overly conservative (e.g. if the access with the maximum stride is not involved in the dependence limitation), this patch tracks the actual maximum legal width imposed by the accesses that are involved in dependencies. Differential Revision: https://reviews.llvm.org/D37507 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@313237 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Transforms/Vectorize/LoopVectorize.cpp')
-rw-r--r--lib/Transforms/Vectorize/LoopVectorize.cpp46
1 files changed, 18 insertions, 28 deletions
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 28edd65f852..a37c877db1e 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -963,14 +963,6 @@ public:
return InterleaveGroupMap.count(Instr);
}
- /// \brief Return the maximum interleave factor of all interleaved groups.
- unsigned getMaxInterleaveFactor() const {
- unsigned MaxFactor = 1;
- for (auto &Entry : InterleaveGroupMap)
- MaxFactor = std::max(MaxFactor, Entry.second->getFactor());
- return MaxFactor;
- }
-
/// \brief Get the interleave group that \p Instr belongs to.
///
/// \returns nullptr if doesn't have such group.
@@ -1553,11 +1545,6 @@ public:
return InterleaveInfo.isInterleaved(Instr);
}
- /// \brief Return the maximum interleave factor of all interleaved groups.
- unsigned getMaxInterleaveFactor() const {
- return InterleaveInfo.getMaxInterleaveFactor();
- }
-
/// \brief Get the interleaved access group that \p Instr belongs to.
const InterleaveGroup *getInterleavedAccessGroup(Instruction *Instr) {
return InterleaveInfo.getInterleaveGroup(Instr);
@@ -1571,6 +1558,10 @@ public:
unsigned getMaxSafeDepDistBytes() { return LAI->getMaxSafeDepDistBytes(); }
+ uint64_t getMaxSafeRegisterWidth() const {
+ return LAI->getDepChecker().getMaxSafeRegisterWidth();
+ }
+
bool hasStride(Value *V) { return LAI->hasStride(V); }
/// Returns true if the target machine supports masked store operation
@@ -6077,9 +6068,11 @@ void InterleavedAccessInfo::analyzeInterleaving(
// Remove interleaved store groups with gaps.
for (InterleaveGroup *Group : StoreGroups)
- if (Group->getNumMembers() != Group->getFactor())
+ if (Group->getNumMembers() != Group->getFactor()) {
+ DEBUG(dbgs() << "LV: Invalidate candidate interleaved store group due "
+ "to gaps.\n");
releaseGroup(Group);
-
+ }
// Remove interleaved groups with gaps (currently only loads) whose memory
// accesses may wrap around. We have to revisit the getPtrStride analysis,
// this time with ShouldCheckWrap=true, since collectConstStrideAccesses does
@@ -6132,6 +6125,8 @@ void InterleavedAccessInfo::analyzeInterleaving(
// to look for a member at index factor - 1, since every group must have
// a member at index zero.
if (Group->isReverse()) {
+ DEBUG(dbgs() << "LV: Invalidate candidate interleaved group due to "
+ "a reverse access with gaps.\n");
releaseGroup(Group);
continue;
}
@@ -6215,25 +6210,20 @@ LoopVectorizationCostModel::computeFeasibleMaxVF(bool OptForSize,
unsigned SmallestType, WidestType;
std::tie(SmallestType, WidestType) = getSmallestAndWidestTypes();
unsigned WidestRegister = TTI.getRegisterBitWidth(true);
- unsigned MaxSafeDepDist = -1U;
- // Get the maximum safe dependence distance in bits computed by LAA. If the
- // loop contains any interleaved accesses, we divide the dependence distance
- // by the maximum interleave factor of all interleaved groups. Note that
- // although the division ensures correctness, this is a fairly conservative
- // computation because the maximum distance computed by LAA may not involve
- // any of the interleaved accesses.
- if (Legal->getMaxSafeDepDistBytes() != -1U)
- MaxSafeDepDist =
- Legal->getMaxSafeDepDistBytes() * 8 / Legal->getMaxInterleaveFactor();
+ // Get the maximum safe dependence distance in bits computed by LAA.
+ // It is computed by MaxVF * sizeOf(type) * 8, where type is taken from
+ // the memory access that is most restrictive (involved in the smallest
+ // dependence distance).
+ unsigned MaxSafeRegisterWidth = Legal->getMaxSafeRegisterWidth();
+
+ WidestRegister = std::min(WidestRegister, MaxSafeRegisterWidth);
- WidestRegister =
- ((WidestRegister < MaxSafeDepDist) ? WidestRegister : MaxSafeDepDist);
unsigned MaxVectorSize = WidestRegister / WidestType;
DEBUG(dbgs() << "LV: The Smallest and Widest types: " << SmallestType << " / "
<< WidestType << " bits.\n");
- DEBUG(dbgs() << "LV: The Widest register is: " << WidestRegister
+ DEBUG(dbgs() << "LV: The Widest register safe to use is: " << WidestRegister
<< " bits.\n");
assert(MaxVectorSize <= 64 && "Did not expect to pack so many elements"