summaryrefslogtreecommitdiff
path: root/lib/Target/AMDGPU/SIISelLowering.cpp
diff options
context:
space:
mode:
authorMatt Arsenault <Matthew.Arsenault@amd.com>2017-11-15 00:45:43 +0000
committerMatt Arsenault <Matthew.Arsenault@amd.com>2017-11-15 00:45:43 +0000
commitbc9fb908bcd28383001b1b52e4abd206e666217a (patch)
treef81fef0ee5d7c747272e450f262ab2bdcdb46da4 /lib/Target/AMDGPU/SIISelLowering.cpp
parent2c21c88a19222633e52910fc6d22851a76156d3b (diff)
AMDGPU: Don't use MUBUF vaddr if address may overflow
Effectively revert r263964. Before we would not allow this if vaddr was not known to be positive. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@318240 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/AMDGPU/SIISelLowering.cpp')
-rw-r--r--lib/Target/AMDGPU/SIISelLowering.cpp36
1 file changed, 35 insertions, 1 deletion
diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
index f7fe652dbea..43c4be359f4 100644
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -94,6 +94,12 @@ static cl::opt<bool> EnableVGPRIndexMode(
cl::desc("Use GPR indexing mode instead of movrel for vector indexing"),
cl::init(false));
+static cl::opt<unsigned> AssumeFrameIndexHighZeroBits(
+ "amdgpu-frame-index-zero-bits",
+ cl::desc("High bits of frame index assumed to be zero"),
+ cl::init(5),
+ cl::ReallyHidden);
+
static unsigned findFirstFreeSGPR(CCState &CCInfo) {
unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
for (unsigned Reg = 0; Reg < NumSGPRs; ++Reg) {
@@ -1600,6 +1606,17 @@ SDValue SITargetLowering::LowerFormalArguments(
Reg = MF.addLiveIn(Reg, RC);
SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, VT);
+ if (Arg.Flags.isSRet() && !getSubtarget()->enableHugePrivateBuffer()) {
+ // The return object should be reasonably addressable.
+
+ // FIXME: This helps when the return is a real sret. If it is an
+ // automatically inserted sret (i.e. CanLowerReturn returns false), an
+ // extra copy is inserted in SelectionDAGBuilder which obscures this.
+ unsigned NumBits = 32 - AssumeFrameIndexHighZeroBits;
+ Val = DAG.getNode(ISD::AssertZext, DL, VT, Val,
+ DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), NumBits)));
+ }
+
// If this is an 8 or 16-bit value, it is really passed promoted
// to 32 bits. Insert an assert[sz]ext to capture this, then
// truncate to the right size.
@@ -3216,7 +3233,6 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
return lowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::FP_ROUND:
return lowerFP_ROUND(Op, DAG);
-
case ISD::TRAP:
case ISD::DEBUGTRAP:
return lowerTRAP(Op, DAG);
@@ -6997,3 +7013,21 @@ void SITargetLowering::finalizeLowering(MachineFunction &MF) const {
TargetLoweringBase::finalizeLowering(MF);
}
+
+void SITargetLowering::computeKnownBitsForFrameIndex(const SDValue Op,
+ KnownBits &Known,
+ const APInt &DemandedElts,
+ const SelectionDAG &DAG,
+ unsigned Depth) const {
+ TargetLowering::computeKnownBitsForFrameIndex(Op, Known, DemandedElts,
+ DAG, Depth);
+
+ if (getSubtarget()->enableHugePrivateBuffer())
+ return;
+
+ // Technically it may be possible to have a dispatch with a single workitem
+ // that uses the full private memory size, but that's not really useful. We
+ // can't use vaddr in MUBUF instructions if we don't know the address
+ // calculation won't overflow, so assume the sign bit is never set.
+ Known.Zero.setHighBits(AssumeFrameIndexHighZeroBits);
+}