diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2017-11-15 00:45:43 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2017-11-15 00:45:43 +0000 |
commit | bc9fb908bcd28383001b1b52e4abd206e666217a (patch) | |
tree | f81fef0ee5d7c747272e450f262ab2bdcdb46da4 /lib/Target/AMDGPU/SIISelLowering.cpp | |
parent | 2c21c88a19222633e52910fc6d22851a76156d3b (diff) |
AMDGPU: Don't use MUBUF vaddr if address may overflow
Effectively reverts r263964. Before that change, we would not
allow this unless vaddr was known to be positive.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@318240 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/AMDGPU/SIISelLowering.cpp')
-rw-r--r-- | lib/Target/AMDGPU/SIISelLowering.cpp | 36 |
1 file changed, 35 insertions(+), 1 deletion(-)
diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp index f7fe652dbea..43c4be359f4 100644 --- a/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/lib/Target/AMDGPU/SIISelLowering.cpp @@ -94,6 +94,12 @@ static cl::opt<bool> EnableVGPRIndexMode( cl::desc("Use GPR indexing mode instead of movrel for vector indexing"), cl::init(false)); +static cl::opt<unsigned> AssumeFrameIndexHighZeroBits( + "amdgpu-frame-index-zero-bits", + cl::desc("High bits of frame index assumed to be zero"), + cl::init(5), + cl::ReallyHidden); + static unsigned findFirstFreeSGPR(CCState &CCInfo) { unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs(); for (unsigned Reg = 0; Reg < NumSGPRs; ++Reg) { @@ -1600,6 +1606,17 @@ SDValue SITargetLowering::LowerFormalArguments( Reg = MF.addLiveIn(Reg, RC); SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, VT); + if (Arg.Flags.isSRet() && !getSubtarget()->enableHugePrivateBuffer()) { + // The return object should be reasonably addressable. + + // FIXME: This helps when the return is a real sret. If it is a + // automatically inserted sret (i.e. CanLowerReturn returns false), an + // extra copy is inserted in SelectionDAGBuilder which obscures this. + unsigned NumBits = 32 - AssumeFrameIndexHighZeroBits; + Val = DAG.getNode(ISD::AssertZext, DL, VT, Val, + DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), NumBits))); + } + // If this is an 8 or 16-bit value, it is really passed promoted // to 32 bits. Insert an assert[sz]ext to capture this, then // truncate to the right size. 
@@ -3216,7 +3233,6 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { return lowerEXTRACT_VECTOR_ELT(Op, DAG); case ISD::FP_ROUND: return lowerFP_ROUND(Op, DAG); - case ISD::TRAP: case ISD::DEBUGTRAP: return lowerTRAP(Op, DAG); @@ -6997,3 +7013,21 @@ void SITargetLowering::finalizeLowering(MachineFunction &MF) const { TargetLoweringBase::finalizeLowering(MF); } + +void SITargetLowering::computeKnownBitsForFrameIndex(const SDValue Op, + KnownBits &Known, + const APInt &DemandedElts, + const SelectionDAG &DAG, + unsigned Depth) const { + TargetLowering::computeKnownBitsForFrameIndex(Op, Known, DemandedElts, + DAG, Depth); + + if (getSubtarget()->enableHugePrivateBuffer()) + return; + + // Technically it may be possible to have a dispatch with a single workitem + // that uses the full private memory size, but that's not really useful. We + // can't use vaddr in MUBUF instructions if we don't know the address + // calculation won't overflow, so assume the sign bit is never set. + Known.Zero.setHighBits(AssumeFrameIndexHighZeroBits); +} |