about | summary | refs | log | tree | commit | diff
path: root/lib/Target/AMDGPU
diff options
context:
space:
mode:
author: Matt Arsenault <Matthew.Arsenault@amd.com> 2017-07-28 15:52:08 +0000
committer: Matt Arsenault <Matthew.Arsenault@amd.com> 2017-07-28 15:52:08 +0000
commit: dbe42be9699c27012027bf3c526da1c5a034d206 (patch)
tree: 2ac0d62393904ad58c2de1f43d0f3fa3f3bc52af /lib/Target/AMDGPU
parent: 7d0c810188a33b1c6a399e9f5ee8ddc501d3027c (diff)
AMDGPU: Annotate implicitarg.ptr usage
We need to pass something to functions for this to work. It isn't derivable just from the kernarg segment pointer because the implicit arguments are placed after the kernel arguments.

Also fixes missing test for the intrinsic.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@309398 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/AMDGPU')
-rw-r--r-- lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp | 6
-rw-r--r-- lib/Target/AMDGPU/AMDGPUSubtarget.h | 3
-rw-r--r-- lib/Target/AMDGPU/SIISelLowering.cpp | 12
-rw-r--r-- lib/Target/AMDGPU/SIISelLowering.h | 1
-rw-r--r-- lib/Target/AMDGPU/SIMachineFunctionInfo.cpp | 8
-rw-r--r-- lib/Target/AMDGPU/SIMachineFunctionInfo.h | 8
6 files changed, 32 insertions, 6 deletions
diff --git a/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp b/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
index c68e5861ff2..551737c1d27 100644
--- a/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
+++ b/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
@@ -156,8 +156,9 @@ static StringRef intrinsicToAttrName(Intrinsic::ID ID,
case Intrinsic::amdgcn_dispatch_id:
return "amdgpu-dispatch-id";
case Intrinsic::amdgcn_kernarg_segment_ptr:
- case Intrinsic::amdgcn_implicitarg_ptr:
return "amdgpu-kernarg-segment-ptr";
+ case Intrinsic::amdgcn_implicitarg_ptr:
+ return "amdgpu-implicitarg-ptr";
case Intrinsic::amdgcn_queue_ptr:
case Intrinsic::trap:
case Intrinsic::debugtrap:
@@ -190,7 +191,8 @@ static void copyFeaturesToFunction(Function &Parent, const Function &Callee,
{ "amdgpu-work-group-id-z" },
{ "amdgpu-dispatch-ptr" },
{ "amdgpu-dispatch-id" },
- { "amdgpu-kernarg-segment-ptr" }
+ { "amdgpu-kernarg-segment-ptr" },
+ { "amdgpu-implicitarg-ptr" }
};
if (handleAttr(Parent, Callee, "amdgpu-queue-ptr"))
diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 389fdc9d636..2737ef9b2ca 100644
--- a/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -764,7 +764,8 @@ public:
return getGeneration() >= AMDGPUSubtarget::GFX9;
}
- unsigned getKernArgSegmentSize(const MachineFunction &MF, unsigned ExplictArgBytes) const;
+ unsigned getKernArgSegmentSize(const MachineFunction &MF,
+ unsigned ExplictArgBytes) const;
/// Return the maximum number of waves per SIMD for kernels using \p SGPRs SGPRs
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
index 56db67c20f4..9fb1bdb90f0 100644
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -899,6 +899,13 @@ SDValue SITargetLowering::lowerKernArgParameterPtr(SelectionDAG &DAG,
DAG.getConstant(Offset, SL, PtrVT));
}
+SDValue SITargetLowering::getImplicitArgPtr(SelectionDAG &DAG,
+ const SDLoc &SL) const {
+ auto MFI = DAG.getMachineFunction().getInfo<SIMachineFunctionInfo>();
+ uint64_t Offset = getImplicitParameterOffset(MFI, FIRST_IMPLICIT);
+ return lowerKernArgParameterPtr(DAG, SL, DAG.getEntryNode(), Offset);
+}
+
SDValue SITargetLowering::convertArgType(SelectionDAG &DAG, EVT VT, EVT MemVT,
const SDLoc &SL, SDValue Val,
bool Signed,
@@ -3029,8 +3036,9 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
TRI->getPreloadedValue(MF, Reg), VT);
}
case Intrinsic::amdgcn_implicitarg_ptr: {
- unsigned offset = getImplicitParameterOffset(MFI, FIRST_IMPLICIT);
- return lowerKernArgParameterPtr(DAG, DL, DAG.getEntryNode(), offset);
+ if (MFI->isEntryFunction())
+ return getImplicitArgPtr(DAG, DL);
+ report_fatal_error("amdgcn.implicitarg.ptr not implemented for functions");
}
case Intrinsic::amdgcn_kernarg_segment_ptr: {
unsigned Reg
diff --git a/lib/Target/AMDGPU/SIISelLowering.h b/lib/Target/AMDGPU/SIISelLowering.h
index 30482dc6a0a..b703cedf743 100644
--- a/lib/Target/AMDGPU/SIISelLowering.h
+++ b/lib/Target/AMDGPU/SIISelLowering.h
@@ -23,6 +23,7 @@ namespace llvm {
class SITargetLowering final : public AMDGPUTargetLowering {
SDValue lowerKernArgParameterPtr(SelectionDAG &DAG, const SDLoc &SL,
SDValue Chain, uint64_t Offset) const;
+ SDValue getImplicitArgPtr(SelectionDAG &DAG, const SDLoc &SL) const;
SDValue lowerKernargMemParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
const SDLoc &SL, SDValue Chain,
uint64_t Offset, bool Signed,
diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index a7c8166ff6d..04e57bedb21 100644
--- a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -93,11 +93,17 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
// FIXME: Not really a system SGPR.
PrivateSegmentWaveByteOffsetSystemSGPR = ScratchWaveOffsetReg;
+ if (F->hasFnAttribute("amdgpu-implicitarg-ptr"))
+ ImplicitArgPtr = true;
+ } else {
+ if (F->hasFnAttribute("amdgpu-implicitarg-ptr"))
+ KernargSegmentPtr = true;
}
CallingConv::ID CC = F->getCallingConv();
if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) {
- KernargSegmentPtr = !F->arg_empty();
+ if (!F->arg_empty())
+ KernargSegmentPtr = true;
WorkGroupIDX = true;
WorkItemIDX = true;
} else if (CC == CallingConv::AMDGPU_PS) {
diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 4c7f38a09a4..8511403ebc3 100644
--- a/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -186,6 +186,10 @@ private:
// Other shaders indirect 64-bits at sgpr[0:1]
bool ImplicitBufferPtr : 1;
+ // Pointer to where the ABI inserts special kernel arguments separate from the
+ // user arguments. This is an offset from the KernargSegmentPtr.
+ bool ImplicitArgPtr : 1;
+
MCPhysReg getNextUserSGPR() const {
assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
return AMDGPU::SGPR0 + NumUserSGPRs;
@@ -346,6 +350,10 @@ public:
return WorkItemIDZ;
}
+ bool hasImplicitArgPtr() const {
+ return ImplicitArgPtr;
+ }
+
bool hasImplicitBufferPtr() const {
return ImplicitBufferPtr;
}