summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- docs/AMDGPUUsage.rst 10
-rw-r--r-- include/llvm/Support/AMDGPUMetadata.h 8
-rw-r--r-- lib/Support/AMDGPUMetadata.cpp 4
-rw-r--r-- lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp 2
-rw-r--r-- test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props.ll 142
-rw-r--r-- test/MC/AMDGPU/hsa-metadata-kernel-code-props.s 4
6 files changed, 153 insertions, 17 deletions
diff --git a/docs/AMDGPUUsage.rst b/docs/AMDGPUUsage.rst
index 1cf30304dfc..ecb0c11dbcb 100644
--- a/docs/AMDGPUUsage.rst
+++ b/docs/AMDGPUUsage.rst
@@ -1295,6 +1295,16 @@ non-AMD key names should be prefixed by "*vendor-name*.".
code is capable of
supporting XNACK. See
:ref:`amdgpu-target-features`.
+ "NumSpilledSGPRs" integer Number of stores from
+ a scalar register to
+ a register allocator
+ created spill
+ location.
+ "NumSpilledVGPRs" integer Number of stores from
+ a vector register to
+ a register allocator
+ created spill
+ location.
============================ ============== ========= =====================
..
diff --git a/include/llvm/Support/AMDGPUMetadata.h b/include/llvm/Support/AMDGPUMetadata.h
index 0c8d0228773..00039a75c51 100644
--- a/include/llvm/Support/AMDGPUMetadata.h
+++ b/include/llvm/Support/AMDGPUMetadata.h
@@ -244,6 +244,10 @@ constexpr char MaxFlatWorkGroupSize[] = "MaxFlatWorkGroupSize";
constexpr char IsDynamicCallStack[] = "IsDynamicCallStack";
/// \brief Key for Kernel::CodeProps::Metadata::mIsXNACKEnabled.
constexpr char IsXNACKEnabled[] = "IsXNACKEnabled";
+/// \brief Key for Kernel::CodeProps::Metadata::mNumSpilledSGPRs.
+constexpr char NumSpilledSGPRs[] = "NumSpilledSGPRs";
+/// \brief Key for Kernel::CodeProps::Metadata::mNumSpilledVGPRs.
+constexpr char NumSpilledVGPRs[] = "NumSpilledVGPRs";
} // end namespace Key
/// \brief In-memory representation of kernel code properties metadata.
@@ -275,6 +279,10 @@ struct Metadata final {
/// \brief True if the generated machine code is capable of supporting XNACK.
/// Optional.
bool mIsXNACKEnabled = false;
+ /// \brief Number of SGPRs spilled by a wavefront. Optional.
+ uint16_t mNumSpilledSGPRs = 0;
+ /// \brief Number of VGPRs spilled by a workitem. Optional.
+ uint16_t mNumSpilledVGPRs = 0;
/// \brief Default constructor.
Metadata() = default;
diff --git a/lib/Support/AMDGPUMetadata.cpp b/lib/Support/AMDGPUMetadata.cpp
index ec2714cfc1c..ddb25935e0e 100644
--- a/lib/Support/AMDGPUMetadata.cpp
+++ b/lib/Support/AMDGPUMetadata.cpp
@@ -148,6 +148,10 @@ struct MappingTraits<Kernel::CodeProps::Metadata> {
MD.mIsDynamicCallStack, false);
YIO.mapOptional(Kernel::CodeProps::Key::IsXNACKEnabled,
MD.mIsXNACKEnabled, false);
+ YIO.mapOptional(Kernel::CodeProps::Key::NumSpilledSGPRs,
+ MD.mNumSpilledSGPRs, uint16_t(0));
+ YIO.mapOptional(Kernel::CodeProps::Key::NumSpilledVGPRs,
+ MD.mNumSpilledVGPRs, uint16_t(0));
}
};
diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 563ca0d236a..9c87ecada77 100644
--- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -1188,6 +1188,8 @@ AMDGPU::HSAMD::Kernel::CodeProps::Metadata AMDGPUAsmPrinter::getHSACodeProps(
HSACodeProps.mMaxFlatWorkGroupSize = MFI.getMaxFlatWorkGroupSize();
HSACodeProps.mIsDynamicCallStack = ProgramInfo.DynamicCallStack;
HSACodeProps.mIsXNACKEnabled = STM.isXNACKEnabled();
+ HSACodeProps.mNumSpilledSGPRs = MFI.getNumSpilledSGPRs();
+ HSACodeProps.mNumSpilledVGPRs = MFI.getNumSpilledVGPRs();
return HSACodeProps;
}
diff --git a/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props.ll b/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props.ll
index 2d02b46e479..f4a914adddb 100644
--- a/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props.ll
+++ b/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props.ll
@@ -1,26 +1,26 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX700 --check-prefix=NOTES %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx800 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX800 --check-prefix=NOTES %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX803 --check-prefix=NOTES %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s
+@var = addrspace(1) global float 0.0
+
; CHECK: ---
; CHECK: Version: [ 1, 0 ]
-
; CHECK: Kernels:
-; CHECK: - Name: test
-; CHECK: SymbolName: 'test@kd'
-; CHECK: CodeProps:
-; CHECK: KernargSegmentSize: 24
-; CHECK: GroupSegmentFixedSize: 0
-; CHECK: PrivateSegmentFixedSize: 0
-; CHECK: KernargSegmentAlign: 8
-; CHECK: WavefrontSize: 64
-; GFX700: NumSGPRs: 6
-; GFX800: NumSGPRs: 96
-; GFX900: NumSGPRs: 6
-; GFX700: NumVGPRs: 4
-; GFX800: NumVGPRs: 6
-; GFX900: NumVGPRs: 6
-; CHECK: MaxFlatWorkGroupSize: 256
+
+; CHECK: - Name: test
+; CHECK: SymbolName: 'test@kd'
+; CHECK: CodeProps:
+; CHECK: KernargSegmentSize: 24
+; CHECK: GroupSegmentFixedSize: 0
+; CHECK: PrivateSegmentFixedSize: 0
+; CHECK: KernargSegmentAlign: 8
+; CHECK: WavefrontSize: 64
+; CHECK: NumSGPRs: 6
+; GFX700: NumVGPRs: 4
+; GFX803: NumVGPRs: 6
+; GFX900: NumVGPRs: 6
+; CHECK: MaxFlatWorkGroupSize: 256
define amdgpu_kernel void @test(
half addrspace(1)* %r,
half addrspace(1)* %a,
@@ -32,3 +32,111 @@ entry:
store half %r.val, half addrspace(1)* %r
ret void
}
+
+; CHECK: - Name: num_spilled_sgprs
+; CHECK: SymbolName: 'num_spilled_sgprs@kd'
+; CHECK: CodeProps:
+; CHECK: NumSpilledSGPRs: 41
+define amdgpu_kernel void @num_spilled_sgprs(
+ i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 addrspace(1)* %out2,
+ i32 addrspace(1)* %out3, i32 addrspace(1)* %out4, i32 addrspace(1)* %out5,
+ i32 addrspace(1)* %out6, i32 addrspace(1)* %out7, i32 addrspace(1)* %out8,
+ i32 addrspace(1)* %out9, i32 addrspace(1)* %outa, i32 addrspace(1)* %outb,
+ i32 addrspace(1)* %outc, i32 addrspace(1)* %outd, i32 addrspace(1)* %oute,
+ i32 addrspace(1)* %outf, i32 %in0, i32 %in1, i32 %in2, i32 %in3, i32 %in4,
+ i32 %in5, i32 %in6, i32 %in7, i32 %in8, i32 %in9, i32 %ina, i32 %inb,
+ i32 %inc, i32 %ind, i32 %ine, i32 %inf) #0 {
+entry:
+ store i32 %in0, i32 addrspace(1)* %out0
+ store i32 %in1, i32 addrspace(1)* %out1
+ store i32 %in2, i32 addrspace(1)* %out2
+ store i32 %in3, i32 addrspace(1)* %out3
+ store i32 %in4, i32 addrspace(1)* %out4
+ store i32 %in5, i32 addrspace(1)* %out5
+ store i32 %in6, i32 addrspace(1)* %out6
+ store i32 %in7, i32 addrspace(1)* %out7
+ store i32 %in8, i32 addrspace(1)* %out8
+ store i32 %in9, i32 addrspace(1)* %out9
+ store i32 %ina, i32 addrspace(1)* %outa
+ store i32 %inb, i32 addrspace(1)* %outb
+ store i32 %inc, i32 addrspace(1)* %outc
+ store i32 %ind, i32 addrspace(1)* %outd
+ store i32 %ine, i32 addrspace(1)* %oute
+ store i32 %inf, i32 addrspace(1)* %outf
+ ret void
+}
+
+; CHECK: - Name: num_spilled_vgprs
+; CHECK: SymbolName: 'num_spilled_vgprs@kd'
+; CHECK: CodeProps:
+; CHECK: NumSpilledVGPRs: 14
+define amdgpu_kernel void @num_spilled_vgprs() #1 {
+ %val0 = load volatile float, float addrspace(1)* @var
+ %val1 = load volatile float, float addrspace(1)* @var
+ %val2 = load volatile float, float addrspace(1)* @var
+ %val3 = load volatile float, float addrspace(1)* @var
+ %val4 = load volatile float, float addrspace(1)* @var
+ %val5 = load volatile float, float addrspace(1)* @var
+ %val6 = load volatile float, float addrspace(1)* @var
+ %val7 = load volatile float, float addrspace(1)* @var
+ %val8 = load volatile float, float addrspace(1)* @var
+ %val9 = load volatile float, float addrspace(1)* @var
+ %val10 = load volatile float, float addrspace(1)* @var
+ %val11 = load volatile float, float addrspace(1)* @var
+ %val12 = load volatile float, float addrspace(1)* @var
+ %val13 = load volatile float, float addrspace(1)* @var
+ %val14 = load volatile float, float addrspace(1)* @var
+ %val15 = load volatile float, float addrspace(1)* @var
+ %val16 = load volatile float, float addrspace(1)* @var
+ %val17 = load volatile float, float addrspace(1)* @var
+ %val18 = load volatile float, float addrspace(1)* @var
+ %val19 = load volatile float, float addrspace(1)* @var
+ %val20 = load volatile float, float addrspace(1)* @var
+ %val21 = load volatile float, float addrspace(1)* @var
+ %val22 = load volatile float, float addrspace(1)* @var
+ %val23 = load volatile float, float addrspace(1)* @var
+ %val24 = load volatile float, float addrspace(1)* @var
+ %val25 = load volatile float, float addrspace(1)* @var
+ %val26 = load volatile float, float addrspace(1)* @var
+ %val27 = load volatile float, float addrspace(1)* @var
+ %val28 = load volatile float, float addrspace(1)* @var
+ %val29 = load volatile float, float addrspace(1)* @var
+ %val30 = load volatile float, float addrspace(1)* @var
+
+ store volatile float %val0, float addrspace(1)* @var
+ store volatile float %val1, float addrspace(1)* @var
+ store volatile float %val2, float addrspace(1)* @var
+ store volatile float %val3, float addrspace(1)* @var
+ store volatile float %val4, float addrspace(1)* @var
+ store volatile float %val5, float addrspace(1)* @var
+ store volatile float %val6, float addrspace(1)* @var
+ store volatile float %val7, float addrspace(1)* @var
+ store volatile float %val8, float addrspace(1)* @var
+ store volatile float %val9, float addrspace(1)* @var
+ store volatile float %val10, float addrspace(1)* @var
+ store volatile float %val11, float addrspace(1)* @var
+ store volatile float %val12, float addrspace(1)* @var
+ store volatile float %val13, float addrspace(1)* @var
+ store volatile float %val14, float addrspace(1)* @var
+ store volatile float %val15, float addrspace(1)* @var
+ store volatile float %val16, float addrspace(1)* @var
+ store volatile float %val17, float addrspace(1)* @var
+ store volatile float %val18, float addrspace(1)* @var
+ store volatile float %val19, float addrspace(1)* @var
+ store volatile float %val20, float addrspace(1)* @var
+ store volatile float %val21, float addrspace(1)* @var
+ store volatile float %val22, float addrspace(1)* @var
+ store volatile float %val23, float addrspace(1)* @var
+ store volatile float %val24, float addrspace(1)* @var
+ store volatile float %val25, float addrspace(1)* @var
+ store volatile float %val26, float addrspace(1)* @var
+ store volatile float %val27, float addrspace(1)* @var
+ store volatile float %val28, float addrspace(1)* @var
+ store volatile float %val29, float addrspace(1)* @var
+ store volatile float %val30, float addrspace(1)* @var
+
+ ret void
+}
+
+attributes #0 = { "amdgpu-num-sgpr"="14" }
+attributes #1 = { "amdgpu-num-vgpr"="20" }
diff --git a/test/MC/AMDGPU/hsa-metadata-kernel-code-props.s b/test/MC/AMDGPU/hsa-metadata-kernel-code-props.s
index 54c4b4a01e2..0b0404295cf 100644
--- a/test/MC/AMDGPU/hsa-metadata-kernel-code-props.s
+++ b/test/MC/AMDGPU/hsa-metadata-kernel-code-props.s
@@ -14,6 +14,8 @@
// CHECK: KernargSegmentAlign: 16
// CHECK: WavefrontSize: 64
// CHECK: MaxFlatWorkGroupSize: 256
+// CHECK: NumSpilledSGPRs: 1
+// CHECK: NumSpilledVGPRs: 1
.amd_amdgpu_hsa_metadata
Version: [ 1, 0 ]
Printf: [ '1:1:4:%d\n', '2:1:8:%g\n' ]
@@ -27,4 +29,6 @@
KernargSegmentAlign: 16
WavefrontSize: 64
MaxFlatWorkGroupSize: 256
+ NumSpilledSGPRs: 1
+ NumSpilledVGPRs: 1
.end_amd_amdgpu_hsa_metadata