summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/Target/AMDGPU/SIInstrInfo.cpp10
-rw-r--r--test/CodeGen/AMDGPU/smrd.ll34
2 files changed, 42 insertions, 2 deletions
diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp
index 2c127d78726..654b96f792b 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3797,7 +3797,8 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
}
}
- BuildMI(*MBB, Inst, Inst.getDebugLoc(),
+ MachineInstr *NewInstr =
+ BuildMI(*MBB, Inst, Inst.getDebugLoc(),
get(AMDGPU::BUFFER_LOAD_DWORD_OFFEN), VDst)
.add(*VAddr) // vaddr
.add(*getNamedOperand(Inst, AMDGPU::OpName::sbase)) // srsrc
@@ -3806,12 +3807,17 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
.addImm(getNamedOperand(Inst, AMDGPU::OpName::glc)->getImm())
.addImm(0) // slc
.addImm(0) // tfe
- .setMemRefs(Inst.memoperands_begin(), Inst.memoperands_end());
+ .setMemRefs(Inst.memoperands_begin(), Inst.memoperands_end())
+ .getInstr();
MRI.replaceRegWith(getNamedOperand(Inst, AMDGPU::OpName::sdst)->getReg(),
VDst);
addUsersToMoveToVALUWorklist(VDst, MRI, Worklist);
Inst.eraseFromParent();
+
+ // Legalize all operands other than the offset. Notably, convert the srsrc
+ // into SGPRs using v_readfirstlane if needed.
+ legalizeOperands(*NewInstr);
continue;
}
}
diff --git a/test/CodeGen/AMDGPU/smrd.ll b/test/CodeGen/AMDGPU/smrd.ll
index 420c7b80b8d..adf22323ae6 100644
--- a/test/CodeGen/AMDGPU/smrd.ll
+++ b/test/CodeGen/AMDGPU/smrd.ll
@@ -261,8 +261,42 @@ main_body:
ret void
}
+; GCN-LABEL: {{^}}smrd_sgpr_descriptor_promoted
+; GCN: v_readfirstlane
+define amdgpu_cs void @smrd_sgpr_descriptor_promoted([0 x i8] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), i32) #0 {
+main_body:
+ %descptr = bitcast [0 x i8] addrspace(2)* %0 to <4 x i32> addrspace(2)*, !amdgpu.uniform !0
+ br label %.outer_loop_header
+
+ret_block: ; preds = %.outer, %.label22, %main_body
+ ret void
+
+.outer_loop_header:
+ br label %.inner_loop_header
+
+.inner_loop_header: ; preds = %.inner_loop_body, %.outer_loop_header
+ %loopctr.1 = phi i32 [ 0, %.outer_loop_header ], [ %loopctr.2, %.inner_loop_body ]
+ %loopctr.2 = add i32 %loopctr.1, 1
+ %inner_br1 = icmp slt i32 %loopctr.2, 10
+ br i1 %inner_br1, label %.inner_loop_body, label %ret_block
+
+.inner_loop_body:
+ %descriptor = load <4 x i32>, <4 x i32> addrspace(2)* %descptr, align 16, !invariant.load !0
+ %load1result = call float @llvm.SI.load.const.v4i32(<4 x i32> %descriptor, i32 0)
+ %inner_br2 = icmp uge i32 %1, 10
+ br i1 %inner_br2, label %.inner_loop_header, label %.outer_loop_body
+
+.outer_loop_body:
+ %offset = shl i32 %loopctr.2, 6
+ %load2result = call float @llvm.SI.load.const.v4i32(<4 x i32> %descriptor, i32 %offset)
+ %outer_br = fcmp ueq float %load2result, 0x0
+ br i1 %outer_br, label %.outer_loop_header, label %ret_block
+}
+
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #1
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
+
+!0 = !{}