From 13240bd5e5ef6c29e521c875ee2b68e08f5aa363 Mon Sep 17 00:00:00 2001
From: Hans Wennborg
Date: Fri, 2 Feb 2018 13:50:25 +0000
Subject: Merging r323907 and r323913:

------------------------------------------------------------------------
r323907 | mareko | 2018-01-31 21:17:52 +0100 (Wed, 31 Jan 2018) | 11 lines

[SeparateConstOffsetFromGEP] Preserve metadata when splitting GEPs

Summary:
!amdgpu.uniform needs to be preserved for AMDGPU, otherwise bad things
happen.

Reviewers: arsenm, nhaehnle, jingyue, broune, majnemer, bjarke.roune, dblaikie

Subscribers: wdng, tpr, llvm-commits

Differential Revision: https://reviews.llvm.org/D42744
------------------------------------------------------------------------
------------------------------------------------------------------------
r323913 | mareko | 2018-01-31 21:49:19 +0100 (Wed, 31 Jan 2018) | 1 line

[SeparateConstOffsetFromGEP] Fix up addrspace in the AMDGPU test
------------------------------------------------------------------------

git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_60@324088 91177308-0d34-0410-b5e6-96231b3b80d8
---
Reviewer note (not part of the commit message): the hunk below appends the
@keep_metadata test. Its input contains two getelementptr instructions tagged
!amdgpu.uniform, while the three "IR:" check lines expect three tagged
getelementptr instructions in the output -- presumably because one of the
GEPs is split in two; confirm against the test's RUN line, which lies
outside this hunk.

 ...split-gep-and-gvn-addrspace-addressing-modes.ll | 45 ++++++++++++++++++++++
 1 file changed, 45 insertions(+) (limited to 'test')

diff --git a/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/split-gep-and-gvn-addrspace-addressing-modes.ll b/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/split-gep-and-gvn-addrspace-addressing-modes.ll
index 23ec0ca2554..43fe18f1aa2 100644
--- a/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/split-gep-and-gvn-addrspace-addressing-modes.ll
+++ b/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/split-gep-and-gvn-addrspace-addressing-modes.ll
@@ -92,3 +92,48 @@ define amdgpu_kernel void @sum_of_lds_array_over_max_mubuf_offset(i32 %x, i32 %y
   store float %tmp21, float addrspace(1)* %output, align 4
   ret void
 }
+
+; IR-LABEL: @keep_metadata(
+; IR: getelementptr {{.*}} !amdgpu.uniform
+; IR: getelementptr {{.*}} !amdgpu.uniform
+; IR: getelementptr {{.*}} !amdgpu.uniform
+define amdgpu_ps <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @keep_metadata([0 x <4 x i32>] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), [0 x <8 x i32>] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), [0 x <4 x i32>] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), [0 x <8 x i32>] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #5 {
+main_body:
+  %22 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 %5) #8
+  %23 = bitcast float %22 to i32
+  %24 = shl i32 %23, 1
+  %25 = getelementptr [0 x <8 x i32>], [0 x <8 x i32>] addrspace(2)* %1, i32 0, i32 %24, !amdgpu.uniform !0
+  %26 = load <8 x i32>, <8 x i32> addrspace(2)* %25, align 32, !invariant.load !0
+  %27 = shl i32 %23, 2
+  %28 = or i32 %27, 3
+  %29 = bitcast [0 x <8 x i32>] addrspace(2)* %1 to [0 x <4 x i32>] addrspace(2)*
+  %30 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %29, i32 0, i32 %28, !amdgpu.uniform !0
+  %31 = load <4 x i32>, <4 x i32> addrspace(2)* %30, align 16, !invariant.load !0
+  %32 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> zeroinitializer, <8 x i32> %26, <4 x i32> %31, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #8
+  %33 = extractelement <4 x float> %32, i32 0
+  %34 = extractelement <4 x float> %32, i32 1
+  %35 = extractelement <4 x float> %32, i32 2
+  %36 = extractelement <4 x float> %32, i32 3
+  %37 = bitcast float %4 to i32
+  %38 = insertvalue <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %37, 4
+  %39 = insertvalue <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %38, float %33, 5
+  %40 = insertvalue <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %39, float %34, 6
+  %41 = insertvalue <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %40, float %35, 7
+  %42 = insertvalue <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %41, float %36, 8
+  %43 = insertvalue <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %42, float %20, 19
+  ret <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %43
+}
+
+; Function Attrs: nounwind readnone speculatable
+declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #6
+
+; Function Attrs: nounwind readonly
+declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #7
+
+
+!0 = !{}
+
+attributes #5 = { "InitialPSInputAddr"="45175" }
+attributes #6 = { nounwind readnone speculatable }
+attributes #7 = { nounwind readonly }
+attributes #8 = { nounwind readnone }
-- 
cgit v1.2.3