[llvm] [AMDGPUInstCombineIntrinsic] Do not narrow 8,16-bit amdgcn_s_buffer_load intrinsics (PR #117997)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 2 15:49:28 PST 2024
================
@@ -1122,14 +1123,37 @@ define amdgpu_ps <2 x half> @extract_elt0_elt1_s_buffer_load_v4f16(<4 x i32> inr
ret <2 x half> %shuf
}
+define amdgpu_ps <2 x half> @extract_elt1_elt2_s_buffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
+; CHECK-LABEL: @extract_elt1_elt2_s_buffer_load_v4f16(
+; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 2
+; CHECK-NEXT: [[DATA:%.*]] = call <2 x half> @llvm.amdgcn.s.buffer.load.v2f16(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 0)
+; CHECK-NEXT: ret <2 x half> [[DATA]]
+;
+ %data = call <4 x half> @llvm.amdgcn.s.buffer.load.v4f16(<4 x i32> %rsrc, i32 %ofs, i32 0)
+ %shuf = shufflevector <4 x half> %data, <4 x half> poison, <2 x i32> <i32 1, i32 2>
+ ret <2 x half> %shuf
+}
+
+define amdgpu_ps <2 x half> @extract_elt2_elt3_s_buffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %ofs) #0 {
+; CHECK-LABEL: @extract_elt2_elt3_s_buffer_load_v4f16(
+; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4
+; CHECK-NEXT: [[DATA:%.*]] = call <2 x half> @llvm.amdgcn.s.buffer.load.v2f16(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 0)
+; CHECK-NEXT: ret <2 x half> [[DATA]]
+;
+ %data = call <4 x half> @llvm.amdgcn.s.buffer.load.v4f16(<4 x i32> %rsrc, i32 %ofs, i32 0)
+ %shuf = shufflevector <4 x half> %data, <4 x half> poison, <2 x i32> <i32 2, i32 3>
+ ret <2 x half> %shuf
+}
+
----------------
arsenm wrote:
Test an 8-bit element case?
https://github.com/llvm/llvm-project/pull/117997
More information about the llvm-commits mailing list