[llvm] [AMDGPU] Prefer vector i8s in PHI Nodes (PR #91016)
Jeffrey Byrnes via llvm-commits
llvm-commits at lists.llvm.org
Fri May 3 14:24:51 PDT 2024
================
@@ -362,71 +362,26 @@ define protected amdgpu_kernel void @vectorizeShuffle(<16 x i8> %invec, ptr %out
;
; GFX8PLUS-LABEL: @vectorizeShuffle(
; GFX8PLUS-NEXT: entry:
-; GFX8PLUS-NEXT: [[EL0:%.*]] = extractelement <16 x i8> [[INVEC:%.*]], i64 0
-; GFX8PLUS-NEXT: [[EL1:%.*]] = extractelement <16 x i8> [[INVEC]], i64 1
-; GFX8PLUS-NEXT: [[EL2:%.*]] = extractelement <16 x i8> [[INVEC]], i64 2
-; GFX8PLUS-NEXT: [[EL3:%.*]] = extractelement <16 x i8> [[INVEC]], i64 3
-; GFX8PLUS-NEXT: [[EL4:%.*]] = extractelement <16 x i8> [[INVEC]], i64 4
-; GFX8PLUS-NEXT: [[EL5:%.*]] = extractelement <16 x i8> [[INVEC]], i64 5
-; GFX8PLUS-NEXT: [[EL6:%.*]] = extractelement <16 x i8> [[INVEC]], i64 6
-; GFX8PLUS-NEXT: [[EL7:%.*]] = extractelement <16 x i8> [[INVEC]], i64 7
-; GFX8PLUS-NEXT: [[EL8:%.*]] = extractelement <16 x i8> [[INVEC]], i64 8
-; GFX8PLUS-NEXT: [[EL9:%.*]] = extractelement <16 x i8> [[INVEC]], i64 9
-; GFX8PLUS-NEXT: [[EL10:%.*]] = extractelement <16 x i8> [[INVEC]], i64 10
-; GFX8PLUS-NEXT: [[EL11:%.*]] = extractelement <16 x i8> [[INVEC]], i64 11
-; GFX8PLUS-NEXT: [[EL12:%.*]] = extractelement <16 x i8> [[INVEC]], i64 12
-; GFX8PLUS-NEXT: [[EL13:%.*]] = extractelement <16 x i8> [[INVEC]], i64 13
-; GFX8PLUS-NEXT: [[EL14:%.*]] = extractelement <16 x i8> [[INVEC]], i64 14
-; GFX8PLUS-NEXT: [[EL15:%.*]] = extractelement <16 x i8> [[INVEC]], i64 15
-; GFX8PLUS-NEXT: [[MUL0:%.*]] = mul i8 [[EL0]], 1
-; GFX8PLUS-NEXT: [[MUL1:%.*]] = mul i8 [[EL1]], 1
-; GFX8PLUS-NEXT: [[MUL2:%.*]] = mul i8 [[EL2]], 1
-; GFX8PLUS-NEXT: [[MUL3:%.*]] = mul i8 [[EL3]], 1
-; GFX8PLUS-NEXT: [[MUL4:%.*]] = mul i8 [[EL4]], 1
-; GFX8PLUS-NEXT: [[MUL5:%.*]] = mul i8 [[EL5]], 1
-; GFX8PLUS-NEXT: [[MUL6:%.*]] = mul i8 [[EL6]], 1
-; GFX8PLUS-NEXT: [[MUL7:%.*]] = mul i8 [[EL7]], 1
-; GFX8PLUS-NEXT: [[MUL8:%.*]] = mul i8 [[EL8]], 1
-; GFX8PLUS-NEXT: [[MUL9:%.*]] = mul i8 [[EL9]], 1
-; GFX8PLUS-NEXT: [[MUL10:%.*]] = mul i8 [[EL10]], 1
-; GFX8PLUS-NEXT: [[MUL11:%.*]] = mul i8 [[EL11]], 1
-; GFX8PLUS-NEXT: [[MUL12:%.*]] = mul i8 [[EL12]], 1
-; GFX8PLUS-NEXT: [[MUL13:%.*]] = mul i8 [[EL13]], 1
-; GFX8PLUS-NEXT: [[MUL14:%.*]] = mul i8 [[EL14]], 1
-; GFX8PLUS-NEXT: [[MUL15:%.*]] = mul i8 [[EL15]], 1
-; GFX8PLUS-NEXT: [[ADD0:%.*]] = add i8 [[MUL0]], 1
-; GFX8PLUS-NEXT: [[ADD1:%.*]] = add i8 [[MUL1]], 1
-; GFX8PLUS-NEXT: [[ADD2:%.*]] = add i8 [[MUL2]], 1
-; GFX8PLUS-NEXT: [[ADD3:%.*]] = add i8 [[MUL3]], 1
-; GFX8PLUS-NEXT: [[ADD4:%.*]] = add i8 [[MUL4]], 1
-; GFX8PLUS-NEXT: [[ADD5:%.*]] = add i8 [[MUL5]], 1
-; GFX8PLUS-NEXT: [[ADD6:%.*]] = add i8 [[MUL6]], 1
-; GFX8PLUS-NEXT: [[ADD7:%.*]] = add i8 [[MUL7]], 1
-; GFX8PLUS-NEXT: [[ADD8:%.*]] = add i8 [[MUL8]], 1
-; GFX8PLUS-NEXT: [[ADD9:%.*]] = add i8 [[MUL9]], 1
-; GFX8PLUS-NEXT: [[ADD10:%.*]] = add i8 [[MUL10]], 1
-; GFX8PLUS-NEXT: [[ADD11:%.*]] = add i8 [[MUL11]], 1
-; GFX8PLUS-NEXT: [[ADD12:%.*]] = add i8 [[MUL12]], 1
-; GFX8PLUS-NEXT: [[ADD13:%.*]] = add i8 [[MUL13]], 1
-; GFX8PLUS-NEXT: [[ADD14:%.*]] = add i8 [[MUL14]], 1
-; GFX8PLUS-NEXT: [[ADD15:%.*]] = add i8 [[MUL15]], 1
-; GFX8PLUS-NEXT: [[VECINS0:%.*]] = insertelement <16 x i8> poison, i8 [[ADD0]], i64 0
-; GFX8PLUS-NEXT: [[VECINS1:%.*]] = insertelement <16 x i8> [[VECINS0]], i8 [[ADD1]], i64 1
-; GFX8PLUS-NEXT: [[VECINS2:%.*]] = insertelement <16 x i8> [[VECINS1]], i8 [[ADD2]], i64 2
-; GFX8PLUS-NEXT: [[VECINS3:%.*]] = insertelement <16 x i8> [[VECINS2]], i8 [[ADD3]], i64 3
-; GFX8PLUS-NEXT: [[VECINS4:%.*]] = insertelement <16 x i8> [[VECINS3]], i8 [[ADD4]], i64 4
-; GFX8PLUS-NEXT: [[VECINS5:%.*]] = insertelement <16 x i8> [[VECINS4]], i8 [[ADD5]], i64 5
-; GFX8PLUS-NEXT: [[VECINS6:%.*]] = insertelement <16 x i8> [[VECINS5]], i8 [[ADD6]], i64 6
-; GFX8PLUS-NEXT: [[VECINS7:%.*]] = insertelement <16 x i8> [[VECINS6]], i8 [[ADD7]], i64 7
-; GFX8PLUS-NEXT: [[VECINS8:%.*]] = insertelement <16 x i8> [[VECINS7]], i8 [[ADD8]], i64 8
-; GFX8PLUS-NEXT: [[VECINS9:%.*]] = insertelement <16 x i8> [[VECINS8]], i8 [[ADD9]], i64 9
-; GFX8PLUS-NEXT: [[VECINS10:%.*]] = insertelement <16 x i8> [[VECINS9]], i8 [[ADD10]], i64 10
-; GFX8PLUS-NEXT: [[VECINS11:%.*]] = insertelement <16 x i8> [[VECINS10]], i8 [[ADD11]], i64 11
-; GFX8PLUS-NEXT: [[VECINS12:%.*]] = insertelement <16 x i8> [[VECINS11]], i8 [[ADD12]], i64 12
-; GFX8PLUS-NEXT: [[VECINS13:%.*]] = insertelement <16 x i8> [[VECINS12]], i8 [[ADD13]], i64 13
-; GFX8PLUS-NEXT: [[VECINS14:%.*]] = insertelement <16 x i8> [[VECINS13]], i8 [[ADD14]], i64 14
-; GFX8PLUS-NEXT: [[VECINS15:%.*]] = insertelement <16 x i8> [[VECINS14]], i8 [[ADD15]], i64 15
-; GFX8PLUS-NEXT: store <16 x i8> [[VECINS15]], ptr [[OUT:%.*]], align 16
+; GFX8PLUS-NEXT: [[TMP0:%.*]] = shufflevector <16 x i8> [[INVEC:%.*]], <16 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; GFX8PLUS-NEXT: [[TMP1:%.*]] = mul <4 x i8> [[TMP0]], <i8 1, i8 1, i8 1, i8 1>
----------------
jrbyrnes wrote:
I believe this is because the arithmetic cost model has not been adjusted yet, which is still a TODO.
https://github.com/llvm/llvm-project/pull/91016
More information about the llvm-commits mailing list