[llvm] [AMDGPU] Prefer vector i8s in PHI Nodes (PR #91016)

Jeffrey Byrnes via llvm-commits llvm-commits at lists.llvm.org
Fri May 3 14:24:51 PDT 2024


================
@@ -362,71 +362,26 @@ define protected amdgpu_kernel void @vectorizeShuffle(<16 x i8> %invec, ptr %out
 ;
 ; GFX8PLUS-LABEL: @vectorizeShuffle(
 ; GFX8PLUS-NEXT:  entry:
-; GFX8PLUS-NEXT:    [[EL0:%.*]] = extractelement <16 x i8> [[INVEC:%.*]], i64 0
-; GFX8PLUS-NEXT:    [[EL1:%.*]] = extractelement <16 x i8> [[INVEC]], i64 1
-; GFX8PLUS-NEXT:    [[EL2:%.*]] = extractelement <16 x i8> [[INVEC]], i64 2
-; GFX8PLUS-NEXT:    [[EL3:%.*]] = extractelement <16 x i8> [[INVEC]], i64 3
-; GFX8PLUS-NEXT:    [[EL4:%.*]] = extractelement <16 x i8> [[INVEC]], i64 4
-; GFX8PLUS-NEXT:    [[EL5:%.*]] = extractelement <16 x i8> [[INVEC]], i64 5
-; GFX8PLUS-NEXT:    [[EL6:%.*]] = extractelement <16 x i8> [[INVEC]], i64 6
-; GFX8PLUS-NEXT:    [[EL7:%.*]] = extractelement <16 x i8> [[INVEC]], i64 7
-; GFX8PLUS-NEXT:    [[EL8:%.*]] = extractelement <16 x i8> [[INVEC]], i64 8
-; GFX8PLUS-NEXT:    [[EL9:%.*]] = extractelement <16 x i8> [[INVEC]], i64 9
-; GFX8PLUS-NEXT:    [[EL10:%.*]] = extractelement <16 x i8> [[INVEC]], i64 10
-; GFX8PLUS-NEXT:    [[EL11:%.*]] = extractelement <16 x i8> [[INVEC]], i64 11
-; GFX8PLUS-NEXT:    [[EL12:%.*]] = extractelement <16 x i8> [[INVEC]], i64 12
-; GFX8PLUS-NEXT:    [[EL13:%.*]] = extractelement <16 x i8> [[INVEC]], i64 13
-; GFX8PLUS-NEXT:    [[EL14:%.*]] = extractelement <16 x i8> [[INVEC]], i64 14
-; GFX8PLUS-NEXT:    [[EL15:%.*]] = extractelement <16 x i8> [[INVEC]], i64 15
-; GFX8PLUS-NEXT:    [[MUL0:%.*]] = mul i8 [[EL0]], 1
-; GFX8PLUS-NEXT:    [[MUL1:%.*]] = mul i8 [[EL1]], 1
-; GFX8PLUS-NEXT:    [[MUL2:%.*]] = mul i8 [[EL2]], 1
-; GFX8PLUS-NEXT:    [[MUL3:%.*]] = mul i8 [[EL3]], 1
-; GFX8PLUS-NEXT:    [[MUL4:%.*]] = mul i8 [[EL4]], 1
-; GFX8PLUS-NEXT:    [[MUL5:%.*]] = mul i8 [[EL5]], 1
-; GFX8PLUS-NEXT:    [[MUL6:%.*]] = mul i8 [[EL6]], 1
-; GFX8PLUS-NEXT:    [[MUL7:%.*]] = mul i8 [[EL7]], 1
-; GFX8PLUS-NEXT:    [[MUL8:%.*]] = mul i8 [[EL8]], 1
-; GFX8PLUS-NEXT:    [[MUL9:%.*]] = mul i8 [[EL9]], 1
-; GFX8PLUS-NEXT:    [[MUL10:%.*]] = mul i8 [[EL10]], 1
-; GFX8PLUS-NEXT:    [[MUL11:%.*]] = mul i8 [[EL11]], 1
-; GFX8PLUS-NEXT:    [[MUL12:%.*]] = mul i8 [[EL12]], 1
-; GFX8PLUS-NEXT:    [[MUL13:%.*]] = mul i8 [[EL13]], 1
-; GFX8PLUS-NEXT:    [[MUL14:%.*]] = mul i8 [[EL14]], 1
-; GFX8PLUS-NEXT:    [[MUL15:%.*]] = mul i8 [[EL15]], 1
-; GFX8PLUS-NEXT:    [[ADD0:%.*]] = add i8 [[MUL0]], 1
-; GFX8PLUS-NEXT:    [[ADD1:%.*]] = add i8 [[MUL1]], 1
-; GFX8PLUS-NEXT:    [[ADD2:%.*]] = add i8 [[MUL2]], 1
-; GFX8PLUS-NEXT:    [[ADD3:%.*]] = add i8 [[MUL3]], 1
-; GFX8PLUS-NEXT:    [[ADD4:%.*]] = add i8 [[MUL4]], 1
-; GFX8PLUS-NEXT:    [[ADD5:%.*]] = add i8 [[MUL5]], 1
-; GFX8PLUS-NEXT:    [[ADD6:%.*]] = add i8 [[MUL6]], 1
-; GFX8PLUS-NEXT:    [[ADD7:%.*]] = add i8 [[MUL7]], 1
-; GFX8PLUS-NEXT:    [[ADD8:%.*]] = add i8 [[MUL8]], 1
-; GFX8PLUS-NEXT:    [[ADD9:%.*]] = add i8 [[MUL9]], 1
-; GFX8PLUS-NEXT:    [[ADD10:%.*]] = add i8 [[MUL10]], 1
-; GFX8PLUS-NEXT:    [[ADD11:%.*]] = add i8 [[MUL11]], 1
-; GFX8PLUS-NEXT:    [[ADD12:%.*]] = add i8 [[MUL12]], 1
-; GFX8PLUS-NEXT:    [[ADD13:%.*]] = add i8 [[MUL13]], 1
-; GFX8PLUS-NEXT:    [[ADD14:%.*]] = add i8 [[MUL14]], 1
-; GFX8PLUS-NEXT:    [[ADD15:%.*]] = add i8 [[MUL15]], 1
-; GFX8PLUS-NEXT:    [[VECINS0:%.*]] = insertelement <16 x i8> poison, i8 [[ADD0]], i64 0
-; GFX8PLUS-NEXT:    [[VECINS1:%.*]] = insertelement <16 x i8> [[VECINS0]], i8 [[ADD1]], i64 1
-; GFX8PLUS-NEXT:    [[VECINS2:%.*]] = insertelement <16 x i8> [[VECINS1]], i8 [[ADD2]], i64 2
-; GFX8PLUS-NEXT:    [[VECINS3:%.*]] = insertelement <16 x i8> [[VECINS2]], i8 [[ADD3]], i64 3
-; GFX8PLUS-NEXT:    [[VECINS4:%.*]] = insertelement <16 x i8> [[VECINS3]], i8 [[ADD4]], i64 4
-; GFX8PLUS-NEXT:    [[VECINS5:%.*]] = insertelement <16 x i8> [[VECINS4]], i8 [[ADD5]], i64 5
-; GFX8PLUS-NEXT:    [[VECINS6:%.*]] = insertelement <16 x i8> [[VECINS5]], i8 [[ADD6]], i64 6
-; GFX8PLUS-NEXT:    [[VECINS7:%.*]] = insertelement <16 x i8> [[VECINS6]], i8 [[ADD7]], i64 7
-; GFX8PLUS-NEXT:    [[VECINS8:%.*]] = insertelement <16 x i8> [[VECINS7]], i8 [[ADD8]], i64 8
-; GFX8PLUS-NEXT:    [[VECINS9:%.*]] = insertelement <16 x i8> [[VECINS8]], i8 [[ADD9]], i64 9
-; GFX8PLUS-NEXT:    [[VECINS10:%.*]] = insertelement <16 x i8> [[VECINS9]], i8 [[ADD10]], i64 10
-; GFX8PLUS-NEXT:    [[VECINS11:%.*]] = insertelement <16 x i8> [[VECINS10]], i8 [[ADD11]], i64 11
-; GFX8PLUS-NEXT:    [[VECINS12:%.*]] = insertelement <16 x i8> [[VECINS11]], i8 [[ADD12]], i64 12
-; GFX8PLUS-NEXT:    [[VECINS13:%.*]] = insertelement <16 x i8> [[VECINS12]], i8 [[ADD13]], i64 13
-; GFX8PLUS-NEXT:    [[VECINS14:%.*]] = insertelement <16 x i8> [[VECINS13]], i8 [[ADD14]], i64 14
-; GFX8PLUS-NEXT:    [[VECINS15:%.*]] = insertelement <16 x i8> [[VECINS14]], i8 [[ADD15]], i64 15
-; GFX8PLUS-NEXT:    store <16 x i8> [[VECINS15]], ptr [[OUT:%.*]], align 16
+; GFX8PLUS-NEXT:    [[TMP0:%.*]] = shufflevector <16 x i8> [[INVEC:%.*]], <16 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; GFX8PLUS-NEXT:    [[TMP1:%.*]] = mul <4 x i8> [[TMP0]], <i8 1, i8 1, i8 1, i8 1>
----------------
jrbyrnes wrote:

I believe this is because the arithmetic cost model has not been adjusted yet; that is still a TODO.

https://github.com/llvm/llvm-project/pull/91016


More information about the llvm-commits mailing list