[llvm] [LAA][LV] Allow recognition of strided pointers with constant stride (PR #171151)
Nashe Mncube via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 23 06:32:55 PST 2026
================
@@ -861,33 +856,24 @@ define void @double_stride_int_scaled(ptr %p, ptr %p2, i64 %stride) {
; STRIDED-UF2: vector.ph:
; STRIDED-UF2-NEXT: [[TMP28:%.*]] = call i64 @llvm.vscale.i64()
; STRIDED-UF2-NEXT: [[TMP29:%.*]] = shl nuw i64 [[TMP28]], 2
-; STRIDED-UF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP29]], i64 0
-; STRIDED-UF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; STRIDED-UF2-NEXT: [[TMP30:%.*]] = shl nuw i64 [[TMP29]], 1
; STRIDED-UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP30]]
; STRIDED-UF2-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
-; STRIDED-UF2-NEXT: [[BROADCAST_SPLATINSERT10:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[STRIDE]], i64 0
-; STRIDED-UF2-NEXT: [[BROADCAST_SPLAT11:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT10]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
-; STRIDED-UF2-NEXT: [[TMP31:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
; STRIDED-UF2-NEXT: br label [[VECTOR_BODY:%.*]]
; STRIDED-UF2: vector.body:
; STRIDED-UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; STRIDED-UF2-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[TMP31]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; STRIDED-UF2-NEXT: [[STEP_ADD:%.*]] = add <vscale x 4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
-; STRIDED-UF2-NEXT: [[TMP33:%.*]] = mul nuw nsw <vscale x 4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT11]]
-; STRIDED-UF2-NEXT: [[TMP34:%.*]] = mul nuw nsw <vscale x 4 x i64> [[STEP_ADD]], [[BROADCAST_SPLAT11]]
-; STRIDED-UF2-NEXT: [[TMP35:%.*]] = getelementptr i32, ptr [[P]], <vscale x 4 x i64> [[TMP33]]
-; STRIDED-UF2-NEXT: [[TMP36:%.*]] = getelementptr i32, ptr [[P]], <vscale x 4 x i64> [[TMP34]]
-; STRIDED-UF2-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> align 4 [[TMP35]], <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> poison), !alias.scope [[META8:![0-9]+]]
-; STRIDED-UF2-NEXT: [[WIDE_MASKED_GATHER12:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> align 4 [[TMP36]], <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> poison), !alias.scope [[META8]]
+; STRIDED-UF2-NEXT: [[TMP31:%.*]] = mul nuw nsw i64 [[INDEX]], [[STRIDE]]
+; STRIDED-UF2-NEXT: [[TMP32:%.*]] = getelementptr i32, ptr [[P]], i64 [[TMP31]]
+; STRIDED-UF2-NEXT: [[TMP33:%.*]] = getelementptr i32, ptr [[TMP32]], i64 [[TMP29]]
+; STRIDED-UF2-NEXT: [[WIDE_MASKED_GATHER:%.*]] = load <vscale x 4 x i32>, ptr [[TMP32]], align 4, !alias.scope [[META8:![0-9]+]]
+; STRIDED-UF2-NEXT: [[WIDE_MASKED_GATHER12:%.*]] = load <vscale x 4 x i32>, ptr [[TMP33]], align 4, !alias.scope [[META8]]
; STRIDED-UF2-NEXT: [[TMP37:%.*]] = add <vscale x 4 x i32> [[WIDE_MASKED_GATHER]], splat (i32 1)
; STRIDED-UF2-NEXT: [[TMP38:%.*]] = add <vscale x 4 x i32> [[WIDE_MASKED_GATHER12]], splat (i32 1)
-; STRIDED-UF2-NEXT: [[TMP39:%.*]] = getelementptr i32, ptr [[P2]], <vscale x 4 x i64> [[TMP33]]
-; STRIDED-UF2-NEXT: [[TMP40:%.*]] = getelementptr i32, ptr [[P2]], <vscale x 4 x i64> [[TMP34]]
-; STRIDED-UF2-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP37]], <vscale x 4 x ptr> align 4 [[TMP39]], <vscale x 4 x i1> splat (i1 true)), !alias.scope [[META11:![0-9]+]], !noalias [[META8]]
-; STRIDED-UF2-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP38]], <vscale x 4 x ptr> align 4 [[TMP40]], <vscale x 4 x i1> splat (i1 true)), !alias.scope [[META11]], !noalias [[META8]]
+; STRIDED-UF2-NEXT: [[TMP36:%.*]] = getelementptr i32, ptr [[P2]], i64 [[TMP31]]
+; STRIDED-UF2-NEXT: [[TMP39:%.*]] = getelementptr i32, ptr [[TMP36]], i64 [[TMP29]]
+; STRIDED-UF2-NEXT: store <vscale x 4 x i32> [[TMP37]], ptr [[TMP36]], align 4, !alias.scope [[META11:![0-9]+]], !noalias [[META8]]
----------------
nasherm wrote:
You are correct. I've added changes that make sure a SCEV check is performed on `%stride`. I've also allowed gathers/scatters to be generated when appropriate.
https://github.com/llvm/llvm-project/pull/171151
More information about the llvm-commits
mailing list