[llvm] [LV] Add test cases for reverse accesses involving irregular types. nfc (PR #135139)
Mel Chen via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 11 00:27:35 PDT 2025
================
@@ -429,6 +429,631 @@ exit:
ret void
}
+define void @vector_reverse_irregular_type(ptr noalias %A, ptr noalias %B) {
+; RV64-LABEL: define void @vector_reverse_irregular_type(
+; RV64-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) #[[ATTR0]] {
+; RV64-NEXT: [[ENTRY:.*]]:
+; RV64-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; RV64: [[VECTOR_PH]]:
+; RV64-NEXT: br label %[[VECTOR_BODY:.*]]
+; RV64: [[VECTOR_BODY]]:
+; RV64-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; RV64-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]]
+; RV64-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
+; RV64-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], -1
+; RV64-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], -2
+; RV64-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], -3
+; RV64-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], -4
+; RV64-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], -5
+; RV64-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], -6
+; RV64-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], -7
+; RV64-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], -8
+; RV64-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], -9
+; RV64-NEXT: [[TMP10:%.*]] = add i64 [[OFFSET_IDX]], -10
+; RV64-NEXT: [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], -11
+; RV64-NEXT: [[TMP12:%.*]] = add i64 [[OFFSET_IDX]], -12
+; RV64-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], -13
+; RV64-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], -14
+; RV64-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], -15
+; RV64-NEXT: [[TMP16:%.*]] = add nsw i64 [[TMP0]], -1
+; RV64-NEXT: [[TMP17:%.*]] = add nsw i64 [[TMP1]], -1
+; RV64-NEXT: [[TMP18:%.*]] = add nsw i64 [[TMP2]], -1
+; RV64-NEXT: [[TMP19:%.*]] = add nsw i64 [[TMP3]], -1
+; RV64-NEXT: [[TMP20:%.*]] = add nsw i64 [[TMP4]], -1
+; RV64-NEXT: [[TMP21:%.*]] = add nsw i64 [[TMP5]], -1
+; RV64-NEXT: [[TMP22:%.*]] = add nsw i64 [[TMP6]], -1
+; RV64-NEXT: [[TMP23:%.*]] = add nsw i64 [[TMP7]], -1
+; RV64-NEXT: [[TMP24:%.*]] = add nsw i64 [[TMP8]], -1
+; RV64-NEXT: [[TMP25:%.*]] = add nsw i64 [[TMP9]], -1
+; RV64-NEXT: [[TMP26:%.*]] = add nsw i64 [[TMP10]], -1
+; RV64-NEXT: [[TMP27:%.*]] = add nsw i64 [[TMP11]], -1
+; RV64-NEXT: [[TMP28:%.*]] = add nsw i64 [[TMP12]], -1
+; RV64-NEXT: [[TMP29:%.*]] = add nsw i64 [[TMP13]], -1
+; RV64-NEXT: [[TMP30:%.*]] = add nsw i64 [[TMP14]], -1
+; RV64-NEXT: [[TMP31:%.*]] = add nsw i64 [[TMP15]], -1
+; RV64-NEXT: [[TMP32:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[TMP16]]
+; RV64-NEXT: [[TMP33:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[TMP17]]
+; RV64-NEXT: [[TMP34:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[TMP18]]
+; RV64-NEXT: [[TMP35:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[TMP19]]
+; RV64-NEXT: [[TMP36:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[TMP20]]
+; RV64-NEXT: [[TMP37:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[TMP21]]
+; RV64-NEXT: [[TMP38:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[TMP22]]
+; RV64-NEXT: [[TMP39:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[TMP23]]
+; RV64-NEXT: [[TMP40:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[TMP24]]
+; RV64-NEXT: [[TMP41:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[TMP25]]
+; RV64-NEXT: [[TMP42:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[TMP26]]
+; RV64-NEXT: [[TMP43:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[TMP27]]
+; RV64-NEXT: [[TMP44:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[TMP28]]
+; RV64-NEXT: [[TMP45:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[TMP29]]
+; RV64-NEXT: [[TMP46:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[TMP30]]
+; RV64-NEXT: [[TMP47:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[TMP31]]
+; RV64-NEXT: [[TMP48:%.*]] = load i7, ptr [[TMP32]], align 1
+; RV64-NEXT: [[TMP49:%.*]] = load i7, ptr [[TMP33]], align 1
+; RV64-NEXT: [[TMP50:%.*]] = load i7, ptr [[TMP34]], align 1
+; RV64-NEXT: [[TMP51:%.*]] = load i7, ptr [[TMP35]], align 1
+; RV64-NEXT: [[TMP52:%.*]] = load i7, ptr [[TMP36]], align 1
+; RV64-NEXT: [[TMP53:%.*]] = load i7, ptr [[TMP37]], align 1
+; RV64-NEXT: [[TMP54:%.*]] = load i7, ptr [[TMP38]], align 1
+; RV64-NEXT: [[TMP55:%.*]] = load i7, ptr [[TMP39]], align 1
+; RV64-NEXT: [[TMP56:%.*]] = load i7, ptr [[TMP40]], align 1
+; RV64-NEXT: [[TMP57:%.*]] = load i7, ptr [[TMP41]], align 1
+; RV64-NEXT: [[TMP58:%.*]] = load i7, ptr [[TMP42]], align 1
+; RV64-NEXT: [[TMP59:%.*]] = load i7, ptr [[TMP43]], align 1
+; RV64-NEXT: [[TMP60:%.*]] = load i7, ptr [[TMP44]], align 1
+; RV64-NEXT: [[TMP61:%.*]] = load i7, ptr [[TMP45]], align 1
+; RV64-NEXT: [[TMP62:%.*]] = load i7, ptr [[TMP46]], align 1
+; RV64-NEXT: [[TMP63:%.*]] = load i7, ptr [[TMP47]], align 1
+; RV64-NEXT: [[TMP64:%.*]] = insertelement <16 x i7> poison, i7 [[TMP48]], i32 0
+; RV64-NEXT: [[TMP65:%.*]] = insertelement <16 x i7> [[TMP64]], i7 [[TMP49]], i32 1
+; RV64-NEXT: [[TMP66:%.*]] = insertelement <16 x i7> [[TMP65]], i7 [[TMP50]], i32 2
+; RV64-NEXT: [[TMP67:%.*]] = insertelement <16 x i7> [[TMP66]], i7 [[TMP51]], i32 3
+; RV64-NEXT: [[TMP68:%.*]] = insertelement <16 x i7> [[TMP67]], i7 [[TMP52]], i32 4
+; RV64-NEXT: [[TMP69:%.*]] = insertelement <16 x i7> [[TMP68]], i7 [[TMP53]], i32 5
+; RV64-NEXT: [[TMP70:%.*]] = insertelement <16 x i7> [[TMP69]], i7 [[TMP54]], i32 6
+; RV64-NEXT: [[TMP71:%.*]] = insertelement <16 x i7> [[TMP70]], i7 [[TMP55]], i32 7
+; RV64-NEXT: [[TMP72:%.*]] = insertelement <16 x i7> [[TMP71]], i7 [[TMP56]], i32 8
+; RV64-NEXT: [[TMP73:%.*]] = insertelement <16 x i7> [[TMP72]], i7 [[TMP57]], i32 9
+; RV64-NEXT: [[TMP74:%.*]] = insertelement <16 x i7> [[TMP73]], i7 [[TMP58]], i32 10
+; RV64-NEXT: [[TMP75:%.*]] = insertelement <16 x i7> [[TMP74]], i7 [[TMP59]], i32 11
+; RV64-NEXT: [[TMP76:%.*]] = insertelement <16 x i7> [[TMP75]], i7 [[TMP60]], i32 12
+; RV64-NEXT: [[TMP77:%.*]] = insertelement <16 x i7> [[TMP76]], i7 [[TMP61]], i32 13
+; RV64-NEXT: [[TMP78:%.*]] = insertelement <16 x i7> [[TMP77]], i7 [[TMP62]], i32 14
+; RV64-NEXT: [[TMP79:%.*]] = insertelement <16 x i7> [[TMP78]], i7 [[TMP63]], i32 15
+; RV64-NEXT: [[TMP80:%.*]] = add <16 x i7> [[TMP79]], splat (i7 1)
----------------
Mel-Chen wrote:
The reason is that scalarized loads/stores currently only support fixed VFs, but `!llvm.loop !0` requests a user VF of `vscale x 4`.
The current strategy for handling such cases is to let the compiler freely choose the VF:
```
// Only clamp if the UserVF is not scalable. If the UserVF is scalable, it
// is better to ignore the hint and let the compiler choose a suitable VF.
```
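For context, a loop hint requesting a scalable user VF looks roughly like the sketch below (assuming the standard `llvm.loop.vectorize.*` metadata; this is not copied from the test file):
```
  br i1 %exitcond, label %exit, label %for.body, !llvm.loop !0

; Requests VF = vscale x 4. When the vectorizer cannot honor a scalable VF
; (e.g. because the accesses must be scalarized), the hint is ignored and
; the compiler picks a VF on its own.
!0 = distinct !{!0, !1, !2}
!1 = !{!"llvm.loop.vectorize.width", i32 4}
!2 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
```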
As a result, the actual VF may differ from the requested width. This behavior indeed doesn't quite match the intended semantics described in the comment for `ScalableForceKind::SK_PreferScalable`:
```
/// Vectorize loops using scalable vectors or fixed-width vectors, but favor
/// scalable vectors when the cost-model is inconclusive. This is the
/// default when the scalable.enable hint is enabled through a pragma.
SK_PreferScalable = 1
```
Should we consider updating the behavior so that, when a user-specified scalable VF can't be used, we fall back to a fixed VF instead? That would better align with the definition of `SK_PreferScalable`. cc @fhahn
But for now, since this is just an NFC test patch, I added a new hint with a fixed VF of 4: 8f6bd750e2f15e4f5203babc774f9a6f354cd73a
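For reference, the fixed-width variant of such a hint would look roughly like this (a sketch, not the exact metadata added in that commit):
```
; Requests a fixed VF of 4, which scalarized accesses can honor.
!3 = distinct !{!3, !4, !5}
!4 = !{!"llvm.loop.vectorize.width", i32 4}
!5 = !{!"llvm.loop.vectorize.scalable.enable", i1 false}
```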
https://github.com/llvm/llvm-project/pull/135139