[llvm] f4c7cc2 - [LV] Use more precise isPredicatedInst in legacy CCH (NFC).

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Sun Jul 13 11:56:26 PDT 2025


Author: Florian Hahn
Date: 2025-07-13T19:55:34+01:00
New Revision: f4c7cc26b65912c9951f0cb09b3a5ba082096687

URL: https://github.com/llvm/llvm-project/commit/f4c7cc26b65912c9951f0cb09b3a5ba082096687
DIFF: https://github.com/llvm/llvm-project/commit/f4c7cc26b65912c9951f0cb09b3a5ba082096687.diff

LOG: [LV] Use more precise isPredicatedInst in legacy CCH (NFC).

Legal::isMaskRequired may be overly conservative and return true even
when no mask is actually required.

Use isPredicatedInst from the cost model instead, which fixes a
cost-model divergence between the legacy and VPlan cost models, where
the legacy cost model incorrectly assumed some loads were predicated.

Fixes https://github.com/llvm/llvm-project/issues/148431.

Added: 
    llvm/test/Transforms/LoopVectorize/AArch64/predicated-costs.ll

Modified: 
    llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index fb6640d5cfcf8..5380a0fc6498a 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -6289,8 +6289,8 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I,
         return TTI::CastContextHint::Interleave;
       case LoopVectorizationCostModel::CM_Scalarize:
       case LoopVectorizationCostModel::CM_Widen:
-        return Legal->isMaskRequired(I) ? TTI::CastContextHint::Masked
-                                        : TTI::CastContextHint::Normal;
+        return isPredicatedInst(I) ? TTI::CastContextHint::Masked
+                                   : TTI::CastContextHint::Normal;
       case LoopVectorizationCostModel::CM_Widen_Reverse:
         return TTI::CastContextHint::Reversed;
       case LoopVectorizationCostModel::CM_Unknown:

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/predicated-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/predicated-costs.ll
new file mode 100644
index 0000000000000..019d2ee9886a6
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/predicated-costs.ll
@@ -0,0 +1,331 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -p loop-vectorize -S %s | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
+target triple = "aarch64-unknown-linux"
+
+; Test case from https://github.com/llvm/llvm-project/issues/148431.
+define void @test_predicated_load_cast_hint(ptr %dst.1, ptr %dst.2, ptr %src, i8 %n, i64 %off) #0 {
+; CHECK-LABEL: define void @test_predicated_load_cast_hint(
+; CHECK-SAME: ptr [[DST_1:%.*]], ptr [[DST_2:%.*]], ptr [[SRC:%.*]], i8 [[N:%.*]], i64 [[OFF:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    [[N_EXT:%.*]] = sext i8 [[N]] to i32
+; CHECK-NEXT:    [[N_SUB:%.*]] = add i32 [[N_EXT]], -15
+; CHECK-NEXT:    [[SMAX16:%.*]] = call i32 @llvm.smax.i32(i32 [[N_SUB]], i32 4)
+; CHECK-NEXT:    [[TMP0:%.*]] = add nsw i32 [[SMAX16]], -1
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[TMP0]], 2
+; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i32 [[TMP1]], 1
+; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
+; CHECK:       [[VECTOR_SCEVCHECK]]:
+; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N_SUB]], i32 4)
+; CHECK-NEXT:    [[TMP3:%.*]] = add nsw i32 [[SMAX]], -1
+; CHECK-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP3]], 2
+; CHECK-NEXT:    [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
+; CHECK-NEXT:    [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 4, i8 [[TMP5]])
+; CHECK-NEXT:    [[MUL_RESULT:%.*]] = extractvalue { i8, i1 } [[MUL]], 0
+; CHECK-NEXT:    [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1
+; CHECK-NEXT:    [[TMP6:%.*]] = add i8 4, [[MUL_RESULT]]
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp ult i8 [[TMP6]], 4
+; CHECK-NEXT:    [[TMP8:%.*]] = or i1 [[TMP7]], [[MUL_OVERFLOW]]
+; CHECK-NEXT:    [[TMP9:%.*]] = icmp ugt i32 [[TMP4]], 255
+; CHECK-NEXT:    [[TMP10:%.*]] = or i1 [[TMP8]], [[TMP9]]
+; CHECK-NEXT:    [[TMP11:%.*]] = shl i64 [[OFF]], 3
+; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST_1]], i64 [[TMP11]]
+; CHECK-NEXT:    [[TMP12:%.*]] = zext i32 [[TMP4]] to i64
+; CHECK-NEXT:    [[MUL1:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 512, i64 [[TMP12]])
+; CHECK-NEXT:    [[MUL_RESULT2:%.*]] = extractvalue { i64, i1 } [[MUL1]], 0
+; CHECK-NEXT:    [[MUL_OVERFLOW3:%.*]] = extractvalue { i64, i1 } [[MUL1]], 1
+; CHECK-NEXT:    [[TMP13:%.*]] = sub i64 0, [[MUL_RESULT2]]
+; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 [[MUL_RESULT2]]
+; CHECK-NEXT:    [[TMP15:%.*]] = icmp ult ptr [[TMP14]], [[SCEVGEP]]
+; CHECK-NEXT:    [[TMP16:%.*]] = or i1 [[TMP15]], [[MUL_OVERFLOW3]]
+; CHECK-NEXT:    [[TMP17:%.*]] = or i1 [[TMP10]], [[TMP16]]
+; CHECK-NEXT:    br i1 [[TMP17]], label %[[SCALAR_PH]], label %[[VECTOR_MEMCHECK:.*]]
+; CHECK:       [[VECTOR_MEMCHECK]]:
+; CHECK-NEXT:    [[SCEVGEP4:%.*]] = getelementptr i8, ptr [[DST_2]], i64 1
+; CHECK-NEXT:    [[SCEVGEP5:%.*]] = getelementptr i8, ptr [[SRC]], i64 1
+; CHECK-NEXT:    [[TMP18:%.*]] = shl i64 [[OFF]], 3
+; CHECK-NEXT:    [[SCEVGEP6:%.*]] = getelementptr i8, ptr [[DST_1]], i64 [[TMP18]]
+; CHECK-NEXT:    [[SMAX7:%.*]] = call i32 @llvm.smax.i32(i32 [[N_SUB]], i32 4)
+; CHECK-NEXT:    [[TMP19:%.*]] = add nsw i32 [[SMAX7]], -1
+; CHECK-NEXT:    [[TMP20:%.*]] = zext nneg i32 [[TMP19]] to i64
+; CHECK-NEXT:    [[TMP21:%.*]] = lshr i64 [[TMP20]], 2
+; CHECK-NEXT:    [[TMP22:%.*]] = shl nuw nsw i64 [[TMP21]], 9
+; CHECK-NEXT:    [[TMP23:%.*]] = add i64 [[TMP22]], [[TMP18]]
+; CHECK-NEXT:    [[TMP24:%.*]] = add i64 [[TMP23]], 8
+; CHECK-NEXT:    [[SCEVGEP8:%.*]] = getelementptr i8, ptr [[DST_1]], i64 [[TMP24]]
+; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ult ptr [[DST_2]], [[SCEVGEP5]]
+; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ult ptr [[SRC]], [[SCEVGEP4]]
+; CHECK-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
+; CHECK-NEXT:    [[BOUND09:%.*]] = icmp ult ptr [[DST_2]], [[SCEVGEP8]]
+; CHECK-NEXT:    [[BOUND110:%.*]] = icmp ult ptr [[SCEVGEP6]], [[SCEVGEP4]]
+; CHECK-NEXT:    [[FOUND_CONFLICT11:%.*]] = and i1 [[BOUND09]], [[BOUND110]]
+; CHECK-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT11]]
+; CHECK-NEXT:    [[BOUND012:%.*]] = icmp ult ptr [[SRC]], [[SCEVGEP8]]
+; CHECK-NEXT:    [[BOUND113:%.*]] = icmp ult ptr [[SCEVGEP6]], [[SCEVGEP5]]
+; CHECK-NEXT:    [[FOUND_CONFLICT14:%.*]] = and i1 [[BOUND012]], [[BOUND113]]
+; CHECK-NEXT:    [[CONFLICT_RDX15:%.*]] = or i1 [[CONFLICT_RDX]], [[FOUND_CONFLICT14]]
+; CHECK-NEXT:    br i1 [[CONFLICT_RDX15]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    [[N_RND_UP:%.*]] = add i32 [[TMP2]], 15
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 16
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
+; CHECK-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP2]], 1
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE50:.*]] ]
+; CHECK-NEXT:    [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i8
+; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = mul i8 [[DOTCAST]], 4
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT17:%.*]] = insertelement <16 x i32> poison, i32 [[INDEX]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT18:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT17]], <16 x i32> poison, <16 x i32> zeroinitializer
+; CHECK-NEXT:    [[VEC_IV:%.*]] = add <16 x i32> [[BROADCAST_SPLAT18]], <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:    [[TMP25:%.*]] = icmp ule <16 x i32> [[VEC_IV]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT:    [[TMP26:%.*]] = load i8, ptr [[SRC]], align 1, !alias.scope [[META0:![0-9]+]], !noalias [[META3:![0-9]+]]
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT19:%.*]] = insertelement <16 x i8> poison, i8 [[TMP26]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT20:%.*]] = shufflevector <16 x i8> [[BROADCAST_SPLATINSERT19]], <16 x i8> poison, <16 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP27:%.*]] = zext <16 x i8> [[BROADCAST_SPLAT20]] to <16 x i64>
+; CHECK-NEXT:    [[TMP28:%.*]] = extractelement <16 x i1> [[TMP25]], i32 0
+; CHECK-NEXT:    br i1 [[TMP28]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
+; CHECK:       [[PRED_STORE_IF]]:
+; CHECK-NEXT:    [[TMP29:%.*]] = add i8 [[OFFSET_IDX]], 0
+; CHECK-NEXT:    [[TMP30:%.*]] = zext i8 [[TMP29]] to i64
+; CHECK-NEXT:    [[TMP31:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP30]], i64 [[OFF]]
+; CHECK-NEXT:    [[TMP32:%.*]] = extractelement <16 x i64> [[TMP27]], i32 0
+; CHECK-NEXT:    [[TMP33:%.*]] = or i64 [[TMP32]], 1
+; CHECK-NEXT:    store i64 [[TMP33]], ptr [[TMP31]], align 8, !alias.scope [[META3]]
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE]]
+; CHECK:       [[PRED_STORE_CONTINUE]]:
+; CHECK-NEXT:    [[TMP34:%.*]] = extractelement <16 x i1> [[TMP25]], i32 1
+; CHECK-NEXT:    br i1 [[TMP34]], label %[[PRED_STORE_IF21:.*]], label %[[PRED_STORE_CONTINUE22:.*]]
+; CHECK:       [[PRED_STORE_IF21]]:
+; CHECK-NEXT:    [[TMP35:%.*]] = add i8 [[OFFSET_IDX]], 4
+; CHECK-NEXT:    [[TMP36:%.*]] = zext i8 [[TMP35]] to i64
+; CHECK-NEXT:    [[TMP37:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP36]], i64 [[OFF]]
+; CHECK-NEXT:    [[TMP38:%.*]] = extractelement <16 x i64> [[TMP27]], i32 1
+; CHECK-NEXT:    [[TMP39:%.*]] = or i64 [[TMP38]], 1
+; CHECK-NEXT:    store i64 [[TMP39]], ptr [[TMP37]], align 8, !alias.scope [[META3]]
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE22]]
+; CHECK:       [[PRED_STORE_CONTINUE22]]:
+; CHECK-NEXT:    [[TMP40:%.*]] = extractelement <16 x i1> [[TMP25]], i32 2
+; CHECK-NEXT:    br i1 [[TMP40]], label %[[PRED_STORE_IF23:.*]], label %[[PRED_STORE_CONTINUE24:.*]]
+; CHECK:       [[PRED_STORE_IF23]]:
+; CHECK-NEXT:    [[TMP41:%.*]] = add i8 [[OFFSET_IDX]], 8
+; CHECK-NEXT:    [[TMP42:%.*]] = zext i8 [[TMP41]] to i64
+; CHECK-NEXT:    [[TMP43:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP42]], i64 [[OFF]]
+; CHECK-NEXT:    [[TMP44:%.*]] = extractelement <16 x i64> [[TMP27]], i32 2
+; CHECK-NEXT:    [[TMP45:%.*]] = or i64 [[TMP44]], 1
+; CHECK-NEXT:    store i64 [[TMP45]], ptr [[TMP43]], align 8, !alias.scope [[META3]]
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE24]]
+; CHECK:       [[PRED_STORE_CONTINUE24]]:
+; CHECK-NEXT:    [[TMP46:%.*]] = extractelement <16 x i1> [[TMP25]], i32 3
+; CHECK-NEXT:    br i1 [[TMP46]], label %[[PRED_STORE_IF25:.*]], label %[[PRED_STORE_CONTINUE26:.*]]
+; CHECK:       [[PRED_STORE_IF25]]:
+; CHECK-NEXT:    [[TMP47:%.*]] = add i8 [[OFFSET_IDX]], 12
+; CHECK-NEXT:    [[TMP48:%.*]] = zext i8 [[TMP47]] to i64
+; CHECK-NEXT:    [[TMP49:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP48]], i64 [[OFF]]
+; CHECK-NEXT:    [[TMP50:%.*]] = extractelement <16 x i64> [[TMP27]], i32 3
+; CHECK-NEXT:    [[TMP51:%.*]] = or i64 [[TMP50]], 1
+; CHECK-NEXT:    store i64 [[TMP51]], ptr [[TMP49]], align 8, !alias.scope [[META3]]
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE26]]
+; CHECK:       [[PRED_STORE_CONTINUE26]]:
+; CHECK-NEXT:    [[TMP52:%.*]] = extractelement <16 x i1> [[TMP25]], i32 4
+; CHECK-NEXT:    br i1 [[TMP52]], label %[[PRED_STORE_IF27:.*]], label %[[PRED_STORE_CONTINUE28:.*]]
+; CHECK:       [[PRED_STORE_IF27]]:
+; CHECK-NEXT:    [[TMP53:%.*]] = add i8 [[OFFSET_IDX]], 16
+; CHECK-NEXT:    [[TMP54:%.*]] = zext i8 [[TMP53]] to i64
+; CHECK-NEXT:    [[TMP55:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP54]], i64 [[OFF]]
+; CHECK-NEXT:    [[TMP56:%.*]] = extractelement <16 x i64> [[TMP27]], i32 4
+; CHECK-NEXT:    [[TMP57:%.*]] = or i64 [[TMP56]], 1
+; CHECK-NEXT:    store i64 [[TMP57]], ptr [[TMP55]], align 8, !alias.scope [[META3]]
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE28]]
+; CHECK:       [[PRED_STORE_CONTINUE28]]:
+; CHECK-NEXT:    [[TMP58:%.*]] = extractelement <16 x i1> [[TMP25]], i32 5
+; CHECK-NEXT:    br i1 [[TMP58]], label %[[PRED_STORE_IF29:.*]], label %[[PRED_STORE_CONTINUE30:.*]]
+; CHECK:       [[PRED_STORE_IF29]]:
+; CHECK-NEXT:    [[TMP59:%.*]] = add i8 [[OFFSET_IDX]], 20
+; CHECK-NEXT:    [[TMP60:%.*]] = zext i8 [[TMP59]] to i64
+; CHECK-NEXT:    [[TMP61:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP60]], i64 [[OFF]]
+; CHECK-NEXT:    [[TMP62:%.*]] = extractelement <16 x i64> [[TMP27]], i32 5
+; CHECK-NEXT:    [[TMP63:%.*]] = or i64 [[TMP62]], 1
+; CHECK-NEXT:    store i64 [[TMP63]], ptr [[TMP61]], align 8, !alias.scope [[META3]]
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE30]]
+; CHECK:       [[PRED_STORE_CONTINUE30]]:
+; CHECK-NEXT:    [[TMP64:%.*]] = extractelement <16 x i1> [[TMP25]], i32 6
+; CHECK-NEXT:    br i1 [[TMP64]], label %[[PRED_STORE_IF31:.*]], label %[[PRED_STORE_CONTINUE32:.*]]
+; CHECK:       [[PRED_STORE_IF31]]:
+; CHECK-NEXT:    [[TMP65:%.*]] = add i8 [[OFFSET_IDX]], 24
+; CHECK-NEXT:    [[TMP66:%.*]] = zext i8 [[TMP65]] to i64
+; CHECK-NEXT:    [[TMP67:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP66]], i64 [[OFF]]
+; CHECK-NEXT:    [[TMP68:%.*]] = extractelement <16 x i64> [[TMP27]], i32 6
+; CHECK-NEXT:    [[TMP69:%.*]] = or i64 [[TMP68]], 1
+; CHECK-NEXT:    store i64 [[TMP69]], ptr [[TMP67]], align 8, !alias.scope [[META3]]
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE32]]
+; CHECK:       [[PRED_STORE_CONTINUE32]]:
+; CHECK-NEXT:    [[TMP70:%.*]] = extractelement <16 x i1> [[TMP25]], i32 7
+; CHECK-NEXT:    br i1 [[TMP70]], label %[[PRED_STORE_IF33:.*]], label %[[PRED_STORE_CONTINUE34:.*]]
+; CHECK:       [[PRED_STORE_IF33]]:
+; CHECK-NEXT:    [[TMP71:%.*]] = add i8 [[OFFSET_IDX]], 28
+; CHECK-NEXT:    [[TMP72:%.*]] = zext i8 [[TMP71]] to i64
+; CHECK-NEXT:    [[TMP73:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP72]], i64 [[OFF]]
+; CHECK-NEXT:    [[TMP74:%.*]] = extractelement <16 x i64> [[TMP27]], i32 7
+; CHECK-NEXT:    [[TMP75:%.*]] = or i64 [[TMP74]], 1
+; CHECK-NEXT:    store i64 [[TMP75]], ptr [[TMP73]], align 8, !alias.scope [[META3]]
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE34]]
+; CHECK:       [[PRED_STORE_CONTINUE34]]:
+; CHECK-NEXT:    [[TMP76:%.*]] = extractelement <16 x i1> [[TMP25]], i32 8
+; CHECK-NEXT:    br i1 [[TMP76]], label %[[PRED_STORE_IF35:.*]], label %[[PRED_STORE_CONTINUE36:.*]]
+; CHECK:       [[PRED_STORE_IF35]]:
+; CHECK-NEXT:    [[TMP77:%.*]] = add i8 [[OFFSET_IDX]], 32
+; CHECK-NEXT:    [[TMP78:%.*]] = zext i8 [[TMP77]] to i64
+; CHECK-NEXT:    [[TMP79:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP78]], i64 [[OFF]]
+; CHECK-NEXT:    [[TMP80:%.*]] = extractelement <16 x i64> [[TMP27]], i32 8
+; CHECK-NEXT:    [[TMP81:%.*]] = or i64 [[TMP80]], 1
+; CHECK-NEXT:    store i64 [[TMP81]], ptr [[TMP79]], align 8, !alias.scope [[META3]]
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE36]]
+; CHECK:       [[PRED_STORE_CONTINUE36]]:
+; CHECK-NEXT:    [[TMP82:%.*]] = extractelement <16 x i1> [[TMP25]], i32 9
+; CHECK-NEXT:    br i1 [[TMP82]], label %[[PRED_STORE_IF37:.*]], label %[[PRED_STORE_CONTINUE38:.*]]
+; CHECK:       [[PRED_STORE_IF37]]:
+; CHECK-NEXT:    [[TMP83:%.*]] = add i8 [[OFFSET_IDX]], 36
+; CHECK-NEXT:    [[TMP84:%.*]] = zext i8 [[TMP83]] to i64
+; CHECK-NEXT:    [[TMP85:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP84]], i64 [[OFF]]
+; CHECK-NEXT:    [[TMP86:%.*]] = extractelement <16 x i64> [[TMP27]], i32 9
+; CHECK-NEXT:    [[TMP87:%.*]] = or i64 [[TMP86]], 1
+; CHECK-NEXT:    store i64 [[TMP87]], ptr [[TMP85]], align 8, !alias.scope [[META3]]
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE38]]
+; CHECK:       [[PRED_STORE_CONTINUE38]]:
+; CHECK-NEXT:    [[TMP88:%.*]] = extractelement <16 x i1> [[TMP25]], i32 10
+; CHECK-NEXT:    br i1 [[TMP88]], label %[[PRED_STORE_IF39:.*]], label %[[PRED_STORE_CONTINUE40:.*]]
+; CHECK:       [[PRED_STORE_IF39]]:
+; CHECK-NEXT:    [[TMP89:%.*]] = add i8 [[OFFSET_IDX]], 40
+; CHECK-NEXT:    [[TMP90:%.*]] = zext i8 [[TMP89]] to i64
+; CHECK-NEXT:    [[TMP91:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP90]], i64 [[OFF]]
+; CHECK-NEXT:    [[TMP92:%.*]] = extractelement <16 x i64> [[TMP27]], i32 10
+; CHECK-NEXT:    [[TMP93:%.*]] = or i64 [[TMP92]], 1
+; CHECK-NEXT:    store i64 [[TMP93]], ptr [[TMP91]], align 8, !alias.scope [[META3]]
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE40]]
+; CHECK:       [[PRED_STORE_CONTINUE40]]:
+; CHECK-NEXT:    [[TMP94:%.*]] = extractelement <16 x i1> [[TMP25]], i32 11
+; CHECK-NEXT:    br i1 [[TMP94]], label %[[PRED_STORE_IF41:.*]], label %[[PRED_STORE_CONTINUE42:.*]]
+; CHECK:       [[PRED_STORE_IF41]]:
+; CHECK-NEXT:    [[TMP95:%.*]] = add i8 [[OFFSET_IDX]], 44
+; CHECK-NEXT:    [[TMP96:%.*]] = zext i8 [[TMP95]] to i64
+; CHECK-NEXT:    [[TMP97:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP96]], i64 [[OFF]]
+; CHECK-NEXT:    [[TMP98:%.*]] = extractelement <16 x i64> [[TMP27]], i32 11
+; CHECK-NEXT:    [[TMP99:%.*]] = or i64 [[TMP98]], 1
+; CHECK-NEXT:    store i64 [[TMP99]], ptr [[TMP97]], align 8, !alias.scope [[META3]]
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE42]]
+; CHECK:       [[PRED_STORE_CONTINUE42]]:
+; CHECK-NEXT:    [[TMP100:%.*]] = extractelement <16 x i1> [[TMP25]], i32 12
+; CHECK-NEXT:    br i1 [[TMP100]], label %[[PRED_STORE_IF43:.*]], label %[[PRED_STORE_CONTINUE44:.*]]
+; CHECK:       [[PRED_STORE_IF43]]:
+; CHECK-NEXT:    [[TMP101:%.*]] = add i8 [[OFFSET_IDX]], 48
+; CHECK-NEXT:    [[TMP102:%.*]] = zext i8 [[TMP101]] to i64
+; CHECK-NEXT:    [[TMP103:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP102]], i64 [[OFF]]
+; CHECK-NEXT:    [[TMP104:%.*]] = extractelement <16 x i64> [[TMP27]], i32 12
+; CHECK-NEXT:    [[TMP105:%.*]] = or i64 [[TMP104]], 1
+; CHECK-NEXT:    store i64 [[TMP105]], ptr [[TMP103]], align 8, !alias.scope [[META3]]
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE44]]
+; CHECK:       [[PRED_STORE_CONTINUE44]]:
+; CHECK-NEXT:    [[TMP106:%.*]] = extractelement <16 x i1> [[TMP25]], i32 13
+; CHECK-NEXT:    br i1 [[TMP106]], label %[[PRED_STORE_IF45:.*]], label %[[PRED_STORE_CONTINUE46:.*]]
+; CHECK:       [[PRED_STORE_IF45]]:
+; CHECK-NEXT:    [[TMP107:%.*]] = add i8 [[OFFSET_IDX]], 52
+; CHECK-NEXT:    [[TMP108:%.*]] = zext i8 [[TMP107]] to i64
+; CHECK-NEXT:    [[TMP109:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP108]], i64 [[OFF]]
+; CHECK-NEXT:    [[TMP110:%.*]] = extractelement <16 x i64> [[TMP27]], i32 13
+; CHECK-NEXT:    [[TMP111:%.*]] = or i64 [[TMP110]], 1
+; CHECK-NEXT:    store i64 [[TMP111]], ptr [[TMP109]], align 8, !alias.scope [[META3]]
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE46]]
+; CHECK:       [[PRED_STORE_CONTINUE46]]:
+; CHECK-NEXT:    [[TMP112:%.*]] = extractelement <16 x i1> [[TMP25]], i32 14
+; CHECK-NEXT:    br i1 [[TMP112]], label %[[PRED_STORE_IF47:.*]], label %[[PRED_STORE_CONTINUE48:.*]]
+; CHECK:       [[PRED_STORE_IF47]]:
+; CHECK-NEXT:    [[TMP113:%.*]] = add i8 [[OFFSET_IDX]], 56
+; CHECK-NEXT:    [[TMP114:%.*]] = zext i8 [[TMP113]] to i64
+; CHECK-NEXT:    [[TMP115:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP114]], i64 [[OFF]]
+; CHECK-NEXT:    [[TMP116:%.*]] = extractelement <16 x i64> [[TMP27]], i32 14
+; CHECK-NEXT:    [[TMP117:%.*]] = or i64 [[TMP116]], 1
+; CHECK-NEXT:    store i64 [[TMP117]], ptr [[TMP115]], align 8, !alias.scope [[META3]]
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE48]]
+; CHECK:       [[PRED_STORE_CONTINUE48]]:
+; CHECK-NEXT:    [[TMP118:%.*]] = extractelement <16 x i1> [[TMP25]], i32 15
+; CHECK-NEXT:    br i1 [[TMP118]], label %[[PRED_STORE_IF49:.*]], label %[[PRED_STORE_CONTINUE50]]
+; CHECK:       [[PRED_STORE_IF49]]:
+; CHECK-NEXT:    [[TMP119:%.*]] = add i8 [[OFFSET_IDX]], 60
+; CHECK-NEXT:    [[TMP120:%.*]] = zext i8 [[TMP119]] to i64
+; CHECK-NEXT:    [[TMP121:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP120]], i64 [[OFF]]
+; CHECK-NEXT:    [[TMP122:%.*]] = extractelement <16 x i64> [[TMP27]], i32 15
+; CHECK-NEXT:    [[TMP123:%.*]] = or i64 [[TMP122]], 1
+; CHECK-NEXT:    store i64 [[TMP123]], ptr [[TMP121]], align 8, !alias.scope [[META3]]
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE50]]
+; CHECK:       [[PRED_STORE_CONTINUE50]]:
+; CHECK-NEXT:    store i8 0, ptr [[DST_2]], align 1, !alias.scope [[META5:![0-9]+]], !noalias [[META7:![0-9]+]]
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16
+; CHECK-NEXT:    [[TMP124:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP124]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    br label %[[EXIT:.*]]
+; CHECK:       [[SCALAR_PH]]:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[VECTOR_MEMCHECK]] ]
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i8 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[L:%.*]] = load i8, ptr [[SRC]], align 1
+; CHECK-NEXT:    [[L_EXT:%.*]] = zext i8 [[L]] to i64
+; CHECK-NEXT:    [[ADD:%.*]] = or i64 [[L_EXT]], 1
+; CHECK-NEXT:    [[IV_EXT:%.*]] = zext i8 [[IV]] to i64
+; CHECK-NEXT:    [[GEP_DST_1:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[IV_EXT]], i64 [[OFF]]
+; CHECK-NEXT:    store i64 [[ADD]], ptr [[GEP_DST_1]], align 8
+; CHECK-NEXT:    store i8 0, ptr [[DST_2]], align 1
+; CHECK-NEXT:    [[IV_NEXT]] = add i8 [[IV]], 4
+; CHECK-NEXT:    [[IV_NEXT_EXT:%.*]] = zext i8 [[IV_NEXT]] to i32
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[N_SUB]], [[IV_NEXT_EXT]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[LOOP]], label %[[EXIT]], !llvm.loop [[LOOP12:![0-9]+]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %n.ext = sext i8 %n to i32
+  %n.sub = add i32 %n.ext, -15
+  br label %loop
+
+loop:
+  %iv = phi i8 [ 0, %entry ], [ %iv.next, %loop ]
+  %l = load i8, ptr %src, align 1
+  %l.ext = zext i8 %l to i64
+  %add = or i64 %l.ext, 1
+  %iv.ext = zext i8 %iv to i64
+  %gep.dst.1 = getelementptr [16 x i64], ptr %dst.1, i64 %iv.ext, i64 %off
+  store i64 %add, ptr %gep.dst.1, align 8
+  store i8 0, ptr %dst.2, align 1
+  %iv.next = add i8 %iv, 4
+  %iv.next.ext = zext i8 %iv.next to i32
+  %cmp = icmp sgt i32 %n.sub, %iv.next.ext
+  br i1 %cmp, label %loop, label %exit, !llvm.loop !0
+
+exit:
+  ret void
+}
+
+!0 = distinct !{!0, !1, !2, !3}
+!1 = !{!"llvm.loop.mustprogress"}
+!2 = !{!"llvm.loop.vectorize.predicate.enable", i1 true}
+!3 = !{!"llvm.loop.vectorize.enable", i1 true}
+;.
+; CHECK: [[META0]] = !{[[META1:![0-9]+]]}
+; CHECK: [[META1]] = distinct !{[[META1]], [[META2:![0-9]+]]}
+; CHECK: [[META2]] = distinct !{[[META2]], !"LVerDomain"}
+; CHECK: [[META3]] = !{[[META4:![0-9]+]]}
+; CHECK: [[META4]] = distinct !{[[META4]], [[META2]]}
+; CHECK: [[META5]] = !{[[META6:![0-9]+]]}
+; CHECK: [[META6]] = distinct !{[[META6]], [[META2]]}
+; CHECK: [[META7]] = !{[[META1]], [[META4]]}
+; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META9:![0-9]+]], [[META10:![0-9]+]], [[META11:![0-9]+]]}
+; CHECK: [[META9]] = !{!"llvm.loop.mustprogress"}
+; CHECK: [[META10]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK: [[META11]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META9]], [[META10]]}
+;.


        


More information about the llvm-commits mailing list