[llvm] 70a26da - [VPlan] Set correct flags when creating and cloning VPWidenCastRecipe.

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 25 01:01:39 PDT 2025


Author: Florian Hahn
Date: 2025-09-25T09:00:47+01:00
New Revision: 70a26da63992142ba2221f1034048ea883cdcb3d

URL: https://github.com/llvm/llvm-project/commit/70a26da63992142ba2221f1034048ea883cdcb3d
DIFF: https://github.com/llvm/llvm-project/commit/70a26da63992142ba2221f1034048ea883cdcb3d.diff

LOG: [VPlan] Set correct flags when creating and cloning VPWidenCastRecipe.

Make sure that we set the correct wrap flags when creating new
VPWidenCastRecipes for truncs. When cloning, take the flags from the
recipe itself rather than from the underlying IR instruction, so that
they are not dropped.

Fixes https://github.com/llvm/llvm-project/issues/160396

Added: 
    llvm/test/Transforms/LoopVectorize/cse-casts.ll

Modified: 
    llvm/lib/Transforms/Vectorize/VPlan.h
    llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
    llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index e64cefde81e31..0822511150e9e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -705,6 +705,9 @@ class VPIRFlags {
   VPIRFlags(WrapFlagsTy WrapFlags)
       : OpType(OperationType::OverflowingBinOp), WrapFlags(WrapFlags) {}
 
+  VPIRFlags(TruncFlagsTy TruncFlags)
+      : OpType(OperationType::Trunc), TruncFlags(TruncFlags) {}
+
   VPIRFlags(FastMathFlags FMFs) : OpType(OperationType::FPMathOp), FMFs(FMFs) {}
 
   VPIRFlags(DisjointFlagsTy DisjointFlags)
@@ -1494,9 +1497,10 @@ class VPWidenCastRecipe : public VPRecipeWithIRFlags, public VPIRMetadata {
 
   VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy,
                     const VPIRFlags &Flags = {},
+                    const VPIRMetadata &Metadata = {},
                     DebugLoc DL = DebugLoc::getUnknown())
       : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, Flags, DL),
-        VPIRMetadata(), Opcode(Opcode), ResultTy(ResultTy) {
+        VPIRMetadata(Metadata), Opcode(Opcode), ResultTy(ResultTy) {
     assert(flagsValidForOpcode(Opcode) &&
            "Set flags not supported for the provided opcode");
   }
@@ -1504,11 +1508,11 @@ class VPWidenCastRecipe : public VPRecipeWithIRFlags, public VPIRMetadata {
   ~VPWidenCastRecipe() override = default;
 
   VPWidenCastRecipe *clone() override {
+    auto *New = new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy, *this,
+                                      *this, getDebugLoc());
     if (auto *UV = getUnderlyingValue())
-      return new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy,
-                                   *cast<CastInst>(UV));
-
-    return new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy);
+      New->setUnderlyingValue(UV);
+    return New;
   }
 
   VP_CLASSOF_IMPL(VPDef::VPWidenCastSC)

diff  --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index aa3de3613b68e..deb64bfc75063 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -2016,13 +2016,13 @@ bool VPIRFlags::flagsValidForOpcode(unsigned Opcode) const {
     return Opcode == Instruction::FAdd || Opcode == Instruction::FMul ||
            Opcode == Instruction::FSub || Opcode == Instruction::FNeg ||
            Opcode == Instruction::FDiv || Opcode == Instruction::FRem ||
+           Opcode == Instruction::FPExt || Opcode == Instruction::FPTrunc ||
            Opcode == Instruction::FCmp || Opcode == Instruction::Select ||
            Opcode == VPInstruction::WideIVStep ||
            Opcode == VPInstruction::ReductionStartVector ||
            Opcode == VPInstruction::ComputeReductionResult;
   case OperationType::NonNegOp:
-    return Opcode == Instruction::ZExt;
-    break;
+    return Opcode == Instruction::ZExt || Opcode == Instruction::UIToFP;
   case OperationType::Cmp:
     return Opcode == Instruction::FCmp || Opcode == Instruction::ICmp;
   case OperationType::Other:

diff  --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 84f02059743c3..58fab8f222d23 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -2195,7 +2195,8 @@ void VPlanTransforms::truncateToMinimalBitwidths(
         auto [ProcessedIter, IterIsEmpty] = ProcessedTruncs.try_emplace(Op);
         VPWidenCastRecipe *NewOp =
             IterIsEmpty
-                ? new VPWidenCastRecipe(Instruction::Trunc, Op, NewResTy)
+                ? new VPWidenCastRecipe(Instruction::Trunc, Op, NewResTy,
+                                        VPIRFlags::TruncFlagsTy(false, false))
                 : ProcessedIter->second;
         R.setOperand(Idx, NewOp);
         if (!IterIsEmpty)
@@ -3566,13 +3567,13 @@ tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red,
                                    Mul, Ext0, Ext1, Ext)) {
       auto *NewExt0 = new VPWidenCastRecipe(
           Ext0->getOpcode(), Ext0->getOperand(0), Ext->getResultType(), *Ext0,
-          Ext0->getDebugLoc());
+          *Ext0, Ext0->getDebugLoc());
       NewExt0->insertBefore(Ext0);
 
       VPWidenCastRecipe *NewExt1 = NewExt0;
       if (Ext0 != Ext1) {
         NewExt1 = new VPWidenCastRecipe(Ext1->getOpcode(), Ext1->getOperand(0),
-                                        Ext->getResultType(), *Ext1,
+                                        Ext->getResultType(), *Ext1, *Ext1,
                                         Ext1->getDebugLoc());
         NewExt1->insertBefore(Ext1);
       }

diff  --git a/llvm/test/Transforms/LoopVectorize/cse-casts.ll b/llvm/test/Transforms/LoopVectorize/cse-casts.ll
new file mode 100644
index 0000000000000..e923560bb77e8
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/cse-casts.ll
@@ -0,0 +1,351 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "scalar.ph:" --version 6
+; RUN: opt -p loop-vectorize -force-vector-width=4 -force-vector-interleave=2 -S %s | FileCheck %s
+
+define i8 @preserve_flags_when_cloning_trunc(i8 %start, ptr noalias %src, ptr noalias %dst) {
+; CHECK-LABEL: define i8 @preserve_flags_when_cloning_trunc(
+; CHECK-SAME: i8 [[START:%.*]], ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i8> splat (i8 1), i8 [[START]], i32 0
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i8> [ [[TMP0]], %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i8> [ splat (i8 1), %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[SRC]], align 4
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne <4 x i32> [[BROADCAST_SPLAT]], zeroinitializer
+; CHECK-NEXT:    [[TMP3:%.*]] = zext <4 x i1> [[TMP2]] to <4 x i16>
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i16, ptr [[DST]], i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i16, ptr [[TMP4]], i32 4
+; CHECK-NEXT:    store <4 x i16> [[TMP3]], ptr [[TMP4]], align 2
+; CHECK-NEXT:    store <4 x i16> [[TMP3]], ptr [[TMP5]], align 2
+; CHECK-NEXT:    [[TMP6]] = mul <4 x i8> [[VEC_PHI]], splat (i8 3)
+; CHECK-NEXT:    [[TMP7]] = mul <4 x i8> [[VEC_PHI1]], splat (i8 3)
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 416
+; CHECK-NEXT:    br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    [[BIN_RDX:%.*]] = mul <4 x i8> [[TMP7]], [[TMP6]]
+; CHECK-NEXT:    [[TMP9:%.*]] = call i8 @llvm.vector.reduce.mul.v4i8(<4 x i8> [[BIN_RDX]])
+; CHECK-NEXT:    br label %[[SCALAR_PH:.*]]
+; CHECK:       [[SCALAR_PH]]:
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
+  %red = phi i8 [ %red.next, %loop ], [ %start, %entry ]
+  %l = load i32, ptr %src, align 4
+  %cmp = icmp ne i32 %l, 0
+  %cmp.ext = zext i1 %cmp to i64
+  %cmp.trunc = trunc i64 %cmp.ext to i16
+  %gep.dst = getelementptr i16, ptr %dst, i64 %iv
+  store i16 %cmp.trunc, ptr %gep.dst, align 2
+  %red.next = mul i8 %red, 3
+  %iv.next = add i64 %iv, 1
+  %ec = icmp ult i64 %iv, 416
+  br i1 %ec, label %loop, label %exit
+
+exit:
+  ret i8 %red.next
+}
+
+
+define void @preserve_flags_narrowing_extends_and_truncs(ptr noalias %A, ptr noalias %B, ptr noalias %C) {
+; CHECK-LABEL: define void @preserve_flags_narrowing_extends_and_truncs(
+; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    br i1 true, label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
+; CHECK:       [[PRED_LOAD_IF]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 0
+; CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr [[TMP0]], align 1
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i8> poison, i8 [[TMP1]], i32 0
+; CHECK-NEXT:    br label %[[PRED_LOAD_CONTINUE]]
+; CHECK:       [[PRED_LOAD_CONTINUE]]:
+; CHECK-NEXT:    [[TMP3:%.*]] = phi <4 x i8> [ poison, %[[VECTOR_BODY]] ], [ [[TMP2]], %[[PRED_LOAD_IF]] ]
+; CHECK-NEXT:    br i1 true, label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2:.*]]
+; CHECK:       [[PRED_LOAD_IF1]]:
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 1
+; CHECK-NEXT:    [[TMP5:%.*]] = load i8, ptr [[TMP4]], align 1
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x i8> [[TMP3]], i8 [[TMP5]], i32 1
+; CHECK-NEXT:    br label %[[PRED_LOAD_CONTINUE2]]
+; CHECK:       [[PRED_LOAD_CONTINUE2]]:
+; CHECK-NEXT:    [[TMP7:%.*]] = phi <4 x i8> [ [[TMP3]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP6]], %[[PRED_LOAD_IF1]] ]
+; CHECK-NEXT:    br i1 false, label %[[PRED_LOAD_IF3:.*]], label %[[PRED_LOAD_CONTINUE4:.*]]
+; CHECK:       [[PRED_LOAD_IF3]]:
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 2
+; CHECK-NEXT:    [[TMP9:%.*]] = load i8, ptr [[TMP8]], align 1
+; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <4 x i8> [[TMP7]], i8 [[TMP9]], i32 2
+; CHECK-NEXT:    br label %[[PRED_LOAD_CONTINUE4]]
+; CHECK:       [[PRED_LOAD_CONTINUE4]]:
+; CHECK-NEXT:    [[TMP11:%.*]] = phi <4 x i8> [ [[TMP7]], %[[PRED_LOAD_CONTINUE2]] ], [ [[TMP10]], %[[PRED_LOAD_IF3]] ]
+; CHECK-NEXT:    br i1 false, label %[[PRED_LOAD_IF5:.*]], label %[[PRED_LOAD_CONTINUE6:.*]]
+; CHECK:       [[PRED_LOAD_IF5]]:
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 3
+; CHECK-NEXT:    [[TMP13:%.*]] = load i8, ptr [[TMP12]], align 1
+; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <4 x i8> [[TMP11]], i8 [[TMP13]], i32 3
+; CHECK-NEXT:    br label %[[PRED_LOAD_CONTINUE6]]
+; CHECK:       [[PRED_LOAD_CONTINUE6]]:
+; CHECK-NEXT:    [[TMP15:%.*]] = phi <4 x i8> [ [[TMP11]], %[[PRED_LOAD_CONTINUE4]] ], [ [[TMP14]], %[[PRED_LOAD_IF5]] ]
+; CHECK-NEXT:    br i1 false, label %[[PRED_LOAD_IF7:.*]], label %[[PRED_LOAD_CONTINUE8:.*]]
+; CHECK:       [[PRED_LOAD_IF7]]:
+; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 4
+; CHECK-NEXT:    [[TMP17:%.*]] = load i8, ptr [[TMP16]], align 1
+; CHECK-NEXT:    [[TMP18:%.*]] = insertelement <4 x i8> poison, i8 [[TMP17]], i32 0
+; CHECK-NEXT:    br label %[[PRED_LOAD_CONTINUE8]]
+; CHECK:       [[PRED_LOAD_CONTINUE8]]:
+; CHECK-NEXT:    [[TMP19:%.*]] = phi <4 x i8> [ poison, %[[PRED_LOAD_CONTINUE6]] ], [ [[TMP18]], %[[PRED_LOAD_IF7]] ]
+; CHECK-NEXT:    br i1 false, label %[[PRED_LOAD_IF9:.*]], label %[[PRED_LOAD_CONTINUE10:.*]]
+; CHECK:       [[PRED_LOAD_IF9]]:
+; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 5
+; CHECK-NEXT:    [[TMP21:%.*]] = load i8, ptr [[TMP20]], align 1
+; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <4 x i8> [[TMP19]], i8 [[TMP21]], i32 1
+; CHECK-NEXT:    br label %[[PRED_LOAD_CONTINUE10]]
+; CHECK:       [[PRED_LOAD_CONTINUE10]]:
+; CHECK-NEXT:    [[TMP23:%.*]] = phi <4 x i8> [ [[TMP19]], %[[PRED_LOAD_CONTINUE8]] ], [ [[TMP22]], %[[PRED_LOAD_IF9]] ]
+; CHECK-NEXT:    br i1 false, label %[[PRED_LOAD_IF11:.*]], label %[[PRED_LOAD_CONTINUE12:.*]]
+; CHECK:       [[PRED_LOAD_IF11]]:
+; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 6
+; CHECK-NEXT:    [[TMP25:%.*]] = load i8, ptr [[TMP24]], align 1
+; CHECK-NEXT:    [[TMP26:%.*]] = insertelement <4 x i8> [[TMP23]], i8 [[TMP25]], i32 2
+; CHECK-NEXT:    br label %[[PRED_LOAD_CONTINUE12]]
+; CHECK:       [[PRED_LOAD_CONTINUE12]]:
+; CHECK-NEXT:    [[TMP27:%.*]] = phi <4 x i8> [ [[TMP23]], %[[PRED_LOAD_CONTINUE10]] ], [ [[TMP26]], %[[PRED_LOAD_IF11]] ]
+; CHECK-NEXT:    br i1 false, label %[[PRED_LOAD_IF13:.*]], label %[[PRED_LOAD_CONTINUE14:.*]]
+; CHECK:       [[PRED_LOAD_IF13]]:
+; CHECK-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 7
+; CHECK-NEXT:    [[TMP29:%.*]] = load i8, ptr [[TMP28]], align 1
+; CHECK-NEXT:    [[TMP30:%.*]] = insertelement <4 x i8> [[TMP27]], i8 [[TMP29]], i32 3
+; CHECK-NEXT:    br label %[[PRED_LOAD_CONTINUE14]]
+; CHECK:       [[PRED_LOAD_CONTINUE14]]:
+; CHECK-NEXT:    [[TMP31:%.*]] = phi <4 x i8> [ [[TMP27]], %[[PRED_LOAD_CONTINUE12]] ], [ [[TMP30]], %[[PRED_LOAD_IF13]] ]
+; CHECK-NEXT:    [[TMP32:%.*]] = zext <4 x i8> [[TMP15]] to <4 x i64>
+; CHECK-NEXT:    [[TMP33:%.*]] = zext <4 x i8> [[TMP31]] to <4 x i64>
+; CHECK-NEXT:    br i1 true, label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
+; CHECK:       [[PRED_STORE_IF]]:
+; CHECK-NEXT:    [[TMP34:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 0
+; CHECK-NEXT:    [[TMP35:%.*]] = extractelement <4 x i64> [[TMP32]], i32 0
+; CHECK-NEXT:    store i64 [[TMP35]], ptr [[TMP34]], align 4
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE]]
+; CHECK:       [[PRED_STORE_CONTINUE]]:
+; CHECK-NEXT:    br i1 true, label %[[PRED_STORE_IF15:.*]], label %[[PRED_STORE_CONTINUE16:.*]]
+; CHECK:       [[PRED_STORE_IF15]]:
+; CHECK-NEXT:    [[TMP36:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 1
+; CHECK-NEXT:    [[TMP37:%.*]] = extractelement <4 x i64> [[TMP32]], i32 1
+; CHECK-NEXT:    store i64 [[TMP37]], ptr [[TMP36]], align 4
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE16]]
+; CHECK:       [[PRED_STORE_CONTINUE16]]:
+; CHECK-NEXT:    br i1 false, label %[[PRED_STORE_IF17:.*]], label %[[PRED_STORE_CONTINUE18:.*]]
+; CHECK:       [[PRED_STORE_IF17]]:
+; CHECK-NEXT:    [[TMP38:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 2
+; CHECK-NEXT:    [[TMP39:%.*]] = extractelement <4 x i64> [[TMP32]], i32 2
+; CHECK-NEXT:    store i64 [[TMP39]], ptr [[TMP38]], align 4
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE18]]
+; CHECK:       [[PRED_STORE_CONTINUE18]]:
+; CHECK-NEXT:    br i1 false, label %[[PRED_STORE_IF19:.*]], label %[[PRED_STORE_CONTINUE20:.*]]
+; CHECK:       [[PRED_STORE_IF19]]:
+; CHECK-NEXT:    [[TMP40:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 3
+; CHECK-NEXT:    [[TMP41:%.*]] = extractelement <4 x i64> [[TMP32]], i32 3
+; CHECK-NEXT:    store i64 [[TMP41]], ptr [[TMP40]], align 4
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE20]]
+; CHECK:       [[PRED_STORE_CONTINUE20]]:
+; CHECK-NEXT:    br i1 false, label %[[PRED_STORE_IF21:.*]], label %[[PRED_STORE_CONTINUE22:.*]]
+; CHECK:       [[PRED_STORE_IF21]]:
+; CHECK-NEXT:    [[TMP42:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 4
+; CHECK-NEXT:    [[TMP43:%.*]] = extractelement <4 x i64> [[TMP33]], i32 0
+; CHECK-NEXT:    store i64 [[TMP43]], ptr [[TMP42]], align 4
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE22]]
+; CHECK:       [[PRED_STORE_CONTINUE22]]:
+; CHECK-NEXT:    br i1 false, label %[[PRED_STORE_IF23:.*]], label %[[PRED_STORE_CONTINUE24:.*]]
+; CHECK:       [[PRED_STORE_IF23]]:
+; CHECK-NEXT:    [[TMP44:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 5
+; CHECK-NEXT:    [[TMP45:%.*]] = extractelement <4 x i64> [[TMP33]], i32 1
+; CHECK-NEXT:    store i64 [[TMP45]], ptr [[TMP44]], align 4
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE24]]
+; CHECK:       [[PRED_STORE_CONTINUE24]]:
+; CHECK-NEXT:    br i1 false, label %[[PRED_STORE_IF25:.*]], label %[[PRED_STORE_CONTINUE26:.*]]
+; CHECK:       [[PRED_STORE_IF25]]:
+; CHECK-NEXT:    [[TMP46:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 6
+; CHECK-NEXT:    [[TMP47:%.*]] = extractelement <4 x i64> [[TMP33]], i32 2
+; CHECK-NEXT:    store i64 [[TMP47]], ptr [[TMP46]], align 4
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE26]]
+; CHECK:       [[PRED_STORE_CONTINUE26]]:
+; CHECK-NEXT:    br i1 false, label %[[PRED_STORE_IF27:.*]], label %[[PRED_STORE_CONTINUE28:.*]]
+; CHECK:       [[PRED_STORE_IF27]]:
+; CHECK-NEXT:    [[TMP48:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 7
+; CHECK-NEXT:    [[TMP49:%.*]] = extractelement <4 x i64> [[TMP33]], i32 3
+; CHECK-NEXT:    store i64 [[TMP49]], ptr [[TMP48]], align 4
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE28]]
+; CHECK:       [[PRED_STORE_CONTINUE28]]:
+; CHECK-NEXT:    [[TMP50:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 0
+; CHECK-NEXT:    [[TMP51:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 1
+; CHECK-NEXT:    [[TMP52:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 2
+; CHECK-NEXT:    [[TMP53:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 3
+; CHECK-NEXT:    [[TMP54:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP50]], i32 0
+; CHECK-NEXT:    [[TMP55:%.*]] = insertelement <4 x ptr> [[TMP54]], ptr [[TMP51]], i32 1
+; CHECK-NEXT:    [[TMP56:%.*]] = insertelement <4 x ptr> [[TMP55]], ptr [[TMP52]], i32 2
+; CHECK-NEXT:    [[TMP57:%.*]] = insertelement <4 x ptr> [[TMP56]], ptr [[TMP53]], i32 3
+; CHECK-NEXT:    [[TMP58:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 4
+; CHECK-NEXT:    [[TMP59:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 5
+; CHECK-NEXT:    [[TMP60:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 6
+; CHECK-NEXT:    [[TMP61:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 7
+; CHECK-NEXT:    [[TMP62:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP58]], i32 0
+; CHECK-NEXT:    [[TMP63:%.*]] = insertelement <4 x ptr> [[TMP62]], ptr [[TMP59]], i32 1
+; CHECK-NEXT:    [[TMP64:%.*]] = insertelement <4 x ptr> [[TMP63]], ptr [[TMP60]], i32 2
+; CHECK-NEXT:    [[TMP65:%.*]] = insertelement <4 x ptr> [[TMP64]], ptr [[TMP61]], i32 3
+; CHECK-NEXT:    br i1 true, label %[[PRED_LOAD_IF29:.*]], label %[[PRED_LOAD_CONTINUE30:.*]]
+; CHECK:       [[PRED_LOAD_IF29]]:
+; CHECK-NEXT:    [[TMP66:%.*]] = load i8, ptr [[TMP50]], align 1
+; CHECK-NEXT:    [[TMP67:%.*]] = insertelement <4 x i8> poison, i8 [[TMP66]], i32 0
+; CHECK-NEXT:    br label %[[PRED_LOAD_CONTINUE30]]
+; CHECK:       [[PRED_LOAD_CONTINUE30]]:
+; CHECK-NEXT:    [[TMP68:%.*]] = phi <4 x i8> [ poison, %[[PRED_STORE_CONTINUE28]] ], [ [[TMP67]], %[[PRED_LOAD_IF29]] ]
+; CHECK-NEXT:    br i1 true, label %[[PRED_LOAD_IF31:.*]], label %[[PRED_LOAD_CONTINUE32:.*]]
+; CHECK:       [[PRED_LOAD_IF31]]:
+; CHECK-NEXT:    [[TMP69:%.*]] = load i8, ptr [[TMP51]], align 1
+; CHECK-NEXT:    [[TMP70:%.*]] = insertelement <4 x i8> [[TMP68]], i8 [[TMP69]], i32 1
+; CHECK-NEXT:    br label %[[PRED_LOAD_CONTINUE32]]
+; CHECK:       [[PRED_LOAD_CONTINUE32]]:
+; CHECK-NEXT:    [[TMP71:%.*]] = phi <4 x i8> [ [[TMP68]], %[[PRED_LOAD_CONTINUE30]] ], [ [[TMP70]], %[[PRED_LOAD_IF31]] ]
+; CHECK-NEXT:    br i1 false, label %[[PRED_LOAD_IF33:.*]], label %[[PRED_LOAD_CONTINUE34:.*]]
+; CHECK:       [[PRED_LOAD_IF33]]:
+; CHECK-NEXT:    [[TMP72:%.*]] = load i8, ptr [[TMP52]], align 1
+; CHECK-NEXT:    [[TMP73:%.*]] = insertelement <4 x i8> [[TMP71]], i8 [[TMP72]], i32 2
+; CHECK-NEXT:    br label %[[PRED_LOAD_CONTINUE34]]
+; CHECK:       [[PRED_LOAD_CONTINUE34]]:
+; CHECK-NEXT:    [[TMP74:%.*]] = phi <4 x i8> [ [[TMP71]], %[[PRED_LOAD_CONTINUE32]] ], [ [[TMP73]], %[[PRED_LOAD_IF33]] ]
+; CHECK-NEXT:    br i1 false, label %[[PRED_LOAD_IF35:.*]], label %[[PRED_LOAD_CONTINUE36:.*]]
+; CHECK:       [[PRED_LOAD_IF35]]:
+; CHECK-NEXT:    [[TMP75:%.*]] = load i8, ptr [[TMP53]], align 1
+; CHECK-NEXT:    [[TMP76:%.*]] = insertelement <4 x i8> [[TMP74]], i8 [[TMP75]], i32 3
+; CHECK-NEXT:    br label %[[PRED_LOAD_CONTINUE36]]
+; CHECK:       [[PRED_LOAD_CONTINUE36]]:
+; CHECK-NEXT:    [[TMP77:%.*]] = phi <4 x i8> [ [[TMP74]], %[[PRED_LOAD_CONTINUE34]] ], [ [[TMP76]], %[[PRED_LOAD_IF35]] ]
+; CHECK-NEXT:    br i1 false, label %[[PRED_LOAD_IF37:.*]], label %[[PRED_LOAD_CONTINUE38:.*]]
+; CHECK:       [[PRED_LOAD_IF37]]:
+; CHECK-NEXT:    [[TMP78:%.*]] = load i8, ptr [[TMP58]], align 1
+; CHECK-NEXT:    [[TMP79:%.*]] = insertelement <4 x i8> poison, i8 [[TMP78]], i32 0
+; CHECK-NEXT:    br label %[[PRED_LOAD_CONTINUE38]]
+; CHECK:       [[PRED_LOAD_CONTINUE38]]:
+; CHECK-NEXT:    [[TMP80:%.*]] = phi <4 x i8> [ poison, %[[PRED_LOAD_CONTINUE36]] ], [ [[TMP79]], %[[PRED_LOAD_IF37]] ]
+; CHECK-NEXT:    br i1 false, label %[[PRED_LOAD_IF39:.*]], label %[[PRED_LOAD_CONTINUE40:.*]]
+; CHECK:       [[PRED_LOAD_IF39]]:
+; CHECK-NEXT:    [[TMP81:%.*]] = load i8, ptr [[TMP59]], align 1
+; CHECK-NEXT:    [[TMP82:%.*]] = insertelement <4 x i8> [[TMP80]], i8 [[TMP81]], i32 1
+; CHECK-NEXT:    br label %[[PRED_LOAD_CONTINUE40]]
+; CHECK:       [[PRED_LOAD_CONTINUE40]]:
+; CHECK-NEXT:    [[TMP83:%.*]] = phi <4 x i8> [ [[TMP80]], %[[PRED_LOAD_CONTINUE38]] ], [ [[TMP82]], %[[PRED_LOAD_IF39]] ]
+; CHECK-NEXT:    br i1 false, label %[[PRED_LOAD_IF41:.*]], label %[[PRED_LOAD_CONTINUE42:.*]]
+; CHECK:       [[PRED_LOAD_IF41]]:
+; CHECK-NEXT:    [[TMP84:%.*]] = load i8, ptr [[TMP60]], align 1
+; CHECK-NEXT:    [[TMP85:%.*]] = insertelement <4 x i8> [[TMP83]], i8 [[TMP84]], i32 2
+; CHECK-NEXT:    br label %[[PRED_LOAD_CONTINUE42]]
+; CHECK:       [[PRED_LOAD_CONTINUE42]]:
+; CHECK-NEXT:    [[TMP86:%.*]] = phi <4 x i8> [ [[TMP83]], %[[PRED_LOAD_CONTINUE40]] ], [ [[TMP85]], %[[PRED_LOAD_IF41]] ]
+; CHECK-NEXT:    br i1 false, label %[[PRED_LOAD_IF43:.*]], label %[[PRED_LOAD_CONTINUE44:.*]]
+; CHECK:       [[PRED_LOAD_IF43]]:
+; CHECK-NEXT:    [[TMP87:%.*]] = load i8, ptr [[TMP61]], align 1
+; CHECK-NEXT:    [[TMP88:%.*]] = insertelement <4 x i8> [[TMP86]], i8 [[TMP87]], i32 3
+; CHECK-NEXT:    br label %[[PRED_LOAD_CONTINUE44]]
+; CHECK:       [[PRED_LOAD_CONTINUE44]]:
+; CHECK-NEXT:    [[TMP89:%.*]] = phi <4 x i8> [ [[TMP86]], %[[PRED_LOAD_CONTINUE42]] ], [ [[TMP88]], %[[PRED_LOAD_IF43]] ]
+; CHECK-NEXT:    [[TMP90:%.*]] = trunc <4 x i8> [[TMP77]] to <4 x i1>
+; CHECK-NEXT:    [[TMP91:%.*]] = trunc <4 x i8> [[TMP89]] to <4 x i1>
+; CHECK-NEXT:    [[TMP92:%.*]] = and <4 x i1> [[TMP90]], splat (i1 true)
+; CHECK-NEXT:    [[TMP93:%.*]] = and <4 x i1> [[TMP91]], splat (i1 true)
+; CHECK-NEXT:    [[TMP94:%.*]] = select <4 x i1> [[TMP90]], <4 x float> splat (float 1.000000e+00), <4 x float> zeroinitializer
+; CHECK-NEXT:    [[TMP95:%.*]] = select <4 x i1> [[TMP91]], <4 x float> splat (float 1.000000e+00), <4 x float> zeroinitializer
+; CHECK-NEXT:    [[TMP96:%.*]] = select <4 x i1> [[TMP92]], <4 x float> splat (float 3.000000e+00), <4 x float> [[TMP94]]
+; CHECK-NEXT:    [[TMP97:%.*]] = select <4 x i1> [[TMP93]], <4 x float> splat (float 3.000000e+00), <4 x float> [[TMP95]]
+; CHECK-NEXT:    [[TMP98:%.*]] = bitcast <4 x float> [[TMP96]] to <4 x i32>
+; CHECK-NEXT:    [[TMP99:%.*]] = bitcast <4 x float> [[TMP97]] to <4 x i32>
+; CHECK-NEXT:    [[TMP100:%.*]] = trunc <4 x i32> [[TMP98]] to <4 x i8>
+; CHECK-NEXT:    [[TMP101:%.*]] = trunc <4 x i32> [[TMP99]] to <4 x i8>
+; CHECK-NEXT:    br i1 true, label %[[PRED_STORE_IF45:.*]], label %[[PRED_STORE_CONTINUE46:.*]]
+; CHECK:       [[PRED_STORE_IF45]]:
+; CHECK-NEXT:    [[TMP102:%.*]] = extractelement <4 x i8> [[TMP100]], i32 0
+; CHECK-NEXT:    store i8 [[TMP102]], ptr [[TMP50]], align 1
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE46]]
+; CHECK:       [[PRED_STORE_CONTINUE46]]:
+; CHECK-NEXT:    br i1 true, label %[[PRED_STORE_IF47:.*]], label %[[PRED_STORE_CONTINUE48:.*]]
+; CHECK:       [[PRED_STORE_IF47]]:
+; CHECK-NEXT:    [[TMP103:%.*]] = extractelement <4 x i8> [[TMP100]], i32 1
+; CHECK-NEXT:    store i8 [[TMP103]], ptr [[TMP51]], align 1
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE48]]
+; CHECK:       [[PRED_STORE_CONTINUE48]]:
+; CHECK-NEXT:    br i1 false, label %[[PRED_STORE_IF49:.*]], label %[[PRED_STORE_CONTINUE50:.*]]
+; CHECK:       [[PRED_STORE_IF49]]:
+; CHECK-NEXT:    [[TMP104:%.*]] = extractelement <4 x i8> [[TMP100]], i32 2
+; CHECK-NEXT:    store i8 [[TMP104]], ptr [[TMP52]], align 1
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE50]]
+; CHECK:       [[PRED_STORE_CONTINUE50]]:
+; CHECK-NEXT:    br i1 false, label %[[PRED_STORE_IF51:.*]], label %[[PRED_STORE_CONTINUE52:.*]]
+; CHECK:       [[PRED_STORE_IF51]]:
+; CHECK-NEXT:    [[TMP105:%.*]] = extractelement <4 x i8> [[TMP100]], i32 3
+; CHECK-NEXT:    store i8 [[TMP105]], ptr [[TMP53]], align 1
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE52]]
+; CHECK:       [[PRED_STORE_CONTINUE52]]:
+; CHECK-NEXT:    br i1 false, label %[[PRED_STORE_IF53:.*]], label %[[PRED_STORE_CONTINUE54:.*]]
+; CHECK:       [[PRED_STORE_IF53]]:
+; CHECK-NEXT:    [[TMP106:%.*]] = extractelement <4 x i8> [[TMP101]], i32 0
+; CHECK-NEXT:    store i8 [[TMP106]], ptr [[TMP58]], align 1
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE54]]
+; CHECK:       [[PRED_STORE_CONTINUE54]]:
+; CHECK-NEXT:    br i1 false, label %[[PRED_STORE_IF55:.*]], label %[[PRED_STORE_CONTINUE56:.*]]
+; CHECK:       [[PRED_STORE_IF55]]:
+; CHECK-NEXT:    [[TMP107:%.*]] = extractelement <4 x i8> [[TMP101]], i32 1
+; CHECK-NEXT:    store i8 [[TMP107]], ptr [[TMP59]], align 1
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE56]]
+; CHECK:       [[PRED_STORE_CONTINUE56]]:
+; CHECK-NEXT:    br i1 false, label %[[PRED_STORE_IF57:.*]], label %[[PRED_STORE_CONTINUE58:.*]]
+; CHECK:       [[PRED_STORE_IF57]]:
+; CHECK-NEXT:    [[TMP108:%.*]] = extractelement <4 x i8> [[TMP101]], i32 2
+; CHECK-NEXT:    store i8 [[TMP108]], ptr [[TMP60]], align 1
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE58]]
+; CHECK:       [[PRED_STORE_CONTINUE58]]:
+; CHECK-NEXT:    br i1 false, label %[[PRED_STORE_IF59:.*]], label %[[PRED_STORE_CONTINUE60:.*]]
+; CHECK:       [[PRED_STORE_IF59]]:
+; CHECK-NEXT:    [[TMP109:%.*]] = extractelement <4 x i8> [[TMP101]], i32 3
+; CHECK-NEXT:    store i8 [[TMP109]], ptr [[TMP61]], align 1
+; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE60]]
+; CHECK:       [[PRED_STORE_CONTINUE60]]:
+; CHECK-NEXT:    br label %[[MIDDLE_BLOCK:.*]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    br [[EXIT:label %.*]]
+; CHECK:       [[SCALAR_PH:.*:]]
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %gep.A = getelementptr inbounds i8, ptr %A, i64 %iv
+  %l = load i8, ptr %gep.A
+  %l.ext = zext i8 %l to i64
+  %gep.C = getelementptr inbounds i8, ptr %C, i64 %iv
+  store i64 %l.ext, ptr %gep.C
+  %gep.B = getelementptr inbounds i8, ptr %B, i64 %iv
+  %l.1 = load i8, ptr %gep.B, align 1
+  %masked = and i8 %l.1, 1
+  %l.1.trunc = trunc i8 %l.1 to i1
+  %sel.0 = select i1 %l.1.trunc, float 1.000000e+00, float 0.000000e+00
+  %masked.trunc = trunc i8 %masked to i1
+  %sel.1 = select i1 %masked.trunc, float 3.000000e+00, float %sel.0
+  %bc = bitcast float %sel.1 to i32
+  %bc.trunc = trunc i32 %bc to i8
+  store i8 %bc.trunc, ptr %gep.B, align 1
+  %iv.next = add i64 %iv, 1
+  %ec = icmp eq i64 %iv, 1
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  ret void
+}


        


More information about the llvm-commits mailing list