[llvm] bb0d29a - [LV] fix logical error in trunc cost (#91136)
via llvm-commits
llvm-commits at lists.llvm.org
Fri May 24 10:02:02 PDT 2024
Author: Ramkumar Ramachandra
Date: 2024-05-24T18:01:58+01:00
New Revision: bb0d29a72d3a616ad6606c3996c39aba96f55615
URL: https://github.com/llvm/llvm-project/commit/bb0d29a72d3a616ad6606c3996c39aba96f55615
DIFF: https://github.com/llvm/llvm-project/commit/bb0d29a72d3a616ad6606c3996c39aba96f55615.diff
LOG: [LV] fix logical error in trunc cost (#91136)
In LoopVectorizationCostModel::getInstructionCost(), when the condition
canTruncateToMinimalBitwidth() is satisfied, for a trunc, the source
type is computed as the smallest type of the source vector and the
destination vector, and the destination type is computed as the largest
type of the instruction and destination type. This is clearly a logical
error, as the original source vector type could be smaller than the
original destination vector type, and the trunc semantics are broken
because we're attempting to widen.
Fixes #47665.
Added:
Modified:
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll
llvm/test/Transforms/LoopVectorize/SystemZ/pr47665.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 52cb8c9f88f94..6dd04aed8d33f 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -3384,18 +3384,6 @@ LoopVectorizationCostModel::getVectorIntrinsicCost(CallInst *CI,
TargetTransformInfo::TCK_RecipThroughput);
}
-static Type *smallestIntegerVectorType(Type *T1, Type *T2) {
- auto *I1 = cast<IntegerType>(cast<VectorType>(T1)->getElementType());
- auto *I2 = cast<IntegerType>(cast<VectorType>(T2)->getElementType());
- return I1->getBitWidth() < I2->getBitWidth() ? T1 : T2;
-}
-
-static Type *largestIntegerVectorType(Type *T1, Type *T2) {
- auto *I1 = cast<IntegerType>(cast<VectorType>(T1)->getElementType());
- auto *I2 = cast<IntegerType>(cast<VectorType>(T2)->getElementType());
- return I1->getBitWidth() > I2->getBitWidth() ? T1 : T2;
-}
-
void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State,
VPlan &Plan) {
// Fix widened non-induction PHIs by setting up the PHI operands.
@@ -7120,26 +7108,12 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, ElementCount VF,
return *RedCost;
Type *SrcScalarTy = I->getOperand(0)->getType();
+ Instruction *Op0AsInstruction = dyn_cast<Instruction>(I->getOperand(0));
+ if (canTruncateToMinimalBitwidth(Op0AsInstruction, VF))
+ SrcScalarTy =
+ IntegerType::get(SrcScalarTy->getContext(), MinBWs[Op0AsInstruction]);
Type *SrcVecTy =
VectorTy->isVectorTy() ? ToVectorTy(SrcScalarTy, VF) : SrcScalarTy;
- if (canTruncateToMinimalBitwidth(I, VF)) {
- // This cast is going to be shrunk. This may remove the cast or it might
- // turn it into slightly
diff erent cast. For example, if MinBW == 16,
- // "zext i8 %1 to i32" becomes "zext i8 %1 to i16".
- //
- // Calculate the modified src and dest types.
- Type *MinVecTy = VectorTy;
- if (Opcode == Instruction::Trunc) {
- SrcVecTy = smallestIntegerVectorType(SrcVecTy, MinVecTy);
- VectorTy =
- largestIntegerVectorType(ToVectorTy(I->getType(), VF), MinVecTy);
- } else if (Opcode == Instruction::ZExt || Opcode == Instruction::SExt) {
- // Leave SrcVecTy unchanged - we only shrink the destination element
- // type.
- VectorTy =
- smallestIntegerVectorType(ToVectorTy(I->getType(), VF), MinVecTy);
- }
- }
return TTI.getCastInstrCost(Opcode, VectorTy, SrcVecTy, CCH, CostKind, I);
}
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll b/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll
index df3680abbf906..a91f92348ab26 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll
@@ -1,7 +1,88 @@
-; REQUIRES: asserts
-; RUN: not --crash opt -passes=loop-vectorize -mtriple=riscv64 -mattr=+v -disable-output %s
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -passes=loop-vectorize -mtriple=riscv64 -mattr=+v -S %s | FileCheck %s
define void @test(ptr %p, i64 %a, i8 %b) {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: ptr [[P:%.*]], i64 [[A:%.*]], i8 [[B:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[A]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i8> poison, i8 [[B]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT1]], <4 x i8> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: br label [[VECTOR_BODY1:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE8:%.*]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE8]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
+; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0]], i32 3)
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[VEC_IND]], <i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT: [[TMP2:%.*]] = shl <4 x i64> [[BROADCAST_SPLAT]], <i64 48, i64 48, i64 48, i64 48>
+; CHECK-NEXT: [[TMP3:%.*]] = ashr <4 x i64> [[TMP2]], <i64 52, i64 52, i64 52, i64 52>
+; CHECK-NEXT: [[TMP4:%.*]] = trunc <4 x i64> [[TMP3]] to <4 x i32>
+; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i1> [[TMP1]], <4 x i1> zeroinitializer
+; CHECK-NEXT: [[TMP6:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT2]] to <4 x i32>
+; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> [[TMP6]], <4 x i32> [[TMP4]]
+; CHECK-NEXT: [[TMP7:%.*]] = shl <4 x i32> [[PREDPHI]], <i32 8, i32 8, i32 8, i32 8>
+; CHECK-NEXT: [[TMP8:%.*]] = trunc <4 x i32> [[TMP7]] to <4 x i8>
+; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 0
+; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_STORE_IF:%.*]], label [[VECTOR_BODY:%.*]]
+; CHECK: pred.store.if:
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i8> [[TMP8]], i32 0
+; CHECK-NEXT: store i8 [[TMP10]], ptr [[P]], align 1
+; CHECK-NEXT: br label [[VECTOR_BODY]]
+; CHECK: pred.store.continue:
+; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 1
+; CHECK-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
+; CHECK: pred.store.if3:
+; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i8> [[TMP8]], i32 1
+; CHECK-NEXT: store i8 [[TMP12]], ptr [[P]], align 1
+; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]]
+; CHECK: pred.store.continue4:
+; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 2
+; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]]
+; CHECK: pred.store.if5:
+; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i8> [[TMP8]], i32 2
+; CHECK-NEXT: store i8 [[TMP14]], ptr [[P]], align 1
+; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]]
+; CHECK: pred.store.continue6:
+; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 3
+; CHECK-NEXT: br i1 [[TMP15]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8]]
+; CHECK: pred.store.if7:
+; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i8> [[TMP8]], i32 3
+; CHECK-NEXT: store i8 [[TMP16]], ptr [[P]], align 1
+; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]]
+; CHECK: pred.store.continue8:
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
+; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY1]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 4, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: br label [[FOR_COND:%.*]]
+; CHECK: for.cond:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY:%.*]] ]
+; CHECK-NEXT: [[ADD]] = add i32 [[IV]], 1
+; CHECK-NEXT: [[CMP_SLT:%.*]] = icmp slt i32 [[IV]], 2
+; CHECK-NEXT: [[SHL:%.*]] = shl i64 [[A]], 48
+; CHECK-NEXT: [[ASHR:%.*]] = ashr i64 [[SHL]], 52
+; CHECK-NEXT: [[TRUNC_I32:%.*]] = trunc i64 [[ASHR]] to i32
+; CHECK-NEXT: br i1 [[CMP_SLT]], label [[COND_FALSE:%.*]], label [[FOR_BODY]]
+; CHECK: cond.false:
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[B]] to i32
+; CHECK-NEXT: br label [[FOR_BODY]]
+; CHECK: for.body:
+; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[TRUNC_I32]], [[FOR_COND]] ], [ [[ZEXT]], [[COND_FALSE]] ]
+; CHECK-NEXT: [[SHL_I32:%.*]] = shl i32 [[COND]], 8
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[SHL_I32]] to i8
+; CHECK-NEXT: store i8 [[TRUNC]], ptr [[P]], align 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[IV]], 2
+; CHECK-NEXT: br i1 [[CMP]], label [[FOR_COND]], label [[EXIT]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
entry:
br label %for.cond
@@ -29,3 +110,9 @@ for.body: ; preds = %cond.false, %for.co
exit: ; preds = %for.body
ret void
}
+;.
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
+;.
diff --git a/llvm/test/Transforms/LoopVectorize/SystemZ/pr47665.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/pr47665.ll
index 8451d4a4a5433..07a1cca1bc21e 100644
--- a/llvm/test/Transforms/LoopVectorize/SystemZ/pr47665.ll
+++ b/llvm/test/Transforms/LoopVectorize/SystemZ/pr47665.ll
@@ -1,7 +1,165 @@
-; REQUIRES: asserts
-; RUN: not --crash opt -passes=loop-vectorize -mtriple=s390x -mcpu=z14 -disable-output %s
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -passes=loop-vectorize -mtriple=s390x -mcpu=z14 -S %s | FileCheck %s
define void @test(ptr %p, i40 %a) {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: ptr [[P:%.*]], i40 [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <16 x i40> poison, i40 [[A]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <16 x i40> [[BROADCAST_SPLATINSERT1]], <16 x i40> poison, <16 x i32> zeroinitializer
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE32:%.*]] ]
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[INDEX]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
+; CHECK-NEXT: [[VEC_IV:%.*]] = add <16 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <16 x i32> [[VEC_IV]], <i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9>
+; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i40> [[BROADCAST_SPLAT2]], <i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24>
+; CHECK-NEXT: [[TMP2:%.*]] = ashr <16 x i40> [[TMP1]], <i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28>
+; CHECK-NEXT: [[TMP3:%.*]] = trunc <16 x i40> [[TMP2]] to <16 x i32>
+; CHECK-NEXT: [[TMP4:%.*]] = trunc <16 x i32> [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <16 x i1> [[TMP4]], zeroinitializer
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ult <16 x i1> zeroinitializer, [[TMP5]]
+; CHECK-NEXT: [[TMP7:%.*]] = or <16 x i1> [[TMP6]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt <16 x i1> [[TMP7]], zeroinitializer
+; CHECK-NEXT: [[TMP9:%.*]] = extractelement <16 x i1> [[TMP0]], i32 0
+; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
+; CHECK: pred.store.if:
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <16 x i1> [[TMP8]], i32 0
+; CHECK-NEXT: store i1 [[TMP10]], ptr [[P]], align 1
+; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
+; CHECK: pred.store.continue:
+; CHECK-NEXT: [[TMP11:%.*]] = extractelement <16 x i1> [[TMP0]], i32 1
+; CHECK-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
+; CHECK: pred.store.if3:
+; CHECK-NEXT: [[TMP12:%.*]] = extractelement <16 x i1> [[TMP8]], i32 1
+; CHECK-NEXT: store i1 [[TMP12]], ptr [[P]], align 1
+; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]]
+; CHECK: pred.store.continue4:
+; CHECK-NEXT: [[TMP13:%.*]] = extractelement <16 x i1> [[TMP0]], i32 2
+; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]]
+; CHECK: pred.store.if5:
+; CHECK-NEXT: [[TMP14:%.*]] = extractelement <16 x i1> [[TMP8]], i32 2
+; CHECK-NEXT: store i1 [[TMP14]], ptr [[P]], align 1
+; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]]
+; CHECK: pred.store.continue6:
+; CHECK-NEXT: [[TMP15:%.*]] = extractelement <16 x i1> [[TMP0]], i32 3
+; CHECK-NEXT: br i1 [[TMP15]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]]
+; CHECK: pred.store.if7:
+; CHECK-NEXT: [[TMP16:%.*]] = extractelement <16 x i1> [[TMP8]], i32 3
+; CHECK-NEXT: store i1 [[TMP16]], ptr [[P]], align 1
+; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]]
+; CHECK: pred.store.continue8:
+; CHECK-NEXT: [[TMP17:%.*]] = extractelement <16 x i1> [[TMP0]], i32 4
+; CHECK-NEXT: br i1 [[TMP17]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10:%.*]]
+; CHECK: pred.store.if9:
+; CHECK-NEXT: [[TMP18:%.*]] = extractelement <16 x i1> [[TMP8]], i32 4
+; CHECK-NEXT: store i1 [[TMP18]], ptr [[P]], align 1
+; CHECK-NEXT: br label [[PRED_STORE_CONTINUE10]]
+; CHECK: pred.store.continue10:
+; CHECK-NEXT: [[TMP19:%.*]] = extractelement <16 x i1> [[TMP0]], i32 5
+; CHECK-NEXT: br i1 [[TMP19]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12:%.*]]
+; CHECK: pred.store.if11:
+; CHECK-NEXT: [[TMP20:%.*]] = extractelement <16 x i1> [[TMP8]], i32 5
+; CHECK-NEXT: store i1 [[TMP20]], ptr [[P]], align 1
+; CHECK-NEXT: br label [[PRED_STORE_CONTINUE12]]
+; CHECK: pred.store.continue12:
+; CHECK-NEXT: [[TMP21:%.*]] = extractelement <16 x i1> [[TMP0]], i32 6
+; CHECK-NEXT: br i1 [[TMP21]], label [[PRED_STORE_IF13:%.*]], label [[PRED_STORE_CONTINUE14:%.*]]
+; CHECK: pred.store.if13:
+; CHECK-NEXT: [[TMP22:%.*]] = extractelement <16 x i1> [[TMP8]], i32 6
+; CHECK-NEXT: store i1 [[TMP22]], ptr [[P]], align 1
+; CHECK-NEXT: br label [[PRED_STORE_CONTINUE14]]
+; CHECK: pred.store.continue14:
+; CHECK-NEXT: [[TMP23:%.*]] = extractelement <16 x i1> [[TMP0]], i32 7
+; CHECK-NEXT: br i1 [[TMP23]], label [[PRED_STORE_IF15:%.*]], label [[PRED_STORE_CONTINUE16:%.*]]
+; CHECK: pred.store.if15:
+; CHECK-NEXT: [[TMP24:%.*]] = extractelement <16 x i1> [[TMP8]], i32 7
+; CHECK-NEXT: store i1 [[TMP24]], ptr [[P]], align 1
+; CHECK-NEXT: br label [[PRED_STORE_CONTINUE16]]
+; CHECK: pred.store.continue16:
+; CHECK-NEXT: [[TMP25:%.*]] = extractelement <16 x i1> [[TMP0]], i32 8
+; CHECK-NEXT: br i1 [[TMP25]], label [[PRED_STORE_IF17:%.*]], label [[PRED_STORE_CONTINUE18:%.*]]
+; CHECK: pred.store.if17:
+; CHECK-NEXT: [[TMP26:%.*]] = extractelement <16 x i1> [[TMP8]], i32 8
+; CHECK-NEXT: store i1 [[TMP26]], ptr [[P]], align 1
+; CHECK-NEXT: br label [[PRED_STORE_CONTINUE18]]
+; CHECK: pred.store.continue18:
+; CHECK-NEXT: [[TMP27:%.*]] = extractelement <16 x i1> [[TMP0]], i32 9
+; CHECK-NEXT: br i1 [[TMP27]], label [[PRED_STORE_IF19:%.*]], label [[PRED_STORE_CONTINUE20:%.*]]
+; CHECK: pred.store.if19:
+; CHECK-NEXT: [[TMP28:%.*]] = extractelement <16 x i1> [[TMP8]], i32 9
+; CHECK-NEXT: store i1 [[TMP28]], ptr [[P]], align 1
+; CHECK-NEXT: br label [[PRED_STORE_CONTINUE20]]
+; CHECK: pred.store.continue20:
+; CHECK-NEXT: [[TMP29:%.*]] = extractelement <16 x i1> [[TMP0]], i32 10
+; CHECK-NEXT: br i1 [[TMP29]], label [[PRED_STORE_IF21:%.*]], label [[PRED_STORE_CONTINUE22:%.*]]
+; CHECK: pred.store.if21:
+; CHECK-NEXT: [[TMP30:%.*]] = extractelement <16 x i1> [[TMP8]], i32 10
+; CHECK-NEXT: store i1 [[TMP30]], ptr [[P]], align 1
+; CHECK-NEXT: br label [[PRED_STORE_CONTINUE22]]
+; CHECK: pred.store.continue22:
+; CHECK-NEXT: [[TMP31:%.*]] = extractelement <16 x i1> [[TMP0]], i32 11
+; CHECK-NEXT: br i1 [[TMP31]], label [[PRED_STORE_IF23:%.*]], label [[PRED_STORE_CONTINUE24:%.*]]
+; CHECK: pred.store.if23:
+; CHECK-NEXT: [[TMP32:%.*]] = extractelement <16 x i1> [[TMP8]], i32 11
+; CHECK-NEXT: store i1 [[TMP32]], ptr [[P]], align 1
+; CHECK-NEXT: br label [[PRED_STORE_CONTINUE24]]
+; CHECK: pred.store.continue24:
+; CHECK-NEXT: [[TMP33:%.*]] = extractelement <16 x i1> [[TMP0]], i32 12
+; CHECK-NEXT: br i1 [[TMP33]], label [[PRED_STORE_IF25:%.*]], label [[PRED_STORE_CONTINUE26:%.*]]
+; CHECK: pred.store.if25:
+; CHECK-NEXT: [[TMP34:%.*]] = extractelement <16 x i1> [[TMP8]], i32 12
+; CHECK-NEXT: store i1 [[TMP34]], ptr [[P]], align 1
+; CHECK-NEXT: br label [[PRED_STORE_CONTINUE26]]
+; CHECK: pred.store.continue26:
+; CHECK-NEXT: [[TMP35:%.*]] = extractelement <16 x i1> [[TMP0]], i32 13
+; CHECK-NEXT: br i1 [[TMP35]], label [[PRED_STORE_IF27:%.*]], label [[PRED_STORE_CONTINUE28:%.*]]
+; CHECK: pred.store.if27:
+; CHECK-NEXT: [[TMP36:%.*]] = extractelement <16 x i1> [[TMP8]], i32 13
+; CHECK-NEXT: store i1 [[TMP36]], ptr [[P]], align 1
+; CHECK-NEXT: br label [[PRED_STORE_CONTINUE28]]
+; CHECK: pred.store.continue28:
+; CHECK-NEXT: [[TMP37:%.*]] = extractelement <16 x i1> [[TMP0]], i32 14
+; CHECK-NEXT: br i1 [[TMP37]], label [[PRED_STORE_IF29:%.*]], label [[PRED_STORE_CONTINUE30:%.*]]
+; CHECK: pred.store.if29:
+; CHECK-NEXT: [[TMP38:%.*]] = extractelement <16 x i1> [[TMP8]], i32 14
+; CHECK-NEXT: store i1 [[TMP38]], ptr [[P]], align 1
+; CHECK-NEXT: br label [[PRED_STORE_CONTINUE30]]
+; CHECK: pred.store.continue30:
+; CHECK-NEXT: [[TMP39:%.*]] = extractelement <16 x i1> [[TMP0]], i32 15
+; CHECK-NEXT: br i1 [[TMP39]], label [[PRED_STORE_IF31:%.*]], label [[PRED_STORE_CONTINUE32]]
+; CHECK: pred.store.if31:
+; CHECK-NEXT: [[TMP40:%.*]] = extractelement <16 x i1> [[TMP8]], i32 15
+; CHECK-NEXT: store i1 [[TMP40]], ptr [[P]], align 1
+; CHECK-NEXT: br label [[PRED_STORE_CONTINUE32]]
+; CHECK: pred.store.continue32:
+; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 16
+; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SHL:%.*]] = shl i40 [[A]], 24
+; CHECK-NEXT: [[ASHR:%.*]] = ashr i40 [[SHL]], 28
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc i40 [[ASHR]] to i32
+; CHECK-NEXT: [[ICMP_EQ:%.*]] = icmp eq i32 [[TRUNC]], 0
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[ICMP_EQ]] to i32
+; CHECK-NEXT: [[ICMP_ULT:%.*]] = icmp ult i32 0, [[ZEXT]]
+; CHECK-NEXT: [[OR:%.*]] = or i1 [[ICMP_ULT]], true
+; CHECK-NEXT: [[ICMP_SGT:%.*]] = icmp sgt i1 [[OR]], false
+; CHECK-NEXT: store i1 [[ICMP_SGT]], ptr [[P]], align 1
+; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT: [[COND:%.*]] = icmp ult i32 [[IV_NEXT]], 10
+; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[EXIT]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
entry:
br label %for.body
@@ -23,3 +181,9 @@ for.body: ; preds = %for.body, %entry
exit: ; preds = %for.body
ret void
}
+;.
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
+;.
More information about the llvm-commits
mailing list