[llvm] [LV] Avoid querying the cost of invalid operations (PR #89161)
Shih-Po Hung via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 17 18:33:14 PDT 2024
https://github.com/arcbbb created https://github.com/llvm/llvm-project/pull/89161
In issue #88802, the LV cost model would query the cost of the TRUNC for source type 2xi1 and destination type 2xi32 after minimal bitwidth truncation.
This patch adds a check and returns a cost of 0 for invalid cast operations.
>From 96a5d3d8865ce9e535f8a040975ff982959ae04b Mon Sep 17 00:00:00 2001
From: ShihPo Hung <shihpo.hung at sifive.com>
Date: Tue, 16 Apr 2024 00:59:45 -0700
Subject: [PATCH] [LV] Avoid querying the cost of invalid operations
In issue #88802, the LV cost model would query the cost of the TRUNC
for source type 2xi1 and destination type 2xi32 after minimal bitwidth
truncation.
This patch adds a check and returns a cost of 0 for invalid cast operations.
---
.../Transforms/Vectorize/LoopVectorize.cpp | 4 +
.../RISCV/cost-on-invalid-cast.ll | 93 +++++++++++++++++++
2 files changed, 97 insertions(+)
create mode 100644 llvm/test/Transforms/LoopVectorize/RISCV/cost-on-invalid-cast.ll
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 0834865173b2f1..de02dc9171fec6 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7067,6 +7067,10 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, ElementCount VF,
SrcVecTy = smallestIntegerVectorType(SrcVecTy, MinVecTy);
VectorTy =
largestIntegerVectorType(ToVectorTy(I->getType(), VF), MinVecTy);
+ // The truncation is unnecessary if the source is not wider than the
+ // destination.
+ if (SrcVecTy->getScalarSizeInBits() <= VectorTy->getScalarSizeInBits())
+ return 0;
} else if (Opcode == Instruction::ZExt || Opcode == Instruction::SExt) {
// Leave SrcVecTy unchanged - we only shrink the destination element
// type.
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/cost-on-invalid-cast.ll b/llvm/test/Transforms/LoopVectorize/RISCV/cost-on-invalid-cast.ll
new file mode 100644
index 00000000000000..a502a219940635
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/cost-on-invalid-cast.ll
@@ -0,0 +1,93 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -passes=loop-vectorize -mtriple=riscv64 -mattr=+v -S 2>&1 | FileCheck %s
+
+; The TTI cost model might trigger an assertion when LV tries to query the cost of an invalid cast operation.
+define void @c() {
+; CHECK-LABEL: define void @c(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[TMP0]], 2
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: [[TMP3:%.*]] = mul i32 [[TMP2]], 2
+; CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 1
+; CHECK-NEXT: [[N_RND_UP:%.*]] = add i32 2, [[TMP4]]
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], [[TMP1]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
+; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: [[TMP6:%.*]] = mul i32 [[TMP5]], 2
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[INDEX]], 0
+; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32 [[TMP7]], i32 2)
+; CHECK-NEXT: [[TMP8:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x i1> zeroinitializer
+; CHECK-NEXT: [[TMP9:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i1> zeroinitializer, <vscale x 2 x i1> zeroinitializer
+; CHECK-NEXT: [[TMP10:%.*]] = or <vscale x 2 x i1> [[TMP9]], [[TMP8]]
+; CHECK-NEXT: [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP9]], <vscale x 2 x i32> zeroinitializer, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP11:%.*]] = trunc <vscale x 2 x i32> [[PREDPHI]] to <vscale x 2 x i8>
+; CHECK-NEXT: [[TMP12:%.*]] = and <vscale x 2 x i8> [[TMP11]], zeroinitializer
+; CHECK-NEXT: call void @llvm.masked.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> [[TMP12]], <vscale x 2 x ptr> zeroinitializer, i32 1, <vscale x 2 x i1> [[TMP10]])
+; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP6]]
+; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: br label [[FOR_COND:%.*]]
+; CHECK: for.cond:
+; CHECK-NEXT: [[F_0:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[COND_END:%.*]] ]
+; CHECK-NEXT: [[ADD]] = add i32 [[F_0]], 1
+; CHECK-NEXT: br i1 false, label [[COND_FALSE:%.*]], label [[COND_TRUE:%.*]]
+; CHECK: cond.true:
+; CHECK-NEXT: [[CONV10:%.*]] = trunc i64 0 to i32
+; CHECK-NEXT: br label [[COND_END]]
+; CHECK: cond.false:
+; CHECK-NEXT: [[TOBOOL15:%.*]] = zext i8 0 to i32
+; CHECK-NEXT: br label [[COND_END]]
+; CHECK: cond.end:
+; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[CONV10]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ]
+; CHECK-NEXT: [[TMP14:%.*]] = trunc i32 [[COND]] to i8
+; CHECK-NEXT: [[CONV17:%.*]] = and i8 [[TMP14]], 0
+; CHECK-NEXT: store i8 [[CONV17]], ptr null, align 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[F_0]], 1
+; CHECK-NEXT: br i1 [[CMP]], label [[FOR_COND]], label [[FOR_COND_CLEANUP]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK: for.cond.cleanup:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %for.cond
+
+for.cond:
+ %f.0 = phi i32 [ 0, %entry ], [ %add, %cond.end ]
+ %add = add i32 %f.0, 1
+ br i1 false, label %cond.false, label %cond.true
+
+cond.true:
+ %conv10 = trunc i64 0 to i32
+ br label %cond.end
+
+cond.false:
+ %tobool15 = zext i8 0 to i32
+ br label %cond.end
+
+cond.end:
+ %cond = phi i32 [ %conv10, %cond.true ], [ 0, %cond.false ]
+ %0 = trunc i32 %cond to i8
+ %conv17 = and i8 %0, 0
+ store i8 %conv17, ptr null, align 1
+ %cmp = icmp slt i32 %f.0, 1
+ br i1 %cmp, label %for.cond, label %for.cond.cleanup
+
+for.cond.cleanup:
+ ret void
+}
+;.
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
+;.
More information about the llvm-commits
mailing list