[clang] 6b76c1e - [SCCP] Add support for vectors (#98026)
via cfe-commits
cfe-commits at lists.llvm.org
Tue Jul 9 03:25:56 PDT 2024
Author: Nikita Popov
Date: 2024-07-09T12:25:53+02:00
New Revision: 6b76c1e64ca7689ac9d9de8a4529c7af51e8b472
URL: https://github.com/llvm/llvm-project/commit/6b76c1e64ca7689ac9d9de8a4529c7af51e8b472
DIFF: https://github.com/llvm/llvm-project/commit/6b76c1e64ca7689ac9d9de8a4529c7af51e8b472.diff
LOG: [SCCP] Add support for vectors (#98026)
Add preliminary support for vectors of integers by using the
`ValueLatticeElement::asConstantRange()` helper instead of a custom
implementation, and relaxing various integer type checks.
This enables just the part that works automatically, e.g. icmps with a
constant vector operand aren't supported yet.
The change in ssa.copy handling is because asConstantRange() returns an
empty range for an unknown LV, while SCCP's getConstantRange() returned a
full range. I've made the change to preserve the existing behavior.
Added:
Modified:
clang/test/CodeGen/isfpclass.c
llvm/lib/Transforms/Utils/SCCPSolver.cpp
llvm/test/Transforms/SCCP/add-nuw-nsw-flags.ll
llvm/test/Transforms/SCCP/overdefined-ext.ll
llvm/test/Transforms/SCCP/trunc-nuw-nsw-flags.ll
Removed:
################################################################################
diff --git a/clang/test/CodeGen/isfpclass.c b/clang/test/CodeGen/isfpclass.c
index fd35182a5dbbe..a0e04eaad5929 100644
--- a/clang/test/CodeGen/isfpclass.c
+++ b/clang/test/CodeGen/isfpclass.c
@@ -136,7 +136,7 @@ typedef double __attribute__((ext_vector_type(4))) double4;
typedef int __attribute__((ext_vector_type(4))) int4;
typedef long __attribute__((ext_vector_type(4))) long4;
-// CHECK-LABEL: define dso_local noundef <4 x i32> @check_isfpclass_nan_v4f32
+// CHECK-LABEL: define dso_local range(i32 0, 2) <4 x i32> @check_isfpclass_nan_v4f32
// CHECK-SAME: (<4 x float> noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = fcmp uno <4 x float> [[X]], zeroinitializer
@@ -147,7 +147,7 @@ int4 check_isfpclass_nan_v4f32(float4 x) {
return __builtin_isfpclass(x, 3 /*NaN*/);
}
-// CHECK-LABEL: define dso_local noundef <4 x i32> @check_isfpclass_nan_strict_v4f32
+// CHECK-LABEL: define dso_local range(i32 0, 2) <4 x i32> @check_isfpclass_nan_strict_v4f32
// CHECK-SAME: (<4 x float> noundef [[X:%.*]]) local_unnamed_addr #[[ATTR2]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i1> @llvm.is.fpclass.v4f32(<4 x float> [[X]], i32 3) #[[ATTR5]]
diff --git a/llvm/lib/Transforms/Utils/SCCPSolver.cpp b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
index db0d40b317d17..7bfff4dfa67ad 100644
--- a/llvm/lib/Transforms/Utils/SCCPSolver.cpp
+++ b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
@@ -42,14 +42,6 @@ static ValueLatticeElement::MergeOptions getMaxWidenStepsOpts() {
MaxNumRangeExtensions);
}
-static ConstantRange getConstantRange(const ValueLatticeElement &LV, Type *Ty,
- bool UndefAllowed) {
- assert(Ty->isIntOrIntVectorTy() && "Should be int or int vector");
- if (LV.isConstantRange(UndefAllowed))
- return LV.getConstantRange();
- return ConstantRange::getFull(Ty->getScalarSizeInBits());
-}
-
namespace llvm {
bool SCCPSolver::isConstant(const ValueLatticeElement &LV) {
@@ -109,14 +101,14 @@ static bool refineInstruction(SCCPSolver &Solver,
Instruction &Inst) {
bool Changed = false;
auto GetRange = [&Solver, &InsertedValues](Value *Op) {
- if (auto *Const = dyn_cast<ConstantInt>(Op))
- return ConstantRange(Const->getValue());
- if (isa<Constant>(Op) || InsertedValues.contains(Op)) {
+ if (auto *Const = dyn_cast<Constant>(Op))
+ return Const->toConstantRange();
+ if (InsertedValues.contains(Op)) {
unsigned Bitwidth = Op->getType()->getScalarSizeInBits();
return ConstantRange::getFull(Bitwidth);
}
- return getConstantRange(Solver.getLatticeValueFor(Op), Op->getType(),
- /*UndefAllowed=*/false);
+ return Solver.getLatticeValueFor(Op).asConstantRange(
+ Op->getType(), /*UndefAllowed=*/false);
};
if (isa<OverflowingBinaryOperator>(Inst)) {
@@ -819,7 +811,7 @@ class SCCPInstVisitor : public InstVisitor<SCCPInstVisitor> {
}
void trackValueOfArgument(Argument *A) {
- if (A->getType()->isIntegerTy()) {
+ if (A->getType()->isIntOrIntVectorTy()) {
if (std::optional<ConstantRange> Range = A->getRange()) {
markConstantRange(ValueState[A], A, *Range);
return;
@@ -1296,11 +1288,12 @@ void SCCPInstVisitor::visitCastInst(CastInst &I) {
}
// Ignore bitcasts, as they may change the number of vector elements.
- if (I.getDestTy()->isIntegerTy() && I.getSrcTy()->isIntOrIntVectorTy() &&
+ if (I.getDestTy()->isIntOrIntVectorTy() &&
+ I.getSrcTy()->isIntOrIntVectorTy() &&
I.getOpcode() != Instruction::BitCast) {
auto &LV = getValueState(&I);
ConstantRange OpRange =
- getConstantRange(OpSt, I.getSrcTy(), /*UndefAllowed=*/false);
+ OpSt.asConstantRange(I.getSrcTy(), /*UndefAllowed=*/false);
Type *DestTy = I.getDestTy();
ConstantRange Res =
@@ -1322,8 +1315,8 @@ void SCCPInstVisitor::handleExtractOfWithOverflow(ExtractValueInst &EVI,
return; // Wait to resolve.
Type *Ty = LHS->getType();
- ConstantRange LR = getConstantRange(L, Ty, /*UndefAllowed=*/false);
- ConstantRange RR = getConstantRange(R, Ty, /*UndefAllowed=*/false);
+ ConstantRange LR = L.asConstantRange(Ty, /*UndefAllowed=*/false);
+ ConstantRange RR = R.asConstantRange(Ty, /*UndefAllowed=*/false);
if (Idx == 0) {
ConstantRange Res = LR.binaryOp(WO->getBinaryOp(), RR);
mergeInValue(&EVI, ValueLatticeElement::getRange(Res));
@@ -1523,14 +1516,14 @@ void SCCPInstVisitor::visitBinaryOperator(Instruction &I) {
}
// Only use ranges for binary operators on integers.
- if (!I.getType()->isIntegerTy())
+ if (!I.getType()->isIntOrIntVectorTy())
return markOverdefined(&I);
// Try to simplify to a constant range.
ConstantRange A =
- getConstantRange(V1State, I.getType(), /*UndefAllowed=*/false);
+ V1State.asConstantRange(I.getType(), /*UndefAllowed=*/false);
ConstantRange B =
- getConstantRange(V2State, I.getType(), /*UndefAllowed=*/false);
+ V2State.asConstantRange(I.getType(), /*UndefAllowed=*/false);
auto *BO = cast<BinaryOperator>(&I);
ConstantRange R = ConstantRange::getEmpty(I.getType()->getScalarSizeInBits());
@@ -1626,7 +1619,7 @@ void SCCPInstVisitor::visitStoreInst(StoreInst &SI) {
}
static ValueLatticeElement getValueFromMetadata(const Instruction *I) {
- if (I->getType()->isIntegerTy()) {
+ if (I->getType()->isIntOrIntVectorTy()) {
if (MDNode *Ranges = I->getMetadata(LLVMContext::MD_range))
return ValueLatticeElement::getRange(
getConstantRangeFromMetadata(*Ranges));
@@ -1813,8 +1806,11 @@ void SCCPInstVisitor::handleCallResult(CallBase &CB) {
// Combine range info for the original value with the new range from the
// condition.
- auto CopyOfCR = getConstantRange(CopyOfVal, CopyOf->getType(),
- /*UndefAllowed=*/true);
+ auto CopyOfCR = CopyOfVal.asConstantRange(CopyOf->getType(),
+ /*UndefAllowed=*/true);
+ // Treat an unresolved input like a full range.
+ if (CopyOfCR.isEmptySet())
+ CopyOfCR = ConstantRange::getFull(CopyOfCR.getBitWidth());
auto NewCR = ImposedCR.intersectWith(CopyOfCR);
// If the existing information is != x, do not use the information from
// a chained predicate, as the != x information is more likely to be
@@ -1860,7 +1856,7 @@ void SCCPInstVisitor::handleCallResult(CallBase &CB) {
if (State.isUnknownOrUndef())
return;
OpRanges.push_back(
- getConstantRange(State, Op->getType(), /*UndefAllowed=*/false));
+ State.asConstantRange(Op->getType(), /*UndefAllowed=*/false));
}
ConstantRange Result =
diff --git a/llvm/test/Transforms/SCCP/add-nuw-nsw-flags.ll b/llvm/test/Transforms/SCCP/add-nuw-nsw-flags.ll
index 05d9acd191962..1f2fbb6f53cdd 100644
--- a/llvm/test/Transforms/SCCP/add-nuw-nsw-flags.ll
+++ b/llvm/test/Transforms/SCCP/add-nuw-nsw-flags.ll
@@ -43,7 +43,7 @@ define <4 x i8> @range_from_lshr_vec(<4 x i8> %a) {
; CHECK-LABEL: @range_from_lshr_vec(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[A_SHR:%.*]] = lshr <4 x i8> [[A:%.*]], <i8 1, i8 2, i8 3, i8 4>
-; CHECK-NEXT: [[ADD_1:%.*]] = add <4 x i8> [[A_SHR]], <i8 1, i8 2, i8 3, i8 4>
+; CHECK-NEXT: [[ADD_1:%.*]] = add nuw <4 x i8> [[A_SHR]], <i8 1, i8 2, i8 3, i8 4>
; CHECK-NEXT: ret <4 x i8> [[ADD_1]]
;
entry:
@@ -56,7 +56,7 @@ define <4 x i8> @range_from_lshr_vec_2(<4 x i8> %a) {
; CHECK-LABEL: @range_from_lshr_vec_2(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[A_SHR:%.*]] = lshr <4 x i8> [[A:%.*]], <i8 1, i8 1, i8 1, i8 1>
-; CHECK-NEXT: [[ADD_1:%.*]] = add <4 x i8> [[A_SHR]], <i8 2, i8 2, i8 2, i8 2>
+; CHECK-NEXT: [[ADD_1:%.*]] = add nuw <4 x i8> [[A_SHR]], <i8 2, i8 2, i8 2, i8 2>
; CHECK-NEXT: ret <4 x i8> [[ADD_1]]
;
entry:
@@ -169,7 +169,7 @@ else:
define <6 x i8> @vector_constant_replacement_in_add(<6 x i8> %a) {
; CHECK-LABEL: @vector_constant_replacement_in_add(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[ADD:%.*]] = add <6 x i8> [[A:%.*]], zeroinitializer
+; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw <6 x i8> [[A:%.*]], zeroinitializer
; CHECK-NEXT: ret <6 x i8> [[ADD]]
;
entry:
diff --git a/llvm/test/Transforms/SCCP/overdefined-ext.ll b/llvm/test/Transforms/SCCP/overdefined-ext.ll
index 217daa750cc1d..05819c32d522d 100644
--- a/llvm/test/Transforms/SCCP/overdefined-ext.ll
+++ b/llvm/test/Transforms/SCCP/overdefined-ext.ll
@@ -21,7 +21,7 @@ define i1 @zext_icmp(i1 %t0) {
ret i1 %t2
}
-; negative test. SCCP operates poorly with vector ranges
+; TODO: SCCP operates poorly with vector ranges
define <2 x i1> @zext_vector(<2 x i1> %t0) {
; CHECK-LABEL: @zext_vector(
@@ -34,14 +34,11 @@ define <2 x i1> @zext_vector(<2 x i1> %t0) {
ret <2 x i1> %t2
}
-; negative test. SCCP operates poorly with vector ranges
-
define <2 x i1> @zext_vector2(<2 x i1> %t0) {
; CHECK-LABEL: @zext_vector2(
; CHECK-NEXT: [[T1:%.*]] = zext <2 x i1> [[T0:%.*]] to <2 x i32>
-; CHECK-NEXT: [[T2:%.*]] = add <2 x i32> [[T1]], <i32 2, i32 2>
-; CHECK-NEXT: [[T3:%.*]] = icmp eq <2 x i32> [[T1]], [[T2]]
-; CHECK-NEXT: ret <2 x i1> [[T3]]
+; CHECK-NEXT: [[T2:%.*]] = add nuw nsw <2 x i32> [[T1]], <i32 2, i32 2>
+; CHECK-NEXT: ret <2 x i1> zeroinitializer
;
%t1 = zext <2 x i1> %t0 to <2 x i32>
%t2 = add <2 x i32> %t1, <i32 2, i32 2>
@@ -72,7 +69,7 @@ define i1 @sext_icmp(i1 %t0) {
ret i1 %t2
}
-; negative test. SCCP operates poorly with vector ranges
+; TODO: SCCP operates poorly with vector ranges
define <2 x i1> @sext_vector(<2 x i1> %t0) {
; CHECK-LABEL: @sext_vector(
@@ -85,14 +82,11 @@ define <2 x i1> @sext_vector(<2 x i1> %t0) {
ret <2 x i1> %t2
}
-; negative test. SCCP operates poorly with vector ranges
-
define <2 x i1> @sext_vector2(<2 x i1> %t0) {
; CHECK-LABEL: @sext_vector2(
; CHECK-NEXT: [[T1:%.*]] = sext <2 x i1> [[T0:%.*]] to <2 x i32>
-; CHECK-NEXT: [[T2:%.*]] = add <2 x i32> [[T1]], <i32 2, i32 2>
-; CHECK-NEXT: [[T3:%.*]] = icmp eq <2 x i32> [[T1]], [[T2]]
-; CHECK-NEXT: ret <2 x i1> [[T3]]
+; CHECK-NEXT: [[T2:%.*]] = add nsw <2 x i32> [[T1]], <i32 2, i32 2>
+; CHECK-NEXT: ret <2 x i1> zeroinitializer
;
%t1 = sext <2 x i1> %t0 to <2 x i32>
%t2 = add <2 x i32> %t1, <i32 2, i32 2>
diff --git a/llvm/test/Transforms/SCCP/trunc-nuw-nsw-flags.ll b/llvm/test/Transforms/SCCP/trunc-nuw-nsw-flags.ll
index d3bac0d68a979..92d84f71bd9d4 100644
--- a/llvm/test/Transforms/SCCP/trunc-nuw-nsw-flags.ll
+++ b/llvm/test/Transforms/SCCP/trunc-nuw-nsw-flags.ll
@@ -48,7 +48,7 @@ define <4 x i16> @range_from_and_nuw_vec(<4 x i32> %a) {
; CHECK-SAME: <4 x i32> [[A:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[AND1:%.*]] = and <4 x i32> [[A]], <i32 65535, i32 65535, i32 65535, i32 65535>
-; CHECK-NEXT: [[TRUNC1:%.*]] = trunc <4 x i32> [[AND1]] to <4 x i16>
+; CHECK-NEXT: [[TRUNC1:%.*]] = trunc nuw <4 x i32> [[AND1]] to <4 x i16>
; CHECK-NEXT: ret <4 x i16> [[TRUNC1]]
;
entry:
More information about the cfe-commits
mailing list