[clang] 6b76c1e - [SCCP] Add support for vectors (#98026)

Tue Jul 9 03:25:56 PDT 2024

Author: Nikita Popov
Date: 2024-07-09T12:25:53+02:00
New Revision: 6b76c1e64ca7689ac9d9de8a4529c7af51e8b472

URL: https://github.com/llvm/llvm-project/commit/6b76c1e64ca7689ac9d9de8a4529c7af51e8b472
DIFF: https://github.com/llvm/llvm-project/commit/6b76c1e64ca7689ac9d9de8a4529c7af51e8b472.diff

LOG: [SCCP] Add support for vectors (#98026)

Add preliminary support for vectors of integers by using the
`ValueLatticeElement::asConstantRange()` helper instead of a custom
implementation, and relaxing various integer type checks.

This enables just the part that works automatically, e.g. icmps with a
constant vector operand aren't supported yet.

The change in ssa.copy handling is because asConstantRange() returns an
empty range for an unknown LV, while SCCP's getConstantRange() returned a
full range. I've made the change to preserve the existing behavior.

Added: 
    

Modified: 
    clang/test/CodeGen/isfpclass.c
    llvm/lib/Transforms/Utils/SCCPSolver.cpp
    llvm/test/Transforms/SCCP/add-nuw-nsw-flags.ll
    llvm/test/Transforms/SCCP/overdefined-ext.ll
    llvm/test/Transforms/SCCP/trunc-nuw-nsw-flags.ll

Removed: 
    


################################################################################
diff  --git a/clang/test/CodeGen/isfpclass.c b/clang/test/CodeGen/isfpclass.c
index fd35182a5dbbe..a0e04eaad5929 100644

--- a/clang/test/CodeGen/isfpclass.c
+++ b/clang/test/CodeGen/isfpclass.c
@@ -136,7 +136,7 @@ typedef double __attribute__((ext_vector_type(4))) double4;
 typedef int __attribute__((ext_vector_type(4))) int4;
 typedef long __attribute__((ext_vector_type(4))) long4;
 
-// CHECK-LABEL: define dso_local noundef <4 x i32> @check_isfpclass_nan_v4f32
+// CHECK-LABEL: define dso_local range(i32 0, 2) <4 x i32> @check_isfpclass_nan_v4f32
 // CHECK-SAME: (<4 x float> noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = fcmp uno <4 x float> [[X]], zeroinitializer
@@ -147,7 +147,7 @@ int4 check_isfpclass_nan_v4f32(float4 x) {
   return __builtin_isfpclass(x, 3 /*NaN*/);
 }
 
-// CHECK-LABEL: define dso_local noundef <4 x i32> @check_isfpclass_nan_strict_v4f32
+// CHECK-LABEL: define dso_local range(i32 0, 2) <4 x i32> @check_isfpclass_nan_strict_v4f32
 // CHECK-SAME: (<4 x float> noundef [[X:%.*]]) local_unnamed_addr #[[ATTR2]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i1> @llvm.is.fpclass.v4f32(<4 x float> [[X]], i32 3) #[[ATTR5]]

diff  --git a/llvm/lib/Transforms/Utils/SCCPSolver.cpp b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
index db0d40b317d17..7bfff4dfa67ad 100644
--- a/llvm/lib/Transforms/Utils/SCCPSolver.cpp
+++ b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
@@ -42,14 +42,6 @@ static ValueLatticeElement::MergeOptions getMaxWidenStepsOpts() {
       MaxNumRangeExtensions);
 }
 
-static ConstantRange getConstantRange(const ValueLatticeElement &LV, Type *Ty,
-                                      bool UndefAllowed) {
-  assert(Ty->isIntOrIntVectorTy() && "Should be int or int vector");
-  if (LV.isConstantRange(UndefAllowed))
-    return LV.getConstantRange();
-  return ConstantRange::getFull(Ty->getScalarSizeInBits());
-}
-
 namespace llvm {
 
 bool SCCPSolver::isConstant(const ValueLatticeElement &LV) {
@@ -109,14 +101,14 @@ static bool refineInstruction(SCCPSolver &Solver,
                               Instruction &Inst) {
   bool Changed = false;
   auto GetRange = [&Solver, &InsertedValues](Value *Op) {
-    if (auto *Const = dyn_cast<ConstantInt>(Op))
-      return ConstantRange(Const->getValue());
-    if (isa<Constant>(Op) || InsertedValues.contains(Op)) {
+    if (auto *Const = dyn_cast<Constant>(Op))
+      return Const->toConstantRange();
+    if (InsertedValues.contains(Op)) {
       unsigned Bitwidth = Op->getType()->getScalarSizeInBits();
       return ConstantRange::getFull(Bitwidth);
     }
-    return getConstantRange(Solver.getLatticeValueFor(Op), Op->getType(),
-                            /*UndefAllowed=*/false);
+    return Solver.getLatticeValueFor(Op).asConstantRange(
+        Op->getType(), /*UndefAllowed=*/false);
   };
 
   if (isa<OverflowingBinaryOperator>(Inst)) {
@@ -819,7 +811,7 @@ class SCCPInstVisitor : public InstVisitor<SCCPInstVisitor> {
   }
 
   void trackValueOfArgument(Argument *A) {
-    if (A->getType()->isIntegerTy()) {
+    if (A->getType()->isIntOrIntVectorTy()) {
       if (std::optional<ConstantRange> Range = A->getRange()) {
         markConstantRange(ValueState[A], A, *Range);
         return;
@@ -1296,11 +1288,12 @@ void SCCPInstVisitor::visitCastInst(CastInst &I) {
   }
 
   // Ignore bitcasts, as they may change the number of vector elements.
-  if (I.getDestTy()->isIntegerTy() && I.getSrcTy()->isIntOrIntVectorTy() &&
+  if (I.getDestTy()->isIntOrIntVectorTy() &&
+      I.getSrcTy()->isIntOrIntVectorTy() &&
       I.getOpcode() != Instruction::BitCast) {
     auto &LV = getValueState(&I);
     ConstantRange OpRange =
-        getConstantRange(OpSt, I.getSrcTy(), /*UndefAllowed=*/false);
+        OpSt.asConstantRange(I.getSrcTy(), /*UndefAllowed=*/false);
 
     Type *DestTy = I.getDestTy();
     ConstantRange Res =
@@ -1322,8 +1315,8 @@ void SCCPInstVisitor::handleExtractOfWithOverflow(ExtractValueInst &EVI,
     return; // Wait to resolve.
 
   Type *Ty = LHS->getType();
-  ConstantRange LR = getConstantRange(L, Ty, /*UndefAllowed=*/false);
-  ConstantRange RR = getConstantRange(R, Ty, /*UndefAllowed=*/false);
+  ConstantRange LR = L.asConstantRange(Ty, /*UndefAllowed=*/false);
+  ConstantRange RR = R.asConstantRange(Ty, /*UndefAllowed=*/false);
   if (Idx == 0) {
     ConstantRange Res = LR.binaryOp(WO->getBinaryOp(), RR);
     mergeInValue(&EVI, ValueLatticeElement::getRange(Res));
@@ -1523,14 +1516,14 @@ void SCCPInstVisitor::visitBinaryOperator(Instruction &I) {
   }
 
   // Only use ranges for binary operators on integers.
-  if (!I.getType()->isIntegerTy())
+  if (!I.getType()->isIntOrIntVectorTy())
     return markOverdefined(&I);
 
   // Try to simplify to a constant range.
   ConstantRange A =
-      getConstantRange(V1State, I.getType(), /*UndefAllowed=*/false);
+      V1State.asConstantRange(I.getType(), /*UndefAllowed=*/false);
   ConstantRange B =
-      getConstantRange(V2State, I.getType(), /*UndefAllowed=*/false);
+      V2State.asConstantRange(I.getType(), /*UndefAllowed=*/false);
 
   auto *BO = cast<BinaryOperator>(&I);
   ConstantRange R = ConstantRange::getEmpty(I.getType()->getScalarSizeInBits());
@@ -1626,7 +1619,7 @@ void SCCPInstVisitor::visitStoreInst(StoreInst &SI) {
 }
 
 static ValueLatticeElement getValueFromMetadata(const Instruction *I) {
-  if (I->getType()->isIntegerTy()) {
+  if (I->getType()->isIntOrIntVectorTy()) {
     if (MDNode *Ranges = I->getMetadata(LLVMContext::MD_range))
       return ValueLatticeElement::getRange(
           getConstantRangeFromMetadata(*Ranges));
@@ -1813,8 +1806,11 @@ void SCCPInstVisitor::handleCallResult(CallBase &CB) {
 
         // Combine range info for the original value with the new range from the
         // condition.
-        auto CopyOfCR = getConstantRange(CopyOfVal, CopyOf->getType(),
-                                         /*UndefAllowed=*/true);
+        auto CopyOfCR = CopyOfVal.asConstantRange(CopyOf->getType(),
+                                                  /*UndefAllowed=*/true);
+        // Treat an unresolved input like a full range.
+        if (CopyOfCR.isEmptySet())
+          CopyOfCR = ConstantRange::getFull(CopyOfCR.getBitWidth());
         auto NewCR = ImposedCR.intersectWith(CopyOfCR);
         // If the existing information is != x, do not use the information from
         // a chained predicate, as the != x information is more likely to be
@@ -1860,7 +1856,7 @@ void SCCPInstVisitor::handleCallResult(CallBase &CB) {
         if (State.isUnknownOrUndef())
           return;
         OpRanges.push_back(
-            getConstantRange(State, Op->getType(), /*UndefAllowed=*/false));
+            State.asConstantRange(Op->getType(), /*UndefAllowed=*/false));
       }
 
       ConstantRange Result =

diff  --git a/llvm/test/Transforms/SCCP/add-nuw-nsw-flags.ll b/llvm/test/Transforms/SCCP/add-nuw-nsw-flags.ll
index 05d9acd191962..1f2fbb6f53cdd 100644
--- a/llvm/test/Transforms/SCCP/add-nuw-nsw-flags.ll
+++ b/llvm/test/Transforms/SCCP/add-nuw-nsw-flags.ll
@@ -43,7 +43,7 @@ define <4 x i8> @range_from_lshr_vec(<4 x i8> %a) {
 ; CHECK-LABEL: @range_from_lshr_vec(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_SHR:%.*]] = lshr <4 x i8> [[A:%.*]], <i8 1, i8 2, i8 3, i8 4>
-; CHECK-NEXT:    [[ADD_1:%.*]] = add <4 x i8> [[A_SHR]], <i8 1, i8 2, i8 3, i8 4>
+; CHECK-NEXT:    [[ADD_1:%.*]] = add nuw <4 x i8> [[A_SHR]], <i8 1, i8 2, i8 3, i8 4>
 ; CHECK-NEXT:    ret <4 x i8> [[ADD_1]]
 ;
 entry:
@@ -56,7 +56,7 @@ define <4 x i8> @range_from_lshr_vec_2(<4 x i8> %a) {
 ; CHECK-LABEL: @range_from_lshr_vec_2(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_SHR:%.*]] = lshr <4 x i8> [[A:%.*]], <i8 1, i8 1, i8 1, i8 1>
-; CHECK-NEXT:    [[ADD_1:%.*]] = add <4 x i8> [[A_SHR]], <i8 2, i8 2, i8 2, i8 2>
+; CHECK-NEXT:    [[ADD_1:%.*]] = add nuw <4 x i8> [[A_SHR]], <i8 2, i8 2, i8 2, i8 2>
 ; CHECK-NEXT:    ret <4 x i8> [[ADD_1]]
 ;
 entry:
@@ -169,7 +169,7 @@ else:
 define <6 x i8> @vector_constant_replacement_in_add(<6 x i8> %a) {
 ; CHECK-LABEL: @vector_constant_replacement_in_add(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[ADD:%.*]] = add <6 x i8> [[A:%.*]], zeroinitializer
+; CHECK-NEXT:    [[ADD:%.*]] = add nuw nsw <6 x i8> [[A:%.*]], zeroinitializer
 ; CHECK-NEXT:    ret <6 x i8> [[ADD]]
 ;
 entry:

diff  --git a/llvm/test/Transforms/SCCP/overdefined-ext.ll b/llvm/test/Transforms/SCCP/overdefined-ext.ll
index 217daa750cc1d..05819c32d522d 100644
--- a/llvm/test/Transforms/SCCP/overdefined-ext.ll
+++ b/llvm/test/Transforms/SCCP/overdefined-ext.ll
@@ -21,7 +21,7 @@ define i1 @zext_icmp(i1 %t0) {
   ret i1 %t2
 }
 
-; negative test. SCCP operates poorly with vector ranges
+; TODO: SCCP operates poorly with vector ranges
 
 define <2 x i1> @zext_vector(<2 x i1> %t0) {
 ; CHECK-LABEL: @zext_vector(
@@ -34,14 +34,11 @@ define <2 x i1> @zext_vector(<2 x i1> %t0) {
   ret <2 x i1> %t2
 }
 
-; negative test. SCCP operates poorly with vector ranges
-
 define <2 x i1> @zext_vector2(<2 x i1> %t0) {
 ; CHECK-LABEL: @zext_vector2(
 ; CHECK-NEXT:    [[T1:%.*]] = zext <2 x i1> [[T0:%.*]] to <2 x i32>
-; CHECK-NEXT:    [[T2:%.*]] = add <2 x i32> [[T1]], <i32 2, i32 2>
-; CHECK-NEXT:    [[T3:%.*]] = icmp eq <2 x i32> [[T1]], [[T2]]
-; CHECK-NEXT:    ret <2 x i1> [[T3]]
+; CHECK-NEXT:    [[T2:%.*]] = add nuw nsw <2 x i32> [[T1]], <i32 2, i32 2>
+; CHECK-NEXT:    ret <2 x i1> zeroinitializer
 ;
   %t1 = zext <2 x i1> %t0 to <2 x i32>
   %t2 = add <2 x i32> %t1, <i32 2, i32 2>
@@ -72,7 +69,7 @@ define i1 @sext_icmp(i1 %t0) {
   ret i1 %t2
 }
 
-; negative test. SCCP operates poorly with vector ranges
+; TODO: SCCP operates poorly with vector ranges
 
 define <2 x i1> @sext_vector(<2 x i1> %t0) {
 ; CHECK-LABEL: @sext_vector(
@@ -85,14 +82,11 @@ define <2 x i1> @sext_vector(<2 x i1> %t0) {
   ret <2 x i1> %t2
 }
 
-; negative test. SCCP operates poorly with vector ranges
-
 define <2 x i1> @sext_vector2(<2 x i1> %t0) {
 ; CHECK-LABEL: @sext_vector2(
 ; CHECK-NEXT:    [[T1:%.*]] = sext <2 x i1> [[T0:%.*]] to <2 x i32>
-; CHECK-NEXT:    [[T2:%.*]] = add <2 x i32> [[T1]], <i32 2, i32 2>
-; CHECK-NEXT:    [[T3:%.*]] = icmp eq <2 x i32> [[T1]], [[T2]]
-; CHECK-NEXT:    ret <2 x i1> [[T3]]
+; CHECK-NEXT:    [[T2:%.*]] = add nsw <2 x i32> [[T1]], <i32 2, i32 2>
+; CHECK-NEXT:    ret <2 x i1> zeroinitializer
 ;
   %t1 = sext <2 x i1> %t0 to <2 x i32>
   %t2 = add <2 x i32> %t1, <i32 2, i32 2>

diff  --git a/llvm/test/Transforms/SCCP/trunc-nuw-nsw-flags.ll b/llvm/test/Transforms/SCCP/trunc-nuw-nsw-flags.ll
index d3bac0d68a979..92d84f71bd9d4 100644
--- a/llvm/test/Transforms/SCCP/trunc-nuw-nsw-flags.ll
+++ b/llvm/test/Transforms/SCCP/trunc-nuw-nsw-flags.ll
@@ -48,7 +48,7 @@ define <4 x i16> @range_from_and_nuw_vec(<4 x i32> %a) {
 ; CHECK-SAME: <4 x i32> [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[AND1:%.*]] = and <4 x i32> [[A]], <i32 65535, i32 65535, i32 65535, i32 65535>
-; CHECK-NEXT:    [[TRUNC1:%.*]] = trunc <4 x i32> [[AND1]] to <4 x i16>
+; CHECK-NEXT:    [[TRUNC1:%.*]] = trunc nuw <4 x i32> [[AND1]] to <4 x i16>
 ; CHECK-NEXT:    ret <4 x i16> [[TRUNC1]]
 ;
 entry: