[llvm] ece66db - [SelectionDAG] Add computeKnownBits support for ISD::STEP_VECTOR (#80452)

Wed Feb 7 18:04:59 PST 2024

Author: Luke Lau
Date: 2024-02-08T10:04:55+08:00
New Revision: ece66dbc60971cf43a96f63e05c5a507feae3854

URL: https://github.com/llvm/llvm-project/commit/ece66dbc60971cf43a96f63e05c5a507feae3854
DIFF: https://github.com/llvm/llvm-project/commit/ece66dbc60971cf43a96f63e05c5a507feae3854.diff

LOG: [SelectionDAG] Add computeKnownBits support for ISD::STEP_VECTOR (#80452)

This handles two cases where we can work out some known-zero bits for
ISD::STEP_VECTOR.

The first case handles when we know the low bits are zero because the
step
amount is a power of two. This is taken from
https://reviews.llvm.org/D128159,
and even though the original patch didn't end up landing this case due
to it
not having any test difference, I've included it here for completeness's
sake.

The second case handles the case when we have an upper bound on
vscale_range.
We can use this to work out the upper bound on the number of elements,
and thus
what the maximum step will be. From the maximum step we then know which
hi bits
are zero.

On its own, computing the known hi bits results in some small
improvements for
RVV with -mrvv-vector-bits=zvl across the llvm-test-suite. However I'm
hoping
to be able to use this later to reduce the LMUL in index calculations
for
vrgather/indexed accesses.

---------

Co-authored-by: Philip Reames <preames at rivosinc.com>

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
    llvm/test/CodeGen/RISCV/rvv/stepvector.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 3c1343836187a9..55eee780d512c8 100644

--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -3110,6 +3110,33 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
     }
     break;
   }
+  case ISD::STEP_VECTOR: {
+    const APInt &Step = Op.getConstantOperandAPInt(0);
+
+    if (Step.isPowerOf2())
+      Known.Zero.setLowBits(Step.logBase2());
+
+    const Function &F = getMachineFunction().getFunction();
+
+    if (!isUIntN(BitWidth, Op.getValueType().getVectorMinNumElements()))
+      break;
+    const APInt MinNumElts =
+        APInt(BitWidth, Op.getValueType().getVectorMinNumElements());
+
+    bool Overflow;
+    const APInt MaxNumElts = getVScaleRange(&F, BitWidth)
+                                 .getUnsignedMax()
+                                 .umul_ov(MinNumElts, Overflow);
+    if (Overflow)
+      break;
+
+    const APInt MaxValue = (MaxNumElts - 1).umul_ov(Step, Overflow);
+    if (Overflow)
+      break;
+
+    Known.Zero.setHighBits(MaxValue.countl_zero());
+    break;
+  }
   case ISD::BUILD_VECTOR:
     assert(!Op.getValueType().isScalableVector());
     // Collect the known bits that are shared by every demanded vector element.

diff  --git a/llvm/test/CodeGen/RISCV/rvv/stepvector.ll b/llvm/test/CodeGen/RISCV/rvv/stepvector.ll
index 6ce307146be19d..2d65c9d178b789 100644
--- a/llvm/test/CodeGen/RISCV/rvv/stepvector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/stepvector.ll
@@ -733,3 +733,45 @@ entry:
   %3 = shl <vscale x 16 x i64> %2, %1
   ret <vscale x 16 x i64> %3
 }
+
+; maximum step is 4 * 2 = 8, so maximum step value is 7, so hi 61 bits are known
+; zero
+define <vscale x 2 x i64> @hi_bits_known_zero() vscale_range(2, 4) {
+; CHECK-LABEL: hi_bits_known_zero:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vmv.v.i v8, 0
+; CHECK-NEXT:    ret
+  %step = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
+  %and = and <vscale x 2 x i64> %step, shufflevector(<vscale x 2 x i64> insertelement(<vscale x 2 x i64> poison, i64 u0xfffffffffffffff8, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+  ret <vscale x 2 x i64> %and
+}
+
+; the maximum step here overflows so don't set the known hi bits
+define <vscale x 2 x i64> @hi_bits_known_zero_overflow() vscale_range(2, 4) {
+; CHECK-LABEL: hi_bits_known_zero_overflow:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vid.v v8
+; CHECK-NEXT:    li a0, -1
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    vand.vi v8, v8, -8
+; CHECK-NEXT:    ret
+  %step = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
+  %step.mul = mul <vscale x 2 x i64> %step, shufflevector(<vscale x 2 x i64> insertelement(<vscale x 2 x i64> poison, i64 u0xffffffffffffffff, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+  %and = and <vscale x 2 x i64> %step.mul, shufflevector(<vscale x 2 x i64> insertelement(<vscale x 2 x i64> poison, i64 u0xfffffffffffffff8, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+  ret <vscale x 2 x i64> %and
+}
+
+; step values are multiple of 8, so lo 3 bits are known zero
+define <vscale x 2 x i64> @lo_bits_known_zero() {
+; CHECK-LABEL: lo_bits_known_zero:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vmv.v.i v8, 0
+; CHECK-NEXT:    ret
+  %step = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
+  %step.mul = mul <vscale x 2 x i64> %step, shufflevector(<vscale x 2 x i64> insertelement(<vscale x 2 x i64> poison, i64 8, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+  %and = and <vscale x 2 x i64> %step.mul, shufflevector(<vscale x 2 x i64> insertelement(<vscale x 2 x i64> poison, i64 7, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+  ret <vscale x 2 x i64> %and
+}