[llvm] 1ad5210 - [InstCombine] Add fold for extracting known elements from a stepvector

Caroline Concatto via llvm-commits llvm-commits at lists.llvm.org
Thu Jun 10 05:37:46 PDT 2021


Author: Caroline Concatto
Date: 2021-06-10T13:36:57+01:00
New Revision: 1ad52105eb88bb25cb5a22e15df2ef442f553645

URL: https://github.com/llvm/llvm-project/commit/1ad52105eb88bb25cb5a22e15df2ef442f553645
DIFF: https://github.com/llvm/llvm-project/commit/1ad52105eb88bb25cb5a22e15df2ef442f553645.diff

LOG: [InstCombine] Add fold for extracting known elements from a stepvector

This patch folds an extractelement of a stepvector into the constant lane
index when that index is lower than the minimum size of the scalable vector.
The fold is possible because lane X of a stepvector holds the value X.
For instance, extracting element 3 of a <vscale x 4 x i64> stepvector yields 3.

Differential Revision: https://reviews.llvm.org/D103153

Added: 
    

Modified: 
    llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
    llvm/test/Transforms/InstCombine/vscale_extractelement.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index dc9e2d43c27c1..1f9be3bbf3792 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -347,6 +347,24 @@ Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) {
     ElementCount EC = EI.getVectorOperandType()->getElementCount();
     unsigned NumElts = EC.getKnownMinValue();
 
+    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(SrcVec)) {
+      Intrinsic::ID IID = II->getIntrinsicID();
+      // Index needs to be lower than the minimum size of the vector, because
+      // for scalable vector, the vector size is known at run time.
+      if (IID == Intrinsic::experimental_stepvector &&
+          IndexC->getValue().ult(NumElts)) {
+        Type *Ty = EI.getType();
+        unsigned BitWidth = Ty->getIntegerBitWidth();
+        Value *Idx;
+        // Return index when its value does not exceed the allowed limit
+        // for the element type of the vector, otherwise return undefined.
+        if (IndexC->getValue().getActiveBits() <= BitWidth)
+          Idx = ConstantInt::get(Ty, IndexC->getValue().zextOrTrunc(BitWidth));
+        else
+          Idx = UndefValue::get(Ty);
+        return replaceInstUsesWith(EI, Idx);
+      }
+    }
     // InstSimplify should handle cases where the index is invalid.
     // For fixed-length vector, it's invalid to extract out-of-range element.
     if (!EC.isScalable() && IndexC->getValue().uge(NumElts))

diff  --git a/llvm/test/Transforms/InstCombine/vscale_extractelement.ll b/llvm/test/Transforms/InstCombine/vscale_extractelement.ll
index a5d2d9443c86a..0d7eb90b2d3a6 100644
--- a/llvm/test/Transforms/InstCombine/vscale_extractelement.ll
+++ b/llvm/test/Transforms/InstCombine/vscale_extractelement.ll
@@ -179,3 +179,94 @@ define i32 @ossfuzz_25272(float %f) {
   %E = extractelement <vscale x 4 x i32> %vec_int, i32 2147483647
   ret i32 %E
 }
+
+; Step vector optimization
+
+define i64 @ext_lane0_from_stepvec() {
+; CHECK-LABEL: @ext_lane0_from_stepvec(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret i64 0
+;
+entry:
+  %0 = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
+  %1 = extractelement <vscale x 4 x i64> %0, i32 0
+  ret i64 %1
+}
+
+define i32 @ext_lane3_from_stepvec() {
+; CHECK-LABEL: @ext_lane3_from_stepvec(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret i32 3
+;
+entry:
+  %0 = call <vscale x 4 x i32> @llvm.experimental.stepvector.nxv4i32()
+  %1 = extractelement <vscale x 4 x i32> %0, i64 3
+  ret i32 %1
+}
+
+define i64 @ext_lane_out_of_range_from_stepvec() {
+; CHECK-LABEL: @ext_lane_out_of_range_from_stepvec(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <vscale x 4 x i64> [[TMP0]], i32 4
+; CHECK-NEXT:    ret i64 [[TMP1]]
+;
+entry:
+  %0 = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
+  %1 = extractelement <vscale x 4 x i64> %0, i32 4
+  ret i64 %1
+}
+
+define i64 @ext_lane_invalid_from_stepvec() {
+; CHECK-LABEL: @ext_lane_invalid_from_stepvec(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <vscale x 4 x i64> [[TMP0]], i32 -1
+; CHECK-NEXT:    ret i64 [[TMP1]]
+;
+entry:
+  %0 = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
+  %1 = extractelement <vscale x 4 x i64> %0, i32 -1
+  ret i64 %1
+}
+
+define i64 @ext_lane_unknown_from_stepvec(i32 %v) {
+; CHECK-LABEL: @ext_lane_unknown_from_stepvec(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <vscale x 4 x i64> [[TMP0]], i32 [[V:%.*]]
+; CHECK-NEXT:    ret i64 [[TMP1]]
+;
+entry:
+  %0 = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
+  %1 = extractelement <vscale x 4 x i64> %0, i32 %v
+  ret i64 %1
+}
+
+; Check that undef is returned when the extracted element has wrapped.
+
+define i8 @ext_lane256_from_stepvec() {
+; CHECK-LABEL: @ext_lane256_from_stepvec(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret i8 undef
+;
+entry:
+  %0 = call <vscale x 512 x i8> @llvm.experimental.stepvector.nxv512i8()
+  %1 = extractelement <vscale x 512 x i8> %0, i64 256
+  ret i8 %1
+}
+
+define i8 @ext_lane255_from_stepvec() {
+; CHECK-LABEL: @ext_lane255_from_stepvec(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret i8 -1
+;
+entry:
+  %0 = call <vscale x 512 x i8> @llvm.experimental.stepvector.nxv512i8()
+  %1 = extractelement <vscale x 512 x i8> %0, i64 255
+  ret i8 %1
+}
+
+declare <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
+declare <vscale x 4 x i32> @llvm.experimental.stepvector.nxv4i32()
+declare <vscale x 512 x i8> @llvm.experimental.stepvector.nxv512i8()


        


More information about the llvm-commits mailing list