[llvm] goldsteinn/insertelement fpclass (PR #87708)

via llvm-commits llvm-commits at lists.llvm.org
Thu Apr 4 13:41:22 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-transforms

Author: None (goldsteinn)

<details>
<summary>Changes</summary>

- **[ValueTracking] Add tests for non-constant idx for fpclass of `insertelement`; NFC**
- **[ValueTracking] Support non-constant idx for `computeKnownFPClass` of `insertelement`**


---
Full diff: https://github.com/llvm/llvm-project/pull/87708.diff


2 Files Affected:

- (modified) llvm/lib/Analysis/ValueTracking.cpp (+10-9) 
- (modified) llvm/test/Transforms/Attributor/nofpclass.ll (+20-1) 


``````````diff
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 5ad4da43bca7db..685861a9116555 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -5355,14 +5355,17 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
     const Value *Vec = Op->getOperand(0);
     const Value *Elt = Op->getOperand(1);
     auto *CIdx = dyn_cast<ConstantInt>(Op->getOperand(2));
-    // Early out if the index is non-constant or out-of-range.
     unsigned NumElts = DemandedElts.getBitWidth();
-    if (!CIdx || CIdx->getValue().uge(NumElts))
-      return;
+    APInt DemandedVecElts = DemandedElts;
+    bool NeedsElt = true;
+    // If we know the index we are inserting too, clear it from Vec check.
+    if (CIdx && CIdx->getValue().ult(NumElts)) {
+      DemandedVecElts.clearBit(CIdx->getZExtValue());
+      NeedsElt = DemandedElts[CIdx->getZExtValue()];
+    }
 
-    unsigned EltIdx = CIdx->getZExtValue();
     // Do we demand the inserted element?
-    if (DemandedElts[EltIdx]) {
+    if (NeedsElt) {
       computeKnownFPClass(Elt, Known, InterestedClasses, Depth + 1, Q);
       // If we don't know any bits, early out.
       if (Known.isUnknown())
@@ -5371,10 +5374,8 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
       Known.KnownFPClasses = fcNone;
     }
 
-    // We don't need the base vector element that has been inserted.
-    APInt DemandedVecElts = DemandedElts;
-    DemandedVecElts.clearBit(EltIdx);
-    if (!!DemandedVecElts) {
+    // Do we need anymore elements from Vec?
+    if (!DemandedVecElts.isZero()) {
       KnownFPClass Known2;
       computeKnownFPClass(Vec, DemandedVecElts, InterestedClasses, Known2,
                           Depth + 1, Q);
diff --git a/llvm/test/Transforms/Attributor/nofpclass.ll b/llvm/test/Transforms/Attributor/nofpclass.ll
index 4df647cf3bb5b2..4370cea3c285cb 100644
--- a/llvm/test/Transforms/Attributor/nofpclass.ll
+++ b/llvm/test/Transforms/Attributor/nofpclass.ll
@@ -1513,6 +1513,25 @@ define <4 x float> @insertelement_constant_chain() {
   ret <4 x float> %ins.3
 }
 
+define <4 x float> @insertelement_non_constant_chain(i32 %idx) {
+; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
+; CHECK-LABEL: define nofpclass(nan inf nzero sub) <4 x float> @insertelement_non_constant_chain
+; CHECK-SAME: (i32 [[IDX:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT:    [[INS_0:%.*]] = insertelement <4 x float> poison, float 1.000000e+00, i32 0
+; CHECK-NEXT:    [[INS_1:%.*]] = insertelement <4 x float> [[INS_0]], float 0.000000e+00, i32 1
+; CHECK-NEXT:    [[INS_2:%.*]] = insertelement <4 x float> [[INS_1]], float -9.000000e+00, i32 2
+; CHECK-NEXT:    [[INS_4:%.*]] = insertelement <4 x float> [[INS_2]], float 3.000000e+00, i32 3
+; CHECK-NEXT:    [[INS_3:%.*]] = insertelement <4 x float> [[INS_2]], float 4.000000e+00, i32 [[IDX]]
+; CHECK-NEXT:    ret <4 x float> [[INS_3]]
+;
+  %ins.0 = insertelement <4 x float> poison, float 1.0, i32 0
+  %ins.1 = insertelement <4 x float> %ins.0, float 0.0, i32 1
+  %ins.2 = insertelement <4 x float> %ins.1, float -9.0, i32 2
+  %ins.3 = insertelement <4 x float> %ins.2, float 3.0, i32 3
+  %ins.4 = insertelement <4 x float> %ins.2, float 4.0, i32 %idx
+  ret <4 x float> %ins.4
+}
+
 define <vscale x 4 x float> @insertelement_scalable_constant_chain() {
 ; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
 ; CHECK-LABEL: define <vscale x 4 x float> @insertelement_scalable_constant_chain
@@ -1582,7 +1601,7 @@ define float @insertelement_extractelement_unknown(<4 x float> nofpclass(zero) %
 
 define <4 x float> @insertelement_index_oob_chain() {
 ; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
-; CHECK-LABEL: define <4 x float> @insertelement_index_oob_chain
+; CHECK-LABEL: define nofpclass(nan ninf nzero sub norm) <4 x float> @insertelement_index_oob_chain
 ; CHECK-SAME: () #[[ATTR3]] {
 ; CHECK-NEXT:    [[INSERT:%.*]] = insertelement <4 x float> zeroinitializer, float 0x7FF0000000000000, i32 4
 ; CHECK-NEXT:    ret <4 x float> [[INSERT]]

``````````

</details>


https://github.com/llvm/llvm-project/pull/87708


More information about the llvm-commits mailing list