[llvm] c595c87 - [RISCV] Lower BUILD_VECTOR to RISCVISD::VID_VL if it is floating-point type.

Tue Sep 27 17:26:22 PDT 2022

Author: Han-Kuan Chen
Date: 2022-09-27T17:25:34-07:00
New Revision: c595c874cb32564d05a31b565a41d67b3319533e

URL: https://github.com/llvm/llvm-project/commit/c595c874cb32564d05a31b565a41d67b3319533e
DIFF: https://github.com/llvm/llvm-project/commit/c595c874cb32564d05a31b565a41d67b3319533e.diff

LOG: [RISCV] Lower BUILD_VECTOR to RISCVISD::VID_VL if it is floating-point type.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D133688

Added: 
    llvm/test/CodeGen/RISCV/rvv/vle_vid-vfcvt.ll

Modified: 
    llvm/lib/Target/RISCV/RISCVISelLowering.cpp
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index a61c647d2a2f5..7d78b7a1b2766 100644

--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -2057,6 +2057,24 @@ struct VIDSequence {
   int64_t Addend;
 };
 
+static Optional<uint64_t> getExactInteger(const APFloat &APF,
+                                          uint32_t BitWidth) {
+  APSInt ValInt(BitWidth, !APF.isNegative());
+  // We use an arbitrary rounding mode here. If a floating-point is an exact
+  // integer (e.g., 1.0), the rounding mode does not affect the output value. If
+  // the rounding mode changes the output value, then it is not an exact
+  // integer.
+  RoundingMode ArbitraryRM = RoundingMode::TowardZero;
+  bool IsExact;
+  // If it is out of signed integer range, it will return an invalid operation.
+  // If it is not an exact integer, IsExact is false.
+  if ((APF.convertToInteger(ValInt, ArbitraryRM, &IsExact) ==
+       APFloatBase::opInvalidOp) ||
+      !IsExact)
+    return None;
+  return ValInt.extractBitsAsZExtValue(BitWidth, 0);
+}
+
 // Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
 // to the (non-zero) step S and start value X. This can be then lowered as the
 // RVV sequence (VID * S) + X, for example.
@@ -2070,8 +2088,7 @@ struct VIDSequence {
 static Optional<VIDSequence> isSimpleVIDSequence(SDValue Op) {
   unsigned NumElts = Op.getNumOperands();
   assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
-  if (!Op.getValueType().isInteger())
-    return None;
+  bool IsInteger = Op.getValueType().isInteger();
 
   Optional<unsigned> SeqStepDenom;
   Optional<int64_t> SeqStepNum, SeqAddend;
@@ -2082,12 +2099,25 @@ static Optional<VIDSequence> isSimpleVIDSequence(SDValue Op) {
     // when interpolating across them.
     if (Op.getOperand(Idx).isUndef())
       continue;
-    // The BUILD_VECTOR must be all constants.
-    if (!isa<ConstantSDNode>(Op.getOperand(Idx)))
-      return None;
 
-    uint64_t Val = Op.getConstantOperandVal(Idx) &
-                   maskTrailingOnes<uint64_t>(EltSizeInBits);
+    uint64_t Val;
+    if (IsInteger) {
+      // The BUILD_VECTOR must be all constants.
+      if (!isa<ConstantSDNode>(Op.getOperand(Idx)))
+        return None;
+      Val = Op.getConstantOperandVal(Idx) &
+            maskTrailingOnes<uint64_t>(EltSizeInBits);
+    } else {
+      // The BUILD_VECTOR must be all constants.
+      if (!isa<ConstantFPSDNode>(Op.getOperand(Idx)))
+        return None;
+      if (auto ExactInteger = getExactInteger(
+              cast<ConstantFPSDNode>(Op.getOperand(Idx))->getValueAPF(),
+              EltSizeInBits))
+        Val = *ExactInteger;
+      else
+        return None;
+    }
 
     if (PrevElt) {
       // Calculate the step since the last non-undef element, and ensure
@@ -2136,8 +2166,15 @@ static Optional<VIDSequence> isSimpleVIDSequence(SDValue Op) {
   for (unsigned Idx = 0; Idx < NumElts; Idx++) {
     if (Op.getOperand(Idx).isUndef())
       continue;
-    uint64_t Val = Op.getConstantOperandVal(Idx) &
-                   maskTrailingOnes<uint64_t>(EltSizeInBits);
+    uint64_t Val;
+    if (IsInteger) {
+      Val = Op.getConstantOperandVal(Idx) &
+            maskTrailingOnes<uint64_t>(EltSizeInBits);
+    } else {
+      Val = *getExactInteger(
+          cast<ConstantFPSDNode>(Op.getOperand(Idx))->getValueAPF(),
+          EltSizeInBits);
+    }
     uint64_t ExpectedVal =
         (int64_t)(Idx * (uint64_t)*SeqStepNum) / *SeqStepDenom;
     int64_t Addend = SignExtend64(Val - ExpectedVal, EltSizeInBits);
@@ -2345,27 +2382,36 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
          (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
         isPowerOf2_32(StepDenominator) &&
         (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) {
-      SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, ContainerVT, Mask, VL);
+      MVT VIDVT =
+          VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT;
+      MVT VIDContainerVT =
+          getContainerForFixedLengthVector(DAG, VIDVT, Subtarget);
+      SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL);
       // Convert right out of the scalable type so we can use standard ISD
       // nodes for the rest of the computation. If we used scalable types with
       // these, we'd lose the fixed-length vector info and generate worse
       // vsetvli code.
-      VID = convertFromScalableVector(VT, VID, DAG, Subtarget);
+      VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
       if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
           (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
         SDValue SplatStep = DAG.getSplatBuildVector(
-            VT, DL, DAG.getConstant(SplatStepVal, DL, XLenVT));
-        VID = DAG.getNode(StepOpcode, DL, VT, VID, SplatStep);
+            VIDVT, DL, DAG.getConstant(SplatStepVal, DL, XLenVT));
+        VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
       }
       if (StepDenominator != 1) {
         SDValue SplatStep = DAG.getSplatBuildVector(
-            VT, DL, DAG.getConstant(Log2_64(StepDenominator), DL, XLenVT));
-        VID = DAG.getNode(ISD::SRL, DL, VT, VID, SplatStep);
+            VIDVT, DL, DAG.getConstant(Log2_64(StepDenominator), DL, XLenVT));
+        VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
       }
       if (Addend != 0 || Negate) {
         SDValue SplatAddend = DAG.getSplatBuildVector(
-            VT, DL, DAG.getConstant(Addend, DL, XLenVT));
-        VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VT, SplatAddend, VID);
+            VIDVT, DL, DAG.getConstant(Addend, DL, XLenVT));
+        VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
+                          VID);
+      }
+      if (VT.isFloatingPoint()) {
+        // TODO: Use vfwcvt to reduce register pressure.
+        VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID);
       }
       return VID;
     }

diff  --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
index 0e04fb3149580..1bbc7f02ac6f0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
@@ -71,12 +71,9 @@ define <4 x float> @hang_when_merging_stores_after_legalization(<8 x float> %x,
 define void @buildvec_dominant0_v2f32(<2 x float>* %x) {
 ; CHECK-LABEL: buildvec_dominant0_v2f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI2_0)
-; CHECK-NEXT:    addi a1, a1, %lo(.LCPI2_0)
 ; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
-; CHECK-NEXT:    vlse32.v v8, (a1), zero
-; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, tu, mu
-; CHECK-NEXT:    vmv.s.x v8, zero
+; CHECK-NEXT:    vid.v v8
+; CHECK-NEXT:    vfcvt.f.x.v v8, v8
 ; CHECK-NEXT:    vse32.v v8, (a0)
 ; CHECK-NEXT:    ret
   store <2 x float> <float 0.0, float 1.0>, <2 x float>* %x
@@ -89,10 +86,10 @@ define void @buildvec_dominant0_v2f32(<2 x float>* %x) {
 define void @buildvec_dominant1_v2f32(<2 x float>* %x) {
 ; CHECK-LABEL: buildvec_dominant1_v2f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI3_0)
-; CHECK-NEXT:    addi a1, a1, %lo(.LCPI3_0)
 ; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
-; CHECK-NEXT:    vle32.v v8, (a1)
+; CHECK-NEXT:    vid.v v8
+; CHECK-NEXT:    vadd.vi v8, v8, 1
+; CHECK-NEXT:    vfcvt.f.x.v v8, v8
 ; CHECK-NEXT:    vse32.v v8, (a0)
 ; CHECK-NEXT:    ret
   store <2 x float> <float 1.0, float 2.0>, <2 x float>* %x

diff  --git a/llvm/test/CodeGen/RISCV/rvv/vle_vid-vfcvt.ll b/llvm/test/CodeGen/RISCV/rvv/vle_vid-vfcvt.ll
new file mode 100644
index 0000000000000..6bca92cfa02a9
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vle_vid-vfcvt.ll
@@ -0,0 +1,114 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v -verify-machineinstrs | FileCheck %s
+
+define void @foo_1(ptr nocapture noundef writeonly %t) {
+; CHECK-LABEL: foo_1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lui a1, %hi(.LCPI0_0)
+; CHECK-NEXT:    addi a1, a1, %lo(.LCPI0_0)
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT:    vle32.v v8, (a1)
+; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    ret
+entry:
+  store <4 x float> <float -2147483648.0, float -2147483520.0, float -2147483392.0, float -2147483264.0>, ptr %t, align 16
+  ret void
+}
+
+define void @foo_2(ptr nocapture noundef writeonly %t) {
+; CHECK-LABEL: foo_2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lui a1, %hi(.LCPI1_0)
+; CHECK-NEXT:    addi a1, a1, %lo(.LCPI1_0)
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT:    vle32.v v8, (a1)
+; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    ret
+entry:
+  store <4 x float> <float 2147483136.0, float 2147483264.0, float 2147483392.0, float 2147483520.0>, ptr %t, align 16
+  ret void
+}
+
+define void @foo_3(ptr nocapture noundef writeonly %t) {
+; CHECK-LABEL: foo_3:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT:    vid.v v8
+; CHECK-NEXT:    vadd.vi v8, v8, -1
+; CHECK-NEXT:    vfcvt.f.x.v v8, v8
+; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    ret
+entry:
+  store <4 x float> <float -1.0, float 0.0, float 1.0, float 2.0>, ptr %t, align 16
+  ret void
+}
+
+define void @foo_4(ptr nocapture noundef writeonly %t) {
+; CHECK-LABEL: foo_4:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT:    vid.v v8
+; CHECK-NEXT:    vsll.vi v8, v8, 10
+; CHECK-NEXT:    vadd.vi v8, v8, -16
+; CHECK-NEXT:    vfcvt.f.x.v v8, v8
+; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    ret
+entry:
+  store <4 x float> <float -16.0, float 1008.0, float 2032.0, float 3056.0>, ptr %t, align 16
+  ret void
+}
+
+define void @foo_5(ptr nocapture noundef writeonly %t) {
+; CHECK-LABEL: foo_5:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT:    vid.v v8
+; CHECK-NEXT:    vfcvt.f.x.v v8, v8
+; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    ret
+entry:
+  store <4 x float> <float 0.0, float 1.0, float 2.0, float 3.0>, ptr %t, align 16
+  ret void
+}
+
+define void @foo_6(ptr nocapture noundef writeonly %t) {
+; CHECK-LABEL: foo_6:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lui a1, %hi(.LCPI5_0)
+; CHECK-NEXT:    addi a1, a1, %lo(.LCPI5_0)
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT:    vle32.v v8, (a1)
+; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    ret
+entry:
+  store <4 x float> <float -0.0, float 1.0, float 2.0, float 3.0>, ptr %t, align 16
+  ret void
+}
+
+define void @foo_7(ptr nocapture noundef writeonly %t) {
+; CHECK-LABEL: foo_7:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lui a1, %hi(.LCPI6_0)
+; CHECK-NEXT:    addi a1, a1, %lo(.LCPI6_0)
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT:    vle32.v v8, (a1)
+; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    ret
+entry:
+  store <4 x float> <float -3.0, float -2.0, float -1.0, float -0.0>, ptr %t, align 16
+  ret void
+}
+
+define void @foo_8(ptr nocapture noundef writeonly %t) {
+; CHECK-LABEL: foo_8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lui a1, %hi(.LCPI7_0)
+; CHECK-NEXT:    addi a1, a1, %lo(.LCPI7_0)
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT:    vle32.v v8, (a1)
+; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    ret
+entry:
+  store <4 x float> <float 1.5, float 2.5, float 3.5, float 4.5>, ptr %t, align 16
+  ret void
+}