[llvm] [RISCV] Teach fillUpExtensionSupportForSplat to handle nxvXi64 VMV_V_X_VL on RV32. (PR #99251)

via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 16 16:00:47 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-risc-v

Author: Craig Topper (topperc)

<details>
<summary>Changes</summary>

An nxvXi64 VMV_V_X_VL on RV32 sign-extends its 32-bit input to 64 bits. If that input is positive, the sign extension can also be considered a zero extension.

---

Patch is 87.08 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/99251.diff


16 Files Affected:

- (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+15-3) 
- (modified) llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll (+102-220) 
- (modified) llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll (+64-132) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll (+60-124) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll (+70-146) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-rotate.ll (+99-81) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrol.ll (+86-38) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vror.ll (+120-60) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwadd.ll (+21-51) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll (+21-48) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsll.ll (+6-13) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub.ll (+24-54) 
- (modified) llvm/test/CodeGen/RISCV/rvv/vrol-sdnode.ll (+32-20) 
- (modified) llvm/test/CodeGen/RISCV/rvv/vror-sdnode.ll (+32-20) 
- (modified) llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll (+6-15) 
- (modified) llvm/test/CodeGen/RISCV/rvv/vwsll-sdnode.ll (+9-13) 


``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 8b5e56bff4097..1e2d25109204a 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -14477,10 +14477,22 @@ struct NodeExtensionHelper {
     // widening opcode by splatting to smaller element size.
     unsigned EltBits = VT.getScalarSizeInBits();
     unsigned ScalarBits = Op.getValueSizeInBits();
-    // Make sure we're getting all element bits from the scalar register.
-    // FIXME: Support implicit sign extension of vmv.v.x?
-    if (ScalarBits < EltBits)
+    // Make sure we're not getting all bits from the element, we need special
+    // handling.
+    if (ScalarBits < EltBits) {
+      // This should only occur on RV32.
+      assert(Opc == RISCVISD::VMV_V_X_VL && EltBits == 64 && ScalarBits == 32 &&
+             !Subtarget.is64Bit() && "Unexpected splat");
+      // vmv.v.x sign extends narrow inputs.
+      SupportsSExt = true;
+
+      // If the input is positive, then sign extend is also zero extend.
+      if (DAG.SignBitIsZero(Op))
+        SupportsZExt = true;
+
+      EnforceOneUse = false;
       return;
+    }
 
     unsigned NarrowSize = EltBits / 2;
     // If the narrow type cannot be expressed with a legal VMV,
diff --git a/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll
index 6e538f3dfb38e..d51f5eacd7d91 100644
--- a/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll
@@ -1229,36 +1229,20 @@ define <vscale x 1 x i64> @ctlz_nxv1i64(<vscale x 1 x i64> %va) {
 ; RV64I-NEXT:    vsrl.vx v8, v8, a0
 ; RV64I-NEXT:    ret
 ;
-; RV32F-LABEL: ctlz_nxv1i64:
-; RV32F:       # %bb.0:
-; RV32F-NEXT:    li a0, 190
-; RV32F-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
-; RV32F-NEXT:    vmv.v.x v9, a0
-; RV32F-NEXT:    fsrmi a0, 1
-; RV32F-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; RV32F-NEXT:    vfncvt.f.xu.w v10, v8
-; RV32F-NEXT:    vsrl.vi v8, v10, 23
-; RV32F-NEXT:    vwsubu.wv v9, v9, v8
-; RV32F-NEXT:    li a1, 64
-; RV32F-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
-; RV32F-NEXT:    vminu.vx v8, v9, a1
-; RV32F-NEXT:    fsrm a0
-; RV32F-NEXT:    ret
-;
-; RV64F-LABEL: ctlz_nxv1i64:
-; RV64F:       # %bb.0:
-; RV64F-NEXT:    li a0, 190
-; RV64F-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
-; RV64F-NEXT:    vmv.v.x v9, a0
-; RV64F-NEXT:    fsrmi a0, 1
-; RV64F-NEXT:    vfncvt.f.xu.w v10, v8
-; RV64F-NEXT:    vsrl.vi v8, v10, 23
-; RV64F-NEXT:    vwsubu.vv v10, v9, v8
-; RV64F-NEXT:    li a1, 64
-; RV64F-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
-; RV64F-NEXT:    vminu.vx v8, v10, a1
-; RV64F-NEXT:    fsrm a0
-; RV64F-NEXT:    ret
+; CHECK-F-LABEL: ctlz_nxv1i64:
+; CHECK-F:       # %bb.0:
+; CHECK-F-NEXT:    li a0, 190
+; CHECK-F-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
+; CHECK-F-NEXT:    vmv.v.x v9, a0
+; CHECK-F-NEXT:    fsrmi a0, 1
+; CHECK-F-NEXT:    vfncvt.f.xu.w v10, v8
+; CHECK-F-NEXT:    vsrl.vi v8, v10, 23
+; CHECK-F-NEXT:    vwsubu.vv v10, v9, v8
+; CHECK-F-NEXT:    li a1, 64
+; CHECK-F-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
+; CHECK-F-NEXT:    vminu.vx v8, v10, a1
+; CHECK-F-NEXT:    fsrm a0
+; CHECK-F-NEXT:    ret
 ;
 ; CHECK-D-LABEL: ctlz_nxv1i64:
 ; CHECK-D:       # %bb.0:
@@ -1385,36 +1369,20 @@ define <vscale x 2 x i64> @ctlz_nxv2i64(<vscale x 2 x i64> %va) {
 ; RV64I-NEXT:    vsrl.vx v8, v8, a0
 ; RV64I-NEXT:    ret
 ;
-; RV32F-LABEL: ctlz_nxv2i64:
-; RV32F:       # %bb.0:
-; RV32F-NEXT:    li a0, 190
-; RV32F-NEXT:    vsetvli a1, zero, e64, m2, ta, ma
-; RV32F-NEXT:    vmv.v.x v10, a0
-; RV32F-NEXT:    fsrmi a0, 1
-; RV32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; RV32F-NEXT:    vfncvt.f.xu.w v12, v8
-; RV32F-NEXT:    vsrl.vi v8, v12, 23
-; RV32F-NEXT:    vwsubu.wv v10, v10, v8
-; RV32F-NEXT:    li a1, 64
-; RV32F-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; RV32F-NEXT:    vminu.vx v8, v10, a1
-; RV32F-NEXT:    fsrm a0
-; RV32F-NEXT:    ret
-;
-; RV64F-LABEL: ctlz_nxv2i64:
-; RV64F:       # %bb.0:
-; RV64F-NEXT:    li a0, 190
-; RV64F-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; RV64F-NEXT:    vmv.v.x v10, a0
-; RV64F-NEXT:    fsrmi a0, 1
-; RV64F-NEXT:    vfncvt.f.xu.w v11, v8
-; RV64F-NEXT:    vsrl.vi v8, v11, 23
-; RV64F-NEXT:    vwsubu.vv v12, v10, v8
-; RV64F-NEXT:    li a1, 64
-; RV64F-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; RV64F-NEXT:    vminu.vx v8, v12, a1
-; RV64F-NEXT:    fsrm a0
-; RV64F-NEXT:    ret
+; CHECK-F-LABEL: ctlz_nxv2i64:
+; CHECK-F:       # %bb.0:
+; CHECK-F-NEXT:    li a0, 190
+; CHECK-F-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-F-NEXT:    vmv.v.x v10, a0
+; CHECK-F-NEXT:    fsrmi a0, 1
+; CHECK-F-NEXT:    vfncvt.f.xu.w v11, v8
+; CHECK-F-NEXT:    vsrl.vi v8, v11, 23
+; CHECK-F-NEXT:    vwsubu.vv v12, v10, v8
+; CHECK-F-NEXT:    li a1, 64
+; CHECK-F-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-F-NEXT:    vminu.vx v8, v12, a1
+; CHECK-F-NEXT:    fsrm a0
+; CHECK-F-NEXT:    ret
 ;
 ; CHECK-D-LABEL: ctlz_nxv2i64:
 ; CHECK-D:       # %bb.0:
@@ -1541,36 +1509,20 @@ define <vscale x 4 x i64> @ctlz_nxv4i64(<vscale x 4 x i64> %va) {
 ; RV64I-NEXT:    vsrl.vx v8, v8, a0
 ; RV64I-NEXT:    ret
 ;
-; RV32F-LABEL: ctlz_nxv4i64:
-; RV32F:       # %bb.0:
-; RV32F-NEXT:    li a0, 190
-; RV32F-NEXT:    vsetvli a1, zero, e64, m4, ta, ma
-; RV32F-NEXT:    vmv.v.x v12, a0
-; RV32F-NEXT:    fsrmi a0, 1
-; RV32F-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; RV32F-NEXT:    vfncvt.f.xu.w v16, v8
-; RV32F-NEXT:    vsrl.vi v8, v16, 23
-; RV32F-NEXT:    vwsubu.wv v12, v12, v8
-; RV32F-NEXT:    li a1, 64
-; RV32F-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; RV32F-NEXT:    vminu.vx v8, v12, a1
-; RV32F-NEXT:    fsrm a0
-; RV32F-NEXT:    ret
-;
-; RV64F-LABEL: ctlz_nxv4i64:
-; RV64F:       # %bb.0:
-; RV64F-NEXT:    li a0, 190
-; RV64F-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; RV64F-NEXT:    vmv.v.x v12, a0
-; RV64F-NEXT:    fsrmi a0, 1
-; RV64F-NEXT:    vfncvt.f.xu.w v14, v8
-; RV64F-NEXT:    vsrl.vi v8, v14, 23
-; RV64F-NEXT:    vwsubu.vv v16, v12, v8
-; RV64F-NEXT:    li a1, 64
-; RV64F-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; RV64F-NEXT:    vminu.vx v8, v16, a1
-; RV64F-NEXT:    fsrm a0
-; RV64F-NEXT:    ret
+; CHECK-F-LABEL: ctlz_nxv4i64:
+; CHECK-F:       # %bb.0:
+; CHECK-F-NEXT:    li a0, 190
+; CHECK-F-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; CHECK-F-NEXT:    vmv.v.x v12, a0
+; CHECK-F-NEXT:    fsrmi a0, 1
+; CHECK-F-NEXT:    vfncvt.f.xu.w v14, v8
+; CHECK-F-NEXT:    vsrl.vi v8, v14, 23
+; CHECK-F-NEXT:    vwsubu.vv v16, v12, v8
+; CHECK-F-NEXT:    li a1, 64
+; CHECK-F-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; CHECK-F-NEXT:    vminu.vx v8, v16, a1
+; CHECK-F-NEXT:    fsrm a0
+; CHECK-F-NEXT:    ret
 ;
 ; CHECK-D-LABEL: ctlz_nxv4i64:
 ; CHECK-D:       # %bb.0:
@@ -1697,36 +1649,20 @@ define <vscale x 8 x i64> @ctlz_nxv8i64(<vscale x 8 x i64> %va) {
 ; RV64I-NEXT:    vsrl.vx v8, v8, a0
 ; RV64I-NEXT:    ret
 ;
-; RV32F-LABEL: ctlz_nxv8i64:
-; RV32F:       # %bb.0:
-; RV32F-NEXT:    li a0, 190
-; RV32F-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
-; RV32F-NEXT:    vmv.v.x v16, a0
-; RV32F-NEXT:    fsrmi a0, 1
-; RV32F-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; RV32F-NEXT:    vfncvt.f.xu.w v24, v8
-; RV32F-NEXT:    vsrl.vi v8, v24, 23
-; RV32F-NEXT:    vwsubu.wv v16, v16, v8
-; RV32F-NEXT:    li a1, 64
-; RV32F-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32F-NEXT:    vminu.vx v8, v16, a1
-; RV32F-NEXT:    fsrm a0
-; RV32F-NEXT:    ret
-;
-; RV64F-LABEL: ctlz_nxv8i64:
-; RV64F:       # %bb.0:
-; RV64F-NEXT:    li a0, 190
-; RV64F-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
-; RV64F-NEXT:    vmv.v.x v16, a0
-; RV64F-NEXT:    fsrmi a0, 1
-; RV64F-NEXT:    vfncvt.f.xu.w v20, v8
-; RV64F-NEXT:    vsrl.vi v8, v20, 23
-; RV64F-NEXT:    vwsubu.vv v24, v16, v8
-; RV64F-NEXT:    li a1, 64
-; RV64F-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV64F-NEXT:    vminu.vx v8, v24, a1
-; RV64F-NEXT:    fsrm a0
-; RV64F-NEXT:    ret
+; CHECK-F-LABEL: ctlz_nxv8i64:
+; CHECK-F:       # %bb.0:
+; CHECK-F-NEXT:    li a0, 190
+; CHECK-F-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
+; CHECK-F-NEXT:    vmv.v.x v16, a0
+; CHECK-F-NEXT:    fsrmi a0, 1
+; CHECK-F-NEXT:    vfncvt.f.xu.w v20, v8
+; CHECK-F-NEXT:    vsrl.vi v8, v20, 23
+; CHECK-F-NEXT:    vwsubu.vv v24, v16, v8
+; CHECK-F-NEXT:    li a1, 64
+; CHECK-F-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; CHECK-F-NEXT:    vminu.vx v8, v24, a1
+; CHECK-F-NEXT:    fsrm a0
+; CHECK-F-NEXT:    ret
 ;
 ; CHECK-D-LABEL: ctlz_nxv8i64:
 ; CHECK-D:       # %bb.0:
@@ -2895,31 +2831,17 @@ define <vscale x 1 x i64> @ctlz_zero_undef_nxv1i64(<vscale x 1 x i64> %va) {
 ; RV64I-NEXT:    vsrl.vx v8, v8, a0
 ; RV64I-NEXT:    ret
 ;
-; RV32F-LABEL: ctlz_zero_undef_nxv1i64:
-; RV32F:       # %bb.0:
-; RV32F-NEXT:    li a0, 190
-; RV32F-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
-; RV32F-NEXT:    vmv.v.x v9, a0
-; RV32F-NEXT:    fsrmi a0, 1
-; RV32F-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; RV32F-NEXT:    vfncvt.f.xu.w v10, v8
-; RV32F-NEXT:    vsrl.vi v8, v10, 23
-; RV32F-NEXT:    vwsubu.wv v9, v9, v8
-; RV32F-NEXT:    fsrm a0
-; RV32F-NEXT:    vmv1r.v v8, v9
-; RV32F-NEXT:    ret
-;
-; RV64F-LABEL: ctlz_zero_undef_nxv1i64:
-; RV64F:       # %bb.0:
-; RV64F-NEXT:    li a0, 190
-; RV64F-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
-; RV64F-NEXT:    vmv.v.x v9, a0
-; RV64F-NEXT:    fsrmi a0, 1
-; RV64F-NEXT:    vfncvt.f.xu.w v10, v8
-; RV64F-NEXT:    vsrl.vi v10, v10, 23
-; RV64F-NEXT:    vwsubu.vv v8, v9, v10
-; RV64F-NEXT:    fsrm a0
-; RV64F-NEXT:    ret
+; CHECK-F-LABEL: ctlz_zero_undef_nxv1i64:
+; CHECK-F:       # %bb.0:
+; CHECK-F-NEXT:    li a0, 190
+; CHECK-F-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
+; CHECK-F-NEXT:    vmv.v.x v9, a0
+; CHECK-F-NEXT:    fsrmi a0, 1
+; CHECK-F-NEXT:    vfncvt.f.xu.w v10, v8
+; CHECK-F-NEXT:    vsrl.vi v10, v10, 23
+; CHECK-F-NEXT:    vwsubu.vv v8, v9, v10
+; CHECK-F-NEXT:    fsrm a0
+; CHECK-F-NEXT:    ret
 ;
 ; CHECK-D-LABEL: ctlz_zero_undef_nxv1i64:
 ; CHECK-D:       # %bb.0:
@@ -3043,31 +2965,17 @@ define <vscale x 2 x i64> @ctlz_zero_undef_nxv2i64(<vscale x 2 x i64> %va) {
 ; RV64I-NEXT:    vsrl.vx v8, v8, a0
 ; RV64I-NEXT:    ret
 ;
-; RV32F-LABEL: ctlz_zero_undef_nxv2i64:
-; RV32F:       # %bb.0:
-; RV32F-NEXT:    li a0, 190
-; RV32F-NEXT:    vsetvli a1, zero, e64, m2, ta, ma
-; RV32F-NEXT:    vmv.v.x v10, a0
-; RV32F-NEXT:    fsrmi a0, 1
-; RV32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; RV32F-NEXT:    vfncvt.f.xu.w v12, v8
-; RV32F-NEXT:    vsrl.vi v8, v12, 23
-; RV32F-NEXT:    vwsubu.wv v10, v10, v8
-; RV32F-NEXT:    fsrm a0
-; RV32F-NEXT:    vmv2r.v v8, v10
-; RV32F-NEXT:    ret
-;
-; RV64F-LABEL: ctlz_zero_undef_nxv2i64:
-; RV64F:       # %bb.0:
-; RV64F-NEXT:    li a0, 190
-; RV64F-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; RV64F-NEXT:    vmv.v.x v10, a0
-; RV64F-NEXT:    fsrmi a0, 1
-; RV64F-NEXT:    vfncvt.f.xu.w v11, v8
-; RV64F-NEXT:    vsrl.vi v11, v11, 23
-; RV64F-NEXT:    vwsubu.vv v8, v10, v11
-; RV64F-NEXT:    fsrm a0
-; RV64F-NEXT:    ret
+; CHECK-F-LABEL: ctlz_zero_undef_nxv2i64:
+; CHECK-F:       # %bb.0:
+; CHECK-F-NEXT:    li a0, 190
+; CHECK-F-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-F-NEXT:    vmv.v.x v10, a0
+; CHECK-F-NEXT:    fsrmi a0, 1
+; CHECK-F-NEXT:    vfncvt.f.xu.w v11, v8
+; CHECK-F-NEXT:    vsrl.vi v11, v11, 23
+; CHECK-F-NEXT:    vwsubu.vv v8, v10, v11
+; CHECK-F-NEXT:    fsrm a0
+; CHECK-F-NEXT:    ret
 ;
 ; CHECK-D-LABEL: ctlz_zero_undef_nxv2i64:
 ; CHECK-D:       # %bb.0:
@@ -3191,31 +3099,17 @@ define <vscale x 4 x i64> @ctlz_zero_undef_nxv4i64(<vscale x 4 x i64> %va) {
 ; RV64I-NEXT:    vsrl.vx v8, v8, a0
 ; RV64I-NEXT:    ret
 ;
-; RV32F-LABEL: ctlz_zero_undef_nxv4i64:
-; RV32F:       # %bb.0:
-; RV32F-NEXT:    li a0, 190
-; RV32F-NEXT:    vsetvli a1, zero, e64, m4, ta, ma
-; RV32F-NEXT:    vmv.v.x v12, a0
-; RV32F-NEXT:    fsrmi a0, 1
-; RV32F-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; RV32F-NEXT:    vfncvt.f.xu.w v16, v8
-; RV32F-NEXT:    vsrl.vi v8, v16, 23
-; RV32F-NEXT:    vwsubu.wv v12, v12, v8
-; RV32F-NEXT:    fsrm a0
-; RV32F-NEXT:    vmv4r.v v8, v12
-; RV32F-NEXT:    ret
-;
-; RV64F-LABEL: ctlz_zero_undef_nxv4i64:
-; RV64F:       # %bb.0:
-; RV64F-NEXT:    li a0, 190
-; RV64F-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; RV64F-NEXT:    vmv.v.x v12, a0
-; RV64F-NEXT:    fsrmi a0, 1
-; RV64F-NEXT:    vfncvt.f.xu.w v14, v8
-; RV64F-NEXT:    vsrl.vi v14, v14, 23
-; RV64F-NEXT:    vwsubu.vv v8, v12, v14
-; RV64F-NEXT:    fsrm a0
-; RV64F-NEXT:    ret
+; CHECK-F-LABEL: ctlz_zero_undef_nxv4i64:
+; CHECK-F:       # %bb.0:
+; CHECK-F-NEXT:    li a0, 190
+; CHECK-F-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; CHECK-F-NEXT:    vmv.v.x v12, a0
+; CHECK-F-NEXT:    fsrmi a0, 1
+; CHECK-F-NEXT:    vfncvt.f.xu.w v14, v8
+; CHECK-F-NEXT:    vsrl.vi v14, v14, 23
+; CHECK-F-NEXT:    vwsubu.vv v8, v12, v14
+; CHECK-F-NEXT:    fsrm a0
+; CHECK-F-NEXT:    ret
 ;
 ; CHECK-D-LABEL: ctlz_zero_undef_nxv4i64:
 ; CHECK-D:       # %bb.0:
@@ -3339,31 +3233,17 @@ define <vscale x 8 x i64> @ctlz_zero_undef_nxv8i64(<vscale x 8 x i64> %va) {
 ; RV64I-NEXT:    vsrl.vx v8, v8, a0
 ; RV64I-NEXT:    ret
 ;
-; RV32F-LABEL: ctlz_zero_undef_nxv8i64:
-; RV32F:       # %bb.0:
-; RV32F-NEXT:    li a0, 190
-; RV32F-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
-; RV32F-NEXT:    vmv.v.x v16, a0
-; RV32F-NEXT:    fsrmi a0, 1
-; RV32F-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; RV32F-NEXT:    vfncvt.f.xu.w v24, v8
-; RV32F-NEXT:    vsrl.vi v8, v24, 23
-; RV32F-NEXT:    vwsubu.wv v16, v16, v8
-; RV32F-NEXT:    fsrm a0
-; RV32F-NEXT:    vmv8r.v v8, v16
-; RV32F-NEXT:    ret
-;
-; RV64F-LABEL: ctlz_zero_undef_nxv8i64:
-; RV64F:       # %bb.0:
-; RV64F-NEXT:    li a0, 190
-; RV64F-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
-; RV64F-NEXT:    vmv.v.x v16, a0
-; RV64F-NEXT:    fsrmi a0, 1
-; RV64F-NEXT:    vfncvt.f.xu.w v20, v8
-; RV64F-NEXT:    vsrl.vi v20, v20, 23
-; RV64F-NEXT:    vwsubu.vv v8, v16, v20
-; RV64F-NEXT:    fsrm a0
-; RV64F-NEXT:    ret
+; CHECK-F-LABEL: ctlz_zero_undef_nxv8i64:
+; CHECK-F:       # %bb.0:
+; CHECK-F-NEXT:    li a0, 190
+; CHECK-F-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
+; CHECK-F-NEXT:    vmv.v.x v16, a0
+; CHECK-F-NEXT:    fsrmi a0, 1
+; CHECK-F-NEXT:    vfncvt.f.xu.w v20, v8
+; CHECK-F-NEXT:    vsrl.vi v20, v20, 23
+; CHECK-F-NEXT:    vwsubu.vv v8, v16, v20
+; CHECK-F-NEXT:    fsrm a0
+; CHECK-F-NEXT:    ret
 ;
 ; CHECK-D-LABEL: ctlz_zero_undef_nxv8i64:
 ; CHECK-D:       # %bb.0:
@@ -3387,4 +3267,6 @@ define <vscale x 8 x i64> @ctlz_zero_undef_nxv8i64(<vscale x 8 x i64> %va) {
 }
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 ; RV32: {{.*}}
+; RV32F: {{.*}}
 ; RV64: {{.*}}
+; RV64F: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll
index 479664c6f5f62..9737107974075 100644
--- a/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll
@@ -1223,12 +1223,11 @@ define <vscale x 1 x i64> @cttz_nxv1i64(<vscale x 1 x i64> %va) {
 ; RV32F-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
 ; RV32F-NEXT:    vfncvt.f.xu.w v9, v8
 ; RV32F-NEXT:    vsrl.vi v8, v9, 23
-; RV32F-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
-; RV32F-NEXT:    vzext.vf2 v9, v8
 ; RV32F-NEXT:    li a1, 127
-; RV32F-NEXT:    vsub.vx v8, v9, a1
+; RV32F-NEXT:    vwsubu.vx v9, v8, a1
 ; RV32F-NEXT:    li a1, 64
-; RV32F-NEXT:    vmerge.vxm v8, v8, a1, v0
+; RV32F-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
+; RV32F-NEXT:    vmerge.vxm v8, v9, a1, v0
 ; RV32F-NEXT:    fsrm a0
 ; RV32F-NEXT:    ret
 ;
@@ -1385,12 +1384,11 @@ define <vscale x 2 x i64> @cttz_nxv2i64(<vscale x 2 x i64> %va) {
 ; RV32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
 ; RV32F-NEXT:    vfncvt.f.xu.w v10, v8
 ; RV32F-NEXT:    vsrl.vi v8, v10, 23
-; RV32F-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; RV32F-NEXT:    vzext.vf2 v10, v8
 ; RV32F-NEXT:    li a1, 127
-; RV32F-NEXT:    vsub.vx v8, v10, a1
+; RV32F-NEXT:    vwsubu.vx v10, v8, a1
 ; RV32F-NEXT:    li a1, 64
-; RV32F-NEXT:    vmerge.vxm v8, v8, a1, v0
+; RV32F-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; RV32F-NEXT:    vmerge.vxm v8, v10, a1, v0
 ; RV32F-NEXT:    fsrm a0
 ; RV32F-NEXT:    ret
 ;
@@ -1547,12 +1545,11 @@ define <vscale x 4 x i64> @cttz_nxv4i64(<vscale x 4 x i64> %va) {
 ; RV32F-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; RV32F-NEXT:    vfncvt.f.xu.w v12, v8
 ; RV32F-NEXT:    vsrl.vi v8, v12, 23
-; RV32F-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
-; RV32F-NEXT:    vzext.vf2 v12, v8
 ; RV32F-NEXT:    li a1, 127
-; RV32F-NEXT:    vsub.vx v8, v12, a1
+; RV32F-NEXT:    vwsubu.vx v12, v8, a1
 ; RV32F-NEXT:    li a1, 64
-; RV32F-NEXT:    vmerge.vxm v8, v8, a1, v0
+; RV32F-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; RV32F-NEXT:    vmerge.vxm v8, v12, a1, v0
 ; RV32F-NEXT:    fsrm a0
 ; RV32F-NEXT:    ret
 ;
@@ -1709,12 +1706,11 @@ define <vscale x 8 x i64> @cttz_nxv8i64(<vscale x 8 x i64> %va) {
 ; RV32F-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; RV32F-NEXT:    vfncvt.f.xu.w v16, v8
 ; RV32F-NEXT:    vsrl.vi v8, v16, 23
-; RV32F-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
-; RV32F-NEXT:    vzext.vf2 v16, v8
 ; RV32F-NEXT:    li a1, 127
-; RV32F-NEXT:    vsub.vx v8, v16, a1
+; RV32F-NEXT:    vwsubu.vx v16, v8, a1
 ; RV32F-NEXT:    li a1, 64
-; RV32F-NEXT:    vmerge.vxm v8, v8, a1, v0
+; RV32F-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
+; RV32F-NEXT:    vmerge.vxm v8, v16, a1, v0
 ; RV32F-NEXT:    fsrm a0
 ; RV32F-NEXT:    ret
 ;
@@ -2887,35 +2883,19 @@ define <vscale x 1 x i64> @cttz_zero_undef_nxv1i64(<vscale x 1 x i64> %va) {
 ; RV64I-NEXT:    vsrl.vx v8, v8, a0
 ; RV64I-NEXT:    ret
 ;
-; RV32F-LABEL: cttz_zero_undef_nxv1i64:
-; RV32F:       # %bb.0:
-; RV32F-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
-; RV32F-NEXT:    vrsub.vi v9, v8, 0
-; RV32F-NEXT:    vand.vv v8, v8, v9
-; RV32F-NEXT:    fsrmi a0, 1
-; RV32F-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; RV32F-NEXT:    vfncvt.f.xu.w v9, v8
-; RV32F-NEXT:    vsrl.vi v8, v9, 23
-; RV32F-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
-; RV32F-NEXT:    vzext.vf2 v9, v8
-; RV32F-NEXT:    li a1, 127
-; RV32F-NEXT:    vsub.vx v8, v9, a1
-; RV32F-NEXT:    fsrm a0
-; RV32F-NEXT:    ret
-;
-; RV64F-LABEL: cttz_zero_undef_nxv1i64:
-; RV64F:       # %bb.0:
-; RV64F-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
-; RV64F-NEXT:    vrsub.vi v9, v8, 0
-; RV64F-NEXT:    vand.vv v8, v8, v9
-; RV64F-NEXT:    fsrmi a0, 1
-; RV64F-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; RV64F-NEXT:    vfncvt.f.xu.w v9, v8
-; RV64F-NEXT:    vsrl.vi v9, v9, 23
-; RV64F-NEXT:    li a1, 127
-; RV64F-NEXT:    vwsubu.vx v8, v9, a1
-; RV64F-NEXT:    fsrm a0
-; RV64F-NEXT:    ret
+; CHECK-F-LABEL: cttz_zero_undef_nxv1i64:
+; CHECK-F:       # %bb.0:
+; CHECK-F-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-F-NEXT:    vrsub.vi v9, v8, 0
+; CHECK-F-NEXT:    vand.vv v8, v8, v9
+; CHECK-F-NEXT:    fsrmi a0, 1
+; CHECK-F-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-F-NEXT:    vfncvt.f.xu.w v9, v8
+; CHECK-F-NEXT:    vsrl.vi v9, v9, 23
+; CHECK-F-NEXT:    li a1, 127
+; CHECK-F-NEXT:    vwsubu.vx v8, v9, a1
+; CHECK-F-NEXT:    fsrm a0
+; CHECK-F-NEXT:    ret
 ;
 ; CHECK-D-LABEL: cttz_zero_undef_nxv1i64:
 ; CHECK-D:       # %bb.0:
@@ -3021,35 +3001,19 @@ define <vscale x 2 x i64> @cttz_zero_undef_nxv2i64(<vscale x 2 x i64> %va) {
 ; RV64I-NEXT:    vsrl.vx v8, v8, a0
 ; RV64I-NEXT:    ret
 ;
-; RV32F-LABEL: cttz_zero_undef_nxv2i64:
-; RV32F:       # %bb.0:
-; RV32F-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; RV32F-NEXT:    vrsub.vi v10, v8, 0
-; RV32F-NEXT:    vand.vv v8, v8, v10
-; RV32F-NEXT:    fsrmi a0, 1
-; RV32F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; RV32F-NEXT:    vfncvt.f.xu.w v10, v8
-; RV32F-NEXT:    vsrl.vi v8, v10, 23
-; RV32F-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; RV32F-NEXT:    vzext.vf2 v10, v8
-; RV32F-NEXT:    li a1, 127
-; RV32F-NEXT:    vsub.vx v8, v10, a1
-; RV32F-NEXT:    fsrm a0
-; RV32F-NEXT:    ret
-;
-; RV64F-LABEL: cttz_zero_undef_nxv2i64:
-; RV64F:       # %bb.0:
-; RV64F-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; RV64F-NEXT:    vrsub.vi v10, v8, 0
-; RV64F-NEXT:    vand.vv v8, v8, v10
-; RV64F-NEXT:    fsrmi a0, 1
-; RV64F-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; RV64F-NEXT:    vfncvt.f.xu.w v10, v8
-; RV64F-NEXT:    vsrl.vi v10, v10, 23
-; RV64F-NEXT:    li a1, 127...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/99251


More information about the llvm-commits mailing list