[llvm] [RISCV] Teach fillUpExtensionSupportForSplat to handle nxvXi64 VMV_V_X_VL on RV32. (PR #99251)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 16 16:00:47 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-risc-v
Author: Craig Topper (topperc)
<details>
<summary>Changes</summary>
A nxvXi64 VMV_V_X_VL on RV32 sign extends its 32 bit input to 64 bits. If that input is positive, the sign extend can also be considered as a zero extend.
---
Patch is 87.08 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/99251.diff
16 Files Affected:
- (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+15-3)
- (modified) llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll (+102-220)
- (modified) llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll (+64-132)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll (+60-124)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll (+70-146)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-rotate.ll (+99-81)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrol.ll (+86-38)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vror.ll (+120-60)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwadd.ll (+21-51)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll (+21-48)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsll.ll (+6-13)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub.ll (+24-54)
- (modified) llvm/test/CodeGen/RISCV/rvv/vrol-sdnode.ll (+32-20)
- (modified) llvm/test/CodeGen/RISCV/rvv/vror-sdnode.ll (+32-20)
- (modified) llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll (+6-15)
- (modified) llvm/test/CodeGen/RISCV/rvv/vwsll-sdnode.ll (+9-13)
``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 8b5e56bff4097..1e2d25109204a 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -14477,10 +14477,22 @@ struct NodeExtensionHelper {
// widening opcode by splatting to smaller element size.
unsigned EltBits = VT.getScalarSizeInBits();
unsigned ScalarBits = Op.getValueSizeInBits();
- // Make sure we're getting all element bits from the scalar register.
- // FIXME: Support implicit sign extension of vmv.v.x?
- if (ScalarBits < EltBits)
+  // If we're not getting all bits from the element, we need special
+  // handling.
+ if (ScalarBits < EltBits) {
+ // This should only occur on RV32.
+ assert(Opc == RISCVISD::VMV_V_X_VL && EltBits == 64 && ScalarBits == 32 &&
+ !Subtarget.is64Bit() && "Unexpected splat");
+ // vmv.v.x sign extends narrow inputs.
+ SupportsSExt = true;
+
+ // If the input is positive, then sign extend is also zero extend.
+ if (DAG.SignBitIsZero(Op))
+ SupportsZExt = true;
+
+ EnforceOneUse = false;
return;
+ }
unsigned NarrowSize = EltBits / 2;
// If the narrow type cannot be expressed with a legal VMV,
diff --git a/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll
index 6e538f3dfb38e..d51f5eacd7d91 100644
--- a/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll
@@ -1229,36 +1229,20 @@ define <vscale x 1 x i64> @ctlz_nxv1i64(<vscale x 1 x i64> %va) {
; RV64I-NEXT: vsrl.vx v8, v8, a0
; RV64I-NEXT: ret
;
-; RV32F-LABEL: ctlz_nxv1i64:
-; RV32F: # %bb.0:
-; RV32F-NEXT: li a0, 190
-; RV32F-NEXT: vsetvli a1, zero, e64, m1, ta, ma
-; RV32F-NEXT: vmv.v.x v9, a0
-; RV32F-NEXT: fsrmi a0, 1
-; RV32F-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; RV32F-NEXT: vfncvt.f.xu.w v10, v8
-; RV32F-NEXT: vsrl.vi v8, v10, 23
-; RV32F-NEXT: vwsubu.wv v9, v9, v8
-; RV32F-NEXT: li a1, 64
-; RV32F-NEXT: vsetvli zero, zero, e64, m1, ta, ma
-; RV32F-NEXT: vminu.vx v8, v9, a1
-; RV32F-NEXT: fsrm a0
-; RV32F-NEXT: ret
-;
-; RV64F-LABEL: ctlz_nxv1i64:
-; RV64F: # %bb.0:
-; RV64F-NEXT: li a0, 190
-; RV64F-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
-; RV64F-NEXT: vmv.v.x v9, a0
-; RV64F-NEXT: fsrmi a0, 1
-; RV64F-NEXT: vfncvt.f.xu.w v10, v8
-; RV64F-NEXT: vsrl.vi v8, v10, 23
-; RV64F-NEXT: vwsubu.vv v10, v9, v8
-; RV64F-NEXT: li a1, 64
-; RV64F-NEXT: vsetvli zero, zero, e64, m1, ta, ma
-; RV64F-NEXT: vminu.vx v8, v10, a1
-; RV64F-NEXT: fsrm a0
-; RV64F-NEXT: ret
+; CHECK-F-LABEL: ctlz_nxv1i64:
+; CHECK-F: # %bb.0:
+; CHECK-F-NEXT: li a0, 190
+; CHECK-F-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
+; CHECK-F-NEXT: vmv.v.x v9, a0
+; CHECK-F-NEXT: fsrmi a0, 1
+; CHECK-F-NEXT: vfncvt.f.xu.w v10, v8
+; CHECK-F-NEXT: vsrl.vi v8, v10, 23
+; CHECK-F-NEXT: vwsubu.vv v10, v9, v8
+; CHECK-F-NEXT: li a1, 64
+; CHECK-F-NEXT: vsetvli zero, zero, e64, m1, ta, ma
+; CHECK-F-NEXT: vminu.vx v8, v10, a1
+; CHECK-F-NEXT: fsrm a0
+; CHECK-F-NEXT: ret
;
; CHECK-D-LABEL: ctlz_nxv1i64:
; CHECK-D: # %bb.0:
@@ -1385,36 +1369,20 @@ define <vscale x 2 x i64> @ctlz_nxv2i64(<vscale x 2 x i64> %va) {
; RV64I-NEXT: vsrl.vx v8, v8, a0
; RV64I-NEXT: ret
;
-; RV32F-LABEL: ctlz_nxv2i64:
-; RV32F: # %bb.0:
-; RV32F-NEXT: li a0, 190
-; RV32F-NEXT: vsetvli a1, zero, e64, m2, ta, ma
-; RV32F-NEXT: vmv.v.x v10, a0
-; RV32F-NEXT: fsrmi a0, 1
-; RV32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV32F-NEXT: vfncvt.f.xu.w v12, v8
-; RV32F-NEXT: vsrl.vi v8, v12, 23
-; RV32F-NEXT: vwsubu.wv v10, v10, v8
-; RV32F-NEXT: li a1, 64
-; RV32F-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV32F-NEXT: vminu.vx v8, v10, a1
-; RV32F-NEXT: fsrm a0
-; RV32F-NEXT: ret
-;
-; RV64F-LABEL: ctlz_nxv2i64:
-; RV64F: # %bb.0:
-; RV64F-NEXT: li a0, 190
-; RV64F-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; RV64F-NEXT: vmv.v.x v10, a0
-; RV64F-NEXT: fsrmi a0, 1
-; RV64F-NEXT: vfncvt.f.xu.w v11, v8
-; RV64F-NEXT: vsrl.vi v8, v11, 23
-; RV64F-NEXT: vwsubu.vv v12, v10, v8
-; RV64F-NEXT: li a1, 64
-; RV64F-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV64F-NEXT: vminu.vx v8, v12, a1
-; RV64F-NEXT: fsrm a0
-; RV64F-NEXT: ret
+; CHECK-F-LABEL: ctlz_nxv2i64:
+; CHECK-F: # %bb.0:
+; CHECK-F-NEXT: li a0, 190
+; CHECK-F-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-F-NEXT: vmv.v.x v10, a0
+; CHECK-F-NEXT: fsrmi a0, 1
+; CHECK-F-NEXT: vfncvt.f.xu.w v11, v8
+; CHECK-F-NEXT: vsrl.vi v8, v11, 23
+; CHECK-F-NEXT: vwsubu.vv v12, v10, v8
+; CHECK-F-NEXT: li a1, 64
+; CHECK-F-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-F-NEXT: vminu.vx v8, v12, a1
+; CHECK-F-NEXT: fsrm a0
+; CHECK-F-NEXT: ret
;
; CHECK-D-LABEL: ctlz_nxv2i64:
; CHECK-D: # %bb.0:
@@ -1541,36 +1509,20 @@ define <vscale x 4 x i64> @ctlz_nxv4i64(<vscale x 4 x i64> %va) {
; RV64I-NEXT: vsrl.vx v8, v8, a0
; RV64I-NEXT: ret
;
-; RV32F-LABEL: ctlz_nxv4i64:
-; RV32F: # %bb.0:
-; RV32F-NEXT: li a0, 190
-; RV32F-NEXT: vsetvli a1, zero, e64, m4, ta, ma
-; RV32F-NEXT: vmv.v.x v12, a0
-; RV32F-NEXT: fsrmi a0, 1
-; RV32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32F-NEXT: vfncvt.f.xu.w v16, v8
-; RV32F-NEXT: vsrl.vi v8, v16, 23
-; RV32F-NEXT: vwsubu.wv v12, v12, v8
-; RV32F-NEXT: li a1, 64
-; RV32F-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; RV32F-NEXT: vminu.vx v8, v12, a1
-; RV32F-NEXT: fsrm a0
-; RV32F-NEXT: ret
-;
-; RV64F-LABEL: ctlz_nxv4i64:
-; RV64F: # %bb.0:
-; RV64F-NEXT: li a0, 190
-; RV64F-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; RV64F-NEXT: vmv.v.x v12, a0
-; RV64F-NEXT: fsrmi a0, 1
-; RV64F-NEXT: vfncvt.f.xu.w v14, v8
-; RV64F-NEXT: vsrl.vi v8, v14, 23
-; RV64F-NEXT: vwsubu.vv v16, v12, v8
-; RV64F-NEXT: li a1, 64
-; RV64F-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; RV64F-NEXT: vminu.vx v8, v16, a1
-; RV64F-NEXT: fsrm a0
-; RV64F-NEXT: ret
+; CHECK-F-LABEL: ctlz_nxv4i64:
+; CHECK-F: # %bb.0:
+; CHECK-F-NEXT: li a0, 190
+; CHECK-F-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; CHECK-F-NEXT: vmv.v.x v12, a0
+; CHECK-F-NEXT: fsrmi a0, 1
+; CHECK-F-NEXT: vfncvt.f.xu.w v14, v8
+; CHECK-F-NEXT: vsrl.vi v8, v14, 23
+; CHECK-F-NEXT: vwsubu.vv v16, v12, v8
+; CHECK-F-NEXT: li a1, 64
+; CHECK-F-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; CHECK-F-NEXT: vminu.vx v8, v16, a1
+; CHECK-F-NEXT: fsrm a0
+; CHECK-F-NEXT: ret
;
; CHECK-D-LABEL: ctlz_nxv4i64:
; CHECK-D: # %bb.0:
@@ -1697,36 +1649,20 @@ define <vscale x 8 x i64> @ctlz_nxv8i64(<vscale x 8 x i64> %va) {
; RV64I-NEXT: vsrl.vx v8, v8, a0
; RV64I-NEXT: ret
;
-; RV32F-LABEL: ctlz_nxv8i64:
-; RV32F: # %bb.0:
-; RV32F-NEXT: li a0, 190
-; RV32F-NEXT: vsetvli a1, zero, e64, m8, ta, ma
-; RV32F-NEXT: vmv.v.x v16, a0
-; RV32F-NEXT: fsrmi a0, 1
-; RV32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; RV32F-NEXT: vfncvt.f.xu.w v24, v8
-; RV32F-NEXT: vsrl.vi v8, v24, 23
-; RV32F-NEXT: vwsubu.wv v16, v16, v8
-; RV32F-NEXT: li a1, 64
-; RV32F-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; RV32F-NEXT: vminu.vx v8, v16, a1
-; RV32F-NEXT: fsrm a0
-; RV32F-NEXT: ret
-;
-; RV64F-LABEL: ctlz_nxv8i64:
-; RV64F: # %bb.0:
-; RV64F-NEXT: li a0, 190
-; RV64F-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; RV64F-NEXT: vmv.v.x v16, a0
-; RV64F-NEXT: fsrmi a0, 1
-; RV64F-NEXT: vfncvt.f.xu.w v20, v8
-; RV64F-NEXT: vsrl.vi v8, v20, 23
-; RV64F-NEXT: vwsubu.vv v24, v16, v8
-; RV64F-NEXT: li a1, 64
-; RV64F-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; RV64F-NEXT: vminu.vx v8, v24, a1
-; RV64F-NEXT: fsrm a0
-; RV64F-NEXT: ret
+; CHECK-F-LABEL: ctlz_nxv8i64:
+; CHECK-F: # %bb.0:
+; CHECK-F-NEXT: li a0, 190
+; CHECK-F-NEXT: vsetvli a1, zero, e32, m4, ta, ma
+; CHECK-F-NEXT: vmv.v.x v16, a0
+; CHECK-F-NEXT: fsrmi a0, 1
+; CHECK-F-NEXT: vfncvt.f.xu.w v20, v8
+; CHECK-F-NEXT: vsrl.vi v8, v20, 23
+; CHECK-F-NEXT: vwsubu.vv v24, v16, v8
+; CHECK-F-NEXT: li a1, 64
+; CHECK-F-NEXT: vsetvli zero, zero, e64, m8, ta, ma
+; CHECK-F-NEXT: vminu.vx v8, v24, a1
+; CHECK-F-NEXT: fsrm a0
+; CHECK-F-NEXT: ret
;
; CHECK-D-LABEL: ctlz_nxv8i64:
; CHECK-D: # %bb.0:
@@ -2895,31 +2831,17 @@ define <vscale x 1 x i64> @ctlz_zero_undef_nxv1i64(<vscale x 1 x i64> %va) {
; RV64I-NEXT: vsrl.vx v8, v8, a0
; RV64I-NEXT: ret
;
-; RV32F-LABEL: ctlz_zero_undef_nxv1i64:
-; RV32F: # %bb.0:
-; RV32F-NEXT: li a0, 190
-; RV32F-NEXT: vsetvli a1, zero, e64, m1, ta, ma
-; RV32F-NEXT: vmv.v.x v9, a0
-; RV32F-NEXT: fsrmi a0, 1
-; RV32F-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; RV32F-NEXT: vfncvt.f.xu.w v10, v8
-; RV32F-NEXT: vsrl.vi v8, v10, 23
-; RV32F-NEXT: vwsubu.wv v9, v9, v8
-; RV32F-NEXT: fsrm a0
-; RV32F-NEXT: vmv1r.v v8, v9
-; RV32F-NEXT: ret
-;
-; RV64F-LABEL: ctlz_zero_undef_nxv1i64:
-; RV64F: # %bb.0:
-; RV64F-NEXT: li a0, 190
-; RV64F-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
-; RV64F-NEXT: vmv.v.x v9, a0
-; RV64F-NEXT: fsrmi a0, 1
-; RV64F-NEXT: vfncvt.f.xu.w v10, v8
-; RV64F-NEXT: vsrl.vi v10, v10, 23
-; RV64F-NEXT: vwsubu.vv v8, v9, v10
-; RV64F-NEXT: fsrm a0
-; RV64F-NEXT: ret
+; CHECK-F-LABEL: ctlz_zero_undef_nxv1i64:
+; CHECK-F: # %bb.0:
+; CHECK-F-NEXT: li a0, 190
+; CHECK-F-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
+; CHECK-F-NEXT: vmv.v.x v9, a0
+; CHECK-F-NEXT: fsrmi a0, 1
+; CHECK-F-NEXT: vfncvt.f.xu.w v10, v8
+; CHECK-F-NEXT: vsrl.vi v10, v10, 23
+; CHECK-F-NEXT: vwsubu.vv v8, v9, v10
+; CHECK-F-NEXT: fsrm a0
+; CHECK-F-NEXT: ret
;
; CHECK-D-LABEL: ctlz_zero_undef_nxv1i64:
; CHECK-D: # %bb.0:
@@ -3043,31 +2965,17 @@ define <vscale x 2 x i64> @ctlz_zero_undef_nxv2i64(<vscale x 2 x i64> %va) {
; RV64I-NEXT: vsrl.vx v8, v8, a0
; RV64I-NEXT: ret
;
-; RV32F-LABEL: ctlz_zero_undef_nxv2i64:
-; RV32F: # %bb.0:
-; RV32F-NEXT: li a0, 190
-; RV32F-NEXT: vsetvli a1, zero, e64, m2, ta, ma
-; RV32F-NEXT: vmv.v.x v10, a0
-; RV32F-NEXT: fsrmi a0, 1
-; RV32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV32F-NEXT: vfncvt.f.xu.w v12, v8
-; RV32F-NEXT: vsrl.vi v8, v12, 23
-; RV32F-NEXT: vwsubu.wv v10, v10, v8
-; RV32F-NEXT: fsrm a0
-; RV32F-NEXT: vmv2r.v v8, v10
-; RV32F-NEXT: ret
-;
-; RV64F-LABEL: ctlz_zero_undef_nxv2i64:
-; RV64F: # %bb.0:
-; RV64F-NEXT: li a0, 190
-; RV64F-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; RV64F-NEXT: vmv.v.x v10, a0
-; RV64F-NEXT: fsrmi a0, 1
-; RV64F-NEXT: vfncvt.f.xu.w v11, v8
-; RV64F-NEXT: vsrl.vi v11, v11, 23
-; RV64F-NEXT: vwsubu.vv v8, v10, v11
-; RV64F-NEXT: fsrm a0
-; RV64F-NEXT: ret
+; CHECK-F-LABEL: ctlz_zero_undef_nxv2i64:
+; CHECK-F: # %bb.0:
+; CHECK-F-NEXT: li a0, 190
+; CHECK-F-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-F-NEXT: vmv.v.x v10, a0
+; CHECK-F-NEXT: fsrmi a0, 1
+; CHECK-F-NEXT: vfncvt.f.xu.w v11, v8
+; CHECK-F-NEXT: vsrl.vi v11, v11, 23
+; CHECK-F-NEXT: vwsubu.vv v8, v10, v11
+; CHECK-F-NEXT: fsrm a0
+; CHECK-F-NEXT: ret
;
; CHECK-D-LABEL: ctlz_zero_undef_nxv2i64:
; CHECK-D: # %bb.0:
@@ -3191,31 +3099,17 @@ define <vscale x 4 x i64> @ctlz_zero_undef_nxv4i64(<vscale x 4 x i64> %va) {
; RV64I-NEXT: vsrl.vx v8, v8, a0
; RV64I-NEXT: ret
;
-; RV32F-LABEL: ctlz_zero_undef_nxv4i64:
-; RV32F: # %bb.0:
-; RV32F-NEXT: li a0, 190
-; RV32F-NEXT: vsetvli a1, zero, e64, m4, ta, ma
-; RV32F-NEXT: vmv.v.x v12, a0
-; RV32F-NEXT: fsrmi a0, 1
-; RV32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32F-NEXT: vfncvt.f.xu.w v16, v8
-; RV32F-NEXT: vsrl.vi v8, v16, 23
-; RV32F-NEXT: vwsubu.wv v12, v12, v8
-; RV32F-NEXT: fsrm a0
-; RV32F-NEXT: vmv4r.v v8, v12
-; RV32F-NEXT: ret
-;
-; RV64F-LABEL: ctlz_zero_undef_nxv4i64:
-; RV64F: # %bb.0:
-; RV64F-NEXT: li a0, 190
-; RV64F-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; RV64F-NEXT: vmv.v.x v12, a0
-; RV64F-NEXT: fsrmi a0, 1
-; RV64F-NEXT: vfncvt.f.xu.w v14, v8
-; RV64F-NEXT: vsrl.vi v14, v14, 23
-; RV64F-NEXT: vwsubu.vv v8, v12, v14
-; RV64F-NEXT: fsrm a0
-; RV64F-NEXT: ret
+; CHECK-F-LABEL: ctlz_zero_undef_nxv4i64:
+; CHECK-F: # %bb.0:
+; CHECK-F-NEXT: li a0, 190
+; CHECK-F-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; CHECK-F-NEXT: vmv.v.x v12, a0
+; CHECK-F-NEXT: fsrmi a0, 1
+; CHECK-F-NEXT: vfncvt.f.xu.w v14, v8
+; CHECK-F-NEXT: vsrl.vi v14, v14, 23
+; CHECK-F-NEXT: vwsubu.vv v8, v12, v14
+; CHECK-F-NEXT: fsrm a0
+; CHECK-F-NEXT: ret
;
; CHECK-D-LABEL: ctlz_zero_undef_nxv4i64:
; CHECK-D: # %bb.0:
@@ -3339,31 +3233,17 @@ define <vscale x 8 x i64> @ctlz_zero_undef_nxv8i64(<vscale x 8 x i64> %va) {
; RV64I-NEXT: vsrl.vx v8, v8, a0
; RV64I-NEXT: ret
;
-; RV32F-LABEL: ctlz_zero_undef_nxv8i64:
-; RV32F: # %bb.0:
-; RV32F-NEXT: li a0, 190
-; RV32F-NEXT: vsetvli a1, zero, e64, m8, ta, ma
-; RV32F-NEXT: vmv.v.x v16, a0
-; RV32F-NEXT: fsrmi a0, 1
-; RV32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; RV32F-NEXT: vfncvt.f.xu.w v24, v8
-; RV32F-NEXT: vsrl.vi v8, v24, 23
-; RV32F-NEXT: vwsubu.wv v16, v16, v8
-; RV32F-NEXT: fsrm a0
-; RV32F-NEXT: vmv8r.v v8, v16
-; RV32F-NEXT: ret
-;
-; RV64F-LABEL: ctlz_zero_undef_nxv8i64:
-; RV64F: # %bb.0:
-; RV64F-NEXT: li a0, 190
-; RV64F-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; RV64F-NEXT: vmv.v.x v16, a0
-; RV64F-NEXT: fsrmi a0, 1
-; RV64F-NEXT: vfncvt.f.xu.w v20, v8
-; RV64F-NEXT: vsrl.vi v20, v20, 23
-; RV64F-NEXT: vwsubu.vv v8, v16, v20
-; RV64F-NEXT: fsrm a0
-; RV64F-NEXT: ret
+; CHECK-F-LABEL: ctlz_zero_undef_nxv8i64:
+; CHECK-F: # %bb.0:
+; CHECK-F-NEXT: li a0, 190
+; CHECK-F-NEXT: vsetvli a1, zero, e32, m4, ta, ma
+; CHECK-F-NEXT: vmv.v.x v16, a0
+; CHECK-F-NEXT: fsrmi a0, 1
+; CHECK-F-NEXT: vfncvt.f.xu.w v20, v8
+; CHECK-F-NEXT: vsrl.vi v20, v20, 23
+; CHECK-F-NEXT: vwsubu.vv v8, v16, v20
+; CHECK-F-NEXT: fsrm a0
+; CHECK-F-NEXT: ret
;
; CHECK-D-LABEL: ctlz_zero_undef_nxv8i64:
; CHECK-D: # %bb.0:
@@ -3387,4 +3267,6 @@ define <vscale x 8 x i64> @ctlz_zero_undef_nxv8i64(<vscale x 8 x i64> %va) {
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; RV32: {{.*}}
+; RV32F: {{.*}}
; RV64: {{.*}}
+; RV64F: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll
index 479664c6f5f62..9737107974075 100644
--- a/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll
@@ -1223,12 +1223,11 @@ define <vscale x 1 x i64> @cttz_nxv1i64(<vscale x 1 x i64> %va) {
; RV32F-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; RV32F-NEXT: vfncvt.f.xu.w v9, v8
; RV32F-NEXT: vsrl.vi v8, v9, 23
-; RV32F-NEXT: vsetvli zero, zero, e64, m1, ta, ma
-; RV32F-NEXT: vzext.vf2 v9, v8
; RV32F-NEXT: li a1, 127
-; RV32F-NEXT: vsub.vx v8, v9, a1
+; RV32F-NEXT: vwsubu.vx v9, v8, a1
; RV32F-NEXT: li a1, 64
-; RV32F-NEXT: vmerge.vxm v8, v8, a1, v0
+; RV32F-NEXT: vsetvli zero, zero, e64, m1, ta, ma
+; RV32F-NEXT: vmerge.vxm v8, v9, a1, v0
; RV32F-NEXT: fsrm a0
; RV32F-NEXT: ret
;
@@ -1385,12 +1384,11 @@ define <vscale x 2 x i64> @cttz_nxv2i64(<vscale x 2 x i64> %va) {
; RV32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV32F-NEXT: vfncvt.f.xu.w v10, v8
; RV32F-NEXT: vsrl.vi v8, v10, 23
-; RV32F-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV32F-NEXT: vzext.vf2 v10, v8
; RV32F-NEXT: li a1, 127
-; RV32F-NEXT: vsub.vx v8, v10, a1
+; RV32F-NEXT: vwsubu.vx v10, v8, a1
; RV32F-NEXT: li a1, 64
-; RV32F-NEXT: vmerge.vxm v8, v8, a1, v0
+; RV32F-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; RV32F-NEXT: vmerge.vxm v8, v10, a1, v0
; RV32F-NEXT: fsrm a0
; RV32F-NEXT: ret
;
@@ -1547,12 +1545,11 @@ define <vscale x 4 x i64> @cttz_nxv4i64(<vscale x 4 x i64> %va) {
; RV32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32F-NEXT: vfncvt.f.xu.w v12, v8
; RV32F-NEXT: vsrl.vi v8, v12, 23
-; RV32F-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; RV32F-NEXT: vzext.vf2 v12, v8
; RV32F-NEXT: li a1, 127
-; RV32F-NEXT: vsub.vx v8, v12, a1
+; RV32F-NEXT: vwsubu.vx v12, v8, a1
; RV32F-NEXT: li a1, 64
-; RV32F-NEXT: vmerge.vxm v8, v8, a1, v0
+; RV32F-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; RV32F-NEXT: vmerge.vxm v8, v12, a1, v0
; RV32F-NEXT: fsrm a0
; RV32F-NEXT: ret
;
@@ -1709,12 +1706,11 @@ define <vscale x 8 x i64> @cttz_nxv8i64(<vscale x 8 x i64> %va) {
; RV32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV32F-NEXT: vfncvt.f.xu.w v16, v8
; RV32F-NEXT: vsrl.vi v8, v16, 23
-; RV32F-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; RV32F-NEXT: vzext.vf2 v16, v8
; RV32F-NEXT: li a1, 127
-; RV32F-NEXT: vsub.vx v8, v16, a1
+; RV32F-NEXT: vwsubu.vx v16, v8, a1
; RV32F-NEXT: li a1, 64
-; RV32F-NEXT: vmerge.vxm v8, v8, a1, v0
+; RV32F-NEXT: vsetvli zero, zero, e64, m8, ta, ma
+; RV32F-NEXT: vmerge.vxm v8, v16, a1, v0
; RV32F-NEXT: fsrm a0
; RV32F-NEXT: ret
;
@@ -2887,35 +2883,19 @@ define <vscale x 1 x i64> @cttz_zero_undef_nxv1i64(<vscale x 1 x i64> %va) {
; RV64I-NEXT: vsrl.vx v8, v8, a0
; RV64I-NEXT: ret
;
-; RV32F-LABEL: cttz_zero_undef_nxv1i64:
-; RV32F: # %bb.0:
-; RV32F-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; RV32F-NEXT: vrsub.vi v9, v8, 0
-; RV32F-NEXT: vand.vv v8, v8, v9
-; RV32F-NEXT: fsrmi a0, 1
-; RV32F-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; RV32F-NEXT: vfncvt.f.xu.w v9, v8
-; RV32F-NEXT: vsrl.vi v8, v9, 23
-; RV32F-NEXT: vsetvli zero, zero, e64, m1, ta, ma
-; RV32F-NEXT: vzext.vf2 v9, v8
-; RV32F-NEXT: li a1, 127
-; RV32F-NEXT: vsub.vx v8, v9, a1
-; RV32F-NEXT: fsrm a0
-; RV32F-NEXT: ret
-;
-; RV64F-LABEL: cttz_zero_undef_nxv1i64:
-; RV64F: # %bb.0:
-; RV64F-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; RV64F-NEXT: vrsub.vi v9, v8, 0
-; RV64F-NEXT: vand.vv v8, v8, v9
-; RV64F-NEXT: fsrmi a0, 1
-; RV64F-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; RV64F-NEXT: vfncvt.f.xu.w v9, v8
-; RV64F-NEXT: vsrl.vi v9, v9, 23
-; RV64F-NEXT: li a1, 127
-; RV64F-NEXT: vwsubu.vx v8, v9, a1
-; RV64F-NEXT: fsrm a0
-; RV64F-NEXT: ret
+; CHECK-F-LABEL: cttz_zero_undef_nxv1i64:
+; CHECK-F: # %bb.0:
+; CHECK-F-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-F-NEXT: vrsub.vi v9, v8, 0
+; CHECK-F-NEXT: vand.vv v8, v8, v9
+; CHECK-F-NEXT: fsrmi a0, 1
+; CHECK-F-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-F-NEXT: vfncvt.f.xu.w v9, v8
+; CHECK-F-NEXT: vsrl.vi v9, v9, 23
+; CHECK-F-NEXT: li a1, 127
+; CHECK-F-NEXT: vwsubu.vx v8, v9, a1
+; CHECK-F-NEXT: fsrm a0
+; CHECK-F-NEXT: ret
;
; CHECK-D-LABEL: cttz_zero_undef_nxv1i64:
; CHECK-D: # %bb.0:
@@ -3021,35 +3001,19 @@ define <vscale x 2 x i64> @cttz_zero_undef_nxv2i64(<vscale x 2 x i64> %va) {
; RV64I-NEXT: vsrl.vx v8, v8, a0
; RV64I-NEXT: ret
;
-; RV32F-LABEL: cttz_zero_undef_nxv2i64:
-; RV32F: # %bb.0:
-; RV32F-NEXT: vsetvli a0, zero, e64, m2, ta, ma
-; RV32F-NEXT: vrsub.vi v10, v8, 0
-; RV32F-NEXT: vand.vv v8, v8, v10
-; RV32F-NEXT: fsrmi a0, 1
-; RV32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV32F-NEXT: vfncvt.f.xu.w v10, v8
-; RV32F-NEXT: vsrl.vi v8, v10, 23
-; RV32F-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV32F-NEXT: vzext.vf2 v10, v8
-; RV32F-NEXT: li a1, 127
-; RV32F-NEXT: vsub.vx v8, v10, a1
-; RV32F-NEXT: fsrm a0
-; RV32F-NEXT: ret
-;
-; RV64F-LABEL: cttz_zero_undef_nxv2i64:
-; RV64F: # %bb.0:
-; RV64F-NEXT: vsetvli a0, zero, e64, m2, ta, ma
-; RV64F-NEXT: vrsub.vi v10, v8, 0
-; RV64F-NEXT: vand.vv v8, v8, v10
-; RV64F-NEXT: fsrmi a0, 1
-; RV64F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64F-NEXT: vfncvt.f.xu.w v10, v8
-; RV64F-NEXT: vsrl.vi v10, v10, 23
-; RV64F-NEXT: li a1, 127...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/99251
More information about the llvm-commits
mailing list