[llvm] [RISCV] Use vwadd.vx for splat vector with extension (PR #87249)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Apr 8 04:31:37 PDT 2024
https://github.com/sun-jacobi updated https://github.com/llvm/llvm-project/pull/87249
From 3696f9fe7b3dc825839987b6b24f5a9eaf2d3a5e Mon Sep 17 00:00:00 2001
From: sun-jacobi <sun1011jacobi at gmail.com>
Date: Sat, 6 Apr 2024 19:12:36 +0900
Subject: [PATCH 1/5] [RISCV] Use vwadd.vx for extended splat
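
Extend the NodeExtensionHelper combine so that an ISD::SPLAT_VECTOR whose
scalar has enough known sign or zero bits is handled like the existing
RISCVISD::VMV_V_X_VL case: the splat is rebuilt at half the element width
and folded into a widening op such as vwadd.vx/vwaddu.vx. As a minimal
sketch (same shape as the tests added below; the function name is
illustrative), RV64 should now select vwaddu.vx here instead of first
materializing the splat at e64:

  define <vscale x 8 x i64> @widen_splat(<vscale x 8 x i32> %va, i32 %b) {
    %sb = zext i32 %b to i64
    %head = insertelement <vscale x 8 x i64> poison, i64 %sb, i32 0
    %splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
    %vc = zext <vscale x 8 x i32> %va to <vscale x 8 x i64>
    %ve = add <vscale x 8 x i64> %vc, %splat
    ret <vscale x 8 x i64> %ve
  }

On RV32 the i64 scalar occupies two GPRs, so the zext form still goes
through a stack slot and vlse64.v rather than vwaddu.vx.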
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 98 ++++++++-----
llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll | 155 ++++++++++++++++++++
2 files changed, 214 insertions(+), 39 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index b426f1a7b3791d..aa70c24528c914 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -13597,6 +13597,7 @@ struct NodeExtensionHelper {
case RISCVISD::VSEXT_VL:
case RISCVISD::VZEXT_VL:
case RISCVISD::FP_EXTEND_VL:
+ case ISD::SPLAT_VECTOR:
return OrigOperand.getOperand(0);
default:
return OrigOperand;
@@ -13605,7 +13606,8 @@ struct NodeExtensionHelper {
/// Check if this instance represents a splat.
bool isSplat() const {
- return OrigOperand.getOpcode() == RISCVISD::VMV_V_X_VL;
+ return (OrigOperand.getOpcode() == RISCVISD::VMV_V_X_VL) ||
+ (OrigOperand.getOpcode() == ISD::SPLAT_VECTOR);
}
/// Get the extended opcode.
@@ -13649,6 +13651,8 @@ struct NodeExtensionHelper {
case RISCVISD::VZEXT_VL:
case RISCVISD::FP_EXTEND_VL:
return DAG.getNode(ExtOpc, DL, NarrowVT, Source, Mask, VL);
+ case ISD::SPLAT_VECTOR:
+ return DAG.getSplat(NarrowVT, DL, Source.getOperand(0));
case RISCVISD::VMV_V_X_VL:
return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT,
DAG.getUNDEF(NarrowVT), Source.getOperand(1), VL);
@@ -13781,6 +13785,57 @@ struct NodeExtensionHelper {
/// Check if this node needs to be fully folded or extended for all users.
bool needToPromoteOtherUsers() const { return EnforceOneUse; }
+ void fillUpExtensionSupportForSplat(SDNode *Root, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ unsigned Opc = OrigOperand.getOpcode();
+ MVT VT = OrigOperand.getSimpleValueType();
+
+ assert((Opc == ISD::SPLAT_VECTOR || Opc == RISCVISD::VMV_V_X_VL) &&
+ "Unexpected Opcode");
+
+ if (Opc == ISD::SPLAT_VECTOR && !VT.isVector())
+ return;
+
+ // The passthru must be undef for tail agnostic.
+ if (Opc == RISCVISD::VMV_V_X_VL && !OrigOperand.getOperand(0).isUndef())
+ return;
+
+ // Get the scalar value.
+ SDValue Op = Opc == ISD::SPLAT_VECTOR ? OrigOperand.getOperand(0)
+ : OrigOperand.getOperand(1);
+
+ // See if we have enough sign bits or zero bits in the scalar to use a
+ // widening opcode by splatting to smaller element size.
+ unsigned EltBits = VT.getScalarSizeInBits();
+ unsigned ScalarBits = Op.getValueSizeInBits();
+ // Make sure we're getting all element bits from the scalar register.
+ // FIXME: Support implicit sign extension of vmv.v.x?
+ if (ScalarBits < EltBits)
+ return;
+
+ unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
+ // If the narrow type cannot be expressed with a legal VMV,
+ // this is not a valid candidate.
+ if (NarrowSize < 8)
+ return;
+
+ if (DAG.ComputeMaxSignificantBits(Op) <= NarrowSize)
+ SupportsSExt = true;
+
+ if (DAG.MaskedValueIsZero(Op,
+ APInt::getBitsSetFrom(ScalarBits, NarrowSize)))
+ SupportsZExt = true;
+
+ EnforceOneUse = false;
+ CheckMask = Opc == ISD::SPLAT_VECTOR;
+
+ if (Opc == ISD::SPLAT_VECTOR)
+ std::tie(Mask, VL) =
+ getDefaultScalableVLOps(VT, SDLoc(Root), DAG, Subtarget);
+ else
+ VL = OrigOperand.getOperand(2);
+ }
+
/// Helper method to set the various fields of this struct based on the
/// type of \p Root.
void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG,
@@ -13826,45 +13881,10 @@ struct NodeExtensionHelper {
Mask = OrigOperand.getOperand(1);
VL = OrigOperand.getOperand(2);
break;
- case RISCVISD::VMV_V_X_VL: {
- // Historically, we didn't care about splat values not disappearing during
- // combines.
- EnforceOneUse = false;
- CheckMask = false;
- VL = OrigOperand.getOperand(2);
-
- // The operand is a splat of a scalar.
-
- // The pasthru must be undef for tail agnostic.
- if (!OrigOperand.getOperand(0).isUndef())
- break;
-
- // Get the scalar value.
- SDValue Op = OrigOperand.getOperand(1);
-
- // See if we have enough sign bits or zero bits in the scalar to use a
- // widening opcode by splatting to smaller element size.
- MVT VT = Root->getSimpleValueType(0);
- unsigned EltBits = VT.getScalarSizeInBits();
- unsigned ScalarBits = Op.getValueSizeInBits();
- // Make sure we're getting all element bits from the scalar register.
- // FIXME: Support implicit sign extension of vmv.v.x?
- if (ScalarBits < EltBits)
- break;
-
- unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
- // If the narrow type cannot be expressed with a legal VMV,
- // this is not a valid candidate.
- if (NarrowSize < 8)
- break;
-
- if (DAG.ComputeMaxSignificantBits(Op) <= NarrowSize)
- SupportsSExt = true;
- if (DAG.MaskedValueIsZero(Op,
- APInt::getBitsSetFrom(ScalarBits, NarrowSize)))
- SupportsZExt = true;
+ case ISD::SPLAT_VECTOR:
+ case RISCVISD::VMV_V_X_VL:
+ fillUpExtensionSupportForSplat(Root, DAG, Subtarget);
break;
- }
default:
break;
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll
index 66e6883dd1d3e3..985424e3557b98 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll
@@ -1466,3 +1466,158 @@ define <vscale x 2 x i32> @vwadd_wv_disjoint_or(<vscale x 2 x i32> %x.i32, <vsca
%or = or disjoint <vscale x 2 x i32> %x.i32, %y.i32
ret <vscale x 2 x i32> %or
}
+
+define <vscale x 8 x i64> @vwadd_vx_splat_zext(<vscale x 8 x i32> %va, i32 %b) {
+; RV32-LABEL: vwadd_vx_splat_zext:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw zero, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
+; RV32-NEXT: vlse64.v v16, (a0), zero
+; RV32-NEXT: vwaddu.wv v16, v16, v8
+; RV32-NEXT: vmv8r.v v8, v16
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vwadd_vx_splat_zext:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, zero, e32, m4, ta, ma
+; RV64-NEXT: vwaddu.vx v16, v8, a0
+; RV64-NEXT: vmv8r.v v8, v16
+; RV64-NEXT: ret
+ %sb = zext i32 %b to i64
+ %head = insertelement <vscale x 8 x i64> poison, i64 %sb, i32 0
+ %splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+ %vc = zext <vscale x 8 x i32> %va to <vscale x 8 x i64>
+ %ve = add <vscale x 8 x i64> %vc, %splat
+ ret <vscale x 8 x i64> %ve
+}
+
+define <vscale x 8 x i32> @vwadd_vx_splat_zext_i1(<vscale x 8 x i1> %va, i16 %b) {
+; RV32-LABEL: vwadd_vx_splat_zext_i1:
+; RV32: # %bb.0:
+; RV32-NEXT: slli a0, a0, 16
+; RV32-NEXT: srli a0, a0, 16
+; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu
+; RV32-NEXT: vmv.v.x v8, a0
+; RV32-NEXT: vadd.vi v8, v8, 1, v0.t
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vwadd_vx_splat_zext_i1:
+; RV64: # %bb.0:
+; RV64-NEXT: slli a0, a0, 48
+; RV64-NEXT: srli a0, a0, 48
+; RV64-NEXT: vsetvli a1, zero, e32, m4, ta, mu
+; RV64-NEXT: vmv.v.x v8, a0
+; RV64-NEXT: vadd.vi v8, v8, 1, v0.t
+; RV64-NEXT: ret
+ %sb = zext i16 %b to i32
+ %head = insertelement <vscale x 8 x i32> poison, i32 %sb, i32 0
+ %splat = shufflevector <vscale x 8 x i32> %head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
+ %vc = zext <vscale x 8 x i1> %va to <vscale x 8 x i32>
+ %ve = add <vscale x 8 x i32> %vc, %splat
+ ret <vscale x 8 x i32> %ve
+}
+
+define <vscale x 8 x i64> @vwadd_wx_splat_zext(<vscale x 8 x i64> %va, i32 %b) {
+; RV32-LABEL: vwadd_wx_splat_zext:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw zero, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
+; RV32-NEXT: vlse64.v v16, (a0), zero
+; RV32-NEXT: vadd.vv v8, v8, v16
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vwadd_wx_splat_zext:
+; RV64: # %bb.0:
+; RV64-NEXT: slli a0, a0, 32
+; RV64-NEXT: srli a0, a0, 32
+; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
+; RV64-NEXT: vadd.vx v8, v8, a0
+; RV64-NEXT: ret
+ %sb = zext i32 %b to i64
+ %head = insertelement <vscale x 8 x i64> poison, i64 %sb, i32 0
+ %splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+ %ve = add <vscale x 8 x i64> %va, %splat
+ ret <vscale x 8 x i64> %ve
+}
+
+define <vscale x 8 x i64> @vwadd_vx_splat_sext(<vscale x 8 x i32> %va, i32 %b) {
+; RV32-LABEL: vwadd_vx_splat_sext:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
+; RV32-NEXT: vmv.v.x v16, a0
+; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; RV32-NEXT: vwadd.wv v16, v16, v8
+; RV32-NEXT: vmv8r.v v8, v16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vwadd_vx_splat_sext:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, zero, e32, m4, ta, ma
+; RV64-NEXT: vwadd.vx v16, v8, a0
+; RV64-NEXT: vmv8r.v v8, v16
+; RV64-NEXT: ret
+ %sb = sext i32 %b to i64
+ %head = insertelement <vscale x 8 x i64> poison, i64 %sb, i32 0
+ %splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+ %vc = sext <vscale x 8 x i32> %va to <vscale x 8 x i64>
+ %ve = add <vscale x 8 x i64> %vc, %splat
+ ret <vscale x 8 x i64> %ve
+}
+
+define <vscale x 8 x i32> @vwadd_vx_splat_sext_i1(<vscale x 8 x i1> %va, i16 %b) {
+; RV32-LABEL: vwadd_vx_splat_sext_i1:
+; RV32: # %bb.0:
+; RV32-NEXT: slli a0, a0, 16
+; RV32-NEXT: srai a0, a0, 16
+; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu
+; RV32-NEXT: vmv.v.x v8, a0
+; RV32-NEXT: li a0, 1
+; RV32-NEXT: vsub.vx v8, v8, a0, v0.t
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vwadd_vx_splat_sext_i1:
+; RV64: # %bb.0:
+; RV64-NEXT: slli a0, a0, 48
+; RV64-NEXT: srai a0, a0, 48
+; RV64-NEXT: vsetvli a1, zero, e32, m4, ta, mu
+; RV64-NEXT: vmv.v.x v8, a0
+; RV64-NEXT: li a0, 1
+; RV64-NEXT: vsub.vx v8, v8, a0, v0.t
+; RV64-NEXT: ret
+ %sb = sext i16 %b to i32
+ %head = insertelement <vscale x 8 x i32> poison, i32 %sb, i32 0
+ %splat = shufflevector <vscale x 8 x i32> %head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
+ %vc = sext <vscale x 8 x i1> %va to <vscale x 8 x i32>
+ %ve = add <vscale x 8 x i32> %vc, %splat
+ ret <vscale x 8 x i32> %ve
+}
+
+define <vscale x 8 x i64> @vwadd_wx_splat_sext(<vscale x 8 x i64> %va, i32 %b) {
+; RV32-LABEL: vwadd_wx_splat_sext:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
+; RV32-NEXT: vadd.vx v8, v8, a0
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vwadd_wx_splat_sext:
+; RV64: # %bb.0:
+; RV64-NEXT: sext.w a0, a0
+; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
+; RV64-NEXT: vadd.vx v8, v8, a0
+; RV64-NEXT: ret
+ %sb = sext i32 %b to i64
+ %head = insertelement <vscale x 8 x i64> poison, i64 %sb, i32 0
+ %splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+ %ve = add <vscale x 8 x i64> %va, %splat
+ ret <vscale x 8 x i64> %ve
+}
From 72741704185602bed39795df97557b922ca94e82 Mon Sep 17 00:00:00 2001
From: sun-jacobi <sun1011jacobi at gmail.com>
Date: Sun, 7 Apr 2024 14:29:10 +0900
Subject: [PATCH 2/5] Modify getSource for SPLAT_VECTOR
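
With SPLAT_VECTOR handled, getSource can additionally look through the
scalar-side extension once SupportsSExt/SupportsZExt has already proven
those bits redundant: a sign_extend_inreg (or a masking and) on the
splatted scalar is stripped, so the narrow splat is built from the
original value, and the narrowing path now splats Source directly. The
visible effect is in the ctlz/cttz checks below: on RV64 the constant
i64 splats (e.g. of 190 and 127) are materialized at e32 and combined
into vwsubu.vv/vwsubu.vx instead of using vwsubu.wv over an e64 splat.
A hedged sketch of the peek-through case (whether the sext survives as
a sign_extend_inreg node depends on how the argument is lowered; the
function name is illustrative):

  define <vscale x 2 x i64> @splat_sext(<vscale x 2 x i32> %v, i32 %b) {
    %s = sext i32 %b to i64
    %h = insertelement <vscale x 2 x i64> poison, i64 %s, i32 0
    %splat = shufflevector <vscale x 2 x i64> %h, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
    %w = sext <vscale x 2 x i32> %v to <vscale x 2 x i64>
    %r = add <vscale x 2 x i64> %w, %splat
    ret <vscale x 2 x i64> %r
  }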
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 18 +-
llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll | 548 ++++++++++++++++----
llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll | 204 +++++---
3 files changed, 576 insertions(+), 194 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index aa70c24528c914..6a41b67f2a1ef3 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -13597,8 +13597,18 @@ struct NodeExtensionHelper {
case RISCVISD::VSEXT_VL:
case RISCVISD::VZEXT_VL:
case RISCVISD::FP_EXTEND_VL:
- case ISD::SPLAT_VECTOR:
return OrigOperand.getOperand(0);
+ case ISD::SPLAT_VECTOR: {
+ SDValue Op = OrigOperand.getOperand(0);
+ unsigned Opc = Op.getOpcode();
+ if (SupportsSExt && Opc == ISD::SIGN_EXTEND_INREG)
+ return Op.getOperand(0);
+
+ if (SupportsZExt && Opc == ISD::AND)
+ return Op.getOperand(0);
+
+ return Op;
+ }
default:
return OrigOperand;
}
@@ -13606,8 +13616,8 @@ struct NodeExtensionHelper {
/// Check if this instance represents a splat.
bool isSplat() const {
- return (OrigOperand.getOpcode() == RISCVISD::VMV_V_X_VL) ||
- (OrigOperand.getOpcode() == ISD::SPLAT_VECTOR);
+ return (OrigOperand.getOpcode() == RISCVISD::VMV_V_X_VL ||
+ OrigOperand.getOpcode() == ISD::SPLAT_VECTOR);
}
/// Get the extended opcode.
@@ -13652,7 +13662,7 @@ struct NodeExtensionHelper {
case RISCVISD::FP_EXTEND_VL:
return DAG.getNode(ExtOpc, DL, NarrowVT, Source, Mask, VL);
case ISD::SPLAT_VECTOR:
- return DAG.getSplat(NarrowVT, DL, Source.getOperand(0));
+ return DAG.getSplat(NarrowVT, DL, Source);
case RISCVISD::VMV_V_X_VL:
return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT,
DAG.getUNDEF(NarrowVT), Source.getOperand(1), VL);
diff --git a/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll
index fc94f8c2a52797..c1c6aa58f38878 100644
--- a/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll
@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-ZVE64X,RV32,RV32I
; RUN: llc -mtriple=riscv64 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-ZVE64X,RV64,RV64I
-; RUN: llc -mtriple=riscv32 -mattr=+zve64f,+f -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-F,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+zve64f,+f -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-F,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+zve64f,+f -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-F,RV32F
+; RUN: llc -mtriple=riscv64 -mattr=+zve64f,+f -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-F,RV64F
; RUN: llc -mtriple=riscv32 -mattr=+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-D,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-D,RV64
; RUN: llc -mtriple=riscv32 -mattr=+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB
@@ -1229,21 +1229,36 @@ define <vscale x 1 x i64> @ctlz_nxv1i64(<vscale x 1 x i64> %va) {
; RV64I-NEXT: vsrl.vx v8, v8, a0
; RV64I-NEXT: ret
;
-; CHECK-F-LABEL: ctlz_nxv1i64:
-; CHECK-F: # %bb.0:
-; CHECK-F-NEXT: li a0, 190
-; CHECK-F-NEXT: vsetvli a1, zero, e64, m1, ta, ma
-; CHECK-F-NEXT: vmv.v.x v9, a0
-; CHECK-F-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-F-NEXT: fsrmi a0, 1
-; CHECK-F-NEXT: vfncvt.f.xu.w v10, v8
-; CHECK-F-NEXT: vsrl.vi v8, v10, 23
-; CHECK-F-NEXT: vwsubu.wv v9, v9, v8
-; CHECK-F-NEXT: li a1, 64
-; CHECK-F-NEXT: vsetvli zero, zero, e64, m1, ta, ma
-; CHECK-F-NEXT: vminu.vx v8, v9, a1
-; CHECK-F-NEXT: fsrm a0
-; CHECK-F-NEXT: ret
+; RV32F-LABEL: ctlz_nxv1i64:
+; RV32F: # %bb.0:
+; RV32F-NEXT: li a0, 190
+; RV32F-NEXT: vsetvli a1, zero, e64, m1, ta, ma
+; RV32F-NEXT: vmv.v.x v9, a0
+; RV32F-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; RV32F-NEXT: fsrmi a0, 1
+; RV32F-NEXT: vfncvt.f.xu.w v10, v8
+; RV32F-NEXT: vsrl.vi v8, v10, 23
+; RV32F-NEXT: vwsubu.wv v9, v9, v8
+; RV32F-NEXT: li a1, 64
+; RV32F-NEXT: vsetvli zero, zero, e64, m1, ta, ma
+; RV32F-NEXT: vminu.vx v8, v9, a1
+; RV32F-NEXT: fsrm a0
+; RV32F-NEXT: ret
+;
+; RV64F-LABEL: ctlz_nxv1i64:
+; RV64F: # %bb.0:
+; RV64F-NEXT: li a0, 190
+; RV64F-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
+; RV64F-NEXT: vmv.v.x v9, a0
+; RV64F-NEXT: fsrmi a0, 1
+; RV64F-NEXT: vfncvt.f.xu.w v10, v8
+; RV64F-NEXT: vsrl.vi v8, v10, 23
+; RV64F-NEXT: vwsubu.vv v10, v9, v8
+; RV64F-NEXT: li a1, 64
+; RV64F-NEXT: vsetvli zero, zero, e64, m1, ta, ma
+; RV64F-NEXT: vminu.vx v8, v10, a1
+; RV64F-NEXT: fsrm a0
+; RV64F-NEXT: ret
;
; CHECK-D-LABEL: ctlz_nxv1i64:
; CHECK-D: # %bb.0:
@@ -1264,6 +1279,35 @@ define <vscale x 1 x i64> @ctlz_nxv1i64(<vscale x 1 x i64> %va) {
; CHECK-ZVBB-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; CHECK-ZVBB-NEXT: vclz.v v8, v8
; CHECK-ZVBB-NEXT: ret
+; CHECK-RV32F-LABEL: ctlz_nxv1i64:
+; CHECK-RV32F: # %bb.0:
+; CHECK-RV32F-NEXT: li a0, 190
+; CHECK-RV32F-NEXT: vsetvli a1, zero, e64, m1, ta, ma
+; CHECK-RV32F-NEXT: vmv.v.x v9, a0
+; CHECK-RV32F-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-RV32F-NEXT: fsrmi a0, 1
+; CHECK-RV32F-NEXT: vfncvt.f.xu.w v10, v8
+; CHECK-RV32F-NEXT: vsrl.vi v8, v10, 23
+; CHECK-RV32F-NEXT: vwsubu.wv v9, v9, v8
+; CHECK-RV32F-NEXT: li a1, 64
+; CHECK-RV32F-NEXT: vsetvli zero, zero, e64, m1, ta, ma
+; CHECK-RV32F-NEXT: vminu.vx v8, v9, a1
+; CHECK-RV32F-NEXT: fsrm a0
+; CHECK-RV32F-NEXT: ret
+; CHECK-RV64F-LABEL: ctlz_nxv1i64:
+; CHECK-RV64F: # %bb.0:
+; CHECK-RV64F-NEXT: li a0, 190
+; CHECK-RV64F-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
+; CHECK-RV64F-NEXT: vmv.v.x v9, a0
+; CHECK-RV64F-NEXT: fsrmi a0, 1
+; CHECK-RV64F-NEXT: vfncvt.f.xu.w v10, v8
+; CHECK-RV64F-NEXT: vsrl.vi v8, v10, 23
+; CHECK-RV64F-NEXT: vwsubu.vv v10, v9, v8
+; CHECK-RV64F-NEXT: li a1, 64
+; CHECK-RV64F-NEXT: vsetvli zero, zero, e64, m1, ta, ma
+; CHECK-RV64F-NEXT: vminu.vx v8, v10, a1
+; CHECK-RV64F-NEXT: fsrm a0
+; CHECK-RV64F-NEXT: ret
%a = call <vscale x 1 x i64> @llvm.ctlz.nxv1i64(<vscale x 1 x i64> %va, i1 false)
ret <vscale x 1 x i64> %a
}
@@ -1370,21 +1414,36 @@ define <vscale x 2 x i64> @ctlz_nxv2i64(<vscale x 2 x i64> %va) {
; RV64I-NEXT: vsrl.vx v8, v8, a0
; RV64I-NEXT: ret
;
-; CHECK-F-LABEL: ctlz_nxv2i64:
-; CHECK-F: # %bb.0:
-; CHECK-F-NEXT: li a0, 190
-; CHECK-F-NEXT: vsetvli a1, zero, e64, m2, ta, ma
-; CHECK-F-NEXT: vmv.v.x v10, a0
-; CHECK-F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-F-NEXT: fsrmi a0, 1
-; CHECK-F-NEXT: vfncvt.f.xu.w v12, v8
-; CHECK-F-NEXT: vsrl.vi v8, v12, 23
-; CHECK-F-NEXT: vwsubu.wv v10, v10, v8
-; CHECK-F-NEXT: li a1, 64
-; CHECK-F-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-F-NEXT: vminu.vx v8, v10, a1
-; CHECK-F-NEXT: fsrm a0
-; CHECK-F-NEXT: ret
+; RV32F-LABEL: ctlz_nxv2i64:
+; RV32F: # %bb.0:
+; RV32F-NEXT: li a0, 190
+; RV32F-NEXT: vsetvli a1, zero, e64, m2, ta, ma
+; RV32F-NEXT: vmv.v.x v10, a0
+; RV32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV32F-NEXT: fsrmi a0, 1
+; RV32F-NEXT: vfncvt.f.xu.w v12, v8
+; RV32F-NEXT: vsrl.vi v8, v12, 23
+; RV32F-NEXT: vwsubu.wv v10, v10, v8
+; RV32F-NEXT: li a1, 64
+; RV32F-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; RV32F-NEXT: vminu.vx v8, v10, a1
+; RV32F-NEXT: fsrm a0
+; RV32F-NEXT: ret
+;
+; RV64F-LABEL: ctlz_nxv2i64:
+; RV64F: # %bb.0:
+; RV64F-NEXT: li a0, 190
+; RV64F-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; RV64F-NEXT: vmv.v.x v10, a0
+; RV64F-NEXT: fsrmi a0, 1
+; RV64F-NEXT: vfncvt.f.xu.w v11, v8
+; RV64F-NEXT: vsrl.vi v8, v11, 23
+; RV64F-NEXT: vwsubu.vv v12, v10, v8
+; RV64F-NEXT: li a1, 64
+; RV64F-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; RV64F-NEXT: vminu.vx v8, v12, a1
+; RV64F-NEXT: fsrm a0
+; RV64F-NEXT: ret
;
; CHECK-D-LABEL: ctlz_nxv2i64:
; CHECK-D: # %bb.0:
@@ -1405,6 +1464,35 @@ define <vscale x 2 x i64> @ctlz_nxv2i64(<vscale x 2 x i64> %va) {
; CHECK-ZVBB-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; CHECK-ZVBB-NEXT: vclz.v v8, v8
; CHECK-ZVBB-NEXT: ret
+; CHECK-RV32F-LABEL: ctlz_nxv2i64:
+; CHECK-RV32F: # %bb.0:
+; CHECK-RV32F-NEXT: li a0, 190
+; CHECK-RV32F-NEXT: vsetvli a1, zero, e64, m2, ta, ma
+; CHECK-RV32F-NEXT: vmv.v.x v10, a0
+; CHECK-RV32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-RV32F-NEXT: fsrmi a0, 1
+; CHECK-RV32F-NEXT: vfncvt.f.xu.w v12, v8
+; CHECK-RV32F-NEXT: vsrl.vi v8, v12, 23
+; CHECK-RV32F-NEXT: vwsubu.wv v10, v10, v8
+; CHECK-RV32F-NEXT: li a1, 64
+; CHECK-RV32F-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-RV32F-NEXT: vminu.vx v8, v10, a1
+; CHECK-RV32F-NEXT: fsrm a0
+; CHECK-RV32F-NEXT: ret
+; CHECK-RV64F-LABEL: ctlz_nxv2i64:
+; CHECK-RV64F: # %bb.0:
+; CHECK-RV64F-NEXT: li a0, 190
+; CHECK-RV64F-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-RV64F-NEXT: vmv.v.x v10, a0
+; CHECK-RV64F-NEXT: fsrmi a0, 1
+; CHECK-RV64F-NEXT: vfncvt.f.xu.w v11, v8
+; CHECK-RV64F-NEXT: vsrl.vi v8, v11, 23
+; CHECK-RV64F-NEXT: vwsubu.vv v12, v10, v8
+; CHECK-RV64F-NEXT: li a1, 64
+; CHECK-RV64F-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-RV64F-NEXT: vminu.vx v8, v12, a1
+; CHECK-RV64F-NEXT: fsrm a0
+; CHECK-RV64F-NEXT: ret
%a = call <vscale x 2 x i64> @llvm.ctlz.nxv2i64(<vscale x 2 x i64> %va, i1 false)
ret <vscale x 2 x i64> %a
}
@@ -1511,21 +1599,36 @@ define <vscale x 4 x i64> @ctlz_nxv4i64(<vscale x 4 x i64> %va) {
; RV64I-NEXT: vsrl.vx v8, v8, a0
; RV64I-NEXT: ret
;
-; CHECK-F-LABEL: ctlz_nxv4i64:
-; CHECK-F: # %bb.0:
-; CHECK-F-NEXT: li a0, 190
-; CHECK-F-NEXT: vsetvli a1, zero, e64, m4, ta, ma
-; CHECK-F-NEXT: vmv.v.x v12, a0
-; CHECK-F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-F-NEXT: fsrmi a0, 1
-; CHECK-F-NEXT: vfncvt.f.xu.w v16, v8
-; CHECK-F-NEXT: vsrl.vi v8, v16, 23
-; CHECK-F-NEXT: vwsubu.wv v12, v12, v8
-; CHECK-F-NEXT: li a1, 64
-; CHECK-F-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-F-NEXT: vminu.vx v8, v12, a1
-; CHECK-F-NEXT: fsrm a0
-; CHECK-F-NEXT: ret
+; RV32F-LABEL: ctlz_nxv4i64:
+; RV32F: # %bb.0:
+; RV32F-NEXT: li a0, 190
+; RV32F-NEXT: vsetvli a1, zero, e64, m4, ta, ma
+; RV32F-NEXT: vmv.v.x v12, a0
+; RV32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32F-NEXT: fsrmi a0, 1
+; RV32F-NEXT: vfncvt.f.xu.w v16, v8
+; RV32F-NEXT: vsrl.vi v8, v16, 23
+; RV32F-NEXT: vwsubu.wv v12, v12, v8
+; RV32F-NEXT: li a1, 64
+; RV32F-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; RV32F-NEXT: vminu.vx v8, v12, a1
+; RV32F-NEXT: fsrm a0
+; RV32F-NEXT: ret
+;
+; RV64F-LABEL: ctlz_nxv4i64:
+; RV64F: # %bb.0:
+; RV64F-NEXT: li a0, 190
+; RV64F-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; RV64F-NEXT: vmv.v.x v12, a0
+; RV64F-NEXT: fsrmi a0, 1
+; RV64F-NEXT: vfncvt.f.xu.w v14, v8
+; RV64F-NEXT: vsrl.vi v8, v14, 23
+; RV64F-NEXT: vwsubu.vv v16, v12, v8
+; RV64F-NEXT: li a1, 64
+; RV64F-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; RV64F-NEXT: vminu.vx v8, v16, a1
+; RV64F-NEXT: fsrm a0
+; RV64F-NEXT: ret
;
; CHECK-D-LABEL: ctlz_nxv4i64:
; CHECK-D: # %bb.0:
@@ -1546,6 +1649,35 @@ define <vscale x 4 x i64> @ctlz_nxv4i64(<vscale x 4 x i64> %va) {
; CHECK-ZVBB-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; CHECK-ZVBB-NEXT: vclz.v v8, v8
; CHECK-ZVBB-NEXT: ret
+; CHECK-RV32F-LABEL: ctlz_nxv4i64:
+; CHECK-RV32F: # %bb.0:
+; CHECK-RV32F-NEXT: li a0, 190
+; CHECK-RV32F-NEXT: vsetvli a1, zero, e64, m4, ta, ma
+; CHECK-RV32F-NEXT: vmv.v.x v12, a0
+; CHECK-RV32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-RV32F-NEXT: fsrmi a0, 1
+; CHECK-RV32F-NEXT: vfncvt.f.xu.w v16, v8
+; CHECK-RV32F-NEXT: vsrl.vi v8, v16, 23
+; CHECK-RV32F-NEXT: vwsubu.wv v12, v12, v8
+; CHECK-RV32F-NEXT: li a1, 64
+; CHECK-RV32F-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; CHECK-RV32F-NEXT: vminu.vx v8, v12, a1
+; CHECK-RV32F-NEXT: fsrm a0
+; CHECK-RV32F-NEXT: ret
+; CHECK-RV64F-LABEL: ctlz_nxv4i64:
+; CHECK-RV64F: # %bb.0:
+; CHECK-RV64F-NEXT: li a0, 190
+; CHECK-RV64F-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; CHECK-RV64F-NEXT: vmv.v.x v12, a0
+; CHECK-RV64F-NEXT: fsrmi a0, 1
+; CHECK-RV64F-NEXT: vfncvt.f.xu.w v14, v8
+; CHECK-RV64F-NEXT: vsrl.vi v8, v14, 23
+; CHECK-RV64F-NEXT: vwsubu.vv v16, v12, v8
+; CHECK-RV64F-NEXT: li a1, 64
+; CHECK-RV64F-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; CHECK-RV64F-NEXT: vminu.vx v8, v16, a1
+; CHECK-RV64F-NEXT: fsrm a0
+; CHECK-RV64F-NEXT: ret
%a = call <vscale x 4 x i64> @llvm.ctlz.nxv4i64(<vscale x 4 x i64> %va, i1 false)
ret <vscale x 4 x i64> %a
}
@@ -1652,21 +1784,36 @@ define <vscale x 8 x i64> @ctlz_nxv8i64(<vscale x 8 x i64> %va) {
; RV64I-NEXT: vsrl.vx v8, v8, a0
; RV64I-NEXT: ret
;
-; CHECK-F-LABEL: ctlz_nxv8i64:
-; CHECK-F: # %bb.0:
-; CHECK-F-NEXT: li a0, 190
-; CHECK-F-NEXT: vsetvli a1, zero, e64, m8, ta, ma
-; CHECK-F-NEXT: vmv.v.x v16, a0
-; CHECK-F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-F-NEXT: fsrmi a0, 1
-; CHECK-F-NEXT: vfncvt.f.xu.w v24, v8
-; CHECK-F-NEXT: vsrl.vi v8, v24, 23
-; CHECK-F-NEXT: vwsubu.wv v16, v16, v8
-; CHECK-F-NEXT: li a1, 64
-; CHECK-F-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-F-NEXT: vminu.vx v8, v16, a1
-; CHECK-F-NEXT: fsrm a0
-; CHECK-F-NEXT: ret
+; RV32F-LABEL: ctlz_nxv8i64:
+; RV32F: # %bb.0:
+; RV32F-NEXT: li a0, 190
+; RV32F-NEXT: vsetvli a1, zero, e64, m8, ta, ma
+; RV32F-NEXT: vmv.v.x v16, a0
+; RV32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; RV32F-NEXT: fsrmi a0, 1
+; RV32F-NEXT: vfncvt.f.xu.w v24, v8
+; RV32F-NEXT: vsrl.vi v8, v24, 23
+; RV32F-NEXT: vwsubu.wv v16, v16, v8
+; RV32F-NEXT: li a1, 64
+; RV32F-NEXT: vsetvli zero, zero, e64, m8, ta, ma
+; RV32F-NEXT: vminu.vx v8, v16, a1
+; RV32F-NEXT: fsrm a0
+; RV32F-NEXT: ret
+;
+; RV64F-LABEL: ctlz_nxv8i64:
+; RV64F: # %bb.0:
+; RV64F-NEXT: li a0, 190
+; RV64F-NEXT: vsetvli a1, zero, e32, m4, ta, ma
+; RV64F-NEXT: vmv.v.x v16, a0
+; RV64F-NEXT: fsrmi a0, 1
+; RV64F-NEXT: vfncvt.f.xu.w v20, v8
+; RV64F-NEXT: vsrl.vi v8, v20, 23
+; RV64F-NEXT: vwsubu.vv v24, v16, v8
+; RV64F-NEXT: li a1, 64
+; RV64F-NEXT: vsetvli zero, zero, e64, m8, ta, ma
+; RV64F-NEXT: vminu.vx v8, v24, a1
+; RV64F-NEXT: fsrm a0
+; RV64F-NEXT: ret
;
; CHECK-D-LABEL: ctlz_nxv8i64:
; CHECK-D: # %bb.0:
@@ -1687,6 +1834,35 @@ define <vscale x 8 x i64> @ctlz_nxv8i64(<vscale x 8 x i64> %va) {
; CHECK-ZVBB-NEXT: vsetvli a0, zero, e64, m8, ta, ma
; CHECK-ZVBB-NEXT: vclz.v v8, v8
; CHECK-ZVBB-NEXT: ret
+; CHECK-RV32F-LABEL: ctlz_nxv8i64:
+; CHECK-RV32F: # %bb.0:
+; CHECK-RV32F-NEXT: li a0, 190
+; CHECK-RV32F-NEXT: vsetvli a1, zero, e64, m8, ta, ma
+; CHECK-RV32F-NEXT: vmv.v.x v16, a0
+; CHECK-RV32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-RV32F-NEXT: fsrmi a0, 1
+; CHECK-RV32F-NEXT: vfncvt.f.xu.w v24, v8
+; CHECK-RV32F-NEXT: vsrl.vi v8, v24, 23
+; CHECK-RV32F-NEXT: vwsubu.wv v16, v16, v8
+; CHECK-RV32F-NEXT: li a1, 64
+; CHECK-RV32F-NEXT: vsetvli zero, zero, e64, m8, ta, ma
+; CHECK-RV32F-NEXT: vminu.vx v8, v16, a1
+; CHECK-RV32F-NEXT: fsrm a0
+; CHECK-RV32F-NEXT: ret
+; CHECK-RV64F-LABEL: ctlz_nxv8i64:
+; CHECK-RV64F: # %bb.0:
+; CHECK-RV64F-NEXT: li a0, 190
+; CHECK-RV64F-NEXT: vsetvli a1, zero, e32, m4, ta, ma
+; CHECK-RV64F-NEXT: vmv.v.x v16, a0
+; CHECK-RV64F-NEXT: fsrmi a0, 1
+; CHECK-RV64F-NEXT: vfncvt.f.xu.w v20, v8
+; CHECK-RV64F-NEXT: vsrl.vi v8, v20, 23
+; CHECK-RV64F-NEXT: vwsubu.vv v24, v16, v8
+; CHECK-RV64F-NEXT: li a1, 64
+; CHECK-RV64F-NEXT: vsetvli zero, zero, e64, m8, ta, ma
+; CHECK-RV64F-NEXT: vminu.vx v8, v24, a1
+; CHECK-RV64F-NEXT: fsrm a0
+; CHECK-RV64F-NEXT: ret
%a = call <vscale x 8 x i64> @llvm.ctlz.nxv8i64(<vscale x 8 x i64> %va, i1 false)
ret <vscale x 8 x i64> %a
}
@@ -2835,19 +3011,31 @@ define <vscale x 1 x i64> @ctlz_zero_undef_nxv1i64(<vscale x 1 x i64> %va) {
; RV64I-NEXT: vsrl.vx v8, v8, a0
; RV64I-NEXT: ret
;
-; CHECK-F-LABEL: ctlz_zero_undef_nxv1i64:
-; CHECK-F: # %bb.0:
-; CHECK-F-NEXT: li a0, 190
-; CHECK-F-NEXT: vsetvli a1, zero, e64, m1, ta, ma
-; CHECK-F-NEXT: vmv.v.x v9, a0
-; CHECK-F-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-F-NEXT: fsrmi a0, 1
-; CHECK-F-NEXT: vfncvt.f.xu.w v10, v8
-; CHECK-F-NEXT: vsrl.vi v8, v10, 23
-; CHECK-F-NEXT: vwsubu.wv v9, v9, v8
-; CHECK-F-NEXT: fsrm a0
-; CHECK-F-NEXT: vmv1r.v v8, v9
-; CHECK-F-NEXT: ret
+; RV32F-LABEL: ctlz_zero_undef_nxv1i64:
+; RV32F: # %bb.0:
+; RV32F-NEXT: li a0, 190
+; RV32F-NEXT: vsetvli a1, zero, e64, m1, ta, ma
+; RV32F-NEXT: vmv.v.x v9, a0
+; RV32F-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; RV32F-NEXT: fsrmi a0, 1
+; RV32F-NEXT: vfncvt.f.xu.w v10, v8
+; RV32F-NEXT: vsrl.vi v8, v10, 23
+; RV32F-NEXT: vwsubu.wv v9, v9, v8
+; RV32F-NEXT: fsrm a0
+; RV32F-NEXT: vmv1r.v v8, v9
+; RV32F-NEXT: ret
+;
+; RV64F-LABEL: ctlz_zero_undef_nxv1i64:
+; RV64F: # %bb.0:
+; RV64F-NEXT: li a0, 190
+; RV64F-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
+; RV64F-NEXT: vmv.v.x v9, a0
+; RV64F-NEXT: fsrmi a0, 1
+; RV64F-NEXT: vfncvt.f.xu.w v10, v8
+; RV64F-NEXT: vsrl.vi v10, v10, 23
+; RV64F-NEXT: vwsubu.vv v8, v9, v10
+; RV64F-NEXT: fsrm a0
+; RV64F-NEXT: ret
;
; CHECK-D-LABEL: ctlz_zero_undef_nxv1i64:
; CHECK-D: # %bb.0:
@@ -2866,6 +3054,30 @@ define <vscale x 1 x i64> @ctlz_zero_undef_nxv1i64(<vscale x 1 x i64> %va) {
; CHECK-ZVBB-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; CHECK-ZVBB-NEXT: vclz.v v8, v8
; CHECK-ZVBB-NEXT: ret
+; CHECK-RV32F-LABEL: ctlz_zero_undef_nxv1i64:
+; CHECK-RV32F: # %bb.0:
+; CHECK-RV32F-NEXT: li a0, 190
+; CHECK-RV32F-NEXT: vsetvli a1, zero, e64, m1, ta, ma
+; CHECK-RV32F-NEXT: vmv.v.x v9, a0
+; CHECK-RV32F-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-RV32F-NEXT: fsrmi a0, 1
+; CHECK-RV32F-NEXT: vfncvt.f.xu.w v10, v8
+; CHECK-RV32F-NEXT: vsrl.vi v8, v10, 23
+; CHECK-RV32F-NEXT: vwsubu.wv v9, v9, v8
+; CHECK-RV32F-NEXT: fsrm a0
+; CHECK-RV32F-NEXT: vmv1r.v v8, v9
+; CHECK-RV32F-NEXT: ret
+; CHECK-RV64F-LABEL: ctlz_zero_undef_nxv1i64:
+; CHECK-RV64F: # %bb.0:
+; CHECK-RV64F-NEXT: li a0, 190
+; CHECK-RV64F-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
+; CHECK-RV64F-NEXT: vmv.v.x v9, a0
+; CHECK-RV64F-NEXT: fsrmi a0, 1
+; CHECK-RV64F-NEXT: vfncvt.f.xu.w v10, v8
+; CHECK-RV64F-NEXT: vsrl.vi v10, v10, 23
+; CHECK-RV64F-NEXT: vwsubu.vv v8, v9, v10
+; CHECK-RV64F-NEXT: fsrm a0
+; CHECK-RV64F-NEXT: ret
%a = call <vscale x 1 x i64> @llvm.ctlz.nxv1i64(<vscale x 1 x i64> %va, i1 true)
ret <vscale x 1 x i64> %a
}
@@ -2971,19 +3183,31 @@ define <vscale x 2 x i64> @ctlz_zero_undef_nxv2i64(<vscale x 2 x i64> %va) {
; RV64I-NEXT: vsrl.vx v8, v8, a0
; RV64I-NEXT: ret
;
-; CHECK-F-LABEL: ctlz_zero_undef_nxv2i64:
-; CHECK-F: # %bb.0:
-; CHECK-F-NEXT: li a0, 190
-; CHECK-F-NEXT: vsetvli a1, zero, e64, m2, ta, ma
-; CHECK-F-NEXT: vmv.v.x v10, a0
-; CHECK-F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-F-NEXT: fsrmi a0, 1
-; CHECK-F-NEXT: vfncvt.f.xu.w v12, v8
-; CHECK-F-NEXT: vsrl.vi v8, v12, 23
-; CHECK-F-NEXT: vwsubu.wv v10, v10, v8
-; CHECK-F-NEXT: fsrm a0
-; CHECK-F-NEXT: vmv2r.v v8, v10
-; CHECK-F-NEXT: ret
+; RV32F-LABEL: ctlz_zero_undef_nxv2i64:
+; RV32F: # %bb.0:
+; RV32F-NEXT: li a0, 190
+; RV32F-NEXT: vsetvli a1, zero, e64, m2, ta, ma
+; RV32F-NEXT: vmv.v.x v10, a0
+; RV32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV32F-NEXT: fsrmi a0, 1
+; RV32F-NEXT: vfncvt.f.xu.w v12, v8
+; RV32F-NEXT: vsrl.vi v8, v12, 23
+; RV32F-NEXT: vwsubu.wv v10, v10, v8
+; RV32F-NEXT: fsrm a0
+; RV32F-NEXT: vmv2r.v v8, v10
+; RV32F-NEXT: ret
+;
+; RV64F-LABEL: ctlz_zero_undef_nxv2i64:
+; RV64F: # %bb.0:
+; RV64F-NEXT: li a0, 190
+; RV64F-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; RV64F-NEXT: vmv.v.x v10, a0
+; RV64F-NEXT: fsrmi a0, 1
+; RV64F-NEXT: vfncvt.f.xu.w v11, v8
+; RV64F-NEXT: vsrl.vi v11, v11, 23
+; RV64F-NEXT: vwsubu.vv v8, v10, v11
+; RV64F-NEXT: fsrm a0
+; RV64F-NEXT: ret
;
; CHECK-D-LABEL: ctlz_zero_undef_nxv2i64:
; CHECK-D: # %bb.0:
@@ -3002,6 +3226,30 @@ define <vscale x 2 x i64> @ctlz_zero_undef_nxv2i64(<vscale x 2 x i64> %va) {
; CHECK-ZVBB-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; CHECK-ZVBB-NEXT: vclz.v v8, v8
; CHECK-ZVBB-NEXT: ret
+; CHECK-RV32F-LABEL: ctlz_zero_undef_nxv2i64:
+; CHECK-RV32F: # %bb.0:
+; CHECK-RV32F-NEXT: li a0, 190
+; CHECK-RV32F-NEXT: vsetvli a1, zero, e64, m2, ta, ma
+; CHECK-RV32F-NEXT: vmv.v.x v10, a0
+; CHECK-RV32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-RV32F-NEXT: fsrmi a0, 1
+; CHECK-RV32F-NEXT: vfncvt.f.xu.w v12, v8
+; CHECK-RV32F-NEXT: vsrl.vi v8, v12, 23
+; CHECK-RV32F-NEXT: vwsubu.wv v10, v10, v8
+; CHECK-RV32F-NEXT: fsrm a0
+; CHECK-RV32F-NEXT: vmv2r.v v8, v10
+; CHECK-RV32F-NEXT: ret
+; CHECK-RV64F-LABEL: ctlz_zero_undef_nxv2i64:
+; CHECK-RV64F: # %bb.0:
+; CHECK-RV64F-NEXT: li a0, 190
+; CHECK-RV64F-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-RV64F-NEXT: vmv.v.x v10, a0
+; CHECK-RV64F-NEXT: fsrmi a0, 1
+; CHECK-RV64F-NEXT: vfncvt.f.xu.w v11, v8
+; CHECK-RV64F-NEXT: vsrl.vi v11, v11, 23
+; CHECK-RV64F-NEXT: vwsubu.vv v8, v10, v11
+; CHECK-RV64F-NEXT: fsrm a0
+; CHECK-RV64F-NEXT: ret
%a = call <vscale x 2 x i64> @llvm.ctlz.nxv2i64(<vscale x 2 x i64> %va, i1 true)
ret <vscale x 2 x i64> %a
}
@@ -3107,19 +3355,31 @@ define <vscale x 4 x i64> @ctlz_zero_undef_nxv4i64(<vscale x 4 x i64> %va) {
; RV64I-NEXT: vsrl.vx v8, v8, a0
; RV64I-NEXT: ret
;
-; CHECK-F-LABEL: ctlz_zero_undef_nxv4i64:
-; CHECK-F: # %bb.0:
-; CHECK-F-NEXT: li a0, 190
-; CHECK-F-NEXT: vsetvli a1, zero, e64, m4, ta, ma
-; CHECK-F-NEXT: vmv.v.x v12, a0
-; CHECK-F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-F-NEXT: fsrmi a0, 1
-; CHECK-F-NEXT: vfncvt.f.xu.w v16, v8
-; CHECK-F-NEXT: vsrl.vi v8, v16, 23
-; CHECK-F-NEXT: vwsubu.wv v12, v12, v8
-; CHECK-F-NEXT: fsrm a0
-; CHECK-F-NEXT: vmv4r.v v8, v12
-; CHECK-F-NEXT: ret
+; RV32F-LABEL: ctlz_zero_undef_nxv4i64:
+; RV32F: # %bb.0:
+; RV32F-NEXT: li a0, 190
+; RV32F-NEXT: vsetvli a1, zero, e64, m4, ta, ma
+; RV32F-NEXT: vmv.v.x v12, a0
+; RV32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32F-NEXT: fsrmi a0, 1
+; RV32F-NEXT: vfncvt.f.xu.w v16, v8
+; RV32F-NEXT: vsrl.vi v8, v16, 23
+; RV32F-NEXT: vwsubu.wv v12, v12, v8
+; RV32F-NEXT: fsrm a0
+; RV32F-NEXT: vmv4r.v v8, v12
+; RV32F-NEXT: ret
+;
+; RV64F-LABEL: ctlz_zero_undef_nxv4i64:
+; RV64F: # %bb.0:
+; RV64F-NEXT: li a0, 190
+; RV64F-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; RV64F-NEXT: vmv.v.x v12, a0
+; RV64F-NEXT: fsrmi a0, 1
+; RV64F-NEXT: vfncvt.f.xu.w v14, v8
+; RV64F-NEXT: vsrl.vi v14, v14, 23
+; RV64F-NEXT: vwsubu.vv v8, v12, v14
+; RV64F-NEXT: fsrm a0
+; RV64F-NEXT: ret
;
; CHECK-D-LABEL: ctlz_zero_undef_nxv4i64:
; CHECK-D: # %bb.0:
@@ -3138,6 +3398,30 @@ define <vscale x 4 x i64> @ctlz_zero_undef_nxv4i64(<vscale x 4 x i64> %va) {
; CHECK-ZVBB-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; CHECK-ZVBB-NEXT: vclz.v v8, v8
; CHECK-ZVBB-NEXT: ret
+; CHECK-RV32F-LABEL: ctlz_zero_undef_nxv4i64:
+; CHECK-RV32F: # %bb.0:
+; CHECK-RV32F-NEXT: li a0, 190
+; CHECK-RV32F-NEXT: vsetvli a1, zero, e64, m4, ta, ma
+; CHECK-RV32F-NEXT: vmv.v.x v12, a0
+; CHECK-RV32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-RV32F-NEXT: fsrmi a0, 1
+; CHECK-RV32F-NEXT: vfncvt.f.xu.w v16, v8
+; CHECK-RV32F-NEXT: vsrl.vi v8, v16, 23
+; CHECK-RV32F-NEXT: vwsubu.wv v12, v12, v8
+; CHECK-RV32F-NEXT: fsrm a0
+; CHECK-RV32F-NEXT: vmv4r.v v8, v12
+; CHECK-RV32F-NEXT: ret
+; CHECK-RV64F-LABEL: ctlz_zero_undef_nxv4i64:
+; CHECK-RV64F: # %bb.0:
+; CHECK-RV64F-NEXT: li a0, 190
+; CHECK-RV64F-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; CHECK-RV64F-NEXT: vmv.v.x v12, a0
+; CHECK-RV64F-NEXT: fsrmi a0, 1
+; CHECK-RV64F-NEXT: vfncvt.f.xu.w v14, v8
+; CHECK-RV64F-NEXT: vsrl.vi v14, v14, 23
+; CHECK-RV64F-NEXT: vwsubu.vv v8, v12, v14
+; CHECK-RV64F-NEXT: fsrm a0
+; CHECK-RV64F-NEXT: ret
%a = call <vscale x 4 x i64> @llvm.ctlz.nxv4i64(<vscale x 4 x i64> %va, i1 true)
ret <vscale x 4 x i64> %a
}
@@ -3243,19 +3527,31 @@ define <vscale x 8 x i64> @ctlz_zero_undef_nxv8i64(<vscale x 8 x i64> %va) {
; RV64I-NEXT: vsrl.vx v8, v8, a0
; RV64I-NEXT: ret
;
-; CHECK-F-LABEL: ctlz_zero_undef_nxv8i64:
-; CHECK-F: # %bb.0:
-; CHECK-F-NEXT: vmv8r.v v16, v8
-; CHECK-F-NEXT: li a0, 190
-; CHECK-F-NEXT: vsetvli a1, zero, e64, m8, ta, ma
-; CHECK-F-NEXT: vmv.v.x v8, a0
-; CHECK-F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-F-NEXT: fsrmi a0, 1
-; CHECK-F-NEXT: vfncvt.f.xu.w v24, v16
-; CHECK-F-NEXT: vsrl.vi v16, v24, 23
-; CHECK-F-NEXT: vwsubu.wv v8, v8, v16
-; CHECK-F-NEXT: fsrm a0
-; CHECK-F-NEXT: ret
+; RV32F-LABEL: ctlz_zero_undef_nxv8i64:
+; RV32F: # %bb.0:
+; RV32F-NEXT: vmv8r.v v16, v8
+; RV32F-NEXT: li a0, 190
+; RV32F-NEXT: vsetvli a1, zero, e64, m8, ta, ma
+; RV32F-NEXT: vmv.v.x v8, a0
+; RV32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; RV32F-NEXT: fsrmi a0, 1
+; RV32F-NEXT: vfncvt.f.xu.w v24, v16
+; RV32F-NEXT: vsrl.vi v16, v24, 23
+; RV32F-NEXT: vwsubu.wv v8, v8, v16
+; RV32F-NEXT: fsrm a0
+; RV32F-NEXT: ret
+;
+; RV64F-LABEL: ctlz_zero_undef_nxv8i64:
+; RV64F: # %bb.0:
+; RV64F-NEXT: li a0, 190
+; RV64F-NEXT: vsetvli a1, zero, e32, m4, ta, ma
+; RV64F-NEXT: vmv.v.x v16, a0
+; RV64F-NEXT: fsrmi a0, 1
+; RV64F-NEXT: vfncvt.f.xu.w v20, v8
+; RV64F-NEXT: vsrl.vi v20, v20, 23
+; RV64F-NEXT: vwsubu.vv v8, v16, v20
+; RV64F-NEXT: fsrm a0
+; RV64F-NEXT: ret
;
; CHECK-D-LABEL: ctlz_zero_undef_nxv8i64:
; CHECK-D: # %bb.0:
@@ -3274,6 +3570,30 @@ define <vscale x 8 x i64> @ctlz_zero_undef_nxv8i64(<vscale x 8 x i64> %va) {
; CHECK-ZVBB-NEXT: vsetvli a0, zero, e64, m8, ta, ma
; CHECK-ZVBB-NEXT: vclz.v v8, v8
; CHECK-ZVBB-NEXT: ret
+; CHECK-RV32F-LABEL: ctlz_zero_undef_nxv8i64:
+; CHECK-RV32F: # %bb.0:
+; CHECK-RV32F-NEXT: vmv8r.v v16, v8
+; CHECK-RV32F-NEXT: li a0, 190
+; CHECK-RV32F-NEXT: vsetvli a1, zero, e64, m8, ta, ma
+; CHECK-RV32F-NEXT: vmv.v.x v8, a0
+; CHECK-RV32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-RV32F-NEXT: fsrmi a0, 1
+; CHECK-RV32F-NEXT: vfncvt.f.xu.w v24, v16
+; CHECK-RV32F-NEXT: vsrl.vi v16, v24, 23
+; CHECK-RV32F-NEXT: vwsubu.wv v8, v8, v16
+; CHECK-RV32F-NEXT: fsrm a0
+; CHECK-RV32F-NEXT: ret
+; CHECK-RV64F-LABEL: ctlz_zero_undef_nxv8i64:
+; CHECK-RV64F: # %bb.0:
+; CHECK-RV64F-NEXT: li a0, 190
+; CHECK-RV64F-NEXT: vsetvli a1, zero, e32, m4, ta, ma
+; CHECK-RV64F-NEXT: vmv.v.x v16, a0
+; CHECK-RV64F-NEXT: fsrmi a0, 1
+; CHECK-RV64F-NEXT: vfncvt.f.xu.w v20, v8
+; CHECK-RV64F-NEXT: vsrl.vi v20, v20, 23
+; CHECK-RV64F-NEXT: vwsubu.vv v8, v16, v20
+; CHECK-RV64F-NEXT: fsrm a0
+; CHECK-RV64F-NEXT: ret
%a = call <vscale x 8 x i64> @llvm.ctlz.nxv8i64(<vscale x 8 x i64> %va, i1 true)
ret <vscale x 8 x i64> %a
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll
index b14cde25aa85b2..d13f4d2dca1ff4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll
@@ -1241,13 +1241,12 @@ define <vscale x 1 x i64> @cttz_nxv1i64(<vscale x 1 x i64> %va) {
; RV64F-NEXT: fsrmi a0, 1
; RV64F-NEXT: vfncvt.f.xu.w v10, v9
; RV64F-NEXT: vsrl.vi v9, v10, 23
-; RV64F-NEXT: vsetvli zero, zero, e64, m1, ta, ma
-; RV64F-NEXT: vzext.vf2 v10, v9
; RV64F-NEXT: li a1, 127
-; RV64F-NEXT: vsub.vx v9, v10, a1
+; RV64F-NEXT: vwsubu.vx v10, v9, a1
+; RV64F-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; RV64F-NEXT: vmseq.vi v0, v8, 0
; RV64F-NEXT: li a1, 64
-; RV64F-NEXT: vmerge.vxm v8, v9, a1, v0
+; RV64F-NEXT: vmerge.vxm v8, v10, a1, v0
; RV64F-NEXT: fsrm a0
; RV64F-NEXT: ret
;
@@ -1404,13 +1403,12 @@ define <vscale x 2 x i64> @cttz_nxv2i64(<vscale x 2 x i64> %va) {
; RV64F-NEXT: fsrmi a0, 1
; RV64F-NEXT: vfncvt.f.xu.w v12, v10
; RV64F-NEXT: vsrl.vi v10, v12, 23
-; RV64F-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV64F-NEXT: vzext.vf2 v12, v10
; RV64F-NEXT: li a1, 127
-; RV64F-NEXT: vsub.vx v10, v12, a1
+; RV64F-NEXT: vwsubu.vx v12, v10, a1
+; RV64F-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; RV64F-NEXT: vmseq.vi v0, v8, 0
; RV64F-NEXT: li a1, 64
-; RV64F-NEXT: vmerge.vxm v8, v10, a1, v0
+; RV64F-NEXT: vmerge.vxm v8, v12, a1, v0
; RV64F-NEXT: fsrm a0
; RV64F-NEXT: ret
;
@@ -1567,13 +1565,12 @@ define <vscale x 4 x i64> @cttz_nxv4i64(<vscale x 4 x i64> %va) {
; RV64F-NEXT: fsrmi a0, 1
; RV64F-NEXT: vfncvt.f.xu.w v16, v12
; RV64F-NEXT: vsrl.vi v12, v16, 23
-; RV64F-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; RV64F-NEXT: vzext.vf2 v16, v12
; RV64F-NEXT: li a1, 127
-; RV64F-NEXT: vsub.vx v12, v16, a1
+; RV64F-NEXT: vwsubu.vx v16, v12, a1
+; RV64F-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; RV64F-NEXT: vmseq.vi v0, v8, 0
; RV64F-NEXT: li a1, 64
-; RV64F-NEXT: vmerge.vxm v8, v12, a1, v0
+; RV64F-NEXT: vmerge.vxm v8, v16, a1, v0
; RV64F-NEXT: fsrm a0
; RV64F-NEXT: ret
;
@@ -1730,13 +1727,12 @@ define <vscale x 8 x i64> @cttz_nxv8i64(<vscale x 8 x i64> %va) {
; RV64F-NEXT: fsrmi a0, 1
; RV64F-NEXT: vfncvt.f.xu.w v24, v16
; RV64F-NEXT: vsrl.vi v16, v24, 23
-; RV64F-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; RV64F-NEXT: vzext.vf2 v24, v16
; RV64F-NEXT: li a1, 127
-; RV64F-NEXT: vsub.vx v16, v24, a1
+; RV64F-NEXT: vwsubu.vx v24, v16, a1
+; RV64F-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV64F-NEXT: vmseq.vi v0, v8, 0
; RV64F-NEXT: li a1, 64
-; RV64F-NEXT: vmerge.vxm v8, v16, a1, v0
+; RV64F-NEXT: vmerge.vxm v8, v24, a1, v0
; RV64F-NEXT: fsrm a0
; RV64F-NEXT: ret
;
@@ -2891,21 +2887,35 @@ define <vscale x 1 x i64> @cttz_zero_undef_nxv1i64(<vscale x 1 x i64> %va) {
; RV64I-NEXT: vsrl.vx v8, v8, a0
; RV64I-NEXT: ret
;
-; CHECK-F-LABEL: cttz_zero_undef_nxv1i64:
-; CHECK-F: # %bb.0:
-; CHECK-F-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-F-NEXT: vrsub.vi v9, v8, 0
-; CHECK-F-NEXT: vand.vv v8, v8, v9
-; CHECK-F-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-F-NEXT: fsrmi a0, 1
-; CHECK-F-NEXT: vfncvt.f.xu.w v9, v8
-; CHECK-F-NEXT: vsrl.vi v8, v9, 23
-; CHECK-F-NEXT: vsetvli zero, zero, e64, m1, ta, ma
-; CHECK-F-NEXT: vzext.vf2 v9, v8
-; CHECK-F-NEXT: li a1, 127
-; CHECK-F-NEXT: vsub.vx v8, v9, a1
-; CHECK-F-NEXT: fsrm a0
-; CHECK-F-NEXT: ret
+; RV32F-LABEL: cttz_zero_undef_nxv1i64:
+; RV32F: # %bb.0:
+; RV32F-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; RV32F-NEXT: vrsub.vi v9, v8, 0
+; RV32F-NEXT: vand.vv v8, v8, v9
+; RV32F-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; RV32F-NEXT: fsrmi a0, 1
+; RV32F-NEXT: vfncvt.f.xu.w v9, v8
+; RV32F-NEXT: vsrl.vi v8, v9, 23
+; RV32F-NEXT: vsetvli zero, zero, e64, m1, ta, ma
+; RV32F-NEXT: vzext.vf2 v9, v8
+; RV32F-NEXT: li a1, 127
+; RV32F-NEXT: vsub.vx v8, v9, a1
+; RV32F-NEXT: fsrm a0
+; RV32F-NEXT: ret
+;
+; RV64F-LABEL: cttz_zero_undef_nxv1i64:
+; RV64F: # %bb.0:
+; RV64F-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; RV64F-NEXT: vrsub.vi v9, v8, 0
+; RV64F-NEXT: vand.vv v8, v8, v9
+; RV64F-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; RV64F-NEXT: fsrmi a0, 1
+; RV64F-NEXT: vfncvt.f.xu.w v9, v8
+; RV64F-NEXT: vsrl.vi v9, v9, 23
+; RV64F-NEXT: li a1, 127
+; RV64F-NEXT: vwsubu.vx v8, v9, a1
+; RV64F-NEXT: fsrm a0
+; RV64F-NEXT: ret
;
; CHECK-D-LABEL: cttz_zero_undef_nxv1i64:
; CHECK-D: # %bb.0:
@@ -3011,21 +3021,35 @@ define <vscale x 2 x i64> @cttz_zero_undef_nxv2i64(<vscale x 2 x i64> %va) {
; RV64I-NEXT: vsrl.vx v8, v8, a0
; RV64I-NEXT: ret
;
-; CHECK-F-LABEL: cttz_zero_undef_nxv2i64:
-; CHECK-F: # %bb.0:
-; CHECK-F-NEXT: vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-F-NEXT: vrsub.vi v10, v8, 0
-; CHECK-F-NEXT: vand.vv v8, v8, v10
-; CHECK-F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-F-NEXT: fsrmi a0, 1
-; CHECK-F-NEXT: vfncvt.f.xu.w v10, v8
-; CHECK-F-NEXT: vsrl.vi v8, v10, 23
-; CHECK-F-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-F-NEXT: vzext.vf2 v10, v8
-; CHECK-F-NEXT: li a1, 127
-; CHECK-F-NEXT: vsub.vx v8, v10, a1
-; CHECK-F-NEXT: fsrm a0
-; CHECK-F-NEXT: ret
+; RV32F-LABEL: cttz_zero_undef_nxv2i64:
+; RV32F: # %bb.0:
+; RV32F-NEXT: vsetvli a0, zero, e64, m2, ta, ma
+; RV32F-NEXT: vrsub.vi v10, v8, 0
+; RV32F-NEXT: vand.vv v8, v8, v10
+; RV32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV32F-NEXT: fsrmi a0, 1
+; RV32F-NEXT: vfncvt.f.xu.w v10, v8
+; RV32F-NEXT: vsrl.vi v8, v10, 23
+; RV32F-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; RV32F-NEXT: vzext.vf2 v10, v8
+; RV32F-NEXT: li a1, 127
+; RV32F-NEXT: vsub.vx v8, v10, a1
+; RV32F-NEXT: fsrm a0
+; RV32F-NEXT: ret
+;
+; RV64F-LABEL: cttz_zero_undef_nxv2i64:
+; RV64F: # %bb.0:
+; RV64F-NEXT: vsetvli a0, zero, e64, m2, ta, ma
+; RV64F-NEXT: vrsub.vi v10, v8, 0
+; RV64F-NEXT: vand.vv v8, v8, v10
+; RV64F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV64F-NEXT: fsrmi a0, 1
+; RV64F-NEXT: vfncvt.f.xu.w v10, v8
+; RV64F-NEXT: vsrl.vi v10, v10, 23
+; RV64F-NEXT: li a1, 127
+; RV64F-NEXT: vwsubu.vx v8, v10, a1
+; RV64F-NEXT: fsrm a0
+; RV64F-NEXT: ret
;
; CHECK-D-LABEL: cttz_zero_undef_nxv2i64:
; CHECK-D: # %bb.0:
@@ -3131,21 +3155,35 @@ define <vscale x 4 x i64> @cttz_zero_undef_nxv4i64(<vscale x 4 x i64> %va) {
; RV64I-NEXT: vsrl.vx v8, v8, a0
; RV64I-NEXT: ret
;
-; CHECK-F-LABEL: cttz_zero_undef_nxv4i64:
-; CHECK-F: # %bb.0:
-; CHECK-F-NEXT: vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-F-NEXT: vrsub.vi v12, v8, 0
-; CHECK-F-NEXT: vand.vv v8, v8, v12
-; CHECK-F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-F-NEXT: fsrmi a0, 1
-; CHECK-F-NEXT: vfncvt.f.xu.w v12, v8
-; CHECK-F-NEXT: vsrl.vi v8, v12, 23
-; CHECK-F-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-F-NEXT: vzext.vf2 v12, v8
-; CHECK-F-NEXT: li a1, 127
-; CHECK-F-NEXT: vsub.vx v8, v12, a1
-; CHECK-F-NEXT: fsrm a0
-; CHECK-F-NEXT: ret
+; RV32F-LABEL: cttz_zero_undef_nxv4i64:
+; RV32F: # %bb.0:
+; RV32F-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; RV32F-NEXT: vrsub.vi v12, v8, 0
+; RV32F-NEXT: vand.vv v8, v8, v12
+; RV32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32F-NEXT: fsrmi a0, 1
+; RV32F-NEXT: vfncvt.f.xu.w v12, v8
+; RV32F-NEXT: vsrl.vi v8, v12, 23
+; RV32F-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; RV32F-NEXT: vzext.vf2 v12, v8
+; RV32F-NEXT: li a1, 127
+; RV32F-NEXT: vsub.vx v8, v12, a1
+; RV32F-NEXT: fsrm a0
+; RV32F-NEXT: ret
+;
+; RV64F-LABEL: cttz_zero_undef_nxv4i64:
+; RV64F: # %bb.0:
+; RV64F-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; RV64F-NEXT: vrsub.vi v12, v8, 0
+; RV64F-NEXT: vand.vv v8, v8, v12
+; RV64F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV64F-NEXT: fsrmi a0, 1
+; RV64F-NEXT: vfncvt.f.xu.w v12, v8
+; RV64F-NEXT: vsrl.vi v12, v12, 23
+; RV64F-NEXT: li a1, 127
+; RV64F-NEXT: vwsubu.vx v8, v12, a1
+; RV64F-NEXT: fsrm a0
+; RV64F-NEXT: ret
;
; CHECK-D-LABEL: cttz_zero_undef_nxv4i64:
; CHECK-D: # %bb.0:
@@ -3251,21 +3289,35 @@ define <vscale x 8 x i64> @cttz_zero_undef_nxv8i64(<vscale x 8 x i64> %va) {
; RV64I-NEXT: vsrl.vx v8, v8, a0
; RV64I-NEXT: ret
;
-; CHECK-F-LABEL: cttz_zero_undef_nxv8i64:
-; CHECK-F: # %bb.0:
-; CHECK-F-NEXT: vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-F-NEXT: vrsub.vi v16, v8, 0
-; CHECK-F-NEXT: vand.vv v8, v8, v16
-; CHECK-F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-F-NEXT: fsrmi a0, 1
-; CHECK-F-NEXT: vfncvt.f.xu.w v16, v8
-; CHECK-F-NEXT: vsrl.vi v8, v16, 23
-; CHECK-F-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-F-NEXT: vzext.vf2 v16, v8
-; CHECK-F-NEXT: li a1, 127
-; CHECK-F-NEXT: vsub.vx v8, v16, a1
-; CHECK-F-NEXT: fsrm a0
-; CHECK-F-NEXT: ret
+; RV32F-LABEL: cttz_zero_undef_nxv8i64:
+; RV32F: # %bb.0:
+; RV32F-NEXT: vsetvli a0, zero, e64, m8, ta, ma
+; RV32F-NEXT: vrsub.vi v16, v8, 0
+; RV32F-NEXT: vand.vv v8, v8, v16
+; RV32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; RV32F-NEXT: fsrmi a0, 1
+; RV32F-NEXT: vfncvt.f.xu.w v16, v8
+; RV32F-NEXT: vsrl.vi v8, v16, 23
+; RV32F-NEXT: vsetvli zero, zero, e64, m8, ta, ma
+; RV32F-NEXT: vzext.vf2 v16, v8
+; RV32F-NEXT: li a1, 127
+; RV32F-NEXT: vsub.vx v8, v16, a1
+; RV32F-NEXT: fsrm a0
+; RV32F-NEXT: ret
+;
+; RV64F-LABEL: cttz_zero_undef_nxv8i64:
+; RV64F: # %bb.0:
+; RV64F-NEXT: vsetvli a0, zero, e64, m8, ta, ma
+; RV64F-NEXT: vrsub.vi v16, v8, 0
+; RV64F-NEXT: vand.vv v8, v8, v16
+; RV64F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; RV64F-NEXT: fsrmi a0, 1
+; RV64F-NEXT: vfncvt.f.xu.w v16, v8
+; RV64F-NEXT: vsrl.vi v16, v16, 23
+; RV64F-NEXT: li a1, 127
+; RV64F-NEXT: vwsubu.vx v8, v16, a1
+; RV64F-NEXT: fsrm a0
+; RV64F-NEXT: ret
;
; CHECK-D-LABEL: cttz_zero_undef_nxv8i64:
; CHECK-D: # %bb.0:
From 69ddef75f58a51c00035f19b1b59be606154aceb Mon Sep 17 00:00:00 2001
From: sun-jacobi <sun1011jacobi at gmail.com>
Date: Mon, 8 Apr 2024 14:05:42 +0900
Subject: [PATCH 3/5] Remove redundant parentheses in isSplat
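
No functional change: drop the redundant parentheses that the previous
revision added around the returned expression.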
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 6a41b67f2a1ef3..12b24d18a58037 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -13616,8 +13616,8 @@ struct NodeExtensionHelper {
/// Check if this instance represents a splat.
bool isSplat() const {
- return (OrigOperand.getOpcode() == RISCVISD::VMV_V_X_VL ||
- OrigOperand.getOpcode() == ISD::SPLAT_VECTOR);
+ return OrigOperand.getOpcode() == RISCVISD::VMV_V_X_VL ||
+ OrigOperand.getOpcode() == ISD::SPLAT_VECTOR;
}
/// Get the extended opcode.
From 55963c4d5541c36b1393563b82f5092d409b30db Mon Sep 17 00:00:00 2001
From: sun-jacobi <sun1011jacobi at gmail.com>
Date: Mon, 8 Apr 2024 17:33:43 +0900
Subject: [PATCH 4/5] Fix getSource for splat
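
Keep getSource simple again: let ISD::SPLAT_VECTOR fall through to the
default case and return the splat node itself, and have the narrowing
path splat Source.getOperand(0), instead of peeking through
SIGN_EXTEND_INREG/AND inside getSource. The only test fallout is a
redundant but harmless andi a0, a0, -1 in vwadd_vx_splat_zext on RV64,
where the mask that implemented the scalar zext now survives truncation
to the narrow element type.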
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 13 +------------
llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll | 1 +
2 files changed, 2 insertions(+), 12 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 12b24d18a58037..c95c6ed5269448 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -13598,17 +13598,6 @@ struct NodeExtensionHelper {
case RISCVISD::VZEXT_VL:
case RISCVISD::FP_EXTEND_VL:
return OrigOperand.getOperand(0);
- case ISD::SPLAT_VECTOR: {
- SDValue Op = OrigOperand.getOperand(0);
- unsigned Opc = Op.getOpcode();
- if (SupportsSExt && Opc == ISD::SIGN_EXTEND_INREG)
- return Op.getOperand(0);
-
- if (SupportsZExt && Opc == ISD::AND)
- return Op.getOperand(0);
-
- return Op;
- }
default:
return OrigOperand;
}
@@ -13662,7 +13651,7 @@ struct NodeExtensionHelper {
case RISCVISD::FP_EXTEND_VL:
return DAG.getNode(ExtOpc, DL, NarrowVT, Source, Mask, VL);
case ISD::SPLAT_VECTOR:
- return DAG.getSplat(NarrowVT, DL, Source);
+ return DAG.getSplat(NarrowVT, DL, Source.getOperand(0));
case RISCVISD::VMV_V_X_VL:
return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT,
DAG.getUNDEF(NarrowVT), Source.getOperand(1), VL);
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll
index 985424e3557b98..34e8122b89bd74 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll
@@ -1484,6 +1484,7 @@ define <vscale x 8 x i64> @vwadd_vx_splat_zext(<vscale x 8 x i32> %va, i32 %b) {
;
; RV64-LABEL: vwadd_vx_splat_zext:
; RV64: # %bb.0:
+; RV64-NEXT: andi a0, a0, -1
; RV64-NEXT: vsetvli a1, zero, e32, m4, ta, ma
; RV64-NEXT: vwaddu.vx v16, v8, a0
; RV64-NEXT: vmv8r.v v8, v16
From 83d9b7b5c76cc762b7e2966069c8d4bd3c550d91 Mon Sep 17 00:00:00 2001
From: sun-jacobi <sun1011jacobi at gmail.com>
Date: Mon, 8 Apr 2024 20:20:02 +0900
Subject: [PATCH 5/5] Remove isVector check in fillUpExtensionSupportForSplat
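
The check was unreachable: fillUpExtensionSupportForSplat is only
called for ISD::SPLAT_VECTOR and RISCVISD::VMV_V_X_VL nodes (as the
assert already states), and a SPLAT_VECTOR always produces a vector
type, so VT.isVector() cannot be false here.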
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 3 ---
1 file changed, 3 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index c95c6ed5269448..b88a2b574f12bf 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -13792,9 +13792,6 @@ struct NodeExtensionHelper {
assert((Opc == ISD::SPLAT_VECTOR || Opc == RISCVISD::VMV_V_X_VL) &&
"Unexpected Opcode");
- if (Opc == ISD::SPLAT_VECTOR && !VT.isVector())
- return;
-
// The passthru must be undef for tail agnostic.
if (Opc == RISCVISD::VMV_V_X_VL && !OrigOperand.getOperand(0).isUndef())
return;