[llvm] [RISCV] Fold vector shift of sext/zext to widening multiply (PR #121563)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 3 03:43:49 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-risc-v
Author: Piotr Fusik (pfusik)
<details>
<summary>Changes</summary>
(shl (sext X), C) -> (vwmulsu X, 1u << C)
(shl (zext X), C) -> (vwmulu X, 1u << C)
(applies to constant-splat shift amounts with 2 <= C < narrow element width; C == 1 is left to the existing vwadd/vwaddu folds)
---
Patch is 115.70 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/121563.diff
7 Files Affected:
- (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+74-2)
- (modified) llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll (+106-96)
- (modified) llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll (+112-102)
- (modified) llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll (+217-206)
- (modified) llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll (+195-236)
- (modified) llvm/test/CodeGen/RISCV/rvv/vwsll-sdnode.ll (+8-13)
- (modified) llvm/test/CodeGen/RISCV/rvv/vwsll-vp.ll (+22-12)
``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 04dd23d9cdaa20..955a15393ca8a1 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -17341,6 +17341,78 @@ static SDValue combineScalarCTPOPToVCPOP(SDNode *N, SelectionDAG &DAG,
return DAG.getZExtOrTrunc(Pop, DL, VT);
}
+static SDValue combineSHL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
+ const RISCVSubtarget &Subtarget) {
+ if (DCI.isBeforeLegalize())
+ return SDValue();
+
+ // (shl (zext x), y) -> (vwsll x, y)
+ if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
+ return V;
+
+ // (shl (sext x), C) -> (vwmulsu x, 1u << C)
+ // (shl (zext x), C) -> (vwmulu x, 1u << C)
+
+ SDValue LHS = N->getOperand(0);
+ if (!LHS.hasOneUse())
+ return SDValue();
+ unsigned Opcode;
+ switch (LHS.getOpcode()) {
+ case ISD::SIGN_EXTEND:
+ Opcode = RISCVISD::VWMULSU_VL;
+ break;
+ case ISD::ZERO_EXTEND:
+ Opcode = RISCVISD::VWMULU_VL;
+ break;
+ default:
+ return SDValue();
+ }
+
+ SDValue RHS = N->getOperand(1);
+ APInt ShAmt;
+ if (!ISD::isConstantSplatVector(RHS.getNode(), ShAmt))
+ return SDValue();
+
+ // Better foldings:
+ // (shl (sext x), 1) -> (vwadd x, x)
+ // (shl (zext x), 1) -> (vwaddu x, x)
+ uint64_t ShAmtInt = ShAmt.getZExtValue();
+ if (ShAmtInt <= 1)
+ return SDValue();
+
+ SDValue NarrowOp = LHS.getOperand(0);
+ EVT NarrowVT = NarrowOp.getValueType();
+ uint64_t NarrowBits = NarrowVT.getScalarSizeInBits();
+ if (ShAmtInt >= NarrowBits)
+ return SDValue();
+ EVT VT = N->getValueType(0);
+ if (NarrowBits * 2 != VT.getScalarSizeInBits())
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc DL(N);
+ SDValue Passthru, Mask, VL;
+ switch (N->getOpcode()) {
+ case ISD::SHL:
+ if (!VT.isScalableVector())
+ return SDValue();
+ Passthru = DAG.getUNDEF(VT);
+ std::tie(Mask, VL) =
+ getDefaultScalableVLOps(VT.getSimpleVT(), DL, DAG, Subtarget);
+ break;
+ case RISCVISD::SHL_VL:
+ Passthru = N->getOperand(2);
+ Mask = N->getOperand(3);
+ VL = N->getOperand(4);
+ break;
+ default:
+ llvm_unreachable("Expected SHL");
+ }
+ return DAG.getNode(Opcode, DL, VT, NarrowOp,
+ DAG.getConstant(1ULL << ShAmtInt, SDLoc(RHS), NarrowVT),
+ Passthru, Mask, VL);
+}
+
SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -17970,7 +18042,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
break;
}
case RISCVISD::SHL_VL:
- if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
+ if (SDValue V = combineSHL(N, DCI, Subtarget))
return V;
[[fallthrough]];
case RISCVISD::SRA_VL:
@@ -17995,7 +18067,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::SRL:
case ISD::SHL: {
if (N->getOpcode() == ISD::SHL) {
- if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
+ if (SDValue V = combineSHL(N, DCI, Subtarget))
return V;
}
SDValue ShAmt = N->getOperand(1);
diff --git a/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll
index 9ee2324f615dd8..0fad09f27007c0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll
@@ -775,11 +775,11 @@ define <vscale x 8 x i32> @mgather_baseidx_sext_nxv8i8_nxv8i32(ptr %base, <vscal
define <vscale x 8 x i32> @mgather_baseidx_zext_nxv8i8_nxv8i32(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i32> %passthru) {
; CHECK-LABEL: mgather_baseidx_zext_nxv8i8_nxv8i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vsll.vi v8, v10, 2
+; CHECK-NEXT: li a1, 4
+; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma
+; CHECK-NEXT: vwmulu.vx v10, v8, a1
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT: vluxei16.v v12, (a0), v8, v0.t
+; CHECK-NEXT: vluxei16.v v12, (a0), v10, v0.t
; CHECK-NEXT: vmv.v.v v8, v12
; CHECK-NEXT: ret
%eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
@@ -791,10 +791,11 @@ define <vscale x 8 x i32> @mgather_baseidx_zext_nxv8i8_nxv8i32(ptr %base, <vscal
define <vscale x 8 x i32> @mgather_baseidx_nxv8i16_nxv8i32(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i32> %passthru) {
; RV32-LABEL: mgather_baseidx_nxv8i16_nxv8i32:
; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu
-; RV32-NEXT: vsext.vf2 v16, v8
-; RV32-NEXT: vsll.vi v8, v16, 2
-; RV32-NEXT: vluxei32.v v12, (a0), v8, v0.t
+; RV32-NEXT: li a1, 4
+; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; RV32-NEXT: vwmulsu.vx v16, v8, a1
+; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu
+; RV32-NEXT: vluxei32.v v12, (a0), v16, v0.t
; RV32-NEXT: vmv.v.v v8, v12
; RV32-NEXT: ret
;
@@ -815,10 +816,11 @@ define <vscale x 8 x i32> @mgather_baseidx_nxv8i16_nxv8i32(ptr %base, <vscale x
define <vscale x 8 x i32> @mgather_baseidx_sext_nxv8i16_nxv8i32(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i32> %passthru) {
; RV32-LABEL: mgather_baseidx_sext_nxv8i16_nxv8i32:
; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu
-; RV32-NEXT: vsext.vf2 v16, v8
-; RV32-NEXT: vsll.vi v8, v16, 2
-; RV32-NEXT: vluxei32.v v12, (a0), v8, v0.t
+; RV32-NEXT: li a1, 4
+; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; RV32-NEXT: vwmulsu.vx v16, v8, a1
+; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu
+; RV32-NEXT: vluxei32.v v12, (a0), v16, v0.t
; RV32-NEXT: vmv.v.v v8, v12
; RV32-NEXT: ret
;
@@ -840,10 +842,11 @@ define <vscale x 8 x i32> @mgather_baseidx_sext_nxv8i16_nxv8i32(ptr %base, <vsca
define <vscale x 8 x i32> @mgather_baseidx_zext_nxv8i16_nxv8i32(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i32> %passthru) {
; CHECK-LABEL: mgather_baseidx_zext_nxv8i16_nxv8i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, mu
-; CHECK-NEXT: vzext.vf2 v16, v8
-; CHECK-NEXT: vsll.vi v8, v16, 2
-; CHECK-NEXT: vluxei32.v v12, (a0), v8, v0.t
+; CHECK-NEXT: li a1, 4
+; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; CHECK-NEXT: vwmulu.vx v16, v8, a1
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
+; CHECK-NEXT: vluxei32.v v12, (a0), v16, v0.t
; CHECK-NEXT: vmv.v.v v8, v12
; CHECK-NEXT: ret
%eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
@@ -863,10 +866,9 @@ define <vscale x 8 x i32> @mgather_baseidx_nxv8i32(ptr %base, <vscale x 8 x i32>
;
; RV64-LABEL: mgather_baseidx_nxv8i32:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
-; RV64-NEXT: vsext.vf2 v16, v8
-; RV64-NEXT: vsll.vi v16, v16, 2
-; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, mu
+; RV64-NEXT: li a1, 4
+; RV64-NEXT: vsetvli a2, zero, e32, m4, ta, mu
+; RV64-NEXT: vwmulsu.vx v16, v8, a1
; RV64-NEXT: vluxei64.v v12, (a0), v16, v0.t
; RV64-NEXT: vmv.v.v v8, v12
; RV64-NEXT: ret
@@ -1034,11 +1036,11 @@ define <vscale x 8 x i64> @mgather_baseidx_sext_nxv8i8_nxv8i64(ptr %base, <vscal
define <vscale x 8 x i64> @mgather_baseidx_zext_nxv8i8_nxv8i64(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i64> %passthru) {
; CHECK-LABEL: mgather_baseidx_zext_nxv8i8_nxv8i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vsll.vi v8, v10, 3
+; CHECK-NEXT: li a1, 8
+; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma
+; CHECK-NEXT: vwmulu.vx v10, v8, a1
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: vluxei16.v v16, (a0), v8, v0.t
+; CHECK-NEXT: vluxei16.v v16, (a0), v10, v0.t
; CHECK-NEXT: vmv.v.v v8, v16
; CHECK-NEXT: ret
%eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
@@ -1050,11 +1052,11 @@ define <vscale x 8 x i64> @mgather_baseidx_zext_nxv8i8_nxv8i64(ptr %base, <vscal
define <vscale x 8 x i64> @mgather_baseidx_nxv8i16_nxv8i64(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i64> %passthru) {
; RV32-LABEL: mgather_baseidx_nxv8i16_nxv8i64:
; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; RV32-NEXT: vsext.vf2 v12, v8
-; RV32-NEXT: vsll.vi v8, v12, 3
+; RV32-NEXT: li a1, 8
+; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; RV32-NEXT: vwmulsu.vx v12, v8, a1
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t
+; RV32-NEXT: vluxei32.v v16, (a0), v12, v0.t
; RV32-NEXT: vmv.v.v v8, v16
; RV32-NEXT: ret
;
@@ -1074,11 +1076,11 @@ define <vscale x 8 x i64> @mgather_baseidx_nxv8i16_nxv8i64(ptr %base, <vscale x
define <vscale x 8 x i64> @mgather_baseidx_sext_nxv8i16_nxv8i64(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i64> %passthru) {
; RV32-LABEL: mgather_baseidx_sext_nxv8i16_nxv8i64:
; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; RV32-NEXT: vsext.vf2 v12, v8
-; RV32-NEXT: vsll.vi v8, v12, 3
+; RV32-NEXT: li a1, 8
+; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; RV32-NEXT: vwmulsu.vx v12, v8, a1
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t
+; RV32-NEXT: vluxei32.v v16, (a0), v12, v0.t
; RV32-NEXT: vmv.v.v v8, v16
; RV32-NEXT: ret
;
@@ -1099,11 +1101,11 @@ define <vscale x 8 x i64> @mgather_baseidx_sext_nxv8i16_nxv8i64(ptr %base, <vsca
define <vscale x 8 x i64> @mgather_baseidx_zext_nxv8i16_nxv8i64(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i64> %passthru) {
; CHECK-LABEL: mgather_baseidx_zext_nxv8i16_nxv8i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; CHECK-NEXT: vzext.vf2 v12, v8
-; CHECK-NEXT: vsll.vi v8, v12, 3
+; CHECK-NEXT: li a1, 8
+; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; CHECK-NEXT: vwmulu.vx v12, v8, a1
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: vluxei32.v v16, (a0), v8, v0.t
+; CHECK-NEXT: vluxei32.v v16, (a0), v12, v0.t
; CHECK-NEXT: vmv.v.v v8, v16
; CHECK-NEXT: ret
%eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
@@ -1124,10 +1126,11 @@ define <vscale x 8 x i64> @mgather_baseidx_nxv8i32_nxv8i64(ptr %base, <vscale x
;
; RV64-LABEL: mgather_baseidx_nxv8i32_nxv8i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu
-; RV64-NEXT: vsext.vf2 v24, v8
-; RV64-NEXT: vsll.vi v8, v24, 3
-; RV64-NEXT: vluxei64.v v16, (a0), v8, v0.t
+; RV64-NEXT: li a1, 8
+; RV64-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV64-NEXT: vwmulsu.vx v24, v8, a1
+; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu
+; RV64-NEXT: vluxei64.v v16, (a0), v24, v0.t
; RV64-NEXT: vmv.v.v v8, v16
; RV64-NEXT: ret
%ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i32> %idxs
@@ -1147,10 +1150,11 @@ define <vscale x 8 x i64> @mgather_baseidx_sext_nxv8i32_nxv8i64(ptr %base, <vsca
;
; RV64-LABEL: mgather_baseidx_sext_nxv8i32_nxv8i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu
-; RV64-NEXT: vsext.vf2 v24, v8
-; RV64-NEXT: vsll.vi v8, v24, 3
-; RV64-NEXT: vluxei64.v v16, (a0), v8, v0.t
+; RV64-NEXT: li a1, 8
+; RV64-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV64-NEXT: vwmulsu.vx v24, v8, a1
+; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu
+; RV64-NEXT: vluxei64.v v16, (a0), v24, v0.t
; RV64-NEXT: vmv.v.v v8, v16
; RV64-NEXT: ret
%eidxs = sext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
@@ -1171,10 +1175,11 @@ define <vscale x 8 x i64> @mgather_baseidx_zext_nxv8i32_nxv8i64(ptr %base, <vsca
;
; RV64-LABEL: mgather_baseidx_zext_nxv8i32_nxv8i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu
-; RV64-NEXT: vzext.vf2 v24, v8
-; RV64-NEXT: vsll.vi v8, v24, 3
-; RV64-NEXT: vluxei64.v v16, (a0), v8, v0.t
+; RV64-NEXT: li a1, 8
+; RV64-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV64-NEXT: vwmulu.vx v24, v8, a1
+; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu
+; RV64-NEXT: vluxei64.v v16, (a0), v24, v0.t
; RV64-NEXT: vmv.v.v v8, v16
; RV64-NEXT: ret
%eidxs = zext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
@@ -1845,11 +1850,11 @@ define <vscale x 8 x float> @mgather_baseidx_sext_nxv8i8_nxv8f32(ptr %base, <vsc
define <vscale x 8 x float> @mgather_baseidx_zext_nxv8i8_nxv8f32(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru) {
; CHECK-LABEL: mgather_baseidx_zext_nxv8i8_nxv8f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vsll.vi v8, v10, 2
+; CHECK-NEXT: li a1, 4
+; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma
+; CHECK-NEXT: vwmulu.vx v10, v8, a1
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT: vluxei16.v v12, (a0), v8, v0.t
+; CHECK-NEXT: vluxei16.v v12, (a0), v10, v0.t
; CHECK-NEXT: vmv.v.v v8, v12
; CHECK-NEXT: ret
%eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
@@ -1861,10 +1866,11 @@ define <vscale x 8 x float> @mgather_baseidx_zext_nxv8i8_nxv8f32(ptr %base, <vsc
define <vscale x 8 x float> @mgather_baseidx_nxv8i16_nxv8f32(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru) {
; RV32-LABEL: mgather_baseidx_nxv8i16_nxv8f32:
; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu
-; RV32-NEXT: vsext.vf2 v16, v8
-; RV32-NEXT: vsll.vi v8, v16, 2
-; RV32-NEXT: vluxei32.v v12, (a0), v8, v0.t
+; RV32-NEXT: li a1, 4
+; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; RV32-NEXT: vwmulsu.vx v16, v8, a1
+; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu
+; RV32-NEXT: vluxei32.v v12, (a0), v16, v0.t
; RV32-NEXT: vmv.v.v v8, v12
; RV32-NEXT: ret
;
@@ -1885,10 +1891,11 @@ define <vscale x 8 x float> @mgather_baseidx_nxv8i16_nxv8f32(ptr %base, <vscale
define <vscale x 8 x float> @mgather_baseidx_sext_nxv8i16_nxv8f32(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru) {
; RV32-LABEL: mgather_baseidx_sext_nxv8i16_nxv8f32:
; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu
-; RV32-NEXT: vsext.vf2 v16, v8
-; RV32-NEXT: vsll.vi v8, v16, 2
-; RV32-NEXT: vluxei32.v v12, (a0), v8, v0.t
+; RV32-NEXT: li a1, 4
+; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; RV32-NEXT: vwmulsu.vx v16, v8, a1
+; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu
+; RV32-NEXT: vluxei32.v v12, (a0), v16, v0.t
; RV32-NEXT: vmv.v.v v8, v12
; RV32-NEXT: ret
;
@@ -1910,10 +1917,11 @@ define <vscale x 8 x float> @mgather_baseidx_sext_nxv8i16_nxv8f32(ptr %base, <vs
define <vscale x 8 x float> @mgather_baseidx_zext_nxv8i16_nxv8f32(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru) {
; CHECK-LABEL: mgather_baseidx_zext_nxv8i16_nxv8f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, mu
-; CHECK-NEXT: vzext.vf2 v16, v8
-; CHECK-NEXT: vsll.vi v8, v16, 2
-; CHECK-NEXT: vluxei32.v v12, (a0), v8, v0.t
+; CHECK-NEXT: li a1, 4
+; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; CHECK-NEXT: vwmulu.vx v16, v8, a1
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
+; CHECK-NEXT: vluxei32.v v12, (a0), v16, v0.t
; CHECK-NEXT: vmv.v.v v8, v12
; CHECK-NEXT: ret
%eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
@@ -1933,10 +1941,9 @@ define <vscale x 8 x float> @mgather_baseidx_nxv8f32(ptr %base, <vscale x 8 x i3
;
; RV64-LABEL: mgather_baseidx_nxv8f32:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
-; RV64-NEXT: vsext.vf2 v16, v8
-; RV64-NEXT: vsll.vi v16, v16, 2
-; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, mu
+; RV64-NEXT: li a1, 4
+; RV64-NEXT: vsetvli a2, zero, e32, m4, ta, mu
+; RV64-NEXT: vwmulsu.vx v16, v8, a1
; RV64-NEXT: vluxei64.v v12, (a0), v16, v0.t
; RV64-NEXT: vmv.v.v v8, v12
; RV64-NEXT: ret
@@ -2104,11 +2111,11 @@ define <vscale x 8 x double> @mgather_baseidx_sext_nxv8i8_nxv8f64(ptr %base, <vs
define <vscale x 8 x double> @mgather_baseidx_zext_nxv8i8_nxv8f64(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru) {
; CHECK-LABEL: mgather_baseidx_zext_nxv8i8_nxv8f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vsll.vi v8, v10, 3
+; CHECK-NEXT: li a1, 8
+; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma
+; CHECK-NEXT: vwmulu.vx v10, v8, a1
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: vluxei16.v v16, (a0), v8, v0.t
+; CHECK-NEXT: vluxei16.v v16, (a0), v10, v0.t
; CHECK-NEXT: vmv.v.v v8, v16
; CHECK-NEXT: ret
%eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
@@ -2120,11 +2127,11 @@ define <vscale x 8 x double> @mgather_baseidx_zext_nxv8i8_nxv8f64(ptr %base, <vs
define <vscale x 8 x double> @mgather_baseidx_nxv8i16_nxv8f64(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru) {
; RV32-LABEL: mgather_baseidx_nxv8i16_nxv8f64:
; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; RV32-NEXT: vsext.vf2 v12, v8
-; RV32-NEXT: vsll.vi v8, v12, 3
+; RV32-NEXT: li a1, 8
+; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; RV32-NEXT: vwmulsu.vx v12, v8, a1
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t
+; RV32-NEXT: vluxei32.v v16, (a0), v12, v0.t
; RV32-NEXT: vmv.v.v v8, v16
; RV32-NEXT: ret
;
@@ -2144,11 +2151,11 @@ define <vscale x 8 x double> @mgather_baseidx_nxv8i16_nxv8f64(ptr %base, <vscale
define <vscale x 8 x double> @mgather_baseidx_sext_nxv8i16_nxv8f64(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru) {
; RV32-LABEL: mgather_baseidx_sext_nxv8i16_nxv8f64:
; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; RV32-NEXT: vsext.vf2 v12, v8
-; RV32-NEXT: vsll.vi v8, v12, 3
+; RV32-NEXT: li a1, 8
+; RV32-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; RV32-NEXT: vwmulsu.vx v12, v8, a1
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t
+; RV32-NEXT: vluxei32.v v16, (a0), v12, v0.t
; RV32-NEXT: vmv.v.v v8, v16
; RV32-NEXT: ret
;
@@ -2169,11 +2176,11 @@ define <vscale x 8 x double> @mgather_baseidx_sext_nxv8i16_nxv8f64(ptr %base, <v
define <vscale x 8 x double> @mgather_baseidx_zext_nxv8i16_nxv8f64(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru) {
; CHECK-LABEL: mgather_baseidx_zext_nxv8i16_nxv8f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; CHECK-NEXT: vzext.vf2 v12, v8
-; CHECK-NEXT: vsll.vi v8, v12, 3
+; CHECK-NEXT: li a1, 8
+; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; CHECK-NEXT: vwmulu.vx v12, v8, a1
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: vluxei32.v v16, (a0), v8, v0.t
+; CHECK-NEXT: vluxei32.v v16, (a0), v12, v0.t
; CHECK-NEXT: vmv.v.v v8, v16
; CHECK-NEXT: ret
%eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
@@ -2194,10 +2201,11 @@ define <vscale x 8 x double> @mgather_baseidx_nxv8i32_nxv8f64(ptr %base, <vscale
;
; RV64-LABEL: mgather_baseidx_nxv8i32_nxv8f64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu
-; RV64-NEXT: vsext.vf2 v24, v8
-; RV64-NEXT: vsll.vi v8, v24, 3
-; RV64-NEXT: vluxei64.v v16, (a0), v8, v0.t
+; RV64-NEXT: li a1, 8
+; RV64-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV64-NEXT: vwmulsu.vx v24, v8, a1
+; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu
+; RV64-NEXT: vluxei64.v v16, (a0), v24, v0.t
; RV64-NEXT: vmv.v.v v8, v16
; RV64-NEXT: ret
%ptrs = getelementptr inb...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/121563
More information about the llvm-commits
mailing list