[llvm] 20637e7 - [RISCV] Sink splatted fpext operands (#125800)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 4 21:02:48 PST 2025
Author: Luke Lau
Date: 2025-02-05T13:02:46+08:00
New Revision: 20637e7fa7649b181333e2b07b0afd1aab37128e
URL: https://github.com/llvm/llvm-project/commit/20637e7fa7649b181333e2b07b0afd1aab37128e
DIFF: https://github.com/llvm/llvm-project/commit/20637e7fa7649b181333e2b07b0afd1aab37128e.diff
LOG: [RISCV] Sink splatted fpext operands (#125800)
We sink splatted operands in codegenprepare to help match .vx/.vf
patterns. This extends it to also sink any fpext feeding the splat so
that we can match widening vfwadd.vf/vfwadd.wf patterns too.
Some instructions don't have .wf forms, so there's no benefit to sinking
the fpext for them. For simplicity this sinks them anyway and lets
early-machinelicm hoist them back out.
Added:
Modified:
llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index ee649c12007809..d2e652fb5f4de0 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -2758,7 +2758,12 @@ bool RISCVTTIImpl::isProfitableToSinkOperands(
return false;
}
- Ops.push_back(&Op->getOperandUse(0));
+ Use *InsertEltUse = &Op->getOperandUse(0);
+ // Sink any fpexts since they might be used in a widening fp pattern.
+ auto *InsertElt = cast<InsertElementInst>(InsertEltUse);
+ if (isa<FPExtInst>(InsertElt->getOperand(1)))
+ Ops.push_back(&InsertElt->getOperandUse(1));
+ Ops.push_back(InsertEltUse);
Ops.push_back(&OpIdx.value());
}
return true;
diff --git a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
index 735621aa4390e3..1948675ae9cf0a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
@@ -5741,3 +5741,152 @@ vector.body:
for.cond.cleanup:
ret void
}
+
+define void @sink_splat_vfwadd_vf(ptr nocapture %a, ptr nocapture %b, float %f) {
+; CHECK-LABEL: sink_splat_vfwadd_vf:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: li a1, 0
+; CHECK-NEXT: li a2, 1020
+; CHECK-NEXT: vsetvli a3, zero, e32, m1, ta, ma
+; CHECK-NEXT: .LBB125_1: # %vector.body
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: vl1re32.v v8, (a0)
+; CHECK-NEXT: addi a1, a1, 4
+; CHECK-NEXT: addi a2, a2, -4
+; CHECK-NEXT: vfwadd.vf v10, v8, fa0
+; CHECK-NEXT: vs2r.v v10, (a0)
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: j .LBB125_1
+entry:
+ %f.ext = fpext float %f to double
+ %broadcast.splatinsert = insertelement <vscale x 2 x double> poison, double %f.ext, i32 0
+ %broadcast.splat = shufflevector <vscale x 2 x double> %broadcast.splatinsert, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %entry
+ %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
+ %0 = getelementptr float, ptr %a, i64 %index
+ %wide.load = load <vscale x 2 x float>, ptr %0
+ %ext = fpext <vscale x 2 x float> %wide.load to <vscale x 2 x double>
+ %1 = fadd <vscale x 2 x double> %ext, %broadcast.splat
+ %2 = getelementptr double, ptr %b, i64 %index
+ store <vscale x 2 x double> %1, ptr %0
+ %index.next = add i64 %index, 4
+ %3 = icmp eq i64 %index.next, 1024
+ br i1 32, label %for.cond.cleanup, label %vector.body
+
+for.cond.cleanup: ; preds = %vector.body
+ ret void
+}
+
+define void @sink_splat_vfwadd_wf(ptr nocapture %a, ptr nocapture %b, float %f) {
+; CHECK-LABEL: sink_splat_vfwadd_wf:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: li a1, 0
+; CHECK-NEXT: li a2, 1020
+; CHECK-NEXT: vsetvli a3, zero, e32, m1, ta, ma
+; CHECK-NEXT: .LBB126_1: # %vector.body
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: vl2re64.v v8, (a0)
+; CHECK-NEXT: addi a1, a1, 4
+; CHECK-NEXT: addi a2, a2, -4
+; CHECK-NEXT: vfwadd.wf v8, v8, fa0
+; CHECK-NEXT: vs2r.v v8, (a0)
+; CHECK-NEXT: addi a0, a0, 32
+; CHECK-NEXT: j .LBB126_1
+entry:
+ %f.ext = fpext float %f to double
+ %broadcast.splatinsert = insertelement <vscale x 2 x double> poison, double %f.ext, i32 0
+ %broadcast.splat = shufflevector <vscale x 2 x double> %broadcast.splatinsert, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %entry
+ %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
+ %0 = getelementptr double, ptr %a, i64 %index
+ %wide.load = load <vscale x 2 x double>, ptr %0
+ %1 = fadd <vscale x 2 x double> %wide.load, %broadcast.splat
+ %2 = getelementptr double, ptr %b, i64 %index
+ store <vscale x 2 x double> %1, ptr %0
+ %index.next = add i64 %index, 4
+ %3 = icmp eq i64 %index.next, 1024
+ br i1 32, label %for.cond.cleanup, label %vector.body
+
+for.cond.cleanup: ; preds = %vector.body
+ ret void
+}
+
+define void @sink_splat_vfwmul_vf(ptr nocapture %a, ptr nocapture %b, float %f) {
+; CHECK-LABEL: sink_splat_vfwmul_vf:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: li a1, 0
+; CHECK-NEXT: li a2, 1020
+; CHECK-NEXT: vsetvli a3, zero, e32, m1, ta, ma
+; CHECK-NEXT: .LBB127_1: # %vector.body
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: vl1re32.v v8, (a0)
+; CHECK-NEXT: addi a1, a1, 4
+; CHECK-NEXT: addi a2, a2, -4
+; CHECK-NEXT: vfwmul.vf v10, v8, fa0
+; CHECK-NEXT: vs2r.v v10, (a0)
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: j .LBB127_1
+entry:
+ %f.ext = fpext float %f to double
+ %broadcast.splatinsert = insertelement <vscale x 2 x double> poison, double %f.ext, i32 0
+ %broadcast.splat = shufflevector <vscale x 2 x double> %broadcast.splatinsert, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %entry
+ %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
+ %0 = getelementptr float, ptr %a, i64 %index
+ %wide.load = load <vscale x 2 x float>, ptr %0
+ %ext = fpext <vscale x 2 x float> %wide.load to <vscale x 2 x double>
+ %1 = fmul <vscale x 2 x double> %ext, %broadcast.splat
+ %2 = getelementptr double, ptr %b, i64 %index
+ store <vscale x 2 x double> %1, ptr %0
+ %index.next = add i64 %index, 4
+ %3 = icmp eq i64 %index.next, 1024
+ br i1 32, label %for.cond.cleanup, label %vector.body
+
+for.cond.cleanup: ; preds = %vector.body
+ ret void
+}
+
+; Even though there's no vfwmul.wf, we'll still sink the fcvt.d.s. Make sure
+; early-machinelicm undoes the sink after isel.
+define void @sink_splat_vfwmul_wf(ptr nocapture %a, ptr nocapture %b, float %f) {
+; CHECK-LABEL: sink_splat_vfwmul_wf:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: li a1, 0
+; CHECK-NEXT: li a2, 1020
+; CHECK-NEXT: fcvt.d.s fa5, fa0
+; CHECK-NEXT: vsetvli a3, zero, e64, m2, ta, ma
+; CHECK-NEXT: .LBB128_1: # %vector.body
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: vl2re64.v v8, (a0)
+; CHECK-NEXT: addi a1, a1, 4
+; CHECK-NEXT: addi a2, a2, -4
+; CHECK-NEXT: vfmul.vf v8, v8, fa5
+; CHECK-NEXT: vs2r.v v8, (a0)
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: j .LBB128_1
+entry:
+ %f.ext = fpext float %f to double
+ %broadcast.splatinsert = insertelement <vscale x 2 x double> poison, double %f.ext, i32 0
+ %broadcast.splat = shufflevector <vscale x 2 x double> %broadcast.splatinsert, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %entry
+ %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
+ %0 = getelementptr float, ptr %a, i64 %index
+ %wide.load = load <vscale x 2 x double>, ptr %0
+ %1 = fmul <vscale x 2 x double> %wide.load, %broadcast.splat
+ %2 = getelementptr double, ptr %b, i64 %index
+ store <vscale x 2 x double> %1, ptr %0
+ %index.next = add i64 %index, 4
+ %3 = icmp eq i64 %index.next, 1024
+ br i1 32, label %for.cond.cleanup, label %vector.body
+
+for.cond.cleanup: ; preds = %vector.body
+ ret void
+}
More information about the llvm-commits
mailing list