[llvm] a611d67 - [RISCV][TTI] Add llvm.fmuladd and llvm.vp.fmuladd into canSplatOperand (#119508)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 12 02:39:25 PST 2024
Author: LiqinWeng
Date: 2024-12-12T18:39:21+08:00
New Revision: a611d67601528cb18ae26794a1482cff59ca5254
URL: https://github.com/llvm/llvm-project/commit/a611d67601528cb18ae26794a1482cff59ca5254
DIFF: https://github.com/llvm/llvm-project/commit/a611d67601528cb18ae26794a1482cff59ca5254.diff
LOG: [RISCV][TTI] Add llvm.fmuladd and llvm.vp.fmuladd into canSplatOperand (#119508)
The first or second operand of fmuladd is a splat operand , it can help
fmuladd fold vv instructions to vf instructions.
Added:
Modified:
llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 489fc3c4918fd4..49192bd6380223 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -2446,6 +2446,8 @@ bool RISCVTTIImpl::canSplatOperand(Instruction *I, int Operand) const {
switch (II->getIntrinsicID()) {
case Intrinsic::fma:
case Intrinsic::vp_fma:
+ case Intrinsic::fmuladd:
+ case Intrinsic::vp_fmuladd:
return Operand == 0 || Operand == 1;
case Intrinsic::vp_shl:
case Intrinsic::vp_lshr:
diff --git a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
index 75a5eea1cb409e..735621aa4390e3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
@@ -5581,3 +5581,163 @@ vector.body:
for.cond.cleanup:
ret void
}
+
+define void @sink_splat_fmuladd(ptr %a, ptr %b, float %x) {
+; CHECK-LABEL: sink_splat_fmuladd:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lui a2, 1
+; CHECK-NEXT: add a2, a1, a2
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: .LBB121_1: # %vector.body
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: vle32.v v8, (a0)
+; CHECK-NEXT: vle32.v v9, (a1)
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vfmacc.vf v9, fa0, v8
+; CHECK-NEXT: vse32.v v9, (a0)
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: bne a1, a2, .LBB121_1
+; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
+; CHECK-NEXT: ret
+entry:
+ %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
+ %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
+ br label %vector.body
+
+vector.body:
+ %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
+ %0 = getelementptr inbounds float, ptr %a, i64 %index
+ %wide.load = load <4 x float>, ptr %0, align 4
+ %1 = getelementptr inbounds float, ptr %b, i64 %index
+ %wide.load12 = load <4 x float>, ptr %1, align 4
+ %2 = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %wide.load, <4 x float> %broadcast.splat, <4 x float> %wide.load12)
+ store <4 x float> %2, ptr %0, align 4
+ %index.next = add nuw i64 %index, 4
+ %3 = icmp eq i64 %index.next, 1024
+ br i1 %3, label %for.cond.cleanup, label %vector.body
+
+for.cond.cleanup:
+ ret void
+}
+
+define void @sink_splat_fmuladd_commute(ptr %a, ptr %b, float %x) {
+; CHECK-LABEL: sink_splat_fmuladd_commute:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lui a2, 1
+; CHECK-NEXT: add a2, a1, a2
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: .LBB122_1: # %vector.body
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: vle32.v v8, (a0)
+; CHECK-NEXT: vle32.v v9, (a1)
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vfmacc.vf v9, fa0, v8
+; CHECK-NEXT: vse32.v v9, (a0)
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: bne a1, a2, .LBB122_1
+; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
+; CHECK-NEXT: ret
+entry:
+ %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
+ %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
+ br label %vector.body
+
+vector.body:
+ %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
+ %0 = getelementptr inbounds float, ptr %a, i64 %index
+ %wide.load = load <4 x float>, ptr %0, align 4
+ %1 = getelementptr inbounds float, ptr %b, i64 %index
+ %wide.load12 = load <4 x float>, ptr %1, align 4
+ %2 = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %broadcast.splat, <4 x float> %wide.load, <4 x float> %wide.load12)
+ store <4 x float> %2, ptr %0, align 4
+ %index.next = add nuw i64 %index, 4
+ %3 = icmp eq i64 %index.next, 1024
+ br i1 %3, label %for.cond.cleanup, label %vector.body
+
+for.cond.cleanup:
+ ret void
+}
+
+define void @sink_splat_vp_fmuladd(ptr %a, ptr %b, float %x, <4 x i1> %m, i32 %vl) {
+; CHECK-LABEL: sink_splat_vp_fmuladd:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lui a3, 1
+; CHECK-NEXT: slli a4, a2, 32
+; CHECK-NEXT: add a2, a1, a3
+; CHECK-NEXT: srli a3, a4, 32
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: .LBB123_1: # %vector.body
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: vle32.v v8, (a0)
+; CHECK-NEXT: vle32.v v9, (a1)
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vsetvli zero, a3, e32, m1, ta, ma
+; CHECK-NEXT: vfmadd.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vse32.v v8, (a0)
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: bne a1, a2, .LBB123_1
+; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
+; CHECK-NEXT: ret
+entry:
+ %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
+ %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
+ br label %vector.body
+
+vector.body:
+ %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
+ %0 = getelementptr inbounds float, ptr %a, i64 %index
+ %wide.load = load <4 x float>, ptr %0, align 4
+ %1 = getelementptr inbounds float, ptr %b, i64 %index
+ %wide.load12 = load <4 x float>, ptr %1, align 4
+ %2 = call <4 x float> @llvm.vp.fmuladd.v4f32(<4 x float> %wide.load, <4 x float> %broadcast.splat, <4 x float> %wide.load12, <4 x i1> %m, i32 %vl)
+ store <4 x float> %2, ptr %0, align 4
+ %index.next = add nuw i64 %index, 4
+ %3 = icmp eq i64 %index.next, 1024
+ br i1 %3, label %for.cond.cleanup, label %vector.body
+
+for.cond.cleanup:
+ ret void
+}
+
+define void @sink_splat_vp_fmuladd_commute(ptr %a, ptr %b, float %x, <4 x i1> %m, i32 %vl) {
+; CHECK-LABEL: sink_splat_vp_fmuladd_commute:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lui a3, 1
+; CHECK-NEXT: slli a4, a2, 32
+; CHECK-NEXT: add a2, a1, a3
+; CHECK-NEXT: srli a3, a4, 32
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: .LBB124_1: # %vector.body
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: vle32.v v8, (a0)
+; CHECK-NEXT: vle32.v v9, (a1)
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vsetvli zero, a3, e32, m1, ta, ma
+; CHECK-NEXT: vfmadd.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vse32.v v8, (a0)
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: bne a1, a2, .LBB124_1
+; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
+; CHECK-NEXT: ret
+entry:
+ %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
+ %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
+ br label %vector.body
+
+vector.body:
+ %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
+ %0 = getelementptr inbounds float, ptr %a, i64 %index
+ %wide.load = load <4 x float>, ptr %0, align 4
+ %1 = getelementptr inbounds float, ptr %b, i64 %index
+ %wide.load12 = load <4 x float>, ptr %1, align 4
+ %2 = call <4 x float> @llvm.vp.fmuladd.v4f32(<4 x float> %broadcast.splat, <4 x float> %wide.load, <4 x float> %wide.load12, <4 x i1> %m, i32 %vl)
+ store <4 x float> %2, ptr %0, align 4
+ %index.next = add nuw i64 %index, 4
+ %3 = icmp eq i64 %index.next, 1024
+ br i1 %3, label %for.cond.cleanup, label %vector.body
+
+for.cond.cleanup:
+ ret void
+}
More information about the llvm-commits
mailing list