[llvm] [CodeGen][AArch64] Sink splat operands of FMul instructions (PR #116222)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 14 05:06:01 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: Hari Limaye (hazzlim)
<details>
<summary>Changes</summary>
Sink the shuffle operands of FMul instructions when those operands are splats, since lane-indexed variants of the multiply can be generated for them.
---
Full diff: https://github.com/llvm/llvm-project/pull/116222.diff
2 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp (+8)
- (modified) llvm/test/CodeGen/AArch64/sinksplat.ll (+15-11)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index a97b0d3b1db92a..eb3b9609f697ca 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -5239,6 +5239,14 @@ bool AArch64TTIImpl::isProfitableToSinkOperands(
// Is it profitable to sink if we found two of the same type of extends.
return !Ops.empty() && (NumSExts == 2 || NumZExts == 2);
}
+ case Instruction::FMul: {
+ // Sink splats for index lane variants
+ if (isSplatShuffle(I->getOperand(0)))
+ Ops.push_back(&I->getOperandUse(0));
+ if (isSplatShuffle(I->getOperand(1)))
+ Ops.push_back(&I->getOperandUse(1));
+ return !Ops.empty();
+ }
default:
return false;
}
diff --git a/llvm/test/CodeGen/AArch64/sinksplat.ll b/llvm/test/CodeGen/AArch64/sinksplat.ll
index d156ec079ae941..c94d5bf2a208ff 100644
--- a/llvm/test/CodeGen/AArch64/sinksplat.ll
+++ b/llvm/test/CodeGen/AArch64/sinksplat.ll
@@ -230,29 +230,34 @@ l2:
ret <4 x i32> %c
}
-define <4 x float> @fmul(<4 x float> %x, ptr %y) {
+define <4 x float> @fmul(ptr %x, ptr %y) {
; CHECK-LABEL: fmul:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov v1.16b, v0.16b
-; CHECK-NEXT: ldr q2, [x0]
; CHECK-NEXT: movi v0.2d, #0000000000000000
-; CHECK-NEXT: mov w8, #1 // =0x1
-; CHECK-NEXT: fmul v1.4s, v2.4s, v1.s[3]
+; CHECK-NEXT: ldr s1, [x0]
+; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: .LBB7_1: // %l1
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: fadd v0.4s, v1.4s, v0.4s
-; CHECK-NEXT: subs w8, w8, #1
+; CHECK-NEXT: ldr q2, [x1, x8]
+; CHECK-NEXT: add x8, x8, #16
+; CHECK-NEXT: cmp w8, #16
+; CHECK-NEXT: fmul v2.4s, v2.4s, v1.s[0]
+; CHECK-NEXT: fadd v0.4s, v2.4s, v0.4s
; CHECK-NEXT: b.eq .LBB7_1
; CHECK-NEXT: // %bb.2: // %l2
; CHECK-NEXT: ret
entry:
- %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+ %x.val = load float, ptr %x
+ %x.ins = insertelement <4 x float> poison, float %x.val, i64 0
+ %a = shufflevector <4 x float> %x.ins, <4 x float> undef, <4 x i32> zeroinitializer
br label %l1
l1:
%p = phi i32 [ 0, %entry ], [ %pa, %l1 ]
%q = phi <4 x float> [ zeroinitializer, %entry ], [ %c, %l1 ]
- %l = load <4 x float>, ptr %y
+ %idx.y = mul nuw nsw i32 %p, 4
+ %ptr.y = getelementptr float, ptr %y, i32 %idx.y
+ %l = load <4 x float>, ptr %ptr.y
%b = fmul <4 x float> %l, %a
%c = fadd <4 x float> %b, %q
%pa = add i32 %p, 1
@@ -270,10 +275,9 @@ define <4 x float> @fmuladd(<4 x float> %x, ptr %y) {
; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: ldr q2, [x0]
; CHECK-NEXT: mov w8, #1 // =0x1
-; CHECK-NEXT: dup v1.4s, v1.s[3]
; CHECK-NEXT: .LBB8_1: // %l1
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: fmla v0.4s, v1.4s, v2.4s
+; CHECK-NEXT: fmla v0.4s, v2.4s, v1.s[3]
; CHECK-NEXT: subs w8, w8, #1
; CHECK-NEXT: b.eq .LBB8_1
; CHECK-NEXT: // %bb.2: // %l2
``````````
</details>
https://github.com/llvm/llvm-project/pull/116222
More information about the llvm-commits mailing list