[llvm] [LLVM] treat `@llvm.ssub.sat` the same as `@llvm.aarch64.neon.sqsub` (PR #140454)
Folkert de Vries via llvm-commits
llvm-commits at lists.llvm.org
Sun May 18 07:54:52 PDT 2025
folkertdev wrote:
Yeah, two tests fail in `llvm/test/CodeGen/AArch64/arm64-neon-2velem.ll`. I believe this is a regression:
```llvm
define <4 x float> @optimize_dup(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %v) {
; CHECK-LABEL: optimize_dup:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmla v0.4s, v1.4s, v3.s[3]
; CHECK-NEXT: fmls v0.4s, v2.4s, v3.s[3]
; CHECK-NEXT: ret
entry:
%lane1 = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
%0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane1, <4 x float> %b, <4 x float> %a)
%lane2 = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
%1 = fmul <4 x float> %lane2, %c
%s = fsub <4 x float> %0, %1
ret <4 x float> %s
}
define <4 x float> @no_optimize_dup(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %v) {
; CHECK-LABEL: no_optimize_dup:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmla v0.4s, v1.4s, v3.s[3]
; CHECK-NEXT: fmls v0.4s, v2.4s, v3.s[1]
; CHECK-NEXT: ret
entry:
%lane1 = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
%0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane1, <4 x float> %b, <4 x float> %a)
%lane2 = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
%1 = fmul <4 x float> %lane2, %c
%s = fsub <4 x float> %0, %1
ret <4 x float> %s
}
```
```
3738: .type optimize_dup,@function
3739: optimize_dup: // @optimize_dup
3740: .cfi_startproc
3741: // %bb.0: // %entry
3742: fmul v2.4s, v2.4s, v3.s[3]
3743: fmla v0.4s, v1.4s, v3.s[3]
next:4507 !~~~~~~~~~~~~~~~~~~~~~~~~~ error: match on wrong line
3744: fsub v0.4s, v0.4s, v2.4s
3745: ret
3746: .Lfunc_end298:
3747: .size optimize_dup, .Lfunc_end298-optimize_dup
3748: .cfi_endproc
3749: // -- End function
3750: .globl no_optimize_dup // -- Begin function no_optimize_dup
3751: .p2align 2
3752: .type no_optimize_dup,@function
3753: no_optimize_dup: // @no_optimize_dup
3754: .cfi_startproc
3755: // %bb.0: // %entry
3756: fmul v2.4s, v2.4s, v3.s[1]
3757: fmla v0.4s, v1.4s, v3.s[3]
next:4522 !~~~~~~~~~~~~~~~~~~~~~~~~~ error: match on wrong line
3758: fsub v0.4s, v0.4s, v2.4s
3759: ret
3760: .Lfunc_end299:
3761: .size no_optimize_dup, .Lfunc_end299-no_optimize_dup
3762: .cfi_endproc
```
It looks like the test assumes that the operations are fused, and with these changes they no longer are.
Does that sound right? I guess there is some other rule with fma that we've missed?
https://github.com/llvm/llvm-project/pull/140454
More information about the llvm-commits
mailing list