[PATCH] D120706: [AArch64] Use first op of FADDPv* instead of implicit def.
Florian Hahn via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 1 03:18:02 PST 2022
fhahn updated this revision to Diff 412031.
fhahn added a comment.
In D120706#3350754 <https://reviews.llvm.org/D120706#3350754>, @t.p.northover wrote:
> Looks like a sensible change to me, but could you commit it with a small comment explaining? I think I remember puzzling over these patterns a little when someone asked about them, and with this change the actual dataflow we care about is even less apparent.
Thanks Tim!
I added a comment in the latest update. It would be great if you could take a quick look to double-check whether it is helpful.
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D120706/new/
https://reviews.llvm.org/D120706
Files:
llvm/lib/Target/AArch64/AArch64InstrInfo.td
llvm/test/CodeGen/AArch64/vecreduce-fadd.ll
Index: llvm/test/CodeGen/AArch64/vecreduce-fadd.ll
===================================================================
--- llvm/test/CodeGen/AArch64/vecreduce-fadd.ll
+++ llvm/test/CodeGen/AArch64/vecreduce-fadd.ll
@@ -45,8 +45,8 @@
define half @add_H(<8 x half> %bin.rdx) {
; FULLFP16-LABEL: add_H:
; FULLFP16: // %bb.0:
-; FULLFP16-NEXT: faddp v0.8h, v0.8h, v0.8h
-; FULLFP16-NEXT: faddp v0.8h, v0.8h, v0.8h
+; FULLFP16-NEXT: faddp v1.8h, v0.8h, v0.8h
+; FULLFP16-NEXT: faddp v0.8h, v1.8h, v0.8h
; FULLFP16-NEXT: faddp h0, v0.2h
; FULLFP16-NEXT: ret
;
@@ -115,8 +115,8 @@
; FULLFP16-LABEL: add_2H:
; FULLFP16: // %bb.0:
; FULLFP16-NEXT: fadd v0.8h, v0.8h, v1.8h
-; FULLFP16-NEXT: faddp v0.8h, v0.8h, v0.8h
-; FULLFP16-NEXT: faddp v0.8h, v0.8h, v0.8h
+; FULLFP16-NEXT: faddp v1.8h, v0.8h, v0.8h
+; FULLFP16-NEXT: faddp v0.8h, v1.8h, v0.8h
; FULLFP16-NEXT: faddp h0, v0.2h
; FULLFP16-NEXT: ret
;
@@ -248,7 +248,7 @@
; CHECK-NEXT: ldr q1, [x0, x8]
; CHECK-NEXT: add x8, x8, #16
; CHECK-NEXT: cmp w8, #112
-; CHECK-NEXT: faddp v1.4s, v1.4s, v0.4s
+; CHECK-NEXT: faddp v1.4s, v1.4s, v1.4s
; CHECK-NEXT: faddp s1, v1.2s
; CHECK-NEXT: fadd s0, s1, s0
; CHECK-NEXT: b.ne .LBB9_1
@@ -286,7 +286,7 @@
; FULLFP16-NEXT: ldr d1, [x0, x8]
; FULLFP16-NEXT: add x8, x8, #8
; FULLFP16-NEXT: cmp w8, #56
-; FULLFP16-NEXT: faddp v1.4h, v1.4h, v0.4h
+; FULLFP16-NEXT: faddp v1.4h, v1.4h, v1.4h
; FULLFP16-NEXT: faddp h1, v1.2h
; FULLFP16-NEXT: fadd h0, h1, h0
; FULLFP16-NEXT: b.ne .LBB10_1
@@ -357,8 +357,8 @@
; FULLFP16-NEXT: ldr q1, [x0, x8]
; FULLFP16-NEXT: add x8, x8, #8
; FULLFP16-NEXT: cmp w8, #56
-; FULLFP16-NEXT: faddp v1.8h, v1.8h, v0.8h
-; FULLFP16-NEXT: faddp v1.8h, v1.8h, v0.8h
+; FULLFP16-NEXT: faddp v2.8h, v1.8h, v1.8h
+; FULLFP16-NEXT: faddp v1.8h, v2.8h, v1.8h
; FULLFP16-NEXT: faddp h1, v1.2h
; FULLFP16-NEXT: fadd h0, h1, h0
; FULLFP16-NEXT: b.ne .LBB11_1
Index: llvm/lib/Target/AArch64/AArch64InstrInfo.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -5345,19 +5345,22 @@
defm FMINNMP : SIMDFPPairwiseScalar<1, 0b01100, "fminnmp">;
defm FMINP : SIMDFPPairwiseScalar<1, 0b01111, "fminp">;
+// Only the lower half of the result of the inner FADDP is used in the patterns
+// below, so the second operand does not matter. Re-use the first input
+// operand, so no additional dependencies need to be introduced.
let Predicates = [HasFullFP16] in {
def : Pat<(f16 (vecreduce_fadd (v8f16 V128:$Rn))),
(FADDPv2i16p
(EXTRACT_SUBREG
- (FADDPv8f16 (FADDPv8f16 V128:$Rn, (v8f16 (IMPLICIT_DEF))), (v8f16 (IMPLICIT_DEF))),
+ (FADDPv8f16 (FADDPv8f16 V128:$Rn, V128:$Rn), V128:$Rn),
dsub))>;
def : Pat<(f16 (vecreduce_fadd (v4f16 V64:$Rn))),
- (FADDPv2i16p (FADDPv4f16 V64:$Rn, (v4f16 (IMPLICIT_DEF))))>;
+ (FADDPv2i16p (FADDPv4f16 V64:$Rn, V64:$Rn))>;
}
def : Pat<(f32 (vecreduce_fadd (v4f32 V128:$Rn))),
(FADDPv2i32p
(EXTRACT_SUBREG
- (FADDPv4f32 V128:$Rn, (v4f32 (IMPLICIT_DEF))),
+ (FADDPv4f32 V128:$Rn, V128:$Rn),
dsub))>;
def : Pat<(f32 (vecreduce_fadd (v2f32 V64:$Rn))),
(FADDPv2i32p V64:$Rn)>;
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D120706.412031.patch
Type: text/x-patch
Size: 3456 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20220301/c9f7ad7e/attachment.bin>
More information about the llvm-commits mailing list