[llvm] 673b4ad - [AArch64] Add FP16 instructions to isAssociativeAndCommutative
KAWASHIMA Takahiro via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 20 06:47:57 PST 2022
Author: KAWASHIMA Takahiro
Date: 2022-12-20T23:47:51+09:00
New Revision: 673b4ad64577e3336cb8109869919b21341e0d74
URL: https://github.com/llvm/llvm-project/commit/673b4ad64577e3336cb8109869919b21341e0d74
DIFF: https://github.com/llvm/llvm-project/commit/673b4ad64577e3336cb8109869919b21341e0d74.diff
LOG: [AArch64] Add FP16 instructions to isAssociativeAndCommutative
The `-mcpu=` option in `llvm/test/CodeGen/AArch64/machine-combiner.ll`
is changed to `neoverse-n2` so that FP16 and SVE/SVE2 instructions are
available. As a result, register allocation and/or instruction
scheduling change slightly and some existing `CHECK` lines are updated.
Differential Revision: https://reviews.llvm.org/D139809
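
For illustration only (not part of the commit): the machine combiner
consults isAssociativeAndCommutative to rebalance a serial chain of
dependent floating-point operations into a shorter dependency tree.
Below is a minimal standalone IR sketch in the same style as the tests
in the diff; the function name `chain_f16` is made up for this example:

  define half @chain_f16(half %a, half %b, half %c, half %d) {
    %t0 = fadd half %a, %b
    %t1 = fadd half %t0, %c
    %t2 = fadd half %t1, %d
    ret half %t2
  }

Compiled with, e.g., `llc -mtriple=aarch64-gnu-linux -mcpu=neoverse-n2
-enable-unsafe-fp-math` (the same flags the test uses), the chain of
three dependent FP16 adds can now be rebalanced so that two of the adds
execute independently, cutting the critical path from three to two
adds. The CHECK-UNSAFE lines in the test diff below show exactly this
shape for the analogous `reassociate_adds_half` and
`reassociate_adds_v4f16` cases.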
Added:
Modified:
llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
llvm/test/CodeGen/AArch64/machine-combiner.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 684e75db630f..a22a67a9d431 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -4951,19 +4951,28 @@ bool AArch64InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst,
switch (Inst.getOpcode()) {
// == Floating-point types ==
// -- Floating-point instructions --
+ case AArch64::FADDHrr:
case AArch64::FADDSrr:
case AArch64::FADDDrr:
+ case AArch64::FMULHrr:
case AArch64::FMULSrr:
case AArch64::FMULDrr:
+ case AArch64::FMULX16:
case AArch64::FMULX32:
case AArch64::FMULX64:
// -- Advanced SIMD instructions --
+ case AArch64::FADDv4f16:
+ case AArch64::FADDv8f16:
case AArch64::FADDv2f32:
case AArch64::FADDv4f32:
case AArch64::FADDv2f64:
+ case AArch64::FMULv4f16:
+ case AArch64::FMULv8f16:
case AArch64::FMULv2f32:
case AArch64::FMULv4f32:
case AArch64::FMULv2f64:
+ case AArch64::FMULXv4f16:
+ case AArch64::FMULXv8f16:
case AArch64::FMULXv2f32:
case AArch64::FMULXv4f32:
case AArch64::FMULXv2f64:
diff --git a/llvm/test/CodeGen/AArch64/machine-combiner.ll b/llvm/test/CodeGen/AArch64/machine-combiner.ll
index 87bf7c29a79c..f5fcdda363f8 100644
--- a/llvm/test/CodeGen/AArch64/machine-combiner.ll
+++ b/llvm/test/CodeGen/AArch64/machine-combiner.ll
@@ -1,10 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=cortex-a57 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-STD
-; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=cortex-a57 -enable-unsafe-fp-math < %s | FileCheck %s --check-prefixes=CHECK,CHECK-UNSAFE
+; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=neoverse-n2 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-STD
+; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=neoverse-n2 -enable-unsafe-fp-math < %s | FileCheck %s --check-prefixes=CHECK,CHECK-UNSAFE
; Incremental updates of the instruction depths should be enough for this test
; case.
-; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=cortex-a57 -enable-unsafe-fp-math \
+; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=neoverse-n2 -enable-unsafe-fp-math \
; RUN: -machine-combiner-inc-threshold=0 -machine-combiner-verify-pattern-order=true < %s | FileCheck %s --check-prefixes=CHECK,CHECK-UNSAFE
; Verify that the first two adds are independent regardless of how the inputs are
@@ -189,8 +189,8 @@ define float @reassociate_muls1(float %x0, float %x1, float %x2, float %x3) {
; CHECK-STD-LABEL: reassociate_muls1:
; CHECK-STD: // %bb.0:
; CHECK-STD-NEXT: fdiv s0, s0, s1
-; CHECK-STD-NEXT: fmul s1, s2, s0
-; CHECK-STD-NEXT: fmul s0, s3, s1
+; CHECK-STD-NEXT: fmul s0, s2, s0
+; CHECK-STD-NEXT: fmul s0, s3, s0
; CHECK-STD-NEXT: ret
;
; CHECK-UNSAFE-LABEL: reassociate_muls1:
@@ -233,8 +233,8 @@ define double @reassociate_muls_double(double %x0, double %x1, double %x2, doubl
; CHECK-STD-LABEL: reassociate_muls_double:
; CHECK-STD: // %bb.0:
; CHECK-STD-NEXT: fdiv d0, d0, d1
-; CHECK-STD-NEXT: fmul d1, d2, d0
-; CHECK-STD-NEXT: fmul d0, d3, d1
+; CHECK-STD-NEXT: fmul d0, d2, d0
+; CHECK-STD-NEXT: fmul d0, d3, d0
; CHECK-STD-NEXT: ret
;
; CHECK-UNSAFE-LABEL: reassociate_muls_double:
@@ -249,6 +249,50 @@ define double @reassociate_muls_double(double %x0, double %x1, double %x2, doubl
ret double %t2
}
+; Verify that scalar half-precision adds are reassociated.
+
+define half @reassociate_adds_half(half %x0, half %x1, half %x2, half %x3) {
+; CHECK-STD-LABEL: reassociate_adds_half:
+; CHECK-STD: // %bb.0:
+; CHECK-STD-NEXT: fdiv h0, h0, h1
+; CHECK-STD-NEXT: fadd h0, h2, h0
+; CHECK-STD-NEXT: fadd h0, h3, h0
+; CHECK-STD-NEXT: ret
+;
+; CHECK-UNSAFE-LABEL: reassociate_adds_half:
+; CHECK-UNSAFE: // %bb.0:
+; CHECK-UNSAFE-NEXT: fdiv h0, h0, h1
+; CHECK-UNSAFE-NEXT: fadd h1, h3, h2
+; CHECK-UNSAFE-NEXT: fadd h0, h1, h0
+; CHECK-UNSAFE-NEXT: ret
+ %t0 = fdiv half %x0, %x1
+ %t1 = fadd half %x2, %t0
+ %t2 = fadd half %x3, %t1
+ ret half %t2
+}
+
+; Verify that scalar half-precision multiplies are reassociated.
+
+define half @reassociate_muls_half(half %x0, half %x1, half %x2, half %x3) {
+; CHECK-STD-LABEL: reassociate_muls_half:
+; CHECK-STD: // %bb.0:
+; CHECK-STD-NEXT: fdiv h0, h0, h1
+; CHECK-STD-NEXT: fmul h0, h2, h0
+; CHECK-STD-NEXT: fmul h0, h3, h0
+; CHECK-STD-NEXT: ret
+;
+; CHECK-UNSAFE-LABEL: reassociate_muls_half:
+; CHECK-UNSAFE: // %bb.0:
+; CHECK-UNSAFE-NEXT: fdiv h0, h0, h1
+; CHECK-UNSAFE-NEXT: fmul h1, h3, h2
+; CHECK-UNSAFE-NEXT: fmul h0, h1, h0
+; CHECK-UNSAFE-NEXT: ret
+ %t0 = fdiv half %x0, %x1
+ %t1 = fmul half %x2, %t0
+ %t2 = fmul half %x3, %t1
+ ret half %t2
+}
+
; Verify that scalar integer adds are reassociated.
define i32 @reassociate_adds_i32(i32 %x0, i32 %x1, i32 %x2, i32 %x3) {
@@ -399,6 +443,51 @@ define <4 x float> @vector_reassociate_adds4(<4 x float> %x0, <4 x float> %x1, <
%t2 = fadd <4 x float> %x3, %t1
ret <4 x float> %t2
}
+
+; Verify that 64-bit vector half-precision adds are reassociated.
+
+define <4 x half> @reassociate_adds_v4f16(<4 x half> %x0, <4 x half> %x1, <4 x half> %x2, <4 x half> %x3) {
+; CHECK-STD-LABEL: reassociate_adds_v4f16:
+; CHECK-STD: // %bb.0:
+; CHECK-STD-NEXT: fadd v0.4h, v0.4h, v1.4h
+; CHECK-STD-NEXT: fadd v0.4h, v2.4h, v0.4h
+; CHECK-STD-NEXT: fadd v0.4h, v3.4h, v0.4h
+; CHECK-STD-NEXT: ret
+;
+; CHECK-UNSAFE-LABEL: reassociate_adds_v4f16:
+; CHECK-UNSAFE: // %bb.0:
+; CHECK-UNSAFE-NEXT: fadd v0.4h, v0.4h, v1.4h
+; CHECK-UNSAFE-NEXT: fadd v1.4h, v3.4h, v2.4h
+; CHECK-UNSAFE-NEXT: fadd v0.4h, v1.4h, v0.4h
+; CHECK-UNSAFE-NEXT: ret
+ %t0 = fadd <4 x half> %x0, %x1
+ %t1 = fadd <4 x half> %x2, %t0
+ %t2 = fadd <4 x half> %x3, %t1
+ ret <4 x half> %t2
+}
+
+; Verify that 128-bit vector half-precision multiplies are reassociated.
+
+define <8 x half> @reassociate_muls_v8f16(<8 x half> %x0, <8 x half> %x1, <8 x half> %x2, <8 x half> %x3) {
+; CHECK-STD-LABEL: reassociate_muls_v8f16:
+; CHECK-STD: // %bb.0:
+; CHECK-STD-NEXT: fadd v0.8h, v0.8h, v1.8h
+; CHECK-STD-NEXT: fmul v0.8h, v2.8h, v0.8h
+; CHECK-STD-NEXT: fmul v0.8h, v3.8h, v0.8h
+; CHECK-STD-NEXT: ret
+;
+; CHECK-UNSAFE-LABEL: reassociate_muls_v8f16:
+; CHECK-UNSAFE: // %bb.0:
+; CHECK-UNSAFE-NEXT: fadd v0.8h, v0.8h, v1.8h
+; CHECK-UNSAFE-NEXT: fmul v1.8h, v3.8h, v2.8h
+; CHECK-UNSAFE-NEXT: fmul v0.8h, v1.8h, v0.8h
+; CHECK-UNSAFE-NEXT: ret
+ %t0 = fadd <8 x half> %x0, %x1
+ %t1 = fmul <8 x half> %x2, %t0
+ %t2 = fmul <8 x half> %x3, %t1
+ ret <8 x half> %t2
+}
+
; Verify that 128-bit vector single-precision multiplies are reassociated.
define <4 x float> @reassociate_muls_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
@@ -492,9 +581,9 @@ define double @reassociate_adds_from_calls() {
; CHECK-UNSAFE-NEXT: fmov d10, d0
; CHECK-UNSAFE-NEXT: bl bar
; CHECK-UNSAFE-NEXT: fadd d1, d8, d9
-; CHECK-UNSAFE-NEXT: fadd d0, d10, d0
-; CHECK-UNSAFE-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload
; CHECK-UNSAFE-NEXT: ldp d9, d8, [sp, #8] // 16-byte Folded Reload
+; CHECK-UNSAFE-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload
+; CHECK-UNSAFE-NEXT: fadd d0, d10, d0
; CHECK-UNSAFE-NEXT: fadd d0, d1, d0
; CHECK-UNSAFE-NEXT: ldr d10, [sp], #32 // 8-byte Folded Reload
; CHECK-UNSAFE-NEXT: ret
@@ -527,9 +616,9 @@ define double @already_reassociated() {
; CHECK-NEXT: fmov d10, d0
; CHECK-NEXT: bl bar
; CHECK-NEXT: fadd d1, d8, d9
-; CHECK-NEXT: fadd d0, d10, d0
-; CHECK-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload
; CHECK-NEXT: ldp d9, d8, [sp, #8] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload
+; CHECK-NEXT: fadd d0, d10, d0
; CHECK-NEXT: fadd d0, d1, d0
; CHECK-NEXT: ldr d10, [sp], #32 // 8-byte Folded Reload
; CHECK-NEXT: ret