[llvm] b670da7 - [AArch64] Allow strict opcodes in indexed fmul and fma patterns
John Brawn via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 17 05:12:32 PST 2022
Author: John Brawn
Date: 2022-02-17T13:11:54Z
New Revision: b670da798d352c2edcee1d5ad832905b3923c8f3
URL: https://github.com/llvm/llvm-project/commit/b670da798d352c2edcee1d5ad832905b3923c8f3
DIFF: https://github.com/llvm/llvm-project/commit/b670da798d352c2edcee1d5ad832905b3923c8f3.diff
LOG: [AArch64] Allow strict opcodes in indexed fmul and fma patterns
Using the indexed version of an instruction instead of the non-indexed
version doesn't change anything with regard to floating-point exceptions
or rounding, so the strict (constrained) opcodes can safely select the
indexed fmul and fma forms as well.
Differential Revision: https://reviews.llvm.org/D118487
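
As a rough illustration (a minimal sketch in the spirit of the tests added
below; the function name is made up and this is not one of the committed
tests), a constrained fma whose multiplicand is a splatted scalar can now
select the indexed form, e.g. fmla.2s v0, v1, v2[0]:

define <2 x float> @fmla_by_scalar_strict(<2 x float> %a, <2 x float> %b, float %c) #0 {
  ; Splat the scalar multiplier into both lanes.
  %v1 = insertelement <2 x float> undef, float %c, i32 0
  %v2 = insertelement <2 x float> %v1, float %c, i32 1
  ; Strict FP multiply-add; with any_fma in the indexed patterns this can
  ; match the indexed FMLA instead of only the non-indexed form.
  %r = tail call <2 x float> @llvm.experimental.constrained.fma.v2f32(<2 x float> %v2, <2 x float> %b, <2 x float> %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
  ret <2 x float> %r
}

declare <2 x float> @llvm.experimental.constrained.fma.v2f32(<2 x float>, <2 x float>, <2 x float>, metadata, metadata)
attributes #0 = { strictfp }
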
Added:
Modified:
llvm/lib/Target/AArch64/AArch64InstrInfo.td
llvm/test/CodeGen/AArch64/arm64-vmul.ll
llvm/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 0f88fc950eb4..53a06c2b9e8e 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -6280,18 +6280,18 @@ let hasSideEffects = 0 in {
// On the other hand, there are quite a few valid combinatorial options due to
// the commutativity of multiplication and the fact that (-x) * y = x * (-y).
defm : SIMDFPIndexedTiedPatterns<"FMLA",
- TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)>>;
+ TriOpFrag<(any_fma node:$RHS, node:$MHS, node:$LHS)>>;
defm : SIMDFPIndexedTiedPatterns<"FMLA",
- TriOpFrag<(fma node:$MHS, node:$RHS, node:$LHS)>>;
+ TriOpFrag<(any_fma node:$MHS, node:$RHS, node:$LHS)>>;
defm : SIMDFPIndexedTiedPatterns<"FMLS",
- TriOpFrag<(fma node:$MHS, (fneg node:$RHS), node:$LHS)> >;
+ TriOpFrag<(any_fma node:$MHS, (fneg node:$RHS), node:$LHS)> >;
defm : SIMDFPIndexedTiedPatterns<"FMLS",
- TriOpFrag<(fma node:$RHS, (fneg node:$MHS), node:$LHS)> >;
+ TriOpFrag<(any_fma node:$RHS, (fneg node:$MHS), node:$LHS)> >;
defm : SIMDFPIndexedTiedPatterns<"FMLS",
- TriOpFrag<(fma (fneg node:$RHS), node:$MHS, node:$LHS)> >;
+ TriOpFrag<(any_fma (fneg node:$RHS), node:$MHS, node:$LHS)> >;
defm : SIMDFPIndexedTiedPatterns<"FMLS",
- TriOpFrag<(fma (fneg node:$MHS), node:$RHS, node:$LHS)> >;
+ TriOpFrag<(any_fma (fneg node:$MHS), node:$RHS, node:$LHS)> >;
multiclass FMLSIndexedAfterNegPatterns<SDPatternOperator OpNode> {
// 3 variants for the .2s version: DUPLANE from 128-bit, DUPLANE from 64-bit
@@ -6370,22 +6370,22 @@ multiclass FMLSIndexedAfterNegPatterns<SDPatternOperator OpNode> {
}
defm : FMLSIndexedAfterNegPatterns<
- TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)> >;
+ TriOpFrag<(any_fma node:$RHS, node:$MHS, node:$LHS)> >;
defm : FMLSIndexedAfterNegPatterns<
- TriOpFrag<(fma node:$MHS, node:$RHS, node:$LHS)> >;
+ TriOpFrag<(any_fma node:$MHS, node:$RHS, node:$LHS)> >;
defm FMULX : SIMDFPIndexed<1, 0b1001, "fmulx", int_aarch64_neon_fmulx>;
defm FMUL : SIMDFPIndexed<0, 0b1001, "fmul", any_fmul>;
-def : Pat<(v2f32 (fmul V64:$Rn, (AArch64dup (f32 FPR32:$Rm)))),
+def : Pat<(v2f32 (any_fmul V64:$Rn, (AArch64dup (f32 FPR32:$Rm)))),
(FMULv2i32_indexed V64:$Rn,
(INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub),
(i64 0))>;
-def : Pat<(v4f32 (fmul V128:$Rn, (AArch64dup (f32 FPR32:$Rm)))),
+def : Pat<(v4f32 (any_fmul V128:$Rn, (AArch64dup (f32 FPR32:$Rm)))),
(FMULv4i32_indexed V128:$Rn,
(INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub),
(i64 0))>;
-def : Pat<(v2f64 (fmul V128:$Rn, (AArch64dup (f64 FPR64:$Rm)))),
+def : Pat<(v2f64 (any_fmul V128:$Rn, (AArch64dup (f64 FPR64:$Rm)))),
(FMULv2i64_indexed V128:$Rn,
(INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rm, dsub),
(i64 0))>;
diff --git a/llvm/test/CodeGen/AArch64/arm64-vmul.ll b/llvm/test/CodeGen/AArch64/arm64-vmul.ll
index 30a1bc5d8c1d..482a1c5941e2 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vmul.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vmul.ll
@@ -845,6 +845,90 @@ entry:
ret <2 x double> %fmla1
}
+define <2 x float> @fmls_indexed_2s_strict(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind readnone ssp {
+; CHECK-LABEL: fmls_indexed_2s_strict:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT: fmls.2s v0, v2, v1[0]
+; CHECK-NEXT: ret
+entry:
+ %0 = fneg <2 x float> %c
+ %lane = shufflevector <2 x float> %b, <2 x float> undef, <2 x i32> zeroinitializer
+ %fmls1 = tail call <2 x float> @llvm.experimental.constrained.fma.v2f32(<2 x float> %0, <2 x float> %lane, <2 x float> %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <2 x float> %fmls1
+}
+
+define <4 x float> @fmls_indexed_4s_strict(<4 x float> %a, <4 x float> %b, <4 x float> %c) nounwind readnone ssp {
+; CHECK-LABEL: fmls_indexed_4s_strict:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmls.4s v0, v2, v1[0]
+; CHECK-NEXT: ret
+entry:
+ %0 = fneg <4 x float> %c
+ %lane = shufflevector <4 x float> %b, <4 x float> undef, <4 x i32> zeroinitializer
+ %fmls1 = tail call <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float> %0, <4 x float> %lane, <4 x float> %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <4 x float> %fmls1
+}
+
+define <2 x double> @fmls_indexed_2d_strict(<2 x double> %a, <2 x double> %b, <2 x double> %c) nounwind readnone ssp {
+; CHECK-LABEL: fmls_indexed_2d_strict:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmls.2d v0, v2, v1[0]
+; CHECK-NEXT: ret
+entry:
+ %0 = fneg <2 x double> %c
+ %lane = shufflevector <2 x double> %b, <2 x double> undef, <2 x i32> zeroinitializer
+ %fmls1 = tail call <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double> %0, <2 x double> %lane, <2 x double> %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <2 x double> %fmls1
+}
+
+define <2 x float> @fmla_indexed_scalar_2s_strict(<2 x float> %a, <2 x float> %b, float %c) nounwind readnone ssp {
+; CHECK-LABEL: fmla_indexed_scalar_2s_strict:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $s2 killed $s2 def $q2
+; CHECK-NEXT: fmla.2s v0, v1, v2[0]
+; CHECK-NEXT: ret
+entry:
+ %v1 = insertelement <2 x float> undef, float %c, i32 0
+ %v2 = insertelement <2 x float> %v1, float %c, i32 1
+ %fmla1 = tail call <2 x float> @llvm.experimental.constrained.fma.v2f32(<2 x float> %v2, <2 x float> %b, <2 x float> %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <2 x float> %fmla1
+}
+
+define <4 x float> @fmla_indexed_scalar_4s_strict(<4 x float> %a, <4 x float> %b, float %c) nounwind readnone ssp {
+; CHECK-LABEL: fmla_indexed_scalar_4s_strict:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $s2 killed $s2 def $q2
+; CHECK-NEXT: fmla.4s v0, v1, v2[0]
+; CHECK-NEXT: ret
+entry:
+ %v1 = insertelement <4 x float> undef, float %c, i32 0
+ %v2 = insertelement <4 x float> %v1, float %c, i32 1
+ %v3 = insertelement <4 x float> %v2, float %c, i32 2
+ %v4 = insertelement <4 x float> %v3, float %c, i32 3
+ %fmla1 = tail call <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float> %v4, <4 x float> %b, <4 x float> %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <4 x float> %fmla1
+}
+
+define <2 x double> @fmla_indexed_scalar_2d_strict(<2 x double> %a, <2 x double> %b, double %c) nounwind readnone ssp {
+; CHECK-LABEL: fmla_indexed_scalar_2d_strict:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-NEXT: fmla.2d v0, v1, v2[0]
+; CHECK-NEXT: ret
+entry:
+ %v1 = insertelement <2 x double> undef, double %c, i32 0
+ %v2 = insertelement <2 x double> %v1, double %c, i32 1
+ %fmla1 = tail call <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double> %v2, <2 x double> %b, <2 x double> %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <2 x double> %fmla1
+}
+
+attributes #0 = { strictfp }
+
+declare <2 x float> @llvm.experimental.constrained.fma.v2f32(<2 x float>, <2 x float>, <2 x float>, metadata, metadata)
+declare <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float>, <4 x float>, <4 x float>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double>, <2 x double>, <2 x double>, metadata, metadata)
+
define <4 x i16> @mul_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
; CHECK-LABEL: mul_4h:
; CHECK: // %bb.0:
diff --git a/llvm/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll b/llvm/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll
index 32f59626b381..5ae08cf20c39 100644
--- a/llvm/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll
+++ b/llvm/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll
@@ -1,7 +1,11 @@
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
+attributes #0 = { strictfp }
+
declare float @llvm.fma.f32(float, float, float)
declare double @llvm.fma.f64(double, double, double)
+declare float @llvm.experimental.constrained.fma.f32(float, float, float, metadata, metadata)
+declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata)
define float @test_fmla_ss4S(float %a, float %b, <4 x float> %v) {
; CHECK-LABEL: test_fmla_ss4S
@@ -106,3 +110,105 @@ define double @test_fmls_dd2D_swap(double %a, double %b, <2 x double> %v) {
ret double %tmp3
}
+define float @test_fmla_ss4S_strict(float %a, float %b, <4 x float> %v) {
+ ; CHECK-LABEL: test_fmla_ss4S_strict
+ ; CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
+ %tmp1 = extractelement <4 x float> %v, i32 3
+ %tmp2 = call float @llvm.experimental.constrained.fma.f32(float %b, float %tmp1, float %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret float %tmp2
+}
+
+define float @test_fmla_ss4S_swap_strict(float %a, float %b, <4 x float> %v) {
+ ; CHECK-LABEL: test_fmla_ss4S_swap_strict
+ ; CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
+ %tmp1 = extractelement <4 x float> %v, i32 3
+ %tmp2 = call float @llvm.experimental.constrained.fma.f32(float %tmp1, float %a, float %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret float %tmp2
+}
+
+define float @test_fmla_ss2S_strict(float %a, float %b, <2 x float> %v) {
+ ; CHECK-LABEL: test_fmla_ss2S_strict
+ ; CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
+ %tmp1 = extractelement <2 x float> %v, i32 1
+ %tmp2 = call float @llvm.experimental.constrained.fma.f32(float %b, float %tmp1, float %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret float %tmp2
+}
+
+define double @test_fmla_ddD_strict(double %a, double %b, <1 x double> %v) {
+ ; CHECK-LABEL: test_fmla_ddD_strict
+ ; CHECK: {{fmla d[0-9]+, d[0-9]+, v[0-9]+.d\[0]|fmadd d[0-9]+, d[0-9]+, d[0-9]+, d[0-9]+}}
+ %tmp1 = extractelement <1 x double> %v, i32 0
+ %tmp2 = call double @llvm.experimental.constrained.fma.f64(double %b, double %tmp1, double %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret double %tmp2
+}
+
+define double @test_fmla_dd2D_strict(double %a, double %b, <2 x double> %v) {
+ ; CHECK-LABEL: test_fmla_dd2D_strict
+ ; CHECK: fmla {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
+ %tmp1 = extractelement <2 x double> %v, i32 1
+ %tmp2 = call double @llvm.experimental.constrained.fma.f64(double %b, double %tmp1, double %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret double %tmp2
+}
+
+define double @test_fmla_dd2D_swap_strict(double %a, double %b, <2 x double> %v) {
+ ; CHECK-LABEL: test_fmla_dd2D_swap_strict
+ ; CHECK: fmla {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
+ %tmp1 = extractelement <2 x double> %v, i32 1
+ %tmp2 = call double @llvm.experimental.constrained.fma.f64(double %tmp1, double %b, double %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret double %tmp2
+}
+
+define float @test_fmls_ss4S_strict(float %a, float %b, <4 x float> %v) {
+ ; CHECK-LABEL: test_fmls_ss4S_strict
+ ; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
+ %tmp1 = extractelement <4 x float> %v, i32 3
+ %tmp2 = fneg float %tmp1
+ %tmp3 = call float @llvm.experimental.constrained.fma.f32(float %tmp2, float %tmp1, float %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret float %tmp3
+}
+
+define float @test_fmls_ss4S_swap_strict(float %a, float %b, <4 x float> %v) {
+ ; CHECK-LABEL: test_fmls_ss4S_swap_strict
+ ; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
+ %tmp1 = extractelement <4 x float> %v, i32 3
+ %tmp2 = fneg float %tmp1
+ %tmp3 = call float @llvm.experimental.constrained.fma.f32(float %tmp1, float %tmp2, float %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret float %tmp3
+}
+
+define float @test_fmls_ss2S_strict(float %a, float %b, <2 x float> %v) {
+ ; CHECK-LABEL: test_fmls_ss2S_strict
+ ; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
+ %tmp1 = extractelement <2 x float> %v, i32 1
+ %tmp2 = fneg float %tmp1
+ %tmp3 = call float @llvm.experimental.constrained.fma.f32(float %tmp2, float %tmp1, float %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret float %tmp3
+}
+
+define double @test_fmls_ddD_strict(double %a, double %b, <1 x double> %v) {
+ ; CHECK-LABEL: test_fmls_ddD_strict
+ ; CHECK: {{fmls d[0-9]+, d[0-9]+, v[0-9]+.d\[0]|fmsub d[0-9]+, d[0-9]+, d[0-9]+, d[0-9]+}}
+ %tmp1 = extractelement <1 x double> %v, i32 0
+ %tmp2 = fneg double %tmp1
+ %tmp3 = call double @llvm.experimental.constrained.fma.f64(double %tmp2, double %tmp1, double %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret double %tmp3
+}
+
+define double @test_fmls_dd2D_strict(double %a, double %b, <2 x double> %v) {
+ ; CHECK-LABEL: test_fmls_dd2D_strict
+ ; CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
+ %tmp1 = extractelement <2 x double> %v, i32 1
+ %tmp2 = fneg double %tmp1
+ %tmp3 = call double @llvm.experimental.constrained.fma.f64(double %tmp2, double %tmp1, double %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret double %tmp3
+}
+
+define double @test_fmls_dd2D_swap_strict(double %a, double %b, <2 x double> %v) {
+ ; CHECK-LABEL: test_fmls_dd2D_swap_strict
+ ; CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
+ %tmp1 = extractelement <2 x double> %v, i32 1
+ %tmp2 = fneg double %tmp1
+ %tmp3 = call double @llvm.experimental.constrained.fma.f64(double %tmp1, double %tmp2, double %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret double %tmp3
+}
+
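
For the fmul side of the change, a similar sketch (hypothetical, not part of
the committed tests): with the Pat entries above now using any_fmul, a
constrained fmul whose second operand is a duplicated scalar should likewise
be able to select the indexed fmul:

define <2 x float> @fmul_by_scalar_strict(<2 x float> %a, float %c) #0 {
  ; Duplicate the scalar across both lanes (the form AArch64dup matches).
  %ins = insertelement <2 x float> undef, float %c, i32 0
  %dup = shufflevector <2 x float> %ins, <2 x float> undef, <2 x i32> zeroinitializer
  ; Strict FP multiply; with any_fmul this can select FMULv2i32_indexed.
  %r = call <2 x float> @llvm.experimental.constrained.fmul.v2f32(<2 x float> %a, <2 x float> %dup, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
  ret <2 x float> %r
}

declare <2 x float> @llvm.experimental.constrained.fmul.v2f32(<2 x float>, <2 x float>, metadata, metadata)
attributes #0 = { strictfp }
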