[llvm] c33fd3b - [AArch64] Lower all fp zero buildvectors through BUILD_VECTOR.
David Green via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 22 03:26:46 PST 2023
Author: David Green
Date: 2023-02-22T11:26:41Z
New Revision: c33fd3b47faae130518d2217c66da75af0e969b3
URL: https://github.com/llvm/llvm-project/commit/c33fd3b47faae130518d2217c66da75af0e969b3
DIFF: https://github.com/llvm/llvm-project/commit/c33fd3b47faae130518d2217c66da75af0e969b3.diff
LOG: [AArch64] Lower all fp zero buildvectors through BUILD_VECTOR.
Just like with integers, we can treat zero fp buildvectors as legal so that they
can be recognized in tablegen patterns using immAllZerosV.
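As an illustration (a minimal sketch drawn from the poszero_v2f64 test added below), an fp +0.0 splat operand such as

  define <2 x double> @poszero_v2f64(<2 x double> %a) {
    %b = fadd <2 x double> %a, <double 0.0, double 0.0>   ; +0.0 splat stays a BUILD_VECTOR
    ret <2 x double> %b
  }

now keeps its zero operand as a BUILD_VECTOR, so the (v2f64 immAllZerosV) pattern can select a single "movi v1.2d, #0000000000000000" for it; note that -0.0 splats are deliberately not passed through, since they are not all-zero bit patterns.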
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/lib/Target/AArch64/AArch64InstrInfo.td
llvm/test/CodeGen/AArch64/arm64-build-vector.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 1cc102be0494..f51bbcb6a54c 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -12230,20 +12230,22 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
if (Op.getOpcode() != ISD::BUILD_VECTOR)
return SDValue();
- if (VT.isInteger()) {
- // Certain vector constants, used to express things like logical NOT and
- // arithmetic NEG, are passed through unmodified. This allows special
- // patterns for these operations to match, which will lower these constants
- // to whatever is proven necessary.
- BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
- if (BVN->isConstant())
- if (ConstantSDNode *Const = BVN->getConstantSplatNode()) {
- unsigned BitSize = VT.getVectorElementType().getSizeInBits();
- APInt Val(BitSize,
- Const->getAPIntValue().zextOrTrunc(BitSize).getZExtValue());
- if (Val.isZero() || Val.isAllOnes())
- return Op;
- }
+ // Certain vector constants, used to express things like logical NOT and
+ // arithmetic NEG, are passed through unmodified. This allows special
+ // patterns for these operations to match, which will lower these constants
+ // to whatever is proven necessary.
+ BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
+ if (BVN->isConstant()) {
+ if (ConstantSDNode *Const = BVN->getConstantSplatNode()) {
+ unsigned BitSize = VT.getVectorElementType().getSizeInBits();
+ APInt Val(BitSize,
+ Const->getAPIntValue().zextOrTrunc(BitSize).getZExtValue());
+ if (Val.isZero() || (VT.isInteger() && Val.isAllOnes()))
+ return Op;
+ }
+ if (ConstantFPSDNode *Const = BVN->getConstantFPSplatNode())
+ if (Const->isZero() && !Const->isNegative())
+ return Op;
}
if (SDValue V = ConstantBuildVector(Op, DAG))
@@ -12445,7 +12447,8 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
APInt ConstantValueAPInt(1, 0);
if (auto *C = dyn_cast<ConstantSDNode>(ConstantValue))
ConstantValueAPInt = C->getAPIntValue().zextOrTrunc(BitSize);
- if (!isNullConstant(ConstantValue) && !ConstantValueAPInt.isAllOnes()) {
+ if (!isNullConstant(ConstantValue) && !isNullFPConstant(ConstantValue) &&
+ !ConstantValueAPInt.isAllOnes()) {
Val = ConstantBuildVector(Val, DAG);
if (!Val)
// Otherwise, materialize the constant and splat it.
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 762563089c28..1226164fc54a 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -6447,6 +6447,10 @@ def : Pat<(v2i64 immAllZerosV), (MOVIv2d_ns (i32 0))>;
def : Pat<(v4i32 immAllZerosV), (MOVIv2d_ns (i32 0))>;
def : Pat<(v8i16 immAllZerosV), (MOVIv2d_ns (i32 0))>;
def : Pat<(v16i8 immAllZerosV), (MOVIv2d_ns (i32 0))>;
+def : Pat<(v2f64 immAllZerosV), (MOVIv2d_ns (i32 0))>;
+def : Pat<(v4f32 immAllZerosV), (MOVIv2d_ns (i32 0))>;
+def : Pat<(v8f16 immAllZerosV), (MOVIv2d_ns (i32 0))>;
+def : Pat<(v8bf16 immAllZerosV), (MOVIv2d_ns (i32 0))>;
def : Pat<(v2i64 immAllOnesV), (MOVIv2d_ns (i32 255))>;
def : Pat<(v4i32 immAllOnesV), (MOVIv2d_ns (i32 255))>;
@@ -6459,6 +6463,10 @@ def : Pat<(v1i64 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;
def : Pat<(v2i32 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;
def : Pat<(v4i16 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;
def : Pat<(v8i8 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;
+def : Pat<(v1f64 immAllZerosV), (MOVID (i32 0))>;
+def : Pat<(v2f32 immAllZerosV), (MOVID (i32 0))>;
+def : Pat<(v4f16 immAllZerosV), (MOVID (i32 0))>;
+def : Pat<(v4bf16 immAllZerosV), (MOVID (i32 0))>;
def : Pat<(v1i64 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
def : Pat<(v2i32 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
diff --git a/llvm/test/CodeGen/AArch64/arm64-build-vector.ll b/llvm/test/CodeGen/AArch64/arm64-build-vector.ll
index 9b4660c94790..f9f57e662d6a 100644
--- a/llvm/test/CodeGen/AArch64/arm64-build-vector.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-build-vector.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=aarch64-eabi -aarch64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64-eabi -mattr=+fullfp16,+bf16 | FileCheck %s
; Check that building a vector from floats doesn't insert an unnecessary
; copy for lane zero.
@@ -10,9 +10,9 @@ define <4 x float> @foo(float %a, float %b, float %c, float %d) nounwind {
; CHECK-NEXT: // kill: def $s1 killed $s1 def $q1
; CHECK-NEXT: // kill: def $s2 killed $s2 def $q2
; CHECK-NEXT: // kill: def $s3 killed $s3 def $q3
-; CHECK-NEXT: mov.s v0[1], v1[0]
-; CHECK-NEXT: mov.s v0[2], v2[0]
-; CHECK-NEXT: mov.s v0[3], v3[0]
+; CHECK-NEXT: mov v0.s[1], v1.s[0]
+; CHECK-NEXT: mov v0.s[2], v2.s[0]
+; CHECK-NEXT: mov v0.s[3], v3.s[0]
; CHECK-NEXT: ret
%1 = insertelement <4 x float> undef, float %a, i32 0
%2 = insertelement <4 x float> %1, float %b, i32 1
@@ -26,7 +26,7 @@ define <8 x i16> @build_all_zero(<8 x i16> %a) #1 {
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #44672
; CHECK-NEXT: fmov s1, w8
-; CHECK-NEXT: mul.8h v0, v0, v1
+; CHECK-NEXT: mul v0.8h, v0.8h, v1.8h
; CHECK-NEXT: ret
%b = add <8 x i16> %a, <i16 -32768, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef>
%c = mul <8 x i16> %b, <i16 -20864, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef>
@@ -41,7 +41,7 @@ define <8 x i16> @build_all_zero(<8 x i16> %a) #1 {
define <8 x i16> @concat_2_build_vector(<4 x i16> %in0) {
; CHECK-LABEL: concat_2_build_vector:
; CHECK: // %bb.0:
-; CHECK-NEXT: movi.2d v0, #0000000000000000
+; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: ret
%vshl_n = shl <4 x i16> %in0, <i16 8, i16 8, i16 8, i16 8>
%vshl_n2 = shl <4 x i16> %vshl_n, <i16 9, i16 9, i16 9, i16 9>
@@ -98,9 +98,165 @@ define <1 x double> @convert_single_fp_vector_constant(i1 %cmp) {
; CHECK-NEXT: csetm x9, ne
; CHECK-NEXT: fmov d0, x8
; CHECK-NEXT: fmov d1, x9
-; CHECK-NEXT: and.8b v0, v0, v1
+; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ret
entry:
%sel = select i1 %cmp, <1 x double> <double 1.000000e+00>, <1 x double> zeroinitializer
ret <1 x double> %sel
}
+
+; All Zero and All -Zero tests.
+
+define <2 x double> @poszero_v2f64(<2 x double> %a) {
+; CHECK-LABEL: poszero_v2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v1.2d, #0000000000000000
+; CHECK-NEXT: fadd v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: ret
+ %b = fadd <2 x double> %a, <double 0.0, double 0.0>
+ ret <2 x double> %b
+}
+
+define <2 x double> @negzero_v2f64(<2 x double> %a) {
+; CHECK-LABEL: negzero_v2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x8, #-9223372036854775808
+; CHECK-NEXT: dup v1.2d, x8
+; CHECK-NEXT: fmul v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: ret
+ %b = fmul <2 x double> %a, <double -0.0, double -0.0>
+ ret <2 x double> %b
+}
+
+define <1 x double> @poszero_v1f64(<1 x double> %a) {
+; CHECK-LABEL: poszero_v1f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi d1, #0000000000000000
+; CHECK-NEXT: fadd d0, d0, d1
+; CHECK-NEXT: ret
+ %b = fadd <1 x double> %a, <double 0.0>
+ ret <1 x double> %b
+}
+
+define <1 x double> @negzero_v1f64(<1 x double> %a) {
+; CHECK-LABEL: negzero_v1f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x8, #-9223372036854775808
+; CHECK-NEXT: fmov d1, x8
+; CHECK-NEXT: fmul d0, d0, d1
+; CHECK-NEXT: ret
+ %b = fmul <1 x double> %a, <double -0.0>
+ ret <1 x double> %b
+}
+
+define <4 x float> @poszero_v4f32(<4 x float> %a) {
+; CHECK-LABEL: poszero_v4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v1.2d, #0000000000000000
+; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: ret
+ %b = fadd <4 x float> %a, <float 0.0, float 0.0, float 0.0, float 0.0>
+ ret <4 x float> %b
+}
+
+define <4 x float> @negzero_v4f32(<4 x float> %a) {
+; CHECK-LABEL: negzero_v4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v1.4s, #128, lsl #24
+; CHECK-NEXT: fmul v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: ret
+ %b = fmul <4 x float> %a, <float -0.0, float -0.0, float -0.0, float -0.0>
+ ret <4 x float> %b
+}
+
+define <2 x float> @poszero_v2f32(<2 x float> %a) {
+; CHECK-LABEL: poszero_v2f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi d1, #0000000000000000
+; CHECK-NEXT: fadd v0.2s, v0.2s, v1.2s
+; CHECK-NEXT: ret
+ %b = fadd <2 x float> %a, <float 0.0, float 0.0>
+ ret <2 x float> %b
+}
+
+define <2 x float> @negzero_v2f32(<2 x float> %a) {
+; CHECK-LABEL: negzero_v2f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v1.2s, #128, lsl #24
+; CHECK-NEXT: fmul v0.2s, v0.2s, v1.2s
+; CHECK-NEXT: ret
+ %b = fmul <2 x float> %a, <float -0.0, float -0.0>
+ ret <2 x float> %b
+}
+
+define <8 x half> @poszero_v8f16(<8 x half> %a) {
+; CHECK-LABEL: poszero_v8f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v1.2d, #0000000000000000
+; CHECK-NEXT: fadd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: ret
+ %b = fadd <8 x half> %a, <half 0.0, half 0.0, half 0.0, half 0.0, half 0.0, half 0.0, half 0.0, half 0.0>
+ ret <8 x half> %b
+}
+
+define <8 x half> @negzero_v8f16(<8 x half> %a) {
+; CHECK-LABEL: negzero_v8f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v1.8h, #128, lsl #8
+; CHECK-NEXT: fmul v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: ret
+ %b = fmul <8 x half> %a, <half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0>
+ ret <8 x half> %b
+}
+
+define <4 x half> @poszero_v4f16(<4 x half> %a) {
+; CHECK-LABEL: poszero_v4f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi d1, #0000000000000000
+; CHECK-NEXT: fadd v0.4h, v0.4h, v1.4h
+; CHECK-NEXT: ret
+ %b = fadd <4 x half> %a, <half 0.0, half 0.0, half 0.0, half 0.0>
+ ret <4 x half> %b
+}
+
+define <4 x half> @negzero_v4f16(<4 x half> %a) {
+; CHECK-LABEL: negzero_v4f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v1.4h, #128, lsl #8
+; CHECK-NEXT: fmul v0.4h, v0.4h, v1.4h
+; CHECK-NEXT: ret
+ %b = fmul <4 x half> %a, <half -0.0, half -0.0, half -0.0, half -0.0>
+ ret <4 x half> %b
+}
+
+define <8 x bfloat> @poszero_v8bf16(<8 x bfloat> %a) {
+; CHECK-LABEL: poszero_v8bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v0.2d, #0000000000000000
+; CHECK-NEXT: ret
+ ret <8 x bfloat> <bfloat 0.0, bfloat 0.0, bfloat 0.0, bfloat 0.0, bfloat 0.0, bfloat 0.0, bfloat 0.0, bfloat 0.0>
+}
+
+define <8 x bfloat> @negzero_v8bf16(<8 x bfloat> %a) {
+; CHECK-LABEL: negzero_v8bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v0.8h, #128, lsl #8
+; CHECK-NEXT: ret
+ ret <8 x bfloat> <bfloat -0.0, bfloat -0.0, bfloat -0.0, bfloat -0.0, bfloat -0.0, bfloat -0.0, bfloat -0.0, bfloat -0.0>
+}
+
+define <4 x bfloat> @poszero_v4bf16(<4 x bfloat> %a) {
+; CHECK-LABEL: poszero_v4bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi d0, #0000000000000000
+; CHECK-NEXT: ret
+ ret <4 x bfloat> <bfloat 0.0, bfloat 0.0, bfloat 0.0, bfloat 0.0>
+}
+
+define <4 x bfloat> @negzero_v4bf16(<4 x bfloat> %a) {
+; CHECK-LABEL: negzero_v4bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v0.4h, #128, lsl #8
+; CHECK-NEXT: ret
+ ret <4 x bfloat> <bfloat -0.0, bfloat -0.0, bfloat -0.0, bfloat -0.0>
+}