[llvm] b720dcb - [AArch64][GISel] Split large f64 vectors for fcmp.
David Green via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 10 00:19:27 PDT 2023
Author: David Green
Date: 2023-08-10T08:19:22+01:00
New Revision: b720dcba92cc377da873c03d9ac3ac6fb80e0499
URL: https://github.com/llvm/llvm-project/commit/b720dcba92cc377da873c03d9ac3ac6fb80e0499
DIFF: https://github.com/llvm/llvm-project/commit/b720dcba92cc377da873c03d9ac3ac6fb80e0499.diff
LOG: [AArch64][GISel] Split large f64 vectors for fcmp.
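This adds basic handling for fcmp on f64 vectors with more than two elements,
which was missing from the GlobalISel legalizer: type index 1 is now clamped to
at most two s64 elements, so larger f64 vectors are split.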
Added:
Modified:
llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 819628213e3eb6..8124cd890dc2c8 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -486,7 +486,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
Ty.getElementType() != SrcTy.getElementType();
},
0, 1)
- .clampNumElements(0, v2s32, v4s32);
+ .clampNumElements(0, v2s32, v4s32)
+ .clampMaxNumElements(1, s64, 2);
// Extensions
auto ExtLegalFunc = [=](const LegalityQuery &Query) {
diff --git a/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll b/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
index 76e04840758035..23848c8598249d 100644
--- a/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
@@ -5449,3 +5449,45 @@ define <4 x i1> @fcmule4xfloat_fast_aext(<4 x float> %A, <4 x float> %B) {
%tmp3 = fcmp fast ule <4 x float> %A, %B
ret <4 x i1> %tmp3
}
+
+define <4 x i64> @fcmoeq4xdouble(<4 x double> %A, <4 x double> %B) {
+; CHECK-SD-LABEL: fcmoeq4xdouble:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcmeq v0.2d, v0.2d, v2.2d
+; CHECK-SD-NEXT: fcmeq v1.2d, v1.2d, v3.2d
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcmoeq4xdouble:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: fcmeq v0.2d, v0.2d, v2.2d
+; CHECK-GI-NEXT: fcmeq v1.2d, v1.2d, v3.2d
+; CHECK-GI-NEXT: shl v0.2d, v0.2d, #63
+; CHECK-GI-NEXT: shl v1.2d, v1.2d, #63
+; CHECK-GI-NEXT: sshr v0.2d, v0.2d, #63
+; CHECK-GI-NEXT: sshr v1.2d, v1.2d, #63
+; CHECK-GI-NEXT: ret
+ %tmp3 = fcmp oeq <4 x double> %A, %B
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i64>
+ ret <4 x i64> %tmp4
+}
+
+define <8 x i32> @fcmoeq8xfloat(<8 x float> %A, <8 x float> %B) {
+; CHECK-SD-LABEL: fcmoeq8xfloat:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcmeq v0.4s, v0.4s, v2.4s
+; CHECK-SD-NEXT: fcmeq v1.4s, v1.4s, v3.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcmoeq8xfloat:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: fcmeq v0.4s, v0.4s, v2.4s
+; CHECK-GI-NEXT: fcmeq v1.4s, v1.4s, v3.4s
+; CHECK-GI-NEXT: shl v0.4s, v0.4s, #31
+; CHECK-GI-NEXT: shl v1.4s, v1.4s, #31
+; CHECK-GI-NEXT: sshr v0.4s, v0.4s, #31
+; CHECK-GI-NEXT: sshr v1.4s, v1.4s, #31
+; CHECK-GI-NEXT: ret
+ %tmp3 = fcmp oeq <8 x float> %A, %B
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i32>
+ ret <8 x i32> %tmp4
+}
More information about the llvm-commits mailing list