[llvm] b720dcb - [AArch64][GISel] Split large f64 vectors for fcmp.

David Green via llvm-commits llvm-commits at lists.llvm.org
Thu Aug 10 00:19:27 PDT 2023


Author: David Green
Date: 2023-08-10T08:19:22+01:00
New Revision: b720dcba92cc377da873c03d9ac3ac6fb80e0499

URL: https://github.com/llvm/llvm-project/commit/b720dcba92cc377da873c03d9ac3ac6fb80e0499
DIFF: https://github.com/llvm/llvm-project/commit/b720dcba92cc377da873c03d9ac3ac6fb80e0499.diff

LOG: [AArch64][GISel] Split large f64 vectors for fcmp.

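This adds basic handling for fcmp on f64 vectors wider than 128 bits (for
example <4 x double>), which was previously missing. The compare's source
operand (type index 1) is clamped to at most two s64 elements, so such compares
are split into v2f64 pieces.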

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
    llvm/test/CodeGen/AArch64/neon-compare-instructions.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 819628213e3eb6..8124cd890dc2c8 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -486,7 +486,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
                    Ty.getElementType() != SrcTy.getElementType();
           },
           0, 1)
-      .clampNumElements(0, v2s32, v4s32);
+      .clampNumElements(0, v2s32, v4s32)
+      .clampMaxNumElements(1, s64, 2);
 
   // Extensions
   auto ExtLegalFunc = [=](const LegalityQuery &Query) {

diff  --git a/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll b/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
index 76e04840758035..23848c8598249d 100644
--- a/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
@@ -5449,3 +5449,45 @@ define <4 x i1> @fcmule4xfloat_fast_aext(<4 x float> %A, <4 x float> %B) {
   %tmp3 = fcmp fast ule <4 x float> %A, %B
   ret <4 x i1> %tmp3
 }
+
+define <4 x i64> @fcmoeq4xdouble(<4 x double> %A, <4 x double> %B) {
+; CHECK-SD-LABEL: fcmoeq4xdouble:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcmeq v0.2d, v0.2d, v2.2d
+; CHECK-SD-NEXT:    fcmeq v1.2d, v1.2d, v3.2d
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcmoeq4xdouble:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fcmeq v0.2d, v0.2d, v2.2d
+; CHECK-GI-NEXT:    fcmeq v1.2d, v1.2d, v3.2d
+; CHECK-GI-NEXT:    shl v0.2d, v0.2d, #63
+; CHECK-GI-NEXT:    shl v1.2d, v1.2d, #63
+; CHECK-GI-NEXT:    sshr v0.2d, v0.2d, #63
+; CHECK-GI-NEXT:    sshr v1.2d, v1.2d, #63
+; CHECK-GI-NEXT:    ret
+  %tmp3 = fcmp oeq <4 x double> %A, %B
+  %tmp4 = sext <4 x i1> %tmp3 to <4 x i64>
+  ret <4 x i64> %tmp4
+}
+
+define <8 x i32> @fcmoeq8xfloat(<8 x float> %A, <8 x float> %B) {
+; CHECK-SD-LABEL: fcmoeq8xfloat:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcmeq v0.4s, v0.4s, v2.4s
+; CHECK-SD-NEXT:    fcmeq v1.4s, v1.4s, v3.4s
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcmoeq8xfloat:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fcmeq v0.4s, v0.4s, v2.4s
+; CHECK-GI-NEXT:    fcmeq v1.4s, v1.4s, v3.4s
+; CHECK-GI-NEXT:    shl v0.4s, v0.4s, #31
+; CHECK-GI-NEXT:    shl v1.4s, v1.4s, #31
+; CHECK-GI-NEXT:    sshr v0.4s, v0.4s, #31
+; CHECK-GI-NEXT:    sshr v1.4s, v1.4s, #31
+; CHECK-GI-NEXT:    ret
+  %tmp3 = fcmp oeq <8 x float> %A, %B
+  %tmp4 = sext <8 x i1> %tmp3 to <8 x i32>
+  ret <8 x i32> %tmp4
+}


        


More information about the llvm-commits mailing list