[llvm-branch-commits] [llvm] release/18.x: [AArch64][GISEL] Consider fcmp true and fcmp false in cond code selection (#86972) (PR #91580)
Marc Auberer via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Thu May 9 03:10:12 PDT 2024
https://github.com/marcauberer created https://github.com/llvm/llvm-project/pull/91580
Backport c482fad2c1de367f8fef2b40361dec00523707f7
This cherry-pick includes some fixes to the expected test output (the `CHECK-GI` reference lines), because the code generated on `release/18.x` differs from that on `main`.
From 39f3ca9677a63ddc404e7ee13edace2bbf72fc93 Mon Sep 17 00:00:00 2001
From: Marc Auberer <marc.auberer at chillibits.com>
Date: Thu, 28 Mar 2024 23:08:38 +0100
Subject: [PATCH 1/2] [AArch64][GISEL] Consider fcmp true and fcmp false in
cond code selection (#86972)
Fixes #86917
`FCMP_TRUE` and `FCMP_FALSE` were previously not considered and we ended
up in an llvm_unreachable assertion.
---
.../AArch64/GISel/AArch64GlobalISelUtils.cpp | 6 ++
.../CodeGen/AArch64/GlobalISel/select.mir | 20 ++++
.../AArch64/neon-compare-instructions.ll | 101 ++++++++++++++++++
3 files changed, 127 insertions(+)
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp b/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp
index 92db89cc0915b..80fe4bcb8b58f 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp
@@ -147,6 +147,12 @@ void AArch64GISelUtils::changeFCMPPredToAArch64CC(
case CmpInst::FCMP_UNE:
CondCode = AArch64CC::NE;
break;
+ case CmpInst::FCMP_TRUE:
+ CondCode = AArch64CC::AL;
+ break;
+ case CmpInst::FCMP_FALSE:
+ CondCode = AArch64CC::NV;
+ break;
}
}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select.mir
index 60cddbf794bc7..ae78d4be0f88a 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select.mir
@@ -183,6 +183,14 @@ registers:
- { id: 5, class: gpr }
- { id: 6, class: gpr }
- { id: 7, class: gpr }
+ - { id: 8, class: fpr }
+ - { id: 9, class: gpr }
+ - { id: 10, class: fpr }
+ - { id: 11, class: gpr }
+ - { id: 12, class: gpr }
+ - { id: 13, class: gpr }
+ - { id: 14, class: gpr }
+ - { id: 15, class: gpr }
# CHECK: body:
# CHECK: nofpexcept FCMPSrr %0, %0, implicit-def $nzcv
@@ -209,6 +217,18 @@ body: |
%7(s32) = G_ANYEXT %5
$w0 = COPY %7(s32)
+ %8(s32) = COPY $s0
+ %9(s32) = G_FCMP floatpred(true), %8, %8
+ %12(s8) = G_TRUNC %9(s32)
+ %14(s32) = G_ANYEXT %12
+ $w0 = COPY %14(s32)
+
+ %10(s64) = COPY $d0
+ %11(s32) = G_FCMP floatpred(false), %10, %10
+ %13(s8) = G_TRUNC %11(s32)
+ %15(s32) = G_ANYEXT %13
+ $w0 = COPY %15(s32)
+
...
---
diff --git a/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll b/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
index 765c81e26e13c..c4c00f8e97942 100644
--- a/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
@@ -2870,6 +2870,107 @@ define <2 x i64> @fcmune2xdouble(<2 x double> %A, <2 x double> %B) {
ret <2 x i64> %tmp4
}
+define <2 x i32> @fcmal2xfloat(<2 x float> %A, <2 x float> %B) {
+; CHECK-SD-LABEL: fcmal2xfloat:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v0.2d, #0xffffffffffffffff
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcmal2xfloat:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v0.2s, #1
+; CHECK-GI-NEXT: shl v0.2s, v0.2s, #31
+; CHECK-GI-NEXT: sshr v0.2s, v0.2s, #31
+; CHECK-GI-NEXT: ret
+ %tmp3 = fcmp true <2 x float> %A, %B
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @fcmal4xfloat(<4 x float> %A, <4 x float> %B) {
+; CHECK-SD-LABEL: fcmal4xfloat:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v0.2d, #0xffffffffffffffff
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcmal4xfloat:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov w8, #1 // =0x1
+; CHECK-GI-NEXT: fmov s0, w8
+; CHECK-GI-NEXT: mov v1.16b, v0.16b
+; CHECK-GI-NEXT: mov v1.h[1], v0.h[0]
+; CHECK-GI-NEXT: mov v0.h[1], v0.h[0]
+; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT: mov v1.d[1], v0.d[0]
+; CHECK-GI-NEXT: shl v0.4s, v1.4s, #31
+; CHECK-GI-NEXT: sshr v0.4s, v0.4s, #31
+; CHECK-GI-NEXT: ret
+ %tmp3 = fcmp true <4 x float> %A, %B
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+ ret <4 x i32> %tmp4
+}
+define <2 x i64> @fcmal2xdouble(<2 x double> %A, <2 x double> %B) {
+; CHECK-SD-LABEL: fcmal2xdouble:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v0.2d, #0xffffffffffffffff
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcmal2xdouble:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: adrp x8, .LCPI221_0
+; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI221_0]
+; CHECK-GI-NEXT: shl v0.2d, v0.2d, #63
+; CHECK-GI-NEXT: sshr v0.2d, v0.2d, #63
+; CHECK-GI-NEXT: ret
+ %tmp3 = fcmp true <2 x double> %A, %B
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+ ret <2 x i64> %tmp4
+}
+
+define <2 x i32> @fcmnv2xfloat(<2 x float> %A, <2 x float> %B) {
+; CHECK-LABEL: fcmnv2xfloat:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v0.2d, #0000000000000000
+; CHECK-NEXT: ret
+ %tmp3 = fcmp false <2 x float> %A, %B
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @fcmnv4xfloat(<4 x float> %A, <4 x float> %B) {
+; CHECK-SD-LABEL: fcmnv4xfloat:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v0.2d, #0000000000000000
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcmnv4xfloat:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov w8, #0 // =0x0
+; CHECK-GI-NEXT: fmov s0, w8
+; CHECK-GI-NEXT: mov v1.16b, v0.16b
+; CHECK-GI-NEXT: mov v1.h[1], v0.h[0]
+; CHECK-GI-NEXT: mov v0.h[1], v0.h[0]
+; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT: mov v1.d[1], v0.d[0]
+; CHECK-GI-NEXT: shl v0.4s, v1.4s, #31
+; CHECK-GI-NEXT: sshr v0.4s, v0.4s, #31
+; CHECK-GI-NEXT: ret
+ %tmp3 = fcmp false <4 x float> %A, %B
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+ ret <4 x i32> %tmp4
+}
+define <2 x i64> @fcmnv2xdouble(<2 x double> %A, <2 x double> %B) {
+; CHECK-LABEL: fcmnv2xdouble:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v0.2d, #0000000000000000
+; CHECK-NEXT: ret
+ %tmp3 = fcmp false <2 x double> %A, %B
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+ ret <2 x i64> %tmp4
+}
+
define <2 x i32> @fcmoeqz2xfloat(<2 x float> %A) {
; CHECK-LABEL: fcmoeqz2xfloat:
; CHECK: // %bb.0:
From 66222e2555d730380d7b8cd31de9fa706058b0e5 Mon Sep 17 00:00:00 2001
From: Marc Auberer <marc.auberer at chillibits.com>
Date: Thu, 9 May 2024 12:06:59 +0200
Subject: [PATCH 2/2] Ref fixes
---
.../AArch64/neon-compare-instructions.ll | 24 +++++--------------
1 file changed, 6 insertions(+), 18 deletions(-)
diff --git a/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll b/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
index c4c00f8e97942..f398a7f8b8caa 100644
--- a/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
@@ -2895,15 +2895,9 @@ define <4 x i32> @fcmal4xfloat(<4 x float> %A, <4 x float> %B) {
;
; CHECK-GI-LABEL: fcmal4xfloat:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: mov w8, #1 // =0x1
-; CHECK-GI-NEXT: fmov s0, w8
-; CHECK-GI-NEXT: mov v1.16b, v0.16b
-; CHECK-GI-NEXT: mov v1.h[1], v0.h[0]
-; CHECK-GI-NEXT: mov v0.h[1], v0.h[0]
-; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0
-; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT: mov v1.d[1], v0.d[0]
-; CHECK-GI-NEXT: shl v0.4s, v1.4s, #31
+; CHECK-GI-NEXT: movi v0.2s, #1
+; CHECK-GI-NEXT: mov v0.d[1], v0.d[0]
+; CHECK-GI-NEXT: shl v0.4s, v0.4s, #31
; CHECK-GI-NEXT: sshr v0.4s, v0.4s, #31
; CHECK-GI-NEXT: ret
%tmp3 = fcmp true <4 x float> %A, %B
@@ -2946,15 +2940,9 @@ define <4 x i32> @fcmnv4xfloat(<4 x float> %A, <4 x float> %B) {
;
; CHECK-GI-LABEL: fcmnv4xfloat:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: mov w8, #0 // =0x0
-; CHECK-GI-NEXT: fmov s0, w8
-; CHECK-GI-NEXT: mov v1.16b, v0.16b
-; CHECK-GI-NEXT: mov v1.h[1], v0.h[0]
-; CHECK-GI-NEXT: mov v0.h[1], v0.h[0]
-; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0
-; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT: mov v1.d[1], v0.d[0]
-; CHECK-GI-NEXT: shl v0.4s, v1.4s, #31
+; CHECK-GI-NEXT: movi v0.2d, #0000000000000000
+; CHECK-GI-NEXT: mov v0.d[1], v0.d[0]
+; CHECK-GI-NEXT: shl v0.4s, v0.4s, #31
; CHECK-GI-NEXT: sshr v0.4s, v0.4s, #31
; CHECK-GI-NEXT: ret
%tmp3 = fcmp false <4 x float> %A, %B
More information about the llvm-branch-commits
mailing list