[llvm] 3a4b30e - [AArch64][GISel] Scalarize i128 ICmp and Select.
David Green via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 13 10:44:33 PDT 2024
Author: David Green
Date: 2024-09-13T18:44:26+01:00
New Revision: 3a4b30e11eb8a2015aac185cd2368f4dc3ed1e53
URL: https://github.com/llvm/llvm-project/commit/3a4b30e11eb8a2015aac185cd2368f4dc3ed1e53
DIFF: https://github.com/llvm/llvm-project/commit/3a4b30e11eb8a2015aac185cd2368f4dc3ed1e53.diff
LOG: [AArch64][GISel] Scalarize i128 ICmp and Select.
Similar to other i128 operations, we scalarize any icmps or selects whose
elements are larger than 64 bits.
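For illustration, this is the kind of IR the change lets GlobalISel legalize
without falling back to SelectionDAG (a sketch drawn from the new test added to
llvm/test/CodeGen/AArch64/icmp.ll below):

  define <2 x i128> @v2i128_i128(<2 x i128> %a, <2 x i128> %b, <2 x i128> %d, <2 x i128> %e) {
  entry:
    %c = icmp slt <2 x i128> %a, %b
    %s = select <2 x i1> %c, <2 x i128> %d, <2 x i128> %e
    ret <2 x i128> %s
  }

The <2 x i128> compare and select are scalarized into two i128 compares and
selects, which the existing i128 legalization already handles; as a result the
-global-isel-abort=2 fallback can be dropped from the icmp.ll RUN line, as seen
in the diff.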
Added:
Modified:
llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
llvm/test/CodeGen/AArch64/fcmp.ll
llvm/test/CodeGen/AArch64/icmp.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 623e59c4be8053..3957d21ea695ba 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -543,6 +543,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.widenScalarOrEltToNextPow2(1)
.clampScalar(1, s32, s64)
.clampScalar(0, s32, s32)
+ .scalarizeIf(scalarOrEltWiderThan(1, 64), 1)
.minScalarEltSameAsIf(
[=](const LegalityQuery &Query) {
const LLT &Ty = Query.Types[0];
@@ -785,6 +786,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.widenScalarToNextPow2(0)
.clampScalar(0, s32, s64)
.clampScalar(1, s32, s32)
+ .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
.minScalarEltSameAsIf(all(isVector(0), isVector(1)), 1, 0)
.lowerIf(isVector(0));
diff --git a/llvm/test/CodeGen/AArch64/fcmp.ll b/llvm/test/CodeGen/AArch64/fcmp.ll
index 5e44da5fcfa2d8..baab53d8bdbd46 100644
--- a/llvm/test/CodeGen/AArch64/fcmp.ll
+++ b/llvm/test/CodeGen/AArch64/fcmp.ll
@@ -465,49 +465,33 @@ define <2 x fp128> @v2f128_fp128(<2 x fp128> %a, <2 x fp128> %b, <2 x fp128> %d,
; CHECK-GI-NEXT: .cfi_offset w30, -16
; CHECK-GI-NEXT: stp q3, q1, [sp] // 32-byte Folded Spill
; CHECK-GI-NEXT: mov v1.16b, v2.16b
-; CHECK-GI-NEXT: stp q4, q5, [sp, #32] // 32-byte Folded Spill
-; CHECK-GI-NEXT: stp q6, q7, [sp, #64] // 32-byte Folded Spill
+; CHECK-GI-NEXT: stp q6, q4, [sp, #32] // 32-byte Folded Spill
+; CHECK-GI-NEXT: stp q7, q5, [sp, #64] // 32-byte Folded Spill
; CHECK-GI-NEXT: bl __lttf2
; CHECK-GI-NEXT: ldp q1, q0, [sp] // 32-byte Folded Reload
-; CHECK-GI-NEXT: cmp w0, #0
-; CHECK-GI-NEXT: cset w19, lt
+; CHECK-GI-NEXT: mov w19, w0
; CHECK-GI-NEXT: bl __lttf2
-; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
-; CHECK-GI-NEXT: cmp w0, #0
-; CHECK-GI-NEXT: bfi x19, x8, #32, #32
-; CHECK-GI-NEXT: cset w8, lt
-; CHECK-GI-NEXT: fmov x10, d0
-; CHECK-GI-NEXT: mov x11, v0.d[1]
-; CHECK-GI-NEXT: bfi x8, x8, #32, #32
-; CHECK-GI-NEXT: ldp q0, q1, [sp, #48] // 32-byte Folded Reload
-; CHECK-GI-NEXT: lsl x9, x19, #63
-; CHECK-GI-NEXT: lsl x8, x8, #63
+; CHECK-GI-NEXT: ldp q3, q2, [sp, #32] // 32-byte Folded Reload
+; CHECK-GI-NEXT: cmp w19, #0
; CHECK-GI-NEXT: ldp x30, x19, [sp, #96] // 16-byte Folded Reload
-; CHECK-GI-NEXT: asr x9, x9, #63
-; CHECK-GI-NEXT: fmov x12, d0
-; CHECK-GI-NEXT: mov x13, v0.d[1]
-; CHECK-GI-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload
-; CHECK-GI-NEXT: fmov x14, d1
-; CHECK-GI-NEXT: asr x8, x8, #63
-; CHECK-GI-NEXT: and x10, x10, x9
-; CHECK-GI-NEXT: fmov x15, d0
-; CHECK-GI-NEXT: mov x16, v1.d[1]
-; CHECK-GI-NEXT: mov x17, v0.d[1]
-; CHECK-GI-NEXT: and x12, x12, x8
-; CHECK-GI-NEXT: bic x14, x14, x9
-; CHECK-GI-NEXT: bic x15, x15, x8
-; CHECK-GI-NEXT: orr x10, x10, x14
-; CHECK-GI-NEXT: orr x12, x12, x15
-; CHECK-GI-NEXT: mov v0.d[0], x10
-; CHECK-GI-NEXT: and x10, x11, x9
-; CHECK-GI-NEXT: mov v1.d[0], x12
-; CHECK-GI-NEXT: and x11, x13, x8
-; CHECK-GI-NEXT: bic x9, x16, x9
-; CHECK-GI-NEXT: bic x8, x17, x8
-; CHECK-GI-NEXT: orr x9, x10, x9
-; CHECK-GI-NEXT: orr x8, x11, x8
-; CHECK-GI-NEXT: mov v0.d[1], x9
-; CHECK-GI-NEXT: mov v1.d[1], x8
+; CHECK-GI-NEXT: mov d0, v2.d[1]
+; CHECK-GI-NEXT: mov d1, v3.d[1]
+; CHECK-GI-NEXT: fcsel d2, d2, d3, lt
+; CHECK-GI-NEXT: fmov x8, d2
+; CHECK-GI-NEXT: fcsel d3, d0, d1, lt
+; CHECK-GI-NEXT: ldp q5, q0, [sp, #64] // 32-byte Folded Reload
+; CHECK-GI-NEXT: cmp w0, #0
+; CHECK-GI-NEXT: mov d1, v0.d[1]
+; CHECK-GI-NEXT: mov d4, v5.d[1]
+; CHECK-GI-NEXT: fcsel d0, d0, d5, lt
+; CHECK-GI-NEXT: fmov x9, d0
+; CHECK-GI-NEXT: mov v0.d[0], x8
+; CHECK-GI-NEXT: fmov x8, d3
+; CHECK-GI-NEXT: fcsel d2, d1, d4, lt
+; CHECK-GI-NEXT: mov v1.d[0], x9
+; CHECK-GI-NEXT: fmov x9, d2
+; CHECK-GI-NEXT: mov v0.d[1], x8
+; CHECK-GI-NEXT: mov v1.d[1], x9
; CHECK-GI-NEXT: add sp, sp, #112
; CHECK-GI-NEXT: ret
entry:
@@ -567,77 +551,52 @@ define <3 x fp128> @v3f128_fp128(<3 x fp128> %a, <3 x fp128> %b, <3 x fp128> %d,
; CHECK-GI-NEXT: mov v1.16b, v3.16b
; CHECK-GI-NEXT: stp q5, q2, [sp, #32] // 32-byte Folded Spill
; CHECK-GI-NEXT: ldr q2, [sp, #192]
-; CHECK-GI-NEXT: str q7, [sp, #64] // 16-byte Folded Spill
-; CHECK-GI-NEXT: stp q6, q2, [sp, #80] // 32-byte Folded Spill
+; CHECK-GI-NEXT: str q2, [sp, #144] // 16-byte Folded Spill
; CHECK-GI-NEXT: ldr q2, [sp, #208]
-; CHECK-GI-NEXT: str q2, [sp, #112] // 16-byte Folded Spill
+; CHECK-GI-NEXT: stp q2, q6, [sp, #64] // 32-byte Folded Spill
; CHECK-GI-NEXT: ldr q2, [sp, #224]
-; CHECK-GI-NEXT: str q2, [sp, #128] // 16-byte Folded Spill
+; CHECK-GI-NEXT: stp q7, q2, [sp, #96] // 32-byte Folded Spill
; CHECK-GI-NEXT: ldr q2, [sp, #240]
-; CHECK-GI-NEXT: str q2, [sp, #144] // 16-byte Folded Spill
+; CHECK-GI-NEXT: str q2, [sp, #128] // 16-byte Folded Spill
; CHECK-GI-NEXT: bl __lttf2
; CHECK-GI-NEXT: ldp q1, q0, [sp] // 32-byte Folded Reload
-; CHECK-GI-NEXT: cmp w0, #0
-; CHECK-GI-NEXT: cset w19, lt
+; CHECK-GI-NEXT: mov w19, w0
; CHECK-GI-NEXT: bl __lttf2
; CHECK-GI-NEXT: ldp q1, q0, [sp, #32] // 32-byte Folded Reload
-; CHECK-GI-NEXT: cmp w0, #0
-; CHECK-GI-NEXT: cset w20, lt
+; CHECK-GI-NEXT: mov w20, w0
; CHECK-GI-NEXT: bl __lttf2
-; CHECK-GI-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload
-; CHECK-GI-NEXT: bfi x19, x8, #32, #32
-; CHECK-GI-NEXT: bfi x20, x8, #32, #32
-; CHECK-GI-NEXT: cmp w0, #0
+; CHECK-GI-NEXT: ldp q5, q4, [sp, #64] // 32-byte Folded Reload
+; CHECK-GI-NEXT: cmp w19, #0
+; CHECK-GI-NEXT: ldp q7, q6, [sp, #96] // 32-byte Folded Reload
; CHECK-GI-NEXT: ldr x30, [sp, #160] // 8-byte Folded Reload
-; CHECK-GI-NEXT: fmov x8, d0
-; CHECK-GI-NEXT: mov x10, v0.d[1]
-; CHECK-GI-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload
-; CHECK-GI-NEXT: cset w9, lt
-; CHECK-GI-NEXT: lsl x13, x19, #63
-; CHECK-GI-NEXT: lsl x14, x20, #63
-; CHECK-GI-NEXT: fmov x11, d0
-; CHECK-GI-NEXT: mov x12, v0.d[1]
-; CHECK-GI-NEXT: ldr q0, [sp, #96] // 16-byte Folded Reload
-; CHECK-GI-NEXT: bfi x9, x8, #32, #32
-; CHECK-GI-NEXT: asr x13, x13, #63
-; CHECK-GI-NEXT: asr x14, x14, #63
-; CHECK-GI-NEXT: fmov x15, d0
-; CHECK-GI-NEXT: mov x16, v0.d[1]
-; CHECK-GI-NEXT: ldp q0, q1, [sp, #112] // 32-byte Folded Reload
-; CHECK-GI-NEXT: lsl x9, x9, #63
-; CHECK-GI-NEXT: and x8, x8, x13
-; CHECK-GI-NEXT: and x11, x11, x14
-; CHECK-GI-NEXT: asr x9, x9, #63
+; CHECK-GI-NEXT: mov d0, v4.d[1]
+; CHECK-GI-NEXT: mov d1, v5.d[1]
+; CHECK-GI-NEXT: fcsel d4, d4, d5, lt
+; CHECK-GI-NEXT: mov d2, v7.d[1]
+; CHECK-GI-NEXT: mov d3, v6.d[1]
+; CHECK-GI-NEXT: fmov x8, d4
+; CHECK-GI-NEXT: fcsel d5, d0, d1, lt
+; CHECK-GI-NEXT: cmp w20, #0
+; CHECK-GI-NEXT: fcsel d1, d7, d6, lt
+; CHECK-GI-NEXT: ldp q7, q0, [sp, #128] // 32-byte Folded Reload
+; CHECK-GI-NEXT: fcsel d3, d2, d3, lt
+; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: ldp x20, x19, [sp, #176] // 16-byte Folded Reload
-; CHECK-GI-NEXT: fmov x17, d0
-; CHECK-GI-NEXT: mov x18, v0.d[1]
-; CHECK-GI-NEXT: ldr q0, [sp, #144] // 16-byte Folded Reload
-; CHECK-GI-NEXT: fmov x0, d1
-; CHECK-GI-NEXT: and x15, x15, x9
-; CHECK-GI-NEXT: mov x2, v1.d[1]
-; CHECK-GI-NEXT: fmov x1, d0
-; CHECK-GI-NEXT: mov x3, v0.d[1]
-; CHECK-GI-NEXT: bic x17, x17, x13
-; CHECK-GI-NEXT: bic x0, x0, x14
-; CHECK-GI-NEXT: orr x8, x8, x17
-; CHECK-GI-NEXT: bic x1, x1, x9
-; CHECK-GI-NEXT: orr x11, x11, x0
+; CHECK-GI-NEXT: mov d2, v0.d[1]
+; CHECK-GI-NEXT: mov d6, v7.d[1]
+; CHECK-GI-NEXT: fcsel d7, d0, d7, lt
; CHECK-GI-NEXT: mov v0.d[0], x8
-; CHECK-GI-NEXT: orr x15, x15, x1
-; CHECK-GI-NEXT: mov v1.d[0], x11
-; CHECK-GI-NEXT: and x8, x10, x13
-; CHECK-GI-NEXT: mov v2.d[0], x15
-; CHECK-GI-NEXT: and x10, x12, x14
-; CHECK-GI-NEXT: and x11, x16, x9
-; CHECK-GI-NEXT: bic x12, x18, x13
-; CHECK-GI-NEXT: bic x13, x2, x14
-; CHECK-GI-NEXT: bic x9, x3, x9
-; CHECK-GI-NEXT: orr x8, x8, x12
-; CHECK-GI-NEXT: orr x10, x10, x13
-; CHECK-GI-NEXT: orr x9, x11, x9
+; CHECK-GI-NEXT: fmov x8, d1
+; CHECK-GI-NEXT: fmov x9, d7
+; CHECK-GI-NEXT: fcsel d4, d2, d6, lt
+; CHECK-GI-NEXT: mov v1.d[0], x8
+; CHECK-GI-NEXT: fmov x8, d5
+; CHECK-GI-NEXT: mov v2.d[0], x9
+; CHECK-GI-NEXT: fmov x9, d3
+; CHECK-GI-NEXT: fmov x10, d4
; CHECK-GI-NEXT: mov v0.d[1], x8
-; CHECK-GI-NEXT: mov v1.d[1], x10
-; CHECK-GI-NEXT: mov v2.d[1], x9
+; CHECK-GI-NEXT: mov v1.d[1], x9
+; CHECK-GI-NEXT: mov v2.d[1], x10
; CHECK-GI-NEXT: add sp, sp, #192
; CHECK-GI-NEXT: ret
entry:
diff --git a/llvm/test/CodeGen/AArch64/icmp.ll b/llvm/test/CodeGen/AArch64/icmp.ll
index b00e5d6c701d8b..61964060ca2c8b 100644
--- a/llvm/test/CodeGen/AArch64/icmp.ll
+++ b/llvm/test/CodeGen/AArch64/icmp.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
-; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+; RUN: llc -mtriple=aarch64 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
define i64 @i64_i64(i64 %a, i64 %b, i64 %d, i64 %e) {
; CHECK-LABEL: i64_i64:
@@ -1376,6 +1376,62 @@ entry:
ret <32 x i8> %s
}
+define <2 x i128> @v2i128_i128(<2 x i128> %a, <2 x i128> %b, <2 x i128> %d, <2 x i128> %e) {
+; CHECK-SD-LABEL: v2i128_i128:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: add x10, sp, #32
+; CHECK-SD-NEXT: mov x11, sp
+; CHECK-SD-NEXT: cmp x0, x4
+; CHECK-SD-NEXT: orr x12, x10, #0x8
+; CHECK-SD-NEXT: orr x13, x11, #0x8
+; CHECK-SD-NEXT: sbcs xzr, x1, x5
+; CHECK-SD-NEXT: add x8, sp, #48
+; CHECK-SD-NEXT: add x9, sp, #16
+; CHECK-SD-NEXT: csel x12, x13, x12, lt
+; CHECK-SD-NEXT: csel x10, x11, x10, lt
+; CHECK-SD-NEXT: cmp x2, x6
+; CHECK-SD-NEXT: orr x11, x8, #0x8
+; CHECK-SD-NEXT: orr x13, x9, #0x8
+; CHECK-SD-NEXT: sbcs xzr, x3, x7
+; CHECK-SD-NEXT: ldr x0, [x10]
+; CHECK-SD-NEXT: csel x8, x9, x8, lt
+; CHECK-SD-NEXT: csel x9, x13, x11, lt
+; CHECK-SD-NEXT: ldr x1, [x12]
+; CHECK-SD-NEXT: ldr x2, [x8]
+; CHECK-SD-NEXT: ldr x3, [x9]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v2i128_i128:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp x1, x5
+; CHECK-GI-NEXT: ldp x8, x9, [sp]
+; CHECK-GI-NEXT: cset w10, lt
+; CHECK-GI-NEXT: cmp x0, x4
+; CHECK-GI-NEXT: cset w13, lo
+; CHECK-GI-NEXT: cmp x1, x5
+; CHECK-GI-NEXT: csel w10, w13, w10, eq
+; CHECK-GI-NEXT: cmp x3, x7
+; CHECK-GI-NEXT: ldp x13, x14, [sp, #32]
+; CHECK-GI-NEXT: cset w15, lt
+; CHECK-GI-NEXT: cmp x2, x6
+; CHECK-GI-NEXT: ldp x11, x12, [sp, #16]
+; CHECK-GI-NEXT: cset w16, lo
+; CHECK-GI-NEXT: cmp x3, x7
+; CHECK-GI-NEXT: ldp x17, x18, [sp, #48]
+; CHECK-GI-NEXT: csel w15, w16, w15, eq
+; CHECK-GI-NEXT: tst w10, #0x1
+; CHECK-GI-NEXT: csel x0, x8, x13, ne
+; CHECK-GI-NEXT: csel x1, x9, x14, ne
+; CHECK-GI-NEXT: tst w15, #0x1
+; CHECK-GI-NEXT: csel x2, x11, x17, ne
+; CHECK-GI-NEXT: csel x3, x12, x18, ne
+; CHECK-GI-NEXT: ret
+entry:
+ %c = icmp slt <2 x i128> %a, %b
+ %s = select <2 x i1> %c, <2 x i128> %d, <2 x i128> %e
+ ret <2 x i128> %s
+}
+
; ===== ICMP Zero RHS =====
define <8 x i1> @icmp_eq_v8i8_Zero_RHS(<8 x i8> %a) {