[llvm] 0e6ea09 - [AArch64][GlobalISel] Scalarize zext with larger than i64 elements.
David Green via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 19 23:53:06 PDT 2025
Author: David Green
Date: 2025-03-20T06:53:00Z
New Revision: 0e6ea0945e64d26a89ec8a9e770578726eda1f9f
URL: https://github.com/llvm/llvm-project/commit/0e6ea0945e64d26a89ec8a9e770578726eda1f9f
DIFF: https://github.com/llvm/llvm-project/commit/0e6ea0945e64d26a89ec8a9e770578726eda1f9f.diff
LOG: [AArch64][GlobalISel] Scalarize zext with larger than i64 elements.
Like other operations larger than i64, we scalarize i128 and allow them to
legalize from there. This also helps with v2i64 udiv by constant, which needs to
legalize a umulh.
Added:
Modified:
llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
llvm/test/CodeGen/AArch64/arm64-neon-mul-div-cte.ll
llvm/test/CodeGen/AArch64/arm64-vabs.ll
llvm/test/CodeGen/AArch64/sext.ll
llvm/test/CodeGen/AArch64/zext.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 3270a8145399b..d8245ce2a57ba 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -759,7 +759,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
Query.Types[1].getScalarSizeInBits() == 16);
})
.clampMinNumElements(1, s8, 8)
- .clampMinNumElements(1, s16, 4);
+ .clampMinNumElements(1, s16, 4)
+ .scalarizeIf(scalarOrEltWiderThan(0, 64), 0);
getActionDefinitionsBuilder(G_TRUNC)
.legalFor({{v8s8, v8s16}, {v4s16, v4s32}, {v2s32, v2s64}})
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-mul-div-cte.ll b/llvm/test/CodeGen/AArch64/arm64-neon-mul-div-cte.ll
index 0022778c480ea..ca6bb8360de59 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-mul-div-cte.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-mul-div-cte.ll
@@ -1,8 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,CHECK-SD
-; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
-
-; CHECK-GI: warning: Instruction selection used fallback path for udiv_v2i64
+; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI
define <16 x i8> @div16xi8(<16 x i8> %x) {
; CHECK-SD-LABEL: div16xi8:
@@ -219,25 +217,42 @@ define <4 x i32> @udiv32xi4(<4 x i32> %x) {
}
define <2 x i64> @udiv_v2i64(<2 x i64> %a) {
-; CHECK-LABEL: udiv_v2i64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov x8, #9363 // =0x2493
-; CHECK-NEXT: fmov x10, d0
-; CHECK-NEXT: mov x9, v0.d[1]
-; CHECK-NEXT: movk x8, #37449, lsl #16
-; CHECK-NEXT: movk x8, #18724, lsl #32
-; CHECK-NEXT: movk x8, #9362, lsl #48
-; CHECK-NEXT: umulh x11, x10, x8
-; CHECK-NEXT: umulh x8, x9, x8
-; CHECK-NEXT: sub x10, x10, x11
-; CHECK-NEXT: add x10, x11, x10, lsr #1
-; CHECK-NEXT: sub x9, x9, x8
-; CHECK-NEXT: add x8, x8, x9, lsr #1
-; CHECK-NEXT: lsr x9, x10, #2
-; CHECK-NEXT: fmov d0, x9
-; CHECK-NEXT: lsr x8, x8, #2
-; CHECK-NEXT: mov v0.d[1], x8
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: udiv_v2i64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: mov x8, #9363 // =0x2493
+; CHECK-SD-NEXT: fmov x10, d0
+; CHECK-SD-NEXT: mov x9, v0.d[1]
+; CHECK-SD-NEXT: movk x8, #37449, lsl #16
+; CHECK-SD-NEXT: movk x8, #18724, lsl #32
+; CHECK-SD-NEXT: movk x8, #9362, lsl #48
+; CHECK-SD-NEXT: umulh x11, x10, x8
+; CHECK-SD-NEXT: umulh x8, x9, x8
+; CHECK-SD-NEXT: sub x10, x10, x11
+; CHECK-SD-NEXT: add x10, x11, x10, lsr #1
+; CHECK-SD-NEXT: sub x9, x9, x8
+; CHECK-SD-NEXT: add x8, x8, x9, lsr #1
+; CHECK-SD-NEXT: lsr x9, x10, #2
+; CHECK-SD-NEXT: fmov d0, x9
+; CHECK-SD-NEXT: lsr x8, x8, #2
+; CHECK-SD-NEXT: mov v0.d[1], x8
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: udiv_v2i64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov x8, #9363 // =0x2493
+; CHECK-GI-NEXT: fmov x9, d0
+; CHECK-GI-NEXT: mov x10, v0.d[1]
+; CHECK-GI-NEXT: movk x8, #37449, lsl #16
+; CHECK-GI-NEXT: movk x8, #18724, lsl #32
+; CHECK-GI-NEXT: movk x8, #9362, lsl #48
+; CHECK-GI-NEXT: umulh x9, x9, x8
+; CHECK-GI-NEXT: umulh x8, x10, x8
+; CHECK-GI-NEXT: mov v1.d[0], x9
+; CHECK-GI-NEXT: mov v1.d[1], x8
+; CHECK-GI-NEXT: sub v0.2d, v0.2d, v1.2d
+; CHECK-GI-NEXT: usra v1.2d, v0.2d, #1
+; CHECK-GI-NEXT: ushr v0.2d, v1.2d, #2
+; CHECK-GI-NEXT: ret
%r = udiv <2 x i64> %a, splat (i64 7)
ret <2 x i64> %r
}
diff --git a/llvm/test/CodeGen/AArch64/arm64-vabs.ll b/llvm/test/CodeGen/AArch64/arm64-vabs.ll
index 0b95a56151433..b3258514aaa26 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vabs.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vabs.ll
@@ -1,8 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck -check-prefixes=CHECK,CHECK-SD %s
-; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
-
-; CHECK-GI: warning: Instruction selection used fallback path for uabd_i64
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI
define <8 x i16> @sabdl8h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sabdl8h:
@@ -1803,17 +1801,51 @@ define <2 x i64> @uabd_i32(<2 x i32> %a, <2 x i32> %b) {
}
define <2 x i128> @uabd_i64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: uabd_i64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: cmgt.2d v2, v0, v1
-; CHECK-NEXT: sub.2d v0, v0, v1
-; CHECK-NEXT: mov x1, xzr
-; CHECK-NEXT: mov x3, xzr
-; CHECK-NEXT: eor.16b v0, v0, v2
-; CHECK-NEXT: sub.2d v0, v2, v0
-; CHECK-NEXT: mov.d x2, v0[1]
-; CHECK-NEXT: fmov x0, d0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: uabd_i64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: cmgt.2d v2, v0, v1
+; CHECK-SD-NEXT: sub.2d v0, v0, v1
+; CHECK-SD-NEXT: mov x1, xzr
+; CHECK-SD-NEXT: mov x3, xzr
+; CHECK-SD-NEXT: eor.16b v0, v0, v2
+; CHECK-SD-NEXT: sub.2d v0, v2, v0
+; CHECK-SD-NEXT: mov.d x2, v0[1]
+; CHECK-SD-NEXT: fmov x0, d0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: uabd_i64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov d2, v0[1]
+; CHECK-GI-NEXT: mov d3, v1[1]
+; CHECK-GI-NEXT: fmov x8, d0
+; CHECK-GI-NEXT: fmov x10, d1
+; CHECK-GI-NEXT: asr x9, x8, #63
+; CHECK-GI-NEXT: fmov x11, d2
+; CHECK-GI-NEXT: fmov x13, d3
+; CHECK-GI-NEXT: asr x12, x10, #63
+; CHECK-GI-NEXT: subs x8, x8, x10
+; CHECK-GI-NEXT: sbc x9, x9, x12
+; CHECK-GI-NEXT: asr x14, x11, #63
+; CHECK-GI-NEXT: asr x15, x13, #63
+; CHECK-GI-NEXT: subs x10, x11, x13
+; CHECK-GI-NEXT: sbc x11, x14, x15
+; CHECK-GI-NEXT: cmp x9, #0
+; CHECK-GI-NEXT: cset w12, lt
+; CHECK-GI-NEXT: csel w12, wzr, w12, eq
+; CHECK-GI-NEXT: cmp x11, #0
+; CHECK-GI-NEXT: cset w13, lt
+; CHECK-GI-NEXT: csel w13, wzr, w13, eq
+; CHECK-GI-NEXT: negs x14, x8
+; CHECK-GI-NEXT: ngc x15, x9
+; CHECK-GI-NEXT: negs x16, x10
+; CHECK-GI-NEXT: ngc x17, x11
+; CHECK-GI-NEXT: tst w12, #0x1
+; CHECK-GI-NEXT: csel x0, x14, x8, ne
+; CHECK-GI-NEXT: csel x1, x15, x9, ne
+; CHECK-GI-NEXT: tst w13, #0x1
+; CHECK-GI-NEXT: csel x2, x16, x10, ne
+; CHECK-GI-NEXT: csel x3, x17, x11, ne
+; CHECK-GI-NEXT: ret
%aext = sext <2 x i64> %a to <2 x i128>
%bext = sext <2 x i64> %b to <2 x i128>
%abdiff = sub nsw <2 x i128> %aext, %bext
diff --git a/llvm/test/CodeGen/AArch64/sext.ll b/llvm/test/CodeGen/AArch64/sext.ll
index 1e5b34a26e89f..ce5b80f3e2be4 100644
--- a/llvm/test/CodeGen/AArch64/sext.ll
+++ b/llvm/test/CodeGen/AArch64/sext.ll
@@ -1,8 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
-; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
-
-; CHECK-GI: warning: Instruction selection used fallback path for sext_v2i64_v2i128
+; RUN: llc -mtriple=aarch64 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
define i16 @sext_i8_to_i16(i8 %a) {
; CHECK-LABEL: sext_i8_to_i16:
@@ -1245,15 +1243,26 @@ entry:
}
define <2 x i128> @sext_v2i64_v2i128(<2 x i64> %a) {
-; CHECK-LABEL: sext_v2i64_v2i128:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov x8, v0.d[1]
-; CHECK-NEXT: dup v1.2d, v0.d[1]
-; CHECK-NEXT: fmov x0, d0
-; CHECK-NEXT: fmov x2, d1
-; CHECK-NEXT: asr x1, x0, #63
-; CHECK-NEXT: asr x3, x8, #63
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: sext_v2i64_v2i128:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: mov x8, v0.d[1]
+; CHECK-SD-NEXT: dup v1.2d, v0.d[1]
+; CHECK-SD-NEXT: fmov x0, d0
+; CHECK-SD-NEXT: fmov x2, d1
+; CHECK-SD-NEXT: asr x1, x0, #63
+; CHECK-SD-NEXT: asr x3, x8, #63
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: sext_v2i64_v2i128:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d1, v0.d[1]
+; CHECK-GI-NEXT: fmov x8, d0
+; CHECK-GI-NEXT: fmov x0, d0
+; CHECK-GI-NEXT: asr x1, x8, #63
+; CHECK-GI-NEXT: fmov x9, d1
+; CHECK-GI-NEXT: fmov x2, d1
+; CHECK-GI-NEXT: asr x3, x9, #63
+; CHECK-GI-NEXT: ret
entry:
%c = sext <2 x i64> %a to <2 x i128>
ret <2 x i128> %c
diff --git a/llvm/test/CodeGen/AArch64/zext.ll b/llvm/test/CodeGen/AArch64/zext.ll
index 153a33e1d7af2..e40b9cb5c8482 100644
--- a/llvm/test/CodeGen/AArch64/zext.ll
+++ b/llvm/test/CodeGen/AArch64/zext.ll
@@ -3,7 +3,6 @@
; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
; CHECK-GI: warning: Instruction selection used fallback path for zext_v16i10_v16i16
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for zext_v2i64_v2i128
define i16 @zext_i8_to_i16(i8 %a) {
; CHECK-LABEL: zext_i8_to_i16:
@@ -1215,13 +1214,22 @@ entry:
}
define <2 x i128> @zext_v2i64_v2i128(<2 x i64> %a) {
-; CHECK-LABEL: zext_v2i64_v2i128:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov x2, v0.d[1]
-; CHECK-NEXT: fmov x0, d0
-; CHECK-NEXT: mov x1, xzr
-; CHECK-NEXT: mov x3, xzr
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: zext_v2i64_v2i128:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: mov x2, v0.d[1]
+; CHECK-SD-NEXT: fmov x0, d0
+; CHECK-SD-NEXT: mov x1, xzr
+; CHECK-SD-NEXT: mov x3, xzr
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: zext_v2i64_v2i128:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d1, v0.d[1]
+; CHECK-GI-NEXT: fmov x0, d0
+; CHECK-GI-NEXT: mov x1, xzr
+; CHECK-GI-NEXT: mov x3, xzr
+; CHECK-GI-NEXT: fmov x2, d1
+; CHECK-GI-NEXT: ret
entry:
%c = zext <2 x i64> %a to <2 x i128>
ret <2 x i128> %c
More information about the llvm-commits
mailing list