[llvm] 49b2936 - [AArch64][GlobalISel] Expand handling of phi operations
David Green via llvm-commits
llvm-commits at lists.llvm.org
Sun Nov 17 00:25:57 PST 2024
Author: David Green
Date: 2024-11-17T08:25:53Z
New Revision: 49b29368f72c493e61506b4203e7852f55e17062
URL: https://github.com/llvm/llvm-project/commit/49b29368f72c493e61506b4203e7852f55e17062
DIFF: https://github.com/llvm/llvm-project/commit/49b29368f72c493e61506b4203e7852f55e17062.diff
LOG: [AArch64][GlobalISel] Expand handling of phi operations
Like other operations, non-power-of-2 vectors are widened to the next power of
2, vectors with elements wider than 64 bits (such as i128) are scalarized, and
vectors smaller than 64 bits are widened to be at least 64 bits.
Added:
Modified:
llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
llvm/test/CodeGen/AArch64/phi.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index d42ecc1c72dce9..baa42302756a56 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -109,11 +109,12 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.legalFor({p0, s16, s32, s64})
.legalFor(PackedVectorAllTypeList)
.widenScalarToNextPow2(0)
+ .moreElementsToNextPow2(0)
+ .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
.clampScalar(0, s16, s64)
- // Maximum: sN * k = 128
- .clampMaxNumElements(0, s8, 16)
- .clampMaxNumElements(0, s16, 8)
- .clampMaxNumElements(0, s32, 4)
+ .clampNumElements(0, v8s8, v16s8)
+ .clampNumElements(0, v4s16, v8s16)
+ .clampNumElements(0, v2s32, v4s32)
.clampMaxNumElements(0, s64, 2)
.clampMaxNumElements(0, p0, 2);
diff --git a/llvm/test/CodeGen/AArch64/phi.ll b/llvm/test/CodeGen/AArch64/phi.ll
index d02d4c4f272bc0..402c7eeabb291d 100644
--- a/llvm/test/CodeGen/AArch64/phi.ll
+++ b/llvm/test/CodeGen/AArch64/phi.ll
@@ -2,18 +2,6 @@
; RUN: llc -mtriple=aarch64 -global-isel=0 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc -mtriple=aarch64 -global-isel=1 -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
-; CHECK-GI: warning: Instruction selection used fallback path for tv2i8
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for tv3i8
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for tv4i8
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for tv2i16
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for tv3i16
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for tv3i32
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for tv2i128
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for tv2f16
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for tv3f16
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for tv3f32
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for tv2f128
-
define i8 @ti8(i1 %c, ptr %p, i8 %a, i8 %b) {
; CHECK-SD-LABEL: ti8:
; CHECK-SD: // %bb.0: // %entry
@@ -271,15 +259,37 @@ e:
}
define <2 x i8> @tv2i8(i1 %c, ptr %p, <2 x i8> %a, <2 x i8> %b) {
-; CHECK-LABEL: tv2i8:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: tbz w0, #0, .LBB9_2
-; CHECK-NEXT: // %bb.1: // %t
-; CHECK-NEXT: fmov d1, d0
-; CHECK-NEXT: str wzr, [x1]
-; CHECK-NEXT: .LBB9_2: // %e
-; CHECK-NEXT: fmov d0, d1
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: tv2i8:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: tbz w0, #0, .LBB9_2
+; CHECK-SD-NEXT: // %bb.1: // %t
+; CHECK-SD-NEXT: fmov d1, d0
+; CHECK-SD-NEXT: str wzr, [x1]
+; CHECK-SD-NEXT: .LBB9_2: // %e
+; CHECK-SD-NEXT: fmov d0, d1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: tv2i8:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT: tbz w0, #0, .LBB9_2
+; CHECK-GI-NEXT: // %bb.1: // %t
+; CHECK-GI-NEXT: mov w8, v0.s[1]
+; CHECK-GI-NEXT: str wzr, [x1]
+; CHECK-GI-NEXT: mov v0.b[1], w8
+; CHECK-GI-NEXT: fmov d1, d0
+; CHECK-GI-NEXT: b .LBB9_3
+; CHECK-GI-NEXT: .LBB9_2:
+; CHECK-GI-NEXT: mov w8, v1.s[1]
+; CHECK-GI-NEXT: mov v1.b[1], w8
+; CHECK-GI-NEXT: .LBB9_3: // %e
+; CHECK-GI-NEXT: umov w8, v1.b[0]
+; CHECK-GI-NEXT: umov w9, v1.b[1]
+; CHECK-GI-NEXT: mov v0.s[0], w8
+; CHECK-GI-NEXT: mov v0.s[1], w9
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: ret
entry:
br i1 %c, label %t, label %e
t:
@@ -291,19 +301,38 @@ e:
}
define <3 x i8> @tv3i8(i1 %c, ptr %p, <3 x i8> %a, <3 x i8> %b) {
-; CHECK-LABEL: tv3i8:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: tbz w0, #0, .LBB10_2
-; CHECK-NEXT: // %bb.1: // %t
-; CHECK-NEXT: mov w5, w2
-; CHECK-NEXT: mov w6, w3
-; CHECK-NEXT: mov w7, w4
-; CHECK-NEXT: str wzr, [x1]
-; CHECK-NEXT: .LBB10_2: // %e
-; CHECK-NEXT: mov w0, w5
-; CHECK-NEXT: mov w1, w6
-; CHECK-NEXT: mov w2, w7
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: tv3i8:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: tbz w0, #0, .LBB10_2
+; CHECK-SD-NEXT: // %bb.1: // %t
+; CHECK-SD-NEXT: mov w5, w2
+; CHECK-SD-NEXT: mov w6, w3
+; CHECK-SD-NEXT: mov w7, w4
+; CHECK-SD-NEXT: str wzr, [x1]
+; CHECK-SD-NEXT: .LBB10_2: // %e
+; CHECK-SD-NEXT: mov w0, w5
+; CHECK-SD-NEXT: mov w1, w6
+; CHECK-SD-NEXT: mov w2, w7
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: tv3i8:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: tbz w0, #0, .LBB10_2
+; CHECK-GI-NEXT: // %bb.1: // %t
+; CHECK-GI-NEXT: fmov s0, w2
+; CHECK-GI-NEXT: str wzr, [x1]
+; CHECK-GI-NEXT: mov v0.b[1], w3
+; CHECK-GI-NEXT: mov v0.b[2], w4
+; CHECK-GI-NEXT: b .LBB10_3
+; CHECK-GI-NEXT: .LBB10_2:
+; CHECK-GI-NEXT: fmov s0, w5
+; CHECK-GI-NEXT: mov v0.b[1], w6
+; CHECK-GI-NEXT: mov v0.b[2], w7
+; CHECK-GI-NEXT: .LBB10_3: // %e
+; CHECK-GI-NEXT: umov w0, v0.b[0]
+; CHECK-GI-NEXT: umov w1, v0.b[1]
+; CHECK-GI-NEXT: umov w2, v0.b[2]
+; CHECK-GI-NEXT: ret
entry:
br i1 %c, label %t, label %e
t:
@@ -315,15 +344,30 @@ e:
}
define <4 x i8> @tv4i8(i1 %c, ptr %p, <4 x i8> %a, <4 x i8> %b) {
-; CHECK-LABEL: tv4i8:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: tbz w0, #0, .LBB11_2
-; CHECK-NEXT: // %bb.1: // %t
-; CHECK-NEXT: fmov d1, d0
-; CHECK-NEXT: str wzr, [x1]
-; CHECK-NEXT: .LBB11_2: // %e
-; CHECK-NEXT: fmov d0, d1
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: tv4i8:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: tbz w0, #0, .LBB11_2
+; CHECK-SD-NEXT: // %bb.1: // %t
+; CHECK-SD-NEXT: fmov d1, d0
+; CHECK-SD-NEXT: str wzr, [x1]
+; CHECK-SD-NEXT: .LBB11_2: // %e
+; CHECK-SD-NEXT: fmov d0, d1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: tv4i8:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: tbz w0, #0, .LBB11_2
+; CHECK-GI-NEXT: // %bb.1: // %t
+; CHECK-GI-NEXT: uzp1 v0.8b, v0.8b, v0.8b
+; CHECK-GI-NEXT: str wzr, [x1]
+; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: ret
+; CHECK-GI-NEXT: .LBB11_2:
+; CHECK-GI-NEXT: uzp1 v0.8b, v1.8b, v0.8b
+; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: ret
entry:
br i1 %c, label %t, label %e
t:
@@ -410,15 +454,30 @@ e:
}
define <2 x i16> @tv2i16(i1 %c, ptr %p, <2 x i16> %a, <2 x i16> %b) {
-; CHECK-LABEL: tv2i16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: tbz w0, #0, .LBB15_2
-; CHECK-NEXT: // %bb.1: // %t
-; CHECK-NEXT: fmov d1, d0
-; CHECK-NEXT: str wzr, [x1]
-; CHECK-NEXT: .LBB15_2: // %e
-; CHECK-NEXT: fmov d0, d1
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: tv2i16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: tbz w0, #0, .LBB15_2
+; CHECK-SD-NEXT: // %bb.1: // %t
+; CHECK-SD-NEXT: fmov d1, d0
+; CHECK-SD-NEXT: str wzr, [x1]
+; CHECK-SD-NEXT: .LBB15_2: // %e
+; CHECK-SD-NEXT: fmov d0, d1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: tv2i16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: tbz w0, #0, .LBB15_2
+; CHECK-GI-NEXT: // %bb.1: // %t
+; CHECK-GI-NEXT: uzp1 v0.4h, v0.4h, v0.4h
+; CHECK-GI-NEXT: str wzr, [x1]
+; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: ret
+; CHECK-GI-NEXT: .LBB15_2:
+; CHECK-GI-NEXT: uzp1 v0.4h, v1.4h, v0.4h
+; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: ret
entry:
br i1 %c, label %t, label %e
t:
@@ -661,8 +720,11 @@ define <3 x i64> @tv3i64(i1 %c, ptr %p, <3 x i64> %a, <3 x i64> %b) {
; CHECK-GI-NEXT: tbz w0, #0, .LBB25_2
; CHECK-GI-NEXT: // %bb.1: // %t
; CHECK-GI-NEXT: fmov d6, d0
+; CHECK-GI-NEXT: fmov d7, d2
; CHECK-GI-NEXT: str wzr, [x1]
; CHECK-GI-NEXT: mov v6.d[1], v1.d[0]
+; CHECK-GI-NEXT: mov v2.16b, v7.16b
+; CHECK-GI-NEXT: // kill: def $d2 killed $d2 killed $q2
; CHECK-GI-NEXT: mov d1, v6.d[1]
; CHECK-GI-NEXT: mov v0.16b, v6.16b
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
@@ -670,6 +732,7 @@ define <3 x i64> @tv3i64(i1 %c, ptr %p, <3 x i64> %a, <3 x i64> %b) {
; CHECK-GI-NEXT: .LBB25_2:
; CHECK-GI-NEXT: fmov d0, d3
; CHECK-GI-NEXT: fmov d2, d5
+; CHECK-GI-NEXT: // kill: def $d2 killed $d2 killed $q2
; CHECK-GI-NEXT: mov v0.d[1], v4.d[0]
; CHECK-GI-NEXT: mov d1, v0.d[1]
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
@@ -720,22 +783,40 @@ e:
}
define <2 x i128> @tv2i128(i1 %c, ptr %p, <2 x i128> %a, <2 x i128> %b) {
-; CHECK-LABEL: tv2i128:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: tbz w0, #0, .LBB27_2
-; CHECK-NEXT: // %bb.1: // %t
-; CHECK-NEXT: mov x6, x2
-; CHECK-NEXT: mov x7, x3
-; CHECK-NEXT: str wzr, [x1]
-; CHECK-NEXT: b .LBB27_3
-; CHECK-NEXT: .LBB27_2:
-; CHECK-NEXT: ldp x4, x5, [sp]
-; CHECK-NEXT: .LBB27_3: // %e
-; CHECK-NEXT: mov x0, x6
-; CHECK-NEXT: mov x1, x7
-; CHECK-NEXT: mov x2, x4
-; CHECK-NEXT: mov x3, x5
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: tv2i128:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: tbz w0, #0, .LBB27_2
+; CHECK-SD-NEXT: // %bb.1: // %t
+; CHECK-SD-NEXT: mov x6, x2
+; CHECK-SD-NEXT: mov x7, x3
+; CHECK-SD-NEXT: str wzr, [x1]
+; CHECK-SD-NEXT: b .LBB27_3
+; CHECK-SD-NEXT: .LBB27_2:
+; CHECK-SD-NEXT: ldp x4, x5, [sp]
+; CHECK-SD-NEXT: .LBB27_3: // %e
+; CHECK-SD-NEXT: mov x0, x6
+; CHECK-SD-NEXT: mov x1, x7
+; CHECK-SD-NEXT: mov x2, x4
+; CHECK-SD-NEXT: mov x3, x5
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: tv2i128:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: tbz w0, #0, .LBB27_2
+; CHECK-GI-NEXT: // %bb.1: // %t
+; CHECK-GI-NEXT: mov x9, x2
+; CHECK-GI-NEXT: mov x10, x3
+; CHECK-GI-NEXT: mov x2, x4
+; CHECK-GI-NEXT: mov x3, x5
+; CHECK-GI-NEXT: str wzr, [x1]
+; CHECK-GI-NEXT: mov x0, x9
+; CHECK-GI-NEXT: mov x1, x10
+; CHECK-GI-NEXT: ret
+; CHECK-GI-NEXT: .LBB27_2:
+; CHECK-GI-NEXT: ldp x2, x3, [sp]
+; CHECK-GI-NEXT: mov x0, x6
+; CHECK-GI-NEXT: mov x1, x7
+; CHECK-GI-NEXT: ret
entry:
br i1 %c, label %t, label %e
t:
@@ -998,8 +1079,11 @@ define <3 x double> @tv3f64(i1 %c, ptr %p, <3 x double> %a, <3 x double> %b) {
; CHECK-GI-NEXT: tbz w0, #0, .LBB38_2
; CHECK-GI-NEXT: // %bb.1: // %t
; CHECK-GI-NEXT: fmov d6, d0
+; CHECK-GI-NEXT: fmov d7, d2
; CHECK-GI-NEXT: str wzr, [x1]
; CHECK-GI-NEXT: mov v6.d[1], v1.d[0]
+; CHECK-GI-NEXT: mov v2.16b, v7.16b
+; CHECK-GI-NEXT: // kill: def $d2 killed $d2 killed $q2
; CHECK-GI-NEXT: mov d1, v6.d[1]
; CHECK-GI-NEXT: mov v0.16b, v6.16b
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
@@ -1007,6 +1091,7 @@ define <3 x double> @tv3f64(i1 %c, ptr %p, <3 x double> %a, <3 x double> %b) {
; CHECK-GI-NEXT: .LBB38_2:
; CHECK-GI-NEXT: fmov d0, d3
; CHECK-GI-NEXT: fmov d2, d5
+; CHECK-GI-NEXT: // kill: def $d2 killed $d2 killed $q2
; CHECK-GI-NEXT: mov v0.d[1], v4.d[0]
; CHECK-GI-NEXT: mov d1, v0.d[1]
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
@@ -1057,17 +1142,41 @@ e:
}
define <2 x fp128> @tv2f128(i1 %c, ptr %p, <2 x fp128> %a, <2 x fp128> %b) {
-; CHECK-LABEL: tv2f128:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: tbz w0, #0, .LBB40_2
-; CHECK-NEXT: // %bb.1: // %t
-; CHECK-NEXT: mov v2.16b, v0.16b
-; CHECK-NEXT: mov v3.16b, v1.16b
-; CHECK-NEXT: str wzr, [x1]
-; CHECK-NEXT: .LBB40_2: // %e
-; CHECK-NEXT: mov v0.16b, v2.16b
-; CHECK-NEXT: mov v1.16b, v3.16b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: tv2f128:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: tbz w0, #0, .LBB40_2
+; CHECK-SD-NEXT: // %bb.1: // %t
+; CHECK-SD-NEXT: mov v2.16b, v0.16b
+; CHECK-SD-NEXT: mov v3.16b, v1.16b
+; CHECK-SD-NEXT: str wzr, [x1]
+; CHECK-SD-NEXT: .LBB40_2: // %e
+; CHECK-SD-NEXT: mov v0.16b, v2.16b
+; CHECK-SD-NEXT: mov v1.16b, v3.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: tv2f128:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: tbz w0, #0, .LBB40_2
+; CHECK-GI-NEXT: // %bb.1: // %t
+; CHECK-GI-NEXT: mov d4, v1.d[1]
+; CHECK-GI-NEXT: mov d5, v0.d[1]
+; CHECK-GI-NEXT: str wzr, [x1]
+; CHECK-GI-NEXT: fmov d2, d0
+; CHECK-GI-NEXT: fmov d3, d1
+; CHECK-GI-NEXT: b .LBB40_3
+; CHECK-GI-NEXT: .LBB40_2:
+; CHECK-GI-NEXT: mov d4, v3.d[1]
+; CHECK-GI-NEXT: mov d5, v2.d[1]
+; CHECK-GI-NEXT: .LBB40_3: // %e
+; CHECK-GI-NEXT: fmov x8, d2
+; CHECK-GI-NEXT: fmov x9, d3
+; CHECK-GI-NEXT: mov v0.d[0], x8
+; CHECK-GI-NEXT: mov v1.d[0], x9
+; CHECK-GI-NEXT: fmov x8, d5
+; CHECK-GI-NEXT: fmov x9, d4
+; CHECK-GI-NEXT: mov v0.d[1], x8
+; CHECK-GI-NEXT: mov v1.d[1], x9
+; CHECK-GI-NEXT: ret
entry:
br i1 %c, label %t, label %e
t:
More information about the llvm-commits mailing list