[llvm] dc1b772 - [AArch64][GlobalISel] Add missing legalization for v16i8 extract element.
David Green via llvm-commits
llvm-commits at lists.llvm.org
Sun Feb 18 23:27:02 PST 2024
Author: David Green
Date: 2024-02-19T07:26:57Z
New Revision: dc1b7729337a3e04ad1bb69394122c7f95f193d2
URL: https://github.com/llvm/llvm-project/commit/dc1b7729337a3e04ad1bb69394122c7f95f193d2
DIFF: https://github.com/llvm/llvm-project/commit/dc1b7729337a3e04ad1bb69394122c7f95f193d2.diff
LOG: [AArch64][GlobalISel] Add missing legalization for v16i8 extract element.
Added:
Modified:
llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
llvm/test/CodeGen/AArch64/GlobalISel/legalize-phi.mir
llvm/test/CodeGen/AArch64/insertextract.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 4a3f710163e723..261078cd4bd7d0 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -863,6 +863,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.clampMaxNumElements(1, s64, 2)
.clampMaxNumElements(1, s32, 4)
.clampMaxNumElements(1, s16, 8)
+ .clampMaxNumElements(1, s8, 16)
.clampMaxNumElements(1, p0, 2);
getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-phi.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-phi.mir
index d8fa456cf7e947..8803da265aa115 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-phi.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-phi.mir
@@ -858,21 +858,24 @@ body: |
; CHECK-NEXT: successors: %bb.1(0x50000000), %bb.2(0x30000000)
; CHECK-NEXT: liveins: $x0, $x1
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: %cond:_(s1) = G_IMPLICIT_DEF
- ; CHECK-NEXT: %val_1:_(<32 x s8>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: G_BRCOND %cond(s1), %bb.2
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[DEF1]], [[C]]
+ ; CHECK-NEXT: G_BRCOND [[AND]](s32), %bb.2
; CHECK-NEXT: G_BR %bb.1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: %val_2:_(<32 x s8>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[DEF2]](s8), [[DEF2]](s8), [[DEF2]](s8), [[DEF2]](s8), [[DEF2]](s8), [[DEF2]](s8), [[DEF2]](s8), [[DEF2]](s8), [[DEF2]](s8), [[DEF2]](s8), [[DEF2]](s8), [[DEF2]](s8), [[DEF2]](s8), [[DEF2]](s8), [[DEF2]](s8), [[DEF2]](s8)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
- ; CHECK-NEXT: %phi:_(<32 x s8>) = G_PHI %val_2(<32 x s8>), %bb.1, %val_1(<32 x s8>), %bb.0
- ; CHECK-NEXT: %one:_(s8) = G_CONSTANT i8 1
- ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT %one(s8)
- ; CHECK-NEXT: %extract:_(s8) = G_EXTRACT_VECTOR_ELT %phi(<32 x s8>), [[SEXT]](s64)
+ ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<16 x s8>) = G_PHI [[BUILD_VECTOR1]](<16 x s8>), %bb.1, [[BUILD_VECTOR]](<16 x s8>), %bb.0
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; CHECK-NEXT: %extract:_(s8) = G_EXTRACT_VECTOR_ELT [[PHI]](<16 x s8>), [[C1]](s64)
; CHECK-NEXT: $b0 = COPY %extract(s8)
; CHECK-NEXT: RET_ReallyLR implicit $b0
bb.0:
diff --git a/llvm/test/CodeGen/AArch64/insertextract.ll b/llvm/test/CodeGen/AArch64/insertextract.ll
index 6074d44cb03d48..b0df5cb3d83717 100644
--- a/llvm/test/CodeGen/AArch64/insertextract.ll
+++ b/llvm/test/CodeGen/AArch64/insertextract.ll
@@ -39,9 +39,6 @@
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for insert_v4i64_0
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for insert_v4i64_2
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for insert_v4i64_c
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for extract_v32i8_0
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for extract_v32i8_2
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for extract_v32i8_c
define <2 x double> @insert_v2f64_0(<2 x double> %a, double %b, i32 %c) {
; CHECK-LABEL: insert_v2f64_0:
@@ -1670,16 +1667,36 @@ entry:
}
define i8 @extract_v32i8_c(<32 x i8> %a, i32 %c) {
-; CHECK-LABEL: extract_v32i8_c:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT: stp q0, q1, [sp, #-32]!
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: and x8, x0, #0x1f
-; CHECK-NEXT: mov x9, sp
-; CHECK-NEXT: ldrb w0, [x9, x8]
-; CHECK-NEXT: add sp, sp, #32
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: extract_v32i8_c:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-SD-NEXT: stp q0, q1, [sp, #-32]!
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT: and x8, x0, #0x1f
+; CHECK-SD-NEXT: mov x9, sp
+; CHECK-SD-NEXT: ldrb w0, [x9, x8]
+; CHECK-SD-NEXT: add sp, sp, #32
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: extract_v32i8_c:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-GI-NEXT: sub x9, sp, #48
+; CHECK-GI-NEXT: mov x29, sp
+; CHECK-GI-NEXT: and sp, x9, #0xffffffffffffffe0
+; CHECK-GI-NEXT: .cfi_def_cfa w29, 16
+; CHECK-GI-NEXT: .cfi_offset w30, -8
+; CHECK-GI-NEXT: .cfi_offset w29, -16
+; CHECK-GI-NEXT: mov w8, w0
+; CHECK-GI-NEXT: stp q0, q1, [sp]
+; CHECK-GI-NEXT: mov x10, sp
+; CHECK-GI-NEXT: and x8, x8, #0x1f
+; CHECK-GI-NEXT: lsl x9, x8, #1
+; CHECK-GI-NEXT: sub x8, x9, x8
+; CHECK-GI-NEXT: ldrb w0, [x10, x8]
+; CHECK-GI-NEXT: mov sp, x29
+; CHECK-GI-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-GI-NEXT: ret
entry:
%d = extractelement <32 x i8> %a, i32 %c
ret i8 %d
More information about the llvm-commits mailing list