[llvm] 6c0154f - [AArch64][GlobalISel] Ensure vector types match in shift instructions

David Green via llvm-commits llvm-commits at lists.llvm.org
Sat Sep 6 04:38:35 PDT 2025


Author: David Green
Date: 2025-09-06T12:38:28+01:00
New Revision: 6c0154ff01ae3fa459c699f3f783797659f596f7

URL: https://github.com/llvm/llvm-project/commit/6c0154ff01ae3fa459c699f3f783797659f596f7
DIFF: https://github.com/llvm/llvm-project/commit/6c0154ff01ae3fa459c699f3f783797659f596f7.diff

LOG: [AArch64][GlobalISel] Ensure vector types match in shift instructions

This adds legalizations for shifts with mismatching shift types, that can be
created from trunc(shift) patterns. This helps reduce the number of fallbacks.

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
    llvm/test/CodeGen/AArch64/arm64-vshift.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 82391f13d1370..ff09b375c3108 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -222,7 +222,9 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
       .clampNumElements(0, v2s64, v2s64)
       .moreElementsToNextPow2(0)
       .minScalarSameAs(1, 0)
-      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0);
+      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
+      .minScalarEltSameAsIf(isVector(0), 1, 0)
+      .maxScalarEltSameAsIf(isVector(0), 1, 0);
 
   getActionDefinitionsBuilder(G_PTR_ADD)
       .legalFor({{p0, s64}, {v2p0, v2s64}})

diff  --git a/llvm/test/CodeGen/AArch64/arm64-vshift.ll b/llvm/test/CodeGen/AArch64/arm64-vshift.ll
index 7af15f3ad2520..8ec5434085d6a 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vshift.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vshift.ll
@@ -101,11 +101,6 @@
 ; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for sli4s
 ; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for sli2d
 ; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for sqshlu_zero_shift_amount
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for lshr_trunc_v2i64_v2i8
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for lshr_trunc_v4i64_v4i16
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for ashr_trunc_v2i64_v2i8
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for ashr_trunc_v4i64_v4i16
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for shl_trunc_v4i64_v4i16
 
 define <8 x i8> @sqshl8b(ptr %A, ptr %B) nounwind {
 ; CHECK-LABEL: sqshl8b:
@@ -4381,48 +4376,82 @@ define <8 x i16> @signbits_vashr(<8 x i16> %a)  {
 }
 
 define <2 x i8> @lshr_trunc_v2i64_v2i8(<2 x i64> %a) {
-; CHECK-LABEL: lshr_trunc_v2i64_v2i8:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    shrn v0.2s, v0.2d, #16
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: lshr_trunc_v2i64_v2i8:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    shrn v0.2s, v0.2d, #16
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: lshr_trunc_v2i64_v2i8:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    xtn v0.2s, v0.2d
+; CHECK-GI-NEXT:    ushr v0.2s, v0.2s, #16
+; CHECK-GI-NEXT:    ret
   %b = lshr <2 x i64> %a, <i64 16, i64 16>
   %c = trunc <2 x i64> %b to <2 x i8>
   ret <2 x i8> %c
 }
 
 define <4 x i16> @lshr_trunc_v4i64_v4i16(<4 x i64> %a) {
-; CHECK-LABEL: lshr_trunc_v4i64_v4i16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    xtn v1.2s, v1.2d
-; CHECK-NEXT:    xtn v0.2s, v0.2d
-; CHECK-NEXT:    ushr v1.2s, v1.2s, #8
-; CHECK-NEXT:    ushr v0.2s, v0.2s, #8
-; CHECK-NEXT:    uzp1 v0.4h, v0.4h, v1.4h
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: lshr_trunc_v4i64_v4i16:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    xtn v1.2s, v1.2d
+; CHECK-SD-NEXT:    xtn v0.2s, v0.2d
+; CHECK-SD-NEXT:    ushr v1.2s, v1.2s, #8
+; CHECK-SD-NEXT:    ushr v0.2s, v0.2s, #8
+; CHECK-SD-NEXT:    uzp1 v0.4h, v0.4h, v1.4h
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: lshr_trunc_v4i64_v4i16:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    adrp x8, .LCPI270_0
+; CHECK-GI-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI270_0]
+; CHECK-GI-NEXT:    uzp1 v2.4s, v2.4s, v2.4s
+; CHECK-GI-NEXT:    neg v1.4s, v2.4s
+; CHECK-GI-NEXT:    ushl v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT:    xtn v0.4h, v0.4s
+; CHECK-GI-NEXT:    ret
   %b = lshr <4 x i64> %a, <i64 8, i64 8, i64 8, i64 8>
   %c = trunc <4 x i64> %b to <4 x i16>
   ret <4 x i16> %c
 }
 
 define <2 x i8> @ashr_trunc_v2i64_v2i8(<2 x i64> %a) {
-; CHECK-LABEL: ashr_trunc_v2i64_v2i8:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    shrn v0.2s, v0.2d, #16
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: ashr_trunc_v2i64_v2i8:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    shrn v0.2s, v0.2d, #16
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: ashr_trunc_v2i64_v2i8:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    xtn v0.2s, v0.2d
+; CHECK-GI-NEXT:    sshr v0.2s, v0.2s, #16
+; CHECK-GI-NEXT:    ret
   %b = ashr <2 x i64> %a, <i64 16, i64 16>
   %c = trunc <2 x i64> %b to <2 x i8>
   ret <2 x i8> %c
 }
 
 define <4 x i16> @ashr_trunc_v4i64_v4i16(<4 x i64> %a) {
-; CHECK-LABEL: ashr_trunc_v4i64_v4i16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    xtn v1.2s, v1.2d
-; CHECK-NEXT:    xtn v0.2s, v0.2d
-; CHECK-NEXT:    ushr v1.2s, v1.2s, #8
-; CHECK-NEXT:    ushr v0.2s, v0.2s, #8
-; CHECK-NEXT:    uzp1 v0.4h, v0.4h, v1.4h
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: ashr_trunc_v4i64_v4i16:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    xtn v1.2s, v1.2d
+; CHECK-SD-NEXT:    xtn v0.2s, v0.2d
+; CHECK-SD-NEXT:    ushr v1.2s, v1.2s, #8
+; CHECK-SD-NEXT:    ushr v0.2s, v0.2s, #8
+; CHECK-SD-NEXT:    uzp1 v0.4h, v0.4h, v1.4h
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: ashr_trunc_v4i64_v4i16:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    adrp x8, .LCPI272_0
+; CHECK-GI-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI272_0]
+; CHECK-GI-NEXT:    uzp1 v2.4s, v2.4s, v2.4s
+; CHECK-GI-NEXT:    neg v1.4s, v2.4s
+; CHECK-GI-NEXT:    sshl v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT:    xtn v0.4h, v0.4s
+; CHECK-GI-NEXT:    ret
   %b = ashr <4 x i64> %a, <i64 8, i64 8, i64 8, i64 8>
   %c = trunc <4 x i64> %b to <4 x i16>
   ret <4 x i16> %c
@@ -4446,12 +4475,23 @@ define <2 x i8> @shl_trunc_v2i64_v2i8(<2 x i64> %a) {
 }
 
 define <4 x i16> @shl_trunc_v4i64_v4i16(<4 x i64> %a) {
-; CHECK-LABEL: shl_trunc_v4i64_v4i16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    xtn v0.4h, v0.4s
-; CHECK-NEXT:    shl v0.4h, v0.4h, #8
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: shl_trunc_v4i64_v4i16:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT:    xtn v0.4h, v0.4s
+; CHECK-SD-NEXT:    shl v0.4h, v0.4h, #8
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: shl_trunc_v4i64_v4i16:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    adrp x8, .LCPI274_0
+; CHECK-GI-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI274_0]
+; CHECK-GI-NEXT:    uzp1 v1.4s, v2.4s, v2.4s
+; CHECK-GI-NEXT:    xtn v0.4h, v0.4s
+; CHECK-GI-NEXT:    xtn v1.4h, v1.4s
+; CHECK-GI-NEXT:    ushl v0.4h, v0.4h, v1.4h
+; CHECK-GI-NEXT:    ret
   %b = shl <4 x i64> %a, <i64 8, i64 8, i64 8, i64 8>
   %c = trunc <4 x i64> %b to <4 x i16>
   ret <4 x i16> %c


        


More information about the llvm-commits mailing list