[llvm] [AArch64][GlobalISel] Select SHL({Z|S}EXT, DUP Imm) into {U|S}HLL Imm (PR #96782)

via llvm-commits llvm-commits at lists.llvm.org
Wed Jun 26 08:29:10 PDT 2024


https://github.com/chuongg3 created https://github.com/llvm/llvm-project/pull/96782

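Under GlobalISel, a vector zero/sign extend followed by a shift left by a splat immediate is currently selected as an extend (`{u|s}shll #0`) plus a separate `shl`. This PR selects the pair as a single `{u|s}shll` that carries the shift amount as its immediate, e.g.:

`ushll r0, r0, #0`
`shl r0, r0, #3`

=>

`ushll r0, r0, #3`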


>From e27c2f94ba27539a3ca6ebb9bb700b8e58b2fe20 Mon Sep 17 00:00:00 2001
From: Tuan Chuong Goh <chuong.goh at arm.com>
Date: Thu, 20 Jun 2024 12:32:49 +0000
Subject: [PATCH 1/3] [AArch64][GlobalISel] Make G_DUP immediate operand
 32-bits or larger

---
 .../GISel/AArch64InstructionSelector.cpp      |  3 +-
 .../AArch64/GISel/AArch64RegisterBankInfo.cpp | 25 +++++-
 .../AArch64/GlobalISel/combine-udiv.ll        |  4 +-
 .../AArch64/GlobalISel/regbank-dup.mir        | 75 ++++++++++--------
 llvm/test/CodeGen/AArch64/aarch64-smull.ll    | 34 +++-----
 llvm/test/CodeGen/AArch64/neon-mov.ll         | 77 ++++---------------
 6 files changed, 96 insertions(+), 122 deletions(-)

diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 4a7c82b393c10..df342c8beef19 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -5536,7 +5536,8 @@ AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
   }
 
   if (CV->getSplatValue()) {
-    APInt DefBits = APInt::getSplat(DstSize, CV->getUniqueInteger());
+    APInt DefBits = APInt::getSplat(
+        DstSize, CV->getUniqueInteger().trunc(DstTy.getScalarSizeInBits()));
     auto TryMOVIWithBits = [&](APInt DefBits) -> MachineInstr * {
       MachineInstr *NewOp;
       bool Inv = false;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index 4aa6999d1d3ca..6bbf7cc689abb 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -420,6 +420,25 @@ void AArch64RegisterBankInfo::applyMappingImpl(
     MI.getOperand(2).setReg(Ext.getReg(0));
     return applyDefaultMapping(OpdMapper);
   }
+  case AArch64::G_DUP: {
+    // Widen a GPR scalar operand narrower than 32 bits to s32.
+    Builder.setInsertPt(*MI.getParent(), MI.getIterator());
+
+    Register ConstReg;
+    auto ConstMI = MRI.getVRegDef(MI.getOperand(1).getReg());
+    if (ConstMI->getOpcode() == TargetOpcode::G_CONSTANT) {
+      auto CstVal = ConstMI->getOperand(1).getCImm()->getValue();
+      ConstReg =
+          Builder.buildConstant(LLT::scalar(32), CstVal.sext(32)).getReg(0);
+      // Do not erase ConstMI: the narrow G_CONSTANT may have other users.
+    } else {
+      ConstReg = Builder.buildAnyExt(LLT::scalar(32), MI.getOperand(1).getReg())
+                     .getReg(0);
+    }
+    MRI.setRegBank(ConstReg, getRegBank(AArch64::GPRRegBankID));
+    MI.getOperand(1).setReg(ConstReg);
+    return applyDefaultMapping(OpdMapper);
+  }
   default:
     llvm_unreachable("Don't know how to handle that operation");
   }
@@ -774,8 +793,12 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
              (getRegBank(ScalarReg, MRI, TRI) == &AArch64::FPRRegBank ||
               onlyDefinesFP(*ScalarDef, MRI, TRI)))
       OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
-    else
+    else {
+      if (ScalarTy.getSizeInBits() < 32 &&
+          getRegBank(ScalarReg, MRI, TRI) == &AArch64::GPRRegBank)
+        MappingID = 1;
       OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR};
+    }
     break;
   }
   case TargetOpcode::G_TRUNC: {
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll b/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll
index c97a00ccdd455..2b9ef7acd4a4d 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll
@@ -18,8 +18,8 @@ define <8 x i16> @combine_vec_udiv_uniform(<8 x i16> %x) {
 ;
 ; GISEL-LABEL: combine_vec_udiv_uniform:
 ; GISEL:       // %bb.0:
-; GISEL-NEXT:    adrp x8, .LCPI0_0
-; GISEL-NEXT:    ldr q1, [x8, :lo12:.LCPI0_0]
+; GISEL-NEXT:    mov w8, #25645 // =0x642d
+; GISEL-NEXT:    dup v1.8h, w8
 ; GISEL-NEXT:    umull2 v2.4s, v0.8h, v1.8h
 ; GISEL-NEXT:    umull v1.4s, v0.4h, v1.4h
 ; GISEL-NEXT:    uzp2 v1.8h, v1.8h, v2.8h
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-dup.mir b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-dup.mir
index 4cd6eef531ce0..66c8c2efda9bc 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-dup.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-dup.mir
@@ -16,10 +16,11 @@ body:             |
 
     ; CHECK-LABEL: name: v4s32_gpr
     ; CHECK: liveins: $w0
-    ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
-    ; CHECK: [[DUP:%[0-9]+]]:fpr(<4 x s32>) = G_DUP [[COPY]](s32)
-    ; CHECK: $q0 = COPY [[DUP]](<4 x s32>)
-    ; CHECK: RET_ReallyLR implicit $q0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
+    ; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<4 x s32>) = G_DUP [[COPY]](s32)
+    ; CHECK-NEXT: $q0 = COPY [[DUP]](<4 x s32>)
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
     %0:_(s32) = COPY $w0
     %4:_(<4 x s32>) = G_DUP %0(s32)
     $q0 = COPY %4(<4 x s32>)
@@ -37,10 +38,11 @@ body:             |
 
     ; CHECK-LABEL: name: v4s64_gpr
     ; CHECK: liveins: $x0
-    ; CHECK: [[COPY:%[0-9]+]]:gpr(s64) = COPY $x0
-    ; CHECK: [[DUP:%[0-9]+]]:fpr(<2 x s64>) = G_DUP [[COPY]](s64)
-    ; CHECK: $q0 = COPY [[DUP]](<2 x s64>)
-    ; CHECK: RET_ReallyLR implicit $q0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(s64) = COPY $x0
+    ; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<2 x s64>) = G_DUP [[COPY]](s64)
+    ; CHECK-NEXT: $q0 = COPY [[DUP]](<2 x s64>)
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
     %0:_(s64) = COPY $x0
     %4:_(<2 x s64>) = G_DUP %0(s64)
     $q0 = COPY %4(<2 x s64>)
@@ -58,10 +60,11 @@ body:             |
 
     ; CHECK-LABEL: name: v2s32_gpr
     ; CHECK: liveins: $w0
-    ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
-    ; CHECK: [[DUP:%[0-9]+]]:fpr(<2 x s32>) = G_DUP [[COPY]](s32)
-    ; CHECK: $d0 = COPY [[DUP]](<2 x s32>)
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
+    ; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<2 x s32>) = G_DUP [[COPY]](s32)
+    ; CHECK-NEXT: $d0 = COPY [[DUP]](<2 x s32>)
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:_(s32) = COPY $w0
     %4:_(<2 x s32>) = G_DUP %0(s32)
     $d0 = COPY %4(<2 x s32>)
@@ -79,10 +82,11 @@ body:             |
 
     ; CHECK-LABEL: name: v4s32_fpr
     ; CHECK: liveins: $s0
-    ; CHECK: [[COPY:%[0-9]+]]:fpr(s32) = COPY $s0
-    ; CHECK: [[DUP:%[0-9]+]]:fpr(<4 x s32>) = G_DUP [[COPY]](s32)
-    ; CHECK: $q0 = COPY [[DUP]](<4 x s32>)
-    ; CHECK: RET_ReallyLR implicit $q0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr(s32) = COPY $s0
+    ; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<4 x s32>) = G_DUP [[COPY]](s32)
+    ; CHECK-NEXT: $q0 = COPY [[DUP]](<4 x s32>)
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
     %0:_(s32) = COPY $s0
     %4:_(<4 x s32>) = G_DUP %0(s32)
     $q0 = COPY %4(<4 x s32>)
@@ -100,10 +104,11 @@ body:             |
 
     ; CHECK-LABEL: name: v2s64_fpr
     ; CHECK: liveins: $d0
-    ; CHECK: [[COPY:%[0-9]+]]:fpr(s64) = COPY $d0
-    ; CHECK: [[DUP:%[0-9]+]]:fpr(<2 x s64>) = G_DUP [[COPY]](s64)
-    ; CHECK: $q0 = COPY [[DUP]](<2 x s64>)
-    ; CHECK: RET_ReallyLR implicit $q0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr(s64) = COPY $d0
+    ; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<2 x s64>) = G_DUP [[COPY]](s64)
+    ; CHECK-NEXT: $q0 = COPY [[DUP]](<2 x s64>)
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
     %0:_(s64) = COPY $d0
     %4:_(<2 x s64>) = G_DUP %0(s64)
     $q0 = COPY %4(<2 x s64>)
@@ -121,10 +126,11 @@ body:             |
 
     ; CHECK-LABEL: name: v2s32_fpr
     ; CHECK: liveins: $s0
-    ; CHECK: [[COPY:%[0-9]+]]:fpr(s32) = COPY $s0
-    ; CHECK: [[DUP:%[0-9]+]]:fpr(<2 x s32>) = G_DUP [[COPY]](s32)
-    ; CHECK: $d0 = COPY [[DUP]](<2 x s32>)
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr(s32) = COPY $s0
+    ; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<2 x s32>) = G_DUP [[COPY]](s32)
+    ; CHECK-NEXT: $d0 = COPY [[DUP]](<2 x s32>)
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:_(s32) = COPY $s0
     %4:_(<2 x s32>) = G_DUP %0(s32)
     $d0 = COPY %4(<2 x s32>)
@@ -142,10 +148,11 @@ body:             |
 
     ; CHECK-LABEL: name: v2s64_fpr_copy
     ; CHECK: liveins: $d0
-    ; CHECK: [[COPY:%[0-9]+]]:fpr(s64) = COPY $d0
-    ; CHECK: [[DUP:%[0-9]+]]:fpr(<2 x s64>) = G_DUP [[COPY]](s64)
-    ; CHECK: $q0 = COPY [[DUP]](<2 x s64>)
-    ; CHECK: RET_ReallyLR implicit $q0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr(s64) = COPY $d0
+    ; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<2 x s64>) = G_DUP [[COPY]](s64)
+    ; CHECK-NEXT: $q0 = COPY [[DUP]](<2 x s64>)
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
     %0:_(s64) = COPY $d0
     %6:_(<2 x s64>) = G_DUP %0(s64)
     $q0 = COPY %6(<2 x s64>)
@@ -163,11 +170,13 @@ body:             |
 
     ; CHECK-LABEL: name: v416s8_gpr
     ; CHECK: liveins: $w0
-    ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
-    ; CHECK: %trunc:gpr(s8) = G_TRUNC [[COPY]](s32)
-    ; CHECK: [[DUP:%[0-9]+]]:fpr(<16 x s8>) = G_DUP %trunc(s8)
-    ; CHECK: $q0 = COPY [[DUP]](<16 x s8>)
-    ; CHECK: RET_ReallyLR implicit $q0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
+    ; CHECK-NEXT: %trunc:gpr(s8) = G_TRUNC [[COPY]](s32)
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:gpr(s32) = G_ANYEXT %trunc(s8)
+    ; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<16 x s8>) = G_DUP [[ANYEXT]](s32)
+    ; CHECK-NEXT: $q0 = COPY [[DUP]](<16 x s8>)
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
     %0:_(s32) = COPY $w0
     %trunc:_(s8) = G_TRUNC %0(s32)
     %1:_(<16 x s8>) = G_DUP %trunc(s8)
diff --git a/llvm/test/CodeGen/AArch64/aarch64-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-smull.ll
index 307aa397eabbb..5aff8e0351487 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-smull.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-smull.ll
@@ -994,9 +994,9 @@ define <8 x i16> @smull_noextvec_v8i8_v8i16(<8 x i8> %arg) nounwind {
 ;
 ; CHECK-GI-LABEL: smull_noextvec_v8i8_v8i16:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI34_0
+; CHECK-GI-NEXT:    mov w8, #-999 // =0xfffffc19
 ; CHECK-GI-NEXT:    sshll v0.8h, v0.8b, #0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI34_0]
+; CHECK-GI-NEXT:    dup v1.8h, w8
 ; CHECK-GI-NEXT:    mul v0.8h, v0.8h, v1.8h
 ; CHECK-GI-NEXT:    ret
   %tmp3 = sext <8 x i8> %arg to <8 x i16>
@@ -1088,29 +1088,13 @@ define <8 x i16> @umull_extvec_v8i8_v8i16(<8 x i8> %arg) nounwind {
 
 define <8 x i16> @umull_noextvec_v8i8_v8i16(<8 x i8> %arg) nounwind {
 ; Do not use SMULL if the BUILD_VECTOR element values are too big.
-; CHECK-NEON-LABEL: umull_noextvec_v8i8_v8i16:
-; CHECK-NEON:       // %bb.0:
-; CHECK-NEON-NEXT:    mov w8, #999 // =0x3e7
-; CHECK-NEON-NEXT:    ushll v0.8h, v0.8b, #0
-; CHECK-NEON-NEXT:    dup v1.8h, w8
-; CHECK-NEON-NEXT:    mul v0.8h, v0.8h, v1.8h
-; CHECK-NEON-NEXT:    ret
-;
-; CHECK-SVE-LABEL: umull_noextvec_v8i8_v8i16:
-; CHECK-SVE:       // %bb.0:
-; CHECK-SVE-NEXT:    mov w8, #999 // =0x3e7
-; CHECK-SVE-NEXT:    ushll v0.8h, v0.8b, #0
-; CHECK-SVE-NEXT:    dup v1.8h, w8
-; CHECK-SVE-NEXT:    mul v0.8h, v0.8h, v1.8h
-; CHECK-SVE-NEXT:    ret
-;
-; CHECK-GI-LABEL: umull_noextvec_v8i8_v8i16:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI38_0
-; CHECK-GI-NEXT:    ushll v0.8h, v0.8b, #0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI38_0]
-; CHECK-GI-NEXT:    mul v0.8h, v0.8h, v1.8h
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: umull_noextvec_v8i8_v8i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #999 // =0x3e7
+; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-NEXT:    dup v1.8h, w8
+; CHECK-NEXT:    mul v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    ret
   %tmp3 = zext <8 x i8> %arg to <8 x i16>
   %tmp4 = mul <8 x i16> %tmp3, <i16 999, i16 999, i16 999, i16 999, i16 999, i16 999, i16 999, i16 999>
   ret <8 x i16> %tmp4
diff --git a/llvm/test/CodeGen/AArch64/neon-mov.ll b/llvm/test/CodeGen/AArch64/neon-mov.ll
index 170ba7292ae60..cb85bbda80a80 100644
--- a/llvm/test/CodeGen/AArch64/neon-mov.ll
+++ b/llvm/test/CodeGen/AArch64/neon-mov.ll
@@ -109,29 +109,11 @@ define <4 x i32> @movi4s_lsl16() {
 }
 
 define <4 x i32> @movi4s_fneg() {
-; CHECK-NOFP16-SD-LABEL: movi4s_fneg:
-; CHECK-NOFP16-SD:       // %bb.0:
-; CHECK-NOFP16-SD-NEXT:    movi v0.4s, #240, lsl #8
-; CHECK-NOFP16-SD-NEXT:    fneg v0.4s, v0.4s
-; CHECK-NOFP16-SD-NEXT:    ret
-;
-; CHECK-FP16-SD-LABEL: movi4s_fneg:
-; CHECK-FP16-SD:       // %bb.0:
-; CHECK-FP16-SD-NEXT:    movi v0.4s, #240, lsl #8
-; CHECK-FP16-SD-NEXT:    fneg v0.4s, v0.4s
-; CHECK-FP16-SD-NEXT:    ret
-;
-; CHECK-NOFP16-GI-LABEL: movi4s_fneg:
-; CHECK-NOFP16-GI:       // %bb.0:
-; CHECK-NOFP16-GI-NEXT:    movi v0.4s, #240, lsl #8
-; CHECK-NOFP16-GI-NEXT:    fneg v0.4s, v0.4s
-; CHECK-NOFP16-GI-NEXT:    ret
-;
-; CHECK-FP16-GI-LABEL: movi4s_fneg:
-; CHECK-FP16-GI:       // %bb.0:
-; CHECK-FP16-GI-NEXT:    movi v0.4s, #240, lsl #8
-; CHECK-FP16-GI-NEXT:    fneg v0.4s, v0.4s
-; CHECK-FP16-GI-NEXT:    ret
+; CHECK-LABEL: movi4s_fneg:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v0.4s, #240, lsl #8
+; CHECK-NEXT:    fneg v0.4s, v0.4s
+; CHECK-NEXT:    ret
    ret <4 x i32> <i32 2147545088, i32 2147545088, i32 2147545088, i32 2147545088>
 }
 
@@ -308,23 +290,17 @@ define <8 x i16> @mvni8h_neg() {
 ; CHECK-NOFP16-SD-NEXT:    dup v0.8h, w8
 ; CHECK-NOFP16-SD-NEXT:    ret
 ;
-; CHECK-FP16-SD-LABEL: mvni8h_neg:
-; CHECK-FP16-SD:       // %bb.0:
-; CHECK-FP16-SD-NEXT:    movi v0.8h, #240
-; CHECK-FP16-SD-NEXT:    fneg v0.8h, v0.8h
-; CHECK-FP16-SD-NEXT:    ret
+; CHECK-FP16-LABEL: mvni8h_neg:
+; CHECK-FP16:       // %bb.0:
+; CHECK-FP16-NEXT:    movi v0.8h, #240
+; CHECK-FP16-NEXT:    fneg v0.8h, v0.8h
+; CHECK-FP16-NEXT:    ret
 ;
 ; CHECK-NOFP16-GI-LABEL: mvni8h_neg:
 ; CHECK-NOFP16-GI:       // %bb.0:
-; CHECK-NOFP16-GI-NEXT:    adrp x8, .LCPI32_0
-; CHECK-NOFP16-GI-NEXT:    ldr q0, [x8, :lo12:.LCPI32_0]
+; CHECK-NOFP16-GI-NEXT:    mov w8, #-32528 // =0xffff80f0
+; CHECK-NOFP16-GI-NEXT:    dup v0.8h, w8
 ; CHECK-NOFP16-GI-NEXT:    ret
-;
-; CHECK-FP16-GI-LABEL: mvni8h_neg:
-; CHECK-FP16-GI:       // %bb.0:
-; CHECK-FP16-GI-NEXT:    movi v0.8h, #240
-; CHECK-FP16-GI-NEXT:    fneg v0.8h, v0.8h
-; CHECK-FP16-GI-NEXT:    ret
    ret <8 x i16> <i16 33008, i16 33008, i16 33008, i16 33008, i16 33008, i16 33008, i16 33008, i16 33008>
 }
 
@@ -494,29 +470,11 @@ define <2 x double> @fmov2d() {
 }
 
 define <2 x double> @fmov2d_neg0() {
-; CHECK-NOFP16-SD-LABEL: fmov2d_neg0:
-; CHECK-NOFP16-SD:       // %bb.0:
-; CHECK-NOFP16-SD-NEXT:    movi v0.2d, #0000000000000000
-; CHECK-NOFP16-SD-NEXT:    fneg v0.2d, v0.2d
-; CHECK-NOFP16-SD-NEXT:    ret
-;
-; CHECK-FP16-SD-LABEL: fmov2d_neg0:
-; CHECK-FP16-SD:       // %bb.0:
-; CHECK-FP16-SD-NEXT:    movi v0.2d, #0000000000000000
-; CHECK-FP16-SD-NEXT:    fneg v0.2d, v0.2d
-; CHECK-FP16-SD-NEXT:    ret
-;
-; CHECK-NOFP16-GI-LABEL: fmov2d_neg0:
-; CHECK-NOFP16-GI:       // %bb.0:
-; CHECK-NOFP16-GI-NEXT:    movi v0.2d, #0000000000000000
-; CHECK-NOFP16-GI-NEXT:    fneg v0.2d, v0.2d
-; CHECK-NOFP16-GI-NEXT:    ret
-;
-; CHECK-FP16-GI-LABEL: fmov2d_neg0:
-; CHECK-FP16-GI:       // %bb.0:
-; CHECK-FP16-GI-NEXT:    movi v0.2d, #0000000000000000
-; CHECK-FP16-GI-NEXT:    fneg v0.2d, v0.2d
-; CHECK-FP16-GI-NEXT:    ret
+; CHECK-LABEL: fmov2d_neg0:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    fneg v0.2d, v0.2d
+; CHECK-NEXT:    ret
 	ret <2 x double> <double -0.0, double -0.0>
 }
 
@@ -581,5 +539,4 @@ define <2 x i32> @movi1d() {
   ret <2 x i32> %1
 }
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; CHECK-FP16: {{.*}}
 ; CHECK-NOFP16: {{.*}}

>From f29cb3bc90fdda4d0baae2808996f6c73fe66af7 Mon Sep 17 00:00:00 2001
From: Tuan Chuong Goh <chuong.goh at arm.com>
Date: Fri, 31 May 2024 15:32:07 +0000
Subject: [PATCH 2/3] [AArch64][NFC] Pre-commit tests for Shift Left Long

---
 .../CodeGen/AArch64/neon-shift-left-long.ll   | 238 ++++++++++++++----
 1 file changed, 186 insertions(+), 52 deletions(-)

diff --git a/llvm/test/CodeGen/AArch64/neon-shift-left-long.ll b/llvm/test/CodeGen/AArch64/neon-shift-left-long.ll
index d10d551805a6b..3dcd23051ce52 100644
--- a/llvm/test/CodeGen/AArch64/neon-shift-left-long.ll
+++ b/llvm/test/CodeGen/AArch64/neon-shift-left-long.ll
@@ -1,56 +1,114 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI
 
 define <8 x i16> @test_sshll_v8i8(<8 x i8> %a) {
-; CHECK: test_sshll_v8i8:
-; CHECK: sshll {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, #3
+; CHECK-SD-LABEL: test_sshll_v8i8:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    sshll v0.8h, v0.8b, #3
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_sshll_v8i8:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    sshll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT:    shl v0.8h, v0.8h, #3
+; CHECK-GI-NEXT:    ret
   %1 = sext <8 x i8> %a to <8 x i16>
   %tmp = shl <8 x i16> %1, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
   ret <8 x i16> %tmp
 }
 
 define <4 x i32> @test_sshll_v4i16(<4 x i16> %a) {
-; CHECK: test_sshll_v4i16:
-; CHECK: sshll {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, #9
+; CHECK-SD-LABEL: test_sshll_v4i16:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    sshll v0.4s, v0.4h, #9
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_sshll_v4i16:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT:    shl v0.4s, v0.4s, #9
+; CHECK-GI-NEXT:    ret
   %1 = sext <4 x i16> %a to <4 x i32>
   %tmp = shl <4 x i32> %1, <i32 9, i32 9, i32 9, i32 9>
   ret <4 x i32> %tmp
 }
 
 define <2 x i64> @test_sshll_v2i32(<2 x i32> %a) {
-; CHECK: test_sshll_v2i32:
-; CHECK: sshll {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, #19
+; CHECK-SD-LABEL: test_sshll_v2i32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    sshll v0.2d, v0.2s, #19
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_sshll_v2i32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    sshll v0.2d, v0.2s, #0
+; CHECK-GI-NEXT:    shl v0.2d, v0.2d, #19
+; CHECK-GI-NEXT:    ret
   %1 = sext <2 x i32> %a to <2 x i64>
   %tmp = shl <2 x i64> %1, <i64 19, i64 19>
   ret <2 x i64> %tmp
 }
 
 define <8 x i16> @test_ushll_v8i8(<8 x i8> %a) {
-; CHECK: test_ushll_v8i8:
-; CHECK: ushll {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, #3
+; CHECK-SD-LABEL: test_ushll_v8i8:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    ushll v0.8h, v0.8b, #3
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_ushll_v8i8:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT:    shl v0.8h, v0.8h, #3
+; CHECK-GI-NEXT:    ret
   %1 = zext <8 x i8> %a to <8 x i16>
   %tmp = shl <8 x i16> %1, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
   ret <8 x i16> %tmp
 }
 
 define <4 x i32> @test_ushll_v4i16(<4 x i16> %a) {
-; CHECK: test_ushll_v4i16:
-; CHECK: ushll {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, #9
+; CHECK-SD-LABEL: test_ushll_v4i16:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    ushll v0.4s, v0.4h, #9
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_ushll_v4i16:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT:    shl v0.4s, v0.4s, #9
+; CHECK-GI-NEXT:    ret
   %1 = zext <4 x i16> %a to <4 x i32>
   %tmp = shl <4 x i32> %1, <i32 9, i32 9, i32 9, i32 9>
   ret <4 x i32> %tmp
 }
 
 define <2 x i64> @test_ushll_v2i32(<2 x i32> %a) {
-; CHECK: test_ushll_v2i32:
-; CHECK: ushll {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, #19
+; CHECK-SD-LABEL: test_ushll_v2i32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    ushll v0.2d, v0.2s, #19
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_ushll_v2i32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-GI-NEXT:    shl v0.2d, v0.2d, #19
+; CHECK-GI-NEXT:    ret
   %1 = zext <2 x i32> %a to <2 x i64>
   %tmp = shl <2 x i64> %1, <i64 19, i64 19>
   ret <2 x i64> %tmp
 }
 
 define <8 x i16> @test_sshll2_v16i8(<16 x i8> %a) {
-; CHECK: test_sshll2_v16i8:
-; CHECK: sshll2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, #3
+; CHECK-SD-LABEL: test_sshll2_v16i8:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    sshll2 v0.8h, v0.16b, #3
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_sshll2_v16i8:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    sshll2 v0.8h, v0.16b, #0
+; CHECK-GI-NEXT:    shl v0.8h, v0.8h, #3
+; CHECK-GI-NEXT:    ret
   %1 = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   %2 = sext <8 x i8> %1 to <8 x i16>
   %tmp = shl <8 x i16> %2, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -58,8 +116,16 @@ define <8 x i16> @test_sshll2_v16i8(<16 x i8> %a) {
 }
 
 define <4 x i32> @test_sshll2_v8i16(<8 x i16> %a) {
-; CHECK: test_sshll2_v8i16:
-; CHECK: sshll2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, #9
+; CHECK-SD-LABEL: test_sshll2_v8i16:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    sshll2 v0.4s, v0.8h, #9
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_sshll2_v8i16:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    sshll2 v0.4s, v0.8h, #0
+; CHECK-GI-NEXT:    shl v0.4s, v0.4s, #9
+; CHECK-GI-NEXT:    ret
   %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %2 = sext <4 x i16> %1 to <4 x i32>
   %tmp = shl <4 x i32> %2, <i32 9, i32 9, i32 9, i32 9>
@@ -67,8 +133,16 @@ define <4 x i32> @test_sshll2_v8i16(<8 x i16> %a) {
 }
 
 define <2 x i64> @test_sshll2_v4i32(<4 x i32> %a) {
-; CHECK: test_sshll2_v4i32:
-; CHECK: sshll2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, #19
+; CHECK-SD-LABEL: test_sshll2_v4i32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    sshll2 v0.2d, v0.4s, #19
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_sshll2_v4i32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    sshll2 v0.2d, v0.4s, #0
+; CHECK-GI-NEXT:    shl v0.2d, v0.2d, #19
+; CHECK-GI-NEXT:    ret
   %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
   %2 = sext <2 x i32> %1 to <2 x i64>
   %tmp = shl <2 x i64> %2, <i64 19, i64 19>
@@ -76,8 +150,16 @@ define <2 x i64> @test_sshll2_v4i32(<4 x i32> %a) {
 }
 
 define <8 x i16> @test_ushll2_v16i8(<16 x i8> %a) {
-; CHECK: test_ushll2_v16i8:
-; CHECK: ushll2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, #3
+; CHECK-SD-LABEL: test_ushll2_v16i8:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    ushll2 v0.8h, v0.16b, #3
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_ushll2_v16i8:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    ushll2 v0.8h, v0.16b, #0
+; CHECK-GI-NEXT:    shl v0.8h, v0.8h, #3
+; CHECK-GI-NEXT:    ret
   %1 = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   %2 = zext <8 x i8> %1 to <8 x i16>
   %tmp = shl <8 x i16> %2, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -85,8 +167,16 @@ define <8 x i16> @test_ushll2_v16i8(<16 x i8> %a) {
 }
 
 define <4 x i32> @test_ushll2_v8i16(<8 x i16> %a) {
-; CHECK: test_ushll2_v8i16:
-; CHECK: ushll2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, #9
+; CHECK-SD-LABEL: test_ushll2_v8i16:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    ushll2 v0.4s, v0.8h, #9
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_ushll2_v8i16:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    ushll2 v0.4s, v0.8h, #0
+; CHECK-GI-NEXT:    shl v0.4s, v0.4s, #9
+; CHECK-GI-NEXT:    ret
   %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %2 = zext <4 x i16> %1 to <4 x i32>
   %tmp = shl <4 x i32> %2, <i32 9, i32 9, i32 9, i32 9>
@@ -94,8 +184,16 @@ define <4 x i32> @test_ushll2_v8i16(<8 x i16> %a) {
 }
 
 define <2 x i64> @test_ushll2_v4i32(<4 x i32> %a) {
-; CHECK: test_ushll2_v4i32:
-; CHECK: ushll2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, #19
+; CHECK-SD-LABEL: test_ushll2_v4i32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    ushll2 v0.2d, v0.4s, #19
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_ushll2_v4i32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    ushll2 v0.2d, v0.4s, #0
+; CHECK-GI-NEXT:    shl v0.2d, v0.2d, #19
+; CHECK-GI-NEXT:    ret
   %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
   %2 = zext <2 x i32> %1 to <2 x i64>
   %tmp = shl <2 x i64> %2, <i64 19, i64 19>
@@ -103,99 +201,135 @@ define <2 x i64> @test_ushll2_v4i32(<4 x i32> %a) {
 }
 
 define <8 x i16> @test_sshll_shl0_v8i8(<8 x i8> %a) {
-; CHECK: test_sshll_shl0_v8i8:
-; CHECK: sshll {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, #0
+; CHECK-LABEL: test_sshll_shl0_v8i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sshll v0.8h, v0.8b, #0
+; CHECK-NEXT:    ret
   %tmp = sext <8 x i8> %a to <8 x i16>
   ret <8 x i16> %tmp
 }
 
 define <4 x i32> @test_sshll_shl0_v4i16(<4 x i16> %a) {
-; CHECK: test_sshll_shl0_v4i16:
-; CHECK: sshll {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, #0
+; CHECK-LABEL: test_sshll_shl0_v4i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ret
   %tmp = sext <4 x i16> %a to <4 x i32>
   ret <4 x i32> %tmp
 }
 
 define <2 x i64> @test_sshll_shl0_v2i32(<2 x i32> %a) {
-; CHECK: test_sshll_shl0_v2i32:
-; CHECK: sshll {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, #0
+; CHECK-LABEL: test_sshll_shl0_v2i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sshll v0.2d, v0.2s, #0
+; CHECK-NEXT:    ret
   %tmp = sext <2 x i32> %a to <2 x i64>
   ret <2 x i64> %tmp
 }
 
 define <8 x i16> @test_ushll_shl0_v8i8(<8 x i8> %a) {
-; CHECK: test_ushll_shl0_v8i8:
-; CHECK: ushll {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, #0
+; CHECK-LABEL: test_ushll_shl0_v8i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-NEXT:    ret
   %tmp = zext <8 x i8> %a to <8 x i16>
   ret <8 x i16> %tmp
 }
 
 define <4 x i32> @test_ushll_shl0_v4i16(<4 x i16> %a) {
-; CHECK: test_ushll_shl0_v4i16:
-; CHECK: ushll {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, #0
+; CHECK-LABEL: test_ushll_shl0_v4i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ret
   %tmp = zext <4 x i16> %a to <4 x i32>
   ret <4 x i32> %tmp
 }
 
 define <2 x i64> @test_ushll_shl0_v2i32(<2 x i32> %a) {
-; CHECK: test_ushll_shl0_v2i32:
-; CHECK: ushll {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, #0
+; CHECK-LABEL: test_ushll_shl0_v2i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-NEXT:    ret
   %tmp = zext <2 x i32> %a to <2 x i64>
   ret <2 x i64> %tmp
 }
 
 define <8 x i16> @test_sshll2_shl0_v16i8(<16 x i8> %a) {
-; CHECK: test_sshll2_shl0_v16i8:
-; CHECK: sshll2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, #0
+; CHECK-LABEL: test_sshll2_shl0_v16i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sshll2 v0.8h, v0.16b, #0
+; CHECK-NEXT:    ret
   %1 = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   %tmp = sext <8 x i8> %1 to <8 x i16>
   ret <8 x i16> %tmp
 }
 
 define <4 x i32> @test_sshll2_shl0_v8i16(<8 x i16> %a) {
-; CHECK: test_sshll2_shl0_v8i16:
-; CHECK: sshll2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, #0
+; CHECK-LABEL: test_sshll2_shl0_v8i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sshll2 v0.4s, v0.8h, #0
+; CHECK-NEXT:    ret
   %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %tmp = sext <4 x i16> %1 to <4 x i32>
   ret <4 x i32> %tmp
 }
 
 define <2 x i64> @test_sshll2_shl0_v4i32(<4 x i32> %a) {
-; CHECK: test_sshll2_shl0_v4i32:
-; CHECK: sshll2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, #0
+; CHECK-LABEL: test_sshll2_shl0_v4i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sshll2 v0.2d, v0.4s, #0
+; CHECK-NEXT:    ret
   %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
   %tmp = sext <2 x i32> %1 to <2 x i64>
   ret <2 x i64> %tmp
 }
 
 define <8 x i16> @test_ushll2_shl0_v16i8(<16 x i8> %a) {
-; CHECK: test_ushll2_shl0_v16i8:
-; CHECK: ushll2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, #0
+; CHECK-LABEL: test_ushll2_shl0_v16i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ushll2 v0.8h, v0.16b, #0
+; CHECK-NEXT:    ret
   %1 = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   %tmp = zext <8 x i8> %1 to <8 x i16>
   ret <8 x i16> %tmp
 }
 
 define <4 x i32> @test_ushll2_shl0_v8i16(<8 x i16> %a) {
-; CHECK: test_ushll2_shl0_v8i16:
-; CHECK: ushll2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, #0
+; CHECK-LABEL: test_ushll2_shl0_v8i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ushll2 v0.4s, v0.8h, #0
+; CHECK-NEXT:    ret
   %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %tmp = zext <4 x i16> %1 to <4 x i32>
   ret <4 x i32> %tmp
 }
 
 define <2 x i64> @test_ushll2_shl0_v4i32(<4 x i32> %a) {
-; CHECK: test_ushll2_shl0_v4i32:
-; CHECK: ushll2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, #0
+; CHECK-LABEL: test_ushll2_shl0_v4i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ushll2 v0.2d, v0.4s, #0
+; CHECK-NEXT:    ret
   %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
   %tmp = zext <2 x i32> %1 to <2 x i64>
   ret <2 x i64> %tmp
 }
 
 define <8 x i16> @test_ushll_cmp(<8 x i8> %a, <8 x i8> %b) #0 {
-; CHECK: test_ushll_cmp:
-; CHECK: cmeq	{{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-; CHECK-NEXT: ushll	{{v[0-9]+}}.8h, {{v[0-9]+}}.8b, #0
+; CHECK-SD-LABEL: test_ushll_cmp:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    cmeq v0.8b, v0.8b, v1.8b
+; CHECK-SD-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_ushll_cmp:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    cmeq v0.8b, v0.8b, v1.8b
+; CHECK-GI-NEXT:    movi v1.2d, #0xff00ff00ff00ff
+; CHECK-GI-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT:    shl v0.8h, v0.8h, #15
+; CHECK-GI-NEXT:    sshr v0.8h, v0.8h, #15
+; CHECK-GI-NEXT:    and v0.16b, v0.16b, v1.16b
+; CHECK-GI-NEXT:    ret
   %cmp.i = icmp eq <8 x i8> %a, %b
   %vcgtz.i.i = sext <8 x i1> %cmp.i to <8 x i8>
   %vmovl.i.i.i = zext <8 x i8> %vcgtz.i.i to <8 x i16>

>From c0a690236717069589745a98ce454a5a50dade8d Mon Sep 17 00:00:00 2001
From: Tuan Chuong Goh <chuong.goh at arm.com>
Date: Thu, 20 Jun 2024 12:40:51 +0000
Subject: [PATCH 3/3] [AArch64][GlobalISel] Select SHL({Z|S}EXT, DUP imm)
 into {U|S}HLL imm

---
 llvm/lib/Target/AArch64/AArch64InstrInfo.td   |  14 +++
 .../CodeGen/AArch64/neon-shift-left-long.ll   | 108 ++++++------------
 2 files changed, 50 insertions(+), 72 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index dd54520c8ddad..2bb6ee6dc2b87 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -7879,6 +7879,20 @@ def : Pat<(v4i32 (concat_vectors (v2i32 V64:$Rd),
           (SHRNv4i32_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
                            V128:$Rn, vecshiftR32Narrow:$imm)>;
 
+def : Pat<(shl (v8i16 (zext (v8i8 V64:$Rm))), (v8i16 (AArch64dup (i32 imm:$size)))),
+          (USHLLv8i8_shift V64:$Rm, (i32 imm:$size))>;
+def : Pat<(shl (v4i32 (zext (v4i16 V64:$Rm))), (v4i32 (AArch64dup (i32 imm:$size)))),
+          (USHLLv4i16_shift V64:$Rm, (i32 imm:$size))>;
+def : Pat<(shl (v2i64 (zext (v2i32 V64:$Rm))), (v2i64 (AArch64dup (i64 imm:$size)))),
+          (USHLLv2i32_shift V64:$Rm, (trunc_imm imm:$size))>;
+
+def : Pat<(shl (v8i16 (sext (v8i8 V64:$Rm))), (v8i16 (AArch64dup (i32 imm:$size)))),
+          (SSHLLv8i8_shift V64:$Rm, (i32 imm:$size))>;
+def : Pat<(shl (v4i32 (sext (v4i16 V64:$Rm))), (v4i32 (AArch64dup (i32 imm:$size)))),
+          (SSHLLv4i16_shift V64:$Rm, (i32 imm:$size))>;
+def : Pat<(shl (v2i64 (sext (v2i32 V64:$Rm))), (v2i64 (AArch64dup (i64 imm:$size)))),
+          (SSHLLv2i32_shift V64:$Rm, (trunc_imm imm:$size))>;
+
 // Vector sign and zero extensions are implemented with SSHLL and USSHLL.
 // Anyexts are implemented as zexts.
 def : Pat<(v8i16 (sext   (v8i8 V64:$Rn))),  (SSHLLv8i8_shift  V64:$Rn, (i32 0))>;
diff --git a/llvm/test/CodeGen/AArch64/neon-shift-left-long.ll b/llvm/test/CodeGen/AArch64/neon-shift-left-long.ll
index 3dcd23051ce52..a06bc0856c9f1 100644
--- a/llvm/test/CodeGen/AArch64/neon-shift-left-long.ll
+++ b/llvm/test/CodeGen/AArch64/neon-shift-left-long.ll
@@ -3,96 +3,60 @@
 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI
 
 define <8 x i16> @test_sshll_v8i8(<8 x i8> %a) {
-; CHECK-SD-LABEL: test_sshll_v8i8:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    sshll v0.8h, v0.8b, #3
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: test_sshll_v8i8:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    sshll v0.8h, v0.8b, #0
-; CHECK-GI-NEXT:    shl v0.8h, v0.8h, #3
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: test_sshll_v8i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sshll v0.8h, v0.8b, #3
+; CHECK-NEXT:    ret
   %1 = sext <8 x i8> %a to <8 x i16>
   %tmp = shl <8 x i16> %1, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
   ret <8 x i16> %tmp
 }
 
 define <4 x i32> @test_sshll_v4i16(<4 x i16> %a) {
-; CHECK-SD-LABEL: test_sshll_v4i16:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    sshll v0.4s, v0.4h, #9
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: test_sshll_v4i16:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    sshll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT:    shl v0.4s, v0.4s, #9
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: test_sshll_v4i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sshll v0.4s, v0.4h, #9
+; CHECK-NEXT:    ret
   %1 = sext <4 x i16> %a to <4 x i32>
   %tmp = shl <4 x i32> %1, <i32 9, i32 9, i32 9, i32 9>
   ret <4 x i32> %tmp
 }
 
 define <2 x i64> @test_sshll_v2i32(<2 x i32> %a) {
-; CHECK-SD-LABEL: test_sshll_v2i32:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    sshll v0.2d, v0.2s, #19
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: test_sshll_v2i32:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    sshll v0.2d, v0.2s, #0
-; CHECK-GI-NEXT:    shl v0.2d, v0.2d, #19
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: test_sshll_v2i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sshll v0.2d, v0.2s, #19
+; CHECK-NEXT:    ret
   %1 = sext <2 x i32> %a to <2 x i64>
   %tmp = shl <2 x i64> %1, <i64 19, i64 19>
   ret <2 x i64> %tmp
 }
 
 define <8 x i16> @test_ushll_v8i8(<8 x i8> %a) {
-; CHECK-SD-LABEL: test_ushll_v8i8:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    ushll v0.8h, v0.8b, #3
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: test_ushll_v8i8:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    ushll v0.8h, v0.8b, #0
-; CHECK-GI-NEXT:    shl v0.8h, v0.8h, #3
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: test_ushll_v8i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ushll v0.8h, v0.8b, #3
+; CHECK-NEXT:    ret
   %1 = zext <8 x i8> %a to <8 x i16>
   %tmp = shl <8 x i16> %1, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
   ret <8 x i16> %tmp
 }
 
 define <4 x i32> @test_ushll_v4i16(<4 x i16> %a) {
-; CHECK-SD-LABEL: test_ushll_v4i16:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    ushll v0.4s, v0.4h, #9
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: test_ushll_v4i16:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT:    shl v0.4s, v0.4s, #9
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: test_ushll_v4i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #9
+; CHECK-NEXT:    ret
   %1 = zext <4 x i16> %a to <4 x i32>
   %tmp = shl <4 x i32> %1, <i32 9, i32 9, i32 9, i32 9>
   ret <4 x i32> %tmp
 }
 
 define <2 x i64> @test_ushll_v2i32(<2 x i32> %a) {
-; CHECK-SD-LABEL: test_ushll_v2i32:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    ushll v0.2d, v0.2s, #19
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: test_ushll_v2i32:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    ushll v0.2d, v0.2s, #0
-; CHECK-GI-NEXT:    shl v0.2d, v0.2d, #19
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: test_ushll_v2i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ushll v0.2d, v0.2s, #19
+; CHECK-NEXT:    ret
   %1 = zext <2 x i32> %a to <2 x i64>
   %tmp = shl <2 x i64> %1, <i64 19, i64 19>
   ret <2 x i64> %tmp
@@ -106,8 +70,8 @@ define <8 x i16> @test_sshll2_v16i8(<16 x i8> %a) {
 ;
 ; CHECK-GI-LABEL: test_sshll2_v16i8:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    sshll2 v0.8h, v0.16b, #0
-; CHECK-GI-NEXT:    shl v0.8h, v0.8h, #3
+; CHECK-GI-NEXT:    mov d0, v0.d[1]
+; CHECK-GI-NEXT:    sshll v0.8h, v0.8b, #3
 ; CHECK-GI-NEXT:    ret
   %1 = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   %2 = sext <8 x i8> %1 to <8 x i16>
@@ -123,8 +87,8 @@ define <4 x i32> @test_sshll2_v8i16(<8 x i16> %a) {
 ;
 ; CHECK-GI-LABEL: test_sshll2_v8i16:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    sshll2 v0.4s, v0.8h, #0
-; CHECK-GI-NEXT:    shl v0.4s, v0.4s, #9
+; CHECK-GI-NEXT:    mov d0, v0.d[1]
+; CHECK-GI-NEXT:    sshll v0.4s, v0.4h, #9
 ; CHECK-GI-NEXT:    ret
   %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %2 = sext <4 x i16> %1 to <4 x i32>
@@ -140,8 +104,8 @@ define <2 x i64> @test_sshll2_v4i32(<4 x i32> %a) {
 ;
 ; CHECK-GI-LABEL: test_sshll2_v4i32:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    sshll2 v0.2d, v0.4s, #0
-; CHECK-GI-NEXT:    shl v0.2d, v0.2d, #19
+; CHECK-GI-NEXT:    mov d0, v0.d[1]
+; CHECK-GI-NEXT:    sshll v0.2d, v0.2s, #19
 ; CHECK-GI-NEXT:    ret
   %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
   %2 = sext <2 x i32> %1 to <2 x i64>
@@ -157,8 +121,8 @@ define <8 x i16> @test_ushll2_v16i8(<16 x i8> %a) {
 ;
 ; CHECK-GI-LABEL: test_ushll2_v16i8:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    ushll2 v0.8h, v0.16b, #0
-; CHECK-GI-NEXT:    shl v0.8h, v0.8h, #3
+; CHECK-GI-NEXT:    mov d0, v0.d[1]
+; CHECK-GI-NEXT:    ushll v0.8h, v0.8b, #3
 ; CHECK-GI-NEXT:    ret
   %1 = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   %2 = zext <8 x i8> %1 to <8 x i16>
@@ -174,8 +138,8 @@ define <4 x i32> @test_ushll2_v8i16(<8 x i16> %a) {
 ;
 ; CHECK-GI-LABEL: test_ushll2_v8i16:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    ushll2 v0.4s, v0.8h, #0
-; CHECK-GI-NEXT:    shl v0.4s, v0.4s, #9
+; CHECK-GI-NEXT:    mov d0, v0.d[1]
+; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #9
 ; CHECK-GI-NEXT:    ret
   %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %2 = zext <4 x i16> %1 to <4 x i32>
@@ -191,8 +155,8 @@ define <2 x i64> @test_ushll2_v4i32(<4 x i32> %a) {
 ;
 ; CHECK-GI-LABEL: test_ushll2_v4i32:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    ushll2 v0.2d, v0.4s, #0
-; CHECK-GI-NEXT:    shl v0.2d, v0.2d, #19
+; CHECK-GI-NEXT:    mov d0, v0.d[1]
+; CHECK-GI-NEXT:    ushll v0.2d, v0.2s, #19
 ; CHECK-GI-NEXT:    ret
   %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
   %2 = zext <2 x i32> %1 to <2 x i64>



More information about the llvm-commits mailing list