[llvm] 0b066e0 - [AArch64] add GlobalISel support for scalar CNT instruction

Ties Stuij via llvm-commits llvm-commits at lists.llvm.org
Fri Jan 6 03:08:53 PST 2023


Author: Ties Stuij
Date: 2023-01-06T11:08:34Z
New Revision: 0b066e02a6794fc086a89c38eafcb76a224fee59

URL: https://github.com/llvm/llvm-project/commit/0b066e02a6794fc086a89c38eafcb76a224fee59
DIFF: https://github.com/llvm/llvm-project/commit/0b066e02a6794fc086a89c38eafcb76a224fee59.diff

LOG: [AArch64] add GlobalISel support for scalar CNT instruction

When the CSSC feature is available, we should use the CNT instruction for the
s32, s64, and s128 types in GlobalISel's G_CTPOP.

spec:
https://developer.arm.com/documentation/ddi0602/2022-09/Base-Instructions/CNT--Count-bits-

Reviewed By: aemerson

Differential Revision: https://reviews.llvm.org/D139417
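
For context, a minimal C++ illustration of the user-visible effect (a sketch,
not part of the patch; the function name is made up):

    #include <cstdint>

    // With -mattr=+cssc and GlobalISel, the G_CTPOP produced for this
    // builtin is now legal at s64 and should select a single scalar CNT,
    // rather than being lowered through the AdvSIMD CNT/ADDV sequence.
    uint64_t popcount64(uint64_t x) {
      return __builtin_popcountll(x);
    }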

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
    llvm/test/CodeGen/AArch64/GlobalISel/legalize-ctpop-no-implicit-float.mir
    llvm/test/CodeGen/AArch64/GlobalISel/legalize-ctpop.mir

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 29df3341916f5..b64df6ae3369d 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -807,20 +807,36 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
       .customFor({{s32, s32}, {s64, s64}});
 
   auto always = [=](const LegalityQuery &Q) { return true; };
-  getActionDefinitionsBuilder(G_CTPOP)
-      .legalFor({{v8s8, v8s8}, {v16s8, v16s8}})
+  auto &CTPOPActions = getActionDefinitionsBuilder(G_CTPOP);
+  if (HasCSSC)
+    CTPOPActions
+        .legalFor({{s32, s32},
+                   {s64, s64},
+                   {v8s8, v8s8},
+                   {v16s8, v16s8}})
+        .customFor({{s128, s128},
+                    {v2s64, v2s64},
+                    {v2s32, v2s32},
+                    {v4s32, v4s32},
+                    {v4s16, v4s16},
+                    {v8s16, v8s16}});
+  else
+    CTPOPActions
+        .legalFor({{v8s8, v8s8},
+                   {v16s8, v16s8}})
+        .customFor({{s32, s32},
+                    {s64, s64},
+                    {s128, s128},
+                    {v2s64, v2s64},
+                    {v2s32, v2s32},
+                    {v4s32, v4s32},
+                    {v4s16, v4s16},
+                    {v8s16, v8s16}});
+  CTPOPActions
       .clampScalar(0, s32, s128)
       .widenScalarToNextPow2(0)
       .minScalarEltSameAsIf(always, 1, 0)
-      .maxScalarEltSameAsIf(always, 1, 0)
-      .customFor({{s32, s32},
-                  {s64, s64},
-                  {s128, s128},
-                  {v2s64, v2s64},
-                  {v2s32, v2s32},
-                  {v4s32, v4s32},
-                  {v4s16, v4s16},
-                  {v8s16, v8s16}});
+      .maxScalarEltSameAsIf(always, 1, 0);
 
   // TODO: Vector types.
   getActionDefinitionsBuilder({G_SADDSAT, G_SSUBSAT}).lowerIf(isScalar(0));
@@ -1278,10 +1294,10 @@ bool AArch64LegalizerInfo::legalizeBitfieldExtract(
 bool AArch64LegalizerInfo::legalizeCTPOP(MachineInstr &MI,
                                          MachineRegisterInfo &MRI,
                                          LegalizerHelper &Helper) const {
-  // While there is no integer popcount instruction, it can
-  // be more efficiently lowered to the following sequence that uses
-  // AdvSIMD registers/instructions as long as the copies to/from
-  // the AdvSIMD registers are cheap.
+  // When there is no integer popcount instruction (FEAT_CSSC isn't available),
+  // it can be more efficiently lowered to the following sequence that uses
+  // AdvSIMD registers/instructions as long as the copies to/from the AdvSIMD
+  // registers are cheap.
   //  FMOV    D0, X0        // copy 64-bit int to vector, high bits zero'd
   //  CNT     V0.8B, V0.8B  // 8xbyte pop-counts
   //  ADDV    B0, V0.8B     // sum 8xbyte pop-counts
@@ -1302,10 +1318,23 @@ bool AArch64LegalizerInfo::legalizeCTPOP(MachineInstr &MI,
   Register Dst = MI.getOperand(0).getReg();
   Register Val = MI.getOperand(1).getReg();
   LLT Ty = MRI.getType(Val);
+  unsigned Size = Ty.getSizeInBits();
 
   assert(Ty == MRI.getType(Dst) &&
          "Expected src and dst to have the same type!");
-  unsigned Size = Ty.getSizeInBits();
+
+  if (ST->hasCSSC() && Ty.isScalar() && Size == 128) {
+    LLT s64 = LLT::scalar(64);
+
+    auto Split = MIRBuilder.buildUnmerge(s64, Val);
+    auto CTPOP1 = MIRBuilder.buildCTPOP(s64, Split->getOperand(0));
+    auto CTPOP2 = MIRBuilder.buildCTPOP(s64, Split->getOperand(1));
+    auto Add = MIRBuilder.buildAdd(s64, CTPOP1, CTPOP2);
+
+    MIRBuilder.buildZExt(Dst, Add);
+    MI.eraseFromParent();
+    return true;
+  }
 
   if (!ST->hasNEON() ||
       MI.getMF()->getFunction().hasFnAttribute(Attribute::NoImplicitFloat)) {

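The s128 case above is custom-lowered rather than made legal: the value is
unmerged into two s64 halves, each half gets its own G_CTPOP (one CNT each
under CSSC), and the partial counts are added and zero-extended back to s128.
A hedged C++ sketch of the same identity (names illustrative, not from the
patch):

    #include <cstdint>

    // popcount(x128) == popcount(low 64 bits) + popcount(high 64 bits).
    // The sum is at most 128, so it always fits in the 64-bit result,
    // and the final widening back to 128 bits is a plain zero-extend.
    uint64_t popcount128(uint64_t Lo, uint64_t Hi) {
      return static_cast<uint64_t>(__builtin_popcountll(Lo)) +
             static_cast<uint64_t>(__builtin_popcountll(Hi));
    }
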
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ctpop-no-implicit-float.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ctpop-no-implicit-float.mir
index 8b9d04fc574cb..d2352be81503d 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ctpop-no-implicit-float.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ctpop-no-implicit-float.mir
@@ -1,5 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -mtriple=aarch64-unknown-unknown -verify-machineinstrs -run-pass=legalizer %s -o - 2>&1 | FileCheck %s
+# RUN: llc -mtriple=aarch64-unknown-unknown -verify-machineinstrs -run-pass=legalizer -mattr=+cssc %s -o - | FileCheck %s --check-prefix=CHECK-CSSC
 --- |
   define void @s32() noimplicitfloat { unreachable }
   define void @s64() noimplicitfloat { unreachable }
@@ -36,6 +37,13 @@ body:             |
     ; CHECK-NEXT: %ctpop:_(s32) = G_LSHR [[MUL]], [[C7]](s64)
     ; CHECK-NEXT: $w0 = COPY %ctpop(s32)
     ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    ; CHECK-CSSC-LABEL: name: s32
+    ; CHECK-CSSC: liveins: $w0
+    ; CHECK-CSSC-NEXT: {{  $}}
+    ; CHECK-CSSC-NEXT: %copy:_(s32) = COPY $w0
+    ; CHECK-CSSC-NEXT: %ctpop:_(s32) = G_CTPOP %copy(s32)
+    ; CHECK-CSSC-NEXT: $w0 = COPY %ctpop(s32)
+    ; CHECK-CSSC-NEXT: RET_ReallyLR implicit $w0
     %copy:_(s32) = COPY $w0
     %ctpop:_(s32) = G_CTPOP %copy(s32)
     $w0 = COPY %ctpop(s32)
@@ -74,6 +82,13 @@ body:             |
     ; CHECK-NEXT: %ctpop:_(s64) = G_LSHR [[MUL]], [[C7]](s64)
     ; CHECK-NEXT: $x0 = COPY %ctpop(s64)
     ; CHECK-NEXT: RET_ReallyLR implicit $x0
+    ; CHECK-CSSC-LABEL: name: s64
+    ; CHECK-CSSC: liveins: $x0
+    ; CHECK-CSSC-NEXT: {{  $}}
+    ; CHECK-CSSC-NEXT: %copy:_(s64) = COPY $x0
+    ; CHECK-CSSC-NEXT: %ctpop:_(s64) = G_CTPOP %copy(s64)
+    ; CHECK-CSSC-NEXT: $x0 = COPY %ctpop(s64)
+    ; CHECK-CSSC-NEXT: RET_ReallyLR implicit $x0
     %copy:_(s64) = COPY $x0
     %ctpop:_(s64) = G_CTPOP %copy(s64)
     $x0 = COPY %ctpop(s64)

diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ctpop.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ctpop.mir
index fc02f47011879..9aeff9f3b7846 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ctpop.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ctpop.mir
@@ -1,5 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -mtriple=aarch64-unknown-unknown -verify-machineinstrs -run-pass=legalizer %s -o - | FileCheck %s
+# RUN: llc -mtriple=aarch64-unknown-unknown -verify-machineinstrs -run-pass=legalizer -mattr=+cssc %s -o - | FileCheck %s --check-prefix=CHECK-CSSC
 ...
 ---
 name:            v8s8_legal
@@ -9,10 +10,18 @@ body:             |
     liveins: $d0
     ; CHECK-LABEL: name: v8s8_legal
     ; CHECK: liveins: $d0
-    ; CHECK: %copy:_(<8 x s8>) = COPY $d0
-    ; CHECK: %ctpop:_(<8 x s8>) = G_CTPOP %copy(<8 x s8>)
-    ; CHECK: $d0 = COPY %ctpop(<8 x s8>)
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %copy:_(<8 x s8>) = COPY $d0
+    ; CHECK-NEXT: %ctpop:_(<8 x s8>) = G_CTPOP %copy(<8 x s8>)
+    ; CHECK-NEXT: $d0 = COPY %ctpop(<8 x s8>)
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
+    ; CHECK-CSSC-LABEL: name: v8s8_legal
+    ; CHECK-CSSC: liveins: $d0
+    ; CHECK-CSSC-NEXT: {{  $}}
+    ; CHECK-CSSC-NEXT: %copy:_(<8 x s8>) = COPY $d0
+    ; CHECK-CSSC-NEXT: %ctpop:_(<8 x s8>) = G_CTPOP %copy(<8 x s8>)
+    ; CHECK-CSSC-NEXT: $d0 = COPY %ctpop(<8 x s8>)
+    ; CHECK-CSSC-NEXT: RET_ReallyLR implicit $d0
     %copy:_(<8 x s8>) = COPY $d0
     %ctpop:_(<8 x s8>) = G_CTPOP %copy(<8 x s8>)
     $d0 = COPY %ctpop(<8 x s8>)
@@ -27,10 +36,18 @@ body:             |
     liveins: $q0
     ; CHECK-LABEL: name: v16s8_legal
     ; CHECK: liveins: $q0
-    ; CHECK: %copy:_(<16 x s8>) = COPY $q0
-    ; CHECK: %ctpop:_(<16 x s8>) = G_CTPOP %copy(<16 x s8>)
-    ; CHECK: $q0 = COPY %ctpop(<16 x s8>)
-    ; CHECK: RET_ReallyLR implicit $q0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %copy:_(<16 x s8>) = COPY $q0
+    ; CHECK-NEXT: %ctpop:_(<16 x s8>) = G_CTPOP %copy(<16 x s8>)
+    ; CHECK-NEXT: $q0 = COPY %ctpop(<16 x s8>)
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
+    ; CHECK-CSSC-LABEL: name: v16s8_legal
+    ; CHECK-CSSC: liveins: $q0
+    ; CHECK-CSSC-NEXT: {{  $}}
+    ; CHECK-CSSC-NEXT: %copy:_(<16 x s8>) = COPY $q0
+    ; CHECK-CSSC-NEXT: %ctpop:_(<16 x s8>) = G_CTPOP %copy(<16 x s8>)
+    ; CHECK-CSSC-NEXT: $q0 = COPY %ctpop(<16 x s8>)
+    ; CHECK-CSSC-NEXT: RET_ReallyLR implicit $q0
     %copy:_(<16 x s8>) = COPY $q0
     %ctpop:_(<16 x s8>) = G_CTPOP %copy(<16 x s8>)
     $q0 = COPY %ctpop(<16 x s8>)
@@ -45,13 +62,21 @@ body:             |
     liveins: $w0
     ; CHECK-LABEL: name: s32_lower
     ; CHECK: liveins: $w0
-    ; CHECK: %copy:_(s32) = COPY $w0
-    ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT %copy(s32)
-    ; CHECK: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[ZEXT]](s64)
-    ; CHECK: [[CTPOP:%[0-9]+]]:_(<8 x s8>) = G_CTPOP [[BITCAST]](<8 x s8>)
-    ; CHECK: %ctpop:_(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), [[CTPOP]](<8 x s8>)
-    ; CHECK: $w0 = COPY %ctpop(s32)
-    ; CHECK: RET_ReallyLR implicit $w0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %copy:_(s32) = COPY $w0
+    ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT %copy(s32)
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[ZEXT]](s64)
+    ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(<8 x s8>) = G_CTPOP [[BITCAST]](<8 x s8>)
+    ; CHECK-NEXT: %ctpop:_(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), [[CTPOP]](<8 x s8>)
+    ; CHECK-NEXT: $w0 = COPY %ctpop(s32)
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    ; CHECK-CSSC-LABEL: name: s32_lower
+    ; CHECK-CSSC: liveins: $w0
+    ; CHECK-CSSC-NEXT: {{  $}}
+    ; CHECK-CSSC-NEXT: %copy:_(s32) = COPY $w0
+    ; CHECK-CSSC-NEXT: %ctpop:_(s32) = G_CTPOP %copy(s32)
+    ; CHECK-CSSC-NEXT: $w0 = COPY %ctpop(s32)
+    ; CHECK-CSSC-NEXT: RET_ReallyLR implicit $w0
     %copy:_(s32) = COPY $w0
     %ctpop:_(s32) = G_CTPOP %copy(s32)
     $w0 = COPY %ctpop(s32)
@@ -66,13 +91,21 @@ body:             |
     liveins: $x0
     ; CHECK-LABEL: name: s64_lower
     ; CHECK: liveins: $x0
-    ; CHECK: %copy:_(s64) = COPY $x0
-    ; CHECK: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST %copy(s64)
-    ; CHECK: [[CTPOP:%[0-9]+]]:_(<8 x s8>) = G_CTPOP [[BITCAST]](<8 x s8>)
-    ; CHECK: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), [[CTPOP]](<8 x s8>)
-    ; CHECK: %ctpop:_(s64) = G_ZEXT [[INT]](s32)
-    ; CHECK: $x0 = COPY %ctpop(s64)
-    ; CHECK: RET_ReallyLR implicit $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %copy:_(s64) = COPY $x0
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST %copy(s64)
+    ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(<8 x s8>) = G_CTPOP [[BITCAST]](<8 x s8>)
+    ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), [[CTPOP]](<8 x s8>)
+    ; CHECK-NEXT: %ctpop:_(s64) = G_ZEXT [[INT]](s32)
+    ; CHECK-NEXT: $x0 = COPY %ctpop(s64)
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
+    ; CHECK-CSSC-LABEL: name: s64_lower
+    ; CHECK-CSSC: liveins: $x0
+    ; CHECK-CSSC-NEXT: {{  $}}
+    ; CHECK-CSSC-NEXT: %copy:_(s64) = COPY $x0
+    ; CHECK-CSSC-NEXT: %ctpop:_(s64) = G_CTPOP %copy(s64)
+    ; CHECK-CSSC-NEXT: $x0 = COPY %ctpop(s64)
+    ; CHECK-CSSC-NEXT: RET_ReallyLR implicit $x0
     %copy:_(s64) = COPY $x0
     %ctpop:_(s64) = G_CTPOP %copy(s64)
     $x0 = COPY %ctpop(s64)
@@ -84,23 +117,43 @@ name:            s128_lower
 tracksRegLiveness: true
 body:             |
   bb.0:
-    liveins: $q0
+    liveins: $x0, $x1
+
     ; CHECK-LABEL: name: s128_lower
-    ; CHECK: liveins: $q0
-    ; CHECK: %copy:_(s128) = COPY $q0
-    ; CHECK: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST %copy(s128)
-    ; CHECK: [[CTPOP:%[0-9]+]]:_(<16 x s8>) = G_CTPOP [[BITCAST]](<16 x s8>)
-    ; CHECK: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), [[CTPOP]](<16 x s8>)
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[INT]](s32), [[C]](s32)
-    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-    ; CHECK: %ctpop:_(s128) = G_MERGE_VALUES [[MV]](s64), [[C1]](s64)
-    ; CHECK: $q0 = COPY %ctpop(s128)
-    ; CHECK: RET_ReallyLR implicit $q0
-    %copy:_(s128) = COPY $q0
-    %ctpop:_(s128) = G_CTPOP %copy(s128)
-    $q0 = COPY %ctpop(s128)
-    RET_ReallyLR implicit $q0
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+    ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[COPY]](s64), [[COPY1]](s64)
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[MV]](s128)
+    ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(<16 x s8>) = G_CTPOP [[BITCAST]](<16 x s8>)
+    ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), [[CTPOP]](<16 x s8>)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[INT]](s32), [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: $x0 = COPY [[MV1]](s64)
+    ; CHECK-NEXT: $x1 = COPY [[C1]](s64)
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $x1
+    ; CHECK-CSSC-LABEL: name: s128_lower
+    ; CHECK-CSSC: liveins: $x0, $x1
+    ; CHECK-CSSC-NEXT: {{  $}}
+    ; CHECK-CSSC-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK-CSSC-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+    ; CHECK-CSSC-NEXT: [[CTPOP:%[0-9]+]]:_(s64) = G_CTPOP [[COPY]](s64)
+    ; CHECK-CSSC-NEXT: [[CTPOP1:%[0-9]+]]:_(s64) = G_CTPOP [[COPY1]](s64)
+    ; CHECK-CSSC-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[CTPOP]], [[CTPOP1]]
+    ; CHECK-CSSC-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-CSSC-NEXT: $x0 = COPY [[ADD]](s64)
+    ; CHECK-CSSC-NEXT: $x1 = COPY [[C]](s64)
+    ; CHECK-CSSC-NEXT: RET_ReallyLR implicit $x0, implicit $x1
+    %1:_(s64) = COPY $x0
+    %2:_(s64) = COPY $x1
+    %0:_(s128) = G_MERGE_VALUES %1(s64), %2(s64)
+    %3:_(s128) = G_CTPOP %0(s128)
+    %4:_(s64), %5:_(s64) = G_UNMERGE_VALUES %3(s128)
+    $x0 = COPY %4(s64)
+    $x1 = COPY %5(s64)
+    RET_ReallyLR implicit $x0, implicit $x1
 
 ...
 ---
@@ -112,16 +165,27 @@ body:             |
 
     ; CHECK-LABEL: name: widen_s16
     ; CHECK: liveins: $w0
-    ; CHECK: %copy:_(s32) = COPY $w0
-    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535
-    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT %copy(s32)
-    ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]]
-    ; CHECK: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[AND]](s64)
-    ; CHECK: [[CTPOP:%[0-9]+]]:_(<8 x s8>) = G_CTPOP [[BITCAST]](<8 x s8>)
-    ; CHECK: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), [[CTPOP]](<8 x s8>)
-    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[INT]](s32)
-    ; CHECK: $w0 = COPY [[COPY]](s32)
-    ; CHECK: RET_ReallyLR implicit $w0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %copy:_(s32) = COPY $w0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT %copy(s32)
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]]
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[AND]](s64)
+    ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(<8 x s8>) = G_CTPOP [[BITCAST]](<8 x s8>)
+    ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), [[CTPOP]](<8 x s8>)
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[INT]](s32)
+    ; CHECK-NEXT: $w0 = COPY [[COPY]](s32)
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    ; CHECK-CSSC-LABEL: name: widen_s16
+    ; CHECK-CSSC: liveins: $w0
+    ; CHECK-CSSC-NEXT: {{  $}}
+    ; CHECK-CSSC-NEXT: %copy:_(s32) = COPY $w0
+    ; CHECK-CSSC-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK-CSSC-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND %copy, [[C]]
+    ; CHECK-CSSC-NEXT: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[AND]](s32)
+    ; CHECK-CSSC-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[CTPOP]](s32)
+    ; CHECK-CSSC-NEXT: $w0 = COPY [[COPY]](s32)
+    ; CHECK-CSSC-NEXT: RET_ReallyLR implicit $w0
     %copy:_(s32) = COPY $w0
     %trunc:_(s16) = G_TRUNC %copy(s32)
     %ctpop:_(s16) = G_CTPOP %trunc(s16)
@@ -139,16 +203,27 @@ body:             |
 
     ; CHECK-LABEL: name: widen_s8
     ; CHECK: liveins: $w0
-    ; CHECK: %copy:_(s32) = COPY $w0
-    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 255
-    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT %copy(s32)
-    ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]]
-    ; CHECK: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[AND]](s64)
-    ; CHECK: [[CTPOP:%[0-9]+]]:_(<8 x s8>) = G_CTPOP [[BITCAST]](<8 x s8>)
-    ; CHECK: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), [[CTPOP]](<8 x s8>)
-    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[INT]](s32)
-    ; CHECK: $w0 = COPY [[COPY]](s32)
-    ; CHECK: RET_ReallyLR implicit $w0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %copy:_(s32) = COPY $w0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 255
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT %copy(s32)
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]]
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[AND]](s64)
+    ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(<8 x s8>) = G_CTPOP [[BITCAST]](<8 x s8>)
+    ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), [[CTPOP]](<8 x s8>)
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[INT]](s32)
+    ; CHECK-NEXT: $w0 = COPY [[COPY]](s32)
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    ; CHECK-CSSC-LABEL: name: widen_s8
+    ; CHECK-CSSC: liveins: $w0
+    ; CHECK-CSSC-NEXT: {{  $}}
+    ; CHECK-CSSC-NEXT: %copy:_(s32) = COPY $w0
+    ; CHECK-CSSC-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CHECK-CSSC-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND %copy, [[C]]
+    ; CHECK-CSSC-NEXT: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[AND]](s32)
+    ; CHECK-CSSC-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[CTPOP]](s32)
+    ; CHECK-CSSC-NEXT: $w0 = COPY [[COPY]](s32)
+    ; CHECK-CSSC-NEXT: RET_ReallyLR implicit $w0
     %copy:_(s32) = COPY $w0
     %trunc:_(s8) = G_TRUNC %copy(s32)
     %ctpop:_(s8) = G_CTPOP %trunc(s8)
@@ -166,16 +241,27 @@ body:             |
 
     ; CHECK-LABEL: name: widen_s3
     ; CHECK: liveins: $w0
-    ; CHECK: %copy:_(s32) = COPY $w0
-    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
-    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT %copy(s32)
-    ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]]
-    ; CHECK: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[AND]](s64)
-    ; CHECK: [[CTPOP:%[0-9]+]]:_(<8 x s8>) = G_CTPOP [[BITCAST]](<8 x s8>)
-    ; CHECK: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), [[CTPOP]](<8 x s8>)
-    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[INT]](s32)
-    ; CHECK: $w0 = COPY [[COPY]](s32)
-    ; CHECK: RET_ReallyLR implicit $w0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %copy:_(s32) = COPY $w0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT %copy(s32)
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]]
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[AND]](s64)
+    ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(<8 x s8>) = G_CTPOP [[BITCAST]](<8 x s8>)
+    ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), [[CTPOP]](<8 x s8>)
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[INT]](s32)
+    ; CHECK-NEXT: $w0 = COPY [[COPY]](s32)
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    ; CHECK-CSSC-LABEL: name: widen_s3
+    ; CHECK-CSSC: liveins: $w0
+    ; CHECK-CSSC-NEXT: {{  $}}
+    ; CHECK-CSSC-NEXT: %copy:_(s32) = COPY $w0
+    ; CHECK-CSSC-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
+    ; CHECK-CSSC-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND %copy, [[C]]
+    ; CHECK-CSSC-NEXT: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[AND]](s32)
+    ; CHECK-CSSC-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[CTPOP]](s32)
+    ; CHECK-CSSC-NEXT: $w0 = COPY [[COPY]](s32)
+    ; CHECK-CSSC-NEXT: RET_ReallyLR implicit $w0
     %copy:_(s32) = COPY $w0
     %trunc:_(s3) = G_TRUNC %copy(s32)
     %ctpop:_(s3) = G_CTPOP %trunc(s3)
@@ -192,16 +278,27 @@ body:             |
     liveins: $w0
    ; CHECK-LABEL: name: different_sizes
     ; CHECK: liveins: $w0
-    ; CHECK: %copy:_(s32) = COPY $w0
-    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 255
-    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT %copy(s32)
-    ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]]
-    ; CHECK: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[AND]](s64)
-    ; CHECK: [[CTPOP:%[0-9]+]]:_(<8 x s8>) = G_CTPOP [[BITCAST]](<8 x s8>)
-    ; CHECK: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), [[CTPOP]](<8 x s8>)
-    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[INT]](s32)
-    ; CHECK: $w0 = COPY [[COPY]](s32)
-    ; CHECK: RET_ReallyLR implicit $w0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %copy:_(s32) = COPY $w0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 255
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT %copy(s32)
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]]
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[AND]](s64)
+    ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(<8 x s8>) = G_CTPOP [[BITCAST]](<8 x s8>)
+    ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), [[CTPOP]](<8 x s8>)
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[INT]](s32)
+    ; CHECK-NEXT: $w0 = COPY [[COPY]](s32)
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    ; CHECK-CSSC-LABEL: name: different_sizes
+    ; CHECK-CSSC: liveins: $w0
+    ; CHECK-CSSC-NEXT: {{  $}}
+    ; CHECK-CSSC-NEXT: %copy:_(s32) = COPY $w0
+    ; CHECK-CSSC-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CHECK-CSSC-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND %copy, [[C]]
+    ; CHECK-CSSC-NEXT: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[AND]](s32)
+    ; CHECK-CSSC-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[CTPOP]](s32)
+    ; CHECK-CSSC-NEXT: $w0 = COPY [[COPY]](s32)
+    ; CHECK-CSSC-NEXT: RET_ReallyLR implicit $w0
     %copy:_(s32) = COPY $w0
     %trunc:_(s8) = G_TRUNC %copy(s32)
     %ctpop:_(s16) = G_CTPOP %trunc(s8)
@@ -219,12 +316,22 @@ body:             |
 
     ; CHECK-LABEL: name: custom_8x16
     ; CHECK: liveins: $q0
-    ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
-    ; CHECK: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[COPY]](<8 x s16>)
-    ; CHECK: [[CTPOP:%[0-9]+]]:_(<16 x s8>) = G_CTPOP [[BITCAST]](<16 x s8>)
-    ; CHECK: [[INT:%[0-9]+]]:_(<8 x s16>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[CTPOP]](<16 x s8>)
-    ; CHECK: $q0 = COPY [[INT]](<8 x s16>)
-    ; CHECK: RET_ReallyLR implicit $q0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[COPY]](<8 x s16>)
+    ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(<16 x s8>) = G_CTPOP [[BITCAST]](<16 x s8>)
+    ; CHECK-NEXT: [[INT:%[0-9]+]]:_(<8 x s16>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[CTPOP]](<16 x s8>)
+    ; CHECK-NEXT: $q0 = COPY [[INT]](<8 x s16>)
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
+    ; CHECK-CSSC-LABEL: name: custom_8x16
+    ; CHECK-CSSC: liveins: $q0
+    ; CHECK-CSSC-NEXT: {{  $}}
+    ; CHECK-CSSC-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
+    ; CHECK-CSSC-NEXT: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[COPY]](<8 x s16>)
+    ; CHECK-CSSC-NEXT: [[CTPOP:%[0-9]+]]:_(<16 x s8>) = G_CTPOP [[BITCAST]](<16 x s8>)
+    ; CHECK-CSSC-NEXT: [[INT:%[0-9]+]]:_(<8 x s16>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[CTPOP]](<16 x s8>)
+    ; CHECK-CSSC-NEXT: $q0 = COPY [[INT]](<8 x s16>)
+    ; CHECK-CSSC-NEXT: RET_ReallyLR implicit $q0
     %0:_(<8 x s16>) = COPY $q0
     %1:_(<8 x s16>) = G_CTPOP %0(<8 x s16>)
     $q0 = COPY %1(<8 x s16>)
@@ -240,13 +347,24 @@ body:             |
 
     ; CHECK-LABEL: name: custom_4x32
     ; CHECK: liveins: $q0
-    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
-    ; CHECK: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[COPY]](<4 x s32>)
-    ; CHECK: [[CTPOP:%[0-9]+]]:_(<16 x s8>) = G_CTPOP [[BITCAST]](<16 x s8>)
-    ; CHECK: [[INT:%[0-9]+]]:_(<8 x s16>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[CTPOP]](<16 x s8>)
-    ; CHECK: [[INT1:%[0-9]+]]:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[INT]](<8 x s16>)
-    ; CHECK: $q0 = COPY [[INT1]](<4 x s32>)
-    ; CHECK: RET_ReallyLR implicit $q0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[COPY]](<4 x s32>)
+    ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(<16 x s8>) = G_CTPOP [[BITCAST]](<16 x s8>)
+    ; CHECK-NEXT: [[INT:%[0-9]+]]:_(<8 x s16>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[CTPOP]](<16 x s8>)
+    ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[INT]](<8 x s16>)
+    ; CHECK-NEXT: $q0 = COPY [[INT1]](<4 x s32>)
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
+    ; CHECK-CSSC-LABEL: name: custom_4x32
+    ; CHECK-CSSC: liveins: $q0
+    ; CHECK-CSSC-NEXT: {{  $}}
+    ; CHECK-CSSC-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+    ; CHECK-CSSC-NEXT: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[COPY]](<4 x s32>)
+    ; CHECK-CSSC-NEXT: [[CTPOP:%[0-9]+]]:_(<16 x s8>) = G_CTPOP [[BITCAST]](<16 x s8>)
+    ; CHECK-CSSC-NEXT: [[INT:%[0-9]+]]:_(<8 x s16>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[CTPOP]](<16 x s8>)
+    ; CHECK-CSSC-NEXT: [[INT1:%[0-9]+]]:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[INT]](<8 x s16>)
+    ; CHECK-CSSC-NEXT: $q0 = COPY [[INT1]](<4 x s32>)
+    ; CHECK-CSSC-NEXT: RET_ReallyLR implicit $q0
     %0:_(<4 x s32>) = COPY $q0
     %1:_(<4 x s32>) = G_CTPOP %0(<4 x s32>)
     $q0 = COPY %1(<4 x s32>)
@@ -262,14 +380,26 @@ body:             |
 
     ; CHECK-LABEL: name: custom_2x64
     ; CHECK: liveins: $q0
-    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
-    ; CHECK: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[COPY]](<2 x s64>)
-    ; CHECK: [[CTPOP:%[0-9]+]]:_(<16 x s8>) = G_CTPOP [[BITCAST]](<16 x s8>)
-    ; CHECK: [[INT:%[0-9]+]]:_(<8 x s16>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[CTPOP]](<16 x s8>)
-    ; CHECK: [[INT1:%[0-9]+]]:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[INT]](<8 x s16>)
-    ; CHECK: [[INT2:%[0-9]+]]:_(<2 x s64>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[INT1]](<4 x s32>)
-    ; CHECK: $q0 = COPY [[INT2]](<2 x s64>)
-    ; CHECK: RET_ReallyLR implicit $q0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[COPY]](<2 x s64>)
+    ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(<16 x s8>) = G_CTPOP [[BITCAST]](<16 x s8>)
+    ; CHECK-NEXT: [[INT:%[0-9]+]]:_(<8 x s16>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[CTPOP]](<16 x s8>)
+    ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[INT]](<8 x s16>)
+    ; CHECK-NEXT: [[INT2:%[0-9]+]]:_(<2 x s64>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[INT1]](<4 x s32>)
+    ; CHECK-NEXT: $q0 = COPY [[INT2]](<2 x s64>)
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
+    ; CHECK-CSSC-LABEL: name: custom_2x64
+    ; CHECK-CSSC: liveins: $q0
+    ; CHECK-CSSC-NEXT: {{  $}}
+    ; CHECK-CSSC-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+    ; CHECK-CSSC-NEXT: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[COPY]](<2 x s64>)
+    ; CHECK-CSSC-NEXT: [[CTPOP:%[0-9]+]]:_(<16 x s8>) = G_CTPOP [[BITCAST]](<16 x s8>)
+    ; CHECK-CSSC-NEXT: [[INT:%[0-9]+]]:_(<8 x s16>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[CTPOP]](<16 x s8>)
+    ; CHECK-CSSC-NEXT: [[INT1:%[0-9]+]]:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[INT]](<8 x s16>)
+    ; CHECK-CSSC-NEXT: [[INT2:%[0-9]+]]:_(<2 x s64>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[INT1]](<4 x s32>)
+    ; CHECK-CSSC-NEXT: $q0 = COPY [[INT2]](<2 x s64>)
+    ; CHECK-CSSC-NEXT: RET_ReallyLR implicit $q0
     %0:_(<2 x s64>) = COPY $q0
     %1:_(<2 x s64>) = G_CTPOP %0(<2 x s64>)
     $q0 = COPY %1(<2 x s64>)
@@ -285,12 +415,22 @@ body:             |
 
     ; CHECK-LABEL: name: custom_4x16
     ; CHECK: liveins: $d0
-    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0
-    ; CHECK: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[COPY]](<4 x s16>)
-    ; CHECK: [[CTPOP:%[0-9]+]]:_(<8 x s8>) = G_CTPOP [[BITCAST]](<8 x s8>)
-    ; CHECK: [[INT:%[0-9]+]]:_(<4 x s16>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[CTPOP]](<8 x s8>)
-    ; CHECK: $d0 = COPY [[INT]](<4 x s16>)
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[COPY]](<4 x s16>)
+    ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(<8 x s8>) = G_CTPOP [[BITCAST]](<8 x s8>)
+    ; CHECK-NEXT: [[INT:%[0-9]+]]:_(<4 x s16>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[CTPOP]](<8 x s8>)
+    ; CHECK-NEXT: $d0 = COPY [[INT]](<4 x s16>)
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
+    ; CHECK-CSSC-LABEL: name: custom_4x16
+    ; CHECK-CSSC: liveins: $d0
+    ; CHECK-CSSC-NEXT: {{  $}}
+    ; CHECK-CSSC-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0
+    ; CHECK-CSSC-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[COPY]](<4 x s16>)
+    ; CHECK-CSSC-NEXT: [[CTPOP:%[0-9]+]]:_(<8 x s8>) = G_CTPOP [[BITCAST]](<8 x s8>)
+    ; CHECK-CSSC-NEXT: [[INT:%[0-9]+]]:_(<4 x s16>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[CTPOP]](<8 x s8>)
+    ; CHECK-CSSC-NEXT: $d0 = COPY [[INT]](<4 x s16>)
+    ; CHECK-CSSC-NEXT: RET_ReallyLR implicit $d0
     %0:_(<4 x s16>) = COPY $d0
     %1:_(<4 x s16>) = G_CTPOP %0(<4 x s16>)
     $d0 = COPY %1(<4 x s16>)
@@ -306,13 +446,24 @@ body:             |
 
     ; CHECK-LABEL: name: custom_2x32
     ; CHECK: liveins: $d0
-    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
-    ; CHECK: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[COPY]](<2 x s32>)
-    ; CHECK: [[CTPOP:%[0-9]+]]:_(<8 x s8>) = G_CTPOP [[BITCAST]](<8 x s8>)
-    ; CHECK: [[INT:%[0-9]+]]:_(<4 x s16>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[CTPOP]](<8 x s8>)
-    ; CHECK: [[INT1:%[0-9]+]]:_(<2 x s32>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[INT]](<4 x s16>)
-    ; CHECK: $d0 = COPY [[INT1]](<2 x s32>)
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[COPY]](<2 x s32>)
+    ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(<8 x s8>) = G_CTPOP [[BITCAST]](<8 x s8>)
+    ; CHECK-NEXT: [[INT:%[0-9]+]]:_(<4 x s16>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[CTPOP]](<8 x s8>)
+    ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(<2 x s32>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[INT]](<4 x s16>)
+    ; CHECK-NEXT: $d0 = COPY [[INT1]](<2 x s32>)
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
+    ; CHECK-CSSC-LABEL: name: custom_2x32
+    ; CHECK-CSSC: liveins: $d0
+    ; CHECK-CSSC-NEXT: {{  $}}
+    ; CHECK-CSSC-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+    ; CHECK-CSSC-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[COPY]](<2 x s32>)
+    ; CHECK-CSSC-NEXT: [[CTPOP:%[0-9]+]]:_(<8 x s8>) = G_CTPOP [[BITCAST]](<8 x s8>)
+    ; CHECK-CSSC-NEXT: [[INT:%[0-9]+]]:_(<4 x s16>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[CTPOP]](<8 x s8>)
+    ; CHECK-CSSC-NEXT: [[INT1:%[0-9]+]]:_(<2 x s32>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[INT]](<4 x s16>)
+    ; CHECK-CSSC-NEXT: $d0 = COPY [[INT1]](<2 x s32>)
+    ; CHECK-CSSC-NEXT: RET_ReallyLR implicit $d0
     %0:_(<2 x s32>) = COPY $d0
     %1:_(<2 x s32>) = G_CTPOP %0(<2 x s32>)
     $d0 = COPY %1(<2 x s32>)

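The CHECK-CSSC assertions above follow from the RUN line added to each test;
they can be re-checked (or regenerated with utils/update_mir_test_checks.py,
per the note at the top of the files) with an invocation along these lines:

    llc -mtriple=aarch64-unknown-unknown -verify-machineinstrs \
        -run-pass=legalizer -mattr=+cssc \
        llvm/test/CodeGen/AArch64/GlobalISel/legalize-ctpop.mir -o - \
      | FileCheck llvm/test/CodeGen/AArch64/GlobalISel/legalize-ctpop.mir \
          --check-prefix=CHECK-CSSC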
