[llvm] b087699 - [AArch64][GlobalISel] Clean up CTLZ vector type legalization. (#131514)

via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 19 12:28:40 PDT 2025


Author: David Green
Date: 2025-03-19T19:28:36Z
New Revision: b0876994ebe3118407a8a70d79bb3149cebb5fda

URL: https://github.com/llvm/llvm-project/commit/b0876994ebe3118407a8a70d79bb3149cebb5fda
DIFF: https://github.com/llvm/llvm-project/commit/b0876994ebe3118407a8a70d79bb3149cebb5fda.diff

LOG: [AArch64][GlobalISel] Clean up CTLZ vector type legalization. (#131514)

Similar to other operations, vectors of s8, s16 and s32 elements are clamped
to legal vector sizes, but in this case vectors of s64 elements are scalarized
to use the GPR instructions. This allows vector types to be split as opposed
to being scalarized.

Added: 
    

Modified: 
    llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
    llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
    llvm/test/CodeGen/AArch64/ctlz.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 622d6d0052c51..04b1f8429d93c 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -6139,6 +6139,7 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
   case TargetOpcode::G_FCANONICALIZE:
   case TargetOpcode::G_SEXT_INREG:
   case TargetOpcode::G_ABS:
+  case TargetOpcode::G_CTLZ:
     if (TypeIdx != 0)
       return UnableToLegalize;
     Observer.changingInstr(MI);

diff  --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 0da3c73b6926d..3270a8145399b 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -326,12 +326,23 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
       .maxScalarEltSameAsIf(always, 1, 0);
 
   getActionDefinitionsBuilder(G_CTLZ)
-      .legalForCartesianProduct(
-          {s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
-      .scalarize(1)
+      .legalFor({{s32, s32},
+                 {s64, s64},
+                 {v8s8, v8s8},
+                 {v16s8, v16s8},
+                 {v4s16, v4s16},
+                 {v8s16, v8s16},
+                 {v2s32, v2s32},
+                 {v4s32, v4s32}})
       .widenScalarToNextPow2(1, /*Min=*/32)
       .clampScalar(1, s32, s64)
+      .clampNumElements(0, v8s8, v16s8)
+      .clampNumElements(0, v4s16, v8s16)
+      .clampNumElements(0, v2s32, v4s32)
+      .moreElementsToNextPow2(0)
+      .scalarizeIf(scalarOrEltWiderThan(0, 32), 0)
       .scalarSameSizeAs(0, 1);
+
   getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF).lower();
 
   getActionDefinitionsBuilder(G_CTTZ)

diff  --git a/llvm/test/CodeGen/AArch64/ctlz.ll b/llvm/test/CodeGen/AArch64/ctlz.ll
index 7b8f6cf24f278..742433c50d390 100644
--- a/llvm/test/CodeGen/AArch64/ctlz.ll
+++ b/llvm/test/CodeGen/AArch64/ctlz.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -mtriple=aarch64-none-eabi -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
-; RUN: llc -mtriple=aarch64-none-eabi -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+; RUN: llc -mtriple=aarch64-none-eabi -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
 
 
 define void @v2i8(ptr %p1) {
@@ -21,14 +21,14 @@ define void @v2i8(ptr %p1) {
 ;
 ; CHECK-GI-LABEL: v2i8:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    ldrb w8, [x0]
-; CHECK-GI-NEXT:    ldrb w9, [x0, #1]
-; CHECK-GI-NEXT:    clz w8, w8
-; CHECK-GI-NEXT:    clz w9, w9
-; CHECK-GI-NEXT:    sub w8, w8, #24
-; CHECK-GI-NEXT:    sub w9, w9, #24
-; CHECK-GI-NEXT:    strb w8, [x0]
-; CHECK-GI-NEXT:    strb w9, [x0, #1]
+; CHECK-GI-NEXT:    ldr b0, [x0]
+; CHECK-GI-NEXT:    ldr b1, [x0, #1]
+; CHECK-GI-NEXT:    add x8, x0, #1
+; CHECK-GI-NEXT:    mov v0.b[0], v0.b[0]
+; CHECK-GI-NEXT:    mov v0.b[1], v1.b[0]
+; CHECK-GI-NEXT:    clz v0.8b, v0.8b
+; CHECK-GI-NEXT:    st1 { v0.b }[0], [x0]
+; CHECK-GI-NEXT:    st1 { v0.b }[1], [x8]
 ; CHECK-GI-NEXT:    ret
 entry:
   %d = load <2 x i8>, ptr %p1
@@ -59,18 +59,18 @@ define void @v3i8(ptr %p1) {
 ;
 ; CHECK-GI-LABEL: v3i8:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    ldrb w8, [x0]
-; CHECK-GI-NEXT:    ldrb w9, [x0, #1]
-; CHECK-GI-NEXT:    ldrb w10, [x0, #2]
-; CHECK-GI-NEXT:    clz w8, w8
-; CHECK-GI-NEXT:    clz w9, w9
-; CHECK-GI-NEXT:    clz w10, w10
-; CHECK-GI-NEXT:    sub w8, w8, #24
-; CHECK-GI-NEXT:    sub w9, w9, #24
-; CHECK-GI-NEXT:    strb w8, [x0]
-; CHECK-GI-NEXT:    sub w8, w10, #24
-; CHECK-GI-NEXT:    strb w9, [x0, #1]
-; CHECK-GI-NEXT:    strb w8, [x0, #2]
+; CHECK-GI-NEXT:    ldr b0, [x0]
+; CHECK-GI-NEXT:    ldr b1, [x0, #1]
+; CHECK-GI-NEXT:    add x8, x0, #1
+; CHECK-GI-NEXT:    add x9, x0, #2
+; CHECK-GI-NEXT:    mov v0.b[0], v0.b[0]
+; CHECK-GI-NEXT:    mov v0.b[1], v1.b[0]
+; CHECK-GI-NEXT:    ldr b1, [x0, #2]
+; CHECK-GI-NEXT:    mov v0.b[2], v1.b[0]
+; CHECK-GI-NEXT:    clz v0.8b, v0.8b
+; CHECK-GI-NEXT:    st1 { v0.b }[0], [x0]
+; CHECK-GI-NEXT:    st1 { v0.b }[1], [x8]
+; CHECK-GI-NEXT:    st1 { v0.b }[2], [x9]
 ; CHECK-GI-NEXT:    ret
 entry:
   %d = load <3 x i8>, ptr %p1
@@ -95,29 +95,15 @@ define void @v4i8(ptr %p1) {
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    ldr w8, [x0]
 ; CHECK-GI-NEXT:    fmov s0, w8
-; CHECK-GI-NEXT:    uxtb w8, w8
-; CHECK-GI-NEXT:    clz w8, w8
 ; CHECK-GI-NEXT:    mov b1, v0.b[1]
-; CHECK-GI-NEXT:    mov b2, v0.b[2]
-; CHECK-GI-NEXT:    sub w8, w8, #24
+; CHECK-GI-NEXT:    mov v2.b[0], v0.b[0]
+; CHECK-GI-NEXT:    mov b3, v0.b[2]
 ; CHECK-GI-NEXT:    mov b0, v0.b[3]
-; CHECK-GI-NEXT:    fmov w9, s1
-; CHECK-GI-NEXT:    fmov w10, s2
-; CHECK-GI-NEXT:    fmov s1, w8
-; CHECK-GI-NEXT:    uxtb w9, w9
-; CHECK-GI-NEXT:    uxtb w8, w10
-; CHECK-GI-NEXT:    clz w9, w9
-; CHECK-GI-NEXT:    clz w8, w8
-; CHECK-GI-NEXT:    sub w9, w9, #24
-; CHECK-GI-NEXT:    sub w8, w8, #24
-; CHECK-GI-NEXT:    mov v1.b[1], w9
-; CHECK-GI-NEXT:    fmov w9, s0
-; CHECK-GI-NEXT:    uxtb w9, w9
-; CHECK-GI-NEXT:    mov v1.b[2], w8
-; CHECK-GI-NEXT:    clz w8, w9
-; CHECK-GI-NEXT:    sub w8, w8, #24
-; CHECK-GI-NEXT:    mov v1.b[3], w8
-; CHECK-GI-NEXT:    fmov w8, s1
+; CHECK-GI-NEXT:    mov v2.b[1], v1.b[0]
+; CHECK-GI-NEXT:    mov v2.b[2], v3.b[0]
+; CHECK-GI-NEXT:    mov v2.b[3], v0.b[0]
+; CHECK-GI-NEXT:    clz v0.8b, v2.8b
+; CHECK-GI-NEXT:    fmov w8, s0
 ; CHECK-GI-NEXT:    str w8, [x0]
 ; CHECK-GI-NEXT:    ret
 entry:
@@ -148,145 +134,11 @@ entry:
 }
 
 define <32 x i8> @v32i8(<32 x i8> %d) {
-; CHECK-SD-LABEL: v32i8:
-; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    clz v0.16b, v0.16b
-; CHECK-SD-NEXT:    clz v1.16b, v1.16b
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: v32i8:
-; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    umov w9, v0.b[0]
-; CHECK-GI-NEXT:    umov w11, v1.b[0]
-; CHECK-GI-NEXT:    umov w10, v0.b[1]
-; CHECK-GI-NEXT:    umov w13, v1.b[1]
-; CHECK-GI-NEXT:    umov w8, v0.b[2]
-; CHECK-GI-NEXT:    clz w9, w9
-; CHECK-GI-NEXT:    clz w11, w11
-; CHECK-GI-NEXT:    clz w10, w10
-; CHECK-GI-NEXT:    sub w14, w9, #24
-; CHECK-GI-NEXT:    sub w12, w11, #24
-; CHECK-GI-NEXT:    clz w11, w13
-; CHECK-GI-NEXT:    fmov s2, w14
-; CHECK-GI-NEXT:    fmov s3, w12
-; CHECK-GI-NEXT:    umov w9, v1.b[2]
-; CHECK-GI-NEXT:    sub w10, w10, #24
-; CHECK-GI-NEXT:    sub w11, w11, #24
-; CHECK-GI-NEXT:    clz w8, w8
-; CHECK-GI-NEXT:    sub w8, w8, #24
-; CHECK-GI-NEXT:    mov v2.b[1], w10
-; CHECK-GI-NEXT:    mov v3.b[1], w11
-; CHECK-GI-NEXT:    umov w10, v0.b[3]
-; CHECK-GI-NEXT:    clz w9, w9
-; CHECK-GI-NEXT:    umov w11, v1.b[3]
-; CHECK-GI-NEXT:    sub w9, w9, #24
-; CHECK-GI-NEXT:    mov v2.b[2], w8
-; CHECK-GI-NEXT:    mov v3.b[2], w9
-; CHECK-GI-NEXT:    clz w8, w10
-; CHECK-GI-NEXT:    umov w9, v0.b[4]
-; CHECK-GI-NEXT:    clz w10, w11
-; CHECK-GI-NEXT:    umov w11, v1.b[4]
-; CHECK-GI-NEXT:    sub w8, w8, #24
-; CHECK-GI-NEXT:    sub w10, w10, #24
-; CHECK-GI-NEXT:    mov v2.b[3], w8
-; CHECK-GI-NEXT:    mov v3.b[3], w10
-; CHECK-GI-NEXT:    umov w8, v0.b[5]
-; CHECK-GI-NEXT:    clz w9, w9
-; CHECK-GI-NEXT:    clz w10, w11
-; CHECK-GI-NEXT:    umov w11, v1.b[5]
-; CHECK-GI-NEXT:    sub w9, w9, #24
-; CHECK-GI-NEXT:    sub w10, w10, #24
-; CHECK-GI-NEXT:    mov v2.b[4], w9
-; CHECK-GI-NEXT:    mov v3.b[4], w10
-; CHECK-GI-NEXT:    clz w8, w8
-; CHECK-GI-NEXT:    umov w9, v0.b[6]
-; CHECK-GI-NEXT:    clz w10, w11
-; CHECK-GI-NEXT:    umov w11, v1.b[6]
-; CHECK-GI-NEXT:    sub w8, w8, #24
-; CHECK-GI-NEXT:    sub w10, w10, #24
-; CHECK-GI-NEXT:    mov v2.b[5], w8
-; CHECK-GI-NEXT:    mov v3.b[5], w10
-; CHECK-GI-NEXT:    umov w8, v0.b[7]
-; CHECK-GI-NEXT:    clz w9, w9
-; CHECK-GI-NEXT:    clz w10, w11
-; CHECK-GI-NEXT:    umov w11, v1.b[7]
-; CHECK-GI-NEXT:    sub w9, w9, #24
-; CHECK-GI-NEXT:    sub w10, w10, #24
-; CHECK-GI-NEXT:    mov v2.b[6], w9
-; CHECK-GI-NEXT:    mov v3.b[6], w10
-; CHECK-GI-NEXT:    clz w8, w8
-; CHECK-GI-NEXT:    umov w9, v0.b[8]
-; CHECK-GI-NEXT:    clz w10, w11
-; CHECK-GI-NEXT:    umov w11, v1.b[8]
-; CHECK-GI-NEXT:    sub w8, w8, #24
-; CHECK-GI-NEXT:    sub w10, w10, #24
-; CHECK-GI-NEXT:    mov v2.b[7], w8
-; CHECK-GI-NEXT:    mov v3.b[7], w10
-; CHECK-GI-NEXT:    umov w8, v0.b[9]
-; CHECK-GI-NEXT:    clz w9, w9
-; CHECK-GI-NEXT:    clz w10, w11
-; CHECK-GI-NEXT:    umov w11, v1.b[9]
-; CHECK-GI-NEXT:    sub w9, w9, #24
-; CHECK-GI-NEXT:    sub w10, w10, #24
-; CHECK-GI-NEXT:    mov v2.b[8], w9
-; CHECK-GI-NEXT:    mov v3.b[8], w10
-; CHECK-GI-NEXT:    clz w8, w8
-; CHECK-GI-NEXT:    umov w9, v0.b[10]
-; CHECK-GI-NEXT:    clz w10, w11
-; CHECK-GI-NEXT:    umov w11, v1.b[10]
-; CHECK-GI-NEXT:    sub w8, w8, #24
-; CHECK-GI-NEXT:    sub w10, w10, #24
-; CHECK-GI-NEXT:    mov v2.b[9], w8
-; CHECK-GI-NEXT:    mov v3.b[9], w10
-; CHECK-GI-NEXT:    umov w8, v0.b[11]
-; CHECK-GI-NEXT:    clz w9, w9
-; CHECK-GI-NEXT:    clz w10, w11
-; CHECK-GI-NEXT:    umov w11, v1.b[11]
-; CHECK-GI-NEXT:    sub w9, w9, #24
-; CHECK-GI-NEXT:    sub w10, w10, #24
-; CHECK-GI-NEXT:    mov v2.b[10], w9
-; CHECK-GI-NEXT:    mov v3.b[10], w10
-; CHECK-GI-NEXT:    clz w8, w8
-; CHECK-GI-NEXT:    umov w9, v0.b[12]
-; CHECK-GI-NEXT:    clz w10, w11
-; CHECK-GI-NEXT:    umov w11, v1.b[12]
-; CHECK-GI-NEXT:    sub w8, w8, #24
-; CHECK-GI-NEXT:    sub w10, w10, #24
-; CHECK-GI-NEXT:    mov v2.b[11], w8
-; CHECK-GI-NEXT:    mov v3.b[11], w10
-; CHECK-GI-NEXT:    umov w8, v0.b[13]
-; CHECK-GI-NEXT:    clz w9, w9
-; CHECK-GI-NEXT:    clz w10, w11
-; CHECK-GI-NEXT:    umov w11, v1.b[13]
-; CHECK-GI-NEXT:    sub w9, w9, #24
-; CHECK-GI-NEXT:    sub w10, w10, #24
-; CHECK-GI-NEXT:    mov v2.b[12], w9
-; CHECK-GI-NEXT:    mov v3.b[12], w10
-; CHECK-GI-NEXT:    clz w8, w8
-; CHECK-GI-NEXT:    umov w9, v0.b[14]
-; CHECK-GI-NEXT:    clz w10, w11
-; CHECK-GI-NEXT:    umov w11, v1.b[14]
-; CHECK-GI-NEXT:    sub w8, w8, #24
-; CHECK-GI-NEXT:    sub w10, w10, #24
-; CHECK-GI-NEXT:    mov v2.b[13], w8
-; CHECK-GI-NEXT:    mov v3.b[13], w10
-; CHECK-GI-NEXT:    umov w8, v0.b[15]
-; CHECK-GI-NEXT:    umov w10, v1.b[15]
-; CHECK-GI-NEXT:    clz w9, w9
-; CHECK-GI-NEXT:    clz w11, w11
-; CHECK-GI-NEXT:    sub w9, w9, #24
-; CHECK-GI-NEXT:    sub w11, w11, #24
-; CHECK-GI-NEXT:    mov v2.b[14], w9
-; CHECK-GI-NEXT:    mov v3.b[14], w11
-; CHECK-GI-NEXT:    clz w8, w8
-; CHECK-GI-NEXT:    clz w9, w10
-; CHECK-GI-NEXT:    sub w8, w8, #24
-; CHECK-GI-NEXT:    sub w9, w9, #24
-; CHECK-GI-NEXT:    mov v2.b[15], w8
-; CHECK-GI-NEXT:    mov v3.b[15], w9
-; CHECK-GI-NEXT:    mov v0.16b, v2.16b
-; CHECK-GI-NEXT:    mov v1.16b, v3.16b
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: v32i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    clz v0.16b, v0.16b
+; CHECK-NEXT:    clz v1.16b, v1.16b
+; CHECK-NEXT:    ret
 entry:
   %s = call <32 x i8> @llvm.ctlz(<32 x i8> %d, i1 false)
   ret <32 x i8> %s
@@ -309,14 +161,12 @@ define void @v2i16(ptr %p1) {
 ;
 ; CHECK-GI-LABEL: v2i16:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    ldrh w8, [x0]
-; CHECK-GI-NEXT:    ldrh w9, [x0, #2]
-; CHECK-GI-NEXT:    clz w8, w8
-; CHECK-GI-NEXT:    clz w9, w9
-; CHECK-GI-NEXT:    sub w8, w8, #16
-; CHECK-GI-NEXT:    sub w9, w9, #16
-; CHECK-GI-NEXT:    strh w8, [x0]
-; CHECK-GI-NEXT:    strh w9, [x0, #2]
+; CHECK-GI-NEXT:    ldr h0, [x0]
+; CHECK-GI-NEXT:    add x8, x0, #2
+; CHECK-GI-NEXT:    ld1 { v0.h }[1], [x8]
+; CHECK-GI-NEXT:    clz v0.4h, v0.4h
+; CHECK-GI-NEXT:    str h0, [x0]
+; CHECK-GI-NEXT:    st1 { v0.h }[1], [x8]
 ; CHECK-GI-NEXT:    ret
 entry:
   %d = load <2 x i16>, ptr %p1
@@ -337,18 +187,15 @@ define void @v3i16(ptr %p1) {
 ;
 ; CHECK-GI-LABEL: v3i16:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    ldrh w8, [x0]
-; CHECK-GI-NEXT:    ldrh w9, [x0, #2]
-; CHECK-GI-NEXT:    ldrh w10, [x0, #4]
-; CHECK-GI-NEXT:    clz w8, w8
-; CHECK-GI-NEXT:    clz w9, w9
-; CHECK-GI-NEXT:    clz w10, w10
-; CHECK-GI-NEXT:    sub w8, w8, #16
-; CHECK-GI-NEXT:    sub w9, w9, #16
-; CHECK-GI-NEXT:    strh w8, [x0]
-; CHECK-GI-NEXT:    sub w8, w10, #16
-; CHECK-GI-NEXT:    strh w9, [x0, #2]
-; CHECK-GI-NEXT:    strh w8, [x0, #4]
+; CHECK-GI-NEXT:    ldr h0, [x0]
+; CHECK-GI-NEXT:    add x8, x0, #2
+; CHECK-GI-NEXT:    add x9, x0, #4
+; CHECK-GI-NEXT:    ld1 { v0.h }[1], [x8]
+; CHECK-GI-NEXT:    ld1 { v0.h }[2], [x9]
+; CHECK-GI-NEXT:    clz v0.4h, v0.4h
+; CHECK-GI-NEXT:    str h0, [x0]
+; CHECK-GI-NEXT:    st1 { v0.h }[1], [x8]
+; CHECK-GI-NEXT:    st1 { v0.h }[2], [x9]
 ; CHECK-GI-NEXT:    ret
 entry:
   %d = load <3 x i16>, ptr %p1
@@ -378,81 +225,11 @@ entry:
 }
 
 define <16 x i16> @v16i16(<16 x i16> %d) {
-; CHECK-SD-LABEL: v16i16:
-; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    clz v0.8h, v0.8h
-; CHECK-SD-NEXT:    clz v1.8h, v1.8h
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: v16i16:
-; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    umov w8, v0.h[0]
-; CHECK-GI-NEXT:    umov w10, v1.h[0]
-; CHECK-GI-NEXT:    umov w9, v0.h[1]
-; CHECK-GI-NEXT:    umov w11, v1.h[1]
-; CHECK-GI-NEXT:    umov w12, v0.h[2]
-; CHECK-GI-NEXT:    umov w13, v1.h[2]
-; CHECK-GI-NEXT:    clz w8, w8
-; CHECK-GI-NEXT:    clz w10, w10
-; CHECK-GI-NEXT:    clz w9, w9
-; CHECK-GI-NEXT:    sub w8, w8, #16
-; CHECK-GI-NEXT:    sub w10, w10, #16
-; CHECK-GI-NEXT:    clz w11, w11
-; CHECK-GI-NEXT:    fmov s2, w8
-; CHECK-GI-NEXT:    fmov s3, w10
-; CHECK-GI-NEXT:    sub w9, w9, #16
-; CHECK-GI-NEXT:    sub w11, w11, #16
-; CHECK-GI-NEXT:    umov w8, v0.h[3]
-; CHECK-GI-NEXT:    clz w10, w13
-; CHECK-GI-NEXT:    sub w10, w10, #16
-; CHECK-GI-NEXT:    mov v2.h[1], w9
-; CHECK-GI-NEXT:    mov v3.h[1], w11
-; CHECK-GI-NEXT:    clz w9, w12
-; CHECK-GI-NEXT:    umov w11, v1.h[3]
-; CHECK-GI-NEXT:    sub w9, w9, #16
-; CHECK-GI-NEXT:    clz w8, w8
-; CHECK-GI-NEXT:    sub w8, w8, #16
-; CHECK-GI-NEXT:    mov v2.h[2], w9
-; CHECK-GI-NEXT:    mov v3.h[2], w10
-; CHECK-GI-NEXT:    umov w9, v0.h[4]
-; CHECK-GI-NEXT:    clz w10, w11
-; CHECK-GI-NEXT:    umov w11, v1.h[4]
-; CHECK-GI-NEXT:    sub w10, w10, #16
-; CHECK-GI-NEXT:    mov v2.h[3], w8
-; CHECK-GI-NEXT:    mov v3.h[3], w10
-; CHECK-GI-NEXT:    umov w8, v0.h[5]
-; CHECK-GI-NEXT:    clz w9, w9
-; CHECK-GI-NEXT:    clz w10, w11
-; CHECK-GI-NEXT:    umov w11, v1.h[5]
-; CHECK-GI-NEXT:    sub w9, w9, #16
-; CHECK-GI-NEXT:    sub w10, w10, #16
-; CHECK-GI-NEXT:    mov v2.h[4], w9
-; CHECK-GI-NEXT:    mov v3.h[4], w10
-; CHECK-GI-NEXT:    clz w8, w8
-; CHECK-GI-NEXT:    umov w9, v0.h[6]
-; CHECK-GI-NEXT:    clz w10, w11
-; CHECK-GI-NEXT:    umov w11, v1.h[6]
-; CHECK-GI-NEXT:    sub w8, w8, #16
-; CHECK-GI-NEXT:    sub w10, w10, #16
-; CHECK-GI-NEXT:    mov v2.h[5], w8
-; CHECK-GI-NEXT:    mov v3.h[5], w10
-; CHECK-GI-NEXT:    umov w8, v0.h[7]
-; CHECK-GI-NEXT:    umov w10, v1.h[7]
-; CHECK-GI-NEXT:    clz w9, w9
-; CHECK-GI-NEXT:    clz w11, w11
-; CHECK-GI-NEXT:    sub w9, w9, #16
-; CHECK-GI-NEXT:    sub w11, w11, #16
-; CHECK-GI-NEXT:    mov v2.h[6], w9
-; CHECK-GI-NEXT:    mov v3.h[6], w11
-; CHECK-GI-NEXT:    clz w8, w8
-; CHECK-GI-NEXT:    clz w9, w10
-; CHECK-GI-NEXT:    sub w8, w8, #16
-; CHECK-GI-NEXT:    sub w9, w9, #16
-; CHECK-GI-NEXT:    mov v2.h[7], w8
-; CHECK-GI-NEXT:    mov v3.h[7], w9
-; CHECK-GI-NEXT:    mov v0.16b, v2.16b
-; CHECK-GI-NEXT:    mov v1.16b, v3.16b
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: v16i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    clz v0.8h, v0.8h
+; CHECK-NEXT:    clz v1.8h, v1.8h
+; CHECK-NEXT:    ret
 entry:
   %s = call <16 x i16> @llvm.ctlz(<16 x i16> %d, i1 false)
   ret <16 x i16> %s
@@ -469,24 +246,10 @@ entry:
 }
 
 define <3 x i32> @v3i32(<3 x i32> %d) {
-; CHECK-SD-LABEL: v3i32:
-; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    clz v0.4s, v0.4s
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: v3i32:
-; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    fmov w8, s0
-; CHECK-GI-NEXT:    mov w9, v0.s[1]
-; CHECK-GI-NEXT:    clz w8, w8
-; CHECK-GI-NEXT:    mov v1.s[0], w8
-; CHECK-GI-NEXT:    mov w8, v0.s[2]
-; CHECK-GI-NEXT:    clz w9, w9
-; CHECK-GI-NEXT:    mov v1.s[1], w9
-; CHECK-GI-NEXT:    clz w8, w8
-; CHECK-GI-NEXT:    mov v1.s[2], w8
-; CHECK-GI-NEXT:    mov v0.16b, v1.16b
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: v3i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    clz v0.4s, v0.4s
+; CHECK-NEXT:    ret
 entry:
   %s = call <3 x i32> @llvm.ctlz(<3 x i32> %d, i1 false)
   ret <3 x i32> %s
@@ -503,41 +266,11 @@ entry:
 }
 
 define <8 x i32> @v8i32(<8 x i32> %d) {
-; CHECK-SD-LABEL: v8i32:
-; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    clz v0.4s, v0.4s
-; CHECK-SD-NEXT:    clz v1.4s, v1.4s
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: v8i32:
-; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    fmov w9, s0
-; CHECK-GI-NEXT:    fmov w10, s1
-; CHECK-GI-NEXT:    mov w8, v0.s[1]
-; CHECK-GI-NEXT:    mov w11, v1.s[1]
-; CHECK-GI-NEXT:    clz w9, w9
-; CHECK-GI-NEXT:    clz w10, w10
-; CHECK-GI-NEXT:    mov v2.s[0], w9
-; CHECK-GI-NEXT:    mov v3.s[0], w10
-; CHECK-GI-NEXT:    mov w9, v0.s[2]
-; CHECK-GI-NEXT:    mov w10, v1.s[2]
-; CHECK-GI-NEXT:    clz w8, w8
-; CHECK-GI-NEXT:    clz w11, w11
-; CHECK-GI-NEXT:    mov v2.s[1], w8
-; CHECK-GI-NEXT:    mov v3.s[1], w11
-; CHECK-GI-NEXT:    mov w8, v0.s[3]
-; CHECK-GI-NEXT:    mov w11, v1.s[3]
-; CHECK-GI-NEXT:    clz w9, w9
-; CHECK-GI-NEXT:    clz w10, w10
-; CHECK-GI-NEXT:    mov v2.s[2], w9
-; CHECK-GI-NEXT:    mov v3.s[2], w10
-; CHECK-GI-NEXT:    clz w8, w8
-; CHECK-GI-NEXT:    clz w9, w11
-; CHECK-GI-NEXT:    mov v2.s[3], w8
-; CHECK-GI-NEXT:    mov v3.s[3], w9
-; CHECK-GI-NEXT:    mov v0.16b, v2.16b
-; CHECK-GI-NEXT:    mov v1.16b, v3.16b
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: v8i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    clz v0.4s, v0.4s
+; CHECK-NEXT:    clz v1.4s, v1.4s
+; CHECK-NEXT:    ret
 entry:
   %s = call <8 x i32> @llvm.ctlz(<8 x i32> %d, i1 false)
   ret <8 x i32> %s


        


More information about the llvm-commits mailing list