[llvm] r354299 - [GlobalISel][AArch64] Legalize + select some llvm.ctlz.* intrinsics

Jessica Paquette via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 18 15:33:24 PST 2019


Author: paquette
Date: Mon Feb 18 15:33:24 2019
New Revision: 354299

URL: http://llvm.org/viewvc/llvm-project?rev=354299&view=rev
Log:
[GlobalISel][AArch64] Legalize + select some llvm.ctlz.* intrinsics

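Legalize/select llvm.ctlz.* by marking G_CTLZ legal on AArch64 for the types
s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32 and v4s32, and by letting the
generic LegalizerHelper split G_CTLZ vectors into fewer elements.

Add select-ctlz.mir to show that we actually select them (its test_v2s64 case
records that a <2 x s64> G_CTLZ is still left unselected). Update
arm64-clrsb.ll and arm64-vclz.ll to show that we perform valid transformations
in optimized builds, and document where GISel can improve.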

Differential Revision: https://reviews.llvm.org/D58155

Added:
    llvm/trunk/test/CodeGen/AArch64/GlobalISel/select-ctlz.mir
Modified:
    llvm/trunk/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
    llvm/trunk/lib/Target/AArch64/AArch64LegalizerInfo.cpp
    llvm/trunk/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
    llvm/trunk/test/CodeGen/AArch64/arm64-clrsb.ll
    llvm/trunk/test/CodeGen/AArch64/arm64-vclz.ll

Modified: llvm/trunk/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/GlobalISel/LegalizerHelper.cpp?rev=354299&r1=354298&r2=354299&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/GlobalISel/LegalizerHelper.cpp (original)
+++ llvm/trunk/lib/CodeGen/GlobalISel/LegalizerHelper.cpp Mon Feb 18 15:33:24 2019
@@ -2099,6 +2099,7 @@ LegalizerHelper::fewerElementsVector(Mac
   case G_FSIN:
   case G_FSQRT:
   case G_BSWAP:
+  case G_CTLZ:
     return fewerElementsVectorBasic(MI, TypeIdx, NarrowTy);
   case G_SHL:
   case G_LSHR:

Modified: llvm/trunk/lib/Target/AArch64/AArch64LegalizerInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64LegalizerInfo.cpp?rev=354299&r1=354298&r2=354299&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64LegalizerInfo.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64LegalizerInfo.cpp Mon Feb 18 15:33:24 2019
@@ -457,6 +457,10 @@ AArch64LegalizerInfo::AArch64LegalizerIn
       })
       .minScalarSameAs(1, 0);
 
+  getActionDefinitionsBuilder(G_CTLZ).legalForCartesianProduct(
+      {s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
+      .scalarize(1);
+
   computeTables();
   verify(*ST.getInstrInfo());
 }

Modified: llvm/trunk/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir?rev=354299&r1=354298&r2=354299&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir (original)
+++ llvm/trunk/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir Mon Feb 18 15:33:24 2019
@@ -322,7 +322,7 @@
 # DEBUG:      .. type index coverage check SKIPPED: no rules defined
 #
 # DEBUG-NEXT: G_CTLZ (opcode {{[0-9]+}}): 2 type indices
-# DEBUG:      .. type index coverage check SKIPPED: no rules defined
+# DEBUG:      .. the first uncovered type index: 2, OK
 #
 # DEBUG-NEXT: G_CTLZ_ZERO_UNDEF (opcode {{[0-9]+}}): 2 type indices
 # DEBUG:      .. type index coverage check SKIPPED: no rules defined

Added: llvm/trunk/test/CodeGen/AArch64/GlobalISel/select-ctlz.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/GlobalISel/select-ctlz.mir?rev=354299&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/GlobalISel/select-ctlz.mir (added)
+++ llvm/trunk/test/CodeGen/AArch64/GlobalISel/select-ctlz.mir Mon Feb 18 15:33:24 2019
@@ -0,0 +1,200 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -O0 -mtriple=arm64-unknown-unknown -global-isel -run-pass=instruction-select %s -o - | FileCheck %s
+
+name:            test_v8s8
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $d0
+
+    ; CHECK-LABEL: name:            test_v8s8
+    ; CHECK: liveins: $d0
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[CLZv8i8_:%[0-9]+]]:fpr64 = CLZv8i8 [[COPY]]
+    ; CHECK: $d0 = COPY [[CLZv8i8_]]
+    ; CHECK: RET_ReallyLR implicit $d0
+    %0:fpr(<8 x s8>) = COPY $d0
+    %1:fpr(<8 x s8>) = G_CTLZ %0(<8 x s8>)
+    $d0 = COPY %1(<8 x s8>)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name:            test_v4s16
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $d0
+
+    ; CHECK-LABEL: name: test_v4s16
+    ; CHECK: liveins: $d0
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[CLZv4i16_:%[0-9]+]]:fpr64 = CLZv4i16 [[COPY]]
+    ; CHECK: $d0 = COPY [[CLZv4i16_]]
+    ; CHECK: RET_ReallyLR implicit $d0
+    %0:fpr(<4 x s16>) = COPY $d0
+    %1:fpr(<4 x s16>) = G_CTLZ %0(<4 x s16>)
+    $d0 = COPY %1(<4 x s16>)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name:            test_v2s32
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $d0
+
+    ; CHECK-LABEL: name: test_v2s32
+    ; CHECK: liveins: $d0
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[CLZv2i32_:%[0-9]+]]:fpr64 = CLZv2i32 [[COPY]]
+    ; CHECK: $d0 = COPY [[CLZv2i32_]]
+    ; CHECK: RET_ReallyLR implicit $d0
+    %0:fpr(<2 x s32>) = COPY $d0
+    %1:fpr(<2 x s32>) = G_CTLZ %0(<2 x s32>)
+    $d0 = COPY %1(<2 x s32>)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name:            test_s64
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $d0
+
+    ; CHECK-LABEL: name: test_s64
+    ; CHECK: liveins: $d0
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY [[COPY]]
+    ; CHECK: [[CLZXr:%[0-9]+]]:gpr64 = CLZXr [[COPY1]]
+    ; CHECK: $d0 = COPY [[CLZXr]]
+    ; CHECK: RET_ReallyLR implicit $d0
+    %0:fpr(s64) = COPY $d0
+    %2:gpr(s64) = COPY %0(s64)
+    %1:gpr(s64) = G_CTLZ %2(s64)
+    $d0 = COPY %1(s64)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name:            test_s32
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $s0
+    ; CHECK-LABEL: name: test_s32
+    ; CHECK: liveins: $s0
+    ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY $s0
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]]
+    ; CHECK: [[CLZWr:%[0-9]+]]:gpr32 = CLZWr [[COPY1]]
+    ; CHECK: $s0 = COPY [[CLZWr]]
+    ; CHECK: RET_ReallyLR implicit $s0
+    %0:fpr(s32) = COPY $s0
+    %2:gpr(s32) = COPY %0(s32)
+    %1:gpr(s32) = G_CTLZ %2(s32)
+    $s0 = COPY %1(s32)
+    RET_ReallyLR implicit $s0
+
+...
+---
+name:            test_v16s8
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $q0
+
+    ; CHECK-LABEL: name: test_v16s8
+    ; CHECK: liveins: $q0
+    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK: [[CLZv16i8_:%[0-9]+]]:fpr128 = CLZv16i8 [[COPY]]
+    ; CHECK: $q0 = COPY [[CLZv16i8_]]
+    ; CHECK: RET_ReallyLR implicit $q0
+    %0:fpr(<16 x s8>) = COPY $q0
+    %1:fpr(<16 x s8>) = G_CTLZ %0(<16 x s8>)
+    $q0 = COPY %1(<16 x s8>)
+    RET_ReallyLR implicit $q0
+
+...
+---
+name:            test_v8s16
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $q0
+
+    ; CHECK-LABEL: name: test_v8s16
+    ; CHECK: liveins: $q0
+    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK: [[CLZv8i16_:%[0-9]+]]:fpr128 = CLZv8i16 [[COPY]]
+    ; CHECK: $q0 = COPY [[CLZv8i16_]]
+    ; CHECK: RET_ReallyLR implicit $q0
+    %0:fpr(<8 x s16>) = COPY $q0
+    %1:fpr(<8 x s16>) = G_CTLZ %0(<8 x s16>)
+    $q0 = COPY %1(<8 x s16>)
+    RET_ReallyLR implicit $q0
+
+...
+---
+name:            test_v4s32
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $q0
+
+    ; CHECK-LABEL: name: test_v4s32
+    ; CHECK: liveins: $q0
+    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK: [[CLZv4i32_:%[0-9]+]]:fpr128 = CLZv4i32 [[COPY]]
+    ; CHECK: $q0 = COPY [[CLZv4i32_]]
+    ; CHECK: RET_ReallyLR implicit $q0
+    %0:fpr(<4 x s32>) = COPY $q0
+    %1:fpr(<4 x s32>) = G_CTLZ %0(<4 x s32>)
+    $q0 = COPY %1(<4 x s32>)
+    RET_ReallyLR implicit $q0
+
+...
+---
+name:            test_v2s64
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $q0
+    ; CHECK-LABEL: name: test_v2s64
+    ; CHECK: liveins: $q0
+    ; CHECK: [[COPY:%[0-9]+]]:fpr(<2 x s64>) = COPY $q0
+    ; CHECK: [[CTLZ:%[0-9]+]]:fpr(<2 x s64>) = G_CTLZ [[COPY]](<2 x s64>)
+    ; CHECK: $q0 = COPY [[CTLZ]](<2 x s64>)
+    ; CHECK: RET_ReallyLR implicit $q0
+    %0:fpr(<2 x s64>) = COPY $q0
+    %1:fpr(<2 x s64>) = G_CTLZ %0(<2 x s64>)
+    $q0 = COPY %1(<2 x s64>)
+    RET_ReallyLR implicit $q0

Modified: llvm/trunk/test/CodeGen/AArch64/arm64-clrsb.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-clrsb.ll?rev=354299&r1=354298&r2=354299&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/arm64-clrsb.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/arm64-clrsb.ll Mon Feb 18 15:33:24 2019
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -mtriple=arm64-apple-ios7.0.0 |  FileCheck %s
+; RUN: llc < %s -mtriple=arm64-apple-ios7.0.0 -O0 -pass-remarks-missed=gisel* -global-isel-abort=2 |  FileCheck %s --check-prefixes=GISEL,FALLBACK
 
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 
@@ -7,6 +8,7 @@ declare i32 @llvm.ctlz.i32(i32, i1) #0
 declare i64 @llvm.ctlz.i64(i64, i1) #1
 
 ; Function Attrs: nounwind ssp
+; FALLBACK-NOT: remark{{.*}}clrsb32
 define i32 @clrsb32(i32 %x) #2 {
 entry:
   %shr = ashr i32 %x, 31
@@ -18,9 +20,15 @@ entry:
   ret i32 %0
 ; CHECK-LABEL: clrsb32
 ; CHECK:   cls [[TEMP:w[0-9]+]], [[TEMP]]
+
+; FIXME: We should produce the same result here to save some code size. After
+; that, we can remove the GISEL special casing.
+; GISEL-LABEL: clrsb32
+; GISEL: clz
 }
 
 ; Function Attrs: nounwind ssp
+; FALLBACK-NOT: remark{{.*}}clrsb64
 define i64 @clrsb64(i64 %x) #3 {
 entry:
   %shr = ashr i64 %x, 63
@@ -32,4 +40,6 @@ entry:
   ret i64 %0
 ; CHECK-LABEL: clrsb64
 ; CHECK:   cls [[TEMP:x[0-9]+]], [[TEMP]]
+; GISEL-LABEL: clrsb64
+; GISEL:   cls [[TEMP:x[0-9]+]], [[TEMP]]
 }

Modified: llvm/trunk/test/CodeGen/AArch64/arm64-vclz.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-vclz.ll?rev=354299&r1=354298&r2=354299&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/arm64-vclz.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/arm64-vclz.ll Mon Feb 18 15:33:24 2019
@@ -1,5 +1,7 @@
 ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -global-isel -global-isel-abort=2 -pass-remarks-missed=gisel* -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
 
+; FALLBACK-NOT: remark{{.*}}test_vclz_u8
 define <8 x i8> @test_vclz_u8(<8 x i8> %a) nounwind readnone ssp {
   ; CHECK-LABEL: test_vclz_u8:
   ; CHECK: clz.8b v0, v0
@@ -8,6 +10,7 @@ define <8 x i8> @test_vclz_u8(<8 x i8> %
   ret <8 x i8> %vclz.i
 }
 
+; FALLBACK-NOT: remark{{.*}}test_vclz_s8
 define <8 x i8> @test_vclz_s8(<8 x i8> %a) nounwind readnone ssp {
   ; CHECK-LABEL: test_vclz_s8:
   ; CHECK: clz.8b v0, v0
@@ -16,6 +19,7 @@ define <8 x i8> @test_vclz_s8(<8 x i8> %
   ret <8 x i8> %vclz.i
 }
 
+; FALLBACK-NOT: remark{{.*}}test_vclz_u16
 define <4 x i16> @test_vclz_u16(<4 x i16> %a) nounwind readnone ssp {
   ; CHECK-LABEL: test_vclz_u16:
   ; CHECK: clz.4h v0, v0
@@ -24,6 +28,7 @@ define <4 x i16> @test_vclz_u16(<4 x i16
   ret <4 x i16> %vclz1.i
 }
 
+; FALLBACK-NOT: remark{{.*}}test_vclz_s16
 define <4 x i16> @test_vclz_s16(<4 x i16> %a) nounwind readnone ssp {
   ; CHECK-LABEL: test_vclz_s16:
   ; CHECK: clz.4h v0, v0
@@ -32,6 +37,7 @@ define <4 x i16> @test_vclz_s16(<4 x i16
   ret <4 x i16> %vclz1.i
 }
 
+; FALLBACK-NOT: remark{{.*}}test_vclz_u32
 define <2 x i32> @test_vclz_u32(<2 x i32> %a) nounwind readnone ssp {
   ; CHECK-LABEL: test_vclz_u32:
   ; CHECK: clz.2s v0, v0
@@ -40,6 +46,7 @@ define <2 x i32> @test_vclz_u32(<2 x i32
   ret <2 x i32> %vclz1.i
 }
 
+; FALLBACK-NOT: remark{{.*}}test_vclz_s32
 define <2 x i32> @test_vclz_s32(<2 x i32> %a) nounwind readnone ssp {
   ; CHECK-LABEL: test_vclz_s32:
   ; CHECK: clz.2s v0, v0
@@ -48,18 +55,21 @@ define <2 x i32> @test_vclz_s32(<2 x i32
   ret <2 x i32> %vclz1.i
 }
 
+; FALLBACK-NOT: remark{{.*}}test_vclz_u64
 define <1 x i64> @test_vclz_u64(<1 x i64> %a) nounwind readnone ssp {
   ; CHECK-LABEL: test_vclz_u64:
   %vclz1.i = tail call <1 x i64> @llvm.ctlz.v1i64(<1 x i64> %a, i1 false) nounwind
   ret <1 x i64> %vclz1.i
 }
 
+; FALLBACK-NOT: remark{{.*}}test_vclz_s64
 define <1 x i64> @test_vclz_s64(<1 x i64> %a) nounwind readnone ssp {
   ; CHECK-LABEL: test_vclz_s64:
   %vclz1.i = tail call <1 x i64> @llvm.ctlz.v1i64(<1 x i64> %a, i1 false) nounwind
   ret <1 x i64> %vclz1.i
 }
 
+; FALLBACK-NOT: remark{{.*}}test_vclzq_u8
 define <16 x i8> @test_vclzq_u8(<16 x i8> %a) nounwind readnone ssp {
   ; CHECK-LABEL: test_vclzq_u8:
   ; CHECK: clz.16b v0, v0
@@ -68,6 +78,7 @@ define <16 x i8> @test_vclzq_u8(<16 x i8
   ret <16 x i8> %vclz.i
 }
 
+; FALLBACK-NOT: remark{{.*}}test_vclzq_s8
 define <16 x i8> @test_vclzq_s8(<16 x i8> %a) nounwind readnone ssp {
   ; CHECK-LABEL: test_vclzq_s8:
   ; CHECK: clz.16b v0, v0
@@ -76,6 +87,7 @@ define <16 x i8> @test_vclzq_s8(<16 x i8
   ret <16 x i8> %vclz.i
 }
 
+; FALLBACK-NOT: remark{{.*}}test_vclzq_u16
 define <8 x i16> @test_vclzq_u16(<8 x i16> %a) nounwind readnone ssp {
   ; CHECK-LABEL: test_vclzq_u16:
   ; CHECK: clz.8h v0, v0
@@ -84,6 +96,7 @@ define <8 x i16> @test_vclzq_u16(<8 x i1
   ret <8 x i16> %vclz1.i
 }
 
+; FALLBACK-NOT: remark{{.*}}test_vclzq_s16
 define <8 x i16> @test_vclzq_s16(<8 x i16> %a) nounwind readnone ssp {
   ; CHECK-LABEL: test_vclzq_s16:
   ; CHECK: clz.8h v0, v0
@@ -92,6 +105,7 @@ define <8 x i16> @test_vclzq_s16(<8 x i1
   ret <8 x i16> %vclz1.i
 }
 
+; FALLBACK-NOT: remark{{.*}}test_vclzq_u32
 define <4 x i32> @test_vclzq_u32(<4 x i32> %a) nounwind readnone ssp {
   ; CHECK-LABEL: test_vclzq_u32:
   ; CHECK: clz.4s v0, v0
@@ -100,6 +114,7 @@ define <4 x i32> @test_vclzq_u32(<4 x i3
   ret <4 x i32> %vclz1.i
 }
 
+; FALLBACK-NOT: remark{{.*}}test_vclzq_s32
 define <4 x i32> @test_vclzq_s32(<4 x i32> %a) nounwind readnone ssp {
   ; CHECK-LABEL: test_vclzq_s32:
   ; CHECK: clz.4s v0, v0
@@ -108,12 +123,14 @@ define <4 x i32> @test_vclzq_s32(<4 x i3
   ret <4 x i32> %vclz1.i
 }
 
+; FALLBACK-NOT: remark{{.*}}test_vclzq_u64
 define <2 x i64> @test_vclzq_u64(<2 x i64> %a) nounwind readnone ssp {
   ; CHECK-LABEL: test_vclzq_u64:
   %vclz1.i = tail call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false) nounwind
   ret <2 x i64> %vclz1.i
 }
 
+; FALLBACK-NOT: remark{{.*}}test_vclzq_s64
 define <2 x i64> @test_vclzq_s64(<2 x i64> %a) nounwind readnone ssp {
   ; CHECK-LABEL: test_vclzq_s64:
   %vclz1.i = tail call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false) nounwind




More information about the llvm-commits mailing list