[llvm] r354299 - [GlobalISel][AArch64] Legalize + select some llvm.ctlz.* intrinsics
Jessica Paquette via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 18 15:33:24 PST 2019
Author: paquette
Date: Mon Feb 18 15:33:24 2019
New Revision: 354299
URL: http://llvm.org/viewvc/llvm-project?rev=354299&view=rev
Log:
[GlobalISel][AArch64] Legalize + select some llvm.ctlz.* intrinsics
Legalize/select llvm.ctlz.*
Add select-ctlz to show that we actually select them. Update arm64-clrsb.ll and
arm64-vclz.ll to show that we perform valid transformations in optimized builds,
and document where GISel can improve.
Differential Revision: https://reviews.llvm.org/D58155
Added:
llvm/trunk/test/CodeGen/AArch64/GlobalISel/select-ctlz.mir
Modified:
llvm/trunk/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
llvm/trunk/lib/Target/AArch64/AArch64LegalizerInfo.cpp
llvm/trunk/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
llvm/trunk/test/CodeGen/AArch64/arm64-clrsb.ll
llvm/trunk/test/CodeGen/AArch64/arm64-vclz.ll
Modified: llvm/trunk/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/GlobalISel/LegalizerHelper.cpp?rev=354299&r1=354298&r2=354299&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/GlobalISel/LegalizerHelper.cpp (original)
+++ llvm/trunk/lib/CodeGen/GlobalISel/LegalizerHelper.cpp Mon Feb 18 15:33:24 2019
@@ -2099,6 +2099,7 @@ LegalizerHelper::fewerElementsVector(Mac
case G_FSIN:
case G_FSQRT:
case G_BSWAP:
+ case G_CTLZ:
return fewerElementsVectorBasic(MI, TypeIdx, NarrowTy);
case G_SHL:
case G_LSHR:
Modified: llvm/trunk/lib/Target/AArch64/AArch64LegalizerInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64LegalizerInfo.cpp?rev=354299&r1=354298&r2=354299&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64LegalizerInfo.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64LegalizerInfo.cpp Mon Feb 18 15:33:24 2019
@@ -457,6 +457,10 @@ AArch64LegalizerInfo::AArch64LegalizerIn
})
.minScalarSameAs(1, 0);
+ getActionDefinitionsBuilder(G_CTLZ).legalForCartesianProduct(
+ {s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
+ .scalarize(1);
+
computeTables();
verify(*ST.getInstrInfo());
}
Modified: llvm/trunk/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir?rev=354299&r1=354298&r2=354299&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir (original)
+++ llvm/trunk/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir Mon Feb 18 15:33:24 2019
@@ -322,7 +322,7 @@
# DEBUG: .. type index coverage check SKIPPED: no rules defined
#
# DEBUG-NEXT: G_CTLZ (opcode {{[0-9]+}}): 2 type indices
-# DEBUG: .. type index coverage check SKIPPED: no rules defined
+# DEBUG: .. the first uncovered type index: 2, OK
#
# DEBUG-NEXT: G_CTLZ_ZERO_UNDEF (opcode {{[0-9]+}}): 2 type indices
# DEBUG: .. type index coverage check SKIPPED: no rules defined
Added: llvm/trunk/test/CodeGen/AArch64/GlobalISel/select-ctlz.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/GlobalISel/select-ctlz.mir?rev=354299&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/GlobalISel/select-ctlz.mir (added)
+++ llvm/trunk/test/CodeGen/AArch64/GlobalISel/select-ctlz.mir Mon Feb 18 15:33:24 2019
@@ -0,0 +1,200 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -O0 -mtriple=arm64-unknown-unknown -global-isel -run-pass=instruction-select %s -o - | FileCheck %s
+
+name: test_v8s8
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $d0
+
+ ; CHECK-LABEL: name: test_v8s8
+ ; CHECK: liveins: $d0
+ ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+ ; CHECK: [[CLZv8i8_:%[0-9]+]]:fpr64 = CLZv8i8 [[COPY]]
+ ; CHECK: $d0 = COPY [[CLZv8i8_]]
+ ; CHECK: RET_ReallyLR implicit $d0
+ %0:fpr(<8 x s8>) = COPY $d0
+ %1:fpr(<8 x s8>) = G_CTLZ %0(<8 x s8>)
+ $d0 = COPY %1(<8 x s8>)
+ RET_ReallyLR implicit $d0
+
+...
+---
+name: test_v4s16
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $d0
+
+ ; CHECK-LABEL: name: test_v4s16
+ ; CHECK: liveins: $d0
+ ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+ ; CHECK: [[CLZv4i16_:%[0-9]+]]:fpr64 = CLZv4i16 [[COPY]]
+ ; CHECK: $d0 = COPY [[CLZv4i16_]]
+ ; CHECK: RET_ReallyLR implicit $d0
+ %0:fpr(<4 x s16>) = COPY $d0
+ %1:fpr(<4 x s16>) = G_CTLZ %0(<4 x s16>)
+ $d0 = COPY %1(<4 x s16>)
+ RET_ReallyLR implicit $d0
+
+...
+---
+name: test_v2s32
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $d0
+
+ ; CHECK-LABEL: name: test_v2s32
+ ; CHECK: liveins: $d0
+ ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+ ; CHECK: [[CLZv2i32_:%[0-9]+]]:fpr64 = CLZv2i32 [[COPY]]
+ ; CHECK: $d0 = COPY [[CLZv2i32_]]
+ ; CHECK: RET_ReallyLR implicit $d0
+ %0:fpr(<2 x s32>) = COPY $d0
+ %1:fpr(<2 x s32>) = G_CTLZ %0(<2 x s32>)
+ $d0 = COPY %1(<2 x s32>)
+ RET_ReallyLR implicit $d0
+
+...
+---
+name: test_s64
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $d0
+
+ ; CHECK-LABEL: name: test_s64
+ ; CHECK: liveins: $d0
+ ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+ ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY [[COPY]]
+ ; CHECK: [[CLZXr:%[0-9]+]]:gpr64 = CLZXr [[COPY1]]
+ ; CHECK: $d0 = COPY [[CLZXr]]
+ ; CHECK: RET_ReallyLR implicit $d0
+ %0:fpr(s64) = COPY $d0
+ %2:gpr(s64) = COPY %0(s64)
+ %1:gpr(s64) = G_CTLZ %2(s64)
+ $d0 = COPY %1(s64)
+ RET_ReallyLR implicit $d0
+
+...
+---
+name: test_s32
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $s0
+ ; CHECK-LABEL: name: test_s32
+ ; CHECK: liveins: $s0
+ ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY $s0
+ ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]]
+ ; CHECK: [[CLZWr:%[0-9]+]]:gpr32 = CLZWr [[COPY1]]
+ ; CHECK: $s0 = COPY [[CLZWr]]
+ ; CHECK: RET_ReallyLR implicit $s0
+ %0:fpr(s32) = COPY $s0
+ %2:gpr(s32) = COPY %0(s32)
+ %1:gpr(s32) = G_CTLZ %2(s32)
+ $s0 = COPY %1(s32)
+ RET_ReallyLR implicit $s0
+
+...
+---
+name: test_v16s8
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $q0
+
+ ; CHECK-LABEL: name: test_v16s8
+ ; CHECK: liveins: $q0
+ ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+ ; CHECK: [[CLZv16i8_:%[0-9]+]]:fpr128 = CLZv16i8 [[COPY]]
+ ; CHECK: $q0 = COPY [[CLZv16i8_]]
+ ; CHECK: RET_ReallyLR implicit $q0
+ %0:fpr(<16 x s8>) = COPY $q0
+ %1:fpr(<16 x s8>) = G_CTLZ %0(<16 x s8>)
+ $q0 = COPY %1(<16 x s8>)
+ RET_ReallyLR implicit $q0
+
+...
+---
+name: test_v8s16
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $q0
+
+ ; CHECK-LABEL: name: test_v8s16
+ ; CHECK: liveins: $q0
+ ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+ ; CHECK: [[CLZv8i16_:%[0-9]+]]:fpr128 = CLZv8i16 [[COPY]]
+ ; CHECK: $q0 = COPY [[CLZv8i16_]]
+ ; CHECK: RET_ReallyLR implicit $q0
+ %0:fpr(<8 x s16>) = COPY $q0
+ %1:fpr(<8 x s16>) = G_CTLZ %0(<8 x s16>)
+ $q0 = COPY %1(<8 x s16>)
+ RET_ReallyLR implicit $q0
+
+...
+---
+name: test_v4s32
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $q0
+
+ ; CHECK-LABEL: name: test_v4s32
+ ; CHECK: liveins: $q0
+ ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+ ; CHECK: [[CLZv4i32_:%[0-9]+]]:fpr128 = CLZv4i32 [[COPY]]
+ ; CHECK: $q0 = COPY [[CLZv4i32_]]
+ ; CHECK: RET_ReallyLR implicit $q0
+ %0:fpr(<4 x s32>) = COPY $q0
+ %1:fpr(<4 x s32>) = G_CTLZ %0(<4 x s32>)
+ $q0 = COPY %1(<4 x s32>)
+ RET_ReallyLR implicit $q0
+
+...
+---
+name: test_v2s64
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $q0
+ ; CHECK-LABEL: name: test_v2s64
+ ; CHECK: liveins: $q0
+ ; CHECK: [[COPY:%[0-9]+]]:fpr(<2 x s64>) = COPY $q0
+ ; CHECK: [[CTLZ:%[0-9]+]]:fpr(<2 x s64>) = G_CTLZ [[COPY]](<2 x s64>)
+ ; CHECK: $q0 = COPY [[CTLZ]](<2 x s64>)
+ ; CHECK: RET_ReallyLR implicit $q0
+ %0:fpr(<2 x s64>) = COPY $q0
+ %1:fpr(<2 x s64>) = G_CTLZ %0(<2 x s64>)
+ $q0 = COPY %1(<2 x s64>)
+ RET_ReallyLR implicit $q0
Modified: llvm/trunk/test/CodeGen/AArch64/arm64-clrsb.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-clrsb.ll?rev=354299&r1=354298&r2=354299&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/arm64-clrsb.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/arm64-clrsb.ll Mon Feb 18 15:33:24 2019
@@ -1,4 +1,5 @@
; RUN: llc < %s -mtriple=arm64-apple-ios7.0.0 | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-apple-ios7.0.0 -O0 -pass-remarks-missed=gisel* -global-isel-abort=2 | FileCheck %s --check-prefixes=GISEL,FALLBACK
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
@@ -7,6 +8,7 @@ declare i32 @llvm.ctlz.i32(i32, i1) #0
declare i64 @llvm.ctlz.i64(i64, i1) #1
; Function Attrs: nounwind ssp
+; FALLBACK-NOT: remark{{.*}}clrsb32
define i32 @clrsb32(i32 %x) #2 {
entry:
%shr = ashr i32 %x, 31
@@ -18,9 +20,15 @@ entry:
ret i32 %0
; CHECK-LABEL: clrsb32
; CHECK: cls [[TEMP:w[0-9]+]], [[TEMP]]
+
+; FIXME: We should produce the same result here to save some code size. After
+; that, we can remove the GISEL special casing.
+; GISEL-LABEL: clrsb32
+; GISEL: clz
}
; Function Attrs: nounwind ssp
+; FALLBACK-NOT: remark{{.*}}clrsb64
define i64 @clrsb64(i64 %x) #3 {
entry:
%shr = ashr i64 %x, 63
@@ -32,4 +40,6 @@ entry:
ret i64 %0
; CHECK-LABEL: clrsb64
; CHECK: cls [[TEMP:x[0-9]+]], [[TEMP]]
+; GISEL-LABEL: clrsb64
+; GISEL: cls [[TEMP:x[0-9]+]], [[TEMP]]
}
Modified: llvm/trunk/test/CodeGen/AArch64/arm64-vclz.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-vclz.ll?rev=354299&r1=354298&r2=354299&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/arm64-vclz.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/arm64-vclz.ll Mon Feb 18 15:33:24 2019
@@ -1,5 +1,7 @@
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -global-isel -global-isel-abort=2 -pass-remarks-missed=gisel* -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
+; FALLBACK-NOT: remark{{.*}}test_vclz_u8
define <8 x i8> @test_vclz_u8(<8 x i8> %a) nounwind readnone ssp {
; CHECK-LABEL: test_vclz_u8:
; CHECK: clz.8b v0, v0
@@ -8,6 +10,7 @@ define <8 x i8> @test_vclz_u8(<8 x i8> %
ret <8 x i8> %vclz.i
}
+; FALLBACK-NOT: remark{{.*}}test_vclz_s8
define <8 x i8> @test_vclz_s8(<8 x i8> %a) nounwind readnone ssp {
; CHECK-LABEL: test_vclz_s8:
; CHECK: clz.8b v0, v0
@@ -16,6 +19,7 @@ define <8 x i8> @test_vclz_s8(<8 x i8> %
ret <8 x i8> %vclz.i
}
+; FALLBACK-NOT: remark{{.*}}test_vclz_u16
define <4 x i16> @test_vclz_u16(<4 x i16> %a) nounwind readnone ssp {
; CHECK-LABEL: test_vclz_u16:
; CHECK: clz.4h v0, v0
@@ -24,6 +28,7 @@ define <4 x i16> @test_vclz_u16(<4 x i16
ret <4 x i16> %vclz1.i
}
+; FALLBACK-NOT: remark{{.*}}test_vclz_s16
define <4 x i16> @test_vclz_s16(<4 x i16> %a) nounwind readnone ssp {
; CHECK-LABEL: test_vclz_s16:
; CHECK: clz.4h v0, v0
@@ -32,6 +37,7 @@ define <4 x i16> @test_vclz_s16(<4 x i16
ret <4 x i16> %vclz1.i
}
+; FALLBACK-NOT: remark{{.*}}test_vclz_u32
define <2 x i32> @test_vclz_u32(<2 x i32> %a) nounwind readnone ssp {
; CHECK-LABEL: test_vclz_u32:
; CHECK: clz.2s v0, v0
@@ -40,6 +46,7 @@ define <2 x i32> @test_vclz_u32(<2 x i32
ret <2 x i32> %vclz1.i
}
+; FALLBACK-NOT: remark{{.*}}test_vclz_s32
define <2 x i32> @test_vclz_s32(<2 x i32> %a) nounwind readnone ssp {
; CHECK-LABEL: test_vclz_s32:
; CHECK: clz.2s v0, v0
@@ -48,18 +55,21 @@ define <2 x i32> @test_vclz_s32(<2 x i32
ret <2 x i32> %vclz1.i
}
+; FALLBACK-NOT: remark{{.*}}test_vclz_u64
define <1 x i64> @test_vclz_u64(<1 x i64> %a) nounwind readnone ssp {
; CHECK-LABEL: test_vclz_u64:
%vclz1.i = tail call <1 x i64> @llvm.ctlz.v1i64(<1 x i64> %a, i1 false) nounwind
ret <1 x i64> %vclz1.i
}
+; FALLBACK-NOT: remark{{.*}}test_vclz_s64
define <1 x i64> @test_vclz_s64(<1 x i64> %a) nounwind readnone ssp {
; CHECK-LABEL: test_vclz_s64:
%vclz1.i = tail call <1 x i64> @llvm.ctlz.v1i64(<1 x i64> %a, i1 false) nounwind
ret <1 x i64> %vclz1.i
}
+; FALLBACK-NOT: remark{{.*}}test_vclzq_u8
define <16 x i8> @test_vclzq_u8(<16 x i8> %a) nounwind readnone ssp {
; CHECK-LABEL: test_vclzq_u8:
; CHECK: clz.16b v0, v0
@@ -68,6 +78,7 @@ define <16 x i8> @test_vclzq_u8(<16 x i8
ret <16 x i8> %vclz.i
}
+; FALLBACK-NOT: remark{{.*}}test_vclzq_s8
define <16 x i8> @test_vclzq_s8(<16 x i8> %a) nounwind readnone ssp {
; CHECK-LABEL: test_vclzq_s8:
; CHECK: clz.16b v0, v0
@@ -76,6 +87,7 @@ define <16 x i8> @test_vclzq_s8(<16 x i8
ret <16 x i8> %vclz.i
}
+; FALLBACK-NOT: remark{{.*}}test_vclzq_u16
define <8 x i16> @test_vclzq_u16(<8 x i16> %a) nounwind readnone ssp {
; CHECK-LABEL: test_vclzq_u16:
; CHECK: clz.8h v0, v0
@@ -84,6 +96,7 @@ define <8 x i16> @test_vclzq_u16(<8 x i1
ret <8 x i16> %vclz1.i
}
+; FALLBACK-NOT: remark{{.*}}test_vclzq_s16
define <8 x i16> @test_vclzq_s16(<8 x i16> %a) nounwind readnone ssp {
; CHECK-LABEL: test_vclzq_s16:
; CHECK: clz.8h v0, v0
@@ -92,6 +105,7 @@ define <8 x i16> @test_vclzq_s16(<8 x i1
ret <8 x i16> %vclz1.i
}
+; FALLBACK-NOT: remark{{.*}}test_vclzq_u32
define <4 x i32> @test_vclzq_u32(<4 x i32> %a) nounwind readnone ssp {
; CHECK-LABEL: test_vclzq_u32:
; CHECK: clz.4s v0, v0
@@ -100,6 +114,7 @@ define <4 x i32> @test_vclzq_u32(<4 x i3
ret <4 x i32> %vclz1.i
}
+; FALLBACK-NOT: remark{{.*}}test_vclzq_s32
define <4 x i32> @test_vclzq_s32(<4 x i32> %a) nounwind readnone ssp {
; CHECK-LABEL: test_vclzq_s32:
; CHECK: clz.4s v0, v0
@@ -108,12 +123,14 @@ define <4 x i32> @test_vclzq_s32(<4 x i3
ret <4 x i32> %vclz1.i
}
+; FALLBACK-NOT: remark{{.*}}test_vclzq_u64
define <2 x i64> @test_vclzq_u64(<2 x i64> %a) nounwind readnone ssp {
; CHECK-LABEL: test_vclzq_u64:
%vclz1.i = tail call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false) nounwind
ret <2 x i64> %vclz1.i
}
+; FALLBACK-NOT: remark{{.*}}test_vclzq_s64
define <2 x i64> @test_vclzq_s64(<2 x i64> %a) nounwind readnone ssp {
; CHECK-LABEL: test_vclzq_s64:
%vclz1.i = tail call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false) nounwind
More information about the llvm-commits mailing list