[llvm] 2f778e6 - [AArch64] SelectionDAG codegen for GPR CTZ instruction
Ties Stuij via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 6 02:43:03 PST 2022
Author: Ties Stuij
Date: 2022-12-06T10:42:07Z
New Revision: 2f778e60c9bf6bf446ff339e2a9393dac21a7095
URL: https://github.com/llvm/llvm-project/commit/2f778e60c9bf6bf446ff339e2a9393dac21a7095
DIFF: https://github.com/llvm/llvm-project/commit/2f778e60c9bf6bf446ff339e2a9393dac21a7095.diff
LOG: [AArch64] SelectionDAG codegen for GPR CTZ instruction
When the CSSC feature is available, we should use the CTZ instruction in SelectionDAG
where applicable:
- CTTZ intrinsics are lowered to the GPR CTZ instruction
- the BITREVERSE -> CTLZ instruction pattern is replaced by CTZ
spec:
https://developer.arm.com/documentation/ddi0602/2022-09/Base-Instructions/CTZ--Count-Trailing-Zeros-
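For illustration only (not part of the commit): a minimal C++ sketch of the
user-visible effect, with the expected AArch64 codegen noted in comments. The
function name is ours, and the feature flag is the same -mattr=+cssc used in
the RUN lines of the new test.

    // Hypothetical example, C++20 (<bit>).
    #include <bit>
    #include <cstdint>

    // Without CSSC, AArch64 lowers this as "rbit + clz"; with the CSSC
    // feature enabled (cf. llc -mattr=+cssc in the test below) it
    // becomes a single "ctz w0, w0".
    uint32_t trailing_zeros(uint32_t x) {
      return std::countr_zero(x); // selected as ISD::CTTZ in SelectionDAG
    }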
Reviewed By: lenary
Differential Revision: https://reviews.llvm.org/D138811
Added:
llvm/test/CodeGen/AArch64/gpr_cttz.ll
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/lib/Target/AArch64/AArch64InstrInfo.td
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 4ae57c9ec9e1f..a3b82d0987e20 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -546,6 +546,9 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::CTPOP, MVT::i64, Legal);
setOperationAction(ISD::CTPOP, MVT::i128, Expand);
setOperationAction(ISD::PARITY, MVT::i128, Expand);
+ setOperationAction(ISD::CTTZ, MVT::i32, Legal);
+ setOperationAction(ISD::CTTZ, MVT::i64, Legal);
+ setOperationAction(ISD::CTTZ, MVT::i128, Expand);
} else {
setOperationAction(ISD::CTPOP, MVT::i32, Custom);
setOperationAction(ISD::CTPOP, MVT::i64, Custom);
@@ -932,6 +935,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::GlobalAddress);
+ setTargetDAGCombine(ISD::CTLZ);
+
// In case of strict alignment, avoid an excessive number of byte wide stores.
MaxStoresPerMemsetOptSize = 8;
MaxStoresPerMemset =
@@ -20280,6 +20285,17 @@ static SDValue performGlobalAddressCombine(SDNode *N, SelectionDAG &DAG,
DAG.getConstant(MinOffset, DL, MVT::i64));
}
+static SDValue performCTLZCombine(SDNode *N, SelectionDAG &DAG,
+ const AArch64Subtarget *Subtarget) {
+ SDValue BR = N->getOperand(0);
+ if (!Subtarget->hasCSSC() || BR.getOpcode() != ISD::BITREVERSE ||
+ !BR.getValueType().isScalarInteger())
+ return SDValue();
+
+ SDLoc DL(N);
+ return DAG.getNode(ISD::CTTZ, DL, BR.getValueType(), BR.getOperand(0));
+}
+
// Turns the vector of indices into a vector of byte offsets by scaling Offset
// by (BitWidth / 8).
static SDValue getScaledOffsetForBitWidth(SelectionDAG &DAG, SDValue Offset,
@@ -21185,6 +21201,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
break;
case ISD::GlobalAddress:
return performGlobalAddressCombine(N, DAG, Subtarget, getTargetMachine());
+ case ISD::CTLZ:
+ return performCTLZCombine(N, DAG, Subtarget);
}
return SDValue();
}
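The combine above rests on the identity cttz(x) == ctlz(bitreverse(x)):
reversing the bits turns trailing zeros into leading zeros. A self-contained
C++ check of that identity (the reverse_bits helper is our illustration;
note that std::byteswap reverses bytes, not bits):

    #include <bit>
    #include <cassert>
    #include <cstdint>

    // Reverse the bit order of a 32-bit value.
    uint32_t reverse_bits(uint32_t x) {
      uint32_t r = 0;
      for (int i = 0; i < 32; ++i) {
        r = (r << 1) | (x & 1);
        x >>= 1;
      }
      return r;
    }

    int main() {
      for (uint32_t x : {0u, 1u, 2u, 0xf0u, 12345u, 0x80000000u})
        assert(std::countl_zero(reverse_bits(x)) == std::countr_zero(x));
    }

The identity also holds for x == 0, where both sides equal the bit width (32).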
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index a6a70e07404a5..409c9a9221e99 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -8556,7 +8556,7 @@ defm RCWSWP : ReadCheckWriteOperation<0b010, "swp">;
//===----------------------------------------------------------------------===//
defm ABS : OneOperandData<0b001000, "abs">, Requires<[HasCSSC]>;
defm CNT : OneOperandData<0b000111, "cnt", ctpop>, Requires<[HasCSSC]>;
-defm CTZ : OneOperandData<0b000110, "ctz">, Requires<[HasCSSC]>;
+defm CTZ : OneOperandData<0b000110, "ctz", cttz>, Requires<[HasCSSC]>;
defm SMAX : ComparisonOp<0, 0, "smax">, Requires<[HasCSSC]>;
defm SMIN : ComparisonOp<0, 1, "smin">, Requires<[HasCSSC]>;
diff --git a/llvm/test/CodeGen/AArch64/gpr_cttz.ll b/llvm/test/CodeGen/AArch64/gpr_cttz.ll
new file mode 100644
index 0000000000000..632514f5b805d
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/gpr_cttz.ll
@@ -0,0 +1,172 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-linux-gnu | FileCheck %s
+; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-linux-gnu -mattr=+cssc | FileCheck %s -check-prefix=CHECK-CSSC
+
+define i4 @cttz4(i4 %x) {
+; CHECK-LABEL: cttz4:
+; CHECK: // %bb.0:
+; CHECK-NEXT: orr w8, w0, #0x10
+; CHECK-NEXT: rbit w8, w8
+; CHECK-NEXT: clz w0, w8
+; CHECK-NEXT: ret
+;
+; CHECK-CSSC-LABEL: cttz4:
+; CHECK-CSSC: // %bb.0:
+; CHECK-CSSC-NEXT: orr w8, w0, #0x10
+; CHECK-CSSC-NEXT: ctz w0, w8
+; CHECK-CSSC-NEXT: ret
+ %ctz = tail call i4 @llvm.cttz.i4(i4 %x)
+ ret i4 %ctz
+}
+
+define i8 @cttz8(i8 %x) {
+; CHECK-LABEL: cttz8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: orr w8, w0, #0x100
+; CHECK-NEXT: rbit w8, w8
+; CHECK-NEXT: clz w0, w8
+; CHECK-NEXT: ret
+;
+; CHECK-CSSC-LABEL: cttz8:
+; CHECK-CSSC: // %bb.0:
+; CHECK-CSSC-NEXT: orr w8, w0, #0x100
+; CHECK-CSSC-NEXT: ctz w0, w8
+; CHECK-CSSC-NEXT: ret
+ %ctz = tail call i8 @llvm.cttz.i8(i8 %x)
+ ret i8 %ctz
+}
+
+define i16 @cttz16(i16 %x) {
+; CHECK-LABEL: cttz16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: orr w8, w0, #0x10000
+; CHECK-NEXT: rbit w8, w8
+; CHECK-NEXT: clz w0, w8
+; CHECK-NEXT: ret
+;
+; CHECK-CSSC-LABEL: cttz16:
+; CHECK-CSSC: // %bb.0:
+; CHECK-CSSC-NEXT: orr w8, w0, #0x10000
+; CHECK-CSSC-NEXT: ctz w0, w8
+; CHECK-CSSC-NEXT: ret
+ %ctz = tail call i16 @llvm.cttz.i16(i16 %x)
+ ret i16 %ctz
+}
+
+define i17 @cttz17(i17 %x) {
+; CHECK-LABEL: cttz17:
+; CHECK: // %bb.0:
+; CHECK-NEXT: orr w8, w0, #0x20000
+; CHECK-NEXT: rbit w8, w8
+; CHECK-NEXT: clz w0, w8
+; CHECK-NEXT: ret
+;
+; CHECK-CSSC-LABEL: cttz17:
+; CHECK-CSSC: // %bb.0:
+; CHECK-CSSC-NEXT: orr w8, w0, #0x20000
+; CHECK-CSSC-NEXT: ctz w0, w8
+; CHECK-CSSC-NEXT: ret
+ %ctz = tail call i17 @llvm.cttz.i17(i17 %x)
+ ret i17 %ctz
+}
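In the narrow-type tests above, the orr sets the bit just past the type's
width (#0x10 for i4, #0x100 for i8, and so on). That makes the widened
register provably nonzero and makes cttz of an all-zero input come out as
exactly the bit width. The same trick in C++, as an illustration only:

    #include <bit>
    #include <cstdint>

    // cttz of an i4 carried in the low bits of a 32-bit register.
    // Setting bit 4 mirrors "orr w8, w0, #0x10" above: the value is
    // now nonzero, and an all-zero i4 input yields 4.
    uint32_t cttz4(uint32_t x) {
      return std::countr_zero(x | 0x10u);
    }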
+
+define i32 @cttz32(i32 %x) nounwind readnone {
+; CHECK-LABEL: cttz32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rbit w8, w0
+; CHECK-NEXT: clz w0, w8
+; CHECK-NEXT: ret
+;
+; CHECK-CSSC-LABEL: cttz32:
+; CHECK-CSSC: // %bb.0:
+; CHECK-CSSC-NEXT: ctz w0, w0
+; CHECK-CSSC-NEXT: ret
+ %ctz = tail call i32 @llvm.cttz.i32(i32 %x)
+ ret i32 %ctz
+}
+
+define i64 @cttz64(i64 %x) nounwind readnone {
+; CHECK-LABEL: cttz64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rbit x8, x0
+; CHECK-NEXT: clz x0, x8
+; CHECK-NEXT: ret
+;
+; CHECK-CSSC-LABEL: cttz64:
+; CHECK-CSSC: // %bb.0:
+; CHECK-CSSC-NEXT: ctz x0, x0
+; CHECK-CSSC-NEXT: ret
+ %ctz = tail call i64 @llvm.cttz.i64(i64 %x)
+ ret i64 %ctz
+}
+
+define i128 @cttz128(i128 %x) nounwind readnone {
+; CHECK-LABEL: cttz128:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rbit x9, x1
+; CHECK-NEXT: rbit x8, x0
+; CHECK-NEXT: clz x9, x9
+; CHECK-NEXT: clz x8, x8
+; CHECK-NEXT: add x9, x9, #64
+; CHECK-NEXT: cmp x0, #0
+; CHECK-NEXT: csel x0, x8, x9, ne
+; CHECK-NEXT: mov x1, xzr
+; CHECK-NEXT: ret
+;
+; CHECK-CSSC-LABEL: cttz128:
+; CHECK-CSSC: // %bb.0:
+; CHECK-CSSC-NEXT: ctz x9, x1
+; CHECK-CSSC-NEXT: ctz x8, x0
+; CHECK-CSSC-NEXT: add x9, x9, #64
+; CHECK-CSSC-NEXT: cmp x0, #0
+; CHECK-CSSC-NEXT: csel x0, x8, x9, ne
+; CHECK-CSSC-NEXT: mov x1, xzr
+; CHECK-CSSC-NEXT: ret
+ %ctz = tail call i128 @llvm.cttz.i128(i128 %x)
+ ret i128 %ctz
+}
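Because ISD::CTTZ on i128 is marked Expand, the legalizer splits it into two
64-bit counts plus a select, which is what the cmp/csel sequence above
implements. The same logic written out in C++ (our sketch of the expansion,
not the legalizer's code):

    #include <bit>
    #include <cstdint>

    // 128-bit cttz over two 64-bit halves: count in the low half if it
    // is nonzero, otherwise 64 plus the count of the high half.
    uint64_t cttz128(uint64_t lo, uint64_t hi) {
      return lo != 0 ? std::countr_zero(lo) : 64 + std::countr_zero(hi);
    }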
+
+define i32 @cttz32combine(i32 %x) nounwind readnone {
+; CHECK-LABEL: cttz32combine:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rbit w8, w0
+; CHECK-NEXT: clz w0, w8
+; CHECK-NEXT: ret
+;
+; CHECK-CSSC-LABEL: cttz32combine:
+; CHECK-CSSC: // %bb.0:
+; CHECK-CSSC-NEXT: ctz w0, w0
+; CHECK-CSSC-NEXT: ret
+ %rev = tail call i32 @llvm.bitreverse.i32(i32 %x)
+ %ctz = tail call i32 @llvm.ctlz.i32(i32 %rev)
+ ret i32 %ctz
+}
+
+define i64 @cttz64combine(i64 %x) nounwind readnone {
+; CHECK-LABEL: cttz64combine:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rbit x8, x0
+; CHECK-NEXT: clz x0, x8
+; CHECK-NEXT: ret
+;
+; CHECK-CSSC-LABEL: cttz64combine:
+; CHECK-CSSC: // %bb.0:
+; CHECK-CSSC-NEXT: ctz x0, x0
+; CHECK-CSSC-NEXT: ret
+ %rev = tail call i64 @llvm.bitreverse.i64(i64 %x)
+ %ctz = tail call i64 @llvm.ctlz.i64(i64 %rev)
+ ret i64 %ctz
+}
+
+declare i4 @llvm.cttz.i4(i4 %x) nounwind readnone
+declare i8 @llvm.cttz.i8(i8 %x) nounwind readnone
+declare i16 @llvm.cttz.i16(i16 %x) nounwind readnone
+declare i17 @llvm.cttz.i17(i17 %x) nounwind readnone
+declare i32 @llvm.cttz.i32(i32) nounwind readnone
+declare i64 @llvm.cttz.i64(i64) nounwind readnone
+declare i128 @llvm.cttz.i128(i128) nounwind readnone
+
+declare i32 @llvm.ctlz.i32(i32 %x) nounwind readnone
+declare i32 @llvm.bitreverse.i32(i32 %x) nounwind readnone
+declare i64 @llvm.ctlz.i64(i64 %x) nounwind readnone
+declare i64 @llvm.bitreverse.i64(i64 %x) nounwind readnone