[llvm] 2f778e6 - [AArch64] SelectionDag codegen for gpr CTZ instruction

Ties Stuij via llvm-commits llvm-commits at lists.llvm.org
Tue Dec 6 02:43:03 PST 2022


Author: Ties Stuij
Date: 2022-12-06T10:42:07Z
New Revision: 2f778e60c9bf6bf446ff339e2a9393dac21a7095

URL: https://github.com/llvm/llvm-project/commit/2f778e60c9bf6bf446ff339e2a9393dac21a7095
DIFF: https://github.com/llvm/llvm-project/commit/2f778e60c9bf6bf446ff339e2a9393dac21a7095.diff

LOG: [AArch64] SelectionDag codegen for gpr CTZ instruction

When the CSSC feature is available, we should use the CTZ instruction in
SelectionDAG where applicable:

- CTTZ intrinsics are lowered to use the gpr CTZ instruction
- the BITREVERSE -> CTLZ instruction pattern gets replaced by CTZ

spec:
https://developer.arm.com/documentation/ddi0602/2022-09/Base-Instructions/CTZ--Count-Trailing-Zeros-

Reviewed By: lenary

Differential Revision: https://reviews.llvm.org/D138811

Added: 
    llvm/test/CodeGen/AArch64/gpr_cttz.ll

Modified: 
    llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
    llvm/lib/Target/AArch64/AArch64InstrInfo.td

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 4ae57c9ec9e1f..a3b82d0987e20 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -546,6 +546,9 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::CTPOP, MVT::i64, Legal);
     setOperationAction(ISD::CTPOP, MVT::i128, Expand);
     setOperationAction(ISD::PARITY, MVT::i128, Expand);
+    setOperationAction(ISD::CTTZ, MVT::i32, Legal);
+    setOperationAction(ISD::CTTZ, MVT::i64, Legal);
+    setOperationAction(ISD::CTTZ, MVT::i128, Expand);
   } else {
     setOperationAction(ISD::CTPOP, MVT::i32, Custom);
     setOperationAction(ISD::CTPOP, MVT::i64, Custom);
@@ -932,6 +935,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
 
   setTargetDAGCombine(ISD::GlobalAddress);
 
+  setTargetDAGCombine(ISD::CTLZ);
+
   // In case of strict alignment, avoid an excessive number of byte wide stores.
   MaxStoresPerMemsetOptSize = 8;
   MaxStoresPerMemset =
@@ -20280,6 +20285,17 @@ static SDValue performGlobalAddressCombine(SDNode *N, SelectionDAG &DAG,
                      DAG.getConstant(MinOffset, DL, MVT::i64));
 }
 
+static SDValue performCTLZCombine(SDNode *N, SelectionDAG &DAG,
+                                  const AArch64Subtarget *Subtarget) {
+  SDValue BR = N->getOperand(0);
+  if (!Subtarget->hasCSSC() || BR.getOpcode() != ISD::BITREVERSE ||
+      !BR.getValueType().isScalarInteger())
+    return SDValue();
+
+  SDLoc DL(N);
+  return DAG.getNode(ISD::CTTZ, DL, BR.getValueType(), BR.getOperand(0));
+}
+
 // Turns the vector of indices into a vector of byte offstes by scaling Offset
 // by (BitWidth / 8).
 static SDValue getScaledOffsetForBitWidth(SelectionDAG &DAG, SDValue Offset,
@@ -21185,6 +21201,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
     break;
   case ISD::GlobalAddress:
     return performGlobalAddressCombine(N, DAG, Subtarget, getTargetMachine());
+  case ISD::CTLZ:
+    return performCTLZCombine(N, DAG, Subtarget);
   }
   return SDValue();
 }

diff  --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index a6a70e07404a5..409c9a9221e99 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -8556,7 +8556,7 @@ defm RCWSWP  : ReadCheckWriteOperation<0b010, "swp">;
 //===----------------------------------------------------------------------===//
 defm ABS : OneOperandData<0b001000, "abs">, Requires<[HasCSSC]>;
 defm CNT : OneOperandData<0b000111, "cnt", ctpop>, Requires<[HasCSSC]>;
-defm CTZ : OneOperandData<0b000110, "ctz">, Requires<[HasCSSC]>;
+defm CTZ : OneOperandData<0b000110, "ctz", cttz>, Requires<[HasCSSC]>;
 
 defm SMAX : ComparisonOp<0, 0, "smax">, Requires<[HasCSSC]>;
 defm SMIN : ComparisonOp<0, 1, "smin">, Requires<[HasCSSC]>;

diff  --git a/llvm/test/CodeGen/AArch64/gpr_cttz.ll b/llvm/test/CodeGen/AArch64/gpr_cttz.ll
new file mode 100644
index 0000000000000..632514f5b805d
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/gpr_cttz.ll
@@ -0,0 +1,172 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-linux-gnu | FileCheck %s
+; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-linux-gnu -mattr=+cssc | FileCheck %s -check-prefix=CHECK-CSSC
+
+define i4 @cttz4(i4 %x) {
+; CHECK-LABEL: cttz4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    orr w8, w0, #0x10
+; CHECK-NEXT:    rbit w8, w8
+; CHECK-NEXT:    clz w0, w8
+; CHECK-NEXT:    ret
+;
+; CHECK-CSSC-LABEL: cttz4:
+; CHECK-CSSC:       // %bb.0:
+; CHECK-CSSC-NEXT:    orr w8, w0, #0x10
+; CHECK-CSSC-NEXT:    ctz w0, w8
+; CHECK-CSSC-NEXT:    ret
+  %ctz = tail call i4 @llvm.cttz.i4(i4 %x)
+  ret i4 %ctz
+}
+
+define i8 @cttz8(i8 %x) {
+; CHECK-LABEL: cttz8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    orr w8, w0, #0x100
+; CHECK-NEXT:    rbit w8, w8
+; CHECK-NEXT:    clz w0, w8
+; CHECK-NEXT:    ret
+;
+; CHECK-CSSC-LABEL: cttz8:
+; CHECK-CSSC:       // %bb.0:
+; CHECK-CSSC-NEXT:    orr w8, w0, #0x100
+; CHECK-CSSC-NEXT:    ctz w0, w8
+; CHECK-CSSC-NEXT:    ret
+  %ctz = tail call i8 @llvm.cttz.i8(i8 %x)
+  ret i8 %ctz
+}
+
+define i16 @cttz16(i16 %x) {
+; CHECK-LABEL: cttz16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    orr w8, w0, #0x10000
+; CHECK-NEXT:    rbit w8, w8
+; CHECK-NEXT:    clz w0, w8
+; CHECK-NEXT:    ret
+;
+; CHECK-CSSC-LABEL: cttz16:
+; CHECK-CSSC:       // %bb.0:
+; CHECK-CSSC-NEXT:    orr w8, w0, #0x10000
+; CHECK-CSSC-NEXT:    ctz w0, w8
+; CHECK-CSSC-NEXT:    ret
+  %ctz = tail call i16 @llvm.cttz.i16(i16 %x)
+  ret i16 %ctz
+}
+
+define i17 @cttz17(i17 %x) {
+; CHECK-LABEL: cttz17:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    orr w8, w0, #0x20000
+; CHECK-NEXT:    rbit w8, w8
+; CHECK-NEXT:    clz w0, w8
+; CHECK-NEXT:    ret
+;
+; CHECK-CSSC-LABEL: cttz17:
+; CHECK-CSSC:       // %bb.0:
+; CHECK-CSSC-NEXT:    orr w8, w0, #0x20000
+; CHECK-CSSC-NEXT:    ctz w0, w8
+; CHECK-CSSC-NEXT:    ret
+  %ctz = tail call i17 @llvm.cttz.i17(i17 %x)
+  ret i17 %ctz
+}
+
+define i32 @cttz32(i32 %x) nounwind readnone {
+; CHECK-LABEL: cttz32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rbit w8, w0
+; CHECK-NEXT:    clz w0, w8
+; CHECK-NEXT:    ret
+;
+; CHECK-CSSC-LABEL: cttz32:
+; CHECK-CSSC:       // %bb.0:
+; CHECK-CSSC-NEXT:    ctz w0, w0
+; CHECK-CSSC-NEXT:    ret
+  %ctz = tail call i32 @llvm.cttz.i32(i32 %x)
+  ret i32 %ctz
+}
+
+define i64 @cttz64(i64 %x) nounwind readnone {
+; CHECK-LABEL: cttz64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rbit x8, x0
+; CHECK-NEXT:    clz x0, x8
+; CHECK-NEXT:    ret
+;
+; CHECK-CSSC-LABEL: cttz64:
+; CHECK-CSSC:       // %bb.0:
+; CHECK-CSSC-NEXT:    ctz x0, x0
+; CHECK-CSSC-NEXT:    ret
+  %ctz = tail call i64 @llvm.cttz.i64(i64 %x)
+  ret i64 %ctz
+}
+
+define i128 @cttz128(i128 %x) nounwind readnone {
+; CHECK-LABEL: cttz128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rbit x9, x1
+; CHECK-NEXT:    rbit x8, x0
+; CHECK-NEXT:    clz x9, x9
+; CHECK-NEXT:    clz x8, x8
+; CHECK-NEXT:    add x9, x9, #64
+; CHECK-NEXT:    cmp x0, #0
+; CHECK-NEXT:    csel x0, x8, x9, ne
+; CHECK-NEXT:    mov x1, xzr
+; CHECK-NEXT:    ret
+;
+; CHECK-CSSC-LABEL: cttz128:
+; CHECK-CSSC:       // %bb.0:
+; CHECK-CSSC-NEXT:    ctz x9, x1
+; CHECK-CSSC-NEXT:    ctz x8, x0
+; CHECK-CSSC-NEXT:    add x9, x9, #64
+; CHECK-CSSC-NEXT:    cmp x0, #0
+; CHECK-CSSC-NEXT:    csel x0, x8, x9, ne
+; CHECK-CSSC-NEXT:    mov x1, xzr
+; CHECK-CSSC-NEXT:    ret
+  %ctz = tail call i128 @llvm.cttz.i128(i128 %x)
+  ret i128 %ctz
+}
+
+define i32 @cttz32combine(i32 %x) nounwind readnone {
+; CHECK-LABEL: cttz32combine:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rbit w8, w0
+; CHECK-NEXT:    clz w0, w8
+; CHECK-NEXT:    ret
+;
+; CHECK-CSSC-LABEL: cttz32combine:
+; CHECK-CSSC:       // %bb.0:
+; CHECK-CSSC-NEXT:    ctz w0, w0
+; CHECK-CSSC-NEXT:    ret
+  %rev = tail call i32 @llvm.bitreverse.i32(i32 %x)
+  %ctz = tail call i32 @llvm.ctlz.i32(i32 %rev)
+  ret i32 %ctz
+}
+
+define i64 @cttz64combine(i64 %x) nounwind readnone {
+; CHECK-LABEL: cttz64combine:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rbit x8, x0
+; CHECK-NEXT:    clz x0, x8
+; CHECK-NEXT:    ret
+;
+; CHECK-CSSC-LABEL: cttz64combine:
+; CHECK-CSSC:       // %bb.0:
+; CHECK-CSSC-NEXT:    ctz x0, x0
+; CHECK-CSSC-NEXT:    ret
+  %rev = tail call i64 @llvm.bitreverse.i64(i64 %x)
+  %ctz = tail call i64 @llvm.ctlz.i64(i64 %rev)
+  ret i64 %ctz
+}
+
+declare i4 @llvm.cttz.i4(i4 %x) nounwind readnone
+declare i8 @llvm.cttz.i8(i8 %x) nounwind readnone
+declare i16 @llvm.cttz.i16(i16 %x) nounwind readnone
+declare i17 @llvm.cttz.i17(i17 %x) nounwind readnone
+declare i32 @llvm.cttz.i32(i32) nounwind readnone
+declare i64 @llvm.cttz.i64(i64) nounwind readnone
+declare i128 @llvm.cttz.i128(i128) nounwind readnone
+
+declare i32 @llvm.ctlz.i32(i32 %x) nounwind readnone
+declare i32 @llvm.bitreverse.i32(i32 %x) nounwind readnone
+declare i64 @llvm.ctlz.i64(i64 %x) nounwind readnone
+declare i64 @llvm.bitreverse.i64(i64 %x) nounwind readnone


        


More information about the llvm-commits mailing list