[llvm] [Hexagon] Implement isMaskAndCmp0FoldingBeneficial (PR #166891)

Sun Nov 9 18:20:06 PST 2025

https://github.com/svs-quic updated https://github.com/llvm/llvm-project/pull/166891

>From ae7ef105469b953fe0441d7549ccbd52ad379fa8 Mon Sep 17 00:00:00 2001
From: Sudharsan Veeravalli <quic_svs at quicinc.com>
Date: Fri, 7 Nov 2025 10:42:45 +0530
Subject: [PATCH 1/2] Precommit test

---
 .../CodeGen/Hexagon/and_mask_cmp0_sink.ll     | 71 +++++++++++++++++++
 1 file changed, 71 insertions(+)
 create mode 100644 llvm/test/CodeGen/Hexagon/and_mask_cmp0_sink.ll

diff --git a/llvm/test/CodeGen/Hexagon/and_mask_cmp0_sink.ll b/llvm/test/CodeGen/Hexagon/and_mask_cmp0_sink.ll
new file mode 100644
index 0000000000000..8eef8c73354aa
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/and_mask_cmp0_sink.ll
@@ -0,0 +1,71 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; Test that 'and' mask is sunk to the cmp use block only if it is masking a single bit
+; RUN: llc -march=hexagon --verify-machineinstrs < %s | FileCheck %s
+
+@A = global i32 zeroinitializer
+
+define i32 @and_sink1(i32 %a) {
+; CHECK-LABEL: and_sink1:
+; CHECK:         .cfi_startproc
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r1 = and(r0,##2048)
+; CHECK-NEXT:     r0 = ##A
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     p0 = cmp.eq(r1,#0)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  .LBB0_1: // %bb0
+; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    {
+; CHECK-NEXT:     if (p0) jump:nt .LBB0_1
+; CHECK-NEXT:     memw(r0+#0) = #0
+; CHECK-NEXT:    }
+; CHECK-NEXT:  // %bb.2: // %bb2
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r0 = #0
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+  %and = and i32 %a, 2048
+  br label %bb0
+bb0:
+  %cmp = icmp eq i32 %and, 0
+  store i32 0, i32* @A
+  br i1 %cmp, label %bb0, label %bb2
+bb2:
+  ret i32 0
+}
+
+define i32 @and_sink2(i32 %a) {
+; CHECK-LABEL: and_sink2:
+; CHECK:         .cfi_startproc
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r1 = and(r0,##2049)
+; CHECK-NEXT:     r0 = ##A
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     p0 = cmp.eq(r1,#0)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  .LBB1_1: // %bb0
+; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    {
+; CHECK-NEXT:     if (p0) jump:nt .LBB1_1
+; CHECK-NEXT:     memw(r0+#0) = #0
+; CHECK-NEXT:    }
+; CHECK-NEXT:  // %bb.2: // %bb2
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r0 = #0
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+  %and = and i32 %a, 2049
+  br label %bb0
+bb0:
+  %cmp = icmp eq i32 %and, 0
+  store i32 0, i32* @A
+  br i1 %cmp, label %bb0, label %bb2
+bb2:
+  ret i32 0
+}

>From 5deaf1336b7301ede75e1cc3b162215823bc3aa2 Mon Sep 17 00:00:00 2001
From: Sudharsan Veeravalli <quic_svs at quicinc.com>
Date: Fri, 7 Nov 2025 10:49:40 +0530
Subject: [PATCH 2/2] isMaskAndCmp0FoldingBeneficial

---
 llvm/lib/Target/Hexagon/HexagonISelLowering.cpp | 10 ++++++++++
 llvm/lib/Target/Hexagon/HexagonISelLowering.h   |  2 ++
 llvm/test/CodeGen/Hexagon/and_mask_cmp0_sink.ll |  5 +----
 3 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index 526b4de975915..04a97606cb7f8 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -3948,3 +3948,13 @@ HexagonTargetLowering::shouldExpandAtomicCmpXchgInIR(
     AtomicCmpXchgInst *AI) const {
   return AtomicExpansionKind::LLSC;
 }
+
+bool HexagonTargetLowering::isMaskAndCmp0FoldingBeneficial(
+    const Instruction &AndI) const {
+  // Only sink the 'and' mask to the cmp use block when it masks a single bit;
+  // the and/cmp pair can then be selected as one tstbit feeding the branch.
+  ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
+  if (!Mask)
+    return false;
+  return Mask->getValue().isPowerOf2();
+}
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
index 8d04edbea5b43..4ac3e7671592a 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
@@ -160,6 +160,8 @@ class HexagonTargetLowering : public TargetLowering {
 
   bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
 
+  bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
+
   /// Return true if an FMA operation is faster than a pair of mul and add
   /// instructions. fmuladd intrinsics will be expanded to FMAs when this
   /// method returns true (and FMAs are legal), otherwise fmuladd is
diff --git a/llvm/test/CodeGen/Hexagon/and_mask_cmp0_sink.ll b/llvm/test/CodeGen/Hexagon/and_mask_cmp0_sink.ll
index 8eef8c73354aa..b5c3399ce6605 100644
--- a/llvm/test/CodeGen/Hexagon/and_mask_cmp0_sink.ll
+++ b/llvm/test/CodeGen/Hexagon/and_mask_cmp0_sink.ll
@@ -9,12 +9,9 @@ define i32 @and_sink1(i32 %a) {
 ; CHECK:         .cfi_startproc
 ; CHECK-NEXT:  // %bb.0:
 ; CHECK-NEXT:    {
-; CHECK-NEXT:     r1 = and(r0,##2048)
+; CHECK-NEXT:     p0 = !tstbit(r0,#11)
 ; CHECK-NEXT:     r0 = ##A
 ; CHECK-NEXT:    }
-; CHECK-NEXT:    {
-; CHECK-NEXT:     p0 = cmp.eq(r1,#0)
-; CHECK-NEXT:    }
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  .LBB0_1: // %bb0
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1