[llvm] [NFC][AMDGPU] Move cmp+select arguments optimization to SIISelLowering. (PR #150929)

Mon Jul 28 04:37:34 PDT 2025

https://github.com/dfukalov created https://github.com/llvm/llvm-project/pull/150929

As requested in #148740.

>From 03b089b9b7e1aace9313ed128e7012bba7921fa5 Mon Sep 17 00:00:00 2001
From: Daniil Fukalov <dfukalov at gmail.com>
Date: Mon, 28 Jul 2025 13:34:06 +0200
Subject: [PATCH] [NFC][AMDGPU] Move cmp+select arguments optimization to
 SIISelLowering.

As requested in #148740.
---
 llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 83 -------------------
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp     | 76 +++++++++++++++++
 llvm/lib/Target/AMDGPU/SIISelLowering.h       |  1 +
 3 files changed, 77 insertions(+), 83 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index f25ce8723a2dc..61189337e5233 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -4846,94 +4846,11 @@ AMDGPUTargetLowering::foldFreeOpFromSelect(TargetLowering::DAGCombinerInfo &DCI,
   return SDValue();
 }
 
-// Detect when CMP and SELECT use the same constant and fold them to avoid
-// loading the constant twice. Specifically handles patterns like:
-// %cmp = icmp eq i32 %val, 4242
-// %sel = select i1 %cmp, i32 4242, i32 %other
-// It can be optimized to reuse %val instead of 4242 in select.
-static SDValue
-foldCmpSelectWithSharedConstant(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
-                                const AMDGPUSubtarget *ST) {
-  SDValue Cond = N->getOperand(0);
-  SDValue TrueVal = N->getOperand(1);
-  SDValue FalseVal = N->getOperand(2);
-
-  // Check if condition is a comparison.
-  if (Cond.getOpcode() != ISD::SETCC)
-    return SDValue();
-
-  SDValue LHS = Cond.getOperand(0);
-  SDValue RHS = Cond.getOperand(1);
-  ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
-
-  bool isFloatingPoint = LHS.getValueType().isFloatingPoint();
-  bool isInteger = LHS.getValueType().isInteger();
-
-  // Handle simple floating-point and integer types only.
-  if (!isFloatingPoint && !isInteger)
-    return SDValue();
-
-  bool isEquality = CC == (isFloatingPoint ? ISD::SETOEQ : ISD::SETEQ);
-  bool isNonEquality = CC == (isFloatingPoint ? ISD::SETONE : ISD::SETNE);
-  if (!isEquality && !isNonEquality)
-    return SDValue();
-
-  SDValue ArgVal, ConstVal;
-  if ((isFloatingPoint && isa<ConstantFPSDNode>(RHS)) ||
-      (isInteger && isa<ConstantSDNode>(RHS))) {
-    ConstVal = RHS;
-    ArgVal = LHS;
-  } else if ((isFloatingPoint && isa<ConstantFPSDNode>(LHS)) ||
-             (isInteger && isa<ConstantSDNode>(LHS))) {
-    ConstVal = LHS;
-    ArgVal = RHS;
-  } else {
-    return SDValue();
-  }
-
-  // Check if constant should not be optimized - early return if not.
-  if (isFloatingPoint) {
-    const APFloat &Val = cast<ConstantFPSDNode>(ConstVal)->getValueAPF();
-    const GCNSubtarget *GCNST = static_cast<const GCNSubtarget *>(ST);
-
-    // Only optimize normal floating-point values (finite, non-zero, and
-    // non-subnormal as per IEEE 754), skip optimization for inlinable
-    // floating-point constants.
-    if (!Val.isNormal() || GCNST->getInstrInfo()->isInlineConstant(Val))
-      return SDValue();
-  } else {
-    int64_t IntVal = cast<ConstantSDNode>(ConstVal)->getSExtValue();
-
-    // Skip optimization for inlinable integer immediates.
-    // Inlinable immediates include: -16 to 64 (inclusive).
-    if (IntVal >= -16 && IntVal <= 64)
-      return SDValue();
-  }
-
-  // For equality and non-equality comparisons, patterns:
-  // select (setcc x, const), const, y -> select (setcc x, const), x, y
-  // select (setccinv x, const), y, const -> select (setccinv x, const), y, x
-  if (!(isEquality && TrueVal == ConstVal) &&
-      !(isNonEquality && FalseVal == ConstVal))
-    return SDValue();
-
-  SDValue SelectLHS = (isEquality && TrueVal == ConstVal) ? ArgVal : TrueVal;
-  SDValue SelectRHS =
-      (isNonEquality && FalseVal == ConstVal) ? ArgVal : FalseVal;
-  return DCI.DAG.getNode(ISD::SELECT, SDLoc(N), N->getValueType(0), Cond,
-                         SelectLHS, SelectRHS);
-}
-
 SDValue AMDGPUTargetLowering::performSelectCombine(SDNode *N,
                                                    DAGCombinerInfo &DCI) const {
   if (SDValue Folded = foldFreeOpFromSelect(DCI, SDValue(N, 0)))
     return Folded;
 
-  // Try to fold CMP + SELECT patterns with shared constants (both FP and
-  // integer).
-  if (SDValue Folded = foldCmpSelectWithSharedConstant(N, DCI, Subtarget))
-    return Folded;
-
   SDValue Cond = N->getOperand(0);
   if (Cond.getOpcode() != ISD::SETCC)
     return SDValue();
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 8d51ec6dc7f31..0fac43fa78e82 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -15896,6 +15896,78 @@ SDValue SITargetLowering::performClampCombine(SDNode *N,
   return SDValue(CSrc, 0);
 }
 
+SDValue SITargetLowering::performSelectCombine(SDNode *N,
+                                               DAGCombinerInfo &DCI) const {
+
+  // Try to fold CMP + SELECT patterns with shared constants (both FP and
+  // integer).
+  // Detect when CMP and SELECT use the same constant and fold them to avoid
+  // loading the constant twice. Specifically handles patterns like:
+  // %cmp = icmp eq i32 %val, 4242
+  // %sel = select i1 %cmp, i32 4242, i32 %other
+  // It can be optimized to reuse %val instead of 4242 in select.
+  SDValue Cond = N->getOperand(0);
+  SDValue TrueVal = N->getOperand(1);
+  SDValue FalseVal = N->getOperand(2);
+
+  // Check if condition is a comparison.
+  if (Cond.getOpcode() != ISD::SETCC)
+    return SDValue();
+
+  SDValue LHS = Cond.getOperand(0);
+  SDValue RHS = Cond.getOperand(1);
+  ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
+
+  bool isFloatingPoint = LHS.getValueType().isFloatingPoint();
+  bool isInteger = LHS.getValueType().isInteger();
+
+  // Handle simple floating-point and integer types only.
+  if (!isFloatingPoint && !isInteger)
+    return SDValue();
+
+  bool isEquality = CC == (isFloatingPoint ? ISD::SETOEQ : ISD::SETEQ);
+  bool isNonEquality = CC == (isFloatingPoint ? ISD::SETONE : ISD::SETNE);
+  if (!isEquality && !isNonEquality)
+    return SDValue();
+
+  SDValue ArgVal, ConstVal;
+  if ((isFloatingPoint && isa<ConstantFPSDNode>(RHS)) ||
+      (isInteger && isa<ConstantSDNode>(RHS))) {
+    ConstVal = RHS;
+    ArgVal = LHS;
+  } else if ((isFloatingPoint && isa<ConstantFPSDNode>(LHS)) ||
+             (isInteger && isa<ConstantSDNode>(LHS))) {
+    ConstVal = LHS;
+    ArgVal = RHS;
+  } else {
+    return SDValue();
+  }
+
+  // Skip optimization for inlinable immediates.
+  if (isFloatingPoint) {
+    const APFloat &Val = cast<ConstantFPSDNode>(ConstVal)->getValueAPF();
+    if (!Val.isNormal() ||
+        Subtarget->getInstrInfo()->isInlineConstant(Val))
+      return SDValue();
+  } else {
+    if (AMDGPU::isInlinableIntLiteral(cast<ConstantSDNode>(ConstVal)->getSExtValue()))
+      return SDValue();
+  }
+
+  // For equality and non-equality comparisons, patterns:
+  // select (setcc x, const), const, y -> select (setcc x, const), x, y
+  // select (setccinv x, const), y, const -> select (setccinv x, const), y, x
+  if (!(isEquality && TrueVal == ConstVal) &&
+      !(isNonEquality && FalseVal == ConstVal))
+    return SDValue();
+
+  SDValue SelectLHS = (isEquality && TrueVal == ConstVal) ? ArgVal : TrueVal;
+  SDValue SelectRHS =
+      (isNonEquality && FalseVal == ConstVal) ? ArgVal : FalseVal;
+  return DCI.DAG.getNode(ISD::SELECT, SDLoc(N), N->getValueType(0), Cond,
+                         SelectLHS, SelectRHS);
+}
+
 SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
                                             DAGCombinerInfo &DCI) const {
   switch (N->getOpcode()) {
@@ -15944,6 +16016,10 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
     return performFMulCombine(N, DCI);
   case ISD::SETCC:
     return performSetCCCombine(N, DCI);
+  case ISD::SELECT:
+    if (auto Res = performSelectCombine(N, DCI))
+      return Res;
+    break;
   case ISD::FMAXNUM:
   case ISD::FMINNUM:
   case ISD::FMAXNUM_IEEE:
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index acf6158572a4d..dedd9ae170774 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -211,6 +211,7 @@ class SITargetLowering final : public AMDGPUTargetLowering {
   SDValue performExtractVectorEltCombine(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue performInsertVectorEltCombine(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue performFPRoundCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+  SDValue performSelectCombine(SDNode *N, DAGCombinerInfo &DCI) const;
 
   SDValue reassociateScalarOps(SDNode *N, SelectionDAG &DAG) const;
   unsigned getFusedOpcode(const SelectionDAG &DAG,