[llvm] [X86] Extend `combinei64TruncSrlAdd` to handle patterns with `or` and `xor` (PR #128435)

Fri Feb 28 19:33:36 PST 2025

https://github.com/joaotgouveia updated https://github.com/llvm/llvm-project/pull/128435

>From fe8151f38832e8f6861d0e3783a34ca0357ee3c0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20Gouveia?= <jtalonegouveia at gmail.com>
Date: Sun, 23 Feb 2025 16:52:43 +0000
Subject: [PATCH 1/8] [X86] Add pre-commit tests.

---
 .../CodeGen/X86/combine-i64-trunc-srl-add.ll  | 59 ++++++++++++++++++-
 1 file changed, 57 insertions(+), 2 deletions(-)

diff --git a/llvm/test/CodeGen/X86/combine-i64-trunc-srl-add.ll b/llvm/test/CodeGen/X86/combine-i64-trunc-srl-add.ll
index 14992ca5bf488..bd3c05a2c9302 100644
--- a/llvm/test/CodeGen/X86/combine-i64-trunc-srl-add.ll
+++ b/llvm/test/CodeGen/X86/combine-i64-trunc-srl-add.ll
@@ -128,6 +128,61 @@ define i32 @test_trunc_add(i64 %x) {
   ret i32 %conv
 }
 
+define i32 @test_trunc_sub(i64 %x) {
+; X64-LABEL: test_trunc_sub:
+; X64:       # %bb.0:
+; X64-NEXT:    shrq $48, %rdi
+; X64-NEXT:    addl $65522, %edi # imm = 0xFFF2
+; X64-NEXT:    movzwl %di, %eax
+; X64-NEXT:    retq
+  %sub = sub i64 %x, 3940649673949184
+  %shr = lshr i64 %sub, 48
+  %conv = trunc i64 %shr to i32
+  ret i32 %conv
+}
+
+define i32 @test_trunc_and(i64 %x) {
+; X64-LABEL: test_trunc_and:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    shrq $48, %rax
+; X64-NEXT:    andl $14, %eax
+; X64-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-NEXT:    retq
+  %and = and i64 %x, 3940649673949184
+  %shr = lshr i64 %and, 48
+  %conv = trunc i64 %shr to i32
+  ret i32 %conv
+}
+
+define i32 @test_trunc_or(i64 %x) {
+; X64-LABEL: test_trunc_or:
+; X64:       # %bb.0:
+; X64-NEXT:    movabsq $3940649673949184, %rax # imm = 0xE000000000000
+; X64-NEXT:    orq %rdi, %rax
+; X64-NEXT:    shrq $48, %rax
+; X64-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-NEXT:    retq
+  %or = or i64 %x, 3940649673949184
+  %shr = lshr i64 %or, 48
+  %conv = trunc i64 %shr to i32
+  ret i32 %conv
+}
+
+define i32 @test_trunc_xor(i64 %x) {
+; X64-LABEL: test_trunc_xor:
+; X64:       # %bb.0:
+; X64-NEXT:    movabsq $3940649673949184, %rax # imm = 0xE000000000000
+; X64-NEXT:    xorq %rdi, %rax
+; X64-NEXT:    shrq $48, %rax
+; X64-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-NEXT:    retq
+  %xor = xor i64 %x, 3940649673949184
+  %shr = lshr i64 %xor, 48
+  %conv = trunc i64 %shr to i32
+  ret i32 %conv
+}
+
 ; Make sure we don't crash on this test case.
 
 define i32 @pr128158(i64 %x) {
@@ -137,10 +192,10 @@ define i32 @pr128158(i64 %x) {
 ; X64-NEXT:    addq %rdi, %rax
 ; X64-NEXT:    shrq $32, %rax
 ; X64-NEXT:    .p2align 4
-; X64-NEXT:  .LBB9_1: # %for.body
+; X64-NEXT:  .LBB13_1: # %for.body
 ; X64-NEXT:    # =>This Inner Loop Header: Depth=1
 ; X64-NEXT:    cmpl $9, %eax
-; X64-NEXT:    jb .LBB9_1
+; X64-NEXT:    jb .LBB13_1
 ; X64-NEXT:  # %bb.2: # %exit
 ; X64-NEXT:    xorl %eax, %eax
 ; X64-NEXT:    retq

>From 5574dce692ba8c5218106950410655165a9977f5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20Gouveia?= <jtalonegouveia at gmail.com>
Date: Sun, 23 Feb 2025 19:18:11 +0000
Subject: [PATCH 2/8] [X86] Extend `combinei64TruncSrlAdd` to handle patterns
 with `or` and `xor`

---
 llvm/lib/Target/X86/X86ISelLowering.cpp       | 48 ++++++++++---------
 .../CodeGen/X86/combine-i64-trunc-srl-add.ll  |  8 ++--
 2 files changed, 30 insertions(+), 26 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index c146e1e6c0334..47dc9ffb4b24d 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -53733,36 +53733,42 @@ static SDValue combineLRINT_LLRINT(SDNode *N, SelectionDAG &DAG,
   return DAG.getNode(X86ISD::CVTP2SI, DL, VT, Src);
 }
 
-// Attempt to fold some (truncate (srl (add X, C1), C2)) patterns to
-// (add (truncate (srl X, C2)), C1'). C1' will be smaller than C1 so we are able
-// to avoid generating code with MOVABS and large constants in certain cases.
-static SDValue combinei64TruncSrlAdd(SDValue N, EVT VT, SelectionDAG &DAG,
-                                     const SDLoc &DL) {
+// Attempt to fold some (truncate (srl (binop X, C1), C2)) patterns to
+// (binop (truncate (srl X, C2)), C1'). C1' will be smaller than C1 so we are
+// able to avoid generating code with MOVABS and large constants in certain
+// cases.
+static SDValue combinei64TruncSrlBinop(SDValue N, EVT VT, SelectionDAG &DAG,
+                                       const SDLoc &DL) {
   using namespace llvm::SDPatternMatch;
 
-  SDValue AddLhs;
-  APInt AddConst, SrlConst;
+  SDValue BinopLhs;
+  APInt BinopConst, SrlConst;
   if (VT != MVT::i32 ||
-      !sd_match(N, m_AllOf(m_SpecificVT(MVT::i64),
-                           m_Srl(m_OneUse(m_Add(m_Value(AddLhs),
-                                                m_ConstInt(AddConst))),
-                                 m_ConstInt(SrlConst)))))
+      !sd_match(
+          N,
+          m_AllOf(m_SpecificVT(MVT::i64),
+                  m_Srl(m_OneUse(m_AnyOf(
+                            m_Add(m_Value(BinopLhs), m_ConstInt(BinopConst)),
+                            m_Or(m_Value(BinopLhs), m_ConstInt(BinopConst)),
+                            m_Xor(m_Value(BinopLhs), m_ConstInt(BinopConst)))),
+                        m_ConstInt(SrlConst)))))
     return SDValue();
 
-  if (SrlConst.ule(32) || AddConst.countr_zero() < SrlConst.getZExtValue())
+  if (SrlConst.ule(32) || BinopConst.countr_zero() < SrlConst.getZExtValue())
     return SDValue();
 
-  SDValue AddLHSSrl =
-      DAG.getNode(ISD::SRL, DL, MVT::i64, AddLhs, N.getOperand(1));
-  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, AddLHSSrl);
+  SDValue BinopLHSSrl =
+      DAG.getNode(ISD::SRL, DL, MVT::i64, BinopLhs, N.getOperand(1));
+  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, BinopLHSSrl);
 
-  APInt NewAddConstVal = AddConst.lshr(SrlConst).trunc(VT.getSizeInBits());
-  SDValue NewAddConst = DAG.getConstant(NewAddConstVal, DL, VT);
-  SDValue NewAddNode = DAG.getNode(ISD::ADD, DL, VT, Trunc, NewAddConst);
+  APInt NewBinopConstVal = BinopConst.lshr(SrlConst).trunc(VT.getSizeInBits());
+  SDValue NewBinopConst = DAG.getConstant(NewBinopConstVal, DL, VT);
+  SDValue NewBinopNode =
+      DAG.getNode(N.getOperand(0).getOpcode(), DL, VT, Trunc, NewBinopConst);
 
   EVT CleanUpVT =
       EVT::getIntegerVT(*DAG.getContext(), 64 - SrlConst.getZExtValue());
-  return DAG.getZeroExtendInReg(NewAddNode, DL, CleanUpVT);
+  return DAG.getZeroExtendInReg(NewBinopNode, DL, CleanUpVT);
 }
 
 /// Attempt to pre-truncate inputs to arithmetic ops if it will simplify
@@ -53810,11 +53816,9 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
   if (!Src.hasOneUse())
     return SDValue();
 
-  if (SDValue R = combinei64TruncSrlAdd(Src, VT, DAG, DL))
+  if (SDValue R = combinei64TruncSrlBinop(Src, VT, DAG, DL))
     return R;
 
-  // Only support vector truncation for now.
-  // TODO: i64 scalar math would benefit as well.
   if (!VT.isVector())
     return SDValue();
 
diff --git a/llvm/test/CodeGen/X86/combine-i64-trunc-srl-add.ll b/llvm/test/CodeGen/X86/combine-i64-trunc-srl-add.ll
index bd3c05a2c9302..ec29cf9d56c29 100644
--- a/llvm/test/CodeGen/X86/combine-i64-trunc-srl-add.ll
+++ b/llvm/test/CodeGen/X86/combine-i64-trunc-srl-add.ll
@@ -158,9 +158,9 @@ define i32 @test_trunc_and(i64 %x) {
 define i32 @test_trunc_or(i64 %x) {
 ; X64-LABEL: test_trunc_or:
 ; X64:       # %bb.0:
-; X64-NEXT:    movabsq $3940649673949184, %rax # imm = 0xE000000000000
-; X64-NEXT:    orq %rdi, %rax
+; X64-NEXT:    movq %rdi, %rax
 ; X64-NEXT:    shrq $48, %rax
+; X64-NEXT:    orl $14, %eax
 ; X64-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-NEXT:    retq
   %or = or i64 %x, 3940649673949184
@@ -172,9 +172,9 @@ define i32 @test_trunc_or(i64 %x) {
 define i32 @test_trunc_xor(i64 %x) {
 ; X64-LABEL: test_trunc_xor:
 ; X64:       # %bb.0:
-; X64-NEXT:    movabsq $3940649673949184, %rax # imm = 0xE000000000000
-; X64-NEXT:    xorq %rdi, %rax
+; X64-NEXT:    movq %rdi, %rax
 ; X64-NEXT:    shrq $48, %rax
+; X64-NEXT:    xorl $14, %eax
 ; X64-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-NEXT:    retq
   %xor = xor i64 %x, 3940649673949184

>From 55189cc558a626155225e519866675c37c394fad Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20Gouveia?= <jtalonegouveia at gmail.com>
Date: Mon, 24 Feb 2025 17:06:58 +0000
Subject: [PATCH 3/8] [X86] Add extra test cases

---
 .../CodeGen/X86/combine-i64-trunc-srl-add.ll  | 58 ++++++++++++++++---
 1 file changed, 50 insertions(+), 8 deletions(-)

diff --git a/llvm/test/CodeGen/X86/combine-i64-trunc-srl-add.ll b/llvm/test/CodeGen/X86/combine-i64-trunc-srl-add.ll
index ec29cf9d56c29..785865bf06a74 100644
--- a/llvm/test/CodeGen/X86/combine-i64-trunc-srl-add.ll
+++ b/llvm/test/CodeGen/X86/combine-i64-trunc-srl-add.ll
@@ -141,8 +141,8 @@ define i32 @test_trunc_sub(i64 %x) {
   ret i32 %conv
 }
 
-define i32 @test_trunc_and(i64 %x) {
-; X64-LABEL: test_trunc_and:
+define i32 @test_trunc_and_1(i64 %x) {
+; X64-LABEL: test_trunc_and_1:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movq %rdi, %rax
 ; X64-NEXT:    shrq $48, %rax
@@ -155,8 +155,8 @@ define i32 @test_trunc_and(i64 %x) {
   ret i32 %conv
 }
 
-define i32 @test_trunc_or(i64 %x) {
-; X64-LABEL: test_trunc_or:
+define i32 @test_trunc_or_1(i64 %x) {
+; X64-LABEL: test_trunc_or_1:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movq %rdi, %rax
 ; X64-NEXT:    shrq $48, %rax
@@ -169,8 +169,8 @@ define i32 @test_trunc_or(i64 %x) {
   ret i32 %conv
 }
 
-define i32 @test_trunc_xor(i64 %x) {
-; X64-LABEL: test_trunc_xor:
+define i32 @test_trunc_xor_1(i64 %x) {
+; X64-LABEL: test_trunc_xor_1:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movq %rdi, %rax
 ; X64-NEXT:    shrq $48, %rax
@@ -183,6 +183,48 @@ define i32 @test_trunc_xor(i64 %x) {
   ret i32 %conv
 }
 
+define i32 @test_trunc_and_2(i64 %x) {
+; X64-LABEL: test_trunc_and_2:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    shrq $48, %rax
+; X64-NEXT:    andl $13, %eax
+; X64-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-NEXT:    retq
+  %and = and i64 %x, 3940649673949183
+  %shr = lshr i64 %and, 48
+  %conv = trunc i64 %shr to i32
+  ret i32 %conv
+}
+
+define i32 @test_trunc_or_2(i64 %x) {
+; X64-LABEL: test_trunc_or_2:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    shrq $48, %rax
+; X64-NEXT:    orl $13, %eax
+; X64-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-NEXT:    retq
+  %or = or i64 %x, 3940649673949183
+  %shr = lshr i64 %or, 48
+  %conv = trunc i64 %shr to i32
+  ret i32 %conv
+}
+
+define i32 @test_trunc_xor_2(i64 %x) {
+; X64-LABEL: test_trunc_xor_2:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    shrq $48, %rax
+; X64-NEXT:    xorl $13, %eax
+; X64-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-NEXT:    retq
+  %xor = xor i64 %x, 3940649673949183
+  %shr = lshr i64 %xor, 48
+  %conv = trunc i64 %shr to i32
+  ret i32 %conv
+}
+
 ; Make sure we don't crash on this test case.
 
 define i32 @pr128158(i64 %x) {
@@ -192,10 +234,10 @@ define i32 @pr128158(i64 %x) {
 ; X64-NEXT:    addq %rdi, %rax
 ; X64-NEXT:    shrq $32, %rax
 ; X64-NEXT:    .p2align 4
-; X64-NEXT:  .LBB13_1: # %for.body
+; X64-NEXT:  .LBB16_1: # %for.body
 ; X64-NEXT:    # =>This Inner Loop Header: Depth=1
 ; X64-NEXT:    cmpl $9, %eax
-; X64-NEXT:    jb .LBB13_1
+; X64-NEXT:    jb .LBB16_1
 ; X64-NEXT:  # %bb.2: # %exit
 ; X64-NEXT:    xorl %eax, %eax
 ; X64-NEXT:    retq

>From d2f27bf7a76c6c07e723983bd023c5bc88eec920 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20Gouveia?= <jtalonegouveia at gmail.com>
Date: Wed, 26 Feb 2025 21:39:22 +0000
Subject: [PATCH 4/8] [X86] Address reviews

---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 59 ++++++++++++++++---------
 1 file changed, 38 insertions(+), 21 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 47dc9ffb4b24d..6dfcb7e3fb4fb 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -53739,36 +53739,47 @@ static SDValue combineLRINT_LLRINT(SDNode *N, SelectionDAG &DAG,
 // cases.
 static SDValue combinei64TruncSrlBinop(SDValue N, EVT VT, SelectionDAG &DAG,
                                        const SDLoc &DL) {
-  using namespace llvm::SDPatternMatch;
 
-  SDValue BinopLhs;
-  APInt BinopConst, SrlConst;
-  if (VT != MVT::i32 ||
-      !sd_match(
-          N,
-          m_AllOf(m_SpecificVT(MVT::i64),
-                  m_Srl(m_OneUse(m_AnyOf(
-                            m_Add(m_Value(BinopLhs), m_ConstInt(BinopConst)),
-                            m_Or(m_Value(BinopLhs), m_ConstInt(BinopConst)),
-                            m_Xor(m_Value(BinopLhs), m_ConstInt(BinopConst)))),
-                        m_ConstInt(SrlConst)))))
-    return SDValue();
+  SDValue Binop = N.getOperand(0);
+  APInt BinopConst = Binop.getConstantOperandAPInt(1);
+  APInt SrlConst = N.getConstantOperandAPInt(1);
+  unsigned Opcode = Binop.getOpcode();
+
+  auto CleanUpFn = +[](SDValue Op, EVT CleanUpVT, EVT VT, SelectionDAG &DAG,
+                       const SDLoc &DL) {
+    SDValue CleanUp = DAG.getAnyExtOrTrunc(Op, DL, CleanUpVT);
+    return DAG.getAnyExtOrTrunc(CleanUp, DL, VT);
+  };
+  auto ZeroExtCleanUp = +[](SDValue Op, EVT CleanUpVT, EVT VT,
+                            SelectionDAG &DAG, const SDLoc &DL) {
+    return DAG.getZeroExtendInReg(Op, DL, CleanUpVT);
+  };
 
-  if (SrlConst.ule(32) || BinopConst.countr_zero() < SrlConst.getZExtValue())
+  switch (Opcode) {
+  default:
     return SDValue();
+  case ISD::ADD:
+    if (BinopConst.countr_zero() < SrlConst.getZExtValue())
+      return SDValue();
+    CleanUpFn = ZeroExtCleanUp;
+    [[fallthrough]];
+  case ISD::OR:
+  case ISD::XOR:
+    if (SrlConst.ule(32))
+      return SDValue();
+    break;
+  }
 
   SDValue BinopLHSSrl =
-      DAG.getNode(ISD::SRL, DL, MVT::i64, BinopLhs, N.getOperand(1));
+      DAG.getNode(ISD::SRL, DL, MVT::i64, Binop.getOperand(0), N.getOperand(1));
   SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, BinopLHSSrl);
 
   APInt NewBinopConstVal = BinopConst.lshr(SrlConst).trunc(VT.getSizeInBits());
   SDValue NewBinopConst = DAG.getConstant(NewBinopConstVal, DL, VT);
-  SDValue NewBinopNode =
-      DAG.getNode(N.getOperand(0).getOpcode(), DL, VT, Trunc, NewBinopConst);
-
+  SDValue NewBinopNode = DAG.getNode(Opcode, DL, VT, Trunc, NewBinopConst);
   EVT CleanUpVT =
       EVT::getIntegerVT(*DAG.getContext(), 64 - SrlConst.getZExtValue());
-  return DAG.getZeroExtendInReg(NewBinopNode, DL, CleanUpVT);
+  return CleanUpFn(NewBinopNode, CleanUpVT, VT, DAG, DL);
 }
 
 /// Attempt to pre-truncate inputs to arithmetic ops if it will simplify
@@ -53816,8 +53827,14 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
   if (!Src.hasOneUse())
     return SDValue();
 
-  if (SDValue R = combinei64TruncSrlBinop(Src, VT, DAG, DL))
-    return R;
+  if (VT == MVT::i32 && SrcVT == MVT::i64 && SrcOpcode == ISD::SRL &&
+      Src.getOperand(0).getNumOperands() == 2 &&
+      isa<ConstantSDNode>(Src.getOperand(1)) &&
+      isa<ConstantSDNode>(Src.getOperand(0).getOperand(1))) {
+    if (SDValue R = combinei64TruncSrlBinop(Src, VT, DAG, DL))
+      return R;
+    return SDValue();
+  }
 
   if (!VT.isVector())
     return SDValue();

>From 1ba8e326100ffdaa6fbee4990d26a706913d3f1d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20Gouveia?= <jtalonegouveia at gmail.com>
Date: Thu, 27 Feb 2025 10:12:47 +0000
Subject: [PATCH 5/8] [X86] Improve readability

---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 50 +++++++++++--------------
 1 file changed, 22 insertions(+), 28 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 6dfcb7e3fb4fb..1c9e8211281f5 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -53733,35 +53733,24 @@ static SDValue combineLRINT_LLRINT(SDNode *N, SelectionDAG &DAG,
   return DAG.getNode(X86ISD::CVTP2SI, DL, VT, Src);
 }
 
-// Attempt to fold some (truncate (srl (binop X, C1), C2)) patterns to
-// (binop (truncate (srl X, C2)), C1'). C1' will be smaller than C1 so we are
-// able to avoid generating code with MOVABS and large constants in certain
+// Attempt to fold some (truncate (srl (add/or/xor X, C1), C2)) patterns to
+// (add/or/xor (truncate (srl X, C2)), C1'). C1' will be smaller than C1 so we
+// are able to avoid generating code with MOVABS and large constants in certain
 // cases.
-static SDValue combinei64TruncSrlBinop(SDValue N, EVT VT, SelectionDAG &DAG,
-                                       const SDLoc &DL) {
+static SDValue combinei64TruncSrlConstant(SDValue N, EVT VT, SelectionDAG &DAG,
+                                          const SDLoc &DL) {
 
-  SDValue Binop = N.getOperand(0);
-  APInt BinopConst = Binop.getConstantOperandAPInt(1);
+  SDValue Op = N.getOperand(0);
+  APInt OpConst = Op.getConstantOperandAPInt(1);
   APInt SrlConst = N.getConstantOperandAPInt(1);
-  unsigned Opcode = Binop.getOpcode();
-
-  auto CleanUpFn = +[](SDValue Op, EVT CleanUpVT, EVT VT, SelectionDAG &DAG,
-                       const SDLoc &DL) {
-    SDValue CleanUp = DAG.getAnyExtOrTrunc(Op, DL, CleanUpVT);
-    return DAG.getAnyExtOrTrunc(CleanUp, DL, VT);
-  };
-  auto ZeroExtCleanUp = +[](SDValue Op, EVT CleanUpVT, EVT VT,
-                            SelectionDAG &DAG, const SDLoc &DL) {
-    return DAG.getZeroExtendInReg(Op, DL, CleanUpVT);
-  };
+  unsigned Opcode = Op.getOpcode();
 
   switch (Opcode) {
   default:
     return SDValue();
   case ISD::ADD:
-    if (BinopConst.countr_zero() < SrlConst.getZExtValue())
+    if (OpConst.countr_zero() < SrlConst.getZExtValue())
       return SDValue();
-    CleanUpFn = ZeroExtCleanUp;
     [[fallthrough]];
   case ISD::OR:
   case ISD::XOR:
@@ -53770,16 +53759,21 @@ static SDValue combinei64TruncSrlBinop(SDValue N, EVT VT, SelectionDAG &DAG,
     break;
   }
 
-  SDValue BinopLHSSrl =
-      DAG.getNode(ISD::SRL, DL, MVT::i64, Binop.getOperand(0), N.getOperand(1));
-  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, BinopLHSSrl);
+  SDValue OpLhsSrl =
+      DAG.getNode(ISD::SRL, DL, MVT::i64, Op.getOperand(0), N.getOperand(1));
+  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, OpLhsSrl);
 
-  APInt NewBinopConstVal = BinopConst.lshr(SrlConst).trunc(VT.getSizeInBits());
-  SDValue NewBinopConst = DAG.getConstant(NewBinopConstVal, DL, VT);
-  SDValue NewBinopNode = DAG.getNode(Opcode, DL, VT, Trunc, NewBinopConst);
+  APInt NewOpConstVal = OpConst.lshr(SrlConst).trunc(VT.getSizeInBits());
+  SDValue NewOpConst = DAG.getConstant(NewOpConstVal, DL, VT);
+  SDValue NewOpNode = DAG.getNode(Opcode, DL, VT, Trunc, NewOpConst);
   EVT CleanUpVT =
       EVT::getIntegerVT(*DAG.getContext(), 64 - SrlConst.getZExtValue());
-  return CleanUpFn(NewBinopNode, CleanUpVT, VT, DAG, DL);
+
+  if (Opcode == ISD::ADD)
+    return DAG.getZeroExtendInReg(NewOpNode, DL, CleanUpVT);
+
+  SDValue CleanUp = DAG.getAnyExtOrTrunc(NewOpNode, DL, CleanUpVT);
+  return DAG.getAnyExtOrTrunc(CleanUp, DL, VT);
 }
 
 /// Attempt to pre-truncate inputs to arithmetic ops if it will simplify
@@ -53831,7 +53825,7 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
       Src.getOperand(0).getNumOperands() == 2 &&
       isa<ConstantSDNode>(Src.getOperand(1)) &&
       isa<ConstantSDNode>(Src.getOperand(0).getOperand(1))) {
-    if (SDValue R = combinei64TruncSrlBinop(Src, VT, DAG, DL))
+    if (SDValue R = combinei64TruncSrlConstant(Src, VT, DAG, DL))
       return R;
     return SDValue();
   }

>From 696f3a52f29430eed2e2e657e3eb1086864bad03 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20Gouveia?= <jtalonegouveia at gmail.com>
Date: Thu, 27 Feb 2025 17:06:00 +0000
Subject: [PATCH 6/8] [X86] Remove getNumOperands

---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 1c9e8211281f5..7cd6ff5a2c4e5 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -53822,11 +53822,17 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
     return SDValue();
 
   if (VT == MVT::i32 && SrcVT == MVT::i64 && SrcOpcode == ISD::SRL &&
-      Src.getOperand(0).getNumOperands() == 2 &&
-      isa<ConstantSDNode>(Src.getOperand(1)) &&
-      isa<ConstantSDNode>(Src.getOperand(0).getOperand(1))) {
+      isa<ConstantSDNode>(Src.getOperand(1))) {
+
+    unsigned SrcOpOpcode = Src.getOperand(0).getOpcode();
+    if ((SrcOpOpcode != ISD::ADD && SrcOpOpcode != ISD::OR &&
+         SrcOpOpcode != ISD::XOR) ||
+        !isa<ConstantSDNode>(Src.getOperand(0).getOperand(1)))
+      return SDValue();
+
     if (SDValue R = combinei64TruncSrlConstant(Src, VT, DAG, DL))
       return R;
+
     return SDValue();
   }
 

>From 7caceb190ecf22bd1cee3fe688c937207872f7ec Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20Gouveia?= <jtalonegouveia at gmail.com>
Date: Fri, 28 Feb 2025 12:28:30 +0000
Subject: [PATCH 7/8] [X86] Remove unnecessary switch case

---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 18 ++++--------------
 1 file changed, 4 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 7cd6ff5a2c4e5..9f56edd226b26 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -53743,21 +53743,12 @@ static SDValue combinei64TruncSrlConstant(SDValue N, EVT VT, SelectionDAG &DAG,
   SDValue Op = N.getOperand(0);
   APInt OpConst = Op.getConstantOperandAPInt(1);
   APInt SrlConst = N.getConstantOperandAPInt(1);
+  uint64_t SrlConstVal = SrlConst.getZExtValue();
   unsigned Opcode = Op.getOpcode();
 
-  switch (Opcode) {
-  default:
+  if (SrlConst.ule(32) ||
+      (Opcode == ISD::ADD && OpConst.countr_zero() < SrlConstVal))
     return SDValue();
-  case ISD::ADD:
-    if (OpConst.countr_zero() < SrlConst.getZExtValue())
-      return SDValue();
-    [[fallthrough]];
-  case ISD::OR:
-  case ISD::XOR:
-    if (SrlConst.ule(32))
-      return SDValue();
-    break;
-  }
 
   SDValue OpLhsSrl =
       DAG.getNode(ISD::SRL, DL, MVT::i64, Op.getOperand(0), N.getOperand(1));
@@ -53766,8 +53757,7 @@ static SDValue combinei64TruncSrlConstant(SDValue N, EVT VT, SelectionDAG &DAG,
   APInt NewOpConstVal = OpConst.lshr(SrlConst).trunc(VT.getSizeInBits());
   SDValue NewOpConst = DAG.getConstant(NewOpConstVal, DL, VT);
   SDValue NewOpNode = DAG.getNode(Opcode, DL, VT, Trunc, NewOpConst);
-  EVT CleanUpVT =
-      EVT::getIntegerVT(*DAG.getContext(), 64 - SrlConst.getZExtValue());
+  EVT CleanUpVT = EVT::getIntegerVT(*DAG.getContext(), 64 - SrlConstVal);
 
   if (Opcode == ISD::ADD)
     return DAG.getZeroExtendInReg(NewOpNode, DL, CleanUpVT);

>From 4388e736a0b64ba60aef3ed2ec16cf7f6ee867ea Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20Gouveia?= <jtalonegouveia at gmail.com>
Date: Sat, 1 Mar 2025 03:32:22 +0000
Subject: [PATCH 8/8] [X86] Address reviews

---
 llvm/lib/Target/X86/X86ISelLowering.cpp       |  4 +-
 .../CodeGen/X86/combine-i64-trunc-srl-add.ll  | 46 +++++++++----------
 2 files changed, 24 insertions(+), 26 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 9f56edd226b26..e544c165e9afe 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -53761,9 +53761,7 @@ static SDValue combinei64TruncSrlConstant(SDValue N, EVT VT, SelectionDAG &DAG,
 
   if (Opcode == ISD::ADD)
     return DAG.getZeroExtendInReg(NewOpNode, DL, CleanUpVT);
-
-  SDValue CleanUp = DAG.getAnyExtOrTrunc(NewOpNode, DL, CleanUpVT);
-  return DAG.getAnyExtOrTrunc(CleanUp, DL, VT);
+  return NewOpNode;
 }
 
 /// Attempt to pre-truncate inputs to arithmetic ops if it will simplify
diff --git a/llvm/test/CodeGen/X86/combine-i64-trunc-srl-add.ll b/llvm/test/CodeGen/X86/combine-i64-trunc-srl-add.ll
index 785865bf06a74..f7906e5a009ae 100644
--- a/llvm/test/CodeGen/X86/combine-i64-trunc-srl-add.ll
+++ b/llvm/test/CodeGen/X86/combine-i64-trunc-srl-add.ll
@@ -131,12 +131,12 @@ define i32 @test_trunc_add(i64 %x) {
 define i32 @test_trunc_sub(i64 %x) {
 ; X64-LABEL: test_trunc_sub:
 ; X64:       # %bb.0:
-; X64-NEXT:    shrq $48, %rdi
-; X64-NEXT:    addl $65522, %edi # imm = 0xFFF2
-; X64-NEXT:    movzwl %di, %eax
+; X64-NEXT:    shrq $49, %rdi
+; X64-NEXT:    leal 32762(%rdi), %eax
+; X64-NEXT:    andl $32767, %eax # imm = 0x7FFF
 ; X64-NEXT:    retq
-  %sub = sub i64 %x, 3940649673949184
-  %shr = lshr i64 %sub, 48
+  %sub = sub i64 %x, 3377699720527872
+  %shr = lshr i64 %sub, 49
   %conv = trunc i64 %shr to i32
   ret i32 %conv
 }
@@ -145,12 +145,12 @@ define i32 @test_trunc_and_1(i64 %x) {
 ; X64-LABEL: test_trunc_and_1:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    shrq $48, %rax
-; X64-NEXT:    andl $14, %eax
+; X64-NEXT:    shrq $50, %rax
+; X64-NEXT:    andl $3, %eax
 ; X64-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-NEXT:    retq
   %and = and i64 %x, 3940649673949184
-  %shr = lshr i64 %and, 48
+  %shr = lshr i64 %and, 50
   %conv = trunc i64 %shr to i32
   ret i32 %conv
 }
@@ -159,12 +159,12 @@ define i32 @test_trunc_or_1(i64 %x) {
 ; X64-LABEL: test_trunc_or_1:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    shrq $48, %rax
-; X64-NEXT:    orl $14, %eax
+; X64-NEXT:    shrq $50, %rax
+; X64-NEXT:    orl $3, %eax
 ; X64-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-NEXT:    retq
   %or = or i64 %x, 3940649673949184
-  %shr = lshr i64 %or, 48
+  %shr = lshr i64 %or, 50
   %conv = trunc i64 %shr to i32
   ret i32 %conv
 }
@@ -173,12 +173,12 @@ define i32 @test_trunc_xor_1(i64 %x) {
 ; X64-LABEL: test_trunc_xor_1:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    shrq $48, %rax
-; X64-NEXT:    xorl $14, %eax
+; X64-NEXT:    shrq $50, %rax
+; X64-NEXT:    xorl $3, %eax
 ; X64-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-NEXT:    retq
   %xor = xor i64 %x, 3940649673949184
-  %shr = lshr i64 %xor, 48
+  %shr = lshr i64 %xor, 50
   %conv = trunc i64 %shr to i32
   ret i32 %conv
 }
@@ -187,12 +187,12 @@ define i32 @test_trunc_and_2(i64 %x) {
 ; X64-LABEL: test_trunc_and_2:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    shrq $48, %rax
-; X64-NEXT:    andl $13, %eax
+; X64-NEXT:    shrq $45, %rax
+; X64-NEXT:    andl $111, %eax
 ; X64-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-NEXT:    retq
   %and = and i64 %x, 3940649673949183
-  %shr = lshr i64 %and, 48
+  %shr = lshr i64 %and, 45
   %conv = trunc i64 %shr to i32
   ret i32 %conv
 }
@@ -201,12 +201,12 @@ define i32 @test_trunc_or_2(i64 %x) {
 ; X64-LABEL: test_trunc_or_2:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    shrq $48, %rax
-; X64-NEXT:    orl $13, %eax
+; X64-NEXT:    shrq $45, %rax
+; X64-NEXT:    orl $111, %eax
 ; X64-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-NEXT:    retq
   %or = or i64 %x, 3940649673949183
-  %shr = lshr i64 %or, 48
+  %shr = lshr i64 %or, 45
   %conv = trunc i64 %shr to i32
   ret i32 %conv
 }
@@ -215,12 +215,12 @@ define i32 @test_trunc_xor_2(i64 %x) {
 ; X64-LABEL: test_trunc_xor_2:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    shrq $48, %rax
-; X64-NEXT:    xorl $13, %eax
+; X64-NEXT:    shrq $45, %rax
+; X64-NEXT:    xorl $111, %eax
 ; X64-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-NEXT:    retq
   %xor = xor i64 %x, 3940649673949183
-  %shr = lshr i64 %xor, 48
+  %shr = lshr i64 %xor, 45
   %conv = trunc i64 %shr to i32
   ret i32 %conv
 }