[llvm] 735f7b3 - [X86] computeKnownBitsForTargetNode - add basic X86ISD::BZHI handling (#177347)

via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 22 06:10:55 PST 2026


Author: Simon Pilgrim
Date: 2026-01-22T14:10:50Z
New Revision: 735f7b3bd4bcbc16f01d34882365d79d3cf21853

URL: https://github.com/llvm/llvm-project/commit/735f7b3bd4bcbc16f01d34882365d79d3cf21853
DIFF: https://github.com/llvm/llvm-project/commit/735f7b3bd4bcbc16f01d34882365d79d3cf21853.diff

LOG: [X86] computeKnownBitsForTargetNode - add basic X86ISD::BZHI handling (#177347)

Currently limited to constant masks: if the mask (truncated to i8) is
less than the bitwidth then it will zero the upper bits.

So far it mainly just handles BZHI(X,0) -> 0 and BZHI(C1,C2) constant
folding.

All the BMI node combines seem to just call SimplifyDemandedBits - so
I've merged them into a single combineBMI.

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/combine-bzhi.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index dad0fa4421cda..14dbbfd9d174d 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -39261,6 +39261,29 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
     }
     break;
   }
+  case X86ISD::BZHI: {
+    KnownBits Known2;
+    Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+    Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+
+    // If the mask control (lower 8 bits) is less than the bitwidth, then the
+    // upper bits are set to zero: dst[BW:IMM8] = 0
+    // TODO: Generalise this to use Known2 getMinValue() + getMaxValue().
+    Known2 = Known2.trunc(8);
+    if (Known2.isConstant()) {
+      uint64_t Mask = Known2.getConstant().getZExtValue();
+      if (Mask < BitWidth) {
+        Known.One.clearBits(Mask, BitWidth);
+        Known.Zero.setBits(Mask, BitWidth);
+        if (Known.isConstant())
+          break;
+      }
+    }
+
+    // Zeros are retained from the src operand. But not necessarily ones.
+    Known.One.clearAllBits();
+    break;
+  }
   case X86ISD::PDEP: {
     KnownBits Known2;
     Known = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
@@ -55968,23 +55991,6 @@ static SDValue combineAVG(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
-static SDValue combineBEXTR(SDNode *N, SelectionDAG &DAG,
-                            TargetLowering::DAGCombinerInfo &DCI,
-                            const X86Subtarget &Subtarget) {
-  EVT VT = N->getValueType(0);
-  unsigned NumBits = VT.getSizeInBits();
-
-  // TODO - Constant Folding.
-
-  // Simplify the inputs.
-  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-  APInt DemandedMask(APInt::getAllOnes(NumBits));
-  if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI))
-    return SDValue(N, 0);
-
-  return SDValue();
-}
-
 static bool isNullFPScalarOrVectorConst(SDValue V) {
   return isNullFPConstant(V) || ISD::isBuildVectorAllZeros(V.getNode());
 }
@@ -61741,8 +61747,9 @@ static SDValue combineMOVDQ2Q(SDNode *N, SelectionDAG &DAG) {
   return SDValue();
 }
 
-static SDValue combinePDEP(SDNode *N, SelectionDAG &DAG,
-                           TargetLowering::DAGCombinerInfo &DCI) {
+// Common folds for BMI1/BMI2/TBM intrinsics.
+static SDValue combineBMI(SDNode *N, SelectionDAG &DAG,
+                          TargetLowering::DAGCombinerInfo &DCI) {
   unsigned NumBits = N->getSimpleValueType(0).getSizeInBits();
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   if (TLI.SimplifyDemandedBits(SDValue(N, 0), APInt::getAllOnes(NumBits), DCI))
@@ -61918,8 +61925,6 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::AVGCEILU:
   case ISD::AVGFLOORS:
   case ISD::AVGFLOORU:      return combineAVG(N, DAG, DCI, Subtarget);
-  case X86ISD::BEXTR:
-  case X86ISD::BEXTRI:      return combineBEXTR(N, DAG, DCI, Subtarget);
   case ISD::LOAD:           return combineLoad(N, DAG, DCI, Subtarget);
   case ISD::MLOAD:          return combineMaskedLoad(N, DAG, DCI, Subtarget);
   case ISD::STORE:          return combineStore(N, DAG, DCI, Subtarget);
@@ -62068,7 +62073,10 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
   case X86ISD::VBROADCAST_LOAD:
   case X86ISD::SUBV_BROADCAST_LOAD: return combineBROADCAST_LOAD(N, DAG, DCI);
   case X86ISD::MOVDQ2Q:     return combineMOVDQ2Q(N, DAG);
-  case X86ISD::PDEP:        return combinePDEP(N, DAG, DCI);
+  case X86ISD::BEXTR:
+  case X86ISD::BEXTRI:
+  case X86ISD::BZHI:
+  case X86ISD::PDEP:        return combineBMI(N, DAG, DCI);
   case X86ISD::PCLMULQDQ:   return combinePCLMULQDQ(N, DAG, DCI);
   case ISD::INTRINSIC_WO_CHAIN:  return combineINTRINSIC_WO_CHAIN(N, DAG, DCI);
   case ISD::INTRINSIC_W_CHAIN:  return combineINTRINSIC_W_CHAIN(N, DAG, DCI);

diff  --git a/llvm/test/CodeGen/X86/combine-bzhi.ll b/llvm/test/CodeGen/X86/combine-bzhi.ll
index 16d96e8ba536a..54e76469dbb82 100644
--- a/llvm/test/CodeGen/X86/combine-bzhi.ll
+++ b/llvm/test/CodeGen/X86/combine-bzhi.ll
@@ -8,7 +8,6 @@ define i32 @test_bzhi32_zero(i32 %a) nounwind {
 ; CHECK-LABEL: test_bzhi32_zero:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    bzhil %eax, %edi, %eax
 ; CHECK-NEXT:    retq
   %1 = tail call i32 @llvm.x86.bmi.bzhi.32(i32 %a, i32 0)
   ret i32 %1
@@ -18,7 +17,6 @@ define i64 @test_bzhi64_zero(i64 %a) nounwind readnone {
 ; CHECK-LABEL: test_bzhi64_zero:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    bzhiq %rax, %rdi, %rax
 ; CHECK-NEXT:    retq
   %1 = tail call i64 @llvm.x86.bmi.bzhi.64(i64 %a, i64 0)
   ret i64 %1
@@ -28,8 +26,6 @@ define i32 @test_bzhi32_constfold() nounwind readnone {
 ; CHECK-LABEL: test_bzhi32_constfold:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl $1, %eax
-; CHECK-NEXT:    movl $5, %ecx
-; CHECK-NEXT:    bzhil %eax, %ecx, %eax
 ; CHECK-NEXT:    retq
   %1 = tail call i32 @llvm.x86.bmi.bzhi.32(i32 5, i32 1)
   ret i32 %1
@@ -39,8 +35,6 @@ define i64 @test_bzhi64_constfold() nounwind readnone {
 ; CHECK-LABEL: test_bzhi64_constfold:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl $1, %eax
-; CHECK-NEXT:    movl $5, %ecx
-; CHECK-NEXT:    bzhiq %rax, %rcx, %rax
 ; CHECK-NEXT:    retq
   %1 = tail call i64 @llvm.x86.bmi.bzhi.64(i64 5, i64 1)
   ret i64 %1


        


More information about the llvm-commits mailing list