[llvm] 735f7b3 - [X86] computeKnownBitsForTargetNode - add basic X86ISD::BZHI handling (#177347)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 22 06:10:55 PST 2026
Author: Simon Pilgrim
Date: 2026-01-22T14:10:50Z
New Revision: 735f7b3bd4bcbc16f01d34882365d79d3cf21853
URL: https://github.com/llvm/llvm-project/commit/735f7b3bd4bcbc16f01d34882365d79d3cf21853
DIFF: https://github.com/llvm/llvm-project/commit/735f7b3bd4bcbc16f01d34882365d79d3cf21853.diff
LOG: [X86] computeKnownBitsForTargetNode - add basic X86ISD::BZHI handling (#177347)
Currently limited to constant masks: if the mask (truncated to i8) is
less than the bitwidth then it will zero the upper bits.
So far it mainly just handles BZHI(X,0) -> 0 and BZHI(C1,C2) constant
folding.
All the BMI node combines seem to just call SimplifyDemandedBits - so
I've merged them into a single combineBMI.
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/combine-bzhi.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index dad0fa4421cda..14dbbfd9d174d 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -39261,6 +39261,29 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
}
break;
}
+ case X86ISD::BZHI: {
+ KnownBits Known2;
+ Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+
+ // If the mask control (lower 8 bits) is less than the bitwidth, then the
+ // upper bits are set to zero: dst[BW:IMM8] = 0
+ // TODO: Generalise this to use Known2 getMinValue() + getMaxValue().
+ Known2 = Known2.trunc(8);
+ if (Known2.isConstant()) {
+ uint64_t Mask = Known2.getConstant().getZExtValue();
+ if (Mask < BitWidth) {
+ Known.One.clearBits(Mask, BitWidth);
+ Known.Zero.setBits(Mask, BitWidth);
+ if (Known.isConstant())
+ break;
+ }
+ }
+
+ // Zeros are retained from the src operand. But not necessarily ones.
+ Known.One.clearAllBits();
+ break;
+ }
case X86ISD::PDEP: {
KnownBits Known2;
Known = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
@@ -55968,23 +55991,6 @@ static SDValue combineAVG(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-static SDValue combineBEXTR(SDNode *N, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI,
- const X86Subtarget &Subtarget) {
- EVT VT = N->getValueType(0);
- unsigned NumBits = VT.getSizeInBits();
-
- // TODO - Constant Folding.
-
- // Simplify the inputs.
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- APInt DemandedMask(APInt::getAllOnes(NumBits));
- if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI))
- return SDValue(N, 0);
-
- return SDValue();
-}
-
static bool isNullFPScalarOrVectorConst(SDValue V) {
return isNullFPConstant(V) || ISD::isBuildVectorAllZeros(V.getNode());
}
@@ -61741,8 +61747,9 @@ static SDValue combineMOVDQ2Q(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}
-static SDValue combinePDEP(SDNode *N, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI) {
+// Common folds for BMI1/BMI2/TBM intrinsics.
+static SDValue combineBMI(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI) {
unsigned NumBits = N->getSimpleValueType(0).getSizeInBits();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (TLI.SimplifyDemandedBits(SDValue(N, 0), APInt::getAllOnes(NumBits), DCI))
@@ -61918,8 +61925,6 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::AVGCEILU:
case ISD::AVGFLOORS:
case ISD::AVGFLOORU: return combineAVG(N, DAG, DCI, Subtarget);
- case X86ISD::BEXTR:
- case X86ISD::BEXTRI: return combineBEXTR(N, DAG, DCI, Subtarget);
case ISD::LOAD: return combineLoad(N, DAG, DCI, Subtarget);
case ISD::MLOAD: return combineMaskedLoad(N, DAG, DCI, Subtarget);
case ISD::STORE: return combineStore(N, DAG, DCI, Subtarget);
@@ -62068,7 +62073,10 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::VBROADCAST_LOAD:
case X86ISD::SUBV_BROADCAST_LOAD: return combineBROADCAST_LOAD(N, DAG, DCI);
case X86ISD::MOVDQ2Q: return combineMOVDQ2Q(N, DAG);
- case X86ISD::PDEP: return combinePDEP(N, DAG, DCI);
+ case X86ISD::BEXTR:
+ case X86ISD::BEXTRI:
+ case X86ISD::BZHI:
+ case X86ISD::PDEP: return combineBMI(N, DAG, DCI);
case X86ISD::PCLMULQDQ: return combinePCLMULQDQ(N, DAG, DCI);
case ISD::INTRINSIC_WO_CHAIN: return combineINTRINSIC_WO_CHAIN(N, DAG, DCI);
case ISD::INTRINSIC_W_CHAIN: return combineINTRINSIC_W_CHAIN(N, DAG, DCI);
diff --git a/llvm/test/CodeGen/X86/combine-bzhi.ll b/llvm/test/CodeGen/X86/combine-bzhi.ll
index 16d96e8ba536a..54e76469dbb82 100644
--- a/llvm/test/CodeGen/X86/combine-bzhi.ll
+++ b/llvm/test/CodeGen/X86/combine-bzhi.ll
@@ -8,7 +8,6 @@ define i32 @test_bzhi32_zero(i32 %a) nounwind {
; CHECK-LABEL: test_bzhi32_zero:
; CHECK: # %bb.0:
; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: bzhil %eax, %edi, %eax
; CHECK-NEXT: retq
%1 = tail call i32 @llvm.x86.bmi.bzhi.32(i32 %a, i32 0)
ret i32 %1
@@ -18,7 +17,6 @@ define i64 @test_bzhi64_zero(i64 %a) nounwind readnone {
; CHECK-LABEL: test_bzhi64_zero:
; CHECK: # %bb.0:
; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: bzhiq %rax, %rdi, %rax
; CHECK-NEXT: retq
%1 = tail call i64 @llvm.x86.bmi.bzhi.64(i64 %a, i64 0)
ret i64 %1
@@ -28,8 +26,6 @@ define i32 @test_bzhi32_constfold() nounwind readnone {
; CHECK-LABEL: test_bzhi32_constfold:
; CHECK: # %bb.0:
; CHECK-NEXT: movl $1, %eax
-; CHECK-NEXT: movl $5, %ecx
-; CHECK-NEXT: bzhil %eax, %ecx, %eax
; CHECK-NEXT: retq
%1 = tail call i32 @llvm.x86.bmi.bzhi.32(i32 5, i32 1)
ret i32 %1
@@ -39,8 +35,6 @@ define i64 @test_bzhi64_constfold() nounwind readnone {
; CHECK-LABEL: test_bzhi64_constfold:
; CHECK: # %bb.0:
; CHECK-NEXT: movl $1, %eax
-; CHECK-NEXT: movl $5, %ecx
-; CHECK-NEXT: bzhiq %rax, %rcx, %rax
; CHECK-NEXT: retq
%1 = tail call i64 @llvm.x86.bmi.bzhi.64(i64 5, i64 1)
ret i64 %1
More information about the llvm-commits
mailing list