[llvm] 19777de - [X86] matchAddressRecursively - add foldMaskedShiftToBEXTR handling to ZERO_EXTEND nodes.
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 24 05:14:59 PDT 2023
Author: Simon Pilgrim
Date: 2023-08-24T13:14:41+01:00
New Revision: 19777deba49e95c2761fc92cce83a0df23cc64cc
URL: https://github.com/llvm/llvm-project/commit/19777deba49e95c2761fc92cce83a0df23cc64cc
DIFF: https://github.com/llvm/llvm-project/commit/19777deba49e95c2761fc92cce83a0df23cc64cc.diff
LOG: [X86] matchAddressRecursively - add foldMaskedShiftToBEXTR handling to ZERO_EXTEND nodes.
Added:
Modified:
llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
llvm/test/CodeGen/X86/extract-bits.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 600e7c85a811e9..c78d984a55363b 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -2181,14 +2181,16 @@ static bool foldMaskedShiftToBEXTR(SelectionDAG &DAG, SDValue N,
// Also, the addressing mode can only represent shifts of 1, 2, or 3 bits.
if (AMShiftAmt == 0 || AMShiftAmt > 3) return true;
+ MVT XVT = X.getSimpleValueType();
MVT VT = N.getSimpleValueType();
SDLoc DL(N);
SDValue NewSRLAmt = DAG.getConstant(ShiftAmt + AMShiftAmt, DL, MVT::i8);
- SDValue NewSRL = DAG.getNode(ISD::SRL, DL, VT, X, NewSRLAmt);
- SDValue NewMask = DAG.getConstant(Mask >> AMShiftAmt, DL, VT);
- SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, NewSRL, NewMask);
+ SDValue NewSRL = DAG.getNode(ISD::SRL, DL, XVT, X, NewSRLAmt);
+ SDValue NewMask = DAG.getConstant(Mask >> AMShiftAmt, DL, XVT);
+ SDValue NewAnd = DAG.getNode(ISD::AND, DL, XVT, NewSRL, NewMask);
+ SDValue NewExt = DAG.getZExtOrTrunc(NewAnd, DL, VT);
SDValue NewSHLAmt = DAG.getConstant(AMShiftAmt, DL, MVT::i8);
- SDValue NewSHL = DAG.getNode(ISD::SHL, DL, VT, NewAnd, NewSHLAmt);
+ SDValue NewSHL = DAG.getNode(ISD::SHL, DL, VT, NewExt, NewSHLAmt);
// Insert the new nodes into the topological ordering. We must do this in
// a valid topological ordering as nothing is going to go back and re-sort
@@ -2199,13 +2201,14 @@ static bool foldMaskedShiftToBEXTR(SelectionDAG &DAG, SDValue N,
insertDAGNode(DAG, N, NewSRL);
insertDAGNode(DAG, N, NewMask);
insertDAGNode(DAG, N, NewAnd);
+ insertDAGNode(DAG, N, NewExt);
insertDAGNode(DAG, N, NewSHLAmt);
insertDAGNode(DAG, N, NewSHL);
DAG.ReplaceAllUsesWith(N, NewSHL);
DAG.RemoveDeadNode(N.getNode());
AM.Scale = 1 << AMShiftAmt;
- AM.IndexReg = NewAnd;
+ AM.IndexReg = NewExt;
return false;
}
@@ -2633,6 +2636,11 @@ bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
if (!foldMaskAndShiftToScale(*CurDAG, N, Mask.getZExtValue(), Src,
Src.getOperand(0), AM))
return false;
+
+ // Try to fold the mask and shift into BEXTR and scale.
+ if (!foldMaskedShiftToBEXTR(*CurDAG, N, Mask.getZExtValue(), Src,
+ Src.getOperand(0), AM, *Subtarget))
+ return false;
}
break;
diff --git a/llvm/test/CodeGen/X86/extract-bits.ll b/llvm/test/CodeGen/X86/extract-bits.ll
index e3bce2af12d344..38a1de251a3d91 100644
--- a/llvm/test/CodeGen/X86/extract-bits.ll
+++ b/llvm/test/CodeGen/X86/extract-bits.ll
@@ -8611,13 +8611,26 @@ define i64 @c8_i64(i64 %arg, ptr %ptr) nounwind {
; X86-NEXT: adcl $0, %edx
; X86-NEXT: retl
;
-; X64-LABEL: c8_i64:
-; X64: # %bb.0:
-; X64-NEXT: movl (%rsi), %eax
-; X64-NEXT: shrl $19, %eax
-; X64-NEXT: andl $4092, %eax # imm = 0xFFC
-; X64-NEXT: addq %rdi, %rax
-; X64-NEXT: retq
+; X64-NOBMI-LABEL: c8_i64:
+; X64-NOBMI: # %bb.0:
+; X64-NOBMI-NEXT: movl (%rsi), %eax
+; X64-NOBMI-NEXT: shrl $19, %eax
+; X64-NOBMI-NEXT: andl $4092, %eax # imm = 0xFFC
+; X64-NOBMI-NEXT: addq %rdi, %rax
+; X64-NOBMI-NEXT: retq
+;
+; X64-BMINOTBM-LABEL: c8_i64:
+; X64-BMINOTBM: # %bb.0:
+; X64-BMINOTBM-NEXT: movl $2581, %eax # imm = 0xA15
+; X64-BMINOTBM-NEXT: bextrl %eax, (%rsi), %eax
+; X64-BMINOTBM-NEXT: leaq (%rdi,%rax,4), %rax
+; X64-BMINOTBM-NEXT: retq
+;
+; X64-BMITBM-LABEL: c8_i64:
+; X64-BMITBM: # %bb.0:
+; X64-BMITBM-NEXT: bextrl $2581, (%rsi), %eax # imm = 0xA15
+; X64-BMITBM-NEXT: leaq (%rdi,%rax,4), %rax
+; X64-BMITBM-NEXT: retq
%tmp = load i32, ptr %ptr, align 8
%tmp1 = lshr i32 %tmp, 19
%tmp2 = and i32 %tmp1, 4092
More information about the llvm-commits mailing list