[llvm] 40866b7 - [DAGCombiner][X86] Fold sra (sub AddC, (shl X, N1C)), N1C --> sext (sub AddC1',(trunc X to (width - N1C)))
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sat Jul 9 11:54:05 PDT 2022
Author: Craig Topper
Date: 2022-07-09T11:53:44-07:00
New Revision: 40866b74bd422ae72ad53270d04568249ed53d69
URL: https://github.com/llvm/llvm-project/commit/40866b74bd422ae72ad53270d04568249ed53d69
DIFF: https://github.com/llvm/llvm-project/commit/40866b74bd422ae72ad53270d04568249ed53d69.diff
LOG: [DAGCombiner][X86] Fold sra (sub AddC, (shl X, N1C)), N1C --> sext (sub AddC1',(trunc X to (width - N1C)))
We already handled this case for add with a constant RHS. A
similar pattern can occur for sub with a constant LHS.
Test cases use add and a mul representing (neg (shl X, C)) because
that's what I saw in the wild. The mul will be decomposed and then
the new transform can kick in.
Tests were pre-committed, so this patch shows only the changes to their expected output.
Reviewed By: RKSimon
Differential Revision: https://reviews.llvm.org/D128769
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/test/CodeGen/X86/shift-combine.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 66c8741c27746..4736b9dd6aace 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -9297,31 +9297,44 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
// We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
// sra (add (shl X, N1C), AddC), N1C -->
// sext (add (trunc X to (width - N1C)), AddC')
- if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() && N1C &&
- N0.getOperand(0).getOpcode() == ISD::SHL &&
- N0.getOperand(0).getOperand(1) == N1 && N0.getOperand(0).hasOneUse()) {
- if (ConstantSDNode *AddC = isConstOrConstSplat(N0.getOperand(1))) {
- SDValue Shl = N0.getOperand(0);
- // Determine what the truncate's type would be and ask the target if that
- // is a free operation.
- LLVMContext &Ctx = *DAG.getContext();
- unsigned ShiftAmt = N1C->getZExtValue();
- EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
- if (VT.isVector())
- TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
-
- // TODO: The simple type check probably belongs in the default hook
- // implementation and/or target-specific overrides (because
- // non-simple types likely require masking when legalized), but that
- // restriction may conflict with other transforms.
- if (TruncVT.isSimple() && isTypeLegal(TruncVT) &&
- TLI.isTruncateFree(VT, TruncVT)) {
- SDLoc DL(N);
- SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
- SDValue ShiftC = DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).
- trunc(TruncVT.getScalarSizeInBits()), DL, TruncVT);
- SDValue Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
- return DAG.getSExtOrTrunc(Add, DL, VT);
+ // sra (sub AddC, (shl X, N1C)), N1C -->
+ // sext (sub AddC1',(trunc X to (width - N1C)))
+ if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB) && N1C &&
+ N0.hasOneUse()) {
+ bool IsAdd = N0.getOpcode() == ISD::ADD;
+ SDValue Shl = N0.getOperand(IsAdd ? 0 : 1);
+ if (Shl.getOpcode() == ISD::SHL && Shl.getOperand(1) == N1 &&
+ Shl.hasOneUse()) {
+ // TODO: AddC does not need to be a splat.
+ if (ConstantSDNode *AddC =
+ isConstOrConstSplat(N0.getOperand(IsAdd ? 1 : 0))) {
+ // Determine what the truncate's type would be and ask the target if
+ // that is a free operation.
+ LLVMContext &Ctx = *DAG.getContext();
+ unsigned ShiftAmt = N1C->getZExtValue();
+ EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
+ if (VT.isVector())
+ TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
+
+ // TODO: The simple type check probably belongs in the default hook
+ // implementation and/or target-specific overrides (because
+ // non-simple types likely require masking when legalized), but
+ // that restriction may conflict with other transforms.
+ if (TruncVT.isSimple() && isTypeLegal(TruncVT) &&
+ TLI.isTruncateFree(VT, TruncVT)) {
+ SDLoc DL(N);
+ SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
+ SDValue ShiftC =
+ DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).trunc(
+ TruncVT.getScalarSizeInBits()),
+ DL, TruncVT);
+ SDValue Add;
+ if (IsAdd)
+ Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
+ else
+ Add = DAG.getNode(ISD::SUB, DL, TruncVT, ShiftC, Trunc);
+ return DAG.getSExtOrTrunc(Add, DL, VT);
+ }
}
}
}
diff --git a/llvm/test/CodeGen/X86/shift-combine.ll b/llvm/test/CodeGen/X86/shift-combine.ll
index c49176500cf68..075fbb29f4a95 100644
--- a/llvm/test/CodeGen/X86/shift-combine.ll
+++ b/llvm/test/CodeGen/X86/shift-combine.ll
@@ -419,6 +419,7 @@ if:
unreachable
}
+; The mul here is the equivalent of (neg (shl X, 32)).
define i64 @ashr_add_neg_shl_i32(i64 %r) nounwind {
; X32-LABEL: ashr_add_neg_shl_i32:
; X32: # %bb.0:
@@ -430,10 +431,9 @@ define i64 @ashr_add_neg_shl_i32(i64 %r) nounwind {
;
; X64-LABEL: ashr_add_neg_shl_i32:
; X64: # %bb.0:
-; X64-NEXT: shlq $32, %rdi
-; X64-NEXT: movabsq $4294967296, %rax # imm = 0x100000000
-; X64-NEXT: subq %rdi, %rax
-; X64-NEXT: sarq $32, %rax
+; X64-NEXT: movl $1, %eax
+; X64-NEXT: subl %edi, %eax
+; X64-NEXT: cltq
; X64-NEXT: retq
%conv = mul i64 %r, -4294967296
%sext = add i64 %conv, 4294967296
@@ -441,6 +441,7 @@ define i64 @ashr_add_neg_shl_i32(i64 %r) nounwind {
ret i64 %conv1
}
+; The mul here is the equivalent of (neg (shl X, 56)).
define i64 @ashr_add_neg_shl_i8(i64 %r) nounwind {
; X32-LABEL: ashr_add_neg_shl_i8:
; X32: # %bb.0:
@@ -455,10 +456,9 @@ define i64 @ashr_add_neg_shl_i8(i64 %r) nounwind {
;
; X64-LABEL: ashr_add_neg_shl_i8:
; X64: # %bb.0:
-; X64-NEXT: shlq $56, %rdi
-; X64-NEXT: movabsq $144115188075855872, %rax # imm = 0x200000000000000
-; X64-NEXT: subq %rdi, %rax
-; X64-NEXT: sarq $56, %rax
+; X64-NEXT: movb $2, %al
+; X64-NEXT: subb %dil, %al
+; X64-NEXT: movsbq %al, %rax
; X64-NEXT: retq
%conv = mul i64 %r, -72057594037927936
%sext = add i64 %conv, 144115188075855872
@@ -466,42 +466,31 @@ define i64 @ashr_add_neg_shl_i8(i64 %r) nounwind {
ret i64 %conv1
}
+; The mul here is the equivalent of (neg (shl X, 24)).
define <4 x i32> @ashr_add_neg_shl_v4i8(<4 x i32> %r) nounwind {
; X32-LABEL: ashr_add_neg_shl_v4i8:
; X32: # %bb.0:
-; X32-NEXT: pushl %ebp
-; X32-NEXT: pushl %ebx
; X32-NEXT: pushl %edi
; X32-NEXT: pushl %esi
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X32-NEXT: shll $24, %edx
-; X32-NEXT: shll $24, %esi
-; X32-NEXT: shll $24, %ebx
-; X32-NEXT: shll $24, %ebp
-; X32-NEXT: movl $16777216, %ecx # imm = 0x1000000
-; X32-NEXT: movl $16777216, %edi # imm = 0x1000000
-; X32-NEXT: subl %ebp, %edi
-; X32-NEXT: movl $16777216, %ebp # imm = 0x1000000
-; X32-NEXT: subl %ebx, %ebp
-; X32-NEXT: movl $16777216, %ebx # imm = 0x1000000
-; X32-NEXT: subl %esi, %ebx
-; X32-NEXT: subl %edx, %ecx
-; X32-NEXT: sarl $24, %ecx
-; X32-NEXT: sarl $24, %ebx
-; X32-NEXT: sarl $24, %ebp
-; X32-NEXT: sarl $24, %edi
-; X32-NEXT: movl %edi, 12(%eax)
-; X32-NEXT: movl %ebp, 8(%eax)
-; X32-NEXT: movl %ebx, 4(%eax)
-; X32-NEXT: movl %ecx, (%eax)
+; X32-NEXT: movb $1, %cl
+; X32-NEXT: movb $1, %dl
+; X32-NEXT: subb {{[0-9]+}}(%esp), %dl
+; X32-NEXT: movsbl %dl, %edx
+; X32-NEXT: movb $1, %ch
+; X32-NEXT: subb {{[0-9]+}}(%esp), %ch
+; X32-NEXT: movsbl %ch, %esi
+; X32-NEXT: movb $1, %ch
+; X32-NEXT: subb {{[0-9]+}}(%esp), %ch
+; X32-NEXT: movsbl %ch, %edi
+; X32-NEXT: subb {{[0-9]+}}(%esp), %cl
+; X32-NEXT: movsbl %cl, %ecx
+; X32-NEXT: movl %ecx, 12(%eax)
+; X32-NEXT: movl %edi, 8(%eax)
+; X32-NEXT: movl %esi, 4(%eax)
+; X32-NEXT: movl %edx, (%eax)
; X32-NEXT: popl %esi
; X32-NEXT: popl %edi
-; X32-NEXT: popl %ebx
-; X32-NEXT: popl %ebp
; X32-NEXT: retl $4
;
; X64-LABEL: ashr_add_neg_shl_v4i8:
More information about the llvm-commits mailing list