[llvm] [DAGCombiner] Combine (fshl A, B, S) | (fshr C, D, BW-S) --> (fshl (A|C), (B|D), S) (PR #180889)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 10 21:47:42 PST 2026
- Previous message: [llvm] [DAGCombiner] Combine (fshl A, B, S) | (fshr C, D, BW-S) --> (fshl (A|C), (B|D), S) (PR #180889)
- Next message: [llvm] [DAGCombiner] Combine (fshl A, B, S) | (fshr C, D, BW-S) --> (fshl (A|C), (B|D), S) (PR #180889)
- Messages sorted by:
[ date ]
[ thread ]
[ subject ]
[ author ]
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-selectiondag
@llvm/pr-subscribers-backend-aarch64
Author: Craig Topper (topperc)
<details>
<summary>Changes</summary>
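This is similar to the FSHL/FSHR handling in hoistLogicOpWithSameOpcodeHands.
Here the opcodes aren't exactly the same, but the operations are
equivalent: for a constant shift amount 0 < S < BW, fshr by BW-S produces
the same result as fshl by S on the same operands.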
Fixes regressions from #180888
Stacked on #180888 and #180887
---
Patch is 136.65 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/180889.diff
32 Files Affected:
- (modified) llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (+42-8)
- (modified) llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp (+36-18)
- (modified) llvm/test/CodeGen/AArch64/funnel-shift.ll (+23-31)
- (modified) llvm/test/CodeGen/RISCV/rv32p.ll (+2-5)
- (modified) llvm/test/CodeGen/RISCV/rv64p.ll (+2-5)
- (modified) llvm/test/CodeGen/X86/avgceils-scalar.ll (+20-20)
- (modified) llvm/test/CodeGen/X86/avgceilu-scalar.ll (+20-20)
- (modified) llvm/test/CodeGen/X86/avgfloors-scalar.ll (+6-6)
- (modified) llvm/test/CodeGen/X86/div_i129_v_pow2k.ll (+26-26)
- (modified) llvm/test/CodeGen/X86/expand-large-fp-optnone.ll (+5-5)
- (modified) llvm/test/CodeGen/X86/fold-tied-op.ll (+50-48)
- (modified) llvm/test/CodeGen/X86/funnel-shift.ll (+20-40)
- (modified) llvm/test/CodeGen/X86/icmp-shift-opt.ll (+10-14)
- (modified) llvm/test/CodeGen/X86/legalize-shl-vec.ll (+44-44)
- (modified) llvm/test/CodeGen/X86/load-local-v3i129.ll (+4-6)
- (modified) llvm/test/CodeGen/X86/midpoint-int.ll (+17-18)
- (modified) llvm/test/CodeGen/X86/pr32282.ll (+8-8)
- (modified) llvm/test/CodeGen/X86/pr38539.ll (+7-7)
- (modified) llvm/test/CodeGen/X86/pr43820.ll (+171-172)
- (modified) llvm/test/CodeGen/X86/pr49162.ll (+1-4)
- (modified) llvm/test/CodeGen/X86/rotate-extract.ll (+3-4)
- (modified) llvm/test/CodeGen/X86/scmp.ll (+200-185)
- (modified) llvm/test/CodeGen/X86/sdiv_fix_sat.ll (+182-186)
- (modified) llvm/test/CodeGen/X86/shift-and.ll (+9-7)
- (modified) llvm/test/CodeGen/X86/shift-i512.ll (+33-30)
- (modified) llvm/test/CodeGen/X86/smax.ll (+13-13)
- (modified) llvm/test/CodeGen/X86/smin.ll (+13-13)
- (modified) llvm/test/CodeGen/X86/udiv_fix_sat.ll (+39-37)
- (modified) llvm/test/CodeGen/X86/umax.ll (+13-13)
- (modified) llvm/test/CodeGen/X86/umin.ll (+13-13)
- (modified) llvm/test/CodeGen/X86/vector-sext.ll (+44-62)
- (modified) llvm/test/CodeGen/X86/vector-zext.ll (+24-42)
``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index b05157289892b..ac169e3b7361c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -8497,17 +8497,51 @@ static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1,
return V;
};
- // (fshl X, ?, Y) | (shl X, Y) --> fshl X, ?, Y
if (N0.getOpcode() == ISD::FSHL && N1.getOpcode() == ISD::SHL &&
- N0.getOperand(0) == N1.getOperand(0) &&
- peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1)))
- return N0;
+ peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1))) {
+ // (fshl X, ?, Y) | (shl X, Y) --> fshl X, ?, Y
+ if (N0.getOperand(0) == N1.getOperand(0))
+ return N0;
+ // (fshl A, X, Y) | (shl X, Y) --> fshl (A|X), X, Y
+ if (N0.getOperand(1) == N1.getOperand(0) && N0.hasOneUse() &&
+ N1.hasOneUse()) {
+ SDValue A = N0.getOperand(0);
+ SDValue X = N1.getOperand(0);
+ SDValue NewLHS = DAG.getNode(ISD::OR, DL, VT, A, X);
+ return DAG.getNode(ISD::FSHL, DL, VT, NewLHS, X, N0.getOperand(2));
+ }
+ }
- // (fshr ?, X, Y) | (srl X, Y) --> fshr ?, X, Y
if (N0.getOpcode() == ISD::FSHR && N1.getOpcode() == ISD::SRL &&
- N0.getOperand(1) == N1.getOperand(0) &&
- peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1)))
- return N0;
+ peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1))) {
+ // (fshr ?, X, Y) | (srl X, Y) --> fshr ?, X, Y
+ if (N0.getOperand(1) == N1.getOperand(0))
+ return N0;
+ // (fshr X, B, Y) | (srl X, Y) --> fshr X, (X|B), Y
+ if (N0.getOperand(0) == N1.getOperand(0) && N0.hasOneUse() &&
+ N1.hasOneUse()) {
+ SDValue X = N1.getOperand(0);
+ SDValue B = N0.getOperand(1);
+ SDValue NewRHS = DAG.getNode(ISD::OR, DL, VT, X, B);
+ return DAG.getNode(ISD::FSHR, DL, VT, X, NewRHS, N0.getOperand(2));
+ }
+ }
+
+ // (fshl A, B, S) | (fshr C, D, BW-S) --> fshl (A|C), (B|D), S
+ if (N0.getOpcode() == ISD::FSHL && N1.getOpcode() == ISD::FSHR &&
+ N0.hasOneUse() && N1.hasOneUse()) {
+ auto *S0 = dyn_cast<ConstantSDNode>(N0.getOperand(2));
+ auto *S1 = dyn_cast<ConstantSDNode>(N1.getOperand(2));
+ if (S0 && S1 && (S0->getZExtValue() + S1->getZExtValue()) == BW) {
+ SDValue A = N0.getOperand(0);
+ SDValue B = N0.getOperand(1);
+ SDValue C = N1.getOperand(0);
+ SDValue D = N1.getOperand(1);
+ SDValue NewLHS = DAG.getNode(ISD::OR, DL, VT, A, C);
+ SDValue NewRHS = DAG.getNode(ISD::OR, DL, VT, B, D);
+ return DAG.getNode(ISD::FSHL, DL, VT, NewLHS, NewRHS, N0.getOperand(2));
+ }
+ }
// Attempt to match a legalized build_pair-esque pattern:
// or(shl(aext(Hi),BW/2),zext(Lo))
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 4d08a22f25ab9..9dcb68f3420f0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -3298,12 +3298,18 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, const APInt &Amt,
} else {
Lo = DAG.getNode(ISD::SHL, DL, NVT, InL,
DAG.getShiftAmountConstant(Amt, NVT, DL));
- Hi = DAG.getNode(
- ISD::OR, DL, NVT,
- DAG.getNode(ISD::SHL, DL, NVT, InH,
- DAG.getShiftAmountConstant(Amt, NVT, DL)),
- DAG.getNode(ISD::SRL, DL, NVT, InL,
- DAG.getShiftAmountConstant(-Amt + NVTBits, NVT, DL)));
+ // Use FSHL if legal so we don't need to combine it later.
+ if (TLI.isOperationLegal(ISD::FSHL, NVT)) {
+ Hi = DAG.getNode(ISD::FSHL, DL, NVT, InH, InL,
+ DAG.getShiftAmountConstant(Amt, NVT, DL));
+ } else {
+ Hi = DAG.getNode(
+ ISD::OR, DL, NVT,
+ DAG.getNode(ISD::SHL, DL, NVT, InH,
+ DAG.getShiftAmountConstant(Amt, NVT, DL)),
+ DAG.getNode(ISD::SRL, DL, NVT, InL,
+ DAG.getShiftAmountConstant(-Amt + NVTBits, NVT, DL)));
+ }
}
return;
}
@@ -3319,12 +3325,18 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, const APInt &Amt,
Lo = InH;
Hi = DAG.getConstant(0, DL, NVT);
} else {
- Lo = DAG.getNode(
- ISD::OR, DL, NVT,
- DAG.getNode(ISD::SRL, DL, NVT, InL,
- DAG.getShiftAmountConstant(Amt, NVT, DL)),
- DAG.getNode(ISD::SHL, DL, NVT, InH,
- DAG.getShiftAmountConstant(-Amt + NVTBits, NVT, DL)));
+ // Use FSHR if legal so we don't need to combine it later.
+ if (TLI.isOperationLegal(ISD::FSHR, NVT)) {
+ Lo = DAG.getNode(ISD::FSHR, DL, NVT, InH, InL,
+ DAG.getShiftAmountConstant(Amt, NVT, DL));
+ } else {
+ Lo = DAG.getNode(
+ ISD::OR, DL, NVT,
+ DAG.getNode(ISD::SRL, DL, NVT, InL,
+ DAG.getShiftAmountConstant(Amt, NVT, DL)),
+ DAG.getNode(ISD::SHL, DL, NVT, InH,
+ DAG.getShiftAmountConstant(-Amt + NVTBits, NVT, DL)));
+ }
Hi = DAG.getNode(ISD::SRL, DL, NVT, InH,
DAG.getShiftAmountConstant(Amt, NVT, DL));
}
@@ -3345,12 +3357,18 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, const APInt &Amt,
Hi = DAG.getNode(ISD::SRA, DL, NVT, InH,
DAG.getShiftAmountConstant(NVTBits - 1, NVT, DL));
} else {
- Lo = DAG.getNode(
- ISD::OR, DL, NVT,
- DAG.getNode(ISD::SRL, DL, NVT, InL,
- DAG.getShiftAmountConstant(Amt, NVT, DL)),
- DAG.getNode(ISD::SHL, DL, NVT, InH,
- DAG.getShiftAmountConstant(-Amt + NVTBits, NVT, DL)));
+ // Use FSHR if legal so we don't need to combine it later.
+ if (TLI.isOperationLegal(ISD::FSHR, NVT)) {
+ Lo = DAG.getNode(ISD::FSHR, DL, NVT, InH, InL,
+ DAG.getShiftAmountConstant(Amt, NVT, DL));
+ } else {
+ Lo = DAG.getNode(
+ ISD::OR, DL, NVT,
+ DAG.getNode(ISD::SRL, DL, NVT, InL,
+ DAG.getShiftAmountConstant(Amt, NVT, DL)),
+ DAG.getNode(ISD::SHL, DL, NVT, InH,
+ DAG.getShiftAmountConstant(-Amt + NVTBits, NVT, DL)));
+ }
Hi = DAG.getNode(ISD::SRA, DL, NVT, InH,
DAG.getShiftAmountConstant(Amt, NVT, DL));
}
diff --git a/llvm/test/CodeGen/AArch64/funnel-shift.ll b/llvm/test/CodeGen/AArch64/funnel-shift.ll
index e0bbfc620e2f8..9dd5dff896624 100644
--- a/llvm/test/CodeGen/AArch64/funnel-shift.ll
+++ b/llvm/test/CodeGen/AArch64/funnel-shift.ll
@@ -541,14 +541,12 @@ define <4 x i32> @fshr_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) {
define i32 @or_shl_fshl(i32 %x, i32 %y, i32 %s) {
; CHECK-SD-LABEL: or_shl_fshl:
; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: mov w8, w2
-; CHECK-SD-NEXT: lsr w9, w1, #1
-; CHECK-SD-NEXT: lsl w10, w1, w2
-; CHECK-SD-NEXT: mvn w11, w2
-; CHECK-SD-NEXT: lsl w8, w0, w8
-; CHECK-SD-NEXT: lsr w9, w9, w11
-; CHECK-SD-NEXT: orr w8, w8, w10
-; CHECK-SD-NEXT: orr w0, w8, w9
+; CHECK-SD-NEXT: lsr w8, w1, #1
+; CHECK-SD-NEXT: orr w9, w0, w1
+; CHECK-SD-NEXT: mvn w10, w2
+; CHECK-SD-NEXT: lsl w9, w9, w2
+; CHECK-SD-NEXT: lsr w8, w8, w10
+; CHECK-SD-NEXT: orr w0, w9, w8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: or_shl_fshl:
@@ -586,14 +584,12 @@ define i32 @or_shl_rotl(i32 %x, i32 %y, i32 %s) {
define i32 @or_shl_fshl_commute(i32 %x, i32 %y, i32 %s) {
; CHECK-SD-LABEL: or_shl_fshl_commute:
; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: mov w8, w2
-; CHECK-SD-NEXT: lsr w9, w1, #1
-; CHECK-SD-NEXT: lsl w10, w1, w2
-; CHECK-SD-NEXT: mvn w11, w2
-; CHECK-SD-NEXT: lsl w8, w0, w8
-; CHECK-SD-NEXT: lsr w9, w9, w11
-; CHECK-SD-NEXT: orr w8, w10, w8
-; CHECK-SD-NEXT: orr w0, w8, w9
+; CHECK-SD-NEXT: lsr w8, w1, #1
+; CHECK-SD-NEXT: orr w9, w0, w1
+; CHECK-SD-NEXT: mvn w10, w2
+; CHECK-SD-NEXT: lsl w9, w9, w2
+; CHECK-SD-NEXT: lsr w8, w8, w10
+; CHECK-SD-NEXT: orr w0, w9, w8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: or_shl_fshl_commute:
@@ -631,14 +627,12 @@ define i32 @or_shl_rotl_commute(i32 %x, i32 %y, i32 %s) {
define i32 @or_lshr_fshr(i32 %x, i32 %y, i32 %s) {
; CHECK-SD-LABEL: or_lshr_fshr:
; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: mov w8, w2
-; CHECK-SD-NEXT: lsl w9, w1, #1
-; CHECK-SD-NEXT: lsr w10, w1, w2
-; CHECK-SD-NEXT: lsr w8, w0, w8
-; CHECK-SD-NEXT: mvn w11, w2
-; CHECK-SD-NEXT: lsl w9, w9, w11
-; CHECK-SD-NEXT: orr w8, w8, w10
-; CHECK-SD-NEXT: orr w0, w9, w8
+; CHECK-SD-NEXT: lsl w8, w1, #1
+; CHECK-SD-NEXT: orr w9, w1, w0
+; CHECK-SD-NEXT: mvn w10, w2
+; CHECK-SD-NEXT: lsr w9, w9, w2
+; CHECK-SD-NEXT: lsl w8, w8, w10
+; CHECK-SD-NEXT: orr w0, w8, w9
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: or_lshr_fshr:
@@ -675,13 +669,11 @@ define i32 @or_lshr_rotr(i32 %x, i32 %y, i32 %s) {
define i32 @or_lshr_fshr_commute(i32 %x, i32 %y, i32 %s) {
; CHECK-SD-LABEL: or_lshr_fshr_commute:
; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: mov w8, w2
-; CHECK-SD-NEXT: lsl w9, w1, #1
-; CHECK-SD-NEXT: lsr w10, w1, w2
-; CHECK-SD-NEXT: lsr w8, w0, w8
-; CHECK-SD-NEXT: mvn w11, w2
-; CHECK-SD-NEXT: lsl w9, w9, w11
-; CHECK-SD-NEXT: orr w8, w10, w8
+; CHECK-SD-NEXT: lsl w8, w1, #1
+; CHECK-SD-NEXT: orr w9, w1, w0
+; CHECK-SD-NEXT: mvn w10, w2
+; CHECK-SD-NEXT: lsr w9, w9, w2
+; CHECK-SD-NEXT: lsl w8, w8, w10
; CHECK-SD-NEXT: orr w0, w8, w9
; CHECK-SD-NEXT: ret
;
diff --git a/llvm/test/CodeGen/RISCV/rv32p.ll b/llvm/test/CodeGen/RISCV/rv32p.ll
index e4d4c68109dea..651163c9ca7e7 100644
--- a/llvm/test/CodeGen/RISCV/rv32p.ll
+++ b/llvm/test/CodeGen/RISCV/rv32p.ll
@@ -349,15 +349,12 @@ define i64 @srx_i64(i64 %x, i64 %y) {
ret i64 %b
}
-; FIXME: Using srx instead of slx would avoid the mv.
define i64 @srxi_i64(i64 %x) {
; CHECK-LABEL: srxi_i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: mv a2, a1
-; CHECK-NEXT: li a3, 7
+; CHECK-NEXT: li a2, 25
+; CHECK-NEXT: srx a0, a1, a2
; CHECK-NEXT: srli a1, a1, 25
-; CHECK-NEXT: slx a2, a0, a3
-; CHECK-NEXT: mv a0, a2
; CHECK-NEXT: ret
%a = lshr i64 %x, 25
ret i64 %a
diff --git a/llvm/test/CodeGen/RISCV/rv64p.ll b/llvm/test/CodeGen/RISCV/rv64p.ll
index 53ca8476034a1..17dea9130003c 100644
--- a/llvm/test/CodeGen/RISCV/rv64p.ll
+++ b/llvm/test/CodeGen/RISCV/rv64p.ll
@@ -365,15 +365,12 @@ define i128 @srx_i128(i128 %x, i128 %y) {
ret i128 %b
}
-; FIXME: Using srx instead of slx would avoid the mv.
define i128 @srxi_i128(i128 %x) {
; CHECK-LABEL: srxi_i128:
; CHECK: # %bb.0:
-; CHECK-NEXT: mv a2, a1
-; CHECK-NEXT: li a3, 15
+; CHECK-NEXT: li a2, 49
+; CHECK-NEXT: srx a0, a1, a2
; CHECK-NEXT: srli a1, a1, 49
-; CHECK-NEXT: slx a2, a0, a3
-; CHECK-NEXT: mv a0, a2
; CHECK-NEXT: ret
%a = lshr i128 %x, 49
ret i128 %a
diff --git a/llvm/test/CodeGen/X86/avgceils-scalar.ll b/llvm/test/CodeGen/X86/avgceils-scalar.ll
index 91121bd4ad935..a44c746ad0eda 100644
--- a/llvm/test/CodeGen/X86/avgceils-scalar.ll
+++ b/llvm/test/CodeGen/X86/avgceils-scalar.ll
@@ -175,19 +175,19 @@ define i64 @test_fixed_i64(i64 %a0, i64 %a1) nounwind {
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl %eax, %edi
-; X86-NEXT: xorl %ecx, %edi
-; X86-NEXT: movl %edx, %ebx
-; X86-NEXT: xorl %esi, %ebx
-; X86-NEXT: shrdl $1, %ebx, %edi
-; X86-NEXT: orl %esi, %edx
-; X86-NEXT: sarl %ebx
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: xorl %edi, %esi
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: xorl %ecx, %ebx
+; X86-NEXT: shrdl $1, %esi, %ebx
+; X86-NEXT: orl %edi, %edx
+; X86-NEXT: sarl %esi
; X86-NEXT: orl %ecx, %eax
-; X86-NEXT: subl %edi, %eax
-; X86-NEXT: sbbl %ebx, %edx
+; X86-NEXT: subl %ebx, %eax
+; X86-NEXT: sbbl %esi, %edx
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -215,19 +215,19 @@ define i64 @test_ext_i64(i64 %a0, i64 %a1) nounwind {
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl %eax, %edi
-; X86-NEXT: xorl %ecx, %edi
-; X86-NEXT: movl %edx, %ebx
-; X86-NEXT: xorl %esi, %ebx
-; X86-NEXT: shrdl $1, %ebx, %edi
-; X86-NEXT: orl %esi, %edx
-; X86-NEXT: sarl %ebx
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: xorl %edi, %esi
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: xorl %ecx, %ebx
+; X86-NEXT: shrdl $1, %esi, %ebx
+; X86-NEXT: orl %edi, %edx
+; X86-NEXT: sarl %esi
; X86-NEXT: orl %ecx, %eax
-; X86-NEXT: subl %edi, %eax
-; X86-NEXT: sbbl %ebx, %edx
+; X86-NEXT: subl %ebx, %eax
+; X86-NEXT: sbbl %esi, %edx
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
diff --git a/llvm/test/CodeGen/X86/avgceilu-scalar.ll b/llvm/test/CodeGen/X86/avgceilu-scalar.ll
index 4ab4851eccd2c..987e0a0188c2d 100644
--- a/llvm/test/CodeGen/X86/avgceilu-scalar.ll
+++ b/llvm/test/CodeGen/X86/avgceilu-scalar.ll
@@ -175,19 +175,19 @@ define i64 @test_fixed_i64(i64 %a0, i64 %a1) nounwind {
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl %eax, %edi
-; X86-NEXT: xorl %ecx, %edi
-; X86-NEXT: movl %edx, %ebx
-; X86-NEXT: xorl %esi, %ebx
-; X86-NEXT: shrdl $1, %ebx, %edi
-; X86-NEXT: orl %esi, %edx
-; X86-NEXT: shrl %ebx
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: xorl %edi, %esi
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: xorl %ecx, %ebx
+; X86-NEXT: shrdl $1, %esi, %ebx
+; X86-NEXT: orl %edi, %edx
+; X86-NEXT: shrl %esi
; X86-NEXT: orl %ecx, %eax
-; X86-NEXT: subl %edi, %eax
-; X86-NEXT: sbbl %ebx, %edx
+; X86-NEXT: subl %ebx, %eax
+; X86-NEXT: sbbl %esi, %edx
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -215,19 +215,19 @@ define i64 @test_ext_i64(i64 %a0, i64 %a1) nounwind {
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl %eax, %edi
-; X86-NEXT: xorl %ecx, %edi
-; X86-NEXT: movl %edx, %ebx
-; X86-NEXT: xorl %esi, %ebx
-; X86-NEXT: shrdl $1, %ebx, %edi
-; X86-NEXT: orl %esi, %edx
-; X86-NEXT: shrl %ebx
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: xorl %edi, %esi
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: xorl %ecx, %ebx
+; X86-NEXT: shrdl $1, %esi, %ebx
+; X86-NEXT: orl %edi, %edx
+; X86-NEXT: shrl %esi
; X86-NEXT: orl %ecx, %eax
-; X86-NEXT: subl %edi, %eax
-; X86-NEXT: sbbl %ebx, %edx
+; X86-NEXT: subl %ebx, %eax
+; X86-NEXT: sbbl %esi, %edx
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
diff --git a/llvm/test/CodeGen/X86/avgfloors-scalar.ll b/llvm/test/CodeGen/X86/avgfloors-scalar.ll
index 87d72afa90939..eedc7b64ac3b8 100644
--- a/llvm/test/CodeGen/X86/avgfloors-scalar.ll
+++ b/llvm/test/CodeGen/X86/avgfloors-scalar.ll
@@ -260,10 +260,10 @@ define i64 @test_fixed_i64(i64 %a0, i64 %a1) nounwind {
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %eax, %ebx
-; X86-NEXT: xorl %esi, %ebx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: xorl %edi, %edx
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: xorl %esi, %ebx
; X86-NEXT: shrdl $1, %edx, %ebx
; X86-NEXT: andl %edi, %ecx
; X86-NEXT: sarl %edx
@@ -300,10 +300,10 @@ define i64 @test_lsb_i64(i64 %a0, i64 %a1) nounwind {
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %eax, %ebx
-; X86-NEXT: xorl %esi, %ebx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: xorl %edi, %edx
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: xorl %esi, %ebx
; X86-NEXT: shrdl $1, %edx, %ebx
; X86-NEXT: andl %edi, %ecx
; X86-NEXT: sarl %edx
@@ -342,10 +342,10 @@ define i64 @test_ext_i64(i64 %a0, i64 %a1) nounwind {
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %eax, %ebx
-; X86-NEXT: xorl %esi, %ebx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: xorl %edi, %edx
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: xorl %esi, %ebx
; X86-NEXT: shrdl $1, %edx, %ebx
; X86-NEXT: andl %edi, %ecx
; X86-NEXT: sarl %edx
diff --git a/llvm/test/CodeGen/X86/div_i129_v_pow2k.ll b/llvm/test/CodeGen/X86/div_i129_v_pow2k.ll
index 4d6d795e3beb8..d60c626fb900b 100644
--- a/llvm/test/CodeGen/X86/div_i129_v_pow2k.ll
+++ b/llvm/test/CodeGen/X86/div_i129_v_pow2k.ll
@@ -25,20 +25,20 @@ define i129 @v_sdiv_i129_v_pow2k(i129 %lhs) nounwind {
;
; X64-O0-LABEL: v_sdiv_i129_v_pow2k:
; X64-O0: # %bb.0:
-; X64-O0-NEXT: movl %edx, %eax
-; X64-O0-NEXT: andl $1, %eax
-; X64-O0-NEXT: movl %eax, %ecx
-; X64-O0-NEXT: negq %rcx
-; X64-O0-NEXT: movl %ecx, %r8d
+; X64-O0-NEXT: movq %rdi, %rax
+; X64-O0-NEXT: movl %edx, %ecx
+; X64-O0-NEXT: andl $1, %ecx
+; X64-O0-NEXT: movl %ecx, %edi
+; X64-O0-NEXT: negq %rdi
+; X64-O0-NEXT: movl %edi, %r8d
; X64-O0-NEXT: andl $1, %r8d
-; X64-O0-NEXT: # implicit-def: $rax
-; X64-O0-NEXT: movl %r8d, %eax
-; X64-O0-NEXT: shldq $32, %rcx, %rax
-; X64-O0-NEXT: addq %rax, %rdi
+; X64-O0-NEXT: # implicit-def: $rcx
+; X64-O0-NEXT: movl %r8d, %ecx
+; X64-O0-NEXT: shldq $32, %rdi, %rcx
+; X64-O0-NEXT: addq %rcx, %rax
; X64-O0-NEXT: adcq $0, %rsi
; X64-O0-NEXT: adcq $0, %rdx
-; X64-O0-NEXT: movq %rsi, %rax
-; X64-O0-NEXT: shldq $31, %rdi, %rax
+; X64-O0-NEXT: shrdq $33, %rsi, %rax
; X64-O0-NEXT: movl %edx, %ecx
; X64-O0-NEXT: andl $1, %ecx
; X64-O0-NEXT: # kill: def $rcx killed $ecx
@@ -154,20 +154,20 @@ define i129 @v_sdiv_exact_i129_v_pow2k(i129 %lhs) nounwind {
;
; X64-O0-LABEL: v_sdiv_exact_i129_v_pow2k:
; X64-O0: # %bb.0:
-; X64-O0-NEXT: movl %edx, %eax
-; X64-O0-NEXT: andl $1, %eax
-; X64-O0-NEXT: movl %eax, %ecx
-; X64-O0-NEXT: negq %rcx
-; X64-O0-NEXT: movl %ecx, %r8d
+; X64-O0-NEXT: movq %rdi, %rax
+; X64-O0-NEXT: movl %edx, %ecx
+; X64-O0-NEXT: andl $1, %ecx
+; X64-O0-NEXT: movl %ecx, %edi
+; X64-O0-NEXT: negq %rdi
+; X64-O0-NEXT: movl %edi, %r8d
; X64-O0-NEXT: andl $1, %r8d
-; X64-O0-NEXT: # implicit-def: $rax
-; X64-O0-NEXT: movl %r8d, %eax
-; X64-O0-NEXT: shldq $32, %rcx, %rax
-; X64-O0-NEXT: addq %rax, %rdi
+; X64-O0-NEXT: # implicit-def: $rcx
+; X64-O0-NEXT: movl %r8d, %ecx
+; X64-O0-NEXT: shldq $32, %rdi, %rcx
+; X64-O0-NEXT: addq %rcx, %rax
; X64-O0-NEXT: adcq $0, %rsi
; X64-O0-NEXT: adcq $0, %rdx
-; X64-O0-NEXT: movq %rsi, %rax
-; X64-O0-NEXT: shldq $31, %rdi, %rax
+; X64-O0-NEXT: shrdq $33, %rsi, %rax
; X64-O0-NEXT: movl %edx, %ecx
; X64-O0-NEXT: andl $1, %ecx
; X64-O0-NEXT: # kill: def $rcx killed $ecx
@@ -274,8 +274,8 @@ define i129 @v_udiv_i129_v_pow2k(i129 %lhs) nounwind {
;
; X64-O0-LABEL: v_udiv_i129_v_pow2k:
; X64-O0: # %bb.0:
-; X64-O0-NEXT: movq %rsi, %rax
-; X64-O0-NEXT: shldq $31, %rdi, %rax
+; X64-O0-NEXT: movq %rdi, %rax
+; X64-O0-NEXT: shrdq $33, %rsi, %rax
; X64-O0-NEXT: movl %edx, %ecx
; X64-O0-NEXT: andl $1, %ecx
; X64-O0-NEXT: movl %ecx, %edx
@@ -345,8...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/180889
- Previous message: [llvm] [DAGCombiner] Combine (fshl A, B, S) | (fshr C, D, BW-S) --> (fshl (A|C), (B|D), S) (PR #180889)
- Next message: [llvm] [DAGCombiner] Combine (fshl A, B, S) | (fshr C, D, BW-S) --> (fshl (A|C), (B|D), S) (PR #180889)
- Messages sorted by:
[ date ]
[ thread ]
[ subject ]
[ author ]
More information about the llvm-commits
mailing list