[llvm] 4a3708c - [SDAG] remove shift that is redundant with part of funnel shift
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 24 08:26:01 PST 2022
Author: Sanjay Patel
Date: 2022-02-24T11:25:46-05:00
New Revision: 4a3708cd6b062a6afe2697cc8b39329e3f2faa25
URL: https://github.com/llvm/llvm-project/commit/4a3708cd6b062a6afe2697cc8b39329e3f2faa25
DIFF: https://github.com/llvm/llvm-project/commit/4a3708cd6b062a6afe2697cc8b39329e3f2faa25.diff
LOG: [SDAG] remove shift that is redundant with part of funnel shift
This is the SDAG translation of D120253:
https://alive2.llvm.org/ce/z/qHpmNn
The SDAG nodes can have operand types that differ from the result type.
We can see an example of that with AArch64 - the funnel shift amount
is an i64 rather than an i32.
We may need to make that match even more flexible to handle
post-legalization nodes, but I have not stepped into that yet.
Differential Revision: https://reviews.llvm.org/D120264
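
For reference, here is a minimal IR sketch of the pattern that the new
combine folds once it has been lowered to SDAG nodes. It mirrors the
or_shl_fshl_simplify tests in the diff below; the trailing or/ret lines are
reconstructed from the tests' naming and checks, so treat this as an
illustration rather than a verbatim copy of the test file:

  define i32 @or_shl_fshl_simplify(i32 %x, i32 %y, i32 %s) {
    ; fshl(%y, %x, %s) already contains (%y << %s) in its result,
    ; so or'ing in the separate shl adds nothing.
    %shy = shl i32 %y, %s
    %fun = call i32 @llvm.fshl.i32(i32 %y, i32 %x, i32 %s)
    %or = or i32 %fun, %shy
    ret i32 %or        ; the DAG combine reduces this to just %fun
  }
  declare i32 @llvm.fshl.i32(i32, i32, i32)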
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/test/CodeGen/AArch64/funnel-shift.ll
llvm/test/CodeGen/RISCV/rv32zbp.ll
llvm/test/CodeGen/X86/funnel-shift.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 3544729da2f71..483496a53a6e1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -6710,6 +6710,24 @@ static SDValue visitORCommutative(
return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(1), N1);
}
+ auto peekThroughZext = [](SDValue V) {
+ if (V->getOpcode() == ISD::ZERO_EXTEND)
+ return V->getOperand(0);
+ return V;
+ };
+
+ // (fshl X, ?, Y) | (shl X, Y) --> fshl X, ?, Y
+ if (N0.getOpcode() == ISD::FSHL && N1.getOpcode() == ISD::SHL &&
+ N0.getOperand(0) == N1.getOperand(0) &&
+ peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1)))
+ return N0;
+
+ // (fshr ?, X, Y) | (srl X, Y) --> fshr ?, X, Y
+ if (N0.getOpcode() == ISD::FSHR && N1.getOpcode() == ISD::SRL &&
+ N0.getOperand(1) == N1.getOperand(0) &&
+ peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1)))
+ return N0;
+
return SDValue();
}
diff --git a/llvm/test/CodeGen/AArch64/funnel-shift.ll b/llvm/test/CodeGen/AArch64/funnel-shift.ll
index b4b4e37b4cba5..fbf00a59f3cf1 100644
--- a/llvm/test/CodeGen/AArch64/funnel-shift.ll
+++ b/llvm/test/CodeGen/AArch64/funnel-shift.ll
@@ -472,14 +472,12 @@ define i32 @or_lshr_rotr_commute(i32 %x, i32 %y, i32 %s) {
define i32 @or_shl_fshl_simplify(i32 %x, i32 %y, i32 %s) {
; CHECK-LABEL: or_shl_fshl_simplify:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, w2
-; CHECK-NEXT: mvn w9, w2
-; CHECK-NEXT: lsr w10, w0, #1
-; CHECK-NEXT: lsr w9, w10, w9
-; CHECK-NEXT: lsl w8, w1, w8
+; CHECK-NEXT: // kill: def $w2 killed $w2 def $x2
+; CHECK-NEXT: mvn w8, w2
+; CHECK-NEXT: lsr w9, w0, #1
; CHECK-NEXT: lsl w10, w1, w2
-; CHECK-NEXT: orr w8, w8, w9
-; CHECK-NEXT: orr w0, w8, w10
+; CHECK-NEXT: lsr w8, w9, w8
+; CHECK-NEXT: orr w0, w10, w8
; CHECK-NEXT: ret
%shy = shl i32 %y, %s
%fun = call i32 @llvm.fshl.i32(i32 %y, i32 %x, i32 %s)
@@ -490,14 +488,12 @@ define i32 @or_shl_fshl_simplify(i32 %x, i32 %y, i32 %s) {
define i32 @or_lshr_fshr_simplify(i32 %x, i32 %y, i32 %s) {
; CHECK-LABEL: or_lshr_fshr_simplify:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, w2
-; CHECK-NEXT: mvn w9, w2
-; CHECK-NEXT: lsl w10, w0, #1
-; CHECK-NEXT: lsr w8, w1, w8
-; CHECK-NEXT: lsl w9, w10, w9
+; CHECK-NEXT: // kill: def $w2 killed $w2 def $x2
+; CHECK-NEXT: mvn w8, w2
+; CHECK-NEXT: lsl w9, w0, #1
; CHECK-NEXT: lsr w10, w1, w2
-; CHECK-NEXT: orr w8, w9, w8
-; CHECK-NEXT: orr w0, w10, w8
+; CHECK-NEXT: lsl w8, w9, w8
+; CHECK-NEXT: orr w0, w8, w10
; CHECK-NEXT: ret
%shy = lshr i32 %y, %s
%fun = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %s)
diff --git a/llvm/test/CodeGen/RISCV/rv32zbp.ll b/llvm/test/CodeGen/RISCV/rv32zbp.ll
index 76464a5024484..be13bebd44f1e 100644
--- a/llvm/test/CodeGen/RISCV/rv32zbp.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zbp.ll
@@ -3370,7 +3370,6 @@ define i32 @or_shl_fshl_simplify(i32 %x, i32 %y, i32 %s) {
; RV32I-NEXT: srli a0, a0, 1
; RV32I-NEXT: srl a0, a0, a2
; RV32I-NEXT: or a0, a1, a0
-; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: ret
;
; RV32ZBP-LABEL: or_shl_fshl_simplify:
@@ -3380,7 +3379,6 @@ define i32 @or_shl_fshl_simplify(i32 %x, i32 %y, i32 %s) {
; RV32ZBP-NEXT: srli a0, a0, 1
; RV32ZBP-NEXT: srl a0, a0, a2
; RV32ZBP-NEXT: or a0, a1, a0
-; RV32ZBP-NEXT: or a0, a0, a1
; RV32ZBP-NEXT: ret
%shy = shl i32 %y, %s
%fun = call i32 @llvm.fshl.i32(i32 %y, i32 %x, i32 %s)
@@ -3396,7 +3394,6 @@ define i32 @or_lshr_fshr_simplify(i32 %x, i32 %y, i32 %s) {
; RV32I-NEXT: slli a0, a0, 1
; RV32I-NEXT: sll a0, a0, a2
; RV32I-NEXT: or a0, a0, a1
-; RV32I-NEXT: or a0, a1, a0
; RV32I-NEXT: ret
;
; RV32ZBP-LABEL: or_lshr_fshr_simplify:
@@ -3406,7 +3403,6 @@ define i32 @or_lshr_fshr_simplify(i32 %x, i32 %y, i32 %s) {
; RV32ZBP-NEXT: slli a0, a0, 1
; RV32ZBP-NEXT: sll a0, a0, a2
; RV32ZBP-NEXT: or a0, a0, a1
-; RV32ZBP-NEXT: or a0, a1, a0
; RV32ZBP-NEXT: ret
%shy = lshr i32 %y, %s
%fun = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %s)
diff --git a/llvm/test/CodeGen/X86/funnel-shift.ll b/llvm/test/CodeGen/X86/funnel-shift.ll
index 49cf2684c7a82..2e9deb51dbe07 100644
--- a/llvm/test/CodeGen/X86/funnel-shift.ll
+++ b/llvm/test/CodeGen/X86/funnel-shift.ll
@@ -1260,25 +1260,18 @@ define i32 @or_lshr_rotr_commute(i32 %x, i32 %y, i32 %s) nounwind {
define i32 @or_shl_fshl_simplify(i32 %x, i32 %y, i32 %s) nounwind {
; X86-SSE2-LABEL: or_shl_fshl_simplify:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pushl %esi
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT: movl %eax, %esi
-; X86-SSE2-NEXT: shll %cl, %esi
; X86-SSE2-NEXT: shldl %cl, %edx, %eax
-; X86-SSE2-NEXT: orl %esi, %eax
-; X86-SSE2-NEXT: popl %esi
; X86-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: or_shl_fshl_simplify:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: movl %edx, %ecx
; X64-AVX2-NEXT: movl %esi, %eax
-; X64-AVX2-NEXT: shll %cl, %eax
; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-AVX2-NEXT: shldl %cl, %edi, %esi
-; X64-AVX2-NEXT: orl %esi, %eax
+; X64-AVX2-NEXT: shldl %cl, %edi, %eax
; X64-AVX2-NEXT: retq
%shy = shl i32 %y, %s
%fun = call i32 @llvm.fshl.i32(i32 %y, i32 %x, i32 %s)
@@ -1289,25 +1282,18 @@ define i32 @or_shl_fshl_simplify(i32 %x, i32 %y, i32 %s) nounwind {
define i32 @or_lshr_fshr_simplify(i32 %x, i32 %y, i32 %s) nounwind {
; X86-SSE2-LABEL: or_lshr_fshr_simplify:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pushl %esi
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT: movl %eax, %esi
-; X86-SSE2-NEXT: shrl %cl, %esi
; X86-SSE2-NEXT: shrdl %cl, %edx, %eax
-; X86-SSE2-NEXT: orl %esi, %eax
-; X86-SSE2-NEXT: popl %esi
; X86-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: or_lshr_fshr_simplify:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: movl %edx, %ecx
; X64-AVX2-NEXT: movl %esi, %eax
-; X64-AVX2-NEXT: shrl %cl, %eax
; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-AVX2-NEXT: shrdl %cl, %edi, %esi
-; X64-AVX2-NEXT: orl %esi, %eax
+; X64-AVX2-NEXT: shrdl %cl, %edi, %eax
; X64-AVX2-NEXT: retq
%shy = lshr i32 %y, %s
%fun = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %s)