[llvm] c1e7911 - [DAG] FoldConstantArithmetic - fold bitlogic(bitcast(x),bitcast(y)) -> bitcast(bitlogic(x,y))
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 5 05:01:13 PDT 2021
Author: Simon Pilgrim
Date: 2021-11-05T12:00:59Z
New Revision: c1e7911c3b36ba41c919eabddf4e745e3b473e53
URL: https://github.com/llvm/llvm-project/commit/c1e7911c3b36ba41c919eabddf4e745e3b473e53
DIFF: https://github.com/llvm/llvm-project/commit/c1e7911c3b36ba41c919eabddf4e745e3b473e53.diff
LOG: [DAG] FoldConstantArithmetic - fold bitlogic(bitcast(x),bitcast(y)) -> bitcast(bitlogic(x,y))
To constant-fold bitwise logic ops whose constant build-vector operands have been legalized to a different type (e.g. v2i64 -> v4i32), this patch adds basic support for peeking through the bitcasts and performing the constant fold on the inner operands.
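For illustration only (this sketch is not part of the commit; the node numbers and constant values are made up), the fold rewrites a DAG of roughly this shape, using the legalized v2i64 -> v4i32 case from the description:

    t1: v4i32 = BUILD_VECTOR Constant:i32<63>, Constant:i32<0>, Constant:i32<63>, Constant:i32<0>
    t2: v2i64 = bitcast t1
    t3: v4i32 = BUILD_VECTOR Constant:i32<14>, Constant:i32<0>, Constant:i32<4>, Constant:i32<0>
    t4: v2i64 = bitcast t3
    t5: v2i64 = and t2, t4

into the constant-folded form

    t6: v4i32 = BUILD_VECTOR Constant:i32<14>, Constant:i32<0>, Constant:i32<4>, Constant:i32<0>
    t7: v2i64 = bitcast t6

i.e. the AND is evaluated on the inner v4i32 build vectors (63&14=14, 0&0=0, 63&4=4, 0&0=0) and a single bitcast back to the original v2i64 type is emitted.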
The MVE v2i64 predicate regressions will be addressed by future work to add basic v2i64 type support.
One of the yak shaving fixes for D113192.
Differential Revision: https://reviews.llvm.org/D113202
Added:
Modified:
llvm/include/llvm/CodeGen/ISDOpcodes.h
llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
llvm/test/CodeGen/Thumb2/mve-pred-build-const.ll
llvm/test/CodeGen/X86/vector-fshl-128.ll
llvm/test/CodeGen/X86/vector-fshl-rot-128.ll
llvm/test/CodeGen/X86/vector-fshr-128.ll
llvm/test/CodeGen/X86/vector-fshr-rot-128.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index d15878aa23c44..fd106f55a43d1 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -1260,6 +1260,11 @@ static const int FIRST_TARGET_STRICTFP_OPCODE = BUILTIN_OP_END + 400;
/// be used with SelectionDAG::getMemIntrinsicNode.
static const int FIRST_TARGET_MEMORY_OPCODE = BUILTIN_OP_END + 500;
+/// Whether this is a bitwise logic opcode.
+inline bool isBitwiseLogicOp(unsigned Opcode) {
+ return Opcode == ISD::AND || Opcode == ISD::OR || Opcode == ISD::XOR;
+}
+
/// Get underlying scalar opcode for VECREDUCE opcode.
/// For example ISD::AND for ISD::VECREDUCE_AND.
NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 1c25f5f952917..05256464742c8 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5293,6 +5293,19 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N2))
return FoldSymbolOffset(Opcode, VT, GA, N1);
+ // If this is a bitwise logic opcode see if we can fold bitcasted ops.
+ // TODO: Can we generalize this and fold any bitcasted constant data?
+ if (ISD::isBitwiseLogicOp(Opcode) && N1->getOpcode() == ISD::BITCAST &&
+ N2->getOpcode() == ISD::BITCAST) {
+ SDValue InnerN1 = peekThroughBitcasts(N1->getOperand(0));
+ SDValue InnerN2 = peekThroughBitcasts(N2->getOperand(0));
+ EVT InnerVT = InnerN1.getValueType();
+ if (InnerVT == InnerN2.getValueType() && InnerVT.isInteger())
+ if (SDValue C =
+ FoldConstantArithmetic(Opcode, DL, InnerVT, {InnerN1, InnerN2}))
+ return getBitcast(VT, C);
+ }
+
// For fixed width vectors, extract each constant element and fold them
// individually. Either input may be an undef value.
bool IsBVOrSV1 = N1->getOpcode() == ISD::BUILD_VECTOR ||
diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-build-const.ll b/llvm/test/CodeGen/Thumb2/mve-pred-build-const.ll
index 86634db14b344..248edbf6c1558 100644
--- a/llvm/test/CodeGen/Thumb2/mve-pred-build-const.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-pred-build-const.ll
@@ -158,13 +158,20 @@ define arm_aapcs_vfpcc <2 x i64> @build_upper_v2i1(<2 x i64> %a, <2 x i64> %b) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: adr r0, .LCPI14_0
; CHECK-NEXT: vldrw.u32 q2, [r0]
-; CHECK-NEXT: vbic q1, q1, q2
+; CHECK-NEXT: adr r0, .LCPI14_1
+; CHECK-NEXT: vand q1, q1, q2
+; CHECK-NEXT: vldrw.u32 q2, [r0]
; CHECK-NEXT: vand q0, q0, q2
; CHECK-NEXT: vorr q0, q0, q1
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI14_0:
+; CHECK-NEXT: .long 4294967295 @ 0xffffffff
+; CHECK-NEXT: .long 4294967295 @ 0xffffffff
+; CHECK-NEXT: .long 0 @ 0x0
+; CHECK-NEXT: .long 0 @ 0x0
+; CHECK-NEXT: .LCPI14_1:
; CHECK-NEXT: .long 0 @ 0x0
; CHECK-NEXT: .long 0 @ 0x0
; CHECK-NEXT: .long 4294967295 @ 0xffffffff
@@ -179,13 +186,20 @@ define arm_aapcs_vfpcc <2 x i64> @build_lower_v2i1(<2 x i64> %a, <2 x i64> %b) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: adr r0, .LCPI15_0
; CHECK-NEXT: vldrw.u32 q2, [r0]
-; CHECK-NEXT: vbic q1, q1, q2
+; CHECK-NEXT: adr r0, .LCPI15_1
+; CHECK-NEXT: vand q1, q1, q2
+; CHECK-NEXT: vldrw.u32 q2, [r0]
; CHECK-NEXT: vand q0, q0, q2
; CHECK-NEXT: vorr q0, q0, q1
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI15_0:
+; CHECK-NEXT: .long 0 @ 0x0
+; CHECK-NEXT: .long 0 @ 0x0
+; CHECK-NEXT: .long 4294967295 @ 0xffffffff
+; CHECK-NEXT: .long 4294967295 @ 0xffffffff
+; CHECK-NEXT: .LCPI15_1:
; CHECK-NEXT: .long 4294967295 @ 0xffffffff
; CHECK-NEXT: .long 4294967295 @ 0xffffffff
; CHECK-NEXT: .long 0 @ 0x0
diff --git a/llvm/test/CodeGen/X86/vector-fshl-128.ll b/llvm/test/CodeGen/X86/vector-fshl-128.ll
index e04dbafe5af7c..648b1323936e4 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-128.ll
@@ -2161,21 +2161,13 @@ define <2 x i64> @constant_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y) nounwind {
;
; X86-SSE2-LABEL: constant_funnnel_v2i64:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [63,0,63,0]
-; X86-SSE2-NEXT: movdqa {{.*#+}} xmm4 = <4,u,14,u>
-; X86-SSE2-NEXT: movdqa %xmm4, %xmm5
-; X86-SSE2-NEXT: pandn %xmm3, %xmm5
-; X86-SSE2-NEXT: psrlq $1, %xmm1
; X86-SSE2-NEXT: movdqa %xmm1, %xmm2
-; X86-SSE2-NEXT: psrlq %xmm5, %xmm2
-; X86-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[2,3,2,3]
-; X86-SSE2-NEXT: psrlq %xmm5, %xmm1
+; X86-SSE2-NEXT: psrlq $60, %xmm2
+; X86-SSE2-NEXT: psrlq $50, %xmm1
; X86-SSE2-NEXT: shufpd {{.*#+}} xmm2 = xmm2[0],xmm1[1]
-; X86-SSE2-NEXT: pand %xmm3, %xmm4
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
-; X86-SSE2-NEXT: psllq %xmm4, %xmm1
-; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[2,3,2,3]
-; X86-SSE2-NEXT: psllq %xmm3, %xmm0
+; X86-SSE2-NEXT: psllq $4, %xmm1
+; X86-SSE2-NEXT: psllq $14, %xmm0
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X86-SSE2-NEXT: orpd %xmm2, %xmm0
; X86-SSE2-NEXT: retl
@@ -2695,10 +2687,8 @@ define <2 x i64> @splatconstant_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y) nounwi
; X86-SSE2-LABEL: splatconstant_funnnel_v2i64:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: psrlq $50, %xmm1
-; X86-SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm1[0,1]
; X86-SSE2-NEXT: psllq $14, %xmm0
-; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm0[0,1]
-; X86-SSE2-NEXT: orpd %xmm1, %xmm0
+; X86-SSE2-NEXT: por %xmm1, %xmm0
; X86-SSE2-NEXT: retl
%res = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> <i64 14, i64 14>)
ret <2 x i64> %res
diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll
index 3b69e9cfcd375..d3e4260abf6a6 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll
@@ -1460,24 +1460,20 @@ define <2 x i64> @constant_funnnel_v2i64(<2 x i64> %x) nounwind {
;
; X86-SSE2-LABEL: constant_funnnel_v2i64:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [63,0,63,0]
-; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = <4,u,14,u>
-; X86-SSE2-NEXT: pxor %xmm3, %xmm3
-; X86-SSE2-NEXT: psubq %xmm2, %xmm3
-; X86-SSE2-NEXT: pand %xmm1, %xmm2
-; X86-SSE2-NEXT: movdqa %xmm0, %xmm4
-; X86-SSE2-NEXT: psllq %xmm2, %xmm4
-; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
-; X86-SSE2-NEXT: movdqa %xmm0, %xmm5
-; X86-SSE2-NEXT: psllq %xmm2, %xmm5
-; X86-SSE2-NEXT: movsd {{.*#+}} xmm5 = xmm4[0],xmm5[1]
-; X86-SSE2-NEXT: pand %xmm1, %xmm3
+; X86-SSE2-NEXT: pxor %xmm1, %xmm1
+; X86-SSE2-NEXT: psubq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
+; X86-SSE2-NEXT: psrlq %xmm1, %xmm2
+; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
+; X86-SSE2-NEXT: movdqa %xmm0, %xmm3
+; X86-SSE2-NEXT: psrlq %xmm1, %xmm3
+; X86-SSE2-NEXT: movsd {{.*#+}} xmm3 = xmm2[0],xmm3[1]
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
-; X86-SSE2-NEXT: psrlq %xmm3, %xmm1
-; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,2,3]
-; X86-SSE2-NEXT: psrlq %xmm2, %xmm0
+; X86-SSE2-NEXT: psllq $4, %xmm1
+; X86-SSE2-NEXT: psllq $14, %xmm0
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
-; X86-SSE2-NEXT: orpd %xmm5, %xmm0
+; X86-SSE2-NEXT: orpd %xmm3, %xmm0
; X86-SSE2-NEXT: retl
%res = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %x, <2 x i64> %x, <2 x i64> <i64 4, i64 14>)
ret <2 x i64> %res
@@ -1932,9 +1928,8 @@ define <2 x i64> @splatconstant_funnnel_v2i64(<2 x i64> %x) nounwind {
; X86-SSE2-LABEL: splatconstant_funnnel_v2i64:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
-; X86-SSE2-NEXT: psrlq $50, %xmm1
-; X86-SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm1[0,1]
-; X86-SSE2-NEXT: psllq $14, %xmm0
+; X86-SSE2-NEXT: psllq $14, %xmm1
+; X86-SSE2-NEXT: psrlq $50, %xmm0
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm0[0,1]
; X86-SSE2-NEXT: orpd %xmm1, %xmm0
; X86-SSE2-NEXT: retl
diff --git a/llvm/test/CodeGen/X86/vector-fshr-128.ll b/llvm/test/CodeGen/X86/vector-fshr-128.ll
index d279ad9c67c80..f1b0c70825cef 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-128.ll
@@ -1867,21 +1867,13 @@ define <2 x i64> @constant_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y) nounwind {
;
; X86-SSE2-LABEL: constant_funnnel_v2i64:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [63,0,63,0]
-; X86-SSE2-NEXT: movdqa {{.*#+}} xmm4 = <4,u,14,u>
-; X86-SSE2-NEXT: movdqa %xmm4, %xmm5
-; X86-SSE2-NEXT: pand %xmm3, %xmm5
; X86-SSE2-NEXT: movdqa %xmm1, %xmm2
-; X86-SSE2-NEXT: psrlq %xmm5, %xmm2
-; X86-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[2,3,2,3]
-; X86-SSE2-NEXT: psrlq %xmm5, %xmm1
+; X86-SSE2-NEXT: psrlq $4, %xmm2
+; X86-SSE2-NEXT: psrlq $14, %xmm1
; X86-SSE2-NEXT: shufpd {{.*#+}} xmm2 = xmm2[0],xmm1[1]
-; X86-SSE2-NEXT: pandn %xmm3, %xmm4
-; X86-SSE2-NEXT: psllq $1, %xmm0
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
-; X86-SSE2-NEXT: psllq %xmm4, %xmm1
-; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[2,3,2,3]
-; X86-SSE2-NEXT: psllq %xmm3, %xmm0
+; X86-SSE2-NEXT: psllq $60, %xmm1
+; X86-SSE2-NEXT: psllq $50, %xmm0
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X86-SSE2-NEXT: orpd %xmm2, %xmm0
; X86-SSE2-NEXT: retl
@@ -2414,10 +2406,8 @@ define <2 x i64> @splatconstant_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y) nounwi
; X86-SSE2-LABEL: splatconstant_funnnel_v2i64:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: psrlq $14, %xmm1
-; X86-SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm1[0,1]
; X86-SSE2-NEXT: psllq $50, %xmm0
-; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm0[0,1]
-; X86-SSE2-NEXT: orpd %xmm1, %xmm0
+; X86-SSE2-NEXT: por %xmm1, %xmm0
; X86-SSE2-NEXT: retl
%res = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> <i64 14, i64 14>)
ret <2 x i64> %res
diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll
index fec48466aff33..f6d983dad2501 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll
@@ -1548,24 +1548,20 @@ define <2 x i64> @constant_funnnel_v2i64(<2 x i64> %x) nounwind {
;
; X86-SSE2-LABEL: constant_funnnel_v2i64:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [63,0,63,0]
-; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = <4,u,14,u>
-; X86-SSE2-NEXT: pxor %xmm3, %xmm3
-; X86-SSE2-NEXT: psubq %xmm2, %xmm3
-; X86-SSE2-NEXT: pand %xmm1, %xmm2
-; X86-SSE2-NEXT: movdqa %xmm0, %xmm4
-; X86-SSE2-NEXT: psrlq %xmm2, %xmm4
-; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
-; X86-SSE2-NEXT: movdqa %xmm0, %xmm5
-; X86-SSE2-NEXT: psrlq %xmm2, %xmm5
-; X86-SSE2-NEXT: movsd {{.*#+}} xmm5 = xmm4[0],xmm5[1]
-; X86-SSE2-NEXT: pand %xmm1, %xmm3
+; X86-SSE2-NEXT: pxor %xmm1, %xmm1
+; X86-SSE2-NEXT: psubq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
+; X86-SSE2-NEXT: psllq %xmm1, %xmm2
+; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
+; X86-SSE2-NEXT: movdqa %xmm0, %xmm3
+; X86-SSE2-NEXT: psllq %xmm1, %xmm3
+; X86-SSE2-NEXT: movsd {{.*#+}} xmm3 = xmm2[0],xmm3[1]
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
-; X86-SSE2-NEXT: psllq %xmm3, %xmm1
-; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,2,3]
-; X86-SSE2-NEXT: psllq %xmm2, %xmm0
+; X86-SSE2-NEXT: psrlq $4, %xmm1
+; X86-SSE2-NEXT: psrlq $14, %xmm0
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
-; X86-SSE2-NEXT: orpd %xmm5, %xmm0
+; X86-SSE2-NEXT: orpd %xmm3, %xmm0
; X86-SSE2-NEXT: retl
%res = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %x, <2 x i64> %x, <2 x i64> <i64 4, i64 14>)
ret <2 x i64> %res
@@ -2020,9 +2016,8 @@ define <2 x i64> @splatconstant_funnnel_v2i64(<2 x i64> %x) nounwind {
; X86-SSE2-LABEL: splatconstant_funnnel_v2i64:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
-; X86-SSE2-NEXT: psllq $50, %xmm1
-; X86-SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm1[0,1]
-; X86-SSE2-NEXT: psrlq $14, %xmm0
+; X86-SSE2-NEXT: psrlq $14, %xmm1
+; X86-SSE2-NEXT: psllq $50, %xmm0
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm0[0,1]
; X86-SSE2-NEXT: orpd %xmm1, %xmm0
; X86-SSE2-NEXT: retl