[llvm] e5e93b6 - [DAG] FoldConstantArithmetic - add initial support for undef elements in bitcasted binop constant folding
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 8 03:54:13 PDT 2022
Author: Simon Pilgrim
Date: 2022-08-08T11:53:56+01:00
New Revision: e5e93b6130bde96d7e14851e218c5bf055f8a834
URL: https://github.com/llvm/llvm-project/commit/e5e93b6130bde96d7e14851e218c5bf055f8a834
DIFF: https://github.com/llvm/llvm-project/commit/e5e93b6130bde96d7e14851e218c5bf055f8a834.diff
LOG: [DAG] FoldConstantArithmetic - add initial support for undef elements in bitcasted binop constant folding
FoldConstantArithmetic can fold constant vectors hidden behind bitcasts (e.g. vXi64 -> v2Xi32 on 32-bit platforms), but currently bails if either vector contains undef elements. These undefs often occur because SimplifyDemandedBits/VectorElts calls recognise that the upper bits are unnecessary (e.g. funnel-shift/rotate implicit-modulo and AND masks).
This patch adds a basic 'FoldValueWithUndef' handler that will attempt to constant fold if one or both of the ops are undef - so far this just handles the AND and MUL cases where we always fold to zero.
The RISCV codegen increase is interesting - it looks like the BUILD_VECTOR lowering was previously loading a constant pool entry, but now (with all elements being defined constants) it can materialize the constant directly instead.
Differential Revision: https://reviews.llvm.org/D130839
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
llvm/test/CodeGen/ARM/srem-seteq-illegal-types.ll
llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
llvm/test/CodeGen/Thumb2/srem-seteq-illegal-types.ll
llvm/test/CodeGen/X86/fshl-splat-undef.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 5ca8d2b3a2849..3541c3d690b45 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5465,6 +5465,23 @@ static llvm::Optional<APInt> FoldValue(unsigned Opcode, const APInt &C1,
return llvm::None;
}
+// Handle constant folding with UNDEF.
+// TODO: Handle more cases.
+static llvm::Optional<APInt> FoldValueWithUndef(unsigned Opcode,
+ const APInt &C1, bool IsUndef1,
+ const APInt &C2,
+ bool IsUndef2) {
+ if (!(IsUndef1 || IsUndef2))
+ return FoldValue(Opcode, C1, C2);
+
+ // Fold and(x, undef) -> 0
+ // Fold mul(x, undef) -> 0
+ if (Opcode == ISD::AND || Opcode == ISD::MUL)
+ return APInt::getZero(C1.getBitWidth());
+
+ return llvm::None;
+};
+
SDValue SelectionDAG::FoldSymbolOffset(unsigned Opcode, EVT VT,
const GlobalAddressSDNode *GA,
const SDNode *N2) {
@@ -5565,7 +5582,6 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
ElementCount NumElts = VT.getVectorElementCount();
// See if we can fold through bitcasted integer ops.
- // TODO: Can we handle undef elements?
if (NumOps == 2 && VT.isFixedLengthVector() && VT.isInteger() &&
Ops[0].getValueType() == VT && Ops[1].getValueType() == VT &&
Ops[0].getOpcode() == ISD::BITCAST &&
@@ -5581,11 +5597,11 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
SmallVector<APInt> RawBits1, RawBits2;
BitVector UndefElts1, UndefElts2;
if (BV1->getConstantRawBits(IsLE, EltBits, RawBits1, UndefElts1) &&
- BV2->getConstantRawBits(IsLE, EltBits, RawBits2, UndefElts2) &&
- UndefElts1.none() && UndefElts2.none()) {
+ BV2->getConstantRawBits(IsLE, EltBits, RawBits2, UndefElts2)) {
SmallVector<APInt> RawBits;
for (unsigned I = 0, E = NumElts.getFixedValue(); I != E; ++I) {
- Optional<APInt> Fold = FoldValue(Opcode, RawBits1[I], RawBits2[I]);
+ Optional<APInt> Fold = FoldValueWithUndef(
+ Opcode, RawBits1[I], UndefElts1[I], RawBits2[I], UndefElts2[I]);
if (!Fold)
break;
RawBits.push_back(*Fold);
diff --git a/llvm/test/CodeGen/ARM/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/ARM/srem-seteq-illegal-types.ll
index c925548332e3b..a4e081d5384e5 100644
--- a/llvm/test/CodeGen/ARM/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/ARM/srem-seteq-illegal-types.ll
@@ -431,7 +431,7 @@ define <3 x i1> @test_srem_vec(<3 x i33> %X) nounwind {
; ARM7-NEXT: .LCPI3_2:
; ARM7-NEXT: .long 3 @ 0x3
; ARM7-NEXT: .long 0 @ 0x0
-; ARM7-NEXT: .zero 4
+; ARM7-NEXT: .long 0 @ 0x0
; ARM7-NEXT: .long 0 @ 0x0
;
; ARM8-LABEL: test_srem_vec:
@@ -507,7 +507,7 @@ define <3 x i1> @test_srem_vec(<3 x i33> %X) nounwind {
; ARM8-NEXT: .LCPI3_2:
; ARM8-NEXT: .long 3 @ 0x3
; ARM8-NEXT: .long 0 @ 0x0
-; ARM8-NEXT: .zero 4
+; ARM8-NEXT: .long 0 @ 0x0
; ARM8-NEXT: .long 0 @ 0x0
;
; NEON7-LABEL: test_srem_vec:
@@ -583,7 +583,7 @@ define <3 x i1> @test_srem_vec(<3 x i33> %X) nounwind {
; NEON7-NEXT: .LCPI3_2:
; NEON7-NEXT: .long 3 @ 0x3
; NEON7-NEXT: .long 0 @ 0x0
-; NEON7-NEXT: .zero 4
+; NEON7-NEXT: .long 0 @ 0x0
; NEON7-NEXT: .long 0 @ 0x0
;
; NEON8-LABEL: test_srem_vec:
@@ -659,7 +659,7 @@ define <3 x i1> @test_srem_vec(<3 x i33> %X) nounwind {
; NEON8-NEXT: .LCPI3_2:
; NEON8-NEXT: .long 3 @ 0x3
; NEON8-NEXT: .long 0 @ 0x0
-; NEON8-NEXT: .zero 4
+; NEON8-NEXT: .long 0 @ 0x0
; NEON8-NEXT: .long 0 @ 0x0
%srem = srem <3 x i33> %X, <i33 9, i33 9, i33 -9>
%cmp = icmp ne <3 x i33> %srem, <i33 3, i33 -3, i33 3>
diff --git a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
index f65fba336bfa8..7838d7dc44b2a 100644
--- a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
@@ -651,16 +651,22 @@ define void @test_srem_vec(<3 x i33>* %X) nounwind {
; RV32MV-NEXT: vsetivli zero, 1, e8, mf8, ta, mu
; RV32MV-NEXT: vmv.s.x v0, a0
; RV32MV-NEXT: vsetivli zero, 8, e32, m2, ta, mu
-; RV32MV-NEXT: vmv.v.i v8, 1
; RV32MV-NEXT: mv a0, sp
-; RV32MV-NEXT: vle32.v v10, (a0)
-; RV32MV-NEXT: lui a0, %hi(.LCPI3_0)
-; RV32MV-NEXT: addi a0, a0, %lo(.LCPI3_0)
-; RV32MV-NEXT: vle32.v v12, (a0)
-; RV32MV-NEXT: vmerge.vim v8, v8, -1, v0
-; RV32MV-NEXT: vand.vv v8, v10, v8
+; RV32MV-NEXT: vle32.v v8, (a0)
+; RV32MV-NEXT: vmv.v.i v10, 1
+; RV32MV-NEXT: vmerge.vim v10, v10, -1, v0
+; RV32MV-NEXT: vand.vv v8, v8, v10
+; RV32MV-NEXT: li a0, 2
+; RV32MV-NEXT: vmv.s.x v10, a0
+; RV32MV-NEXT: li a0, 1
+; RV32MV-NEXT: vmv.s.x v12, a0
+; RV32MV-NEXT: vmv.v.i v14, 0
+; RV32MV-NEXT: vsetivli zero, 3, e32, m2, tu, mu
+; RV32MV-NEXT: vslideup.vi v14, v12, 2
+; RV32MV-NEXT: vsetivli zero, 5, e32, m2, tu, mu
+; RV32MV-NEXT: vslideup.vi v14, v10, 4
; RV32MV-NEXT: vsetivli zero, 4, e64, m2, ta, mu
-; RV32MV-NEXT: vmsne.vv v0, v8, v12
+; RV32MV-NEXT: vmsne.vv v0, v8, v14
; RV32MV-NEXT: vmv.v.i v8, 0
; RV32MV-NEXT: vmerge.vim v8, v8, -1, v0
; RV32MV-NEXT: vsetivli zero, 1, e32, m2, ta, mu
diff --git a/llvm/test/CodeGen/Thumb2/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/Thumb2/srem-seteq-illegal-types.ll
index 45411771b1baa..58bafebd5b702 100644
--- a/llvm/test/CodeGen/Thumb2/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/Thumb2/srem-seteq-illegal-types.ll
@@ -136,7 +136,7 @@ define <3 x i1> @test_srem_vec(<3 x i33> %X) nounwind {
; CHECK-NEXT: .LCPI3_2:
; CHECK-NEXT: .long 3 @ 0x3
; CHECK-NEXT: .long 0 @ 0x0
-; CHECK-NEXT: .zero 4
+; CHECK-NEXT: .long 0 @ 0x0
; CHECK-NEXT: .long 0 @ 0x0
%srem = srem <3 x i33> %X, <i33 9, i33 9, i33 -9>
%cmp = icmp ne <3 x i33> %srem, <i33 3, i33 -3, i33 3>
diff --git a/llvm/test/CodeGen/X86/fshl-splat-undef.ll b/llvm/test/CodeGen/X86/fshl-splat-undef.ll
index d6090d20cf961..586aa9b110c5d 100644
--- a/llvm/test/CodeGen/X86/fshl-splat-undef.ll
+++ b/llvm/test/CodeGen/X86/fshl-splat-undef.ll
@@ -21,9 +21,7 @@ define void @test_fshl(<8 x i64> %lo, <8 x i64> %hi, <8 x i64>* %arr) {
; CHECK-LABEL: test_fshl:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: vpbroadcastd {{.*#+}} zmm2 = [12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12]
-; CHECK-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}, %zmm2, %zmm2
-; CHECK-NEXT: vpsllvq %zmm2, %zmm1, %zmm1
+; CHECK-NEXT: vpsllq $12, %zmm1, %zmm1
; CHECK-NEXT: vpsrlq $52, %zmm0, %zmm0
; CHECK-NEXT: vpternlogq $168, {{\.?LCPI[0-9]+_[0-9]+}}, %zmm1, %zmm0
; CHECK-NEXT: vmovdqa64 %zmm0, (%eax)
More information about the llvm-commits
mailing list