[llvm] 9fc347a - [DAG] PromoteIntRes_BUILD_VECTOR - extend constant boolean vectors according to target BooleanContents

Wed Jul 20 02:49:48 PDT 2022

Author: Simon Pilgrim
Date: 2022-07-20T10:49:31+01:00
New Revision: 9fc347aa4e819d43c87e43ae4a6388a58d387b20

URL: https://github.com/llvm/llvm-project/commit/9fc347aa4e819d43c87e43ae4a6388a58d387b20
DIFF: https://github.com/llvm/llvm-project/commit/9fc347aa4e819d43c87e43ae4a6388a58d387b20.diff

LOG: [DAG] PromoteIntRes_BUILD_VECTOR - extend constant boolean vectors according to target BooleanContents

PromoteIntRes_BUILD_VECTOR currently always ANY_EXTENDs build vector operands, but if this is a constant boolean vector we're losing the useful ability to keep the vector matching the BooleanContents mode used by the target.

This patch extends constant boolean vectors according to target BooleanContents, allowing a number of additional all-bits folds (notably XOR -> NOT conversions) to occur.

Differential Revision: https://reviews.llvm.org/D129641

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
    llvm/test/CodeGen/AArch64/arm64-vshuffle.ll
    llvm/test/CodeGen/ARM/select_xform.ll
    llvm/test/CodeGen/PowerPC/pr25080.ll
    llvm/test/CodeGen/PowerPC/vec-select.ll
    llvm/test/CodeGen/X86/bitcast-setcc-128.ll
    llvm/test/CodeGen/X86/promote-cmp.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 62f3d1140506..282cc5f9ae38 100644

--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -5297,21 +5297,28 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_VECTOR(SDNode *N) {
   assert(NOutVT.isVector() && "This type must be promoted to a vector type");
   unsigned NumElems = N->getNumOperands();
   EVT NOutVTElem = NOutVT.getVectorElementType();
-
+  TargetLoweringBase::BooleanContent NOutBoolType = TLI.getBooleanContents(NOutVT);
+  unsigned NOutExtOpc = TargetLowering::getExtendForContent(NOutBoolType);
   SDLoc dl(N);
 
   SmallVector<SDValue, 8> Ops;
   Ops.reserve(NumElems);
   for (unsigned i = 0; i != NumElems; ++i) {
-    SDValue Op;
+    SDValue Op = N->getOperand(i);
+    EVT OpVT = Op.getValueType();
     // BUILD_VECTOR integer operand types are allowed to be larger than the
     // result's element type. This may still be true after the promotion. For
     // example, we might be promoting (<v?i1> = BV <i32>, <i32>, ...) to
     // (v?i16 = BV <i32>, <i32>, ...), and we can't any_extend <i32> to <i16>.
-    if (N->getOperand(i).getValueType().bitsLT(NOutVTElem))
-      Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, N->getOperand(i));
-    else
-      Op = N->getOperand(i);
+    if (OpVT.bitsLT(NOutVTElem)) {
+      unsigned ExtOpc = ISD::ANY_EXTEND;
+      // Attempt to extend constant bool vectors to match target's BooleanContent.
+      // While not necessary, this improves chances of the constant correctly
+      // folding with compare results (e.g. for NOT patterns).
+      if (OpVT == MVT::i1 && Op.getOpcode() == ISD::Constant)
+        ExtOpc = NOutExtOpc;
+      Op = DAG.getNode(ExtOpc, dl, NOutVTElem, Op);
+    }
     Ops.push_back(Op);
   }
 

diff  --git a/llvm/test/CodeGen/AArch64/arm64-vshuffle.ll b/llvm/test/CodeGen/AArch64/arm64-vshuffle.ll
index 89d491e88775..8275c66cb5b5 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vshuffle.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vshuffle.ll
@@ -15,19 +15,11 @@ entry:
   ret <8 x i1> %Shuff
 }
 
-; CHECK-LABEL: lCPI1_0:
-; CHECK:          .byte   0                       ; 0x0
-; CHECK:          .space  1
-; CHECK:          .byte   0                       ; 0x0
-; CHECK:          .space  1
-; CHECK:          .byte   1                       ; 0x1
-; CHECK:          .byte   0                       ; 0x0
-; CHECK:          .byte   0                       ; 0x0
-; CHECK:          .byte   0                       ; 0x0
 define <8 x i1>@test2() {
-; CHECK-LABEL: test2
-; CHECK: adrp    x[[REG2:[0-9]+]], lCPI1_0@PAGE
-; CHECK: ldr     d[[REG1:[0-9]+]], [x[[REG2]], lCPI1_0@PAGEOFF]
+; CHECK-LABEL: test2:
+; CHECK:       ; %bb.0: ; %bb
+; CHECK-NEXT:    movi d0, #0x0000ff00000000
+; CHECK-NEXT:    ret
 bb:
   %Shuff = shufflevector <8 x i1> zeroinitializer,
      <8 x i1> <i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0>,
@@ -39,7 +31,7 @@ bb:
 define <16 x i1> @test3(i1* %ptr, i32 %v) {
 ; CHECK-LABEL: test3:
 ; CHECK:       ; %bb.0: ; %bb
-; CHECK-NEXT:    movi.4s v0, #1
+; CHECK-NEXT:    movi.2d v0, #0x0000ff000000ff
 ; CHECK-NEXT:    ret
 bb:
   %Shuff = shufflevector <16 x i1> <i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0>, <16 x i1> undef,
@@ -52,7 +44,7 @@ bb:
 ; CHECK:         .byte   0                       ; 0x0
 ; CHECK:         .byte   0                       ; 0x0
 ; CHECK:         .byte   0                       ; 0x0
-; CHECK:         .byte   1                       ; 0x1
+; CHECK:         .byte   255                     ; 0xff
 ; CHECK:         .byte   0                       ; 0x0
 ; CHECK:         .byte   0                       ; 0x0
 ; CHECK:         .byte   0                       ; 0x0

diff  --git a/llvm/test/CodeGen/ARM/select_xform.ll b/llvm/test/CodeGen/ARM/select_xform.ll
index ce03f61d0777..159ff50fcf2f 100644
--- a/llvm/test/CodeGen/ARM/select_xform.ll
+++ b/llvm/test/CodeGen/ARM/select_xform.ll
@@ -529,8 +529,7 @@ define  <2 x i32> @t21(<2 x i32> %lhs, <2 x i32> %rhs) {
 ; CHECK-NEXT:    vmov d16, r2, r3
 ; CHECK-NEXT:    vmov d17, r0, r1
 ; CHECK-NEXT:    vceq.i32 d16, d17, d16
-; CHECK-NEXT:    vmov.i32 d17, #0x1
-; CHECK-NEXT:    veor d16, d16, d17
+; CHECK-NEXT:    vmvn d16, d16
 ; CHECK-NEXT:    vshl.i32 d16, d16, #31
 ; CHECK-NEXT:    vshr.s32 d16, d16, #31
 ; CHECK-NEXT:    vmov r0, r1, d16

diff  --git a/llvm/test/CodeGen/PowerPC/pr25080.ll b/llvm/test/CodeGen/PowerPC/pr25080.ll
index 0346b765252e..0f78f8086613 100644
--- a/llvm/test/CodeGen/PowerPC/pr25080.ll
+++ b/llvm/test/CodeGen/PowerPC/pr25080.ll
@@ -44,13 +44,10 @@ define <8 x i16> @pr25080(<8 x i32> %a) {
 ; LE-NEXT:    lxvd2x 2, 0, 3
 ; LE-NEXT:    vmrghh 5, 0, 5
 ; LE-NEXT:    xxmrglw 0, 36, 34
-; LE-NEXT:    vspltish 4, 15
 ; LE-NEXT:    xxmrglw 1, 37, 35
 ; LE-NEXT:    xxswapd 35, 2
 ; LE-NEXT:    xxmrgld 34, 1, 0
 ; LE-NEXT:    xxlor 34, 34, 35
-; LE-NEXT:    vslh 2, 2, 4
-; LE-NEXT:    vsrah 2, 2, 4
 ; LE-NEXT:    blr
 ;
 ; BE-LABEL: pr25080:
@@ -96,12 +93,9 @@ define <8 x i16> @pr25080(<8 x i32> %a) {
 ; BE-NEXT:    vperm 3, 0, 3, 1
 ; BE-NEXT:    xxmrghw 0, 36, 34
 ; BE-NEXT:    xxmrghw 1, 35, 37
-; BE-NEXT:    vspltish 3, 15
 ; BE-NEXT:    xxmrghd 34, 1, 0
 ; BE-NEXT:    lxvw4x 0, 0, 3
 ; BE-NEXT:    xxlor 34, 34, 0
-; BE-NEXT:    vslh 2, 2, 3
-; BE-NEXT:    vsrah 2, 2, 3
 ; BE-NEXT:    blr
 entry:
   %0 = trunc <8 x i32> %a to <8 x i23>

diff  --git a/llvm/test/CodeGen/PowerPC/vec-select.ll b/llvm/test/CodeGen/PowerPC/vec-select.ll
index 152f4eebec16..a1be48761665 100644
--- a/llvm/test/CodeGen/PowerPC/vec-select.ll
+++ b/llvm/test/CodeGen/PowerPC/vec-select.ll
@@ -53,15 +53,11 @@ entry:
   ret <2 x i64> %or.i
 }
 
-; Not valid to emit XXSEL for this illegal type.
+; vXi1 constants are sign-extended to preserve XXSEL pattern.
 define dso_local <4 x i1> @test5(<4 x i1> %a, <4 x i1> %b, <4 x i1> %c) {
 ; CHECK-LABEL: test5:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vspltisw v5, 1
-; CHECK-NEXT:    xxland vs0, vs36, vs35
-; CHECK-NEXT:    xxlxor vs1, vs36, vs37
-; CHECK-NEXT:    xxland vs1, vs34, vs1
-; CHECK-NEXT:    xxlor vs34, vs1, vs0
+; CHECK-NEXT:    xxsel vs34, vs34, vs35, vs36
 ; CHECK-NEXT:    blr
 entry:
   %neg.i = xor <4 x i1> %c, <i1 -1, i1 -1, i1 -1, i1 -1>

diff  --git a/llvm/test/CodeGen/X86/bitcast-setcc-128.ll b/llvm/test/CodeGen/X86/bitcast-setcc-128.ll
index 2c49eec87044..86162281e0d7 100644
--- a/llvm/test/CodeGen/X86/bitcast-setcc-128.ll
+++ b/llvm/test/CodeGen/X86/bitcast-setcc-128.ll
@@ -515,8 +515,8 @@ define i64 @v16i8_widened_with_ones(<16 x i8> %a, <16 x i8> %b) {
 ; AVX2-LABEL: v16i8_widened_with_ones:
 ; AVX2:       # %bb.0: # %entry
 ; AVX2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vinserti128 $1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; AVX2-NEXT:    vpsllw $7, %ymm0, %ymm0
+; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vpmovmskb %ymm0, %ecx
 ; AVX2-NEXT:    movabsq $-4294967296, %rax # imm = 0xFFFFFFFF00000000
 ; AVX2-NEXT:    orq %rcx, %rax

diff  --git a/llvm/test/CodeGen/X86/promote-cmp.ll b/llvm/test/CodeGen/X86/promote-cmp.ll
index 3d1041cd78e3..4a58ea49608d 100644
--- a/llvm/test/CodeGen/X86/promote-cmp.ll
+++ b/llvm/test/CodeGen/X86/promote-cmp.ll
@@ -28,21 +28,20 @@ define <4 x i64> @PR45808(<4 x i64> %0, <4 x i64> %1) {
 ; SSE2-NEXT:    shufps {{.*#+}} xmm5 = xmm5[1,3],xmm8[1,3]
 ; SSE2-NEXT:    orps %xmm4, %xmm5
 ; SSE2-NEXT:    pcmpeqd %xmm4, %xmm4
-; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm5[2,1,3,3]
-; SSE2-NEXT:    psllq $63, %xmm6
-; SSE2-NEXT:    psrad $31, %xmm6
-; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
-; SSE2-NEXT:    pand %xmm6, %xmm1
-; SSE2-NEXT:    pandn %xmm3, %xmm6
-; SSE2-NEXT:    por %xmm6, %xmm1
-; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm5[0,1,1,3]
-; SSE2-NEXT:    pxor %xmm4, %xmm3
-; SSE2-NEXT:    psllq $63, %xmm3
-; SSE2-NEXT:    psrad $31, %xmm3
-; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
-; SSE2-NEXT:    pand %xmm3, %xmm0
-; SSE2-NEXT:    pandn %xmm2, %xmm3
-; SSE2-NEXT:    por %xmm3, %xmm0
+; SSE2-NEXT:    pxor %xmm5, %xmm4
+; SSE2-NEXT:    pxor %xmm6, %xmm6
+; SSE2-NEXT:    pcmpgtd %xmm4, %xmm6
+; SSE2-NEXT:    punpckldq {{.*#+}} xmm4 = xmm4[0],xmm6[0],xmm4[1],xmm6[1]
+; SSE2-NEXT:    pand %xmm4, %xmm0
+; SSE2-NEXT:    pandn %xmm2, %xmm4
+; SSE2-NEXT:    por %xmm4, %xmm0
+; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm5[2,1,3,3]
+; SSE2-NEXT:    psllq $63, %xmm2
+; SSE2-NEXT:    psrad $31, %xmm2
+; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; SSE2-NEXT:    pand %xmm2, %xmm1
+; SSE2-NEXT:    pandn %xmm3, %xmm2
+; SSE2-NEXT:    por %xmm2, %xmm1
 ; SSE2-NEXT:    retq
 ;
 ; SSE4-LABEL: PR45808:
@@ -57,8 +56,7 @@ define <4 x i64> @PR45808(<4 x i64> %0, <4 x i64> %1) {
 ; SSE4-NEXT:    pxor %xmm5, %xmm6
 ; SSE4-NEXT:    psllq $63, %xmm0
 ; SSE4-NEXT:    blendvpd %xmm0, %xmm1, %xmm3
-; SSE4-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm6[0],zero,xmm6[1],zero
-; SSE4-NEXT:    psllq $63, %xmm0
+; SSE4-NEXT:    pmovsxdq %xmm6, %xmm0
 ; SSE4-NEXT:    blendvpd %xmm0, %xmm4, %xmm2
 ; SSE4-NEXT:    movapd %xmm2, %xmm0
 ; SSE4-NEXT:    movapd %xmm3, %xmm1