[llvm] 5780087 - [DAG] Extend SearchForAndLoads with any_extend handling

David Green via llvm-commits llvm-commits at lists.llvm.org
Mon Jan 17 07:25:15 PST 2022


Author: David Green
Date: 2022-01-17T15:25:11Z
New Revision: 578008789fd061a88ce47dac6ff627001b404348

URL: https://github.com/llvm/llvm-project/commit/578008789fd061a88ce47dac6ff627001b404348
DIFF: https://github.com/llvm/llvm-project/commit/578008789fd061a88ce47dac6ff627001b404348.diff

LOG: [DAG] Extend SearchForAndLoads with any_extend handling

This extends SearchForAndLoads so that it can look through ANY_EXTEND
nodes. These nodes can be created from mismatching IR types when the AND
node we begin from only demands the low bits of the register: because
only the low bits are demanded, the zext and sext nodes are turned into
any_extends. To be able to look through ANY_EXTEND nodes we need to
handle mismatching types in a few places, potentially truncating the
mask to the size of the final load.

Differential Revision: https://reviews.llvm.org/D117457

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/test/CodeGen/AArch64/combine-andintoload.ll
    llvm/test/CodeGen/X86/pr35763.ll
    llvm/test/CodeGen/X86/pr35765.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 7c684bd3aeb97..f0bc7470a3e0d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -5491,6 +5491,8 @@ bool DAGCombiner::SearchForAndLoads(SDNode *N,
 
     // Some constants may need fixing up later if they are too large.
     if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
+      if (Mask->getValueType(0) != C->getValueType(0))
+        return false;
       if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
           (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
         NodesWithConsts.insert(N);
@@ -5524,9 +5526,9 @@ bool DAGCombiner::SearchForAndLoads(SDNode *N,
     case ISD::AssertZext: {
       unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
       EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
-      EVT VT = Op.getOpcode() == ISD::AssertZext ?
-        cast<VTSDNode>(Op.getOperand(1))->getVT() :
-        Op.getOperand(0).getValueType();
+      EVT VT = Op.getOpcode() == ISD::AssertZext
+                   ? cast<VTSDNode>(Op.getOperand(1))->getVT()
+                   : Op.getOperand(0).getValueType();
 
       // We can accept extending nodes if the mask is wider or an equal
       // width to the original type.
@@ -5534,6 +5536,7 @@ bool DAGCombiner::SearchForAndLoads(SDNode *N,
         continue;
       break;
     }
+    case ISD::ANY_EXTEND:
     case ISD::OR:
     case ISD::XOR:
     case ISD::AND:
@@ -5593,12 +5596,14 @@ bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
     // masking.
     if (FixupNode) {
       LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
-      SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
-                                FixupNode->getValueType(0),
-                                SDValue(FixupNode, 0), MaskOp);
+      SDValue MaskOpT = DAG.getZExtOrTrunc(MaskOp, SDLoc(FixupNode),
+                                           FixupNode->getValueType(0));
+      SDValue And =
+          DAG.getNode(ISD::AND, SDLoc(FixupNode), FixupNode->getValueType(0),
+                      SDValue(FixupNode, 0), MaskOpT);
       DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
       if (And.getOpcode() == ISD ::AND)
-        DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
+        DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOpT);
     }
 
     // Narrow any constants that need it.
@@ -5607,10 +5612,12 @@ bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
       SDValue Op1 = LogicN->getOperand(1);
 
       if (isa<ConstantSDNode>(Op0))
-          std::swap(Op0, Op1);
+        std::swap(Op0, Op1);
 
-      SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
-                                Op1, MaskOp);
+      SDValue MaskOpT =
+          DAG.getZExtOrTrunc(MaskOp, SDLoc(Op1), Op1.getValueType());
+      SDValue And =
+          DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(), Op1, MaskOpT);
 
       DAG.UpdateNodeOperands(LogicN, Op0, And);
     }
@@ -5618,12 +5625,14 @@ bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
     // Create narrow loads.
     for (auto *Load : Loads) {
       LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
+      SDValue MaskOpT =
+          DAG.getZExtOrTrunc(MaskOp, SDLoc(Load), Load->getValueType(0));
       SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
-                                SDValue(Load, 0), MaskOp);
+                                SDValue(Load, 0), MaskOpT);
       DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
       if (And.getOpcode() == ISD ::AND)
         And = SDValue(
-            DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
+            DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOpT), 0);
       SDValue NewLoad = reduceLoadWidth(And.getNode());
       assert(NewLoad &&
              "Shouldn't be masking the load if it can't be narrowed");

diff  --git a/llvm/test/CodeGen/AArch64/combine-andintoload.ll b/llvm/test/CodeGen/AArch64/combine-andintoload.ll
index 2fcd546d2ade9..55516a10d17f3 100644
--- a/llvm/test/CodeGen/AArch64/combine-andintoload.ll
+++ b/llvm/test/CodeGen/AArch64/combine-andintoload.ll
@@ -5,16 +5,14 @@
 define i64 @load32_and16_and(i32* %p, i64 %y) {
 ; CHECK-LABEL: load32_and16_and:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldr w8, [x0]
-; CHECK-NEXT:    and w8, w1, w8
-; CHECK-NEXT:    and x0, x8, #0xffff
+; CHECK-NEXT:    ldrh w8, [x0]
+; CHECK-NEXT:    and w0, w1, w8
 ; CHECK-NEXT:    ret
 ;
 ; CHECKBE-LABEL: load32_and16_and:
 ; CHECKBE:       // %bb.0:
-; CHECKBE-NEXT:    ldr w8, [x0]
-; CHECKBE-NEXT:    and w8, w1, w8
-; CHECKBE-NEXT:    and x0, x8, #0xffff
+; CHECKBE-NEXT:    ldrh w8, [x0, #2]
+; CHECKBE-NEXT:    and w0, w1, w8
 ; CHECKBE-NEXT:    ret
   %x = load i32, i32* %p, align 4
   %xz = zext i32 %x to i64
@@ -26,16 +24,14 @@ define i64 @load32_and16_and(i32* %p, i64 %y) {
 define i64 @load32_and16_andr(i32* %p, i64 %y) {
 ; CHECK-LABEL: load32_and16_andr:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldr w8, [x0]
-; CHECK-NEXT:    and w8, w1, w8
-; CHECK-NEXT:    and x0, x8, #0xffff
+; CHECK-NEXT:    ldrh w8, [x0]
+; CHECK-NEXT:    and w0, w1, w8
 ; CHECK-NEXT:    ret
 ;
 ; CHECKBE-LABEL: load32_and16_andr:
 ; CHECKBE:       // %bb.0:
-; CHECKBE-NEXT:    ldr w8, [x0]
-; CHECKBE-NEXT:    and w8, w1, w8
-; CHECKBE-NEXT:    and x0, x8, #0xffff
+; CHECKBE-NEXT:    ldrh w8, [x0, #2]
+; CHECKBE-NEXT:    and w0, w1, w8
 ; CHECKBE-NEXT:    ret
   %x = load i32, i32* %p, align 4
   %xz = zext i32 %x to i64
@@ -47,16 +43,14 @@ define i64 @load32_and16_andr(i32* %p, i64 %y) {
 define i64 @load32_and16_and_sext(i32* %p, i64 %y) {
 ; CHECK-LABEL: load32_and16_and_sext:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldr w8, [x0]
-; CHECK-NEXT:    and w8, w1, w8
-; CHECK-NEXT:    and x0, x8, #0xffff
+; CHECK-NEXT:    ldrh w8, [x0]
+; CHECK-NEXT:    and w0, w1, w8
 ; CHECK-NEXT:    ret
 ;
 ; CHECKBE-LABEL: load32_and16_and_sext:
 ; CHECKBE:       // %bb.0:
-; CHECKBE-NEXT:    ldr w8, [x0]
-; CHECKBE-NEXT:    and w8, w1, w8
-; CHECKBE-NEXT:    and x0, x8, #0xffff
+; CHECKBE-NEXT:    ldrh w8, [x0, #2]
+; CHECKBE-NEXT:    and w0, w1, w8
 ; CHECKBE-NEXT:    ret
   %x = load i32, i32* %p, align 4
   %xz = sext i32 %x to i64
@@ -68,16 +62,16 @@ define i64 @load32_and16_and_sext(i32* %p, i64 %y) {
 define i64 @load32_and16_or(i32* %p, i64 %y) {
 ; CHECK-LABEL: load32_and16_or:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldr w8, [x0]
-; CHECK-NEXT:    orr w8, w1, w8
-; CHECK-NEXT:    and x0, x8, #0xffff
+; CHECK-NEXT:    ldrh w8, [x0]
+; CHECK-NEXT:    and w9, w1, #0xffff
+; CHECK-NEXT:    orr w0, w9, w8
 ; CHECK-NEXT:    ret
 ;
 ; CHECKBE-LABEL: load32_and16_or:
 ; CHECKBE:       // %bb.0:
-; CHECKBE-NEXT:    ldr w8, [x0]
-; CHECKBE-NEXT:    orr w8, w1, w8
-; CHECKBE-NEXT:    and x0, x8, #0xffff
+; CHECKBE-NEXT:    ldrh w8, [x0, #2]
+; CHECKBE-NEXT:    and w9, w1, #0xffff
+; CHECKBE-NEXT:    orr w0, w9, w8
 ; CHECKBE-NEXT:    ret
   %x = load i32, i32* %p, align 4
   %xz = zext i32 %x to i64
@@ -170,16 +164,14 @@ define i64 @load16_and16(i16* %p, i64 %y) {
 define i64 @load16_and8(i16* %p, i64 %y) {
 ; CHECK-LABEL: load16_and8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldrh w8, [x0]
-; CHECK-NEXT:    and w8, w1, w8
-; CHECK-NEXT:    and x0, x8, #0xff
+; CHECK-NEXT:    ldrb w8, [x0]
+; CHECK-NEXT:    and w0, w1, w8
 ; CHECK-NEXT:    ret
 ;
 ; CHECKBE-LABEL: load16_and8:
 ; CHECKBE:       // %bb.0:
-; CHECKBE-NEXT:    ldrh w8, [x0]
-; CHECKBE-NEXT:    and w8, w1, w8
-; CHECKBE-NEXT:    and x0, x8, #0xff
+; CHECKBE-NEXT:    ldrb w8, [x0, #1]
+; CHECKBE-NEXT:    and w0, w1, w8
 ; CHECKBE-NEXT:    ret
   %x = load i16, i16* %p, align 4
   %xz = zext i16 %x to i64
@@ -232,15 +224,13 @@ define i64 @load8_and16_zext(i8* %p, i8 %y) {
 ; CHECK-LABEL: load8_and16_zext:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldrb w8, [x0]
-; CHECK-NEXT:    and w8, w1, w8
-; CHECK-NEXT:    and x0, x8, #0xff
+; CHECK-NEXT:    and w0, w1, w8
 ; CHECK-NEXT:    ret
 ;
 ; CHECKBE-LABEL: load8_and16_zext:
 ; CHECKBE:       // %bb.0:
 ; CHECKBE-NEXT:    ldrb w8, [x0]
-; CHECKBE-NEXT:    and w8, w1, w8
-; CHECKBE-NEXT:    and x0, x8, #0xff
+; CHECKBE-NEXT:    and w0, w1, w8
 ; CHECKBE-NEXT:    ret
   %x = load i8, i8* %p, align 4
   %xz = zext i8 %x to i64
@@ -296,16 +286,14 @@ define i64 @load8_and16_or(i8* %p, i64 %y) {
 define i64 @load16_and8_manyext(i16* %p, i32 %y) {
 ; CHECK-LABEL: load16_and8_manyext:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldrh w8, [x0]
-; CHECK-NEXT:    and w8, w1, w8
-; CHECK-NEXT:    and x0, x8, #0xff
+; CHECK-NEXT:    ldrb w8, [x0]
+; CHECK-NEXT:    and w0, w1, w8
 ; CHECK-NEXT:    ret
 ;
 ; CHECKBE-LABEL: load16_and8_manyext:
 ; CHECKBE:       // %bb.0:
-; CHECKBE-NEXT:    ldrh w8, [x0]
-; CHECKBE-NEXT:    and w8, w1, w8
-; CHECKBE-NEXT:    and x0, x8, #0xff
+; CHECKBE-NEXT:    ldrb w8, [x0, #1]
+; CHECKBE-NEXT:    and w0, w1, w8
 ; CHECKBE-NEXT:    ret
   %x = load i16, i16* %p, align 4
   %xz = zext i16 %x to i32
@@ -318,18 +306,16 @@ define i64 @load16_and8_manyext(i16* %p, i32 %y) {
 define i64 @multiple_load(i16* %p, i32* %q) {
 ; CHECK-LABEL: multiple_load:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldrh w8, [x0]
-; CHECK-NEXT:    ldr w9, [x1]
-; CHECK-NEXT:    and w8, w9, w8
-; CHECK-NEXT:    and x0, x8, #0xff
+; CHECK-NEXT:    ldrb w8, [x0]
+; CHECK-NEXT:    ldrb w9, [x1]
+; CHECK-NEXT:    and w0, w9, w8
 ; CHECK-NEXT:    ret
 ;
 ; CHECKBE-LABEL: multiple_load:
 ; CHECKBE:       // %bb.0:
-; CHECKBE-NEXT:    ldrh w8, [x0]
-; CHECKBE-NEXT:    ldr w9, [x1]
-; CHECKBE-NEXT:    and w8, w9, w8
-; CHECKBE-NEXT:    and x0, x8, #0xff
+; CHECKBE-NEXT:    ldrb w8, [x0, #1]
+; CHECKBE-NEXT:    ldrb w9, [x1, #3]
+; CHECKBE-NEXT:    and w0, w9, w8
 ; CHECKBE-NEXT:    ret
   %x = load i16, i16* %p, align 4
   %xz = zext i16 %x to i64
@@ -343,18 +329,16 @@ define i64 @multiple_load(i16* %p, i32* %q) {
 define i64 @multiple_load_or(i16* %p, i32* %q) {
 ; CHECK-LABEL: multiple_load_or:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldrh w8, [x0]
-; CHECK-NEXT:    ldr w9, [x1]
-; CHECK-NEXT:    orr w8, w9, w8
-; CHECK-NEXT:    and x0, x8, #0xff
+; CHECK-NEXT:    ldrb w8, [x0]
+; CHECK-NEXT:    ldrb w9, [x1]
+; CHECK-NEXT:    orr w0, w9, w8
 ; CHECK-NEXT:    ret
 ;
 ; CHECKBE-LABEL: multiple_load_or:
 ; CHECKBE:       // %bb.0:
-; CHECKBE-NEXT:    ldrh w8, [x0]
-; CHECKBE-NEXT:    ldr w9, [x1]
-; CHECKBE-NEXT:    orr w8, w9, w8
-; CHECKBE-NEXT:    and x0, x8, #0xff
+; CHECKBE-NEXT:    ldrb w8, [x0, #1]
+; CHECKBE-NEXT:    ldrb w9, [x1, #3]
+; CHECKBE-NEXT:    orr w0, w9, w8
 ; CHECKBE-NEXT:    ret
   %x = load i16, i16* %p, align 4
   %xz = zext i16 %x to i64
@@ -368,16 +352,16 @@ define i64 @multiple_load_or(i16* %p, i32* %q) {
 define i64 @load32_and16_zexty(i32* %p, i32 %y) {
 ; CHECK-LABEL: load32_and16_zexty:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldr w8, [x0]
-; CHECK-NEXT:    orr w8, w1, w8
-; CHECK-NEXT:    and x0, x8, #0xffff
+; CHECK-NEXT:    ldrh w8, [x0]
+; CHECK-NEXT:    and w9, w1, #0xffff
+; CHECK-NEXT:    orr w0, w9, w8
 ; CHECK-NEXT:    ret
 ;
 ; CHECKBE-LABEL: load32_and16_zexty:
 ; CHECKBE:       // %bb.0:
-; CHECKBE-NEXT:    ldr w8, [x0]
-; CHECKBE-NEXT:    orr w8, w1, w8
-; CHECKBE-NEXT:    and x0, x8, #0xffff
+; CHECKBE-NEXT:    ldrh w8, [x0, #2]
+; CHECKBE-NEXT:    and w9, w1, #0xffff
+; CHECKBE-NEXT:    orr w0, w9, w8
 ; CHECKBE-NEXT:    ret
   %x = load i32, i32* %p, align 4
   %xz = zext i32 %x to i64
@@ -390,16 +374,16 @@ define i64 @load32_and16_zexty(i32* %p, i32 %y) {
 define i64 @load32_and16_sexty(i32* %p, i32 %y) {
 ; CHECK-LABEL: load32_and16_sexty:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldr w8, [x0]
-; CHECK-NEXT:    orr w8, w1, w8
-; CHECK-NEXT:    and x0, x8, #0xffff
+; CHECK-NEXT:    ldrh w8, [x0]
+; CHECK-NEXT:    and w9, w1, #0xffff
+; CHECK-NEXT:    orr w0, w9, w8
 ; CHECK-NEXT:    ret
 ;
 ; CHECKBE-LABEL: load32_and16_sexty:
 ; CHECKBE:       // %bb.0:
-; CHECKBE-NEXT:    ldr w8, [x0]
-; CHECKBE-NEXT:    orr w8, w1, w8
-; CHECKBE-NEXT:    and x0, x8, #0xffff
+; CHECKBE-NEXT:    ldrh w8, [x0, #2]
+; CHECKBE-NEXT:    and w9, w1, #0xffff
+; CHECKBE-NEXT:    orr w0, w9, w8
 ; CHECKBE-NEXT:    ret
   %x = load i32, i32* %p, align 4
   %xz = zext i32 %x to i64

diff  --git a/llvm/test/CodeGen/X86/pr35763.ll b/llvm/test/CodeGen/X86/pr35763.ll
index 8b3e91dc577ae..53a0a0284d11d 100644
--- a/llvm/test/CodeGen/X86/pr35763.ll
+++ b/llvm/test/CodeGen/X86/pr35763.ll
@@ -10,10 +10,10 @@
 define dso_local void @PR35763() {
 ; CHECK-LABEL: PR35763:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movl z(%rip), %eax
-; CHECK-NEXT:    orl z+2(%rip), %eax
-; CHECK-NEXT:    movzwl %ax, %eax
-; CHECK-NEXT:    movq %rax, tf_3_var_136(%rip)
+; CHECK-NEXT:    movzwl z(%rip), %eax
+; CHECK-NEXT:    movzwl z+2(%rip), %ecx
+; CHECK-NEXT:    orl %eax, %ecx
+; CHECK-NEXT:    movq %rcx, tf_3_var_136(%rip)
 ; CHECK-NEXT:    movl z+6(%rip), %eax
 ; CHECK-NEXT:    movzbl z+10(%rip), %ecx
 ; CHECK-NEXT:    shlq $32, %rcx

diff  --git a/llvm/test/CodeGen/X86/pr35765.ll b/llvm/test/CodeGen/X86/pr35765.ll
index 5ff34a005f53f..80fdf78cc89b0 100644
--- a/llvm/test/CodeGen/X86/pr35765.ll
+++ b/llvm/test/CodeGen/X86/pr35765.ll
@@ -13,14 +13,13 @@ define dso_local void @PR35765() {
 ; CHECK-NEXT:    addb $-118, %cl
 ; CHECK-NEXT:    movl $4, %eax
 ; CHECK-NEXT:    shll %cl, %eax
-; CHECK-NEXT:    movzwl x(%rip), %ecx
-; CHECK-NEXT:    movzwl s2(%rip), %edx
-; CHECK-NEXT:    notl %edx
-; CHECK-NEXT:    orl $63488, %edx # imm = 0xF800
-; CHECK-NEXT:    movzwl %dx, %edx
-; CHECK-NEXT:    orl %ecx, %edx
-; CHECK-NEXT:    xorl %eax, %edx
-; CHECK-NEXT:    movslq %edx, %rax
+; CHECK-NEXT:    movzwl s2(%rip), %ecx
+; CHECK-NEXT:    notl %ecx
+; CHECK-NEXT:    orl x(%rip), %ecx
+; CHECK-NEXT:    orl $63488, %ecx # imm = 0xF800
+; CHECK-NEXT:    movzwl %cx, %ecx
+; CHECK-NEXT:    xorl %eax, %ecx
+; CHECK-NEXT:    movslq %ecx, %rax
 ; CHECK-NEXT:    movq %rax, ll(%rip)
 ; CHECK-NEXT:    retq
 entry:


        


More information about the llvm-commits mailing list