[llvm] 109cc5a - [DAGCombine] Fold SRA of a load into a narrower sign-extending load

Tue Jan 25 03:28:15 PST 2022

Author: Bjorn Pettersson
Date: 2022-01-25T12:14:48+01:00
New Revision: 109cc5adccaec4c2264c0db3d54bbec1183bf95d

URL: https://github.com/llvm/llvm-project/commit/109cc5adccaec4c2264c0db3d54bbec1183bf95d
DIFF: https://github.com/llvm/llvm-project/commit/109cc5adccaec4c2264c0db3d54bbec1183bf95d.diff

LOG: [DAGCombine] Fold SRA of a load into a narrower sign-extending load

An sra of a loaded value by a constant amount is equivalent to
sign-extending the narrower value held in the upper bits of that load.
Fold away the shift by emitting a sextload of the narrower value when
it is legal to reduce the load width accordingly.

Differential Revision: https://reviews.llvm.org/D116930

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/test/CodeGen/PowerPC/pr13891.ll
    llvm/test/CodeGen/X86/combine-sra-load.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 1137f8b16977f..c3d2ed2dcf85c 100644

--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -8964,6 +8964,10 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
   if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
     return MULH;
 
+  // Attempt to convert a sra of a load into a narrower sign-extending load.
+  if (SDValue NarrowLoad = reduceLoadWidth(N))
+    return NarrowLoad;
+
   return SDValue();
 }
 
@@ -12151,10 +12155,10 @@ SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
   if (Opc == ISD::SIGN_EXTEND_INREG) {
     ExtType = ISD::SEXTLOAD;
     ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
-  } else if (Opc == ISD::SRL) {
-    // Another special-case: SRL is basically zero-extending a narrower value,
-    // or it may be shifting a higher subword, half or byte into the lowest
-    // bits.
+  } else if (Opc == ISD::SRL || Opc == ISD::SRA) {
+    // Another special-case: SRL/SRA is basically zero/sign-extending a narrower
+    // value, or it may be shifting a higher subword, half or byte into the
+    // lowest bits.
 
     // Only handle shift with constant shift amount, and the shiftee must be a
     // load.
@@ -12168,13 +12172,16 @@ SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
     uint64_t MemoryWidth = LN->getMemoryVT().getScalarSizeInBits();
     if (MemoryWidth <= ShAmt)
       return SDValue();
-    // Attempt to fold away the SRL by using ZEXTLOAD.
-    ExtType = ISD::ZEXTLOAD;
+    // Attempt to fold away the SRL by using ZEXTLOAD and SRA by using SEXTLOAD.
+    ExtType = Opc == ISD::SRL ? ISD::ZEXTLOAD : ISD::SEXTLOAD;
     ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
     // If original load is a SEXTLOAD then we can't simply replace it by a
     // ZEXTLOAD (we could potentially replace it by a more narrow SEXTLOAD
-    // followed by a ZEXT, but that is not handled at the moment).
-    if (LN->getExtensionType() == ISD::SEXTLOAD)
+    // followed by a ZEXT, but that is not handled at the moment). Similarly if
+    // the original load is a ZEXTLOAD and we want to use a SEXTLOAD.
+    if ((LN->getExtensionType() == ISD::SEXTLOAD ||
+         LN->getExtensionType() == ISD::ZEXTLOAD) &&
+        LN->getExtensionType() != ExtType)
       return SDValue();
   } else if (Opc == ISD::AND) {
     // An AND with a constant mask is the same as a truncate + zero-extend.

diff  --git a/llvm/test/CodeGen/PowerPC/pr13891.ll b/llvm/test/CodeGen/PowerPC/pr13891.ll
index f35a0a724bfd4..816166a20fedc 100644
--- a/llvm/test/CodeGen/PowerPC/pr13891.ll
+++ b/llvm/test/CodeGen/PowerPC/pr13891.ll
@@ -7,7 +7,7 @@ target triple = "powerpc64-unknown-linux-gnu"
 define void @_Z5check3foos(%struct.foo* nocapture byval(%struct.foo) %f, i16 signext %i) noinline {
 ; CHECK-LABEL: _Z5check3foos:
 ; CHECK: sth 3, {{[0-9]+}}(1)
-; CHECK: lha {{[0-9]+}}, {{[0-9]+}}(1)
+; CHECK: lbz {{[0-9]+}}, {{[0-9]+}}(1)
 entry:
   %0 = bitcast %struct.foo* %f to i16*
   %1 = load i16, i16* %0, align 2

diff  --git a/llvm/test/CodeGen/X86/combine-sra-load.ll b/llvm/test/CodeGen/X86/combine-sra-load.ll
index 119acaa6a02b5..ba5814f0f160d 100644
--- a/llvm/test/CodeGen/X86/combine-sra-load.ll
+++ b/llvm/test/CodeGen/X86/combine-sra-load.ll
@@ -1,12 +1,11 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=CHECK
 
-; FIXME: fold (sra (load i32), 16)) -> (sextload i16)
+; fold (sra (load i32), 16) -> (sextload i16)
 define i32 @sra_half(i32* %p) {
 ; CHECK-LABEL: sra_half:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl (%rdi), %eax
-; CHECK-NEXT:    sarl $16, %eax
+; CHECK-NEXT:    movswl 2(%rdi), %eax
 ; CHECK-NEXT:    retq
   %load = load i32, i32* %p
   %shift = ashr i32 %load, 16
@@ -25,12 +24,11 @@ define <4 x i32> @sra_half_vec(<4 x i32>* %p) {
   ret <4 x i32> %shift
 }
 
-; FIXME: fold (sra (load i64), 48)) -> (sextload i16)
+; fold (sra (load i64), 48) -> (sextload i16)
 define i64 @sra_large_shift(i64* %r) {
 ; CHECK-LABEL: sra_large_shift:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movq (%rdi), %rax
-; CHECK-NEXT:    sarq $48, %rax
+; CHECK-NEXT:    movswq 6(%rdi), %rax
 ; CHECK-NEXT:    retq
   %t0 = load i64, i64* %r
   %conv = ashr i64 %t0, 48
@@ -61,12 +59,11 @@ define i32 @sra_of_zextload(i16* %p) {
   ret i32 %shift
 }
 
-; FIXME: fold (sra (sextload i16 to i32), 8) -> (sextload i8)
+; fold (sra (sextload i16 to i32), 8) -> (sextload i8)
 define i32 @sra_of_sextload(i16* %p) {
 ; CHECK-LABEL: sra_of_sextload:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movswl (%rdi), %eax
-; CHECK-NEXT:    sarl $8, %eax
+; CHECK-NEXT:    movsbl 1(%rdi), %eax
 ; CHECK-NEXT:    retq
   %load = load i16, i16* %p
   %sext = sext i16 %load to i32
@@ -89,12 +86,11 @@ define i32 @sra_of_sextload_no_fold(i16* %p) {
   ret i32 %shift
 }
 
-; FIXME: Fold even if SRA has multiple uses.
+; Fold even if SRA has multiple uses.
 define i32 @sra_to_sextload_multiple_sra_uses(i32* %p) {
 ; CHECK-LABEL: sra_to_sextload_multiple_sra_uses:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl (%rdi), %ecx
-; CHECK-NEXT:    sarl $16, %ecx
+; CHECK-NEXT:    movswl 2(%rdi), %ecx
 ; CHECK-NEXT:    movl %ecx, %eax
 ; CHECK-NEXT:    xorl $6, %eax
 ; CHECK-NEXT:    orl %ecx, %eax