[llvm] 109cc5a - [DAGCombine] Fold SRA of a load into a narrower sign-extending load
Bjorn Pettersson via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 25 03:28:15 PST 2022
Author: Bjorn Pettersson
Date: 2022-01-25T12:14:48+01:00
New Revision: 109cc5adccaec4c2264c0db3d54bbec1183bf95d
URL: https://github.com/llvm/llvm-project/commit/109cc5adccaec4c2264c0db3d54bbec1183bf95d
DIFF: https://github.com/llvm/llvm-project/commit/109cc5adccaec4c2264c0db3d54bbec1183bf95d.diff
LOG: [DAGCombine] Fold SRA of a load into a narrower sign-extending load
An sra is basically sign-extending a narrower value. Fold away the
shift by doing a sextload of a narrower value, when it is legal to
reduce the load width accordingly.
Differential Revision: https://reviews.llvm.org/D116930
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/test/CodeGen/PowerPC/pr13891.ll
llvm/test/CodeGen/X86/combine-sra-load.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 1137f8b16977f..c3d2ed2dcf85c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -8964,6 +8964,10 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
return MULH;
+ // Attempt to convert a sra of a load into a narrower sign-extending load.
+ if (SDValue NarrowLoad = reduceLoadWidth(N))
+ return NarrowLoad;
+
return SDValue();
}
@@ -12151,10 +12155,10 @@ SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
if (Opc == ISD::SIGN_EXTEND_INREG) {
ExtType = ISD::SEXTLOAD;
ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
- } else if (Opc == ISD::SRL) {
- // Another special-case: SRL is basically zero-extending a narrower value,
- // or it may be shifting a higher subword, half or byte into the lowest
- // bits.
+ } else if (Opc == ISD::SRL || Opc == ISD::SRA) {
+ // Another special-case: SRL/SRA is basically zero/sign-extending a narrower
+ // value, or it may be shifting a higher subword, half or byte into the
+ // lowest bits.
// Only handle shift with constant shift amount, and the shiftee must be a
// load.
@@ -12168,13 +12172,16 @@ SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
uint64_t MemoryWidth = LN->getMemoryVT().getScalarSizeInBits();
if (MemoryWidth <= ShAmt)
return SDValue();
- // Attempt to fold away the SRL by using ZEXTLOAD.
- ExtType = ISD::ZEXTLOAD;
+ // Attempt to fold away the SRL by using ZEXTLOAD and SRA by using SEXTLOAD.
+ ExtType = Opc == ISD::SRL ? ISD::ZEXTLOAD : ISD::SEXTLOAD;
ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
// If original load is a SEXTLOAD then we can't simply replace it by a
// ZEXTLOAD (we could potentially replace it by a more narrow SEXTLOAD
- // followed by a ZEXT, but that is not handled at the moment).
- if (LN->getExtensionType() == ISD::SEXTLOAD)
+ // followed by a ZEXT, but that is not handled at the moment). Similarly if
+ // the original load is a ZEXTLOAD and we want to use a SEXTLOAD.
+ if ((LN->getExtensionType() == ISD::SEXTLOAD ||
+ LN->getExtensionType() == ISD::ZEXTLOAD) &&
+ LN->getExtensionType() != ExtType)
return SDValue();
} else if (Opc == ISD::AND) {
// An AND with a constant mask is the same as a truncate + zero-extend.
diff --git a/llvm/test/CodeGen/PowerPC/pr13891.ll b/llvm/test/CodeGen/PowerPC/pr13891.ll
index f35a0a724bfd4..816166a20fedc 100644
--- a/llvm/test/CodeGen/PowerPC/pr13891.ll
+++ b/llvm/test/CodeGen/PowerPC/pr13891.ll
@@ -7,7 +7,7 @@ target triple = "powerpc64-unknown-linux-gnu"
define void @_Z5check3foos(%struct.foo* nocapture byval(%struct.foo) %f, i16 signext %i) noinline {
; CHECK-LABEL: _Z5check3foos:
; CHECK: sth 3, {{[0-9]+}}(1)
-; CHECK: lha {{[0-9]+}}, {{[0-9]+}}(1)
+; CHECK: lbz {{[0-9]+}}, {{[0-9]+}}(1)
entry:
%0 = bitcast %struct.foo* %f to i16*
%1 = load i16, i16* %0, align 2
diff --git a/llvm/test/CodeGen/X86/combine-sra-load.ll b/llvm/test/CodeGen/X86/combine-sra-load.ll
index 119acaa6a02b5..ba5814f0f160d 100644
--- a/llvm/test/CodeGen/X86/combine-sra-load.ll
+++ b/llvm/test/CodeGen/X86/combine-sra-load.ll
@@ -1,12 +1,11 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=CHECK
-; FIXME: fold (sra (load i32), 16) -> (sextload i16)
+; fold (sra (load i32), 16) -> (sextload i16)
define i32 @sra_half(i32* %p) {
; CHECK-LABEL: sra_half:
; CHECK: # %bb.0:
-; CHECK-NEXT: movl (%rdi), %eax
-; CHECK-NEXT: sarl $16, %eax
+; CHECK-NEXT: movswl 2(%rdi), %eax
; CHECK-NEXT: retq
%load = load i32, i32* %p
%shift = ashr i32 %load, 16
@@ -25,12 +24,11 @@ define <4 x i32> @sra_half_vec(<4 x i32>* %p) {
ret <4 x i32> %shift
}
-; FIXME: fold (sra (load i64), 48) -> (sextload i16)
+; fold (sra (load i64), 48) -> (sextload i16)
define i64 @sra_large_shift(i64* %r) {
; CHECK-LABEL: sra_large_shift:
; CHECK: # %bb.0:
-; CHECK-NEXT: movq (%rdi), %rax
-; CHECK-NEXT: sarq $48, %rax
+; CHECK-NEXT: movswq 6(%rdi), %rax
; CHECK-NEXT: retq
%t0 = load i64, i64* %r
%conv = ashr i64 %t0, 48
@@ -61,12 +59,11 @@ define i32 @sra_of_zextload(i16* %p) {
ret i32 %shift
}
-; FIXME: fold (sra (sextload i16 to i32), 8) -> (sextload i8)
+; fold (sra (sextload i16 to i32), 8) -> (sextload i8)
define i32 @sra_of_sextload(i16* %p) {
; CHECK-LABEL: sra_of_sextload:
; CHECK: # %bb.0:
-; CHECK-NEXT: movswl (%rdi), %eax
-; CHECK-NEXT: sarl $8, %eax
+; CHECK-NEXT: movsbl 1(%rdi), %eax
; CHECK-NEXT: retq
%load = load i16, i16* %p
%sext = sext i16 %load to i32
@@ -89,12 +86,11 @@ define i32 @sra_of_sextload_no_fold(i16* %p) {
ret i32 %shift
}
-; FIXME: Fold even if SRA has multiple uses.
+; Fold even if SRA has multiple uses.
define i32 @sra_to_sextload_multiple_sra_uses(i32* %p) {
; CHECK-LABEL: sra_to_sextload_multiple_sra_uses:
; CHECK: # %bb.0:
-; CHECK-NEXT: movl (%rdi), %ecx
-; CHECK-NEXT: sarl $16, %ecx
+; CHECK-NEXT: movswl 2(%rdi), %ecx
; CHECK-NEXT: movl %ecx, %eax
; CHECK-NEXT: xorl $6, %eax
; CHECK-NEXT: orl %ecx, %eax
More information about the llvm-commits
mailing list