[llvm] c1f81e7 - [DAG] mergeStore - peek through truncates when finding dead store(trunc(load())) patterns
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 15 04:54:25 PDT 2023
Author: Simon Pilgrim
Date: 2023-03-15T11:54:13Z
New Revision: c1f81e7604457fa0b6c8bc03745c41d511106040
URL: https://github.com/llvm/llvm-project/commit/c1f81e7604457fa0b6c8bc03745c41d511106040
DIFF: https://github.com/llvm/llvm-project/commit/c1f81e7604457fa0b6c8bc03745c41d511106040.diff
LOG: [DAG] mergeStore - peek through truncates when finding dead store(trunc(load())) patterns
Extend the existing store(load()) removal code to also handle intermediate truncates that some targets won't fold into the store (canCombineTruncStore returning false) - for dead-store detection we only care that the load and store MemoryVTs match.
Fixes a regression from D146121
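To illustrate the pattern, the affected IR looks roughly like the i56_or test updated below (a sketch reconstructed from the test checks, not the verbatim test body):

define void @i56_or(ptr %a) {
  %aa = load i56, ptr %a, align 1
  %b = or i56 %aa, 384
  store i56 %b, ptr %a, align 1
  ret void
}

When the illegal i56 access is legalized into narrower pieces, the untouched upper piece becomes a store of a truncate of a load from the same address with the same MemoryVT. The old check required the store's value operand to be the load node itself, so targets that keep the explicit TRUNCATE were left with a redundant load/store pair (e.g. the movzwl 4(%rdi) / movw %ax, 4(%rdi) removed from the X64 checks below).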
Added:
Modified:
llvm/include/llvm/CodeGen/SelectionDAGNodes.h
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
llvm/test/CodeGen/X86/illegal-bitfield-loadstore.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
index 011f81ffbece..32745ac660d6 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -1712,6 +1712,10 @@ SDValue peekThroughOneUseBitcasts(SDValue V);
/// If \p V is not an extracted subvector, it is returned as-is.
SDValue peekThroughExtractSubvectors(SDValue V);
+/// Return the non-truncated source operand of \p V if it exists.
+/// If \p V is not a truncation, it is returned as-is.
+SDValue peekThroughTruncates(SDValue V);
+
/// Returns true if \p V is a bitwise not operation. Assumes that an all ones
/// constant is canonicalized to be operand 1.
bool isBitwiseNot(SDValue V, bool AllowUndefs = false);
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 3d45c3e2082b..c848847f24f2 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -20391,9 +20391,13 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
}
// If this is a load followed by a store to the same location, then the store
- // is dead/noop.
+ // is dead/noop. Peek through any truncates if canCombineTruncStore failed.
+ // TODO: Add big-endian truncate support with test coverage.
// TODO: Can relax for unordered atomics (see D66309)
- if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
+ SDValue TruncVal = DAG.getDataLayout().isLittleEndian()
+ ? peekThroughTruncates(Value)
+ : Value;
+ if (auto *Ld = dyn_cast<LoadSDNode>(TruncVal)) {
if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
ST->isUnindexed() && ST->isSimple() &&
Ld->getAddressSpace() == ST->getAddressSpace() &&
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index c4d37b12f073..5cf9497069f7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -11040,6 +11040,12 @@ SDValue llvm::peekThroughExtractSubvectors(SDValue V) {
return V;
}
+SDValue llvm::peekThroughTruncates(SDValue V) {
+ while (V.getOpcode() == ISD::TRUNCATE)
+ V = V.getOperand(0);
+ return V;
+}
+
bool llvm::isBitwiseNot(SDValue V, bool AllowUndefs) {
if (V.getOpcode() != ISD::XOR)
return false;
diff --git a/llvm/test/CodeGen/X86/illegal-bitfield-loadstore.ll b/llvm/test/CodeGen/X86/illegal-bitfield-loadstore.ll
index 05ad92cc0b33..7fb07c6b3163 100644
--- a/llvm/test/CodeGen/X86/illegal-bitfield-loadstore.ll
+++ b/llvm/test/CodeGen/X86/illegal-bitfield-loadstore.ll
@@ -5,21 +5,19 @@
define void @i24_or(ptr %a) {
; X86-LABEL: i24_or:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movzwl (%ecx), %edx
-; X86-NEXT: movzbl 2(%ecx), %eax
-; X86-NEXT: movb %al, 2(%ecx)
-; X86-NEXT: shll $16, %eax
-; X86-NEXT: orl %edx, %eax
-; X86-NEXT: orl $384, %eax # imm = 0x180
-; X86-NEXT: movw %ax, (%ecx)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movzwl (%eax), %ecx
+; X86-NEXT: movzbl 2(%eax), %edx
+; X86-NEXT: shll $16, %edx
+; X86-NEXT: orl %ecx, %edx
+; X86-NEXT: orl $384, %edx # imm = 0x180
+; X86-NEXT: movw %dx, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: i24_or:
; X64: # %bb.0:
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: movzbl 2(%rdi), %ecx
-; X64-NEXT: movb %cl, 2(%rdi)
; X64-NEXT: shll $16, %ecx
; X64-NEXT: orl %eax, %ecx
; X64-NEXT: orl $384, %ecx # imm = 0x180
@@ -35,21 +33,19 @@ define void @i24_and_or(ptr %a) {
; X86-LABEL: i24_and_or:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movzwl (%eax), %edx
-; X86-NEXT: movzbl 2(%eax), %ecx
-; X86-NEXT: movb %cl, 2(%eax)
-; X86-NEXT: shll $16, %ecx
-; X86-NEXT: orl %edx, %ecx
-; X86-NEXT: orl $384, %ecx # imm = 0x180
-; X86-NEXT: andl $-128, %ecx
-; X86-NEXT: movw %cx, (%eax)
+; X86-NEXT: movzwl (%eax), %ecx
+; X86-NEXT: movzbl 2(%eax), %edx
+; X86-NEXT: shll $16, %edx
+; X86-NEXT: orl %ecx, %edx
+; X86-NEXT: orl $384, %edx # imm = 0x180
+; X86-NEXT: andl $-128, %edx
+; X86-NEXT: movw %dx, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: i24_and_or:
; X64: # %bb.0:
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: movzbl 2(%rdi), %ecx
-; X64-NEXT: movb %cl, 2(%rdi)
; X64-NEXT: shll $16, %ecx
; X64-NEXT: orl %eax, %ecx
; X64-NEXT: orl $384, %ecx # imm = 0x180
@@ -66,21 +62,20 @@ define void @i24_and_or(ptr %a) {
define void @i24_insert_bit(ptr %a, i1 zeroext %bit) {
; X86-LABEL: i24_insert_bit:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 8
-; X86-NEXT: .cfi_offset %ebx, -8
+; X86-NEXT: .cfi_offset %esi, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzwl (%eax), %edx
-; X86-NEXT: movzbl 2(%eax), %ebx
-; X86-NEXT: movb %bl, 2(%eax)
-; X86-NEXT: shll $16, %ebx
-; X86-NEXT: orl %edx, %ebx
+; X86-NEXT: movzbl 2(%eax), %esi
+; X86-NEXT: shll $16, %esi
+; X86-NEXT: orl %edx, %esi
; X86-NEXT: shll $13, %ecx
-; X86-NEXT: andl $16769023, %ebx # imm = 0xFFDFFF
-; X86-NEXT: orl %ecx, %ebx
-; X86-NEXT: movw %bx, (%eax)
-; X86-NEXT: popl %ebx
+; X86-NEXT: andl $16769023, %esi # imm = 0xFFDFFF
+; X86-NEXT: orl %ecx, %esi
+; X86-NEXT: movw %si, (%eax)
+; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
;
@@ -88,7 +83,6 @@ define void @i24_insert_bit(ptr %a, i1 zeroext %bit) {
; X64: # %bb.0:
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: movzbl 2(%rdi), %ecx
-; X64-NEXT: movb %cl, 2(%rdi)
; X64-NEXT: shll $16, %ecx
; X64-NEXT: orl %eax, %ecx
; X64-NEXT: shll $13, %esi
@@ -114,8 +108,6 @@ define void @i56_or(ptr %a) {
;
; X64-LABEL: i56_or:
; X64: # %bb.0:
-; X64-NEXT: movzwl 4(%rdi), %eax
-; X64-NEXT: movw %ax, 4(%rdi)
; X64-NEXT: orl $384, (%rdi) # imm = 0x180
; X64-NEXT: retq
%aa = load i56, ptr %a, align 1
@@ -138,8 +130,6 @@ define void @i56_and_or(ptr %a) {
; X64: # %bb.0:
; X64-NEXT: movzwl 4(%rdi), %eax
; X64-NEXT: movzbl 6(%rdi), %ecx
-; X64-NEXT: movb %cl, 6(%rdi)
-; X64-NEXT: # kill: def $ecx killed $ecx def $rcx
; X64-NEXT: shll $16, %ecx
; X64-NEXT: orl %eax, %ecx
; X64-NEXT: shlq $32, %rcx
@@ -175,8 +165,6 @@ define void @i56_insert_bit(ptr %a, i1 zeroext %bit) {
; X64: # %bb.0:
; X64-NEXT: movzwl 4(%rdi), %eax
; X64-NEXT: movzbl 6(%rdi), %ecx
-; X64-NEXT: movb %cl, 6(%rdi)
-; X64-NEXT: # kill: def $ecx killed $ecx def $rcx
; X64-NEXT: shll $16, %ecx
; X64-NEXT: orl %eax, %ecx
; X64-NEXT: shlq $32, %rcx