[llvm] b27e545 - [DAG] Convert truncstore(extend(x)) back to store(x)

David Green via llvm-commits llvm-commits at lists.llvm.org
Sat Jan 22 05:20:41 PST 2022


Author: David Green
Date: 2022-01-22T13:20:36Z
New Revision: b27e5459d51fd5ba80a1182e5bd8c0fd5e2e6a49

URL: https://github.com/llvm/llvm-project/commit/b27e5459d51fd5ba80a1182e5bd8c0fd5e2e6a49
DIFF: https://github.com/llvm/llvm-project/commit/b27e5459d51fd5ba80a1182e5bd8c0fd5e2e6a49.diff

LOG: [DAG] Convert truncstore(extend(x)) back to store(x)

Pulled out of D106237, this folds truncstore(extend(x)) back to store(x)
if the original store was legal. This can come up due to the order we
fold nodes. A fold from X86 needs to be adjusted to prevent infinite
loops, to have it pick the operand of a trunc more directly.

Differential Revision: https://reviews.llvm.org/D117901

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/AMDGPU/copy-illegal-type.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index d1f75b40e79db..861beee6386bf 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -18396,6 +18396,15 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
       Value.getValueType().isInteger() &&
       (!isa<ConstantSDNode>(Value) ||
        !cast<ConstantSDNode>(Value)->isOpaque())) {
+    // Convert a truncating store of a extension into a standard store.
+    if ((Value.getOpcode() == ISD::ZERO_EXTEND ||
+         Value.getOpcode() == ISD::SIGN_EXTEND ||
+         Value.getOpcode() == ISD::ANY_EXTEND) &&
+        Value.getOperand(0).getValueType() == ST->getMemoryVT() &&
+        TLI.isOperationLegalOrCustom(ISD::STORE, ST->getMemoryVT()))
+      return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
+                          ST->getMemOperand());
+
     APInt TruncDemandedBits =
         APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
                              ST->getMemoryVT().getScalarSizeInBits());

diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 71c80d518f998..17079116a6ae1 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -48170,7 +48170,8 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
       St->getValue().getOperand(0).getValueType() == MVT::v16i16 &&
       TLI.isTruncStoreLegal(MVT::v16i32, MVT::v16i8) &&
       St->getValue().hasOneUse() && !DCI.isBeforeLegalizeOps()) {
-    SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::v16i32, St->getValue());
+    SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::v16i32,
+                              St->getValue().getOperand(0));
     return DAG.getTruncStore(St->getChain(), dl, Ext, St->getBasePtr(),
                              MVT::v16i8, St->getMemOperand());
   }

diff  --git a/llvm/test/CodeGen/AMDGPU/copy-illegal-type.ll b/llvm/test/CodeGen/AMDGPU/copy-illegal-type.ll
index 80802658b2a04..2660c3162b107 100644
--- a/llvm/test/CodeGen/AMDGPU/copy-illegal-type.ll
+++ b/llvm/test/CodeGen/AMDGPU/copy-illegal-type.ll
@@ -543,17 +543,16 @@ define amdgpu_kernel void @test_copy_v3i8_align1(<3 x i8> addrspace(1)* %out, <3
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
 ; VI-NEXT:    s_mov_b32 s8, s2
 ; VI-NEXT:    s_mov_b32 s9, s3
-; VI-NEXT:    buffer_load_ubyte v0, off, s[8:11], 0
-; VI-NEXT:    buffer_load_ubyte v1, off, s[8:11], 0 offset:1
-; VI-NEXT:    buffer_load_ubyte v2, off, s[8:11], 0 offset:2
+; VI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
+; VI-NEXT:    buffer_load_ubyte v1, off, s[8:11], 0 offset:2
 ; VI-NEXT:    s_mov_b32 s4, s0
 ; VI-NEXT:    s_mov_b32 s5, s1
-; VI-NEXT:    s_waitcnt vmcnt(2)
+; VI-NEXT:    s_waitcnt vmcnt(1)
 ; VI-NEXT:    buffer_store_byte v0, off, s[4:7], 0
-; VI-NEXT:    s_waitcnt vmcnt(2)
-; VI-NEXT:    buffer_store_byte v1, off, s[4:7], 0 offset:1
-; VI-NEXT:    s_waitcnt vmcnt(2)
-; VI-NEXT:    buffer_store_byte v2, off, s[4:7], 0 offset:2
+; VI-NEXT:    s_waitcnt vmcnt(1)
+; VI-NEXT:    buffer_store_byte v1, off, s[4:7], 0 offset:2
+; VI-NEXT:    v_lshrrev_b16_e32 v0, 8, v0
+; VI-NEXT:    buffer_store_byte v0, off, s[4:7], 0 offset:1
 ; VI-NEXT:    s_endpgm
   %val = load <3 x i8>, <3 x i8> addrspace(1)* %in, align 1
   store <3 x i8> %val, <3 x i8> addrspace(1)* %out, align 1


        


More information about the llvm-commits mailing list