[llvm] r340797 - [DAGCombiner][AMDGPU][Mips] Fold bitcast with volatile loads if the resulting load is legal for the target.

Mon Aug 27 20:47:20 PDT 2018

Author: ctopper
Date: Mon Aug 27 20:47:20 2018
New Revision: 340797

URL: http://llvm.org/viewvc/llvm-project?rev=340797&view=rev
Log:
[DAGCombiner][AMDGPU][Mips] Fold bitcast with volatile loads if the resulting load is legal for the target.

Summary:
I'm not sure if this patch is correct or if it needs more qualifying somehow. A bitcast shouldn't change the size of the load, so it should be OK? We already do something similar for stores: we'll change the type of a volatile store if the resulting store is Legal or Custom. I'm not sure we should be allowing Custom there...

I was playing around with converting X86 atomic loads/stores (except seq_cst) into regular volatile loads and stores during lowering. This would allow some special RMW isel patterns in X86InstrCompiler.td to be removed. But there are some floating-point patterns in there that didn't work because we don't fold (f64 (bitconvert (i64 volatile load))) or (f32 (bitconvert (i32 volatile load))).

Reviewers: efriedma, atanasyan, arsenm

Reviewed By: efriedma

Subscribers: jvesely, arsenm, sdardis, kzhuravl, wdng, yaxunl, dstuttard, tpr, t-tye, arichardson, jrtc27, atanasyan, jfb, llvm-commits

Differential Revision: https://reviews.llvm.org/D50491

Modified:
    llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/trunk/test/CodeGen/AMDGPU/copy-illegal-type.ll
    llvm/trunk/test/CodeGen/Mips/cconv/return-hard-fp128.ll
    llvm/trunk/test/CodeGen/Mips/cconv/return-hard-struct-f128.ll
    llvm/trunk/test/CodeGen/Mips/msa/bitcast.ll

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=340797&r1=340796&r2=340797&view=diff
==============================================================================

--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Mon Aug 27 20:47:20 2018
@@ -9833,12 +9833,16 @@ SDValue DAGCombiner::visitBITCAST(SDNode
   // fold (conv (load x)) -> (load (conv*)x)
   // If the resultant load doesn't need a higher alignment than the original!
   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
-      // Do not change the width of a volatile load.
-      !cast<LoadSDNode>(N0)->isVolatile() &&
       // Do not remove the cast if the types differ in endian layout.
       TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
           TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
-      (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
+      // If the load is volatile, we only want to change the load type if the
+      // resulting load is legal. Otherwise we might increase the number of
+      // memory accesses. We don't care if the original type was legal or not
+      // as we assume software couldn't rely on the number of accesses of an
+      // illegal type.
+      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+       TLI.isOperationLegal(ISD::LOAD, VT)) &&
       TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) {
     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
     unsigned OrigAlign = LN0->getAlignment();
@@ -14694,6 +14698,11 @@ SDValue DAGCombiner::visitSTORE(SDNode *
   if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
       ST->isUnindexed()) {
     EVT SVT = Value.getOperand(0).getValueType();
+    // If the store is volatile, we only want to change the store type if the
+    // resulting store is legal. Otherwise we might increase the number of
+    // memory accesses. We don't care if the original type was legal or not
+    // as we assume software couldn't rely on the number of accesses of an
+    // illegal type.
     if (((!LegalOperations && !ST->isVolatile()) ||
          TLI.isOperationLegal(ISD::STORE, SVT)) &&
         TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT)) {

Modified: llvm/trunk/test/CodeGen/AMDGPU/copy-illegal-type.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/copy-illegal-type.ll?rev=340797&r1=340796&r2=340797&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/copy-illegal-type.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/copy-illegal-type.ll Mon Aug 27 20:47:20 2018
@@ -147,10 +147,7 @@ define amdgpu_kernel void @test_copy_v3i
 }
 
 ; FUNC-LABEL: {{^}}test_copy_v4i8_volatile_load:
-; GCN: {{buffer|flat}}_load_ubyte
-; GCN: {{buffer|flat}}_load_ubyte
-; GCN: {{buffer|flat}}_load_ubyte
-; GCN: {{buffer|flat}}_load_ubyte
+; GCN: {{buffer|flat}}_load_dword
 ; GCN: buffer_store_dword
 ; GCN: s_endpgm
 define amdgpu_kernel void @test_copy_v4i8_volatile_load(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) nounwind {

Modified: llvm/trunk/test/CodeGen/Mips/cconv/return-hard-fp128.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Mips/cconv/return-hard-fp128.ll?rev=340797&r1=340796&r2=340797&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Mips/cconv/return-hard-fp128.ll (original)
+++ llvm/trunk/test/CodeGen/Mips/cconv/return-hard-fp128.ll Mon Aug 27 20:47:20 2018
@@ -18,14 +18,10 @@ entry:
 }
 
 ; ALL-LABEL: retldouble:
-; N32-DAG:           ld [[R2:\$[0-9]+]], %lo(fp128)([[R1:\$[0-9]+]])
+; N32-DAG:           ldc1 $f0, %lo(fp128)([[R1:\$[0-9]+]])
 ; N32-DAG:           addiu [[R3:\$[0-9]+]], [[R1]], %lo(fp128)
-; N32-DAG:           ld [[R4:\$[0-9]+]], 8([[R3]])
-; N32-DAG:           dmtc1 [[R2]], $f0
-; N32-DAG:           dmtc1 [[R4]], $f2
+; N32-DAG:           ldc1 $f2, 8([[R3]])
 
 ; N64-DAG:           lui [[R2:\$[0-9]+]], %highest(fp128)
-; N64-DAG:           ld [[R3:\$[0-9]+]], %lo(fp128)([[R2]])
-; N64-DAG:           ld [[R4:\$[0-9]+]], 8([[R2]])
-; N64-DAG:           dmtc1 [[R3]], $f0
-; N64-DAG:           dmtc1 [[R4]], $f2
+; N64-DAG:           ldc1 $f0, %lo(fp128)([[R2]])
+; N64-DAG:           ldc1 $f2, 8([[R2]])

Modified: llvm/trunk/test/CodeGen/Mips/cconv/return-hard-struct-f128.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Mips/cconv/return-hard-struct-f128.ll?rev=340797&r1=340796&r2=340797&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Mips/cconv/return-hard-struct-f128.ll (original)
+++ llvm/trunk/test/CodeGen/Mips/cconv/return-hard-struct-f128.ll Mon Aug 27 20:47:20 2018
@@ -23,14 +23,10 @@ entry:
 ; is returned in $f0, and $f1 instead of the usual $f0, and $f2. This is to
 ; match the de facto ABI as implemented by GCC.
 ; N32-DAG:        lui [[R1:\$[0-9]+]], %hi(struct_fp128)
-; N32-DAG:        ld  [[R2:\$[0-9]+]], %lo(struct_fp128)([[R1]])
-; N32-DAG:        dmtc1 [[R2]], $f0
+; N32-DAG:        ldc1 $f0, %lo(struct_fp128)([[R1]])
 ; N32-DAG:        addiu [[R3:\$[0-9]+]], [[R1]], %lo(struct_fp128)
-; N32-DAG:        ld  [[R4:\$[0-9]+]], 8([[R3]])
-; N32-DAG:        dmtc1 [[R4]], $f1
+; N32-DAG:        ldc1  $f1, 8([[R3]])
 
 ; N64-DAG:        lui  [[R1:\$[0-9]+]], %highest(struct_fp128)
-; N64-DAG:        ld  [[R2:\$[0-9]+]], %lo(struct_fp128)([[R1]])
-; N64-DAG:        dmtc1 [[R2]], $f0
-; N64-DAG:        ld  [[R4:\$[0-9]+]], 8([[R1]])
-; N64-DAG:        dmtc1 [[R4]], $f1
+; N64-DAG:        ldc1 $f0, %lo(struct_fp128)([[R1]])
+; N64-DAG:        ldc1 $f1, 8([[R1]])

Modified: llvm/trunk/test/CodeGen/Mips/msa/bitcast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Mips/msa/bitcast.ll?rev=340797&r1=340796&r2=340797&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Mips/msa/bitcast.ll (original)
+++ llvm/trunk/test/CodeGen/Mips/msa/bitcast.ll Mon Aug 27 20:47:20 2018
@@ -362,14 +362,13 @@ entry:
 }
 
 ; LITENDIAN: v8f16_to_v16i8:
-; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
+; LITENDIAN: ld.b [[R1:\$w[0-9]+]],
 ; LITENDIAN: addv.b [[R3:\$w[0-9]+]], [[R1]], [[R1]]
 ; LITENDIAN: st.b [[R3]],
 ; LITENDIAN: .size v8f16_to_v16i8
 
 ; BIGENDIAN: v8f16_to_v16i8:
-; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
-; BIGENDIAN: shf.b [[R3:\$w[0-9]+]], [[R1]], 177
+; BIGENDIAN: ld.b [[R1:\$w[0-9]+]],
 ; BIGENDIAN: addv.b [[R4:\$w[0-9]+]], [[R2]], [[R2]]
 ; BIGENDIAN: st.b [[R4]],
 ; BIGENDIAN: .size v8f16_to_v16i8
@@ -431,14 +430,13 @@ entry:
 }
 
 ; LITENDIAN: v8f16_to_v4i32:
-; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
+; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
 ; LITENDIAN: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
 ; LITENDIAN: st.w [[R2]],
 ; LITENDIAN: .size v8f16_to_v4i32
 
 ; BIGENDIAN: v8f16_to_v4i32:
-; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
-; BIGENDIAN: shf.h [[R2:\$w[0-9]+]], [[R1]], 177
+; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
 ; BIGENDIAN: addv.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
 ; BIGENDIAN: st.w [[R3]],
 ; BIGENDIAN: .size v8f16_to_v4i32
@@ -455,14 +453,13 @@ entry:
 }
 
 ; LITENDIAN: v8f16_to_v4f32:
-; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
+; LITENDIAN: ld.w [[R1:\$w[0-9]+]],
 ; LITENDIAN: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
 ; LITENDIAN: st.w [[R2]],
 ; LITENDIAN: .size v8f16_to_v4f32
 
 ; BIGENDIAN: v8f16_to_v4f32:
-; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
-; BIGENDIAN: shf.h [[R2:\$w[0-9]+]], [[R1]], 177
+; BIGENDIAN: ld.w [[R1:\$w[0-9]+]],
 ; BIGENDIAN: fadd.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
 ; BIGENDIAN: st.w [[R3]],
 ; BIGENDIAN: .size v8f16_to_v4f32
@@ -479,14 +476,13 @@ entry:
 }
 
 ; LITENDIAN: v8f16_to_v2i64:
-; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
+; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
 ; LITENDIAN: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
 ; LITENDIAN: st.d [[R2]],
 ; LITENDIAN: .size v8f16_to_v2i64
 
 ; BIGENDIAN: v8f16_to_v2i64:
-; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
-; BIGENDIAN: shf.h [[R2:\$w[0-9]+]], [[R1]], 27
+; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
 ; BIGENDIAN: addv.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
 ; BIGENDIAN: st.d [[R3]],
 ; BIGENDIAN: .size v8f16_to_v2i64
@@ -503,14 +499,13 @@ entry:
 }
 
 ; LITENDIAN: v8f16_to_v2f64:
-; LITENDIAN: ld.h [[R1:\$w[0-9]+]],
+; LITENDIAN: ld.d [[R1:\$w[0-9]+]],
 ; LITENDIAN: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
 ; LITENDIAN: st.d [[R2]],
 ; LITENDIAN: .size v8f16_to_v2f64
 
 ; BIGENDIAN: v8f16_to_v2f64:
-; BIGENDIAN: ld.h [[R1:\$w[0-9]+]],
-; BIGENDIAN: shf.h [[R2:\$w[0-9]+]], [[R1]], 27
+; BIGENDIAN: ld.d [[R1:\$w[0-9]+]],
 ; BIGENDIAN: fadd.d [[R3:\$w[0-9]+]], [[R2]], [[R2]]
 ; BIGENDIAN: st.d [[R3]],
 ; BIGENDIAN: .size v8f16_to_v2f64