[PATCH] D13805: DAGCombiner: Check shouldReduceLoadWidth before combining (and (load), x) -> extload

Fri Oct 16 06:08:39 PDT 2015

tstellarAMD created this revision.
tstellarAMD added reviewers: resistor, arsenm.
tstellarAMD added a subscriber: llvm-commits.

http://reviews.llvm.org/D13805

Files:
  lib/CodeGen/SelectionDAG/DAGCombiner.cpp
  test/CodeGen/AMDGPU/kernel-args.ll
  test/CodeGen/AMDGPU/no-shrink-extloads.ll
  test/CodeGen/AMDGPU/setcc-opt.ll

Index: test/CodeGen/AMDGPU/setcc-opt.ll
===================================================================

--- test/CodeGen/AMDGPU/setcc-opt.ll
+++ test/CodeGen/AMDGPU/setcc-opt.ll
@@ -142,9 +142,12 @@
 }
 
 ; FUNC-LABEL: {{^}}cmp_zext_k_i8max:
-; GCN: buffer_load_ubyte [[B:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:44
-; GCN: v_mov_b32_e32 [[K255:v[0-9]+]], 0xff{{$}}
-; GCN: v_cmp_ne_i32_e32 vcc, [[K255]], [[B]]
+; SI: s_load_dword [[VALUE:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; VI: s_load_dword [[VALUE:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
+; GCN: s_movk_i32 [[K255:s[0-9]+]], 0xff
+; GCN: s_and_b32 [[B:s[0-9]+]], [[VALUE]], [[K255]]
+; GCN: v_mov_b32_e32 [[VK255:v[0-9]+]], [[K255]]
+; GCN: v_cmp_ne_i32_e32 vcc, [[B]], [[VK255]]
 ; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
 ; GCN: buffer_store_byte [[RESULT]]
 ; GCN: s_endpgm
@@ -187,9 +190,12 @@
 ; Should do a buffer_load_sbyte and compare with -1
 
 ; FUNC-LABEL: {{^}}cmp_sext_k_neg1_i8_arg:
-; GCN-DAG: buffer_load_ubyte [[B:v[0-9]+]]
-; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0xff{{$}}
-; GCN: v_cmp_ne_i32_e32 vcc, [[K]], [[B]]{{$}}
+; SI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
+; VI: s_load_dword [[VAL:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
+; GCN: s_movk_i32 [[K:s[0-9]+]], 0xff
+; GCN: s_and_b32 [[B:s[0-9]+]], [[VAL]], [[K]]
+; GCN: v_mov_b32_e32 [[VK:v[0-9]+]], [[K]]
+; GCN: v_cmp_ne_i32_e32 vcc, [[B]], [[VK]]{{$}}
 ; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
 ; GCN: buffer_store_byte [[RESULT]]
 ; GCN: s_endpgm
Index: test/CodeGen/AMDGPU/no-shrink-extloads.ll
===================================================================
--- test/CodeGen/AMDGPU/no-shrink-extloads.ll
+++ test/CodeGen/AMDGPU/no-shrink-extloads.ll
@@ -189,3 +189,15 @@
   store i8 %trunc, i8 addrspace(1)* %gep.out
   ret void
 }
+
+; FUNC-LABEL: {{^}}smrd_mask_i32_to_i16:
+; SI: s_load_dword [[LOAD:s[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0x0
+; SI: s_waitcnt lgkmcnt(0)
+; SI: s_and_b32 s{{[0-9]+}}, [[LOAD]], 0xffff
+define void @smrd_mask_i32_to_i16(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
+entry:
+  %val = load i32, i32 addrspace(2)* %in
+  %mask = and i32 %val, 65535
+  store i32 %mask, i32 addrspace(1)* %out
+  ret void
+}
Index: test/CodeGen/AMDGPU/kernel-args.ll
===================================================================
--- test/CodeGen/AMDGPU/kernel-args.ll
+++ test/CodeGen/AMDGPU/kernel-args.ll
@@ -4,8 +4,10 @@
 ; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG --check-prefix=FUNC
 
 ; FUNC-LABEL: {{^}}i8_arg:
-; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
-; GCN: buffer_load_ubyte
+; EG: AND_INT {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
+; SI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
+; VI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x2c
+; GCN: s_and_b32 s{{[0-9]+}}, [[VAL]], 0xff
 
 define void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) nounwind {
 entry:
@@ -39,8 +41,10 @@
 }
 
 ; FUNC-LABEL: {{^}}i16_arg:
-; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
-; GCN: buffer_load_ushort
+; EG: AND_INT {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
+; SI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
+; VI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x2c
+; GCN: s_and_b32 s{{[0-9]+}}, [[VAL]], 0xffff
 
 define void @i16_arg(i32 addrspace(1)* nocapture %out, i16 %in) nounwind {
 entry:
Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -3104,7 +3104,8 @@
         // be expensive (and would be wrong if the type is not byte sized).
         if (!LN0->isVolatile() && LoadedVT.bitsGT(ExtVT) && ExtVT.isRound() &&
             (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy,
-                                                    ExtVT))) {
+                                                    ExtVT)) &&
+            TLI.shouldReduceLoadWidth(LN0, ISD::ZEXTLOAD, ExtVT)) {
           EVT PtrType = LN0->getOperand(1).getValueType();
 
           unsigned Alignment = LN0->getAlignment();


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D13805.37576.patch
Type: text/x-patch
Size: 4185 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20151016/6846c85e/attachment.bin>