[PATCH] D51589: DAG: Fold extract_vector_elt (scalar_to_vector), K to undef

Sun Mar 17 13:57:27 PDT 2019

RKSimon updated this revision to Diff 191043.
RKSimon added a comment.
Herald added subscribers: nhaehnle, jvesely.
Herald added a project: LLVM.

Finally got a test change, and its an increase in instructions....


Repository:
  rL LLVM

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D51589/new/

https://reviews.llvm.org/D51589

Files:
  lib/CodeGen/SelectionDAG/DAGCombiner.cpp
  test/CodeGen/AMDGPU/max.i16.ll


Index: test/CodeGen/AMDGPU/max.i16.ll
===================================================================

--- test/CodeGen/AMDGPU/max.i16.ll
+++ test/CodeGen/AMDGPU/max.i16.ll
@@ -160,6 +160,8 @@
 ; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
 ; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v4, 3, v0
+; GFX9-NEXT:    v_mov_b32_e32 v6, 0
+; GFX9-NEXT:    v_mov_b32_e32 v7, 0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s7
 ; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, s6, v4
@@ -167,20 +169,19 @@
 ; GFX9-NEXT:    v_mov_b32_e32 v3, s1
 ; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, s0, v4
 ; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
-; GFX9-NEXT:    global_load_dword v6, v[2:3], off
-; GFX9-NEXT:    global_load_dword v7, v[0:1], off
+; GFX9-NEXT:    global_load_short_d16 v7, v[0:1], off offset:4
+; GFX9-NEXT:    global_load_dword v8, v[2:3], off
+; GFX9-NEXT:    global_load_dword v0, v[0:1], off
+; GFX9-NEXT:    global_load_short_d16 v6, v[2:3], off offset:4
 ; GFX9-NEXT:    v_mov_b32_e32 v5, s5
 ; GFX9-NEXT:    v_add_co_u32_e32 v4, vcc, s4, v4
 ; GFX9-NEXT:    v_addc_co_u32_e32 v5, vcc, 0, v5, vcc
+; GFX9-NEXT:    s_waitcnt vmcnt(1)
+; GFX9-NEXT:    v_pk_max_i16 v0, v0, v8
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_mov_b32_e32 v8, v7
-; GFX9-NEXT:    v_pk_max_i16 v7, v7, v6
-; GFX9-NEXT:    global_load_short_d16 v6, v[2:3], off offset:4
-; GFX9-NEXT:    global_load_short_d16 v8, v[0:1], off offset:4
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_pk_max_i16 v0, v8, v6
-; GFX9-NEXT:    global_store_dword v[4:5], v7, off
-; GFX9-NEXT:    global_store_short v[4:5], v0, off offset:4
+; GFX9-NEXT:    v_pk_max_i16 v1, v7, v6
+; GFX9-NEXT:    global_store_dword v[4:5], v0, off
+; GFX9-NEXT:    global_store_short v[4:5], v1, off offset:4
 ; GFX9-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
   %gep0 = getelementptr <3 x i16>, <3 x i16> addrspace(1)* %aptr, i32 %tid
Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -16017,6 +16017,10 @@
 
   // (vextract (scalar_to_vector val, 0) -> val
   if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+    // Only 0'th element of SCALAR_TO_VECTOR is defined.
+    if (DAG.isKnownNeverZero(Index))
+      return DAG.getUNDEF(ScalarVT);
+
     // Check if the result type doesn't match the inserted element type. A
     // SCALAR_TO_VECTOR may truncate the inserted element and the
     // EXTRACT_VECTOR_ELT may widen the extracted vector.


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D51589.191043.patch
Type: text/x-patch
Size: 2739 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20190317/7de9d00a/attachment.bin>