[llvm] r312699 - AMDGPU: Don't legalize i16 extloads to i32 with legal i16

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 6 22:37:34 PDT 2017


Author: arsenm
Date: Wed Sep  6 22:37:34 2017
New Revision: 312699

URL: http://llvm.org/viewvc/llvm-project?rev=312699&view=rev
Log:
AMDGPU: Don't legalize i16 extloads to i32 with legal i16

Keeping non-extending i16 loads as i16 (rather than widening them to
i32) makes it easier to match some new gfx9 load instructions.

Modified:
    llvm/trunk/lib/Target/AMDGPU/BUFInstructions.td
    llvm/trunk/lib/Target/AMDGPU/FLATInstructions.td
    llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
    llvm/trunk/test/CodeGen/AMDGPU/sminmax.v2i16.ll

Modified: llvm/trunk/lib/Target/AMDGPU/BUFInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/BUFInstructions.td?rev=312699&r1=312698&r2=312699&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/BUFInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/BUFInstructions.td Wed Sep  6 22:37:34 2017
@@ -1130,6 +1130,8 @@ defm : MUBUFLoad_Pattern <BUFFER_LOAD_UB
 defm : MUBUFLoad_Pattern <BUFFER_LOAD_SBYTE_OFFSET, i16, mubuf_sextloadi8>;
 defm : MUBUFLoad_Pattern <BUFFER_LOAD_UBYTE_OFFSET, i16, mubuf_az_extloadi8>;
 
+defm : MUBUFLoad_Pattern <BUFFER_LOAD_USHORT_OFFSET, i16, mubuf_load>;
+
 } // End Predicates = [Has16BitInsts]
 
 multiclass MUBUFScratchLoadPat <MUBUF_Pseudo InstrOffen,
@@ -1153,6 +1155,7 @@ defm : MUBUFScratchLoadPat <BUFFER_LOAD_
 defm : MUBUFScratchLoadPat <BUFFER_LOAD_UBYTE_OFFEN, BUFFER_LOAD_UBYTE_OFFSET, i16, extloadi8_private>;
 defm : MUBUFScratchLoadPat <BUFFER_LOAD_SSHORT_OFFEN, BUFFER_LOAD_SSHORT_OFFSET, i32, sextloadi16_private>;
 defm : MUBUFScratchLoadPat <BUFFER_LOAD_USHORT_OFFEN, BUFFER_LOAD_USHORT_OFFSET, i32, extloadi16_private>;
+defm : MUBUFScratchLoadPat <BUFFER_LOAD_USHORT_OFFEN, BUFFER_LOAD_USHORT_OFFSET, i16, load_private>;
 defm : MUBUFScratchLoadPat <BUFFER_LOAD_DWORD_OFFEN, BUFFER_LOAD_DWORD_OFFSET, i32, load_private>;
 defm : MUBUFScratchLoadPat <BUFFER_LOAD_DWORDX2_OFFEN, BUFFER_LOAD_DWORDX2_OFFSET, v2i32, load_private>;
 defm : MUBUFScratchLoadPat <BUFFER_LOAD_DWORDX4_OFFEN, BUFFER_LOAD_DWORDX4_OFFSET, v4i32, load_private>;

Modified: llvm/trunk/lib/Target/AMDGPU/FLATInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/FLATInstructions.td?rev=312699&r1=312698&r2=312699&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/FLATInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/FLATInstructions.td Wed Sep  6 22:37:34 2017
@@ -706,6 +706,7 @@ def : FlatLoadPat <FLAT_LOAD_SBYTE, flat
 def : FlatLoadPat <FLAT_LOAD_UBYTE, flat_az_extloadi8, i16>;
 def : FlatLoadPat <FLAT_LOAD_SBYTE, flat_sextloadi8, i16>;
 def : FlatLoadPat <FLAT_LOAD_USHORT, flat_az_extloadi16, i32>;
+def : FlatLoadPat <FLAT_LOAD_USHORT, flat_load, i16>;
 def : FlatLoadPat <FLAT_LOAD_SSHORT, flat_sextloadi16, i32>;
 def : FlatLoadPat <FLAT_LOAD_DWORD, flat_load, i32>;
 def : FlatLoadPat <FLAT_LOAD_DWORDX2, flat_load, v2i32>;
@@ -763,7 +764,7 @@ def : FlatLoadSignedPat <GLOBAL_LOAD_UBY
 def : FlatLoadSignedPat <GLOBAL_LOAD_SBYTE, sextloadi8_global, i16>;
 def : FlatLoadSignedPat <GLOBAL_LOAD_USHORT, az_extloadi16_global, i32>;
 def : FlatLoadSignedPat <GLOBAL_LOAD_SSHORT, sextloadi16_global, i32>;
-
+def : FlatLoadSignedPat <GLOBAL_LOAD_USHORT, global_load, i16>;
 
 def : FlatLoadSignedPat <GLOBAL_LOAD_DWORD, global_load, i32>;
 def : FlatLoadSignedPat <GLOBAL_LOAD_DWORDX2, global_load, v2i32>;

Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=312699&r1=312698&r2=312699&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Wed Sep  6 22:37:34 2017
@@ -4430,6 +4430,9 @@ SDValue SITargetLowering::LowerLOAD(SDVa
   EVT MemVT = Load->getMemoryVT();
 
   if (ExtType == ISD::NON_EXTLOAD && MemVT.getSizeInBits() < 32) {
+    if (MemVT == MVT::i16 && isTypeLegal(MVT::i16))
+      return SDValue();
+
     // FIXME: Copied from PPC
     // First, load into 32 bits, then truncate to 1 bit.
 

Modified: llvm/trunk/test/CodeGen/AMDGPU/sminmax.v2i16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sminmax.v2i16.ll?rev=312699&r1=312698&r2=312699&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sminmax.v2i16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sminmax.v2i16.ll Wed Sep  6 22:37:34 2017
@@ -10,8 +10,8 @@
 
 ; VI: v_sub_i32_e32
 ; VI-DAG: v_sub_i32_e32
-; VI: v_max_i32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, sext(v{{[0-9]+}}) dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; VI: v_max_i32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, sext(v{{[0-9]+}}) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; VI: v_max_i32_sdwa v{{[0-9]+}}, sext(v{{[0-9]+}}), sext(v{{[0-9]+}}) dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0
+; VI: v_max_i32_sdwa v{{[0-9]+}}, sext(v{{[0-9]+}}), sext(v{{[0-9]+}}) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0
 ; VI: v_add_i32_e32
 ; VI: v_add_i32_e32
 ; VI: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD




More information about the llvm-commits mailing list