[llvm] r312699 - AMDGPU: Don't legalize i16 extloads to i32 with legal i16
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 6 22:37:34 PDT 2017
Author: arsenm
Date: Wed Sep 6 22:37:34 2017
New Revision: 312699
URL: http://llvm.org/viewvc/llvm-project?rev=312699&view=rev
Log:
AMDGPU: Don't legalize i16 extloads to i32 with legal i16
Keeping non-i16 extloads makes it easier to match some new
gfx9 load instructions.
Modified:
llvm/trunk/lib/Target/AMDGPU/BUFInstructions.td
llvm/trunk/lib/Target/AMDGPU/FLATInstructions.td
llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/trunk/test/CodeGen/AMDGPU/sminmax.v2i16.ll
Modified: llvm/trunk/lib/Target/AMDGPU/BUFInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/BUFInstructions.td?rev=312699&r1=312698&r2=312699&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/BUFInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/BUFInstructions.td Wed Sep 6 22:37:34 2017
@@ -1130,6 +1130,8 @@ defm : MUBUFLoad_Pattern <BUFFER_LOAD_UB
defm : MUBUFLoad_Pattern <BUFFER_LOAD_SBYTE_OFFSET, i16, mubuf_sextloadi8>;
defm : MUBUFLoad_Pattern <BUFFER_LOAD_UBYTE_OFFSET, i16, mubuf_az_extloadi8>;
+defm : MUBUFLoad_Pattern <BUFFER_LOAD_USHORT_OFFSET, i16, mubuf_load>;
+
} // End Predicates = [Has16BitInsts]
multiclass MUBUFScratchLoadPat <MUBUF_Pseudo InstrOffen,
@@ -1153,6 +1155,7 @@ defm : MUBUFScratchLoadPat <BUFFER_LOAD_
defm : MUBUFScratchLoadPat <BUFFER_LOAD_UBYTE_OFFEN, BUFFER_LOAD_UBYTE_OFFSET, i16, extloadi8_private>;
defm : MUBUFScratchLoadPat <BUFFER_LOAD_SSHORT_OFFEN, BUFFER_LOAD_SSHORT_OFFSET, i32, sextloadi16_private>;
defm : MUBUFScratchLoadPat <BUFFER_LOAD_USHORT_OFFEN, BUFFER_LOAD_USHORT_OFFSET, i32, extloadi16_private>;
+defm : MUBUFScratchLoadPat <BUFFER_LOAD_USHORT_OFFEN, BUFFER_LOAD_USHORT_OFFSET, i16, load_private>;
defm : MUBUFScratchLoadPat <BUFFER_LOAD_DWORD_OFFEN, BUFFER_LOAD_DWORD_OFFSET, i32, load_private>;
defm : MUBUFScratchLoadPat <BUFFER_LOAD_DWORDX2_OFFEN, BUFFER_LOAD_DWORDX2_OFFSET, v2i32, load_private>;
defm : MUBUFScratchLoadPat <BUFFER_LOAD_DWORDX4_OFFEN, BUFFER_LOAD_DWORDX4_OFFSET, v4i32, load_private>;
Modified: llvm/trunk/lib/Target/AMDGPU/FLATInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/FLATInstructions.td?rev=312699&r1=312698&r2=312699&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/FLATInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/FLATInstructions.td Wed Sep 6 22:37:34 2017
@@ -706,6 +706,7 @@ def : FlatLoadPat <FLAT_LOAD_SBYTE, flat
def : FlatLoadPat <FLAT_LOAD_UBYTE, flat_az_extloadi8, i16>;
def : FlatLoadPat <FLAT_LOAD_SBYTE, flat_sextloadi8, i16>;
def : FlatLoadPat <FLAT_LOAD_USHORT, flat_az_extloadi16, i32>;
+def : FlatLoadPat <FLAT_LOAD_USHORT, flat_load, i16>;
def : FlatLoadPat <FLAT_LOAD_SSHORT, flat_sextloadi16, i32>;
def : FlatLoadPat <FLAT_LOAD_DWORD, flat_load, i32>;
def : FlatLoadPat <FLAT_LOAD_DWORDX2, flat_load, v2i32>;
@@ -763,7 +764,7 @@ def : FlatLoadSignedPat <GLOBAL_LOAD_UBY
def : FlatLoadSignedPat <GLOBAL_LOAD_SBYTE, sextloadi8_global, i16>;
def : FlatLoadSignedPat <GLOBAL_LOAD_USHORT, az_extloadi16_global, i32>;
def : FlatLoadSignedPat <GLOBAL_LOAD_SSHORT, sextloadi16_global, i32>;
-
+def : FlatLoadSignedPat <GLOBAL_LOAD_USHORT, global_load, i16>;
def : FlatLoadSignedPat <GLOBAL_LOAD_DWORD, global_load, i32>;
def : FlatLoadSignedPat <GLOBAL_LOAD_DWORDX2, global_load, v2i32>;
Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=312699&r1=312698&r2=312699&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Wed Sep 6 22:37:34 2017
@@ -4430,6 +4430,9 @@ SDValue SITargetLowering::LowerLOAD(SDVa
EVT MemVT = Load->getMemoryVT();
if (ExtType == ISD::NON_EXTLOAD && MemVT.getSizeInBits() < 32) {
+ if (MemVT == MVT::i16 && isTypeLegal(MVT::i16))
+ return SDValue();
+
// FIXME: Copied from PPC
// First, load into 32 bits, then truncate to 1 bit.
Modified: llvm/trunk/test/CodeGen/AMDGPU/sminmax.v2i16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sminmax.v2i16.ll?rev=312699&r1=312698&r2=312699&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sminmax.v2i16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sminmax.v2i16.ll Wed Sep 6 22:37:34 2017
@@ -10,8 +10,8 @@
; VI: v_sub_i32_e32
; VI-DAG: v_sub_i32_e32
-; VI: v_max_i32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, sext(v{{[0-9]+}}) dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; VI: v_max_i32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, sext(v{{[0-9]+}}) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; VI: v_max_i32_sdwa v{{[0-9]+}}, sext(v{{[0-9]+}}), sext(v{{[0-9]+}}) dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0
+; VI: v_max_i32_sdwa v{{[0-9]+}}, sext(v{{[0-9]+}}), sext(v{{[0-9]+}}) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0
; VI: v_add_i32_e32
; VI: v_add_i32_e32
; VI: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
More information about the llvm-commits
mailing list