[llvm] cf29333 - AMDGPU/GlobalISel: Work around forming illegal zextload after legalize

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Sat Apr 11 07:55:58 PDT 2020


Author: Matt Arsenault
Date: 2020-04-11T10:52:58-04:00
New Revision: cf29333f40e8eee520b1907748d8ed2b82b95f80

URL: https://github.com/llvm/llvm-project/commit/cf29333f40e8eee520b1907748d8ed2b82b95f80
DIFF: https://github.com/llvm/llvm-project/commit/cf29333f40e8eee520b1907748d8ed2b82b95f80.diff

LOG: AMDGPU/GlobalISel: Work around forming illegal zextload after legalize

Selection would fail after the post legalize combiner put an illegal
zextload back together.

The base combiner has parameters to only allow legal operations, but
they appear to be unused. I also don't see a nice way to remove a
single entry from all_combines, so just hack around this by using a
separate combine group that leaves out combines_for_extload.

Added: 
    llvm/test/CodeGen/AMDGPU/GlobalISel/zextload.ll

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUCombine.td

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
index ff8a31dee337..9476b6e6d767 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
@@ -37,8 +37,18 @@ def AMDGPUPreLegalizerCombinerHelper: GICombinerHelper<
   let DisableRuleOption = "amdgpuprelegalizercombiner-disable-rule";
 }
 
+
+// FIXME: combines_for_extload can introduce illegal extloads which
+// aren't re-legalized.
+// FIXME: Is there a way to remove a single item from all_combines?
+def all_combines_minus_extload : GICombineGroup<[trivial_combines,
+  ptr_add_immed_chain, combine_indexed_load_store, undef_combines,
+  identity_combines]
+>;
+
 def AMDGPUPostLegalizerCombinerHelper: GICombinerHelper<
-  "AMDGPUGenPostLegalizerCombinerHelper", [all_combines,
-                                           gfx6gfx7_combines, uchar_to_float]> {
+  "AMDGPUGenPostLegalizerCombinerHelper",
+  [all_combines_minus_extload, gfx6gfx7_combines,
+   uchar_to_float]> {
   let DisableRuleOption = "amdgpupostlegalizercombiner-disable-rule";
 }

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/zextload.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/zextload.ll
new file mode 100644
index 000000000000..7303d104e97c
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/zextload.ll
@@ -0,0 +1,217 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8 %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6 %s
+
+define i64 @zextload_global_i1_to_i64(i1 addrspace(1)* %ptr) {
+; GFX9-LABEL: zextload_global_i1_to_i64:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
+; GFX9-NEXT:    v_mov_b32_e32 v1, 0
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_and_b32_e32 v0, 1, v0
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: zextload_global_i1_to_i64:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
+; GFX8-NEXT:    v_mov_b32_e32 v1, 0
+; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_and_b32_e32 v0, 1, v0
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX6-LABEL: zextload_global_i1_to_i64:
+; GFX6:       ; %bb.0:
+; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT:    s_mov_b32 s6, 0
+; GFX6-NEXT:    s_mov_b32 s7, 0xf000
+; GFX6-NEXT:    s_mov_b64 s[4:5], 0
+; GFX6-NEXT:    buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64
+; GFX6-NEXT:    v_mov_b32_e32 v1, 0
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    v_and_b32_e32 v0, 1, v0
+; GFX6-NEXT:    s_setpc_b64 s[30:31]
+  %load = load i1, i1 addrspace(1)* %ptr
+  %ext = zext i1 %load to i64
+  ret i64 %ext
+}
+
+define i64 @zextload_global_i8_to_i64(i8 addrspace(1)* %ptr) {
+; GFX9-LABEL: zextload_global_i8_to_i64:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
+; GFX9-NEXT:    v_mov_b32_e32 v1, 0
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: zextload_global_i8_to_i64:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    flat_load_ubyte v0, v[0:1]
+; GFX8-NEXT:    v_mov_b32_e32 v1, 0
+; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX6-LABEL: zextload_global_i8_to_i64:
+; GFX6:       ; %bb.0:
+; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT:    s_mov_b32 s6, 0
+; GFX6-NEXT:    s_mov_b32 s7, 0xf000
+; GFX6-NEXT:    s_mov_b64 s[4:5], 0
+; GFX6-NEXT:    buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64
+; GFX6-NEXT:    v_mov_b32_e32 v1, 0
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    s_setpc_b64 s[30:31]
+  %load = load i8, i8 addrspace(1)* %ptr
+  %ext = zext i8 %load to i64
+  ret i64 %ext
+}
+
+define i64 @zextload_global_i16_to_i64(i16 addrspace(1)* %ptr) {
+; GFX9-LABEL: zextload_global_i16_to_i64:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    global_load_ushort v0, v[0:1], off
+; GFX9-NEXT:    v_mov_b32_e32 v1, 0
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: zextload_global_i16_to_i64:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    flat_load_ushort v0, v[0:1]
+; GFX8-NEXT:    v_mov_b32_e32 v1, 0
+; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX6-LABEL: zextload_global_i16_to_i64:
+; GFX6:       ; %bb.0:
+; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT:    s_mov_b32 s6, 0
+; GFX6-NEXT:    s_mov_b32 s7, 0xf000
+; GFX6-NEXT:    s_mov_b64 s[4:5], 0
+; GFX6-NEXT:    buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64
+; GFX6-NEXT:    v_mov_b32_e32 v1, 0
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    s_setpc_b64 s[30:31]
+  %load = load i16, i16 addrspace(1)* %ptr
+  %ext = zext i16 %load to i64
+  ret i64 %ext
+}
+
+define i64 @zextload_global_i32_to_i64(i32 addrspace(1)* %ptr) {
+; GFX9-LABEL: zextload_global_i32_to_i64:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    global_load_dword v0, v[0:1], off
+; GFX9-NEXT:    v_mov_b32_e32 v1, 0
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: zextload_global_i32_to_i64:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    flat_load_dword v0, v[0:1]
+; GFX8-NEXT:    v_mov_b32_e32 v1, 0
+; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX6-LABEL: zextload_global_i32_to_i64:
+; GFX6:       ; %bb.0:
+; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT:    s_mov_b32 s6, 0
+; GFX6-NEXT:    s_mov_b32 s7, 0xf000
+; GFX6-NEXT:    s_mov_b64 s[4:5], 0
+; GFX6-NEXT:    buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
+; GFX6-NEXT:    v_mov_b32_e32 v1, 0
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    s_setpc_b64 s[30:31]
+  %load = load i32, i32 addrspace(1)* %ptr
+  %ext = zext i32 %load to i64
+  ret i64 %ext
+}
+
+define i96 @zextload_global_i32_to_i96(i32 addrspace(1)* %ptr) {
+; GFX9-LABEL: zextload_global_i32_to_i96:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    global_load_dword v0, v[0:1], off
+; GFX9-NEXT:    s_mov_b32 s4, 0
+; GFX9-NEXT:    v_mov_b32_e32 v1, s4
+; GFX9-NEXT:    v_mov_b32_e32 v2, s4
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: zextload_global_i32_to_i96:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    flat_load_dword v0, v[0:1]
+; GFX8-NEXT:    s_mov_b32 s4, 0
+; GFX8-NEXT:    v_mov_b32_e32 v1, s4
+; GFX8-NEXT:    v_mov_b32_e32 v2, s4
+; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX6-LABEL: zextload_global_i32_to_i96:
+; GFX6:       ; %bb.0:
+; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT:    s_mov_b32 s6, 0
+; GFX6-NEXT:    s_mov_b32 s7, 0xf000
+; GFX6-NEXT:    s_mov_b64 s[4:5], 0
+; GFX6-NEXT:    buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
+; GFX6-NEXT:    s_mov_b32 s8, 0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s8
+; GFX6-NEXT:    v_mov_b32_e32 v2, s8
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    s_setpc_b64 s[30:31]
+  %load = load i32, i32 addrspace(1)* %ptr
+  %ext = zext i32 %load to i96
+  ret i96 %ext
+}
+
+define i128 @zextload_global_i32_to_i128(i32 addrspace(1)* %ptr) {
+; GFX9-LABEL: zextload_global_i32_to_i128:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_mov_b32_e32 v3, v1
+; GFX9-NEXT:    v_mov_b32_e32 v2, v0
+; GFX9-NEXT:    global_load_dword v0, v[2:3], off
+; GFX9-NEXT:    v_mov_b32_e32 v2, 0
+; GFX9-NEXT:    v_mov_b32_e32 v1, 0
+; GFX9-NEXT:    v_mov_b32_e32 v3, 0
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: zextload_global_i32_to_i128:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_mov_b32_e32 v3, v1
+; GFX8-NEXT:    v_mov_b32_e32 v2, v0
+; GFX8-NEXT:    flat_load_dword v0, v[2:3]
+; GFX8-NEXT:    v_mov_b32_e32 v2, 0
+; GFX8-NEXT:    v_mov_b32_e32 v1, 0
+; GFX8-NEXT:    v_mov_b32_e32 v3, 0
+; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX6-LABEL: zextload_global_i32_to_i128:
+; GFX6:       ; %bb.0:
+; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT:    v_mov_b32_e32 v3, v1
+; GFX6-NEXT:    v_mov_b32_e32 v2, v0
+; GFX6-NEXT:    s_mov_b32 s6, 0
+; GFX6-NEXT:    s_mov_b32 s7, 0xf000
+; GFX6-NEXT:    s_mov_b64 s[4:5], 0
+; GFX6-NEXT:    buffer_load_dword v0, v[2:3], s[4:7], 0 addr64
+; GFX6-NEXT:    v_mov_b32_e32 v2, 0
+; GFX6-NEXT:    v_mov_b32_e32 v1, 0
+; GFX6-NEXT:    v_mov_b32_e32 v3, 0
+; GFX6-NEXT:    s_waitcnt vmcnt(0)
+; GFX6-NEXT:    s_setpc_b64 s[30:31]
+  %load = load i32, i32 addrspace(1)* %ptr
+  %ext = zext i32 %load to i128
+  ret i128 %ext
+}


        


More information about the llvm-commits mailing list