[llvm] d7d2e45 - AMDGPU/GlobalISel: Fix selecting G_SEXTLOAD/G_ZEXTLOAD pre-gfx9
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 27 12:56:52 PDT 2021
Author: Matt Arsenault
Date: 2021-07-27T15:56:42-04:00
New Revision: d7d2e4545e6b04ea29ffd05ebef2f7c26590b925
URL: https://github.com/llvm/llvm-project/commit/d7d2e4545e6b04ea29ffd05ebef2f7c26590b925
DIFF: https://github.com/llvm/llvm-project/commit/d7d2e4545e6b04ea29ffd05ebef2f7c26590b925.diff
LOG: AMDGPU/GlobalISel: Fix selecting G_SEXTLOAD/G_ZEXTLOAD pre-gfx9
The m0 glue patterns were failing to import.
Added:
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sextload-local.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-zextload-local.mir
Modified:
llvm/lib/Target/AMDGPU/AMDGPUGISel.td
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index 36e04fc78c31..521c8f261a00 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -128,6 +128,8 @@ def gi_smrd_buffer_imm32 :
def : GINodeEquiv<G_LOAD, AMDGPUld_glue> {
let CheckMMOIsNonAtomic = 1;
+ let IfSignExtend = G_SEXTLOAD;
+ let IfZeroExtend = G_ZEXTLOAD;
}
def : GINodeEquiv<G_STORE, AMDGPUst_glue> {
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sextload-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sextload-local.mir
new file mode 100644
index 000000000000..9a6dea64e59d
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sextload-local.mir
@@ -0,0 +1,120 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX6 %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX7 %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX7 %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
+
+---
+name: sextload_local_s32_from_s8_align1
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GFX6-LABEL: name: sextload_local_s32_from_s8_align1
+ ; GFX6: liveins: $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6: [[DS_READ_I8_:%[0-9]+]]:vgpr_32 = DS_READ_I8 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3)
+ ; GFX6: $vgpr0 = COPY [[DS_READ_I8_]]
+ ; GFX7-LABEL: name: sextload_local_s32_from_s8_align1
+ ; GFX7: liveins: $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7: [[DS_READ_I8_:%[0-9]+]]:vgpr_32 = DS_READ_I8 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3)
+ ; GFX7: $vgpr0 = COPY [[DS_READ_I8_]]
+ ; GFX9-LABEL: name: sextload_local_s32_from_s8_align1
+ ; GFX9: liveins: $vgpr0
+ ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX9: [[DS_READ_I8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_I8_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (s8), addrspace 3)
+ ; GFX9: $vgpr0 = COPY [[DS_READ_I8_gfx9_]]
+ %0:vgpr(p3) = COPY $vgpr0
+ %1:vgpr(s32) = G_SEXTLOAD %0 :: (load (s8), align 1, addrspace 3)
+ $vgpr0 = COPY %1
+
+...
+
+---
+name: sextload_local_s32_from_s16_align2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GFX6-LABEL: name: sextload_local_s32_from_s16_align2
+ ; GFX6: liveins: $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6: [[DS_READ_I16_:%[0-9]+]]:vgpr_32 = DS_READ_I16 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s16), addrspace 3)
+ ; GFX6: $vgpr0 = COPY [[DS_READ_I16_]]
+ ; GFX7-LABEL: name: sextload_local_s32_from_s16_align2
+ ; GFX7: liveins: $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7: [[DS_READ_I16_:%[0-9]+]]:vgpr_32 = DS_READ_I16 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s16), addrspace 3)
+ ; GFX7: $vgpr0 = COPY [[DS_READ_I16_]]
+ ; GFX9-LABEL: name: sextload_local_s32_from_s16_align2
+ ; GFX9: liveins: $vgpr0
+ ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX9: [[DS_READ_I16_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_I16_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (s16), addrspace 3)
+ ; GFX9: $vgpr0 = COPY [[DS_READ_I16_gfx9_]]
+ %0:vgpr(p3) = COPY $vgpr0
+ %1:vgpr(s32) = G_SEXTLOAD %0 :: (load (s16), align 2, addrspace 3)
+ $vgpr0 = COPY %1
+
+...
+
+# ---
+# name: sextload_local_s16_from_s8_align1
+# legalized: true
+# regBankSelected: true
+# tracksRegLiveness: true
+
+# body: |
+# bb.0:
+# liveins: $vgpr0
+
+# %0:vgpr(p3) = COPY $vgpr0
+# %1:vgpr(s16) = G_SEXTLOAD %0 :: (load (s8), align 1, addrspace 3)
+# %2:vgpr(s32) = G_ANYEXT %1
+# $vgpr0 = COPY %2
+
+# ...
+
+---
+name: sextload_local_s32_from_s8_align1_offset4095
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GFX6-LABEL: name: sextload_local_s32_from_s8_align1_offset4095
+ ; GFX6: liveins: $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec
+ ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
+ ; GFX6: [[DS_READ_I8_:%[0-9]+]]:vgpr_32 = DS_READ_I8 %2, 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3)
+ ; GFX6: $vgpr0 = COPY [[DS_READ_I8_]]
+ ; GFX7-LABEL: name: sextload_local_s32_from_s8_align1_offset4095
+ ; GFX7: liveins: $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7: [[DS_READ_I8_:%[0-9]+]]:vgpr_32 = DS_READ_I8 [[COPY]], 4095, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3)
+ ; GFX7: $vgpr0 = COPY [[DS_READ_I8_]]
+ ; GFX9-LABEL: name: sextload_local_s32_from_s8_align1_offset4095
+ ; GFX9: liveins: $vgpr0
+ ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX9: [[DS_READ_I8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_I8_gfx9 [[COPY]], 4095, 0, implicit $exec :: (load (s8), addrspace 3)
+ ; GFX9: $vgpr0 = COPY [[DS_READ_I8_gfx9_]]
+ %0:vgpr(p3) = COPY $vgpr0
+ %1:vgpr(s32) = G_CONSTANT i32 4095
+ %2:vgpr(p3) = G_PTR_ADD %0, %1
+ %3:vgpr(s32) = G_SEXTLOAD %2 :: (load (s8), align 1, addrspace 3)
+ $vgpr0 = COPY %3
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-zextload-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-zextload-local.mir
new file mode 100644
index 000000000000..ed4908cafa30
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-zextload-local.mir
@@ -0,0 +1,120 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX6 %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX7 %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX7 %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
+
+---
+name: zextload_local_s32_from_s8_align1
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GFX6-LABEL: name: zextload_local_s32_from_s8_align1
+ ; GFX6: liveins: $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3)
+ ; GFX6: $vgpr0 = COPY [[DS_READ_U8_]]
+ ; GFX7-LABEL: name: zextload_local_s32_from_s8_align1
+ ; GFX7: liveins: $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3)
+ ; GFX7: $vgpr0 = COPY [[DS_READ_U8_]]
+ ; GFX9-LABEL: name: zextload_local_s32_from_s8_align1
+ ; GFX9: liveins: $vgpr0
+ ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX9: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (s8), addrspace 3)
+ ; GFX9: $vgpr0 = COPY [[DS_READ_U8_gfx9_]]
+ %0:vgpr(p3) = COPY $vgpr0
+ %1:vgpr(s32) = G_ZEXTLOAD %0 :: (load (s8), align 1, addrspace 3)
+ $vgpr0 = COPY %1
+
+...
+
+---
+name: zextload_local_s32_from_s16_align2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GFX6-LABEL: name: zextload_local_s32_from_s16_align2
+ ; GFX6: liveins: $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6: [[DS_READ_U16_:%[0-9]+]]:vgpr_32 = DS_READ_U16 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s16), addrspace 3)
+ ; GFX6: $vgpr0 = COPY [[DS_READ_U16_]]
+ ; GFX7-LABEL: name: zextload_local_s32_from_s16_align2
+ ; GFX7: liveins: $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7: [[DS_READ_U16_:%[0-9]+]]:vgpr_32 = DS_READ_U16 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (s16), addrspace 3)
+ ; GFX7: $vgpr0 = COPY [[DS_READ_U16_]]
+ ; GFX9-LABEL: name: zextload_local_s32_from_s16_align2
+ ; GFX9: liveins: $vgpr0
+ ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX9: [[DS_READ_U16_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (s16), addrspace 3)
+ ; GFX9: $vgpr0 = COPY [[DS_READ_U16_gfx9_]]
+ %0:vgpr(p3) = COPY $vgpr0
+ %1:vgpr(s32) = G_ZEXTLOAD %0 :: (load (s16), align 2, addrspace 3)
+ $vgpr0 = COPY %1
+
+...
+
+# ---
+# name: zextload_local_s16_from_s8_align1
+# legalized: true
+# regBankSelected: true
+# tracksRegLiveness: true
+
+# body: |
+# bb.0:
+# liveins: $vgpr0
+
+# %0:vgpr(p3) = COPY $vgpr0
+# %1:vgpr(s16) = G_ZEXTLOAD %0 :: (load (s8), align 1, addrspace 3)
+# %2:vgpr(s32) = G_ANYEXT %1
+# $vgpr0 = COPY %2
+
+# ...
+
+---
+name: zextload_local_s32_from_s8_align1_offset4095
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GFX6-LABEL: name: zextload_local_s32_from_s8_align1_offset4095
+ ; GFX6: liveins: $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec
+ ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
+ ; GFX6: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 %2, 0, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3)
+ ; GFX6: $vgpr0 = COPY [[DS_READ_U8_]]
+ ; GFX7-LABEL: name: zextload_local_s32_from_s8_align1_offset4095
+ ; GFX7: liveins: $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[COPY]], 4095, 0, implicit $m0, implicit $exec :: (load (s8), addrspace 3)
+ ; GFX7: $vgpr0 = COPY [[DS_READ_U8_]]
+ ; GFX9-LABEL: name: zextload_local_s32_from_s8_align1_offset4095
+ ; GFX9: liveins: $vgpr0
+ ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX9: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[COPY]], 4095, 0, implicit $exec :: (load (s8), addrspace 3)
+ ; GFX9: $vgpr0 = COPY [[DS_READ_U8_gfx9_]]
+ %0:vgpr(p3) = COPY $vgpr0
+ %1:vgpr(s32) = G_CONSTANT i32 4095
+ %2:vgpr(p3) = G_PTR_ADD %0, %1
+ %3:vgpr(s32) = G_ZEXTLOAD %2 :: (load (s8), align 1, addrspace 3)
+ $vgpr0 = COPY %3
+
+...
More information about the llvm-commits mailing list