[llvm] [AMDGPU][True16][CodeGen] gisel true16 for G_MERGE (PR #129287)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 28 11:19:26 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
@llvm/pr-subscribers-llvm-globalisel
Author: Brox Chen (broxigarchen)
<details>
<summary>Changes</summary>
GlobalIsel true16 selection for G_MERGE
---
Patch is 158.34 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/129287.diff
3 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir (+280-57)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir (+989-529)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index a787c10a9421c..b5ad8ad77593c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -639,7 +639,7 @@ bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(MachineInstr &MI) const {
LLT SrcTy = MRI->getType(MI.getOperand(1).getReg());
const unsigned SrcSize = SrcTy.getSizeInBits();
- if (SrcSize < 32)
+ if (SrcSize < 16)
return selectImpl(MI, *CoverageInfo);
const DebugLoc &DL = MI.getDebugLoc();
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir
index d45bc31a12729..63aa8b4dc1b4f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir
@@ -1,7 +1,10 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE64 %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE32 %s
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX11 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefixes=GFX11-TRUE16 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefixes=GFX11-FAKE16 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefixes=GFX12-TRUE16 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefixes=GFX12-FAKE16 %s
---
@@ -29,13 +32,39 @@ body: |
; WAVE32-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]]
;
- ; GFX11-LABEL: name: icmp_eq_s16_sv
- ; GFX11: liveins: $sgpr0, $vgpr0
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX11-NEXT: [[V_CMP_EQ_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_fake16_e64_]]
+ ; GFX11-TRUE16-LABEL: name: icmp_eq_s16_sv
+ ; GFX11-TRUE16: liveins: $sgpr0, $vgpr0
+ ; GFX11-TRUE16-NEXT: {{ $}}
+ ; GFX11-TRUE16-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX11-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16
+ ; GFX11-TRUE16-NEXT: [[V_CMP_EQ_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_t16_e64 0, [[COPY]], 0, [[COPY2]], 0, implicit $exec
+ ; GFX11-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_t16_e64_]]
+ ;
+ ; GFX11-FAKE16-LABEL: name: icmp_eq_s16_sv
+ ; GFX11-FAKE16: liveins: $sgpr0, $vgpr0
+ ; GFX11-FAKE16-NEXT: {{ $}}
+ ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX11-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-FAKE16-NEXT: [[V_CMP_EQ_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_fake16_e64_]]
+ ;
+ ; GFX12-TRUE16-LABEL: name: icmp_eq_s16_sv
+ ; GFX12-TRUE16: liveins: $sgpr0, $vgpr0
+ ; GFX12-TRUE16-NEXT: {{ $}}
+ ; GFX12-TRUE16-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX12-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX12-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16
+ ; GFX12-TRUE16-NEXT: [[V_CMP_EQ_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_t16_e64 0, [[COPY]], 0, [[COPY2]], 0, implicit $exec
+ ; GFX12-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_t16_e64_]]
+ ;
+ ; GFX12-FAKE16-LABEL: name: icmp_eq_s16_sv
+ ; GFX12-FAKE16: liveins: $sgpr0, $vgpr0
+ ; GFX12-FAKE16-NEXT: {{ $}}
+ ; GFX12-FAKE16-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX12-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX12-FAKE16-NEXT: [[V_CMP_EQ_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX12-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_fake16_e64_]]
%0:sgpr(s32) = COPY $sgpr0
%1:vgpr(s32) = COPY $vgpr0
%2:sgpr(s16) = G_TRUNC %0
@@ -70,13 +99,39 @@ body: |
; WAVE32-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]]
;
- ; GFX11-LABEL: name: icmp_eq_s16_vs
- ; GFX11: liveins: $sgpr0, $vgpr0
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GFX11-NEXT: [[V_CMP_EQ_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_fake16_e64_]]
+ ; GFX11-TRUE16-LABEL: name: icmp_eq_s16_vs
+ ; GFX11-TRUE16: liveins: $sgpr0, $vgpr0
+ ; GFX11-TRUE16-NEXT: {{ $}}
+ ; GFX11-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-TRUE16-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX11-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
+ ; GFX11-TRUE16-NEXT: [[V_CMP_EQ_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_t16_e64 0, [[COPY2]], 0, [[COPY1]], 0, implicit $exec
+ ; GFX11-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_t16_e64_]]
+ ;
+ ; GFX11-FAKE16-LABEL: name: icmp_eq_s16_vs
+ ; GFX11-FAKE16: liveins: $sgpr0, $vgpr0
+ ; GFX11-FAKE16-NEXT: {{ $}}
+ ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-FAKE16-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX11-FAKE16-NEXT: [[V_CMP_EQ_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_fake16_e64_]]
+ ;
+ ; GFX12-TRUE16-LABEL: name: icmp_eq_s16_vs
+ ; GFX12-TRUE16: liveins: $sgpr0, $vgpr0
+ ; GFX12-TRUE16-NEXT: {{ $}}
+ ; GFX12-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX12-TRUE16-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX12-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
+ ; GFX12-TRUE16-NEXT: [[V_CMP_EQ_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_t16_e64 0, [[COPY2]], 0, [[COPY1]], 0, implicit $exec
+ ; GFX12-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_t16_e64_]]
+ ;
+ ; GFX12-FAKE16-LABEL: name: icmp_eq_s16_vs
+ ; GFX12-FAKE16: liveins: $sgpr0, $vgpr0
+ ; GFX12-FAKE16-NEXT: {{ $}}
+ ; GFX12-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX12-FAKE16-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX12-FAKE16-NEXT: [[V_CMP_EQ_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX12-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr(s32) = COPY $sgpr0
%2:vgpr(s16) = G_TRUNC %0
@@ -111,13 +166,41 @@ body: |
; WAVE32-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]]
;
- ; GFX11-LABEL: name: icmp_eq_s16_vv
- ; GFX11: liveins: $vgpr0, $vgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX11-NEXT: [[V_CMP_EQ_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_fake16_e64_]]
+ ; GFX11-TRUE16-LABEL: name: icmp_eq_s16_vv
+ ; GFX11-TRUE16: liveins: $vgpr0, $vgpr1
+ ; GFX11-TRUE16-NEXT: {{ $}}
+ ; GFX11-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
+ ; GFX11-TRUE16-NEXT: [[COPY3:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16
+ ; GFX11-TRUE16-NEXT: [[V_CMP_EQ_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_t16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $exec
+ ; GFX11-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_t16_e64_]]
+ ;
+ ; GFX11-FAKE16-LABEL: name: icmp_eq_s16_vv
+ ; GFX11-FAKE16: liveins: $vgpr0, $vgpr1
+ ; GFX11-FAKE16-NEXT: {{ $}}
+ ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-FAKE16-NEXT: [[V_CMP_EQ_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_fake16_e64_]]
+ ;
+ ; GFX12-TRUE16-LABEL: name: icmp_eq_s16_vv
+ ; GFX12-TRUE16: liveins: $vgpr0, $vgpr1
+ ; GFX12-TRUE16-NEXT: {{ $}}
+ ; GFX12-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX12-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX12-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
+ ; GFX12-TRUE16-NEXT: [[COPY3:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16
+ ; GFX12-TRUE16-NEXT: [[V_CMP_EQ_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_t16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $exec
+ ; GFX12-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_t16_e64_]]
+ ;
+ ; GFX12-FAKE16-LABEL: name: icmp_eq_s16_vv
+ ; GFX12-FAKE16: liveins: $vgpr0, $vgpr1
+ ; GFX12-FAKE16-NEXT: {{ $}}
+ ; GFX12-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX12-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX12-FAKE16-NEXT: [[V_CMP_EQ_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX12-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
@@ -152,13 +235,41 @@ body: |
; WAVE32-NEXT: [[V_CMP_NE_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_NE_U16_e64_]]
;
- ; GFX11-LABEL: name: icmp_ne_s16_vv
- ; GFX11: liveins: $vgpr0, $vgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX11-NEXT: [[V_CMP_NE_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_NE_U16_fake16_e64_]]
+ ; GFX11-TRUE16-LABEL: name: icmp_ne_s16_vv
+ ; GFX11-TRUE16: liveins: $vgpr0, $vgpr1
+ ; GFX11-TRUE16-NEXT: {{ $}}
+ ; GFX11-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
+ ; GFX11-TRUE16-NEXT: [[COPY3:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16
+ ; GFX11-TRUE16-NEXT: [[V_CMP_NE_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U16_t16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $exec
+ ; GFX11-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_NE_U16_t16_e64_]]
+ ;
+ ; GFX11-FAKE16-LABEL: name: icmp_ne_s16_vv
+ ; GFX11-FAKE16: liveins: $vgpr0, $vgpr1
+ ; GFX11-FAKE16-NEXT: {{ $}}
+ ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-FAKE16-NEXT: [[V_CMP_NE_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_NE_U16_fake16_e64_]]
+ ;
+ ; GFX12-TRUE16-LABEL: name: icmp_ne_s16_vv
+ ; GFX12-TRUE16: liveins: $vgpr0, $vgpr1
+ ; GFX12-TRUE16-NEXT: {{ $}}
+ ; GFX12-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX12-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX12-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
+ ; GFX12-TRUE16-NEXT: [[COPY3:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16
+ ; GFX12-TRUE16-NEXT: [[V_CMP_NE_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U16_t16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $exec
+ ; GFX12-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_NE_U16_t16_e64_]]
+ ;
+ ; GFX12-FAKE16-LABEL: name: icmp_ne_s16_vv
+ ; GFX12-FAKE16: liveins: $vgpr0, $vgpr1
+ ; GFX12-FAKE16-NEXT: {{ $}}
+ ; GFX12-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX12-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX12-FAKE16-NEXT: [[V_CMP_NE_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX12-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_NE_U16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
@@ -193,13 +304,41 @@ body: |
; WAVE32-NEXT: [[V_CMP_LT_I16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_I16_e64_]]
;
- ; GFX11-LABEL: name: icmp_slt_s16_vv
- ; GFX11: liveins: $vgpr0, $vgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX11-NEXT: [[V_CMP_LT_I16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_I16_fake16_e64_]]
+ ; GFX11-TRUE16-LABEL: name: icmp_slt_s16_vv
+ ; GFX11-TRUE16: liveins: $vgpr0, $vgpr1
+ ; GFX11-TRUE16-NEXT: {{ $}}
+ ; GFX11-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
+ ; GFX11-TRUE16-NEXT: [[COPY3:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16
+ ; GFX11-TRUE16-NEXT: [[V_CMP_LT_I16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I16_t16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $exec
+ ; GFX11-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_I16_t16_e64_]]
+ ;
+ ; GFX11-FAKE16-LABEL: name: icmp_slt_s16_vv
+ ; GFX11-FAKE16: liveins: $vgpr0, $vgpr1
+ ; GFX11-FAKE16-NEXT: {{ $}}
+ ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-FAKE16-NEXT: [[V_CMP_LT_I16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_I16_fake16_e64_]]
+ ;
+ ; GFX12-TRUE16-LABEL: name: icmp_slt_s16_vv
+ ; GFX12-TRUE16: liveins: $vgpr0, $vgpr1
+ ; GFX12-TRUE16-NEXT: {{ $}}
+ ; GFX12-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX12-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX12-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
+ ; GFX12-TRUE16-NEXT: [[COPY3:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16
+ ; GFX12-TRUE16-NEXT: [[V_CMP_LT_I16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I16_t16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $exec
+ ; GFX12-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_I16_t16_e64_]]
+ ;
+ ; GFX12-FAKE16-LABEL: name: icmp_slt_s16_vv
+ ; GFX12-FAKE16: liveins: $vgpr0, $vgpr1
+ ; GFX12-FAKE16-NEXT: {{ $}}
+ ; GFX12-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX12-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX12-FAKE16-NEXT: [[V_CMP_LT_I16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX12-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_I16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
@@ -234,13 +373,41 @@ body: |
; WAVE32-NEXT: [[V_CMP_LE_I16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_I16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_I16_e64_]]
;
- ; GFX11-LABEL: name: icmp_sle_s16_vv
- ; GFX11: liveins: $vgpr0, $vgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX11-NEXT: [[V_CMP_LE_I16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_I16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_I16_fake16_e64_]]
+ ; GFX11-TRUE16-LABEL: name: icmp_sle_s16_vv
+ ; GFX11-TRUE16: liveins: $vgpr0, $vgpr1
+ ; GFX11-TRUE16-NEXT: {{ $}}
+ ; GFX11-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
+ ; GFX11-TRUE16-NEXT: [[COPY3:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16
+ ; GFX11-TRUE16-NEXT: [[V_CMP_LE_I16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_I16_t16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $exec
+ ; GFX11-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_I16_t16_e64_]]
+ ;
+ ; GFX11-FAKE16-LABEL: name: icmp_sle_s16_vv
+ ; GFX11-FAKE16: liveins: $vgpr0, $vgpr1
+ ; GFX11-FAKE16-NEXT: {{ $}}
+ ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-FAKE16-NEXT: [[V_CMP_LE_I16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_I16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_I16_fake16_e64_]]
+ ;
+ ; GFX12-TRUE16-LABEL: name: icmp_sle_s16_vv
+ ; GFX12-TRUE16: liveins: $vgpr0, $vgpr1
+ ; GFX12-TRUE16-NEXT: {{ $}}
+ ; GFX12-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX12-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX12-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
+ ; GFX12-TRUE16-NEXT: [[COPY3:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16
+ ; GFX12-TRUE16-NEXT: [[V_CMP_LE_I16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_I16_t16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $exec
+ ; GFX12-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_I16_t16_e64_]]
+ ;
+ ; GFX12-FAKE16-LABEL: name: icmp_sle_s16_vv
+ ; GFX12-FAKE16: liveins: $vgpr0, $vgpr1
+ ; GFX12-FAKE16-NEXT: {{ $}}
+ ; GFX12-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX12-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX12-FAKE16-NEXT: [[V_CMP_LE_I16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_I16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+ ; GFX12-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_I16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
@@ -275,13 +442,41 @@ body: |
; WAVE32-NEXT: [[V_CMP_LT_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_U16_e64 [[COPY]], [[COPY1]], implicit $exec
; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_U16_e64_]]
;
- ; GFX11-LABEL: name: icmp_ult_s16_vv
- ; GFX11: liveins: $vgpr0, $vgpr1
- ; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX11-NEXT: [[V_CMP_LT_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_U16_fake16_e64_]]
+ ; GFX11-TRUE16-LABEL: name: icmp_ult_s16_vv
+ ; GFX11-TRUE16: liveins: $vgpr0, $vgpr1
+ ; GFX11-TRUE16-NEXT: {{ $}}
+ ; GFX11-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
+ ; GFX11-TRUE16-NEXT: [[COPY3:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16
+ ; GFX11-TRUE16-NEXT: [[V_CMP_LT_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_U16_t16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $exec
+ ; GFX11-TRUE16-NEXT: ...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/129287
More information about the llvm-commits
mailing list