[llvm] [AMDGPU][True16][CodeGen] gisel true16 for G_Merge and ICMP (PR #128913)

via llvm-commits llvm-commits at lists.llvm.org
Thu Feb 27 15:48:51 PST 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: Brox Chen (broxigarchen)

<details>
<summary>Changes</summary>

GlobalIsel true16 selection fro G_MERGE and ICMP

---

Patch is 159.58 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/128913.diff


3 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp (+13-5) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir (+280-57) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir (+989-529) 


``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index a787c10a9421c..4ee1c9ef86479 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -639,7 +639,7 @@ bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(MachineInstr &MI) const {
   LLT SrcTy = MRI->getType(MI.getOperand(1).getReg());
 
   const unsigned SrcSize = SrcTy.getSizeInBits();
-  if (SrcSize < 32)
+  if (SrcSize < 16)
     return selectImpl(MI, *CoverageInfo);
 
   const DebugLoc &DL = MI.getDebugLoc();
@@ -1478,10 +1478,17 @@ bool AMDGPUInstructionSelector::selectG_ICMP_or_FCMP(MachineInstr &I) const {
   if (Opcode == -1)
     return false;
 
-  MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode),
-            I.getOperand(0).getReg())
-            .add(I.getOperand(2))
-            .add(I.getOperand(3));
+  MachineInstrBuilder ICmp =
+      BuildMI(*BB, &I, DL, TII.get(Opcode), I.getOperand(0).getReg());
+  if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src0_modifiers))
+    ICmp.addImm(0);
+  ICmp.add(I.getOperand(2));
+  if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src1_modifiers))
+    ICmp.addImm(0);
+  ICmp.add(I.getOperand(3));
+  if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::op_sel))
+    ICmp.addImm(0); // op_sel
+
   RBI.constrainGenericRegister(ICmp->getOperand(0).getReg(),
                                *TRI.getBoolRC(), *MRI);
   bool Ret = constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI);
@@ -4597,6 +4604,7 @@ AMDGPUInstructionSelector::selectVOP3OpSelMods(MachineOperand &Root) const {
   }};
 }
 
+// FIXME-TRUE16 remove when fake16 is removed
 InstructionSelector::ComplexRendererFns
 AMDGPUInstructionSelector::selectVINTERPMods(MachineOperand &Root) const {
   Register Src;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir
index d45bc31a12729..d2a3c8ea88c07 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir
@@ -1,7 +1,10 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE64 %s
 # RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE32 %s
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX11 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefixes=GFX12,GFX12-TRUE16 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefixes=GFX12,GFX12-FAKE16 %s
 
 ---
 
@@ -29,13 +32,39 @@ body: |
     ; WAVE32-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec
     ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]]
     ;
-    ; GFX11-LABEL: name: icmp_eq_s16_sv
-    ; GFX11: liveins: $sgpr0, $vgpr0
-    ; GFX11-NEXT: {{  $}}
-    ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-    ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX11-NEXT: [[V_CMP_EQ_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
-    ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_fake16_e64_]]
+    ; GFX11-TRUE16-LABEL: name: icmp_eq_s16_sv
+    ; GFX11-TRUE16: liveins: $sgpr0, $vgpr0
+    ; GFX11-TRUE16-NEXT: {{  $}}
+    ; GFX11-TRUE16-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GFX11-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX11-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16
+    ; GFX11-TRUE16-NEXT: [[V_CMP_EQ_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_t16_e64 0, [[COPY]], 0, [[COPY2]], 0, implicit $exec
+    ; GFX11-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_t16_e64_]]
+    ;
+    ; GFX11-FAKE16-LABEL: name: icmp_eq_s16_sv
+    ; GFX11-FAKE16: liveins: $sgpr0, $vgpr0
+    ; GFX11-FAKE16-NEXT: {{  $}}
+    ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GFX11-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX11-FAKE16-NEXT: [[V_CMP_EQ_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+    ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_fake16_e64_]]
+    ;
+    ; GFX12-TRUE16-LABEL: name: icmp_eq_s16_sv
+    ; GFX12-TRUE16: liveins: $sgpr0, $vgpr0
+    ; GFX12-TRUE16-NEXT: {{  $}}
+    ; GFX12-TRUE16-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GFX12-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX12-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16
+    ; GFX12-TRUE16-NEXT: [[V_CMP_EQ_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_t16_e64 0, [[COPY]], 0, [[COPY2]], 0, implicit $exec
+    ; GFX12-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_t16_e64_]]
+    ;
+    ; GFX12-FAKE16-LABEL: name: icmp_eq_s16_sv
+    ; GFX12-FAKE16: liveins: $sgpr0, $vgpr0
+    ; GFX12-FAKE16-NEXT: {{  $}}
+    ; GFX12-FAKE16-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GFX12-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX12-FAKE16-NEXT: [[V_CMP_EQ_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+    ; GFX12-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_fake16_e64_]]
     %0:sgpr(s32) = COPY $sgpr0
     %1:vgpr(s32) = COPY $vgpr0
     %2:sgpr(s16) = G_TRUNC %0
@@ -70,13 +99,39 @@ body: |
     ; WAVE32-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec
     ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]]
     ;
-    ; GFX11-LABEL: name: icmp_eq_s16_vs
-    ; GFX11: liveins: $sgpr0, $vgpr0
-    ; GFX11-NEXT: {{  $}}
-    ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
-    ; GFX11-NEXT: [[V_CMP_EQ_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
-    ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_fake16_e64_]]
+    ; GFX11-TRUE16-LABEL: name: icmp_eq_s16_vs
+    ; GFX11-TRUE16: liveins: $sgpr0, $vgpr0
+    ; GFX11-TRUE16-NEXT: {{  $}}
+    ; GFX11-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX11-TRUE16-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GFX11-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
+    ; GFX11-TRUE16-NEXT: [[V_CMP_EQ_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_t16_e64 0, [[COPY2]], 0, [[COPY1]], 0, implicit $exec
+    ; GFX11-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_t16_e64_]]
+    ;
+    ; GFX11-FAKE16-LABEL: name: icmp_eq_s16_vs
+    ; GFX11-FAKE16: liveins: $sgpr0, $vgpr0
+    ; GFX11-FAKE16-NEXT: {{  $}}
+    ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX11-FAKE16-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GFX11-FAKE16-NEXT: [[V_CMP_EQ_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+    ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_fake16_e64_]]
+    ;
+    ; GFX12-TRUE16-LABEL: name: icmp_eq_s16_vs
+    ; GFX12-TRUE16: liveins: $sgpr0, $vgpr0
+    ; GFX12-TRUE16-NEXT: {{  $}}
+    ; GFX12-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX12-TRUE16-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GFX12-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
+    ; GFX12-TRUE16-NEXT: [[V_CMP_EQ_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_t16_e64 0, [[COPY2]], 0, [[COPY1]], 0, implicit $exec
+    ; GFX12-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_t16_e64_]]
+    ;
+    ; GFX12-FAKE16-LABEL: name: icmp_eq_s16_vs
+    ; GFX12-FAKE16: liveins: $sgpr0, $vgpr0
+    ; GFX12-FAKE16-NEXT: {{  $}}
+    ; GFX12-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX12-FAKE16-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GFX12-FAKE16-NEXT: [[V_CMP_EQ_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+    ; GFX12-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_fake16_e64_]]
     %0:vgpr(s32) = COPY $vgpr0
     %1:sgpr(s32) = COPY $sgpr0
     %2:vgpr(s16) = G_TRUNC %0
@@ -111,13 +166,41 @@ body: |
     ; WAVE32-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec
     ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]]
     ;
-    ; GFX11-LABEL: name: icmp_eq_s16_vv
-    ; GFX11: liveins: $vgpr0, $vgpr1
-    ; GFX11-NEXT: {{  $}}
-    ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-    ; GFX11-NEXT: [[V_CMP_EQ_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
-    ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_fake16_e64_]]
+    ; GFX11-TRUE16-LABEL: name: icmp_eq_s16_vv
+    ; GFX11-TRUE16: liveins: $vgpr0, $vgpr1
+    ; GFX11-TRUE16-NEXT: {{  $}}
+    ; GFX11-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX11-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX11-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
+    ; GFX11-TRUE16-NEXT: [[COPY3:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16
+    ; GFX11-TRUE16-NEXT: [[V_CMP_EQ_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_t16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $exec
+    ; GFX11-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_t16_e64_]]
+    ;
+    ; GFX11-FAKE16-LABEL: name: icmp_eq_s16_vv
+    ; GFX11-FAKE16: liveins: $vgpr0, $vgpr1
+    ; GFX11-FAKE16-NEXT: {{  $}}
+    ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX11-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX11-FAKE16-NEXT: [[V_CMP_EQ_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+    ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_fake16_e64_]]
+    ;
+    ; GFX12-TRUE16-LABEL: name: icmp_eq_s16_vv
+    ; GFX12-TRUE16: liveins: $vgpr0, $vgpr1
+    ; GFX12-TRUE16-NEXT: {{  $}}
+    ; GFX12-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX12-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX12-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
+    ; GFX12-TRUE16-NEXT: [[COPY3:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16
+    ; GFX12-TRUE16-NEXT: [[V_CMP_EQ_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_t16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $exec
+    ; GFX12-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_t16_e64_]]
+    ;
+    ; GFX12-FAKE16-LABEL: name: icmp_eq_s16_vv
+    ; GFX12-FAKE16: liveins: $vgpr0, $vgpr1
+    ; GFX12-FAKE16-NEXT: {{  $}}
+    ; GFX12-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX12-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX12-FAKE16-NEXT: [[V_CMP_EQ_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+    ; GFX12-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_fake16_e64_]]
     %0:vgpr(s32) = COPY $vgpr0
     %1:vgpr(s32) = COPY $vgpr1
     %2:vgpr(s16) = G_TRUNC %0
@@ -152,13 +235,41 @@ body: |
     ; WAVE32-NEXT: [[V_CMP_NE_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U16_e64 [[COPY]], [[COPY1]], implicit $exec
     ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_NE_U16_e64_]]
     ;
-    ; GFX11-LABEL: name: icmp_ne_s16_vv
-    ; GFX11: liveins: $vgpr0, $vgpr1
-    ; GFX11-NEXT: {{  $}}
-    ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-    ; GFX11-NEXT: [[V_CMP_NE_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
-    ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_NE_U16_fake16_e64_]]
+    ; GFX11-TRUE16-LABEL: name: icmp_ne_s16_vv
+    ; GFX11-TRUE16: liveins: $vgpr0, $vgpr1
+    ; GFX11-TRUE16-NEXT: {{  $}}
+    ; GFX11-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX11-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX11-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
+    ; GFX11-TRUE16-NEXT: [[COPY3:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16
+    ; GFX11-TRUE16-NEXT: [[V_CMP_NE_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U16_t16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $exec
+    ; GFX11-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_NE_U16_t16_e64_]]
+    ;
+    ; GFX11-FAKE16-LABEL: name: icmp_ne_s16_vv
+    ; GFX11-FAKE16: liveins: $vgpr0, $vgpr1
+    ; GFX11-FAKE16-NEXT: {{  $}}
+    ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX11-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX11-FAKE16-NEXT: [[V_CMP_NE_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+    ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_NE_U16_fake16_e64_]]
+    ;
+    ; GFX12-TRUE16-LABEL: name: icmp_ne_s16_vv
+    ; GFX12-TRUE16: liveins: $vgpr0, $vgpr1
+    ; GFX12-TRUE16-NEXT: {{  $}}
+    ; GFX12-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX12-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX12-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
+    ; GFX12-TRUE16-NEXT: [[COPY3:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16
+    ; GFX12-TRUE16-NEXT: [[V_CMP_NE_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U16_t16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $exec
+    ; GFX12-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_NE_U16_t16_e64_]]
+    ;
+    ; GFX12-FAKE16-LABEL: name: icmp_ne_s16_vv
+    ; GFX12-FAKE16: liveins: $vgpr0, $vgpr1
+    ; GFX12-FAKE16-NEXT: {{  $}}
+    ; GFX12-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX12-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX12-FAKE16-NEXT: [[V_CMP_NE_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+    ; GFX12-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_NE_U16_fake16_e64_]]
     %0:vgpr(s32) = COPY $vgpr0
     %1:vgpr(s32) = COPY $vgpr1
     %2:vgpr(s16) = G_TRUNC %0
@@ -193,13 +304,41 @@ body: |
     ; WAVE32-NEXT: [[V_CMP_LT_I16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I16_e64 [[COPY]], [[COPY1]], implicit $exec
     ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_I16_e64_]]
     ;
-    ; GFX11-LABEL: name: icmp_slt_s16_vv
-    ; GFX11: liveins: $vgpr0, $vgpr1
-    ; GFX11-NEXT: {{  $}}
-    ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-    ; GFX11-NEXT: [[V_CMP_LT_I16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
-    ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_I16_fake16_e64_]]
+    ; GFX11-TRUE16-LABEL: name: icmp_slt_s16_vv
+    ; GFX11-TRUE16: liveins: $vgpr0, $vgpr1
+    ; GFX11-TRUE16-NEXT: {{  $}}
+    ; GFX11-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX11-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX11-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
+    ; GFX11-TRUE16-NEXT: [[COPY3:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16
+    ; GFX11-TRUE16-NEXT: [[V_CMP_LT_I16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I16_t16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $exec
+    ; GFX11-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_I16_t16_e64_]]
+    ;
+    ; GFX11-FAKE16-LABEL: name: icmp_slt_s16_vv
+    ; GFX11-FAKE16: liveins: $vgpr0, $vgpr1
+    ; GFX11-FAKE16-NEXT: {{  $}}
+    ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX11-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX11-FAKE16-NEXT: [[V_CMP_LT_I16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+    ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_I16_fake16_e64_]]
+    ;
+    ; GFX12-TRUE16-LABEL: name: icmp_slt_s16_vv
+    ; GFX12-TRUE16: liveins: $vgpr0, $vgpr1
+    ; GFX12-TRUE16-NEXT: {{  $}}
+    ; GFX12-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX12-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX12-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
+    ; GFX12-TRUE16-NEXT: [[COPY3:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16
+    ; GFX12-TRUE16-NEXT: [[V_CMP_LT_I16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I16_t16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $exec
+    ; GFX12-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_I16_t16_e64_]]
+    ;
+    ; GFX12-FAKE16-LABEL: name: icmp_slt_s16_vv
+    ; GFX12-FAKE16: liveins: $vgpr0, $vgpr1
+    ; GFX12-FAKE16-NEXT: {{  $}}
+    ; GFX12-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX12-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX12-FAKE16-NEXT: [[V_CMP_LT_I16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+    ; GFX12-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_I16_fake16_e64_]]
     %0:vgpr(s32) = COPY $vgpr0
     %1:vgpr(s32) = COPY $vgpr1
     %2:vgpr(s16) = G_TRUNC %0
@@ -234,13 +373,41 @@ body: |
     ; WAVE32-NEXT: [[V_CMP_LE_I16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_I16_e64 [[COPY]], [[COPY1]], implicit $exec
     ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_I16_e64_]]
     ;
-    ; GFX11-LABEL: name: icmp_sle_s16_vv
-    ; GFX11: liveins: $vgpr0, $vgpr1
-    ; GFX11-NEXT: {{  $}}
-    ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-    ; GFX11-NEXT: [[V_CMP_LE_I16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_I16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
-    ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_I16_fake16_e64_]]
+    ; GFX11-TRUE16-LABEL: name: icmp_sle_s16_vv
+    ; GFX11-TRUE16: liveins: $vgpr0, $vgpr1
+    ; GFX11-TRUE16-NEXT: {{  $}}
+    ; GFX11-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX11-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX11-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
+    ; GFX11-TRUE16-NEXT: [[COPY3:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16
+    ; GFX11-TRUE16-NEXT: [[V_CMP_LE_I16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_I16_t16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $exec
+    ; GFX11-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_I16_t16_e64_]]
+    ;
+    ; GFX11-FAKE16-LABEL: name: icmp_sle_s16_vv
+    ; GFX11-FAKE16: liveins: $vgpr0, $vgpr1
+    ; GFX11-FAKE16-NEXT: {{  $}}
+    ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX11-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX11-FAKE16-NEXT: [[V_CMP_LE_I16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_I16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+    ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_I16_fake16_e64_]]
+    ;
+    ; GFX12-TRUE16-LABEL: name: icmp_sle_s16_vv
+    ; GFX12-TRUE16: liveins: $vgpr0, $vgpr1
+    ; GFX12-TRUE16-NEXT: {{  $}}
+    ; GFX12-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX12-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX12-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
+    ; GFX12-TRUE16-NEXT: [[COPY3:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16
+    ; GFX12-TRUE16-NEXT: [[V_CMP_LE_I16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_I16_t16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $exec
+    ; GFX12-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_I16_t16_e64_]]
+    ;
+    ; GFX12-FAKE16-LABEL: name: icmp_sle_s16_vv
+    ; GFX12-FAKE16: liveins: $vgpr0, $vgpr1
+    ; GFX12-FAKE16-NEXT: {{  $}}
+    ; GFX12-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX12-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX12-FAKE16-NEXT: [[V_CMP_LE_I16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_I16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+    ; GFX12-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_I16_fake16_e64_]]
     %0:vgpr(s32) = COPY $vgpr...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/128913


More information about the llvm-commits mailing list