[llvm] 15a5b3a - [AMDGPU][True16][CodeGen] gisel true16 for ICMP (#128913)

via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 13 09:03:21 PDT 2025


Author: Brox Chen
Date: 2025-03-13T12:03:17-04:00
New Revision: 15a5b3a192165bb1372c89bfc841067dd26170eb

URL: https://github.com/llvm/llvm-project/commit/15a5b3a192165bb1372c89bfc841067dd26170eb
DIFF: https://github.com/llvm/llvm-project/commit/15a5b3a192165bb1372c89bfc841067dd26170eb.diff

LOG: [AMDGPU][True16][CodeGen] gisel true16 for ICMP (#128913)

GlobalIsel true16 selection for ICMP

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
    llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 441fb5730a6d8..2ee82381c4ef0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -1478,10 +1478,21 @@ bool AMDGPUInstructionSelector::selectG_ICMP_or_FCMP(MachineInstr &I) const {
   if (Opcode == -1)
     return false;
 
-  MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode),
-            I.getOperand(0).getReg())
-            .add(I.getOperand(2))
-            .add(I.getOperand(3));
+  MachineInstrBuilder ICmp;
+  // t16 instructions
+  if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src0_modifiers)) {
+    ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode), I.getOperand(0).getReg())
+               .addImm(0)
+               .add(I.getOperand(2))
+               .addImm(0)
+               .add(I.getOperand(3))
+               .addImm(0); // op_sel
+  } else {
+    ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode), I.getOperand(0).getReg())
+               .add(I.getOperand(2))
+               .add(I.getOperand(3));
+  }
+
   RBI.constrainGenericRegister(ICmp->getOperand(0).getReg(),
                                *TRI.getBoolRC(), *MRI);
   bool Ret = constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI);
@@ -4596,6 +4607,7 @@ AMDGPUInstructionSelector::selectVOP3OpSelMods(MachineOperand &Root) const {
   }};
 }
 
+// FIXME-TRUE16 remove when fake16 is removed
 InstructionSelector::ComplexRendererFns
 AMDGPUInstructionSelector::selectVINTERPMods(MachineOperand &Root) const {
   Register Src;

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir
index d45bc31a12729..63aa8b4dc1b4f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.s16.mir
@@ -1,7 +1,10 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE64 %s
 # RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE32 %s
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX11 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefixes=GFX11-TRUE16 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefixes=GFX11-FAKE16 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefixes=GFX12-TRUE16 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefixes=GFX12-FAKE16 %s
 
 ---
 
@@ -29,13 +32,39 @@ body: |
     ; WAVE32-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec
     ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]]
     ;
-    ; GFX11-LABEL: name: icmp_eq_s16_sv
-    ; GFX11: liveins: $sgpr0, $vgpr0
-    ; GFX11-NEXT: {{  $}}
-    ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-    ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX11-NEXT: [[V_CMP_EQ_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
-    ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_fake16_e64_]]
+    ; GFX11-TRUE16-LABEL: name: icmp_eq_s16_sv
+    ; GFX11-TRUE16: liveins: $sgpr0, $vgpr0
+    ; GFX11-TRUE16-NEXT: {{  $}}
+    ; GFX11-TRUE16-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GFX11-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX11-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16
+    ; GFX11-TRUE16-NEXT: [[V_CMP_EQ_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_t16_e64 0, [[COPY]], 0, [[COPY2]], 0, implicit $exec
+    ; GFX11-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_t16_e64_]]
+    ;
+    ; GFX11-FAKE16-LABEL: name: icmp_eq_s16_sv
+    ; GFX11-FAKE16: liveins: $sgpr0, $vgpr0
+    ; GFX11-FAKE16-NEXT: {{  $}}
+    ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GFX11-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX11-FAKE16-NEXT: [[V_CMP_EQ_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+    ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_fake16_e64_]]
+    ;
+    ; GFX12-TRUE16-LABEL: name: icmp_eq_s16_sv
+    ; GFX12-TRUE16: liveins: $sgpr0, $vgpr0
+    ; GFX12-TRUE16-NEXT: {{  $}}
+    ; GFX12-TRUE16-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GFX12-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX12-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16
+    ; GFX12-TRUE16-NEXT: [[V_CMP_EQ_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_t16_e64 0, [[COPY]], 0, [[COPY2]], 0, implicit $exec
+    ; GFX12-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_t16_e64_]]
+    ;
+    ; GFX12-FAKE16-LABEL: name: icmp_eq_s16_sv
+    ; GFX12-FAKE16: liveins: $sgpr0, $vgpr0
+    ; GFX12-FAKE16-NEXT: {{  $}}
+    ; GFX12-FAKE16-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GFX12-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX12-FAKE16-NEXT: [[V_CMP_EQ_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+    ; GFX12-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_fake16_e64_]]
     %0:sgpr(s32) = COPY $sgpr0
     %1:vgpr(s32) = COPY $vgpr0
     %2:sgpr(s16) = G_TRUNC %0
@@ -70,13 +99,39 @@ body: |
     ; WAVE32-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec
     ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]]
     ;
-    ; GFX11-LABEL: name: icmp_eq_s16_vs
-    ; GFX11: liveins: $sgpr0, $vgpr0
-    ; GFX11-NEXT: {{  $}}
-    ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
-    ; GFX11-NEXT: [[V_CMP_EQ_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
-    ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_fake16_e64_]]
+    ; GFX11-TRUE16-LABEL: name: icmp_eq_s16_vs
+    ; GFX11-TRUE16: liveins: $sgpr0, $vgpr0
+    ; GFX11-TRUE16-NEXT: {{  $}}
+    ; GFX11-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX11-TRUE16-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GFX11-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
+    ; GFX11-TRUE16-NEXT: [[V_CMP_EQ_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_t16_e64 0, [[COPY2]], 0, [[COPY1]], 0, implicit $exec
+    ; GFX11-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_t16_e64_]]
+    ;
+    ; GFX11-FAKE16-LABEL: name: icmp_eq_s16_vs
+    ; GFX11-FAKE16: liveins: $sgpr0, $vgpr0
+    ; GFX11-FAKE16-NEXT: {{  $}}
+    ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX11-FAKE16-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GFX11-FAKE16-NEXT: [[V_CMP_EQ_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+    ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_fake16_e64_]]
+    ;
+    ; GFX12-TRUE16-LABEL: name: icmp_eq_s16_vs
+    ; GFX12-TRUE16: liveins: $sgpr0, $vgpr0
+    ; GFX12-TRUE16-NEXT: {{  $}}
+    ; GFX12-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX12-TRUE16-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GFX12-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
+    ; GFX12-TRUE16-NEXT: [[V_CMP_EQ_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_t16_e64 0, [[COPY2]], 0, [[COPY1]], 0, implicit $exec
+    ; GFX12-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_t16_e64_]]
+    ;
+    ; GFX12-FAKE16-LABEL: name: icmp_eq_s16_vs
+    ; GFX12-FAKE16: liveins: $sgpr0, $vgpr0
+    ; GFX12-FAKE16-NEXT: {{  $}}
+    ; GFX12-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX12-FAKE16-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GFX12-FAKE16-NEXT: [[V_CMP_EQ_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+    ; GFX12-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_fake16_e64_]]
     %0:vgpr(s32) = COPY $vgpr0
     %1:sgpr(s32) = COPY $sgpr0
     %2:vgpr(s16) = G_TRUNC %0
@@ -111,13 +166,41 @@ body: |
     ; WAVE32-NEXT: [[V_CMP_EQ_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_e64 [[COPY]], [[COPY1]], implicit $exec
     ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_e64_]]
     ;
-    ; GFX11-LABEL: name: icmp_eq_s16_vv
-    ; GFX11: liveins: $vgpr0, $vgpr1
-    ; GFX11-NEXT: {{  $}}
-    ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-    ; GFX11-NEXT: [[V_CMP_EQ_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
-    ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_fake16_e64_]]
+    ; GFX11-TRUE16-LABEL: name: icmp_eq_s16_vv
+    ; GFX11-TRUE16: liveins: $vgpr0, $vgpr1
+    ; GFX11-TRUE16-NEXT: {{  $}}
+    ; GFX11-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX11-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX11-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
+    ; GFX11-TRUE16-NEXT: [[COPY3:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16
+    ; GFX11-TRUE16-NEXT: [[V_CMP_EQ_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_t16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $exec
+    ; GFX11-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_t16_e64_]]
+    ;
+    ; GFX11-FAKE16-LABEL: name: icmp_eq_s16_vv
+    ; GFX11-FAKE16: liveins: $vgpr0, $vgpr1
+    ; GFX11-FAKE16-NEXT: {{  $}}
+    ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX11-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX11-FAKE16-NEXT: [[V_CMP_EQ_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+    ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_fake16_e64_]]
+    ;
+    ; GFX12-TRUE16-LABEL: name: icmp_eq_s16_vv
+    ; GFX12-TRUE16: liveins: $vgpr0, $vgpr1
+    ; GFX12-TRUE16-NEXT: {{  $}}
+    ; GFX12-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX12-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX12-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
+    ; GFX12-TRUE16-NEXT: [[COPY3:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16
+    ; GFX12-TRUE16-NEXT: [[V_CMP_EQ_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_t16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $exec
+    ; GFX12-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_t16_e64_]]
+    ;
+    ; GFX12-FAKE16-LABEL: name: icmp_eq_s16_vv
+    ; GFX12-FAKE16: liveins: $vgpr0, $vgpr1
+    ; GFX12-FAKE16-NEXT: {{  $}}
+    ; GFX12-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX12-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX12-FAKE16-NEXT: [[V_CMP_EQ_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+    ; GFX12-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_EQ_U16_fake16_e64_]]
     %0:vgpr(s32) = COPY $vgpr0
     %1:vgpr(s32) = COPY $vgpr1
     %2:vgpr(s16) = G_TRUNC %0
@@ -152,13 +235,41 @@ body: |
     ; WAVE32-NEXT: [[V_CMP_NE_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U16_e64 [[COPY]], [[COPY1]], implicit $exec
     ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_NE_U16_e64_]]
     ;
-    ; GFX11-LABEL: name: icmp_ne_s16_vv
-    ; GFX11: liveins: $vgpr0, $vgpr1
-    ; GFX11-NEXT: {{  $}}
-    ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-    ; GFX11-NEXT: [[V_CMP_NE_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
-    ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_NE_U16_fake16_e64_]]
+    ; GFX11-TRUE16-LABEL: name: icmp_ne_s16_vv
+    ; GFX11-TRUE16: liveins: $vgpr0, $vgpr1
+    ; GFX11-TRUE16-NEXT: {{  $}}
+    ; GFX11-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX11-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX11-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
+    ; GFX11-TRUE16-NEXT: [[COPY3:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16
+    ; GFX11-TRUE16-NEXT: [[V_CMP_NE_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U16_t16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $exec
+    ; GFX11-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_NE_U16_t16_e64_]]
+    ;
+    ; GFX11-FAKE16-LABEL: name: icmp_ne_s16_vv
+    ; GFX11-FAKE16: liveins: $vgpr0, $vgpr1
+    ; GFX11-FAKE16-NEXT: {{  $}}
+    ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX11-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX11-FAKE16-NEXT: [[V_CMP_NE_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+    ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_NE_U16_fake16_e64_]]
+    ;
+    ; GFX12-TRUE16-LABEL: name: icmp_ne_s16_vv
+    ; GFX12-TRUE16: liveins: $vgpr0, $vgpr1
+    ; GFX12-TRUE16-NEXT: {{  $}}
+    ; GFX12-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX12-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX12-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
+    ; GFX12-TRUE16-NEXT: [[COPY3:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16
+    ; GFX12-TRUE16-NEXT: [[V_CMP_NE_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U16_t16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $exec
+    ; GFX12-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_NE_U16_t16_e64_]]
+    ;
+    ; GFX12-FAKE16-LABEL: name: icmp_ne_s16_vv
+    ; GFX12-FAKE16: liveins: $vgpr0, $vgpr1
+    ; GFX12-FAKE16-NEXT: {{  $}}
+    ; GFX12-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX12-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX12-FAKE16-NEXT: [[V_CMP_NE_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+    ; GFX12-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_NE_U16_fake16_e64_]]
     %0:vgpr(s32) = COPY $vgpr0
     %1:vgpr(s32) = COPY $vgpr1
     %2:vgpr(s16) = G_TRUNC %0
@@ -193,13 +304,41 @@ body: |
     ; WAVE32-NEXT: [[V_CMP_LT_I16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I16_e64 [[COPY]], [[COPY1]], implicit $exec
     ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_I16_e64_]]
     ;
-    ; GFX11-LABEL: name: icmp_slt_s16_vv
-    ; GFX11: liveins: $vgpr0, $vgpr1
-    ; GFX11-NEXT: {{  $}}
-    ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-    ; GFX11-NEXT: [[V_CMP_LT_I16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
-    ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_I16_fake16_e64_]]
+    ; GFX11-TRUE16-LABEL: name: icmp_slt_s16_vv
+    ; GFX11-TRUE16: liveins: $vgpr0, $vgpr1
+    ; GFX11-TRUE16-NEXT: {{  $}}
+    ; GFX11-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX11-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX11-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
+    ; GFX11-TRUE16-NEXT: [[COPY3:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16
+    ; GFX11-TRUE16-NEXT: [[V_CMP_LT_I16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I16_t16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $exec
+    ; GFX11-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_I16_t16_e64_]]
+    ;
+    ; GFX11-FAKE16-LABEL: name: icmp_slt_s16_vv
+    ; GFX11-FAKE16: liveins: $vgpr0, $vgpr1
+    ; GFX11-FAKE16-NEXT: {{  $}}
+    ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX11-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX11-FAKE16-NEXT: [[V_CMP_LT_I16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+    ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_I16_fake16_e64_]]
+    ;
+    ; GFX12-TRUE16-LABEL: name: icmp_slt_s16_vv
+    ; GFX12-TRUE16: liveins: $vgpr0, $vgpr1
+    ; GFX12-TRUE16-NEXT: {{  $}}
+    ; GFX12-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX12-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX12-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
+    ; GFX12-TRUE16-NEXT: [[COPY3:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16
+    ; GFX12-TRUE16-NEXT: [[V_CMP_LT_I16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I16_t16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $exec
+    ; GFX12-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_I16_t16_e64_]]
+    ;
+    ; GFX12-FAKE16-LABEL: name: icmp_slt_s16_vv
+    ; GFX12-FAKE16: liveins: $vgpr0, $vgpr1
+    ; GFX12-FAKE16-NEXT: {{  $}}
+    ; GFX12-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX12-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX12-FAKE16-NEXT: [[V_CMP_LT_I16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+    ; GFX12-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_I16_fake16_e64_]]
     %0:vgpr(s32) = COPY $vgpr0
     %1:vgpr(s32) = COPY $vgpr1
     %2:vgpr(s16) = G_TRUNC %0
@@ -234,13 +373,41 @@ body: |
     ; WAVE32-NEXT: [[V_CMP_LE_I16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_I16_e64 [[COPY]], [[COPY1]], implicit $exec
     ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_I16_e64_]]
     ;
-    ; GFX11-LABEL: name: icmp_sle_s16_vv
-    ; GFX11: liveins: $vgpr0, $vgpr1
-    ; GFX11-NEXT: {{  $}}
-    ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-    ; GFX11-NEXT: [[V_CMP_LE_I16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_I16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
-    ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_I16_fake16_e64_]]
+    ; GFX11-TRUE16-LABEL: name: icmp_sle_s16_vv
+    ; GFX11-TRUE16: liveins: $vgpr0, $vgpr1
+    ; GFX11-TRUE16-NEXT: {{  $}}
+    ; GFX11-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX11-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX11-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
+    ; GFX11-TRUE16-NEXT: [[COPY3:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16
+    ; GFX11-TRUE16-NEXT: [[V_CMP_LE_I16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_I16_t16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $exec
+    ; GFX11-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_I16_t16_e64_]]
+    ;
+    ; GFX11-FAKE16-LABEL: name: icmp_sle_s16_vv
+    ; GFX11-FAKE16: liveins: $vgpr0, $vgpr1
+    ; GFX11-FAKE16-NEXT: {{  $}}
+    ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX11-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX11-FAKE16-NEXT: [[V_CMP_LE_I16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_I16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+    ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_I16_fake16_e64_]]
+    ;
+    ; GFX12-TRUE16-LABEL: name: icmp_sle_s16_vv
+    ; GFX12-TRUE16: liveins: $vgpr0, $vgpr1
+    ; GFX12-TRUE16-NEXT: {{  $}}
+    ; GFX12-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX12-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX12-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
+    ; GFX12-TRUE16-NEXT: [[COPY3:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16
+    ; GFX12-TRUE16-NEXT: [[V_CMP_LE_I16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_I16_t16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $exec
+    ; GFX12-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_I16_t16_e64_]]
+    ;
+    ; GFX12-FAKE16-LABEL: name: icmp_sle_s16_vv
+    ; GFX12-FAKE16: liveins: $vgpr0, $vgpr1
+    ; GFX12-FAKE16-NEXT: {{  $}}
+    ; GFX12-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX12-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX12-FAKE16-NEXT: [[V_CMP_LE_I16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_I16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+    ; GFX12-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_I16_fake16_e64_]]
     %0:vgpr(s32) = COPY $vgpr0
     %1:vgpr(s32) = COPY $vgpr1
     %2:vgpr(s16) = G_TRUNC %0
@@ -275,13 +442,41 @@ body: |
     ; WAVE32-NEXT: [[V_CMP_LT_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_U16_e64 [[COPY]], [[COPY1]], implicit $exec
     ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_U16_e64_]]
     ;
-    ; GFX11-LABEL: name: icmp_ult_s16_vv
-    ; GFX11: liveins: $vgpr0, $vgpr1
-    ; GFX11-NEXT: {{  $}}
-    ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-    ; GFX11-NEXT: [[V_CMP_LT_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
-    ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_U16_fake16_e64_]]
+    ; GFX11-TRUE16-LABEL: name: icmp_ult_s16_vv
+    ; GFX11-TRUE16: liveins: $vgpr0, $vgpr1
+    ; GFX11-TRUE16-NEXT: {{  $}}
+    ; GFX11-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX11-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX11-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
+    ; GFX11-TRUE16-NEXT: [[COPY3:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16
+    ; GFX11-TRUE16-NEXT: [[V_CMP_LT_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_U16_t16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $exec
+    ; GFX11-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_U16_t16_e64_]]
+    ;
+    ; GFX11-FAKE16-LABEL: name: icmp_ult_s16_vv
+    ; GFX11-FAKE16: liveins: $vgpr0, $vgpr1
+    ; GFX11-FAKE16-NEXT: {{  $}}
+    ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX11-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX11-FAKE16-NEXT: [[V_CMP_LT_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+    ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_U16_fake16_e64_]]
+    ;
+    ; GFX12-TRUE16-LABEL: name: icmp_ult_s16_vv
+    ; GFX12-TRUE16: liveins: $vgpr0, $vgpr1
+    ; GFX12-TRUE16-NEXT: {{  $}}
+    ; GFX12-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX12-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX12-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
+    ; GFX12-TRUE16-NEXT: [[COPY3:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16
+    ; GFX12-TRUE16-NEXT: [[V_CMP_LT_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_U16_t16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $exec
+    ; GFX12-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_U16_t16_e64_]]
+    ;
+    ; GFX12-FAKE16-LABEL: name: icmp_ult_s16_vv
+    ; GFX12-FAKE16: liveins: $vgpr0, $vgpr1
+    ; GFX12-FAKE16-NEXT: {{  $}}
+    ; GFX12-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX12-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX12-FAKE16-NEXT: [[V_CMP_LT_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+    ; GFX12-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_LT_U16_fake16_e64_]]
     %0:vgpr(s32) = COPY $vgpr0
     %1:vgpr(s32) = COPY $vgpr1
     %2:vgpr(s16) = G_TRUNC %0
@@ -316,13 +511,41 @@ body: |
     ; WAVE32-NEXT: [[V_CMP_LE_U16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_U16_e64 [[COPY]], [[COPY1]], implicit $exec
     ; WAVE32-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_U16_e64_]]
     ;
-    ; GFX11-LABEL: name: icmp_ule_s16_vv
-    ; GFX11: liveins: $vgpr0, $vgpr1
-    ; GFX11-NEXT: {{  $}}
-    ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-    ; GFX11-NEXT: [[V_CMP_LE_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
-    ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_U16_fake16_e64_]]
+    ; GFX11-TRUE16-LABEL: name: icmp_ule_s16_vv
+    ; GFX11-TRUE16: liveins: $vgpr0, $vgpr1
+    ; GFX11-TRUE16-NEXT: {{  $}}
+    ; GFX11-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX11-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX11-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
+    ; GFX11-TRUE16-NEXT: [[COPY3:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16
+    ; GFX11-TRUE16-NEXT: [[V_CMP_LE_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_U16_t16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $exec
+    ; GFX11-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_U16_t16_e64_]]
+    ;
+    ; GFX11-FAKE16-LABEL: name: icmp_ule_s16_vv
+    ; GFX11-FAKE16: liveins: $vgpr0, $vgpr1
+    ; GFX11-FAKE16-NEXT: {{  $}}
+    ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX11-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX11-FAKE16-NEXT: [[V_CMP_LE_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+    ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_U16_fake16_e64_]]
+    ;
+    ; GFX12-TRUE16-LABEL: name: icmp_ule_s16_vv
+    ; GFX12-TRUE16: liveins: $vgpr0, $vgpr1
+    ; GFX12-TRUE16-NEXT: {{  $}}
+    ; GFX12-TRUE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX12-TRUE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX12-TRUE16-NEXT: [[COPY2:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
+    ; GFX12-TRUE16-NEXT: [[COPY3:%[0-9]+]]:vgpr_16 = COPY [[COPY1]].lo16
+    ; GFX12-TRUE16-NEXT: [[V_CMP_LE_U16_t16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_U16_t16_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $exec
+    ; GFX12-TRUE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_U16_t16_e64_]]
+    ;
+    ; GFX12-FAKE16-LABEL: name: icmp_ule_s16_vv
+    ; GFX12-FAKE16: liveins: $vgpr0, $vgpr1
+    ; GFX12-FAKE16-NEXT: {{  $}}
+    ; GFX12-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX12-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX12-FAKE16-NEXT: [[V_CMP_LE_U16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_LE_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
+    ; GFX12-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_LE_U16_fake16_e64_]]
     %0:vgpr(s32) = COPY $vgpr0
     %1:vgpr(s32) = COPY $vgpr1
     %2:vgpr(s16) = G_TRUNC %0


        


More information about the llvm-commits mailing list