[PATCH] D135149: [AMDGPU][GISel] Add missing V2S16 BUILD_VECTOR_TRUNC legalization

Pierre van Houtryve via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Tue Oct 4 05:09:37 PDT 2022


Pierre-vh created this revision.
Pierre-vh added a reviewer: arsenm.
Herald added subscribers: kosarev, foad, kerbowa, hiraditya, t-tye, tpr, dstuttard, yaxunl, nhaehnle, jvesely, kzhuravl.
Herald added a project: All.
Pierre-vh requested review of this revision.
Herald added subscribers: llvm-commits, wdng.
Herald added a project: LLVM.

Previously we would be unable to legalize V2S16 BUILD_VECTOR_TRUNC on GFX8 & below as the custom legalization was missing.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D135149

Files:
  llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
  llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector-trunc.mir


Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector-trunc.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector-trunc.mir
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector-trunc.mir
@@ -2,6 +2,7 @@
 # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s
 # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s
 # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s
 
 ---
 name: legal_s32_to_v2s16
@@ -16,6 +17,19 @@
     ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
     ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY]](s32), [[COPY1]](s32)
     ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    ; VI-LABEL: name: legal_s32_to_v2s16
+    ; VI: liveins: $vgpr0, $vgpr1
+    ; VI-NEXT: {{  $}}
+    ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+    ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
+    ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; VI-NEXT: S_NOP 0, implicit [[BITCAST]](<2 x s16>)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %1
Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -1800,6 +1800,7 @@
   case TargetOpcode::G_FFLOOR:
     return legalizeFFloor(MI, MRI, B);
   case TargetOpcode::G_BUILD_VECTOR:
+  case TargetOpcode::G_BUILD_VECTOR_TRUNC:
     return legalizeBuildVector(MI, MRI, B);
   case TargetOpcode::G_MUL:
     return legalizeMul(Helper, MI);
@@ -2888,11 +2889,24 @@
   MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const {
   Register Dst = MI.getOperand(0).getReg();
   const LLT S32 = LLT::scalar(32);
+  const LLT S16 = LLT::scalar(16);
   assert(MRI.getType(Dst) == LLT::fixed_vector(2, 16));
 
   Register Src0 = MI.getOperand(1).getReg();
   Register Src1 = MI.getOperand(2).getReg();
-  assert(MRI.getType(Src0) == LLT::scalar(16));
+
+  if (MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC) {
+    assert(MRI.getType(Src0) == S32);
+
+    Src0 = MRI.createGenericVirtualRegister(S16);
+    B.buildTrunc(Src0, MI.getOperand(1).getReg());
+
+    Src1 = MRI.createGenericVirtualRegister(S16);
+    B.buildTrunc(Src1, MI.getOperand(2).getReg());
+  }
+
+  assert(MRI.getType(Src0) == S16);
+  assert(MRI.getType(Src1) == S16);
 
   auto Merge = B.buildMerge(S32, {Src0, Src1});
   B.buildBitcast(Dst, Merge);


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D135149.464970.patch
Type: text/x-patch
Size: 3292 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20221004/7233b926/attachment.bin>


More information about the llvm-commits mailing list