[PATCH] D45883: AMDGPU/GlobalISel: Implement select() for 32-bit G_FPTOUI

Fri Apr 20 07:22:25 PDT 2018

tstellar created this revision.
tstellar added reviewers: arsenm, nhaehnle.
Herald added subscribers: t-tye, tpr, dstuttard, kristof.beyls, rovka, yaxunl, wdng, kzhuravl.

Repository:
  rL LLVM

https://reviews.llvm.org/D45883

Files:
  lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
  test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir


Index: test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir
===================================================================

--- /dev/null
+++ test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir
@@ -0,0 +1,31 @@
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN
+
+--- |
+  define amdgpu_kernel void @fptoui(i32 addrspace(1)* %global0) {ret void}
+...
+---
+
+name:            fptoui
+legalized:       true
+regBankSelected: true
+
+# GCN-LABEL: name: fptoui
+body: |
+  bb.0:
+    liveins: $sgpr0, $vgpr0, $vgpr3_vgpr4
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:vgpr(s32) = COPY $vgpr0
+    %2:vgpr(s64) = COPY $vgpr3_vgpr4
+
+    ; fptoui with an SGPR (scalar) source operand
+    ; GCN: V_CVT_U32_F32_e64
+    %3:vgpr(s32) = G_FPTOUI %0
+
+    ; fptoui with a VGPR (vector) source operand
+    ; GCN: V_CVT_U32_F32_e64
+    %4:vgpr(s32) = G_FPTOUI %1
+
+    G_STORE %3, %2 :: (store 4 into %ir.global0)
+    G_STORE %4, %2 :: (store 4 into %ir.global0)
+...
+---
Index: lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -533,6 +533,11 @@
   unsigned Size0 = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI);
   switch (I.getOpcode()) {
   default: break;
+  case TargetOpcode::G_FPTOUI:
+    if (Size0 == 32 &&
+        RBI.getSizeInBits(I.getOperand(1).getReg(), MRI, TRI) == 32)
+      return AMDGPU::V_CVT_U32_F32_e64;
+    break;
   case TargetOpcode::G_OR:
     if (Size0 == 32)
       return AMDGPU::V_OR_B32_e64;
@@ -569,8 +574,11 @@
       BuildMI(*BB, &I, DL, Desc, I.getOperand(0).getReg());
   for (unsigned i = 1, OpIdx = 1, e = Desc.NumOperands; i != e; ++i) {
     int RegClassID = Desc.OpInfo[i].RegClass;
-    if (RegClassID == -1)
+    if (RegClassID == -1) {
+      // Operand has no register class (input/output modifier): default to 0.
+      VALU.addImm(0);
       continue;
+    }
 
     const TargetRegisterClass *RC = TRI.getRegClass(RegClassID);
     const MachineOperand &MO = I.getOperand(OpIdx++);
@@ -611,6 +619,7 @@
   switch (I.getOpcode()) {
   default:
     break;
+  case TargetOpcode::G_FPTOUI:
   case TargetOpcode::G_OR:
     return selectSimple(I);
   case TargetOpcode::G_ADD:


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D45883.143312.patch
Type: text/x-patch
Size: 2281 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20180420/11c82b6b/attachment.bin>