[PATCH] D60978: [AMDGPU] Fix an issue in `op_sel_hi` skipping.

Mon Apr 22 14:01:30 PDT 2019

hliao created this revision.
hliao added reviewers: rampitec, arsenm, kzhuravl.
Herald added subscribers: llvm-commits, hiraditya, t-tye, tpr, dstuttard, yaxunl, nhaehnle, wdng, jvesely.
Herald added a project: LLVM.

- Only apply packed literal `op_sel_hi` skipping on operands requiring packed literals. Even an instruction is `packed`, it may have operand requiring non-packed literal, such as `v_dot2_f32_f16`.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D60978

Files:
  llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
  llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll


Index: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll
===================================================================

--- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll
+++ llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll
@@ -33,3 +33,10 @@
   store float %r.val, float addrspace(1)* %r
   ret void
 }
+
+; GFX906-LABEL: {{^}}fdot2_inline_literal
+; GFX906: v_dot2_f32_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 1.0
+define float @fdot2_inline_literal(<2 x half> %a, <2 x half> %b) {
+  %ret = tail call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %b, float 1.0, i1 false)
+  ret float %ret
+}
Index: llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -189,15 +189,24 @@
       unsigned Val = Mod.getImm();
       if ((Val & SISrcMods::OP_SEL_0) || !(Val & SISrcMods::OP_SEL_1))
         return false;
-      // If upper part is all zero we do not need op_sel_hi.
-      if (!isUInt<16>(Fold.ImmToFold)) {
-        if (!(Fold.ImmToFold & 0xffff)) {
-          Mod.setImm(Mod.getImm() | SISrcMods::OP_SEL_0);
+      // Only apply the following transformation if that operand requries
+      // a packed immediate.
+      switch (TII.get(Opcode).OpInfo[OpNo].OperandType) {
+      case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
+      case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
+        // If upper part is all zero we do not need op_sel_hi.
+        if (!isUInt<16>(Fold.ImmToFold)) {
+          if (!(Fold.ImmToFold & 0xffff)) {
+            Mod.setImm(Mod.getImm() | SISrcMods::OP_SEL_0);
+            Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1);
+            Old.ChangeToImmediate((Fold.ImmToFold >> 16) & 0xffff);
+            return true;
+          }
           Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1);
-          Old.ChangeToImmediate((Fold.ImmToFold >> 16) & 0xffff);
-          return true;
         }
-        Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1);
+        break;
+      default:
+        break;
       }
     }
 


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D60978.196130.patch
Type: text/x-patch
Size: 2101 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20190422/f2410ee2/attachment.bin>