[llvm] r339513 - AMDGPU: Check NSZ MI flag when folding omod

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Sun Aug 12 01:44:25 PDT 2018


Author: arsenm
Date: Sun Aug 12 01:44:25 2018
New Revision: 339513

URL: http://llvm.org/viewvc/llvm-project?rev=339513&view=rev
Log:
AMDGPU: Check NSZ MI flag when folding omod

I'm not sure of the exact nsz flag combination that is OK.
I think that as long as the flag is on either the source instruction
or the omod multiply, folding is OK. For now, just check it on the
omod multiply.

Added:
    llvm/trunk/test/CodeGen/AMDGPU/omod-nsz-flag.mir
Modified:
    llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp

Modified: llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp?rev=339513&r1=339512&r2=339513&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp Sun Aug 12 01:44:25 2018
@@ -994,9 +994,8 @@ bool SIFoldOperands::runOnMachineFunctio
   // omod is ignored by hardware if IEEE bit is enabled. omod also does not
   // correctly handle signed zeros.
   //
-  // TODO: Check nsz on instructions when fast math flags are preserved to MI
-  // level.
-  bool IsIEEEMode = ST->enableIEEEBit(MF) || !MFI->hasNoSignedZerosFPMath();
+  bool IsIEEEMode = ST->enableIEEEBit(MF);
+  bool HasNSZ = MFI->hasNoSignedZerosFPMath();
 
   for (MachineBasicBlock *MBB : depth_first(&MF)) {
     MachineBasicBlock::iterator I, Next;
@@ -1007,7 +1006,10 @@ bool SIFoldOperands::runOnMachineFunctio
       tryFoldInst(TII, &MI);
 
       if (!TII->isFoldableCopy(MI)) {
-        if (IsIEEEMode || !tryFoldOMod(MI))
+        // TODO: Omod might be OK if there is NSZ only on the source
+        // instruction, and not the omod multiply.
+        if (IsIEEEMode || (!HasNSZ && !MI.getFlag(MachineInstr::FmNsz)) ||
+            !tryFoldOMod(MI))
           tryFoldClamp(MI);
         continue;
       }

Added: llvm/trunk/test/CodeGen/AMDGPU/omod-nsz-flag.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/omod-nsz-flag.mir?rev=339513&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/omod-nsz-flag.mir (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/omod-nsz-flag.mir Sun Aug 12 01:44:25 2018
@@ -0,0 +1,71 @@
+# RUN: llc -march=amdgcn -verify-machineinstrs -run-pass si-fold-operands  %s -o - | FileCheck -check-prefix=GCN %s
+
+--- |
+  define amdgpu_ps void @omod_inst_flag_nsz_src() {
+    unreachable
+  }
+
+  define amdgpu_ps void @omod_inst_flag_nsz_result() {
+    unreachable
+  }
+
+  define amdgpu_ps void @omod_inst_flag_nsz_both() {
+    unreachable
+  }
+
+...
+
+---
+
+# FIXME: Is it OK to fold omod for this?
+# GCN-LABEL: name: omod_inst_flag_nsz_src
+# GCN: %0:vgpr_32 = nsz V_ADD_F32_e64 0, $vgpr0, 0, $vgpr1, 0, 0, implicit $exec
+# GCN-NEXT: %1:vgpr_32 = V_MUL_F32_e64 0, %0, 0, 1073741824, 0, 0, implicit $exec
+# GCN-NEXT: S_ENDPGM implicit %1
+name: omod_inst_flag_nsz_src
+tracksRegLiveness: true
+
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+  %0:vgpr_32 = nsz V_ADD_F32_e64 0, $vgpr0, 0, $vgpr1, 0, 0, implicit $exec
+  %1:vgpr_32 = V_MUL_F32_e64 0, %0, 0, 1073741824, 0, 0, implicit $exec
+  S_ENDPGM implicit %1
+
+...
+---
+
+# GCN-LABEL: name: omod_inst_flag_nsz_result
+# GCN: %0:vgpr_32 = V_ADD_F32_e64 0, $vgpr0, 0, $vgpr1, 0, 1, implicit $exec
+# GCN-NEXT: S_ENDPGM implicit %0
+
+name: omod_inst_flag_nsz_result
+tracksRegLiveness: true
+
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+  %0:vgpr_32 = V_ADD_F32_e64 0, $vgpr0, 0, $vgpr1, 0, 0, implicit $exec
+  %1:vgpr_32 = nsz V_MUL_F32_e64 0, %0, 0, 1073741824, 0, 0, implicit $exec
+  S_ENDPGM implicit %1
+...
+
+---
+
+# GCN-LABEL: name: omod_inst_flag_nsz_both
+# GCN: %0:vgpr_32 = nsz V_ADD_F32_e64 0, $vgpr0, 0, $vgpr1, 0, 1, implicit $exec
+# GCN-NEXT: S_ENDPGM implicit %0
+
+name: omod_inst_flag_nsz_both
+tracksRegLiveness: true
+
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+  %0:vgpr_32 = nsz V_ADD_F32_e64 0, $vgpr0, 0, $vgpr1, 0, 0, implicit $exec
+  %1:vgpr_32 = nsz V_MUL_F32_e64 0, %0, 0, 1073741824, 0, 0, implicit $exec
+  S_ENDPGM implicit %1
+...




More information about the llvm-commits mailing list