[PATCH] D104408: AMDGPU/GlobalISel: Do not fcanonicalize const splat padded with undef

Petar Avramovic via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Thu Jun 17 06:27:39 PDT 2021


Petar.Avramovic updated this revision to Diff 352699.
Petar.Avramovic retitled this revision from "AMDGPU/GlobalISel: Treat undef as KnownNeverNaN" to "AMDGPU/GlobalISel: Do not fcanonicalize const splat padded with undef".
Petar.Avramovic edited the summary of this revision.
Petar.Avramovic added a comment.

Moved the change to isCanonicalized and the post-legalizer combiner.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D104408/new/

https://reviews.llvm.org/D104408

Files:
  llvm/lib/Target/AMDGPU/SIISelLowering.cpp
  llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fcanonicalize.mir


Index: llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fcanonicalize.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fcanonicalize.mir
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fcanonicalize.mir
@@ -221,3 +221,52 @@
     %3:_(s32) = G_FMAXNUM_IEEE %4, %5
     $vgpr0 = COPY %3(s32)
 ...
+
+---
+name: test_splat_padded_with_undef
+tracksRegLiveness: true
+legalized: true
+machineFunctionInfo:
+  mode:
+    ieee: true
+body: |
+  bb.0 :
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: test_splat_padded_with_undef
+    ; CHECK: liveins: $vgpr0
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+    ; CHECK: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH4000
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C]](s16)
+    ; CHECK: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT]](s32)
+    ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000
+    ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[C1]](s16)
+    ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT1]](s32), [[DEF]](s32)
+    ; CHECK: [[C2:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3C00
+    ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[C2]](s16)
+    ; CHECK: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT2]](s32)
+    ; CHECK: [[FMUL:%[0-9]+]]:_(<2 x s16>) = G_FMUL [[COPY]], [[BUILD_VECTOR_TRUNC]]
+    ; CHECK: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[FMUL]]
+    ; CHECK: [[FMAXNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[BUILD_VECTOR_TRUNC1]], [[FCANONICALIZE]]
+    ; CHECK: [[FMINNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[BUILD_VECTOR_TRUNC2]], [[FMAXNUM_IEEE]]
+    ; CHECK: $vgpr0 = COPY [[FMINNUM_IEEE]](<2 x s16>)
+    %0:_(<2 x s16>) = COPY $vgpr0
+    %3:_(s16) = G_FCONSTANT half 0xH4000
+    %16:_(s32) = G_ANYEXT %3(s16)
+    %2:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %16(s32), %16(s32)
+    %6:_(s16) = G_FCONSTANT half 0xH0000
+    %17:_(s32) = G_ANYEXT %6(s16)
+    %18:_(s32) = G_IMPLICIT_DEF
+    %5:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %17(s32), %18(s32)
+    %10:_(s16) = G_FCONSTANT half 0xH3C00
+    %19:_(s32) = G_ANYEXT %10(s16)
+    %9:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %19(s32), %19(s32)
+    %4:_(<2 x s16>) = G_FMUL %0, %2
+    %14:_(<2 x s16>) = G_FCANONICALIZE %5
+    %15:_(<2 x s16>) = G_FCANONICALIZE %4
+    %8:_(<2 x s16>) = G_FMAXNUM_IEEE %14, %15
+    %13:_(<2 x s16>) = G_FCANONICALIZE %8
+    %11:_(<2 x s16>) = G_FMINNUM_IEEE %9, %13
+    $vgpr0 = COPY %11(<2 x s16>)
+...
Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -23,6 +23,7 @@
 #include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/FunctionLoweringInfo.h"
 #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
+#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/IntrinsicInst.h"
@@ -9714,6 +9715,11 @@
     return !F.isDenormal() || denormalsEnabledForType(MRI.getType(Reg), MF);
   }
 
+  Register SplatReg;
+  // Constant splat padded with undef.
+  if (mi_match(Reg, MRI, MIPatternMatch::FCstOrSplatFCstRegMatch(SplatReg)))
+    return true;
+
   if (MaxDepth == 0)
     return false;
 


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D104408.352699.patch
Type: text/x-patch
Size: 3532 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20210617/5bf61e8d/attachment.bin>


More information about the llvm-commits mailing list