[PATCH] D104408: AMDGPU/GlobalISel: Do not fcanonicalize const splat padded with undef
Petar Avramovic via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 17 06:27:39 PDT 2021
Petar.Avramovic updated this revision to Diff 352699.
Petar.Avramovic retitled this revision from "AMDGPU/GlobalISel: Treat undef as KnownNeverNaN" to "AMDGPU/GlobalISel: Do not fcanonicalize const splat padded with undef".
Petar.Avramovic edited the summary of this revision.
Petar.Avramovic added a comment.
Moved the change to isCanonicalized and the post-legalizer combiner.
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D104408/new/
https://reviews.llvm.org/D104408
Files:
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fcanonicalize.mir
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fcanonicalize.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fcanonicalize.mir
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fcanonicalize.mir
@@ -221,3 +221,52 @@
%3:_(s32) = G_FMAXNUM_IEEE %4, %5
$vgpr0 = COPY %3(s32)
...
+
+---
+name: test_splat_padded_with_undef
+tracksRegLiveness: true
+legalized: true
+machineFunctionInfo:
+ mode:
+ ieee: true
+body: |
+ bb.0 :
+ liveins: $vgpr0
+
+ ; CHECK-LABEL: name: test_splat_padded_with_undef
+ ; CHECK: liveins: $vgpr0
+ ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+ ; CHECK: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH4000
+ ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C]](s16)
+ ; CHECK: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT]](s32)
+ ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000
+ ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[C1]](s16)
+ ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT1]](s32), [[DEF]](s32)
+ ; CHECK: [[C2:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3C00
+ ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[C2]](s16)
+ ; CHECK: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT2]](s32)
+ ; CHECK: [[FMUL:%[0-9]+]]:_(<2 x s16>) = G_FMUL [[COPY]], [[BUILD_VECTOR_TRUNC]]
+ ; CHECK: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[FMUL]]
+ ; CHECK: [[FMAXNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[BUILD_VECTOR_TRUNC1]], [[FCANONICALIZE]]
+ ; CHECK: [[FMINNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[BUILD_VECTOR_TRUNC2]], [[FMAXNUM_IEEE]]
+ ; CHECK: $vgpr0 = COPY [[FMINNUM_IEEE]](<2 x s16>)
+ %0:_(<2 x s16>) = COPY $vgpr0
+ %3:_(s16) = G_FCONSTANT half 0xH4000
+ %16:_(s32) = G_ANYEXT %3(s16)
+ %2:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %16(s32), %16(s32)
+ %6:_(s16) = G_FCONSTANT half 0xH0000
+ %17:_(s32) = G_ANYEXT %6(s16)
+ %18:_(s32) = G_IMPLICIT_DEF
+ %5:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %17(s32), %18(s32)
+ %10:_(s16) = G_FCONSTANT half 0xH3C00
+ %19:_(s32) = G_ANYEXT %10(s16)
+ %9:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %19(s32), %19(s32)
+ %4:_(<2 x s16>) = G_FMUL %0, %2
+ %14:_(<2 x s16>) = G_FCANONICALIZE %5
+ %15:_(<2 x s16>) = G_FCANONICALIZE %4
+ %8:_(<2 x s16>) = G_FMAXNUM_IEEE %14, %15
+ %13:_(<2 x s16>) = G_FCANONICALIZE %8
+ %11:_(<2 x s16>) = G_FMINNUM_IEEE %9, %13
+ $vgpr0 = COPY %11(<2 x s16>)
+...
Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -23,6 +23,7 @@
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
+#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -9714,6 +9715,11 @@
return !F.isDenormal() || denormalsEnabledForType(MRI.getType(Reg), MF);
}
+ Register SplatReg;
+ // Constant splat padded with undef.
+ if (mi_match(Reg, MRI, MIPatternMatch::FCstOrSplatFCstRegMatch(SplatReg)))
+ return true;
+
if (MaxDepth == 0)
return false;
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D104408.352699.patch
Type: text/x-patch
Size: 3532 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20210617/5bf61e8d/attachment.bin>
More information about the llvm-commits mailing list