[llvm] AMDGPU: Handle undef correctly in isKnownIntegral (PR #92566)

Fri May 17 08:59:21 PDT 2024

https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/92566

None

>From cafca64ba36c0a34072998845b8b09c52e3b0686 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Fri, 17 May 2024 16:43:22 +0200
Subject: [PATCH] AMDGPU: Handle undef correctly in isKnownIntegral

---
 llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp     | 20 +++++++---
 .../AMDGPU/amdgpu-simplify-libcall-pow.ll     | 40 +++++++++++++++++++
 2 files changed, 54 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
index 978a2d49b08bc..faf04c3c7e709 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
@@ -577,18 +577,26 @@ bool AMDGPULibCalls::fold_read_write_pipe(CallInst *CI, IRBuilder<> &B,
 
 static bool isKnownIntegral(const Value *V, const DataLayout &DL,
                             FastMathFlags FMF) {
-  if (isa<UndefValue>(V))
+  if (isa<PoisonValue>(V))
     return true;
+  if (isa<UndefValue>(V))
+    return false;
 
   if (const ConstantFP *CF = dyn_cast<ConstantFP>(V))
     return CF->getValueAPF().isInteger();
 
-  if (const ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(V)) {
-    for (unsigned i = 0, e = CDV->getNumElements(); i != e; ++i) {
-      Constant *ConstElt = CDV->getElementAsConstant(i);
-      if (isa<UndefValue>(ConstElt))
+  auto *VFVTy = dyn_cast<FixedVectorType>(V->getType());
+  const Constant *CV = dyn_cast<Constant>(V);
+  if (VFVTy && CV) {
+    unsigned NumElts = VFVTy->getNumElements();
+    for (unsigned i = 0; i != NumElts; ++i) {
+      Constant *Elt = CV->getAggregateElement(i);
+      if (!Elt)
+        return false;
+      if (isa<PoisonValue>(Elt))
         continue;
-      const ConstantFP *CFP = dyn_cast<ConstantFP>(ConstElt);
+
+      const ConstantFP *CFP = dyn_cast<ConstantFP>(Elt);
       if (!CFP || !CFP->getValue().isInteger())
         return false;
     }
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll
index 5db25a59d33fc..6b4b0f881f3be 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll
@@ -2673,6 +2673,46 @@ define float @test_pow_f32__y_known_integral_roundeven(float %x, float nofpclass
   ret float %pow
 }
 
+define float @test_pow_f32_known_integral_undef(float %x) {
+; CHECK-LABEL: define float @test_pow_f32_known_integral_undef
+; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-NEXT:    [[POW:%.*]] = tail call float @_Z3powff(float [[X]], float undef)
+; CHECK-NEXT:    ret float [[POW]]
+;
+  %pow = tail call float @_Z3powff(float %x, float undef)
+  ret float %pow
+}
+
+define float @test_pow_f32_known_integral_poison(float %x) {
+; CHECK-LABEL: define float @test_pow_f32_known_integral_poison
+; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-NEXT:    [[POW:%.*]] = tail call float @_Z4pownfi(float [[X]], i32 poison)
+; CHECK-NEXT:    ret float [[POW]]
+;
+  %pow = tail call float @_Z3powff(float %x, float poison)
+  ret float %pow
+}
+
+define <2 x float> @test_pow_v2f32_known_integral_constant_vector_undef_elt(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_pow_v2f32_known_integral_constant_vector_undef_elt
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT:    [[POW:%.*]] = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> <float 4.000000e+00, float undef>)
+; CHECK-NEXT:    ret <2 x float> [[POW]]
+;
+  %pow = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float 4.0, float undef>)
+  ret <2 x float> %pow
+}
+
+define <2 x float> @test_pow_v2f32_known_integral_constant_vector_poison_elt(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_pow_v2f32_known_integral_constant_vector_poison_elt
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT:    [[POW:%.*]] = tail call <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> <i32 4, i32 poison>)
+; CHECK-NEXT:    ret <2 x float> [[POW]]
+;
+  %pow = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float 4.0, float poison>)
+  ret <2 x float> %pow
+}
+
 attributes #0 = { minsize }
 attributes #1 = { noinline }
 attributes #2 = { strictfp }