[llvm] f5d8a9b - AMDGPU: Simplify handling of constant vectors in libcalls

Fri Sep 1 05:22:25 PDT 2023

Author: Matt Arsenault
Date: 2023-09-01T08:22:16-04:00
New Revision: f5d8a9b1bbf2b9894c653a5807753d268e13fc11

URL: https://github.com/llvm/llvm-project/commit/f5d8a9b1bbf2b9894c653a5807753d268e13fc11
DIFF: https://github.com/llvm/llvm-project/commit/f5d8a9b1bbf2b9894c653a5807753d268e13fc11.diff

LOG: AMDGPU: Simplify handling of constant vectors in libcalls

Also fixes the partially undef case (a splat constant vector in which some elements are undef/poison), which previously was not handled.

https://reviews.llvm.org/D158905

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
    llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll
    llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll
    llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-powr.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
index bc4ad345ae2e53..6a85a06a0b6fc1 100644

--- a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
@@ -17,12 +17,14 @@
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/IntrinsicsAMDGPU.h"
+#include "llvm/IR/PatternMatch.h"
 #include "llvm/InitializePasses.h"
 #include <cmath>
 
 #define DEBUG_TYPE "amdgpu-simplifylib"
 
 using namespace llvm;
+using namespace llvm::PatternMatch;
 
 static cl::opt<bool> EnablePreLink("amdgpu-prelink",
   cl::desc("Enable pre-link mode optimizations"),
@@ -803,32 +805,19 @@ bool AMDGPULibCalls::fold_pow(FPMathOperator *FPOp, IRBuilder<> &B,
          "fold_pow: encounter a wrong function call");
 
   Module *M = B.GetInsertBlock()->getModule();
-  ConstantFP *CF;
-  ConstantInt *CINT;
-  Type *eltType;
+  Type *eltType = FPOp->getType()->getScalarType();
   Value *opr0 = FPOp->getOperand(0);
   Value *opr1 = FPOp->getOperand(1);
-  ConstantAggregateZero *CZero = dyn_cast<ConstantAggregateZero>(opr1);
 
-  if (getVecSize(FInfo) == 1) {
-    eltType = opr0->getType();
-    CF = dyn_cast<ConstantFP>(opr1);
-    CINT = dyn_cast<ConstantInt>(opr1);
-  } else {
-    VectorType *VTy = dyn_cast<VectorType>(opr0->getType());
-    assert(VTy && "Oprand of vector function should be of vectortype");
-    eltType = VTy->getElementType();
-    ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr1);
-
-    // Now, only Handle vector const whose elements have the same value.
-    CF = CDV ? dyn_cast_or_null<ConstantFP>(CDV->getSplatValue()) : nullptr;
-    CINT = CDV ? dyn_cast_or_null<ConstantInt>(CDV->getSplatValue()) : nullptr;
-  }
+  const APFloat *CF = nullptr;
+  const APInt *CINT = nullptr;
+  if (!match(opr1, m_APFloatAllowUndef(CF)))
+    match(opr1, m_APIntAllowUndef(CINT));
 
   // 0x1111111 means that we don't do anything for this call.
   int ci_opr1 = (CINT ? (int)CINT->getSExtValue() : 0x1111111);
 
-  if ((CF && CF->isZero()) || (CINT && ci_opr1 == 0) || CZero) {
+  if ((CF && CF->isZero()) || (CINT && ci_opr1 == 0)) {
     //  pow/powr/pown(x, 0) == 1
     LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> 1\n");
     Constant *cnval = ConstantFP::get(eltType, 1.0);
@@ -888,8 +877,8 @@ bool AMDGPULibCalls::fold_pow(FPMathOperator *FPOp, IRBuilder<> &B,
   // Remember that ci_opr1 is set if opr1 is integral
   if (CF) {
     double dval = (getArgType(FInfo) == AMDGPULibFunc::F32)
-                    ? (double)CF->getValueAPF().convertToFloat()
-                    : CF->getValueAPF().convertToDouble();
+                      ? (double)CF->convertToFloat()
+                      : CF->convertToDouble();
     int ival = (int)dval;
     if ((double)ival == dval) {
       ci_opr1 = ival;
@@ -947,12 +936,13 @@ bool AMDGPULibCalls::fold_pow(FPMathOperator *FPOp, IRBuilder<> &B,
   bool needcopysign = false;
   Constant *cnval = nullptr;
   if (getVecSize(FInfo) == 1) {
-    CF = dyn_cast<ConstantFP>(opr0);
+    CF = nullptr;
+    match(opr0, m_APFloatAllowUndef(CF));
 
     if (CF) {
       double V = (getArgType(FInfo) == AMDGPULibFunc::F32)
-                   ? (double)CF->getValueAPF().convertToFloat()
-                   : CF->getValueAPF().convertToDouble();
+                     ? (double)CF->convertToFloat()
+                     : CF->convertToDouble();
 
       V = log2(std::abs(V));
       cnval = ConstantFP::get(eltType, V);

diff  --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll
index bca4646e76c49f..1a7bc81b56eb70 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll
@@ -569,8 +569,7 @@ define <2 x float> @test_pow_afn_v2f32_plus_minus_0.0(<2 x float> %x) {
 define <3 x float> @test_pow_afn_v3f32_0.0_splat_undef(<3 x float> %x, <3 x float> %y) {
 ; CHECK-LABEL: define <3 x float> @test_pow_afn_v3f32_0.0_splat_undef
 ; CHECK-SAME: (<3 x float> [[X:%.*]], <3 x float> [[Y:%.*]]) {
-; CHECK-NEXT:    [[POW:%.*]] = tail call afn <3 x float> @_Z3powDv3_fS_(<3 x float> [[X]], <3 x float> <float 0.000000e+00, float poison, float 0.000000e+00>)
-; CHECK-NEXT:    ret <3 x float> [[POW]]
+; CHECK-NEXT:    ret <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
 ;
   %pow = tail call afn <3 x float> @_Z3powDv3_fS_(<3 x float> %x, <3 x float> <float 0.0, float poison, float 0.0>)
   ret <3 x float> %pow
@@ -579,8 +578,7 @@ define <3 x float> @test_pow_afn_v3f32_0.0_splat_undef(<3 x float> %x, <3 x floa
 define <3 x float> @test_pow_afn_v3f32_neg0.0_splat_undef(<3 x float> %x, <3 x float> %y) {
 ; CHECK-LABEL: define <3 x float> @test_pow_afn_v3f32_neg0.0_splat_undef
 ; CHECK-SAME: (<3 x float> [[X:%.*]], <3 x float> [[Y:%.*]]) {
-; CHECK-NEXT:    [[POW:%.*]] = tail call afn <3 x float> @_Z3powDv3_fS_(<3 x float> [[X]], <3 x float> <float -0.000000e+00, float poison, float -0.000000e+00>)
-; CHECK-NEXT:    ret <3 x float> [[POW]]
+; CHECK-NEXT:    ret <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
 ;
   %pow = tail call afn <3 x float> @_Z3powDv3_fS_(<3 x float> %x, <3 x float> <float -0.0, float poison, float -0.0>)
   ret <3 x float> %pow
@@ -639,8 +637,8 @@ define <2 x float> @test_pow_afn_v2f32_plus_minus_0.5(<2 x float> %x) {
 define <3 x float> @test_pow_afn_v3f32_0.5_splat_undef(<3 x float> %x, <3 x float> %y) {
 ; CHECK-LABEL: define <3 x float> @test_pow_afn_v3f32_0.5_splat_undef
 ; CHECK-SAME: (<3 x float> [[X:%.*]], <3 x float> [[Y:%.*]]) {
-; CHECK-NEXT:    [[POW:%.*]] = tail call afn <3 x float> @_Z3powDv3_fS_(<3 x float> [[X]], <3 x float> <float 5.000000e-01, float poison, float 5.000000e-01>)
-; CHECK-NEXT:    ret <3 x float> [[POW]]
+; CHECK-NEXT:    [[__POW2SQRT:%.*]] = call afn <3 x float> @_Z4sqrtDv3_f(<3 x float> [[X]])
+; CHECK-NEXT:    ret <3 x float> [[__POW2SQRT]]
 ;
   %pow = tail call afn <3 x float> @_Z3powDv3_fS_(<3 x float> %x, <3 x float> <float 0.5, float poison, float 0.5>)
   ret <3 x float> %pow
@@ -649,8 +647,8 @@ define <3 x float> @test_pow_afn_v3f32_0.5_splat_undef(<3 x float> %x, <3 x floa
 define <3 x float> @test_pow_afn_v3f32_neg0.5_splat_undef(<3 x float> %x, <3 x float> %y) {
 ; CHECK-LABEL: define <3 x float> @test_pow_afn_v3f32_neg0.5_splat_undef
 ; CHECK-SAME: (<3 x float> [[X:%.*]], <3 x float> [[Y:%.*]]) {
-; CHECK-NEXT:    [[POW:%.*]] = tail call afn <3 x float> @_Z3powDv3_fS_(<3 x float> [[X]], <3 x float> <float -5.000000e-01, float poison, float -5.000000e-01>)
-; CHECK-NEXT:    ret <3 x float> [[POW]]
+; CHECK-NEXT:    [[__POW2RSQRT:%.*]] = call afn <3 x float> @_Z5rsqrtDv3_f(<3 x float> [[X]])
+; CHECK-NEXT:    ret <3 x float> [[__POW2RSQRT]]
 ;
   %pow = tail call afn <3 x float> @_Z3powDv3_fS_(<3 x float> %x, <3 x float> <float -0.5, float poison, float -0.5>)
   ret <3 x float> %pow
@@ -707,8 +705,7 @@ define <2 x float> @test_pow_afn_v2f32_plus_minus_1.0(<2 x float> %x) {
 define <3 x float> @test_pow_afn_v3f32_1.0_splat_undef(<3 x float> %x, <3 x float> %y) {
 ; CHECK-LABEL: define <3 x float> @test_pow_afn_v3f32_1.0_splat_undef
 ; CHECK-SAME: (<3 x float> [[X:%.*]], <3 x float> [[Y:%.*]]) {
-; CHECK-NEXT:    [[POW:%.*]] = tail call afn <3 x float> @_Z3powDv3_fS_(<3 x float> [[X]], <3 x float> <float 1.000000e+00, float poison, float 1.000000e+00>)
-; CHECK-NEXT:    ret <3 x float> [[POW]]
+; CHECK-NEXT:    ret <3 x float> [[X]]
 ;
   %pow = tail call afn <3 x float> @_Z3powDv3_fS_(<3 x float> %x, <3 x float> <float 1.0, float poison, float 1.0>)
   ret <3 x float> %pow
@@ -717,8 +714,8 @@ define <3 x float> @test_pow_afn_v3f32_1.0_splat_undef(<3 x float> %x, <3 x floa
 define <3 x float> @test_pow_afn_v3f32_neg1.0_splat_undef(<3 x float> %x, <3 x float> %y) {
 ; CHECK-LABEL: define <3 x float> @test_pow_afn_v3f32_neg1.0_splat_undef
 ; CHECK-SAME: (<3 x float> [[X:%.*]], <3 x float> [[Y:%.*]]) {
-; CHECK-NEXT:    [[POW:%.*]] = tail call afn <3 x float> @_Z3powDv3_fS_(<3 x float> [[X]], <3 x float> <float -1.000000e+00, float poison, float -1.000000e+00>)
-; CHECK-NEXT:    ret <3 x float> [[POW]]
+; CHECK-NEXT:    [[__POWRECIP:%.*]] = fdiv afn <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, [[X]]
+; CHECK-NEXT:    ret <3 x float> [[__POWRECIP]]
 ;
   %pow = tail call afn <3 x float> @_Z3powDv3_fS_(<3 x float> %x, <3 x float> <float -1.0, float poison, float -1.0>)
   ret <3 x float> %pow

diff  --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll
index 7f8b8f944857d3..ac179120a9575b 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll
@@ -288,8 +288,7 @@ define <2 x float> @test_pown_v2f32__y_0_undef(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_pown_v2f32__y_0_undef
 ; CHECK-SAME: (<2 x float> [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> <i32 0, i32 poison>)
-; CHECK-NEXT:    ret <2 x float> [[CALL]]
+; CHECK-NEXT:    ret <2 x float> <float 1.000000e+00, float 1.000000e+00>
 ;
 entry:
   %call = tail call <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> %x, <2 x i32> <i32 0, i32 poison>)
@@ -366,8 +365,7 @@ define <2 x float> @test_pown_v2f32__y_1_undef(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_pown_v2f32__y_1_undef
 ; CHECK-SAME: (<2 x float> [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> <i32 1, i32 poison>)
-; CHECK-NEXT:    ret <2 x float> [[CALL]]
+; CHECK-NEXT:    ret <2 x float> [[X]]
 ;
 entry:
   %call = tail call <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> %x, <2 x i32> <i32 1, i32 poison>)
@@ -389,8 +387,7 @@ define <3 x float> @test_pown_v3f32__y_1_undef(<3 x float> %x) {
 ; CHECK-LABEL: define <3 x float> @test_pown_v3f32__y_1_undef
 ; CHECK-SAME: (<3 x float> [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call <3 x float> @_Z4pownDv3_fDv3_i(<3 x float> [[X]], <3 x i32> <i32 1, i32 1, i32 poison>)
-; CHECK-NEXT:    ret <3 x float> [[CALL]]
+; CHECK-NEXT:    ret <3 x float> [[X]]
 ;
 entry:
   %call = tail call <3 x float> @_Z4pownDv3_fDv3_i(<3 x float> %x, <3 x i32> <i32 1, i32 1, i32 poison>)
@@ -470,8 +467,8 @@ define <3 x float> @test_pown_v3f32__y_2_undef(<3 x float> %x) {
 ; CHECK-LABEL: define <3 x float> @test_pown_v3f32__y_2_undef
 ; CHECK-SAME: (<3 x float> [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call <3 x float> @_Z4pownDv3_fDv3_i(<3 x float> [[X]], <3 x i32> <i32 2, i32 poison, i32 2>)
-; CHECK-NEXT:    ret <3 x float> [[CALL]]
+; CHECK-NEXT:    [[__POW2:%.*]] = fmul <3 x float> [[X]], [[X]]
+; CHECK-NEXT:    ret <3 x float> [[__POW2]]
 ;
 entry:
   %call = tail call <3 x float> @_Z4pownDv3_fDv3_i(<3 x float> %x, <3 x i32> <i32 2, i32 poison, i32 2>)
@@ -554,8 +551,8 @@ define <3 x float> @test_pown_v3f32__y_neg1_undef(<3 x float> %x) {
 ; CHECK-LABEL: define <3 x float> @test_pown_v3f32__y_neg1_undef
 ; CHECK-SAME: (<3 x float> [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call <3 x float> @_Z4pownDv3_fDv3_i(<3 x float> [[X]], <3 x i32> <i32 -1, i32 -1, i32 poison>)
-; CHECK-NEXT:    ret <3 x float> [[CALL]]
+; CHECK-NEXT:    [[__POWRECIP:%.*]] = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, [[X]]
+; CHECK-NEXT:    ret <3 x float> [[__POWRECIP]]
 ;
 entry:
   %call = tail call <3 x float> @_Z4pownDv3_fDv3_i(<3 x float> %x, <3 x i32> <i32 -1, i32 -1, i32 poison>)

diff  --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-powr.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-powr.ll
index e08a182a3cd67e..bd742bb68a16ac 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-powr.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-powr.ll
@@ -545,8 +545,7 @@ define <2 x float> @test_powr_afn_v2f32_plus_minus_0.0(<2 x float> %x) {
 define <3 x float> @test_powr_afn_v3f32_0.0_splat_undef(<3 x float> %x, <3 x float> %y) {
 ; CHECK-LABEL: define <3 x float> @test_powr_afn_v3f32_0.0_splat_undef
 ; CHECK-SAME: (<3 x float> [[X:%.*]], <3 x float> [[Y:%.*]]) {
-; CHECK-NEXT:    [[POWR:%.*]] = tail call afn <3 x float> @_Z4powrDv3_fS_(<3 x float> [[X]], <3 x float> <float 0.000000e+00, float poison, float 0.000000e+00>)
-; CHECK-NEXT:    ret <3 x float> [[POWR]]
+; CHECK-NEXT:    ret <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
 ;
   %powr = tail call afn <3 x float> @_Z4powrDv3_fS_(<3 x float> %x, <3 x float> <float 0.0, float poison, float 0.0>)
   ret <3 x float> %powr
@@ -555,8 +554,7 @@ define <3 x float> @test_powr_afn_v3f32_0.0_splat_undef(<3 x float> %x, <3 x flo
 define <3 x float> @test_powr_afn_v3f32_neg0.0_splat_undef(<3 x float> %x, <3 x float> %y) {
 ; CHECK-LABEL: define <3 x float> @test_powr_afn_v3f32_neg0.0_splat_undef
 ; CHECK-SAME: (<3 x float> [[X:%.*]], <3 x float> [[Y:%.*]]) {
-; CHECK-NEXT:    [[POWR:%.*]] = tail call afn <3 x float> @_Z4powrDv3_fS_(<3 x float> [[X]], <3 x float> <float -0.000000e+00, float poison, float -0.000000e+00>)
-; CHECK-NEXT:    ret <3 x float> [[POWR]]
+; CHECK-NEXT:    ret <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
 ;
   %powr = tail call afn <3 x float> @_Z4powrDv3_fS_(<3 x float> %x, <3 x float> <float -0.0, float poison, float -0.0>)
   ret <3 x float> %powr
@@ -615,8 +613,8 @@ define <2 x float> @test_powr_afn_v2f32_plus_minus_0.5(<2 x float> %x) {
 define <3 x float> @test_powr_afn_v3f32_0.5_splat_undef(<3 x float> %x, <3 x float> %y) {
 ; CHECK-LABEL: define <3 x float> @test_powr_afn_v3f32_0.5_splat_undef
 ; CHECK-SAME: (<3 x float> [[X:%.*]], <3 x float> [[Y:%.*]]) {
-; CHECK-NEXT:    [[POWR:%.*]] = tail call afn <3 x float> @_Z4powrDv3_fS_(<3 x float> [[X]], <3 x float> <float 5.000000e-01, float poison, float 5.000000e-01>)
-; CHECK-NEXT:    ret <3 x float> [[POWR]]
+; CHECK-NEXT:    [[__POW2SQRT:%.*]] = call afn <3 x float> @_Z4sqrtDv3_f(<3 x float> [[X]])
+; CHECK-NEXT:    ret <3 x float> [[__POW2SQRT]]
 ;
   %powr = tail call afn <3 x float> @_Z4powrDv3_fS_(<3 x float> %x, <3 x float> <float 0.5, float poison, float 0.5>)
   ret <3 x float> %powr
@@ -625,8 +623,8 @@ define <3 x float> @test_powr_afn_v3f32_0.5_splat_undef(<3 x float> %x, <3 x flo
 define <3 x float> @test_powr_afn_v3f32_neg0.5_splat_undef(<3 x float> %x, <3 x float> %y) {
 ; CHECK-LABEL: define <3 x float> @test_powr_afn_v3f32_neg0.5_splat_undef
 ; CHECK-SAME: (<3 x float> [[X:%.*]], <3 x float> [[Y:%.*]]) {
-; CHECK-NEXT:    [[POWR:%.*]] = tail call afn <3 x float> @_Z4powrDv3_fS_(<3 x float> [[X]], <3 x float> <float -5.000000e-01, float poison, float -5.000000e-01>)
-; CHECK-NEXT:    ret <3 x float> [[POWR]]
+; CHECK-NEXT:    [[__POW2RSQRT:%.*]] = call afn <3 x float> @_Z5rsqrtDv3_f(<3 x float> [[X]])
+; CHECK-NEXT:    ret <3 x float> [[__POW2RSQRT]]
 ;
   %powr = tail call afn <3 x float> @_Z4powrDv3_fS_(<3 x float> %x, <3 x float> <float -0.5, float poison, float -0.5>)
   ret <3 x float> %powr
@@ -683,8 +681,7 @@ define <2 x float> @test_powr_afn_v2f32_plus_minus_1.0(<2 x float> %x) {
 define <3 x float> @test_powr_afn_v3f32_1.0_splat_undef(<3 x float> %x, <3 x float> %y) {
 ; CHECK-LABEL: define <3 x float> @test_powr_afn_v3f32_1.0_splat_undef
 ; CHECK-SAME: (<3 x float> [[X:%.*]], <3 x float> [[Y:%.*]]) {
-; CHECK-NEXT:    [[POWR:%.*]] = tail call afn <3 x float> @_Z4powrDv3_fS_(<3 x float> [[X]], <3 x float> <float 1.000000e+00, float poison, float 1.000000e+00>)
-; CHECK-NEXT:    ret <3 x float> [[POWR]]
+; CHECK-NEXT:    ret <3 x float> [[X]]
 ;
   %powr = tail call afn <3 x float> @_Z4powrDv3_fS_(<3 x float> %x, <3 x float> <float 1.0, float poison, float 1.0>)
   ret <3 x float> %powr
@@ -693,8 +690,8 @@ define <3 x float> @test_powr_afn_v3f32_1.0_splat_undef(<3 x float> %x, <3 x flo
 define <3 x float> @test_powr_afn_v3f32_neg1.0_splat_undef(<3 x float> %x, <3 x float> %y) {
 ; CHECK-LABEL: define <3 x float> @test_powr_afn_v3f32_neg1.0_splat_undef
 ; CHECK-SAME: (<3 x float> [[X:%.*]], <3 x float> [[Y:%.*]]) {
-; CHECK-NEXT:    [[POWR:%.*]] = tail call afn <3 x float> @_Z4powrDv3_fS_(<3 x float> [[X]], <3 x float> <float -1.000000e+00, float poison, float -1.000000e+00>)
-; CHECK-NEXT:    ret <3 x float> [[POWR]]
+; CHECK-NEXT:    [[__POWRECIP:%.*]] = fdiv afn <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, [[X]]
+; CHECK-NEXT:    ret <3 x float> [[__POWRECIP]]
 ;
   %powr = tail call afn <3 x float> @_Z4powrDv3_fS_(<3 x float> %x, <3 x float> <float -1.0, float poison, float -1.0>)
   ret <3 x float> %powr