[llvm] GlobalISel needs fdiv 1 / sqrt(x) to rsq combine (PR #78673)

Nick Anderson via llvm-commits llvm-commits at lists.llvm.org
Thu Feb 22 00:33:03 PST 2024


https://github.com/nickleus27 updated https://github.com/llvm/llvm-project/pull/78673

>From ac88229787d70493726c9682976706038cf7e95e Mon Sep 17 00:00:00 2001
From: Nick Anderson <nickleus27 at gmail.com>
Date: Mon, 15 Jan 2024 02:38:21 -0800
Subject: [PATCH] [AMDGPU][GlobalISel] Combine f16 fdiv y / sqrt(x) into y * rsq(x)

---
 llvm/lib/Target/AMDGPU/AMDGPUCombine.td       |   8 +-
 .../AMDGPU/AMDGPUPostLegalizerCombiner.cpp    |  23 +
 .../GlobalISel/combine-fdiv-sqrt-to-rsq.mir   | 584 ++++++++++++++++++
 3 files changed, 614 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fdiv-sqrt-to-rsq.mir

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
index b9411e2052120d..9218760538dc5d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
@@ -33,6 +33,12 @@ def rcp_sqrt_to_rsq : GICombineRule<
          [{ return matchRcpSqrtToRsq(*${rcp}, ${matchinfo}); }]),
   (apply [{ Helper.applyBuildFn(*${rcp}, ${matchinfo}); }])>;
 
+def fdiv_by_sqrt_to_rsq_f16 : GICombineRule<
+  (defs root:$root),
+  (match (G_FSQRT f16:$sqrt, $x, (MIFlags FmContract)),
+         (G_FDIV f16:$dst, $y, $sqrt, (MIFlags FmContract)):$root,
+         [{ return matchFDivSqrtToRsqF16(*${root}); }]),
+  (apply [{ applyFDivSqrtToRsqF16(*${root}, ${x}.getReg()); }])>;
 
 def cvt_f32_ubyteN_matchdata : GIDefMatchData<"CvtF32UByteMatchInfo">;
 
@@ -156,7 +162,7 @@ def AMDGPUPostLegalizerCombiner: GICombiner<
   "AMDGPUPostLegalizerCombinerImpl",
   [all_combines, gfx6gfx7_combines, gfx8_combines,
    uchar_to_float, cvt_f32_ubyteN, remove_fcanonicalize, foldable_fneg,
-   rcp_sqrt_to_rsq, sign_extension_in_reg, smulu64]> {
+   rcp_sqrt_to_rsq, fdiv_by_sqrt_to_rsq_f16, sign_extension_in_reg, smulu64]> {
   let CombineAllMethodName = "tryCombineAllImpl";
 }
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
index a1c34e92a57f35..82e17ddad851fd 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
@@ -83,6 +83,9 @@ class AMDGPUPostLegalizerCombinerImpl : public Combiner {
   matchRcpSqrtToRsq(MachineInstr &MI,
                     std::function<void(MachineIRBuilder &)> &MatchInfo) const;
 
+  bool matchFDivSqrtToRsqF16(MachineInstr &MI) const;
+  void applyFDivSqrtToRsqF16(MachineInstr &MI, const Register &X) const;
+
   // FIXME: Should be able to have 2 separate matchdatas rather than custom
   // struct boilerplate.
   struct CvtF32UByteMatchInfo {
@@ -334,6 +337,26 @@ bool AMDGPUPostLegalizerCombinerImpl::matchRcpSqrtToRsq(
   return false;
 }
 
+bool AMDGPUPostLegalizerCombinerImpl::matchFDivSqrtToRsqF16(
+    MachineInstr &MI) const {
+  Register Sqrt = MI.getOperand(2).getReg();
+  return MRI.hasOneNonDBGUse(Sqrt);
+}
+
+void AMDGPUPostLegalizerCombinerImpl::applyFDivSqrtToRsqF16(
+    MachineInstr &MI, const Register &X) const {
+  Register Dst = MI.getOperand(0).getReg();
+  Register Y = MI.getOperand(1).getReg();
+  LLT DstTy = MRI.getType(Dst);
+  uint32_t Flags = MI.getFlags();
+  Register RSQ = B.buildIntrinsic(Intrinsic::amdgcn_rsq, {DstTy})
+                     .addUse(X)
+                     .setMIFlags(Flags)
+                     .getReg(0);
+  B.buildFMul(Dst, RSQ, Y, Flags);
+  MI.eraseFromParent();
+}
+
 bool AMDGPUPostLegalizerCombinerImpl::matchCvtF32UByteN(
     MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo) const {
   Register SrcReg = MI.getOperand(1).getReg();
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fdiv-sqrt-to-rsq.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fdiv-sqrt-to-rsq.mir
new file mode 100644
index 00000000000000..6c5339e36c77f4
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fdiv-sqrt-to-rsq.mir
@@ -0,0 +1,584 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=amdgpu-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
+
+---
+name:            rsq_f16
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0
+
+    ; GCN-LABEL: name: rsq_f16
+    ; GCN: liveins: $vgpr0
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GCN-NEXT: %x:_(s16) = G_TRUNC [[COPY]](s32)
+    ; GCN-NEXT: [[INT:%[0-9]+]]:_(s16) = contract G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %x(s16)
+    ; GCN-NEXT: %ext:_(s32) = G_ANYEXT [[INT]](s16)
+    ; GCN-NEXT: $vgpr0 = COPY %ext(s32)
+    %0:_(s32) = COPY $vgpr0
+    %x:_(s16) = G_TRUNC %0:_(s32)
+    %sqrt:_(s16) = contract G_FSQRT %x
+    %one:_(s16) = G_FCONSTANT half 1.0
+    %rsq:_(s16) = contract G_FDIV %one, %sqrt
+    %ext:_(s32) = G_ANYEXT %rsq:_(s16)
+    $vgpr0 = COPY %ext
+
+...
+
+---
+name:            rsq_f16_missing_contract0
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0
+
+    ; GCN-LABEL: name: rsq_f16_missing_contract0
+    ; GCN: liveins: $vgpr0
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GCN-NEXT: %x:_(s16) = G_TRUNC [[COPY]](s32)
+    ; GCN-NEXT: %sqrt:_(s16) = G_FSQRT %x
+    ; GCN-NEXT: %one:_(s16) = G_FCONSTANT half 0xH3C00
+    ; GCN-NEXT: %rsq:_(s16) = contract G_FDIV %one, %sqrt
+    ; GCN-NEXT: %ext:_(s32) = G_ANYEXT %rsq(s16)
+    ; GCN-NEXT: $vgpr0 = COPY %ext(s32)
+    %0:_(s32) = COPY $vgpr0
+    %x:_(s16) = G_TRUNC %0:_(s32)
+    %sqrt:_(s16) = G_FSQRT %x
+    %one:_(s16) = G_FCONSTANT half 1.0
+    %rsq:_(s16) = contract G_FDIV %one, %sqrt
+    %ext:_(s32) = G_ANYEXT %rsq:_(s16)
+    $vgpr0 = COPY %ext
+
+...
+
+---
+name:            rsq_f16_missing_contract1
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0
+
+    ; GCN-LABEL: name: rsq_f16_missing_contract1
+    ; GCN: liveins: $vgpr0
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GCN-NEXT: %x:_(s16) = G_TRUNC [[COPY]](s32)
+    ; GCN-NEXT: %sqrt:_(s16) = contract G_FSQRT %x
+    ; GCN-NEXT: %one:_(s16) = G_FCONSTANT half 0xH3C00
+    ; GCN-NEXT: %rsq:_(s16) = G_FDIV %one, %sqrt
+    ; GCN-NEXT: %ext:_(s32) = G_ANYEXT %rsq(s16)
+    ; GCN-NEXT: $vgpr0 = COPY %ext(s32)
+    %0:_(s32) = COPY $vgpr0
+    %x:_(s16) = G_TRUNC %0:_(s32)
+    %sqrt:_(s16) = contract G_FSQRT %x
+    %one:_(s16) = G_FCONSTANT half 1.0
+    %rsq:_(s16) = G_FDIV %one, %sqrt
+    %ext:_(s32) = G_ANYEXT %rsq:_(s16)
+    $vgpr0 = COPY %ext
+
+...
+
+---
+name:            neg_rsq_f16
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0
+
+    ; GCN-LABEL: name: neg_rsq_f16
+    ; GCN: liveins: $vgpr0
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GCN-NEXT: %x:_(s16) = G_TRUNC [[COPY]](s32)
+    ; GCN-NEXT: [[INT:%[0-9]+]]:_(s16) = contract G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %x(s16)
+    ; GCN-NEXT: %rsq:_(s16) = contract G_FNEG [[INT]]
+    ; GCN-NEXT: %ext:_(s32) = G_ANYEXT %rsq(s16)
+    ; GCN-NEXT: $vgpr0 = COPY %ext(s32)
+    %0:_(s32) = COPY $vgpr0
+    %x:_(s16) = G_TRUNC %0:_(s32)
+    %sqrt:_(s16) = contract G_FSQRT %x
+    %neg_one:_(s16) = G_FCONSTANT half -1.0
+    %rsq:_(s16) = contract G_FDIV %neg_one, %sqrt
+    %ext:_(s32) = G_ANYEXT %rsq:_(s16)
+    $vgpr0 = COPY %ext
+
+...
+
+---
+name:            neg_rsq_f16_missing_contract0
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0
+
+    ; GCN-LABEL: name: neg_rsq_f16_missing_contract0
+    ; GCN: liveins: $vgpr0
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GCN-NEXT: %x:_(s16) = G_TRUNC [[COPY]](s32)
+    ; GCN-NEXT: %sqrt:_(s16) = G_FSQRT %x
+    ; GCN-NEXT: %neg_one:_(s16) = G_FCONSTANT half 0xHBC00
+    ; GCN-NEXT: %rsq:_(s16) = contract G_FDIV %neg_one, %sqrt
+    ; GCN-NEXT: %ext:_(s32) = G_ANYEXT %rsq(s16)
+    ; GCN-NEXT: $vgpr0 = COPY %ext(s32)
+    %0:_(s32) = COPY $vgpr0
+    %x:_(s16) = G_TRUNC %0:_(s32)
+    %sqrt:_(s16) = G_FSQRT %x
+    %neg_one:_(s16) = G_FCONSTANT half -1.0
+    %rsq:_(s16) = contract G_FDIV %neg_one, %sqrt
+    %ext:_(s32) = G_ANYEXT %rsq:_(s16)
+    $vgpr0 = COPY %ext
+
+...
+
+---
+name:            neg_rsq_f16_missing_contract1
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0
+
+    ; GCN-LABEL: name: neg_rsq_f16_missing_contract1
+    ; GCN: liveins: $vgpr0
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GCN-NEXT: %x:_(s16) = G_TRUNC [[COPY]](s32)
+    ; GCN-NEXT: %sqrt:_(s16) = contract G_FSQRT %x
+    ; GCN-NEXT: %neg_one:_(s16) = G_FCONSTANT half 0xHBC00
+    ; GCN-NEXT: %rsq:_(s16) = G_FDIV %neg_one, %sqrt
+    ; GCN-NEXT: %ext:_(s32) = G_ANYEXT %rsq(s16)
+    ; GCN-NEXT: $vgpr0 = COPY %ext(s32)
+    %0:_(s32) = COPY $vgpr0
+    %x:_(s16) = G_TRUNC %0:_(s32)
+    %sqrt:_(s16) = contract G_FSQRT %x
+    %neg_one:_(s16) = G_FCONSTANT half -1.0
+    %rsq:_(s16) = G_FDIV %neg_one, %sqrt
+    %ext:_(s32) = G_ANYEXT %rsq:_(s16)
+    $vgpr0 = COPY %ext
+
+...
+
+---
+name:            rsq_f16_multi_use
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0
+
+    ; GCN-LABEL: name: rsq_f16_multi_use
+    ; GCN: liveins: $vgpr0
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GCN-NEXT: %x:_(s16) = G_TRUNC [[COPY]](s32)
+    ; GCN-NEXT: %sqrt:_(s16) = contract G_FSQRT %x
+    ; GCN-NEXT: %one:_(s16) = G_FCONSTANT half 0xH3C00
+    ; GCN-NEXT: %rsq:_(s16) = contract G_FDIV %one, %sqrt
+    ; GCN-NEXT: %ext:_(s32) = G_ANYEXT %rsq(s16)
+    ; GCN-NEXT: $vgpr0 = COPY %ext(s32)
+    ; GCN-NEXT: S_ENDPGM 0, implicit %sqrt(s16)
+    %0:_(s32) = COPY $vgpr0
+    %x:_(s16) = G_TRUNC %0:_(s32)
+    %sqrt:_(s16) = contract G_FSQRT %x
+    %one:_(s16) = G_FCONSTANT half 1.0
+    %rsq:_(s16) = contract G_FDIV %one, %sqrt
+    %ext:_(s32) = G_ANYEXT %rsq:_(s16)
+    $vgpr0 = COPY %ext
+    S_ENDPGM 0, implicit %sqrt
+
+...
+
+---
+name:            rsq_f16_multi_use_missing_contract0
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0
+
+    ; GCN-LABEL: name: rsq_f16_multi_use_missing_contract0
+    ; GCN: liveins: $vgpr0
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GCN-NEXT: %x:_(s16) = G_TRUNC [[COPY]](s32)
+    ; GCN-NEXT: %sqrt:_(s16) = G_FSQRT %x
+    ; GCN-NEXT: %one:_(s16) = G_FCONSTANT half 0xH3C00
+    ; GCN-NEXT: %rsq:_(s16) = contract G_FDIV %one, %sqrt
+    ; GCN-NEXT: %ext:_(s32) = G_ANYEXT %rsq(s16)
+    ; GCN-NEXT: $vgpr0 = COPY %ext(s32)
+    ; GCN-NEXT: S_ENDPGM 0, implicit %sqrt(s16)
+    %0:_(s32) = COPY $vgpr0
+    %x:_(s16) = G_TRUNC %0:_(s32)
+    %sqrt:_(s16) = G_FSQRT %x
+    %one:_(s16) = G_FCONSTANT half 1.0
+    %rsq:_(s16) = contract G_FDIV %one, %sqrt
+    %ext:_(s32) = G_ANYEXT %rsq:_(s16)
+    $vgpr0 = COPY %ext
+    S_ENDPGM 0, implicit %sqrt
+
+...
+
+---
+name:            rsq_f16_multi_use_missing_contract1
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0
+
+    ; GCN-LABEL: name: rsq_f16_multi_use_missing_contract1
+    ; GCN: liveins: $vgpr0
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GCN-NEXT: %x:_(s16) = G_TRUNC [[COPY]](s32)
+    ; GCN-NEXT: %sqrt:_(s16) = contract G_FSQRT %x
+    ; GCN-NEXT: %one:_(s16) = G_FCONSTANT half 0xH3C00
+    ; GCN-NEXT: %rsq:_(s16) = G_FDIV %one, %sqrt
+    ; GCN-NEXT: %ext:_(s32) = G_ANYEXT %rsq(s16)
+    ; GCN-NEXT: $vgpr0 = COPY %ext(s32)
+    ; GCN-NEXT: S_ENDPGM 0, implicit %sqrt(s16)
+    %0:_(s32) = COPY $vgpr0
+    %x:_(s16) = G_TRUNC %0:_(s32)
+    %sqrt:_(s16) = contract G_FSQRT %x
+    %one:_(s16) = G_FCONSTANT half 1.0
+    %rsq:_(s16) = G_FDIV %one, %sqrt
+    %ext:_(s32) = G_ANYEXT %rsq:_(s16)
+    $vgpr0 = COPY %ext
+    S_ENDPGM 0, implicit %sqrt
+
+...
+
+---
+name:            rsq_f32
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0
+
+    ; GCN-LABEL: name: rsq_f32
+    ; GCN: liveins: $vgpr0
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: %x:_(s32) = COPY $vgpr0
+    ; GCN-NEXT: %sqrt:_(s32) = contract G_FSQRT %x
+    ; GCN-NEXT: %one:_(s32) = G_FCONSTANT float 1.000000e+00
+    ; GCN-NEXT: %rsq:_(s32) = contract G_FDIV %one, %sqrt
+    ; GCN-NEXT: $vgpr0 = COPY %rsq(s32)
+    %x:_(s32) = COPY $vgpr0
+    %sqrt:_(s32) = contract G_FSQRT %x
+    %one:_(s32) = G_FCONSTANT float 1.0
+    %rsq:_(s32) = contract G_FDIV %one, %sqrt
+    $vgpr0 = COPY %rsq
+
+...
+
+---
+name:            neg_rsq_f32
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0
+
+    ; GCN-LABEL: name: neg_rsq_f32
+    ; GCN: liveins: $vgpr0
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: %x:_(s32) = COPY $vgpr0
+    ; GCN-NEXT: %sqrt:_(s32) = contract G_FSQRT %x
+    ; GCN-NEXT: %neg_one:_(s32) = G_FCONSTANT float -1.000000e+00
+    ; GCN-NEXT: %rsq:_(s32) = contract G_FDIV %neg_one, %sqrt
+    ; GCN-NEXT: $vgpr0 = COPY %rsq(s32)
+    %x:_(s32) = COPY $vgpr0
+    %sqrt:_(s32) = contract G_FSQRT %x
+    %neg_one:_(s32) = G_FCONSTANT float -1.0
+    %rsq:_(s32) = contract G_FDIV %neg_one, %sqrt
+    $vgpr0 = COPY %rsq
+
+...
+
+---
+name:            afn_rsq_f32
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0
+
+    ; GCN-LABEL: name: afn_rsq_f32
+    ; GCN: liveins: $vgpr0
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: %x:_(s32) = COPY $vgpr0
+    ; GCN-NEXT: %sqrt:_(s32) = contract afn G_FSQRT %x
+    ; GCN-NEXT: %one:_(s32) = G_FCONSTANT float 1.000000e+00
+    ; GCN-NEXT: %rsq:_(s32) = contract afn G_FDIV %one, %sqrt
+    ; GCN-NEXT: $vgpr0 = COPY %rsq(s32)
+    %x:_(s32) = COPY $vgpr0
+    %sqrt:_(s32) = contract afn G_FSQRT %x
+    %one:_(s32) = G_FCONSTANT float 1.0
+    %rsq:_(s32) = contract afn G_FDIV %one, %sqrt
+    $vgpr0 = COPY %rsq
+
+...
+
+---
+name:            afn_rsq_f32_multi_use
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0
+
+    ; GCN-LABEL: name: afn_rsq_f32_multi_use
+    ; GCN: liveins: $vgpr0
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: %x:_(s32) = COPY $vgpr0
+    ; GCN-NEXT: %sqrt:_(s32) = contract afn G_FSQRT %x
+    ; GCN-NEXT: %one:_(s32) = G_FCONSTANT float 1.000000e+00
+    ; GCN-NEXT: %rsq:_(s32) = contract afn G_FDIV %one, %sqrt
+    ; GCN-NEXT: %ret:_(s32) = G_FSUB %sqrt, %rsq
+    ; GCN-NEXT: $vgpr0 = COPY %ret(s32)
+    %x:_(s32) = COPY $vgpr0
+    %sqrt:_(s32) = contract afn G_FSQRT %x
+    %one:_(s32) = G_FCONSTANT float 1.0
+    %rsq:_(s32) = contract afn G_FDIV %one, %sqrt
+    %ret:_(s32) = G_FSUB %sqrt, %rsq
+    $vgpr0 = COPY %ret
+
+...
+
+---
+name:            afn_neg_rsq_f32
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0
+
+    ; GCN-LABEL: name: afn_neg_rsq_f32
+    ; GCN: liveins: $vgpr0
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: %x:_(s32) = COPY $vgpr0
+    ; GCN-NEXT: %sqrt:_(s32) = contract afn G_FSQRT %x
+    ; GCN-NEXT: %neg_one:_(s32) = G_FCONSTANT float -1.000000e+00
+    ; GCN-NEXT: %rsq:_(s32) = contract afn G_FDIV %neg_one, %sqrt
+    ; GCN-NEXT: $vgpr0 = COPY %rsq(s32)
+    %x:_(s32) = COPY $vgpr0
+    %sqrt:_(s32) = contract afn G_FSQRT %x
+    %neg_one:_(s32) = G_FCONSTANT float -1.0
+    %rsq:_(s32) = contract afn G_FDIV %neg_one, %sqrt
+    $vgpr0 = COPY %rsq
+
+...
+
+
+---
+name:            rsq_f64
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0
+
+    ; GCN-LABEL: name: rsq_f64
+    ; GCN: liveins: $vgpr0
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GCN-NEXT: %x:_(s64) = G_ANYEXT [[COPY]](s32)
+    ; GCN-NEXT: %sqrt:_(s64) = contract G_FSQRT %x
+    ; GCN-NEXT: %one:_(s64) = G_FCONSTANT double 1.000000e+00
+    ; GCN-NEXT: %rsq:_(s64) = contract G_FDIV %one, %sqrt
+    ; GCN-NEXT: %ext:_(s32) = G_TRUNC %rsq(s64)
+    ; GCN-NEXT: $vgpr0 = COPY %ext(s32)
+    %0:_(s32) = COPY $vgpr0
+    %x:_(s64) = G_ANYEXT %0:_(s32)
+    %sqrt:_(s64) = contract G_FSQRT %x
+    %one:_(s64) = G_FCONSTANT double 1.0
+    %rsq:_(s64) = contract G_FDIV %one, %sqrt
+    %ext:_(s32) = G_TRUNC %rsq:_(s64)
+    $vgpr0 = COPY %ext
+
+...
+
+---
+name:            neg_rsq_f64
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0
+
+    ; GCN-LABEL: name: neg_rsq_f64
+    ; GCN: liveins: $vgpr0
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GCN-NEXT: %x:_(s64) = G_ANYEXT [[COPY]](s32)
+    ; GCN-NEXT: %sqrt:_(s64) = contract G_FSQRT %x
+    ; GCN-NEXT: %neg_one:_(s64) = G_FCONSTANT double -1.000000e+00
+    ; GCN-NEXT: %rsq:_(s64) = contract G_FDIV %neg_one, %sqrt
+    ; GCN-NEXT: %ext:_(s32) = G_TRUNC %rsq(s64)
+    ; GCN-NEXT: $vgpr0 = COPY %ext(s32)
+    %0:_(s32) = COPY $vgpr0
+    %x:_(s64) = G_ANYEXT %0:_(s32)
+    %sqrt:_(s64) = contract G_FSQRT %x
+    %neg_one:_(s64) = G_FCONSTANT double -1.0
+    %rsq:_(s64) = contract G_FDIV %neg_one, %sqrt
+    %ext:_(s32) = G_TRUNC %rsq:_(s64)
+    $vgpr0 = COPY %ext
+
+...
+
+---
+name:            afn_rsq_f64
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0
+
+    ; GCN-LABEL: name: afn_rsq_f64
+    ; GCN: liveins: $vgpr0
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GCN-NEXT: %x:_(s64) = G_ANYEXT [[COPY]](s32)
+    ; GCN-NEXT: %sqrt:_(s64) = contract afn G_FSQRT %x
+    ; GCN-NEXT: %one:_(s64) = G_FCONSTANT double 1.000000e+00
+    ; GCN-NEXT: %rsq:_(s64) = contract afn G_FDIV %one, %sqrt
+    ; GCN-NEXT: %ext:_(s32) = G_TRUNC %rsq(s64)
+    ; GCN-NEXT: $vgpr0 = COPY %ext(s32)
+    %0:_(s32) = COPY $vgpr0
+    %x:_(s64) = G_ANYEXT %0:_(s32)
+    %sqrt:_(s64) = contract afn G_FSQRT %x
+    %one:_(s64) = G_FCONSTANT double 1.0
+    %rsq:_(s64) = contract afn G_FDIV %one, %sqrt
+    %ext:_(s32) = G_TRUNC %rsq:_(s64)
+    $vgpr0 = COPY %ext
+
+...
+
+---
+name:            afn_neg_rsq_f64
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0
+
+    ; GCN-LABEL: name: afn_neg_rsq_f64
+    ; GCN: liveins: $vgpr0
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GCN-NEXT: %x:_(s64) = G_ANYEXT [[COPY]](s32)
+    ; GCN-NEXT: %sqrt:_(s64) = contract afn G_FSQRT %x
+    ; GCN-NEXT: %neg_one:_(s64) = G_FCONSTANT double -1.000000e+00
+    ; GCN-NEXT: %rsq:_(s64) = contract afn G_FDIV %neg_one, %sqrt
+    ; GCN-NEXT: %ext:_(s32) = G_TRUNC %rsq(s64)
+    ; GCN-NEXT: $vgpr0 = COPY %ext(s32)
+    %0:_(s32) = COPY $vgpr0
+    %x:_(s64) = G_ANYEXT %0:_(s32)
+    %sqrt:_(s64) = contract afn G_FSQRT %x
+    %neg_one:_(s64) = G_FCONSTANT double -1.0
+    %rsq:_(s64) = contract afn G_FDIV %neg_one, %sqrt
+    %ext:_(s32) = G_TRUNC %rsq:_(s64)
+    $vgpr0 = COPY %ext
+
+...
+
+
+---
+name:            rsq_fract_num_f16
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0
+
+    ; GCN-LABEL: name: rsq_fract_num_f16
+    ; GCN: liveins: $vgpr0
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GCN-NEXT: %x:_(s16) = G_TRUNC [[COPY]](s32)
+    ; GCN-NEXT: %fract:_(s16) = G_FCONSTANT half 0xH3800
+    ; GCN-NEXT: [[INT:%[0-9]+]]:_(s16) = contract G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %x(s16)
+    ; GCN-NEXT: %rsq:_(s16) = contract G_FMUL [[INT]], %fract
+    ; GCN-NEXT: %ext:_(s32) = G_ANYEXT %rsq(s16)
+    ; GCN-NEXT: $vgpr0 = COPY %ext(s32)
+    %0:_(s32) = COPY $vgpr0
+    %x:_(s16) = G_TRUNC %0:_(s32)
+    %sqrt:_(s16) = contract G_FSQRT %x
+    %fract:_(s16) = G_FCONSTANT half 0.5
+    %rsq:_(s16) = contract G_FDIV %fract, %sqrt
+    %ext:_(s32) = G_ANYEXT %rsq:_(s16)
+    $vgpr0 = COPY %ext
+
+...
+
+---
+name:            neg_rsq_fract_num_f16
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0
+
+    ; GCN-LABEL: name: neg_rsq_fract_num_f16
+    ; GCN: liveins: $vgpr0
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GCN-NEXT: %x:_(s16) = G_TRUNC [[COPY]](s32)
+    ; GCN-NEXT: %neg_fract:_(s16) = G_FCONSTANT half 0xHB800
+    ; GCN-NEXT: [[INT:%[0-9]+]]:_(s16) = contract G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %x(s16)
+    ; GCN-NEXT: %rsq:_(s16) = contract G_FMUL [[INT]], %neg_fract
+    ; GCN-NEXT: %ext:_(s32) = G_ANYEXT %rsq(s16)
+    ; GCN-NEXT: $vgpr0 = COPY %ext(s32)
+    %0:_(s32) = COPY $vgpr0
+    %x:_(s16) = G_TRUNC %0:_(s32)
+    %sqrt:_(s16) = contract G_FSQRT %x
+    %neg_fract:_(s16) = G_FCONSTANT half -0.5
+    %rsq:_(s16) = contract G_FDIV %neg_fract, %sqrt
+    %ext:_(s32) = G_ANYEXT %rsq:_(s16)
+    $vgpr0 = COPY %ext
+
+
+...
+
+---
+name:            rsq_large_num_f16
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0
+
+    ; GCN-LABEL: name: rsq_large_num_f16
+    ; GCN: liveins: $vgpr0
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GCN-NEXT: %x:_(s16) = G_TRUNC [[COPY]](s32)
+    ; GCN-NEXT: %ten:_(s16) = G_FCONSTANT half 0xH4900
+    ; GCN-NEXT: [[INT:%[0-9]+]]:_(s16) = contract G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %x(s16)
+    ; GCN-NEXT: %rsq:_(s16) = contract G_FMUL [[INT]], %ten
+    ; GCN-NEXT: %ext:_(s32) = G_ANYEXT %rsq(s16)
+    ; GCN-NEXT: $vgpr0 = COPY %ext(s32)
+    %0:_(s32) = COPY $vgpr0
+    %x:_(s16) = G_TRUNC %0:_(s32)
+    %sqrt:_(s16) = contract G_FSQRT %x
+    %ten:_(s16) = G_FCONSTANT half 10.0
+    %rsq:_(s16) = contract G_FDIV %ten, %sqrt
+    %ext:_(s32) = G_ANYEXT %rsq:_(s16)
+    $vgpr0 = COPY %ext
+
+...
+
+---
+name:            neg_rsq_large_num_f16
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0
+
+    ; GCN-LABEL: name: neg_rsq_large_num_f16
+    ; GCN: liveins: $vgpr0
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GCN-NEXT: %x:_(s16) = G_TRUNC [[COPY]](s32)
+    ; GCN-NEXT: %neg_ten:_(s16) = G_FCONSTANT half 0xHC900
+    ; GCN-NEXT: [[INT:%[0-9]+]]:_(s16) = contract G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %x(s16)
+    ; GCN-NEXT: %rsq:_(s16) = contract G_FMUL [[INT]], %neg_ten
+    ; GCN-NEXT: %ext:_(s32) = G_ANYEXT %rsq(s16)
+    ; GCN-NEXT: $vgpr0 = COPY %ext(s32)
+    %0:_(s32) = COPY $vgpr0
+    %x:_(s16) = G_TRUNC %0:_(s32)
+    %sqrt:_(s16) = contract G_FSQRT %x
+    %neg_ten:_(s16) = G_FCONSTANT half -10.0
+    %rsq:_(s16) = contract G_FDIV %neg_ten, %sqrt
+    %ext:_(s32) = G_ANYEXT %rsq:_(s16)
+    $vgpr0 = COPY %ext
+
+...



More information about the llvm-commits mailing list