[llvm] GlobalISel needs fdiv 1 / sqrt(x) to rsq combine (PR #78673)
Nick Anderson via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 22 00:33:03 PST 2024
https://github.com/nickleus27 updated https://github.com/llvm/llvm-project/pull/78673
>From ac88229787d70493726c9682976706038cf7e95e Mon Sep 17 00:00:00 2001
From: Nick Anderson <nickleus27 at gmail.com>
Date: Mon, 15 Jan 2024 02:38:21 -0800
Subject: [PATCH] GlobalISel needs fdiv 1 / sqrt(x) to rsq combine
---
llvm/lib/Target/AMDGPU/AMDGPUCombine.td | 8 +-
.../AMDGPU/AMDGPUPostLegalizerCombiner.cpp | 23 +
.../GlobalISel/combine-fdiv-sqrt-to-rsq.mir | 584 ++++++++++++++++++
3 files changed, 614 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fdiv-sqrt-to-rsq.mir
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
index b9411e2052120d..9218760538dc5d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
@@ -33,6 +33,12 @@ def rcp_sqrt_to_rsq : GICombineRule<
[{ return matchRcpSqrtToRsq(*${rcp}, ${matchinfo}); }]),
(apply [{ Helper.applyBuildFn(*${rcp}, ${matchinfo}); }])>;
+def fdiv_by_sqrt_to_rsq_f16 : GICombineRule< // f16 only: y / sqrt(x) -> rsq(x) * y
+ (defs root:$root),
+ (match (G_FSQRT f16:$sqrt, $x, (MIFlags FmContract)), // the sqrt must carry the contract flag
+ (G_FDIV f16:$dst, $y, $sqrt, (MIFlags FmContract)):$root, // root: contract fdiv whose denominator is that sqrt
+ [{ return matchFDivSqrtToRsqF16(*${root}); }]), // additional C++ predicate evaluated on the root
+ (apply [{ applyFDivSqrtToRsqF16(*${root}, ${x}.getReg()); }])>; // the sqrt input $x is passed on as a Register
def cvt_f32_ubyteN_matchdata : GIDefMatchData<"CvtF32UByteMatchInfo">;
@@ -156,7 +162,7 @@ def AMDGPUPostLegalizerCombiner: GICombiner<
"AMDGPUPostLegalizerCombinerImpl",
[all_combines, gfx6gfx7_combines, gfx8_combines,
uchar_to_float, cvt_f32_ubyteN, remove_fcanonicalize, foldable_fneg,
- rcp_sqrt_to_rsq, sign_extension_in_reg, smulu64]> {
+ rcp_sqrt_to_rsq, fdiv_by_sqrt_to_rsq_f16, sign_extension_in_reg, smulu64]> {
let CombineAllMethodName = "tryCombineAllImpl";
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
index a1c34e92a57f35..82e17ddad851fd 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
@@ -83,6 +83,9 @@ class AMDGPUPostLegalizerCombinerImpl : public Combiner {
matchRcpSqrtToRsq(MachineInstr &MI,
std::function<void(MachineIRBuilder &)> &MatchInfo) const;
+ bool matchFDivSqrtToRsqF16(MachineInstr &MI) const;
+ void applyFDivSqrtToRsqF16(MachineInstr &MI, const Register &X) const;
+
// FIXME: Should be able to have 2 separate matchdatas rather than custom
// struct boilerplate.
struct CvtF32UByteMatchInfo {
@@ -334,6 +337,26 @@ bool AMDGPUPostLegalizerCombinerImpl::matchRcpSqrtToRsq(
return false;
}
+bool AMDGPUPostLegalizerCombinerImpl::matchFDivSqrtToRsqF16(
+    MachineInstr &MI) const {
+  // Operand 2 of the root is the denominator; require a single non-debug use.
+  return MRI.hasOneNonDBGUse(MI.getOperand(2).getReg());
+}
+
+void AMDGPUPostLegalizerCombinerImpl::applyFDivSqrtToRsqF16(
+    MachineInstr &MI, const Register &X) const {
+  // Rewrite y / sqrt(x) as rsq(x) * y, putting MI's flags on both new instrs.
+  Register Result = MI.getOperand(0).getReg();
+  Register Numerator = MI.getOperand(1).getReg();
+  LLT ResultTy = MRI.getType(Result);
+  uint32_t FDivFlags = MI.getFlags();
+
+  auto Rsq = B.buildIntrinsic(Intrinsic::amdgcn_rsq, {ResultTy});
+  Rsq.addUse(X).setMIFlags(FDivFlags);
+  B.buildFMul(Result, Rsq.getReg(0), Numerator, FDivFlags);
+  MI.eraseFromParent();
+}
+
bool AMDGPUPostLegalizerCombinerImpl::matchCvtF32UByteN(
MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo) const {
Register SrcReg = MI.getOperand(1).getReg();
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fdiv-sqrt-to-rsq.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fdiv-sqrt-to-rsq.mir
new file mode 100644
index 00000000000000..6c5339e36c77f4
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fdiv-sqrt-to-rsq.mir
@@ -0,0 +1,584 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=amdgpu-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
+
+---
+name: rsq_f16
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GCN-LABEL: name: rsq_f16
+ ; GCN: liveins: $vgpr0
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: %x:_(s16) = G_TRUNC [[COPY]](s32)
+ ; GCN-NEXT: [[INT:%[0-9]+]]:_(s16) = contract G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %x(s16)
+ ; GCN-NEXT: %ext:_(s32) = G_ANYEXT [[INT]](s16)
+ ; GCN-NEXT: $vgpr0 = COPY %ext(s32)
+ %0:_(s32) = COPY $vgpr0
+ %x:_(s16) = G_TRUNC %0:_(s32)
+ %sqrt:_(s16) = contract G_FSQRT %x
+ %one:_(s16) = G_FCONSTANT half 1.0
+ %rsq:_(s16) = contract G_FDIV %one, %sqrt
+ %ext:_(s32) = G_ANYEXT %rsq:_(s16)
+ $vgpr0 = COPY %ext
+
+...
+
+---
+name: rsq_f16_missing_contract0
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GCN-LABEL: name: rsq_f16_missing_contract0
+ ; GCN: liveins: $vgpr0
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: %x:_(s16) = G_TRUNC [[COPY]](s32)
+ ; GCN-NEXT: %sqrt:_(s16) = G_FSQRT %x
+ ; GCN-NEXT: %one:_(s16) = G_FCONSTANT half 0xH3C00
+ ; GCN-NEXT: %rsq:_(s16) = contract G_FDIV %one, %sqrt
+ ; GCN-NEXT: %ext:_(s32) = G_ANYEXT %rsq(s16)
+ ; GCN-NEXT: $vgpr0 = COPY %ext(s32)
+ %0:_(s32) = COPY $vgpr0
+ %x:_(s16) = G_TRUNC %0:_(s32)
+ %sqrt:_(s16) = G_FSQRT %x
+ %one:_(s16) = G_FCONSTANT half 1.0
+ %rsq:_(s16) = contract G_FDIV %one, %sqrt
+ %ext:_(s32) = G_ANYEXT %rsq:_(s16)
+ $vgpr0 = COPY %ext
+
+...
+
+---
+name: rsq_f16_missing_contract1
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GCN-LABEL: name: rsq_f16_missing_contract1
+ ; GCN: liveins: $vgpr0
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: %x:_(s16) = G_TRUNC [[COPY]](s32)
+ ; GCN-NEXT: %sqrt:_(s16) = contract G_FSQRT %x
+ ; GCN-NEXT: %one:_(s16) = G_FCONSTANT half 0xH3C00
+ ; GCN-NEXT: %rsq:_(s16) = G_FDIV %one, %sqrt
+ ; GCN-NEXT: %ext:_(s32) = G_ANYEXT %rsq(s16)
+ ; GCN-NEXT: $vgpr0 = COPY %ext(s32)
+ %0:_(s32) = COPY $vgpr0
+ %x:_(s16) = G_TRUNC %0:_(s32)
+ %sqrt:_(s16) = contract G_FSQRT %x
+ %one:_(s16) = G_FCONSTANT half 1.0
+ %rsq:_(s16) = G_FDIV %one, %sqrt
+ %ext:_(s32) = G_ANYEXT %rsq:_(s16)
+ $vgpr0 = COPY %ext
+
+...
+
+---
+name: neg_rsq_f16
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GCN-LABEL: name: neg_rsq_f16
+ ; GCN: liveins: $vgpr0
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: %x:_(s16) = G_TRUNC [[COPY]](s32)
+ ; GCN-NEXT: [[INT:%[0-9]+]]:_(s16) = contract G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %x(s16)
+ ; GCN-NEXT: %rsq:_(s16) = contract G_FNEG [[INT]]
+ ; GCN-NEXT: %ext:_(s32) = G_ANYEXT %rsq(s16)
+ ; GCN-NEXT: $vgpr0 = COPY %ext(s32)
+ %0:_(s32) = COPY $vgpr0
+ %x:_(s16) = G_TRUNC %0:_(s32)
+ %sqrt:_(s16) = contract G_FSQRT %x
+ %neg_one:_(s16) = G_FCONSTANT half -1.0
+ %rsq:_(s16) = contract G_FDIV %neg_one, %sqrt
+ %ext:_(s32) = G_ANYEXT %rsq:_(s16)
+ $vgpr0 = COPY %ext
+
+...
+
+---
+name: neg_rsq_f16_missing_contract0
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GCN-LABEL: name: neg_rsq_f16_missing_contract0
+ ; GCN: liveins: $vgpr0
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: %x:_(s16) = G_TRUNC [[COPY]](s32)
+ ; GCN-NEXT: %sqrt:_(s16) = G_FSQRT %x
+ ; GCN-NEXT: %neg_one:_(s16) = G_FCONSTANT half 0xHBC00
+ ; GCN-NEXT: %rsq:_(s16) = contract G_FDIV %neg_one, %sqrt
+ ; GCN-NEXT: %ext:_(s32) = G_ANYEXT %rsq(s16)
+ ; GCN-NEXT: $vgpr0 = COPY %ext(s32)
+ %0:_(s32) = COPY $vgpr0
+ %x:_(s16) = G_TRUNC %0:_(s32)
+ %sqrt:_(s16) = G_FSQRT %x
+ %neg_one:_(s16) = G_FCONSTANT half -1.0
+ %rsq:_(s16) = contract G_FDIV %neg_one, %sqrt
+ %ext:_(s32) = G_ANYEXT %rsq:_(s16)
+ $vgpr0 = COPY %ext
+
+...
+
+---
+name: neg_rsq_f16_missing_contract1
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GCN-LABEL: name: neg_rsq_f16_missing_contract1
+ ; GCN: liveins: $vgpr0
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: %x:_(s16) = G_TRUNC [[COPY]](s32)
+ ; GCN-NEXT: %sqrt:_(s16) = contract G_FSQRT %x
+ ; GCN-NEXT: %neg_one:_(s16) = G_FCONSTANT half 0xHBC00
+ ; GCN-NEXT: %rsq:_(s16) = G_FDIV %neg_one, %sqrt
+ ; GCN-NEXT: %ext:_(s32) = G_ANYEXT %rsq(s16)
+ ; GCN-NEXT: $vgpr0 = COPY %ext(s32)
+ %0:_(s32) = COPY $vgpr0
+ %x:_(s16) = G_TRUNC %0:_(s32)
+ %sqrt:_(s16) = contract G_FSQRT %x
+ %neg_one:_(s16) = G_FCONSTANT half -1.0
+ %rsq:_(s16) = G_FDIV %neg_one, %sqrt
+ %ext:_(s32) = G_ANYEXT %rsq:_(s16)
+ $vgpr0 = COPY %ext
+
+...
+
+---
+name: rsq_f16_multi_use
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GCN-LABEL: name: rsq_f16_multi_use
+ ; GCN: liveins: $vgpr0
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: %x:_(s16) = G_TRUNC [[COPY]](s32)
+ ; GCN-NEXT: %sqrt:_(s16) = contract G_FSQRT %x
+ ; GCN-NEXT: %one:_(s16) = G_FCONSTANT half 0xH3C00
+ ; GCN-NEXT: %rsq:_(s16) = contract G_FDIV %one, %sqrt
+ ; GCN-NEXT: %ext:_(s32) = G_ANYEXT %rsq(s16)
+ ; GCN-NEXT: $vgpr0 = COPY %ext(s32)
+ ; GCN-NEXT: S_ENDPGM 0, implicit %sqrt(s16)
+ %0:_(s32) = COPY $vgpr0
+ %x:_(s16) = G_TRUNC %0:_(s32)
+ %sqrt:_(s16) = contract G_FSQRT %x
+ %one:_(s16) = G_FCONSTANT half 1.0
+ %rsq:_(s16) = contract G_FDIV %one, %sqrt
+ %ext:_(s32) = G_ANYEXT %rsq:_(s16)
+ $vgpr0 = COPY %ext
+ S_ENDPGM 0, implicit %sqrt
+
+...
+
+---
+name: rsq_f16_multi_use_missing_contract0
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GCN-LABEL: name: rsq_f16_multi_use_missing_contract0
+ ; GCN: liveins: $vgpr0
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: %x:_(s16) = G_TRUNC [[COPY]](s32)
+ ; GCN-NEXT: %sqrt:_(s16) = G_FSQRT %x
+ ; GCN-NEXT: %one:_(s16) = G_FCONSTANT half 0xH3C00
+ ; GCN-NEXT: %rsq:_(s16) = contract G_FDIV %one, %sqrt
+ ; GCN-NEXT: %ext:_(s32) = G_ANYEXT %rsq(s16)
+ ; GCN-NEXT: $vgpr0 = COPY %ext(s32)
+ ; GCN-NEXT: S_ENDPGM 0, implicit %sqrt(s16)
+ %0:_(s32) = COPY $vgpr0
+ %x:_(s16) = G_TRUNC %0:_(s32)
+ %sqrt:_(s16) = G_FSQRT %x
+ %one:_(s16) = G_FCONSTANT half 1.0
+ %rsq:_(s16) = contract G_FDIV %one, %sqrt
+ %ext:_(s32) = G_ANYEXT %rsq:_(s16)
+ $vgpr0 = COPY %ext
+ S_ENDPGM 0, implicit %sqrt
+
+...
+
+---
+name: rsq_f16_multi_use_missing_contract1
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GCN-LABEL: name: rsq_f16_multi_use_missing_contract1
+ ; GCN: liveins: $vgpr0
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: %x:_(s16) = G_TRUNC [[COPY]](s32)
+ ; GCN-NEXT: %sqrt:_(s16) = contract G_FSQRT %x
+ ; GCN-NEXT: %one:_(s16) = G_FCONSTANT half 0xH3C00
+ ; GCN-NEXT: %rsq:_(s16) = G_FDIV %one, %sqrt
+ ; GCN-NEXT: %ext:_(s32) = G_ANYEXT %rsq(s16)
+ ; GCN-NEXT: $vgpr0 = COPY %ext(s32)
+ ; GCN-NEXT: S_ENDPGM 0, implicit %sqrt(s16)
+ %0:_(s32) = COPY $vgpr0
+ %x:_(s16) = G_TRUNC %0:_(s32)
+ %sqrt:_(s16) = contract G_FSQRT %x
+ %one:_(s16) = G_FCONSTANT half 1.0
+ %rsq:_(s16) = G_FDIV %one, %sqrt
+ %ext:_(s32) = G_ANYEXT %rsq:_(s16)
+ $vgpr0 = COPY %ext
+ S_ENDPGM 0, implicit %sqrt
+
+...
+
+---
+name: rsq_f32
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GCN-LABEL: name: rsq_f32
+ ; GCN: liveins: $vgpr0
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: %x:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: %sqrt:_(s32) = contract G_FSQRT %x
+ ; GCN-NEXT: %one:_(s32) = G_FCONSTANT float 1.000000e+00
+ ; GCN-NEXT: %rsq:_(s32) = contract G_FDIV %one, %sqrt
+ ; GCN-NEXT: $vgpr0 = COPY %rsq(s32)
+ %x:_(s32) = COPY $vgpr0
+ %sqrt:_(s32) = contract G_FSQRT %x
+ %one:_(s32) = G_FCONSTANT float 1.0
+ %rsq:_(s32) = contract G_FDIV %one, %sqrt
+ $vgpr0 = COPY %rsq
+
+...
+
+---
+name: neg_rsq_f32
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GCN-LABEL: name: neg_rsq_f32
+ ; GCN: liveins: $vgpr0
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: %x:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: %sqrt:_(s32) = contract G_FSQRT %x
+ ; GCN-NEXT: %neg_one:_(s32) = G_FCONSTANT float -1.000000e+00
+ ; GCN-NEXT: %rsq:_(s32) = contract G_FDIV %neg_one, %sqrt
+ ; GCN-NEXT: $vgpr0 = COPY %rsq(s32)
+ %x:_(s32) = COPY $vgpr0
+ %sqrt:_(s32) = contract G_FSQRT %x
+ %neg_one:_(s32) = G_FCONSTANT float -1.0
+ %rsq:_(s32) = contract G_FDIV %neg_one, %sqrt
+ $vgpr0 = COPY %rsq
+
+...
+
+---
+name: afn_rsq_f32
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GCN-LABEL: name: afn_rsq_f32
+ ; GCN: liveins: $vgpr0
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: %x:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: %sqrt:_(s32) = contract afn G_FSQRT %x
+ ; GCN-NEXT: %one:_(s32) = G_FCONSTANT float 1.000000e+00
+ ; GCN-NEXT: %rsq:_(s32) = contract afn G_FDIV %one, %sqrt
+ ; GCN-NEXT: $vgpr0 = COPY %rsq(s32)
+ %x:_(s32) = COPY $vgpr0
+ %sqrt:_(s32) = contract afn G_FSQRT %x
+ %one:_(s32) = G_FCONSTANT float 1.0
+ %rsq:_(s32) = contract afn G_FDIV %one, %sqrt
+ $vgpr0 = COPY %rsq
+
+...
+
+---
+name: afn_rsq_f32_multi_use
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GCN-LABEL: name: afn_rsq_f32_multi_use
+ ; GCN: liveins: $vgpr0
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: %x:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: %sqrt:_(s32) = contract afn G_FSQRT %x
+ ; GCN-NEXT: %one:_(s32) = G_FCONSTANT float 1.000000e+00
+ ; GCN-NEXT: %rsq:_(s32) = contract afn G_FDIV %one, %sqrt
+ ; GCN-NEXT: %ret:_(s32) = G_FSUB %sqrt, %rsq
+ ; GCN-NEXT: $vgpr0 = COPY %ret(s32)
+ %x:_(s32) = COPY $vgpr0
+ %sqrt:_(s32) = contract afn G_FSQRT %x
+ %one:_(s32) = G_FCONSTANT float 1.0
+ %rsq:_(s32) = contract afn G_FDIV %one, %sqrt
+ %ret:_(s32) = G_FSUB %sqrt, %rsq
+ $vgpr0 = COPY %ret
+
+...
+
+---
+name: afn_neg_rsq_f32
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GCN-LABEL: name: afn_neg_rsq_f32
+ ; GCN: liveins: $vgpr0
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: %x:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: %sqrt:_(s32) = contract afn G_FSQRT %x
+ ; GCN-NEXT: %neg_one:_(s32) = G_FCONSTANT float -1.000000e+00
+ ; GCN-NEXT: %rsq:_(s32) = contract afn G_FDIV %neg_one, %sqrt
+ ; GCN-NEXT: $vgpr0 = COPY %rsq(s32)
+ %x:_(s32) = COPY $vgpr0
+ %sqrt:_(s32) = contract afn G_FSQRT %x
+ %neg_one:_(s32) = G_FCONSTANT float -1.0
+ %rsq:_(s32) = contract afn G_FDIV %neg_one, %sqrt
+ $vgpr0 = COPY %rsq
+
+...
+
+
+---
+name: rsq_f64
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GCN-LABEL: name: rsq_f64
+ ; GCN: liveins: $vgpr0
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: %x:_(s64) = G_ANYEXT [[COPY]](s32)
+ ; GCN-NEXT: %sqrt:_(s64) = contract G_FSQRT %x
+ ; GCN-NEXT: %one:_(s64) = G_FCONSTANT double 1.000000e+00
+ ; GCN-NEXT: %rsq:_(s64) = contract G_FDIV %one, %sqrt
+ ; GCN-NEXT: %ext:_(s32) = G_TRUNC %rsq(s64)
+ ; GCN-NEXT: $vgpr0 = COPY %ext(s32)
+ %0:_(s32) = COPY $vgpr0
+ %x:_(s64) = G_ANYEXT %0:_(s32)
+ %sqrt:_(s64) = contract G_FSQRT %x
+ %one:_(s64) = G_FCONSTANT double 1.0
+ %rsq:_(s64) = contract G_FDIV %one, %sqrt
+ %ext:_(s32) = G_TRUNC %rsq:_(s64)
+ $vgpr0 = COPY %ext
+
+...
+
+---
+name: neg_rsq_f64
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GCN-LABEL: name: neg_rsq_f64
+ ; GCN: liveins: $vgpr0
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: %x:_(s64) = G_ANYEXT [[COPY]](s32)
+ ; GCN-NEXT: %sqrt:_(s64) = contract G_FSQRT %x
+ ; GCN-NEXT: %neg_one:_(s64) = G_FCONSTANT double -1.000000e+00
+ ; GCN-NEXT: %rsq:_(s64) = contract G_FDIV %neg_one, %sqrt
+ ; GCN-NEXT: %ext:_(s32) = G_TRUNC %rsq(s64)
+ ; GCN-NEXT: $vgpr0 = COPY %ext(s32)
+ %0:_(s32) = COPY $vgpr0
+ %x:_(s64) = G_ANYEXT %0:_(s32)
+ %sqrt:_(s64) = contract G_FSQRT %x
+ %neg_one:_(s64) = G_FCONSTANT double -1.0
+ %rsq:_(s64) = contract G_FDIV %neg_one, %sqrt
+ %ext:_(s32) = G_TRUNC %rsq:_(s64)
+ $vgpr0 = COPY %ext
+
+...
+
+---
+name: afn_rsq_f64
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GCN-LABEL: name: afn_rsq_f64
+ ; GCN: liveins: $vgpr0
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: %x:_(s64) = G_ANYEXT [[COPY]](s32)
+ ; GCN-NEXT: %sqrt:_(s64) = contract afn G_FSQRT %x
+ ; GCN-NEXT: %one:_(s64) = G_FCONSTANT double 1.000000e+00
+ ; GCN-NEXT: %rsq:_(s64) = contract afn G_FDIV %one, %sqrt
+ ; GCN-NEXT: %ext:_(s32) = G_TRUNC %rsq(s64)
+ ; GCN-NEXT: $vgpr0 = COPY %ext(s32)
+ %0:_(s32) = COPY $vgpr0
+ %x:_(s64) = G_ANYEXT %0:_(s32)
+ %sqrt:_(s64) = contract afn G_FSQRT %x
+ %one:_(s64) = G_FCONSTANT double 1.0
+ %rsq:_(s64) = contract afn G_FDIV %one, %sqrt
+ %ext:_(s32) = G_TRUNC %rsq:_(s64)
+ $vgpr0 = COPY %ext
+
+...
+
+---
+name: afn_neg_rsq_f64
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GCN-LABEL: name: afn_neg_rsq_f64
+ ; GCN: liveins: $vgpr0
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: %x:_(s64) = G_ANYEXT [[COPY]](s32)
+ ; GCN-NEXT: %sqrt:_(s64) = contract afn G_FSQRT %x
+ ; GCN-NEXT: %neg_one:_(s64) = G_FCONSTANT double -1.000000e+00
+ ; GCN-NEXT: %rsq:_(s64) = contract afn G_FDIV %neg_one, %sqrt
+ ; GCN-NEXT: %ext:_(s32) = G_TRUNC %rsq(s64)
+ ; GCN-NEXT: $vgpr0 = COPY %ext(s32)
+ %0:_(s32) = COPY $vgpr0
+ %x:_(s64) = G_ANYEXT %0:_(s32)
+ %sqrt:_(s64) = contract afn G_FSQRT %x
+ %neg_one:_(s64) = G_FCONSTANT double -1.0
+ %rsq:_(s64) = contract afn G_FDIV %neg_one, %sqrt
+ %ext:_(s32) = G_TRUNC %rsq:_(s64)
+ $vgpr0 = COPY %ext
+
+...
+
+
+---
+name: rsq_fract_num_f16
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GCN-LABEL: name: rsq_fract_num_f16
+ ; GCN: liveins: $vgpr0
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: %x:_(s16) = G_TRUNC [[COPY]](s32)
+ ; GCN-NEXT: %fract:_(s16) = G_FCONSTANT half 0xH3800
+ ; GCN-NEXT: [[INT:%[0-9]+]]:_(s16) = contract G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %x(s16)
+ ; GCN-NEXT: %rsq:_(s16) = contract G_FMUL [[INT]], %fract
+ ; GCN-NEXT: %ext:_(s32) = G_ANYEXT %rsq(s16)
+ ; GCN-NEXT: $vgpr0 = COPY %ext(s32)
+ %0:_(s32) = COPY $vgpr0
+ %x:_(s16) = G_TRUNC %0:_(s32)
+ %sqrt:_(s16) = contract G_FSQRT %x
+ %fract:_(s16) = G_FCONSTANT half 0.5
+ %rsq:_(s16) = contract G_FDIV %fract, %sqrt
+ %ext:_(s32) = G_ANYEXT %rsq:_(s16)
+ $vgpr0 = COPY %ext
+
+...
+
+---
+name: neg_rsq_fract_num_f16
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GCN-LABEL: name: neg_rsq_fract_num_f16
+ ; GCN: liveins: $vgpr0
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: %x:_(s16) = G_TRUNC [[COPY]](s32)
+ ; GCN-NEXT: %neg_fract:_(s16) = G_FCONSTANT half 0xHB800
+ ; GCN-NEXT: [[INT:%[0-9]+]]:_(s16) = contract G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %x(s16)
+ ; GCN-NEXT: %rsq:_(s16) = contract G_FMUL [[INT]], %neg_fract
+ ; GCN-NEXT: %ext:_(s32) = G_ANYEXT %rsq(s16)
+ ; GCN-NEXT: $vgpr0 = COPY %ext(s32)
+ %0:_(s32) = COPY $vgpr0
+ %x:_(s16) = G_TRUNC %0:_(s32)
+ %sqrt:_(s16) = contract G_FSQRT %x
+ %neg_fract:_(s16) = G_FCONSTANT half -0.5
+ %rsq:_(s16) = contract G_FDIV %neg_fract, %sqrt
+ %ext:_(s32) = G_ANYEXT %rsq:_(s16)
+ $vgpr0 = COPY %ext
+
+
+...
+
+---
+name: rsq_large_num_f16
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GCN-LABEL: name: rsq_large_num_f16
+ ; GCN: liveins: $vgpr0
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: %x:_(s16) = G_TRUNC [[COPY]](s32)
+ ; GCN-NEXT: %ten:_(s16) = G_FCONSTANT half 0xH4900
+ ; GCN-NEXT: [[INT:%[0-9]+]]:_(s16) = contract G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %x(s16)
+ ; GCN-NEXT: %rsq:_(s16) = contract G_FMUL [[INT]], %ten
+ ; GCN-NEXT: %ext:_(s32) = G_ANYEXT %rsq(s16)
+ ; GCN-NEXT: $vgpr0 = COPY %ext(s32)
+ %0:_(s32) = COPY $vgpr0
+ %x:_(s16) = G_TRUNC %0:_(s32)
+ %sqrt:_(s16) = contract G_FSQRT %x
+ %ten:_(s16) = G_FCONSTANT half 10.0
+ %rsq:_(s16) = contract G_FDIV %ten, %sqrt
+ %ext:_(s32) = G_ANYEXT %rsq:_(s16)
+ $vgpr0 = COPY %ext
+
+...
+
+---
+name: neg_rsq_large_num_f16
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GCN-LABEL: name: neg_rsq_large_num_f16
+ ; GCN: liveins: $vgpr0
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: %x:_(s16) = G_TRUNC [[COPY]](s32)
+ ; GCN-NEXT: %neg_ten:_(s16) = G_FCONSTANT half 0xHC900
+ ; GCN-NEXT: [[INT:%[0-9]+]]:_(s16) = contract G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %x(s16)
+ ; GCN-NEXT: %rsq:_(s16) = contract G_FMUL [[INT]], %neg_ten
+ ; GCN-NEXT: %ext:_(s32) = G_ANYEXT %rsq(s16)
+ ; GCN-NEXT: $vgpr0 = COPY %ext(s32)
+ %0:_(s32) = COPY $vgpr0
+ %x:_(s16) = G_TRUNC %0:_(s32)
+ %sqrt:_(s16) = contract G_FSQRT %x
+ %neg_ten:_(s16) = G_FCONSTANT half -10.0
+ %rsq:_(s16) = contract G_FDIV %neg_ten, %sqrt
+ %ext:_(s32) = G_ANYEXT %rsq:_(s16)
+ $vgpr0 = COPY %ext
+
+...
More information about the llvm-commits
mailing list