[llvm] GlobalISel needs fdiv 1 / sqrt(x) to rsq combine (PR #78673)
Nick Anderson via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 24 00:50:41 PST 2024
https://github.com/nickleus27 updated https://github.com/llvm/llvm-project/pull/78673
>From 59a7d7d9347cb7ae13abe7886ae4575625213706 Mon Sep 17 00:00:00 2001
From: Nick Anderson <nickleus27 at gmail.com>
Date: Mon, 15 Jan 2024 02:38:21 -0800
Subject: [PATCH 1/4] GlobalISel needs fdiv 1 / sqrt(x) to rsq combine
---
llvm/lib/Target/AMDGPU/AMDGPUCombine.td | 11 ++-
.../AMDGPU/AMDGPUPostLegalizerCombiner.cpp | 15 ++++
.../GlobalISel/combine-fdiv-sqrt-to-rsq.mir | 85 +++++++++++++++++++
3 files changed, 110 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fdiv-sqrt-to-rsq.mir
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
index b9411e2052120d8..acef73a1882b8fd 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
@@ -33,6 +33,15 @@ def rcp_sqrt_to_rsq : GICombineRule<
[{ return matchRcpSqrtToRsq(*${rcp}, ${matchinfo}); }]),
(apply [{ Helper.applyBuildFn(*${rcp}, ${matchinfo}); }])>;
+def fdiv_instr : GIDefMatchData<"MachineInstr *">;
+
+def fdiv_1_by_sqrt_to_rsq : GICombineRule<
+ (defs root:$dst, fdiv_instr:$fdivinstr),
+ (match (G_FSQRT $sqrt, $x, (MIFlags FmContract)),
+ // (G_FCONSTANT $one, $fpimm), // error: ('G_FCONSTANT') is unreachable from the pattern root!
+ (G_FDIV $dst, $sqrt, $fpimm, (MIFlags FmContract)):$fdivinstr,
+ [{ return ${fpimm}.getFPImm()->isExactlyValue(1.0); }]),
+ (apply [{ applyOneFDivSqrtToRsq(*${fdivinstr}, ${x}.getReg()); }])>;
def cvt_f32_ubyteN_matchdata : GIDefMatchData<"CvtF32UByteMatchInfo">;
@@ -156,7 +165,7 @@ def AMDGPUPostLegalizerCombiner: GICombiner<
"AMDGPUPostLegalizerCombinerImpl",
[all_combines, gfx6gfx7_combines, gfx8_combines,
uchar_to_float, cvt_f32_ubyteN, remove_fcanonicalize, foldable_fneg,
- rcp_sqrt_to_rsq, sign_extension_in_reg, smulu64]> {
+ rcp_sqrt_to_rsq, fdiv_1_by_sqrt_to_rsq, sign_extension_in_reg, smulu64]> {
let CombineAllMethodName = "tryCombineAllImpl";
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
index a1c34e92a57f356..650736c60d7cea8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
@@ -83,6 +83,8 @@ class AMDGPUPostLegalizerCombinerImpl : public Combiner {
matchRcpSqrtToRsq(MachineInstr &MI,
std::function<void(MachineIRBuilder &)> &MatchInfo) const;
+ void applyOneFDivSqrtToRsq(MachineInstr &MI, const Register &X) const;
+
// FIXME: Should be able to have 2 separate matchdatas rather than custom
// struct boilerplate.
struct CvtF32UByteMatchInfo {
@@ -334,6 +336,19 @@ bool AMDGPUPostLegalizerCombinerImpl::matchRcpSqrtToRsq(
return false;
}
+void AMDGPUPostLegalizerCombinerImpl::applyOneFDivSqrtToRsq(
+ MachineInstr &MI, const Register &X) const {
+ // B.setInstrAndDebugLoc(MI);
+
+ Register Dst = MI.getOperand(0).getReg();
+
+ B.buildIntrinsic(Intrinsic::amdgcn_rsq, ArrayRef<Register>({Dst}))
+ .addUse(X)
+ .setMIFlags(MI.getFlags());
+
+ MI.eraseFromParent();
+}
+
bool AMDGPUPostLegalizerCombinerImpl::matchCvtF32UByteN(
MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo) const {
Register SrcReg = MI.getOperand(1).getReg();
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fdiv-sqrt-to-rsq.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fdiv-sqrt-to-rsq.mir
new file mode 100644
index 000000000000000..07479bc607ad8ca
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fdiv-sqrt-to-rsq.mir
@@ -0,0 +1,85 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=amdgpu-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
+
+---
+name: rsq_f16
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GCN-LABEL: name: rsq_f16
+ ; GCN: liveins: $vgpr0
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: %x:_(s16) = G_TRUNC [[COPY]](s32)
+ ; GCN-NEXT: %sqrt:_(s16) = contract G_FSQRT %x
+ ; GCN-NEXT: %one:_(s16) = G_FCONSTANT half 0xH3C00
+ ; GCN-NEXT: %rsq:_(s16) = contract G_FDIV %one, %sqrt
+ ; GCN-NEXT: %ext:_(s32) = G_ANYEXT %rsq(s16)
+ ; GCN-NEXT: $vgpr0 = COPY %ext(s32)
+ %0:_(s32) = COPY $vgpr0
+ %x:_(s16) = G_TRUNC %0:_(s32)
+ %sqrt:_(s16) = contract G_FSQRT %x
+ %one:_(s16) = G_FCONSTANT half 1.0
+ %rsq:_(s16) = contract G_FDIV %one, %sqrt
+ %ext:_(s32) = G_ANYEXT %rsq:_(s16)
+ $vgpr0 = COPY %ext
+
+...
+
+---
+name: neg_rsq_f16
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GCN-LABEL: name: neg_rsq_f16
+ ; GCN: liveins: $vgpr0
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: %x:_(s16) = G_TRUNC [[COPY]](s32)
+ ; GCN-NEXT: %sqrt:_(s16) = contract G_FSQRT %x
+ ; GCN-NEXT: %neg_one:_(s16) = G_FCONSTANT half 0xHBC00
+ ; GCN-NEXT: %rsq:_(s16) = contract G_FDIV %neg_one, %sqrt
+ ; GCN-NEXT: %ext:_(s32) = G_ANYEXT %rsq(s16)
+ ; GCN-NEXT: $vgpr0 = COPY %ext(s32)
+ %0:_(s32) = COPY $vgpr0
+ %x:_(s16) = G_TRUNC %0:_(s32)
+ %sqrt:_(s16) = contract G_FSQRT %x
+ %neg_one:_(s16) = G_FCONSTANT half -1.0
+ %rsq:_(s16) = contract G_FDIV %neg_one, %sqrt
+ %ext:_(s32) = G_ANYEXT %rsq:_(s16)
+ $vgpr0 = COPY %ext
+
+...
+
+---
+name: rsq_f16_multi_use
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GCN-LABEL: name: rsq_f16_multi_use
+ ; GCN: liveins: $vgpr0
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: %x:_(s16) = G_TRUNC [[COPY]](s32)
+ ; GCN-NEXT: %sqrt:_(s16) = contract G_FSQRT %x
+ ; GCN-NEXT: %one:_(s16) = G_FCONSTANT half 0xH3C00
+ ; GCN-NEXT: %rsq:_(s16) = contract G_FDIV %one, %sqrt
+ ; GCN-NEXT: %ext:_(s32) = G_ANYEXT %rsq(s16)
+ ; GCN-NEXT: $vgpr0 = COPY %ext(s32)
+ ; GCN-NEXT: S_ENDPGM 0, implicit %sqrt(s16)
+ %0:_(s32) = COPY $vgpr0
+ %x:_(s16) = G_TRUNC %0:_(s32)
+ %sqrt:_(s16) = contract G_FSQRT %x
+ %one:_(s16) = G_FCONSTANT half 1.0
+ %rsq:_(s16) = contract G_FDIV %one, %sqrt
+ %ext:_(s32) = G_ANYEXT %rsq:_(s16)
+ $vgpr0 = COPY %ext
+ S_ENDPGM 0, implicit %sqrt
+
+...
>From e9acad2f6b4fd7a06d646279bde12808bee10ce6 Mon Sep 17 00:00:00 2001
From: Nick Anderson <nickleus27 at gmail.com>
Date: Tue, 23 Jan 2024 01:20:54 -0800
Subject: [PATCH 2/4] fixup! GlobalISel needs fdiv 1 / sqrt(x) to rsq combine
---
llvm/lib/Target/AMDGPU/AMDGPUCombine.td | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
index acef73a1882b8fd..e8158d202a3a534 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
@@ -38,8 +38,8 @@ def fdiv_instr : GIDefMatchData<"MachineInstr *">;
def fdiv_1_by_sqrt_to_rsq : GICombineRule<
(defs root:$dst, fdiv_instr:$fdivinstr),
(match (G_FSQRT $sqrt, $x, (MIFlags FmContract)),
- // (G_FCONSTANT $one, $fpimm), // error: ('G_FCONSTANT') is unreachable from the pattern root!
- (G_FDIV $dst, $sqrt, $fpimm, (MIFlags FmContract)):$fdivinstr,
+ (G_FCONSTANT $one, $fpimm),
+ (G_FDIV $dst, $one, $sqrt, (MIFlags FmContract)):$fdivinstr,
[{ return ${fpimm}.getFPImm()->isExactlyValue(1.0); }]),
(apply [{ applyOneFDivSqrtToRsq(*${fdivinstr}, ${x}.getReg()); }])>;
>From 43026b8013796983f90646e964404f7b4cc4f8b8 Mon Sep 17 00:00:00 2001
From: Nick Anderson <nickleus27 at gmail.com>
Date: Tue, 23 Jan 2024 21:07:46 -0800
Subject: [PATCH 3/4] fixup! GlobalISel needs fdiv 1 / sqrt(x) to rsq combine
---
llvm/lib/Target/AMDGPU/AMDGPUCombine.td | 11 +++++------
.../AMDGPU/GlobalISel/combine-fdiv-sqrt-to-rsq.mir | 11 +++--------
2 files changed, 8 insertions(+), 14 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
index e8158d202a3a534..6e6a714ee70010d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
@@ -33,15 +33,14 @@ def rcp_sqrt_to_rsq : GICombineRule<
[{ return matchRcpSqrtToRsq(*${rcp}, ${matchinfo}); }]),
(apply [{ Helper.applyBuildFn(*${rcp}, ${matchinfo}); }])>;
-def fdiv_instr : GIDefMatchData<"MachineInstr *">;
-
def fdiv_1_by_sqrt_to_rsq : GICombineRule<
- (defs root:$dst, fdiv_instr:$fdivinstr),
+ (defs root:$root),
(match (G_FSQRT $sqrt, $x, (MIFlags FmContract)),
(G_FCONSTANT $one, $fpimm),
- (G_FDIV $dst, $one, $sqrt, (MIFlags FmContract)):$fdivinstr,
- [{ return ${fpimm}.getFPImm()->isExactlyValue(1.0); }]),
- (apply [{ applyOneFDivSqrtToRsq(*${fdivinstr}, ${x}.getReg()); }])>;
+ (G_FDIV $dst, $one, $sqrt, (MIFlags FmContract)):$root,
+ [{ return ${fpimm}.getFPImm()->isExactlyValue(1.0)
+ || ${fpimm}.getFPImm()->isExactlyValue(-1.0); }]),
+ (apply [{ applyOneFDivSqrtToRsq(*${root}, ${x}.getReg()); }])>;
def cvt_f32_ubyteN_matchdata : GIDefMatchData<"CvtF32UByteMatchInfo">;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fdiv-sqrt-to-rsq.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fdiv-sqrt-to-rsq.mir
index 07479bc607ad8ca..134e69768303f67 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fdiv-sqrt-to-rsq.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fdiv-sqrt-to-rsq.mir
@@ -13,9 +13,7 @@ body: |
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: %x:_(s16) = G_TRUNC [[COPY]](s32)
- ; GCN-NEXT: %sqrt:_(s16) = contract G_FSQRT %x
- ; GCN-NEXT: %one:_(s16) = G_FCONSTANT half 0xH3C00
- ; GCN-NEXT: %rsq:_(s16) = contract G_FDIV %one, %sqrt
+ ; GCN-NEXT: %rsq:_(s16) = contract G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %x(s16)
; GCN-NEXT: %ext:_(s32) = G_ANYEXT %rsq(s16)
; GCN-NEXT: $vgpr0 = COPY %ext(s32)
%0:_(s32) = COPY $vgpr0
@@ -40,9 +38,7 @@ body: |
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: %x:_(s16) = G_TRUNC [[COPY]](s32)
- ; GCN-NEXT: %sqrt:_(s16) = contract G_FSQRT %x
- ; GCN-NEXT: %neg_one:_(s16) = G_FCONSTANT half 0xHBC00
- ; GCN-NEXT: %rsq:_(s16) = contract G_FDIV %neg_one, %sqrt
+ ; GCN-NEXT: %rsq:_(s16) = contract G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %x(s16)
; GCN-NEXT: %ext:_(s32) = G_ANYEXT %rsq(s16)
; GCN-NEXT: $vgpr0 = COPY %ext(s32)
%0:_(s32) = COPY $vgpr0
@@ -68,8 +64,7 @@ body: |
; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: %x:_(s16) = G_TRUNC [[COPY]](s32)
; GCN-NEXT: %sqrt:_(s16) = contract G_FSQRT %x
- ; GCN-NEXT: %one:_(s16) = G_FCONSTANT half 0xH3C00
- ; GCN-NEXT: %rsq:_(s16) = contract G_FDIV %one, %sqrt
+ ; GCN-NEXT: %rsq:_(s16) = contract G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %x(s16)
; GCN-NEXT: %ext:_(s32) = G_ANYEXT %rsq(s16)
; GCN-NEXT: $vgpr0 = COPY %ext(s32)
; GCN-NEXT: S_ENDPGM 0, implicit %sqrt(s16)
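
The predicate in this revision accepts both +1.0 and -1.0 numerators. Expressed as a stand-alone C++ check (hypothetical helper name, operating on the matched G_FCONSTANT's immediate operand), it amounts to:

    // Hypothetical equivalent of the inline predicate: the numerator constant
    // must be exactly +1.0 or -1.0.
    static bool isPlusOrMinusOne(const MachineOperand &FpImm) {
      const ConstantFP *C = FpImm.getFPImm();
      return C->isExactlyValue(1.0) || C->isExactlyValue(-1.0);
    }
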
>From 8683e840582a3a70ef7a8e52c7808408c5dbe73e Mon Sep 17 00:00:00 2001
From: Nick Anderson <nickleus27 at gmail.com>
Date: Wed, 24 Jan 2024 00:50:09 -0800
Subject: [PATCH 4/4] fixup! GlobalISel needs fdiv 1 / sqrt(x) to rsq combine
---
llvm/lib/Target/AMDGPU/AMDGPUCombine.td | 2 +-
.../AMDGPU/AMDGPUPostLegalizerCombiner.cpp | 8 +-
.../GlobalISel/combine-fdiv-sqrt-to-rsq.mir | 98 ++++++++++++++++++-
3 files changed, 97 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
index 6e6a714ee70010d..5e388a80741dd08 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
@@ -39,7 +39,7 @@ def fdiv_1_by_sqrt_to_rsq : GICombineRule<
(G_FCONSTANT $one, $fpimm),
(G_FDIV $dst, $one, $sqrt, (MIFlags FmContract)):$root,
[{ return ${fpimm}.getFPImm()->isExactlyValue(1.0)
- || ${fpimm}.getFPImm()->isExactlyValue(-1.0); }]),
+ || ${fpimm}.getFPImm()->isExactlyValue(-1.0); }]),
(apply [{ applyOneFDivSqrtToRsq(*${root}, ${x}.getReg()); }])>;
def cvt_f32_ubyteN_matchdata : GIDefMatchData<"CvtF32UByteMatchInfo">;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
index 650736c60d7cea8..ae4e657283ec06d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
@@ -338,14 +338,8 @@ bool AMDGPUPostLegalizerCombinerImpl::matchRcpSqrtToRsq(
void AMDGPUPostLegalizerCombinerImpl::applyOneFDivSqrtToRsq(
MachineInstr &MI, const Register &X) const {
- // B.setInstrAndDebugLoc(MI);
-
Register Dst = MI.getOperand(0).getReg();
-
- B.buildIntrinsic(Intrinsic::amdgcn_rsq, ArrayRef<Register>({Dst}))
- .addUse(X)
- .setMIFlags(MI.getFlags());
-
+ B.buildIntrinsic(Intrinsic::amdgcn_rsq, Dst).addUse(X);
MI.eraseFromParent();
}
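
Note that with setMIFlags dropped here, the newly built G_INTRINSIC no longer inherits the 'contract' flag from the matched G_FDIV, which is why the test updates that follow remove 'contract' from the G_INTRINSIC check lines. If flag propagation were desired, a minimal sketch (reusing the builder call from the earlier revision of this patch) would be:

    // Sketch only: carry the matched FDIV's MI flags (including 'contract')
    // over to the new rsq intrinsic.
    B.buildIntrinsic(Intrinsic::amdgcn_rsq, Dst)
        .addUse(X)
        .setMIFlags(MI.getFlags());
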
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fdiv-sqrt-to-rsq.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fdiv-sqrt-to-rsq.mir
index 134e69768303f67..402b8b5495a23b5 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fdiv-sqrt-to-rsq.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fdiv-sqrt-to-rsq.mir
@@ -13,7 +13,7 @@ body: |
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: %x:_(s16) = G_TRUNC [[COPY]](s32)
- ; GCN-NEXT: %rsq:_(s16) = contract G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %x(s16)
+ ; GCN-NEXT: %rsq:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %x(s16)
; GCN-NEXT: %ext:_(s32) = G_ANYEXT %rsq(s16)
; GCN-NEXT: $vgpr0 = COPY %ext(s32)
%0:_(s32) = COPY $vgpr0
@@ -38,7 +38,7 @@ body: |
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: %x:_(s16) = G_TRUNC [[COPY]](s32)
- ; GCN-NEXT: %rsq:_(s16) = contract G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %x(s16)
+ ; GCN-NEXT: %rsq:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %x(s16)
; GCN-NEXT: %ext:_(s32) = G_ANYEXT %rsq(s16)
; GCN-NEXT: $vgpr0 = COPY %ext(s32)
%0:_(s32) = COPY $vgpr0
@@ -64,7 +64,7 @@ body: |
; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: %x:_(s16) = G_TRUNC [[COPY]](s32)
; GCN-NEXT: %sqrt:_(s16) = contract G_FSQRT %x
- ; GCN-NEXT: %rsq:_(s16) = contract G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %x(s16)
+ ; GCN-NEXT: %rsq:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %x(s16)
; GCN-NEXT: %ext:_(s32) = G_ANYEXT %rsq(s16)
; GCN-NEXT: $vgpr0 = COPY %ext(s32)
; GCN-NEXT: S_ENDPGM 0, implicit %sqrt(s16)
@@ -78,3 +78,95 @@ body: |
S_ENDPGM 0, implicit %sqrt
...
+
+---
+name: rsq_f32
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GCN-LABEL: name: rsq_f32
+ ; GCN: liveins: $vgpr0
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: %x:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: %rsq:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %x(s32)
+ ; GCN-NEXT: $vgpr0 = COPY %rsq(s32)
+ %x:_(s32) = COPY $vgpr0
+ %sqrt:_(s32) = contract G_FSQRT %x
+ %one:_(s32) = G_FCONSTANT float 1.0
+ %rsq:_(s32) = contract G_FDIV %one, %sqrt
+ $vgpr0 = COPY %rsq
+
+...
+
+---
+name: neg_rsq_f32
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GCN-LABEL: name: neg_rsq_f32
+ ; GCN: liveins: $vgpr0
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: %x:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: %rsq:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %x(s32)
+ ; GCN-NEXT: $vgpr0 = COPY %rsq(s32)
+ %x:_(s32) = COPY $vgpr0
+ %sqrt:_(s32) = contract G_FSQRT %x
+ %neg_one:_(s32) = G_FCONSTANT float -1.0
+ %rsq:_(s32) = contract G_FDIV %neg_one, %sqrt
+ $vgpr0 = COPY %rsq
+
+...
+
+---
+name: rsq_f64
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GCN-LABEL: name: rsq_f64
+ ; GCN: liveins: $vgpr0
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: %x:_(s64) = G_ANYEXT [[COPY]](s32)
+ ; GCN-NEXT: %rsq:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %x(s64)
+ ; GCN-NEXT: %ext:_(s32) = G_TRUNC %rsq(s64)
+ ; GCN-NEXT: $vgpr0 = COPY %ext(s32)
+ %0:_(s32) = COPY $vgpr0
+ %x:_(s64) = G_ANYEXT %0:_(s32)
+ %sqrt:_(s64) = contract G_FSQRT %x
+ %one:_(s64) = G_FCONSTANT double 1.0
+ %rsq:_(s64) = contract G_FDIV %one, %sqrt
+ %ext:_(s32) = G_TRUNC %rsq:_(s64)
+ $vgpr0 = COPY %ext
+
+...
+
+---
+name: neg_rsq_f64
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GCN-LABEL: name: neg_rsq_f64
+ ; GCN: liveins: $vgpr0
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: %x:_(s64) = G_ANYEXT [[COPY]](s32)
+ ; GCN-NEXT: %rsq:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %x(s64)
+ ; GCN-NEXT: %ext:_(s32) = G_TRUNC %rsq(s64)
+ ; GCN-NEXT: $vgpr0 = COPY %ext(s32)
+ %0:_(s32) = COPY $vgpr0
+ %x:_(s64) = G_ANYEXT %0:_(s32)
+ %sqrt:_(s64) = contract G_FSQRT %x
+ %neg_one:_(s64) = G_FCONSTANT double -1.0
+ %rsq:_(s64) = contract G_FDIV %neg_one, %sqrt
+ %ext:_(s32) = G_TRUNC %rsq:_(s64)
+ $vgpr0 = COPY %ext
+
+...