[llvm] f3225f2 - AMDGPU/GlobalISel: Legalize FDIV64
Austin Kerbow via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 19 21:02:58 PST 2019
Author: Austin Kerbow
Date: 2019-11-19T21:02:27-08:00
New Revision: f3225f2abe78d8a25ee5deea4265b447e7b7d5ee
URL: https://github.com/llvm/llvm-project/commit/f3225f2abe78d8a25ee5deea4265b447e7b7d5ee
DIFF: https://github.com/llvm/llvm-project/commit/f3225f2abe78d8a25ee5deea4265b447e7b7d5ee.diff
LOG: AMDGPU/GlobalISel: Legalize FDIV64
Reviewers: arsenm
Reviewed By: arsenm
Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, rovka, dstuttard, tpr, t-tye, hiraditya, Petar.Avramovic, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D70403
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fdiv.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index b4538097a211..c21102fd611f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -1867,6 +1867,7 @@ bool AMDGPULegalizerInfo::legalizeFDIV(MachineInstr &MI,
LLT DstTy = MRI.getType(Dst);
LLT S16 = LLT::scalar(16);
LLT S32 = LLT::scalar(32);
+ LLT S64 = LLT::scalar(64);
if (legalizeFastUnsafeFDIV(MI, MRI, B))
return true;
@@ -1875,6 +1876,8 @@ bool AMDGPULegalizerInfo::legalizeFDIV(MachineInstr &MI,
return legalizeFDIV16(MI, MRI, B);
if (DstTy == S32)
return legalizeFDIV32(MI, MRI, B);
+ if (DstTy == S64)
+ return legalizeFDIV64(MI, MRI, B);
return false;
}
@@ -2072,6 +2075,88 @@ bool AMDGPULegalizerInfo::legalizeFDIV32(MachineInstr &MI,
return true;
}
+/// Lower a 64-bit G_FDIV into the AMDGPU div_scale / rcp / fma / div_fmas /
+/// div_fixup instruction sequence.
+///
+/// \p MI is the G_FDIV being replaced: operand 0 is the result, operand 1 the
+/// numerator and operand 2 the denominator. The MI flags of \p MI are copied
+/// onto every instruction emitted here. New instructions are inserted at
+/// \p MI, which is erased afterwards.
+///
+/// \returns true unconditionally.
+bool AMDGPULegalizerInfo::legalizeFDIV64(MachineInstr &MI,
+                                         MachineRegisterInfo &MRI,
+                                         MachineIRBuilder &B) const {
+  B.setInstr(MI);
+  Register Res = MI.getOperand(0).getReg();
+  Register LHS = MI.getOperand(1).getReg();
+  Register RHS = MI.getOperand(2).getReg();
+
+  uint16_t Flags = MI.getFlags();
+
+  LLT S64 = LLT::scalar(64);
+  LLT S1 = LLT::scalar(1);
+
+  auto One = B.buildFConstant(S64, 1.0);
+
+  // Scaled denominator; the s1 result of this first div_scale is not used.
+  auto DivScale0 = B.buildIntrinsic(Intrinsic::amdgcn_div_scale, {S64, S1}, false)
+    .addUse(RHS)
+    .addUse(RHS)
+    .addUse(LHS)
+    .setMIFlags(Flags);
+
+  auto NegDivScale0 = B.buildFNeg(S64, DivScale0.getReg(0), Flags);
+
+  // Initial reciprocal estimate of the scaled denominator.
+  auto Rcp = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {S64}, false)
+    .addUse(DivScale0.getReg(0))
+    .setMIFlags(Flags);
+
+  // Two refinement steps of the reciprocal: e = 1 - d * r; r' = r + r * e.
+  auto Fma0 = B.buildFMA(S64, NegDivScale0, Rcp, One, Flags);
+  auto Fma1 = B.buildFMA(S64, Rcp, Fma0, Rcp, Flags);
+  auto Fma2 = B.buildFMA(S64, NegDivScale0, Fma1, One, Flags);
+
+  // Scaled numerator; its s1 result selects the scaling applied by div_fmas
+  // on subtargets where that output is usable.
+  auto DivScale1 = B.buildIntrinsic(Intrinsic::amdgcn_div_scale, {S64, S1}, false)
+    .addUse(LHS)
+    .addUse(RHS)
+    .addUse(LHS)
+    .setMIFlags(Flags);
+
+  auto Fma3 = B.buildFMA(S64, Fma1, Fma2, Fma1, Flags);
+  // Quotient estimate. Both operands are f64 values, so this has to be a
+  // floating-point multiply; an integer G_MUL would multiply the raw bits.
+  auto Mul = B.buildFMul(S64, DivScale1.getReg(0), Fma3, Flags);
+  // Residual of the quotient estimate: n - d * q.
+  auto Fma4 = B.buildFMA(S64, NegDivScale0, Mul, DivScale1.getReg(0), Flags);
+
+  Register Scale;
+  if (!ST.hasUsableDivScaleConditionOutput()) {
+    // Workaround a hardware bug on SI where the condition output from div_scale
+    // is not usable.
+
+    Scale = MRI.createGenericVirtualRegister(S1);
+
+    LLT S32 = LLT::scalar(32);
+
+    auto NumUnmerge = B.buildUnmerge(S32, LHS);
+    auto DenUnmerge = B.buildUnmerge(S32, RHS);
+    auto Scale0Unmerge = B.buildUnmerge(S32, DivScale0);
+    auto Scale1Unmerge = B.buildUnmerge(S32, DivScale1);
+
+    // Recompute the condition by hand: compare piece 1 of each input with
+    // piece 1 of its div_scale result and xor the two equality bits.
+    auto CmpNum = B.buildICmp(ICmpInst::ICMP_EQ, S1, NumUnmerge.getReg(1),
+                              Scale1Unmerge.getReg(1));
+    auto CmpDen = B.buildICmp(ICmpInst::ICMP_EQ, S1, DenUnmerge.getReg(1),
+                              Scale0Unmerge.getReg(1));
+    B.buildXor(Scale, CmpNum, CmpDen);
+  } else {
+    Scale = DivScale1.getReg(1);
+  }
+
+  auto Fmas = B.buildIntrinsic(Intrinsic::amdgcn_div_fmas, {S64}, false)
+    .addUse(Fma4.getReg(0))
+    .addUse(Fma3.getReg(0))
+    .addUse(Mul.getReg(0))
+    .addUse(Scale)
+    .setMIFlags(Flags);
+
+  // Define Res directly as the intrinsic's result. Creating a fresh s64 result
+  // and appending Res with addDef() would place a def after the intrinsic ID
+  // operand and leave the real result unused.
+  B.buildIntrinsic(Intrinsic::amdgcn_div_fixup, Res, false)
+    .addUse(Fmas.getReg(0))
+    .addUse(RHS)
+    .addUse(LHS)
+    .setMIFlags(Flags);
+
+  MI.eraseFromParent();
+  return true;
+}
+
bool AMDGPULegalizerInfo::legalizeFDIVFastIntrin(MachineInstr &MI,
MachineRegisterInfo &MRI,
MachineIRBuilder &B) const {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
index 4b6a971a4fd8..4b1405a92787 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -90,6 +90,8 @@ class AMDGPULegalizerInfo : public LegalizerInfo {
MachineIRBuilder &B) const;
bool legalizeFDIV32(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
+ bool legalizeFDIV64(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const;
bool legalizeFastUnsafeFDIV(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
bool legalizeFDIVFastIntrin(MachineInstr &MI, MachineRegisterInfo &MRI,
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fdiv.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fdiv.mir
index 505d25fb370e..25dc9c234e7e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fdiv.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fdiv.mir
@@ -1,9 +1,9 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=SI %s
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -enable-unsafe-fp-math -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9-UNSAFE %s
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX10 %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -enable-unsafe-fp-math -o - %s | FileCheck -check-prefix=GFX9-UNSAFE %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX10 %s
---
name: test_fdiv_s16
@@ -347,18 +347,85 @@ body: |
; SI-LABEL: name: test_fdiv_s64
; SI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
- ; SI: [[FDIV:%[0-9]+]]:_(s64) = G_FDIV [[COPY]], [[COPY1]]
- ; SI: $vgpr0_vgpr1 = COPY [[FDIV]](s64)
+ ; SI: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
+ ; SI: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY1]](s64), [[COPY1]](s64), [[COPY]](s64)
+ ; SI: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]]
+ ; SI: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64)
+ ; SI: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C]]
+ ; SI: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]]
+ ; SI: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]]
+ ; SI: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s64), [[COPY1]](s64), [[COPY]](s64)
+ ; SI: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]]
+ ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT3]](s64)
+ ; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA3]](s64)
+ ; SI: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV2]]
+ ; SI: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV2]]
+ ; SI: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]]
+ ; SI: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]]
+ ; SI: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]]
+ ; SI: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]]
+ ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL]](s32), [[ADD1]](s32)
+ ; SI: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[MV]], [[INT3]]
+ ; SI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+ ; SI: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+ ; SI: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT]](s64)
+ ; SI: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT3]](s64)
+ ; SI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV5]](s32), [[UV11]]
+ ; SI: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV7]](s32), [[UV9]]
+ ; SI: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP]], [[ICMP1]]
+ ; SI: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[MV]](s64), [[XOR]](s1)
+ ; SI: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), def %2(s64), [[INT5]](s64), [[COPY1]](s64), [[COPY]](s64)
+ ; SI: $vgpr0_vgpr1 = COPY %2(s64)
; VI-LABEL: name: test_fdiv_s64
; VI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
- ; VI: [[FDIV:%[0-9]+]]:_(s64) = G_FDIV [[COPY]], [[COPY1]]
- ; VI: $vgpr0_vgpr1 = COPY [[FDIV]](s64)
+ ; VI: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
+ ; VI: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY1]](s64), [[COPY1]](s64), [[COPY]](s64)
+ ; VI: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]]
+ ; VI: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64)
+ ; VI: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C]]
+ ; VI: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]]
+ ; VI: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]]
+ ; VI: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s64), [[COPY1]](s64), [[COPY]](s64)
+ ; VI: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]]
+ ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT3]](s64)
+ ; VI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA3]](s64)
+ ; VI: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV2]]
+ ; VI: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV2]]
+ ; VI: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]]
+ ; VI: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]]
+ ; VI: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]]
+ ; VI: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]]
+ ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL]](s32), [[ADD1]](s32)
+ ; VI: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[MV]], [[INT3]]
+ ; VI: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[MV]](s64), [[INT4]](s1)
+ ; VI: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), def %2(s64), [[INT5]](s64), [[COPY1]](s64), [[COPY]](s64)
+ ; VI: $vgpr0_vgpr1 = COPY %2(s64)
; GFX9-LABEL: name: test_fdiv_s64
; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
- ; GFX9: [[FDIV:%[0-9]+]]:_(s64) = G_FDIV [[COPY]], [[COPY1]]
- ; GFX9: $vgpr0_vgpr1 = COPY [[FDIV]](s64)
+ ; GFX9: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
+ ; GFX9: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY1]](s64), [[COPY1]](s64), [[COPY]](s64)
+ ; GFX9: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]]
+ ; GFX9: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64)
+ ; GFX9: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C]]
+ ; GFX9: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]]
+ ; GFX9: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]]
+ ; GFX9: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s64), [[COPY1]](s64), [[COPY]](s64)
+ ; GFX9: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]]
+ ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT3]](s64)
+ ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA3]](s64)
+ ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV2]]
+ ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV2]]
+ ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]]
+ ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]]
+ ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]]
+ ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]]
+ ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL]](s32), [[ADD1]](s32)
+ ; GFX9: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[MV]], [[INT3]]
+ ; GFX9: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[MV]](s64), [[INT4]](s1)
+ ; GFX9: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), def %2(s64), [[INT5]](s64), [[COPY1]](s64), [[COPY]](s64)
+ ; GFX9: $vgpr0_vgpr1 = COPY %2(s64)
; GFX9-UNSAFE-LABEL: name: test_fdiv_s64
; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
@@ -368,8 +435,28 @@ body: |
; GFX10-LABEL: name: test_fdiv_s64
; GFX10: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
- ; GFX10: [[FDIV:%[0-9]+]]:_(s64) = G_FDIV [[COPY]], [[COPY1]]
- ; GFX10: $vgpr0_vgpr1 = COPY [[FDIV]](s64)
+ ; GFX10: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
+ ; GFX10: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY1]](s64), [[COPY1]](s64), [[COPY]](s64)
+ ; GFX10: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]]
+ ; GFX10: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64)
+ ; GFX10: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C]]
+ ; GFX10: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]]
+ ; GFX10: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]]
+ ; GFX10: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s64), [[COPY1]](s64), [[COPY]](s64)
+ ; GFX10: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]]
+ ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT3]](s64)
+ ; GFX10: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA3]](s64)
+ ; GFX10: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV2]]
+ ; GFX10: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV2]]
+ ; GFX10: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]]
+ ; GFX10: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]]
+ ; GFX10: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]]
+ ; GFX10: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]]
+ ; GFX10: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL]](s32), [[ADD1]](s32)
+ ; GFX10: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[MV]], [[INT3]]
+ ; GFX10: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[MV]](s64), [[INT4]](s1)
+ ; GFX10: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), def %2(s64), [[INT5]](s64), [[COPY1]](s64), [[COPY]](s64)
+ ; GFX10: $vgpr0_vgpr1 = COPY %2(s64)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s64) = COPY $vgpr2_vgpr3
%2:_(s64) = G_FDIV %0, %1
@@ -911,27 +998,158 @@ body: |
; SI: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
; SI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
; SI: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
- ; SI: [[FDIV:%[0-9]+]]:_(s64) = G_FDIV [[UV]], [[UV2]]
- ; SI: [[FDIV1:%[0-9]+]]:_(s64) = G_FDIV [[UV1]], [[UV3]]
- ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FDIV]](s64), [[FDIV1]](s64)
+ ; SI: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
+ ; SI: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV2]](s64), [[UV2]](s64), [[UV]](s64)
+ ; SI: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]]
+ ; SI: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64)
+ ; SI: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C]]
+ ; SI: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]]
+ ; SI: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]]
+ ; SI: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s64), [[UV2]](s64), [[UV]](s64)
+ ; SI: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]]
+ ; SI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT3]](s64)
+ ; SI: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA3]](s64)
+ ; SI: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV6]]
+ ; SI: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV6]]
+ ; SI: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV7]]
+ ; SI: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV6]]
+ ; SI: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]]
+ ; SI: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]]
+ ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL]](s32), [[ADD1]](s32)
+ ; SI: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[MV]], [[INT3]]
+ ; SI: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
+ ; SI: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64)
+ ; SI: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT]](s64)
+ ; SI: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT3]](s64)
+ ; SI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV9]](s32), [[UV15]]
+ ; SI: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV11]](s32), [[UV13]]
+ ; SI: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP]], [[ICMP1]]
+ ; SI: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[MV]](s64), [[XOR]](s1)
+ ; SI: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), def %7(s64), [[INT5]](s64), [[UV2]](s64), [[UV]](s64)
+ ; SI: [[INT7:%[0-9]+]]:_(s64), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV3]](s64), [[UV3]](s64), [[UV1]](s64)
+ ; SI: [[FNEG1:%[0-9]+]]:_(s64) = G_FNEG [[INT7]]
+ ; SI: [[INT9:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s64)
+ ; SI: [[FMA5:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[INT9]], [[C]]
+ ; SI: [[FMA6:%[0-9]+]]:_(s64) = G_FMA [[INT9]], [[FMA5]], [[INT9]]
+ ; SI: [[FMA7:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[FMA6]], [[C]]
+ ; SI: [[INT10:%[0-9]+]]:_(s64), [[INT11:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s64), [[UV3]](s64), [[UV1]](s64)
+ ; SI: [[FMA8:%[0-9]+]]:_(s64) = G_FMA [[FMA6]], [[FMA7]], [[FMA6]]
+ ; SI: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT10]](s64)
+ ; SI: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA8]](s64)
+ ; SI: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UV18]]
+ ; SI: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UV18]]
+ ; SI: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UV19]]
+ ; SI: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UV18]]
+ ; SI: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[MUL4]], [[MUL5]]
+ ; SI: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ADD2]], [[UMULH1]]
+ ; SI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL3]](s32), [[ADD3]](s32)
+ ; SI: [[FMA9:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[MV1]], [[INT10]]
+ ; SI: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
+ ; SI: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64)
+ ; SI: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT7]](s64)
+ ; SI: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT10]](s64)
+ ; SI: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV21]](s32), [[UV27]]
+ ; SI: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV23]](s32), [[UV25]]
+ ; SI: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP2]], [[ICMP3]]
+ ; SI: [[INT12:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s64), [[FMA8]](s64), [[MV1]](s64), [[XOR1]](s1)
+ ; SI: [[INT13:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), def %8(s64), [[INT12]](s64), [[UV3]](s64), [[UV1]](s64)
+ ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR %7(s64), %8(s64)
; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
; VI-LABEL: name: test_fdiv_v2s64
; VI: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; VI: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
; VI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
; VI: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
- ; VI: [[FDIV:%[0-9]+]]:_(s64) = G_FDIV [[UV]], [[UV2]]
- ; VI: [[FDIV1:%[0-9]+]]:_(s64) = G_FDIV [[UV1]], [[UV3]]
- ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FDIV]](s64), [[FDIV1]](s64)
+ ; VI: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
+ ; VI: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV2]](s64), [[UV2]](s64), [[UV]](s64)
+ ; VI: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]]
+ ; VI: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64)
+ ; VI: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C]]
+ ; VI: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]]
+ ; VI: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]]
+ ; VI: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s64), [[UV2]](s64), [[UV]](s64)
+ ; VI: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]]
+ ; VI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT3]](s64)
+ ; VI: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA3]](s64)
+ ; VI: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV6]]
+ ; VI: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV6]]
+ ; VI: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV7]]
+ ; VI: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV6]]
+ ; VI: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]]
+ ; VI: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]]
+ ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL]](s32), [[ADD1]](s32)
+ ; VI: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[MV]], [[INT3]]
+ ; VI: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[MV]](s64), [[INT4]](s1)
+ ; VI: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), def %7(s64), [[INT5]](s64), [[UV2]](s64), [[UV]](s64)
+ ; VI: [[INT7:%[0-9]+]]:_(s64), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV3]](s64), [[UV3]](s64), [[UV1]](s64)
+ ; VI: [[FNEG1:%[0-9]+]]:_(s64) = G_FNEG [[INT7]]
+ ; VI: [[INT9:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s64)
+ ; VI: [[FMA5:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[INT9]], [[C]]
+ ; VI: [[FMA6:%[0-9]+]]:_(s64) = G_FMA [[INT9]], [[FMA5]], [[INT9]]
+ ; VI: [[FMA7:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[FMA6]], [[C]]
+ ; VI: [[INT10:%[0-9]+]]:_(s64), [[INT11:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s64), [[UV3]](s64), [[UV1]](s64)
+ ; VI: [[FMA8:%[0-9]+]]:_(s64) = G_FMA [[FMA6]], [[FMA7]], [[FMA6]]
+ ; VI: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT10]](s64)
+ ; VI: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA8]](s64)
+ ; VI: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UV10]]
+ ; VI: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UV10]]
+ ; VI: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UV11]]
+ ; VI: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UV10]]
+ ; VI: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[MUL4]], [[MUL5]]
+ ; VI: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ADD2]], [[UMULH1]]
+ ; VI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL3]](s32), [[ADD3]](s32)
+ ; VI: [[FMA9:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[MV1]], [[INT10]]
+ ; VI: [[INT12:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s64), [[FMA8]](s64), [[MV1]](s64), [[INT11]](s1)
+ ; VI: [[INT13:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), def %8(s64), [[INT12]](s64), [[UV3]](s64), [[UV1]](s64)
+ ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR %7(s64), %8(s64)
; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
; GFX9-LABEL: name: test_fdiv_v2s64
; GFX9: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
; GFX9: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
; GFX9: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
- ; GFX9: [[FDIV:%[0-9]+]]:_(s64) = G_FDIV [[UV]], [[UV2]]
- ; GFX9: [[FDIV1:%[0-9]+]]:_(s64) = G_FDIV [[UV1]], [[UV3]]
- ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FDIV]](s64), [[FDIV1]](s64)
+ ; GFX9: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
+ ; GFX9: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV2]](s64), [[UV2]](s64), [[UV]](s64)
+ ; GFX9: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]]
+ ; GFX9: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64)
+ ; GFX9: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C]]
+ ; GFX9: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]]
+ ; GFX9: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]]
+ ; GFX9: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s64), [[UV2]](s64), [[UV]](s64)
+ ; GFX9: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]]
+ ; GFX9: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT3]](s64)
+ ; GFX9: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA3]](s64)
+ ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV6]]
+ ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV6]]
+ ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV7]]
+ ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV6]]
+ ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]]
+ ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]]
+ ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL]](s32), [[ADD1]](s32)
+ ; GFX9: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[MV]], [[INT3]]
+ ; GFX9: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[MV]](s64), [[INT4]](s1)
+ ; GFX9: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), def %7(s64), [[INT5]](s64), [[UV2]](s64), [[UV]](s64)
+ ; GFX9: [[INT7:%[0-9]+]]:_(s64), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV3]](s64), [[UV3]](s64), [[UV1]](s64)
+ ; GFX9: [[FNEG1:%[0-9]+]]:_(s64) = G_FNEG [[INT7]]
+ ; GFX9: [[INT9:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s64)
+ ; GFX9: [[FMA5:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[INT9]], [[C]]
+ ; GFX9: [[FMA6:%[0-9]+]]:_(s64) = G_FMA [[INT9]], [[FMA5]], [[INT9]]
+ ; GFX9: [[FMA7:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[FMA6]], [[C]]
+ ; GFX9: [[INT10:%[0-9]+]]:_(s64), [[INT11:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s64), [[UV3]](s64), [[UV1]](s64)
+ ; GFX9: [[FMA8:%[0-9]+]]:_(s64) = G_FMA [[FMA6]], [[FMA7]], [[FMA6]]
+ ; GFX9: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT10]](s64)
+ ; GFX9: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA8]](s64)
+ ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UV10]]
+ ; GFX9: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UV10]]
+ ; GFX9: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UV11]]
+ ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UV10]]
+ ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[MUL4]], [[MUL5]]
+ ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ADD2]], [[UMULH1]]
+ ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL3]](s32), [[ADD3]](s32)
+ ; GFX9: [[FMA9:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[MV1]], [[INT10]]
+ ; GFX9: [[INT12:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s64), [[FMA8]](s64), [[MV1]](s64), [[INT11]](s1)
+ ; GFX9: [[INT13:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), def %8(s64), [[INT12]](s64), [[UV3]](s64), [[UV1]](s64)
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR %7(s64), %8(s64)
; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
; GFX9-UNSAFE-LABEL: name: test_fdiv_v2s64
; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
@@ -949,9 +1167,48 @@ body: |
; GFX10: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
; GFX10: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
; GFX10: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
- ; GFX10: [[FDIV:%[0-9]+]]:_(s64) = G_FDIV [[UV]], [[UV2]]
- ; GFX10: [[FDIV1:%[0-9]+]]:_(s64) = G_FDIV [[UV1]], [[UV3]]
- ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FDIV]](s64), [[FDIV1]](s64)
+ ; GFX10: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
+ ; GFX10: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV2]](s64), [[UV2]](s64), [[UV]](s64)
+ ; GFX10: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]]
+ ; GFX10: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64)
+ ; GFX10: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C]]
+ ; GFX10: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]]
+ ; GFX10: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]]
+ ; GFX10: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s64), [[UV2]](s64), [[UV]](s64)
+ ; GFX10: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]]
+ ; GFX10: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT3]](s64)
+ ; GFX10: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA3]](s64)
+ ; GFX10: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV6]]
+ ; GFX10: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV6]]
+ ; GFX10: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV7]]
+ ; GFX10: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV6]]
+ ; GFX10: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]]
+ ; GFX10: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]]
+ ; GFX10: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL]](s32), [[ADD1]](s32)
+ ; GFX10: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[MV]], [[INT3]]
+ ; GFX10: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[MV]](s64), [[INT4]](s1)
+ ; GFX10: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), def %7(s64), [[INT5]](s64), [[UV2]](s64), [[UV]](s64)
+ ; GFX10: [[INT7:%[0-9]+]]:_(s64), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV3]](s64), [[UV3]](s64), [[UV1]](s64)
+ ; GFX10: [[FNEG1:%[0-9]+]]:_(s64) = G_FNEG [[INT7]]
+ ; GFX10: [[INT9:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s64)
+ ; GFX10: [[FMA5:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[INT9]], [[C]]
+ ; GFX10: [[FMA6:%[0-9]+]]:_(s64) = G_FMA [[INT9]], [[FMA5]], [[INT9]]
+ ; GFX10: [[FMA7:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[FMA6]], [[C]]
+ ; GFX10: [[INT10:%[0-9]+]]:_(s64), [[INT11:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s64), [[UV3]](s64), [[UV1]](s64)
+ ; GFX10: [[FMA8:%[0-9]+]]:_(s64) = G_FMA [[FMA6]], [[FMA7]], [[FMA6]]
+ ; GFX10: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT10]](s64)
+ ; GFX10: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA8]](s64)
+ ; GFX10: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UV10]]
+ ; GFX10: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UV10]]
+ ; GFX10: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UV11]]
+ ; GFX10: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UV10]]
+ ; GFX10: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[MUL4]], [[MUL5]]
+ ; GFX10: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ADD2]], [[UMULH1]]
+ ; GFX10: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL3]](s32), [[ADD3]](s32)
+ ; GFX10: [[FMA9:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[MV1]], [[INT10]]
+ ; GFX10: [[INT12:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s64), [[FMA8]](s64), [[MV1]](s64), [[INT11]](s1)
+ ; GFX10: [[INT13:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), def %8(s64), [[INT12]](s64), [[UV3]](s64), [[UV1]](s64)
+ ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR %7(s64), %8(s64)
; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
%0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
%1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
@@ -1976,18 +2233,82 @@ body: |
; SI-LABEL: name: test_fdiv_s64_constant_one_rcp
; SI: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
; SI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
- ; SI: [[FDIV:%[0-9]+]]:_(s64) = G_FDIV [[C]], [[COPY]]
- ; SI: $vgpr0_vgpr1 = COPY [[FDIV]](s64)
+ ; SI: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s64), [[COPY]](s64), [[C]](s64)
+ ; SI: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]]
+ ; SI: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64)
+ ; SI: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C]]
+ ; SI: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]]
+ ; SI: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]]
+ ; SI: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), [[C]](s64)
+ ; SI: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]]
+ ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT3]](s64)
+ ; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA3]](s64)
+ ; SI: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV2]]
+ ; SI: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV2]]
+ ; SI: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]]
+ ; SI: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]]
+ ; SI: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]]
+ ; SI: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]]
+ ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL]](s32), [[ADD1]](s32)
+ ; SI: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[MV]], [[INT3]]
+ ; SI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
+ ; SI: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+ ; SI: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT]](s64)
+ ; SI: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT3]](s64)
+ ; SI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV5]](s32), [[UV11]]
+ ; SI: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV7]](s32), [[UV9]]
+ ; SI: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP]], [[ICMP1]]
+ ; SI: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[MV]](s64), [[XOR]](s1)
+ ; SI: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), def %2(s64), [[INT5]](s64), [[COPY]](s64), [[C]](s64)
+ ; SI: $vgpr0_vgpr1 = COPY %2(s64)
; VI-LABEL: name: test_fdiv_s64_constant_one_rcp
; VI: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
; VI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
- ; VI: [[FDIV:%[0-9]+]]:_(s64) = G_FDIV [[C]], [[COPY]]
- ; VI: $vgpr0_vgpr1 = COPY [[FDIV]](s64)
+ ; VI: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s64), [[COPY]](s64), [[C]](s64)
+ ; VI: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]]
+ ; VI: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64)
+ ; VI: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C]]
+ ; VI: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]]
+ ; VI: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]]
+ ; VI: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), [[C]](s64)
+ ; VI: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]]
+ ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT3]](s64)
+ ; VI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA3]](s64)
+ ; VI: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV2]]
+ ; VI: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV2]]
+ ; VI: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]]
+ ; VI: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]]
+ ; VI: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]]
+ ; VI: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]]
+ ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL]](s32), [[ADD1]](s32)
+ ; VI: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[MV]], [[INT3]]
+ ; VI: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[MV]](s64), [[INT4]](s1)
+ ; VI: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), def %2(s64), [[INT5]](s64), [[COPY]](s64), [[C]](s64)
+ ; VI: $vgpr0_vgpr1 = COPY %2(s64)
; GFX9-LABEL: name: test_fdiv_s64_constant_one_rcp
; GFX9: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
- ; GFX9: [[FDIV:%[0-9]+]]:_(s64) = G_FDIV [[C]], [[COPY]]
- ; GFX9: $vgpr0_vgpr1 = COPY [[FDIV]](s64)
+ ; GFX9: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s64), [[COPY]](s64), [[C]](s64)
+ ; GFX9: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]]
+ ; GFX9: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64)
+ ; GFX9: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C]]
+ ; GFX9: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]]
+ ; GFX9: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]]
+ ; GFX9: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), [[C]](s64)
+ ; GFX9: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]]
+ ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT3]](s64)
+ ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA3]](s64)
+ ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV2]]
+ ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV2]]
+ ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]]
+ ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]]
+ ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]]
+ ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]]
+ ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL]](s32), [[ADD1]](s32)
+ ; GFX9: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[MV]], [[INT3]]
+ ; GFX9: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[MV]](s64), [[INT4]](s1)
+ ; GFX9: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), def %2(s64), [[INT5]](s64), [[COPY]](s64), [[C]](s64)
+ ; GFX9: $vgpr0_vgpr1 = COPY %2(s64)
; GFX9-UNSAFE-LABEL: name: test_fdiv_s64_constant_one_rcp
; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY]](s64)
@@ -1995,8 +2316,27 @@ body: |
; GFX10-LABEL: name: test_fdiv_s64_constant_one_rcp
; GFX10: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
; GFX10: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
- ; GFX10: [[FDIV:%[0-9]+]]:_(s64) = G_FDIV [[C]], [[COPY]]
- ; GFX10: $vgpr0_vgpr1 = COPY [[FDIV]](s64)
+ ; GFX10: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s64), [[COPY]](s64), [[C]](s64)
+ ; GFX10: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]]
+ ; GFX10: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64)
+ ; GFX10: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C]]
+ ; GFX10: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]]
+ ; GFX10: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]]
+ ; GFX10: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), [[C]](s64)
+ ; GFX10: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]]
+ ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT3]](s64)
+ ; GFX10: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA3]](s64)
+ ; GFX10: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV2]]
+ ; GFX10: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV2]]
+ ; GFX10: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]]
+ ; GFX10: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]]
+ ; GFX10: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]]
+ ; GFX10: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]]
+ ; GFX10: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL]](s32), [[ADD1]](s32)
+ ; GFX10: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[MV]], [[INT3]]
+ ; GFX10: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[MV]](s64), [[INT4]](s1)
+ ; GFX10: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), def %2(s64), [[INT5]](s64), [[COPY]](s64), [[C]](s64)
+ ; GFX10: $vgpr0_vgpr1 = COPY %2(s64)
%0:_(s64) = G_FCONSTANT double 1.0
%1:_(s64) = COPY $vgpr0_vgpr1
%2:_(s64) = G_FDIV %0, %1
@@ -2017,18 +2357,85 @@ body: |
; SI-LABEL: name: test_fdiv_s64_constant_negative_one_rcp
; SI: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double -1.000000e+00
; SI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
- ; SI: [[FDIV:%[0-9]+]]:_(s64) = G_FDIV [[C]], [[COPY]]
- ; SI: $vgpr0_vgpr1 = COPY [[FDIV]](s64)
+ ; SI: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
+ ; SI: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s64), [[COPY]](s64), [[C]](s64)
+ ; SI: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]]
+ ; SI: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64)
+ ; SI: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C1]]
+ ; SI: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]]
+ ; SI: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C1]]
+ ; SI: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), [[C]](s64)
+ ; SI: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]]
+ ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT3]](s64)
+ ; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA3]](s64)
+ ; SI: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV2]]
+ ; SI: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV2]]
+ ; SI: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]]
+ ; SI: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]]
+ ; SI: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]]
+ ; SI: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]]
+ ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL]](s32), [[ADD1]](s32)
+ ; SI: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[MV]], [[INT3]]
+ ; SI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
+ ; SI: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+ ; SI: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT]](s64)
+ ; SI: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT3]](s64)
+ ; SI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV5]](s32), [[UV11]]
+ ; SI: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV7]](s32), [[UV9]]
+ ; SI: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP]], [[ICMP1]]
+ ; SI: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[MV]](s64), [[XOR]](s1)
+ ; SI: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), def %2(s64), [[INT5]](s64), [[COPY]](s64), [[C]](s64)
+ ; SI: $vgpr0_vgpr1 = COPY %2(s64)
; VI-LABEL: name: test_fdiv_s64_constant_negative_one_rcp
; VI: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double -1.000000e+00
; VI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
- ; VI: [[FDIV:%[0-9]+]]:_(s64) = G_FDIV [[C]], [[COPY]]
- ; VI: $vgpr0_vgpr1 = COPY [[FDIV]](s64)
+ ; VI: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
+ ; VI: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s64), [[COPY]](s64), [[C]](s64)
+ ; VI: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]]
+ ; VI: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64)
+ ; VI: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C1]]
+ ; VI: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]]
+ ; VI: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C1]]
+ ; VI: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), [[C]](s64)
+ ; VI: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]]
+ ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT3]](s64)
+ ; VI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA3]](s64)
+ ; VI: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV2]]
+ ; VI: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV2]]
+ ; VI: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]]
+ ; VI: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]]
+ ; VI: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]]
+ ; VI: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]]
+ ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL]](s32), [[ADD1]](s32)
+ ; VI: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[MV]], [[INT3]]
+ ; VI: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[MV]](s64), [[INT4]](s1)
+ ; VI: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), def %2(s64), [[INT5]](s64), [[COPY]](s64), [[C]](s64)
+ ; VI: $vgpr0_vgpr1 = COPY %2(s64)
; GFX9-LABEL: name: test_fdiv_s64_constant_negative_one_rcp
; GFX9: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double -1.000000e+00
; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
- ; GFX9: [[FDIV:%[0-9]+]]:_(s64) = G_FDIV [[C]], [[COPY]]
- ; GFX9: $vgpr0_vgpr1 = COPY [[FDIV]](s64)
+ ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
+ ; GFX9: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s64), [[COPY]](s64), [[C]](s64)
+ ; GFX9: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]]
+ ; GFX9: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64)
+ ; GFX9: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C1]]
+ ; GFX9: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]]
+ ; GFX9: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C1]]
+ ; GFX9: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), [[C]](s64)
+ ; GFX9: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]]
+ ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT3]](s64)
+ ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA3]](s64)
+ ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV2]]
+ ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV2]]
+ ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]]
+ ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]]
+ ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]]
+ ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]]
+ ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL]](s32), [[ADD1]](s32)
+ ; GFX9: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[MV]], [[INT3]]
+ ; GFX9: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[MV]](s64), [[INT4]](s1)
+ ; GFX9: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), def %2(s64), [[INT5]](s64), [[COPY]](s64), [[C]](s64)
+ ; GFX9: $vgpr0_vgpr1 = COPY %2(s64)
; GFX9-UNSAFE-LABEL: name: test_fdiv_s64_constant_negative_one_rcp
; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
; GFX9-UNSAFE: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[COPY]]
@@ -2037,8 +2444,28 @@ body: |
; GFX10-LABEL: name: test_fdiv_s64_constant_negative_one_rcp
; GFX10: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double -1.000000e+00
; GFX10: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
- ; GFX10: [[FDIV:%[0-9]+]]:_(s64) = G_FDIV [[C]], [[COPY]]
- ; GFX10: $vgpr0_vgpr1 = COPY [[FDIV]](s64)
+ ; GFX10: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
+ ; GFX10: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s64), [[COPY]](s64), [[C]](s64)
+ ; GFX10: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]]
+ ; GFX10: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64)
+ ; GFX10: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C1]]
+ ; GFX10: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]]
+ ; GFX10: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C1]]
+ ; GFX10: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), [[C]](s64)
+ ; GFX10: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]]
+ ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT3]](s64)
+ ; GFX10: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA3]](s64)
+ ; GFX10: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV2]]
+ ; GFX10: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV2]]
+ ; GFX10: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]]
+ ; GFX10: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]]
+ ; GFX10: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]]
+ ; GFX10: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]]
+ ; GFX10: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL]](s32), [[ADD1]](s32)
+ ; GFX10: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[MV]], [[INT3]]
+ ; GFX10: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[MV]](s64), [[INT4]](s1)
+ ; GFX10: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), def %2(s64), [[INT5]](s64), [[COPY]](s64), [[C]](s64)
+ ; GFX10: $vgpr0_vgpr1 = COPY %2(s64)
%0:_(s64) = G_FCONSTANT double -1.0
%1:_(s64) = COPY $vgpr0_vgpr1
%2:_(s64) = G_FDIV %0, %1
More information about the llvm-commits mailing list