[llvm] 16bc07a - AMDGPU: Select f64 fmul by negative power of 2 to ldexp
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 23 17:39:15 PDT 2023
Author: Matt Arsenault
Date: 2023-08-23T20:36:01-04:00
New Revision: 16bc07ac9105dd87745e0335b057c217efadd4dc
URL: https://github.com/llvm/llvm-project/commit/16bc07ac9105dd87745e0335b057c217efadd4dc
DIFF: https://github.com/llvm/llvm-project/commit/16bc07ac9105dd87745e0335b057c217efadd4dc.diff
LOG: AMDGPU: Select f64 fmul by negative power of 2 to ldexp
Select fmul x, -K -> ldexp(-x, log2(fabsK))
Select fmul fabs(x), -K -> ldexp(-|x|, log2(fabsK))
https://reviews.llvm.org/D158173
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
llvm/lib/Target/AMDGPU/SIInstructions.td
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmul.mir
llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index f5c7feeb40c4da..1bde2f30e37784 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -5131,7 +5131,7 @@ void AMDGPUInstructionSelector::renderFPPow2ToExponent(MachineInstrBuilder &MIB,
const MachineInstr &MI,
int OpIdx) const {
const APFloat &APF = MI.getOperand(1).getFPImm()->getValueAPF();
- int ExpVal = APF.getExactLog2();
+ int ExpVal = APF.getExactLog2Abs();
assert(ExpVal != INT_MIN);
MIB.addImm(ExpVal);
}
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 5d239e2da17edf..da6701fbcea1d2 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -3330,7 +3330,7 @@ defm : Int16Med3Pat<V_MED3_U16_e64, umin, umax>;
// Convert a floating-point power of 2 to the integer exponent.
def FPPow2ToExponentXForm : SDNodeXForm<fpimm, [{
const auto &APF = N->getValueAPF();
- int Log2 = APF.getExactLog2();
+ int Log2 = APF.getExactLog2Abs();
assert(Log2 != INT_MIN);
return CurDAG->getTargetConstant(Log2, SDLoc(N), MVT::i32);
}]>;
@@ -3339,8 +3339,24 @@ def FPPow2ToExponentXForm : SDNodeXForm<fpimm, [{
// immediate where it's preferable to emit a multiply by as an
// ldexp. We skip over 0.5 to 4.0 as those are inline immediates
// anyway.
-def fpimm_pow2_prefer_ldexp_f64 : FPImmLeaf<f64, [{
- int Exp = Imm.getExactLog2();
+def fpimm_pos_pow2_prefer_ldexp_f64 : FPImmLeaf<f64, [{
+ if (Imm.isNegative())
+ return false;
+
+ int Exp = Imm.getExactLog2Abs();
+ // Prefer leaving the FP inline immediates as they are.
+ // 0.5, 1.0, 2.0, 4.0
+
+ // For f64 ldexp is always better than materializing a 64-bit
+ // constant.
+ return Exp != INT_MIN && (Exp < -1 || Exp > 2);
+ }], FPPow2ToExponentXForm
+>;
+
+def fpimm_neg_pow2_prefer_ldexp_f64 : FPImmLeaf<f64, [{
+ if (!Imm.isNegative())
+ return false;
+ int Exp = Imm.getExactLog2Abs();
// Prefer leaving the FP inline immediates as they are.
// 0.5, 1.0, 2.0, 4.0
@@ -3353,17 +3369,32 @@ def fpimm_pow2_prefer_ldexp_f64 : FPImmLeaf<f64, [{
// f64 is different because we also want to handle cases that may
// require materialization of the exponent.
// TODO: If we know f64 ops are fast, prefer add (ldexp x, N), y over fma
-// TODO: fmul x, -2^n -> ldexp(-x, n)
// TODO: For f32/f16, it's not a clear win on code size to use ldexp
// in place of mul since we have to use the vop3 form. Are there power
// savings or some other reason to prefer ldexp over mul?
def : GCNPat<
(any_fmul (f64 (VOP3Mods f64:$src0, i32:$src0_mods)),
- fpimm_pow2_prefer_ldexp_f64:$src1),
+ fpimm_pos_pow2_prefer_ldexp_f64:$src1),
(V_LDEXP_F64_e64 i32:$src0_mods, VSrc_b64:$src0,
0, (S_MOV_B32 (i32 (FPPow2ToExponentXForm $src1))))
>;
+def : GCNPat<
+ (any_fmul f64:$src0, fpimm_neg_pow2_prefer_ldexp_f64:$src1),
+ (V_LDEXP_F64_e64 SRCMODS.NEG, VSrc_b64:$src0,
+ 0, (S_MOV_B32 (i32 (FPPow2ToExponentXForm $src1))))
+>;
+
+// We want to avoid using VOP3Mods which could pull in another fneg
+// which we would need to be re-negated (which should never happen in
+// practice). I don't see a way to apply an SDNodeXForm that accounts
+// for a second operand.
+def : GCNPat<
+ (any_fmul (fabs f64:$src0), fpimm_neg_pow2_prefer_ldexp_f64:$src1),
+ (V_LDEXP_F64_e64 SRCMODS.NEG_ABS, VSrc_b64:$src0,
+ 0, (S_MOV_B32 (i32 (FPPow2ToExponentXForm $src1))))
+>;
+
class AMDGPUGenericInstruction : GenericInstruction {
let Namespace = "AMDGPU";
}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmul.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmul.mir
index d592d78a35d575..15ece434487ed1 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmul.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmul.mir
@@ -289,11 +289,9 @@ body: |
; GCN: liveins: $vgpr0_vgpr1
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1070596096, implicit $exec
- ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
- ; GCN-NEXT: [[V_MUL_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_MUL_F64_e64 0, [[COPY]], 0, [[REG_SEQUENCE]], 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[V_MUL_F64_e64_]]
+ ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4
+ ; GCN-NEXT: [[V_LDEXP_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_LDEXP_F64_e64 1, [[COPY]], 0, [[S_MOV_B32_]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[V_LDEXP_F64_e64_]]
; GCN-NEXT: SI_RETURN implicit $vgpr0_vgpr1
%0:vgpr(s64) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_FCONSTANT double -16.0
@@ -315,11 +313,9 @@ body: |
; GCN: liveins: $vgpr0_vgpr1
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1070596096, implicit $exec
- ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
- ; GCN-NEXT: [[V_MUL_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_MUL_F64_e64 2, [[COPY]], 0, [[REG_SEQUENCE]], 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[V_MUL_F64_e64_]]
+ ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4
+ ; GCN-NEXT: [[V_LDEXP_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_LDEXP_F64_e64 3, [[COPY]], 0, [[S_MOV_B32_]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[V_LDEXP_F64_e64_]]
; GCN-NEXT: SI_RETURN implicit $vgpr0_vgpr1
%0:vgpr(s64) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_FABS %0
@@ -342,11 +338,14 @@ body: |
; GCN: liveins: $vgpr0_vgpr1
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1070596096, implicit $exec
- ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
- ; GCN-NEXT: [[V_MUL_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_MUL_F64_e64 3, [[COPY]], 0, [[REG_SEQUENCE]], 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[V_MUL_F64_e64_]]
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
+ ; GCN-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_MOV_B32_]], [[COPY1]], implicit $exec
+ ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_OR_B32_e64_]], %subreg.sub1
+ ; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 4
+ ; GCN-NEXT: [[V_LDEXP_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_LDEXP_F64_e64 1, [[REG_SEQUENCE]], 0, [[S_MOV_B32_1]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[V_LDEXP_F64_e64_]]
; GCN-NEXT: SI_RETURN implicit $vgpr0_vgpr1
%0:vgpr(s64) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_FABS %0
diff --git a/llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll b/llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll
index a5057529f0be50..86d0df494bcacd 100644
--- a/llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll
@@ -1049,169 +1049,277 @@ define double @v_mul_0x1pn15_f64(double %x) {
}
define double @v_mul_neg256_f64(double %x) {
-; GFX9-LABEL: v_mul_neg256_f64:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_mov_b32 s4, 0
-; GFX9-NEXT: s_mov_b32 s5, 0xc0700000
-; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX9-SDAG-LABEL: v_mul_neg256_f64:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[0:1], 8
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-LABEL: v_mul_neg256_f64:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_mov_b32 s4, 0
-; GFX10-NEXT: s_mov_b32 s5, 0xc0700000
-; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5]
-; GFX10-NEXT: s_setpc_b64 s[30:31]
+; GFX9-GISEL-LABEL: v_mul_neg256_f64:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: s_mov_b32 s4, 0
+; GFX9-GISEL-NEXT: s_mov_b32 s5, 0xc0700000
+; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5]
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: v_mul_neg256_f64:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_mov_b32 s0, 0
-; GFX11-NEXT: s_mov_b32 s1, 0xc0700000
-; GFX11-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1]
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX10-SDAG-LABEL: v_mul_neg256_f64:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[0:1], 8
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_mul_neg256_f64:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: s_mov_b32 s4, 0
+; GFX10-GISEL-NEXT: s_mov_b32 s5, 0xc0700000
+; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5]
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_mul_neg256_f64:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[0:1], 8
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_mul_neg256_f64:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: s_mov_b32 s0, 0
+; GFX11-GISEL-NEXT: s_mov_b32 s1, 0xc0700000
+; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1]
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%mul = fmul double %x, -256.0
ret double %mul
}
define double @v_mul_neg128_f64(double %x) {
-; GFX9-LABEL: v_mul_neg128_f64:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_mov_b32 s4, 0
-; GFX9-NEXT: s_mov_b32 s5, 0xc0600000
-; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX9-SDAG-LABEL: v_mul_neg128_f64:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[0:1], 7
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-LABEL: v_mul_neg128_f64:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_mov_b32 s4, 0
-; GFX10-NEXT: s_mov_b32 s5, 0xc0600000
-; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5]
-; GFX10-NEXT: s_setpc_b64 s[30:31]
+; GFX9-GISEL-LABEL: v_mul_neg128_f64:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: s_mov_b32 s4, 0
+; GFX9-GISEL-NEXT: s_mov_b32 s5, 0xc0600000
+; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5]
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: v_mul_neg128_f64:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_mov_b32 s0, 0
-; GFX11-NEXT: s_mov_b32 s1, 0xc0600000
-; GFX11-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1]
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX10-SDAG-LABEL: v_mul_neg128_f64:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[0:1], 7
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_mul_neg128_f64:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: s_mov_b32 s4, 0
+; GFX10-GISEL-NEXT: s_mov_b32 s5, 0xc0600000
+; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5]
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_mul_neg128_f64:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[0:1], 7
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_mul_neg128_f64:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: s_mov_b32 s0, 0
+; GFX11-GISEL-NEXT: s_mov_b32 s1, 0xc0600000
+; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1]
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%mul = fmul double %x, -128.0
ret double %mul
}
define double @v_mul_neg64_f64(double %x) {
-; GFX9-LABEL: v_mul_neg64_f64:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_mov_b32 s4, 0
-; GFX9-NEXT: s_mov_b32 s5, 0xc0500000
-; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX9-SDAG-LABEL: v_mul_neg64_f64:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[0:1], 6
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-LABEL: v_mul_neg64_f64:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_mov_b32 s4, 0
-; GFX10-NEXT: s_mov_b32 s5, 0xc0500000
-; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5]
-; GFX10-NEXT: s_setpc_b64 s[30:31]
+; GFX9-GISEL-LABEL: v_mul_neg64_f64:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: s_mov_b32 s4, 0
+; GFX9-GISEL-NEXT: s_mov_b32 s5, 0xc0500000
+; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5]
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: v_mul_neg64_f64:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_mov_b32 s0, 0
-; GFX11-NEXT: s_mov_b32 s1, 0xc0500000
-; GFX11-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1]
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX10-SDAG-LABEL: v_mul_neg64_f64:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[0:1], 6
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_mul_neg64_f64:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: s_mov_b32 s4, 0
+; GFX10-GISEL-NEXT: s_mov_b32 s5, 0xc0500000
+; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5]
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_mul_neg64_f64:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[0:1], 6
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_mul_neg64_f64:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: s_mov_b32 s0, 0
+; GFX11-GISEL-NEXT: s_mov_b32 s1, 0xc0500000
+; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1]
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%mul = fmul double %x, -64.0
ret double %mul
}
define double @v_mul_neg32_f64(double %x) {
-; GFX9-LABEL: v_mul_neg32_f64:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_mov_b32 s4, 0
-; GFX9-NEXT: s_mov_b32 s5, 0xc0400000
-; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX9-SDAG-LABEL: v_mul_neg32_f64:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[0:1], 5
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-LABEL: v_mul_neg32_f64:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_mov_b32 s4, 0
-; GFX10-NEXT: s_mov_b32 s5, 0xc0400000
-; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5]
-; GFX10-NEXT: s_setpc_b64 s[30:31]
+; GFX9-GISEL-LABEL: v_mul_neg32_f64:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: s_mov_b32 s4, 0
+; GFX9-GISEL-NEXT: s_mov_b32 s5, 0xc0400000
+; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5]
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: v_mul_neg32_f64:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_mov_b32 s0, 0
-; GFX11-NEXT: s_mov_b32 s1, 0xc0400000
-; GFX11-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1]
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX10-SDAG-LABEL: v_mul_neg32_f64:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[0:1], 5
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_mul_neg32_f64:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: s_mov_b32 s4, 0
+; GFX10-GISEL-NEXT: s_mov_b32 s5, 0xc0400000
+; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5]
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_mul_neg32_f64:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[0:1], 5
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_mul_neg32_f64:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: s_mov_b32 s0, 0
+; GFX11-GISEL-NEXT: s_mov_b32 s1, 0xc0400000
+; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1]
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%mul = fmul double %x, -32.0
ret double %mul
}
define double @v_mul_neg16_f64(double %x) {
-; GFX9-LABEL: v_mul_neg16_f64:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_mov_b32 s4, 0
-; GFX9-NEXT: s_mov_b32 s5, 0xc0300000
-; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX9-SDAG-LABEL: v_mul_neg16_f64:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[0:1], 4
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-LABEL: v_mul_neg16_f64:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_mov_b32 s4, 0
-; GFX10-NEXT: s_mov_b32 s5, 0xc0300000
-; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5]
-; GFX10-NEXT: s_setpc_b64 s[30:31]
+; GFX9-GISEL-LABEL: v_mul_neg16_f64:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: s_mov_b32 s4, 0
+; GFX9-GISEL-NEXT: s_mov_b32 s5, 0xc0300000
+; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5]
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: v_mul_neg16_f64:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_mov_b32 s0, 0
-; GFX11-NEXT: s_mov_b32 s1, 0xc0300000
-; GFX11-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1]
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX10-SDAG-LABEL: v_mul_neg16_f64:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[0:1], 4
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_mul_neg16_f64:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: s_mov_b32 s4, 0
+; GFX10-GISEL-NEXT: s_mov_b32 s5, 0xc0300000
+; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5]
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_mul_neg16_f64:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[0:1], 4
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_mul_neg16_f64:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: s_mov_b32 s0, 0
+; GFX11-GISEL-NEXT: s_mov_b32 s1, 0xc0300000
+; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1]
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%mul = fmul double %x, -16.0
ret double %mul
}
define double @v_mul_neg8_f64(double %x) {
-; GFX9-LABEL: v_mul_neg8_f64:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_mov_b32 s4, 0
-; GFX9-NEXT: s_mov_b32 s5, 0xc0200000
-; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX9-SDAG-LABEL: v_mul_neg8_f64:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[0:1], 3
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-LABEL: v_mul_neg8_f64:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_mov_b32 s4, 0
-; GFX10-NEXT: s_mov_b32 s5, 0xc0200000
-; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5]
-; GFX10-NEXT: s_setpc_b64 s[30:31]
+; GFX9-GISEL-LABEL: v_mul_neg8_f64:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: s_mov_b32 s4, 0
+; GFX9-GISEL-NEXT: s_mov_b32 s5, 0xc0200000
+; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5]
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: v_mul_neg8_f64:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_mov_b32 s0, 0
-; GFX11-NEXT: s_mov_b32 s1, 0xc0200000
-; GFX11-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1]
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX10-SDAG-LABEL: v_mul_neg8_f64:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[0:1], 3
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_mul_neg8_f64:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: s_mov_b32 s4, 0
+; GFX10-GISEL-NEXT: s_mov_b32 s5, 0xc0200000
+; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5]
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_mul_neg8_f64:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[0:1], 3
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_mul_neg8_f64:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: s_mov_b32 s0, 0
+; GFX11-GISEL-NEXT: s_mov_b32 s1, 0xc0200000
+; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1]
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%mul = fmul double %x, -8.0
ret double %mul
}
@@ -1297,29 +1405,47 @@ define double @v_mul_neg_half_f64(double %x) {
}
define double @v_mul_neg_quarter_f64(double %x) {
-; GFX9-LABEL: v_mul_neg_quarter_f64:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_mov_b32 s4, 0
-; GFX9-NEXT: s_mov_b32 s5, 0xbfd00000
-; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX9-SDAG-LABEL: v_mul_neg_quarter_f64:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[0:1], -2
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-LABEL: v_mul_neg_quarter_f64:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_mov_b32 s4, 0
-; GFX10-NEXT: s_mov_b32 s5, 0xbfd00000
-; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5]
-; GFX10-NEXT: s_setpc_b64 s[30:31]
+; GFX9-GISEL-LABEL: v_mul_neg_quarter_f64:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: s_mov_b32 s4, 0
+; GFX9-GISEL-NEXT: s_mov_b32 s5, 0xbfd00000
+; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5]
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: v_mul_neg_quarter_f64:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_mov_b32 s0, 0
-; GFX11-NEXT: s_mov_b32 s1, 0xbfd00000
-; GFX11-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1]
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX10-SDAG-LABEL: v_mul_neg_quarter_f64:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[0:1], -2
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_mul_neg_quarter_f64:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: s_mov_b32 s4, 0
+; GFX10-GISEL-NEXT: s_mov_b32 s5, 0xbfd00000
+; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5]
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_mul_neg_quarter_f64:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[0:1], -2
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_mul_neg_quarter_f64:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: s_mov_b32 s0, 0
+; GFX11-GISEL-NEXT: s_mov_b32 s1, 0xbfd00000
+; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1]
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%mul = fmul double %x, -0.25
ret double %mul
}
@@ -2517,32 +2643,53 @@ define <2 x double> @v_mul_16_v2f64(<2 x double> %x) {
}
define <2 x double> @v_mul_neg16_v2f64(<2 x double> %x) {
-; GFX9-LABEL: v_mul_neg16_v2f64:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_mov_b32 s4, 0
-; GFX9-NEXT: s_mov_b32 s5, 0xc0300000
-; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5]
-; GFX9-NEXT: v_mul_f64 v[2:3], v[2:3], s[4:5]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX9-SDAG-LABEL: v_mul_neg16_v2f64:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[0:1], 4
+; GFX9-SDAG-NEXT: v_ldexp_f64 v[2:3], -v[2:3], 4
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-LABEL: v_mul_neg16_v2f64:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_mov_b32 s4, 0
-; GFX10-NEXT: s_mov_b32 s5, 0xc0300000
-; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5]
-; GFX10-NEXT: v_mul_f64 v[2:3], v[2:3], s[4:5]
-; GFX10-NEXT: s_setpc_b64 s[30:31]
+; GFX9-GISEL-LABEL: v_mul_neg16_v2f64:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: s_mov_b32 s4, 0
+; GFX9-GISEL-NEXT: s_mov_b32 s5, 0xc0300000
+; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5]
+; GFX9-GISEL-NEXT: v_mul_f64 v[2:3], v[2:3], s[4:5]
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: v_mul_neg16_v2f64:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_mov_b32 s0, 0
-; GFX11-NEXT: s_mov_b32 s1, 0xc0300000
-; GFX11-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1]
-; GFX11-NEXT: v_mul_f64 v[2:3], v[2:3], s[0:1]
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX10-SDAG-LABEL: v_mul_neg16_v2f64:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[0:1], 4
+; GFX10-SDAG-NEXT: v_ldexp_f64 v[2:3], -v[2:3], 4
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_mul_neg16_v2f64:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: s_mov_b32 s4, 0
+; GFX10-GISEL-NEXT: s_mov_b32 s5, 0xc0300000
+; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5]
+; GFX10-GISEL-NEXT: v_mul_f64 v[2:3], v[2:3], s[4:5]
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_mul_neg16_v2f64:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[0:1], 4
+; GFX11-SDAG-NEXT: v_ldexp_f64 v[2:3], -v[2:3], 4
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_mul_neg16_v2f64:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: s_mov_b32 s0, 0
+; GFX11-GISEL-NEXT: s_mov_b32 s1, 0xc0300000
+; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], s[0:1]
+; GFX11-GISEL-NEXT: v_mul_f64 v[2:3], v[2:3], s[0:1]
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%mul = fmul <2 x double> %x, <double -16.0, double -16.0>
ret <2 x double> %mul
}
@@ -6879,58 +7026,94 @@ define double @v_mul_fabs_0x1pn1031_f64(double %x) {
}
define double @v_mul_fabs_neg256_f64(double %x) {
-; GFX9-LABEL: v_mul_fabs_neg256_f64:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_mov_b32 s4, 0
-; GFX9-NEXT: s_mov_b32 s5, 0xc0700000
-; GFX9-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[4:5]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX9-SDAG-LABEL: v_mul_fabs_neg256_f64:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], -|v[0:1]|, 8
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-LABEL: v_mul_fabs_neg256_f64:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_mov_b32 s4, 0
-; GFX10-NEXT: s_mov_b32 s5, 0xc0700000
-; GFX10-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[4:5]
-; GFX10-NEXT: s_setpc_b64 s[30:31]
+; GFX9-GISEL-LABEL: v_mul_fabs_neg256_f64:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: s_mov_b32 s4, 0
+; GFX9-GISEL-NEXT: s_mov_b32 s5, 0xc0700000
+; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[4:5]
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: v_mul_fabs_neg256_f64:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_mov_b32 s0, 0
-; GFX11-NEXT: s_mov_b32 s1, 0xc0700000
-; GFX11-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[0:1]
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX10-SDAG-LABEL: v_mul_fabs_neg256_f64:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], -|v[0:1]|, 8
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_mul_fabs_neg256_f64:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: s_mov_b32 s4, 0
+; GFX10-GISEL-NEXT: s_mov_b32 s5, 0xc0700000
+; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[4:5]
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_mul_fabs_neg256_f64:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], -|v[0:1]|, 8
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_mul_fabs_neg256_f64:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: s_mov_b32 s0, 0
+; GFX11-GISEL-NEXT: s_mov_b32 s1, 0xc0700000
+; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[0:1]
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%fabs.x = call double @llvm.fabs.f64(double %x)
%mul = fmul double %fabs.x, -256.0
ret double %mul
}
define double @v_mul_fabs_neg8_f64(double %x) {
-; GFX9-LABEL: v_mul_fabs_neg8_f64:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_mov_b32 s4, 0
-; GFX9-NEXT: s_mov_b32 s5, 0xc0200000
-; GFX9-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[4:5]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX9-SDAG-LABEL: v_mul_fabs_neg8_f64:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], -|v[0:1]|, 3
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-LABEL: v_mul_fabs_neg8_f64:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_mov_b32 s4, 0
-; GFX10-NEXT: s_mov_b32 s5, 0xc0200000
-; GFX10-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[4:5]
-; GFX10-NEXT: s_setpc_b64 s[30:31]
+; GFX9-GISEL-LABEL: v_mul_fabs_neg8_f64:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: s_mov_b32 s4, 0
+; GFX9-GISEL-NEXT: s_mov_b32 s5, 0xc0200000
+; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[4:5]
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: v_mul_fabs_neg8_f64:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_mov_b32 s0, 0
-; GFX11-NEXT: s_mov_b32 s1, 0xc0200000
-; GFX11-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[0:1]
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX10-SDAG-LABEL: v_mul_fabs_neg8_f64:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], -|v[0:1]|, 3
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_mul_fabs_neg8_f64:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: s_mov_b32 s4, 0
+; GFX10-GISEL-NEXT: s_mov_b32 s5, 0xc0200000
+; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[4:5]
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_mul_fabs_neg8_f64:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], -|v[0:1]|, 3
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_mul_fabs_neg8_f64:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: s_mov_b32 s0, 0
+; GFX11-GISEL-NEXT: s_mov_b32 s1, 0xc0200000
+; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[0:1]
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%fabs.x = call double @llvm.fabs.f64(double %x)
%mul = fmul double %fabs.x, -8.0
ret double %mul
@@ -7011,29 +7194,47 @@ define double @v_mul_fabs_neghalf_f64(double %x) {
}
define double @v_mul_fabs_negquarter_f64(double %x) {
-; GFX9-LABEL: v_mul_fabs_negquarter_f64:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_mov_b32 s4, 0
-; GFX9-NEXT: s_mov_b32 s5, 0xbfd00000
-; GFX9-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[4:5]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX9-SDAG-LABEL: v_mul_fabs_negquarter_f64:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], -|v[0:1]|, -2
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-LABEL: v_mul_fabs_negquarter_f64:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_mov_b32 s4, 0
-; GFX10-NEXT: s_mov_b32 s5, 0xbfd00000
-; GFX10-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[4:5]
-; GFX10-NEXT: s_setpc_b64 s[30:31]
+; GFX9-GISEL-LABEL: v_mul_fabs_negquarter_f64:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: s_mov_b32 s4, 0
+; GFX9-GISEL-NEXT: s_mov_b32 s5, 0xbfd00000
+; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[4:5]
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: v_mul_fabs_negquarter_f64:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_mov_b32 s0, 0
-; GFX11-NEXT: s_mov_b32 s1, 0xbfd00000
-; GFX11-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[0:1]
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX10-SDAG-LABEL: v_mul_fabs_negquarter_f64:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], -|v[0:1]|, -2
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_mul_fabs_negquarter_f64:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: s_mov_b32 s4, 0
+; GFX10-GISEL-NEXT: s_mov_b32 s5, 0xbfd00000
+; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[4:5]
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_mul_fabs_negquarter_f64:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], -|v[0:1]|, -2
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_mul_fabs_negquarter_f64:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: s_mov_b32 s0, 0
+; GFX11-GISEL-NEXT: s_mov_b32 s1, 0xbfd00000
+; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[0:1]
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%fabs.x = call double @llvm.fabs.f64(double %x)
%mul = fmul double %fabs.x, -0.25
ret double %mul
More information about the llvm-commits mailing list