[llvm] 1cc9f62 - [GlobalISel] Add constant-folding of FP binops to combiner. (#65230)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 7 09:33:39 PDT 2023
Author: Amara Emerson
Date: 2023-09-07T19:33:35+03:00
New Revision: 1cc9f626cbfcbc0f929e4f42fc28b358d159d765
URL: https://github.com/llvm/llvm-project/commit/1cc9f626cbfcbc0f929e4f42fc28b358d159d765
DIFF: https://github.com/llvm/llvm-project/commit/1cc9f626cbfcbc0f929e4f42fc28b358d159d765.diff
LOG: [GlobalISel] Add constant-folding of FP binops to combiner. (#65230)
Added:
Modified:
llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
llvm/include/llvm/Target/GlobalISel/Combine.td
llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
llvm/lib/Target/AArch64/AArch64Combine.td
llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-constant-fold.mir
llvm/test/CodeGen/AMDGPU/llvm.log.ll
llvm/test/CodeGen/AMDGPU/llvm.log10.ll
Removed:
################################################################################
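The new rule folds a G_FADD, G_FSUB, G_FMUL or G_FDIV whose operands are both
G_FCONSTANT into a single G_FCONSTANT, and groups it with the existing integer
constant_fold_binop rule under constant_fold_binops. As a rough sketch of how
the new CombinerHelper match/apply pair could be driven from hand-written C++
(the wrapper function and its scaffolding below are hypothetical, not part of
this commit):

    #include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
    #include "llvm/CodeGen/MachineInstr.h"
    #include "llvm/CodeGen/TargetOpcodes.h"
    #include "llvm/IR/Constants.h"

    using namespace llvm;

    // Hypothetical driver: try the new match/apply pair on one instruction.
    static bool tryConstantFoldFPBinOp(MachineInstr &MI,
                                       CombinerHelper &Helper) {
      switch (MI.getOpcode()) {
      case TargetOpcode::G_FADD:
      case TargetOpcode::G_FSUB:
      case TargetOpcode::G_FMUL:
      case TargetOpcode::G_FDIV: {
        ConstantFP *MatchInfo = nullptr;
        // The match succeeds only when both operands fold to a constant FP
        // value.
        if (!Helper.matchConstantFoldFPBinOp(MI, MatchInfo))
          return false;
        // Replace the binop with a G_FCONSTANT carrying the folded value.
        Helper.replaceInstWithFConstant(MI, MatchInfo);
        return true;
      }
      default:
        return false;
      }
    }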
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index bb7b8654045d7ff..1708ef9436979ea 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -436,6 +436,9 @@ class CombinerHelper {
/// Replace an instruction with a G_FCONSTANT with value \p C.
void replaceInstWithFConstant(MachineInstr &MI, double C);
+ /// Replace an instruction with an G_FCONSTANT with value \p CFP.
+ void replaceInstWithFConstant(MachineInstr &MI, ConstantFP *CFP);
+
/// Replace an instruction with a G_CONSTANT with value \p C.
void replaceInstWithConstant(MachineInstr &MI, int64_t C);
@@ -651,6 +654,9 @@ class CombinerHelper {
/// Do constant folding when opportunities are exposed after MIR building.
bool matchConstantFoldBinOp(MachineInstr &MI, APInt &MatchInfo);
+ /// Do constant FP folding when opportunities are exposed after MIR building.
+ bool matchConstantFoldFPBinOp(MachineInstr &MI, ConstantFP* &MatchInfo);
+
/// \returns true if it is possible to narrow the width of a scalar binop
/// feeding a G_AND instruction \p MI.
bool matchNarrowBinopFeedingAnd(MachineInstr &MI, BuildFnTy &MatchInfo);
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index c97658bc9c25c6d..e3634e50ec741c1 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -155,6 +155,7 @@ def instruction_steps_matchdata: GIDefMatchData<"InstructionStepsMatchInfo">;
def register_matchinfo: GIDefMatchData<"Register">;
def int64_matchinfo: GIDefMatchData<"int64_t">;
def apint_matchinfo : GIDefMatchData<"APInt">;
+def constantfp_matchinfo : GIDefMatchData<"ConstantFP*">;
def build_fn_matchinfo :
GIDefMatchData<"std::function<void(MachineIRBuilder &)>">;
def unsigned_matchinfo: GIDefMatchData<"unsigned">;
@@ -978,6 +979,12 @@ def constant_fold_binop : GICombineRule<
[{ return Helper.matchConstantFoldBinOp(*${d}, ${matchinfo}); }]),
(apply [{ Helper.replaceInstWithConstant(*${d}, ${matchinfo}); }])>;
+def constant_fold_fp_binop : GICombineRule<
+ (defs root:$d, constantfp_matchinfo:$matchinfo),
+ (match (wip_match_opcode G_FADD, G_FSUB, G_FMUL, G_FDIV):$d,
+ [{ return Helper.matchConstantFoldFPBinOp(*${d}, ${matchinfo}); }]),
+ (apply [{ Helper.replaceInstWithFConstant(*${d}, ${matchinfo}); }])>;
+
def constant_fold_cast_op : GICombineRule<
(defs root:$d, apint_matchinfo:$matchinfo),
(match (wip_match_opcode G_ZEXT, G_SEXT, G_ANYEXT):$d,
@@ -1229,6 +1236,9 @@ def fma_combines : GICombineGroup<[combine_fadd_fmul_to_fmad_or_fma,
combine_fsub_fneg_fmul_to_fmad_or_fma, combine_fsub_fpext_fmul_to_fmad_or_fma,
combine_fsub_fpext_fneg_fmul_to_fmad_or_fma]>;
+def constant_fold_binops : GICombineGroup<[constant_fold_binop,
+ constant_fold_fp_binop]>;
+
def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines,
extract_vec_elt_combines, combines_for_extload,
combine_indexed_load_store, undef_combines, identity_combines, phi_combines,
@@ -1243,7 +1253,7 @@ def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines,
const_combines, xor_of_and_with_same_reg, ptr_add_with_zero,
shift_immed_chain, shift_of_shifted_logic_chain, load_or_combine,
div_rem_to_divrem, funnel_shift_combines, commute_shift,
- form_bitfield_extract, constant_fold_binop, constant_fold_cast_op, fabs_fneg_fold,
+ form_bitfield_extract, constant_fold_binops, constant_fold_cast_op, fabs_fneg_fold,
intdiv_combines, mulh_combines, redundant_neg_operands,
and_or_disjoint_mask, fma_combines, fold_binop_into_select,
sub_add_reg, select_to_minmax, redundant_binop_in_equality,
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 1225eb11f10ede4..9030efb9c07b6e3 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -2717,6 +2717,13 @@ void CombinerHelper::replaceInstWithConstant(MachineInstr &MI, APInt C) {
MI.eraseFromParent();
}
+void CombinerHelper::replaceInstWithFConstant(MachineInstr &MI, ConstantFP *CFP) {
+ assert(MI.getNumDefs() == 1 && "Expected only one def?");
+ Builder.setInstr(MI);
+ Builder.buildFConstant(MI.getOperand(0), CFP->getValueAPF());
+ MI.eraseFromParent();
+}
+
void CombinerHelper::replaceInstWithUndef(MachineInstr &MI) {
assert(MI.getNumDefs() == 1 && "Expected only one def?");
Builder.setInstr(MI);
@@ -4603,6 +4610,17 @@ bool CombinerHelper::matchConstantFoldBinOp(MachineInstr &MI, APInt &MatchInfo)
return true;
}
+bool CombinerHelper::matchConstantFoldFPBinOp(MachineInstr &MI, ConstantFP* &MatchInfo) {
+ Register Op1 = MI.getOperand(1).getReg();
+ Register Op2 = MI.getOperand(2).getReg();
+ auto MaybeCst = ConstantFoldFPBinOp(MI.getOpcode(), Op1, Op2, MRI);
+ if (!MaybeCst)
+ return false;
+ MatchInfo =
+ ConstantFP::get(MI.getMF()->getFunction().getContext(), *MaybeCst);
+ return true;
+}
+
bool CombinerHelper::matchNarrowBinopFeedingAnd(
MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
// Look for a binop feeding into an AND with a mask:
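The matcher above delegates the arithmetic to ConstantFoldFPBinOp; once both
operands are known G_FCONSTANTs, the fold itself is plain APFloat arithmetic
on their values. A minimal, self-contained sketch of that arithmetic (the
helper below is an illustration only, not the actual implementation the
matcher calls):

    #include "llvm/ADT/APFloat.h"
    #include "llvm/CodeGen/TargetOpcodes.h"
    #include <optional>

    using namespace llvm;

    // Illustrative fold: IEEE default rounding, one case per opcode the
    // combine handles; returns std::nullopt for anything else.
    static std::optional<APFloat> foldFPBinOpValues(unsigned Opcode,
                                                    APFloat LHS,
                                                    const APFloat &RHS) {
      const auto RM = APFloat::rmNearestTiesToEven;
      switch (Opcode) {
      case TargetOpcode::G_FADD:
        LHS.add(RHS, RM);
        return LHS;
      case TargetOpcode::G_FSUB:
        LHS.subtract(RHS, RM);
        return LHS;
      case TargetOpcode::G_FMUL:
        LHS.multiply(RHS, RM);
        return LHS;
      case TargetOpcode::G_FDIV:
        LHS.divide(RHS, RM);
        return LHS;
      default:
        return std::nullopt;
      }
    }

For the MIR tests added below, this folds the 40.0/2.0 operand pairs to 42.0,
38.0, 80.0 and 20.0, which is what the CHECK lines expect.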
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index 7e6d2805a8863cc..e31afe7a0f4af52 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -239,7 +239,7 @@ def AArch64PostLegalizerCombiner
form_bitfield_extract, rotate_out_of_range,
icmp_to_true_false_known_bits, merge_unmerge,
select_combines, fold_merge_to_zext,
- constant_fold_binop, identity_combines,
+ constant_fold_binops, identity_combines,
ptr_add_immed_chain, overlapping_and,
split_store_zero_128, undef_combines,
select_to_minmax]> {
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-constant-fold.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-constant-fold.mir
index fe516029887a007..d600bff8e08a91a 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-constant-fold.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-constant-fold.mir
@@ -139,3 +139,113 @@ body: |
RET_ReallyLR implicit $x0
...
+---
+name: fadd
+legalized: true
+liveins:
+ - { reg: '$d0' }
+body: |
+ bb.1.entry:
+ liveins: $d0
+
+ ; CHECK-LABEL: name: fadd
+ ; CHECK: liveins: $d0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %res:_(s64) = G_FCONSTANT double 4.200000e+01
+ ; CHECK-NEXT: $d0 = COPY %res(s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $d0
+ %a:_(s64) = G_FCONSTANT double 40.0
+ %b:_(s64) = G_FCONSTANT double 2.0
+ %res:_(s64) = G_FADD %a, %b
+ $d0 = COPY %res(s64)
+ RET_ReallyLR implicit $d0
+
+...
+---
+name: fsub
+legalized: true
+liveins:
+ - { reg: '$d0' }
+body: |
+ bb.1.entry:
+ liveins: $d0
+
+ ; CHECK-LABEL: name: fsub
+ ; CHECK: liveins: $d0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %res:_(s64) = G_FCONSTANT double 3.800000e+01
+ ; CHECK-NEXT: $d0 = COPY %res(s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $d0
+ %a:_(s64) = G_FCONSTANT double 40.0
+ %b:_(s64) = G_FCONSTANT double 2.0
+ %res:_(s64) = G_FSUB %a, %b
+ $d0 = COPY %res(s64)
+ RET_ReallyLR implicit $d0
+
+...
+---
+name: fmul
+legalized: true
+liveins:
+ - { reg: '$d0' }
+body: |
+ bb.1.entry:
+ liveins: $d0
+
+ ; CHECK-LABEL: name: fmul
+ ; CHECK: liveins: $d0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %res:_(s64) = G_FCONSTANT double 8.000000e+01
+ ; CHECK-NEXT: $d0 = COPY %res(s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $d0
+ %a:_(s64) = G_FCONSTANT double 40.0
+ %b:_(s64) = G_FCONSTANT double 2.0
+ %res:_(s64) = G_FMUL %a, %b
+ $d0 = COPY %res(s64)
+ RET_ReallyLR implicit $d0
+
+...
+---
+name: fdiv
+legalized: true
+liveins:
+ - { reg: '$d0' }
+body: |
+ bb.1.entry:
+ liveins: $d0
+
+ ; CHECK-LABEL: name: fdiv
+ ; CHECK: liveins: $d0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %res:_(s64) = G_FCONSTANT double 2.000000e+01
+ ; CHECK-NEXT: $d0 = COPY %res(s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $d0
+ %a:_(s64) = G_FCONSTANT double 40.0
+ %b:_(s64) = G_FCONSTANT double 2.0
+ %res:_(s64) = G_FDIV %a, %b
+ $d0 = COPY %res(s64)
+ RET_ReallyLR implicit $d0
+
+...
+---
+name: fadd32
+legalized: true
+liveins:
+ - { reg: '$s0' }
+body: |
+ bb.1.entry:
+ liveins: $s0
+
+ ; CHECK-LABEL: name: fadd32
+ ; CHECK: liveins: $s0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %res:_(s32) = G_FCONSTANT float 4.200000e+01
+ ; CHECK-NEXT: $s0 = COPY %res(s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $s0
+ %a:_(s32) = G_FCONSTANT float 40.0
+ %b:_(s32) = G_FCONSTANT float 2.0
+ %res:_(s32) = G_FADD %a, %b
+ $s0 = COPY %res(s32)
+ RET_ReallyLR implicit $s0
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log.ll b/llvm/test/CodeGen/AMDGPU/llvm.log.ll
index 3868addbfbff380..f55242a8726beb1 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.log.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.log.ll
@@ -5783,23 +5783,20 @@ define float @v_log_f32_0() {
; SI-GISEL-LABEL: v_log_f32_0:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000
-; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000
-; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0, v1
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, 0, v0
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
-; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_log_f32_e32 v0, 0
; SI-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
-; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
-; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
-; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
-; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
-; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
-; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
-; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3377d1cf
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0
+; SI-GISEL-NEXT: v_fma_f32 v4, v0, s4, -v3
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, v2, v4
+; SI-GISEL-NEXT: v_add_f32_e32 v2, v3, v2
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v3
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x41b17218
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, 0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -5825,26 +5822,23 @@ define float @v_log_f32_0() {
; VI-GISEL-LABEL: v_log_f32_0:
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000
-; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000
-; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0, v1
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, 0, v0
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
-; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
-; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
-; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
-; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v1
+; VI-GISEL-NEXT: v_log_f32_e32 v0, 0
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; VI-GISEL-NEXT: v_and_b32_e32 v2, 0xfffff000, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v3, v0, v2
; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3805fdf4, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317000, v3
+; VI-GISEL-NEXT: v_add_f32_e32 v4, v4, v5
; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4
; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2
; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
-; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1
-; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
-; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
-; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v3
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x41b17218
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, 0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -5867,23 +5861,20 @@ define float @v_log_f32_0() {
; GFX900-GISEL-LABEL: v_log_f32_0:
; GFX900-GISEL: ; %bb.0:
; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0, v1
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, 0, v0
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
-; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: v_log_f32_e32 v0, 0
; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3f317217
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
-; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
-; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
-; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x3377d1cf
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v4, v0, s4, -v3
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v2, v4
+; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v3, v2
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v3
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x41b17218
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, 0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -5907,21 +5898,18 @@ define float @v_log_f32_0() {
; GFX1100-GISEL-LABEL: v_log_f32_0:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: v_mul_f32_e64 v0, 0x4f800000, 0
-; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x800000, 0
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc_lo
-; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, 0
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, 0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
-; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3f317217, -v1
; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, s0
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log10.ll b/llvm/test/CodeGen/AMDGPU/llvm.log10.ll
index 9207c09669ff75b..ab6325216c06d9f 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.log10.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.log10.ll
@@ -5783,23 +5783,20 @@ define float @v_log10_f32_0() {
; SI-GISEL-LABEL: v_log10_f32_0:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000
-; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000
-; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0, v1
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, 0, v0
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
-; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_log_f32_e32 v0, 0
; SI-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a
-; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf
-; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
-; SI-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
-; SI-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
-; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
-; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
-; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
-; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3284fbcf
+; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a209a, v0
+; SI-GISEL-NEXT: v_fma_f32 v4, v0, s4, -v3
+; SI-GISEL-NEXT: v_fma_f32 v2, v0, v2, v4
+; SI-GISEL-NEXT: v_add_f32_e32 v2, v3, v2
+; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
+; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v3
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x411a209b
+; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, 0, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -5825,26 +5822,23 @@ define float @v_log10_f32_0() {
; VI-GISEL-LABEL: v_log10_f32_0:
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000
-; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000
-; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0, v1
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, 0, v0
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
-; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
-; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0
-; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
-; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v1
+; VI-GISEL-NEXT: v_log_f32_e32 v0, 0
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; VI-GISEL-NEXT: v_and_b32_e32 v2, 0xfffff000, v0
+; VI-GISEL-NEXT: v_sub_f32_e32 v3, v0, v2
; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v2
+; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x369a84fb, v3
+; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a2000, v3
+; VI-GISEL-NEXT: v_add_f32_e32 v4, v4, v5
; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4
; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v2
; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
-; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1
-; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
-; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
-; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
-; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
+; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v3
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x411a209b
+; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, 0, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -5867,23 +5861,20 @@ define float @v_log10_f32_0() {
; GFX900-GISEL-LABEL: v_log10_f32_0:
; GFX900-GISEL: ; %bb.0:
; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0, v1
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, 0, v0
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
-; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX900-GISEL-NEXT: v_log_f32_e32 v0, 0
; GFX900-GISEL-NEXT: s_mov_b32 s4, 0x3e9a209a
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf
-; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
-; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, s4, -v1
-; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2
-; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
-; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x3284fbcf
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000
+; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a209a, v0
+; GFX900-GISEL-NEXT: v_fma_f32 v4, v0, s4, -v3
+; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v2, v4
+; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v3, v2
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v3
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x411a209b
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, 0, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -5907,21 +5898,18 @@ define float @v_log10_f32_0() {
; GFX1100-GISEL-LABEL: v_log10_f32_0:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1100-GISEL-NEXT: v_mul_f32_e64 v0, 0x4f800000, 0
-; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x800000, 0
-; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc_lo
-; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
+; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, 0
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, 0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
-; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
+; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_fma_f32 v2, v0, 0x3e9a209a, -v1
; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
-; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, s0
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]