[llvm] [GlobalISel] Commute G_FMUL and G_FADD constant LHS to RHS. (PR #65298)
Amara Emerson via llvm-commits
llvm-commits@lists.llvm.org
Tue Sep 5 08:25:22 PDT 2023
https://github.com/aemerson updated https://github.com/llvm/llvm-project/pull/65298:
From bda7b386d52e44b24a15f2f095ce062f725b114f Mon Sep 17 00:00:00 2001
From: Amara Emerson <amara@apple.com>
Date: Fri, 1 Sep 2023 10:29:38 -0700
Subject: [PATCH 1/2] [GlobalISel] Commute G_FMUL and G_FADD constant LHS to
RHS.
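This mirrors the existing integer combine for the floating-point opcodes
G_FADD and G_FMUL, so that later combines only need to look for constants
on the RHS. A sketch of the transform in MIR (register names are
illustrative, not taken from the patch):

  %cst:_(s32) = G_FCONSTANT float 2.000000e+00
  %mul:_(s32) = G_FMUL %cst, %x
  ; after the combine, operands 1 and 2 are swapped:
  %mul:_(s32) = G_FMUL %x, %cst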
---
.../llvm/CodeGen/GlobalISel/CombinerHelper.h | 6 +++
.../include/llvm/Target/GlobalISel/Combine.td | 23 ++++++----
.../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 16 +++++++
.../combine-commute-fp-const-lhs.mir | 45 +++++++++++++++++++
.../CodeGen/AMDGPU/GlobalISel/fdiv.f16.ll | 4 +-
.../CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll | 18 ++++----
llvm/test/CodeGen/AMDGPU/rsq.f64.ll | 44 +++++++++---------
7 files changed, 114 insertions(+), 42 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/combine-commute-fp-const-lhs.mir
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 4b10ca3b7eb69a..4f87590b9cbf97 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -787,6 +787,12 @@ class CombinerHelper {
/// Match constant LHS ops that should be commuted.
bool matchCommuteConstantToRHS(MachineInstr &MI);
+ /// Match constant LHS FP ops that should be commuted.
+ bool matchCommuteFPConstantToRHS(MachineInstr &MI);
+
+ /// Given a binop \p MI, commute operands 1 and 2.
+ void applyCommuteBinOpOperands(MachineInstr &MI);
+
private:
/// Given a non-indexed load or store instruction \p MI, find an offset that
/// can be usefully and legally folded into it as a post-indexing operation.
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index b76f739cdcaa22..3742cd2b6fc0b3 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -396,20 +396,25 @@ def select_to_logical : GICombineRule<
// Fold (C op x) -> (x op C)
// TODO: handle more isCommutable opcodes
// TODO: handle compares (currently not marked as isCommutable)
-def commute_constant_to_rhs : GICombineRule<
+def commute_int_constant_to_rhs : GICombineRule<
(defs root:$root),
(match (wip_match_opcode G_ADD, G_MUL, G_AND, G_OR, G_XOR):$root,
[{ return Helper.matchCommuteConstantToRHS(*${root}); }]),
- (apply [{
- Observer.changingInstr(*${root});
- Register LHSReg = ${root}->getOperand(1).getReg();
- Register RHSReg = ${root}->getOperand(2).getReg();
- ${root}->getOperand(1).setReg(RHSReg);
- ${root}->getOperand(2).setReg(LHSReg);
- Observer.changedInstr(*${root});
- }])
+ (apply [{ Helper.applyCommuteBinOpOperands(*${root}); }])
+>;
+
+def commute_fp_constant_to_rhs : GICombineRule<
+ (defs root:$root),
+ (match (wip_match_opcode G_FADD, G_FMUL):$root,
+ [{ return Helper.matchCommuteFPConstantToRHS(*${root}); }]),
+ (apply [{ Helper.applyCommuteBinOpOperands(*${root}); }])
>;
+def commute_constant_to_rhs : GICombineGroup<[
+ commute_int_constant_to_rhs,
+ commute_fp_constant_to_rhs
+]>;
+
// Fold x op 0 -> x
def right_identity_zero_frags : GICombinePatFrag<
(outs root:$dst), (ins $x),
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 1604de13b49403..9e1fe6f0a16b56 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -6026,6 +6026,22 @@ bool CombinerHelper::matchCommuteConstantToRHS(MachineInstr &MI) {
!getIConstantVRegVal(RHS, MRI);
}
+bool CombinerHelper::matchCommuteFPConstantToRHS(MachineInstr &MI) {
+ Register LHS = MI.getOperand(1).getReg();
+ Register RHS = MI.getOperand(2).getReg();
+ return getFConstantVRegValWithLookThrough(LHS, MRI, false).has_value() &&
+ !getFConstantVRegValWithLookThrough(RHS, MRI, false).has_value();
+}
+
+void CombinerHelper::applyCommuteBinOpOperands(MachineInstr &MI) {
+ Observer.changingInstr(MI);
+ Register LHSReg = MI.getOperand(1).getReg();
+ Register RHSReg = MI.getOperand(2).getReg();
+ MI.getOperand(1).setReg(RHSReg);
+ MI.getOperand(2).setReg(LHSReg);
+ Observer.changedInstr(MI);
+}
+
bool CombinerHelper::tryCombine(MachineInstr &MI) {
if (tryCombineCopy(MI))
return true;
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-commute-fp-const-lhs.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-commute-fp-const-lhs.mir
new file mode 100644
index 00000000000000..9f5b402d32d6a6
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-commute-fp-const-lhs.mir
@@ -0,0 +1,45 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
+# RUN: llc -mtriple aarch64 -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
+---
+name: fadd
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $s0
+
+ ; CHECK-LABEL: name: fadd
+ ; CHECK: liveins: $s0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+ ; CHECK-NEXT: %cst:_(s32) = G_FCONSTANT float 1.000000e+00
+ ; CHECK-NEXT: %add:_(s32) = G_FADD [[COPY]], %cst
+ ; CHECK-NEXT: $s0 = COPY %add(s32)
+ ; CHECK-NEXT: RET_ReallyLR
+ %0:_(s32) = COPY $s0
+ %cst:_(s32) = G_FCONSTANT float 1.000000e+00
+ %add:_(s32) = G_FADD %cst, %0
+ $s0 = COPY %add
+ RET_ReallyLR
+
+...
+---
+name: fmul
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $s0
+
+ ; CHECK-LABEL: name: fmul
+ ; CHECK: liveins: $s0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+ ; CHECK-NEXT: %cst:_(s32) = G_FCONSTANT float 2.000000e+00
+ ; CHECK-NEXT: %mul:_(s32) = G_FMUL [[COPY]], %cst
+ ; CHECK-NEXT: $s0 = COPY %mul(s32)
+ ; CHECK-NEXT: RET_ReallyLR
+ %0:_(s32) = COPY $s0
+ %cst:_(s32) = G_FCONSTANT float 2.000000e+00
+ %mul:_(s32) = G_FMUL %cst, %0
+ $s0 = COPY %mul
+ RET_ReallyLR
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f16.ll
index be3cfa7d88c4cd..33a2c9b2ce8bd3 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f16.ll
@@ -1728,7 +1728,7 @@ define <2 x half> @v_rcp_v2f16_arcp(<2 x half> %x) {
; GFX8-NEXT: v_rcp_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT: v_mov_b32_e32 v2, 0x3c00
; GFX8-NEXT: v_mul_f16_e32 v1, 1.0, v1
-; GFX8-NEXT: v_mul_f16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
@@ -1789,7 +1789,7 @@ define <2 x half> @v_rcp_v2f16_arcp_afn(<2 x half> %x) {
; GFX8-NEXT: v_rcp_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT: v_mov_b32_e32 v2, 0x3c00
; GFX8-NEXT: v_mul_f16_e32 v1, 1.0, v1
-; GFX8-NEXT: v_mul_f16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll
index 621badb4d395ef..a7eab3105a5252 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll
@@ -440,7 +440,7 @@ define double @v_rcp_f64_arcp_afn(double %x) {
; GCN-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
; GCN-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
; GCN-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; GCN-NEXT: v_mul_f64 v[4:5], 1.0, v[2:3]
+; GCN-NEXT: v_mul_f64 v[4:5], v[2:3], 1.0
; GCN-NEXT: v_fma_f64 v[0:1], -v[0:1], v[4:5], 1.0
; GCN-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
; GCN-NEXT: s_setpc_b64 s[30:31]
@@ -453,7 +453,7 @@ define double @v_rcp_f64_arcp_afn(double %x) {
; GFX10-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
; GFX10-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
; GFX10-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; GFX10-NEXT: v_mul_f64 v[4:5], 1.0, v[2:3]
+; GFX10-NEXT: v_mul_f64 v[4:5], v[2:3], 1.0
; GFX10-NEXT: v_fma_f64 v[0:1], -v[0:1], v[4:5], 1.0
; GFX10-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
; GFX10-NEXT: s_setpc_b64 s[30:31]
@@ -469,7 +469,7 @@ define double @v_rcp_f64_arcp_afn(double %x) {
; GFX11-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; GFX11-NEXT: v_mul_f64 v[4:5], 1.0, v[2:3]
+; GFX11-NEXT: v_mul_f64 v[4:5], v[2:3], 1.0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_fma_f64 v[0:1], -v[0:1], v[4:5], 1.0
; GFX11-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
@@ -1436,8 +1436,8 @@ define <2 x double> @v_rcp_v2f64_arcp_afn(<2 x double> %x) {
; GCN-NEXT: v_fma_f64 v[10:11], -v[2:3], v[6:7], 1.0
; GCN-NEXT: v_fma_f64 v[4:5], v[8:9], v[4:5], v[4:5]
; GCN-NEXT: v_fma_f64 v[6:7], v[10:11], v[6:7], v[6:7]
-; GCN-NEXT: v_mul_f64 v[8:9], 1.0, v[4:5]
-; GCN-NEXT: v_mul_f64 v[10:11], 1.0, v[6:7]
+; GCN-NEXT: v_mul_f64 v[8:9], v[4:5], 1.0
+; GCN-NEXT: v_mul_f64 v[10:11], v[6:7], 1.0
; GCN-NEXT: v_fma_f64 v[0:1], -v[0:1], v[8:9], 1.0
; GCN-NEXT: v_fma_f64 v[2:3], -v[2:3], v[10:11], 1.0
; GCN-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], v[8:9]
@@ -1457,8 +1457,8 @@ define <2 x double> @v_rcp_v2f64_arcp_afn(<2 x double> %x) {
; GFX10-NEXT: v_fma_f64 v[10:11], -v[2:3], v[6:7], 1.0
; GFX10-NEXT: v_fma_f64 v[4:5], v[8:9], v[4:5], v[4:5]
; GFX10-NEXT: v_fma_f64 v[6:7], v[10:11], v[6:7], v[6:7]
-; GFX10-NEXT: v_mul_f64 v[8:9], 1.0, v[4:5]
-; GFX10-NEXT: v_mul_f64 v[10:11], 1.0, v[6:7]
+; GFX10-NEXT: v_mul_f64 v[8:9], v[4:5], 1.0
+; GFX10-NEXT: v_mul_f64 v[10:11], v[6:7], 1.0
; GFX10-NEXT: v_fma_f64 v[0:1], -v[0:1], v[8:9], 1.0
; GFX10-NEXT: v_fma_f64 v[2:3], -v[2:3], v[10:11], 1.0
; GFX10-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], v[8:9]
@@ -1483,8 +1483,8 @@ define <2 x double> @v_rcp_v2f64_arcp_afn(<2 x double> %x) {
; GFX11-NEXT: v_fma_f64 v[4:5], v[8:9], v[4:5], v[4:5]
; GFX11-NEXT: v_fma_f64 v[6:7], v[10:11], v[6:7], v[6:7]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_mul_f64 v[8:9], 1.0, v[4:5]
-; GFX11-NEXT: v_mul_f64 v[10:11], 1.0, v[6:7]
+; GFX11-NEXT: v_mul_f64 v[8:9], v[4:5], 1.0
+; GFX11-NEXT: v_mul_f64 v[10:11], v[6:7], 1.0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_fma_f64 v[0:1], -v[0:1], v[8:9], 1.0
; GFX11-NEXT: v_fma_f64 v[2:3], -v[2:3], v[10:11], 1.0
diff --git a/llvm/test/CodeGen/AMDGPU/rsq.f64.ll b/llvm/test/CodeGen/AMDGPU/rsq.f64.ll
index 9caea1b3b3853d..046df70e95240d 100644
--- a/llvm/test/CodeGen/AMDGPU/rsq.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/rsq.f64.ll
@@ -3132,7 +3132,7 @@ define double @v_rsq_f64__afn_fdiv(double %x) {
; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
; SI-GISEL-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-GISEL-NEXT: v_mul_f64 v[4:5], 1.0, v[2:3]
+; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 1.0
; SI-GISEL-NEXT: v_fma_f64 v[0:1], -v[0:1], v[4:5], 1.0
; SI-GISEL-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -3203,7 +3203,7 @@ define double @v_rsq_f64__afn_fdiv(double %x) {
; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
; VI-GISEL-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-GISEL-NEXT: v_mul_f64 v[4:5], 1.0, v[2:3]
+; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 1.0
; VI-GISEL-NEXT: v_fma_f64 v[0:1], -v[0:1], v[4:5], 1.0
; VI-GISEL-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -3279,7 +3279,7 @@ define double @v_rsq_f64__afn(double %x) {
; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
; SI-GISEL-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-GISEL-NEXT: v_mul_f64 v[4:5], 1.0, v[2:3]
+; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 1.0
; SI-GISEL-NEXT: v_fma_f64 v[0:1], -v[0:1], v[4:5], 1.0
; SI-GISEL-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -3350,7 +3350,7 @@ define double @v_rsq_f64__afn(double %x) {
; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
; VI-GISEL-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-GISEL-NEXT: v_mul_f64 v[4:5], 1.0, v[2:3]
+; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 1.0
; VI-GISEL-NEXT: v_fma_f64 v[0:1], -v[0:1], v[4:5], 1.0
; VI-GISEL-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -3427,7 +3427,7 @@ define double @v_neg_rsq_f64__afn(double %x) {
; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
; SI-GISEL-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-GISEL-NEXT: v_mul_f64 v[4:5], -1.0, v[2:3]
+; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], -1.0
; SI-GISEL-NEXT: v_fma_f64 v[0:1], -v[0:1], v[4:5], -1.0
; SI-GISEL-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -3499,7 +3499,7 @@ define double @v_neg_rsq_f64__afn(double %x) {
; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
; VI-GISEL-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-GISEL-NEXT: v_mul_f64 v[4:5], -1.0, v[2:3]
+; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], -1.0
; VI-GISEL-NEXT: v_fma_f64 v[0:1], -v[0:1], v[4:5], -1.0
; VI-GISEL-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -3575,7 +3575,7 @@ define double @v_rsq_f64__afn_ninf(double %x) {
; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
; SI-GISEL-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-GISEL-NEXT: v_mul_f64 v[4:5], 1.0, v[2:3]
+; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 1.0
; SI-GISEL-NEXT: v_fma_f64 v[0:1], -v[0:1], v[4:5], 1.0
; SI-GISEL-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -3646,7 +3646,7 @@ define double @v_rsq_f64__afn_ninf(double %x) {
; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
; VI-GISEL-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-GISEL-NEXT: v_mul_f64 v[4:5], 1.0, v[2:3]
+; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 1.0
; VI-GISEL-NEXT: v_fma_f64 v[0:1], -v[0:1], v[4:5], 1.0
; VI-GISEL-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -3722,7 +3722,7 @@ define double @v_rsq_f64__afn_nnan(double %x) {
; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
; SI-GISEL-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-GISEL-NEXT: v_mul_f64 v[4:5], 1.0, v[2:3]
+; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 1.0
; SI-GISEL-NEXT: v_fma_f64 v[0:1], -v[0:1], v[4:5], 1.0
; SI-GISEL-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -3793,7 +3793,7 @@ define double @v_rsq_f64__afn_nnan(double %x) {
; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
; VI-GISEL-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-GISEL-NEXT: v_mul_f64 v[4:5], 1.0, v[2:3]
+; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 1.0
; VI-GISEL-NEXT: v_fma_f64 v[0:1], -v[0:1], v[4:5], 1.0
; VI-GISEL-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -3869,7 +3869,7 @@ define double @v_rsq_f64__afn_nnan_ninf(double %x) {
; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
; SI-GISEL-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-GISEL-NEXT: v_mul_f64 v[4:5], 1.0, v[2:3]
+; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 1.0
; SI-GISEL-NEXT: v_fma_f64 v[0:1], -v[0:1], v[4:5], 1.0
; SI-GISEL-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -3940,7 +3940,7 @@ define double @v_rsq_f64__afn_nnan_ninf(double %x) {
; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
; VI-GISEL-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-GISEL-NEXT: v_mul_f64 v[4:5], 1.0, v[2:3]
+; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 1.0
; VI-GISEL-NEXT: v_fma_f64 v[0:1], -v[0:1], v[4:5], 1.0
; VI-GISEL-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -4017,7 +4017,7 @@ define double @v_neg_rsq_f64__afn_nnan_ninf(double %x) {
; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
; SI-GISEL-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-GISEL-NEXT: v_mul_f64 v[4:5], -1.0, v[2:3]
+; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], -1.0
; SI-GISEL-NEXT: v_fma_f64 v[0:1], -v[0:1], v[4:5], -1.0
; SI-GISEL-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -4089,7 +4089,7 @@ define double @v_neg_rsq_f64__afn_nnan_ninf(double %x) {
; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
; VI-GISEL-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-GISEL-NEXT: v_mul_f64 v[4:5], -1.0, v[2:3]
+; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], -1.0
; VI-GISEL-NEXT: v_fma_f64 v[0:1], -v[0:1], v[4:5], -1.0
; VI-GISEL-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -4383,8 +4383,8 @@ define <2 x double> @v_rsq_v2f64__afn_nnan_ninf(<2 x double> %x) {
; SI-GISEL-NEXT: v_fma_f64 v[10:11], -v[2:3], v[6:7], 1.0
; SI-GISEL-NEXT: v_fma_f64 v[4:5], v[8:9], v[4:5], v[4:5]
; SI-GISEL-NEXT: v_fma_f64 v[6:7], v[10:11], v[6:7], v[6:7]
-; SI-GISEL-NEXT: v_mul_f64 v[8:9], 1.0, v[4:5]
-; SI-GISEL-NEXT: v_mul_f64 v[10:11], 1.0, v[6:7]
+; SI-GISEL-NEXT: v_mul_f64 v[8:9], v[4:5], 1.0
+; SI-GISEL-NEXT: v_mul_f64 v[10:11], v[6:7], 1.0
; SI-GISEL-NEXT: v_fma_f64 v[0:1], -v[0:1], v[8:9], 1.0
; SI-GISEL-NEXT: v_fma_f64 v[2:3], -v[2:3], v[10:11], 1.0
; SI-GISEL-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], v[8:9]
@@ -4506,8 +4506,8 @@ define <2 x double> @v_rsq_v2f64__afn_nnan_ninf(<2 x double> %x) {
; VI-GISEL-NEXT: v_fma_f64 v[10:11], -v[2:3], v[6:7], 1.0
; VI-GISEL-NEXT: v_fma_f64 v[4:5], v[8:9], v[4:5], v[4:5]
; VI-GISEL-NEXT: v_fma_f64 v[6:7], v[10:11], v[6:7], v[6:7]
-; VI-GISEL-NEXT: v_mul_f64 v[8:9], 1.0, v[4:5]
-; VI-GISEL-NEXT: v_mul_f64 v[10:11], 1.0, v[6:7]
+; VI-GISEL-NEXT: v_mul_f64 v[8:9], v[4:5], 1.0
+; VI-GISEL-NEXT: v_mul_f64 v[10:11], v[6:7], 1.0
; VI-GISEL-NEXT: v_fma_f64 v[0:1], -v[0:1], v[8:9], 1.0
; VI-GISEL-NEXT: v_fma_f64 v[2:3], -v[2:3], v[10:11], 1.0
; VI-GISEL-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], v[8:9]
@@ -4587,7 +4587,7 @@ define amdgpu_ps <2 x i32> @s_rsq_f64_unsafe(double inreg %x) #0 {
; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
; SI-GISEL-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-GISEL-NEXT: v_mul_f64 v[4:5], 1.0, v[2:3]
+; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 1.0
; SI-GISEL-NEXT: v_fma_f64 v[0:1], -v[0:1], v[4:5], 1.0
; SI-GISEL-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
; SI-GISEL-NEXT: v_readfirstlane_b32 s0, v0
@@ -4662,7 +4662,7 @@ define amdgpu_ps <2 x i32> @s_rsq_f64_unsafe(double inreg %x) #0 {
; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
; VI-GISEL-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-GISEL-NEXT: v_mul_f64 v[4:5], 1.0, v[2:3]
+; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 1.0
; VI-GISEL-NEXT: v_fma_f64 v[0:1], -v[0:1], v[4:5], 1.0
; VI-GISEL-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
; VI-GISEL-NEXT: v_readfirstlane_b32 s0, v0
@@ -4747,7 +4747,7 @@ define double @v_rsq_f64_unsafe(double %x) #0 {
; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
; SI-GISEL-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
; SI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-GISEL-NEXT: v_mul_f64 v[4:5], 1.0, v[2:3]
+; SI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 1.0
; SI-GISEL-NEXT: v_fma_f64 v[0:1], -v[0:1], v[4:5], 1.0
; SI-GISEL-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -4818,7 +4818,7 @@ define double @v_rsq_f64_unsafe(double %x) #0 {
; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
; VI-GISEL-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
; VI-GISEL-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-GISEL-NEXT: v_mul_f64 v[4:5], 1.0, v[2:3]
+; VI-GISEL-NEXT: v_mul_f64 v[4:5], v[2:3], 1.0
; VI-GISEL-NEXT: v_fma_f64 v[0:1], -v[0:1], v[4:5], 1.0
; VI-GISEL-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
From 22a39d676f2f11ca6409f2a1bdd8bd9367469517 Mon Sep 17 00:00:00 2001
From: Amara Emerson <amara@apple.com>
Date: Tue, 5 Sep 2023 08:16:45 -0700
Subject: [PATCH 2/2] Also match vector splats and add tests.
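Using m_GFCstOrSplat, the match now also fires when the LHS is a splat
build vector of an FP constant. A sketch (illustrative register names):

  %c:_(s32) = G_FCONSTANT float 2.000000e+00
  %cst:_(<4 x s32>) = G_BUILD_VECTOR %c, %c, %c, %c
  %mul:_(<4 x s32>) = G_FMUL %cst, %x
  ; commuted to: %mul:_(<4 x s32>) = G_FMUL %x, %cst

Non-splat build vectors are not commuted; see the fmul_vector_nonsplat
test below.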
---
.../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 6 +-
.../combine-commute-fp-const-lhs.mir | 73 +++++++++++++++++++
2 files changed, 77 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 9e1fe6f0a16b56..e7862b7a26211e 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -6029,8 +6029,10 @@ bool CombinerHelper::matchCommuteConstantToRHS(MachineInstr &MI) {
bool CombinerHelper::matchCommuteFPConstantToRHS(MachineInstr &MI) {
Register LHS = MI.getOperand(1).getReg();
Register RHS = MI.getOperand(2).getReg();
- return getFConstantVRegValWithLookThrough(LHS, MRI, false).has_value() &&
- !getFConstantVRegValWithLookThrough(RHS, MRI, false).has_value();
+ std::optional<FPValueAndVReg> ValAndVReg;
+ if (!mi_match(LHS, MRI, m_GFCstOrSplat(ValAndVReg)))
+ return false;
+ return !mi_match(RHS, MRI, m_GFCstOrSplat(ValAndVReg));
}
void CombinerHelper::applyCommuteBinOpOperands(MachineInstr &MI) {
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-commute-fp-const-lhs.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-commute-fp-const-lhs.mir
index 9f5b402d32d6a6..76d82884a7b1f1 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-commute-fp-const-lhs.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-commute-fp-const-lhs.mir
@@ -43,3 +43,76 @@ body: |
$s0 = COPY %mul
RET_ReallyLR
...
+---
+name: fmul_vector
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $q0
+
+ ; CHECK-LABEL: name: fmul_vector
+ ; CHECK: liveins: $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+ ; CHECK-NEXT: %cst_scalar:_(s32) = G_FCONSTANT float 2.000000e+00
+ ; CHECK-NEXT: %cst:_(<4 x s32>) = G_BUILD_VECTOR %cst_scalar(s32), %cst_scalar(s32), %cst_scalar(s32), %cst_scalar(s32)
+ ; CHECK-NEXT: %mul:_(<4 x s32>) = G_FMUL [[COPY]], %cst
+ ; CHECK-NEXT: $q0 = COPY %mul(<4 x s32>)
+ ; CHECK-NEXT: RET_ReallyLR
+ %0:_(<4 x s32>) = COPY $q0
+ %cst_scalar:_(s32) = G_FCONSTANT float 2.000000e+00
+ %cst:_(<4 x s32>) = G_BUILD_VECTOR %cst_scalar, %cst_scalar, %cst_scalar, %cst_scalar
+ %mul:_(<4 x s32>) = G_FMUL %cst, %0
+ $q0 = COPY %mul
+ RET_ReallyLR
+...
+---
+name: fmul_splat_with_undef
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $q0
+
+ ; CHECK-LABEL: name: fmul_splat_with_undef
+ ; CHECK: liveins: $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+ ; CHECK-NEXT: %undef:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: %cst_scalar:_(s32) = G_FCONSTANT float 2.000000e+00
+ ; CHECK-NEXT: %cst:_(<4 x s32>) = G_BUILD_VECTOR %undef(s32), %undef(s32), %cst_scalar(s32), %cst_scalar(s32)
+ ; CHECK-NEXT: %mul:_(<4 x s32>) = G_FMUL [[COPY]], %cst
+ ; CHECK-NEXT: $q0 = COPY %mul(<4 x s32>)
+ ; CHECK-NEXT: RET_ReallyLR
+ %0:_(<4 x s32>) = COPY $q0
+ %undef:_(s32) = G_IMPLICIT_DEF
+ %cst_scalar:_(s32) = G_FCONSTANT float 2.000000e+00
+ %cst:_(<4 x s32>) = G_BUILD_VECTOR %undef, %undef, %cst_scalar, %cst_scalar
+ %mul:_(<4 x s32>) = G_FMUL %cst, %0
+ $q0 = COPY %mul
+ RET_ReallyLR
+...
+---
+name: fmul_vector_nonsplat
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $q0, $s0
+
+ ; CHECK-LABEL: name: fmul_vector_nonsplat
+ ; CHECK: liveins: $q0, $s0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+ ; CHECK-NEXT: %scalar:_(s32) = COPY $s0
+ ; CHECK-NEXT: %cst_scalar:_(s32) = G_FCONSTANT float 2.000000e+00
+ ; CHECK-NEXT: %cst:_(<4 x s32>) = G_BUILD_VECTOR %cst_scalar(s32), %cst_scalar(s32), %cst_scalar(s32), %scalar(s32)
+ ; CHECK-NEXT: %mul:_(<4 x s32>) = G_FMUL %cst, [[COPY]]
+ ; CHECK-NEXT: $q0 = COPY %mul(<4 x s32>)
+ ; CHECK-NEXT: RET_ReallyLR
+ %0:_(<4 x s32>) = COPY $q0
+ %scalar:_(s32) = COPY $s0
+ %cst_scalar:_(s32) = G_FCONSTANT float 2.000000e+00
+ %cst:_(<4 x s32>) = G_BUILD_VECTOR %cst_scalar, %cst_scalar, %cst_scalar, %scalar
+ %mul:_(<4 x s32>) = G_FMUL %cst, %0
+ $q0 = COPY %mul
+ RET_ReallyLR
+...