[llvm] c2614b3 - [RISCV] Add isCommutable to scalar FMA instructions.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Wed Apr 27 11:17:50 PDT 2022


Author: Craig Topper
Date: 2022-04-27T11:07:18-07:00
New Revision: c2614b31d955f799179c34215b1225d0131da378

URL: https://github.com/llvm/llvm-project/commit/c2614b31d955f799179c34215b1225d0131da378
DIFF: https://github.com/llvm/llvm-project/commit/c2614b31d955f799179c34215b1225d0131da378.diff

LOG: [RISCV] Add isCommutable to scalar FMA instructions.

The default implementation of findCommutedOpIndices picks the
first two source operands. That's exactly what we want for the
scalar FMA instructions.

Reviewed By: reames

Differential Revision: https://reviews.llvm.org/D124463

Added: 
    

Modified: 
    llvm/lib/Target/RISCV/RISCVInstrInfoF.td
    llvm/test/CodeGen/RISCV/machine-cse.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
index 6b1a61419a707..a2cd4a0f618cc 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
@@ -159,7 +159,7 @@ class FPStore_r<bits<3> funct3, string opcodestr, RegisterClass rty,
       Sched<[sw, ReadStoreData, ReadFMemBase]>;
 
 let hasSideEffects = 0, mayLoad = 0, mayStore = 0, mayRaiseFPException = 1,
-    UseNamedOperandTable = 1, hasPostISelHook = 1 in
+    UseNamedOperandTable = 1, hasPostISelHook = 1, isCommutable = 1 in
 class FPFMA_rrr_frm<RISCVOpcode opcode, bits<2> funct2, string opcodestr,
                     DAGOperand rty>
     : RVInstR4Frm<funct2, opcode, (outs rty:$rd),

diff --git a/llvm/test/CodeGen/RISCV/machine-cse.ll b/llvm/test/CodeGen/RISCV/machine-cse.ll
index 3f1cd9d9c522f..cd8ae1ce7fbd1 100644
--- a/llvm/test/CodeGen/RISCV/machine-cse.ll
+++ b/llvm/test/CodeGen/RISCV/machine-cse.ll
@@ -1,6 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=riscv32 | FileCheck %s --check-prefixes=RV32
-; RUN: llc < %s -mtriple=riscv64 | FileCheck %s --check-prefixes=RV64
+; RUN: llc < %s -mtriple=riscv32 -mattr=+f,+d,+zfh -target-abi=ilp32d | \
+; RUN:   FileCheck %s --check-prefixes=RV32
+; RUN: llc < %s -mtriple=riscv64 -mattr=+f,+d,+zfh -target-abi=lp64d | \
+; RUN:   FileCheck %s --check-prefixes=RV64
 
 ; Make sure MachineCSE can combine the adds with the operands commuted.
 
@@ -76,3 +78,429 @@ trueblock:
 falseblock:
   ret void
 }
+
+declare half @llvm.fma.f16(half, half, half)
+
+define void @commute_fmadd_f16(half %x, half %y, half %z, half* %p1, half* %p2, i1 zeroext %cond) {
+; RV32-LABEL: commute_fmadd_f16:
+; RV32:       # %bb.0:
+; RV32-NEXT:    fmadd.h ft0, fa0, fa1, fa2
+; RV32-NEXT:    fsh ft0, 0(a0)
+; RV32-NEXT:    beqz a2, .LBB2_2
+; RV32-NEXT:  # %bb.1: # %trueblock
+; RV32-NEXT:    fsh ft0, 0(a0)
+; RV32-NEXT:  .LBB2_2: # %falseblock
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: commute_fmadd_f16:
+; RV64:       # %bb.0:
+; RV64-NEXT:    fmadd.h ft0, fa0, fa1, fa2
+; RV64-NEXT:    fsh ft0, 0(a0)
+; RV64-NEXT:    beqz a2, .LBB2_2
+; RV64-NEXT:  # %bb.1: # %trueblock
+; RV64-NEXT:    fsh ft0, 0(a0)
+; RV64-NEXT:  .LBB2_2: # %falseblock
+; RV64-NEXT:    ret
+  %a = call half @llvm.fma.f16(half %x, half %y, half %z)
+  store half %a, half* %p1
+  br i1 %cond, label %trueblock, label %falseblock
+
+trueblock:
+  %b = call half @llvm.fma.f16(half %y, half %x, half %z)
+  store half %b, half* %p1
+  br label %falseblock
+
+falseblock:
+  ret void
+}
+
+declare float @llvm.fma.f32(float, float, float)
+
+define void @commute_fmadd_f32(float %x, float %y, float %z, float* %p1, float* %p2, i1 zeroext %cond) {
+; RV32-LABEL: commute_fmadd_f32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    fmadd.s ft0, fa0, fa1, fa2
+; RV32-NEXT:    fsw ft0, 0(a0)
+; RV32-NEXT:    beqz a2, .LBB3_2
+; RV32-NEXT:  # %bb.1: # %trueblock
+; RV32-NEXT:    fsw ft0, 0(a0)
+; RV32-NEXT:  .LBB3_2: # %falseblock
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: commute_fmadd_f32:
+; RV64:       # %bb.0:
+; RV64-NEXT:    fmadd.s ft0, fa0, fa1, fa2
+; RV64-NEXT:    fsw ft0, 0(a0)
+; RV64-NEXT:    beqz a2, .LBB3_2
+; RV64-NEXT:  # %bb.1: # %trueblock
+; RV64-NEXT:    fsw ft0, 0(a0)
+; RV64-NEXT:  .LBB3_2: # %falseblock
+; RV64-NEXT:    ret
+  %a = call float @llvm.fma.f32(float %x, float %y, float %z)
+  store float %a, float* %p1
+  br i1 %cond, label %trueblock, label %falseblock
+
+trueblock:
+  %b = call float @llvm.fma.f32(float %y, float %x, float %z)
+  store float %b, float* %p1
+  br label %falseblock
+
+falseblock:
+  ret void
+}
+
+declare double @llvm.fma.f64(double, double, double)
+
+define void @commute_fmadd_f64(double %x, double %y, double %z, double* %p1, double* %p2, i1 zeroext %cond) {
+; RV32-LABEL: commute_fmadd_f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    fmadd.d ft0, fa0, fa1, fa2
+; RV32-NEXT:    fsd ft0, 0(a0)
+; RV32-NEXT:    beqz a2, .LBB4_2
+; RV32-NEXT:  # %bb.1: # %trueblock
+; RV32-NEXT:    fsd ft0, 0(a0)
+; RV32-NEXT:  .LBB4_2: # %falseblock
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: commute_fmadd_f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    fmadd.d ft0, fa0, fa1, fa2
+; RV64-NEXT:    fsd ft0, 0(a0)
+; RV64-NEXT:    beqz a2, .LBB4_2
+; RV64-NEXT:  # %bb.1: # %trueblock
+; RV64-NEXT:    fsd ft0, 0(a0)
+; RV64-NEXT:  .LBB4_2: # %falseblock
+; RV64-NEXT:    ret
+  %a = call double @llvm.fma.f64(double %x, double %y, double %z)
+  store double %a, double* %p1
+  br i1 %cond, label %trueblock, label %falseblock
+
+trueblock:
+  %b = call double @llvm.fma.f64(double %y, double %x, double %z)
+  store double %b, double* %p1
+  br label %falseblock
+
+falseblock:
+  ret void
+}
+
+define void @commute_fmsub_f16(half %x, half %y, half %z, half* %p1, half* %p2, i1 zeroext %cond) {
+; RV32-LABEL: commute_fmsub_f16:
+; RV32:       # %bb.0:
+; RV32-NEXT:    fmsub.h ft0, fa0, fa1, fa2
+; RV32-NEXT:    fsh ft0, 0(a0)
+; RV32-NEXT:    beqz a2, .LBB5_2
+; RV32-NEXT:  # %bb.1: # %trueblock
+; RV32-NEXT:    fsh ft0, 0(a0)
+; RV32-NEXT:  .LBB5_2: # %falseblock
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: commute_fmsub_f16:
+; RV64:       # %bb.0:
+; RV64-NEXT:    fmsub.h ft0, fa0, fa1, fa2
+; RV64-NEXT:    fsh ft0, 0(a0)
+; RV64-NEXT:    beqz a2, .LBB5_2
+; RV64-NEXT:  # %bb.1: # %trueblock
+; RV64-NEXT:    fsh ft0, 0(a0)
+; RV64-NEXT:  .LBB5_2: # %falseblock
+; RV64-NEXT:    ret
+  %negz = fneg half %z
+  %a = call half @llvm.fma.f16(half %x, half %y, half %negz)
+  store half %a, half* %p1
+  br i1 %cond, label %trueblock, label %falseblock
+
+trueblock:
+  %negz2 = fneg half %z
+  %b = call half @llvm.fma.f16(half %y, half %x, half %negz2)
+  store half %b, half* %p1
+  br label %falseblock
+
+falseblock:
+  ret void
+}
+
+define void @commute_fmsub_f32(float %x, float %y, float %z, float* %p1, float* %p2, i1 zeroext %cond) {
+; RV32-LABEL: commute_fmsub_f32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    fmsub.s ft0, fa0, fa1, fa2
+; RV32-NEXT:    fsw ft0, 0(a0)
+; RV32-NEXT:    beqz a2, .LBB6_2
+; RV32-NEXT:  # %bb.1: # %trueblock
+; RV32-NEXT:    fsw ft0, 0(a0)
+; RV32-NEXT:  .LBB6_2: # %falseblock
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: commute_fmsub_f32:
+; RV64:       # %bb.0:
+; RV64-NEXT:    fmsub.s ft0, fa0, fa1, fa2
+; RV64-NEXT:    fsw ft0, 0(a0)
+; RV64-NEXT:    beqz a2, .LBB6_2
+; RV64-NEXT:  # %bb.1: # %trueblock
+; RV64-NEXT:    fsw ft0, 0(a0)
+; RV64-NEXT:  .LBB6_2: # %falseblock
+; RV64-NEXT:    ret
+  %negz = fneg float %z
+  %a = call float @llvm.fma.f32(float %x, float %y, float %negz)
+  store float %a, float* %p1
+  br i1 %cond, label %trueblock, label %falseblock
+
+trueblock:
+  %negz2 = fneg float %z
+  %b = call float @llvm.fma.f32(float %y, float %x, float %negz2)
+  store float %b, float* %p1
+  br label %falseblock
+
+falseblock:
+  ret void
+}
+
+define void @commute_fmsub_f64(double %x, double %y, double %z, double* %p1, double* %p2, i1 zeroext %cond) {
+; RV32-LABEL: commute_fmsub_f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    fmsub.d ft0, fa0, fa1, fa2
+; RV32-NEXT:    fsd ft0, 0(a0)
+; RV32-NEXT:    beqz a2, .LBB7_2
+; RV32-NEXT:  # %bb.1: # %trueblock
+; RV32-NEXT:    fsd ft0, 0(a0)
+; RV32-NEXT:  .LBB7_2: # %falseblock
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: commute_fmsub_f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    fmsub.d ft0, fa0, fa1, fa2
+; RV64-NEXT:    fsd ft0, 0(a0)
+; RV64-NEXT:    beqz a2, .LBB7_2
+; RV64-NEXT:  # %bb.1: # %trueblock
+; RV64-NEXT:    fsd ft0, 0(a0)
+; RV64-NEXT:  .LBB7_2: # %falseblock
+; RV64-NEXT:    ret
+  %negz = fneg double %z
+  %a = call double @llvm.fma.f64(double %x, double %y, double %negz)
+  store double %a, double* %p1
+  br i1 %cond, label %trueblock, label %falseblock
+
+trueblock:
+  %negz2 = fneg double %z
+  %b = call double @llvm.fma.f64(double %y, double %x, double %negz2)
+  store double %b, double* %p1
+  br label %falseblock
+
+falseblock:
+  ret void
+}
+
+define void @commute_fnmadd_f16(half %x, half %y, half %z, half* %p1, half* %p2, i1 zeroext %cond) {
+; RV32-LABEL: commute_fnmadd_f16:
+; RV32:       # %bb.0:
+; RV32-NEXT:    fnmadd.h ft0, fa0, fa1, fa2
+; RV32-NEXT:    fsh ft0, 0(a0)
+; RV32-NEXT:    beqz a2, .LBB8_2
+; RV32-NEXT:  # %bb.1: # %trueblock
+; RV32-NEXT:    fsh ft0, 0(a0)
+; RV32-NEXT:  .LBB8_2: # %falseblock
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: commute_fnmadd_f16:
+; RV64:       # %bb.0:
+; RV64-NEXT:    fnmadd.h ft0, fa0, fa1, fa2
+; RV64-NEXT:    fsh ft0, 0(a0)
+; RV64-NEXT:    beqz a2, .LBB8_2
+; RV64-NEXT:  # %bb.1: # %trueblock
+; RV64-NEXT:    fsh ft0, 0(a0)
+; RV64-NEXT:  .LBB8_2: # %falseblock
+; RV64-NEXT:    ret
+  %negx = fneg half %x
+  %negz = fneg half %z
+  %a = call half @llvm.fma.f16(half %negx, half %y, half %negz)
+  store half %a, half* %p1
+  br i1 %cond, label %trueblock, label %falseblock
+
+trueblock:
+  %negy = fneg half %y
+  %negz2 = fneg half %z
+  %b = call half @llvm.fma.f16(half %negy, half %x, half %negz2)
+  store half %b, half* %p1
+  br label %falseblock
+
+falseblock:
+  ret void
+}
+
+define void @commute_fnmadd_f32(float %x, float %y, float %z, float* %p1, float* %p2, i1 zeroext %cond) {
+; RV32-LABEL: commute_fnmadd_f32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    fnmadd.s ft0, fa0, fa1, fa2
+; RV32-NEXT:    fsw ft0, 0(a0)
+; RV32-NEXT:    beqz a2, .LBB9_2
+; RV32-NEXT:  # %bb.1: # %trueblock
+; RV32-NEXT:    fsw ft0, 0(a0)
+; RV32-NEXT:  .LBB9_2: # %falseblock
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: commute_fnmadd_f32:
+; RV64:       # %bb.0:
+; RV64-NEXT:    fnmadd.s ft0, fa0, fa1, fa2
+; RV64-NEXT:    fsw ft0, 0(a0)
+; RV64-NEXT:    beqz a2, .LBB9_2
+; RV64-NEXT:  # %bb.1: # %trueblock
+; RV64-NEXT:    fsw ft0, 0(a0)
+; RV64-NEXT:  .LBB9_2: # %falseblock
+; RV64-NEXT:    ret
+  %negx = fneg float %x
+  %negz = fneg float %z
+  %a = call float @llvm.fma.f32(float %negx, float %y, float %negz)
+  store float %a, float* %p1
+  br i1 %cond, label %trueblock, label %falseblock
+
+trueblock:
+  %negy = fneg float %y
+  %negz2 = fneg float %z
+  %b = call float @llvm.fma.f32(float %negy, float %x, float %negz2)
+  store float %b, float* %p1
+  br label %falseblock
+
+falseblock:
+  ret void
+}
+
+define void @commute_fnmadd_f64(double %x, double %y, double %z, double* %p1, double* %p2, i1 zeroext %cond) {
+; RV32-LABEL: commute_fnmadd_f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    fnmadd.d ft0, fa0, fa1, fa2
+; RV32-NEXT:    fsd ft0, 0(a0)
+; RV32-NEXT:    beqz a2, .LBB10_2
+; RV32-NEXT:  # %bb.1: # %trueblock
+; RV32-NEXT:    fsd ft0, 0(a0)
+; RV32-NEXT:  .LBB10_2: # %falseblock
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: commute_fnmadd_f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    fnmadd.d ft0, fa0, fa1, fa2
+; RV64-NEXT:    fsd ft0, 0(a0)
+; RV64-NEXT:    beqz a2, .LBB10_2
+; RV64-NEXT:  # %bb.1: # %trueblock
+; RV64-NEXT:    fsd ft0, 0(a0)
+; RV64-NEXT:  .LBB10_2: # %falseblock
+; RV64-NEXT:    ret
+  %negx = fneg double %x
+  %negz = fneg double %z
+  %a = call double @llvm.fma.f64(double %negx, double %y, double %negz)
+  store double %a, double* %p1
+  br i1 %cond, label %trueblock, label %falseblock
+
+trueblock:
+  %negy = fneg double %y
+  %negz2 = fneg double %z
+  %b = call double @llvm.fma.f64(double %negy, double %x, double %negz2)
+  store double %b, double* %p1
+  br label %falseblock
+
+falseblock:
+  ret void
+}
+
+define void @commute_fnmsub_f16(half %x, half %y, half %z, half* %p1, half* %p2, i1 zeroext %cond) {
+; RV32-LABEL: commute_fnmsub_f16:
+; RV32:       # %bb.0:
+; RV32-NEXT:    fnmsub.h ft0, fa0, fa1, fa2
+; RV32-NEXT:    fsh ft0, 0(a0)
+; RV32-NEXT:    beqz a2, .LBB11_2
+; RV32-NEXT:  # %bb.1: # %trueblock
+; RV32-NEXT:    fsh ft0, 0(a0)
+; RV32-NEXT:  .LBB11_2: # %falseblock
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: commute_fnmsub_f16:
+; RV64:       # %bb.0:
+; RV64-NEXT:    fnmsub.h ft0, fa0, fa1, fa2
+; RV64-NEXT:    fsh ft0, 0(a0)
+; RV64-NEXT:    beqz a2, .LBB11_2
+; RV64-NEXT:  # %bb.1: # %trueblock
+; RV64-NEXT:    fsh ft0, 0(a0)
+; RV64-NEXT:  .LBB11_2: # %falseblock
+; RV64-NEXT:    ret
+  %negx = fneg half %x
+  %a = call half @llvm.fma.f16(half %negx, half %y, half %z)
+  store half %a, half* %p1
+  br i1 %cond, label %trueblock, label %falseblock
+
+trueblock:
+  %negy = fneg half %y
+  %b = call half @llvm.fma.f16(half %negy, half %x, half %z)
+  store half %b, half* %p1
+  br label %falseblock
+
+falseblock:
+  ret void
+}
+
+define void @commute_fnmsub_f32(float %x, float %y, float %z, float* %p1, float* %p2, i1 zeroext %cond) {
+; RV32-LABEL: commute_fnmsub_f32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    fnmsub.s ft0, fa0, fa1, fa2
+; RV32-NEXT:    fsw ft0, 0(a0)
+; RV32-NEXT:    beqz a2, .LBB12_2
+; RV32-NEXT:  # %bb.1: # %trueblock
+; RV32-NEXT:    fsw ft0, 0(a0)
+; RV32-NEXT:  .LBB12_2: # %falseblock
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: commute_fnmsub_f32:
+; RV64:       # %bb.0:
+; RV64-NEXT:    fnmsub.s ft0, fa0, fa1, fa2
+; RV64-NEXT:    fsw ft0, 0(a0)
+; RV64-NEXT:    beqz a2, .LBB12_2
+; RV64-NEXT:  # %bb.1: # %trueblock
+; RV64-NEXT:    fsw ft0, 0(a0)
+; RV64-NEXT:  .LBB12_2: # %falseblock
+; RV64-NEXT:    ret
+  %negx = fneg float %x
+  %a = call float @llvm.fma.f32(float %negx, float %y, float %z)
+  store float %a, float* %p1
+  br i1 %cond, label %trueblock, label %falseblock
+
+trueblock:
+  %negy = fneg float %y
+  %b = call float @llvm.fma.f32(float %negy, float %x, float %z)
+  store float %b, float* %p1
+  br label %falseblock
+
+falseblock:
+  ret void
+}
+
+define void @commute_fnmsub_f64(double %x, double %y, double %z, double* %p1, double* %p2, i1 zeroext %cond) {
+; RV32-LABEL: commute_fnmsub_f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    fnmsub.d ft0, fa0, fa1, fa2
+; RV32-NEXT:    fsd ft0, 0(a0)
+; RV32-NEXT:    beqz a2, .LBB13_2
+; RV32-NEXT:  # %bb.1: # %trueblock
+; RV32-NEXT:    fsd ft0, 0(a0)
+; RV32-NEXT:  .LBB13_2: # %falseblock
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: commute_fnmsub_f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    fnmsub.d ft0, fa0, fa1, fa2
+; RV64-NEXT:    fsd ft0, 0(a0)
+; RV64-NEXT:    beqz a2, .LBB13_2
+; RV64-NEXT:  # %bb.1: # %trueblock
+; RV64-NEXT:    fsd ft0, 0(a0)
+; RV64-NEXT:  .LBB13_2: # %falseblock
+; RV64-NEXT:    ret
+  %negx = fneg double %x
+  %a = call double @llvm.fma.f64(double %negx, double %y, double %z)
+  store double %a, double* %p1
+  br i1 %cond, label %trueblock, label %falseblock
+
+trueblock:
+  %negy = fneg double %y
+  %b = call double @llvm.fma.f64(double %negy, double %x, double %z)
+  store double %b, double* %p1
+  br label %falseblock
+
+falseblock:
+  ret void
+}


        


More information about the llvm-commits mailing list