[llvm] 1aecb0e - [RISCV] Clear kill flags when forming FMA instructions in MachineCombiner.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 12 08:07:09 PDT 2023


Author: Craig Topper
Date: 2023-07-12T08:03:45-07:00
New Revision: 1aecb0e000e4efaa347e836e7ac06e070559dc2c

URL: https://github.com/llvm/llvm-project/commit/1aecb0e000e4efaa347e836e7ac06e070559dc2c
DIFF: https://github.com/llvm/llvm-project/commit/1aecb0e000e4efaa347e836e7ac06e070559dc2c.diff

LOG: [RISCV] Clear kill flags when forming FMA instructions in MachineCombiner.

If the operands to the mul have other uses, the new FMA's uses of them
may extend their live ranges past an existing kill flag.

Reviewed By: asb, asi-sc

Differential Revision: https://reviews.llvm.org/D155046

Added: 
    llvm/test/CodeGen/RISCV/machine-combiner.mir

Modified: 
    llvm/lib/Target/RISCV/RISCVInstrInfo.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 0820dbbddb9305..0f8523bd3887df 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -1655,17 +1655,23 @@ static void combineFPFusedMultiply(MachineInstr &Root, MachineInstr &Prev,
   DebugLoc MergedLoc =
       DILocation::getMergedLocation(Root.getDebugLoc(), Prev.getDebugLoc());
 
+  bool Mul1IsKill = Mul1.isKill();
+  bool Mul2IsKill = Mul2.isKill();
+  bool AddendIsKill = Addend.isKill();
+
+  // We need to clear kill flags since we may be extending the live range past
+  // a kill. If the mul had kill flags, we can preserve those since we know
+  // where the previous range stopped.
+  MRI.clearKillFlags(Mul1.getReg());
+  MRI.clearKillFlags(Mul2.getReg());
+
   MachineInstrBuilder MIB =
       BuildMI(*MF, MergedLoc, TII->get(FusedOpc), DstReg)
-          .addReg(Mul1.getReg(), getKillRegState(Mul1.isKill()))
-          .addReg(Mul2.getReg(), getKillRegState(Mul2.isKill()))
-          .addReg(Addend.getReg(), getKillRegState(Addend.isKill()))
+          .addReg(Mul1.getReg(), getKillRegState(Mul1IsKill))
+          .addReg(Mul2.getReg(), getKillRegState(Mul2IsKill))
+          .addReg(Addend.getReg(), getKillRegState(AddendIsKill))
           .setMIFlags(IntersectedFlags);
 
-  // Mul operands are not killed anymore.
-  Mul1.setIsKill(false);
-  Mul2.setIsKill(false);
-
   InsInstrs.push_back(MIB);
   if (MRI.hasOneNonDBGUse(Prev.getOperand(0).getReg()))
     DelInstrs.push_back(&Prev);

diff  --git a/llvm/test/CodeGen/RISCV/machine-combiner.mir b/llvm/test/CodeGen/RISCV/machine-combiner.mir
new file mode 100644
index 00000000000000..e110dd9985f635
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/machine-combiner.mir
@@ -0,0 +1,87 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
+# RUN: llc %s -o - -mtriple=riscv64 -mattr=+f -run-pass=machine-combiner \
+# RUN:   -verify-machineinstrs | FileCheck %s
+
+--- |
+  source_filename = "test.ll"
+  target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"
+  target triple = "riscv64"
+
+  define void @foo(ptr %0, ptr %1, float %2, float %3, ptr %4, ptr %5) #0 {
+    %7 = load float, ptr %0, align 4
+    %8 = load float, ptr %1, align 4
+    %9 = fmul fast float %8, %7
+    %10 = fadd fast float %9, %2
+    store float %10, ptr %4, align 4
+    %11 = fsub fast float %3, %9
+    store float %11, ptr %5, align 4
+    ret void
+  }
+
+  attributes #0 = { "target-features"="+f" }
+
+...
+---
+name:            foo
+alignment:       4
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+  - { id: 2, class: fpr32 }
+  - { id: 3, class: fpr32 }
+  - { id: 4, class: gpr }
+  - { id: 5, class: gpr }
+  - { id: 6, class: fpr32 }
+  - { id: 7, class: fpr32 }
+  - { id: 8, class: fpr32 }
+  - { id: 9, class: fpr32 }
+  - { id: 10, class: fpr32 }
+liveins:
+  - { reg: '$x10', virtual-reg: '%0' }
+  - { reg: '$x11', virtual-reg: '%1' }
+  - { reg: '$f10_f', virtual-reg: '%2' }
+  - { reg: '$f11_f', virtual-reg: '%3' }
+  - { reg: '$x12', virtual-reg: '%4' }
+  - { reg: '$x13', virtual-reg: '%5' }
+frameInfo:
+  maxAlignment:    1
+machineFunctionInfo:
+  varArgsFrameIndex: 0
+  varArgsSaveSize: 0
+body:             |
+  bb.0 (%ir-block.6):
+    liveins: $x10, $x11, $f10_f, $f11_f, $x12, $x13
+
+    ; CHECK-LABEL: name: foo
+    ; CHECK: liveins: $x10, $x11, $f10_f, $f11_f, $x12, $x13
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x13
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x12
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:fpr32 = COPY $f11_f
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:fpr32 = COPY $f10_f
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr = COPY $x11
+    ; CHECK-NEXT: [[COPY5:%[0-9]+]]:gpr = COPY $x10
+    ; CHECK-NEXT: [[FLW:%[0-9]+]]:fpr32 = FLW [[COPY5]], 0 :: (load (s32) from %ir.0)
+    ; CHECK-NEXT: [[FLW1:%[0-9]+]]:fpr32 = FLW [[COPY4]], 0 :: (load (s32) from %ir.1)
+    ; CHECK-NEXT: [[FMADD_S:%[0-9]+]]:fpr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept FMADD_S [[FLW1]], [[FLW]], [[COPY3]], 7, implicit $frm
+    ; CHECK-NEXT: FSW killed [[FMADD_S]], [[COPY1]], 0 :: (store (s32) into %ir.4)
+    ; CHECK-NEXT: [[FNMSUB_S:%[0-9]+]]:fpr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept FNMSUB_S [[FLW1]], [[FLW]], [[COPY2]], 7, implicit $frm
+    ; CHECK-NEXT: FSW killed [[FNMSUB_S]], [[COPY]], 0 :: (store (s32) into %ir.5)
+    ; CHECK-NEXT: PseudoRET
+    %5:gpr = COPY $x13
+    %4:gpr = COPY $x12
+    %3:fpr32 = COPY $f11_f
+    %2:fpr32 = COPY $f10_f
+    %1:gpr = COPY $x11
+    %0:gpr = COPY $x10
+    %6:fpr32 = FLW %0, 0 :: (load (s32) from %ir.0)
+    %7:fpr32 = FLW %1, 0 :: (load (s32) from %ir.1)
+    %8:fpr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept FMUL_S killed %7, killed %6, 7, implicit $frm
+    %9:fpr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADD_S %8, %2, 7, implicit $frm
+    FSW killed %9, %4, 0 :: (store (s32) into %ir.4)
+    %10:fpr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept FSUB_S %3, %8, 7, implicit $frm
+    FSW killed %10, %5, 0 :: (store (s32) into %ir.5)
+    PseudoRET
+
+...


        


More information about the llvm-commits mailing list