[llvm] eb55203 - [AArch64][GlobalISel][PostSelectOpt] Constrain reg operands after mutating instructions.

Amara Emerson via llvm-commits llvm-commits at lists.llvm.org
Tue Feb 23 19:32:36 PST 2021


Author: Amara Emerson
Date: 2021-02-23T19:32:18-08:00
New Revision: eb55203e009ec98351f2be6e5655b7178b604920

URL: https://github.com/llvm/llvm-project/commit/eb55203e009ec98351f2be6e5655b7178b604920
DIFF: https://github.com/llvm/llvm-project/commit/eb55203e009ec98351f2be6e5655b7178b604920.diff

LOG: [AArch64][GlobalISel][PostSelectOpt] Constrain reg operands after mutating instructions.

The non-flag setting variants of instructions may have different regclass
requirements. If so, we need to constrain them.

Differential Revision: https://reviews.llvm.org/D97343

Added: 
    llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-constrain-new-regop.mir

Modified: 
    llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp
index 2f882ecb1fd4..74f417994a0e 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp
@@ -14,6 +14,7 @@
 #include "AArch64.h"
 #include "AArch64TargetMachine.h"
 #include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineOperand.h"
@@ -93,7 +94,12 @@ bool AArch64PostSelectOptimize::optimizeNZCVDefs(MachineBasicBlock &MBB) {
   // Our solution here is to try to convert flag setting operations between
   // a interval of identical FCMPs, so that CSE will be able to eliminate one.
   bool Changed = false;
-  const auto *TII = MBB.getParent()->getSubtarget().getInstrInfo();
+  auto &MF = *MBB.getParent();
+  auto &Subtarget = MF.getSubtarget();
+  const auto &TII = Subtarget.getInstrInfo();
+  auto TRI = Subtarget.getRegisterInfo();
+  auto RBI = Subtarget.getRegBankInfo();
+  auto &MRI = MF.getRegInfo();
 
   // The first step is to find the first and last FCMPs. If we have found
   // at least two, then set the limit of the bottom-up walk to the first FCMP
@@ -144,6 +150,11 @@ bool AArch64PostSelectOptimize::optimizeNZCVDefs(MachineBasicBlock &MBB) {
                             << II);
           II.setDesc(TII->get(NewOpc));
           II.RemoveOperand(DeadNZCVIdx);
+          // Changing the opcode can result in differing regclass requirements,
+          // e.g. SUBSWri uses gpr32 for the dest, whereas SUBWri uses gpr32sp.
+          // Constrain the regclasses, possibly introducing a copy.
+          constrainOperandRegClass(MF, *TRI, MRI, *TII, *RBI, II, II.getDesc(),
+                                   II.getOperand(0), 0);
           Changed |= true;
         } else {
           // Otherwise, we just set the nzcv imp-def operand to be dead, so the

diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-constrain-new-regop.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-constrain-new-regop.mir
new file mode 100644
index 000000000000..cc4ebca0726b
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-constrain-new-regop.mir
@@ -0,0 +1,83 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64 -run-pass=aarch64-post-select-optimize -verify-machineinstrs %s -o - | FileCheck %s
+---
+name:            pluto
+alignment:       4
+legalized:       true
+regBankSelected: true
+selected:        true
+tracksRegLiveness: true
+liveins:
+  - { reg: '$x0' }
+  - { reg: '$w1' }
+  - { reg: '$x2' }
+frameInfo:
+  maxAlignment:    1
+  maxCallFrameSize: 0
+body:             |
+  ; CHECK-LABEL: name: pluto
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK:   liveins: $w1, $x0, $x2
+  ; CHECK:   [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+  ; CHECK:   [[COPY1:%[0-9]+]]:gpr32sp = COPY $w1
+  ; CHECK:   [[COPY2:%[0-9]+]]:gpr64sp = COPY $x2
+  ; CHECK:   [[DEF:%[0-9]+]]:gpr32 = IMPLICIT_DEF
+  ; CHECK:   [[COPY3:%[0-9]+]]:fpr32 = COPY [[DEF]]
+  ; CHECK:   [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 872415232
+  ; CHECK:   [[COPY4:%[0-9]+]]:fpr32 = COPY [[MOVi32imm]]
+  ; CHECK:   FCMPSrr [[COPY3]], [[COPY4]], implicit-def $nzcv
+  ; CHECK:   [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 13, implicit $nzcv
+  ; CHECK:   [[SUBWri:%[0-9]+]]:gpr32common = SUBWri [[COPY1]], 1, 0
+  ; CHECK:   [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[SUBWri]], %subreg.sub_32
+  ; CHECK:   [[COPY5:%[0-9]+]]:fpr32 = COPY [[DEF]]
+  ; CHECK:   FCMPSrr [[COPY5]], [[COPY4]], implicit-def $nzcv
+  ; CHECK:   [[CSINCWr1:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 12, implicit $nzcv
+  ; CHECK:   [[EORWrr:%[0-9]+]]:gpr32 = EORWrr [[CSINCWr]], [[CSINCWr1]]
+  ; CHECK:   TBNZW [[EORWrr]], 0, %bb.2
+  ; CHECK:   B %bb.1
+  ; CHECK: bb.1:
+  ; CHECK:   successors: %bb.2(0x80000000)
+  ; CHECK:   [[UBFMXri:%[0-9]+]]:gpr64 = UBFMXri [[SUBREG_TO_REG]], 60, 59
+  ; CHECK:   [[LDRSroX:%[0-9]+]]:fpr32 = LDRSroX [[COPY]], [[UBFMXri]], 0, 0 :: (load 4)
+  ; CHECK:   [[COPY6:%[0-9]+]]:fpr32 = COPY [[DEF]]
+  ; CHECK:   [[FMULSrr:%[0-9]+]]:fpr32 = FMULSrr [[COPY6]], [[LDRSroX]]
+  ; CHECK:   [[COPY7:%[0-9]+]]:fpr32 = COPY [[DEF]]
+  ; CHECK:   [[FADDSrr:%[0-9]+]]:fpr32 = FADDSrr [[FMULSrr]], [[COPY7]]
+  ; CHECK:   STRSui [[FADDSrr]], [[COPY2]], 0 :: (store 4)
+  ; CHECK: bb.2:
+  ; CHECK:   RET_ReallyLR
+  bb.1:
+    liveins: $w1, $x0, $x2
+
+    %0:gpr64sp = COPY $x0
+    %1:gpr32sp = COPY $w1
+    %2:gpr64sp = COPY $x2
+    %3:gpr32 = IMPLICIT_DEF
+    %29:fpr32 = COPY %3
+    %33:gpr32 = MOVi32imm 872415232
+    %4:fpr32 = COPY %33
+    FCMPSrr %29, %4, implicit-def $nzcv
+    %28:gpr32 = CSINCWr $wzr, $wzr, 13, implicit $nzcv
+    %7:gpr32 = SUBSWri %1, 1, 0, implicit-def $nzcv
+    %8:gpr64 = SUBREG_TO_REG 0, %7, %subreg.sub_32
+    %30:fpr32 = COPY %3
+    FCMPSrr %30, %4, implicit-def $nzcv
+    %27:gpr32 = CSINCWr $wzr, $wzr, 12, implicit $nzcv
+    %26:gpr32 = EORWrr %28, %27
+    TBNZW %26, 0, %bb.3
+    B %bb.2
+
+  bb.2:
+    %12:gpr64 = UBFMXri %8, 60, 59
+    %15:fpr32 = LDRSroX %0, %12, 0, 0 :: (load 4)
+    %31:fpr32 = COPY %3
+    %16:fpr32 = FMULSrr %31, %15
+    %32:fpr32 = COPY %3
+    %17:fpr32 = FADDSrr %16, %32
+    STRSui %17, %2, 0 :: (store 4)
+
+  bb.3:
+    RET_ReallyLR
+
+...


        


More information about the llvm-commits mailing list