[llvm] dfa5429 - [InitUndef] Enable the InitUndef pass on non-AMDGPU targets (#108353)

Mon Sep 16 00:48:28 PDT 2024

Author: Nikita Popov
Date: 2024-09-16T09:48:25+02:00
New Revision: dfa54298ff6d6e420a1a5b74c070912409713589

URL: https://github.com/llvm/llvm-project/commit/dfa54298ff6d6e420a1a5b74c070912409713589
DIFF: https://github.com/llvm/llvm-project/commit/dfa54298ff6d6e420a1a5b74c070912409713589.diff

LOG: [InitUndef] Enable the InitUndef pass on non-AMDGPU targets (#108353)

The InitUndef pass works around a register allocation issue, where undef
operands can be allocated to the same register as early-clobber result
operands. This may lead to ISA constraint violations, where certain
input and output registers are not allowed to overlap.

Originally this pass was implemented for RISCV, and then extended to ARM
in #77770. I've since removed the target-specific parts of the pass in
#106744 and #107885. This PR reduces the pass to use a single
requiresDisjointEarlyClobberAndUndef() target hook and enables it by
default. The hook is disabled for AMDGPU, because overlapping
early-clobber and undef operands are known to be safe for that target,
and we get significant codegen diffs otherwise.

The motivating case is the one in arm64-ldxr-stxr.ll, where we were
previously incorrectly allocating a stxp input and output to the same
register.

Added: 
    llvm/test/CodeGen/AArch64/init-undef.mir

Modified: 
    llvm/include/llvm/CodeGen/TargetRegisterInfo.h
    llvm/include/llvm/CodeGen/TargetSubtargetInfo.h
    llvm/lib/CodeGen/InitUndef.cpp
    llvm/lib/Target/AMDGPU/GCNSubtarget.h
    llvm/lib/Target/AMDGPU/R600Subtarget.h
    llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
    llvm/lib/Target/ARM/ARMSubtarget.h
    llvm/lib/Target/RISCV/RISCVRegisterInfo.h
    llvm/lib/Target/RISCV/RISCVSubtarget.h
    llvm/test/CodeGen/AArch64/arm64-ldxr-stxr.ll
    llvm/test/CodeGen/RISCV/regalloc-last-chance-recoloring-failure.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
index ebf06bc57948f2..1a2f31e199336a 100644

--- a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
@@ -1203,19 +1203,6 @@ class TargetRegisterInfo : public MCRegisterInfo {
   virtual bool isNonallocatableRegisterCalleeSave(MCRegister Reg) const {
     return false;
   }
-
-  /// Returns if the architecture being targeted has the required Pseudo
-  /// Instructions for initializing the register. By default this returns false,
-  /// but where it is overriden for an architecture, the behaviour will be
-  /// 
diff erent. This can either be a check to ensure the Register Class is
-  /// present, or to return true as an indication the architecture supports the
-  /// pass. If using the method that does not check for the Register Class, it
-  /// is imperative to ensure all required Pseudo Instructions are implemented,
-  /// otherwise compilation may fail with an `Unexpected register class` error.
-  virtual bool
-  doesRegClassHavePseudoInitUndef(const TargetRegisterClass *RC) const {
-    return false;
-  }
 };
 
 //===----------------------------------------------------------------------===//

diff  --git a/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h b/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h
index b4b018f080914a..bfaa6450779ae0 100644
--- a/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h
@@ -333,13 +333,13 @@ class TargetSubtargetInfo : public MCSubtargetInfo {
   /// Get the list of MacroFusion predicates.
   virtual std::vector<MacroFusionPredTy> getMacroFusions() const { return {}; };
 
-  /// supportsInitUndef is used to determine if an architecture supports
-  /// the Init Undef Pass. By default, it is assumed that it will not support
-  /// the pass, with architecture specific overrides providing the information
-  /// where they are implemented.
-  virtual bool supportsInitUndef() const { return false; }
+  /// Whether the target has instructions where an early-clobber result
+  /// operand cannot overlap with an undef input operand.
+  virtual bool requiresDisjointEarlyClobberAndUndef() const {
+    // Conservatively assume such instructions exist by default.
+    return true;
+  }
 };
-
 } // end namespace llvm
 
 #endif // LLVM_CODEGEN_TARGETSUBTARGETINFO_H

diff  --git a/llvm/lib/CodeGen/InitUndef.cpp b/llvm/lib/CodeGen/InitUndef.cpp
index a89c823416c510..911e8bb7a4d9ef 100644
--- a/llvm/lib/CodeGen/InitUndef.cpp
+++ b/llvm/lib/CodeGen/InitUndef.cpp
@@ -120,8 +120,6 @@ bool InitUndef::handleReg(MachineInstr *MI) {
       continue;
     if (!UseMO.getReg().isVirtual())
       continue;
-    if (!TRI->doesRegClassHavePseudoInitUndef(MRI->getRegClass(UseMO.getReg())))
-      continue;
 
     if (UseMO.isUndef() || findImplictDefMIFromReg(UseMO.getReg(), MRI))
       Changed |= fixupIllOperand(MI, UseMO);
@@ -140,8 +138,6 @@ bool InitUndef::handleSubReg(MachineFunction &MF, MachineInstr &MI,
       continue;
     if (UseMO.isTied())
       continue;
-    if (!TRI->doesRegClassHavePseudoInitUndef(MRI->getRegClass(UseMO.getReg())))
-      continue;
 
     Register Reg = UseMO.getReg();
     if (NewRegs.count(Reg))
@@ -246,13 +242,9 @@ bool InitUndef::processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB,
 bool InitUndef::runOnMachineFunction(MachineFunction &MF) {
   ST = &MF.getSubtarget();
 
-  // supportsInitUndef is implemented to reflect if an architecture has support
-  // for the InitUndef pass. Support comes from having the relevant Pseudo
-  // instructions that can be used to initialize the register. The function
-  // returns false by default so requires an implementation per architecture.
-  // Support can be added by overriding the function in a way that best fits
-  // the architecture.
-  if (!ST->supportsInitUndef())
+  // The pass is only needed if early-clobber defs and undef ops cannot be
+  // allocated to the same register.
+  if (!ST->requiresDisjointEarlyClobberAndUndef())
     return false;
 
   MRI = &MF.getRegInfo();

diff  --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 7b74eab96c5677..a4ae8a1be32258 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -1587,6 +1587,12 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
     // the nop.
     return true;
   }
+
+  bool requiresDisjointEarlyClobberAndUndef() const override {
+    // AMDGPU doesn't care if early-clobber and undef operands are allocated
+    // to the same register.
+    return false;
+  }
 };
 
 class GCNUserSGPRUsageInfo {

diff  --git a/llvm/lib/Target/AMDGPU/R600Subtarget.h b/llvm/lib/Target/AMDGPU/R600Subtarget.h
index c3d002f29272de..7f0f9305e1fa6c 100644
--- a/llvm/lib/Target/AMDGPU/R600Subtarget.h
+++ b/llvm/lib/Target/AMDGPU/R600Subtarget.h
@@ -160,6 +160,12 @@ class R600Subtarget final : public R600GenSubtargetInfo,
   unsigned getMinWavesPerEU() const override {
     return AMDGPU::IsaInfo::getMinWavesPerEU(this);
   }
+
+  bool requiresDisjointEarlyClobberAndUndef() const override {
+    // AMDGPU doesn't care if early-clobber and undef operands are allocated
+    // to the same register.
+    return false;
+  }
 };
 
 } // end namespace llvm

diff  --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
index 58b5e98fd30b14..926d702b4092a5 100644
--- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -240,20 +240,6 @@ class ARMBaseRegisterInfo : public ARMGenRegisterInfo {
                             unsigned SrcSubReg) const override;
 
   int getSEHRegNum(unsigned i) const { return getEncodingValue(i); }
-
-  bool doesRegClassHavePseudoInitUndef(
-      const TargetRegisterClass *RC) const override {
-    (void)RC;
-    // For the ARM Architecture we want to always return true because all
-    // required PseudoInitUndef types have been added. If compilation fails due
-    // to `Unexpected register class`, this is likely to be because the specific
-    // register being used is not support by Init Undef and needs the Pseudo
-    // Instruction adding to ARMInstrInfo.td. If this is implemented as a
-    // conditional check, this could create a false positive where Init Undef is
-    // not running, skipping the instruction and moving to the next. This could
-    // lead to illegal instructions being generated by the register allocator.
-    return true;
-  }
 };
 
 } // end namespace llvm

diff  --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h
index 00239ff94b7ba5..fa20f4b590bea5 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -209,13 +209,6 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
     return &InstrInfo->getRegisterInfo();
   }
 
-  /// The correct instructions have been implemented to initialize undef
-  /// registers, therefore the ARM Architecture is supported by the Init Undef
-  /// Pass. This will return true as the pass needs to be supported for all
-  /// types of instructions. The pass will then perform more checks to ensure it
-  /// should be applying the Pseudo Instructions.
-  bool supportsInitUndef() const override { return true; }
-
   const CallLowering *getCallLowering() const override;
   InstructionSelector *getInstructionSelector() const override;
   const LegalizerInfo *getLegalizerInfo() const override;

diff  --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.h b/llvm/lib/Target/RISCV/RISCVRegisterInfo.h
index 98a712af085399..cb0bb77d1fcbcb 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.h
@@ -130,11 +130,6 @@ struct RISCVRegisterInfo : public RISCVGenRegisterInfo {
                              const MachineFunction &MF, const VirtRegMap *VRM,
                              const LiveRegMatrix *Matrix) const override;
 
-  bool doesRegClassHavePseudoInitUndef(
-      const TargetRegisterClass *RC) const override {
-    return isVRRegClass(RC);
-  }
-
   static bool isVRRegClass(const TargetRegisterClass *RC) {
     return RISCVRI::isVRegClass(RC->TSFlags) &&
            RISCVRI::getNF(RC->TSFlags) == 1;

diff  --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
index ea54ff1df0b7cb..bf9ed3f3d71655 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -306,8 +306,6 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
   unsigned getTailDupAggressiveThreshold() const {
     return TuneInfo->TailDupAggressiveThreshold;
   }
-
-  bool supportsInitUndef() const override { return hasVInstructions(); }
 };
 } // End llvm namespace
 

diff  --git a/llvm/test/CodeGen/AArch64/arm64-ldxr-stxr.ll b/llvm/test/CodeGen/AArch64/arm64-ldxr-stxr.ll
index 1a60f8752dd571..4fb0c2775a7a7a 100644
--- a/llvm/test/CodeGen/AArch64/arm64-ldxr-stxr.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-ldxr-stxr.ll
@@ -354,11 +354,10 @@ define dso_local i32 @test_store_release_i64(i32, i64 %val, ptr %addr) {
 }
 
 ; The stxp result cannot be allocated to the same register as the inputs.
-; FIXME: This is a miscompile.
 define dso_local i32 @test_stxp_undef(ptr %p, i64 %x) nounwind {
 ; CHECK-LABEL: test_stxp_undef:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    stxp w8, x8, x1, [x0]
+; CHECK-NEXT:    stxp w8, x9, x1, [x0]
 ; CHECK-NEXT:    mov w0, w8
 ; CHECK-NEXT:    ret
   %res = call i32 @llvm.aarch64.stxp(i64 undef, i64 %x, ptr %p)

diff  --git a/llvm/test/CodeGen/AArch64/init-undef.mir b/llvm/test/CodeGen/AArch64/init-undef.mir
new file mode 100644
index 00000000000000..7935c09d7df5ec
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/init-undef.mir
@@ -0,0 +1,27 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=aarch64-- -run-pass=init-undef -o - %s | FileCheck %s
+
+---
+name:            test_stxp_undef
+body:             |
+  bb.0:
+    liveins: $x0, $x1
+
+    ; CHECK-LABEL: name: test_stxp_undef
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x1
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY $x0
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:gpr64 = IMPLICIT_DEF
+    ; CHECK-NEXT: [[INIT_UNDEF:%[0-9]+]]:gpr64 = INIT_UNDEF
+    ; CHECK-NEXT: early-clobber %3:gpr32 = STXPX killed [[INIT_UNDEF]], [[COPY]], [[COPY1]] :: (volatile store (s128))
+    ; CHECK-NEXT: $w0 = COPY %3
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %1:gpr64 = COPY $x1
+    %0:gpr64common = COPY $x0
+    %3:gpr64 = IMPLICIT_DEF
+    early-clobber %2:gpr32 = STXPX killed %3, %1, %0 :: (volatile store (s128))
+    $w0 = COPY %2
+    RET_ReallyLR implicit $w0
+
+...

diff  --git a/llvm/test/CodeGen/RISCV/regalloc-last-chance-recoloring-failure.ll b/llvm/test/CodeGen/RISCV/regalloc-last-chance-recoloring-failure.ll
index 0f3fdf08696d61..6a0dbbe356a165 100644
--- a/llvm/test/CodeGen/RISCV/regalloc-last-chance-recoloring-failure.ll
+++ b/llvm/test/CodeGen/RISCV/regalloc-last-chance-recoloring-failure.ll
@@ -25,7 +25,7 @@ define void @last_chance_recoloring_failure() {
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 16 * vlenb
 ; CHECK-NEXT:    li a0, 55
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vloxseg2ei32.v v16, (a0), v8
+; CHECK-NEXT:    vloxseg2ei32.v v16, (a1), v8
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    slli a0, a0, 3
 ; CHECK-NEXT:    add a0, sp, a0
@@ -81,7 +81,7 @@ define void @last_chance_recoloring_failure() {
 ; SUBREGLIVENESS-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 16 * vlenb
 ; SUBREGLIVENESS-NEXT:    li a0, 55
 ; SUBREGLIVENESS-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; SUBREGLIVENESS-NEXT:    vloxseg2ei32.v v16, (a0), v8
+; SUBREGLIVENESS-NEXT:    vloxseg2ei32.v v16, (a1), v8
 ; SUBREGLIVENESS-NEXT:    csrr a0, vlenb
 ; SUBREGLIVENESS-NEXT:    slli a0, a0, 3
 ; SUBREGLIVENESS-NEXT:    add a0, sp, a0