[llvm] a959ad1 - [AArch64][GISel] Assign G_LOAD defs into FPR if they feed FP instructions indirectly via PHI (#94618)

via llvm-commits llvm-commits at lists.llvm.org
Thu Jun 13 06:29:34 PDT 2024


Author: Him188
Date: 2024-06-13T14:29:31+01:00
New Revision: a959ad155343095fc8f73aa397a72e3ced1df354

URL: https://github.com/llvm/llvm-project/commit/a959ad155343095fc8f73aa397a72e3ced1df354
DIFF: https://github.com/llvm/llvm-project/commit/a959ad155343095fc8f73aa397a72e3ced1df354.diff

LOG: [AArch64][GISel] Assign G_LOAD defs into FPR if they feed FP instructions indirectly via PHI (#94618)

If G_LOAD def is used by a PHI, recursively check if the def of PHI is
used by any instruction that only uses FP oprand. If so, assign the def
of G_LOAD to FPR.

In other words, this change helps RBS to assign G_LOAD defs into FPR if
they are later on used in an FP instruction **indirectly via PHI**, to
avoid unnecessary copies. Before this patch, we only considered direct
usages.

It helps to fix GISel regression in TSVC kernel s3110.
GISel was 50% slower than SDAG before this change. Now GISel and SDAG
have the same runtime.

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
    llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h
    llvm/test/CodeGen/AArch64/GlobalISel/regbank-fp-use-def.mir
    llvm/test/CodeGen/AArch64/GlobalISel/regbankselect-fp-loads.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index 4aa6999d1d3ca..f63e29e442667 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -496,6 +496,20 @@ static bool isFPIntrinsic(const MachineRegisterInfo &MRI,
   }
 }
 
+bool AArch64RegisterBankInfo::isPHIWithFPContraints(
+    const MachineInstr &MI, const MachineRegisterInfo &MRI,
+    const TargetRegisterInfo &TRI, const unsigned Depth) const {
+  if (!MI.isPHI() || Depth > MaxFPRSearchDepth)
+    return false;
+
+  return any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
+                [&](const MachineInstr &UseMI) {
+                  if (onlyUsesFP(UseMI, MRI, TRI, Depth + 1))
+                    return true;
+                  return isPHIWithFPContraints(UseMI, MRI, TRI, Depth + 1);
+                });
+}
+
 bool AArch64RegisterBankInfo::hasFPConstraints(const MachineInstr &MI,
                                                const MachineRegisterInfo &MRI,
                                                const TargetRegisterInfo &TRI,
@@ -851,13 +865,18 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
     // instead of blind map every scalar to GPR.
     if (any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
                [&](const MachineInstr &UseMI) {
-                 // If we have at least one direct use in a FP instruction,
+                 // If we have at least one direct or indirect use
+                 // in a FP instruction,
                  // assume this was a floating point load in the IR. If it was
                  // not, we would have had a bitcast before reaching that
                  // instruction.
                  //
                  // Int->FP conversion operations are also captured in
                  // onlyDefinesFP().
+
+                 if (isPHIWithFPContraints(UseMI, MRI, TRI))
+                   return true;
+
                  return onlyUsesFP(UseMI, MRI, TRI) ||
                         onlyDefinesFP(UseMI, MRI, TRI);
                }))

diff  --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h
index b6364c6a64099..9d23f8480f779 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h
@@ -120,6 +120,13 @@ class AArch64RegisterBankInfo final : public AArch64GenRegisterBankInfo {
   /// Maximum recursion depth for hasFPConstraints.
   const unsigned MaxFPRSearchDepth = 2;
 
+  /// \returns true if \p MI is a PHI that its def is used by
+  /// any instruction that onlyUsesFP.
+  bool isPHIWithFPContraints(const MachineInstr &MI,
+                             const MachineRegisterInfo &MRI,
+                             const TargetRegisterInfo &TRI,
+                             unsigned Depth = 0) const;
+
   /// \returns true if \p MI only uses and defines FPRs.
   bool hasFPConstraints(const MachineInstr &MI, const MachineRegisterInfo &MRI,
                      const TargetRegisterInfo &TRI, unsigned Depth = 0) const;

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-fp-use-def.mir b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-fp-use-def.mir
index eb768f3b16da3..4cdefa80b309b 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-fp-use-def.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-fp-use-def.mir
@@ -150,7 +150,7 @@ body:             |
 
 ...
 ---
-name:            load_used_by_phi_gpr
+name:            load_used_by_phi_gpr_copy_gpr
 legalized:       true
 tracksRegLiveness: true
 body:             |
@@ -173,6 +173,50 @@ body:             |
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2:
   ; CHECK-NEXT:   %phi:gpr(s32) = G_PHI %gpr_copy(s32), %bb.0, %load(s32), %bb.1
+  ; CHECK-NEXT:   $w2 = COPY %phi(s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w2
+  bb.0:
+    successors: %bb.1(0x40000000), %bb.2(0x40000000)
+    liveins: $x0, $s0, $s1, $w0, $w1
+    %cond_wide:_(s32) = COPY $w0
+    %gpr_copy:_(s32) = COPY $w1
+    %ptr:_(p0) = COPY $x0
+    G_BRCOND %cond_wide, %bb.1
+    G_BR %bb.2
+  bb.1:
+    successors: %bb.2
+    %load:_(s32) = G_LOAD %ptr(p0) :: (load (s32))
+    G_BR %bb.2
+  bb.2:
+    %phi:_(s32) = G_PHI %gpr_copy(s32), %bb.0, %load(s32), %bb.1
+    $w2 = COPY %phi(s32)
+    RET_ReallyLR implicit $w2
+
+...
+---
+name:            load_used_by_phi_gpr_copy_fpr
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: load_used_by_phi_gpr
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT:   liveins: $x0, $s0, $s1, $w0, $w1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   %cond_wide:gpr(s32) = COPY $w0
+  ; CHECK-NEXT:   %gpr_copy:gpr(s32) = COPY $w1
+  ; CHECK-NEXT:   %ptr:gpr(p0) = COPY $x0
+  ; CHECK-NEXT:   G_BRCOND %cond_wide(s32), %bb.1
+  ; CHECK-NEXT:   G_BR %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   %load:fpr(s32) = G_LOAD %ptr(p0) :: (load (s32))
+  ; CHECK-NEXT:   G_BR %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   %phi:gpr(s32) = G_PHI %gpr_copy(s32), %bb.0, %load(s32), %bb.1
   ; CHECK-NEXT:   $s0 = COPY %phi(s32)
   ; CHECK-NEXT:   RET_ReallyLR implicit $s0
   bb.0:
@@ -189,7 +233,7 @@ body:             |
     G_BR %bb.2
   bb.2:
     %phi:_(s32) = G_PHI %gpr_copy(s32), %bb.0, %load(s32), %bb.1
-    $s0 = COPY %phi(s32)
+    $s0 = COPY %phi(s32) ; G_LOAD should consider this FPR constraint and assign %load FPR
     RET_ReallyLR implicit $s0
 
 ...

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/regbankselect-fp-loads.mir b/llvm/test/CodeGen/AArch64/GlobalISel/regbankselect-fp-loads.mir
index 6efa6f29d850a..b3261a9f1b651 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/regbankselect-fp-loads.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/regbankselect-fp-loads.mir
@@ -8,7 +8,8 @@
   @var_int = global i32 0
 
   define float @fp_load_phi() { ret float undef }
-  define i32 @int_load_phi() { ret i32 undef }
+  define i32 @int_load_phi_gpr() { ret i32 undef }
+  define i32 @int_load_phi_fpr() { ret i32 undef }
 
 
   @array_double = dso_local global [32000 x double] zeroinitializer, align 8
@@ -23,9 +24,12 @@
   @struct_array_int = dso_local global { [32000 x i32] } zeroinitializer, align 8
   @struct_struct_array_int = dso_local global {{ [32000 x i32] }} zeroinitializer, align 8
 
-  define i32 @array_load_int() { ret i32 undef }
-  define i32 @struct_array_load_int() { ret i32 undef }
-  define i32 @struct_struct_array_load_int() { ret i32 undef }
+  define i32 @array_load_int_gpr() { ret i32 undef }
+  define i32 @array_load_int_fpr() { ret i32 undef }
+  define i32 @struct_array_load_int_gpr() { ret i32 undef }
+  define i32 @struct_array_load_int_fpr() { ret i32 undef }
+  define i32 @struct_struct_array_load_int_gpr() { ret i32 undef }
+  define i32 @struct_struct_array_load_int_fpr() { ret i32 undef }
 
 ...
 ---
@@ -70,12 +74,12 @@ body:             |
 ...
 
 ---
-name:            int_load_phi
+name:            int_load_phi_gpr
 legalized:       true
 regBankSelected: false
 tracksRegLiveness: true
 body:             |
-  ; CHECK-LABEL: name: int_load_phi
+  ; CHECK-LABEL: name: int_load_phi_gpr
   ; CHECK: bb.0:
   ; CHECK-NEXT:   successors: %bb.1(0x80000000)
   ; CHECK-NEXT:   liveins: $w0
@@ -91,6 +95,47 @@ body:             |
   ; CHECK-NEXT:   G_BRCOND [[COPY]](s32), %bb.1
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   $w1 = COPY [[PHI]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w1
+  bb.0:
+    liveins: $w0
+    successors: %bb.1
+    %0:_(s32) = COPY $w0
+    %1:_(p0) = G_GLOBAL_VALUE @var_fp
+    %fp_load:_(s32) = G_LOAD %1 :: (load 4 from @var_int)
+
+  bb.1:
+    successors: %bb.1, %bb.2
+    %2:_(s32) = PHI %fp_load, %bb.0, %2, %bb.1
+    G_BRCOND %0, %bb.1
+
+  bb.2:
+    $w1 = COPY %2
+    RET_ReallyLR implicit $w1
+...
+
+---
+name:            int_load_phi_fpr
+legalized:       true
+regBankSelected: false
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: int_load_phi_fpr
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $w0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var_fp
+  ; CHECK-NEXT:   %fp_load:fpr(s32) = G_LOAD [[GV]](p0) :: (load (s32) from @var_int)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[PHI:%[0-9]+]]:fpr(s32) = PHI %fp_load(s32), %bb.0, [[PHI]](s32), %bb.1
+  ; CHECK-NEXT:   G_BRCOND [[COPY]](s32), %bb.1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
   ; CHECK-NEXT:   $s0 = COPY [[PHI]](s32)
   ; CHECK-NEXT:   RET_ReallyLR implicit $s0
   bb.0:
@@ -234,12 +279,12 @@ body:             |
 ...
 
 ---
-name:            array_load_int
+name:            array_load_int_gpr
 legalized:       true
 regBankSelected: false
 tracksRegLiveness: true
 body:             |
-  ; CHECK-LABEL: name: array_load_int
+  ; CHECK-LABEL: name: array_load_int_gpr
   ; CHECK: bb.0:
   ; CHECK-NEXT:   successors: %bb.1(0x80000000)
   ; CHECK-NEXT:   liveins: $w0
@@ -255,6 +300,47 @@ body:             |
   ; CHECK-NEXT:   G_BRCOND [[COPY]](s32), %bb.1
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   $w1 = COPY [[PHI]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w1
+  bb.0:
+    liveins: $w0
+    successors: %bb.1
+    %0:_(s32) = COPY $w0
+    %1:_(p0) = G_GLOBAL_VALUE @array_int
+    %fp_load:_(s64) = G_LOAD %1(p0) :: (dereferenceable load (s64) from @array_int)
+
+  bb.1:
+    successors: %bb.1, %bb.2
+    %2:_(s32) = PHI %fp_load, %bb.0, %2, %bb.1
+    G_BRCOND %0, %bb.1
+
+  bb.2:
+    $w1 = COPY %2
+    RET_ReallyLR implicit $w1
+...
+
+---
+name:            array_load_int_fpr
+legalized:       true
+regBankSelected: false
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: array_load_int_fpr
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $w0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @array_int
+  ; CHECK-NEXT:   %fp_load:fpr(s64) = G_LOAD [[GV]](p0) :: (dereferenceable load (s64) from @array_int)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[PHI:%[0-9]+]]:fpr(s32) = PHI %fp_load(s64), %bb.0, [[PHI]](s32), %bb.1
+  ; CHECK-NEXT:   G_BRCOND [[COPY]](s32), %bb.1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
   ; CHECK-NEXT:   $s0 = COPY [[PHI]](s32)
   ; CHECK-NEXT:   RET_ReallyLR implicit $s0
   bb.0:
@@ -275,12 +361,12 @@ body:             |
 ...
 
 ---
-name:            struct_array_load_int
+name:            struct_array_load_int_gpr
 legalized:       true
 regBankSelected: false
 tracksRegLiveness: true
 body:             |
-  ; CHECK-LABEL: name: struct_array_load_int
+  ; CHECK-LABEL: name: struct_array_load_int_gpr
   ; CHECK: bb.0:
   ; CHECK-NEXT:   successors: %bb.1(0x80000000)
   ; CHECK-NEXT:   liveins: $w0
@@ -296,6 +382,47 @@ body:             |
   ; CHECK-NEXT:   G_BRCOND [[COPY]](s32), %bb.1
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   $w1 = COPY [[PHI]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w1
+  bb.0:
+    liveins: $w0
+    successors: %bb.1
+    %0:_(s32) = COPY $w0
+    %1:_(p0) = G_GLOBAL_VALUE @struct_array_int
+    %fp_load:_(s64) = G_LOAD %1(p0) :: (dereferenceable load (s64) from @struct_array_int)
+
+  bb.1:
+    successors: %bb.1, %bb.2
+    %2:_(s32) = PHI %fp_load, %bb.0, %2, %bb.1
+    G_BRCOND %0, %bb.1
+
+  bb.2:
+    $w1 = COPY %2
+    RET_ReallyLR implicit $w1
+...
+
+---
+name:            struct_array_load_int_fpr
+legalized:       true
+regBankSelected: false
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: struct_array_load_int_fpr
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $w0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @struct_array_int
+  ; CHECK-NEXT:   %fp_load:fpr(s64) = G_LOAD [[GV]](p0) :: (dereferenceable load (s64) from @struct_array_int)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[PHI:%[0-9]+]]:fpr(s32) = PHI %fp_load(s64), %bb.0, [[PHI]](s32), %bb.1
+  ; CHECK-NEXT:   G_BRCOND [[COPY]](s32), %bb.1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
   ; CHECK-NEXT:   $s0 = COPY [[PHI]](s32)
   ; CHECK-NEXT:   RET_ReallyLR implicit $s0
   bb.0:
@@ -316,12 +443,12 @@ body:             |
 ...
 
 ---
-name:            struct_struct_array_load_int
+name:            struct_struct_array_load_int_gpr
 legalized:       true
 regBankSelected: false
 tracksRegLiveness: true
 body:             |
-  ; CHECK-LABEL: name: struct_struct_array_load_int
+  ; CHECK-LABEL: name: struct_struct_array_load_int_gpr
   ; CHECK: bb.0:
   ; CHECK-NEXT:   successors: %bb.1(0x80000000)
   ; CHECK-NEXT:   liveins: $w0
@@ -337,6 +464,47 @@ body:             |
   ; CHECK-NEXT:   G_BRCOND [[COPY]](s32), %bb.1
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   $w1 = COPY [[PHI]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $w1
+  bb.0:
+    liveins: $w0
+    successors: %bb.1
+    %0:_(s32) = COPY $w0
+    %1:_(p0) = G_GLOBAL_VALUE @struct_struct_array_int
+    %fp_load:_(s64) = G_LOAD %1(p0) :: (dereferenceable load (s64) from @struct_struct_array_int)
+
+  bb.1:
+    successors: %bb.1, %bb.2
+    %2:_(s32) = PHI %fp_load, %bb.0, %2, %bb.1
+    G_BRCOND %0, %bb.1
+
+  bb.2:
+    $w1 = COPY %2
+    RET_ReallyLR implicit $w1
+...
+
+---
+name:            struct_struct_array_load_int_fpr
+legalized:       true
+regBankSelected: false
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: struct_struct_array_load_int_fpr
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $w0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @struct_struct_array_int
+  ; CHECK-NEXT:   %fp_load:fpr(s64) = G_LOAD [[GV]](p0) :: (dereferenceable load (s64) from @struct_struct_array_int)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[PHI:%[0-9]+]]:fpr(s32) = PHI %fp_load(s64), %bb.0, [[PHI]](s32), %bb.1
+  ; CHECK-NEXT:   G_BRCOND [[COPY]](s32), %bb.1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
   ; CHECK-NEXT:   $s0 = COPY [[PHI]](s32)
   ; CHECK-NEXT:   RET_ReallyLR implicit $s0
   bb.0:


        


More information about the llvm-commits mailing list