[llvm] r302453 - [AArch64][RegisterBankInfo] Change the default mapping of fp loads.

Quentin Colombet via llvm-commits llvm-commits at lists.llvm.org
Mon May 8 11:16:33 PDT 2017


Author: qcolombet
Date: Mon May  8 13:16:31 2017
New Revision: 302453

URL: http://llvm.org/viewvc/llvm-project?rev=302453&view=rev
Log:
[AArch64][RegisterBankInfo] Change the default mapping of fp loads.

This fixes PR32550, in a way that does not require running the greedy
mode at O0.

The fix consists of checking whether a load is used by any floating-point
instruction; if so, we return a default mapping with FPR instead
of GPR.
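The heuristic can be illustrated with a self-contained sketch. The types and names below (`Instr`, `isFPOpcode`, `mapLoadResult`) are hypothetical mock-ups for illustration, not LLVM's real API; the actual patch walks `MRI.use_instructions(...)` and tests `isPreISelGenericFloatingPointOpcode(...)` as shown in the diff below.

```cpp
#include <cassert>
#include <vector>

// Mock generic opcodes; stand-ins for LLVM's pre-ISel generic opcodes.
enum Opcode { G_LOAD, G_FADD, G_ADD, G_BITCAST };

enum RegBank { GPR, FPR };

// Mock instruction: an opcode plus the instructions using its result.
// (In LLVM this information comes from MachineRegisterInfo, not the
// instruction itself.)
struct Instr {
  Opcode Opc;
  std::vector<const Instr *> Uses;
};

// Stand-in for isPreISelGenericFloatingPointOpcode().
static bool isFPOpcode(Opcode Opc) { return Opc == G_FADD; }

// Sketch of the heuristic from this commit: a scalar load defaults to
// GPR, but if any direct user is a floating-point instruction, assume
// the load was a floating-point load in the IR and map its result to
// FPR instead.
static RegBank mapLoadResult(const Instr &Load) {
  for (const Instr *UseMI : Load.Uses)
    if (isFPOpcode(UseMI->Opc))
      return FPR;
  return GPR;
}
```

Note that a `G_BITCAST` user does not count as a floating-point use here, which matches the log's rationale: an integer load feeding floating-point code would go through a bitcast before reaching the fp instruction.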

Modified:
    llvm/trunk/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
    llvm/trunk/test/CodeGen/AArch64/GlobalISel/arm64-regbankselect.mir

Modified: llvm/trunk/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64RegisterBankInfo.cpp?rev=302453&r1=302452&r2=302453&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64RegisterBankInfo.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64RegisterBankInfo.cpp Mon May  8 13:16:31 2017
@@ -531,6 +531,20 @@ AArch64RegisterBankInfo::getInstrMapping
     // FIXME: Should be derived from the scheduling model.
     if (OpRegBankIdx[0] != PMI_FirstGPR)
       Cost = 2;
+    else
+      // Check whether that load feeds fp instructions.
+      // In that case, we want the default mapping to be on FPR
+      // instead of blindly mapping every scalar to GPR.
+      for (const MachineInstr &UseMI :
+           MRI.use_instructions(MI.getOperand(0).getReg()))
+        // If we have at least one direct use in a FP instruction,
+        // assume this was a floating point load in the IR.
+        // If it was not, we would have had a bitcast before
+        // reaching that instruction.
+        if (isPreISelGenericFloatingPointOpcode(UseMI.getOpcode())) {
+          OpRegBankIdx[0] = PMI_FirstFPR;
+          break;
+        }
     break;
   }
 

Modified: llvm/trunk/test/CodeGen/AArch64/GlobalISel/arm64-regbankselect.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/GlobalISel/arm64-regbankselect.mir?rev=302453&r1=302452&r2=302453&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/GlobalISel/arm64-regbankselect.mir (original)
+++ llvm/trunk/test/CodeGen/AArch64/GlobalISel/arm64-regbankselect.mir Mon May  8 13:16:31 2017
@@ -74,6 +74,14 @@
     %res = bitcast <2 x i32> %vres to i64
     ret i64 %res
   }
+
+  define i64 @floatingPointLoad(i64 %arg1, double* %addr) {
+    %varg1 = bitcast i64 %arg1 to double
+    %varg2 = load double, double* %addr
+    %vres = fadd double %varg1, %varg2
+    %res = bitcast double %vres to i64
+    ret i64 %res
+  }
 ...
 
 ---
@@ -650,3 +658,45 @@ body:             |
     RET_ReallyLR implicit %x0
 
 ...
+
+---
+# Make sure we map what looks like floating point
+# loads to floating point register bank.
+# CHECK-LABEL: name: floatingPointLoad
+name:            floatingPointLoad
+legalized:       true
+
+# CHECK: registers:
+# CHECK-NEXT:   - { id: 0, class: gpr }
+# CHECK-NEXT:   - { id: 1, class: gpr }
+# CHECK-NEXT:   - { id: 2, class: fpr }
+# CHECK-NEXT:   - { id: 3, class: fpr }
+# CHECK-NEXT:   - { id: 4, class: fpr }
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+  - { id: 3, class: _ }
+
+# No repairing should be necessary in either mode.
+# CHECK:         %0(s64) = COPY %x0
+# CHECK-NEXT:    %1(p0) = COPY %x1
+# CHECK-NEXT:    %2(s64) = G_LOAD %1(p0) :: (load 8 from %ir.addr)
+# %0 has been mapped to GPR; repairing inserts a copy to match FPR.
+# CHECK-NEXT:    %4(s64) = COPY %0
+# CHECK-NEXT:    %3(s64) = G_FADD %4, %2
+# CHECK-NEXT:    %x0 = COPY %3(s64)
+# CHECK-NEXT:    RET_ReallyLR implicit %x0
+
+body:             |
+  bb.0:
+    liveins: %x0, %x1
+
+    %0(s64) = COPY %x0
+    %1(p0) = COPY %x1
+    %2(s64) = G_LOAD %1(p0) :: (load 8 from %ir.addr)
+    %3(s64) = G_FADD %0, %2
+    %x0 = COPY %3(s64)
+    RET_ReallyLR implicit %x0
+
+...