[llvm] a86bbe1 - [AArch64][GlobalISel] Handle any-extending FPR loads in manual selection code.

Amara Emerson via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 1 10:19:41 PDT 2021


Author: Amara Emerson
Date: 2021-09-01T10:19:22-07:00
New Revision: a86bbe1e3191800d42abf073a060eb8601b8be37

URL: https://github.com/llvm/llvm-project/commit/a86bbe1e3191800d42abf073a060eb8601b8be37
DIFF: https://github.com/llvm/llvm-project/commit/a86bbe1e3191800d42abf073a060eb8601b8be37.diff

LOG: [AArch64][GlobalISel] Handle any-extending FPR loads in manual selection code.

When we have an any-extending FPR bank load, none of the tablegen patterns
match and we fall back to the C++ selector. As with the truncating stores
that were fixed recently, the C++ selection code wasn't able to handle it and
ended up generating invalid copies between register classes of different sizes.

This change adds handling for this case, splitting the load into a regular
load and a SUBREG_TO_REG to extend it into the original wide destination reg.

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
    llvm/test/CodeGen/AArch64/GlobalISel/select-load.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index c2951c4f6ecb2..930f836719259 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -2764,6 +2764,30 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
                       .getReg(0);
       RBI.constrainGenericRegister(Copy, *RC, MRI);
       LdSt.getOperand(0).setReg(Copy);
+    } else if (isa<GLoad>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
+      // If this is an any-extending load from the FPR bank, split it into a regular
+      // load + extend.
+      if (RB.getID() == AArch64::FPRRegBankID) {
+        unsigned SubReg;
+        LLT MemTy = LdSt.getMMO().getMemoryType();
+        auto *RC = getRegClassForTypeOnBank(MemTy, RB, RBI);
+        if (!getSubRegForClass(RC, TRI, SubReg))
+          return false;
+        Register OldDst = LdSt.getReg(0);
+        Register NewDst =
+            MRI.createGenericVirtualRegister(LdSt.getMMO().getMemoryType());
+        LdSt.getOperand(0).setReg(NewDst);
+        MRI.setRegBank(NewDst, RB);
+        // Generate a SUBREG_TO_REG to extend it.
+        MIB.setInsertPt(MIB.getMBB(), std::next(LdSt.getIterator()));
+        MIB.buildInstr(AArch64::SUBREG_TO_REG, {OldDst}, {})
+            .addImm(0)
+            .addUse(NewDst)
+            .addImm(SubReg);
+        auto SubRegRC = getRegClassForTypeOnBank(MRI.getType(OldDst), RB, RBI);
+        RBI.constrainGenericRegister(OldDst, *SubRegRC, MRI);
+        MIB.setInstr(LdSt);
+      }
     }
 
     // Helper lambda for partially selecting I. Either returns the original

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-load.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-load.mir
index 4339005ea7f99..f313e08178dbb 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-load.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-load.mir
@@ -38,6 +38,8 @@
   define void @load_4xi32(<4 x i32>* %ptr) { ret void }
   define void @load_8xi16(<8 x i16>* %ptr) { ret void }
   define void @load_16xi8(<16 x i8>* %ptr) { ret void }
+  define void @anyext_on_fpr() { ret void }
+  define void @anyext_on_fpr8() { ret void }
 
 ...
 
@@ -638,3 +640,69 @@ body:             |
     RET_ReallyLR implicit $q0
 
 ...
+---
+name:            anyext_on_fpr
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+liveins:
+  - { reg: '$x0' }
+  - { reg: '$x1' }
+  - { reg: '$x2' }
+  - { reg: '$w3' }
+frameInfo:
+  maxAlignment:    1
+machineFunctionInfo: {}
+body:             |
+  bb.1:
+    liveins: $w3, $x0, $x1, $x2
+
+    ; CHECK-LABEL: name: anyext_on_fpr
+    ; CHECK: liveins: $w3, $x0, $x1, $x2
+    ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+    ; CHECK: [[LDRHui:%[0-9]+]]:fpr16 = LDRHui [[COPY]], 0 :: (load (s16))
+    ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:fpr32 = SUBREG_TO_REG 0, [[LDRHui]], %subreg.hsub
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[SUBREG_TO_REG]]
+    ; CHECK: $w0 = COPY [[COPY1]]
+    ; CHECK: RET_ReallyLR
+    %0:gpr(p0) = COPY $x0
+    %16:fpr(s32) = G_LOAD %0(p0) :: (load (s16))
+    %24:gpr(s32) = COPY %16(s32)
+    $w0 = COPY %24(s32)
+    RET_ReallyLR
+
+...
+---
+name:            anyext_on_fpr8
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+liveins:
+  - { reg: '$x0' }
+  - { reg: '$x1' }
+  - { reg: '$x2' }
+  - { reg: '$w3' }
+frameInfo:
+  maxAlignment:    1
+machineFunctionInfo: {}
+body:             |
+  bb.1:
+    liveins: $w3, $x0, $x1, $x2
+
+    ; CHECK-LABEL: name: anyext_on_fpr8
+    ; CHECK: liveins: $w3, $x0, $x1, $x2
+    ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+    ; CHECK: [[LDRBui:%[0-9]+]]:fpr8 = LDRBui [[COPY]], 0 :: (load (s8))
+    ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:fpr32 = SUBREG_TO_REG 0, [[LDRBui]], %subreg.bsub
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[SUBREG_TO_REG]]
+    ; CHECK: $w0 = COPY [[COPY1]]
+    ; CHECK: RET_ReallyLR
+    %0:gpr(p0) = COPY $x0
+    %16:fpr(s32) = G_LOAD %0(p0) :: (load (s8))
+    %24:gpr(s32) = COPY %16(s32)
+    $w0 = COPY %24(s32)
+    RET_ReallyLR
+
+...


        


More information about the llvm-commits mailing list