[llvm] 1fec092 - [AArch64][GlobalISel] Allow selecting FPR index loads. (#143835)

via llvm-commits <llvm-commits at lists.llvm.org>
Fri Jun 20 23:01:38 PDT 2025


Author: David Green
Date: 2025-06-21T07:01:35+01:00
New Revision: 1fec092fd74abc6fa7399da5bcf165d6249883f5

URL: https://github.com/llvm/llvm-project/commit/1fec092fd74abc6fa7399da5bcf165d6249883f5
DIFF: https://github.com/llvm/llvm-project/commit/1fec092fd74abc6fa7399da5bcf165d6249883f5.diff

LOG: [AArch64][GlobalISel] Allow selecting FPR index loads. (#143835)

We can, through legalization of certain operations, end up generating
G_INDEXED_LOAD into FPR registers that require extensions. SExt and ZExt
will always opt for GPR, but anyext/noext loads can currently be assigned
FPR registers in regbankselect. As writing a subregister sets the higher
bits of the same register to 0, we can successfully handle zext and
anyext on FPR registers, which is what this patch adds.
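
For example (condensed from the postidx32_shalf test added to
arm64-indexed-memory.ll at the end of this patch), a post-incremented
load of a half whose value stays on the FP side can legalize into such a
G_INDEXED_LOAD with an FPR destination:

  %tmp = load half, ptr %src, align 2
  %ptr = getelementptr inbounds i32, ptr %src, i64 1
  %c = fcmp olt half %tmp, 0.0
  %s = select i1 %c, half %tmp, half %a
  store half %s, ptr %out, align 8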

Added: 
    llvm/test/CodeGen/AArch64/GlobalISel/select-fp-index-load.mir

Modified: 
    llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
    llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 5081cc4bba144..d55ff5acb3dca 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -5553,9 +5553,15 @@ bool AArch64InstructionSelector::selectIndexedExtLoad(
   unsigned MemSizeBits = ExtLd.getMMO().getMemoryType().getSizeInBits();
   bool IsPre = ExtLd.isPre();
   bool IsSExt = isa<GIndexedSExtLoad>(ExtLd);
-  bool InsertIntoXReg = false;
+  unsigned InsertIntoSubReg = 0;
   bool IsDst64 = Ty.getSizeInBits() == 64;
 
+  // ZExt/SExt should be on gpr, but we can handle extload and zextload of
+  // fpr, so long as they are scalar.
+  bool IsFPR = RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID;
+  if ((IsSExt && IsFPR) || Ty.isVector())
+    return false;
+
   unsigned Opc = 0;
   LLT NewLdDstTy;
   LLT s32 = LLT::scalar(32);
@@ -5568,9 +5574,13 @@ bool AArch64InstructionSelector::selectIndexedExtLoad(
       else
         Opc = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
       NewLdDstTy = IsDst64 ? s64 : s32;
+    } else if (IsFPR) {
+      Opc = IsPre ? AArch64::LDRBpre : AArch64::LDRBpost;
+      InsertIntoSubReg = AArch64::bsub;
+      NewLdDstTy = LLT::scalar(MemSizeBits);
     } else {
       Opc = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
-      InsertIntoXReg = IsDst64;
+      InsertIntoSubReg = IsDst64 ? AArch64::sub_32 : 0;
       NewLdDstTy = s32;
     }
   } else if (MemSizeBits == 16) {
@@ -5580,27 +5590,32 @@ bool AArch64InstructionSelector::selectIndexedExtLoad(
       else
         Opc = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
       NewLdDstTy = IsDst64 ? s64 : s32;
+    } else if (IsFPR) {
+      Opc = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
+      InsertIntoSubReg = AArch64::hsub;
+      NewLdDstTy = LLT::scalar(MemSizeBits);
     } else {
       Opc = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
-      InsertIntoXReg = IsDst64;
+      InsertIntoSubReg = IsDst64 ? AArch64::sub_32 : 0;
       NewLdDstTy = s32;
     }
   } else if (MemSizeBits == 32) {
     if (IsSExt) {
       Opc = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
       NewLdDstTy = s64;
+    } else if (IsFPR) {
+      Opc = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
+      InsertIntoSubReg = AArch64::ssub;
+      NewLdDstTy = LLT::scalar(MemSizeBits);
     } else {
       Opc = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
-      InsertIntoXReg = IsDst64;
+      InsertIntoSubReg = IsDst64 ? AArch64::sub_32 : 0;
       NewLdDstTy = s32;
     }
   } else {
     llvm_unreachable("Unexpected size for indexed load");
   }
 
-  if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
-    return false; // We should be on gpr.
-
   auto Cst = getIConstantVRegVal(Offset, MRI);
   if (!Cst)
     return false; // Shouldn't happen, but just in case.
@@ -5610,15 +5625,18 @@ bool AArch64InstructionSelector::selectIndexedExtLoad(
   LdMI.cloneMemRefs(ExtLd);
   constrainSelectedInstRegOperands(*LdMI, TII, TRI, RBI);
   // Make sure to select the load with the MemTy as the dest type, and then
-  // insert into X reg if needed.
-  if (InsertIntoXReg) {
+  // insert into a larger reg if needed.
+  if (InsertIntoSubReg) {
     // Generate a SUBREG_TO_REG.
     auto SubToReg = MIB.buildInstr(TargetOpcode::SUBREG_TO_REG, {Dst}, {})
                         .addImm(0)
                         .addUse(LdMI.getReg(1))
-                        .addImm(AArch64::sub_32);
-    RBI.constrainGenericRegister(SubToReg.getReg(0), AArch64::GPR64RegClass,
-                                 MRI);
+                        .addImm(InsertIntoSubReg);
+    RBI.constrainGenericRegister(
+        SubToReg.getReg(0),
+        *getRegClassForTypeOnBank(MRI.getType(Dst),
+                                  *RBI.getRegBank(Dst, MRI, TRI)),
+        MRI);
   } else {
     auto Copy = MIB.buildCopy(Dst, LdMI.getReg(1));
     selectCopy(*Copy, TII, MRI, TRI, RBI);

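The SUBREG_TO_REG produced above is what makes the zext/anyext cases
sound: an AArch64 scalar FP load such as LDRB writes the low bits of the
SIMD&FP register and zeroes the remainder, so the wider FPR value read
through the bsub/hsub/ssub subregister is already zero-extended, and
SUBREG_TO_REG with a 0 immediate records exactly that. A minimal sketch
of the selected form, mirroring the load_s8_s64 test below (virtual
register names illustrative):

  early-clobber %3:gpr64sp, %4:fpr8 = LDRBpost %0, 4 :: (load (s8))
  %2:fpr64 = SUBREG_TO_REG 0, %4, %subreg.bsub  ; bits [63:8] of %2 are zero
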
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-fp-index-load.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-fp-index-load.mir
new file mode 100644
index 0000000000000..80c2f8ca08608
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-fp-index-load.mir
@@ -0,0 +1,328 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc %s -verify-machineinstrs -mtriple=aarch64-unknown-unknown -run-pass=instruction-select -mattr=+fullfp16 -o - | FileCheck %s
+
+...
+---
+name:            load_s8_s16
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $d0, $x0
+    ; CHECK-LABEL: name: load_s8_s16
+    ; CHECK: liveins: $d0, $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+    ; CHECK-NEXT: early-clobber %3:gpr64sp, %4:fpr8 = LDRBpost [[COPY]], 4 :: (load (s8))
+    ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:fpr16 = SUBREG_TO_REG 0, %4, %subreg.bsub
+    ; CHECK-NEXT: $h0 = COPY [[SUBREG_TO_REG]]
+    ; CHECK-NEXT: $x0 = COPY %3
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
+    %0:gpr(p0) = COPY $x0
+    %1:gpr(s64) = G_CONSTANT i64 4
+    %2:fpr(s16), %3:gpr(p0) = G_INDEXED_LOAD %0:gpr, %1:gpr, 0 :: (load (s8))
+    $h0 = COPY %2(s16)
+    $x0 = COPY %3(p0)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name:            load_s8_s32
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $d0, $x0
+    ; CHECK-LABEL: name: load_s8_s32
+    ; CHECK: liveins: $d0, $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+    ; CHECK-NEXT: early-clobber %3:gpr64sp, %4:fpr8 = LDRBpost [[COPY]], 4 :: (load (s8))
+    ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:fpr32 = SUBREG_TO_REG 0, %4, %subreg.bsub
+    ; CHECK-NEXT: $s0 = COPY [[SUBREG_TO_REG]]
+    ; CHECK-NEXT: $x0 = COPY %3
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
+    %0:gpr(p0) = COPY $x0
+    %1:gpr(s64) = G_CONSTANT i64 4
+    %2:fpr(s32), %3:gpr(p0) = G_INDEXED_LOAD %0:gpr, %1:gpr, 0 :: (load (s8))
+    $s0 = COPY %2(s32)
+    $x0 = COPY %3(p0)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name:            load_s8_s64
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $d0, $x0
+    ; CHECK-LABEL: name: load_s8_s64
+    ; CHECK: liveins: $d0, $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+    ; CHECK-NEXT: early-clobber %3:gpr64sp, %4:fpr8 = LDRBpost [[COPY]], 4 :: (load (s8))
+    ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, %4, %subreg.bsub
+    ; CHECK-NEXT: $d0 = COPY [[SUBREG_TO_REG]]
+    ; CHECK-NEXT: $x0 = COPY %3
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
+    %0:gpr(p0) = COPY $x0
+    %1:gpr(s64) = G_CONSTANT i64 4
+    %2:fpr(s64), %3:gpr(p0) = G_INDEXED_LOAD %0:gpr, %1:gpr, 0 :: (load (s8))
+    $d0 = COPY %2(s64)
+    $x0 = COPY %3(p0)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name:            load_s16_s32
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $d0, $x0
+    ; CHECK-LABEL: name: load_s16_s32
+    ; CHECK: liveins: $d0, $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+    ; CHECK-NEXT: early-clobber %3:gpr64sp, %4:fpr16 = LDRHpost [[COPY]], 4 :: (load (s16))
+    ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:fpr32 = SUBREG_TO_REG 0, %4, %subreg.hsub
+    ; CHECK-NEXT: $s0 = COPY [[SUBREG_TO_REG]]
+    ; CHECK-NEXT: $x0 = COPY %3
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
+    %0:gpr(p0) = COPY $x0
+    %1:gpr(s64) = G_CONSTANT i64 4
+    %2:fpr(s32), %3:gpr(p0) = G_INDEXED_LOAD %0:gpr, %1:gpr, 0 :: (load (s16))
+    $s0 = COPY %2(s32)
+    $x0 = COPY %3(p0)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name:            load_s16_s64
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $d0, $x0
+    ; CHECK-LABEL: name: load_s16_s64
+    ; CHECK: liveins: $d0, $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+    ; CHECK-NEXT: early-clobber %3:gpr64sp, %4:fpr16 = LDRHpost [[COPY]], 4 :: (load (s16))
+    ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, %4, %subreg.hsub
+    ; CHECK-NEXT: $d0 = COPY [[SUBREG_TO_REG]]
+    ; CHECK-NEXT: $x0 = COPY %3
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
+    %0:gpr(p0) = COPY $x0
+    %1:gpr(s64) = G_CONSTANT i64 4
+    %2:fpr(s64), %3:gpr(p0) = G_INDEXED_LOAD %0:gpr, %1:gpr, 0 :: (load (s16))
+    $d0 = COPY %2(s64)
+    $x0 = COPY %3(p0)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name:            load_s32_s64
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $d0, $x0
+    ; CHECK-LABEL: name: load_s32_s64
+    ; CHECK: liveins: $d0, $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+    ; CHECK-NEXT: early-clobber %3:gpr64sp, %4:fpr32 = LDRSpost [[COPY]], 4 :: (load (s32))
+    ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, %4, %subreg.ssub
+    ; CHECK-NEXT: $d0 = COPY [[SUBREG_TO_REG]]
+    ; CHECK-NEXT: $x0 = COPY %3
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
+    %0:gpr(p0) = COPY $x0
+    %1:gpr(s64) = G_CONSTANT i64 4
+    %2:fpr(s64), %3:gpr(p0) = G_INDEXED_LOAD %0:gpr, %1:gpr, 0 :: (load (s32))
+    $d0 = COPY %2(s64)
+    $x0 = COPY %3(p0)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name:            load_s8_s16_pre
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $d0, $x0
+    ; CHECK-LABEL: name: load_s8_s16_pre
+    ; CHECK: liveins: $d0, $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+    ; CHECK-NEXT: early-clobber %3:gpr64sp, %4:fpr8 = LDRBpre [[COPY]], 4 :: (load (s8))
+    ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:fpr16 = SUBREG_TO_REG 0, %4, %subreg.bsub
+    ; CHECK-NEXT: $h0 = COPY [[SUBREG_TO_REG]]
+    ; CHECK-NEXT: $x0 = COPY %3
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
+    %0:gpr(p0) = COPY $x0
+    %1:gpr(s64) = G_CONSTANT i64 4
+    %2:fpr(s16), %3:gpr(p0) = G_INDEXED_LOAD %0:gpr, %1:gpr, 1 :: (load (s8))
+    $h0 = COPY %2(s16)
+    $x0 = COPY %3(p0)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name:            load_s8_s32_pre
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $d0, $x0
+    ; CHECK-LABEL: name: load_s8_s32_pre
+    ; CHECK: liveins: $d0, $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+    ; CHECK-NEXT: early-clobber %3:gpr64sp, %4:fpr8 = LDRBpre [[COPY]], 4 :: (load (s8))
+    ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:fpr32 = SUBREG_TO_REG 0, %4, %subreg.bsub
+    ; CHECK-NEXT: $s0 = COPY [[SUBREG_TO_REG]]
+    ; CHECK-NEXT: $x0 = COPY %3
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
+    %0:gpr(p0) = COPY $x0
+    %1:gpr(s64) = G_CONSTANT i64 4
+    %2:fpr(s32), %3:gpr(p0) = G_INDEXED_LOAD %0:gpr, %1:gpr, 1 :: (load (s8))
+    $s0 = COPY %2(s32)
+    $x0 = COPY %3(p0)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name:            load_s8_s64_pre
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $d0, $x0
+    ; CHECK-LABEL: name: load_s8_s64_pre
+    ; CHECK: liveins: $d0, $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+    ; CHECK-NEXT: early-clobber %3:gpr64sp, %4:fpr8 = LDRBpre [[COPY]], 4 :: (load (s8))
+    ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, %4, %subreg.bsub
+    ; CHECK-NEXT: $d0 = COPY [[SUBREG_TO_REG]]
+    ; CHECK-NEXT: $x0 = COPY %3
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
+    %0:gpr(p0) = COPY $x0
+    %1:gpr(s64) = G_CONSTANT i64 4
+    %2:fpr(s64), %3:gpr(p0) = G_INDEXED_LOAD %0:gpr, %1:gpr, 1 :: (load (s8))
+    $d0 = COPY %2(s64)
+    $x0 = COPY %3(p0)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name:            load_s16_s32_pre
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $d0, $x0
+    ; CHECK-LABEL: name: load_s16_s32_pre
+    ; CHECK: liveins: $d0, $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+    ; CHECK-NEXT: early-clobber %3:gpr64sp, %4:fpr16 = LDRHpre [[COPY]], 4 :: (load (s16))
+    ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:fpr32 = SUBREG_TO_REG 0, %4, %subreg.hsub
+    ; CHECK-NEXT: $s0 = COPY [[SUBREG_TO_REG]]
+    ; CHECK-NEXT: $x0 = COPY %3
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
+    %0:gpr(p0) = COPY $x0
+    %1:gpr(s64) = G_CONSTANT i64 4
+    %2:fpr(s32), %3:gpr(p0) = G_INDEXED_LOAD %0:gpr, %1:gpr, 1 :: (load (s16))
+    $s0 = COPY %2(s32)
+    $x0 = COPY %3(p0)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name:            load_s16_s64_pre
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $d0, $x0
+    ; CHECK-LABEL: name: load_s16_s64_pre
+    ; CHECK: liveins: $d0, $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+    ; CHECK-NEXT: early-clobber %3:gpr64sp, %4:fpr16 = LDRHpre [[COPY]], 4 :: (load (s16))
+    ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, %4, %subreg.hsub
+    ; CHECK-NEXT: $d0 = COPY [[SUBREG_TO_REG]]
+    ; CHECK-NEXT: $x0 = COPY %3
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
+    %0:gpr(p0) = COPY $x0
+    %1:gpr(s64) = G_CONSTANT i64 4
+    %2:fpr(s64), %3:gpr(p0) = G_INDEXED_LOAD %0:gpr, %1:gpr, 1 :: (load (s16))
+    $d0 = COPY %2(s64)
+    $x0 = COPY %3(p0)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name:            load_s32_s64_pre
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $d0, $x0
+    ; CHECK-LABEL: name: load_s32_s64_pre
+    ; CHECK: liveins: $d0, $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+    ; CHECK-NEXT: early-clobber %3:gpr64sp, %4:fpr32 = LDRSpre [[COPY]], 4 :: (load (s32))
+    ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, %4, %subreg.ssub
+    ; CHECK-NEXT: $d0 = COPY [[SUBREG_TO_REG]]
+    ; CHECK-NEXT: $x0 = COPY %3
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
+    %0:gpr(p0) = COPY $x0
+    %1:gpr(s64) = G_CONSTANT i64 4
+    %2:fpr(s64), %3:gpr(p0) = G_INDEXED_LOAD %0:gpr, %1:gpr, 1 :: (load (s32))
+    $d0 = COPY %2(s64)
+    $x0 = COPY %3(p0)
+    RET_ReallyLR implicit $d0
+
+...

diff --git a/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll b/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll
index d949f95209577..cb5df07c7ede4 100644
--- a/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll
@@ -725,3 +725,46 @@ define ptr @postidx64_sw(ptr %src, ptr %out) {
   store i64 %sext, ptr %out, align 8
   ret ptr %ptr
 }
+
+define ptr @postidx32_shalf(ptr %src, ptr %out, half %a) {
+; CHECK64-LABEL: postidx32_shalf:
+; CHECK64:       ; %bb.0:
+; CHECK64-NEXT:    ldr h1, [x0], #4
+; CHECK64-NEXT:    ; kill: def $h0 killed $h0 def $s0
+; CHECK64-NEXT:    fcvt s2, h1
+; CHECK64-NEXT:    fcmp s2, #0.0
+; CHECK64-NEXT:    fcsel s0, s1, s0, mi
+; CHECK64-NEXT:    str h0, [x1]
+; CHECK64-NEXT:    ret
+;
+; GISEL-LABEL: postidx32_shalf:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    mov w8, #0 ; =0x0
+; GISEL-NEXT:    ldr h1, [x0], #4
+; GISEL-NEXT:    fmov s2, w8
+; GISEL-NEXT:    ; kill: def $h0 killed $h0 def $s0
+; GISEL-NEXT:    fmov w9, s0
+; GISEL-NEXT:    fcvt s3, h1
+; GISEL-NEXT:    fmov w8, s1
+; GISEL-NEXT:    fcvt s2, h2
+; GISEL-NEXT:    fcmp s3, s2
+; GISEL-NEXT:    csel w8, w8, w9, mi
+; GISEL-NEXT:    strh w8, [x1]
+; GISEL-NEXT:    ret
+;
+; CHECK32-LABEL: postidx32_shalf:
+; CHECK32:       ; %bb.0:
+; CHECK32-NEXT:    ldr h1, [x0], #4
+; CHECK32-NEXT:    ; kill: def $h0 killed $h0 def $s0
+; CHECK32-NEXT:    fcvt s2, h1
+; CHECK32-NEXT:    fcmp s2, #0.0
+; CHECK32-NEXT:    fcsel s0, s1, s0, mi
+; CHECK32-NEXT:    str h0, [x1]
+; CHECK32-NEXT:    ret
+  %tmp = load half, ptr %src, align 2
+  %ptr = getelementptr inbounds i32, ptr %src, i64 1
+  %c = fcmp olt half %tmp, 0.0
+  %s = select i1 %c, half %tmp, half %a
+  store half %s, ptr %out, align 8
+  ret ptr %ptr
+}