[llvm] 1fec092 - [AArch64][GlobalISel] Allow selecting FPR index loads. (#143835)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 20 23:01:38 PDT 2025
Author: David Green
Date: 2025-06-21T07:01:35+01:00
New Revision: 1fec092fd74abc6fa7399da5bcf165d6249883f5
URL: https://github.com/llvm/llvm-project/commit/1fec092fd74abc6fa7399da5bcf165d6249883f5
DIFF: https://github.com/llvm/llvm-project/commit/1fec092fd74abc6fa7399da5bcf165d6249883f5.diff
LOG: [AArch64][GlobalISel] Allow selecting FPR index loads. (#143835)
We can, through legalization of certain operations, end up generating
G_INDEXED_LOAD into FPR registers that require extensions. SExt and ZExt
will always opt for GPR, but anyext/noext can currently be assigned FPR
registers in regbankselect. As writing a subregister sets the higher bits
of the same register to 0, we can handle zext and anyext on FPR
registers, which is what this patch adds.
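For example (a sketch drawn from the new load_s8_s16 MIR test added
below), a post-indexed anyext load of an s8 into an s16 FPR destination

  %2:fpr(s16), %3:gpr(p0) = G_INDEXED_LOAD %0:gpr, %1:gpr, 0 :: (load (s8))

is now selected to an FPR byte load followed by a SUBREG_TO_REG into the
wider FPR register class, roughly:

  early-clobber %3:gpr64sp, %4:fpr8 = LDRBpost %0, 4 :: (load (s8))
  %2:fpr16 = SUBREG_TO_REG 0, %4, %subreg.bsub

The SUBREG_TO_REG records that the remaining high bits are zero (the
subregister write zeroes them), so no separate extend instruction is
needed.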
Added:
llvm/test/CodeGen/AArch64/GlobalISel/select-fp-index-load.mir
Modified:
llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 5081cc4bba144..d55ff5acb3dca 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -5553,9 +5553,15 @@ bool AArch64InstructionSelector::selectIndexedExtLoad(
unsigned MemSizeBits = ExtLd.getMMO().getMemoryType().getSizeInBits();
bool IsPre = ExtLd.isPre();
bool IsSExt = isa<GIndexedSExtLoad>(ExtLd);
- bool InsertIntoXReg = false;
+ unsigned InsertIntoSubReg = 0;
bool IsDst64 = Ty.getSizeInBits() == 64;
+ // ZExt/SExt should be on gpr, but we can handle extload and zextload of
+ // fpr, so long as they are scalar.
+ bool IsFPR = RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID;
+ if ((IsSExt && IsFPR) || Ty.isVector())
+ return false;
+
unsigned Opc = 0;
LLT NewLdDstTy;
LLT s32 = LLT::scalar(32);
@@ -5568,9 +5574,13 @@ bool AArch64InstructionSelector::selectIndexedExtLoad(
else
Opc = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
NewLdDstTy = IsDst64 ? s64 : s32;
+ } else if (IsFPR) {
+ Opc = IsPre ? AArch64::LDRBpre : AArch64::LDRBpost;
+ InsertIntoSubReg = AArch64::bsub;
+ NewLdDstTy = LLT::scalar(MemSizeBits);
} else {
Opc = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
- InsertIntoXReg = IsDst64;
+ InsertIntoSubReg = IsDst64 ? AArch64::sub_32 : 0;
NewLdDstTy = s32;
}
} else if (MemSizeBits == 16) {
@@ -5580,27 +5590,32 @@ bool AArch64InstructionSelector::selectIndexedExtLoad(
else
Opc = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
NewLdDstTy = IsDst64 ? s64 : s32;
+ } else if (IsFPR) {
+ Opc = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
+ InsertIntoSubReg = AArch64::hsub;
+ NewLdDstTy = LLT::scalar(MemSizeBits);
} else {
Opc = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
- InsertIntoXReg = IsDst64;
+ InsertIntoSubReg = IsDst64 ? AArch64::sub_32 : 0;
NewLdDstTy = s32;
}
} else if (MemSizeBits == 32) {
if (IsSExt) {
Opc = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
NewLdDstTy = s64;
+ } else if (IsFPR) {
+ Opc = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
+ InsertIntoSubReg = AArch64::ssub;
+ NewLdDstTy = LLT::scalar(MemSizeBits);
} else {
Opc = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
- InsertIntoXReg = IsDst64;
+ InsertIntoSubReg = IsDst64 ? AArch64::sub_32 : 0;
NewLdDstTy = s32;
}
} else {
llvm_unreachable("Unexpected size for indexed load");
}
- if (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
- return false; // We should be on gpr.
-
auto Cst = getIConstantVRegVal(Offset, MRI);
if (!Cst)
return false; // Shouldn't happen, but just in case.
@@ -5610,15 +5625,18 @@ bool AArch64InstructionSelector::selectIndexedExtLoad(
LdMI.cloneMemRefs(ExtLd);
constrainSelectedInstRegOperands(*LdMI, TII, TRI, RBI);
// Make sure to select the load with the MemTy as the dest type, and then
- // insert into X reg if needed.
- if (InsertIntoXReg) {
+ // insert into a larger reg if needed.
+ if (InsertIntoSubReg) {
// Generate a SUBREG_TO_REG.
auto SubToReg = MIB.buildInstr(TargetOpcode::SUBREG_TO_REG, {Dst}, {})
.addImm(0)
.addUse(LdMI.getReg(1))
- .addImm(AArch64::sub_32);
- RBI.constrainGenericRegister(SubToReg.getReg(0), AArch64::GPR64RegClass,
- MRI);
+ .addImm(InsertIntoSubReg);
+ RBI.constrainGenericRegister(
+ SubToReg.getReg(0),
+ *getRegClassForTypeOnBank(MRI.getType(Dst),
+ *RBI.getRegBank(Dst, MRI, TRI)),
+ MRI);
} else {
auto Copy = MIB.buildCopy(Dst, LdMI.getReg(1));
selectCopy(*Copy, TII, MRI, TRI, RBI);
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-fp-index-load.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-fp-index-load.mir
new file mode 100644
index 0000000000000..80c2f8ca08608
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-fp-index-load.mir
@@ -0,0 +1,328 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc %s -verify-machineinstrs -mtriple=aarch64-unknown-unknown -run-pass=instruction-select -mattr=+fullfp16 -o - | FileCheck %s
+
+...
+---
+name: load_s8_s16
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $d0, $x0
+ ; CHECK-LABEL: name: load_s8_s16
+ ; CHECK: liveins: $d0, $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+ ; CHECK-NEXT: early-clobber %3:gpr64sp, %4:fpr8 = LDRBpost [[COPY]], 4 :: (load (s8))
+ ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:fpr16 = SUBREG_TO_REG 0, %4, %subreg.bsub
+ ; CHECK-NEXT: $h0 = COPY [[SUBREG_TO_REG]]
+ ; CHECK-NEXT: $x0 = COPY %3
+ ; CHECK-NEXT: RET_ReallyLR implicit $d0
+ %0:gpr(p0) = COPY $x0
+ %1:gpr(s64) = G_CONSTANT i64 4
+ %2:fpr(s16), %3:gpr(p0) = G_INDEXED_LOAD %0:gpr, %1:gpr, 0 :: (load (s8))
+ $h0 = COPY %2(s16)
+ $x0 = COPY %3(p0)
+ RET_ReallyLR implicit $d0
+
+...
+---
+name: load_s8_s32
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $d0, $x0
+ ; CHECK-LABEL: name: load_s8_s32
+ ; CHECK: liveins: $d0, $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+ ; CHECK-NEXT: early-clobber %3:gpr64sp, %4:fpr8 = LDRBpost [[COPY]], 4 :: (load (s8))
+ ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:fpr32 = SUBREG_TO_REG 0, %4, %subreg.bsub
+ ; CHECK-NEXT: $s0 = COPY [[SUBREG_TO_REG]]
+ ; CHECK-NEXT: $x0 = COPY %3
+ ; CHECK-NEXT: RET_ReallyLR implicit $d0
+ %0:gpr(p0) = COPY $x0
+ %1:gpr(s64) = G_CONSTANT i64 4
+ %2:fpr(s32), %3:gpr(p0) = G_INDEXED_LOAD %0:gpr, %1:gpr, 0 :: (load (s8))
+ $s0 = COPY %2(s32)
+ $x0 = COPY %3(p0)
+ RET_ReallyLR implicit $d0
+
+...
+---
+name: load_s8_s64
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $d0, $x0
+ ; CHECK-LABEL: name: load_s8_s64
+ ; CHECK: liveins: $d0, $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+ ; CHECK-NEXT: early-clobber %3:gpr64sp, %4:fpr8 = LDRBpost [[COPY]], 4 :: (load (s8))
+ ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, %4, %subreg.bsub
+ ; CHECK-NEXT: $d0 = COPY [[SUBREG_TO_REG]]
+ ; CHECK-NEXT: $x0 = COPY %3
+ ; CHECK-NEXT: RET_ReallyLR implicit $d0
+ %0:gpr(p0) = COPY $x0
+ %1:gpr(s64) = G_CONSTANT i64 4
+ %2:fpr(s64), %3:gpr(p0) = G_INDEXED_LOAD %0:gpr, %1:gpr, 0 :: (load (s8))
+ $d0 = COPY %2(s64)
+ $x0 = COPY %3(p0)
+ RET_ReallyLR implicit $d0
+
+...
+---
+name: load_s16_s32
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $d0, $x0
+ ; CHECK-LABEL: name: load_s16_s32
+ ; CHECK: liveins: $d0, $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+ ; CHECK-NEXT: early-clobber %3:gpr64sp, %4:fpr16 = LDRHpost [[COPY]], 4 :: (load (s16))
+ ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:fpr32 = SUBREG_TO_REG 0, %4, %subreg.hsub
+ ; CHECK-NEXT: $s0 = COPY [[SUBREG_TO_REG]]
+ ; CHECK-NEXT: $x0 = COPY %3
+ ; CHECK-NEXT: RET_ReallyLR implicit $d0
+ %0:gpr(p0) = COPY $x0
+ %1:gpr(s64) = G_CONSTANT i64 4
+ %2:fpr(s32), %3:gpr(p0) = G_INDEXED_LOAD %0:gpr, %1:gpr, 0 :: (load (s16))
+ $s0 = COPY %2(s32)
+ $x0 = COPY %3(p0)
+ RET_ReallyLR implicit $d0
+
+...
+---
+name: load_s16_s64
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $d0, $x0
+ ; CHECK-LABEL: name: load_s16_s64
+ ; CHECK: liveins: $d0, $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+ ; CHECK-NEXT: early-clobber %3:gpr64sp, %4:fpr16 = LDRHpost [[COPY]], 4 :: (load (s16))
+ ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, %4, %subreg.hsub
+ ; CHECK-NEXT: $d0 = COPY [[SUBREG_TO_REG]]
+ ; CHECK-NEXT: $x0 = COPY %3
+ ; CHECK-NEXT: RET_ReallyLR implicit $d0
+ %0:gpr(p0) = COPY $x0
+ %1:gpr(s64) = G_CONSTANT i64 4
+ %2:fpr(s64), %3:gpr(p0) = G_INDEXED_LOAD %0:gpr, %1:gpr, 0 :: (load (s16))
+ $d0 = COPY %2(s64)
+ $x0 = COPY %3(p0)
+ RET_ReallyLR implicit $d0
+
+...
+---
+name: load_s32_s64
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $d0, $x0
+ ; CHECK-LABEL: name: load_s32_s64
+ ; CHECK: liveins: $d0, $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+ ; CHECK-NEXT: early-clobber %3:gpr64sp, %4:fpr32 = LDRSpost [[COPY]], 4 :: (load (s32))
+ ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, %4, %subreg.ssub
+ ; CHECK-NEXT: $d0 = COPY [[SUBREG_TO_REG]]
+ ; CHECK-NEXT: $x0 = COPY %3
+ ; CHECK-NEXT: RET_ReallyLR implicit $d0
+ %0:gpr(p0) = COPY $x0
+ %1:gpr(s64) = G_CONSTANT i64 4
+ %2:fpr(s64), %3:gpr(p0) = G_INDEXED_LOAD %0:gpr, %1:gpr, 0 :: (load (s32))
+ $d0 = COPY %2(s64)
+ $x0 = COPY %3(p0)
+ RET_ReallyLR implicit $d0
+
+...
+---
+name: load_s8_s16_pre
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $d0, $x0
+ ; CHECK-LABEL: name: load_s8_s16_pre
+ ; CHECK: liveins: $d0, $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+ ; CHECK-NEXT: early-clobber %3:gpr64sp, %4:fpr8 = LDRBpre [[COPY]], 4 :: (load (s8))
+ ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:fpr16 = SUBREG_TO_REG 0, %4, %subreg.bsub
+ ; CHECK-NEXT: $h0 = COPY [[SUBREG_TO_REG]]
+ ; CHECK-NEXT: $x0 = COPY %3
+ ; CHECK-NEXT: RET_ReallyLR implicit $d0
+ %0:gpr(p0) = COPY $x0
+ %1:gpr(s64) = G_CONSTANT i64 4
+ %2:fpr(s16), %3:gpr(p0) = G_INDEXED_LOAD %0:gpr, %1:gpr, 1 :: (load (s8))
+ $h0 = COPY %2(s16)
+ $x0 = COPY %3(p0)
+ RET_ReallyLR implicit $d0
+
+...
+---
+name: load_s8_s32_pre
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $d0, $x0
+ ; CHECK-LABEL: name: load_s8_s32_pre
+ ; CHECK: liveins: $d0, $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+ ; CHECK-NEXT: early-clobber %3:gpr64sp, %4:fpr8 = LDRBpre [[COPY]], 4 :: (load (s8))
+ ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:fpr32 = SUBREG_TO_REG 0, %4, %subreg.bsub
+ ; CHECK-NEXT: $s0 = COPY [[SUBREG_TO_REG]]
+ ; CHECK-NEXT: $x0 = COPY %3
+ ; CHECK-NEXT: RET_ReallyLR implicit $d0
+ %0:gpr(p0) = COPY $x0
+ %1:gpr(s64) = G_CONSTANT i64 4
+ %2:fpr(s32), %3:gpr(p0) = G_INDEXED_LOAD %0:gpr, %1:gpr, 1 :: (load (s8))
+ $s0 = COPY %2(s32)
+ $x0 = COPY %3(p0)
+ RET_ReallyLR implicit $d0
+
+...
+---
+name: load_s8_s64_pre
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $d0, $x0
+ ; CHECK-LABEL: name: load_s8_s64_pre
+ ; CHECK: liveins: $d0, $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+ ; CHECK-NEXT: early-clobber %3:gpr64sp, %4:fpr8 = LDRBpre [[COPY]], 4 :: (load (s8))
+ ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, %4, %subreg.bsub
+ ; CHECK-NEXT: $d0 = COPY [[SUBREG_TO_REG]]
+ ; CHECK-NEXT: $x0 = COPY %3
+ ; CHECK-NEXT: RET_ReallyLR implicit $d0
+ %0:gpr(p0) = COPY $x0
+ %1:gpr(s64) = G_CONSTANT i64 4
+ %2:fpr(s64), %3:gpr(p0) = G_INDEXED_LOAD %0:gpr, %1:gpr, 1 :: (load (s8))
+ $d0 = COPY %2(s64)
+ $x0 = COPY %3(p0)
+ RET_ReallyLR implicit $d0
+
+...
+---
+name: load_s16_s32_pre
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $d0, $x0
+ ; CHECK-LABEL: name: load_s16_s32_pre
+ ; CHECK: liveins: $d0, $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+ ; CHECK-NEXT: early-clobber %3:gpr64sp, %4:fpr16 = LDRHpre [[COPY]], 4 :: (load (s16))
+ ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:fpr32 = SUBREG_TO_REG 0, %4, %subreg.hsub
+ ; CHECK-NEXT: $s0 = COPY [[SUBREG_TO_REG]]
+ ; CHECK-NEXT: $x0 = COPY %3
+ ; CHECK-NEXT: RET_ReallyLR implicit $d0
+ %0:gpr(p0) = COPY $x0
+ %1:gpr(s64) = G_CONSTANT i64 4
+ %2:fpr(s32), %3:gpr(p0) = G_INDEXED_LOAD %0:gpr, %1:gpr, 1 :: (load (s16))
+ $s0 = COPY %2(s32)
+ $x0 = COPY %3(p0)
+ RET_ReallyLR implicit $d0
+
+...
+---
+name: load_s16_s64_pre
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $d0, $x0
+ ; CHECK-LABEL: name: load_s16_s64_pre
+ ; CHECK: liveins: $d0, $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+ ; CHECK-NEXT: early-clobber %3:gpr64sp, %4:fpr16 = LDRHpre [[COPY]], 4 :: (load (s16))
+ ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, %4, %subreg.hsub
+ ; CHECK-NEXT: $d0 = COPY [[SUBREG_TO_REG]]
+ ; CHECK-NEXT: $x0 = COPY %3
+ ; CHECK-NEXT: RET_ReallyLR implicit $d0
+ %0:gpr(p0) = COPY $x0
+ %1:gpr(s64) = G_CONSTANT i64 4
+ %2:fpr(s64), %3:gpr(p0) = G_INDEXED_LOAD %0:gpr, %1:gpr, 1 :: (load (s16))
+ $d0 = COPY %2(s64)
+ $x0 = COPY %3(p0)
+ RET_ReallyLR implicit $d0
+
+...
+---
+name: load_s32_s64_pre
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $d0, $x0
+ ; CHECK-LABEL: name: load_s32_s64_pre
+ ; CHECK: liveins: $d0, $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+ ; CHECK-NEXT: early-clobber %3:gpr64sp, %4:fpr32 = LDRSpre [[COPY]], 4 :: (load (s32))
+ ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:fpr64 = SUBREG_TO_REG 0, %4, %subreg.ssub
+ ; CHECK-NEXT: $d0 = COPY [[SUBREG_TO_REG]]
+ ; CHECK-NEXT: $x0 = COPY %3
+ ; CHECK-NEXT: RET_ReallyLR implicit $d0
+ %0:gpr(p0) = COPY $x0
+ %1:gpr(s64) = G_CONSTANT i64 4
+ %2:fpr(s64), %3:gpr(p0) = G_INDEXED_LOAD %0:gpr, %1:gpr, 1 :: (load (s32))
+ $d0 = COPY %2(s64)
+ $x0 = COPY %3(p0)
+ RET_ReallyLR implicit $d0
+
+...
diff --git a/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll b/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll
index d949f95209577..cb5df07c7ede4 100644
--- a/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll
@@ -725,3 +725,46 @@ define ptr @postidx64_sw(ptr %src, ptr %out) {
store i64 %sext, ptr %out, align 8
ret ptr %ptr
}
+
+define ptr @postidx32_shalf(ptr %src, ptr %out, half %a) {
+; CHECK64-LABEL: postidx32_shalf:
+; CHECK64: ; %bb.0:
+; CHECK64-NEXT: ldr h1, [x0], #4
+; CHECK64-NEXT: ; kill: def $h0 killed $h0 def $s0
+; CHECK64-NEXT: fcvt s2, h1
+; CHECK64-NEXT: fcmp s2, #0.0
+; CHECK64-NEXT: fcsel s0, s1, s0, mi
+; CHECK64-NEXT: str h0, [x1]
+; CHECK64-NEXT: ret
+;
+; GISEL-LABEL: postidx32_shalf:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: mov w8, #0 ; =0x0
+; GISEL-NEXT: ldr h1, [x0], #4
+; GISEL-NEXT: fmov s2, w8
+; GISEL-NEXT: ; kill: def $h0 killed $h0 def $s0
+; GISEL-NEXT: fmov w9, s0
+; GISEL-NEXT: fcvt s3, h1
+; GISEL-NEXT: fmov w8, s1
+; GISEL-NEXT: fcvt s2, h2
+; GISEL-NEXT: fcmp s3, s2
+; GISEL-NEXT: csel w8, w8, w9, mi
+; GISEL-NEXT: strh w8, [x1]
+; GISEL-NEXT: ret
+;
+; CHECK32-LABEL: postidx32_shalf:
+; CHECK32: ; %bb.0:
+; CHECK32-NEXT: ldr h1, [x0], #4
+; CHECK32-NEXT: ; kill: def $h0 killed $h0 def $s0
+; CHECK32-NEXT: fcvt s2, h1
+; CHECK32-NEXT: fcmp s2, #0.0
+; CHECK32-NEXT: fcsel s0, s1, s0, mi
+; CHECK32-NEXT: str h0, [x1]
+; CHECK32-NEXT: ret
+ %tmp = load half, ptr %src, align 2
+ %ptr = getelementptr inbounds i32, ptr %src, i64 1
+ %c = fcmp olt half %tmp, 0.0
+ %s = select i1 %c, half %tmp, half %a
+ store half %s, ptr %out, align 8
+ ret ptr %ptr
+}