[llvm] [AArch64][MachineCombiner] Fix setting reg state for gather lane pattern (PR #149703)
Jonathan Cohen via llvm-commits
llvm-commits at lists.llvm.org
Sun Jul 20 04:55:10 PDT 2025
https://github.com/jcohen-apple created https://github.com/llvm/llvm-project/pull/149703
Closes #149585 and #149644.
The offset register was unconditionally marked `killed` without verifying that it has no additional uses. This patch instead propagates the kill state already set on the offset operand of the original instruction.
>From d5b53d4df502000fcaea3b970e94d3c37993c337 Mon Sep 17 00:00:00 2001
From: Jonathan Cohen <joncoh at apple.com>
Date: Sun, 20 Jul 2025 11:19:38 +0300
Subject: [PATCH] [AArch64][MachineCombiner] Fix setting reg state for gather
lane pattern
---
llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 38 ++++++++++++-------
.../AArch64/aarch64-combine-gather-lanes.mir | 18 ++++-----
2 files changed, 34 insertions(+), 22 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index bc57537ad5dfb..214eb815738af 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -22,6 +22,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/iterator_range.h"
#include "llvm/CodeGen/CFIInstBuilder.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -7516,14 +7517,15 @@ generateGatherPattern(MachineInstr &Root,
auto LoadLaneToRegister = [&](MachineInstr *OriginalInstr,
Register SrcRegister, unsigned Lane,
- Register OffsetRegister) {
+ Register OffsetRegister,
+ bool OffsetRegisterKillState) {
auto NewRegister = MRI.createVirtualRegister(FPR128RegClass);
MachineInstrBuilder LoadIndexIntoRegister =
BuildMI(MF, MIMetadata(*OriginalInstr), TII->get(Root.getOpcode()),
NewRegister)
.addReg(SrcRegister)
.addImm(Lane)
- .addReg(OffsetRegister, getKillRegState(true));
+ .addReg(OffsetRegister, getKillRegState(OffsetRegisterKillState));
InstrIdxForVirtReg.insert(std::make_pair(NewRegister, InsInstrs.size()));
InsInstrs.push_back(LoadIndexIntoRegister);
return NewRegister;
@@ -7531,7 +7533,8 @@ generateGatherPattern(MachineInstr &Root,
// Helper to create load instruction based on opcode
auto CreateLoadInstruction = [&](unsigned NumLanes, Register DestReg,
- Register OffsetReg) -> MachineInstrBuilder {
+ Register OffsetReg,
+ bool KillState) -> MachineInstrBuilder {
unsigned Opcode;
switch (NumLanes) {
case 4:
@@ -7557,25 +7560,30 @@ generateGatherPattern(MachineInstr &Root,
auto LanesToLoadToReg0 =
llvm::make_range(LoadToLaneInstrsAscending.begin() + 1,
LoadToLaneInstrsAscending.begin() + NumLanes / 2);
- auto PrevReg = SubregToReg->getOperand(0).getReg();
+ Register PrevReg = SubregToReg->getOperand(0).getReg();
for (auto [Index, LoadInstr] : llvm::enumerate(LanesToLoadToReg0)) {
+ const MachineOperand &OffsetRegOperand = LoadInstr->getOperand(3);
PrevReg = LoadLaneToRegister(LoadInstr, PrevReg, Index + 1,
- LoadInstr->getOperand(3).getReg());
+ OffsetRegOperand.getReg(),
+ OffsetRegOperand.isKill());
DelInstrs.push_back(LoadInstr);
}
- auto LastLoadReg0 = PrevReg;
+ Register LastLoadReg0 = PrevReg;
// First load into register 1. Perform a LDRSui to zero out the upper lanes in
// a single instruction.
- auto Lane0Load = *LoadToLaneInstrsAscending.begin();
- auto OriginalSplitLoad =
+ MachineInstr *Lane0Load = *LoadToLaneInstrsAscending.begin();
+ MachineInstr *OriginalSplitLoad =
*std::next(LoadToLaneInstrsAscending.begin(), NumLanes / 2);
- auto DestRegForMiddleIndex = MRI.createVirtualRegister(
+ Register DestRegForMiddleIndex = MRI.createVirtualRegister(
MRI.getRegClass(Lane0Load->getOperand(0).getReg()));
+ const MachineOperand &OriginalSplitToLoadOffsetOperand =
+ OriginalSplitLoad->getOperand(3);
MachineInstrBuilder MiddleIndexLoadInstr =
CreateLoadInstruction(NumLanes, DestRegForMiddleIndex,
- OriginalSplitLoad->getOperand(3).getReg());
+ OriginalSplitToLoadOffsetOperand.getReg(),
+ OriginalSplitToLoadOffsetOperand.isKill());
InstrIdxForVirtReg.insert(
std::make_pair(DestRegForMiddleIndex, InsInstrs.size()));
@@ -7583,7 +7591,7 @@ generateGatherPattern(MachineInstr &Root,
DelInstrs.push_back(OriginalSplitLoad);
// Subreg To Reg instruction for register 1.
- auto DestRegForSubregToReg = MRI.createVirtualRegister(FPR128RegClass);
+ Register DestRegForSubregToReg = MRI.createVirtualRegister(FPR128RegClass);
unsigned SubregType;
switch (NumLanes) {
case 4:
@@ -7616,14 +7624,18 @@ generateGatherPattern(MachineInstr &Root,
LoadToLaneInstrsAscending.end());
PrevReg = SubRegToRegInstr->getOperand(0).getReg();
for (auto [Index, LoadInstr] : llvm::enumerate(LanesToLoadToReg1)) {
+ const MachineOperand &OffsetRegOperand = LoadInstr->getOperand(3);
PrevReg = LoadLaneToRegister(LoadInstr, PrevReg, Index + 1,
- LoadInstr->getOperand(3).getReg());
+ OffsetRegOperand.getReg(),
+ OffsetRegOperand.isKill());
+
+ // Do not add the last reg to DelInstrs - it will be removed later.
if (Index == NumLanes / 2 - 2) {
break;
}
DelInstrs.push_back(LoadInstr);
}
- auto LastLoadReg1 = PrevReg;
+ Register LastLoadReg1 = PrevReg;
// Create the final zip instruction to combine the results.
MachineInstrBuilder ZipInstr =
diff --git a/llvm/test/CodeGen/AArch64/aarch64-combine-gather-lanes.mir b/llvm/test/CodeGen/AArch64/aarch64-combine-gather-lanes.mir
index 09eb18b0e3574..5cddf92fdbb4c 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-combine-gather-lanes.mir
+++ b/llvm/test/CodeGen/AArch64/aarch64-combine-gather-lanes.mir
@@ -13,12 +13,12 @@ body: |
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64common = COPY $x2
; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64common = COPY $x3
; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr64common = COPY $x4
- ; CHECK-NEXT: [[LD_i32:%[0-9]+]]:fpr32 = LDRSroX [[COPY]], killed [[COPY1]], 0, 1
- ; CHECK-NEXT: [[FIRST_REG:%[0-9]+]]:fpr128 = SUBREG_TO_REG 0, killed [[LD_i32]], %subreg.ssub
- ; CHECK-NEXT: [[LD0_1:%[0-9]+]]:fpr128 = LD1i32 [[FIRST_REG]], 1, killed [[COPY2]]
+ ; CHECK-NEXT: [[LD_i32:%[0-9]+]]:fpr32 = LDRSroX [[COPY]], [[COPY1]], 0, 1
+ ; CHECK-NEXT: [[FIRST_REG:%[0-9]+]]:fpr128 = SUBREG_TO_REG 0, [[LD_i32]], %subreg.ssub
+ ; CHECK-NEXT: [[LD0_1:%[0-9]+]]:fpr128 = LD1i32 [[FIRST_REG]], 1, [[COPY2]]
; CHECK-NEXT: [[LD1_0:%[0-9]+]]:fpr32 = LDRSui [[COPY3]], 0
; CHECK-NEXT: [[SECOND_REG:%[0-9]+]]:fpr128 = SUBREG_TO_REG 0, killed [[LD1_0]], %subreg.ssub
- ; CHECK-NEXT: [[LD1_1:%[0-9]+]]:fpr128 = LD1i32 [[SECOND_REG]], 1, killed [[COPY4]]
+ ; CHECK-NEXT: [[LD1_1:%[0-9]+]]:fpr128 = LD1i32 [[SECOND_REG]], 1, [[COPY4]]
; CHECK-NEXT: [[ZIP:%[0-9]+]]:fpr128 = ZIP1v2i64 [[LD0_1]], [[LD1_1]]
; CHECK-NEXT: $q0 = COPY [[ZIP]]
; CHECK-NEXT: RET_ReallyLR implicit $q0
@@ -27,11 +27,11 @@ body: |
%2:gpr64common = COPY $x2
%3:gpr64common = COPY $x3
%4:gpr64common = COPY $x4
- %5:fpr32 = LDRSroX %0, killed %1, 0, 1
- %6:fpr128 = SUBREG_TO_REG 0, killed %5, %subreg.ssub
- %7:fpr128 = LD1i32 %6, 1, killed %2
- %8:fpr128 = LD1i32 %7, 2, killed %3
- %9:fpr128 = LD1i32 %8, 3, killed %4
+ %5:fpr32 = LDRSroX %0, %1, 0, 1
+ %6:fpr128 = SUBREG_TO_REG 0, %5, %subreg.ssub
+ %7:fpr128 = LD1i32 %6, 1, %2
+ %8:fpr128 = LD1i32 %7, 2, %3
+ %9:fpr128 = LD1i32 %8, 3, %4
$q0 = COPY %9
RET_ReallyLR implicit $q0
More information about the llvm-commits mailing list