[llvm] [AArch64][MachineCombiner] Fix setting reg state for gather lane pattern (PR #149703)

via llvm-commits llvm-commits at lists.llvm.org
Sun Jul 20 04:55:29 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-aarch64

Author: Jonathan Cohen (jcohen-apple)

<details>
<summary>Changes</summary>

Closing #<!-- -->149585 #<!-- -->149644

The offset register was unconditionally marked `killed` without verifying that it has no additional uses. The generated instructions now reuse the kill state already set on the original instruction's offset register operand.

---
Full diff: https://github.com/llvm/llvm-project/pull/149703.diff


2 Files Affected:

- (modified) llvm/lib/Target/AArch64/AArch64InstrInfo.cpp (+25-13) 
- (modified) llvm/test/CodeGen/AArch64/aarch64-combine-gather-lanes.mir (+9-9) 


``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index bc57537ad5dfb..214eb815738af 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -22,6 +22,7 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/CodeGen/CFIInstBuilder.h"
 #include "llvm/CodeGen/LivePhysRegs.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
@@ -7516,14 +7517,15 @@ generateGatherPattern(MachineInstr &Root,
 
   auto LoadLaneToRegister = [&](MachineInstr *OriginalInstr,
                                 Register SrcRegister, unsigned Lane,
-                                Register OffsetRegister) {
+                                Register OffsetRegister,
+                                bool OffsetRegisterKillState) {
     auto NewRegister = MRI.createVirtualRegister(FPR128RegClass);
     MachineInstrBuilder LoadIndexIntoRegister =
         BuildMI(MF, MIMetadata(*OriginalInstr), TII->get(Root.getOpcode()),
                 NewRegister)
             .addReg(SrcRegister)
             .addImm(Lane)
-            .addReg(OffsetRegister, getKillRegState(true));
+            .addReg(OffsetRegister, getKillRegState(OffsetRegisterKillState));
     InstrIdxForVirtReg.insert(std::make_pair(NewRegister, InsInstrs.size()));
     InsInstrs.push_back(LoadIndexIntoRegister);
     return NewRegister;
@@ -7531,7 +7533,8 @@ generateGatherPattern(MachineInstr &Root,
 
   // Helper to create load instruction based on opcode
   auto CreateLoadInstruction = [&](unsigned NumLanes, Register DestReg,
-                                   Register OffsetReg) -> MachineInstrBuilder {
+                                   Register OffsetReg,
+                                   bool KillState) -> MachineInstrBuilder {
     unsigned Opcode;
     switch (NumLanes) {
     case 4:
@@ -7557,25 +7560,30 @@ generateGatherPattern(MachineInstr &Root,
   auto LanesToLoadToReg0 =
       llvm::make_range(LoadToLaneInstrsAscending.begin() + 1,
                        LoadToLaneInstrsAscending.begin() + NumLanes / 2);
-  auto PrevReg = SubregToReg->getOperand(0).getReg();
+  Register PrevReg = SubregToReg->getOperand(0).getReg();
   for (auto [Index, LoadInstr] : llvm::enumerate(LanesToLoadToReg0)) {
+    const MachineOperand &OffsetRegOperand = LoadInstr->getOperand(3);
     PrevReg = LoadLaneToRegister(LoadInstr, PrevReg, Index + 1,
-                                 LoadInstr->getOperand(3).getReg());
+                                 OffsetRegOperand.getReg(),
+                                 OffsetRegOperand.isKill());
     DelInstrs.push_back(LoadInstr);
   }
-  auto LastLoadReg0 = PrevReg;
+  Register LastLoadReg0 = PrevReg;
 
   // First load into register 1. Perform a LDRSui to zero out the upper lanes in
   // a single instruction.
-  auto Lane0Load = *LoadToLaneInstrsAscending.begin();
-  auto OriginalSplitLoad =
+  MachineInstr *Lane0Load = *LoadToLaneInstrsAscending.begin();
+  MachineInstr *OriginalSplitLoad =
       *std::next(LoadToLaneInstrsAscending.begin(), NumLanes / 2);
-  auto DestRegForMiddleIndex = MRI.createVirtualRegister(
+  Register DestRegForMiddleIndex = MRI.createVirtualRegister(
       MRI.getRegClass(Lane0Load->getOperand(0).getReg()));
 
+  const MachineOperand &OriginalSplitToLoadOffsetOperand =
+      OriginalSplitLoad->getOperand(3);
   MachineInstrBuilder MiddleIndexLoadInstr =
       CreateLoadInstruction(NumLanes, DestRegForMiddleIndex,
-                            OriginalSplitLoad->getOperand(3).getReg());
+                            OriginalSplitToLoadOffsetOperand.getReg(),
+                            OriginalSplitToLoadOffsetOperand.isKill());
 
   InstrIdxForVirtReg.insert(
       std::make_pair(DestRegForMiddleIndex, InsInstrs.size()));
@@ -7583,7 +7591,7 @@ generateGatherPattern(MachineInstr &Root,
   DelInstrs.push_back(OriginalSplitLoad);
 
   // Subreg To Reg instruction for register 1.
-  auto DestRegForSubregToReg = MRI.createVirtualRegister(FPR128RegClass);
+  Register DestRegForSubregToReg = MRI.createVirtualRegister(FPR128RegClass);
   unsigned SubregType;
   switch (NumLanes) {
   case 4:
@@ -7616,14 +7624,18 @@ generateGatherPattern(MachineInstr &Root,
                        LoadToLaneInstrsAscending.end());
   PrevReg = SubRegToRegInstr->getOperand(0).getReg();
   for (auto [Index, LoadInstr] : llvm::enumerate(LanesToLoadToReg1)) {
+    const MachineOperand &OffsetRegOperand = LoadInstr->getOperand(3);
     PrevReg = LoadLaneToRegister(LoadInstr, PrevReg, Index + 1,
-                                 LoadInstr->getOperand(3).getReg());
+                                 OffsetRegOperand.getReg(),
+                                 OffsetRegOperand.isKill());
+
+    // Do not add the last reg to DelInstrs - it will be removed later.
     if (Index == NumLanes / 2 - 2) {
       break;
     }
     DelInstrs.push_back(LoadInstr);
   }
-  auto LastLoadReg1 = PrevReg;
+  Register LastLoadReg1 = PrevReg;
 
   // Create the final zip instruction to combine the results.
   MachineInstrBuilder ZipInstr =
diff --git a/llvm/test/CodeGen/AArch64/aarch64-combine-gather-lanes.mir b/llvm/test/CodeGen/AArch64/aarch64-combine-gather-lanes.mir
index 09eb18b0e3574..5cddf92fdbb4c 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-combine-gather-lanes.mir
+++ b/llvm/test/CodeGen/AArch64/aarch64-combine-gather-lanes.mir
@@ -13,12 +13,12 @@ body:             |
     ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64common = COPY $x2
     ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64common = COPY $x3
     ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr64common = COPY $x4
-    ; CHECK-NEXT: [[LD_i32:%[0-9]+]]:fpr32 = LDRSroX [[COPY]], killed [[COPY1]], 0, 1
-    ; CHECK-NEXT: [[FIRST_REG:%[0-9]+]]:fpr128 = SUBREG_TO_REG 0, killed [[LD_i32]], %subreg.ssub
-    ; CHECK-NEXT: [[LD0_1:%[0-9]+]]:fpr128 = LD1i32 [[FIRST_REG]], 1, killed [[COPY2]] 
+    ; CHECK-NEXT: [[LD_i32:%[0-9]+]]:fpr32 = LDRSroX [[COPY]], [[COPY1]], 0, 1
+    ; CHECK-NEXT: [[FIRST_REG:%[0-9]+]]:fpr128 = SUBREG_TO_REG 0, [[LD_i32]], %subreg.ssub
+    ; CHECK-NEXT: [[LD0_1:%[0-9]+]]:fpr128 = LD1i32 [[FIRST_REG]], 1, [[COPY2]] 
     ; CHECK-NEXT: [[LD1_0:%[0-9]+]]:fpr32 = LDRSui [[COPY3]], 0
     ; CHECK-NEXT: [[SECOND_REG:%[0-9]+]]:fpr128 = SUBREG_TO_REG 0, killed [[LD1_0]], %subreg.ssub
-    ; CHECK-NEXT: [[LD1_1:%[0-9]+]]:fpr128 = LD1i32 [[SECOND_REG]], 1, killed [[COPY4]]
+    ; CHECK-NEXT: [[LD1_1:%[0-9]+]]:fpr128 = LD1i32 [[SECOND_REG]], 1, [[COPY4]]
     ; CHECK-NEXT: [[ZIP:%[0-9]+]]:fpr128 = ZIP1v2i64 [[LD0_1]], [[LD1_1]]
     ; CHECK-NEXT: $q0 = COPY [[ZIP]]
     ; CHECK-NEXT: RET_ReallyLR implicit $q0
@@ -27,11 +27,11 @@ body:             |
     %2:gpr64common = COPY $x2
     %3:gpr64common = COPY $x3
     %4:gpr64common = COPY $x4
-    %5:fpr32 = LDRSroX %0, killed %1, 0, 1
-    %6:fpr128 = SUBREG_TO_REG 0, killed %5, %subreg.ssub
-    %7:fpr128 = LD1i32 %6, 1, killed %2
-    %8:fpr128 = LD1i32 %7, 2, killed %3
-    %9:fpr128 = LD1i32 %8, 3, killed %4
+    %5:fpr32 = LDRSroX %0, %1, 0, 1
+    %6:fpr128 = SUBREG_TO_REG 0, %5, %subreg.ssub
+    %7:fpr128 = LD1i32 %6, 1, %2
+    %8:fpr128 = LD1i32 %7, 2, %3
+    %9:fpr128 = LD1i32 %8, 3, %4
     $q0 = COPY %9
     RET_ReallyLR implicit $q0
 

``````````

</details>


https://github.com/llvm/llvm-project/pull/149703


More information about the llvm-commits mailing list