[llvm] [llvm][RISCV] Handle atomic and volatile in ZilsdOptimizer (PR #169169)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Nov 22 08:40:44 PST 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-risc-v
Author: Brandon Wu (4vtomat)
Atomic loads/stores (except unordered ones) and volatile loads/stores are
considered ordered instructions. We only need to ensure that ordered
instructions are not reordered relative to each other, rather than
excluding them from the ZilsdOptimizer entirely.
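For intuition, the rule reduces to `MachineInstr::hasOrderedMemoryRef()`. Below is a minimal sketch of the predicates involved (illustrative only; `isOrderedAccess` and `mustPreserveRelativeOrder` are hypothetical helpers, not part of the pass):

```cpp
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineMemOperand.h"

using namespace llvm;

// Hypothetical helper: an access is "ordered" when it is volatile or an
// atomic stronger than unordered; MachineMemOperand::isUnordered() is false
// exactly in those cases.
static bool isOrderedAccess(const MachineMemOperand &MMO) {
  return !MMO.isUnordered();
}

// Hypothetical helper mirroring the new checks in isSafeToMove() and
// rescheduleOps(): two instructions must keep their relative program order
// only when *both* carry ordered memory references. A lone ordered access can
// still be moved next to its partner and paired, since pairing two adjacent
// accesses never swaps them past another ordered access.
static bool mustPreserveRelativeOrder(const MachineInstr &A,
                                      const MachineInstr &B) {
  return A.hasOrderedMemoryRef() && B.hasOrderedMemoryRef();
}
```

This matches the test expectations below: two adjacent volatile or monotonic loads still pair, while interleaved ordered loads that would have to swap past each other do not.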
---
Full diff: https://github.com/llvm/llvm-project/pull/169169.diff
2 Files Affected:
- (modified) llvm/lib/Target/RISCV/RISCVZilsdOptimizer.cpp (+15-6)
- (modified) llvm/test/CodeGen/RISCV/zilsd-ldst-opt-prera.mir (+204-5)
``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVZilsdOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVZilsdOptimizer.cpp
index 99e83fbb05a73..cb0a66c23b90b 100644
--- a/llvm/lib/Target/RISCV/RISCVZilsdOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVZilsdOptimizer.cpp
@@ -275,6 +275,16 @@ bool RISCVPreAllocZilsdOpt::isSafeToMove(MachineInstr *MI, MachineInstr *Target,
LLVM_DEBUG(dbgs() << "Memory operation interference detected\n");
return false;
}
+
+ // Don't move across another instruction when both are guaranteed to be
+ // ordered, e.g. volatile or ordered atomic accesses.
+ if (MI->hasOrderedMemoryRef() && It->hasOrderedMemoryRef()) {
+ LLVM_DEBUG(
+ dbgs()
+ << "Cannot move across instruction that is guaranteed to be ordered: "
+ << *It);
+ return false;
+ }
}
return true;
@@ -334,6 +344,10 @@ bool RISCVPreAllocZilsdOpt::rescheduleOps(
Distance > MaxRescheduleDistance)
continue;
+ // If MI0 comes later, the pair cannot be folded if the memory order matters.
+ if (!MI1IsLater && MI0->hasOrderedMemoryRef() && MI1->hasOrderedMemoryRef())
+ continue;
+
// Move the instruction to the target position
MachineBasicBlock::iterator InsertPos = TargetInstr->getIterator();
++InsertPos;
@@ -400,15 +414,10 @@ bool RISCVPreAllocZilsdOpt::isMemoryOp(const MachineInstr &MI) {
return false;
// When no memory operands are present, conservatively assume unaligned,
- // volatile, unfoldable.
+ // unfoldable.
if (!MI.hasOneMemOperand())
return false;
- const MachineMemOperand *MMO = *MI.memoperands_begin();
-
- if (MMO->isVolatile() || MMO->isAtomic())
- return false;
-
// sw <undef> could probably be eliminated entirely, but for now we just want
// to avoid making a mess of it.
if (MI.getOperand(0).isReg() && MI.getOperand(0).isUndef())
diff --git a/llvm/test/CodeGen/RISCV/zilsd-ldst-opt-prera.mir b/llvm/test/CodeGen/RISCV/zilsd-ldst-opt-prera.mir
index dab394d4bc8c4..95d0e322faa4d 100644
--- a/llvm/test/CodeGen/RISCV/zilsd-ldst-opt-prera.mir
+++ b/llvm/test/CodeGen/RISCV/zilsd-ldst-opt-prera.mir
@@ -182,6 +182,26 @@
ret i32 %5
}
+ define void @invalid_volatile_loads() {
+ ret void
+ }
+
+ define void @atomic_loads() {
+ ret void
+ }
+
+ define void @invalid_atomic_loads() {
+ ret void
+ }
+
+ define void @interleave_atomic_volatile_loads() {
+ ret void
+ }
+
+ define void @interleave_atomic_volatile_loads2() {
+ ret void
+ }
+
define i32 @store_dependency(ptr %0, i32 %1) {
%3 = load i32, ptr %0, align 4
%4 = getelementptr inbounds i32, ptr %0, i32 1
@@ -895,7 +915,7 @@ body: |
...
---
-# Test with volatile loads - should not combine
+# Test with valid volatile loads
name: volatile_loads
alignment: 4
tracksRegLiveness: true
@@ -918,17 +938,196 @@ body: |
; CHECK-4BYTE: liveins: $x10
; CHECK-4BYTE-NEXT: {{ $}}
; CHECK-4BYTE-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
- ; CHECK-4BYTE-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], 0 :: (volatile load (s32))
- ; CHECK-4BYTE-NEXT: [[LW1:%[0-9]+]]:gpr = LW [[COPY]], 4 :: (volatile load (s32))
- ; CHECK-4BYTE-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[LW]], [[LW1]]
+ ; CHECK-4BYTE-NEXT: [[PseudoLD_RV32_OPT:%[0-9]+]]:gpr, [[PseudoLD_RV32_OPT1:%[0-9]+]]:gpr = PseudoLD_RV32_OPT [[COPY]], 0 :: (volatile load (s32))
+ ; CHECK-4BYTE-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[PseudoLD_RV32_OPT]], [[PseudoLD_RV32_OPT1]]
; CHECK-4BYTE-NEXT: PseudoRET
%0:gpr = COPY $x10
- ; Volatile loads should not be combined
%1:gpr = LW %0, 0 :: (volatile load (s32))
%2:gpr = LW %0, 4 :: (volatile load (s32))
%3:gpr = ADD %1, %2
PseudoRET
+...
+---
+# Test with invalid volatile loads
+name: invalid_volatile_loads
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$x10', virtual-reg: '%0' }
+body: |
+ bb.0:
+ liveins: $x10
+
+ ; CHECK-LABEL: name: invalid_volatile_loads
+ ; CHECK: liveins: $x10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], 0 :: (volatile load (s32))
+ ; CHECK-NEXT: [[LW1:%[0-9]+]]:gpr = LW [[COPY]], 8 :: (volatile load (s32))
+ ; CHECK-NEXT: [[LW2:%[0-9]+]]:gpr = LW [[COPY]], 4 :: (volatile load (s32))
+ ; CHECK-NEXT: [[LW3:%[0-9]+]]:gpr = LW [[COPY]], 12 :: (volatile load (s32))
+ ; CHECK-NEXT: PseudoRET
+ ;
+ ; CHECK-4BYTE-LABEL: name: invalid_volatile_loads
+ ; CHECK-4BYTE: liveins: $x10
+ ; CHECK-4BYTE-NEXT: {{ $}}
+ ; CHECK-4BYTE-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-4BYTE-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], 0 :: (volatile load (s32))
+ ; CHECK-4BYTE-NEXT: [[LW1:%[0-9]+]]:gpr = LW [[COPY]], 8 :: (volatile load (s32))
+ ; CHECK-4BYTE-NEXT: [[LW2:%[0-9]+]]:gpr = LW [[COPY]], 4 :: (volatile load (s32))
+ ; CHECK-4BYTE-NEXT: [[LW3:%[0-9]+]]:gpr = LW [[COPY]], 12 :: (volatile load (s32))
+ ; CHECK-4BYTE-NEXT: PseudoRET
+ %0:gpr = COPY $x10
+ %1:gpr = LW %0, 0 :: (volatile load (s32))
+ %2:gpr = LW %0, 8 :: (volatile load (s32))
+ %3:gpr = LW %0, 4 :: (volatile load (s32))
+ %4:gpr = LW %0, 12 :: (volatile load (s32))
+ PseudoRET
+
+...
+---
+# Test with valid atomic loads
+name: atomic_loads
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$x10', virtual-reg: '%0' }
+body: |
+ bb.0:
+ liveins: $x10
+
+ ; CHECK-LABEL: name: atomic_loads
+ ; CHECK: liveins: $x10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], 0 :: (load monotonic (s32))
+ ; CHECK-NEXT: [[LW1:%[0-9]+]]:gpr = LW [[COPY]], 4 :: (load monotonic (s32))
+ ; CHECK-NEXT: PseudoRET
+ ;
+ ; CHECK-4BYTE-LABEL: name: atomic_loads
+ ; CHECK-4BYTE: liveins: $x10
+ ; CHECK-4BYTE-NEXT: {{ $}}
+ ; CHECK-4BYTE-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-4BYTE-NEXT: [[PseudoLD_RV32_OPT:%[0-9]+]]:gpr, [[PseudoLD_RV32_OPT1:%[0-9]+]]:gpr = PseudoLD_RV32_OPT [[COPY]], 0 :: (load monotonic (s32))
+ ; CHECK-4BYTE-NEXT: PseudoRET
+ %0:gpr = COPY $x10
+ %1:gpr = LW %0, 0 :: (load monotonic (s32))
+ %2:gpr = LW %0, 4 :: (load monotonic (s32))
+ PseudoRET
+
+...
+---
+# Test with invalid atomic loads
+name: invalid_atomic_loads
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$x10', virtual-reg: '%0' }
+body: |
+ bb.0:
+ liveins: $x10
+
+ ; CHECK-LABEL: name: invalid_atomic_loads
+ ; CHECK: liveins: $x10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], 0 :: (load monotonic (s32))
+ ; CHECK-NEXT: [[LW1:%[0-9]+]]:gpr = LW [[COPY]], 8 :: (load monotonic (s32))
+ ; CHECK-NEXT: [[LW2:%[0-9]+]]:gpr = LW [[COPY]], 4 :: (load monotonic (s32))
+ ; CHECK-NEXT: [[LW3:%[0-9]+]]:gpr = LW [[COPY]], 12 :: (load monotonic (s32))
+ ; CHECK-NEXT: PseudoRET
+ ;
+ ; CHECK-4BYTE-LABEL: name: invalid_atomic_loads
+ ; CHECK-4BYTE: liveins: $x10
+ ; CHECK-4BYTE-NEXT: {{ $}}
+ ; CHECK-4BYTE-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-4BYTE-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], 0 :: (load monotonic (s32))
+ ; CHECK-4BYTE-NEXT: [[LW1:%[0-9]+]]:gpr = LW [[COPY]], 8 :: (load monotonic (s32))
+ ; CHECK-4BYTE-NEXT: [[LW2:%[0-9]+]]:gpr = LW [[COPY]], 4 :: (load monotonic (s32))
+ ; CHECK-4BYTE-NEXT: [[LW3:%[0-9]+]]:gpr = LW [[COPY]], 12 :: (load monotonic (s32))
+ ; CHECK-4BYTE-NEXT: PseudoRET
+ %0:gpr = COPY $x10
+ %1:gpr = LW %0, 0 :: (load monotonic (s32))
+ %2:gpr = LW %0, 8 :: (load monotonic (s32))
+ %3:gpr = LW %0, 4 :: (load monotonic (s32))
+ %4:gpr = LW %0, 12 :: (load monotonic (s32))
+ PseudoRET
+
+...
+---
+# Test with interleaving atomic loads and volatile loads
+name: interleave_atomic_volatile_loads
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$x10', virtual-reg: '%0' }
+body: |
+ bb.0:
+ liveins: $x10
+
+ ; CHECK-LABEL: name: interleave_atomic_volatile_loads
+ ; CHECK: liveins: $x10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], 0 :: (load monotonic (s32))
+ ; CHECK-NEXT: [[LW1:%[0-9]+]]:gpr = LW [[COPY]], 8 :: (volatile load (s32))
+ ; CHECK-NEXT: [[LW2:%[0-9]+]]:gpr = LW [[COPY]], 4 :: (load monotonic (s32))
+ ; CHECK-NEXT: [[LW3:%[0-9]+]]:gpr = LW [[COPY]], 12 :: (volatile load (s32))
+ ; CHECK-NEXT: PseudoRET
+ ;
+ ; CHECK-4BYTE-LABEL: name: interleave_atomic_volatile_loads
+ ; CHECK-4BYTE: liveins: $x10
+ ; CHECK-4BYTE-NEXT: {{ $}}
+ ; CHECK-4BYTE-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-4BYTE-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], 0 :: (load monotonic (s32))
+ ; CHECK-4BYTE-NEXT: [[LW1:%[0-9]+]]:gpr = LW [[COPY]], 8 :: (volatile load (s32))
+ ; CHECK-4BYTE-NEXT: [[LW2:%[0-9]+]]:gpr = LW [[COPY]], 4 :: (load monotonic (s32))
+ ; CHECK-4BYTE-NEXT: [[LW3:%[0-9]+]]:gpr = LW [[COPY]], 12 :: (volatile load (s32))
+ ; CHECK-4BYTE-NEXT: PseudoRET
+ %0:gpr = COPY $x10
+ %1:gpr = LW %0, 0 :: (load monotonic (s32))
+ %2:gpr = LW %0, 8 :: (volatile load (s32))
+ %3:gpr = LW %0, 4 :: (load monotonic (s32))
+ %4:gpr = LW %0, 12 :: (volatile load (s32))
+ PseudoRET
+
+...
+---
+# Test with interleaving unordered atomic loads and volatile loads
+name: interleave_atomic_volatile_loads2
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$x10', virtual-reg: '%0' }
+body: |
+ bb.0:
+ liveins: $x10
+
+ ; CHECK-LABEL: name: interleave_atomic_volatile_loads2
+ ; CHECK: liveins: $x10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], 0 :: (load unordered (s32))
+ ; CHECK-NEXT: [[LW1:%[0-9]+]]:gpr = LW [[COPY]], 8 :: (volatile load (s32))
+ ; CHECK-NEXT: [[LW2:%[0-9]+]]:gpr = LW [[COPY]], 4 :: (load unordered (s32))
+ ; CHECK-NEXT: [[LW3:%[0-9]+]]:gpr = LW [[COPY]], 12 :: (volatile load (s32))
+ ; CHECK-NEXT: PseudoRET
+ ;
+ ; CHECK-4BYTE-LABEL: name: interleave_atomic_volatile_loads2
+ ; CHECK-4BYTE: liveins: $x10
+ ; CHECK-4BYTE-NEXT: {{ $}}
+ ; CHECK-4BYTE-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-4BYTE-NEXT: [[PseudoLD_RV32_OPT:%[0-9]+]]:gpr, [[PseudoLD_RV32_OPT1:%[0-9]+]]:gpr = PseudoLD_RV32_OPT [[COPY]], 0 :: (load unordered (s32))
+ ; CHECK-4BYTE-NEXT: [[PseudoLD_RV32_OPT2:%[0-9]+]]:gpr, [[PseudoLD_RV32_OPT3:%[0-9]+]]:gpr = PseudoLD_RV32_OPT [[COPY]], 8 :: (volatile load (s32))
+ ; CHECK-4BYTE-NEXT: PseudoRET
+ %0:gpr = COPY $x10
+ %1:gpr = LW %0, 0 :: (load unordered (s32))
+ %2:gpr = LW %0, 8 :: (volatile load (s32))
+ %3:gpr = LW %0, 4 :: (load unordered (s32))
+ %4:gpr = LW %0, 12 :: (volatile load (s32))
+ PseudoRET
+
...
---
# Test store dependency - store modifies same location as load
``````````
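For a source-level intuition of the now-valid cases (hedged: actual codegen depends on the subtarget and scheduling), the `volatile_loads` test above corresponds to a pattern like:

```cpp
// Two adjacent volatile 32-bit loads at offsets 0 and 4. Pairing them into a
// single Zilsd double-word load keeps their relative order, so the
// optimization is legal; the sequenced statements fix the read order.
int sum(volatile int *p) {
  int lo = p[0]; // first volatile read
  int hi = p[1]; // second volatile read, sequenced after the first
  return lo + hi;
}
```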
https://github.com/llvm/llvm-project/pull/169169