[llvm] 0ee10e9 - [RISCV] Add additional fence for amocas when required by recent ABI change (#101023)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 19 05:40:00 PDT 2024
Author: Alex Bradbury
Date: 2024-09-19T13:39:56+01:00
New Revision: 0ee10e9466603f9e55e6044b5070f65b8182dae1
URL: https://github.com/llvm/llvm-project/commit/0ee10e9466603f9e55e6044b5070f65b8182dae1
DIFF: https://github.com/llvm/llvm-project/commit/0ee10e9466603f9e55e6044b5070f65b8182dae1.diff
LOG: [RISCV] Add additional fence for amocas when required by recent ABI change (#101023)
A recent atomics ABI change / fix requires that for the "A6C" and A6S"
atomics ABIs (i.e. both of those supported by LLVM currently), an
additional fence is inserted for an atomic_compare_exchange with seq_cst
failure ordering.
<https://github.com/riscv-non-isa/riscv-elf-psabi-doc/pull/445>
This isn't trivial to support through the hooks used by AtomicExpandPass
because that pass assumes that when fences are inserted, the original
atomics ordering information can be removed from the instruction. Rather
than try to change and complicate that API, this patch implements the
needed fence insertion through a small special purpose pass.
Added:
llvm/lib/Target/RISCV/RISCVZacasABIFix.cpp
Modified:
llvm/lib/Target/RISCV/CMakeLists.txt
llvm/lib/Target/RISCV/RISCV.h
llvm/lib/Target/RISCV/RISCVISelLowering.h
llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
llvm/test/CodeGen/RISCV/O0-pipeline.ll
llvm/test/CodeGen/RISCV/O3-pipeline.ll
llvm/test/CodeGen/RISCV/atomic-cmpxchg-branch-on-result.ll
llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll
llvm/test/CodeGen/RISCV/atomic-rmw.ll
llvm/test/CodeGen/RISCV/atomic-signext.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt
index 7d02d630a28199..fb5edfa4765fb5 100644
--- a/llvm/lib/Target/RISCV/CMakeLists.txt
+++ b/llvm/lib/Target/RISCV/CMakeLists.txt
@@ -61,6 +61,7 @@ add_llvm_target(RISCVCodeGen
RISCVTargetObjectFile.cpp
RISCVTargetTransformInfo.cpp
RISCVVectorPeephole.cpp
+ RISCVZacasABIFix.cpp
GISel/RISCVCallLowering.cpp
GISel/RISCVInstructionSelector.cpp
GISel/RISCVLegalizerInfo.cpp
diff --git a/llvm/lib/Target/RISCV/RISCV.h b/llvm/lib/Target/RISCV/RISCV.h
index 561e4895e19428..f2fb32a8b63bce 100644
--- a/llvm/lib/Target/RISCV/RISCV.h
+++ b/llvm/lib/Target/RISCV/RISCV.h
@@ -85,6 +85,9 @@ void initializeRISCVMoveMergePass(PassRegistry &);
FunctionPass *createRISCVPushPopOptimizationPass();
void initializeRISCVPushPopOptPass(PassRegistry &);
+FunctionPass *createRISCVZacasABIFixPass();
+void initializeRISCVZacasABIFixPass(PassRegistry &);
+
InstructionSelector *
createRISCVInstructionSelector(const RISCVTargetMachine &,
const RISCVSubtarget &,
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 63488e1fcb0075..ceb9d499002846 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -684,6 +684,9 @@ class RISCVTargetLowering : public TargetLowering {
bool preferZeroCompareBranch() const override { return true; }
+ // Note that one specific case requires fence insertion for an
+ // AtomicCmpXchgInst but is handled via the RISCVZacasABIFix pass rather
+ // than this hook due to limitations in the interface here.
bool shouldInsertFencesForAtomic(const Instruction *I) const override {
return isa<LoadInst>(I) || isa<StoreInst>(I);
}
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index 5cb9fbc8a9eb98..912887ba191609 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -420,6 +420,7 @@ bool RISCVPassConfig::addRegAssignAndRewriteOptimized() {
void RISCVPassConfig::addIRPasses() {
addPass(createAtomicExpandLegacyPass());
+ addPass(createRISCVZacasABIFixPass());
if (getOptLevel() != CodeGenOptLevel::None) {
if (EnableLoopDataPrefetch)
diff --git a/llvm/lib/Target/RISCV/RISCVZacasABIFix.cpp b/llvm/lib/Target/RISCV/RISCVZacasABIFix.cpp
new file mode 100644
index 00000000000000..17f88e71d03527
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVZacasABIFix.cpp
@@ -0,0 +1,96 @@
+//===----- RISCVZacasABIFix.cpp -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass implements a fence insertion for an atomic cmpxchg in a case that
+// isn't easy to do with the current AtomicExpandPass hooks API.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RISCV.h"
+#include "RISCVTargetMachine.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstVisitor.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsRISCV.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "riscv-zacas-abi-fix"
+#define PASS_NAME "RISC-V Zacas ABI fix"
+
+namespace {
+
+class RISCVZacasABIFix : public FunctionPass,
+ public InstVisitor<RISCVZacasABIFix, bool> {
+ const RISCVSubtarget *ST;
+
+public:
+ static char ID;
+
+ RISCVZacasABIFix() : FunctionPass(ID) {}
+
+ bool runOnFunction(Function &F) override;
+
+ StringRef getPassName() const override { return PASS_NAME; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<TargetPassConfig>();
+ }
+
+ bool visitInstruction(Instruction &I) { return false; }
+ bool visitAtomicCmpXchgInst(AtomicCmpXchgInst &I);
+};
+
+} // end anonymous namespace
+
+// Insert a leading fence (needed for broadest atomics ABI compatibility)
+// only if the Zacas extension is enabled and the AtomicCmpXchgInst has a
+// SequentiallyConsistent failure ordering.
+bool RISCVZacasABIFix::visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) {
+ assert(ST->hasStdExtZacas() && "only necessary to run in presence of zacas");
+ IRBuilder<> Builder(&I);
+ if (I.getFailureOrdering() != AtomicOrdering::SequentiallyConsistent)
+ return false;
+
+ Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
+ return true;
+}
+
+bool RISCVZacasABIFix::runOnFunction(Function &F) {
+ auto &TPC = getAnalysis<TargetPassConfig>();
+ auto &TM = TPC.getTM<RISCVTargetMachine>();
+ ST = &TM.getSubtarget<RISCVSubtarget>(F);
+
+ if (skipFunction(F) || !ST->hasStdExtZacas())
+ return false;
+
+ bool MadeChange = false;
+ for (auto &BB : F)
+ for (Instruction &I : llvm::make_early_inc_range(BB))
+ MadeChange |= visit(I);
+
+ return MadeChange;
+}
+
+INITIALIZE_PASS_BEGIN(RISCVZacasABIFix, DEBUG_TYPE, PASS_NAME, false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_END(RISCVZacasABIFix, DEBUG_TYPE, PASS_NAME, false, false)
+
+char RISCVZacasABIFix::ID = 0;
+
+FunctionPass *llvm::createRISCVZacasABIFixPass() {
+ return new RISCVZacasABIFix();
+}
diff --git a/llvm/test/CodeGen/RISCV/O0-pipeline.ll b/llvm/test/CodeGen/RISCV/O0-pipeline.ll
index 84c7f3f987c065..7b85e9c77a4232 100644
--- a/llvm/test/CodeGen/RISCV/O0-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O0-pipeline.ll
@@ -22,6 +22,7 @@
; CHECK-NEXT: Expand large div/rem
; CHECK-NEXT: Expand large fp convert
; CHECK-NEXT: Expand Atomic instructions
+; CHECK-NEXT: RISC-V Zacas ABI fix
; CHECK-NEXT: Module Verifier
; CHECK-NEXT: Lower Garbage Collection Instructions
; CHECK-NEXT: Shadow Stack GC Lowering
diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
index 5d14d14d216244..c29f15a15c1503 100644
--- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
@@ -26,6 +26,7 @@
; CHECK-NEXT: Expand large div/rem
; CHECK-NEXT: Expand large fp convert
; CHECK-NEXT: Expand Atomic instructions
+; CHECK-NEXT: RISC-V Zacas ABI fix
; CHECK-NEXT: Dominator Tree Construction
; CHECK-NEXT: Natural Loop Information
; CHECK-NEXT: Canonicalize natural loops
diff --git a/llvm/test/CodeGen/RISCV/atomic-cmpxchg-branch-on-result.ll b/llvm/test/CodeGen/RISCV/atomic-cmpxchg-branch-on-result.ll
index b98d2d57a0b52d..e70ba93de75e01 100644
--- a/llvm/test/CodeGen/RISCV/atomic-cmpxchg-branch-on-result.ll
+++ b/llvm/test/CodeGen/RISCV/atomic-cmpxchg-branch-on-result.ll
@@ -36,6 +36,7 @@ define void @cmpxchg_and_branch1(ptr %ptr, i32 signext %cmp, i32 signext %val) n
; ZACAS: # %bb.0: # %entry
; ZACAS-NEXT: .LBB0_1: # %do_cmpxchg
; ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
+; ZACAS-NEXT: fence rw, rw
; ZACAS-NEXT: mv a3, a1
; ZACAS-NEXT: amocas.w.aqrl a3, a2, (a0)
; ZACAS-NEXT: bne a3, a1, .LBB0_1
@@ -76,6 +77,7 @@ define void @cmpxchg_and_branch2(ptr %ptr, i32 signext %cmp, i32 signext %val) n
; ZACAS: # %bb.0: # %entry
; ZACAS-NEXT: .LBB1_1: # %do_cmpxchg
; ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
+; ZACAS-NEXT: fence rw, rw
; ZACAS-NEXT: mv a3, a1
; ZACAS-NEXT: amocas.w.aqrl a3, a2, (a0)
; ZACAS-NEXT: beq a3, a1, .LBB1_1
@@ -216,6 +218,7 @@ define void @cmpxchg_masked_and_branch1(ptr %ptr, i8 signext %cmp, i8 signext %v
; RV64IA-ZABHA: # %bb.0: # %entry
; RV64IA-ZABHA-NEXT: .LBB2_1: # %do_cmpxchg
; RV64IA-ZABHA-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64IA-ZABHA-NEXT: fence rw, rw
; RV64IA-ZABHA-NEXT: mv a3, a1
; RV64IA-ZABHA-NEXT: amocas.b.aqrl a3, a2, (a0)
; RV64IA-ZABHA-NEXT: bne a3, a1, .LBB2_1
@@ -368,6 +371,7 @@ define void @cmpxchg_masked_and_branch2(ptr %ptr, i8 signext %cmp, i8 signext %v
; RV64IA-ZABHA: # %bb.0: # %entry
; RV64IA-ZABHA-NEXT: .LBB3_1: # %do_cmpxchg
; RV64IA-ZABHA-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64IA-ZABHA-NEXT: fence rw, rw
; RV64IA-ZABHA-NEXT: mv a3, a1
; RV64IA-ZABHA-NEXT: amocas.b.aqrl a3, a2, (a0)
; RV64IA-ZABHA-NEXT: beq a3, a1, .LBB3_1
@@ -408,6 +412,7 @@ define void @cmpxchg_and_irrelevant_branch(ptr %ptr, i32 signext %cmp, i32 signe
; ZACAS: # %bb.0: # %entry
; ZACAS-NEXT: .LBB4_1: # %do_cmpxchg
; ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
+; ZACAS-NEXT: fence rw, rw
; ZACAS-NEXT: mv a4, a1
; ZACAS-NEXT: amocas.w.aqrl a4, a2, (a0)
; ZACAS-NEXT: beqz a3, .LBB4_1
diff --git a/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll b/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll
index c47db319fc2c3a..acd6e8f9afe2a0 100644
--- a/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll
+++ b/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll
@@ -1857,6 +1857,7 @@ define void @cmpxchg_i8_seq_cst_seq_cst(ptr %ptr, i8 %cmp, i8 %val) nounwind {
;
; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i8_seq_cst_seq_cst:
; RV64IA-WMO-ZABHA: # %bb.0:
+; RV64IA-WMO-ZABHA-NEXT: fence rw, rw
; RV64IA-WMO-ZABHA-NEXT: amocas.b.aqrl a1, a2, (a0)
; RV64IA-WMO-ZABHA-NEXT: ret
;
@@ -1885,6 +1886,7 @@ define void @cmpxchg_i8_seq_cst_seq_cst(ptr %ptr, i8 %cmp, i8 %val) nounwind {
;
; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i8_seq_cst_seq_cst:
; RV64IA-TSO-ZABHA: # %bb.0:
+; RV64IA-TSO-ZABHA-NEXT: fence rw, rw
; RV64IA-TSO-ZABHA-NEXT: amocas.b a1, a2, (a0)
; RV64IA-TSO-ZABHA-NEXT: ret
%res = cmpxchg ptr %ptr, i8 %cmp, i8 %val seq_cst seq_cst
@@ -3787,6 +3789,7 @@ define void @cmpxchg_i16_seq_cst_seq_cst(ptr %ptr, i16 %cmp, i16 %val) nounwind
;
; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i16_seq_cst_seq_cst:
; RV64IA-WMO-ZABHA: # %bb.0:
+; RV64IA-WMO-ZABHA-NEXT: fence rw, rw
; RV64IA-WMO-ZABHA-NEXT: amocas.h.aqrl a1, a2, (a0)
; RV64IA-WMO-ZABHA-NEXT: ret
;
@@ -3816,6 +3819,7 @@ define void @cmpxchg_i16_seq_cst_seq_cst(ptr %ptr, i16 %cmp, i16 %val) nounwind
;
; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i16_seq_cst_seq_cst:
; RV64IA-TSO-ZABHA: # %bb.0:
+; RV64IA-TSO-ZABHA-NEXT: fence rw, rw
; RV64IA-TSO-ZABHA-NEXT: amocas.h a1, a2, (a0)
; RV64IA-TSO-ZABHA-NEXT: ret
%res = cmpxchg ptr %ptr, i16 %cmp, i16 %val seq_cst seq_cst
@@ -4788,6 +4792,7 @@ define void @cmpxchg_i32_seq_cst_seq_cst(ptr %ptr, i32 %cmp, i32 %val) nounwind
;
; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i32_seq_cst_seq_cst:
; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: fence rw, rw
; RV32IA-WMO-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0)
; RV32IA-WMO-ZACAS-NEXT: ret
;
@@ -4804,6 +4809,7 @@ define void @cmpxchg_i32_seq_cst_seq_cst(ptr %ptr, i32 %cmp, i32 %val) nounwind
;
; RV32IA-TSO-ZACAS-LABEL: cmpxchg_i32_seq_cst_seq_cst:
; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: fence rw, rw
; RV32IA-TSO-ZACAS-NEXT: amocas.w a1, a2, (a0)
; RV32IA-TSO-ZACAS-NEXT: ret
;
@@ -4834,11 +4840,13 @@ define void @cmpxchg_i32_seq_cst_seq_cst(ptr %ptr, i32 %cmp, i32 %val) nounwind
;
; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i32_seq_cst_seq_cst:
; RV64IA-WMO-ZACAS: # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT: fence rw, rw
; RV64IA-WMO-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0)
; RV64IA-WMO-ZACAS-NEXT: ret
;
; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i32_seq_cst_seq_cst:
; RV64IA-WMO-ZABHA: # %bb.0:
+; RV64IA-WMO-ZABHA-NEXT: fence rw, rw
; RV64IA-WMO-ZABHA-NEXT: amocas.w.aqrl a1, a2, (a0)
; RV64IA-WMO-ZABHA-NEXT: ret
;
@@ -4856,11 +4864,13 @@ define void @cmpxchg_i32_seq_cst_seq_cst(ptr %ptr, i32 %cmp, i32 %val) nounwind
;
; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i32_seq_cst_seq_cst:
; RV64IA-TSO-ZACAS: # %bb.0:
+; RV64IA-TSO-ZACAS-NEXT: fence rw, rw
; RV64IA-TSO-ZACAS-NEXT: amocas.w a1, a2, (a0)
; RV64IA-TSO-ZACAS-NEXT: ret
;
; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i32_seq_cst_seq_cst:
; RV64IA-TSO-ZABHA: # %bb.0:
+; RV64IA-TSO-ZABHA-NEXT: fence rw, rw
; RV64IA-TSO-ZABHA-NEXT: amocas.w a1, a2, (a0)
; RV64IA-TSO-ZABHA-NEXT: ret
%res = cmpxchg ptr %ptr, i32 %cmp, i32 %val seq_cst seq_cst
@@ -5753,11 +5763,13 @@ define void @cmpxchg_i64_seq_cst_seq_cst(ptr %ptr, i64 %cmp, i64 %val) nounwind
;
; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i64_seq_cst_seq_cst:
; RV64IA-WMO-ZACAS: # %bb.0:
+; RV64IA-WMO-ZACAS-NEXT: fence rw, rw
; RV64IA-WMO-ZACAS-NEXT: amocas.d.aqrl a1, a2, (a0)
; RV64IA-WMO-ZACAS-NEXT: ret
;
; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i64_seq_cst_seq_cst:
; RV64IA-WMO-ZABHA: # %bb.0:
+; RV64IA-WMO-ZABHA-NEXT: fence rw, rw
; RV64IA-WMO-ZABHA-NEXT: amocas.d.aqrl a1, a2, (a0)
; RV64IA-WMO-ZABHA-NEXT: ret
;
@@ -5774,11 +5786,13 @@ define void @cmpxchg_i64_seq_cst_seq_cst(ptr %ptr, i64 %cmp, i64 %val) nounwind
;
; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i64_seq_cst_seq_cst:
; RV64IA-TSO-ZACAS: # %bb.0:
+; RV64IA-TSO-ZACAS-NEXT: fence rw, rw
; RV64IA-TSO-ZACAS-NEXT: amocas.d a1, a2, (a0)
; RV64IA-TSO-ZACAS-NEXT: ret
;
; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i64_seq_cst_seq_cst:
; RV64IA-TSO-ZABHA: # %bb.0:
+; RV64IA-TSO-ZABHA-NEXT: fence rw, rw
; RV64IA-TSO-ZABHA-NEXT: amocas.d a1, a2, (a0)
; RV64IA-TSO-ZABHA-NEXT: ret
%res = cmpxchg ptr %ptr, i64 %cmp, i64 %val seq_cst seq_cst
diff --git a/llvm/test/CodeGen/RISCV/atomic-rmw.ll b/llvm/test/CodeGen/RISCV/atomic-rmw.ll
index 4223440b9cb888..03157e13bff787 100644
--- a/llvm/test/CodeGen/RISCV/atomic-rmw.ll
+++ b/llvm/test/CodeGen/RISCV/atomic-rmw.ll
@@ -4437,6 +4437,7 @@ define i8 @atomicrmw_nand_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV64IA-WMO-ZABHA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
; RV64IA-WMO-ZABHA-ZACAS-NEXT: and a3, a0, a1
; RV64IA-WMO-ZABHA-ZACAS-NEXT: not a3, a3
+; RV64IA-WMO-ZABHA-ZACAS-NEXT: fence rw, rw
; RV64IA-WMO-ZABHA-ZACAS-NEXT: slli a4, a0, 56
; RV64IA-WMO-ZABHA-ZACAS-NEXT: amocas.b.aqrl a0, a3, (a2)
; RV64IA-WMO-ZABHA-ZACAS-NEXT: srai a4, a4, 56
@@ -4452,6 +4453,7 @@ define i8 @atomicrmw_nand_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZABHA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
; RV64IA-TSO-ZABHA-ZACAS-NEXT: and a3, a0, a1
; RV64IA-TSO-ZABHA-ZACAS-NEXT: not a3, a3
+; RV64IA-TSO-ZABHA-ZACAS-NEXT: fence rw, rw
; RV64IA-TSO-ZABHA-ZACAS-NEXT: slli a4, a0, 56
; RV64IA-TSO-ZABHA-ZACAS-NEXT: amocas.b a0, a3, (a2)
; RV64IA-TSO-ZABHA-ZACAS-NEXT: srai a4, a4, 56
@@ -14410,6 +14412,7 @@ define i16 @atomicrmw_nand_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64IA-WMO-ZABHA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
; RV64IA-WMO-ZABHA-ZACAS-NEXT: and a3, a0, a1
; RV64IA-WMO-ZABHA-ZACAS-NEXT: not a3, a3
+; RV64IA-WMO-ZABHA-ZACAS-NEXT: fence rw, rw
; RV64IA-WMO-ZABHA-ZACAS-NEXT: slli a4, a0, 48
; RV64IA-WMO-ZABHA-ZACAS-NEXT: amocas.h.aqrl a0, a3, (a2)
; RV64IA-WMO-ZABHA-ZACAS-NEXT: srai a4, a4, 48
@@ -14425,6 +14428,7 @@ define i16 @atomicrmw_nand_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZABHA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
; RV64IA-TSO-ZABHA-ZACAS-NEXT: and a3, a0, a1
; RV64IA-TSO-ZABHA-ZACAS-NEXT: not a3, a3
+; RV64IA-TSO-ZABHA-ZACAS-NEXT: fence rw, rw
; RV64IA-TSO-ZABHA-ZACAS-NEXT: slli a4, a0, 48
; RV64IA-TSO-ZABHA-ZACAS-NEXT: amocas.h a0, a3, (a2)
; RV64IA-TSO-ZABHA-ZACAS-NEXT: srai a4, a4, 48
@@ -21637,6 +21641,7 @@ define i32 @atomicrmw_nand_i32_seq_cst(ptr %a, i32 %b) nounwind {
; RV32IA-WMO-ZACAS-NEXT: mv a3, a0
; RV32IA-WMO-ZACAS-NEXT: and a4, a0, a1
; RV32IA-WMO-ZACAS-NEXT: not a4, a4
+; RV32IA-WMO-ZACAS-NEXT: fence rw, rw
; RV32IA-WMO-ZACAS-NEXT: amocas.w.aqrl a0, a4, (a2)
; RV32IA-WMO-ZACAS-NEXT: bne a0, a3, .LBB154_1
; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
@@ -21651,6 +21656,7 @@ define i32 @atomicrmw_nand_i32_seq_cst(ptr %a, i32 %b) nounwind {
; RV32IA-TSO-ZACAS-NEXT: mv a3, a0
; RV32IA-TSO-ZACAS-NEXT: and a4, a0, a1
; RV32IA-TSO-ZACAS-NEXT: not a4, a4
+; RV32IA-TSO-ZACAS-NEXT: fence rw, rw
; RV32IA-TSO-ZACAS-NEXT: amocas.w a0, a4, (a2)
; RV32IA-TSO-ZACAS-NEXT: bne a0, a3, .LBB154_1
; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
@@ -21665,6 +21671,7 @@ define i32 @atomicrmw_nand_i32_seq_cst(ptr %a, i32 %b) nounwind {
; RV64IA-WMO-ZACAS-NEXT: mv a3, a0
; RV64IA-WMO-ZACAS-NEXT: and a4, a0, a1
; RV64IA-WMO-ZACAS-NEXT: not a4, a4
+; RV64IA-WMO-ZACAS-NEXT: fence rw, rw
; RV64IA-WMO-ZACAS-NEXT: amocas.w.aqrl a0, a4, (a2)
; RV64IA-WMO-ZACAS-NEXT: bne a0, a3, .LBB154_1
; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
@@ -21679,6 +21686,7 @@ define i32 @atomicrmw_nand_i32_seq_cst(ptr %a, i32 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: mv a3, a0
; RV64IA-TSO-ZACAS-NEXT: and a4, a0, a1
; RV64IA-TSO-ZACAS-NEXT: not a4, a4
+; RV64IA-TSO-ZACAS-NEXT: fence rw, rw
; RV64IA-TSO-ZACAS-NEXT: amocas.w a0, a4, (a2)
; RV64IA-TSO-ZACAS-NEXT: bne a0, a3, .LBB154_1
; RV64IA-TSO-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
@@ -21717,6 +21725,7 @@ define i32 @atomicrmw_nand_i32_seq_cst(ptr %a, i32 %b) nounwind {
; RV64IA-WMO-ZABHA-ZACAS-NEXT: mv a3, a0
; RV64IA-WMO-ZABHA-ZACAS-NEXT: and a4, a0, a1
; RV64IA-WMO-ZABHA-ZACAS-NEXT: not a4, a4
+; RV64IA-WMO-ZABHA-ZACAS-NEXT: fence rw, rw
; RV64IA-WMO-ZABHA-ZACAS-NEXT: amocas.w.aqrl a0, a4, (a2)
; RV64IA-WMO-ZABHA-ZACAS-NEXT: bne a0, a3, .LBB154_1
; RV64IA-WMO-ZABHA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
@@ -21731,6 +21740,7 @@ define i32 @atomicrmw_nand_i32_seq_cst(ptr %a, i32 %b) nounwind {
; RV64IA-TSO-ZABHA-ZACAS-NEXT: mv a3, a0
; RV64IA-TSO-ZABHA-ZACAS-NEXT: and a4, a0, a1
; RV64IA-TSO-ZABHA-ZACAS-NEXT: not a4, a4
+; RV64IA-TSO-ZABHA-ZACAS-NEXT: fence rw, rw
; RV64IA-TSO-ZABHA-ZACAS-NEXT: amocas.w a0, a4, (a2)
; RV64IA-TSO-ZABHA-ZACAS-NEXT: bne a0, a3, .LBB154_1
; RV64IA-TSO-ZABHA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
@@ -25546,6 +25556,7 @@ define i64 @atomicrmw_nand_i64_seq_cst(ptr %a, i64 %b) nounwind {
; RV64IA-WMO-ZACAS-NEXT: mv a3, a0
; RV64IA-WMO-ZACAS-NEXT: and a4, a0, a1
; RV64IA-WMO-ZACAS-NEXT: not a4, a4
+; RV64IA-WMO-ZACAS-NEXT: fence rw, rw
; RV64IA-WMO-ZACAS-NEXT: amocas.d.aqrl a0, a4, (a2)
; RV64IA-WMO-ZACAS-NEXT: bne a0, a3, .LBB209_1
; RV64IA-WMO-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
@@ -25560,6 +25571,7 @@ define i64 @atomicrmw_nand_i64_seq_cst(ptr %a, i64 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: mv a3, a0
; RV64IA-TSO-ZACAS-NEXT: and a4, a0, a1
; RV64IA-TSO-ZACAS-NEXT: not a4, a4
+; RV64IA-TSO-ZACAS-NEXT: fence rw, rw
; RV64IA-TSO-ZACAS-NEXT: amocas.d a0, a4, (a2)
; RV64IA-TSO-ZACAS-NEXT: bne a0, a3, .LBB209_1
; RV64IA-TSO-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
@@ -25598,6 +25610,7 @@ define i64 @atomicrmw_nand_i64_seq_cst(ptr %a, i64 %b) nounwind {
; RV64IA-WMO-ZABHA-ZACAS-NEXT: mv a3, a0
; RV64IA-WMO-ZABHA-ZACAS-NEXT: and a4, a0, a1
; RV64IA-WMO-ZABHA-ZACAS-NEXT: not a4, a4
+; RV64IA-WMO-ZABHA-ZACAS-NEXT: fence rw, rw
; RV64IA-WMO-ZABHA-ZACAS-NEXT: amocas.d.aqrl a0, a4, (a2)
; RV64IA-WMO-ZABHA-ZACAS-NEXT: bne a0, a3, .LBB209_1
; RV64IA-WMO-ZABHA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
@@ -25612,6 +25625,7 @@ define i64 @atomicrmw_nand_i64_seq_cst(ptr %a, i64 %b) nounwind {
; RV64IA-TSO-ZABHA-ZACAS-NEXT: mv a3, a0
; RV64IA-TSO-ZABHA-ZACAS-NEXT: and a4, a0, a1
; RV64IA-TSO-ZABHA-ZACAS-NEXT: not a4, a4
+; RV64IA-TSO-ZABHA-ZACAS-NEXT: fence rw, rw
; RV64IA-TSO-ZABHA-ZACAS-NEXT: amocas.d a0, a4, (a2)
; RV64IA-TSO-ZABHA-ZACAS-NEXT: bne a0, a3, .LBB209_1
; RV64IA-TSO-ZABHA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
diff --git a/llvm/test/CodeGen/RISCV/atomic-signext.ll b/llvm/test/CodeGen/RISCV/atomic-signext.ll
index 775c17c3ceb3f9..c143be478948ed 100644
--- a/llvm/test/CodeGen/RISCV/atomic-signext.ll
+++ b/llvm/test/CodeGen/RISCV/atomic-signext.ll
@@ -5562,6 +5562,7 @@ define signext i32 @cmpxchg_i32_monotonic_crossbb(ptr %ptr, i32 signext %cmp, i3
; RV32IA-ZACAS: # %bb.0:
; RV32IA-ZACAS-NEXT: beqz a3, .LBB64_2
; RV32IA-ZACAS-NEXT: # %bb.1: # %then
+; RV32IA-ZACAS-NEXT: fence rw, rw
; RV32IA-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0)
; RV32IA-ZACAS-NEXT: mv a0, a1
; RV32IA-ZACAS-NEXT: ret
@@ -5612,6 +5613,7 @@ define signext i32 @cmpxchg_i32_monotonic_crossbb(ptr %ptr, i32 signext %cmp, i3
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: beqz a3, .LBB64_2
; RV64IA-ZACAS-NEXT: # %bb.1: # %then
+; RV64IA-ZACAS-NEXT: fence rw, rw
; RV64IA-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0)
; RV64IA-ZACAS-NEXT: mv a0, a1
; RV64IA-ZACAS-NEXT: ret
More information about the llvm-commits
mailing list