[llvm] e9a4b8e - [LoongArch] Optimize the atomic store with amswap_db.[w/d]
via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 23 02:14:03 PDT 2022
Author: gonglingqin
Date: 2022-08-23T17:11:57+08:00
New Revision: e9a4b8e39783b90e804a59a3b2d59b332d5c9fa8
URL: https://github.com/llvm/llvm-project/commit/e9a4b8e39783b90e804a59a3b2d59b332d5c9fa8
DIFF: https://github.com/llvm/llvm-project/commit/e9a4b8e39783b90e804a59a3b2d59b332d5c9fa8.diff
LOG: [LoongArch] Optimize the atomic store with amswap_db.[w/d]
When AtomicOrdering is release or stronger, use
amswap_db.[w/d] $zero, $a1, $a0
instead of
dbar 0
st.[w/d] $a1, $a0, 0
Thanks to @xry111 for the suggestion: https://reviews.llvm.org/D128901#3626635
Differential Revision: https://reviews.llvm.org/D129838
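
For illustration, the IR that exercises this change looks like the following (a minimal sketch mirroring the updated load-store-atomic.ll test below; under the standard calling convention the pointer arrives in $a0 and the value in $a1):

define void @store_release_i32(ptr %ptr, i32 signext %v) {
  store atomic i32 %v, ptr %ptr release, align 4
  ret void
}

; LA64 before: dbar 0
;              st.w $a1, $a0, 0
; LA64 after:  amswap_db.w $zero, $a1, $a0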
Added:
Modified:
llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
llvm/lib/Target/LoongArch/LoongArchISelLowering.h
llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll
llvm/test/Transforms/AtomicExpand/LoongArch/load-store-atomic.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 63ffe125b5418..56185b0ef5d14 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -1746,3 +1746,21 @@ bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
bool LoongArchTargetLowering::isCheapToSpeculateCttz() const { return true; }
bool LoongArchTargetLowering::isCheapToSpeculateCtlz() const { return true; }
+
+bool LoongArchTargetLowering::shouldInsertFencesForAtomic(
+ const Instruction *I) const {
+ if (!Subtarget.is64Bit())
+ return isa<LoadInst>(I) || isa<StoreInst>(I);
+
+ if (isa<LoadInst>(I))
+ return true;
+
+ // On LA64, atomic store operations with an IntegerBitWidth of 32 or 64 do
+ // not require fences because we can use amswap_db.[w/d].
+ if (isa<StoreInst>(I)) {
+ unsigned Size = I->getOperand(0)->getType()->getIntegerBitWidth();
+ return (Size == 8 || Size == 16);
+ }
+
+ return false;
+}
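
As a sketch of the resulting AtomicExpand behavior on LA64 (inferred from the hook above and the test updates below; function names are illustrative): 8- and 16-bit release-or-stronger stores still get explicit fences, while 32- and 64-bit ones keep their ordering and are handled during instruction selection:

define void @store_release_i8(ptr %p, i8 %v) {
  ; expanded to "fence release" + a monotonic store (lowered as dbar + st.b)
  store atomic i8 %v, ptr %p release, align 1
  ret void
}

define void @store_release_i32(ptr %p, i32 %v) {
  ; left intact; selected as amswap_db.w $zero, $a1, $a0
  store atomic i32 %v, ptr %p release, align 4
  ret void
}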
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index d915181373b4e..756be382c3a5b 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -132,9 +132,7 @@ class LoongArchTargetLowering : public TargetLowering {
bool isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize) const override;
- bool shouldInsertFencesForAtomic(const Instruction *I) const override {
- return isa<LoadInst>(I) || isa<StoreInst>(I);
- }
+ bool shouldInsertFencesForAtomic(const Instruction *I) const override;
};
} // end namespace llvm
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
index 4197cbb5f4e98..f2445b456e3a5 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -995,13 +995,48 @@ defm : LdPat<atomic_load_8, LD_B>;
defm : LdPat<atomic_load_16, LD_H>;
defm : LdPat<atomic_load_32, LD_W>;
+class release_seqcst_store<PatFrag base>
+ : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{
+ AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getSuccessOrdering();
+ return isReleaseOrStronger(Ordering);
+}]>;
+
+class unordered_monotonic_store<PatFrag base>
+ : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{
+ AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getSuccessOrdering();
+ return !isReleaseOrStronger(Ordering);
+}]>;
+
+def atomic_store_release_seqcst_32 : release_seqcst_store<atomic_store_32>;
+def atomic_store_release_seqcst_64 : release_seqcst_store<atomic_store_64>;
+def atomic_store_unordered_monotonic_32
+ : unordered_monotonic_store<atomic_store_32>;
+def atomic_store_unordered_monotonic_64
+ : unordered_monotonic_store<atomic_store_64>;
+
defm : StPat<atomic_store_8, ST_B, GPR, GRLenVT>;
defm : StPat<atomic_store_16, ST_H, GPR, GRLenVT>;
-defm : StPat<atomic_store_32, ST_W, GPR, i32>, Requires<[IsLA32]>;
+defm : StPat<atomic_store_unordered_monotonic_32, ST_W, GPR, i32>,
+ Requires<[IsLA32]>;
+
+def PseudoAtomicStoreW : Pseudo<(outs GPR:$dst), (ins GPR:$rj, GPR:$rk), []>,
+ PseudoInstExpansion<(AMSWAP_DB_W R0,
+ GPR:$rk, GPR:$rj)>;
+
+def : Pat<(atomic_store_release_seqcst_32 GPR:$rj, GPR:$rk),
+ (PseudoAtomicStoreW GPR:$rj, GPR:$rk)>;
+
let Predicates = [IsLA64] in {
+def PseudoAtomicStoreD : Pseudo<(outs GPR:$dst), (ins GPR:$rj, GPR:$rk), []>,
+ PseudoInstExpansion<(AMSWAP_DB_D R0,
+ GPR:$rk, GPR:$rj)>;
+
+def : Pat<(atomic_store_release_seqcst_64 GPR:$rj, GPR:$rk),
+ (PseudoAtomicStoreD GPR:$rj, GPR:$rk)>;
+
defm : LdPat<atomic_load_64, LD_D>;
-defm : StPat<atomic_store_32, ST_W, GPR, i64>;
-defm : StPat<atomic_store_64, ST_D, GPR, i64>;
+defm : StPat<atomic_store_unordered_monotonic_32, ST_W, GPR, i64>;
+defm : StPat<atomic_store_unordered_monotonic_64, ST_D, GPR, i64>;
} // Predicates = [IsLA64]
/// Other pseudo-instructions
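
Both release and seq_cst satisfy isReleaseOrStronger(), so they share the pseudo; expanding with R0 (i.e. $zero) as the destination discards the swapped-out value. A sketch of the resulting LA64 selection split for the same i32 store (per the tests below; function names are illustrative):

define void @st_mono(ptr %p, i32 %v) {
  store atomic i32 %v, ptr %p monotonic, align 4 ; -> st.w $a1, $a0, 0
  ret void
}

define void @st_seq_cst(ptr %p, i32 %v) {
  store atomic i32 %v, ptr %p seq_cst, align 4 ; -> amswap_db.w $zero, $a1, $a0
  ret void
}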
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll
index 9600bf0762ff7..e59d480b9246f 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll
@@ -113,8 +113,7 @@ define void @store_release_i32(ptr %ptr, i32 signext %v) {
;
; LA64-LABEL: store_release_i32:
; LA64: # %bb.0:
-; LA64-NEXT: dbar 0
-; LA64-NEXT: st.w $a1, $a0, 0
+; LA64-NEXT: amswap_db.w $zero, $a1, $a0
; LA64-NEXT: ret
store atomic i32 %v, ptr %ptr release, align 4
ret void
@@ -135,9 +134,207 @@ define void @store_release_i64(ptr %ptr, i64 %v) {
;
; LA64-LABEL: store_release_i64:
; LA64: # %bb.0:
-; LA64-NEXT: dbar 0
-; LA64-NEXT: st.d $a1, $a0, 0
+; LA64-NEXT: amswap_db.d $zero, $a1, $a0
; LA64-NEXT: ret
store atomic i64 %v, ptr %ptr release, align 8
ret void
}
+
+define void @store_unordered_i8(ptr %ptr, i8 signext %v) {
+; LA32-LABEL: store_unordered_i8:
+; LA32: # %bb.0:
+; LA32-NEXT: st.b $a1, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: store_unordered_i8:
+; LA64: # %bb.0:
+; LA64-NEXT: st.b $a1, $a0, 0
+; LA64-NEXT: ret
+ store atomic i8 %v, ptr %ptr unordered, align 1
+ ret void
+}
+
+define void @store_unordered_i16(ptr %ptr, i16 signext %v) {
+; LA32-LABEL: store_unordered_i16:
+; LA32: # %bb.0:
+; LA32-NEXT: st.h $a1, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: store_unordered_i16:
+; LA64: # %bb.0:
+; LA64-NEXT: st.h $a1, $a0, 0
+; LA64-NEXT: ret
+ store atomic i16 %v, ptr %ptr unordered, align 2
+ ret void
+}
+
+define void @store_unordered_i32(ptr %ptr, i32 signext %v) {
+; LA32-LABEL: store_unordered_i32:
+; LA32: # %bb.0:
+; LA32-NEXT: st.w $a1, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: store_unordered_i32:
+; LA64: # %bb.0:
+; LA64-NEXT: st.w $a1, $a0, 0
+; LA64-NEXT: ret
+ store atomic i32 %v, ptr %ptr unordered, align 4
+ ret void
+}
+
+define void @store_unordered_i64(ptr %ptr, i64 %v) {
+; LA32-LABEL: store_unordered_i64:
+; LA32: # %bb.0:
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: .cfi_def_cfa_offset 16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: .cfi_offset 1, -4
+; LA32-NEXT: move $a3, $zero
+; LA32-NEXT: bl %plt(__atomic_store_8)
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: store_unordered_i64:
+; LA64: # %bb.0:
+; LA64-NEXT: st.d $a1, $a0, 0
+; LA64-NEXT: ret
+ store atomic i64 %v, ptr %ptr unordered, align 8
+ ret void
+}
+
+define void @store_monotonic_i8(ptr %ptr, i8 signext %v) {
+; LA32-LABEL: store_monotonic_i8:
+; LA32: # %bb.0:
+; LA32-NEXT: st.b $a1, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: store_monotonic_i8:
+; LA64: # %bb.0:
+; LA64-NEXT: st.b $a1, $a0, 0
+; LA64-NEXT: ret
+ store atomic i8 %v, ptr %ptr monotonic, align 1
+ ret void
+}
+
+define void @store_monotonic_i16(ptr %ptr, i16 signext %v) {
+; LA32-LABEL: store_monotonic_i16:
+; LA32: # %bb.0:
+; LA32-NEXT: st.h $a1, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: store_monotonic_i16:
+; LA64: # %bb.0:
+; LA64-NEXT: st.h $a1, $a0, 0
+; LA64-NEXT: ret
+ store atomic i16 %v, ptr %ptr monotonic, align 2
+ ret void
+}
+
+define void @store_monotonic_i32(ptr %ptr, i32 signext %v) {
+; LA32-LABEL: store_monotonic_i32:
+; LA32: # %bb.0:
+; LA32-NEXT: st.w $a1, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: store_monotonic_i32:
+; LA64: # %bb.0:
+; LA64-NEXT: st.w $a1, $a0, 0
+; LA64-NEXT: ret
+ store atomic i32 %v, ptr %ptr monotonic, align 4
+ ret void
+}
+
+define void @store_monotonic_i64(ptr %ptr, i64 %v) {
+; LA32-LABEL: store_monotonic_i64:
+; LA32: # %bb.0:
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: .cfi_def_cfa_offset 16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: .cfi_offset 1, -4
+; LA32-NEXT: move $a3, $zero
+; LA32-NEXT: bl %plt(__atomic_store_8)
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: store_monotonic_i64:
+; LA64: # %bb.0:
+; LA64-NEXT: st.d $a1, $a0, 0
+; LA64-NEXT: ret
+ store atomic i64 %v, ptr %ptr monotonic, align 8
+ ret void
+}
+
+define void @store_seq_cst_i8(ptr %ptr, i8 signext %v) {
+; LA32-LABEL: store_seq_cst_i8:
+; LA32: # %bb.0:
+; LA32-NEXT: dbar 0
+; LA32-NEXT: st.b $a1, $a0, 0
+; LA32-NEXT: dbar 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: store_seq_cst_i8:
+; LA64: # %bb.0:
+; LA64-NEXT: dbar 0
+; LA64-NEXT: st.b $a1, $a0, 0
+; LA64-NEXT: dbar 0
+; LA64-NEXT: ret
+ store atomic i8 %v, ptr %ptr seq_cst, align 1
+ ret void
+}
+
+define void @store_seq_cst_i16(ptr %ptr, i16 signext %v) {
+; LA32-LABEL: store_seq_cst_i16:
+; LA32: # %bb.0:
+; LA32-NEXT: dbar 0
+; LA32-NEXT: st.h $a1, $a0, 0
+; LA32-NEXT: dbar 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: store_seq_cst_i16:
+; LA64: # %bb.0:
+; LA64-NEXT: dbar 0
+; LA64-NEXT: st.h $a1, $a0, 0
+; LA64-NEXT: dbar 0
+; LA64-NEXT: ret
+ store atomic i16 %v, ptr %ptr seq_cst, align 2
+ ret void
+}
+
+define void @store_seq_cst_i32(ptr %ptr, i32 signext %v) {
+; LA32-LABEL: store_seq_cst_i32:
+; LA32: # %bb.0:
+; LA32-NEXT: dbar 0
+; LA32-NEXT: st.w $a1, $a0, 0
+; LA32-NEXT: dbar 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: store_seq_cst_i32:
+; LA64: # %bb.0:
+; LA64-NEXT: amswap_db.w $zero, $a1, $a0
+; LA64-NEXT: ret
+ store atomic i32 %v, ptr %ptr seq_cst, align 4
+ ret void
+}
+
+define void @store_seq_cst_i64(ptr %ptr, i64 %v) {
+; LA32-LABEL: store_seq_cst_i64:
+; LA32: # %bb.0:
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: .cfi_def_cfa_offset 16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: .cfi_offset 1, -4
+; LA32-NEXT: ori $a3, $zero, 5
+; LA32-NEXT: bl %plt(__atomic_store_8)
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: store_seq_cst_i64:
+; LA64: # %bb.0:
+; LA64-NEXT: amswap_db.d $zero, $a1, $a0
+; LA64-NEXT: ret
+ store atomic i64 %v, ptr %ptr seq_cst, align 8
+ ret void
+}
diff --git a/llvm/test/Transforms/AtomicExpand/LoongArch/load-store-atomic.ll b/llvm/test/Transforms/AtomicExpand/LoongArch/load-store-atomic.ll
index 4acf9761421ae..b0875669bc3a2 100644
--- a/llvm/test/Transforms/AtomicExpand/LoongArch/load-store-atomic.ll
+++ b/llvm/test/Transforms/AtomicExpand/LoongArch/load-store-atomic.ll
@@ -98,8 +98,7 @@ define void @store_release_i32(ptr %ptr, i32 signext %v) {
; LA32-NEXT: ret void
;
; LA64-LABEL: @store_release_i32(
-; LA64-NEXT: fence release
-; LA64-NEXT: store atomic i32 [[V:%.*]], ptr [[PTR:%.*]] monotonic, align 4
+; LA64-NEXT: store atomic i32 [[V:%.*]], ptr [[PTR:%.*]] release, align 4
; LA64-NEXT: ret void
;
store atomic i32 %v, ptr %ptr release, align 4
@@ -112,8 +111,7 @@ define void @store_release_i64(ptr %ptr, i64 %v) {
; LA32-NEXT: ret void
;
; LA64-LABEL: @store_release_i64(
-; LA64-NEXT: fence release
-; LA64-NEXT: store atomic i64 [[V:%.*]], ptr [[PTR:%.*]] monotonic, align 8
+; LA64-NEXT: store atomic i64 [[V:%.*]], ptr [[PTR:%.*]] release, align 8
; LA64-NEXT: ret void
;
store atomic i64 %v, ptr %ptr release, align 8