[clang] e4ed93c - [PowerPC] Implement XL compatible behavior of __compare_and_swap
Kai Luo via cfe-commits
cfe-commits at lists.llvm.org
Thu Jul 22 18:16:11 PDT 2021
Author: Kai Luo
Date: 2021-07-23T01:16:02Z
New Revision: e4ed93cb25acc624a1d32e77d2c63c8c25fddbae
URL: https://github.com/llvm/llvm-project/commit/e4ed93cb25acc624a1d32e77d2c63c8c25fddbae
DIFF: https://github.com/llvm/llvm-project/commit/e4ed93cb25acc624a1d32e77d2c63c8c25fddbae.diff
LOG: [PowerPC] Implement XL compatible behavior of __compare_and_swap
According to https://www.ibm.com/docs/en/xl-c-and-cpp-aix/16.1?topic=functions-compare-swap-compare-swaplp
XL's `__compare_and_swap` has a weird behavior that
> In either case, the contents of the memory location specified by addr are copied into the memory location specified by old_val_addr.
(unlike c11 `atomic_compare_exchange` specified in http://www.open-std.org/jtc1/sc22/wg14/www/docs/n1548.pdf)
This patch makes clang's implementation follow this behavior.
Reviewed By: jsji
Differential Revision: https://reviews.llvm.org/D106344
Added:
llvm/test/CodeGen/PowerPC/opt-builtins-ppc-xlcompat-cas.ll
Modified:
clang/lib/CodeGen/CGBuiltin.cpp
clang/test/CodeGen/builtins-ppc-xlcompat-cas.c
Removed:
################################################################################
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index a4d0d87f2cbc..a3a0c3e88359 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -15662,6 +15662,15 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
auto Pair = EmitAtomicCompareExchange(
LV, RValue::get(OldVal), RValue::get(Ops[2]), E->getExprLoc(),
llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Monotonic, true);
+ // Unlike C11's atomic_compare_exchange, according to
+ // https://www.ibm.com/docs/en/xl-c-and-cpp-aix/16.1?topic=functions-compare-swap-compare-swaplp
+ // > In either case, the contents of the memory location specified by addr
+ // > are copied into the memory location specified by old_val_addr.
+ // But it doesn't specify whether the store to OldValAddr is atomic or
+ // which ordering to use. For now, follow XL's codegen and treat it as a
+ // normal store.
+ Value *LoadedVal = Pair.first.getScalarVal();
+ Builder.CreateStore(LoadedVal, OldValAddr);
return Pair.second;
}
case PPC::BI__builtin_ppc_fetch_and_add:
diff --git a/clang/test/CodeGen/builtins-ppc-xlcompat-cas.c b/clang/test/CodeGen/builtins-ppc-xlcompat-cas.c
index ea4b349d9a52..cd66eb09d36f 100644
--- a/clang/test/CodeGen/builtins-ppc-xlcompat-cas.c
+++ b/clang/test/CodeGen/builtins-ppc-xlcompat-cas.c
@@ -19,6 +19,7 @@
// CHECK-NEXT: [[TMP2:%.*]] = cmpxchg weak volatile i32* [[A_ADDR]], i32 [[TMP1]], i32 [[TMP0]] monotonic monotonic, align 4
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i32, i1 } [[TMP2]], 0
// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
+// CHECK-NEXT: store i32 [[TMP3]], i32* [[B_ADDR]], align 4
// CHECK-NEXT: ret void
//
void test_builtin_ppc_compare_and_swap(int a, int b, int c) {
@@ -39,6 +40,7 @@ void test_builtin_ppc_compare_and_swap(int a, int b, int c) {
// CHECK-NEXT: [[TMP2:%.*]] = cmpxchg weak volatile i64* [[A_ADDR]], i64 [[TMP1]], i64 [[TMP0]] monotonic monotonic, align 8
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP2]], 0
// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
+// CHECK-NEXT: store i64 [[TMP3]], i64* [[B_ADDR]], align 8
// CHECK-NEXT: ret void
//
void test_builtin_ppc_compare_and_swaplp(long a, long b, long c) {
diff --git a/llvm/test/CodeGen/PowerPC/opt-builtins-ppc-xlcompat-cas.ll b/llvm/test/CodeGen/PowerPC/opt-builtins-ppc-xlcompat-cas.ll
new file mode 100644
index 000000000000..a3279c8a2c2c
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/opt-builtins-ppc-xlcompat-cas.ll
@@ -0,0 +1,70 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -enable-new-pm=1 -S -passes='default<O3>' %s -o - | FileCheck %s
+define void @test_builtin_ppc_compare_and_swaplp(i64 %a, i64 %b, i64 %c) {
+; CHECK-LABEL: @test_builtin_ppc_compare_and_swaplp(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
+; CHECK-NEXT: store i64 [[A:%.*]], i64* [[A_ADDR]], align 8
+; CHECK-NEXT: [[TMP0:%.*]] = cmpxchg weak volatile i64* [[A_ADDR]], i64 [[B:%.*]], i64 [[C:%.*]] monotonic monotonic, align 8
+; CHECK-NEXT: ret void
+;
+entry:
+ %a.addr = alloca i64, align 8
+ %b.addr = alloca i64, align 8
+ %c.addr = alloca i64, align 8
+ store i64 %a, i64* %a.addr, align 8
+ store i64 %b, i64* %b.addr, align 8
+ store i64 %c, i64* %c.addr, align 8
+ %0 = load i64, i64* %c.addr, align 8
+ %1 = load i64, i64* %b.addr, align 8
+ %2 = cmpxchg weak volatile i64* %a.addr, i64 %1, i64 %0 monotonic monotonic, align 8
+ %3 = extractvalue { i64, i1 } %2, 0
+ %4 = extractvalue { i64, i1 } %2, 1
+ store i64 %3, i64* %b.addr, align 8
+ ret void
+}
+
+define dso_local void @test_builtin_ppc_compare_and_swaplp_loop(i64* %a) {
+; CHECK-LABEL: @test_builtin_ppc_compare_and_swaplp_loop(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CALL:%.*]] = tail call i64 bitcast (i64 (...)* @bar to i64 ()*)()
+; CHECK-NEXT: br label [[DO_BODY:%.*]]
+; CHECK: do.body:
+; CHECK-NEXT: [[X_0:%.*]] = phi i64 [ [[CALL]], [[ENTRY:%.*]] ], [ [[TMP1:%.*]], [[DO_BODY]] ]
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[X_0]], 1
+; CHECK-NEXT: [[TMP0:%.*]] = cmpxchg weak volatile i64* [[A:%.*]], i64 [[X_0]], i64 [[ADD]] monotonic monotonic, align 8
+; CHECK-NEXT: [[TMP1]] = extractvalue { i64, i1 } [[TMP0]], 0
+; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i64, i1 } [[TMP0]], 1
+; CHECK-NEXT: br i1 [[TMP2]], label [[DO_BODY]], label [[DO_END:%.*]]
+; CHECK: do.end:
+; CHECK-NEXT: ret void
+;
+entry:
+ %a.addr = alloca i64*, align 8
+ %x = alloca i64, align 8
+ store i64* %a, i64** %a.addr, align 8
+ %call = call i64 bitcast (i64 (...)* @bar to i64 ()*)()
+ store i64 %call, i64* %x, align 8
+ br label %do.body
+
+do.body: ; preds = %do.cond, %entry
+ br label %do.cond
+
+do.cond: ; preds = %do.body
+ %0 = load i64*, i64** %a.addr, align 8
+ %1 = load i64, i64* %x, align 8
+ %add = add nsw i64 %1, 1
+ %2 = load i64*, i64** %a.addr, align 8
+ %3 = load i64, i64* %x, align 8
+ %4 = cmpxchg weak volatile i64* %2, i64 %3, i64 %add monotonic monotonic, align 8
+ %5 = extractvalue { i64, i1 } %4, 0
+ %6 = extractvalue { i64, i1 } %4, 1
+ store i64 %5, i64* %x, align 8
+ %tobool = icmp ne i1 %6, false
+ br i1 %tobool, label %do.body, label %do.end
+
+do.end: ; preds = %do.cond
+ ret void
+}
+
+declare i64 @bar(...)
More information about the cfe-commits
mailing list