[llvm] r323634 - [AArch64] Generate the CASP instruction for 128-bit cmpxchg
Oliver Stannard via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 29 01:18:37 PST 2018
Author: olista01
Date: Mon Jan 29 01:18:37 2018
New Revision: 323634
URL: http://llvm.org/viewvc/llvm-project?rev=323634&view=rev
Log:
[AArch64] Generate the CASP instruction for 128-bit cmpxchg
The Large System Extension added an atomic compare-and-swap instruction
that operates on a pair of 64-bit registers, which we can use to
implement a 128-bit cmpxchg.
Because i128 is not a legal type for AArch64 we have to do all of the
instruction selection in C++, and the instruction requires even/odd
register pairs, so we have to wrap it in REG_SEQUENCE and EXTRACT_SUBREG
nodes. This is very similar to what we do for 64-bit cmpxchg in the ARM
backend.
Differential revision: https://reviews.llvm.org/D42104
Modified:
llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp
llvm/trunk/test/CodeGen/AArch64/atomic-ops-lse.ll
Modified: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp?rev=323634&r1=323633&r2=323634&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp Mon Jan 29 01:18:37 2018
@@ -10655,11 +10655,79 @@ static std::pair<SDValue, SDValue> split
return std::make_pair(Lo, Hi);
}
+// Create an even/odd pair of X registers holding integer value V.
+static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V) {
+ SDLoc dl(V.getNode());
+ SDValue VLo = DAG.getAnyExtOrTrunc(V, dl, MVT::i64);
+ SDValue VHi = DAG.getAnyExtOrTrunc(
+ DAG.getNode(ISD::SRL, dl, MVT::i128, V, DAG.getConstant(64, dl, MVT::i64)),
+ dl, MVT::i64);
+ if (DAG.getDataLayout().isBigEndian())
+ std::swap (VLo, VHi);
+ SDValue RegClass =
+ DAG.getTargetConstant(AArch64::XSeqPairsClassRegClassID, dl, MVT::i32);
+ SDValue SubReg0 = DAG.getTargetConstant(AArch64::sube64, dl, MVT::i32);
+ SDValue SubReg1 = DAG.getTargetConstant(AArch64::subo64, dl, MVT::i32);
+ const SDValue Ops[] = { RegClass, VLo, SubReg0, VHi, SubReg1 };
+ return SDValue(
+ DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::Untyped, Ops), 0);
+}
+
static void ReplaceCMP_SWAP_128Results(SDNode *N,
- SmallVectorImpl<SDValue> & Results,
- SelectionDAG &DAG) {
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG,
+ const AArch64Subtarget *Subtarget) {
assert(N->getValueType(0) == MVT::i128 &&
"AtomicCmpSwap on types less than 128 should be legal");
+
+ if (Subtarget->hasLSE()) {
+ // LSE has a 128-bit compare and swap (CASP), but i128 is not a legal type,
+ // so lower it here, wrapped in REG_SEQUENCE and EXTRACT_SUBREG.
+ SDValue Ops[] = {
+ createGPRPairNode(DAG, N->getOperand(2)), // Compare value
+ createGPRPairNode(DAG, N->getOperand(3)), // Store value
+ N->getOperand(1), // Ptr
+ N->getOperand(0), // Chain in
+ };
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineSDNode::mmo_iterator MemOp = MF.allocateMemRefsArray(1);
+ MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
+
+ unsigned Opcode;
+ switch (MemOp[0]->getOrdering()) {
+ case AtomicOrdering::Monotonic:
+ Opcode = AArch64::CASPX;
+ break;
+ case AtomicOrdering::Acquire:
+ Opcode = AArch64::CASPAX;
+ break;
+ case AtomicOrdering::Release:
+ Opcode = AArch64::CASPLX;
+ break;
+ case AtomicOrdering::AcquireRelease:
+ case AtomicOrdering::SequentiallyConsistent:
+ Opcode = AArch64::CASPALX;
+ break;
+ default:
+ llvm_unreachable("Unexpected ordering!");
+ }
+
+ MachineSDNode *CmpSwap = DAG.getMachineNode(
+ Opcode, SDLoc(N), DAG.getVTList(MVT::Untyped, MVT::Other), Ops);
+ CmpSwap->setMemRefs(MemOp, MemOp + 1);
+
+ unsigned SubReg1 = AArch64::sube64, SubReg2 = AArch64::subo64;
+ if (DAG.getDataLayout().isBigEndian())
+ std::swap(SubReg1, SubReg2);
+ Results.push_back(DAG.getTargetExtractSubreg(SubReg1, SDLoc(N), MVT::i64,
+ SDValue(CmpSwap, 0)));
+ Results.push_back(DAG.getTargetExtractSubreg(SubReg2, SDLoc(N), MVT::i64,
+ SDValue(CmpSwap, 0)));
+ Results.push_back(SDValue(CmpSwap, 1)); // Chain out
+ return;
+ }
+
auto Desired = splitInt128(N->getOperand(2), DAG);
auto New = splitInt128(N->getOperand(3), DAG);
SDValue Ops[] = {N->getOperand(1), Desired.first, Desired.second,
@@ -10718,7 +10786,7 @@ void AArch64TargetLowering::ReplaceNodeR
// Let normal code take care of it by not adding anything to Results.
return;
case ISD::ATOMIC_CMP_SWAP:
- ReplaceCMP_SWAP_128Results(N, Results, DAG);
+ ReplaceCMP_SWAP_128Results(N, Results, DAG, Subtarget);
return;
}
}
Modified: llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp?rev=323634&r1=323633&r2=323634&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp Mon Jan 29 01:18:37 2018
@@ -2592,6 +2592,16 @@ void AArch64InstrInfo::storeRegToStackSl
assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
Opc = AArch64::ST1Twov1d;
Offset = false;
+ } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
+ BuildMI(MBB, MBBI, DL, get(AArch64::STPXi))
+ .addReg(TRI->getSubReg(SrcReg, AArch64::sube64),
+ getKillRegState(isKill))
+ .addReg(TRI->getSubReg(SrcReg, AArch64::subo64),
+ getKillRegState(isKill))
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addMemOperand(MMO);
+ return;
}
break;
case 24:
@@ -2690,6 +2700,16 @@ void AArch64InstrInfo::loadRegFromStackS
assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
Opc = AArch64::LD1Twov1d;
Offset = false;
+ } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
+ BuildMI(MBB, MBBI, DL, get(AArch64::LDPXi))
+ .addReg(TRI->getSubReg(DestReg, AArch64::sube64),
+ getDefRegState(true))
+ .addReg(TRI->getSubReg(DestReg, AArch64::subo64),
+ getDefRegState(true))
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addMemOperand(MMO);
+ return;
}
break;
case 24:
Modified: llvm/trunk/test/CodeGen/AArch64/atomic-ops-lse.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/atomic-ops-lse.ll?rev=323634&r1=323633&r2=323634&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/atomic-ops-lse.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/atomic-ops-lse.ll Mon Jan 29 01:18:37 2018
@@ -1,4 +1,5 @@
; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -verify-machineinstrs -mattr=+lse < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64_be-none-linux-gnu -disable-post-ra -verify-machineinstrs -mattr=+lse < %s | FileCheck %s
; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -verify-machineinstrs -mattr=+lse < %s | FileCheck %s --check-prefix=CHECK-REG
; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -verify-machineinstrs -mcpu=saphira < %s | FileCheck %s
@@ -11,6 +12,7 @@
@var16 = global i16 0
@var32 = global i32 0
@var64 = global i64 0
+@var128 = global i128 0
define i8 @test_atomic_load_add_i8(i8 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_add_i8:
@@ -713,6 +715,21 @@ define i64 @test_atomic_cmpxchg_i64(i64
ret i64 %old
}
+define i128 @test_atomic_cmpxchg_i128(i128 %wanted, i128 %new) nounwind {
+; CHECK-LABEL: test_atomic_cmpxchg_i128:
+ %pair = cmpxchg i128* @var128, i128 %wanted, i128 %new acquire acquire
+ %old = extractvalue { i128, i1 } %pair, 0
+
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var128
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var128
+
+; CHECK: caspa x0, x1, x2, x3, [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+ ret i128 %old
+}
+
define i8 @test_atomic_load_sub_i8(i8 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_sub_i8:
%old = atomicrmw sub i8* @var8, i8 %offset seq_cst
@@ -1705,6 +1722,21 @@ define i64 @test_atomic_cmpxchg_i64_acqu
ret i64 %old
}
+define i128 @test_atomic_cmpxchg_i128_acquire(i128 %wanted, i128 %new) nounwind {
+; CHECK-LABEL: test_atomic_cmpxchg_i128_acquire:
+ %pair = cmpxchg i128* @var128, i128 %wanted, i128 %new acquire acquire
+ %old = extractvalue { i128, i1 } %pair, 0
+
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var128
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var128
+
+; CHECK: caspa x0, x1, x2, x3, [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+ ret i128 %old
+}
+
define i8 @test_atomic_cmpxchg_i8_monotonic(i8 %wanted, i8 %new) nounwind {
; CHECK-LABEL: test_atomic_cmpxchg_i8_monotonic:
%pair = cmpxchg i8* @var8, i8 %wanted, i8 %new monotonic monotonic
@@ -1765,6 +1797,21 @@ define i64 @test_atomic_cmpxchg_i64_mono
ret i64 %old
}
+define i128 @test_atomic_cmpxchg_i128_monotonic(i128 %wanted, i128 %new) nounwind {
+; CHECK-LABEL: test_atomic_cmpxchg_i128_monotonic:
+ %pair = cmpxchg i128* @var128, i128 %wanted, i128 %new monotonic monotonic
+ %old = extractvalue { i128, i1 } %pair, 0
+
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var128
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var128
+
+; CHECK: casp x0, x1, x2, x3, [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+ ret i128 %old
+}
+
define i8 @test_atomic_cmpxchg_i8_seq_cst(i8 %wanted, i8 %new) nounwind {
; CHECK-LABEL: test_atomic_cmpxchg_i8_seq_cst:
%pair = cmpxchg i8* @var8, i8 %wanted, i8 %new seq_cst seq_cst
@@ -1825,6 +1872,21 @@ define i64 @test_atomic_cmpxchg_i64_seq_
ret i64 %old
}
+define i128 @test_atomic_cmpxchg_i128_seq_cst(i128 %wanted, i128 %new) nounwind {
+; CHECK-LABEL: test_atomic_cmpxchg_i128_seq_cst:
+ %pair = cmpxchg i128* @var128, i128 %wanted, i128 %new seq_cst seq_cst
+ %old = extractvalue { i128, i1 } %pair, 0
+
+; CHECK-NOT: dmb
+; CHECK: adrp [[TMPADDR:x[0-9]+]], var128
+; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var128
+
+; CHECK: caspal x0, x1, x2, x3, [x[[ADDR]]]
+; CHECK-NOT: dmb
+
+ ret i128 %old
+}
+
define i8 @test_atomic_load_max_i8_acq_rel(i8 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_max_i8_acq_rel:
%old = atomicrmw max i8* @var8, i8 %offset acq_rel
More information about the llvm-commits
mailing list