[llvm] [Xtensa] Implement Xtensa S32C1I Option and atomics lowering. (PR #137134)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 24 01:12:40 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-xtensa
Author: Andrei Safronov (andreisfr)
<details>
<summary>Changes</summary>
Implement the Xtensa S32C1I option and use the s32c1i instruction to implement atomic operations.
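For context, here is a minimal IR sketch (not part of the patch; the function bodies and the RUN invocation are illustrative assumptions) of the operations this lowering handles once the new `s32c1i` subtarget feature is enabled:

```llvm
; Hypothetical test sketch; the "+s32c1i" attribute matches the feature
; added in XtensaFeatures.td below.
; RUN: llc -mtriple=xtensa -mattr=+s32c1i < %s

define i32 @rmw_add(ptr %p, i32 %v) {
  ; Expanded into an S32C1I compare-and-swap retry loop.
  %old = atomicrmw add ptr %p, i32 %v seq_cst
  ret i32 %old
}

define i32 @cas(ptr %p, i32 %cmp, i32 %new) {
  ; Lowered by writing the expected value to SCOMPARE1 and issuing S32C1I.
  %pair = cmpxchg ptr %p, i32 %cmp, i32 %new seq_cst seq_cst
  %old = extractvalue { i32, i1 } %pair, 0
  ret i32 %old
}
```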
---
Patch is 279.18 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/137134.diff
13 Files Affected:
- (modified) llvm/lib/Target/Xtensa/Disassembler/XtensaDisassembler.cpp (+4-3)
- (modified) llvm/lib/Target/Xtensa/MCTargetDesc/XtensaMCTargetDesc.cpp (+3)
- (modified) llvm/lib/Target/Xtensa/XtensaFeatures.td (+16)
- (modified) llvm/lib/Target/Xtensa/XtensaISelLowering.cpp (+863)
- (modified) llvm/lib/Target/Xtensa/XtensaISelLowering.h (+19)
- (modified) llvm/lib/Target/Xtensa/XtensaInstrInfo.td (+158)
- (modified) llvm/lib/Target/Xtensa/XtensaOperators.td (+5)
- (modified) llvm/lib/Target/Xtensa/XtensaRegisterInfo.td (+7-1)
- (modified) llvm/lib/Target/Xtensa/XtensaSubtarget.h (+2)
- (modified) llvm/lib/Target/Xtensa/XtensaTargetMachine.cpp (+6)
- (added) llvm/test/CodeGen/Xtensa/atomic-load-store.ll (+498)
- (added) llvm/test/CodeGen/Xtensa/atomic-rmw.ll (+4728)
- (added) llvm/test/CodeGen/Xtensa/forced-atomics.ll (+1288)
``````````diff
diff --git a/llvm/lib/Target/Xtensa/Disassembler/XtensaDisassembler.cpp b/llvm/lib/Target/Xtensa/Disassembler/XtensaDisassembler.cpp
index 6b355e6363b22..f3873ca4dbbe2 100644
--- a/llvm/lib/Target/Xtensa/Disassembler/XtensaDisassembler.cpp
+++ b/llvm/lib/Target/Xtensa/Disassembler/XtensaDisassembler.cpp
@@ -114,9 +114,10 @@ static DecodeStatus DecodeMR23RegisterClass(MCInst &Inst, uint64_t RegNo,
}
const MCPhysReg SRDecoderTable[] = {
- Xtensa::SAR, 3, Xtensa::ACCLO, 16, Xtensa::ACCHI, 17,
- Xtensa::M0, 32, Xtensa::M1, 33, Xtensa::M2, 34,
- Xtensa::M3, 35, Xtensa::WINDOWBASE, 72, Xtensa::WINDOWSTART, 73};
+ Xtensa::SAR, 3, Xtensa::ACCLO, 16, Xtensa::ACCHI, 17,
+ Xtensa::SCOMPARE1, 12, Xtensa::M0, 32, Xtensa::M1, 33,
+ Xtensa::M2, 34, Xtensa::M3, 35, Xtensa::WINDOWBASE, 72,
+ Xtensa::WINDOWSTART, 73, Xtensa::ATOMCTL, 99};
static DecodeStatus DecodeSRRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
diff --git a/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaMCTargetDesc.cpp b/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaMCTargetDesc.cpp
index 792faf811aca9..59b7582c0268f 100644
--- a/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaMCTargetDesc.cpp
+++ b/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaMCTargetDesc.cpp
@@ -86,6 +86,9 @@ bool Xtensa::checkRegister(MCRegister RegNo, const FeatureBitset &FeatureBits) {
case Xtensa::WINDOWBASE:
case Xtensa::WINDOWSTART:
return FeatureBits[Xtensa::FeatureWindowed];
+  case Xtensa::ATOMCTL:
+  case Xtensa::SCOMPARE1:
+    return FeatureBits[Xtensa::FeatureS32C1I];
case Xtensa::NoRegister:
return false;
}
diff --git a/llvm/lib/Target/Xtensa/XtensaFeatures.td b/llvm/lib/Target/Xtensa/XtensaFeatures.td
index 2a47214946401..623573840953b 100644
--- a/llvm/lib/Target/Xtensa/XtensaFeatures.td
+++ b/llvm/lib/Target/Xtensa/XtensaFeatures.td
@@ -67,3 +67,19 @@ def FeatureDiv32 : SubtargetFeature<"div32", "HasDiv32", "true",
"Enable Xtensa Div32 option">;
def HasDiv32 : Predicate<"Subtarget->hasDiv32()">,
AssemblerPredicate<(all_of FeatureDiv32)>;
+
+def FeatureS32C1I : SubtargetFeature<"s32c1i", "HasS32C1I", "true",
+ "Enable Xtensa S32C1I option">;
+def HasS32C1I : Predicate<"Subtarget->hasS32C1I()">,
+ AssemblerPredicate<(all_of FeatureS32C1I)>;
+
+// Assume that lock-free native-width atomics are available, even if the target
+// and operating system combination would not usually provide them. The user
+// is responsible for providing any necessary __sync implementations. Code
+// built with this feature is not ABI-compatible with code built without this
+// feature, if atomic variables are exposed across the ABI boundary.
+def FeatureForcedAtomics : SubtargetFeature<"forced-atomics", "HasForcedAtomics", "true",
+ "Assume that lock-free native-width atomics are available">;
+def HasForcedAtomics : Predicate<"Subtarget->hasForcedAtomics()">,
+ AssemblerPredicate<(all_of FeatureForcedAtomics)>;
+def HasAtomicLdSt : Predicate<"Subtarget->hasS32C1I() || Subtarget->hasForcedAtomics()">;
diff --git a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp
index b17840aad9b4d..e74c5c1e61b5d 100644
--- a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp
+++ b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp
@@ -175,6 +175,40 @@ XtensaTargetLowering::XtensaTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VACOPY, MVT::Other, Custom);
setOperationAction(ISD::VAEND, MVT::Other, Expand);
+  // To have the best chance of doing something sensible with fences, custom
+  // lower them.
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
+
+ if (!Subtarget.hasS32C1I()) {
+ for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
+ I <= MVT::LAST_INTEGER_VALUETYPE; ++I) {
+ MVT VT = MVT::SimpleValueType(I);
+ if (isTypeLegal(VT)) {
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, VT, Expand);
+ setOperationAction(ISD::ATOMIC_SWAP, VT, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_NAND, VT, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_MIN, VT, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_MAX, VT, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_UMIN, VT, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_UMAX, VT, Expand);
+ }
+ }
+ }
+
+ if (Subtarget.hasS32C1I()) {
+ setMaxAtomicSizeInBitsSupported(32);
+ setMinCmpXchgSizeInBits(32);
+ } else if (Subtarget.hasForcedAtomics()) {
+ setMaxAtomicSizeInBitsSupported(32);
+ } else {
+ setMaxAtomicSizeInBitsSupported(0);
+ }
+
// Compute derived properties from the register classes
computeRegisterProperties(STI.getRegisterInfo());
}
@@ -1241,6 +1275,13 @@ bool XtensaTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
return false;
}
+SDValue XtensaTargetLowering::LowerATOMIC_FENCE(SDValue Op,
+                                                SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  SDValue Chain = Op.getOperand(0);
+  // All fences are lowered to the MEMW barrier instruction.
+  return DAG.getNode(XtensaISD::MEMW, DL, MVT::Other, Chain);
+}
+
SDValue XtensaTargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
@@ -1282,6 +1323,8 @@ SDValue XtensaTargetLowering::LowerOperation(SDValue Op,
return LowerShiftRightParts(Op, DAG, true);
case ISD::SRL_PARTS:
return LowerShiftRightParts(Op, DAG, false);
+ case ISD::ATOMIC_FENCE:
+ return LowerATOMIC_FENCE(Op, DAG);
default:
report_fatal_error("Unexpected node to lower");
}
@@ -1383,6 +1426,731 @@ XtensaTargetLowering::emitSelectCC(MachineInstr &MI,
return SinkMBB;
}
+// Emit instructions for the atomic_cmp_swap node for 8/16-bit operands.
+MachineBasicBlock *
+XtensaTargetLowering::emitAtomicCmpSwap(MachineInstr &MI, MachineBasicBlock *BB,
+ int isByteOperand) const {
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
+ DebugLoc DL = MI.getDebugLoc();
+
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = ++BB->getIterator();
+
+  MachineBasicBlock *ThisBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *BBLoop = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *BBExit = F->CreateMachineBasicBlock(LLVM_BB);
+
+ F->insert(It, BBLoop);
+ F->insert(It, BBExit);
+
+ // Transfer the remainder of BB and its successor edges to BBExit.
+ BBExit->splice(BBExit->begin(), BB,
+ std::next(MachineBasicBlock::iterator(MI)), BB->end());
+ BBExit->transferSuccessorsAndUpdatePHIs(BB);
+
+ BB->addSuccessor(BBLoop);
+
+ MachineOperand &Res = MI.getOperand(0);
+ MachineOperand &AtomValAddr = MI.getOperand(1);
+ MachineOperand &CmpVal = MI.getOperand(2);
+ MachineOperand &SwpVal = MI.getOperand(3);
+
+ MachineFunction *MF = BB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ const TargetRegisterClass *RC = getRegClassFor(MVT::i32);
+
+  // Compute the byte offset of the 8/16-bit value within its containing
+  // naturally aligned 32-bit word, and the word-aligned address itself.
+  unsigned R1 = MRI.createVirtualRegister(RC);
+  BuildMI(*BB, MI, DL, TII.get(Xtensa::MOVI), R1).addImm(3);
+
+ unsigned ByteOffs = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::AND), ByteOffs)
+ .addReg(R1)
+ .addReg(AtomValAddr.getReg());
+
+ unsigned AddrAlign = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::SUB), AddrAlign)
+ .addReg(AtomValAddr.getReg())
+ .addReg(ByteOffs);
+
+ unsigned BitOffs = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::SLLI), BitOffs)
+ .addReg(ByteOffs)
+ .addImm(3);
+
+  // Build a mask of the operand width (0xff or 0xffff) in the low bits.
+  unsigned Mask1 = MRI.createVirtualRegister(RC);
+ if (isByteOperand) {
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::MOVI), Mask1).addImm(0xff);
+ } else {
+ unsigned R2 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::MOVI), R2).addImm(1);
+ unsigned R3 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::SLLI), R3).addReg(R2).addImm(16);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::ADDI), Mask1).addReg(R3).addImm(-1);
+ }
+
+  // SSL loads the shift amount into SAR for the SLL instructions below.
+  BuildMI(*BB, MI, DL, TII.get(Xtensa::SSL)).addReg(BitOffs);
+
+ unsigned R2 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::MOVI), R2).addImm(-1);
+
+ unsigned Mask2 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::SLL), Mask2).addReg(Mask1);
+
+ unsigned Mask3 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::XOR), Mask3).addReg(Mask2).addReg(R2);
+
+ unsigned R3 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::L32I), R3).addReg(AddrAlign).addImm(0);
+
+ unsigned R4 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::AND), R4).addReg(R3).addReg(Mask3);
+
+ unsigned Cmp1 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::SLL), Cmp1).addReg(CmpVal.getReg());
+
+ unsigned Swp1 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::SLL), Swp1).addReg(SwpVal.getReg());
+
+  // Retry loop: merge the (possibly updated) bytes outside the operand with
+  // the shifted compare and swap values, then attempt S32C1I on the full word.
+  BB = BBLoop;
+
+ unsigned MaskPhi = MRI.createVirtualRegister(RC);
+ unsigned MaskLoop = MRI.createVirtualRegister(RC);
+
+ BuildMI(*BB, BB->begin(), DL, TII.get(Xtensa::PHI), MaskPhi)
+ .addReg(MaskLoop)
+ .addMBB(BBLoop)
+ .addReg(R4)
+      .addMBB(ThisBB);
+
+ unsigned Cmp2 = MRI.createVirtualRegister(RC);
+ BuildMI(BB, DL, TII.get(Xtensa::OR), Cmp2).addReg(Cmp1).addReg(MaskPhi);
+
+ unsigned Swp2 = MRI.createVirtualRegister(RC);
+ BuildMI(BB, DL, TII.get(Xtensa::OR), Swp2).addReg(Swp1).addReg(MaskPhi);
+
+ BuildMI(BB, DL, TII.get(Xtensa::WSR), Xtensa::SCOMPARE1).addReg(Cmp2);
+
+ unsigned Swp3 = MRI.createVirtualRegister(RC);
+ BuildMI(BB, DL, TII.get(Xtensa::S32C1I), Swp3)
+ .addReg(Swp2)
+ .addReg(AddrAlign)
+ .addImm(0);
+
+ BuildMI(BB, DL, TII.get(Xtensa::AND), MaskLoop).addReg(Swp3).addReg(Mask3);
+
+ BuildMI(BB, DL, TII.get(Xtensa::BNE))
+ .addReg(MaskLoop)
+ .addReg(MaskPhi)
+ .addMBB(BBLoop);
+
+ BB->addSuccessor(BBLoop);
+ BB->addSuccessor(BBExit);
+
+ BB = BBExit;
+ auto St = BBExit->begin();
+
+ unsigned R5 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, St, DL, TII.get(Xtensa::SSR)).addReg(BitOffs);
+
+ BuildMI(*BB, St, DL, TII.get(Xtensa::SRL), R5).addReg(Swp3);
+
+ BuildMI(*BB, St, DL, TII.get(Xtensa::AND), Res.getReg())
+ .addReg(R5)
+ .addReg(Mask1);
+
+ MI.eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+// Emit instructions for the atomic_swap node for 8/16-bit operands.
+MachineBasicBlock *
+XtensaTargetLowering::emitAtomicSwap(MachineInstr &MI, MachineBasicBlock *BB,
+ int isByteOperand) const {
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
+ DebugLoc DL = MI.getDebugLoc();
+
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = ++BB->getIterator();
+
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *BBLoop1 = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *BBLoop2 = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *BBLoop3 = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *BBLoop4 = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *BBExit = F->CreateMachineBasicBlock(LLVM_BB);
+
+ F->insert(It, BBLoop1);
+ F->insert(It, BBLoop2);
+ F->insert(It, BBLoop3);
+ F->insert(It, BBLoop4);
+ F->insert(It, BBExit);
+
+ // Transfer the remainder of BB and its successor edges to BBExit.
+ BBExit->splice(BBExit->begin(), BB,
+ std::next(MachineBasicBlock::iterator(MI)), BB->end());
+ BBExit->transferSuccessorsAndUpdatePHIs(BB);
+
+ BB->addSuccessor(BBLoop1);
+ BBLoop1->addSuccessor(BBLoop2);
+ BBLoop2->addSuccessor(BBLoop3);
+ BBLoop2->addSuccessor(BBLoop4);
+ BBLoop3->addSuccessor(BBLoop2);
+ BBLoop3->addSuccessor(BBLoop4);
+ BBLoop4->addSuccessor(BBLoop1);
+ BBLoop4->addSuccessor(BBExit);
+
+ MachineOperand &Res = MI.getOperand(0);
+ MachineOperand &AtomValAddr = MI.getOperand(1);
+ MachineOperand &SwpVal = MI.getOperand(2);
+
+ MachineFunction *MF = BB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ const TargetRegisterClass *RC = getRegClassFor(MVT::i32);
+
+ unsigned R1 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::MOVI), R1).addImm(3);
+
+ unsigned ByteOffs = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::AND), ByteOffs)
+ .addReg(R1)
+ .addReg(AtomValAddr.getReg());
+
+ unsigned AddrAlign = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::SUB), AddrAlign)
+ .addReg(AtomValAddr.getReg())
+ .addReg(ByteOffs);
+
+ unsigned BitOffs = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::SLLI), BitOffs)
+ .addReg(ByteOffs)
+ .addImm(3);
+
+ unsigned Mask1 = MRI.createVirtualRegister(RC);
+ if (isByteOperand) {
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::MOVI), Mask1).addImm(0xff);
+ } else {
+ unsigned R2 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::MOVI), R2).addImm(1);
+ unsigned R3 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::SLLI), R3).addReg(R2).addImm(16);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::ADDI), Mask1).addReg(R3).addImm(-1);
+ }
+
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::SSL)).addReg(BitOffs);
+
+ unsigned R2 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::MOVI), R2).addImm(-1);
+
+ unsigned Mask2 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::SLL), Mask2).addReg(Mask1);
+
+ unsigned Mask3 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::XOR), Mask3).addReg(Mask2).addReg(R2);
+
+ unsigned R3 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::L32I), R3).addReg(AddrAlign).addImm(0);
+
+ unsigned R4 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::AND), R4).addReg(R3).addReg(Mask3);
+
+ unsigned SwpValShifted = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::SLL), SwpValShifted)
+ .addReg(SwpVal.getReg());
+
+ unsigned R5 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::L32I), R5).addReg(AddrAlign).addImm(0);
+
+ unsigned AtomVal = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::AND), AtomVal).addReg(R5).addReg(Mask2);
+
+ unsigned AtomValPhi = MRI.createVirtualRegister(RC);
+ unsigned AtomValLoop = MRI.createVirtualRegister(RC);
+
+ BuildMI(*BBLoop1, BBLoop1->begin(), DL, TII.get(Xtensa::PHI), AtomValPhi)
+ .addReg(AtomValLoop)
+ .addMBB(BBLoop4)
+ .addReg(AtomVal)
+ .addMBB(BB);
+
+  // Loop 1: issue MEMW and re-read the containing word to observe stores from
+  // other threads, extracting the bytes outside the operand.
+  BB = BBLoop1;
+
+ BuildMI(BB, DL, TII.get(Xtensa::MEMW));
+
+ unsigned R6 = MRI.createVirtualRegister(RC);
+ BuildMI(BB, DL, TII.get(Xtensa::L32I), R6).addReg(AddrAlign).addImm(0);
+
+ unsigned R7 = MRI.createVirtualRegister(RC);
+ BuildMI(BB, DL, TII.get(Xtensa::AND), R7).addReg(R6).addReg(Mask3);
+
+ unsigned MaskPhi = MRI.createVirtualRegister(RC);
+ unsigned MaskLoop = MRI.createVirtualRegister(RC);
+
+ BuildMI(*BBLoop2, BBLoop2->begin(), DL, TII.get(Xtensa::PHI), MaskPhi)
+ .addReg(MaskLoop)
+ .addMBB(BBLoop3)
+ .addReg(R7)
+ .addMBB(BBLoop1);
+
+  // Loop 2: merge the swap value and the expected operand value with the
+  // surrounding bytes, then attempt S32C1I on the full word.
+  BB = BBLoop2;
+
+ unsigned Swp1 = MRI.createVirtualRegister(RC);
+ BuildMI(BB, DL, TII.get(Xtensa::OR), Swp1)
+ .addReg(SwpValShifted)
+ .addReg(MaskPhi);
+
+ unsigned AtomVal1 = MRI.createVirtualRegister(RC);
+ BuildMI(BB, DL, TII.get(Xtensa::OR), AtomVal1)
+ .addReg(AtomValPhi)
+ .addReg(MaskPhi);
+
+ BuildMI(BB, DL, TII.get(Xtensa::WSR), Xtensa::SCOMPARE1).addReg(AtomVal1);
+
+ unsigned Swp2 = MRI.createVirtualRegister(RC);
+ BuildMI(BB, DL, TII.get(Xtensa::S32C1I), Swp2)
+ .addReg(Swp1)
+ .addReg(AddrAlign)
+ .addImm(0);
+
+ BuildMI(BB, DL, TII.get(Xtensa::BEQ))
+ .addReg(AtomVal1)
+ .addReg(Swp2)
+ .addMBB(BBLoop4);
+
+  // Loop 3: the bytes outside the operand changed concurrently; update the
+  // merged-in bytes and retry the S32C1I.
+  BB = BBLoop3;
+
+ BuildMI(BB, DL, TII.get(Xtensa::AND), MaskLoop).addReg(Swp2).addReg(Mask3);
+
+ BuildMI(BB, DL, TII.get(Xtensa::BNE))
+ .addReg(MaskLoop)
+ .addReg(MaskPhi)
+ .addMBB(BBLoop2);
+
+  // Loop 4: extract the observed operand bits; if they differ from the
+  // expected value, restart from the re-read in loop 1.
+  BB = BBLoop4;
+
+ BuildMI(BB, DL, TII.get(Xtensa::AND), AtomValLoop).addReg(Swp2).addReg(Mask2);
+
+ BuildMI(BB, DL, TII.get(Xtensa::BNE))
+ .addReg(AtomValLoop)
+ .addReg(AtomValPhi)
+ .addMBB(BBLoop1);
+
+ BB = BBExit;
+
+ auto St = BB->begin();
+
+ unsigned R8 = MRI.createVirtualRegister(RC);
+
+ BuildMI(*BB, St, DL, TII.get(Xtensa::SSR)).addReg(BitOffs);
+ BuildMI(*BB, St, DL, TII.get(Xtensa::SRL), R8).addReg(AtomValLoop);
+
+ if (isByteOperand) {
+ BuildMI(*BB, St, DL, TII.get(Xtensa::SEXT), Res.getReg())
+ .addReg(R8)
+ .addImm(7);
+ } else {
+ BuildMI(*BB, St, DL, TII.get(Xtensa::SEXT), Res.getReg())
+ .addReg(R8)
+ .addImm(15);
+ }
+
+ MI.eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+// Emit instructions for the atomic_swap node for 32-bit operands.
+MachineBasicBlock *
+XtensaTargetLowering::emitAtomicSwap(MachineInstr &MI,
+ MachineBasicBlock *BB) const {
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
+ DebugLoc DL = MI.getDebugLoc();
+
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = ++BB->getIterator();
+
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *BBLoop = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *BBExit = F->CreateMachineBasicBlock(LLVM_BB);
+
+ F->insert(It, BBLoop);
+ F->insert(It, BBExit);
+
+ // Transfer the remainder of BB and its successor edges to BBExit.
+ BBExit->splice(BBExit->begin(), BB,
+ std::next(MachineBasicBlock::iterator(MI)), BB->end());
+ BBExit->transferSuccessorsAndUpdatePHIs(BB);
+
+ BB->addSuccessor(BBLoop);
+ BBLoop->addSuccessor(BBLoop);
+ BBLoop->addSuccessor(BBExit);
+
+ MachineOperand &Res = MI.getOperand(0);
+ MachineOperand &AtomValAddr = MI.getOperand(1);
+ MachineOperand &SwpVal = MI.getOperand(2);
+
+ MachineFunction *MF = BB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ const TargetRegisterClass *RC = getRegClassFor(MVT::i32);
+
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::MEMW));
+
+ unsigned AtomVal = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::L32I), AtomVal)
+ .addReg(AtomValAddr.getReg())
+ .addImm(0);
+
+ unsigned AtomValLoop = MRI.createVirtualRegister(RC);
+
+ BuildMI(*BBLoop, BBLoop->begin(), DL, TII.get(Xtensa::PHI), Res.getReg())
+ .addReg(AtomValLoop)
+ .addMBB(BBLoop)
+ .addReg(AtomVal)
+ .addMBB(BB);
+
+  // Retry loop: set SCOMPARE1 to the last observed value and attempt to store
+  // the swap value; repeat until S32C1I observes no intervening write.
+  BB = BBLoop;
+
+ BuildMI(BB, DL, TII.get(Xtensa::WSR), Xtensa::SCOMPARE1).addReg(Res.getReg());
+
+ BuildMI(BB, DL, TII.get(Xtensa::S32C1I), AtomValLoop)
+ .addReg(SwpVal.getReg())
+ .addReg(AtomValAddr.getReg())
+ .addImm(0);
+
+ BuildMI(BB, DL, TII.get(Xtensa::BNE))
+ .addReg(AtomValLoop)
+ .addReg(Res.getReg())
+ .addMBB(BBLoop);
+
+ MI.eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+MachineBasicBlock *XtensaTargetLowering::emitAtomicRMW(MachineInstr &MI,
+ MachineBasicBlock *BB,
+ unsigned Opcode,
+ bool inv,
+ bool minmax) const {
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
+ DebugLoc DL = MI.getDebugLoc();
+
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = ++BB->getIterator();
+
+ MachineBasicBlock *ThisBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *BBLoop = F->CreateMachineBasicBlock(LLV...
[truncated]
``````````
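For readers skimming the lowering code above: the 32-bit paths all share the same S32C1I retry-loop shape (WSR to SCOMPARE1, then S32C1I, then a conditional branch back). A rough pseudo-assembly sketch, reconstructed from the BuildMI sequence in emitAtomicSwap; register names and the move placement are illustrative, not actual codegen output:

```llvm
; memw                     ; order earlier memory accesses
; l32i    a3, a2, 0        ; a3 = current value at the address in a2
; .loop:
; wsr     a3, scompare1    ; expected value -> SCOMPARE1
; mov     a4, a5           ; a4 = value to swap in
; s32c1i  a4, a2, 0        ; if *a2 == SCOMPARE1 then *a2 = a4; a4 = old *a2
; bne     a4, a3, .loop    ; lost the race; retry (the PHI that refreshes
;                          ; the expected value in a3 is omitted here)
```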
</details>
https://github.com/llvm/llvm-project/pull/137134