[llvm] 5f9b4d8 - [LoongArch] Add codegen support for atomicrmw min/max operation on LA64
via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 30 01:53:43 PST 2022
Author: gonglingqin
Date: 2022-11-30T17:45:18+08:00
New Revision: 5f9b4d8bad22a2cceac5a06e45a0f1d86ce6f0ef
URL: https://github.com/llvm/llvm-project/commit/5f9b4d8bad22a2cceac5a06e45a0f1d86ce6f0ef
DIFF: https://github.com/llvm/llvm-project/commit/5f9b4d8bad22a2cceac5a06e45a0f1d86ce6f0ef.diff
LOG: [LoongArch] Add codegen support for atomicrmw min/max operation on LA64
This patch is required by OpenMP: with it applied, the OpenMP regression tests pass.
To keep the patch at a reviewable size, atomicrmw min/max operations on LA32 will be
added in a later patch.
Differential Revision: https://reviews.llvm.org/D138177
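As background, the sketch below (plain C++ with made-up names, not code from this
patch) shows why the signed sub-word cases need an extra sign-extension step that the
existing umin/umax handling does not: after masking, an i8 or i16 field is effectively
zero-extended, which is fine for unsigned comparisons but wrong for signed ones.

  #include <cstdint>

  // Illustrative only: a naive signed comparison on the masked field.
  static bool naiveSignedLess(uint32_t Loaded, uint32_t Mask, int8_t B) {
    // WRONG for signed types: this compares the zero-extended field against B.
    return static_cast<int32_t>(Loaded & Mask) < B;
  }

  // Example: field value 0xFF (-1 as i8) in bits [7:0], B = 2.
  // naiveSignedLess(0x000000FF, 0x000000FF, 2) is false (255 < 2), although
  // -1 < 2 is true; hence the sll/sra sign-extension in the expanded loop below.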
Added:
Modified:
llvm/include/llvm/IR/IntrinsicsLoongArch.td
llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp
llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/IR/IntrinsicsLoongArch.td b/llvm/include/llvm/IR/IntrinsicsLoongArch.td
index e00edad46136a..c11334da929c9 100644
--- a/llvm/include/llvm/IR/IntrinsicsLoongArch.td
+++ b/llvm/include/llvm/IR/IntrinsicsLoongArch.td
@@ -41,6 +41,8 @@ defm int_loongarch_masked_atomicrmw_sub : MaskedAtomicRMWIntrinsics;
defm int_loongarch_masked_atomicrmw_nand : MaskedAtomicRMWIntrinsics;
defm int_loongarch_masked_atomicrmw_umax : MaskedAtomicRMWIntrinsics;
defm int_loongarch_masked_atomicrmw_umin : MaskedAtomicRMWIntrinsics;
+defm int_loongarch_masked_atomicrmw_max : MaskedAtomicRMWFiveOpIntrinsics;
+defm int_loongarch_masked_atomicrmw_min : MaskedAtomicRMWFiveOpIntrinsics;
// @llvm.loongarch.masked.cmpxchg.i64.<p>(
// ptr addr, grlen cmpval, grlen newval, grlen mask, grlenimm ordering)
diff --git a/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp b/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp
index 9dacb73a14853..51df0463e2352 100644
--- a/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp
@@ -133,6 +133,12 @@ bool LoongArchExpandAtomicPseudo::expandMI(
return expandAtomicCmpXchg(MBB, MBBI, false, 64, NextMBBI);
case LoongArch::PseudoMaskedCmpXchg32:
return expandAtomicCmpXchg(MBB, MBBI, true, 32, NextMBBI);
+ case LoongArch::PseudoMaskedAtomicLoadMax32:
+ return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Max, true, 32,
+ NextMBBI);
+ case LoongArch::PseudoMaskedAtomicLoadMin32:
+ return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Min, true, 32,
+ NextMBBI);
}
return false;
}
@@ -341,6 +347,17 @@ bool LoongArchExpandAtomicPseudo::expandAtomicBinOp(
return true;
}
+static void insertSext(const LoongArchInstrInfo *TII, DebugLoc DL,
+ MachineBasicBlock *MBB, Register ValReg,
+ Register ShamtReg) {
+ BuildMI(MBB, DL, TII->get(LoongArch::SLL_W), ValReg)
+ .addReg(ValReg)
+ .addReg(ShamtReg);
+ BuildMI(MBB, DL, TII->get(LoongArch::SRA_W), ValReg)
+ .addReg(ValReg)
+ .addReg(ShamtReg);
+}
+
bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
@@ -417,6 +434,22 @@ bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp(
.addReg(Scratch2Reg)
.addMBB(LoopTailMBB);
break;
+ case AtomicRMWInst::Max:
+ insertSext(TII, DL, LoopHeadMBB, Scratch2Reg, MI.getOperand(6).getReg());
+ // bge scratch2, incr, .looptail
+ BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BGE))
+ .addReg(Scratch2Reg)
+ .addReg(IncrReg)
+ .addMBB(LoopTailMBB);
+ break;
+ case AtomicRMWInst::Min:
+ insertSext(TII, DL, LoopHeadMBB, Scratch2Reg, MI.getOperand(6).getReg());
+ // bge incr, scratch2, .looptail
+ BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BGE))
+ .addReg(IncrReg)
+ .addReg(Scratch2Reg)
+ .addMBB(LoopTailMBB);
+ break;
// TODO: support other AtomicRMWInst.
}
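The shift pair that insertSext emits (sll.w followed by sra.w, with the shift amount
taken from operand 6 of the pseudo) moves the masked field to the top of the register
and arithmetically shifts it back, replicating its sign bit through the upper bits. A
minimal 32-bit C++ model of that idiom, assuming arithmetic right shift of negative
values:

  #include <cstdint>

  // Illustrative model of the sll.w + sra.w pair: sign-extend a field in place.
  static int32_t signExtendInPlace(uint32_t Word, unsigned Shamt) {
    return static_cast<int32_t>(Word << Shamt) >> Shamt;
  }

  // Example: an i8 field 0xC4 (-60) in bits [15:8] needs Shamt = 32 - 8 - 8 = 16:
  //   signExtendInPlace(0x0000C400u, 16) == (int32_t)0xFFFFC400
  // i.e. -60 at bit 8 with the sign bits above it filled in.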
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 4f341c8cbb613..c1b2b06a46222 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -2325,6 +2325,10 @@ getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen,
return Intrinsic::loongarch_masked_atomicrmw_umax_i64;
case AtomicRMWInst::UMin:
return Intrinsic::loongarch_masked_atomicrmw_umin_i64;
+ case AtomicRMWInst::Max:
+ return Intrinsic::loongarch_masked_atomicrmw_max_i64;
+ case AtomicRMWInst::Min:
+ return Intrinsic::loongarch_masked_atomicrmw_min_i64;
// TODO: support other AtomicRMWInst.
}
}
@@ -2396,8 +2400,24 @@ Value *LoongArchTargetLowering::emitMaskedAtomicRMWIntrinsic(
Value *Result;
- Result =
- Builder.CreateCall(LlwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
+ // Must pass the shift amount needed to sign extend the loaded value prior
+ // to performing a signed comparison for min/max. ShiftAmt is the number of
+ // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which
+ // is the number of bits to left+right shift the value in order to
+ // sign-extend.
+ if (AI->getOperation() == AtomicRMWInst::Min ||
+ AI->getOperation() == AtomicRMWInst::Max) {
+ const DataLayout &DL = AI->getModule()->getDataLayout();
+ unsigned ValWidth =
+ DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
+ Value *SextShamt =
+ Builder.CreateSub(Builder.getIntN(GRLen, GRLen - ValWidth), ShiftAmt);
+ Result = Builder.CreateCall(LlwOpScwLoop,
+ {AlignedAddr, Incr, Mask, SextShamt, Ordering});
+ } else {
+ Result =
+ Builder.CreateCall(LlwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
+ }
if (GRLen == 64)
Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
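As a concrete instance of the comment above (values chosen for illustration; the
helper is hypothetical, not part of the patch): for an i8 atomicrmw min/max on LA64,
GRLen is 64 and ValWidth is 8, so a byte at bit offset 16 within its aligned word gets
a shift amount of 64 - 8 - 16 = 40.

  #include <cstdint>

  // Hypothetical helper restating the arithmetic: GRLen - ValWidth - ShiftAmt.
  static uint64_t sextShiftAmount(unsigned GRLen, unsigned ValWidth,
                                  uint64_t ShiftAmt) {
    return (GRLen - ValWidth) - ShiftAmt;
  }

  // sextShiftAmount(64, 8, 16) == 40. Since sll.w/sra.w only use the low five
  // bits of the shift register, 40 behaves as a shift of 8 (= 32 - 8 - 16),
  // which sign-extends the byte in place within the 32-bit word loaded by ll.w.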
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
index 5803c63625144..0da02c70102c6 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -1338,6 +1338,20 @@ class PseudoMaskedAMUMinUMax
def PseudoMaskedAtomicLoadUMax32 : PseudoMaskedAMUMinUMax;
def PseudoMaskedAtomicLoadUMin32 : PseudoMaskedAMUMinUMax;
+class PseudoMaskedAMMinMax
+ : Pseudo<(outs GPR:$res, GPR:$scratch1, GPR:$scratch2),
+ (ins GPR:$addr, GPR:$incr, GPR:$mask, grlenimm:$sextshamt,
+ grlenimm:$ordering)> {
let Constraints = "@earlyclobber $res,@earlyclobber $scratch1,"
+ "@earlyclobber $scratch2";
+ let mayLoad = 1;
+ let mayStore = 1;
+ let hasSideEffects = 0;
+}
+
+def PseudoMaskedAtomicLoadMax32 : PseudoMaskedAMMinMax;
+def PseudoMaskedAtomicLoadMin32 : PseudoMaskedAMMinMax;
+
/// Compare and exchange
class PseudoCmpXchg
@@ -1362,6 +1376,12 @@ def PseudoMaskedCmpXchg32
let hasSideEffects = 0;
}
+class PseudoMaskedAMMinMaxPat<Intrinsic intrin, Pseudo AMInst>
+ : Pat<(intrin GPR:$addr, GPR:$incr, GPR:$mask, GPR:$shiftamt,
+ timm:$ordering),
+ (AMInst GPR:$addr, GPR:$incr, GPR:$mask, GPR:$shiftamt,
+ timm:$ordering)>;
+
class AtomicPat<Intrinsic intrin, Pseudo AMInst>
: Pat<(intrin GPR:$addr, GPR:$incr, GPR:$mask, timm:$ordering),
(AMInst GPR:$addr, GPR:$incr, GPR:$mask, timm:$ordering)>;
@@ -1410,6 +1430,15 @@ def : Pat<(atomic_load_umax_32 GPR:$rj, GPR:$rk),
def : Pat<(atomic_load_umax_64 GPR:$rj, GPR:$rk),
(AMMAX_DB_DU GPR:$rk, GPR:$rj)>;
+def : Pat<(atomic_load_min_32 GPR:$rj, GPR:$rk),
+ (AMMIN_DB_W GPR:$rk, GPR:$rj)>;
+def : Pat<(atomic_load_min_64 GPR:$rj, GPR:$rk),
+ (AMMIN_DB_D GPR:$rk, GPR:$rj)>;
+def : Pat<(atomic_load_max_32 GPR:$rj, GPR:$rk),
+ (AMMAX_DB_W GPR:$rk, GPR:$rj)>;
+def : Pat<(atomic_load_max_64 GPR:$rj, GPR:$rk),
+ (AMMAX_DB_D GPR:$rk, GPR:$rj)>;
+
def : AtomicPat<int_loongarch_masked_atomicrmw_umax_i64,
PseudoMaskedAtomicLoadUMax32>;
def : AtomicPat<int_loongarch_masked_atomicrmw_umin_i64,
@@ -1423,6 +1452,11 @@ def : Pat<(int_loongarch_masked_cmpxchg_i64
GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering)>;
def : Pat<(atomic_cmp_swap_32 GPR:$addr, GPR:$cmp, GPR:$new),
(PseudoCmpXchg32 GPR:$addr, GPR:$cmp, GPR:$new)>;
+
+def : PseudoMaskedAMMinMaxPat<int_loongarch_masked_atomicrmw_max_i64,
+ PseudoMaskedAtomicLoadMax32>;
+def : PseudoMaskedAMMinMaxPat<int_loongarch_masked_atomicrmw_min_i64,
+ PseudoMaskedAtomicLoadMin32>;
} // Predicates = [IsLA64]
defm : PseudoBinPat<"atomic_load_nand_32", PseudoAtomicLoadNand32>;
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll
index e84ebd94fa105..cd4a9e7fa9c4f 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll
@@ -181,3 +181,199 @@ define i64 @atomicrmw_umin_i64_acquire(ptr %a, i64 %b) nounwind {
%1 = atomicrmw umin ptr %a, i64 %b acquire
ret i64 %1
}
+
+define i8 @atomicrmw_max_i8_acquire(ptr %a, i8 %b) nounwind {
+; LA64-LABEL: atomicrmw_max_i8_acquire:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.w $a2, $zero, -4
+; LA64-NEXT: and $a2, $a0, $a2
+; LA64-NEXT: slli.d $a0, $a0, 3
+; LA64-NEXT: ori $a3, $zero, 255
+; LA64-NEXT: sll.w $a3, $a3, $a0
+; LA64-NEXT: addi.w $a3, $a3, 0
+; LA64-NEXT: ext.w.b $a1, $a1
+; LA64-NEXT: sll.w $a1, $a1, $a0
+; LA64-NEXT: addi.w $a1, $a1, 0
+; LA64-NEXT: andi $a4, $a0, 24
+; LA64-NEXT: xori $a4, $a4, 56
+; LA64-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: dbar 0
+; LA64-NEXT: ll.w $a5, $a2, 0
+; LA64-NEXT: and $a7, $a5, $a3
+; LA64-NEXT: move $a6, $a5
+; LA64-NEXT: sll.w $a7, $a7, $a4
+; LA64-NEXT: sra.w $a7, $a7, $a4
+; LA64-NEXT: bge $a7, $a1, .LBB8_3
+; LA64-NEXT: # %bb.2: # in Loop: Header=BB8_1 Depth=1
+; LA64-NEXT: xor $a6, $a5, $a1
+; LA64-NEXT: and $a6, $a6, $a3
+; LA64-NEXT: xor $a6, $a5, $a6
+; LA64-NEXT: .LBB8_3: # in Loop: Header=BB8_1 Depth=1
+; LA64-NEXT: sc.w $a6, $a2, 0
+; LA64-NEXT: beqz $a6, .LBB8_1
+; LA64-NEXT: # %bb.4:
+; LA64-NEXT: dbar 1792
+; LA64-NEXT: # %bb.5:
+; LA64-NEXT: srl.w $a0, $a5, $a0
+; LA64-NEXT: ret
+ %1 = atomicrmw max ptr %a, i8 %b acquire
+ ret i8 %1
+}
+
+define i16 @atomicrmw_max_i16_acquire(ptr %a, i16 %b) nounwind {
+; LA64-LABEL: atomicrmw_max_i16_acquire:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.w $a2, $zero, -4
+; LA64-NEXT: and $a2, $a0, $a2
+; LA64-NEXT: slli.d $a0, $a0, 3
+; LA64-NEXT: andi $a3, $a0, 24
+; LA64-NEXT: ori $a4, $zero, 48
+; LA64-NEXT: sub.d $a3, $a4, $a3
+; LA64-NEXT: lu12i.w $a4, 15
+; LA64-NEXT: ori $a4, $a4, 4095
+; LA64-NEXT: sll.w $a4, $a4, $a0
+; LA64-NEXT: addi.w $a4, $a4, 0
+; LA64-NEXT: ext.w.h $a1, $a1
+; LA64-NEXT: sll.w $a1, $a1, $a0
+; LA64-NEXT: addi.w $a1, $a1, 0
+; LA64-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: dbar 0
+; LA64-NEXT: ll.w $a5, $a2, 0
+; LA64-NEXT: and $a7, $a5, $a4
+; LA64-NEXT: move $a6, $a5
+; LA64-NEXT: sll.w $a7, $a7, $a3
+; LA64-NEXT: sra.w $a7, $a7, $a3
+; LA64-NEXT: bge $a7, $a1, .LBB9_3
+; LA64-NEXT: # %bb.2: # in Loop: Header=BB9_1 Depth=1
+; LA64-NEXT: xor $a6, $a5, $a1
+; LA64-NEXT: and $a6, $a6, $a4
+; LA64-NEXT: xor $a6, $a5, $a6
+; LA64-NEXT: .LBB9_3: # in Loop: Header=BB9_1 Depth=1
+; LA64-NEXT: sc.w $a6, $a2, 0
+; LA64-NEXT: beqz $a6, .LBB9_1
+; LA64-NEXT: # %bb.4:
+; LA64-NEXT: dbar 1792
+; LA64-NEXT: # %bb.5:
+; LA64-NEXT: srl.w $a0, $a5, $a0
+; LA64-NEXT: ret
+ %1 = atomicrmw max ptr %a, i16 %b acquire
+ ret i16 %1
+}
+
+define i32 @atomicrmw_max_i32_acquire(ptr %a, i32 %b) nounwind {
+; LA64-LABEL: atomicrmw_max_i32_acquire:
+; LA64: # %bb.0:
+; LA64-NEXT: ammax_db.w $a2, $a1, $a0
+; LA64-NEXT: move $a0, $a2
+; LA64-NEXT: ret
+ %1 = atomicrmw max ptr %a, i32 %b acquire
+ ret i32 %1
+}
+
+define i64 @atomicrmw_max_i64_acquire(ptr %a, i64 %b) nounwind {
+; LA64-LABEL: atomicrmw_max_i64_acquire:
+; LA64: # %bb.0:
+; LA64-NEXT: ammax_db.d $a2, $a1, $a0
+; LA64-NEXT: move $a0, $a2
+; LA64-NEXT: ret
+ %1 = atomicrmw max ptr %a, i64 %b acquire
+ ret i64 %1
+}
+
+define i8 @atomicrmw_min_i8_acquire(ptr %a, i8 %b) nounwind {
+; LA64-LABEL: atomicrmw_min_i8_acquire:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.w $a2, $zero, -4
+; LA64-NEXT: and $a2, $a0, $a2
+; LA64-NEXT: slli.d $a0, $a0, 3
+; LA64-NEXT: ori $a3, $zero, 255
+; LA64-NEXT: sll.w $a3, $a3, $a0
+; LA64-NEXT: addi.w $a3, $a3, 0
+; LA64-NEXT: ext.w.b $a1, $a1
+; LA64-NEXT: sll.w $a1, $a1, $a0
+; LA64-NEXT: addi.w $a1, $a1, 0
+; LA64-NEXT: andi $a4, $a0, 24
+; LA64-NEXT: xori $a4, $a4, 56
+; LA64-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: dbar 0
+; LA64-NEXT: ll.w $a5, $a2, 0
+; LA64-NEXT: and $a7, $a5, $a3
+; LA64-NEXT: move $a6, $a5
+; LA64-NEXT: sll.w $a7, $a7, $a4
+; LA64-NEXT: sra.w $a7, $a7, $a4
+; LA64-NEXT: bge $a1, $a7, .LBB12_3
+; LA64-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1
+; LA64-NEXT: xor $a6, $a5, $a1
+; LA64-NEXT: and $a6, $a6, $a3
+; LA64-NEXT: xor $a6, $a5, $a6
+; LA64-NEXT: .LBB12_3: # in Loop: Header=BB12_1 Depth=1
+; LA64-NEXT: sc.w $a6, $a2, 0
+; LA64-NEXT: beqz $a6, .LBB12_1
+; LA64-NEXT: # %bb.4:
+; LA64-NEXT: dbar 1792
+; LA64-NEXT: # %bb.5:
+; LA64-NEXT: srl.w $a0, $a5, $a0
+; LA64-NEXT: ret
+ %1 = atomicrmw min ptr %a, i8 %b acquire
+ ret i8 %1
+}
+
+define i16 @atomicrmw_min_i16_acquire(ptr %a, i16 %b) nounwind {
+; LA64-LABEL: atomicrmw_min_i16_acquire:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.w $a2, $zero, -4
+; LA64-NEXT: and $a2, $a0, $a2
+; LA64-NEXT: slli.d $a0, $a0, 3
+; LA64-NEXT: andi $a3, $a0, 24
+; LA64-NEXT: ori $a4, $zero, 48
+; LA64-NEXT: sub.d $a3, $a4, $a3
+; LA64-NEXT: lu12i.w $a4, 15
+; LA64-NEXT: ori $a4, $a4, 4095
+; LA64-NEXT: sll.w $a4, $a4, $a0
+; LA64-NEXT: addi.w $a4, $a4, 0
+; LA64-NEXT: ext.w.h $a1, $a1
+; LA64-NEXT: sll.w $a1, $a1, $a0
+; LA64-NEXT: addi.w $a1, $a1, 0
+; LA64-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: dbar 0
+; LA64-NEXT: ll.w $a5, $a2, 0
+; LA64-NEXT: and $a7, $a5, $a4
+; LA64-NEXT: move $a6, $a5
+; LA64-NEXT: sll.w $a7, $a7, $a3
+; LA64-NEXT: sra.w $a7, $a7, $a3
+; LA64-NEXT: bge $a1, $a7, .LBB13_3
+; LA64-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1
+; LA64-NEXT: xor $a6, $a5, $a1
+; LA64-NEXT: and $a6, $a6, $a4
+; LA64-NEXT: xor $a6, $a5, $a6
+; LA64-NEXT: .LBB13_3: # in Loop: Header=BB13_1 Depth=1
+; LA64-NEXT: sc.w $a6, $a2, 0
+; LA64-NEXT: beqz $a6, .LBB13_1
+; LA64-NEXT: # %bb.4:
+; LA64-NEXT: dbar 1792
+; LA64-NEXT: # %bb.5:
+; LA64-NEXT: srl.w $a0, $a5, $a0
+; LA64-NEXT: ret
+ %1 = atomicrmw min ptr %a, i16 %b acquire
+ ret i16 %1
+}
+
+define i32 @atomicrmw_min_i32_acquire(ptr %a, i32 %b) nounwind {
+; LA64-LABEL: atomicrmw_min_i32_acquire:
+; LA64: # %bb.0:
+; LA64-NEXT: ammin_db.w $a2, $a1, $a0
+; LA64-NEXT: move $a0, $a2
+; LA64-NEXT: ret
+ %1 = atomicrmw min ptr %a, i32 %b acquire
+ ret i32 %1
+}
+
+define i64 @atomicrmw_min_i64_acquire(ptr %a, i64 %b) nounwind {
+; LA64-LABEL: atomicrmw_min_i64_acquire:
+; LA64: # %bb.0:
+; LA64-NEXT: ammin_db.d $a2, $a1, $a0
+; LA64-NEXT: move $a0, $a2
+; LA64-NEXT: ret
+ %1 = atomicrmw min ptr %a, i64 %b acquire
+ ret i64 %1
+}
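The xor/and/xor sequence in the loop bodies above (for example at .LBB8_2) is a masked
merge: it splices the shifted operand into the selected field while leaving the rest
of the word untouched. A small self-contained C++ check of the identity it relies on,
with made-up values:

  #include <cassert>
  #include <cstdint>

  // Loaded ^ ((Loaded ^ Incr) & Mask) == (Loaded & ~Mask) | (Incr & Mask)
  static uint32_t maskedMerge(uint32_t Loaded, uint32_t Incr, uint32_t Mask) {
    return Loaded ^ ((Loaded ^ Incr) & Mask);
  }

  int main() {
    // Replace byte 1 of 0x11223344 with 0xAB; the other bytes are unchanged.
    assert(maskedMerge(0x11223344u, 0x0000AB00u, 0x0000FF00u) == 0x1122AB44u);
    return 0;
  }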