[llvm] 6c412b6 - [BPF] Add a few new insns under cpu=v4
Yonghong Song via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 26 08:37:56 PDT 2023
Author: Yonghong Song
Date: 2023-07-26T08:37:30-07:00
New Revision: 6c412b6c6faa2dabd8602d35d3f5e796fb1daf80
URL: https://github.com/llvm/llvm-project/commit/6c412b6c6faa2dabd8602d35d3f5e796fb1daf80
DIFF: https://github.com/llvm/llvm-project/commit/6c412b6c6faa2dabd8602d35d3f5e796fb1daf80.diff
LOG: [BPF] Add a few new insns under cpu=v4
In [1], a few new insns are proposed to expand BPF ISA to
. fixing the limitation of existing insn (e.g., 16bit jmp offset)
. adding new insns which may improve code quality
(sign_ext_ld, sign_ext_mov, st)
. feature complete (sdiv, smod)
. better user experience (bswap)
This patch implemented insn encoding for
. sign-extended load
. sign-extended mov
. sdiv/smod
. bswap insns
. unconditional jump with 32bit offset
The new bswap insns are generated under cpu=v4 for __builtin_bswap.
For cpu=v3 or earlier, for __builtin_bswap, be or le insns are generated
which is not intuitive for the user.
To support 32-bit branch offset, a 32-bit ja (JMPL) insn is implemented.
For conditional branch which is beyond 16-bit offset, llvm will do
some transformation 'cond_jmp' -> 'cond_jmp + jmpl' to simulate 32bit
conditional jmp. See BPFMIPeephole.cpp for details. The algorithm is
heuristic based. I have tested bpf selftest pyperf600 with unroll count
600 which can indeed generate 32-bit jump insn, e.g.,
13: 06 00 00 00 9b cd 00 00 gotol +0xcd9b <LBB0_6619>
Eduard is working on adding the 'st' insn to cpu=v4.
A list of llc flags:
disable-ldsx, disable-movsx, disable-bswap,
disable-sdiv-smod, disable-gotol
can be used to disable a particular insn for cpu v4.
For example, user can do:
llc -march=bpf -mcpu=v4 -disable-movsx t.ll
to enable cpu v4 without movsx insns.
References:
[1] https://lore.kernel.org/bpf/4bfe98be-5333-1c7e-2f6d-42486c8ec039@meta.com/
Differential Revision: https://reviews.llvm.org/D144829
Added:
llvm/lib/Target/BPF/MCTargetDesc/BPFMCFixups.h
llvm/test/CodeGen/BPF/assembler-disassembler-v4.s
llvm/test/CodeGen/BPF/bswap.ll
llvm/test/CodeGen/BPF/gotol.ll
llvm/test/CodeGen/BPF/ldsx.ll
llvm/test/CodeGen/BPF/movsx.ll
llvm/test/CodeGen/BPF/sdiv_smod.ll
Modified:
clang/lib/Basic/Targets/BPF.cpp
clang/lib/Basic/Targets/BPF.h
clang/test/Misc/target-invalid-cpu-note.c
llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
llvm/lib/Target/BPF/BPF.td
llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp
llvm/lib/Target/BPF/BPFISelLowering.cpp
llvm/lib/Target/BPF/BPFISelLowering.h
llvm/lib/Target/BPF/BPFInstrFormats.td
llvm/lib/Target/BPF/BPFInstrInfo.td
llvm/lib/Target/BPF/BPFMIPeephole.cpp
llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp
llvm/lib/Target/BPF/BPFSubtarget.cpp
llvm/lib/Target/BPF/BPFSubtarget.h
llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
llvm/lib/Target/BPF/MCTargetDesc/BPFInstPrinter.cpp
llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp
llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
Removed:
################################################################################
diff --git a/clang/lib/Basic/Targets/BPF.cpp b/clang/lib/Basic/Targets/BPF.cpp
index c9095c28aec274..d6288d2e0d0e17 100644
--- a/clang/lib/Basic/Targets/BPF.cpp
+++ b/clang/lib/Basic/Targets/BPF.cpp
@@ -32,7 +32,7 @@ void BPFTargetInfo::getTargetDefines(const LangOptions &Opts,
}
static constexpr llvm::StringLiteral ValidCPUNames[] = {"generic", "v1", "v2",
- "v3", "probe"};
+ "v3", "v4", "probe"};
bool BPFTargetInfo::isValidCPUName(StringRef Name) const {
return llvm::is_contained(ValidCPUNames, Name);
diff --git a/clang/lib/Basic/Targets/BPF.h b/clang/lib/Basic/Targets/BPF.h
index 8a9227bca34c69..489f29fc4fead3 100644
--- a/clang/lib/Basic/Targets/BPF.h
+++ b/clang/lib/Basic/Targets/BPF.h
@@ -106,7 +106,7 @@ class LLVM_LIBRARY_VISIBILITY BPFTargetInfo : public TargetInfo {
void fillValidCPUList(SmallVectorImpl<StringRef> &Values) const override;
bool setCPU(const std::string &Name) override {
- if (Name == "v3") {
+ if (Name == "v3" || Name == "v4") {
HasAlu32 = true;
}
diff --git a/clang/test/Misc/target-invalid-cpu-note.c b/clang/test/Misc/target-invalid-cpu-note.c
index cd1b0bc157cc9f..5782ae9e4a0a40 100644
--- a/clang/test/Misc/target-invalid-cpu-note.c
+++ b/clang/test/Misc/target-invalid-cpu-note.c
@@ -73,7 +73,7 @@
// RUN: not %clang_cc1 -triple bpf--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix BPF
// BPF: error: unknown target CPU 'not-a-cpu'
-// BPF-NEXT: note: valid target CPU values are: generic, v1, v2, v3, probe{{$}}
+// BPF-NEXT: note: valid target CPU values are: generic, v1, v2, v3, v4, probe{{$}}
// RUN: not %clang_cc1 -triple avr--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix AVR
// AVR: error: unknown target CPU 'not-a-cpu'
diff --git a/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp b/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
index 43edcaace32220..a9bdccb229c629 100644
--- a/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
+++ b/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
@@ -227,6 +227,7 @@ struct BPFOperand : public MCParsedAsmOperand {
.Case("if", true)
.Case("call", true)
.Case("goto", true)
+ .Case("gotol", true)
.Case("*", true)
.Case("exit", true)
.Case("lock", true)
@@ -241,13 +242,20 @@ struct BPFOperand : public MCParsedAsmOperand {
.Case("u32", true)
.Case("u16", true)
.Case("u8", true)
+ .Case("s32", true)
+ .Case("s16", true)
+ .Case("s8", true)
.Case("be64", true)
.Case("be32", true)
.Case("be16", true)
.Case("le64", true)
.Case("le32", true)
.Case("le16", true)
+ .Case("bswap16", true)
+ .Case("bswap32", true)
+ .Case("bswap64", true)
.Case("goto", true)
+ .Case("gotol", true)
.Case("ll", true)
.Case("skb", true)
.Case("s", true)
diff --git a/llvm/lib/Target/BPF/BPF.td b/llvm/lib/Target/BPF/BPF.td
index 0cc409dfcee168..7f38fbdd8c5c10 100644
--- a/llvm/lib/Target/BPF/BPF.td
+++ b/llvm/lib/Target/BPF/BPF.td
@@ -30,6 +30,7 @@ def : Proc<"generic", []>;
def : Proc<"v1", []>;
def : Proc<"v2", []>;
def : Proc<"v3", [ALU32]>;
+def : Proc<"v4", [ALU32]>;
def : Proc<"probe", []>;
def BPFInstPrinter : AsmWriter {
@@ -45,7 +46,7 @@ def BPFAsmParserVariant : AsmParserVariant {
int Variant = 0;
string Name = "BPF";
string BreakCharacters = ".";
- string TokenizingCharacters = "#()[]=:.<>!+*";
+ string TokenizingCharacters = "#()[]=:.<>!+*%/";
}
def BPF : Target {
diff --git a/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp b/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp
index fa626a775c8387..6cfeb9eab46e87 100644
--- a/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp
+++ b/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp
@@ -192,15 +192,17 @@ void BPFDAGToDAGISel::Select(SDNode *Node) {
default:
break;
case ISD::SDIV: {
- DebugLoc Empty;
- const DebugLoc &DL = Node->getDebugLoc();
- if (DL != Empty)
- errs() << "Error at line " << DL.getLine() << ": ";
- else
- errs() << "Error: ";
- errs() << "Unsupport signed division for DAG: ";
- Node->print(errs(), CurDAG);
- errs() << "Please convert to unsigned div/mod.\n";
+ if (!Subtarget->hasSdivSmod()) {
+ DebugLoc Empty;
+ const DebugLoc &DL = Node->getDebugLoc();
+ if (DL != Empty)
+ errs() << "Error at line " << DL.getLine() << ": ";
+ else
+ errs() << "Error: ";
+ errs() << "Unsupport signed division for DAG: ";
+ Node->print(errs(), CurDAG);
+ errs() << "Please convert to unsigned div/mod.\n";
+ }
break;
}
case ISD::INTRINSIC_W_CHAIN: {
diff --git a/llvm/lib/Target/BPF/BPFISelLowering.cpp b/llvm/lib/Target/BPF/BPFISelLowering.cpp
index 83a4bfb2f7584c..5e84af0095914e 100644
--- a/llvm/lib/Target/BPF/BPFISelLowering.cpp
+++ b/llvm/lib/Target/BPF/BPFISelLowering.cpp
@@ -102,7 +102,8 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SDIVREM, VT, Expand);
setOperationAction(ISD::UDIVREM, VT, Expand);
- setOperationAction(ISD::SREM, VT, Expand);
+ if (!STI.hasSdivSmod())
+ setOperationAction(ISD::SREM, VT, Expand);
setOperationAction(ISD::MULHU, VT, Expand);
setOperationAction(ISD::MULHS, VT, Expand);
setOperationAction(ISD::UMUL_LOHI, VT, Expand);
@@ -131,9 +132,11 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Expand);
+ if (!STI.hasMovsx()) {
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Expand);
+ }
// Extended load operations for i1 types must be promoted
for (MVT VT : MVT::integer_valuetypes()) {
@@ -141,9 +144,11 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,
setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
- setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
- setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Expand);
- setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i32, Expand);
+ if (!STI.hasLdsx()) {
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i32, Expand);
+ }
}
setBooleanContents(ZeroOrOneBooleanContent);
@@ -183,6 +188,7 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,
HasAlu32 = STI.getHasAlu32();
HasJmp32 = STI.getHasJmp32();
HasJmpExt = STI.getHasJmpExt();
+ HasMovsx = STI.hasMovsx();
}
bool BPFTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
@@ -673,11 +679,15 @@ BPFTargetLowering::EmitSubregExt(MachineInstr &MI, MachineBasicBlock *BB,
Register PromotedReg0 = RegInfo.createVirtualRegister(RC);
Register PromotedReg1 = RegInfo.createVirtualRegister(RC);
Register PromotedReg2 = RegInfo.createVirtualRegister(RC);
- BuildMI(BB, DL, TII.get(BPF::MOV_32_64), PromotedReg0).addReg(Reg);
- BuildMI(BB, DL, TII.get(BPF::SLL_ri), PromotedReg1)
- .addReg(PromotedReg0).addImm(32);
- BuildMI(BB, DL, TII.get(RShiftOp), PromotedReg2)
- .addReg(PromotedReg1).addImm(32);
+ if (HasMovsx) {
+ BuildMI(BB, DL, TII.get(BPF::MOVSX_rr_32), PromotedReg0).addReg(Reg);
+ } else {
+ BuildMI(BB, DL, TII.get(BPF::MOV_32_64), PromotedReg0).addReg(Reg);
+ BuildMI(BB, DL, TII.get(BPF::SLL_ri), PromotedReg1)
+ .addReg(PromotedReg0).addImm(32);
+ BuildMI(BB, DL, TII.get(RShiftOp), PromotedReg2)
+ .addReg(PromotedReg1).addImm(32);
+ }
return PromotedReg2;
}
diff --git a/llvm/lib/Target/BPF/BPFISelLowering.h b/llvm/lib/Target/BPF/BPFISelLowering.h
index 9b6fe85314432e..e78f4d9829cb63 100644
--- a/llvm/lib/Target/BPF/BPFISelLowering.h
+++ b/llvm/lib/Target/BPF/BPFISelLowering.h
@@ -71,6 +71,7 @@ class BPFTargetLowering : public TargetLowering {
bool HasAlu32;
bool HasJmp32;
bool HasJmpExt;
+ bool HasMovsx;
SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/BPF/BPFInstrFormats.td b/llvm/lib/Target/BPF/BPFInstrFormats.td
index 27db0be080ae52..841d97efc01c5c 100644
--- a/llvm/lib/Target/BPF/BPFInstrFormats.td
+++ b/llvm/lib/Target/BPF/BPFInstrFormats.td
@@ -90,6 +90,7 @@ def BPF_IMM : BPFModeModifer<0x0>;
def BPF_ABS : BPFModeModifer<0x1>;
def BPF_IND : BPFModeModifer<0x2>;
def BPF_MEM : BPFModeModifer<0x3>;
+def BPF_MEMSX : BPFModeModifer<0x4>;
def BPF_ATOMIC : BPFModeModifer<0x6>;
class BPFAtomicFlag<bits<4> val> {
diff --git a/llvm/lib/Target/BPF/BPFInstrInfo.td b/llvm/lib/Target/BPF/BPFInstrInfo.td
index 27bd87667b8458..a0defeda8118cf 100644
--- a/llvm/lib/Target/BPF/BPFInstrInfo.td
+++ b/llvm/lib/Target/BPF/BPFInstrInfo.td
@@ -53,6 +53,12 @@ def BPFIsLittleEndian : Predicate<"CurDAG->getDataLayout().isLittleEndian()">;
def BPFIsBigEndian : Predicate<"!CurDAG->getDataLayout().isLittleEndian()">;
def BPFHasALU32 : Predicate<"Subtarget->getHasAlu32()">;
def BPFNoALU32 : Predicate<"!Subtarget->getHasAlu32()">;
+def BPFHasLdsx : Predicate<"Subtarget->hasLdsx()">;
+def BPFHasMovsx : Predicate<"Subtarget->hasMovsx()">;
+def BPFHasBswap : Predicate<"Subtarget->hasBswap()">;
+def BPFHasSdivSmod : Predicate<"Subtarget->hasSdivSmod()">;
+def BPFNoMovsx : Predicate<"!Subtarget->hasMovsx()">;
+def BPFNoBswap : Predicate<"!Subtarget->hasBswap()">;
def brtarget : Operand<OtherVT> {
let PrintMethod = "printBrTargetOperand";
@@ -240,18 +246,19 @@ defm JSLE : J<BPF_JSLE, "s<=", BPF_CC_LE, BPF_CC_LE_32>;
}
// ALU instructions
-class ALU_RI<BPFOpClass Class, BPFArithOp Opc,
+class ALU_RI<BPFOpClass Class, BPFArithOp Opc, int off,
dag outs, dag ins, string asmstr, list<dag> pattern>
: TYPE_ALU_JMP<Opc.Value, BPF_K.Value, outs, ins, asmstr, pattern> {
bits<4> dst;
bits<32> imm;
let Inst{51-48} = dst;
+ let Inst{47-32} = off;
let Inst{31-0} = imm;
let BPFClass = Class;
}
-class ALU_RR<BPFOpClass Class, BPFArithOp Opc,
+class ALU_RR<BPFOpClass Class, BPFArithOp Opc, int off,
dag outs, dag ins, string asmstr, list<dag> pattern>
: TYPE_ALU_JMP<Opc.Value, BPF_X.Value, outs, ins, asmstr, pattern> {
bits<4> dst;
@@ -259,26 +266,27 @@ class ALU_RR<BPFOpClass Class, BPFArithOp Opc,
let Inst{55-52} = src;
let Inst{51-48} = dst;
+ let Inst{47-32} = off;
let BPFClass = Class;
}
-multiclass ALU<BPFArithOp Opc, string OpcodeStr, SDNode OpNode> {
- def _rr : ALU_RR<BPF_ALU64, Opc,
+multiclass ALU<BPFArithOp Opc, int off, string OpcodeStr, SDNode OpNode> {
+ def _rr : ALU_RR<BPF_ALU64, Opc, off,
(outs GPR:$dst),
(ins GPR:$src2, GPR:$src),
"$dst "#OpcodeStr#" $src",
[(set GPR:$dst, (OpNode i64:$src2, i64:$src))]>;
- def _ri : ALU_RI<BPF_ALU64, Opc,
+ def _ri : ALU_RI<BPF_ALU64, Opc, off,
(outs GPR:$dst),
(ins GPR:$src2, i64imm:$imm),
"$dst "#OpcodeStr#" $imm",
[(set GPR:$dst, (OpNode GPR:$src2, i64immSExt32:$imm))]>;
- def _rr_32 : ALU_RR<BPF_ALU, Opc,
+ def _rr_32 : ALU_RR<BPF_ALU, Opc, off,
(outs GPR32:$dst),
(ins GPR32:$src2, GPR32:$src),
"$dst "#OpcodeStr#" $src",
[(set GPR32:$dst, (OpNode i32:$src2, i32:$src))]>;
- def _ri_32 : ALU_RI<BPF_ALU, Opc,
+ def _ri_32 : ALU_RI<BPF_ALU, Opc, off,
(outs GPR32:$dst),
(ins GPR32:$src2, i32imm:$imm),
"$dst "#OpcodeStr#" $imm",
@@ -287,18 +295,23 @@ multiclass ALU<BPFArithOp Opc, string OpcodeStr, SDNode OpNode> {
let Constraints = "$dst = $src2" in {
let isAsCheapAsAMove = 1 in {
- defm ADD : ALU<BPF_ADD, "+=", add>;
- defm SUB : ALU<BPF_SUB, "-=", sub>;
- defm OR : ALU<BPF_OR, "|=", or>;
- defm AND : ALU<BPF_AND, "&=", and>;
- defm SLL : ALU<BPF_LSH, "<<=", shl>;
- defm SRL : ALU<BPF_RSH, ">>=", srl>;
- defm XOR : ALU<BPF_XOR, "^=", xor>;
- defm SRA : ALU<BPF_ARSH, "s>>=", sra>;
-}
- defm MUL : ALU<BPF_MUL, "*=", mul>;
- defm DIV : ALU<BPF_DIV, "/=", udiv>;
- defm MOD : ALU<BPF_MOD, "%=", urem>;
+ defm ADD : ALU<BPF_ADD, 0, "+=", add>;
+ defm SUB : ALU<BPF_SUB, 0, "-=", sub>;
+ defm OR : ALU<BPF_OR, 0, "|=", or>;
+ defm AND : ALU<BPF_AND, 0, "&=", and>;
+ defm SLL : ALU<BPF_LSH, 0, "<<=", shl>;
+ defm SRL : ALU<BPF_RSH, 0, ">>=", srl>;
+ defm XOR : ALU<BPF_XOR, 0, "^=", xor>;
+ defm SRA : ALU<BPF_ARSH, 0, "s>>=", sra>;
+}
+ defm MUL : ALU<BPF_MUL, 0, "*=", mul>;
+ defm DIV : ALU<BPF_DIV, 0, "/=", udiv>;
+ defm MOD : ALU<BPF_MOD, 0, "%=", urem>;
+
+ let Predicates = [BPFHasSdivSmod] in {
+ defm SDIV : ALU<BPF_DIV, 1, "s/=", sdiv>;
+ defm SMOD : ALU<BPF_MOD, 1, "s%=", srem>;
+ }
}
class NEG_RR<BPFOpClass Class, BPFArithOp Opc,
@@ -338,26 +351,49 @@ class LD_IMM64<bits<4> Pseudo, string OpcodeStr>
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
def LD_imm64 : LD_IMM64<0, "=">;
-def MOV_rr : ALU_RR<BPF_ALU64, BPF_MOV,
+def MOV_rr : ALU_RR<BPF_ALU64, BPF_MOV, 0,
(outs GPR:$dst),
(ins GPR:$src),
"$dst = $src",
[]>;
-def MOV_ri : ALU_RI<BPF_ALU64, BPF_MOV,
+def MOV_ri : ALU_RI<BPF_ALU64, BPF_MOV, 0,
(outs GPR:$dst),
(ins i64imm:$imm),
"$dst = $imm",
[(set GPR:$dst, (i64 i64immSExt32:$imm))]>;
-def MOV_rr_32 : ALU_RR<BPF_ALU, BPF_MOV,
+def MOV_rr_32 : ALU_RR<BPF_ALU, BPF_MOV, 0,
(outs GPR32:$dst),
(ins GPR32:$src),
"$dst = $src",
[]>;
-def MOV_ri_32 : ALU_RI<BPF_ALU, BPF_MOV,
+def MOV_ri_32 : ALU_RI<BPF_ALU, BPF_MOV, 0,
(outs GPR32:$dst),
(ins i32imm:$imm),
"$dst = $imm",
[(set GPR32:$dst, (i32 i32immSExt32:$imm))]>;
+
+let Predicates = [BPFHasMovsx] in {
+ def MOVSX_rr_8 : ALU_RR<BPF_ALU64, BPF_MOV, 8,
+ (outs GPR:$dst), (ins GPR:$src),
+ "$dst = (s8)$src",
+ [(set GPR:$dst, (sext_inreg GPR:$src, i8))]>;
+ def MOVSX_rr_16 : ALU_RR<BPF_ALU64, BPF_MOV, 16,
+ (outs GPR:$dst), (ins GPR:$src),
+ "$dst = (s16)$src",
+ [(set GPR:$dst, (sext_inreg GPR:$src, i16))]>;
+ def MOVSX_rr_32 : ALU_RR<BPF_ALU64, BPF_MOV, 32,
+ (outs GPR:$dst), (ins GPR:$src),
+ "$dst = (s32)$src",
+ [(set GPR:$dst, (sext_inreg GPR:$src, i32))]>;
+ def MOVSX_rr_32_8 : ALU_RR<BPF_ALU, BPF_MOV, 8,
+ (outs GPR32:$dst), (ins GPR32:$src),
+ "$dst = (s8)$src",
+ [(set GPR32:$dst, (sext_inreg GPR32:$src, i8))]>;
+ def MOVSX_rr_32_16 : ALU_RR<BPF_ALU, BPF_MOV, 16,
+ (outs GPR32:$dst), (ins GPR32:$src),
+ "$dst = (s16)$src",
+ [(set GPR32:$dst, (sext_inreg GPR32:$src, i16))]>;
+}
}
def FI_ri
@@ -421,8 +457,8 @@ let Predicates = [BPFNoALU32] in {
def STD : STOREi64<BPF_DW, "u64", store>;
// LOAD instructions
-class LOAD<BPFWidthModifer SizeOp, string OpcodeStr, list<dag> Pattern>
- : TYPE_LD_ST<BPF_MEM.Value, SizeOp.Value,
+class LOAD<BPFWidthModifer SizeOp, BPFModeModifer ModOp, string OpcodeStr, list<dag> Pattern>
+ : TYPE_LD_ST<ModOp.Value, SizeOp.Value,
(outs GPR:$dst),
(ins MEMri:$addr),
"$dst = *("#OpcodeStr#" *)($addr)",
@@ -436,8 +472,8 @@ class LOAD<BPFWidthModifer SizeOp, string OpcodeStr, list<dag> Pattern>
let BPFClass = BPF_LDX;
}
-class LOADi64<BPFWidthModifer SizeOp, string OpcodeStr, PatFrag OpNode>
- : LOAD<SizeOp, OpcodeStr, [(set i64:$dst, (OpNode ADDRri:$addr))]>;
+class LOADi64<BPFWidthModifer SizeOp, BPFModeModifer ModOp, string OpcodeStr, PatFrag OpNode>
+ : LOAD<SizeOp, ModOp, OpcodeStr, [(set i64:$dst, (OpNode ADDRri:$addr))]>;
let isCodeGenOnly = 1 in {
def CORE_MEM : TYPE_LD_ST<BPF_MEM.Value, BPF_W.Value,
@@ -451,7 +487,7 @@ let isCodeGenOnly = 1 in {
"$dst = core_alu32_mem($opcode, $src, $offset)",
[]>;
let Constraints = "$dst = $src" in {
- def CORE_SHIFT : ALU_RR<BPF_ALU64, BPF_LSH,
+ def CORE_SHIFT : ALU_RR<BPF_ALU64, BPF_LSH, 0,
(outs GPR:$dst),
(ins u64imm:$opcode, GPR:$src, u64imm:$offset),
"$dst = core_shift($opcode, $src, $offset)",
@@ -460,12 +496,18 @@ let isCodeGenOnly = 1 in {
}
let Predicates = [BPFNoALU32] in {
- def LDW : LOADi64<BPF_W, "u32", zextloadi32>;
- def LDH : LOADi64<BPF_H, "u16", zextloadi16>;
- def LDB : LOADi64<BPF_B, "u8", zextloadi8>;
+ def LDW : LOADi64<BPF_W, BPF_MEM, "u32", zextloadi32>;
+ def LDH : LOADi64<BPF_H, BPF_MEM, "u16", zextloadi16>;
+ def LDB : LOADi64<BPF_B, BPF_MEM, "u8", zextloadi8>;
+}
+
+let Predicates = [BPFHasLdsx] in {
+ def LDWSX : LOADi64<BPF_W, BPF_MEMSX, "s32", sextloadi32>;
+ def LDHSX : LOADi64<BPF_H, BPF_MEMSX, "s16", sextloadi16>;
+ def LDBSX : LOADi64<BPF_B, BPF_MEMSX, "s8", sextloadi8>;
}
-def LDD : LOADi64<BPF_DW, "u64", load>;
+def LDD : LOADi64<BPF_DW, BPF_MEM, "u64", load>;
class BRANCH<BPFJumpOp Opc, string OpcodeStr, list<dag> Pattern>
: TYPE_ALU_JMP<Opc.Value, BPF_K.Value,
@@ -479,6 +521,18 @@ class BRANCH<BPFJumpOp Opc, string OpcodeStr, list<dag> Pattern>
let BPFClass = BPF_JMP;
}
+class BRANCH_LONG<BPFJumpOp Opc, string OpcodeStr, list<dag> Pattern>
+ : TYPE_ALU_JMP<Opc.Value, BPF_K.Value,
+ (outs),
+ (ins brtarget:$BrDst),
+ !strconcat(OpcodeStr, " $BrDst"),
+ Pattern> {
+ bits<32> BrDst;
+
+ let Inst{31-0} = BrDst;
+ let BPFClass = BPF_JMP32;
+}
+
class CALL<string OpcodeStr>
: TYPE_ALU_JMP<BPF_CALL.Value, BPF_K.Value,
(outs),
@@ -506,6 +560,7 @@ class CALLX<string OpcodeStr>
// Jump always
let isBranch = 1, isTerminator = 1, hasDelaySlot=0, isBarrier = 1 in {
def JMP : BRANCH<BPF_JA, "goto", [(br bb:$BrDst)]>;
+ def JMPL : BRANCH_LONG<BPF_JA, "gotol", []>;
}
// Jump and link
@@ -835,7 +890,7 @@ let Defs = [R0], Uses = [R0] in {
}
// bswap16, bswap32, bswap64
-class BSWAP<bits<32> SizeOp, string OpcodeStr, BPFSrcType SrcType, list<dag> Pattern>
+class BSWAP<BPFOpClass Class, bits<32> SizeOp, string OpcodeStr, BPFSrcType SrcType, list<dag> Pattern>
: TYPE_ALU_JMP<BPF_END.Value, SrcType.Value,
(outs GPR:$dst),
(ins GPR:$src),
@@ -845,21 +900,29 @@ class BSWAP<bits<32> SizeOp, string OpcodeStr, BPFSrcType SrcType, list<dag> Pat
let Inst{51-48} = dst;
let Inst{31-0} = SizeOp;
- let BPFClass = BPF_ALU;
+ let BPFClass = Class;
}
let Constraints = "$dst = $src" in {
+ let Predicates = [BPFHasBswap] in {
+ def BSWAP16 : BSWAP<BPF_ALU64, 16, "bswap16", BPF_TO_LE, [(set GPR:$dst, (srl (bswap GPR:$src), (i64 48)))]>;
+ def BSWAP32 : BSWAP<BPF_ALU64, 32, "bswap32", BPF_TO_LE, [(set GPR:$dst, (srl (bswap GPR:$src), (i64 32)))]>;
+ def BSWAP64 : BSWAP<BPF_ALU64, 64, "bswap64", BPF_TO_LE, [(set GPR:$dst, (bswap GPR:$src))]>;
+ }
+
+ let Predicates = [BPFNoBswap] in {
let Predicates = [BPFIsLittleEndian] in {
- def BE16 : BSWAP<16, "be16", BPF_TO_BE, [(set GPR:$dst, (srl (bswap GPR:$src), (i64 48)))]>;
- def BE32 : BSWAP<32, "be32", BPF_TO_BE, [(set GPR:$dst, (srl (bswap GPR:$src), (i64 32)))]>;
- def BE64 : BSWAP<64, "be64", BPF_TO_BE, [(set GPR:$dst, (bswap GPR:$src))]>;
+ def BE16 : BSWAP<BPF_ALU, 16, "be16", BPF_TO_BE, [(set GPR:$dst, (srl (bswap GPR:$src), (i64 48)))]>;
+ def BE32 : BSWAP<BPF_ALU, 32, "be32", BPF_TO_BE, [(set GPR:$dst, (srl (bswap GPR:$src), (i64 32)))]>;
+ def BE64 : BSWAP<BPF_ALU, 64, "be64", BPF_TO_BE, [(set GPR:$dst, (bswap GPR:$src))]>;
}
let Predicates = [BPFIsBigEndian] in {
- def LE16 : BSWAP<16, "le16", BPF_TO_LE, [(set GPR:$dst, (srl (bswap GPR:$src), (i64 48)))]>;
- def LE32 : BSWAP<32, "le32", BPF_TO_LE, [(set GPR:$dst, (srl (bswap GPR:$src), (i64 32)))]>;
- def LE64 : BSWAP<64, "le64", BPF_TO_LE, [(set GPR:$dst, (bswap GPR:$src))]>;
+ def LE16 : BSWAP<BPF_ALU, 16, "le16", BPF_TO_LE, [(set GPR:$dst, (srl (bswap GPR:$src), (i64 48)))]>;
+ def LE32 : BSWAP<BPF_ALU, 32, "le32", BPF_TO_LE, [(set GPR:$dst, (srl (bswap GPR:$src), (i64 32)))]>;
+ def LE64 : BSWAP<BPF_ALU, 64, "le64", BPF_TO_LE, [(set GPR:$dst, (bswap GPR:$src))]>;
}
+ }
}
let Defs = [R0, R1, R2, R3, R4, R5], Uses = [R6], hasSideEffects = 1,
@@ -898,13 +961,20 @@ def LD_IND_H : LOAD_IND<BPF_H, "u16", int_bpf_load_half>;
def LD_IND_W : LOAD_IND<BPF_W, "u32", int_bpf_load_word>;
let isCodeGenOnly = 1 in {
- def MOV_32_64 : ALU_RR<BPF_ALU, BPF_MOV,
+ def MOV_32_64 : ALU_RR<BPF_ALU, BPF_MOV, 0,
(outs GPR:$dst), (ins GPR32:$src),
"$dst = $src", []>;
}
-def : Pat<(i64 (sext GPR32:$src)),
- (SRA_ri (SLL_ri (MOV_32_64 GPR32:$src), 32), 32)>;
+let Predicates = [BPFNoMovsx] in {
+ def : Pat<(i64 (sext GPR32:$src)),
+ (SRA_ri (SLL_ri (MOV_32_64 GPR32:$src), 32), 32)>;
+}
+
+let Predicates = [BPFHasMovsx] in {
+ def : Pat<(i64 (sext GPR32:$src)),
+ (MOVSX_rr_32 (MOV_32_64 GPR32:$src))>;
+}
def : Pat<(i64 (zext GPR32:$src)), (MOV_32_64 GPR32:$src)>;
@@ -940,8 +1010,8 @@ let Predicates = [BPFHasALU32], DecoderNamespace = "BPFALU32" in {
def STB32 : STOREi32<BPF_B, "u8", truncstorei8>;
}
-class LOAD32<BPFWidthModifer SizeOp, string OpcodeStr, list<dag> Pattern>
- : TYPE_LD_ST<BPF_MEM.Value, SizeOp.Value,
+class LOAD32<BPFWidthModifer SizeOp, BPFModeModifer ModOp, string OpcodeStr, list<dag> Pattern>
+ : TYPE_LD_ST<ModOp.Value, SizeOp.Value,
(outs GPR32:$dst),
(ins MEMri:$addr),
"$dst = *("#OpcodeStr#" *)($addr)",
@@ -955,13 +1025,13 @@ class LOAD32<BPFWidthModifer SizeOp, string OpcodeStr, list<dag> Pattern>
let BPFClass = BPF_LDX;
}
-class LOADi32<BPFWidthModifer SizeOp, string OpcodeStr, PatFrag OpNode>
- : LOAD32<SizeOp, OpcodeStr, [(set i32:$dst, (OpNode ADDRri:$addr))]>;
+class LOADi32<BPFWidthModifer SizeOp, BPFModeModifer ModOp, string OpcodeStr, PatFrag OpNode>
+ : LOAD32<SizeOp, ModOp, OpcodeStr, [(set i32:$dst, (OpNode ADDRri:$addr))]>;
let Predicates = [BPFHasALU32], DecoderNamespace = "BPFALU32" in {
- def LDW32 : LOADi32<BPF_W, "u32", load>;
- def LDH32 : LOADi32<BPF_H, "u16", zextloadi16>;
- def LDB32 : LOADi32<BPF_B, "u8", zextloadi8>;
+ def LDW32 : LOADi32<BPF_W, BPF_MEM, "u32", load>;
+ def LDH32 : LOADi32<BPF_H, BPF_MEM, "u16", zextloadi16>;
+ def LDB32 : LOADi32<BPF_B, BPF_MEM, "u8", zextloadi8>;
}
let Predicates = [BPFHasALU32] in {
@@ -973,6 +1043,12 @@ let Predicates = [BPFHasALU32] in {
(STW32 (EXTRACT_SUBREG GPR:$src, sub_32), ADDRri:$dst)>;
def : Pat<(i32 (extloadi8 ADDRri:$src)), (i32 (LDB32 ADDRri:$src))>;
def : Pat<(i32 (extloadi16 ADDRri:$src)), (i32 (LDH32 ADDRri:$src))>;
+
+ let Predicates = [BPFHasLdsx] in {
+ def : Pat<(i32 (sextloadi8 ADDRri:$src)), (EXTRACT_SUBREG (LDBSX ADDRri:$src), sub_32)>;
+ def : Pat<(i32 (sextloadi16 ADDRri:$src)), (EXTRACT_SUBREG (LDHSX ADDRri:$src), sub_32)>;
+ }
+
def : Pat<(i64 (zextloadi8 ADDRri:$src)),
(SUBREG_TO_REG (i64 0), (LDB32 ADDRri:$src), sub_32)>;
def : Pat<(i64 (zextloadi16 ADDRri:$src)),
diff --git a/llvm/lib/Target/BPF/BPFMIPeephole.cpp b/llvm/lib/Target/BPF/BPFMIPeephole.cpp
index d0272bd97bedf7..c46e21d8f063e7 100644
--- a/llvm/lib/Target/BPF/BPFMIPeephole.cpp
+++ b/llvm/lib/Target/BPF/BPFMIPeephole.cpp
@@ -34,6 +34,9 @@ using namespace llvm;
#define DEBUG_TYPE "bpf-mi-zext-elim"
+static cl::opt<int> GotolAbsLowBound("gotol-abs-low-bound", cl::Hidden,
+ cl::init(INT16_MAX >> 1), cl::desc("Specify gotol lower bound"));
+
STATISTIC(ZExtElemNum, "Number of zero extension shifts eliminated");
namespace {
@@ -302,6 +305,8 @@ struct BPFMIPreEmitPeephole : public MachineFunctionPass {
static char ID;
MachineFunction *MF;
const TargetRegisterInfo *TRI;
+ const BPFInstrInfo *TII;
+ bool SupportGotol;
BPFMIPreEmitPeephole() : MachineFunctionPass(ID) {
initializeBPFMIPreEmitPeepholePass(*PassRegistry::getPassRegistry());
@@ -311,7 +316,9 @@ struct BPFMIPreEmitPeephole : public MachineFunctionPass {
// Initialize class variables.
void initialize(MachineFunction &MFParm);
+ bool in16BitRange(int Num);
bool eliminateRedundantMov();
+ bool adjustBranch();
public:
@@ -322,14 +329,20 @@ struct BPFMIPreEmitPeephole : public MachineFunctionPass {
initialize(MF);
- return eliminateRedundantMov();
+ bool Changed;
+ Changed = eliminateRedundantMov();
+ if (SupportGotol)
+ Changed = adjustBranch() || Changed;
+ return Changed;
}
};
// Initialize class variables.
void BPFMIPreEmitPeephole::initialize(MachineFunction &MFParm) {
MF = &MFParm;
+ TII = MF->getSubtarget<BPFSubtarget>().getInstrInfo();
TRI = MF->getSubtarget<BPFSubtarget>().getRegisterInfo();
+ SupportGotol = MF->getSubtarget<BPFSubtarget>().hasGotol();
LLVM_DEBUG(dbgs() << "*** BPF PreEmit peephole pass ***\n\n");
}
@@ -374,6 +387,215 @@ bool BPFMIPreEmitPeephole::eliminateRedundantMov() {
return Eliminated;
}
+bool BPFMIPreEmitPeephole::in16BitRange(int Num) {
+ // Well, the cut-off is not precisely at 16bit range since
+ // new codes are added during the transformation. So let us be
+ // a little bit conservative.
+ return Num >= -GotolAbsLowBound && Num <= GotolAbsLowBound;
+}
+
+// Before cpu=v4, only 16bit branch target offset (-0x8000 to 0x7fff)
+// is supported for both unconditional (JMP) and condition (JEQ, JSGT,
+// etc.) branches. In certain cases, e.g., full unrolling, the branch
+// target offset might exceed 16bit range. If this happens, the llvm
+// will generate incorrect code as the offset is truncated to 16bit.
+//
+// To fix this rare case, a new insn JMPL is introduced. This new
+// insn supports 32bit branch target offset. The compiler
+// does not use this insn during insn selection. Rather, BPF backend
+// will estimate the branch target offset and do JMP -> JMPL and
+// JEQ -> JEQ + JMPL conversion if the estimated branch target offset
+// is beyond 16bit.
+bool BPFMIPreEmitPeephole::adjustBranch() {
+ bool Changed = false;
+ int CurrNumInsns = 0;
+ DenseMap<MachineBasicBlock *, int> SoFarNumInsns;
+ DenseMap<MachineBasicBlock *, MachineBasicBlock *> FollowThroughBB;
+ std::vector<MachineBasicBlock *> MBBs;
+
+ MachineBasicBlock *PrevBB = nullptr;
+ for (MachineBasicBlock &MBB : *MF) {
+ // MBB.size() is the number of insns in this basic block, including some
+ // debug info, e.g., DEBUG_VALUE, so we may over-count a little bit.
+ // Typically we have way more normal insns than DEBUG_VALUE insns.
+ // Also, if we indeed need to convert conditional branch like JEQ to
+ // JEQ + JMPL, we actually introduced some new insns like below.
+ CurrNumInsns += (int)MBB.size();
+ SoFarNumInsns[&MBB] = CurrNumInsns;
+ if (PrevBB != nullptr)
+ FollowThroughBB[PrevBB] = &MBB;
+ PrevBB = &MBB;
+ // A list of original BBs to make later traversal easier.
+ MBBs.push_back(&MBB);
+ }
+ FollowThroughBB[PrevBB] = nullptr;
+
+ for (unsigned i = 0; i < MBBs.size(); i++) {
+ // We have four cases here:
+ // (1). no terminator, simple follow through.
+ // (2). jmp to another bb.
+ // (3). conditional jmp to another bb or follow through.
+ // (4). conditional jmp followed by an unconditional jmp.
+ MachineInstr *CondJmp = nullptr, *UncondJmp = nullptr;
+
+ MachineBasicBlock *MBB = MBBs[i];
+ for (MachineInstr &Term : MBB->terminators()) {
+ if (Term.isConditionalBranch()) {
+ assert(CondJmp == nullptr);
+ CondJmp = &Term;
+ } else if (Term.isUnconditionalBranch()) {
+ assert(UncondJmp == nullptr);
+ UncondJmp = &Term;
+ }
+ }
+
+ // (1). no terminator, simple follow through.
+ if (!CondJmp && !UncondJmp)
+ continue;
+
+ MachineBasicBlock *CondTargetBB, *JmpBB;
+ CurrNumInsns = SoFarNumInsns[MBB];
+
+ // (2). jmp to another bb.
+ if (!CondJmp && UncondJmp) {
+ JmpBB = UncondJmp->getOperand(0).getMBB();
+ if (in16BitRange(SoFarNumInsns[JmpBB] - JmpBB->size() - CurrNumInsns))
+ continue;
+
+ // replace this insn as a JMPL.
+ BuildMI(MBB, UncondJmp->getDebugLoc(), TII->get(BPF::JMPL)).addMBB(JmpBB);
+ UncondJmp->eraseFromParent();
+ Changed = true;
+ continue;
+ }
+
+ const BasicBlock *TermBB = MBB->getBasicBlock();
+ int Dist;
+
+ // (3). conditional jmp to another bb or follow through.
+ if (!UncondJmp) {
+ CondTargetBB = CondJmp->getOperand(2).getMBB();
+ MachineBasicBlock *FollowBB = FollowThroughBB[MBB];
+ Dist = SoFarNumInsns[CondTargetBB] - CondTargetBB->size() - CurrNumInsns;
+ if (in16BitRange(Dist))
+ continue;
+
+ // We have
+ // B2: ...
+ // if (cond) goto B5
+ // B3: ...
+ // where B2 -> B5 is beyond 16bit range.
+ //
+ // We do not have 32bit cond jmp insn. So we try to do
+ // the following.
+ // B2: ...
+ // if (cond) goto New_B1
+ // New_B0 goto B3
+ // New_B1: gotol B5
+ // B3: ...
+ // Basically two new basic blocks are created.
+ MachineBasicBlock *New_B0 = MF->CreateMachineBasicBlock(TermBB);
+ MachineBasicBlock *New_B1 = MF->CreateMachineBasicBlock(TermBB);
+
+ // Insert New_B0 and New_B1 into function block list.
+ MachineFunction::iterator MBB_I = ++MBB->getIterator();
+ MF->insert(MBB_I, New_B0);
+ MF->insert(MBB_I, New_B1);
+
+ // replace B2 cond jump
+ if (CondJmp->getOperand(1).isReg())
+ BuildMI(*MBB, MachineBasicBlock::iterator(*CondJmp), CondJmp->getDebugLoc(), TII->get(CondJmp->getOpcode()))
+ .addReg(CondJmp->getOperand(0).getReg())
+ .addReg(CondJmp->getOperand(1).getReg())
+ .addMBB(New_B1);
+ else
+ BuildMI(*MBB, MachineBasicBlock::iterator(*CondJmp), CondJmp->getDebugLoc(), TII->get(CondJmp->getOpcode()))
+ .addReg(CondJmp->getOperand(0).getReg())
+ .addImm(CondJmp->getOperand(1).getImm())
+ .addMBB(New_B1);
+
+ // it is possible that CondTargetBB and FollowBB are the same. But the
+ // above Dist checking should already filtered this case.
+ MBB->removeSuccessor(CondTargetBB);
+ MBB->removeSuccessor(FollowBB);
+ MBB->addSuccessor(New_B0);
+ MBB->addSuccessor(New_B1);
+
+ // Populate insns in New_B0 and New_B1.
+ BuildMI(New_B0, CondJmp->getDebugLoc(), TII->get(BPF::JMP)).addMBB(FollowBB);
+ BuildMI(New_B1, CondJmp->getDebugLoc(), TII->get(BPF::JMPL))
+ .addMBB(CondTargetBB);
+
+ New_B0->addSuccessor(FollowBB);
+ New_B1->addSuccessor(CondTargetBB);
+ CondJmp->eraseFromParent();
+ Changed = true;
+ continue;
+ }
+
+ // (4). conditional jmp followed by an unconditional jmp.
+ CondTargetBB = CondJmp->getOperand(2).getMBB();
+ JmpBB = UncondJmp->getOperand(0).getMBB();
+
+ // We have
+ // B2: ...
+ // if (cond) goto B5
+ // JMP B7
+ // B3: ...
+ //
+ // If only B2->B5 is out of 16bit range, we can do
+ // B2: ...
+ // if (cond) goto new_B
+ // JMP B7
+ // New_B: gotol B5
+ // B3: ...
+ //
+ // If only 'JMP B7' is out of 16bit range, we can replace
+ // 'JMP B7' with 'JMPL B7'.
+ //
+ // If both B2->B5 and 'JMP B7' is out of range, just do
+ // both the above transformations.
+ Dist = SoFarNumInsns[CondTargetBB] - CondTargetBB->size() - CurrNumInsns;
+ if (!in16BitRange(Dist)) {
+ MachineBasicBlock *New_B = MF->CreateMachineBasicBlock(TermBB);
+
+ // Insert New_B0 into function block list.
+ MF->insert(++MBB->getIterator(), New_B);
+
+ // replace B2 cond jump
+ if (CondJmp->getOperand(1).isReg())
+ BuildMI(*MBB, MachineBasicBlock::iterator(*CondJmp), CondJmp->getDebugLoc(), TII->get(CondJmp->getOpcode()))
+ .addReg(CondJmp->getOperand(0).getReg())
+ .addReg(CondJmp->getOperand(1).getReg())
+ .addMBB(New_B);
+ else
+ BuildMI(*MBB, MachineBasicBlock::iterator(*CondJmp), CondJmp->getDebugLoc(), TII->get(CondJmp->getOpcode()))
+ .addReg(CondJmp->getOperand(0).getReg())
+ .addImm(CondJmp->getOperand(1).getImm())
+ .addMBB(New_B);
+
+ if (CondTargetBB != JmpBB)
+ MBB->removeSuccessor(CondTargetBB);
+ MBB->addSuccessor(New_B);
+
+ // Populate insn in New_B.
+ BuildMI(New_B, CondJmp->getDebugLoc(), TII->get(BPF::JMPL)).addMBB(CondTargetBB);
+
+ New_B->addSuccessor(CondTargetBB);
+ CondJmp->eraseFromParent();
+ Changed = true;
+ }
+
+ if (!in16BitRange(SoFarNumInsns[JmpBB] - CurrNumInsns)) {
+ BuildMI(MBB, UncondJmp->getDebugLoc(), TII->get(BPF::JMPL)).addMBB(JmpBB);
+ UncondJmp->eraseFromParent();
+ Changed = true;
+ }
+ }
+
+ return Changed;
+}
+
} // end default namespace
INITIALIZE_PASS(BPFMIPreEmitPeephole, "bpf-mi-pemit-peephole",
diff --git a/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp b/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp
index 088195994edd61..a415f988226846 100644
--- a/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp
+++ b/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp
@@ -96,7 +96,8 @@ void BPFMISimplifyPatchable::initialize(MachineFunction &MFParm) {
bool BPFMISimplifyPatchable::isLoadInst(unsigned Opcode) {
return Opcode == BPF::LDD || Opcode == BPF::LDW || Opcode == BPF::LDH ||
Opcode == BPF::LDB || Opcode == BPF::LDW32 || Opcode == BPF::LDH32 ||
- Opcode == BPF::LDB32;
+ Opcode == BPF::LDB32 || Opcode == BPF::LDWSX || Opcode == BPF::LDHSX ||
+ Opcode == BPF::LDBSX;
}
void BPFMISimplifyPatchable::checkADDrr(MachineRegisterInfo *MRI,
@@ -119,7 +120,8 @@ void BPFMISimplifyPatchable::checkADDrr(MachineRegisterInfo *MRI,
unsigned COREOp;
if (Opcode == BPF::LDB || Opcode == BPF::LDH || Opcode == BPF::LDW ||
Opcode == BPF::LDD || Opcode == BPF::STB || Opcode == BPF::STH ||
- Opcode == BPF::STW || Opcode == BPF::STD)
+ Opcode == BPF::STW || Opcode == BPF::STD || Opcode == BPF::LDWSX ||
+ Opcode == BPF::LDHSX || Opcode == BPF::LDBSX)
COREOp = BPF::CORE_MEM;
else if (Opcode == BPF::LDB32 || Opcode == BPF::LDH32 ||
Opcode == BPF::LDW32 || Opcode == BPF::STB32 ||
diff --git a/llvm/lib/Target/BPF/BPFSubtarget.cpp b/llvm/lib/Target/BPF/BPFSubtarget.cpp
index d66933fef72d46..b99f9069523e81 100644
--- a/llvm/lib/Target/BPF/BPFSubtarget.cpp
+++ b/llvm/lib/Target/BPF/BPFSubtarget.cpp
@@ -23,6 +23,17 @@ using namespace llvm;
#define GET_SUBTARGETINFO_CTOR
#include "BPFGenSubtargetInfo.inc"
+static cl::opt<bool> Disable_ldsx("disable-ldsx", cl::Hidden, cl::init(false),
+ cl::desc("Disable ldsx insns"));
+static cl::opt<bool> Disable_movsx("disable-movsx", cl::Hidden, cl::init(false),
+ cl::desc("Disable movsx insns"));
+static cl::opt<bool> Disable_bswap("disable-bswap", cl::Hidden, cl::init(false),
+ cl::desc("Disable bswap insns"));
+static cl::opt<bool> Disable_sdiv_smod("disable-sdiv-smod", cl::Hidden,
+ cl::init(false), cl::desc("Disable sdiv/smod insns"));
+static cl::opt<bool> Disable_gotol("disable-gotol", cl::Hidden, cl::init(false),
+ cl::desc("Disable gotol insn"));
+
void BPFSubtarget::anchor() {}
BPFSubtarget &BPFSubtarget::initializeSubtargetDependencies(StringRef CPU,
@@ -38,6 +49,11 @@ void BPFSubtarget::initializeEnvironment() {
HasJmp32 = false;
HasAlu32 = false;
UseDwarfRIS = false;
+ HasLdsx = false;
+ HasMovsx = false;
+ HasBswap = false;
+ HasSdivSmod = false;
+ HasGotol = false;
}
void BPFSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
@@ -55,6 +71,17 @@ void BPFSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
HasAlu32 = true;
return;
}
+ if (CPU == "v4") {
+ HasJmpExt = true;
+ HasJmp32 = true;
+ HasAlu32 = true;
+ HasLdsx = !Disable_ldsx;
+ HasMovsx = !Disable_movsx;
+ HasBswap = !Disable_bswap;
+ HasSdivSmod = !Disable_sdiv_smod;
+ HasGotol = !Disable_gotol;
+ return;
+ }
}
BPFSubtarget::BPFSubtarget(const Triple &TT, const std::string &CPU,
diff --git a/llvm/lib/Target/BPF/BPFSubtarget.h b/llvm/lib/Target/BPF/BPFSubtarget.h
index 8f833b3c75d04f..12749dd739e244 100644
--- a/llvm/lib/Target/BPF/BPFSubtarget.h
+++ b/llvm/lib/Target/BPF/BPFSubtarget.h
@@ -56,6 +56,9 @@ class BPFSubtarget : public BPFGenSubtargetInfo {
// whether we should enable MCAsmInfo DwarfUsesRelocationsAcrossSections
bool UseDwarfRIS;
+ // whether cpu v4 insns are enabled.
+ bool HasLdsx, HasMovsx, HasBswap, HasSdivSmod, HasGotol;
+
public:
// This constructor initializes the data members to match that
// of the specified triple.
@@ -71,6 +74,11 @@ class BPFSubtarget : public BPFGenSubtargetInfo {
bool getHasJmp32() const { return HasJmp32; }
bool getHasAlu32() const { return HasAlu32; }
bool getUseDwarfRIS() const { return UseDwarfRIS; }
+ bool hasLdsx() const { return HasLdsx; }
+ bool hasMovsx() const { return HasMovsx; }
+ bool hasBswap() const { return HasBswap; }
+ bool hasSdivSmod() const { return HasSdivSmod; }
+ bool hasGotol() const { return HasGotol; }
const BPFInstrInfo *getInstrInfo() const override { return &InstrInfo; }
const BPFFrameLowering *getFrameLowering() const override {
diff --git a/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp b/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
index 2565d8a0d763f7..536bee5393843a 100644
--- a/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
+++ b/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
@@ -57,8 +57,7 @@ class BPFDisassembler : public MCDisassembler {
BPF_ABS = 0x1,
BPF_IND = 0x2,
BPF_MEM = 0x3,
- BPF_LEN = 0x4,
- BPF_MSH = 0x5,
+ BPF_MEMSX = 0x4,
BPF_ATOMIC = 0x6
};
diff --git a/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp b/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
index 56fdd676613271..f2445d3695e065 100644
--- a/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
+++ b/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
@@ -6,12 +6,14 @@
//
//===----------------------------------------------------------------------===//
+#include "MCTargetDesc/BPFMCFixups.h"
#include "MCTargetDesc/BPFMCTargetDesc.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/Support/EndianStream.h"
#include <cassert>
@@ -41,7 +43,10 @@ class BPFAsmBackend : public MCAsmBackend {
return false;
}
- unsigned getNumFixupKinds() const override { return 1; }
+ unsigned getNumFixupKinds() const override {
+ return BPF::NumTargetFixupKinds;
+ }
+ const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;
bool writeNopData(raw_ostream &OS, uint64_t Count,
const MCSubtargetInfo *STI) const override;
@@ -49,6 +54,20 @@ class BPFAsmBackend : public MCAsmBackend {
} // end anonymous namespace
+const MCFixupKindInfo &
+BPFAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
+ const static MCFixupKindInfo Infos[BPF::NumTargetFixupKinds] = {
+ { "FK_BPF_PCRel_4", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+ };
+
+ if (Kind < FirstTargetFixupKind)
+ return MCAsmBackend::getFixupKindInfo(Kind);
+
+ assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
+ "Invalid kind!");
+ return Infos[Kind - FirstTargetFixupKind];
+}
+
bool BPFAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
const MCSubtargetInfo *STI) const {
if ((Count % 8) != 0)
@@ -85,6 +104,11 @@ void BPFAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
Data[Fixup.getOffset() + 1] = 0x1;
support::endian::write32be(&Data[Fixup.getOffset() + 4], Value);
}
+ } else if (Fixup.getTargetKind() == BPF::FK_BPF_PCRel_4) {
+ // The input Value represents the number of bytes.
+ Value = (uint32_t)((Value - 8) / 8);
+ support::endian::write<uint32_t>(&Data[Fixup.getOffset() + 4], Value,
+ Endian);
} else {
assert(Fixup.getKind() == FK_PCRel_2);
diff --git a/llvm/lib/Target/BPF/MCTargetDesc/BPFInstPrinter.cpp b/llvm/lib/Target/BPF/MCTargetDesc/BPFInstPrinter.cpp
index 0761681c115bc5..15ab55f95e69b8 100644
--- a/llvm/lib/Target/BPF/MCTargetDesc/BPFInstPrinter.cpp
+++ b/llvm/lib/Target/BPF/MCTargetDesc/BPFInstPrinter.cpp
@@ -10,6 +10,8 @@
//
//===----------------------------------------------------------------------===//
+
+#include "BPF.h"
#include "MCTargetDesc/BPFInstPrinter.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
@@ -100,8 +102,13 @@ void BPFInstPrinter::printBrTargetOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNo);
if (Op.isImm()) {
- int16_t Imm = Op.getImm();
- O << ((Imm >= 0) ? "+" : "") << formatImm(Imm);
+ if (MI->getOpcode() == BPF::JMPL) {
+ int32_t Imm = Op.getImm();
+ O << ((Imm >= 0) ? "+" : "") << formatImm(Imm);
+ } else {
+ int16_t Imm = Op.getImm();
+ O << ((Imm >= 0) ? "+" : "") << formatImm(Imm);
+ }
} else if (Op.isExpr()) {
printExpr(Op.getExpr(), O);
} else {
diff --git a/llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp
index 4bc74b54a11d64..420a2aad480a18 100644
--- a/llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp
+++ b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp
@@ -10,6 +10,7 @@
//
//===----------------------------------------------------------------------===//
+#include "MCTargetDesc/BPFMCFixups.h"
#include "MCTargetDesc/BPFMCTargetDesc.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCCodeEmitter.h"
@@ -95,6 +96,8 @@ unsigned BPFMCCodeEmitter::getMachineOpValue(const MCInst &MI,
Fixups.push_back(MCFixup::create(0, Expr, FK_PCRel_4));
else if (MI.getOpcode() == BPF::LD_imm64)
Fixups.push_back(MCFixup::create(0, Expr, FK_SecRel_8));
+ else if (MI.getOpcode() == BPF::JMPL)
+ Fixups.push_back(MCFixup::create(0, Expr, (MCFixupKind)BPF::FK_BPF_PCRel_4));
else
// bb label
Fixups.push_back(MCFixup::create(0, Expr, FK_PCRel_2));
diff --git a/llvm/lib/Target/BPF/MCTargetDesc/BPFMCFixups.h b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCFixups.h
new file mode 100644
index 00000000000000..55bc8f90f12625
--- /dev/null
+++ b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCFixups.h
@@ -0,0 +1,27 @@
+//=======-- BPFMCFixups.h - BPF-specific fixup entries ------*- C++ -*-=======//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_BPF_MCTARGETDESC_BPFMCFIXUPS_H
+#define LLVM_LIB_TARGET_BPF_MCTARGETDESC_BPFMCFIXUPS_H
+
+#include "llvm/MC/MCFixup.h"
+
+namespace llvm {
+namespace BPF {
+enum FixupKind {
+ // BPF specific relocations.
+ FK_BPF_PCRel_4 = FirstTargetFixupKind,
+
+ // Marker
+ LastTargetFixupKind,
+ NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
+};
+} // end namespace BPF
+} // end namespace llvm
+
+#endif
diff --git a/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
index e687650ab8861c..7dad40803d4770 100644
--- a/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
+++ b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
@@ -79,12 +79,15 @@ class BPFMCInstrAnalysis : public MCInstrAnalysis {
bool evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size,
uint64_t &Target) const override {
// The target is the 3rd operand of cond inst and the 1st of uncond inst.
- int16_t Imm;
+ int32_t Imm;
if (isConditionalBranch(Inst)) {
- Imm = Inst.getOperand(2).getImm();
- } else if (isUnconditionalBranch(Inst))
- Imm = Inst.getOperand(0).getImm();
- else
+ Imm = (short)Inst.getOperand(2).getImm();
+ } else if (isUnconditionalBranch(Inst)) {
+ if (Inst.getOpcode() == BPF::JMP)
+ Imm = (short)Inst.getOperand(0).getImm();
+ else
+ Imm = (int)Inst.getOperand(0).getImm();
+ } else
return false;
Target = Addr + Size + Imm * Size;
diff --git a/llvm/test/CodeGen/BPF/assembler-disassembler-v4.s b/llvm/test/CodeGen/BPF/assembler-disassembler-v4.s
new file mode 100644
index 00000000000000..d52985986bdc36
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/assembler-disassembler-v4.s
@@ -0,0 +1,44 @@
+// RUN: llvm-mc -triple bpfel --mcpu=v4 --assemble --filetype=obj %s \
+// RUN: | llvm-objdump -d --mattr=+alu32 - \
+// RUN: | FileCheck %s
+
+// CHECK: d7 01 00 00 10 00 00 00 r1 = bswap16 r1
+// CHECK: d7 02 00 00 20 00 00 00 r2 = bswap32 r2
+// CHECK: d7 03 00 00 40 00 00 00 r3 = bswap64 r3
+r1 = bswap16 r1
+r2 = bswap32 r2
+r3 = bswap64 r3
+
+// CHECK: 91 41 00 00 00 00 00 00 r1 = *(s8 *)(r4 + 0x0)
+// CHECK: 89 52 04 00 00 00 00 00 r2 = *(s16 *)(r5 + 0x4)
+// CHECK: 81 63 08 00 00 00 00 00 r3 = *(s32 *)(r6 + 0x8)
+r1 = *(s8 *)(r4 + 0)
+r2 = *(s16 *)(r5 + 4)
+r3 = *(s32 *)(r6 + 8)
+
+// CHECK: 91 41 00 00 00 00 00 00 r1 = *(s8 *)(r4 + 0x0)
+// CHECK: 89 52 04 00 00 00 00 00 r2 = *(s16 *)(r5 + 0x4)
+r1 = *(s8 *)(r4 + 0)
+r2 = *(s16 *)(r5 + 4)
+
+// CHECK: bf 41 08 00 00 00 00 00 r1 = (s8)r4
+// CHECK: bf 52 10 00 00 00 00 00 r2 = (s16)r5
+// CHECK: bf 63 20 00 00 00 00 00 r3 = (s32)r6
+r1 = (s8)r4
+r2 = (s16)r5
+r3 = (s32)r6
+
+// CHECK: bc 31 08 00 00 00 00 00 w1 = (s8)w3
+// CHECK: bc 42 10 00 00 00 00 00 w2 = (s16)w4
+w1 = (s8)w3
+w2 = (s16)w4
+
+// CHECK: 3f 31 01 00 00 00 00 00 r1 s/= r3
+// CHECK: 9f 42 01 00 00 00 00 00 r2 s%= r4
+r1 s/= r3
+r2 s%= r4
+
+// CHECK: 3c 31 01 00 00 00 00 00 w1 s/= w3
+// CHECK: 9c 42 01 00 00 00 00 00 w2 s%= w4
+w1 s/= w3
+w2 s%= w4
diff --git a/llvm/test/CodeGen/BPF/bswap.ll b/llvm/test/CodeGen/BPF/bswap.ll
new file mode 100644
index 00000000000000..86257ef6217180
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/bswap.ll
@@ -0,0 +1,47 @@
+; RUN: llc -march=bpfel -mcpu=v4 -verify-machineinstrs -show-mc-encoding < %s | FileCheck %s
+; Source:
+; long foo(int a, int b, long c) {
+; a = __builtin_bswap16(a);
+; b = __builtin_bswap32(b);
+; c = __builtin_bswap64(c);
+; return a + b + c;
+; }
+; Compilation flags:
+; clang -target bpf -O2 -S -emit-llvm t.c
+
+; Function Attrs: mustprogress nofree nosync nounwind willreturn memory(none)
+define dso_local i64 @foo(i32 noundef %a, i32 noundef %b, i64 noundef %c) local_unnamed_addr #0 {
+entry:
+ %conv = trunc i32 %a to i16
+ %0 = tail call i16 @llvm.bswap.i16(i16 %conv)
+ %conv1 = zext i16 %0 to i32
+ %1 = tail call i32 @llvm.bswap.i32(i32 %b)
+ %2 = tail call i64 @llvm.bswap.i64(i64 %c)
+ %add = add nsw i32 %1, %conv1
+ %conv2 = sext i32 %add to i64
+ %add3 = add nsw i64 %2, %conv2
+ ret i64 %add3
+}
+
+; CHECK: r1 = bswap16 r1 # encoding: [0xd7,0x01,0x00,0x00,0x10,0x00,0x00,0x00]
+; CHECK: r2 = bswap32 r2 # encoding: [0xd7,0x02,0x00,0x00,0x20,0x00,0x00,0x00]
+; CHECK: r0 = bswap64 r0 # encoding: [0xd7,0x00,0x00,0x00,0x40,0x00,0x00,0x00]
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare i16 @llvm.bswap.i16(i16) #1
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare i32 @llvm.bswap.i32(i32) #1
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare i64 @llvm.bswap.i64(i64) #1
+
+attributes #0 = { mustprogress nofree nosync nounwind willreturn memory(none) "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+
+!llvm.module.flags = !{!0, !1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"frame-pointer", i32 2}
+!2 = !{!"clang version 17.0.0 (https://github.com/llvm/llvm-project.git a2913a8a2bfe572d2f1bfea950ab9b0848373648)"}
diff --git a/llvm/test/CodeGen/BPF/gotol.ll b/llvm/test/CodeGen/BPF/gotol.ll
new file mode 100644
index 00000000000000..0c7d9291f72dae
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/gotol.ll
@@ -0,0 +1,91 @@
+; RUN: llc -march=bpfel -mcpu=v4 -gotol-abs-low-bound 0 -verify-machineinstrs -show-mc-encoding < %s | FileCheck %s
+; Source:
+; // This test covers all three cases:
+; // (1). jmp to another basic block (not the follow-through one)
+; // (2). conditional jmp (follow-through and non-follow-through)
+; // (3). conditional jmp followed by an unconditional jmp
+; // To trigger case (3) the following code is developed which
+; // covers case (1) and (2) as well.
+; unsigned foo(unsigned a, unsigned b) {
+; unsigned s = b;
+; if (a < b)
+; goto next;
+; else
+; goto next2;
+; begin:
+; s /= b;
+; if (s > a)
+; return s * s;
+; next:
+; s *= a;
+; if (s > b)
+; goto begin;
+; next2:
+; s *= b;
+; if (s > a)
+; goto begin;
+; return s;
+; }
+; Compilation flags:
+; clang -target bpf -O2 -mcpu=v4 -S -emit-llvm t.c
+
+; Function Attrs: nofree norecurse nosync nounwind memory(none)
+define dso_local i32 @foo(i32 noundef %a, i32 noundef %b) local_unnamed_addr #0 {
+entry:
+ %cmp = icmp ult i32 %a, %b
+ br i1 %cmp, label %next, label %next2
+
+; case (3): conditional jmp followed by an unconditional jmp
+; CHECK: w0 = w2
+; CHECK-NEXT: if w1 < w2 goto
+; CHECK: gotol LBB0_4 # encoding: [0x06'A',A,A,A,0x00,0x00,0x00,0x00]
+; CHECK-NEXT: # fixup A - offset: 0, value: LBB0_4, kind: FK_BPF_PCRel_4
+
+begin: ; preds = %next2, %next
+ %s.0 = phi i32 [ %mul3, %next ], [ %mul7, %next2 ]
+ %div = udiv i32 %s.0, %b
+ %cmp1 = icmp ugt i32 %div, %a
+ br i1 %cmp1, label %if.then2, label %next
+
+; case (2): conditional jmp
+; CHECK: w0 *= w1
+; CHECK-NEXT: if w0 > w2 goto LBB0_7
+; CHECK: goto LBB0_4
+; CHECK-LABEL: LBB0_7:
+; CHECK: gotol
+
+; CHECK-LABEL: LBB0_4:
+
+if.then2: ; preds = %begin
+ %mul = mul i32 %div, %div
+ br label %cleanup
+
+; case (1): unconditional jmp
+; CHECK: w0 *= w0
+; CHECK-NEXT: gotol
+
+next: ; preds = %begin, %entry
+ %s.1 = phi i32 [ %b, %entry ], [ %div, %begin ]
+ %mul3 = mul i32 %s.1, %a
+ %cmp4 = icmp ugt i32 %mul3, %b
+ br i1 %cmp4, label %begin, label %next2
+
+next2: ; preds = %next, %entry
+ %s.2 = phi i32 [ %mul3, %next ], [ %b, %entry ]
+ %mul7 = mul i32 %s.2, %b
+ %cmp8 = icmp ugt i32 %mul7, %a
+ br i1 %cmp8, label %begin, label %cleanup
+
+cleanup: ; preds = %next2, %if.then2
+ %retval.0 = phi i32 [ %mul, %if.then2 ], [ %mul7, %next2 ]
+ ret i32 %retval.0
+}
+
+attributes #0 = { nofree norecurse nosync nounwind memory(none) "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="v4" }
+
+!llvm.module.flags = !{!0, !1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"frame-pointer", i32 2}
+!2 = !{!"clang version 18.0.0 (https://github.com/llvm/llvm-project.git dccf0f74657ce8c50eb1e997bae356c32d7b1ffe)"}
diff --git a/llvm/test/CodeGen/BPF/ldsx.ll b/llvm/test/CodeGen/BPF/ldsx.ll
new file mode 100644
index 00000000000000..622cacf1ad0c1c
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/ldsx.ll
@@ -0,0 +1,104 @@
+; RUN: llc -march=bpfel -mcpu=v4 -verify-machineinstrs -show-mc-encoding < %s | FileCheck %s
+; Source:
+; int f1(char *p) {
+; return *p;
+; }
+; int f2(short *p) {
+; return *p;
+; }
+; int f3(int *p) {
+; return *p;
+; }
+; long f4(char *p) {
+; return *p;
+; }
+; long f5(short *p) {
+; return *p;
+; }
+; long f6(int *p) {
+; return *p;
+; }
+; long f7(long *p) {
+; return *p;
+; }
+; Compilation flags:
+; clang -target bpf -O2 -S -emit-llvm t.c
+
+; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: read)
+define dso_local i32 @f1(ptr nocapture noundef readonly %p) local_unnamed_addr #0 {
+entry:
+ %0 = load i8, ptr %p, align 1, !tbaa !3
+ %conv = sext i8 %0 to i32
+ ret i32 %conv
+}
+; CHECK: r0 = *(s8 *)(r1 + 0) # encoding: [0x91,0x10,0x00,0x00,0x00,0x00,0x00,0x00]
+
+; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: read)
+define dso_local i32 @f2(ptr nocapture noundef readonly %p) local_unnamed_addr #0 {
+entry:
+ %0 = load i16, ptr %p, align 2, !tbaa !6
+ %conv = sext i16 %0 to i32
+ ret i32 %conv
+}
+; CHECK: r0 = *(s16 *)(r1 + 0) # encoding: [0x89,0x10,0x00,0x00,0x00,0x00,0x00,0x00]
+
+; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: read)
+define dso_local i32 @f3(ptr nocapture noundef readonly %p) local_unnamed_addr #0 {
+entry:
+ %0 = load i32, ptr %p, align 4, !tbaa !8
+ ret i32 %0
+}
+; CHECK: w0 = *(u32 *)(r1 + 0) # encoding: [0x61,0x10,0x00,0x00,0x00,0x00,0x00,0x00]
+
+; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: read)
+define dso_local i64 @f4(ptr nocapture noundef readonly %p) local_unnamed_addr #0 {
+entry:
+ %0 = load i8, ptr %p, align 1, !tbaa !3
+ %conv = sext i8 %0 to i64
+ ret i64 %conv
+}
+; CHECK: r0 = *(s8 *)(r1 + 0) # encoding: [0x91,0x10,0x00,0x00,0x00,0x00,0x00,0x00]
+
+; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: read)
+define dso_local i64 @f5(ptr nocapture noundef readonly %p) local_unnamed_addr #0 {
+entry:
+ %0 = load i16, ptr %p, align 2, !tbaa !6
+ %conv = sext i16 %0 to i64
+ ret i64 %conv
+}
+; CHECK: r0 = *(s16 *)(r1 + 0) # encoding: [0x89,0x10,0x00,0x00,0x00,0x00,0x00,0x00]
+
+; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: read)
+define dso_local i64 @f6(ptr nocapture noundef readonly %p) local_unnamed_addr #0 {
+entry:
+ %0 = load i32, ptr %p, align 4, !tbaa !8
+ %conv = sext i32 %0 to i64
+ ret i64 %conv
+}
+; CHECK: r0 = *(s32 *)(r1 + 0) # encoding: [0x81,0x10,0x00,0x00,0x00,0x00,0x00,0x00]
+
+; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: read)
+define dso_local i64 @f7(ptr nocapture noundef readonly %p) local_unnamed_addr #0 {
+entry:
+ %0 = load i64, ptr %p, align 8, !tbaa !10
+ ret i64 %0
+}
+; CHECK: r0 = *(u64 *)(r1 + 0) # encoding: [0x79,0x10,0x00,0x00,0x00,0x00,0x00,0x00]
+
+attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: read) "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+
+!llvm.module.flags = !{!0, !1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"frame-pointer", i32 2}
+!2 = !{!"clang version 17.0.0 (https://github.com/llvm/llvm-project.git 1bf3221bf1e35d953a0b6783bc6e694cb9b0ceae)"}
+!3 = !{!4, !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C/C++ TBAA"}
+!6 = !{!7, !7, i64 0}
+!7 = !{!"short", !4, i64 0}
+!8 = !{!9, !9, i64 0}
+!9 = !{!"int", !4, i64 0}
+!10 = !{!11, !11, i64 0}
+!11 = !{!"long", !4, i64 0}
diff --git a/llvm/test/CodeGen/BPF/movsx.ll b/llvm/test/CodeGen/BPF/movsx.ll
new file mode 100644
index 00000000000000..f781c8c10c4f1d
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/movsx.ll
@@ -0,0 +1,86 @@
+; RUN: llc -march=bpfel -mcpu=v4 -verify-machineinstrs -show-mc-encoding < %s | FileCheck %s
+; Source:
+; short f1(int a) {
+; return (char)a;
+; }
+; int f2(int a) {
+; return (short)a;
+; }
+; long f3(int a) {
+; return (char)a;
+; }
+; long f4(int a) {
+; return (short)a;
+; }
+; long f5(int a) {
+; return a;
+; }
+; long f6(long a) {
+; return (int)a;
+; }
+; Compilation flags:
+; clang -target bpf -O2 -mcpu=v4 -S -emit-llvm t.c
+
+; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
+define dso_local i16 @f1(i32 noundef %a) local_unnamed_addr #0 {
+entry:
+ %conv = trunc i32 %a to i8
+ %conv1 = sext i8 %conv to i16
+ ret i16 %conv1
+}
+; CHECK: w0 = (s8)w1 # encoding: [0xbc,0x10,0x08,0x00,0x00,0x00,0x00,0x00]
+
+; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
+define dso_local i32 @f2(i32 noundef %a) local_unnamed_addr #0 {
+entry:
+ %sext = shl i32 %a, 16
+ %conv1 = ashr exact i32 %sext, 16
+ ret i32 %conv1
+}
+; CHECK: w0 = (s16)w1 # encoding: [0xbc,0x10,0x10,0x00,0x00,0x00,0x00,0x00]
+
+; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
+define dso_local i64 @f3(i32 noundef %a) local_unnamed_addr #0 {
+entry:
+ %conv = zext i32 %a to i64
+ %sext = shl i64 %conv, 56
+ %conv1 = ashr exact i64 %sext, 56
+ ret i64 %conv1
+}
+; CHECK: r0 = (s8)r1 # encoding: [0xbf,0x10,0x08,0x00,0x00,0x00,0x00,0x00]
+
+; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
+define dso_local i64 @f4(i32 noundef %a) local_unnamed_addr #0 {
+entry:
+ %conv = zext i32 %a to i64
+ %sext = shl i64 %conv, 48
+ %conv1 = ashr exact i64 %sext, 48
+ ret i64 %conv1
+}
+; CHECK: r0 = (s16)r1 # encoding: [0xbf,0x10,0x10,0x00,0x00,0x00,0x00,0x00]
+
+; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
+define dso_local i64 @f5(i32 noundef %a) local_unnamed_addr #0 {
+entry:
+ %conv = sext i32 %a to i64
+ ret i64 %conv
+}
+; CHECK: r0 = (s32)r1 # encoding: [0xbf,0x10,0x20,0x00,0x00,0x00,0x00,0x00]
+
+; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
+define dso_local i64 @f6(i64 noundef %a) local_unnamed_addr #0 {
+entry:
+ %sext = shl i64 %a, 32
+ %conv1 = ashr exact i64 %sext, 32
+ ret i64 %conv1
+}
+; CHECK: r0 = (s32)r1 # encoding: [0xbf,0x10,0x20,0x00,0x00,0x00,0x00,0x00]
+
+attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="v4" }
+
+!llvm.module.flags = !{!0, !1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"frame-pointer", i32 2}
+!2 = !{!"clang version 17.0.0 (https://github.com/llvm/llvm-project.git 4e1ca21db4162f0c3cde98f730b08ed538fff2a4)"}
diff --git a/llvm/test/CodeGen/BPF/sdiv_smod.ll b/llvm/test/CodeGen/BPF/sdiv_smod.ll
new file mode 100644
index 00000000000000..f944e5f21c6f2c
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/sdiv_smod.ll
@@ -0,0 +1,44 @@
+; RUN: llc -march=bpfel -mcpu=v4 -verify-machineinstrs -show-mc-encoding < %s | FileCheck %s
+; Source:
+; int foo(int a, int b, int c) {
+; return a/b + a%c;
+; }
+; long bar(long a, long b, long c) {
+; return a/b + a%c;
+; }
+; Compilation flags:
+; clang -target bpf -O2 -S -emit-llvm t.c
+
+; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
+define dso_local i32 @foo(i32 noundef %a, i32 noundef %b, i32 noundef %c) local_unnamed_addr #0 {
+entry:
+ %div = sdiv i32 %a, %b
+ %rem = srem i32 %a, %c
+ %add = add nsw i32 %rem, %div
+ ret i32 %add
+}
+
+; CHECK: w0 = w1
+; CHECK-NEXT: w1 s/= w2 # encoding: [0x3c,0x21,0x01,0x00,0x00,0x00,0x00,0x00]
+; CHECK-NEXT: w0 s%= w3 # encoding: [0x9c,0x30,0x01,0x00,0x00,0x00,0x00,0x00]
+
+; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
+define dso_local i64 @bar(i64 noundef %a, i64 noundef %b, i64 noundef %c) local_unnamed_addr #0 {
+entry:
+ %div = sdiv i64 %a, %b
+ %rem = srem i64 %a, %c
+ %add = add nsw i64 %rem, %div
+ ret i64 %add
+}
+; CHECK: r0 = r1
+; CHECK-NEXT: r1 s/= r2 # encoding: [0x3f,0x21,0x01,0x00,0x00,0x00,0x00,0x00]
+; CHECK-NEXT: r0 s%= r3 # encoding: [0x9f,0x30,0x01,0x00,0x00,0x00,0x00,0x00]
+
+attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+
+!llvm.module.flags = !{!0, !1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"frame-pointer", i32 2}
+!2 = !{!"clang version 17.0.0 (https://github.com/llvm/llvm-project.git c102025a4299e74767cdb4dfba8abbf6cbad820b)"}
More information about the llvm-commits
mailing list