[llvm] [BPF] i128 direct return support (PR #183258)
Claire Fan via llvm-commits
llvm-commits at lists.llvm.org
Sat Mar 7 13:00:32 PST 2026
https://github.com/clairechingching updated https://github.com/llvm/llvm-project/pull/183258
>From 49328907ac24f979cd0c4bffd171469d5561dfc9 Mon Sep 17 00:00:00 2001
From: Claire Fan <fanyungching at gmail.com>
Date: Sun, 8 Feb 2026 09:07:52 +0100
Subject: [PATCH] [BPF] i128 direct return support
---
llvm/lib/Target/BPF/BPF.td | 3 +++
llvm/lib/Target/BPF/BPFCallingConv.td | 6 ++++--
llvm/lib/Target/BPF/BPFISelLowering.cpp | 20 ++++++++++++++----
llvm/lib/Target/BPF/BPFISelLowering.h | 3 +++
llvm/lib/Target/BPF/BPFSubtarget.cpp | 1 +
llvm/lib/Target/BPF/BPFSubtarget.h | 4 ++++
llvm/test/CodeGen/BPF/arr_ret1.ll | 27 +++++++++++++++++++++++++
llvm/test/CodeGen/BPF/i128-bpf64.ll | 25 +++++++++++++++++++++++
llvm/test/CodeGen/BPF/struct_ret2.ll | 7 ++++---
llvm/test/CodeGen/BPF/vec_ret1.ll | 13 ++++++++++++
10 files changed, 100 insertions(+), 9 deletions(-)
create mode 100644 llvm/test/CodeGen/BPF/arr_ret1.ll
create mode 100644 llvm/test/CodeGen/BPF/i128-bpf64.ll
create mode 100644 llvm/test/CodeGen/BPF/vec_ret1.ll
diff --git a/llvm/lib/Target/BPF/BPF.td b/llvm/lib/Target/BPF/BPF.td
index 1fc364dad9988..84ecb90a98009 100644
--- a/llvm/lib/Target/BPF/BPF.td
+++ b/llvm/lib/Target/BPF/BPF.td
@@ -34,6 +34,9 @@ def MisalignedMemAccess : SubtargetFeature<"allows-misaligned-mem-access",
"AllowsMisalignedMemAccess", "true",
"Allows misaligned memory access">;
+def i128DirectReturn : SubtargetFeature<"has-i128-direct-return", "Hasi128DirectReturn",
+ "true", "Enable i128 direct return">;
+
def : Proc<"generic", []>;
def : Proc<"v1", []>;
def : Proc<"v2", []>;
diff --git a/llvm/lib/Target/BPF/BPFCallingConv.td b/llvm/lib/Target/BPF/BPFCallingConv.td
index a557211437e95..d92cc959bdeb6 100644
--- a/llvm/lib/Target/BPF/BPFCallingConv.td
+++ b/llvm/lib/Target/BPF/BPFCallingConv.td
@@ -11,7 +11,7 @@
//===----------------------------------------------------------------------===//
// BPF 64-bit C return-value convention.
-def RetCC_BPF64 : CallingConv<[CCIfType<[i64], CCAssignToReg<[R0]>>]>;
+def RetCC_BPF64 : CallingConv<[CCIfType<[i64], CCAssignToReg<[R0, R1]>>]>;
// BPF 64-bit C Calling convention.
def CC_BPF64 : CallingConv<[
@@ -28,7 +28,9 @@ def CC_BPF64 : CallingConv<[
// Return-value convention when -mattr=+alu32 enabled
def RetCC_BPF32 : CallingConv<[
CCIfType<[i32], CCAssignToRegWithShadow<[W0], [R0]>>,
- CCIfType<[i64], CCAssignToRegWithShadow<[R0], [W0]>>
+ CCIfType<[i32], CCAssignToRegWithShadow<[W1], [R1]>>,
+ CCIfType<[i64], CCAssignToRegWithShadow<[R0], [W0]>>,
+ CCIfType<[i64], CCAssignToRegWithShadow<[R1], [W1]>>,
]>;
// Calling convention when -mattr=+alu32 enabled
diff --git a/llvm/lib/Target/BPF/BPFISelLowering.cpp b/llvm/lib/Target/BPF/BPFISelLowering.cpp
index fd26345920a71..fcca9228e9ba3 100644
--- a/llvm/lib/Target/BPF/BPFISelLowering.cpp
+++ b/llvm/lib/Target/BPF/BPFISelLowering.cpp
@@ -207,6 +207,8 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,
HasJmpExt = STI.getHasJmpExt();
HasMovsx = STI.hasMovsx();
+ Hasi128DirectReturn = STI.getHasi128DirectReturn();
+
AllowsMisalignedMemAccess = STI.getAllowsMisalignedMemAccess();
}
@@ -633,9 +635,18 @@ BPFTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
// CCState - Info about the registers and stack slot.
CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
- if (MF.getFunction().getReturnType()->isAggregateType()) {
- fail(DL, DAG, "aggregate returns are not supported");
- return DAG.getNode(Opc, DL, MVT::Other, Chain);
+ const Function &F = MF.getFunction();
+ Type *retTy = F.getReturnType();
+
+ if (retTy->isAggregateType() || retTy->isVectorTy()) {
+ // BPF calling convention:
+ // 1. in any case, returning more than 2 registers is not allowed
+ // 2. when the target doesn't support i128 direct return through R0/R1,
+ // the return value has to fit in a single register
+ if (Outs.size() > 2 || (!Hasi128DirectReturn && Outs.size() > 1)) {
+ fail(DL, DAG, "aggregate returns are not supported");
+ return DAG.getNode(Opc, DL, MVT::Other, Chain);
+ }
}
// Analize return values.
@@ -677,7 +688,8 @@ SDValue BPFTargetLowering::LowerCallResult(
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
- if (Ins.size() > 1) {
+ // The BPF calling convention does not allow large return values
+ if (Ins.size() > 2 || (!Hasi128DirectReturn && Ins.size() > 1)) {
fail(DL, DAG, "only small returns supported");
for (auto &In : Ins)
InVals.push_back(DAG.getConstant(0, DL, In.VT));
diff --git a/llvm/lib/Target/BPF/BPFISelLowering.h b/llvm/lib/Target/BPF/BPFISelLowering.h
index 8607e4f8c9e69..e828f6a050d29 100644
--- a/llvm/lib/Target/BPF/BPFISelLowering.h
+++ b/llvm/lib/Target/BPF/BPFISelLowering.h
@@ -65,6 +65,9 @@ class BPFTargetLowering : public TargetLowering {
bool HasJmpExt;
bool HasMovsx;
+ // Supports i128 Direct Return
+ bool Hasi128DirectReturn;
+
// Allows Misalignment
bool AllowsMisalignedMemAccess;
diff --git a/llvm/lib/Target/BPF/BPFSubtarget.cpp b/llvm/lib/Target/BPF/BPFSubtarget.cpp
index 726f8f4b39827..1efbae3c84793 100644
--- a/llvm/lib/Target/BPF/BPFSubtarget.cpp
+++ b/llvm/lib/Target/BPF/BPFSubtarget.cpp
@@ -69,6 +69,7 @@ void BPFSubtarget::initializeEnvironment() {
HasStoreImm = false;
HasLoadAcqStoreRel = false;
HasGotox = false;
+ Hasi128DirectReturn = false;
AllowsMisalignedMemAccess = false;
}
diff --git a/llvm/lib/Target/BPF/BPFSubtarget.h b/llvm/lib/Target/BPF/BPFSubtarget.h
index 24eff862224b0..7f0307d8da5ec 100644
--- a/llvm/lib/Target/BPF/BPFSubtarget.h
+++ b/llvm/lib/Target/BPF/BPFSubtarget.h
@@ -60,6 +60,9 @@ class BPFSubtarget : public BPFGenSubtargetInfo {
// whether the cpu supports alu32 instructions.
bool HasAlu32;
+ // whether target supports i128 direct return.
+ bool Hasi128DirectReturn;
+
// whether we should enable MCAsmInfo DwarfUsesRelocationsAcrossSections
bool UseDwarfRIS;
@@ -89,6 +92,7 @@ class BPFSubtarget : public BPFGenSubtargetInfo {
bool getHasJmpExt() const { return HasJmpExt; }
bool getHasJmp32() const { return HasJmp32; }
bool getHasAlu32() const { return HasAlu32; }
+ bool getHasi128DirectReturn() const { return Hasi128DirectReturn; }
bool getUseDwarfRIS() const { return UseDwarfRIS; }
bool getAllowsMisalignedMemAccess() const {
return AllowsMisalignedMemAccess;
diff --git a/llvm/test/CodeGen/BPF/arr_ret1.ll b/llvm/test/CodeGen/BPF/arr_ret1.ll
new file mode 100644
index 0000000000000..da36a1713b67e
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/arr_ret1.ll
@@ -0,0 +1,27 @@
+; RUN: llc -mtriple=bpf -mattr=+has-i128-direct-return < %s | FileCheck %s
+
+; Source code:
+; typedef struct {
+; long long v[2];
+; } arr2_i64;
+;
+; arr2_i64 bar(int a, int b, int c, int d, int e);
+;
+; arr2_i64 foo(int a, int b, int c) {
+; return bar(a, b, c, 1, 2);
+; }
+;
+; Compile with:
+; clang -target bpf -O2 -S -emit-llvm foo.c
+
+; Function Attrs: nounwind uwtable
+define [2 x i64] @foo(i32 %a, i32 %b, i32 %c) #0 {
+; CHECK-LABEL: foo:
+; CHECK: w4 = 1
+; CHECK-NEXT: w5 = 2
+entry:
+ %call = tail call [2 x i64] @bar(i32 %a, i32 %b, i32 %c, i32 1, i32 2) #3
+ ret [2 x i64] %call
+}
+
+declare [2 x i64] @bar(i32, i32, i32, i32, i32) #1
diff --git a/llvm/test/CodeGen/BPF/i128-bpf64.ll b/llvm/test/CodeGen/BPF/i128-bpf64.ll
new file mode 100644
index 0000000000000..d089b30276ca6
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/i128-bpf64.ll
@@ -0,0 +1,25 @@
+; RUN: llc -mtriple=bpf -mcpu=generic -mattr=+has-i128-direct-return < %s | FileCheck %s
+
+; Source code:
+; void test(__int128 *a) {
+; __int128 tmp = __atomic_load_n(a, __ATOMIC_RELAXED);
+; __atomic_store_n(a, tmp, __ATOMIC_RELAXED);
+; }
+;
+; Compile with:
+; clang -target bpf -O2 -S -emit-llvm test.c
+
+define void @test(ptr %a) nounwind {
+; CHECK-LABEL: test:
+; CHECK: r6 = r1
+; CHECK-NEXT: r2 = 0
+; CHECK-NEXT: call __atomic_load_16
+; CHECK-NEXT: r3 = r1
+; CHECK-NEXT: r1 = r6
+; CHECK-NEXT: r2 = r0
+; CHECK-NEXT: r4 = 0
+; CHECK-NEXT: call __atomic_store_16
+ %1 = load atomic i128, ptr %a monotonic, align 16
+ store atomic i128 %1, ptr %a monotonic, align 16
+ ret void
+}
diff --git a/llvm/test/CodeGen/BPF/struct_ret2.ll b/llvm/test/CodeGen/BPF/struct_ret2.ll
index 170d55cc29df0..9b0d7d4fe2f81 100644
--- a/llvm/test/CodeGen/BPF/struct_ret2.ll
+++ b/llvm/test/CodeGen/BPF/struct_ret2.ll
@@ -1,9 +1,10 @@
-; RUN: not llc -mtriple=bpf < %s 2> %t1
-; RUN: FileCheck %s < %t1
-; CHECK: only small returns
+; RUN: llc -mtriple=bpf -mattr=+has-i128-direct-return < %s | FileCheck %s
; Function Attrs: nounwind uwtable
define { i64, i32 } @foo(i32 %a, i32 %b, i32 %c) #0 {
+; CHECK-LABEL: foo:
+; CHECK: w4 = 1
+; CHECK-NEXT: w5 = 2
entry:
%call = tail call { i64, i32 } @bar(i32 %a, i32 %b, i32 %c, i32 1, i32 2) #3
ret { i64, i32 } %call
diff --git a/llvm/test/CodeGen/BPF/vec_ret1.ll b/llvm/test/CodeGen/BPF/vec_ret1.ll
new file mode 100644
index 0000000000000..f7e0d4ab95494
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/vec_ret1.ll
@@ -0,0 +1,13 @@
+; RUN: llc -mtriple=bpf -mattr=+has-i128-direct-return < %s | FileCheck %s
+
+; Function Attrs: nounwind uwtable
+define <2 x i64> @foo(i32 %a, i32 %b, i32 %c) #0 {
+; CHECK-LABEL: foo:
+; CHECK: w4 = 1
+; CHECK-NEXT: w5 = 2
+entry:
+ %call = tail call <2 x i64> @bar(i32 %a, i32 %b, i32 %c, i32 1, i32 2) #3
+ ret <2 x i64> %call
+}
+
+declare <2 x i64> @bar(i32, i32, i32, i32, i32) #1
More information about the llvm-commits mailing list