[libc-commits] [libc] [llvm] [libc][math] Refactor bf16divf128 to header-only in src/__support/math folder. (PR #182277)
Abhiram Jampani via libc-commits
libc-commits at lists.llvm.org
Thu Feb 19 05:51:45 PST 2026
https://github.com/Abhiramjampani created https://github.com/llvm/llvm-project/pull/182277
Move the `bf16divf128` implementation to a header-only function in
`src/__support/math/bf16divf128.h` and expose it via `shared/math/bf16divf128.h`.
The original `src/math/generic/bf16divf128.cpp` is updated to delegate to
the new header-only implementation.
Part of the effort to make libc math functions header-only for C++23
constexpr math support.
Closes #181024
>From 2c2462c92d01dd633a31e5afd527e5f687e179ac Mon Sep 17 00:00:00 2001
From: Abhiramjampani <lcs2022059 at iiitl.ac.in>
Date: Fri, 6 Feb 2026 01:13:01 +0530
Subject: [PATCH 1/2] [X86] Fold shift into GF2P8AFFINEQB instruction
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 67 ++++++-
llvm/test/CodeGen/X86/gfni-shift-fold.ll | 219 +++++++++++++++++++++++
2 files changed, 282 insertions(+), 4 deletions(-)
create mode 100644 llvm/test/CodeGen/X86/gfni-shift-fold.ll
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 1837c8bbedf0e..f79cdbd97487a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -29240,12 +29240,11 @@ uint64_t getGFNICtrlImm(unsigned Opcode, unsigned Amt = 0) {
llvm_unreachable("Unsupported GFNI opcode");
}
-// Generate a GFNI gf2p8affine bitmask for vXi8 bitreverse/shift/rotate.
-SDValue getGFNICtrlMask(unsigned Opcode, SelectionDAG &DAG, const SDLoc &DL,
- MVT VT, unsigned Amt = 0) {
+// Build a GFNI gf2p8affine bitmask from a raw 64-bit matrix value.
+static SDValue buildGFNIMatrixMask(uint64_t Imm, SelectionDAG &DAG,
+ const SDLoc &DL, MVT VT) {
assert(VT.getVectorElementType() == MVT::i8 &&
(VT.getSizeInBits() % 64) == 0 && "Illegal GFNI control type");
- uint64_t Imm = getGFNICtrlImm(Opcode, Amt);
SmallVector<SDValue> MaskBits;
for (unsigned I = 0, E = VT.getSizeInBits(); I != E; I += 8) {
uint64_t Bits = (Imm >> (I % 64)) & 255;
@@ -29254,6 +29253,13 @@ SDValue getGFNICtrlMask(unsigned Opcode, SelectionDAG &DAG, const SDLoc &DL,
return DAG.getBuildVector(VT, DL, MaskBits);
}
+// Generate a GFNI gf2p8affine bitmask for vXi8 bitreverse/shift/rotate.
+SDValue getGFNICtrlMask(unsigned Opcode, SelectionDAG &DAG, const SDLoc &DL,
+ MVT VT, unsigned Amt = 0) {
+ uint64_t Imm = getGFNICtrlImm(Opcode, Amt);
+ return buildGFNIMatrixMask(Imm, DAG, DL, VT);
+}
+
/// Lower a vector CTLZ using native supported vector CTLZ instruction.
//
// i8/i16 vector implemented using dword LZCNT vector instruction
@@ -50624,6 +50630,59 @@ static SDValue combineShiftLeft(SDNode *N, SelectionDAG &DAG,
unsigned EltSizeInBits = VT.getScalarSizeInBits();
SDLoc DL(N);
+ // Fold: shl(gf2p8affineqb(X, M), amt) -> gf2p8affineqb(X, M')
+ // where M' = M composed with shift matrix.
+ // This folds the shift into the matrix transformation.
+ // Handle both the X86ISD::GF2P8AFFINEQB form and the intrinsic form.
+ if (Subtarget.hasGFNI() && VT.isVector() && EltSizeInBits == 8) {
+ bool IsGF2P8 = N0.getOpcode() == X86ISD::GF2P8AFFINEQB;
+ bool IsIntrinsic =
+ N0.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
+ (N0.getConstantOperandVal(0) == Intrinsic::x86_vgf2p8affineqb_128 ||
+ N0.getConstantOperandVal(0) == Intrinsic::x86_vgf2p8affineqb_256 ||
+ N0.getConstantOperandVal(0) == Intrinsic::x86_vgf2p8affineqb_512);
+
+ if (IsGF2P8 || IsIntrinsic) {
+ // For vector shifts, the shift amount is a splat vector
+ APInt SplatVal;
+ if (ISD::isConstantSplatVector(N1.getNode(), SplatVal)) {
+ uint64_t ShiftAmt = SplatVal.getZExtValue();
+ if (ShiftAmt > 0 && ShiftAmt < 8) {
+ // Operand indices differ: X86ISD::GF2P8AFFINEQB uses 0,1,2
+ // INTRINSIC_WO_CHAIN uses 1,2,3 (operand 0 is intrinsic ID)
+ unsigned BaseIdx = IsIntrinsic ? 1 : 0;
+ SDValue Input = N0.getOperand(BaseIdx);
+ SDValue MatrixOp = N0.getOperand(BaseIdx + 1);
+ // Fold if matrix is constant. For non-zero XOR immediate, shift it
+ // too: (x ^ imm8) << i = (x << i) ^ (imm8 << i)
+ auto *BV = dyn_cast<BuildVectorSDNode>(MatrixOp);
+ if (BV) {
+ SmallVector<APInt> RawBits;
+ BitVector UndefElts;
+ if (BV->getConstantRawBits(/*IsLE=*/true, 64, RawBits, UndefElts) &&
+ !UndefElts[0]) {
+ uint64_t OrigMatrix = RawBits[0].getZExtValue();
+ // Shifting the matrix is equivalent to right-shifting by
+ // ShiftAmt bytes (each row moves to next position)
+ uint64_t NewMatrix = OrigMatrix >> (ShiftAmt * 8);
+
+ // Shift the XOR immediate as well
+ uint64_t OldImm = N0.getConstantOperandVal(BaseIdx + 2);
+ uint64_t NewImm = (OldImm << ShiftAmt) & 0xFF;
+
+ // Build new matrix vector and return new GF2P8AFFINEQB
+ SDValue NewMatrixOp = buildGFNIMatrixMask(
+ NewMatrix, DAG, DL, MatrixOp.getSimpleValueType());
+ return DAG.getNode(X86ISD::GF2P8AFFINEQB, DL, VT, Input,
+ NewMatrixOp,
+ DAG.getTargetConstant(NewImm, DL, MVT::i8));
+ }
+ }
+ }
+ }
+ }
+ }
+
// Exploits AVX2 VSHLV/VSRLV instructions for efficient unsigned vector shifts
// with out-of-bounds clamping.
if (N0.getOpcode() == ISD::VSELECT &&
diff --git a/llvm/test/CodeGen/X86/gfni-shift-fold.ll b/llvm/test/CodeGen/X86/gfni-shift-fold.ll
new file mode 100644
index 0000000000000..59b8330048eb1
--- /dev/null
+++ b/llvm/test/CodeGen/X86/gfni-shift-fold.ll
@@ -0,0 +1,219 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+gfni | FileCheck %s --check-prefixes=GFNI
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+gfni,+avx2 | FileCheck %s --check-prefixes=AVX2
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+gfni,+avx512bw | FileCheck %s --check-prefixes=AVX512
+
+; Test that shift operations on gf2p8affineqb results are folded
+; into the matrix transformation.
+
+;
+; 128-bit tests
+;
+
+define <16 x i8> @test_shl1_v16i8(<16 x i8> %src) {
+; GFNI-LABEL: test_shl1_v16i8:
+; GFNI: # %bb.0:
+; GFNI-NEXT: gf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; GFNI-NEXT: retq
+;
+; AVX2-LABEL: test_shl1_v16i8:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: test_shl1_v16i8:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: retq
+ %1 = call <16 x i8> @llvm.x86.vgf2p8affineqb.128(<16 x i8> %src,
+ <16 x i8> <i8 -128, i8 64, i8 32, i8 16, i8 8, i8 4, i8 2, i8 1,
+ i8 -128, i8 64, i8 32, i8 16, i8 8, i8 4, i8 2, i8 1>, i8 0)
+ %2 = shl <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+ i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ ret <16 x i8> %2
+}
+
+define <16 x i8> @test_shl2_v16i8(<16 x i8> %src) {
+; GFNI-LABEL: test_shl2_v16i8:
+; GFNI: # %bb.0:
+; GFNI-NEXT: gf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; GFNI-NEXT: retq
+;
+; AVX2-LABEL: test_shl2_v16i8:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: test_shl2_v16i8:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: retq
+ %1 = call <16 x i8> @llvm.x86.vgf2p8affineqb.128(<16 x i8> %src,
+ <16 x i8> <i8 -128, i8 64, i8 32, i8 16, i8 8, i8 4, i8 2, i8 1,
+ i8 -128, i8 64, i8 32, i8 16, i8 8, i8 4, i8 2, i8 1>, i8 0)
+ %2 = shl <16 x i8> %1, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2,
+ i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
+ ret <16 x i8> %2
+}
+
+define <16 x i8> @test_shl1_nonzero_imm_v16i8(<16 x i8> %src) {
+; GFNI-LABEL: test_shl1_nonzero_imm_v16i8:
+; GFNI: # %bb.0:
+; GFNI-NEXT: gf2p8affineqb $2, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; GFNI-NEXT: retq
+;
+; AVX2-LABEL: test_shl1_nonzero_imm_v16i8:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vgf2p8affineqb $2, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: test_shl1_nonzero_imm_v16i8:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vgf2p8affineqb $2, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: retq
+ %1 = call <16 x i8> @llvm.x86.vgf2p8affineqb.128(<16 x i8> %src,
+ <16 x i8> <i8 -128, i8 64, i8 32, i8 16, i8 8, i8 4, i8 2, i8 1,
+ i8 -128, i8 64, i8 32, i8 16, i8 8, i8 4, i8 2, i8 1>, i8 1)
+ %2 = shl <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+ i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ ret <16 x i8> %2
+}
+
+;
+; 256-bit tests (require avx2)
+;
+
+define <32 x i8> @test_shl1_v32i8(<32 x i8> %src) #1 {
+; GFNI-LABEL: test_shl1_v32i8:
+; GFNI: # %bb.0:
+; GFNI-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; GFNI-NEXT: retq
+;
+; AVX2-LABEL: test_shl1_v32i8:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: test_shl1_v32i8:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX512-NEXT: retq
+ %1 = call <32 x i8> @llvm.x86.vgf2p8affineqb.256(<32 x i8> %src,
+ <32 x i8> <i8 -128, i8 64, i8 32, i8 16, i8 8, i8 4, i8 2, i8 1,
+ i8 -128, i8 64, i8 32, i8 16, i8 8, i8 4, i8 2, i8 1,
+ i8 -128, i8 64, i8 32, i8 16, i8 8, i8 4, i8 2, i8 1,
+ i8 -128, i8 64, i8 32, i8 16, i8 8, i8 4, i8 2, i8 1>, i8 0)
+ %2 = shl <32 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+ i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+ i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+ i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ ret <32 x i8> %2
+}
+
+define <32 x i8> @test_shl1_nonzero_imm_v32i8(<32 x i8> %src) #1 {
+; GFNI-LABEL: test_shl1_nonzero_imm_v32i8:
+; GFNI: # %bb.0:
+; GFNI-NEXT: vgf2p8affineqb $2, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; GFNI-NEXT: retq
+;
+; AVX2-LABEL: test_shl1_nonzero_imm_v32i8:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vgf2p8affineqb $2, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: test_shl1_nonzero_imm_v32i8:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vgf2p8affineqb $2, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX512-NEXT: retq
+ %1 = call <32 x i8> @llvm.x86.vgf2p8affineqb.256(<32 x i8> %src,
+ <32 x i8> <i8 -128, i8 64, i8 32, i8 16, i8 8, i8 4, i8 2, i8 1,
+ i8 -128, i8 64, i8 32, i8 16, i8 8, i8 4, i8 2, i8 1,
+ i8 -128, i8 64, i8 32, i8 16, i8 8, i8 4, i8 2, i8 1,
+ i8 -128, i8 64, i8 32, i8 16, i8 8, i8 4, i8 2, i8 1>, i8 1)
+ %2 = shl <32 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+ i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+ i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+ i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ ret <32 x i8> %2
+}
+
+;
+; 512-bit tests (require avx512bw)
+;
+
+define <64 x i8> @test_shl1_v64i8(<64 x i8> %src) #0 {
+; GFNI-LABEL: test_shl1_v64i8:
+; GFNI: # %bb.0:
+; GFNI-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
+; GFNI-NEXT: retq
+;
+; AVX2-LABEL: test_shl1_v64i8:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: test_shl1_v64i8:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
+; AVX512-NEXT: retq
+ %1 = call <64 x i8> @llvm.x86.vgf2p8affineqb.512(<64 x i8> %src,
+ <64 x i8> <i8 -128, i8 64, i8 32, i8 16, i8 8, i8 4, i8 2, i8 1,
+ i8 -128, i8 64, i8 32, i8 16, i8 8, i8 4, i8 2, i8 1,
+ i8 -128, i8 64, i8 32, i8 16, i8 8, i8 4, i8 2, i8 1,
+ i8 -128, i8 64, i8 32, i8 16, i8 8, i8 4, i8 2, i8 1,
+ i8 -128, i8 64, i8 32, i8 16, i8 8, i8 4, i8 2, i8 1,
+ i8 -128, i8 64, i8 32, i8 16, i8 8, i8 4, i8 2, i8 1,
+ i8 -128, i8 64, i8 32, i8 16, i8 8, i8 4, i8 2, i8 1,
+ i8 -128, i8 64, i8 32, i8 16, i8 8, i8 4, i8 2, i8 1>, i8 0)
+ %2 = shl <64 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+ i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+ i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+ i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+ i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+ i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+ i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+ i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ ret <64 x i8> %2
+}
+
+define <64 x i8> @test_shl1_nonzero_imm_v64i8(<64 x i8> %src) #0 {
+; GFNI-LABEL: test_shl1_nonzero_imm_v64i8:
+; GFNI: # %bb.0:
+; GFNI-NEXT: vgf2p8affineqb $2, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
+; GFNI-NEXT: retq
+;
+; AVX2-LABEL: test_shl1_nonzero_imm_v64i8:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vgf2p8affineqb $2, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: test_shl1_nonzero_imm_v64i8:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vgf2p8affineqb $2, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
+; AVX512-NEXT: retq
+ %1 = call <64 x i8> @llvm.x86.vgf2p8affineqb.512(<64 x i8> %src,
+ <64 x i8> <i8 -128, i8 64, i8 32, i8 16, i8 8, i8 4, i8 2, i8 1,
+ i8 -128, i8 64, i8 32, i8 16, i8 8, i8 4, i8 2, i8 1,
+ i8 -128, i8 64, i8 32, i8 16, i8 8, i8 4, i8 2, i8 1,
+ i8 -128, i8 64, i8 32, i8 16, i8 8, i8 4, i8 2, i8 1,
+ i8 -128, i8 64, i8 32, i8 16, i8 8, i8 4, i8 2, i8 1,
+ i8 -128, i8 64, i8 32, i8 16, i8 8, i8 4, i8 2, i8 1,
+ i8 -128, i8 64, i8 32, i8 16, i8 8, i8 4, i8 2, i8 1,
+ i8 -128, i8 64, i8 32, i8 16, i8 8, i8 4, i8 2, i8 1>, i8 1)
+ %2 = shl <64 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+ i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+ i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+ i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+ i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+ i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+ i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+ i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ ret <64 x i8> %2
+}
+
+attributes #0 = { "target-features"="+avx512bw" }
+attributes #1 = { "target-features"="+avx2" }
+
+declare <16 x i8> @llvm.x86.vgf2p8affineqb.128(<16 x i8>, <16 x i8>, i8)
+declare <32 x i8> @llvm.x86.vgf2p8affineqb.256(<32 x i8>, <32 x i8>, i8)
+declare <64 x i8> @llvm.x86.vgf2p8affineqb.512(<64 x i8>, <64 x i8>, i8)
>From 806418bb3cf05fe994d22ecc935ddde243b66fec Mon Sep 17 00:00:00 2001
From: Abhiramjampani <lcs2022059 at iiitl.ac.in>
Date: Thu, 19 Feb 2026 19:16:19 +0530
Subject: [PATCH 2/2] [libc][math] Refactor bf16divf128 to header-only in
src/__support/math folder.
---
libc/shared/math/bf16divf128.h | 29 +++++++++++++++++++++++
libc/src/__support/math/CMakeLists.txt | 12 ++++++++++
libc/src/__support/math/bf16divf128.h | 32 ++++++++++++++++++++++++++
libc/src/math/generic/CMakeLists.txt | 6 +----
libc/src/math/generic/bf16divf128.cpp | 7 ++----
5 files changed, 76 insertions(+), 10 deletions(-)
create mode 100644 libc/shared/math/bf16divf128.h
create mode 100644 libc/src/__support/math/bf16divf128.h
diff --git a/libc/shared/math/bf16divf128.h b/libc/shared/math/bf16divf128.h
new file mode 100644
index 0000000000000..8ea100db6bd7f
--- /dev/null
+++ b/libc/shared/math/bf16divf128.h
@@ -0,0 +1,29 @@
+//===-- Shared bf16divf128 function ------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SHARED_MATH_BF16DIVF128_H
+#define LLVM_LIBC_SHARED_MATH_BF16DIVF128_H
+
+#include "include/llvm-libc-types/float128.h"
+
+#ifdef LIBC_TYPES_HAS_FLOAT128
+
+#include "shared/libc_common.h"
+#include "src/__support/math/bf16divf128.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace shared {
+
+using math::bf16divf128;
+
+} // namespace shared
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LIBC_TYPES_HAS_FLOAT128
+
+#endif // LLVM_LIBC_SHARED_MATH_BF16DIVF128_H
diff --git a/libc/src/__support/math/CMakeLists.txt b/libc/src/__support/math/CMakeLists.txt
index 58e4040911f8e..2191d9bd81a39 100644
--- a/libc/src/__support/math/CMakeLists.txt
+++ b/libc/src/__support/math/CMakeLists.txt
@@ -1490,6 +1490,18 @@ add_header_library(
libc.include.llvm-libc-types.float128
)
+add_header_library(
+ bf16divf128
+ HDRS
+ bf16divf128.h
+ DEPENDS
+ libc.src.__support.FPUtil.bfloat16
+ libc.src.__support.FPUtil.generic.div
+ libc.src.__support.common
+ libc.src.__support.macros.config
+ libc.include.llvm-libc-types.float128
+)
+
add_header_library(
tan
HDRS
diff --git a/libc/src/__support/math/bf16divf128.h b/libc/src/__support/math/bf16divf128.h
new file mode 100644
index 0000000000000..fa23062f070f6
--- /dev/null
+++ b/libc/src/__support/math/bf16divf128.h
@@ -0,0 +1,32 @@
+//===-- Implementation header for bf16divf128 -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_BF16DIVF128_H
+#define LLVM_LIBC_SRC___SUPPORT_MATH_BF16DIVF128_H
+
+#include "include/llvm-libc-types/float128.h"
+
+#ifdef LIBC_TYPES_HAS_FLOAT128
+
+#include "src/__support/FPUtil/bfloat16.h"
+#include "src/__support/FPUtil/generic/div.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace math {
+
+LIBC_INLINE static bfloat16 bf16divf128(float128 x, float128 y) {
+ return fputil::generic::div<bfloat16>(x, y);
+}
+
+} // namespace math
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LIBC_TYPES_HAS_FLOAT128
+#endif // LLVM_LIBC_SRC___SUPPORT_MATH_BF16DIVF128_H
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index 1b18388ed60f8..e500edd2e64b6 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -5327,11 +5327,7 @@ add_entrypoint_object(
HDRS
../bf16divf128.h
DEPENDS
- libc.src.__support.common
- libc.src.__support.FPUtil.bfloat16
- libc.src.__support.FPUtil.generic.div
- libc.src.__support.macros.config
- libc.src.__support.macros.properties.types
+ libc.src.__support.math.bf16divf128
)
add_entrypoint_object(
diff --git a/libc/src/math/generic/bf16divf128.cpp b/libc/src/math/generic/bf16divf128.cpp
index fbe9775ce4046..eb1b5b241e2a3 100644
--- a/libc/src/math/generic/bf16divf128.cpp
+++ b/libc/src/math/generic/bf16divf128.cpp
@@ -7,15 +7,12 @@
//===----------------------------------------------------------------------===//
#include "src/math/bf16divf128.h"
-#include "src/__support/FPUtil/bfloat16.h"
-#include "src/__support/FPUtil/generic/div.h"
-#include "src/__support/common.h"
-#include "src/__support/macros/config.h"
+#include "src/__support/math/bf16divf128.h"
namespace LIBC_NAMESPACE_DECL {
LLVM_LIBC_FUNCTION(bfloat16, bf16divf128, (float128 x, float128 y)) {
- return fputil::generic::div<bfloat16>(x, y);
+ return math::bf16divf128(x, y);
}
} // namespace LIBC_NAMESPACE_DECL
More information about the libc-commits
mailing list