[libc-commits] [libc] [llvm] [libc][math] Refactor bf16divf128 to header-only in src/__support/math folder. (PR #182277)

Abhiram Jampani via libc-commits libc-commits at lists.llvm.org
Thu Feb 19 05:51:45 PST 2026


https://github.com/Abhiramjampani created https://github.com/llvm/llvm-project/pull/182277

Move the `bf16divf128` implementation to a header-only function in
`src/__support/math/bf16divf128.h` and expose it via `shared/math/bf16divf128.h`.
The original `src/math/generic/bf16divf128.cpp` is updated to delegate to
the new header-only implementation.

Part of the effort to make libc math functions header-only for C++23
constexpr math support.

Closes #181024

>From 2c2462c92d01dd633a31e5afd527e5f687e179ac Mon Sep 17 00:00:00 2001
From: Abhiramjampani <lcs2022059 at iiitl.ac.in>
Date: Fri, 6 Feb 2026 01:13:01 +0530
Subject: [PATCH 1/2] [X86] Fold shift into GF2P8AFFINEQB instruction

---
 llvm/lib/Target/X86/X86ISelLowering.cpp  |  67 ++++++-
 llvm/test/CodeGen/X86/gfni-shift-fold.ll | 219 +++++++++++++++++++++++
 2 files changed, 282 insertions(+), 4 deletions(-)
 create mode 100644 llvm/test/CodeGen/X86/gfni-shift-fold.ll

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 1837c8bbedf0e..f79cdbd97487a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -29240,12 +29240,11 @@ uint64_t getGFNICtrlImm(unsigned Opcode, unsigned Amt = 0) {
   llvm_unreachable("Unsupported GFNI opcode");
 }
 
-// Generate a GFNI gf2p8affine bitmask for vXi8 bitreverse/shift/rotate.
-SDValue getGFNICtrlMask(unsigned Opcode, SelectionDAG &DAG, const SDLoc &DL,
-                        MVT VT, unsigned Amt = 0) {
+// Build a GFNI gf2p8affine bitmask from a raw 64-bit matrix value.
+static SDValue buildGFNIMatrixMask(uint64_t Imm, SelectionDAG &DAG,
+                                   const SDLoc &DL, MVT VT) {
   assert(VT.getVectorElementType() == MVT::i8 &&
          (VT.getSizeInBits() % 64) == 0 && "Illegal GFNI control type");
-  uint64_t Imm = getGFNICtrlImm(Opcode, Amt);
   SmallVector<SDValue> MaskBits;
   for (unsigned I = 0, E = VT.getSizeInBits(); I != E; I += 8) {
     uint64_t Bits = (Imm >> (I % 64)) & 255;
@@ -29254,6 +29253,13 @@ SDValue getGFNICtrlMask(unsigned Opcode, SelectionDAG &DAG, const SDLoc &DL,
   return DAG.getBuildVector(VT, DL, MaskBits);
 }
 
+// Generate a GFNI gf2p8affine bitmask for vXi8 bitreverse/shift/rotate.
+SDValue getGFNICtrlMask(unsigned Opcode, SelectionDAG &DAG, const SDLoc &DL,
+                        MVT VT, unsigned Amt = 0) {
+  uint64_t Imm = getGFNICtrlImm(Opcode, Amt);
+  return buildGFNIMatrixMask(Imm, DAG, DL, VT);
+}
+
 /// Lower a vector CTLZ using native supported vector CTLZ instruction.
 //
 // i8/i16 vector implemented using dword LZCNT vector instruction
@@ -50624,6 +50630,59 @@ static SDValue combineShiftLeft(SDNode *N, SelectionDAG &DAG,
   unsigned EltSizeInBits = VT.getScalarSizeInBits();
   SDLoc DL(N);
 
+  // Fold: shl(gf2p8affineqb(X, M), amt) -> gf2p8affineqb(X, M')
+  // where M' = M composed with shift matrix.
+  // This folds the shift into the matrix transformation.
+  // Handle both the X86ISD::GF2P8AFFINEQB form and the intrinsic form.
+  if (Subtarget.hasGFNI() && VT.isVector() && EltSizeInBits == 8) {
+    bool IsGF2P8 = N0.getOpcode() == X86ISD::GF2P8AFFINEQB;
+    bool IsIntrinsic =
+        N0.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
+        (N0.getConstantOperandVal(0) == Intrinsic::x86_vgf2p8affineqb_128 ||
+         N0.getConstantOperandVal(0) == Intrinsic::x86_vgf2p8affineqb_256 ||
+         N0.getConstantOperandVal(0) == Intrinsic::x86_vgf2p8affineqb_512);
+
+    if (IsGF2P8 || IsIntrinsic) {
+      // For vector shifts, the shift amount is a splat vector
+      APInt SplatVal;
+      if (ISD::isConstantSplatVector(N1.getNode(), SplatVal)) {
+        uint64_t ShiftAmt = SplatVal.getZExtValue();
+        if (ShiftAmt > 0 && ShiftAmt < 8) {
+          // Operand indices differ: X86ISD::GF2P8AFFINEQB uses 0,1,2
+          // INTRINSIC_WO_CHAIN uses 1,2,3 (operand 0 is intrinsic ID)
+          unsigned BaseIdx = IsIntrinsic ? 1 : 0;
+          SDValue Input = N0.getOperand(BaseIdx);
+          SDValue MatrixOp = N0.getOperand(BaseIdx + 1);
+          // Fold if matrix is constant. For non-zero XOR immediate, shift it
+          // too: (x ^ imm8) << i = (x << i) ^ (imm8 << i)
+          auto *BV = dyn_cast<BuildVectorSDNode>(MatrixOp);
+          if (BV) {
+            SmallVector<APInt> RawBits;
+            BitVector UndefElts;
+            if (BV->getConstantRawBits(/*IsLE=*/true, 64, RawBits, UndefElts) &&
+                !UndefElts[0]) {
+              uint64_t OrigMatrix = RawBits[0].getZExtValue();
+              // Shifting the matrix is equivalent to right-shifting by
+              // ShiftAmt bytes (each row moves to next position)
+              uint64_t NewMatrix = OrigMatrix >> (ShiftAmt * 8);
+
+              // Shift the XOR immediate as well
+              uint64_t OldImm = N0.getConstantOperandVal(BaseIdx + 2);
+              uint64_t NewImm = (OldImm << ShiftAmt) & 0xFF;
+
+              // Build new matrix vector and return new GF2P8AFFINEQB
+              SDValue NewMatrixOp = buildGFNIMatrixMask(
+                  NewMatrix, DAG, DL, MatrixOp.getSimpleValueType());
+              return DAG.getNode(X86ISD::GF2P8AFFINEQB, DL, VT, Input,
+                                 NewMatrixOp,
+                                 DAG.getTargetConstant(NewImm, DL, MVT::i8));
+            }
+          }
+        }
+      }
+    }
+  }
+
   // Exploits AVX2 VSHLV/VSRLV instructions for efficient unsigned vector shifts
   // with out-of-bounds clamping.
   if (N0.getOpcode() == ISD::VSELECT &&
diff --git a/llvm/test/CodeGen/X86/gfni-shift-fold.ll b/llvm/test/CodeGen/X86/gfni-shift-fold.ll
new file mode 100644
index 0000000000000..59b8330048eb1
--- /dev/null
+++ b/llvm/test/CodeGen/X86/gfni-shift-fold.ll
@@ -0,0 +1,219 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+gfni | FileCheck %s --check-prefixes=GFNI
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+gfni,+avx2 | FileCheck %s --check-prefixes=AVX2
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+gfni,+avx512bw | FileCheck %s --check-prefixes=AVX512
+
+; Test that shift operations on gf2p8affineqb results are folded
+; into the matrix transformation.
+
+;
+; 128-bit tests
+;
+
+define <16 x i8> @test_shl1_v16i8(<16 x i8> %src) {
+; GFNI-LABEL: test_shl1_v16i8:
+; GFNI:       # %bb.0:
+; GFNI-NEXT:    gf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; GFNI-NEXT:    retq
+;
+; AVX2-LABEL: test_shl1_v16i8:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    retq
+;
+; AVX512-LABEL: test_shl1_v16i8:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT:    retq
+  %1 = call <16 x i8> @llvm.x86.vgf2p8affineqb.128(<16 x i8> %src,
+       <16 x i8> <i8 -128, i8 64, i8 32, i8 16, i8 8, i8 4, i8 2, i8 1,
+                  i8 -128, i8 64, i8 32, i8 16, i8 8, i8 4, i8 2, i8 1>, i8 0)
+  %2 = shl <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+                          i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  ret <16 x i8> %2
+}
+
+define <16 x i8> @test_shl2_v16i8(<16 x i8> %src) {
+; GFNI-LABEL: test_shl2_v16i8:
+; GFNI:       # %bb.0:
+; GFNI-NEXT:    gf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; GFNI-NEXT:    retq
+;
+; AVX2-LABEL: test_shl2_v16i8:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    retq
+;
+; AVX512-LABEL: test_shl2_v16i8:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT:    retq
+  %1 = call <16 x i8> @llvm.x86.vgf2p8affineqb.128(<16 x i8> %src,
+       <16 x i8> <i8 -128, i8 64, i8 32, i8 16, i8 8, i8 4, i8 2, i8 1,
+                  i8 -128, i8 64, i8 32, i8 16, i8 8, i8 4, i8 2, i8 1>, i8 0)
+  %2 = shl <16 x i8> %1, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2,
+                          i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
+  ret <16 x i8> %2
+}
+
+define <16 x i8> @test_shl1_nonzero_imm_v16i8(<16 x i8> %src) {
+; GFNI-LABEL: test_shl1_nonzero_imm_v16i8:
+; GFNI:       # %bb.0:
+; GFNI-NEXT:    gf2p8affineqb $2, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; GFNI-NEXT:    retq
+;
+; AVX2-LABEL: test_shl1_nonzero_imm_v16i8:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vgf2p8affineqb $2, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    retq
+;
+; AVX512-LABEL: test_shl1_nonzero_imm_v16i8:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vgf2p8affineqb $2, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT:    retq
+  %1 = call <16 x i8> @llvm.x86.vgf2p8affineqb.128(<16 x i8> %src,
+       <16 x i8> <i8 -128, i8 64, i8 32, i8 16, i8 8, i8 4, i8 2, i8 1,
+                  i8 -128, i8 64, i8 32, i8 16, i8 8, i8 4, i8 2, i8 1>, i8 1)
+  %2 = shl <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+                          i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  ret <16 x i8> %2
+}
+
+;
+; 256-bit tests (require avx2)
+;
+
+define <32 x i8> @test_shl1_v32i8(<32 x i8> %src) #1 {
+; GFNI-LABEL: test_shl1_v32i8:
+; GFNI:       # %bb.0:
+; GFNI-NEXT:    vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; GFNI-NEXT:    retq
+;
+; AVX2-LABEL: test_shl1_v32i8:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX2-NEXT:    retq
+;
+; AVX512-LABEL: test_shl1_v32i8:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX512-NEXT:    retq
+  %1 = call <32 x i8> @llvm.x86.vgf2p8affineqb.256(<32 x i8> %src,
+       <32 x i8> <i8 -128, i8 64, i8 32, i8 16, i8 8, i8 4, i8 2, i8 1,
+                  i8 -128, i8 64, i8 32, i8 16, i8 8, i8 4, i8 2, i8 1,
+                  i8 -128, i8 64, i8 32, i8 16, i8 8, i8 4, i8 2, i8 1,
+                  i8 -128, i8 64, i8 32, i8 16, i8 8, i8 4, i8 2, i8 1>, i8 0)
+  %2 = shl <32 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+                          i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+                          i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+                          i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  ret <32 x i8> %2
+}
+
+define <32 x i8> @test_shl1_nonzero_imm_v32i8(<32 x i8> %src) #1 {
+; GFNI-LABEL: test_shl1_nonzero_imm_v32i8:
+; GFNI:       # %bb.0:
+; GFNI-NEXT:    vgf2p8affineqb $2, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; GFNI-NEXT:    retq
+;
+; AVX2-LABEL: test_shl1_nonzero_imm_v32i8:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vgf2p8affineqb $2, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX2-NEXT:    retq
+;
+; AVX512-LABEL: test_shl1_nonzero_imm_v32i8:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vgf2p8affineqb $2, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX512-NEXT:    retq
+  %1 = call <32 x i8> @llvm.x86.vgf2p8affineqb.256(<32 x i8> %src,
+       <32 x i8> <i8 -128, i8 64, i8 32, i8 16, i8 8, i8 4, i8 2, i8 1,
+                  i8 -128, i8 64, i8 32, i8 16, i8 8, i8 4, i8 2, i8 1,
+                  i8 -128, i8 64, i8 32, i8 16, i8 8, i8 4, i8 2, i8 1,
+                  i8 -128, i8 64, i8 32, i8 16, i8 8, i8 4, i8 2, i8 1>, i8 1)
+  %2 = shl <32 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+                          i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+                          i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+                          i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  ret <32 x i8> %2
+}
+
+;
+; 512-bit tests (require avx512bw)
+;
+
+define <64 x i8> @test_shl1_v64i8(<64 x i8> %src) #0 {
+; GFNI-LABEL: test_shl1_v64i8:
+; GFNI:       # %bb.0:
+; GFNI-NEXT:    vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
+; GFNI-NEXT:    retq
+;
+; AVX2-LABEL: test_shl1_v64i8:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
+; AVX2-NEXT:    retq
+;
+; AVX512-LABEL: test_shl1_v64i8:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
+; AVX512-NEXT:    retq
+  %1 = call <64 x i8> @llvm.x86.vgf2p8affineqb.512(<64 x i8> %src,
+       <64 x i8> <i8 -128, i8 64, i8 32, i8 16, i8 8, i8 4, i8 2, i8 1,
+                  i8 -128, i8 64, i8 32, i8 16, i8 8, i8 4, i8 2, i8 1,
+                  i8 -128, i8 64, i8 32, i8 16, i8 8, i8 4, i8 2, i8 1,
+                  i8 -128, i8 64, i8 32, i8 16, i8 8, i8 4, i8 2, i8 1,
+                  i8 -128, i8 64, i8 32, i8 16, i8 8, i8 4, i8 2, i8 1,
+                  i8 -128, i8 64, i8 32, i8 16, i8 8, i8 4, i8 2, i8 1,
+                  i8 -128, i8 64, i8 32, i8 16, i8 8, i8 4, i8 2, i8 1,
+                  i8 -128, i8 64, i8 32, i8 16, i8 8, i8 4, i8 2, i8 1>, i8 0)
+  %2 = shl <64 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+                          i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+                          i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+                          i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+                          i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+                          i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+                          i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+                          i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  ret <64 x i8> %2
+}
+
+define <64 x i8> @test_shl1_nonzero_imm_v64i8(<64 x i8> %src) #0 {
+; GFNI-LABEL: test_shl1_nonzero_imm_v64i8:
+; GFNI:       # %bb.0:
+; GFNI-NEXT:    vgf2p8affineqb $2, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
+; GFNI-NEXT:    retq
+;
+; AVX2-LABEL: test_shl1_nonzero_imm_v64i8:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vgf2p8affineqb $2, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
+; AVX2-NEXT:    retq
+;
+; AVX512-LABEL: test_shl1_nonzero_imm_v64i8:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vgf2p8affineqb $2, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
+; AVX512-NEXT:    retq
+  %1 = call <64 x i8> @llvm.x86.vgf2p8affineqb.512(<64 x i8> %src,
+       <64 x i8> <i8 -128, i8 64, i8 32, i8 16, i8 8, i8 4, i8 2, i8 1,
+                  i8 -128, i8 64, i8 32, i8 16, i8 8, i8 4, i8 2, i8 1,
+                  i8 -128, i8 64, i8 32, i8 16, i8 8, i8 4, i8 2, i8 1,
+                  i8 -128, i8 64, i8 32, i8 16, i8 8, i8 4, i8 2, i8 1,
+                  i8 -128, i8 64, i8 32, i8 16, i8 8, i8 4, i8 2, i8 1,
+                  i8 -128, i8 64, i8 32, i8 16, i8 8, i8 4, i8 2, i8 1,
+                  i8 -128, i8 64, i8 32, i8 16, i8 8, i8 4, i8 2, i8 1,
+                  i8 -128, i8 64, i8 32, i8 16, i8 8, i8 4, i8 2, i8 1>, i8 1)
+  %2 = shl <64 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+                          i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+                          i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+                          i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+                          i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+                          i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+                          i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+                          i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  ret <64 x i8> %2
+}
+
+attributes #0 = { "target-features"="+avx512bw" }
+attributes #1 = { "target-features"="+avx2" }
+
+declare <16 x i8> @llvm.x86.vgf2p8affineqb.128(<16 x i8>, <16 x i8>, i8)
+declare <32 x i8> @llvm.x86.vgf2p8affineqb.256(<32 x i8>, <32 x i8>, i8)
+declare <64 x i8> @llvm.x86.vgf2p8affineqb.512(<64 x i8>, <64 x i8>, i8)

>From 806418bb3cf05fe994d22ecc935ddde243b66fec Mon Sep 17 00:00:00 2001
From: Abhiramjampani <lcs2022059 at iiitl.ac.in>
Date: Thu, 19 Feb 2026 19:16:19 +0530
Subject: [PATCH 2/2] [libc][math] Refactor bf16divf128 to header-only in
 src/__support/math folder.

---
 libc/shared/math/bf16divf128.h         | 29 +++++++++++++++++++++++
 libc/src/__support/math/CMakeLists.txt | 12 ++++++++++
 libc/src/__support/math/bf16divf128.h  | 32 ++++++++++++++++++++++++++
 libc/src/math/generic/CMakeLists.txt   |  6 +----
 libc/src/math/generic/bf16divf128.cpp  |  7 ++----
 5 files changed, 76 insertions(+), 10 deletions(-)
 create mode 100644 libc/shared/math/bf16divf128.h
 create mode 100644 libc/src/__support/math/bf16divf128.h

diff --git a/libc/shared/math/bf16divf128.h b/libc/shared/math/bf16divf128.h
new file mode 100644
index 0000000000000..8ea100db6bd7f
--- /dev/null
+++ b/libc/shared/math/bf16divf128.h
@@ -0,0 +1,29 @@
+//===-- Shared bf16divf128 function ------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SHARED_MATH_BF16DIVF128_H
+#define LLVM_LIBC_SHARED_MATH_BF16DIVF128_H
+
+#include "include/llvm-libc-types/float128.h"
+
+#ifdef LIBC_TYPES_HAS_FLOAT128
+
+#include "shared/libc_common.h"
+#include "src/__support/math/bf16divf128.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace shared {
+
+using math::bf16divf128;
+
+} // namespace shared
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LIBC_TYPES_HAS_FLOAT128
+
+#endif // LLVM_LIBC_SHARED_MATH_BF16DIVF128_H
diff --git a/libc/src/__support/math/CMakeLists.txt b/libc/src/__support/math/CMakeLists.txt
index 58e4040911f8e..2191d9bd81a39 100644
--- a/libc/src/__support/math/CMakeLists.txt
+++ b/libc/src/__support/math/CMakeLists.txt
@@ -1490,6 +1490,18 @@ add_header_library(
     libc.include.llvm-libc-types.float128
 )
 
+add_header_library(
+  bf16divf128
+  HDRS
+    bf16divf128.h
+  DEPENDS
+    libc.src.__support.FPUtil.bfloat16
+    libc.src.__support.FPUtil.generic.div
+    libc.src.__support.common
+    libc.src.__support.macros.config
+    libc.include.llvm-libc-types.float128
+)
+
 add_header_library(
   tan
   HDRS
diff --git a/libc/src/__support/math/bf16divf128.h b/libc/src/__support/math/bf16divf128.h
new file mode 100644
index 0000000000000..fa23062f070f6
--- /dev/null
+++ b/libc/src/__support/math/bf16divf128.h
@@ -0,0 +1,32 @@
+//===-- Implementation header for bf16divf128 -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_BF16DIVF128_H
+#define LLVM_LIBC_SRC___SUPPORT_MATH_BF16DIVF128_H
+
+#include "include/llvm-libc-types/float128.h"
+
+#ifdef LIBC_TYPES_HAS_FLOAT128
+
+#include "src/__support/FPUtil/bfloat16.h"
+#include "src/__support/FPUtil/generic/div.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace math {
+
+LIBC_INLINE static bfloat16 bf16divf128(float128 x, float128 y) {
+  return fputil::generic::div<bfloat16>(x, y);
+}
+
+} // namespace math
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LIBC_TYPES_HAS_FLOAT128
+#endif // LLVM_LIBC_SRC___SUPPORT_MATH_BF16DIVF128_H
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index 1b18388ed60f8..e500edd2e64b6 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -5327,11 +5327,7 @@ add_entrypoint_object(
   HDRS
     ../bf16divf128.h
   DEPENDS
-    libc.src.__support.common
-    libc.src.__support.FPUtil.bfloat16
-    libc.src.__support.FPUtil.generic.div
-    libc.src.__support.macros.config
-    libc.src.__support.macros.properties.types
+    libc.src.__support.math.bf16divf128
 )
 
 add_entrypoint_object(
diff --git a/libc/src/math/generic/bf16divf128.cpp b/libc/src/math/generic/bf16divf128.cpp
index fbe9775ce4046..eb1b5b241e2a3 100644
--- a/libc/src/math/generic/bf16divf128.cpp
+++ b/libc/src/math/generic/bf16divf128.cpp
@@ -7,15 +7,12 @@
 //===----------------------------------------------------------------------===//
 
 #include "src/math/bf16divf128.h"
-#include "src/__support/FPUtil/bfloat16.h"
-#include "src/__support/FPUtil/generic/div.h"
-#include "src/__support/common.h"
-#include "src/__support/macros/config.h"
+#include "src/__support/math/bf16divf128.h"
 
 namespace LIBC_NAMESPACE_DECL {
 
 LLVM_LIBC_FUNCTION(bfloat16, bf16divf128, (float128 x, float128 y)) {
-  return fputil::generic::div<bfloat16>(x, y);
+  return math::bf16divf128(x, y);
 }
 
 } // namespace LIBC_NAMESPACE_DECL



More information about the libc-commits mailing list