[clang] [clang][AVR] Add AVR-specific builtin functions (PR #203214)

Sat Jun 13 02:43:10 PDT 2026

================
@@ -0,0 +1,303 @@
+//===------ AVR.cpp - Emit LLVM Code for AVR builtins ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This contains code to emit Builtin calls as LLVM code.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CGBuiltin.h"
+#include "clang/Basic/TargetBuiltins.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/IntrinsicsAVR.h"
+
+using namespace clang;
+using namespace CodeGen;
+using namespace llvm;
+
+/// Lower an AVR fractional-multiply builtin to a single inline-asm call.
+///
+/// \param CGF     Function emitter used to evaluate the operands and to insert
+///                the resulting call.
+/// \param E       The builtin call expression; its first two arguments become
+///                the asm inputs.
+/// \param AsmInsn Mnemonic placed at the start of the asm template (the
+///                callers visible in this file pass "fmul" and "fmuls").
+/// \returns the call to the inline-asm blob, typed i16.
+///
+/// The asm has the shape (i8, i8) -> i16: it runs the multiply on $1 and $2,
+/// copies the r1:r0 pair into the output with movw, and then clears r1.
+static Value *EmitAVRFMulInlineAsm(CodeGenFunction &CGF, const CallExpr *E,
+                                   const char *AsmInsn) {
+  // Evaluate the operands first, in source order.
+  Value *LHS = CGF.EmitScalarExpr(E->getArg(0));
+  Value *RHS = CGF.EmitScalarExpr(E->getArg(1));
+
+  // Signature of the asm blob: i16 (i8, i8).
+  llvm::LLVMContext &Context = CGF.getLLVMContext();
+  llvm::Type *OperandTy = llvm::Type::getInt8Ty(Context);
+  llvm::Type *ResultTy = llvm::Type::getInt16Ty(Context);
+  llvm::FunctionType *AsmFnTy = llvm::FunctionType::get(
+      ResultTy, {OperandTy, OperandTy}, /*isVarArg=*/false);
+
+  // Template: "<insn> $1, $2", then "movw $0, r0", then "clr r1".
+  // NOTE(review): r1 is presumably cleared because the rest of the generated
+  // code expects it to hold zero -- confirm against the AVR backend's ABI.
+  std::string Template(AsmInsn);
+  Template += " $1, $2\n\tmovw $0, r0\n\tclr r1";
+
+  // $0 is the register output, $1/$2 are the inputs; r0 and r1 are declared
+  // clobbered because the template overwrites them.
+  llvm::InlineAsm *FMulAsm = llvm::InlineAsm::get(
+      AsmFnTy, Template, "=r,a,a,~{r0},~{r1}", /*hasSideEffects=*/true);
+  return CGF.Builder.CreateCall(FMulAsm, {LHS, RHS});
+}
+
+/// Emit __builtin_avr_delay_cycles(N).
+///
+/// Builds a single inline-asm blob made of busy-wait loops, `rjmp .+0` and
+/// `nop` that is intended to consume exactly \p Cycles clock cycles.
+///
+/// N is decomposed into a sum of contributions from loops of decreasing
+/// counter width; whatever is left is filled with rjmp .+0 (2 cycles each)
+/// and a final nop (1 cycle).
+///
+/// Loop types and the cycle counts this function assumes for them
+/// (count = number of iterations):
+///   4-byte loop: ldi×4 + (subi + sbci×3 + brne) = 9 + 6*(count-1)
+///   3-byte loop: ldi×3 + (subi + sbci×2 + brne) = 7 + 5*(count-1)
+///   2-byte loop: ldi×2 + (sbiw + brne)           = 5 + 4*(count-1)
+///   1-byte loop: ldi   + (dec  + brne)            = 3*count
+///
+/// Counter registers are taken upwards from r16 (the 2-byte loop always uses
+/// r24:r25) and every register written is listed as a clobber.
+///
+/// \param CGF    Function emitter the asm call is inserted into.
+/// \param Cycles Number of cycles to burn.
+/// \returns the emitted inline-asm call, or nullptr if nothing was emitted
+///          (\p Cycles == 0).
+static Value *EmitAVRDelayLoops(CodeGenFunction &CGF, uint32_t Cycles) {
+  if (Cycles == 0)
+    return nullptr;
+
+  std::string Asm;
+  std::string Clobbers;
+  unsigned ClobberIdx = 0; // Number of r16+ registers handed out so far.
+  unsigned LabelIdx = 1;   // Next numeric local label ("1:", "2:", ...).
+
+  auto AddClobber = [&](unsigned Reg) {
+    if (!Clobbers.empty())
+      Clobbers += ",";
+    Clobbers += "~{r" + std::to_string(Reg) + "}";
+  };
+
+  // Emit a NumBytes-wide (3 or 4) down-counting loop:
+  //   ldi for each counter byte, then "<L>: subi; sbci...; brne <L>b".
+  // Allocates NumBytes consecutive registers starting at r(16 + ClobberIdx).
+  const char *const ByteSelectors[] = {"lo8", "hi8", "hlo8", "hhi8"};
+  auto EmitWideLoop = [&](unsigned NumBytes, uint32_t LoopCount) {
+    const unsigned Base = 16 + ClobberIdx;
+    const std::string Count = std::to_string(LoopCount);
+    const std::string L = std::to_string(LabelIdx++);
+    for (unsigned I = 0; I < NumBytes; ++I)
+      Asm += "ldi r" + std::to_string(Base + I) + ", " + ByteSelectors[I] +
+             "(" + Count + ")\n\t";
+    Asm += L + ": subi r" + std::to_string(Base) + ", 1\n\t";
+    for (unsigned I = 1; I < NumBytes; ++I)
+      Asm += "sbci r" + std::to_string(Base + I) + ", 0\n\t";
+    Asm += "brne " + L + "b\n\t";
+    for (unsigned I = 0; I < NumBytes; ++I)
+      AddClobber(Base + I);
+    ClobberIdx += NumBytes;
+  };
+
+  // 4-byte loop: 9 + 6*(loop_count-1) cycles
+  // ldi×4 + (subi + sbci×3 + brne) per iteration
+  if (Cycles >= 83886082u) {
+    uint32_t LoopCount = ((Cycles - 9) / 6) + 1;
+    EmitWideLoop(4, LoopCount);
+    Cycles -= ((LoopCount - 1) * 6) + 9;
+  }
+
+  // 3-byte loop: 7 + 5*(loop_count-1) cycles
+  // ldi×3 + (subi + sbci×2 + brne) per iteration
+  if (Cycles >= 262145u) {
+    uint32_t LoopCount = ((Cycles - 7) / 5) + 1;
+    if (LoopCount > 0xFFFFFFu) // Counter is only 24 bits wide.
+      LoopCount = 0xFFFFFFu;
+    EmitWideLoop(3, LoopCount);
+    Cycles -= ((LoopCount - 1) * 5) + 7;
+  }
+
+  // 2-byte loop: 5 + 4*(loop_count-1) cycles
+  // ldi×2 + (sbiw + brne) per iteration
+  // sbiw requires an even register in {r24, r26, r28, r30}.
+  if (Cycles >= 768u) {
+    uint32_t LoopCount = ((Cycles - 5) / 4) + 1;
+    if (LoopCount > 0xFFFFu) // Counter is only 16 bits wide.
+      LoopCount = 0xFFFFu;
+    uint32_t Used = ((LoopCount - 1) * 4) + 5;
+    std::string L = std::to_string(LabelIdx++);
+    // Use r24:r25 for sbiw (hardcoded per AVR ISA constraint).
+    Asm += "ldi r24, lo8(" + std::to_string(LoopCount) + ")\n\t";
+    Asm += "ldi r25, hi8(" + std::to_string(LoopCount) + ")\n\t";
+    Asm += L + ": sbiw r24, 1\n\t";
+    Asm += "brne " + L + "b\n\t";
+    AddClobber(24);
+    AddClobber(25);
+    Cycles -= Used;
+  }
+
+  // 1-byte loop: 3*loop_count cycles
+  // ldi + (dec + brne) per iteration
+  if (Cycles >= 6u) {
+    uint32_t LoopCount = Cycles / 3;
+    if (LoopCount > 255u) // Counter is only 8 bits wide.
+      LoopCount = 255u;
+    uint32_t Used = LoopCount * 3;
+    unsigned Reg = 16 + ClobberIdx;
+    if (Reg > 31)
+      Reg = 31; // safety
+    std::string L = std::to_string(LabelIdx++);
+    Asm += "ldi r" + std::to_string(Reg) + ", " + std::to_string(LoopCount) +
+           "\n\t";
+    Asm += L + ": dec r" + std::to_string(Reg) + "\n\t";
+    Asm += "brne " + L + "b\n\t";
+    AddClobber(Reg);
+    ClobberIdx++;
+    Cycles -= Used;
+  }
+
+  // Fill remaining with rjmp .+0 (2 cycles each)
+  while (Cycles >= 2) {
+    Asm += "rjmp .+0\n\t";
+    Cycles -= 2;
+  }
+
+  // Final single cycle
+  if (Cycles == 1) {
+    Asm += "nop\n\t";
+  }
+
+  if (Asm.empty())
+    return nullptr;
+
+  // Every instruction above was appended with a trailing "\n\t" separator,
+  // which is two characters long; drop the one after the last instruction.
+  const std::string::size_type SepLen = 2;
+  if (Asm.size() >= SepLen &&
+      Asm.compare(Asm.size() - SepLen, SepLen, "\n\t") == 0)
+    Asm.resize(Asm.size() - SepLen);
+
+  // void() asm blob with only clobbers; marked as having side effects.
+  llvm::LLVMContext &Ctx = CGF.getLLVMContext();
+  llvm::FunctionType *FTy =
+      llvm::FunctionType::get(llvm::Type::getVoidTy(Ctx), false);
+  llvm::InlineAsm *IA = llvm::InlineAsm::get(FTy, Asm, Clobbers, true);
+  return CGF.Builder.CreateCall(IA);
+}
+
+Value *CodeGenFunction::EmitAVRBuiltinExpr(unsigned BuiltinID,
+                                           const CallExpr *E) {
+  switch (BuiltinID) {
+  default:
+    return nullptr;
+  case AVR::BI__builtin_avr_nop:
+    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::avr_nop));
+  case AVR::BI__builtin_avr_sei:
+    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::avr_sei));
+  case AVR::BI__builtin_avr_cli:
+    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::avr_cli));
+  case AVR::BI__builtin_avr_sleep:
+    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::avr_sleep));
+  case AVR::BI__builtin_avr_wdr:
+    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::avr_wdr));
+  case AVR::BI__builtin_avr_swap: {
+    Value *Arg0 = EmitScalarExpr(E->getArg(0));
+    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::avr_swap), Arg0);
+  }
+  case AVR::BI__builtin_avr_fmul:
+    return EmitAVRFMulInlineAsm(*this, E, "fmul");
+  case AVR::BI__builtin_avr_fmuls:
+    return EmitAVRFMulInlineAsm(*this, E, "fmuls");
----------------
benshi001 wrote:

In my opinion, a patch can have TODOs, but it cannot generate wrong code, even for corner cases.

https://github.com/llvm/llvm-project/pull/203214