================
@@ -0,0 +1,303 @@
+//===------ AVR.cpp - Emit LLVM Code for AVR builtins ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This contains code to emit Builtin calls as LLVM code.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CGBuiltin.h"
+#include "clang/Basic/TargetBuiltins.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/IntrinsicsAVR.h"
+
+using namespace clang;
+using namespace CodeGen;
+using namespace llvm;
+
+/// Lower an AVR fractional-multiply builtin (fmul, fmuls or fmulsu) to an
+/// inline-asm call.
+///
+/// \param CGF     Code generator used to evaluate the operands and to emit
+///                the call.
+/// \param E       The builtin call; its first two arguments are the operands.
+/// \param AsmInsn Mnemonic placed at the start of the assembly template.
+/// \returns The i16 value produced by the emitted inline-asm call.
+///
+/// Every variant uses the same i16(i8, i8) signature. The template copies the
+/// product out of r1:r0 with movw and then clears r1.
+static Value *EmitAVRFMulInlineAsm(CodeGenFunction &CGF, const CallExpr *E,
+                                   const char *AsmInsn) {
+  // Operands are evaluated in source order before the asm callee is built.
+  Value *LHS = CGF.EmitScalarExpr(E->getArg(0));
+  Value *RHS = CGF.EmitScalarExpr(E->getArg(1));
+
+  llvm::LLVMContext &Context = CGF.getLLVMContext();
+  llvm::Type *Int16 = llvm::Type::getInt16Ty(Context);
+  llvm::Type *Int8 = llvm::Type::getInt8Ty(Context);
+  llvm::FunctionType *AsmFnTy =
+      llvm::FunctionType::get(Int16, {Int8, Int8}, /*isVarArg=*/false);
+
+  // Resulting template: "<insn> $1, $2\n\tmovw $0, r0\n\tclr r1".
+  std::string AsmText(AsmInsn);
+  AsmText += " $1, $2\n\tmovw $0, r0\n\tclr r1";
+
+  // Constraints: one register output, two `a`-class inputs, and r0/r1 listed
+  // as clobbers since the template reads r0 and rewrites r1.
+  llvm::InlineAsm *Callee = llvm::InlineAsm::get(
+      AsmFnTy, AsmText, "=r,a,a,~{r0},~{r1}", /*hasSideEffects=*/true);
+  return CGF.Builder.CreateCall(Callee, {LHS, RHS});
+}
+
+/// Emit __builtin_avr_delay_cycles(N).
+///
+/// Generates an optimal sequence of inline assembly delay loops and NOPs
+/// to consume exactly N clock cycles.
+///
+/// Decomposes N into a sum of contributions from nested loops
+/// of decreasing register width, then fills the remainder with rjmp/.+0
+/// (2 cycles) and nop (1 cycle).
+///
+/// Loop types:
+///   4-byte loop: ldi×4 + (subi + sbci×3 + brne) = 9 setup + 6/iter
+///   3-byte loop: ldi×3 + (subi + sbci×2 + brne) = 7 setup + 5/iter
+///   2-byte loop: ldi×2 + (sbiw + brne)           = 5 setup + 4/iter
+///   1-byte loop: ldi   + (dec  + brne)            = 3/iter (no setup overhead)
+static Value *EmitAVRDelayLoops(CodeGenFunction &CGF, uint32_t Cycles) {
+  if (Cycles == 0)
+    return nullptr;
+
+  std::string Asm;
+  std::string Clobbers;
+  unsigned ClobberIdx = 0;
+  unsigned LabelIdx = 1;
+
+  auto AddClobber = [&](unsigned Reg) {
+    if (!Clobbers.empty())
+      Clobbers += ",";
+    Clobbers += "~{r" + std::to_string(Reg) + "}";
+  };
+
+  // 4-byte loop: 9 + 6*(loop_count-1) cycles
+  // ldi×4 + (subi + sbci×3 + brne) per iteration
+  if (Cycles >= 83886082u) {
+    uint32_t LoopCount = ((Cycles - 9) / 6) + 1;
+    uint32_t Used = ((LoopCount - 1) * 6) + 9;
+    unsigned Base = 16 + ClobberIdx;
+    std::string L = std::to_string(LabelIdx++);
+    Asm += "ldi r" + std::to_string(Base) + ", lo8(" +
+           std::to_string(LoopCount) + ")\n\t";
+    Asm += "ldi r" + std::to_string(Base + 1) + ", hi8(" +
+           std::to_string(LoopCount) + ")\n\t";
+    Asm += "ldi r" + std::to_string(Base + 2) + ", hlo8(" +
+           std::to_string(LoopCount) + ")\n\t";
+    Asm += "ldi r" + std::to_string(Base + 3) + ", hhi8(" +
+           std::to_string(LoopCount) + ")\n\t";
+    Asm += L + ": subi r" + std::to_string(Base) + ", 1\n\t";
+    Asm += "sbci r" + std::to_string(Base + 1) + ", 0\n\t";
+    Asm += "sbci r" + std::to_string(Base + 2) + ", 0\n\t";
+    Asm += "sbci r" + std::to_string(Base + 3) + ", 0\n\t";
+    Asm += "brne " + L + "b\n\t";
+    AddClobber(Base);
+    AddClobber(Base + 1);
+    AddClobber(Base + 2);
+    AddClobber(Base + 3);
+    ClobberIdx += 4;
+    Cycles -= Used;
+  }
+
+  // 3-byte loop: 7 + 5*(loop_count-1) cycles
+  // ldi×3 + (subi + sbci×2 + brne) per iteration
+  if (Cycles >= 262145u) {
+    uint32_t LoopCount = ((Cycles - 7) / 5) + 1;
+    if (LoopCount > 0xFFFFFFu)
+      LoopCount = 0xFFFFFFu;
+    uint32_t Used = ((LoopCount - 1) * 5) + 7;
+    unsigned Base = 16 + ClobberIdx;
+    std::string L = std::to_string(LabelIdx++);
+    Asm += "ldi r" + std::to_string(Base) + ", lo8(" +
+           std::to_string(LoopCount) + ")\n\t";
+    Asm += "ldi r" + std::to_string(Base + 1) + ", hi8(" +
+           std::to_string(LoopCount) + ")\n\t";
+    Asm += "ldi r" + std::to_string(Base + 2) + ", hlo8(" +
+           std::to_string(LoopCount) + ")\n\t";
+    Asm += L + ": subi r" + std::to_string(Base) + ", 1\n\t";
+    Asm += "sbci r" + std::to_string(Base + 1) + ", 0\n\t";
+    Asm += "sbci r" + std::to_string(Base + 2) + ", 0\n\t";
+    Asm += "brne " + L + "b\n\t";
+    AddClobber(Base);
+    AddClobber(Base + 1);
+    AddClobber(Base + 2);
+    ClobberIdx += 3;
+    Cycles -= Used;
+  }
+
+  // 2-byte loop: 5 + 4*(loop_count-1) cycles
+  // ldi×2 + (sbiw + brne) per iteration
+  // sbiw requires an even register in {r24, r26, r28, r30}.
+  if (Cycles >= 768u) {
+    uint32_t LoopCount = ((Cycles - 5) / 4) + 1;
+    if (LoopCount > 0xFFFFu)
+      LoopCount = 0xFFFFu;
+    uint32_t Used = ((LoopCount - 1) * 4) + 5;
+    std::string L = std::to_string(LabelIdx++);
+    // Use r24:r25 for sbiw (hardcoded per AVR ISA constraint).
+    Asm += "ldi r24, lo8(" + std::to_string(LoopCount) + ")\n\t";
+    Asm += "ldi r25, hi8(" + std::to_string(LoopCount) + ")\n\t";
+    Asm += L + ": sbiw r24, 1\n\t";
----------------
benshi001 wrote:

How about devices without `adiw/sbiw` ? 

https://github.com/llvm/llvm-project/pull/203214
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to