branch: externals/llm
commit 291c56d0171435fa1a6a1b5c9e55ac826f7c6189
Author: Andrew Hyatt <ahy...@gmail.com>
Commit: GitHub <nore...@github.com>

    Add integration testing (#67)
    
    * Add integration tests, so we can test against servers with ert
    
    * Use environment variables, which will enable CI testing
    
    * Create function calling test, add environment to ci workflow
    
    * Turn off nonfree warnings on tests
    
    * More documentation on how these integration tests run
---
 .github/workflows/ci.yaml |   1 +
 llm-integration-test.el   | 156 ++++++++++++++++++++++++++++++++++++++++++++++
 llm.el                    |   2 +
 3 files changed, 159 insertions(+)

diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index 6fcd4f385c..5fae703f94 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -10,6 +10,7 @@ on:
 jobs:
   test:
     runs-on: ubuntu-latest
+    environment: Continuous Integration
     strategy:
       matrix:
         emacs_version:
diff --git a/llm-integration-test.el b/llm-integration-test.el
new file mode 100644
index 0000000000..087f61dea4
--- /dev/null
+++ b/llm-integration-test.el
@@ -0,0 +1,156 @@
+;;; llm-integration-test.el --- Integration tests for the llm module -*- lexical-binding: t; package-lint-main-file: "llm.el"; -*-
+
+;; Copyright (c) 2024  Free Software Foundation, Inc.
+
+;; Author: Andrew Hyatt <ahy...@gmail.com>
+;; SPDX-License-Identifier: GPL-3.0-or-later
+;;
+;; This program is free software; you can redistribute it and/or
+;; modify it under the terms of the GNU General Public License as
+;; published by the Free Software Foundation; either version 3 of the
+;; License, or (at your option) any later version.
+;;
+;; This program is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;; General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.
+
+;;; Commentary:
+;; This tests the `llm' module by running against real backends. It is designed
+;; to be as fast and accurate as possible, but since LLMs are not deterministic,
+;; some flakiness may happen.
+;;
+;; These tests will test multiple models, according to the environment variables
+;; set:
+;;
+;; - OPENAI_KEY: An OpenAI API key.
+;; - ANTHROPIC_KEY: An Anthropic API key, for Claude.
+;; - GEMINI_KEY: A Gemini API key.
+;; - VERTEX_PROJECT: A Google Cloud Vertex project.
+;; - OLLAMA_CHAT_MODELS: A list of Ollama models to test.
+;;
+;; If any of these are set, the corresponding provider will be tested.
+
+
+;;; Code:
+
+(require 'llm)
+(require 'ert)
+(require 'seq)
+
+(defconst llm-integration-test-chat-prompt
+  "What is the capital of France?  Give me only one word, in English, with no punctuation."
+  "A chat prompt to use for testing.
+The expected reply is exactly `llm-integration-test-chat-answer'.")
+
+(defconst llm-integration-test-chat-answer
+  "Paris"
+  "The correct answer to `llm-integration-test-chat-prompt'.")
+
+(defun llm-integration-test-fc-prompt ()
+  "Return a chat prompt that should trigger a `capital_of_country' call.
+The expected result is `llm-integration-test-fc-answer'."
+  (llm-make-chat-prompt
+   "What is the capital of France?"
+   :functions
+   (list (make-llm-function-call
+          ;; The function is the identity, so the test can simply
+          ;; inspect the argument the LLM chose to pass.
+          :function (lambda (f) f)
+          :name "capital_of_country"
+          :description "Get the capital of a country."
+          :args (list (make-llm-function-arg
+                       :name "country"
+                       :description "The country whose capital to look up."
+                       :type 'string
+                       :required t))))))
+
+(defconst llm-integration-test-fc-answer
+  '(("capital_of_country" . "France"))
+  "The correct answer to `llm-integration-test-fc-prompt':
+an alist of (FUNCTION-NAME . ARGUMENT) pairs.")
+
+(defun llm-integration-test-providers ()
+  "Return a list of LLM providers to test.
+Which providers are included is controlled by the environment
+variables listed in the Commentary section; an empty environment
+yields nil, and every test then runs over nothing."
+  (let ((providers))
+    (when (getenv "OPENAI_KEY")
+      (require 'llm-openai)
+      (push (make-llm-openai :key (getenv "OPENAI_KEY")) providers))
+    (when (getenv "ANTHROPIC_KEY")
+      (require 'llm-claude)
+      (push (make-llm-claude :key (getenv "ANTHROPIC_KEY")) providers))
+    (when (getenv "GEMINI_KEY")
+      (require 'llm-gemini)
+      (push (make-llm-gemini :key (getenv "GEMINI_KEY")) providers))
+    (when (getenv "VERTEX_PROJECT")
+      (require 'llm-vertex)
+      (push (make-llm-vertex :project (getenv "VERTEX_PROJECT")) providers))
+    ;; Note: the variable name must match the one documented and read
+    ;; below -- OLLAMA_CHAT_MODELS, a comma-separated list of models.
+    (when (getenv "OLLAMA_CHAT_MODELS")
+      (require 'llm-ollama)
+      (dolist (model (split-string (getenv "OLLAMA_CHAT_MODELS") ", "))
+        (push (make-llm-ollama :chat-model model) providers)))
+    ;; Return the accumulated list explicitly; without this the last
+    ;; `when' form was the return value, so the function always
+    ;; returned nil and every test passed vacuously.
+    providers))
+
+(ert-deftest llm-chat ()
+  "Test synchronous chat against every configured provider."
+  (dolist (provider (llm-integration-test-providers))
+    (let ((llm-warn-on-nonfree nil))
+      (ert-info ((format "Using provider %s" (llm-name provider)))
+        (should (equal
+                 (llm-chat
+                  provider
+                  (llm-make-chat-prompt llm-integration-test-chat-prompt))
+                 llm-integration-test-chat-answer))))))
+
+(ert-deftest llm-chat-async ()
+  "Test asynchronous chat against every configured provider.
+The success callback must run in the buffer that was current when
+`llm-chat-async' was called."
+  (dolist (provider (llm-integration-test-providers))
+    (ert-info ((format "Using provider %s" (llm-name provider)))
+      (let ((result nil)
+            (buf (current-buffer))
+            (llm-warn-on-nonfree nil)
+            (start-time (current-time)))
+        (llm-chat-async
+         provider
+         (llm-make-chat-prompt llm-integration-test-chat-prompt)
+         (lambda (response)
+           (should (eq (current-buffer) buf))
+           (setq result response))
+         (lambda (error)
+           (error "Error: %s" error)))
+        ;; Bound the wait at 10 seconds, as the streaming test does;
+        ;; an unbounded wait hangs the whole suite if the provider's
+        ;; error path fires (the error callback never sets `result').
+        (while (and (null result)
+                    (time-less-p (time-subtract (current-time) start-time) 10))
+          (sleep-for 0.1))
+        (should (equal result llm-integration-test-chat-answer))))))
+
+(ert-deftest llm-chat-streaming ()
+  "Test streaming chat for providers advertising the `streaming' capability.
+Both the concatenation of the partial responses and the final
+response must equal the expected answer, and all callbacks must run
+in the buffer current at the time of the call."
+  (dolist (provider (seq-filter
+                     (lambda (provider)
+                       (member 'streaming (llm-capabilities provider)))
+                     (llm-integration-test-providers)))
+    (ert-info ((format "Using provider %s" (llm-name provider)))
+      (let ((streamed-result "")
+            (returned-result nil)
+            (llm-warn-on-nonfree nil)
+            (buf (current-buffer))
+            (start-time (current-time)))
+        (llm-chat-streaming
+         provider
+         (llm-make-chat-prompt llm-integration-test-chat-prompt)
+         (lambda (partial-response)
+           (should (eq (current-buffer) buf))
+           (setq streamed-result (concat streamed-result partial-response)))
+         (lambda (response)
+           (should (eq (current-buffer) buf))
+           (setq returned-result response))
+         (lambda (error)
+           (error "Error: %s" error)))
+        ;; Wait for the final response, but give up after 10 seconds.
+        (while (and (null returned-result)
+                    (time-less-p (time-subtract (current-time) start-time) 10))
+          (sleep-for 0.1))
+        (should (equal returned-result llm-integration-test-chat-answer))
+        (should (equal streamed-result llm-integration-test-chat-answer))))))
+
+(ert-deftest llm-function-call ()
+  "Test that every configured provider executes the function call."
+  (dolist (provider (llm-integration-test-providers))
+    (let ((llm-warn-on-nonfree nil))
+      (ert-info ((format "Using provider %s" (llm-name provider)))
+        (should (equal
+                 (llm-chat provider (llm-integration-test-fc-prompt))
+                 llm-integration-test-fc-answer))))))
+
+(provide 'llm-integration-test)
diff --git a/llm.el b/llm.el
index 2a378d4bc1..292a718787 100644
--- a/llm.el
+++ b/llm.el
@@ -95,6 +95,8 @@ RESULT is the result of the function call.  This is required."
 (cl-defstruct llm-function-call
   "This is a struct to represent a function call the LLM can make.
 
+All fields are required.
+
 FUNCTION is a function to call.
 
 NAME is a human readable name of the function.

Reply via email to