branch: externals/elisa
commit 14d0284fb954257c7bcf289b61a09e3118657b9b
Author: Sergey Kostyaev <[email protected]>
Commit: Sergey Kostyaev <[email protected]>
Add embedding recalculation functionality
Added functions to drop and recreate the data embeddings table,
recalculate embeddings using a new provider, and an asynchronous
function to trigger this process.
---
elisa.el | 33 +++++++++++++++++++++++++++++++++
1 file changed, 33 insertions(+)
diff --git a/elisa.el b/elisa.el
index afab3c1546..ed1aad8df3 100644
--- a/elisa.el
+++ b/elisa.el
@@ -107,6 +107,7 @@
(require 'shr)
(require 'plz)
(require 'json)
+(require 'sqlite)
(defgroup elisa nil
"RAG implementation for `ellama'."
@@ -354,6 +355,10 @@ database."
(format "CREATE VIRTUAL TABLE IF NOT EXISTS data_embeddings USING
vss0(embedding(%d));"
(elisa-get-embedding-size)))
+(defun elisa-data-embeddings-drop-table-sql ()
+ "Generate sql for drop data embeddings table."
+ "DROP TABLE IF EXISTS data_embeddings;")
+
(defun elisa-data-fts-create-table-sql ()
"Generate sql for create full text search table."
"CREATE VIRTUAL TABLE IF NOT EXISTS data_fts USING FTS5(data);")
@@ -1498,5 +1503,33 @@ Find similar quotes in COLLECTIONS and add it to
context."
(let ((cols (or collections elisa-enabled-collections)))
(elisa--rewrite-prompt prompt (elisa--gen-chat cols))))
+(defun elisa-recalculate-embeddings ()
+ "Recalculate and save new embeddings after embedding provider change."
+ (let* ((data-rows (sqlite-select elisa-db "SELECT rowid, data FROM data;"))
+ (texts (mapcar #'cadr data-rows))
+ (rowids (mapcar #'car data-rows))
+ (embeddings (elisa-embeddings texts))
+ (len (length rowids))
+ (i 0))
+ ;; Recreate embeddings table
+ (sqlite-execute elisa-db (elisa-data-embeddings-drop-table-sql))
+ (sqlite-execute elisa-db (elisa-data-embeddings-create-table-sql))
+ ;; Recalculate embeddings
+ (with-sqlite-transaction elisa-db
+ (while (< i len)
+ (let ((rowid (nth i rowids))
+ (embedding (nth i embeddings)))
+ (sqlite-execute
+ elisa-db
+ (format "INSERT INTO data_embeddings(rowid, embedding) VALUES (%s,
%s);"
+ rowid (elisa-vector-to-sqlite embedding)))
+ (setq i (1+ i)))))))
+
+;;;###autoload
+(defun elisa-async-recalculate-embeddings ()
+ "Recalculate embeddings asynchronously."
+ (interactive)
+ (elisa--async-do 'elisa-recalculate-embeddings))
+
(provide 'elisa)
;;; elisa.el ends here.