branch: externals/org-transclusion
commit 51623cbc0b23c6ed57c25ce6861fa1ecf2a92805
Author: Joseph Turner <jos...@breatheoutbreathe.in>
Commit: Joseph Turner <jos...@breatheoutbreathe.in>

    Add option to transclude local HTML content rendered as Org
    
    With org-transclusion-http added to org-transclusion-set-extensions,
    org-transclusion will now convert HTML files to Org using Pandoc and
    render their content in a transclusion.
    
    This commit also removes the networking logic for retrieving HTML
    files over a network.  That functionality will be handled by a
    separate package.
---
 org-transclusion-html.el       |  68 ++++++++++++++++++++++++--
 org-transclusion-http.el       | 105 -----------------------------------------
 org-transclusion.el            |   5 +-
 test/org-transclusion-html.org |   2 +
 test/org-transclusion-http.org |  17 -------
 test/source-html-no-ext        |  17 +++++++
 6 files changed, 85 insertions(+), 129 deletions(-)

diff --git a/org-transclusion-html.el b/org-transclusion-html.el
index d267b8b46d..f586c829fb 100644
--- a/org-transclusion-html.el
+++ b/org-transclusion-html.el
@@ -18,10 +18,14 @@
 
 ;;; Commentary:
 
-;; This file contains functionality related to converting HTML content to Org. 
 Features include:
-;;
-;; - Convert HTML to Org using Pandoc, a lá `org-web-tools'
-;;   + Convert only HTML headings matching link anchor
+;; This is an extension to `org-transclusion'.  When active, it enables
+;; transclusion of HTML files by converting HTML to Org with Pandoc.
+;; When a link anchor is specified only the HTML headings matching are
+;; transcluded.  Does not support live-sync.
+
+;; Requires Pandoc to be installed and in the $PATH.  Conversion of
+;; HTML to Org using Pandoc inspired by `org-web-tools'.
+
 
 ;;; Code:
 
@@ -30,13 +34,67 @@
 (require 'org)
 (require 'cl-lib)
 (require 'pcase)
+(require 'dom)
+
+;;;; Hook into org-transclusion
+
+(add-hook 'org-transclusion-add-functions #'org-transclusion-add-html-file)
 
 ;;;; Functions
 
+;;;;; Add HTML file
+
+(defun org-transclusion-add-html-file (link plist)
+  "Return a list for HTML file LINK object and PLIST.
+Return nil if not found."
+  (and (string= "file" (org-element-property :type link))
+       (or (string-suffix-p ".html" (org-element-property :path link))
+           (with-current-buffer (find-file-noselect
+                                 (org-element-property :path link) t)
+             (org-transclusion-html--html-p (current-buffer))))
+       (append '(:tc-type "html-org-file")
+               (org-transclusion-html-org-file-content link plist))))
+
+(defun org-transclusion-html-org-file-content (link _plist)
+  "Return payload list without :tc-type.
+:src-content value will be Org format converted from HTML at LINK."
+  (let* ((path (org-element-property :path link))
+         (html-buf (find-file-noselect path t))
+         (org-buf
+          (generate-new-buffer
+           (format " *org-transclusion-html-org %s*" (expand-file-name path))))
+         (src-content
+          (with-current-buffer org-buf
+            (insert-buffer-substring html-buf)
+            ;; TODO: It's not currently possible to link an HTML
+            ;; anchor inside of a 'file:' Org link, but if it ever
+            ;; becomes possible, we can use this:
+
+            ;; (let ((dom (with-current-buffer html-buf
+            ;;              (libxml-parse-html-region))))
+            ;;   (when (dom-by-id dom (format "\\`%s\\'" target))
+            ;;     ;; Page contains id element matching link target.
+            ;;     (erase-buffer)
+            ;;     (dom-print (org-transclusion-html--target-content dom 
target))))
+            (org-transclusion--insert-org-from-html-with-pandoc)
+            (buffer-string))))
+    (with-current-buffer html-buf
+      (org-with-wide-buffer
+       (list :src-buf (current-buffer)
+             :src-beg (point-min)
+             :src-end (point-max)
+             :src-content src-content)))))
+
+;;;;; Utilities
+
 (defun org-transclusion-html--target-content (dom target)
   "Return DOM element(s) that correspond to the TARGET.
 Since anchors may refer to headings but not the text following
-the heading, this function may not return the expected element."
+the heading, this function may not return the expected element.
+
+While is not possible to specify an HTML anchor in a file: Org
+link, this function is useful in other libraries for transcluding
+sections of HTML documents linked via http://, hyper://, etc.."
   ;; HTML link fragments (targets) point to a specific point in a document,
   ;; not a range of text.  This function attempts to guess what range of
   ;; text a target refers to based on what HTML element is targeted.
diff --git a/org-transclusion-http.el b/org-transclusion-http.el
deleted file mode 100644
index 36d678b2f7..0000000000
--- a/org-transclusion-http.el
+++ /dev/null
@@ -1,105 +0,0 @@
-;;; org-transclusion-http.el --- Transclude over HTTP -*- lexical-binding: t; 
-*-
-
-;; Copyright (C) 2024  Free Software Foundation, Inc.
-
-;; This program is free software; you can redistribute it and/or
-;; modify it under the terms of the GNU Affero General Public License
-;; as published by the Free Software Foundation, either version 3 of
-;; the License, or (at your option) any later version.
-
-;; This program is distributed in the hope that it will be useful, but
-;; WITHOUT ANY WARRANTY; without even the implied warranty of
-;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-;; Affero General Public License for more details.
-
-;; You should have received a copy of the GNU Affero General Public
-;; License along with this program. If not, see
-;; <https://www.gnu.org/licenses/>.
-
-;;; Commentary:
-
-;; This file contains functionality related to transcluding content over HTTP
-;; using plz.el.  Features include:
-;;
-;; - Transclude plain text
-;;   + Transclude only Org headings matching search options
-;; - Transclude HTML converted to Org using Pandoc, a lá `org-web-tools'
-;;   + Transclude only HTML headings matching link anchor
-;; - TODO: Support :lines
-
-;;; Code:
-
-;;;; Requirements
-
-(require 'org)
-(require 'org-element)
-(require 'org-transclusion)
-(require 'org-transclusion-html)
-(require 'cl-lib)
-(require 'pcase)
-(require 'plz)
-
-;;;; Functions
-
-(defun org-transclusion-http-add-file (link _plist)
-  "Return callback function when HTTP transclusion is appropriate.
-Otherwise, return nil.  Intended to be added to
-`org-transclusion-add-functions', which see for descriptions of
-arguments LINK and PLIST."
-  (pcase (org-element-property :type link)
-    ((or "http" "https")
-     (message "Asynchronously transcluding over HTTP at point %d, line %d..."
-              (point) (org-current-line))
-     #'org-transclusion-http-add-callback)))
-
-(add-hook 'org-transclusion-add-functions #'org-transclusion-http-add-file)
-
-(defun org-transclusion-http-add-callback (link plist copy)
-  "Load HTTP file at LINK and call
-`org-transclusion-add-callback' with PAYLOAD, LINK, PLIST, COPY."
-  (pcase-let* ((target-mkr (point-marker))
-               (url (org-element-property :raw-link link))
-               ((cl-struct url filename target) (url-generic-parse-url url))
-               (tc-type))
-    (plz 'get url
-      :as 'buffer
-      :then
-      (lambda (_response-buffer)
-        (when-let ((target-buf (marker-buffer target-mkr)))
-          (cond
-           ((org-transclusion-html--html-p (current-buffer))  ; HTML
-            (let ((dom (libxml-parse-html-region)))
-              (when (dom-by-id dom (format "\\`%s\\'" target))
-                ;; Page contains id element matching link target.
-                (erase-buffer)
-                (dom-print (org-transclusion-html--target-content dom target)))
-              (org-transclusion--insert-org-from-html-with-pandoc)
-              ;; Use "org-http" `tc-type' since HTML is converted to Org mode.
-              (setf tc-type "org-http")))
-           ((org-transclusion-org-file-p filename) ; Org-mode
-            ;; FIXME: filename may contain a query string, so it may not end
-            ;; with "org" or "org.gpg".  For example,
-            ;; https://example.com/foobar.org?query=answer has the filename
-            ;; /foobar.org?query=answer and therefore doesn't match.
-            (when target
-              (org-mode)
-              (let ((org-link-search-must-match-exact-headline t))
-                (when (with-demoted-errors "org-transclusion-http 
error:\n%s\ntranscluding whole file..."
-                        (org-link-search (format "#%s" target)))
-                  (org-narrow-to-subtree))))
-            (setf tc-type "org-http"))
-           (t  ; All other file types
-            (setf tc-type "others-http")))
-          (let* ((payload-without-type
-                  (org-transclusion-content-org-buffer-or-element nil plist))
-                 (payload (append `(:tc-type ,tc-type) payload-without-type)))
-            (with-current-buffer target-buf
-              (org-with-wide-buffer
-               (goto-char (marker-position target-mkr))
-               (org-transclusion-add-callback payload link plist copy)))))))))
-
-;;;; Footer
-
-(provide 'org-transclusion-http)
-
-;;; org-transclusion-http.el ends here
diff --git a/org-transclusion.el b/org-transclusion.el
index 9797881512..ccd399694a 100644
--- a/org-transclusion.el
+++ b/org-transclusion.el
@@ -23,7 +23,7 @@
 ;; Keywords: org-mode, transclusion, writing
 
 ;; Version: 1.3.2
-;; Package-Requires: ((emacs "27.1") (org "9.4") (plz "0.7.2"))
+;; Package-Requires: ((emacs "27.1") (org "9.4"))
 
 ;; This file is not part of GNU Emacs.
 
@@ -68,7 +68,8 @@ Intended for :set property for `customize'."
         (const :tag "font-lock: Add font-lock for Org-transclusion" 
org-transclusion-font-lock)
 
         (const :tag "indent-mode: Support org-indent-mode" 
org-transclusion-indent-mode)
-        (const :tag "http: Transclude content over HTTP" org-transclusion-http)
+        (const :tag "html: Transclude HTML converted to Org with Pandoc"
+               org-transclusion-http)
         (repeat :tag "Other packages" :inline t (symbol :tag "Package"))))
 
 (defcustom org-transclusion-add-all-on-activate t
diff --git a/test/org-transclusion-html.org b/test/org-transclusion-html.org
new file mode 100644
index 0000000000..ca0a438dd6
--- /dev/null
+++ b/test/org-transclusion-html.org
@@ -0,0 +1,2 @@
+[[file:source-html-no-ext]]
+#+transclude: [[file:source-html-no-ext]]
diff --git a/test/org-transclusion-http.org b/test/org-transclusion-http.org
deleted file mode 100644
index a57a2a4608..0000000000
--- a/test/org-transclusion-http.org
+++ /dev/null
@@ -1,17 +0,0 @@
-# id="care" points to <h2>: Transclude <h2> and content after <h2> also.
-#+transclude: [[https://ushin.org/needs-list.html#care]]
-
-# Org file with heading CUSTOM_ID: "care": Transclude only that heading.
-#+transclude: [[https://ushin.org/needs-list.org#care]]
-
-# Nonexistent target in Org file: Transclude entire file.
-#+transclude: [[https://ushin.org/needs-list.org#nonexistent-target]]
-
-# Nonexistent target in HTML file: Transclude entire file as Org.
-#+transclude: [[https://ushin.org/needs-list.html#nonexistent-target]]
-
-# id="jabber" points to <section>:  Transclude only <section> content.
-#+transclude: [[https://jmp.chat/faq#jabber]]
-
-# id="autocapitalize" points to <dt>:  Transclude <dt> and subsequent <dd>.
-#+transclude: 
[[https://developer.mozilla.org/en-US/docs/Web/HTML/Element/textarea#autocapitalize]]
diff --git a/test/source-html-no-ext b/test/source-html-no-ext
new file mode 100644
index 0000000000..d8a1cbd51e
--- /dev/null
+++ b/test/source-html-no-ext
@@ -0,0 +1,17 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd";>
+<html xmlns="http://www.w3.org/1999/xhtml"; lang="en" xml:lang="en">
+  <body>
+    <div id="content" class="content">
+      <div id="outline-container-orgf29e149" class="outline-2">
+        <h2 id="orgf29e149">hi there</h2>
+      </div>
+    </div>
+    <div id="postamble" class="status">
+      <p class="author">Author: Joseph Turner</p>
+      <p class="date">Created: 2024-03-30 Sat 00:11</p>
+      <p class="validation"><a 
href="https://validator.w3.org/check?uri=referer";>Validate</a></p>
+    </div>
+  </body>
+</html>

Reply via email to