branch: externals/doc-toc commit 8f14a2928e64acde94973adce6834f2e90b9ffa2 Author: Daniel Nicolai <dalanico...@gmail.com> Commit: Daniel Nicolai <dalanico...@gmail.com>
Prepare package for publishing on MELPA --- toc-mode.el | 342 ++++++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 218 insertions(+), 124 deletions(-) diff --git a/toc-mode.el b/toc-mode.el index 6578d55e26..814c2c94ed 100644 --- a/toc-mode.el +++ b/toc-mode.el @@ -4,6 +4,9 @@ ;; Author: Daniel Laurens Nicolai <dalanico...@gmail.com> ;; Keywords: tools, outlines, convenience +;; Package-Requires: ((emacs "26.1")) +;; URL: https://github.com/dalanicolai/toc-mode + ;; This program is free software; you can redistribute it and/or modify ;; it under the terms of the GNU General Public License as published by @@ -20,51 +23,159 @@ ;;; Commentary: -;; - +;; toc-mode.el is a package to create and add a Table of Contents to pdf and +;; djvu documents. It implements features to extract a Table of Contents from +;; the textlayer of a document or via OCR if that last option is necessary or +;; prefered. Subsequently this package implements various features to assist in +;; tidy up the extracted Table of Contents, adjust the pagenumbers and finally +;; parsing the Table of Contents into syntax that is understood by the +;; `pdfoutline' and `djvused' commands that are used to add the table of +;; contents to pdf- and djvu-files respectively. + +;; Important: For djvu documents, only outlines with maximum level deepness of 1 +;; are correctly parsed. For outlines with level deepness of 2 not all levels +;; are set to correct level because the algorithm in the toc-parse-djvused +;; function does not parse such files correctly yet (you are welcome to ‘repair’ +;; the algorithm to produce the correct syntax as it is described on the djvused +;; website here). + +;; Requirements: Currently the package requires the `pdftotext' (part of +;; poppler-utils), `pdfoutline' (part of fntsample) and `djvused' (part of +;; http://djvu.sourceforge.net/) command line utilities to be available. +;; Extraction with OCR requires the tesseract command line utility to be +;; available. + +;; Usage: Extraction and adding contents to a document is done in 4 steps: 1 +;; extraction 2 cleanup 3 adjust/correct pagenumbers 4 add TOC to document + +;; 1. Extraction Open some pdf or djvu file in Emacs (pdf-tools and djvu package +;; recommended). Find the pagenumbers for the TOC. Then type M-x +;; toc-extract-pages, or M-x toc-extract-pages-ocr if doc has no text layer or +;; text layer is bad, and answer the subsequent prompts by entering the +;; pagenumbers for the first and the last page each followed by RET. For PDF +;; extraction with OCR, currently it is required to view all contents pages once +;; before extraction (toc-mode uses the cached file data). A buffer with the, +;; somewhat cleaned up, extracted text will open in TOC-cleanup mode. Prefix +;; command with the universal argument (C-u) to omit clean and get the raw text. +;; 2. TOC-Cleanup In this mode you can further cleanup the contents to create a +;; list where each line has the structure: + +;; TITLE (SOME) PAGENUMBER + +;; There can be any number of spaces between TITLE and PAGE. The correct +;; pagenumbers can be edited in the next step. A document outline supports +;; different levels and levels are automatically assigned in order of increasing +;; number of preceding spaces, i.e. the lines with the least amount of preceding +;; spaces are assigned level 0 etc., and lines with equal number of spaces get +;; assigned the same levels. + +;; Contents 1 +;; Chapter 1 2 +;; Section 1 3 +;; Section 1.1 4 +;; Chapter 2 5 + +;; There are some handy functions to assist in the cleanup. C-c C-j jumps +;; automatically to the next line not ending with a number and joins it with the +;; next line. If the indentation structure of the different lines does not +;; correspond with the levels, then the levels can be set automatically from the +;; number of separatorss in the indices with M-x toc-cleanup-set-level-by-index. +;; The default separators is a . but a different separators can be entered by +;; preceding the function invocation with the universal argument (C-u). Some +;; documents contain a structure like + +;; 1 Chapter 1 1 +;; Section 1 2 + +;; Here the indentation can be set with M-x replace-regexp ^[^0-9] -> \& (where +;; there is a space character before the \&). + +;; Type C-c C-c when finished + +;; 3. TOC-tabular (adjust pagenumbers) This mode provides the functionality for +;; easy adjustment of pagenmumbers. The buffer can be navigated with the arrow +;; up/down keys. The left and right arrow keys will shift down/up all the page +;; numbers from the current line and below (combine with SHIFT for setting +;; individual pagenumbers). + +;; The TAB key jumps to the pagenumber of the current line, while C-right/C-left +;; will shift all remaining page numbers up/down while jumping/scrolling to the +;; line its page in the document window. to the S-up/S-donw in the tablist +;; window will just scroll page up/down in the document window and, only for +;; pdf, C-up/C-down will scroll smoothly in that window. + +;; Type C-c C-c when done. + +;; 4. TOC-mode (add outline to document) The text of this buffer should have the +;; right structure for adding the contents to (for pdf’s a copy of) the original +;; document. Final adjusments can be done but should not be necessary. Type C-c +;; C-c for adding the contents to the document. + +;; By default, the TOC is simply added to the original file. ONLY FOR PDF’s, if +;; the (customizable) variable toc-replace-original-file is nil, then the TOC is +;; added to a copy of the original pdf file with the path as defined by the +;; variable toc-destination-file-name. Either a relative path to the original +;; file directory or an absolute path can be given. ;;; Code: -(defgroup toc-mode nil - "Setting for the toc-mode package") +;; Keybindings +;; all-modes (i.e. all steps) +;; Key Binding Description +;; ~C-c C-c~ dispatch (next step) + +;; toc-cleanup-mode +;; ~C-c C-j~ toc-join-next-unnumbered-lines + +;; toc-mode (tablist) +;; ~TAB~ preview/jump-to-page +;; ~right/left~ toc-in/decrease-remaining +;; ~C-right/C-left~ toc-in/decrease-remaining and view page +;; ~S-right/S-left~ in/decrease pagenumber current entry +;; ~C-down/C-up~ scroll document other window (if document buffer shown) +;; ~S-down/S-up~ full page scroll document other window ( idem ) + + +(defgroup toc nil + "Setting for the toc-mode package" + :group 'data) (defcustom toc-replace-original-file t "For PDF include TOC and replace old PDF file. For DJVU the old DJVU file is replaced by default" :type 'boolean - :group 'toc-mode) + :group 'toc) (defcustom toc-destination-file-name "pdfwithtoc.pdf" - "Filename for new PDF if toc-replace-original-file is nil" + "Filename for new PDF if `toc-replace-original-file' is nil." :type 'file - :group 'toc-mode) + :group 'toc) ;;;; toc-extract and cleanup ;;; toc-cleanup (defun toc-cleanup-dots () + "Remove dots between heading its title and page number." (interactive) (beginning-of-buffer) (while (not (eobp)) (re-search-forward "\\([\\. ]*\\)\\([0-9ivx]*\\) *$") (replace-match " \\2") - (forward-line 1)) - ) + (forward-line 1))) (defun toc-cleanup-dots-ocr () - (interactive) + "Remove dots between heading its title and page number. +Like `toc-cleanup-dots' but more suited for use after OCR" (beginning-of-buffer) (while (re-search-forward "\\([0-9\\. \\-]*\\)\\( [0-9]* *\\)$" nil t) - (replace-match " \\2")) - ) + (replace-match " \\2"))) -(defun toc-cleanup-lines-contents-string (&optional arg) - (interactive "nEnter line number of entry 'Contents': ") - (when (called-interactively-p 'any) - (next-line arg)) - (flush-lines "contents") - ) +(defun toc-cleanup-lines-contents-string () + "Delete all lines containing the string \"contents\"." + (interactive) + (flush-lines "contents")) -(defun toc-cleanup-lines-roman-string (&optional arg) +(defun toc-cleanup-lines-roman-string () + "Delete all lines that contain only linefeeds and/or blanks and/or roman numerals." (interactive) (beginning-of-buffer) ;; (re-search-forward "^ *[ivx0-9\\.]+ *$" nil t) @@ -72,25 +183,27 @@ For DJVU the old DJVU file is replaced by default" (while (not (eobp)) (re-search-forward "^[\f ]*[ivx0-9\\.]* *$") (replace-match "") - (forward-line 1)) - ) + (forward-line 1))) (defun toc-cleanup-blank-lines () + "Delete all empty lines." (interactive) (beginning-of-buffer) - (flush-lines "^ *$") - ) + (flush-lines "^ *$")) (defun toc-join-next-unnumbered-lines () + "Search from point for first occurence of line not ending with Western numerals." (interactive) (re-search-forward "[^0-9]\\s-*$" nil t) - (join-line 1)) + (join-line 1)) -(defun toc-join-next-overindexed-index () +(defun toc-jump-next-overindexed-index () + "Jump to next line with unwanted dot behind its index." (interactive) (re-search-forward "^[0-9\\.]*\\. " nil t)) (defun toc-cleanup (startpage &optional arg) + "Cleanup extracted Table Of Contents by running a series of cleanup functions." (interactive) (beginning-of-buffer) (when (search-forward "contents" nil t) @@ -100,49 +213,52 @@ For DJVU the old DJVU file is replaced by default" (toc-cleanup-dots-ocr) (toc-cleanup-dots)) (toc-cleanup-lines-roman-string) - (toc-cleanup-blank-lines) - ;; (toc-join-next-unnumbered-lines) - ) + (toc-cleanup-blank-lines)) -(defun get-index-levels (seperator) +(defun toc-get-section-inidices (separators) + "Determine index part of current line. Counting the number of SEPARATORS." (let* ((string (thing-at-point 'line t)) - (sep (cond (seperator) + (sep (cond (separators) ("\\.")))) (string-match (format "^\\([[:alnum:]]+%s\\)*" sep) string) (match-string 0 string))) -(defun toc-count-level-by-index (seperator) - (let* ((index (get-index-levels seperator)) - (sep (cond (seperator) +(defun toc-count-level-by-index (separators) + "Determine level of current line in TOC tree by counting SEPARATORS." + (let* ((index (toc-get-section-inidices separators)) + (sep (cond (separators) ("\\."))) (string-list (split-string index sep t))) (length string-list))) (defun toc-cleanup-set-level-by-index (&optional arg) - "Automatic set indentation by number of seperators in index. By default uses dots as seperator. Prepend with universal argument (C-u) to enter different seperator." + "Automatic set indentation by number of separatorss in index. +By default uses dots as separators. Prepend with universal +ARG (\\[universal-argument]) to enter different separators." (interactive "P") - (let ((seperator (if arg - (read-string - "Enter index seperator as regexp (escape with \\ if required): ") - nil))) + (let ((separators (if arg + (read-string + "Enter index separators as regexp (escape with \\ if required): ") + nil))) (beginning-of-buffer) (while (re-search-forward "^\\s-+" nil t) (replace-match "")) (beginning-of-buffer) (while (not (eobp)) - (let* ((level (toc-count-level-by-index seperator))) + (let* ((level (toc-count-level-by-index separators))) (dotimes (x level) (insert " ")) (forward-line 1))))) ;;; toc extract -(defun document-extract-pages-text (startpage endpage) +(defun toc-document-extract-pages-text (startpage endpage) + "Extract text from text layer of current document from STARTPAGE to ENDPAGE." (let* ((default-process-coding-system '(windows-1252-unix . utf-8-unix)) (source-buffer (current-buffer)) (ext (url-file-extension (buffer-file-name (current-buffer)))) (shell-command (cond ((string= ".pdf" ext) "pdftotext -f %s -l %s -layout %s -") ((string= ".djvu" ext) "djvutxt --page=%s-%s %s") (t (error "Buffer-filename does not have pdf or djvu extension")))) - (text (shell-command-to-string + (text (shell-command-to-string (format shell-command startpage endpage @@ -153,21 +269,25 @@ For DJVU the old DJVU file is replaced by default" (when (fboundp 'flyspell-mode) flyspell-mode) (setq-local doc-buffer source-buffer) - (insert text) - ;; (kill-whole-line) - )) + (insert text))) ;;;###autoload (defun toc-extract-pages (startpage endpage arg) - "Extract text and cleanup text from table of contents. -Use with the universal argument (C-u) omits cleanup to get the unprocessed text." + "Extract text from text layer of current document and cleanup. +Extract from STARTPAGE to ENDPAGE. Use with the universal +ARG (\\[universal-argument]) omits cleanup to get the unprocessed +text." (interactive "nEnter start-pagenumber for extraction: \nnEnter end-pagenumber for extraction: \nP") - (document-extract-pages-text startpage endpage) + (toc-document-extract-pages-text startpage endpage) (unless arg (toc-cleanup startpage))) ;;;###autoload (defun toc-extract-pages-ocr (startpage endpage arg) + "Extract via OCR text of current document and cleanup. +Extract from STARTPAGE to ENDPAGE. Use with the universal +ARG (\\[universal-argument]) omits cleanup to get the +unprocessed text." (interactive "nEnter start-pagenumber for extraction: \nnEnter end-pagenumber for extraction: \nP") (let* ((page startpage) (source-buffer (current-buffer)) @@ -191,13 +311,14 @@ Use with the universal argument (C-u) omits cleanup to get the unprocessed text. (switch-to-buffer buffer) (toc-cleanup-mode) ;; required before setting local variable (when (fboundp 'flyspell-mode) - (flyspell-mode)) + (flyspell-mode)) (setq-local doc-buffer source-buffer) (unless arg (toc-cleanup startpage t)))) ;;;###autoload (defun toc-extract-outline () + "Extract Table of Contents attached to current document." (interactive) (let* ((source-buffer (current-buffer)) (ext (url-file-extension (buffer-file-name (current-buffer)))) @@ -206,7 +327,7 @@ Use with the universal argument (C-u) omits cleanup to get the unprocessed text. "mutool command is not found")) ((string= ".djvu" ext) "djvused -e 'print-outline' %s") (t (error "Buffer-filename does not have pdf or djvu extension")))) - (text (shell-command-to-string + (text (shell-command-to-string (format shell-command (shell-quote-argument buffer-file-name)))) (buffer (get-buffer-create (concat (file-name-sans-extension (buffer-name)) ".txt")))) @@ -215,6 +336,7 @@ Use with the universal argument (C-u) omits cleanup to get the unprocessed text. (insert text))) (defun toc-create-tablist-buffer () + "Create tablist buffer, from cleaned up Table of Contents buffer, for easy page number adjustment." (interactive) (toc-list doc-buffer)) @@ -228,36 +350,39 @@ Use with the universal argument (C-u) omits cleanup to get the unprocessed text. (define-derived-mode toc-cleanup-mode fundamental-mode "TOC-cleanup" - "Major mode for cleaning up Table Of Contents" - ) + "Major mode for cleaning up Table Of Contents +\\{toc-cleanup-mode-map}") ;;; toc tablist -(defun count-leading-spaces () +(defun toc-count-leading-spaces () + "Count number of leading spaces on current line." (interactive) (let ((start (string-match "^ *" (thing-at-point 'line))) (end (match-end 0))) (- end start))) (defun toc-level () + "Determine level of current line in TOC tree by somparing number of spaces with other lines in buffer." (interactive) (let (levels) (beginning-of-buffer) (while (not (eobp)) - (let ((spaces (count-leading-spaces))) + (let ((spaces (toc-count-leading-spaces))) (unless (member spaces levels) (setq levels (append levels (list spaces)))) (forward-line 1))) (progn levels))) (defun toc-convert-to-tabulated-list () + "Parse and prepare content of current buffer for `toc-tabular-mode'." (interactive) (beginning-of-buffer) (let (lines levels) (while (not (eobp)) (let ((line-list (split-string (buffer-substring (line-beginning-position) (line-end-position)))) - (spaces (count-leading-spaces))) + (spaces (toc-count-leading-spaces))) (unless (member spaces levels) (setq levels (append levels (list spaces)))) (setq lines @@ -269,11 +394,10 @@ Use with the universal argument (C-u) omits cleanup to get the unprocessed text. (mapconcat 'identity (butlast line-list) " ") (mapconcat 'identity (last line-list) " ")))))) (forward-line))) - lines - ;; (progn lines) - )) + lines)) (defun toc-increase () + "Increase pagenumber of current entry." (interactive) (tabulated-list-set-col "page" @@ -281,6 +405,7 @@ Use with the universal argument (C-u) omits cleanup to get the unprocessed text. t)) (defun toc-decrease () + "Decrease pagenumber of current entry." (interactive) (tabulated-list-set-col "page" @@ -288,7 +413,7 @@ Use with the universal argument (C-u) omits cleanup to get the unprocessed text. t)) (defun toc-increase-remaining () - "Increase pagenumber of current entry and all entries below" + "Increase pagenumber of current entry and all entries below." (interactive) (save-excursion (while (not (eobp)) @@ -299,7 +424,7 @@ Use with the universal argument (C-u) omits cleanup to get the unprocessed text. (forward-line 1)))) (defun toc-decrease-remaining () - "Decrease pagenumber of current entry and all entries below" + "Decrease pagenumber of current entry and all entries below." (interactive) (save-excursion (while (not (eobp)) @@ -310,6 +435,7 @@ Use with the universal argument (C-u) omits cleanup to get the unprocessed text. (forward-line 1)))) (defun toc-tablist-follow () + "Preview pagenumber of current line in separate document buffer." (interactive) (let ((ext (url-file-extension (buffer-file-name doc-buffer))) (page (string-to-number (aref (tabulated-list-get-entry) 2)))) @@ -319,16 +445,19 @@ Use with the universal argument (C-u) omits cleanup to get the unprocessed text. (other-window 1))) (defun toc-increase-remaining-and-follow () + "Increase pagenumber of current entry and all entries below and preview page in separate document buffer." (interactive) (toc-increase-remaining) (toc-tablist-follow)) (defun toc-decrease-remaining-and-follow () + "Decrease pagenumber of current entry and all entries below and preview page in separate document buffer." (interactive) (toc-decrease-remaining) (toc-tablist-follow)) (defun toc-scroll-other-window-page-up () + "Scroll page up in document buffer from current buffer." (interactive) (other-window 1) (let ((ext (url-file-extension (buffer-file-name (current-buffer))))) @@ -337,6 +466,7 @@ Use with the universal argument (C-u) omits cleanup to get the unprocessed text. (other-window 1)) (defun toc-scroll-other-window-page-down () + "Scroll page down in document buffer from current buffer." (interactive) (other-window 1) (let ((ext (url-file-extension (buffer-file-name (current-buffer))))) @@ -345,12 +475,14 @@ Use with the universal argument (C-u) omits cleanup to get the unprocessed text. (other-window 1)) (defun toc-scroll-pdf-other-window-down () + "Scroll down in document buffer from current buffer." (interactive) (other-window 1) (pdf-view-scroll-up-or-next-page 1) (other-window 1)) (defun toc-scroll-pdf-other-window-up () + "Scroll up in document buffer from current buffer." (interactive) (other-window 1) (pdf-view-scroll-down-or-previous-page 1) @@ -377,17 +509,19 @@ Use with the universal argument (C-u) omits cleanup to get the unprocessed text. (define-derived-mode toc-tabular-mode tabulated-list-mode "TOC-tabular" - "Major mode for Table Of Contents." + "Major mode for Table Of Contents. +\\{toc-tabular-mode-map}" (setq-local tabulated-list-format [("level" 10 nil) ("name" 80 nil) ("page" 1 nil)]) (tabulated-list-init-header)) (defun toc-list (buffer) + "Create, BUFFER, new tabular-mode-buffer for easy pagenumber adjusment." (interactive) (let ((source-buffer buffer) (toc-tablist (toc-convert-to-tabulated-list))) (switch-to-buffer (concat (buffer-name) ".list")) (when (fboundp 'golden-ratio-mode) - (golden-ratio-mode)) + (golden-ratio-mode)) (toc-tabular-mode) (setq-local doc-buffer source-buffer) (setq-local tabulated-list-entries toc-tablist) @@ -402,12 +536,15 @@ Use with the universal argument (C-u) omits cleanup to get the unprocessed text. (define-derived-mode toc-mode fundamental-mode "TOC" - "Major mode for editing pdf or djvu Table Of Contents source files" - ) + "Major mode for editing pdf or djvu Table Of Contents source files +\\{toc-mode-map}") ;;; pdf parse tablist to (defun toc-tablist-to-pdfoutline () + "Parse and prepare tablist-mode-buffer to source input. +Displays results in a newlycreated buffer for use as source input +to `pdfoutline' shell command." (interactive) (beginning-of-buffer) (let ((source-buffer doc-buffer) @@ -425,24 +562,25 @@ Use with the universal argument (C-u) omits cleanup to get the unprocessed text. ;;;; djvu parse tablist to outline (defun toc-parse-djvused (level) + "Parse and prepare, from tablist-mode-buffer, for use as source input to `pdfoutline' shell command. LEVEL is used as argument by the recursuve algorithm." (let* ((v (tabulated-list-get-entry)) (sexp (list (aref v 1) (format "#%s" (aref v 2))))) (forward-line 1) (if (not (eobp)) - (let ((v (tabulated-list-get-entry))) - (cond ((> (string-to-number (aref v 0)) level) - (setq sexp - (list (append sexp (toc-parse-djvused (string-to-number (aref v 0))))))) - ((= (string-to-number (aref v 0)) level) - (setq sexp (cons - sexp - (toc-parse-djvused (string-to-number (aref v 0)))))) - ((< (string-to-number (aref v 0)) level) - (list sexp)))) + (let ((v (tabulated-list-get-entry))) + (cond ((> (string-to-number (aref v 0)) level) + (setq sexp + (list (append sexp (toc-parse-djvused (string-to-number (aref v 0))))))) + ((= (string-to-number (aref v 0)) level) + (setq sexp (cons + sexp + (toc-parse-djvused (string-to-number (aref v 0)))))) + ((< (string-to-number (aref v 0)) level) + (list sexp)))) (list sexp)))) (defun toc-tablist-to-djvused () - (interactive) + "Parse and prepare, from tablist-mode-buffer, a new buffer for use as source input to `djvused' shell command." (beginning-of-buffer) (let* ((bookmarks '(bookmarks)) (outline (append bookmarks (toc-parse-djvused 0)))) @@ -457,6 +595,7 @@ Use with the universal argument (C-u) omits cleanup to get the unprocessed text. (insert text)))) (defun toc-tablist-to-toc-source () + "Parse and prepare, from tablist-mode-buffer, a new buffer for use as source input to `pdfoutline' or `djvused' shell command." (interactive) (let ((ext (url-file-extension (buffer-file-name doc-buffer)))) (cond ((string= ".pdf" ext) (toc-tablist-to-pdfoutline)) @@ -466,7 +605,7 @@ Use with the universal argument (C-u) omits cleanup to get the unprocessed text. ;;;; add outline to document (defun toc-add-to-pdf () - "combine with add-toc-to-djvu in add-toc-to-document when ready" + "Combine with add-toc-to-djvu in add-toc-to-document when ready." (interactive) (save-buffer) (call-process "pdfoutline" nil "*pdfoutline*" nil @@ -477,7 +616,7 @@ Use with the universal argument (C-u) omits cleanup to get the unprocessed text. toc-destination-file-name))) (defun toc-add-to-djvu () - "combine with add-toc-to-djvu in add-toc-to-document when ready" + "Combine with add-toc-to-djvu in add-toc-to-document when ready." (interactive) (save-buffer) (shell-command (shell-command-to-string @@ -489,58 +628,13 @@ Use with the universal argument (C-u) omits cleanup to get the unprocessed text. (defun toc-add-to-doc () + "Add Table Of Contents to original document. +The text of the current buffer is passed as source input to either the +`pdfoutline' or `djvused' shell command." (interactive) (let ((ext (url-file-extension (buffer-file-name doc-buffer)))) (cond ((string= ".pdf" ext) (toc-add-to-pdf)) ((string= ".djvu" ext) (toc-add-to-djvu))))) -;;;; PDF-Tools functions (PDF-tools does not yet produce best layout, e.g. try pdf-extract-page-lines) - -(defun pdf-extract-page-text (&optional arg) - (interactive "P") - (let* ((page (cond ((numberp arg) arg) - (arg (read-number "Enter pagenumber for extraction: ")) - (t (pdf-view-current-page)))) - (regions (pdf-info-textregions page))) - (print regions))) - ;; (print (mapcar #'(lambda (region) (pdf-info-gettext page region)) regions)))) - -(defun line-edges (first last) - (list (nth 0 first) (nth 1 first) (nth 2 last) (nth 3 last))) - -(defun pdf-info-line-textregions (list) - (let (line-regions - (line list)) - (while line - (let ((first (car line))) - (while (and (car line) (= (nth 1 first) (nth 1 (car line)))) - (setq last (car line)) - (setq line (cdr line))) - (setq line-regions (append line-regions (list (line-edges first last)))))) - (print line-regions))) - -(defun pdf-info-line-textregions2 (regions) - (let (line-regions - (region (car regions))) - (while regions - (let ((r2 (car (cdr regions))) - (r1 region)) - (if (equal (nth 1 r2) (nth 1 r1)) - (setq region (list (nth 0 r1) (nth 1 r1) (nth 2 r2) (nth 3 r2))) - (progn (setq line-regions (append line-regions (list r1))) - (setq region r2))) - (setq regions (cdr regions)))) - (print line-regions))) - -(defun pdf-extract-page-lines (&optional arg) - (interactive "P") - (let* ((page (cond ((numberp arg) arg) - (arg (read-number "Enter pagenumber for extraction: ")) - (t (pdf-view-current-page)))) - (regions (pdf-info-textregions page))) - (print (pdf-info-gettext page (nth 1 (pdf-info-line-textregions regions)) 'line)))) - ;; (print (mapcar #'(lambda (region) (pdf-info-gettext page region)) (pdf-info-line-regions regions))))) - ;; (mapcar '(lambda (region) (pdf-info-gettext page region)) regions))) - (provide 'toc-mode)