[elpa] externals/doc-toc 05b6d034aa 59/84: Fix djvu/pdf hard dependency (github issue #3)

ELPA Syncer Mon, 26 Sep 2022 12:10:01 -0700

branch: externals/doc-toc
commit 05b6d034aad8ccb7fe3ca9fdcfdbf543b1cd0277
Author: Daniel Nicolai <dalanico...@gmail.com>
Commit: Daniel Nicolai <dalanico...@gmail.com>


    Fix djvu/pdf hard dependency (github issue #3)
---
 README.org  | 31 ++++++++++++++++++-------------
 toc-mode.el | 30 ++++++++++++++++++++++--------
 2 files changed, 40 insertions(+), 21 deletions(-)

diff --git a/README.org b/README.org
index aea75eaac6..c2e347e7dd 100644
--- a/README.org
+++ b/README.org
@@ -38,6 +38,11 @@ Extraction and adding contents to a document is done in 4 steps:
 3. adjust/correct pagenumbers
 4. add TOC to document
 
+In each step below, check out available shortcuts using =C-h m=. Additionally you
+can find available functions by typing the =M-x mode-name= (e.g. =M-x toc-cleanup=),
+or with two dashes in the mode name (e.g. =M-x toc--cleanup=). Of course if you
+use packages like Ivy or Helm you just use the fuzzy search functionality.
+
 ** 1. Extraction
 Open some pdf or djvu file in Emacs (pdf-tools and djvu package recommended).
 Find the pagenumbers for the TOC. Then type =M-x toc-extract-pages=, or =M-x
@@ -51,12 +56,12 @@ data). Also the languages used for tesseract OCR can be customized via the
 [[toc-mode-extract.gif]]
 
 A buffer with the, somewhat cleaned up, extracted text will open in TOC-cleanup
-mode. Prefix command with the universal argument (=C-u=) to omit clean and get the
-raw text. If the extracted text is of too low quality you either can hack/extend
-the [[help:toc-extract-pages-ocr][toc-extract-pages-ocr]] definition, or alternatively you can try to extract
-the text with the [[https://pypi.org/project/document-contents-extractor/][python document-contents-extractor script]], which is more
-configurable (you are also welcome to hack on and improve that script).
-For this the [[https://tesseract-ocr.github.io/tessdoc/Command-Line-Usage.html][tesseract]] documentation might be useful.
+mode. Prefix command with the universal argument (=C-u=) to omit cleanup and get
+the raw text. If the extracted text is of too low quality you either can
+hack/extend the [[help:toc-extract-pages-ocr][toc-extract-pages-ocr]] definition, or alternatively you can try
+to extract the text with the [[https://pypi.org/project/document-contents-extractor/][python document-contents-extractor script]], which is
+more configurable (you are also welcome to hack on and improve that script). For
+this the [[https://tesseract-ocr.github.io/tessdoc/Command-Line-Usage.html][tesseract]] documentation might be useful.
 
 If you merely want to extract text without further processing then you can
 use the command [[help:toc-extract-only][toc-extract-only]].
@@ -126,13 +131,13 @@ directory or an absolute path can be given.)
 Sometimes the =pdfoutline/djvused= application is not able to add the TOC to the
 document. In that case you can either debug the problem by copying the used
 terminal command from the =*messages*= buffer and run it manually in the
-document's folder, or you can delete the outline source buffer and run
-=toc--tablist-to-handyoutliner= from the tablist buffer to get an outline source
-file that can be used with [[http://handyoutlinerfo.sourceforge.net/][HandyOutliner]] (unfortunately the handyoutliner
-command does not take arguments, but if you customize the [[help:toc-handyoutliner-path][toc-handyoutliner-path]]
-and [[help:toc-file-browser-command][toc-file-browser-command]] variables, then Emacs will try to open
-HandyOutliner and the file browser so that you can drag the file ~contents.txt~
-directly into HandyOutliner).
+document's folder inside the terminal, or you can delete the outline source
+buffer and run =toc--tablist-to-handyoutliner= from the tablist buffer to get an
+outline source file that can be used with [[http://handyoutlinerfo.sourceforge.net/][HandyOutliner]] (unfortunately the
+handyoutliner command does not take arguments, but if you customize the
+[[help:toc-handyoutliner-path][toc-handyoutliner-path]] and [[help:toc-file-browser-command][toc-file-browser-command]] variables, then Emacs will
+try to open HandyOutliner and the file browser so that you can drag the file
+~contents.txt~ directly into HandyOutliner).
 
 
 
diff --git a/toc-mode.el b/toc-mode.el
index 43f884caf5..e514172daf 100644
--- a/toc-mode.el
+++ b/toc-mode.el
@@ -39,8 +39,19 @@
 ;; Extraction with OCR requires the tesseract command line utility to be
 ;; available.
 
-;; Usage: Extraction and adding contents to a document is done in 4 steps: 1
-;; extraction 2 cleanup 3 adjust/correct pagenumbers 4 add TOC to document
+;; Usage:
+
+;; In each step below, check out available shortcuts using C-h m. Additionally
+;; you can find available functions by typing the M-x mode-name (e.g. M-x
+;; toc-cleanup), or with two dashes in the mode name (e.g. M-x toc--cleanup). Of
+;; course if you use packages like Ivy or Helm you just use the fuzzy search
+;; functionality.
+
+;; Extraction and adding contents to a document is done in 4 steps:
+;; 1 extraction
+;; 2 cleanup
+;; 3 adjust/correct pagenumbers
+;; 4 add TOC to document
 
 ;; 1. Extraction Open some pdf or djvu file in Emacs (pdf-tools and djvu package
 ;; recommended). Find the pagenumbers for the TOC. Then type M-x
@@ -91,7 +102,7 @@
 ;; automatically to the next line not ending with a number and joins it with the
 ;; next line. If the indentation structure of the different lines does not
 ;; correspond with the levels, then the levels can be set automatically from the
-;; number of separatorss in the indices with M-x toc--cleanup-set-level-by-index.
+;; number of separatorss in the indices with M-x toc-cleanup-set-level-by-index.
 ;; The default separators is a . but a different separators can be entered by
 ;; preceding the function invocation with the universal argument (C-u). Some
 ;; documents contain a structure like
@@ -321,7 +332,7 @@ When ARG is non-nil it skips the last three steps"
          (string-list (split-string index sep t)))
     (length string-list)))
 
-(defun toc--cleanup-set-level-by-index (&optional arg)
+(defun toc-cleanup-set-level-by-index (&optional arg)
   "Automatic set indentation by number of separatorss in index.
 By default uses dots as separators. Prepend with universal
 ARG (\\[universal-argument]) to enter different separators."
@@ -494,10 +505,13 @@ Prompt for startpage and endpage and print OCR output to new buffer."
 
 ;;;; toc major modes
 
-(define-key pdf-view-mode-map (kbd "C-c C-e") 'toc-extract-pages)
-(define-key djvu-read-mode-map (kbd "C-c C-e") 'toc-extract-pages)
-(define-key pdf-view-mode-map (kbd "C-c e") 'toc-extract-pages-ocr)
-(define-key djvu-read-mode-map (kbd "C-c e") 'toc-extract-pages-ocr)
+(when (require 'pdf-tools nil t)
+  (define-key pdf-view-mode-map (kbd "C-c C-e") 'toc-extract-pages)
+  (define-key pdf-view-mode-map (kbd "C-c e") 'toc-extract-pages-ocr))
+
+(when (require 'djvu nil t)
+  (define-key djvu-read-mode-map (kbd "C-c C-e") 'toc-extract-pages)
+  (define-key djvu-read-mode-map (kbd "C-c e") 'toc-extract-pages-ocr))
 
 (defvar toc-cleanup-mode-map
   (let ((map (make-sparse-keymap)))

[elpa] externals/doc-toc 05b6d034aa 59/84: Fix djvu/pdf hard dependency (github issue #3)

Reply via email to