;;; hyphen.el --- word hyphenation commands for Emacs. ;; Copyright (C) 1998--2004 Ralph Schleicher ;; Author: Ralph Schleicher ;; Keywords: wp local ;; This file is not part of GNU Emacs. ;; This program is free software; you can redistribute it and/or modify ;; it under the terms of the GNU General Public License as published by ;; the Free Software Foundation; either version 2, or (at your option) ;; any later version. ;; This program is distributed in the hope that it will be useful, ;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;; GNU General Public License for more details. ;; You should have received a copy of the GNU General Public License ;; along with this program; see the file COPYING. If not, write to ;; the Free Software Foundation, Inc., 59 Temple Place, Suite 330, ;; Boston, MA 02111-1307, USA. ;;; Commentary: ;; The behavior of word processing programs regarding automatic word ;; hyphenation is quite common. When writing, word hyphenation happens ;; immediately: ;; hyphenatio| ;; ==> hyphen- ;; ation| ;; When deleting characters backward, hyphenation characters are removed ;; as soon as possible: ;; hyphen- ;; ation| ;; ==> hyphenatio| ;; When inserting characters before a word: ;; a| hyphenation ;; ==> au| hyphen- ;; ation| ;;; Code: (require 'custom) (eval-and-compile (require 'cl)) (defgroup hyphenation nil "Word hyphenation." :group 'wp) (defcustom hyphenation-locale nil "*The locale to use instead of the default one when breaking words. Automatically becomes buffer local when set in any fashion." :type '(choice (const nil) string) :group 'hyphenation) (make-variable-buffer-local 'hyphenation-locale) (defcustom hyphenation-initial-locale "us" "The locale to load initially as the default locale." :type '(choice (const nil) string) :group 'hyphenation) (defvar hyphen-locale-alist nil "Alist of hyphenation properties for different locales. List elements are cons cells of the form (LOCALE . INFO) LOCALE describes a locale (a string). INFO is a vector with six elements: [LEFT-MIN RIGHT-MIN EXCEPTIONS PATTERNS MIN-LEN MAX-LEN] LEFT-MIN is the minimum left hyphenation fragment size. RIGHT-MIN is the minimum right hyphenation fragment size. EXCEPTIONS is an alist of hyphenation exceptions. PATTERNS is an obarray of hyphenation patterns. MIN-LEN is the length of the shortest pattern. MAX-LEN is the length of the longest pattern. If INFO is a string, then LOCALE is an alias for locale INFO.") (defun hyphen-locale-info (locale &optional exists) "Return the hyphenation properties for LOCALE. Do not create a new entry if optional second argument EXISTS is non-nil." (let ((cell (assoc locale hyphen-locale-alist))) ;; Resolve aliases. (while (stringp (cdr cell)) (setq cell (assoc (cdr cell) hyphen-locale-alist))) ;; Create new locale. (unless (or cell exists) (setq cell (cons locale (vector 1 1 nil nil 0 0)) hyphen-locale-alist (cons cell hyphen-locale-alist))) (cdr cell))) (defmacro hyphen-get-left-min (locale) "Return the minimum left hyphenation fragment size of LOCALE." `(aref (hyphen-locale-info ,locale) 0)) (defmacro hyphen-set-left-min (locale left-min) "Set the minimum left hyphenation fragment size of LOCALE to LEFT-MIN." `(aset (hyphen-locale-info ,locale) 0 ,left-min)) (defmacro hyphen-get-right-min (locale) "Return the minimum right hyphenation fragment size of LOCALE." `(aref (hyphen-locale-info ,locale) 1)) (defmacro hyphen-set-right-min (locale right-min) "Set the minimum right hyphenation fragment size of LOCALE to RIGHT-MIN." `(aset (hyphen-locale-info ,locale) 1 ,right-min)) (defmacro hyphen-get-exceptions (locale) "Return the hyphenation exceptions of LOCALE." `(aref (hyphen-locale-info ,locale) 2)) (defmacro hyphen-set-exceptions (locale exceptions) "Set the hyphenation exceptions of LOCALE to EXCEPTIONS." `(aset (hyphen-locale-info ,locale) 2 ,exceptions)) (defmacro hyphen-get-patterns (locale) "Return the hyphenation patterns of LOCALE." `(aref (hyphen-locale-info ,locale) 3)) (defmacro hyphen-set-patterns (locale patterns) "Set the hyphenation patterns of LOCALE to PATTERNS." `(aset (hyphen-locale-info ,locale) 3 ,patterns)) (defmacro hyphen-get-pattern-min-length (locale) "Return the length of the shortest pattern of LOCALE." `(aref (hyphen-locale-info ,locale) 4)) (defmacro hyphen-set-pattern-min-length (locale min-length) "Set the length of the shortest pattern of LOCALE to MIN-LENGTH." `(aset (hyphen-locale-info ,locale) 4 ,min-length)) (defmacro hyphen-get-pattern-max-length (locale) "Return the length of the longest pattern of LOCALE." `(aref (hyphen-locale-info ,locale) 5)) (defmacro hyphen-set-pattern-max-length (locale max-length) "Set the length of the longest pattern of LOCALE to MAX-LENGTH." `(aset (hyphen-locale-info ,locale) 5 ,max-length)) (eval-and-compile (mapcar (lambda (keyword) (unless (boundp keyword) (set keyword keyword))) '(:left-min :right-min :exceptions :patterns))) (defun hyphen-define-locale (locale &rest arguments) "Add LOCALE to the hyphenation dictionary. Remaining arguments are one or more options of the form KEYWORD VALUE KEYWORD is either ':left-min', ':right-min', ':exceptions', or ':patterns'. See the documentation of the variable 'hyphen-locale-alist', for more details about these keywords." (let* ((options (hyphen-get-options '((:left-min . 1) (:right-min . 1) (:exceptions . nil) (:patterns . nil)) arguments)) (left-min (cdr (nth 0 options))) (right-min (cdr (nth 1 options))) (exceptions (cdr (nth 2 options))) (patterns (cdr (nth 3 options))) min-length max-length) (setq left-min (max 1 left-min)) (setq right-min (max 1 right-min)) (when (stringp exceptions) (setq exceptions (hyphen-load-library exceptions))) (when exceptions (setq exceptions (hyphen-build-exceptions exceptions))) (when (stringp patterns) (setq patterns (hyphen-load-library patterns))) (when patterns (multiple-value-setq (patterns min-length max-length) (hyphen-build-patterns patterns))) ;; No error up to here, define locale. (hyphen-set-left-min locale left-min) (hyphen-set-right-min locale right-min) (hyphen-set-exceptions locale exceptions) (hyphen-set-patterns locale patterns) (hyphen-set-pattern-min-length locale min-length) (hyphen-set-pattern-max-length locale max-length)) nil) (defun hyphen-define-locale-alias (alias locale) "Define ALIAS as an alternative name for LOCALE." (let (cell) (if (setq cell (assoc alias hyphen-locale-alist)) (setcdr cell locale) (setq hyphen-locale-alist (cons (cons alias locale) hyphen-locale-alist))))) (defun hyphen-add-exceptions (locale &rest words) "Add the listed words to the exception dictionary of LOCALE." (let ((exceptions (hyphen-get-exceptions locale)) (alist (hyphen-build-exceptions words)) tem) (while alist (if (setq tem (assoc (caar alist) exceptions)) (setcdr tem (cdar alist)) (setq exceptions (cons (car alist) exceptions))) (setq alist (cdr alist))) (hyphen-set-exceptions locale exceptions))) (put 'hyphen-define-locale 'lisp-indent-function 1) (put 'hyphen-add-exceptions 'lisp-indent-function 1) ;;;###autoload (defun hyphen-change-locale (locale &optional default) "Use LOCALE when breaking words in the current buffer. Optional second argument DEFAULT means make LOCALE the default locale for other buffers." (interactive (list (completing-read "Use new locale (RET for current): " (delete nil (mapcar (lambda (elem) (when (vectorp (cdr elem)) elem)) hyphen-locale-alist)) nil t) current-prefix-arg)) (unless (or (null locale) (string-equal locale "")) (setq hyphenation-locale locale)) (when (and (stringp hyphenation-locale) (not (string-equal hyphenation-locale "default")) default) (hyphen-define-locale-alias "default" hyphenation-locale))) ;;;###autoload (defun hyphen-show-hyphens (word) "Break WORD into syllables and display the result in the echo area." (interactive (multiple-value-bind (default) (hyphen-current-or-next-word) (list (read-string (if (null default) "Hyphenate word: " (format "Hyphenate word (default %s): " default)) nil nil default t)))) (message "%s" (mapconcat 'identity (hyphen-word-syllables word) "-"))) (define-key help-map "-" 'hyphen-show-hyphens) ;;;###autoload (defun hyphen-check-region (start end) "Check region contents for correct word breaks." (interactive "r") (let (head tail word breaks tem) (save-excursion (goto-char start) (while (setq tem (hyphen-search-forward end)) (setq head (nth 0 tem) tail (nth 1 tem) word (concat head tail) breaks (hyphen-word-breaks word)) (unless (member (length head) breaks) ;; TODO: Found an incorrect word break. ))))) ;;;###autoload (defun hyphen-check-buffer () "Check buffer contents for correct word breaks." (interactive) (hyphen-check-region (point-min) (point-max))) ;;;###autoload (defun hyphen-check-message () "Check message body for correct word breaks." (interactive) (let (start end) (save-excursion (goto-char (point-min)) (when (re-search-forward (concat "^" (regexp-quote mail-header-separator) "\n") nil t) (setq start (point))) (when (re-search-forward "^-- \n" nil t) (setq end (match-beginning 0)))) (hyphen-check-region (or start (point-min)) (or end (point-max))))) (defun hyphen-get-options (keywords arguments) "Parse a sequence of KEYWORD ARGUMENT pairs and return the result." (setq keywords (copy-alist keywords)) (let (keyword value tem) (while arguments (unless (symbolp (setq keyword (car arguments))) (error "Junk in argument list \"%S\"" arguments)) (unless (setq arguments (cdr arguments)) (error "Keyword \"%S\" is missing an argument" keyword)) (setq value (car arguments) arguments (cdr arguments)) (if (setq tem (assq keyword keywords)) (setcdr tem value) (error "Unknown keyword \"%S\"" keyword)))) keywords) (defun hyphen-expand-file-name (file-name &optional path) "Like 'expand-file-name', but searches the directories in 'load-path' too. If optional second argument PATH is non-nil, then search the directories listed in PATH." (let (found tem) (if (file-exists-p (setq tem (expand-file-name file-name))) (setq found tem) (unless path (setq path load-path)) (while (and (not found) path) (if (file-exists-p (setq tem (expand-file-name file-name (car path)))) (setq found tem) (setq path (cdr path))))) found)) (defun hyphen-load-library (library) "Return all words in the file LIBRARY as a list of strings. Text after the first '%' in a line is treated as a comment iff '%' is not part of a word." (let (file-name base-name words) (unless (setq file-name (hyphen-expand-file-name library)) (setq file-name library)) (setq base-name (file-name-nondirectory file-name)) (with-temp-buffer (message "Loading %s... " base-name) (insert-file-contents file-name) (while (not (eobp)) (cond ((looking-at "%") (forward-line 1)) ((looking-at "\\s +") (goto-char (match-end 0))) ((looking-at "\\S +") (goto-char (match-end 0)) (setq words (cons (match-string 0) words))) (t (let ((lines (count-lines (point-min) (point)))) (if (looking-at ".+") (error "%s:%d: Parse error at '%s'" library lines (match-string 0)) (error "%s:%d: Parse error" library lines)))))) (message "Loading %s... done" base-name)) (nreverse words))) (defun hyphen-build-exceptions (words) "Build an exception dictionary containing WORDS. A word must contain only letters and hyphens, for example, \"hy-phen-a-tion\". Value is an alist with cons cells of the form (WORD . BREAKS) WORD is the word itself (without hyphens) in lowercase letters. BREAKS are marker positions where WORD can be hyphenated (a list of integers), for example, 2 means that a word can be hyphenated between the second and third letter." (let (exceptions word syllables breaks counter start tem) (while words (setq word (downcase (car words)) syllables nil breaks nil counter 0 start 0) (while (string-match "-" word start) (setq syllables (cons (substring word start (match-beginning 0)) syllables) breaks (cons (- (match-beginning 0) counter) breaks) counter (1+ counter) start (match-end 0))) (setq syllables (nreverse (cons (substring word start) syllables)) word (apply 'concat syllables) breaks (nreverse breaks)) (if (setq tem (assoc word exceptions)) (setcdr tem breaks) (setq exceptions (cons (cons word breaks) exceptions))) (setq words (cdr words))) exceptions)) (defun hyphen-build-patterns (patterns) "Build a hyphenation dictionary. Each pattern (a string) is a sequence of VALUE CHAR pairs, followed by VALUE. VALUE is either a digit ('0' to '9') or empty; an empty VALUE stands for zero. CHAR can be any character. The special character '.' marks the beginning or end of a word. Values are a hash table and the length of the shortest and longest pattern (not counting values). A symbol name in the hash table is the plain pattern, a symbol's value is a vector of the pattern's values." (let ((table (make-vector 1777 0)) (min-length 32767) (max-length 0) pattern size index char chars value values name length) (message "Building hyphenation dictionary... ") (while patterns (setq pattern (car patterns) chars nil values nil size (length pattern) index 0) (while (< index size) (setq char (aref pattern index)) (if (and (>= char ?0) (<= char ?9)) (setq value (- char ?0) index (1+ index)) (setq value 0)) (setq values (cons value values)) (when (< index size) (setq chars (cons (aref pattern index) chars))) (setq index (1+ index))) (when (= (length values) (length chars)) (setq values (cons 0 values))) (setq name (concat (nreverse chars)) length (length name)) (when (< length min-length) (setq min-length length)) (when (> length max-length) (setq max-length length)) (set (intern name table) (concat (nreverse values))) (setq patterns (cdr patterns))) (message "Building hyphenation dictionary... done") (values table min-length max-length))) (defun hyphen-current-word () "Return the word point is within or the previous word. Values are the word itself and the buffer positions of the beginning and end of the word." (save-excursion (skip-syntax-backward "^w") (unless (= (skip-syntax-backward "w") 0) (let* ((start (point))) (skip-syntax-forward "w") (values (buffer-substring-no-properties start (point)) start (point)))))) (defun hyphen-current-or-next-word () "Return the word point is within or adjacent to. Values are the word itself and the buffer positions of the beginning and end of the word." (if (or (eobp) (not (looking-at "\\sw"))) (hyphen-current-word) (save-excursion (skip-syntax-backward "w") (let ((start (point))) (skip-syntax-forward "w") (values (buffer-substring-no-properties start (point)) start (point)))))) (defun hyphen-word-breaks (word) "Compute the break points in WORD." (let (locale left-min right-min exceptions patterns breaks tem) (setq locale (or hyphenation-locale "default")) (unless (hyphen-locale-info locale t) (error "Locale %s is not defined" locale)) (setq left-min (max 1 (hyphen-get-left-min locale)) right-min (max 1 (hyphen-get-right-min locale)) exceptions (hyphen-get-exceptions locale) patterns (hyphen-get-patterns locale)) (cond ((setq tem (assoc (downcase word) exceptions)) (setq breaks (cdr tem))) ((or (< (length word) (+ left-min right-min)) (null patterns))) ((let (h i j k l m n p s v w) (setq w (concat "." (downcase word) ".") l (length w) h (make-string (1+ l) 0) p (hyphen-get-patterns locale) k (max 1 (hyphen-get-pattern-min-length locale)) n (min l (hyphen-get-pattern-max-length locale))) ;; Liang's algorithm. (while (<= k n) (setq j 0 m (- l k)) (while (<= j m) (when (setq s (intern-soft (substring w j (+ j k)) p)) (setq v (symbol-value s) i 0) (while (<= i k) (aset h (+ j i) (max (aref h (+ j i)) (aref v i))) (setq i (1+ i)))) (setq j (1+ j))) (setq k (1+ k))) ;; Disallow 'x-' and '-xx' breaks. (setq i 0) (while (<= i left-min) (aset h i 0) (setq i (1+ i))) (setq i (- l right-min)) (while (<= i l) (aset h i 0) (setq i (1+ i))) ;; Pick up the break points. (setq i 0) (while (<= i l) (when (= (logand (aref h i) 1) 1) (setq breaks (cons (1- i) breaks))) (setq i (1+ i))) (setq breaks (nreverse breaks))))) breaks)) (defun hyphen-word-syllables (word) "Break WORD into syllables. Value is a list of strings." (let (syllables (start 0) (breaks (hyphen-word-breaks word))) (while breaks (setq syllables (cons (substring word start (car breaks)) syllables) start (car breaks) breaks (cdr breaks))) (nreverse (cons (substring word start) syllables)))) (defun hyphen-search-forward (&optional bound) "Search forward for a hyphenated word. An optional argument bounds the search; it is a buffer position." (let (head tail start continue) (save-excursion (when (re-search-forward "\\(\\sw+\\)-\n" bound t) (setq head (match-string 1) start (match-beginning 1)) (beginning-of-line-text) (when (looking-at "\\sw+") (setq tail (match-string 0) continue (match-beginning 0)) (unless (= (aref tail 0) (downcase (aref tail 0))) (setq tail nil))))) (when (and head tail) (goto-char (+ start (length head) 1)) (list head tail start continue)))) (provide 'hyphen) ;; Load the initial locale and make it the default locale. (when (stringp hyphenation-initial-locale) (load-library (concat "hyphen-" hyphenation-initial-locale)) (hyphen-define-locale-alias "default" hyphenation-initial-locale)) ;; Ensure that locale 'default' is defined. (unless (hyphen-locale-info "default" t) (hyphen-define-locale "default")) ;;; hyphen.el ends here