about summary refs log tree commit diff
diff options
context:
space:
mode:
author Arun Isaac 2022-06-20 01:31:53 +0530
committer Arun Isaac 2022-06-20 01:39:47 +0530
commit 1cf8f23fea19a9c055066325fd20a549d8ffc9e3 (patch)
tree a88d96dec37b06e7d2b85a18836735f5006d14a7
parent b6a015a0a211208a20e4ec784165cecdd2ee0ed0 (diff)
download thogai-1cf8f23fea19a9c055066325fd20a549d8ffc9e3.tar.gz
thogai-1cf8f23fea19a9c055066325fd20a549d8ffc9e3.tar.lz
thogai-1cf8f23fea19a9c055066325fd20a549d8ffc9e3.zip
Implement orthography aware suffixes.
At build time, we import orthography rules from Plover source code. It
is the easiest way to keep up with Plover.

* english-orthography.py: New file.
* thogai.el: Require thogai-english-orthography.
(thogai-insert-orthography-aware-suffix): New function.
(thogai-insert-translation): Use
thogai-insert-orthography-aware-suffix.
-rw-r--r-- english-orthography.py 41
-rw-r--r-- thogai.el 36
2 files changed, 77 insertions, 0 deletions
diff --git a/english-orthography.py b/english-orthography.py
new file mode 100644
index 0000000..914cf39
--- /dev/null
+++ b/english-orthography.py
@@ -0,0 +1,41 @@
+# english-orthography.py --- Generate thogai English orthography rules from Plover
+
+# Stenotyping software for Emacs
+# Copyright © 2022 Arun Isaac <arunisaac@systemreboot.net>
+
+# This file is part of thogai.
+
+# thogai is free software: you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# thogai is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with thogai.  If not, see <http://www.gnu.org/licenses/>.
+
+from plover.system.english_stenotype import ORTHOGRAPHY_RULES
+
+def make_elisp_safe(pattern):
+    # Remove negative lookbehind assertion not supported by elisp
+    # regular expressions. Escape characters for elisp regular
+    # expressions. We compensate for the removed negative lookbehind
+    # assertion by implementing it in elisp code without regular
+    # expressions.
+    return pattern.replace('|(?<![gin]a)r', '') \
+                  .replace('\\', '\\\\') \
+                  .replace('(', '\\\\(') \
+                  .replace(')', '\\\\)') \
+                  .replace('|', '\\\\|')
+
+print(';; Generated from plover.system.english_stenotype\n')
+print("(setq thogai-english-orthography-rules '(")
+for pattern, replacement in ORTHOGRAPHY_RULES:
+    print(f'("{make_elisp_safe(pattern)}" . "{make_elisp_safe(replacement)}")')
+print('))')
+
+print("\n(provide 'thogai-english-orthography)")
diff --git a/thogai.el b/thogai.el
index 65d3407..af392a8 100644
--- a/thogai.el
+++ b/thogai.el
@@ -46,6 +46,8 @@
 (require 'subr-x)
 (require 'term)
 
+(require 'thogai-english-orthography)
+
 (defvar thogai-dictionary-files
   (list "~/.config/plover/main.json"
         "~/.config/plover/commands.json")
@@ -288,6 +290,35 @@ Insert LITERAL-TRANSLATION at point, respecting
   (setq thogai-attach-next nil
         thogai-glue nil))
 
+(defun thogai-insert-orthography-aware-suffix (suffix)
+  "Insert orthography aware SUFFIX for current word."
+  (let ((word (current-word t)))
+    (backward-kill-word 1)
+    (insert (or (seq-some (pcase-lambda (`(,pattern . ,replacement))
+                            (save-match-data
+                              (let ((string (concat word " ^ " suffix)))
+                                (and (string-match pattern string)
+                                     (replace-match replacement nil nil string)))))
+                          thogai-english-orthography-rules)
+                ;; We deleted a negative lookbehind assertion from
+                ;; Plover's English orthography rules since elisp does
+                ;; not support that in regular expressions. Implement
+                ;; it here without regular expressions.
+                ;;
+                ;; The following examples illustrate this rule:
+                ;; oligarch + s = oligarchs
+                ;; patriarch + s = patriarchs
+                ;; monarch + s = monarchs
+                ;; birch + s = birches
+                (and (string= suffix "s")
+                     (string-suffix-p "rch" word)
+                     (not (string-suffix-p "garch" word))
+                     (not (string-suffix-p "iarch" word))
+                     (not (string-suffix-p "narch" word))
+                     (concat word "es"))
+                ;; As a last resort, simply append the suffix.
+                (concat word suffix)))))
+
 (defun thogai-insert-translation (translation &optional non-first-part-p)
   "Insert TRANSLATION at point.
 
@@ -341,6 +372,11 @@ External callers should always pass nil as the value."
                 (string-prefix-p "{^~|" str)))
         (thogai-insert-translation
          (concat "{^}{" (string-remove-prefix "{^" translation))))
+       ;; Orthography aware suffix
+       ((pred (lambda (str)
+                (string-prefix-p "{^" str)))
+        (thogai-insert-orthography-aware-suffix
+         (string-remove-prefix "{^" (string-remove-suffix "}" translation))))
        ;; Glue operator
        ((pred (lambda (str)
                 (string-prefix-p "{&" str)))