about summary refs log tree commit diff
path: root/src/guile/skribilo/biblio/abbrev.scm
diff options
context:
space:
mode:
Diffstat (limited to 'src/guile/skribilo/biblio/abbrev.scm')
-rw-r--r--src/guile/skribilo/biblio/abbrev.scm170
1 files changed, 170 insertions, 0 deletions
diff --git a/src/guile/skribilo/biblio/abbrev.scm b/src/guile/skribilo/biblio/abbrev.scm
new file mode 100644
index 0000000..9c88b6a
--- /dev/null
+++ b/src/guile/skribilo/biblio/abbrev.scm
@@ -0,0 +1,170 @@
+;;; abbrev.scm  --  Determining abbreviations.
+;;;
+;;; Copyright 2006  Ludovic Courtès <ludovic.courtes@laas.fr>
+;;;
+;;;
+;;; This program is free software; you can redistribute it and/or modify
+;;; it under the terms of the GNU General Public License as published by
+;;; the Free Software Foundation; either version 2 of the License, or
+;;; (at your option) any later version.
+;;;
+;;; This program is distributed in the hope that it will be useful,
+;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;;; GNU General Public License for more details.
+;;;
+;;; You should have received a copy of the GNU General Public License
+;;; along with this program; if not, write to the Free Software
+;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
+;;; USA.
+
+(define-module (skribilo biblio abbrev)
+  :use-module (srfi srfi-13)
+  :autoload   (skribilo ast)           (markup? markup-body-set!)
+  :autoload   (skribilo utils strings) (make-string-replace)
+  :autoload   (ice-9 regex)      (regexp-substitute/global)
+  :export (is-abbreviation? is-acronym? abbreviate-word
+           abbreviate-string abbreviate-markup
+
+           %cs-conference-abbreviations
+           %ordinal-number-abbreviations
+           %common-booktitle-abbreviations))
+
+;;; Author:  Ludovic Courtès
+;;;
+;;; Commentary:
+;;;
+;;; Heuristics to identify or generate abbreviations.  This module
+;;; particularly targets booktitle abbreviations (in bibliography entries).
+;;;
+;;; Code:
+
+(define (is-abbreviation? str)
+  ;; Return #t if STR denotes an abbreviation or name initial.
+  (and (>= (string-length str) 2)
+       (char=? (string-ref str 1) #\.)))
+
+(define (is-acronym? str)
+  (string=? str (string-upcase str)))
+
+(define (abbreviate-word word)
+   (if (or (string=? "" word)
+	   (and (>= (string-length word) 3)
+		(string=? "and" (substring word 0 3)))
+	   (is-acronym? word))
+       word
+       (let ((dash (string-index word #\-))
+	     (abbr (string (string-ref word 0) #\.)))
+	  (if (not dash)
+	      abbr
+	      (string-append (string (string-ref word 0)) "-"
+			     (abbreviate-word
+			      (substring word (+ 1 dash)
+					 (string-length word))))))))
+
+(define (abbreviate-string subst title)
+  ;; Abbreviate common conference names within TITLE based on the SUBST list
+  ;; of regexp-substitution pairs (see examples below).  This function also
+  ;; removes the abbreviation if it appears in parentheses right after the
+  ;; substitution regexp.  Example:
+  ;;
+  ;;   "Symposium on Operating Systems Principles (SOSP 2004)"
+  ;;
+  ;; yields
+  ;;
+  ;;   "SOSP"
+  ;;
+  (let loop ((title title)
+	     (subst subst))
+    (if (null? subst)
+	title
+	(let* ((abbr (cdar subst))
+	       (abbr-rexp (string-append "( \\(" abbr "[^\\)]*\\))?"))
+	       (to-replace (string-append (caar subst) abbr-rexp)))
+	  (loop (regexp-substitute/global #f to-replace title
+					  'pre abbr 'post)
+		(cdr subst))))))
+
+(define (abbreviate-markup subst markup)
+  ;; A version of `abbreviate-string' generalized to arbitrary markup
+  ;; objects.
+  (let loop ((markup markup))
+    (cond ((string? markup)
+           (let ((purify (make-string-replace '((#\newline " ")
+                                                (#\tab     " ")))))
+             (abbreviate-string subst (purify markup))))
+          ((list? markup)
+           (map loop markup))
+          ((markup? markup)
+           (markup-body-set! markup (loop (markup-body title)))
+           markup)
+          (else markup))))
+
+
+;;;
+;;; Common English abbreviations.
+;;;
+
+;; The following abbreviation alists may be passed to `abbreviate-string'
+;; and `abbreviate-markup'.
+
+(define %cs-conference-abbreviations
+  ;; Common computer science conferences and their acronym.
+  '(("(Symposium [oO]n )?Operating Systems? Design and [iI]mplementation"
+     . "OSDI")
+    ("(Symposium [oO]n )?Operating Systems? Principles"
+     . "SOSP")
+    ("([wW]orkshop [oO]n )?Hot Topics [iI]n Operating Systems"
+     . "HotOS")
+    ("([cC]onference [oO]n )?[fF]ile [aA]nd [sS]torage [tT]echnologies"
+     . "FAST")
+    ("([tT]he )?([iI]nternational )?[cC]onference [oO]n [aA]rchitectural Support [fF]or Programming Languages [aA]nd Operating Systems"
+     . "ASPLOS")
+    ("([tT]he )?([iI]nternational )?[cC]onference [oO]n Peer-[tT]o-[pP]eer Computing"
+     . "P2P")
+    ("([iI]nternational )?[cC]onference [oO]n [dD]ata [eE]ngineering"
+     . "ICDE")
+    ("([cC]onference [oO]n )?[mM]ass [sS]torage [sS]ystems( [aA]nd [tT]echnologies)?"
+     . "MSS")
+    ("([sS]ymposium [oO]n )?[nN]etworked [sS]ystems [dD]esign [aA]nd [Ii]mplementation"
+     . "NSDI")))
+
+
+(define %ordinal-number-abbreviations
+  ;; The poor man's abbreviation system.
+
+  ;; FIXME: Given the current `abbreviate-string', there is no clean way to
+  ;; make it ignore things like "twenty-first" (instead of yielding an awful
+  ;; "twenty-1st").
+  '(("[Ff]irst"       . "1st")
+    ("[sS]econd"      . "2nd")
+    ("[Tt]hird"       . "3rd")
+    ("[Ff]ourth"      . "4th")
+    ("[Ff]ifth"       . "5th")
+    ("[Ss]ixth"       . "6th")
+    ("[Ss]eventh"     . "7th")
+    ("[eE]ighth"      . "8th")
+    ("[Nn]inth"       . "9th")
+    ("[Tt]enth"       . "10th")
+    ("[Ee]leventh"    . "11th")
+    ("[Tt]welfth"     . "12th")
+    ("[Tt]hirteenth"  . "13th")
+    ("[Ff]ourteenth"  . "14th")
+    ("[Ff]ifteenth"   . "15th")
+    ("[Ss]ixteenth"   . "16th")
+    ("[Ss]eventeenth" . "17th")
+    ("[Ee]ighteenth"  . "18th")
+    ("[Nn]ineteenth"  . "19th")))
+
+(define %common-booktitle-abbreviations
+  ;; Common book title abbreviations.  This is used by
+  ;; `abbreviate-booktitle'.
+  '(("[pP]roceedings?"  . "Proc.")
+    ("[iI]nternational" . "Int.")
+    ("[sS]ymposium"     . "Symp.")
+    ("[cC]onference"    . "Conf.")))
+
+
+;;; arch-tag: 34e0c5bb-592f-467b-b59a-d6f7d130ae4e
+
+;;; abbrev.scm ends here