summaryrefslogtreecommitdiff
path: root/src/guile/skribilo/biblio/abbrev.scm
diff options
context:
space:
mode:
Diffstat (limited to 'src/guile/skribilo/biblio/abbrev.scm')
-rw-r--r--src/guile/skribilo/biblio/abbrev.scm170
1 files changed, 170 insertions, 0 deletions
diff --git a/src/guile/skribilo/biblio/abbrev.scm b/src/guile/skribilo/biblio/abbrev.scm
new file mode 100644
index 0000000..9c88b6a
--- /dev/null
+++ b/src/guile/skribilo/biblio/abbrev.scm
@@ -0,0 +1,170 @@
+;;; abbrev.scm -- Determining abbreviations.
+;;;
+;;; Copyright 2006 Ludovic Courtès <ludovic.courtes@laas.fr>
+;;;
+;;;
+;;; This program is free software; you can redistribute it and/or modify
+;;; it under the terms of the GNU General Public License as published by
+;;; the Free Software Foundation; either version 2 of the License, or
+;;; (at your option) any later version.
+;;;
+;;; This program is distributed in the hope that it will be useful,
+;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;;; GNU General Public License for more details.
+;;;
+;;; You should have received a copy of the GNU General Public License
+;;; along with this program; if not, write to the Free Software
+;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
+;;; USA.
+
+(define-module (skribilo biblio abbrev)
+ :use-module (srfi srfi-13)
+ :autoload (skribilo ast) (markup? markup-body-set!)
+ :autoload (skribilo utils strings) (make-string-replace)
+ :autoload (ice-9 regex) (regexp-substitute/global)
+ :export (is-abbreviation? is-acronym? abbreviate-word
+ abbreviate-string abbreviate-markup
+
+ %cs-conference-abbreviations
+ %ordinal-number-abbreviations
+ %common-booktitle-abbreviations))
+
+;;; Author: Ludovic Courtès
+;;;
+;;; Commentary:
+;;;
+;;; Heuristics to identify or generate abbreviations. This module
+;;; particularly targets booktitle abbreviations (in bibliography entries).
+;;;
+;;; Code:
+
+(define (is-abbreviation? str)
+ ;; Return #t if STR denotes an abbreviation or name initial.
+ (and (>= (string-length str) 2)
+ (char=? (string-ref str 1) #\.)))
+
+(define (is-acronym? str)
+ (string=? str (string-upcase str)))
+
+(define (abbreviate-word word)
+ (if (or (string=? "" word)
+ (and (>= (string-length word) 3)
+ (string=? "and" (substring word 0 3)))
+ (is-acronym? word))
+ word
+ (let ((dash (string-index word #\-))
+ (abbr (string (string-ref word 0) #\.)))
+ (if (not dash)
+ abbr
+ (string-append (string (string-ref word 0)) "-"
+ (abbreviate-word
+ (substring word (+ 1 dash)
+ (string-length word))))))))
+
+(define (abbreviate-string subst title)
+ ;; Abbreviate common conference names within TITLE based on the SUBST list
+ ;; of regexp-substitution pairs (see examples below). This function also
+ ;; removes the abbreviation if it appears in parentheses right after the
+ ;; substitution regexp. Example:
+ ;;
+ ;; "Symposium on Operating Systems Principles (SOSP 2004)"
+ ;;
+ ;; yields
+ ;;
+ ;; "SOSP"
+ ;;
+ (let loop ((title title)
+ (subst subst))
+ (if (null? subst)
+ title
+ (let* ((abbr (cdar subst))
+ (abbr-rexp (string-append "( \\(" abbr "[^\\)]*\\))?"))
+ (to-replace (string-append (caar subst) abbr-rexp)))
+ (loop (regexp-substitute/global #f to-replace title
+ 'pre abbr 'post)
+ (cdr subst))))))
+
+(define (abbreviate-markup subst markup)
+ ;; A version of `abbreviate-string' generalized to arbitrary markup
+ ;; objects.
+ (let loop ((markup markup))
+ (cond ((string? markup)
+ (let ((purify (make-string-replace '((#\newline " ")
+ (#\tab " ")))))
+ (abbreviate-string subst (purify markup))))
+ ((list? markup)
+ (map loop markup))
+ ((markup? markup)
+ (markup-body-set! markup (loop (markup-body title)))
+ markup)
+ (else markup))))
+
+
+;;;
+;;; Common English abbreviations.
+;;;
+
+;; The following abbreviation alists may be passed to `abbreviate-string'
+;; and `abbreviate-markup'.
+
+(define %cs-conference-abbreviations
+ ;; Common computer science conferences and their acronym.
+ '(("(Symposium [oO]n )?Operating Systems? Design and [iI]mplementation"
+ . "OSDI")
+ ("(Symposium [oO]n )?Operating Systems? Principles"
+ . "SOSP")
+ ("([wW]orkshop [oO]n )?Hot Topics [iI]n Operating Systems"
+ . "HotOS")
+ ("([cC]onference [oO]n )?[fF]ile [aA]nd [sS]torage [tT]echnologies"
+ . "FAST")
+ ("([tT]he )?([iI]nternational )?[cC]onference [oO]n [aA]rchitectural Support [fF]or Programming Languages [aA]nd Operating Systems"
+ . "ASPLOS")
+ ("([tT]he )?([iI]nternational )?[cC]onference [oO]n Peer-[tT]o-[pP]eer Computing"
+ . "P2P")
+ ("([iI]nternational )?[cC]onference [oO]n [dD]ata [eE]ngineering"
+ . "ICDE")
+ ("([cC]onference [oO]n )?[mM]ass [sS]torage [sS]ystems( [aA]nd [tT]echnologies)?"
+ . "MSS")
+ ("([sS]ymposium [oO]n )?[nN]etworked [sS]ystems [dD]esign [aA]nd [Ii]mplementation"
+ . "NSDI")))
+
+
+(define %ordinal-number-abbreviations
+ ;; The poor man's abbreviation system.
+
+ ;; FIXME: Given the current `abbreviate-string', there is no clean way to
+ ;; make it ignore things like "twenty-first" (instead of yielding an awful
+ ;; "twenty-1st").
+ '(("[Ff]irst" . "1st")
+ ("[sS]econd" . "2nd")
+ ("[Tt]hird" . "3rd")
+ ("[Ff]ourth" . "4th")
+ ("[Ff]ifth" . "5th")
+ ("[Ss]ixth" . "6th")
+ ("[Ss]eventh" . "7th")
+ ("[eE]ighth" . "8th")
+ ("[Nn]inth" . "9th")
+ ("[Tt]enth" . "10th")
+ ("[Ee]leventh" . "11th")
+ ("[Tt]welfth" . "12th")
+ ("[Tt]hirteenth" . "13th")
+ ("[Ff]ourteenth" . "14th")
+ ("[Ff]ifteenth" . "15th")
+ ("[Ss]ixteenth" . "16th")
+ ("[Ss]eventeenth" . "17th")
+ ("[Ee]ighteenth" . "18th")
+ ("[Nn]ineteenth" . "19th")))
+
+(define %common-booktitle-abbreviations
+ ;; Common book title abbreviations. This is used by
+ ;; `abbreviate-booktitle'.
+ '(("[pP]roceedings?" . "Proc.")
+ ("[iI]nternational" . "Int.")
+ ("[sS]ymposium" . "Symp.")
+ ("[cC]onference" . "Conf.")))
+
+
+;;; arch-tag: 34e0c5bb-592f-467b-b59a-d6f7d130ae4e
+
+;;; abbrev.scm ends here