From ccf2cde16b88bc17d114da9612d6973e890c3a08 Mon Sep 17 00:00:00 2001 From: Omikhleia Date: Sat, 14 Sep 2024 14:25:52 +0200 Subject: [PATCH] refactor(packages): Better handling of number ranges in bibliographies Sort of a hack for now, see in-code comments. The whole global picture would be harder, so let's go for a quick win although imperfect. --- csl/core/engine.lua | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/csl/core/engine.lua b/csl/core/engine.lua index d3edf2e8e..82039df51 100644 --- a/csl/core/engine.lua +++ b/csl/core/engine.lua @@ -22,6 +22,7 @@ local CslLocale = require("csl.core.locale").CslLocale local superfolding = require("csl.core.utils.superfolding") local endash = luautf8.char(0x2013) +local emdash = luautf8.char(0x2014) local CslEngine = pl.class() @@ -1229,6 +1230,24 @@ function CslEngine:_postrender (text) -- or a question mark. But it's ugly. text = luautf8.gsub(text, "([…!?%.]" .. rdquote .. ")%.", "%1") end + -- HACK: Number ranges. + -- This is a shortcut, deviating from the CSL specification. + -- On one hand, it uses the page-range-delimiter to format all number ranges: + -- CSL says it should be used for page ranges only, and that an endash is implied otherwise. + -- But honestly, this is against most good typographic practices (in French, at least, + -- the single dash is recommended, but consistency is key anyhow, so chapter ranges + -- for instance should use the same delimiter as page ranges). + -- On the other hand, doing such things that late in the process is not great. + -- Looking at citeproc-lua, it does a lot of things in cs:number processing, + -- e.g. "1 & 2" is valid, and in "ordinal" form, it should be "1st & 2nd". + -- We aren't implementing that yet. + -- I'm not sure (at a glance) that those other citeproc implementations handle + -- roman page numbers (which may occur in some book front matters), etc. + -- Needless to say, centuries in titles, etc. are another can of worms.
+ local range = self.punctuation.page_range_delimiter or endash + range = range == "-" and "%-" or range -- escape hyphen + local dashes = "%-" .. endash .. emdash .. range + text = luautf8.gsub(text, "(%d+)[^%d]+%s*[" .. dashes .. "]+%s*(%d+)", "%1" .. range .. "%2") return text end