Skip to content

Commit

Permalink
Merge branch 'bibliography-author-refactor'
Browse files Browse the repository at this point in the history
  • Loading branch information
alerque committed Aug 23, 2024
2 parents 9e21871 + f4ea4ed commit 317a9ec
Show file tree
Hide file tree
Showing 8 changed files with 677 additions and 449 deletions.
317 changes: 36 additions & 281 deletions packages/bibtex/bibliography.lua
Original file line number Diff line number Diff line change
@@ -1,276 +1,6 @@
-- luacheck: globals setfenv getfenv
-- luacheck: ignore _ENV

-- The following functions are borrowed from Norman Ramsey's nbibtex,
-- with permission.

-- Like string.find, but skips matches that start after the opening of a
-- balanced {...} group and before its end.
-- @param str string to search
-- @param pat Lua pattern to look for
-- @param i position to start searching from (as for string.find)
-- @return start, end and any captures of the first match outside braces,
--   or nil when there is none
local function find_outside_braces (str, pat, i)
   local match_start, match_end = string.find(str, pat, i)
   local brace_start, brace_end = string.find(str, "%b{}", i)
   -- While a brace group opens before the candidate match, jump past that
   -- group and look for both the pattern and the next group again.
   while match_start and brace_start and brace_start < match_start do
      local resume = brace_end + 1
      match_start, match_end = string.find(str, pat, resume)
      brace_start, brace_end = string.find(str, "%b{}", resume)
   end
   if not match_start then
      return match_start, match_end
   end
   -- Either the match precedes the next brace group or no group is left.
   -- Searching once more from the match position also yields the captures.
   return string.find(str, pat, match_start)
end

-- Split a string into the list of substrings separated by a pattern.
-- @param str string to split; a nil/false value gives an empty list
--   (check added by @Omikhelia so that a missing value does not raise here)
-- @param pat Lua pattern matching one separator
-- @param find optional finder with the string.find signature
--   (for instance find_outside_braces); defaults to string.find
-- @return list of substrings; a separator at the very end of the string
--   produces a final empty string
local function split (str, pat, find)
   if not str then
      return {}
   end
   local locate = find or string.find
   local limit = string.len(str) + 1
   local pieces = {}
   local pos = 1
   while pos <= limit do
      local first, last = locate(str, pat, pos)
      if not first then
         -- No separator left: the remainder is the last piece.
         pieces[#pieces + 1] = string.sub(str, pos)
         break
      end
      pieces[#pieces + 1] = string.sub(str, pos, first - 1)
      pos = last + 1
   end
   return pieces
end

-- Collect the separators themselves: every substring of `str` matched by `pat`.
-- @param str string to scan
-- @param pat Lua pattern matching one separator
-- @param find optional finder with the string.find signature
--   (for instance find_outside_braces); defaults to string.find
-- @return list of matched separator strings, in order of appearance
local function splitters (str, pat, find)
   local locate = find or string.find
   local seps = {}
   local first, last = locate(str, pat, 1)
   while first do
      seps[#seps + 1] = string.sub(str, first, last)
      -- Resume right after the separator just recorded.
      first, last = locate(str, pat, last + 1)
   end
   return seps
end

-- Split a name list on the word "and" (any letter case, surrounded by
-- whitespace, outside braces).
-- A piece that still begins with "and" followed by whitespace comes from
-- consecutive "and" words: the leading word is stripped and an empty name is
-- inserted in front of the piece for each one removed.
-- @param str name list string
-- @return list of name strings
local function namesplit (str)
   local leading_and = "^[aA][nN][dD]%s+"
   local names = split(str, "%s+[aA][nN][dD]%s+", find_outside_braces)
   local idx = 2
   while idx <= #names do
      while string.find(names[idx], leading_and) do
         -- Parentheses keep only the first result of gsub (the new string).
         names[idx] = (string.gsub(names[idx], leading_and, ""))
         table.insert(names, idx, "")
         -- The stripped piece moved one slot up; follow it.
         idx = idx + 1
      end
      idx = idx + 1
   end
   return names
end

-- Pattern fragment (escaped for use inside a character class) for the hyphen:
-- a separator character that is not a tie.
local sep_and_not_tie = "%-"
-- Pattern fragment for every separator character: the hyphen plus the tie "~".
local sep_chars = sep_and_not_tie .. "%~"

-- Forward declaration: the implementation lives in the `do` block below so
-- that its pattern strings and helper functions stay private to it.
local parse_name
do
   -- Lua patterns built from the separator characters in `sep_chars`.
   local white_sep = "[" .. sep_chars .. "%s]+" -- run of separators/whitespace
   local white_comma_sep = "[" .. sep_chars .. "%s%,]+" -- same, commas included
   local trailing_commas = "(,[" .. sep_chars .. "%s%,]*)$" -- comma-led run at the end
   local sep_char = "[" .. sep_chars .. "]" -- one explicit separator character
   local leading_white_sep = "^" .. white_sep -- separator run at the start

   -- <name-parsing utilities>=
   -- Tell whether a token counts as a "von" token.
   -- @param str a single name token
   -- @return true when the first letter outside braces is lowercase and no
   --   `{\command` special comes before it; a falsy value (false or nil)
   --   otherwise
   local function isVon (str)
      local lower = find_outside_braces(str, "%l") -- first lowercase letter outside braces
      local letter = find_outside_braces(str, "%a") -- first letter outside braces
      local bs = find_outside_braces(str, "%{%\\(%a+)") -- first {\xxx special
      if lower and lower <= letter and lower <= (bs or lower) then
         return true
      elseif letter and letter <= (bs or letter) then
         return false
      elseif bs then
         -- A {\xxx special comes before any letter outside braces. A lookup of
         -- the command in upper/lower "specials" tables was sketched here but
         -- never enabled; instead the first letter anywhere in the token
         -- (plain string.find, so braces are NOT skipped) decides.
         lower = string.find(str, "%l")
         letter = string.find(str, "%a")
         return lower and lower <= letter
      else
         return false
      end
   end

   -- <definition of [[abbrev]], for shortening a token>=
   -- An abbreviated token is the first letter of the token, unless a `{\`
   -- special comes first: then the whole balanced brace group is kept (or the
   -- rest of the token when the braces are unbalanced).
   -- @param token a single name token
   -- @return the abbreviation, or "" when the token has neither letter nor special
   local function abbrev (token)
      local first_alpha, _, alpha = string.find(token, "(%a)")
      local first_brace = string.find(token, "%{%\\")
      if first_alpha and first_alpha <= (first_brace or first_alpha) then
         return alpha
      elseif first_brace then
         local i, _, special = string.find(token, "(%b{})", first_brace)
         if i then
            return special
         else -- unbalanced braces
            return string.sub(token, first_brace)
         end
      else
         return ""
      end
   end

   -- Parse one BibTeX-style name into its parts.
   -- @param str the name, as "First von Last", "von Last, First" or
   --   "von Last, Jr, First"
   -- @param inter_token optional string; when given it joins the tokens of a
   --   part in both the long and the short form (and no period is added
   --   after abbreviations)
   -- @return table holding, for each non-empty part, the long form under
   --   `ff`/`vv`/`ll`/`jj` and the abbreviated form under `f`/`v`/`l`/`j`
   -- Reports through SU.error when the name ends with commas or contains
   -- more than two commas.
   function parse_name (str, inter_token)
      if string.find(str, trailing_commas) then
         -- Format the message before handing it over, exactly like the
         -- "Too many commas" report below; passing the name as a second
         -- argument left the '%s' placeholder unexpanded.
         SU.error(("Name '%s' has one or more commas at the end"):format(str))
      end
      str = string.gsub(str, trailing_commas, "")
      str = string.gsub(str, leading_white_sep, "")
      local tokens = split(str, white_comma_sep, find_outside_braces)
      local trailers = splitters(str, white_comma_sep, find_outside_braces)
      -- The string separating tokens is reduced to a single
      -- ``separator character.'' A comma always trumps other
      -- separator characters. Otherwise, if there's no comma,
      -- we take the first character, be it a separator or a
      -- space. (Patashnik considers that multiple such
      -- characters constitute ``silliness'' on the user's
      -- part.)
      -- <rewrite [[trailers]] to hold a single separator character each>=
      local commas = {} -- maps each comma to the index of the token that follows it
      for i = 1, #trailers do
         local trailer = trailers[i]
         assert(string.len(trailer) > 0)
         if string.find(trailer, ",") then
            trailers[i] = ","
            commas[#commas + 1] = i + 1
         else
            trailers[i] = string.sub(trailer, 1, 1)
         end
      end
      local name = {}
      -- A name has up to four parts: the most general form is
      -- either ``First von Last, Junior'' or ``von Last,
      -- First, Junior'', but various vons and Juniors can be
      -- omitted. The name-parsing algorithm is baroque and is
      -- transliterated from the original BibTeX source, but
      -- the principle is clear: assign the full version of
      -- each part to the four fields [[ff]], [[vv]], [[ll]],
      -- and [[jj]]; and assign an abbreviated version of each
      -- part to the fields [[f]], [[v]], [[l]], and [[j]].
      -- <parse the name tokens and set fields of [[name]]>=
      local first_start, first_lim, last_lim, von_start, von_lim, jr_lim
      -- variables mark subsequences; if start == lim, sequence is empty
      local n = #tokens
      -- The von name, if any, goes from the first von token to
      -- the last von token, except the last name is entitled
      -- to at least one token. So to find the limit of the von
      -- name, we start just before the last token and wind
      -- down until we find a von token or we hit the von start
      -- (in which latter case there is no von name).
      -- <local parsing functions>=
      local function divide_von_from_last ()
         von_lim = last_lim - 1
         while von_lim > von_start and not isVon(tokens[von_lim - 1]) do
            von_lim = von_lim - 1
         end
      end

      local commacount = #commas
      if commacount == 0 then -- first von last jr
         von_start, first_start, last_lim, jr_lim = 1, 1, n + 1, n + 1
         -- OK, here's one form.
         --
         -- <parse first von last jr>=
         local got_von = false
         while von_start < last_lim - 1 do
            if isVon(tokens[von_start]) then
               divide_von_from_last()
               got_von = true
               break
            else
               von_start = von_start + 1
            end
         end
         if not got_von then -- there is no von name
            -- Tokens joined to the last one by a hyphen belong to the last name.
            while von_start > 1 and string.find(trailers[von_start - 1], sep_and_not_tie) do
               von_start = von_start - 1
            end
            von_lim = von_start
         end
         first_lim = von_start
      elseif commacount == 1 then -- von last jr, first
         von_start, last_lim, jr_lim, first_start, first_lim = 1, commas[1], commas[1], commas[1], n + 1
         divide_von_from_last()
      elseif commacount == 2 then -- von last, jr, first
         von_start, last_lim, jr_lim, first_start, first_lim = 1, commas[1], commas[2], commas[2], n + 1
         divide_von_from_last()
      else
         SU.error(("Too many commas in name '%s'"):format(str))
      end
      -- <set fields of name based on [[first_start]] and friends>=
      -- We set long and short forms together; [[ss]] is the
      -- long form and [[s]] is the short form.
      -- <definition of function [[set_name]]>=
      -- Tokens start .. lim - 1 make up one part; nothing is stored for an
      -- empty range.
      local function set_name (start, lim, long, short)
         if start < lim then
            -- string concatenation is quadratic, but names are short
            local longname = tokens[start]
            local shortname = abbrev(tokens[start])
            for i = start + 1, lim - 1 do
               if inter_token then
                  longname = longname .. inter_token .. tokens[i]
                  shortname = shortname .. inter_token .. abbrev(tokens[i])
               else
                  local ssep, long_next = trailers[i - 1], tokens[i]
                  local sep, short_next = ssep, abbrev(long_next)
                  -- Here is the default for a character between tokens:
                  -- a tie is the default space character between the last
                  -- two tokens of the name part, and between the first two
                  -- tokens if the first token is short enough; otherwise,
                  -- a space is the default.
                  -- <possibly adjust [[sep]] and [[ssep]] according to token position and size>=
                  if not string.find(sep, sep_char) then
                     if i == lim - 1 then
                        sep, ssep = "~", "~"
                     elseif i == start + 1 then
                        sep = string.len(shortname) < 3 and "~" or " "
                        ssep = string.len(longname) < 3 and "~" or " "
                     else
                        sep, ssep = " ", " "
                     end
                  end
                  longname = longname .. ssep .. long_next
                  shortname = shortname .. "." .. sep .. short_next
               end
            end
            name[long] = longname
            name[short] = shortname
         end
      end
      set_name(first_start, first_lim, "ff", "f")
      set_name(von_start, von_lim, "vv", "v")
      set_name(von_lim, last_lim, "ll", "l")
      set_name(last_lim, jr_lim, "jj", "j")
      return name
   end
end

-- Thanks, Norman, for the above functions!

local Bibliography
Bibliography = {
CitationStyles = {
Expand Down Expand Up @@ -346,28 +76,36 @@ Bibliography = {

Style = {
andAuthors = function (item)
local authors = namesplit(item.author)
local authors = item.author or {}
if #authors == 0 then
return ""
end
if #authors == 1 then
return parse_name(authors[1]).ll
return authors[1].ll
else
local names = {}
for i = 1, #authors do
local author = parse_name(authors[i])
authors[i] = author.ll .. ", " .. author.f .. "."
local author = authors[i]
names[i] = author.ll .. ", " .. author.f .. "."
end
return table.concat(authors, " " .. fluent:get_message("bibliography-and") .. " ")
return Bibliography.Style.commafy(names)
end
end,

andSurnames = function (max)
return function (item)
local authors = namesplit(item.author)
local authors = item.author or {}
if #authors == 0 then
return ""
end
if #authors > max then
return parse_name(authors[1]).ll .. " " .. fluent:get_message("bibliography-et-al")
return authors[1].ll .. " " .. fluent:get_message("bibliography-et-al")
else
local names = {}
for i = 1, #authors do
authors[i] = parse_name(authors[i]).ll
names[i] = authors[i].ll
end
return Bibliography.Style.commafy(authors)
return Bibliography.Style.commafy(names)
end
end
end,
Expand All @@ -381,10 +119,14 @@ Bibliography = {
transEditor = function (item)
local r = {}
if item.editor then
r[#r + 1] = fluent:get_message("bibliography-edited-by")({ name = item.editor })
r[#r + 1] = fluent:get_message("bibliography-edited-by")({
name = Bibliography.Style.firstLastNames(item.editor),
})
end
if item.translator then
r[#r + 1] = fluent:get_message("bibliography-translated-by")({ name = item.translator })
r[#r + 1] = fluent:get_message("bibliography-translated-by")({
name = Bibliography.Style.firstLastNames(item.translator),
})
end
if #r then
return table.concat(r, ", ")
Expand Down Expand Up @@ -424,6 +166,19 @@ Bibliography = {
end
end,

-- Render a list of parsed names as "First Last" entries joined with
-- Bibliography.Style.commafy.
-- @param field list of name records, or nil
--   NOTE(review): assumes each record uses the `ff`/`ll` layout of the name
--   parser, where `ff` is only present when the name has a first-name part;
--   confirm against the code that fills the field.
-- @return the joined names, or "" when the list is missing or empty
firstLastNames = function (field)
   local namelist = field or {}
   if #namelist == 0 then
      return ""
   end
   local names = {}
   for i = 1, #namelist do
      local author = namelist[i]
      if author.ff then
         names[i] = author.ff .. " " .. author.ll
      else
         -- Single-token names carry no first-name part; concatenating a nil
         -- `ff` would raise, so use the last name alone.
         names[i] = author.ll
      end
   end
   return Bibliography.Style.commafy(names)
end,

commafy = function (t, andword) -- also stolen from nbibtex
andword = andword or fluent:get_message("bibliography-and")
if #t == 1 then
Expand Down
Loading

0 comments on commit 317a9ec

Please sign in to comment.