add _extensions
This commit is contained in:
@@ -0,0 +1,170 @@
|
||||
--[[
|
||||
capitalize-subtitle – Capitalize first letter after colons and em dashes in bibliography entries
|
||||
|
||||
This filter capitalizes the first letter after colons and em dashes in bibliography
|
||||
entries, following APA and similar styles that require subtitle capitalization.
|
||||
|
||||
It only processes paragraphs within bibliography divs
|
||||
Must be run after Citeproc
|
||||
|
||||
Copyright: © 2025–present Tom Ben
|
||||
License: MIT
|
||||
]]
|
||||
|
||||
local EXTRA_PUNCT = "“”‘’«»‹›„‟「」『』﹁﹂﹃﹄–—‐"
|
||||
local APOSTROPHES = "'’"
|
||||
local HYPHENS = "%-‐"
|
||||
local AFTER_PUNCT_CLASS = "%s%p" .. EXTRA_PUNCT
|
||||
local WORD_TAIL_PATTERN = "[%a" .. APOSTROPHES .. HYPHENS .. "]*"
|
||||
local TRIGGER_TAIL_PATTERN = "[^%a" .. APOSTROPHES .. HYPHENS .. "]*$"
|
||||
|
||||
local function is_punctuation(char)
|
||||
if not char or char == "" then
|
||||
return true
|
||||
end
|
||||
if char:match("[%s%p]") then
|
||||
return true
|
||||
end
|
||||
return EXTRA_PUNCT:find(char, 1, true) ~= nil
|
||||
end
|
||||
|
||||
local INTERNAL_MARKS = {
|
||||
{
|
||||
mark = ":",
|
||||
skip = function(punct)
|
||||
return punct == "" or punct:find("/")
|
||||
end,
|
||||
},
|
||||
{ mark = "—" },
|
||||
}
|
||||
|
||||
local function capitalize_internal_marks(text)
|
||||
for _, config in ipairs(INTERNAL_MARKS) do
|
||||
local pattern = config.mark .. "([" .. AFTER_PUNCT_CLASS .. "]*)([a-z])"
|
||||
text = text:gsub(pattern, function(punct, letter)
|
||||
-- Require some separation after colon (e.g., space or quote) to avoid URLs and protocols
|
||||
if config.skip and config.skip(punct) then
|
||||
return config.mark .. punct .. letter
|
||||
end
|
||||
return config.mark .. punct .. letter:upper()
|
||||
end)
|
||||
end
|
||||
|
||||
return text
|
||||
end
|
||||
|
||||
--- Capitalize first letter if it's lowercase.
|
||||
--- Returns transformed string and a boolean indicating whether capitalization occurred.
|
||||
local function capitalize_first(str)
|
||||
if not str or str == "" then
|
||||
return str, false
|
||||
end
|
||||
-- Separate leading punctuation/spaces (including common typographic quotes) from the word
|
||||
local leading, remainder = str:match("^([%s%p" .. EXTRA_PUNCT .. "]*)(.*)")
|
||||
if not remainder or remainder == "" then
|
||||
return str, false
|
||||
end
|
||||
local already_capitalized = remainder:match("^([A-Z]" .. WORD_TAIL_PATTERN .. ")")
|
||||
if already_capitalized then
|
||||
return str, true
|
||||
end
|
||||
local word, suffix = remainder:match("^([a-z]" .. WORD_TAIL_PATTERN .. ")(.*)")
|
||||
if not word then
|
||||
local first_char = remainder:sub(1, 1)
|
||||
if not is_punctuation(first_char) then
|
||||
return str, true
|
||||
end
|
||||
return str, false
|
||||
end
|
||||
-- Only capitalize when the word is purely alphabetic (with optional hyphen/apostrophe)
|
||||
-- and is followed by punctuation/space or nothing. This avoids changing items like e2105061118.
|
||||
local next_char = suffix:sub(1, 1)
|
||||
if suffix ~= "" and not is_punctuation(next_char) then
|
||||
return str, false
|
||||
end
|
||||
return leading .. word:sub(1, 1):upper() .. word:sub(2) .. suffix, true
|
||||
end
|
||||
|
||||
local process_inlines --- forward declaration for mutual recursion
|
||||
|
||||
local SIMPLE_WRAPPERS = {
|
||||
Emph = pandoc.Emph,
|
||||
Strong = pandoc.Strong,
|
||||
SmallCaps = pandoc.SmallCaps,
|
||||
}
|
||||
|
||||
local function rebuild_container(elem, capitalize_next)
|
||||
if not elem.content then
|
||||
return nil, capitalize_next
|
||||
end
|
||||
|
||||
local processed
|
||||
processed, capitalize_next = process_inlines(elem.content, capitalize_next)
|
||||
|
||||
local wrap = SIMPLE_WRAPPERS[elem.t]
|
||||
if wrap then
|
||||
return wrap(processed), capitalize_next
|
||||
elseif elem.t == "Span" then
|
||||
return pandoc.Span(processed, elem.attr), capitalize_next
|
||||
elseif elem.t == "Quoted" then
|
||||
return pandoc.Quoted(elem.quotetype, processed), capitalize_next
|
||||
end
|
||||
|
||||
return nil, capitalize_next
|
||||
end
|
||||
|
||||
--- Process a list of inlines recursively: capitalize word after colon or em dash.
|
||||
-- Returns processed inline list and updated capitalize_next flag.
|
||||
function process_inlines(inlines_list, capitalize_next)
|
||||
local result = {}
|
||||
capitalize_next = capitalize_next or false
|
||||
|
||||
for _, elem in ipairs(inlines_list) do
|
||||
if elem.t == "Str" then
|
||||
local text = elem.text
|
||||
|
||||
text = capitalize_internal_marks(text)
|
||||
|
||||
if capitalize_next then
|
||||
local new_text, consumed = capitalize_first(text)
|
||||
text = new_text
|
||||
capitalize_next = not consumed
|
||||
end
|
||||
|
||||
if text:find(":" .. TRIGGER_TAIL_PATTERN) or text:find("—" .. TRIGGER_TAIL_PATTERN) then
|
||||
capitalize_next = true
|
||||
end
|
||||
|
||||
result[#result + 1] = pandoc.Str(text)
|
||||
elseif elem.t == "Space" or elem.t == "SoftBreak" or elem.t == "LineBreak" then
|
||||
result[#result + 1] = elem
|
||||
else
|
||||
local rebuilt
|
||||
rebuilt, capitalize_next = rebuild_container(elem, capitalize_next)
|
||||
if rebuilt then
|
||||
result[#result + 1] = rebuilt
|
||||
else
|
||||
result[#result + 1] = elem
|
||||
capitalize_next = false
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
return result, capitalize_next
|
||||
end
|
||||
|
||||
--- Process paragraphs: capitalize word after colon or em dash
|
||||
local function process_para(para)
|
||||
local processed_content = process_inlines(para.content, false)
|
||||
return pandoc.Para(processed_content)
|
||||
end
|
||||
|
||||
--- Only process divs with bibliography classes
|
||||
function Div(div)
|
||||
-- Check for bibliography-related classes
|
||||
if div.classes:includes("references") or
|
||||
div.classes:includes("csl-bib-body") or
|
||||
div.classes:includes("csl-entry") then
|
||||
return pandoc.walk_block(div, { Para = process_para })
|
||||
end
|
||||
end
|
||||
Reference in New Issue
Block a user