Files
2026-05-21 13:37:53 +08:00

214 lines
7.7 KiB
Lua
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
--- Localize Chinese bibliographies generated by Pandoc Citeproc and CSL
--- Copyright: © 2024–Present Tom Ben
--- License: MIT License
--- Report whether a string holds at least one Chinese character.
-- Detection is byte-based: it looks for a three-byte UTF-8 sequence whose
-- lead byte is 228-233 (0xE4-0xE9) followed by two continuation bytes
-- (128-191), i.e. code points U+4000 through U+9FFF.
-- @param text string to inspect; a falsy value (nil/false) is returned as-is
-- @return true or false for a string, otherwise the falsy input itself
function contains_chinese(text)
    if not text then
        return text
    end
    local first_hit = text:find("[\228-\233][\128-\191][\128-\191]")
    return first_hit ~= nil
end
--- Pull the first run of decimal digits out of a string.
-- @param text string to scan; a falsy value (nil/false) is returned as-is
-- @return the first digit run as a string, or nil when there is none
function extract_digits(text)
    if not text then
        return text
    end
    local digits = text:match("%d+")
    return digits
end
--- Walk backwards from `start_index` and return the nearest non-empty Str.
-- Anything that is not a Str (e.g. Space) and Str elements with empty text
-- are stepped over.
-- @param content list of inline elements (each with a `t` tag)
-- @param start_index position to search before (exclusive)
-- @return the matching element and its index, or nil, nil when none exists
local function find_prev_str(content, start_index)
    local pos = start_index - 1
    while pos >= 1 do
        local candidate = content[pos]
        if candidate and candidate.t == "Str" and candidate.text ~= "" then
            return candidate, pos
        end
        pos = pos - 1
    end
    return nil, nil
end
--- Walk forwards from `start_index` and return the nearest non-empty Str.
-- Anything that is not a Str (e.g. Space) and Str elements with empty text
-- are stepped over.
-- @param content list of inline elements (each with a `t` tag)
-- @param start_index position to search after (exclusive)
-- @return the matching element and its index, or nil, nil when none exists
local function find_next_str(content, start_index)
    local last = #content
    local pos = start_index + 1
    while pos <= last do
        local candidate = content[pos]
        if candidate and candidate.t == "Str" and candidate.text ~= "" then
            return candidate, pos
        end
        pos = pos + 1
    end
    return nil, nil
end
--- Tell whether an element is an Emph that starts with "et al." or "et al.,".
-- Only the first three children are inspected: Str "et", a Space, and a Str
-- that is exactly "al." or "al.,".
-- @param emph inline element to test
-- @return true when the element matches, false otherwise
function is_et_al_emph(emph)
    if emph.t ~= "Emph" then
        return false
    end
    local parts = emph.content
    if #parts < 3 then
        return false
    end

    local first, middle, last = parts[1], parts[2], parts[3]
    if not (first.t == "Str" and first.text == "et") then
        return false
    end
    if middle.t ~= "Space" then
        return false
    end
    if last.t ~= "Str" then
        return false
    end
    return last.text == "al." or last.text == "al.,"
end
--- Localize "et al." inside a citation (author-date styles).
-- Two shapes are handled:
--   * italic: an Emph holding "et al." / "et al.," that directly follows a
--     Str containing Chinese is replaced by a plain Str "等" / "等,";
--   * plain: a Str that contains Chinese and ends in "et", followed by a Space
--     and a Str "al." / "al.,", is merged into one Str with the trailing "et"
--     swapped for "等" / "等,".
-- Everything else is copied through untouched.
-- @param el element with an inline list in `content` (the filter table in
--           this file registers it for Cite and Link)
-- @return the same element, with `content` replaced by the rewritten list
function process_citation(el)
    local content = el.content
    local new_inlines = {}
    local i = 1
    while i <= #content do
        local current = content[i]
        -- By default the current inline is kept and one position is consumed.
        local replacement, consumed = current, 1

        if current.t == "Emph" and is_et_al_emph(current) and i > 1 then
            -- Italic "et al.": only localize when the preceding Str is Chinese.
            local prev = content[i - 1]
            if prev.t == "Str" and contains_chinese(prev.text) then
                if current.content[3].text == "al.," then
                    replacement = pandoc.Str("等,")
                else
                    replacement = pandoc.Str("等")
                end
            end
        elseif current.t == "Str" and contains_chinese(current.text)
            and current.text:sub(-2) == "et" and i + 2 <= #content
            and content[i + 1].t == "Space" and content[i + 2].t == "Str" then
            -- Plain "et al.": the "et" is glued to the end of the Chinese name.
            local tail = content[i + 2].text
            local suffix
            if tail == "al." then
                suffix = "等"
            elseif tail == "al.," then
                suffix = "等,"
            end
            if suffix then
                -- Drop the two trailing bytes ("et") and append the Chinese form.
                replacement = pandoc.Str(current.text:sub(1, -3) .. suffix)
                consumed = 3 -- also swallow the Space and the "al." / "al.," Str
            end
        end

        new_inlines[#new_inlines + 1] = replacement
        i = i + consumed
    end
    el.content = new_inlines
    return el
end
--- Localize English markers in a bibliography entry that follows Chinese text.
-- Works on any element exposing an inline list in `content`; in this file it
-- is the Span handler and is also applied to each Para of a `csl-entry` Div.
--
-- Pass 1 rewrites "et al." / "et al.," (italic Emph or plain Str sequence) to
-- "等." / "等," when the Str two positions earlier contains Chinese.
--
-- Pass 2 rewrites marker words (compared case-insensitively):
--   * "vol." followed by "N[X]..."  -> "第N卷[X]."  when the previous Str is
--     Chinese; the inlines up to and including the number are removed;
--   * "tran." / "trans."            -> "译."  when the previous Str is Chinese
--     and ends with ",";
--   * "ed." / "eds."                -> "编."  under the same condition;
--     otherwise, when the previous Str contains digits and the Str before
--     that is Chinese, the number becomes "<digits>版." and the marker (plus
--     the Space in front of it) is removed.
--
-- @param elem element with an inline list in `content`
-- @return the same element, with `content` replaced by the rewritten list
function process_bibliography(elem)
    -- Pass 1: "et al." handling, building a fresh list.
    local source = elem.content
    local inlines = {}
    local i = 1
    while i <= #source do
        local current = source[i]
        -- By default the current inline is kept and one position is consumed.
        local replacement, consumed = current, 1

        if current.t == "Emph" and is_et_al_emph(current) and i > 2 then
            -- Italic form; the author name sits two positions back (Str, Space, Emph).
            local prev = source[i - 2]
            if prev and prev.t == "Str" and contains_chinese(prev.text) then
                if current.content[3].text == "al.," then
                    replacement = pandoc.Str("等,")
                else
                    replacement = pandoc.Str("等.")
                end
            end
        elseif i <= #source - 2 and current.t == "Str" and current.text == "et"
            and source[i + 1].t == "Space" and i > 2 then
            -- Plain form: Str "et", Space, Str "al." / "al.,".
            local prev = source[i - 2]
            if prev and prev.t == "Str" and contains_chinese(prev.text) then
                local tail = source[i + 2]
                if tail.t == "Str" and tail.text == "al.," then
                    replacement, consumed = pandoc.Str("等,"), 3
                elseif tail.t == "Str" and tail.text == "al." then
                    replacement, consumed = pandoc.Str("等."), 3
                end
            end
        end

        inlines[#inlines + 1] = replacement
        i = i + consumed
    end

    -- Pass 2: marker words. The list shrinks while it is being walked, so the
    -- bound is re-read every iteration and `next_i` is pulled back after a
    -- removal to make sure no shifted element is skipped.
    i = 1
    while i <= #inlines do
        local item = inlines[i]
        local next_i = i + 1

        if item.t == "Str" then
            local text = item.text:lower()

            if text == "vol." then
                local next_str, next_idx = find_next_str(inlines, i)
                local prev_str = find_prev_str(inlines, i)
                if next_str and prev_str and contains_chinese(prev_str.text) then
                    -- Split e.g. "3[M]." into the number and the bracketed identifier.
                    local vol_num, identifier = next_str.text:match("([^%[]+)%[(.+)%]")
                    if vol_num and identifier then
                        inlines[i] = pandoc.Str("第" .. vol_num .. "卷[" .. identifier .. "].")
                        -- Drop everything between the marker and the number, inclusive.
                        for remove_idx = next_idx, i + 1, -1 do
                            table.remove(inlines, remove_idx)
                        end
                    end
                end
            elseif (text == "tran." or text == "trans.") and i > 2 then
                local prev_str = find_prev_str(inlines, i)
                if prev_str and contains_chinese(prev_str.text) and prev_str.text:match(",$") then
                    inlines[i] = pandoc.Str("译.")
                end
            elseif (text == "ed." or text == "eds.") and i > 2 then
                local prev_str, prev_idx = find_prev_str(inlines, i)
                if prev_str then
                    if contains_chinese(prev_str.text) and prev_str.text:match(",$") then
                        -- Editor marker after a Chinese name list.
                        inlines[i] = pandoc.Str("编.")
                    else
                        -- Edition marker: "<N>.. ed." preceded by Chinese text.
                        local ed_num = extract_digits(prev_str.text)
                        local prev_prev_str = find_prev_str(inlines, prev_idx)
                        if ed_num and prev_prev_str and contains_chinese(prev_prev_str.text) then
                            inlines[prev_idx] = pandoc.Str(ed_num .. "版.")
                            table.remove(inlines, i)
                            next_i = i -- the following inline moved into slot i
                            if inlines[i - 1] and inlines[i - 1].t == "Space" then
                                table.remove(inlines, i - 1)
                                next_i = i - 1 -- it moved one slot further down
                            end
                        end
                    end
                end
            end
        end

        i = next_i
    end

    elem.content = inlines
    return elem
end
--- Div handler: localize every paragraph of a bibliography entry.
-- Only Divs carrying the `csl-entry` class are touched; each Para child is
-- handed to process_bibliography. Other Divs and other block types pass
-- through unchanged.
-- @param el Div element
-- @return the same Div
function process_div(el)
    if not el.classes:includes("csl-entry") then
        return el
    end
    for _, child in ipairs(el.content) do
        local is_paragraph = child.t == "Para"
        if is_paragraph then
            process_bibliography(child)
        end
    end
    return el
end
-- The filter consists of a single pass: citations and links get the
-- "et al." treatment, while csl-entry Divs and Spans get the bibliography
-- localization.
local localization_pass = {
    Cite = process_citation,
    Link = process_citation,
    Div = process_div,
    Span = process_bibliography,
}

return { localization_pass }