--- Localize Chinese bibliographies generated by Pandoc Citeproc and CSL

--- Copyright: © 2024–Present Tom Ben
--- License: MIT License
|
||
--- Report whether a value is a string containing a Chinese character.
-- Detection is byte-based: it searches for a three-byte UTF-8 sequence whose
-- lead byte lies in 0xE4-0xE9, i.e. code points U+4000-U+9FFF (this range
-- includes the CJK Unified Ideographs block).
-- @param text value to inspect; anything that is not a string yields false
-- @return boolean true when such a sequence occurs, false otherwise
function contains_chinese(text)
  -- Guard first so nil, numbers, etc. give a plain `false` instead of nil
  -- or an "attempt to index" error.
  if type(text) ~= "string" then
    return false
  end
  return text:find("[\228-\233][\128-\191][\128-\191]") ~= nil
end
|
||
|
||
--- Return the first run of decimal digits found in a string.
-- @param text string to search; a nil or false value is handed back unchanged
-- @return the leftmost digit sequence as a string, or nil when there is none
function extract_digits(text)
  if not text then
    return text
  end
  local digits = text:match("%d+")
  return digits
end
|
||
|
||
--- Locate the nearest non-empty Str before a position in an inline list.
-- Walks backwards from `start_index - 1` to 1, ignoring every element that
-- is not a Str as well as Str elements whose text is the empty string.
-- @param content list of inline elements (each exposing `t` and, for Str, `text`)
-- @param start_index position to search before (exclusive)
-- @return the matching element and its index, or nil, nil when none exists
local function find_prev_str(content, start_index)
  local pos = start_index - 1
  while pos >= 1 do
    local candidate = content[pos]
    if candidate and candidate.t == "Str" and candidate.text ~= "" then
      return candidate, pos
    end
    pos = pos - 1
  end
  return nil, nil
end
|
||
|
||
--- Locate the nearest non-empty Str after a position in an inline list.
-- Walks forwards from `start_index + 1` to the end of the list, ignoring
-- every element that is not a Str as well as Str elements with empty text.
-- @param content list of inline elements (each exposing `t` and, for Str, `text`)
-- @param start_index position to search after (exclusive)
-- @return the matching element and its index, or nil, nil when none exists
local function find_next_str(content, start_index)
  local last = #content
  local pos = start_index + 1
  while pos <= last do
    local candidate = content[pos]
    if candidate and candidate.t == "Str" and candidate.text ~= "" then
      return candidate, pos
    end
    pos = pos + 1
  end
  return nil, nil
end
|
||
|
||
--- Check whether an inline is an emphasised "et al." marker.
-- The element qualifies only when it is an Emph whose content is exactly
-- three inlines: Str "et", Space, and Str "al." or "al.,".
-- @param emph inline element exposing `t` and, for Emph, `content`
-- @return boolean true when the element is such a marker, false otherwise
function is_et_al_emph(emph)
  if emph.t ~= "Emph" then
    return false
  end

  local parts = emph.content
  -- Require exactly three inlines: callers replace the whole Emph with a
  -- single Str, so accepting longer content would silently drop whatever
  -- follows "et al." inside the emphasis.
  if #parts ~= 3 then
    return false
  end

  local first, gap, last = parts[1], parts[2], parts[3]
  return first.t == "Str" and first.text == "et"
    and gap.t == "Space"
    and last.t == "Str"
    and (last.text == "al." or last.text == "al.,")
end
|
||
|
||
--- Localize "et al." inside a citation for author-date styles.
-- Two shapes are rewritten, both only when attached to Chinese text:
--   * an Emph "et al." / "et al.," directly after a Str containing Chinese
--     becomes the plain Str "等" / "等,";
--   * a Str that contains Chinese and ends in "et", followed by Space and
--     Str "al." / "al.,", is merged into one Str ending in "等" / "等,".
-- Every other inline is copied through unchanged.
-- @param el element whose `content` is an inline list (registered below for
--   Cite and Link)
-- @return the same element with its `content` replaced by the rewritten list
function process_citation(el)
  local inlines = el.content
  local total = #inlines
  local rewritten = {}
  local pos = 1

  while pos <= total do
    local node = inlines[pos]
    local emitted = node -- inline appended for this step
    local consumed = 1   -- number of source inlines this step covers

    if node.t == "Emph" and is_et_al_emph(node) and pos > 1 then
      -- Italic form: the name is the inline immediately before the Emph.
      local before = inlines[pos - 1]
      if before.t == "Str" and contains_chinese(before.text) then
        if node.content[3].text == "al.," then
          emitted = pandoc.Str("等,")
        else
          emitted = pandoc.Str("等")
        end
      end
    elseif node.t == "Str" and contains_chinese(node.text) and node.text:sub(-2) == "et"
      and pos + 2 <= total and inlines[pos + 1].t == "Space" and inlines[pos + 2].t == "Str" then
      -- Plain form: "et" is glued to the end of the name's own Str.
      local tail = inlines[pos + 2].text
      local suffix
      if tail == "al." then
        suffix = "等"
      elseif tail == "al.," then
        suffix = "等,"
      end
      if suffix then
        -- Drop the trailing "et" and swallow the Space and "al." inlines.
        emitted = pandoc.Str(node.text:sub(1, -3) .. suffix)
        consumed = 3
      end
    end

    rewritten[#rewritten + 1] = emitted
    pos = pos + consumed
  end

  el.content = rewritten
  return el
end
|
||
|
||
-- Replace "et al." that follows a Chinese name; returns a new plain table.
local function localize_bibliography_et_al(source)
  local total = #source
  local result = {}

  -- In a bibliography the name sits two slots back: name, Space, "et al.".
  local function follows_chinese_name(pos)
    if pos <= 2 then
      return false
    end
    local prev = source[pos - 2]
    return prev and prev.t == "Str" and contains_chinese(prev.text)
  end

  local pos = 1
  while pos <= total do
    local node = source[pos]
    local emitted, consumed = node, 1

    if node.t == "Emph" and is_et_al_emph(node) then
      -- Italic form: the whole Emph becomes one plain Str.
      if follows_chinese_name(pos) then
        emitted = pandoc.Str(node.content[3].text == "al.," and "等," or "等.")
      end
    elseif node.t == "Str" and node.text == "et" and pos + 2 <= total
      and source[pos + 1].t == "Space" then
      -- Plain form: Str "et", Space, Str "al." / "al.," collapse into one Str.
      local tail = source[pos + 2]
      if tail.t == "Str" and follows_chinese_name(pos) then
        if tail.text == "al.," then
          emitted, consumed = pandoc.Str("等,"), 3
        elseif tail.text == "al." then
          emitted, consumed = pandoc.Str("等."), 3
        end
      end
    end

    result[#result + 1] = emitted
    pos = pos + consumed
  end

  return result
end

-- Localize "vol.", "tran(s)." and "ed(s)." markers in place in a plain table.
local function localize_bibliography_terms(inlines)
  local idx = 1
  -- The list shrinks while it is walked, so the bound is re-read on every
  -- step and the index is adjusted by hand after removals.
  while idx <= #inlines do
    local node = inlines[idx]

    if node.t == "Str" then
      local lowered = node.text:lower()

      if lowered == "vol." then
        local next_str, next_idx = find_next_str(inlines, idx)
        local prev_str = find_prev_str(inlines, idx)
        if next_str and prev_str and contains_chinese(prev_str.text) then
          -- Expects the following Str to look like "<number>[<identifier>]".
          local vol_num, identifier = next_str.text:match("([^%[]+)%[(.+)%]")
          if vol_num and identifier then
            inlines[idx] = pandoc.Str("第" .. vol_num .. "卷[" .. identifier .. "].")
            -- Remove everything up to and including the volume Str; only
            -- inlines after idx go away, so idx itself stays valid.
            for remove_idx = next_idx, idx + 1, -1 do
              table.remove(inlines, remove_idx)
            end
          end
        end
      elseif (lowered == "tran." or lowered == "trans.") and idx > 2 then
        local prev_str = find_prev_str(inlines, idx)
        if prev_str and contains_chinese(prev_str.text) and prev_str.text:match(",$") then
          inlines[idx] = pandoc.Str("译.")
        end
      elseif (lowered == "ed." or lowered == "eds.") and idx > 2 then
        local prev_str, prev_idx = find_prev_str(inlines, idx)
        if prev_str then
          if contains_chinese(prev_str.text) and prev_str.text:match(",$") then
            -- "<Chinese name>, ed." is the editor marker.
            inlines[idx] = pandoc.Str("编.")
          else
            -- Otherwise treat it as an edition ("2nd ed.") when the Str
            -- before the ordinal contains Chinese text.
            local ed_num = extract_digits(prev_str.text)
            local prev_prev_str = find_prev_str(inlines, prev_idx)
            if ed_num and prev_prev_str and contains_chinese(prev_prev_str.text) then
              inlines[prev_idx] = pandoc.Str(ed_num .. "版.")
              table.remove(inlines, idx)
              local removed = 1
              if inlines[idx - 1] and inlines[idx - 1].t == "Space" then
                table.remove(inlines, idx - 1)
                removed = 2
              end
              -- Step back by the number of removed inlines so the inline
              -- that followed "ed." is examined next instead of skipped.
              idx = idx - removed
            end
          end
        end
      end
    end

    idx = idx + 1
  end
end

--- Localize one bibliography inline container for Chinese entries.
-- Runs two passes over `elem.content`:
--   1. "et al." (plain or wrapped in Emph) two slots after a Str containing
--      Chinese becomes "等." / "等,";
--   2. "vol. N[X]" becomes "第N卷[X].", "tran(s)." becomes "译.", "ed(s)."
--      becomes "编." after a Chinese name ending in ",", and "<n> ed."
--      becomes "<n>版." when preceded by Chinese text.
-- Both passes work on a local list that is assigned back once, so the result
-- does not depend on in-place edits of `elem.content` being kept.
-- @param elem element whose `content` is an inline list (used below for Span
--   and for Para blocks inside csl-entry Divs)
-- @return the same element with its `content` replaced
function process_bibliography(elem)
  local inlines = localize_bibliography_et_al(elem.content)
  localize_bibliography_terms(inlines)
  elem.content = inlines
  return elem
end
|
||
|
||
--- Localize the paragraphs of a bibliography entry Div.
-- Divs lacking the "csl-entry" class are returned untouched. For matching
-- Divs, every direct child block of type Para is passed to
-- process_bibliography; other child blocks are left alone.
-- @param el Div element exposing `classes` (with an `includes` method) and `content`
-- @return the same Div element
function process_div(el)
  if not el.classes:includes("csl-entry") then
    return el
  end

  for _, child in ipairs(el.content) do
    local is_paragraph = child.t == "Para"
    if is_paragraph then
      process_bibliography(child)
    end
  end

  return el
end
|
||
|
||
-- The filter list handed back to pandoc contains a single filter table:
-- citations (Cite, Link) go through process_citation, bibliography
-- containers (Div, Span) through process_div / process_bibliography.
local localization_pass = {
  Cite = process_citation,
  Link = process_citation,
  Div = process_div,
  Span = process_bibliography,
}

return { localization_pass }