-- su2026rwep/_extensions/drwater/localizecnbib/localizecnbib.lua

--- Localize Chinese bibliographies generated by Pandoc Citeproc and CSL

--- Copyright: © 2024–Present Tom Ben
--- License: MIT License

--- Report whether a string contains at least one Chinese character.
-- Detection is byte-based: a lead byte in \228-\233 followed by two bytes in
-- \128-\191 (a three-byte sequence) counts as a hit.
-- @param text string to inspect; may be nil
-- @return true or false for a string; a falsy `text` is handed back unchanged
function contains_chinese(text)
    if not text then
        return text
    end
    local first_hit = text:find("[\228-\233][\128-\191][\128-\191]")
    return first_hit ~= nil
end

--- Pull the first run of decimal digits out of a string.
-- @param text string to search; may be nil
-- @return the digits as a string, nil when there are none, or the falsy
--         `text` itself when no string was given
function extract_digits(text)
    if text then
        -- Parentheses keep the result to exactly one value.
        return (text:match("%d+"))
    end
    return text
end

--- Walk backwards from `start_index` to the nearest non-empty Str.
-- Every element that is not a Str with non-empty text is skipped.
-- @param content list of inline elements
-- @param start_index position to search before (exclusive)
-- @return the Str element and its index, or nil, nil when none exists
local function find_prev_str(content, start_index)
    local pos = start_index - 1
    while pos >= 1 do
        local candidate = content[pos]
        if candidate and candidate.t == "Str" and candidate.text ~= "" then
            return candidate, pos
        end
        pos = pos - 1
    end
    return nil, nil
end

--- Walk forwards from `start_index` to the nearest non-empty Str.
-- Every element that is not a Str with non-empty text is skipped.
-- @param content list of inline elements
-- @param start_index position to search after (exclusive)
-- @return the Str element and its index, or nil, nil when none exists
local function find_next_str(content, start_index)
    local last = #content
    local pos = start_index + 1
    while pos <= last do
        local candidate = content[pos]
        if candidate and candidate.t == "Str" and candidate.text ~= "" then
            return candidate, pos
        end
        pos = pos + 1
    end
    return nil, nil
end

--- Test whether an element is an Emph whose content starts with `et al.`.
-- The first three children must be Str "et", a Space, and Str "al." or
-- "al.,"; anything after the third child is not inspected.
-- @param emph inline element to test
-- @return true when the pattern matches, false otherwise
function is_et_al_emph(emph)
    if emph.t ~= "Emph" then
        return false
    end

    local parts = emph.content
    if #parts < 3 then
        return false
    end

    local first, second, third = parts[1], parts[2], parts[3]
    if first.t ~= "Str" or first.text ~= "et" then
        return false
    end
    if second.t ~= "Space" then
        return false
    end
    if third.t ~= "Str" then
        return false
    end
    return third.text == "al." or third.text == "al.,"
end

--- Replace `et al.` with `等` inside an element's inline content.
-- Two shapes are rewritten:
--   * an Emph holding `et al.` / `et al.,` whose immediate predecessor is a
--     Str containing Chinese text becomes Str "等" / "等,";
--   * a Str that contains Chinese text and ends in "et", followed by a Space
--     and Str "al." / "al.,", is merged into one Str ending in "等" / "等,".
-- Everything else is copied through unchanged.
-- @param el element with an inline `content` list (registered for Cite and Link)
-- @return the same `el` with its `content` replaced
function process_citation(el)
    local source = el.content
    local total = #source
    local output = {}
    local pos = 1

    while pos <= total do
        local node = source[pos]
        -- By default the node is kept as-is and one position is consumed.
        local replacement, consumed = node, 1

        if node.t == "Emph" then
            -- Italic "et al." directly after a Chinese name.
            if is_et_al_emph(node) and pos > 1 then
                local before = source[pos - 1]
                if before.t == "Str" and contains_chinese(before.text) then
                    if node.content[3].text == "al.," then
                        replacement = pandoc.Str("等,")
                    else
                        replacement = pandoc.Str("等")
                    end
                end
            end
        elseif node.t == "Str" and pos + 2 <= total then
            -- Plain "et al." glued to the end of a Chinese name.
            local text = node.text
            local gap, tail = source[pos + 1], source[pos + 2]
            if contains_chinese(text) and text:sub(-2) == "et"
                and gap.t == "Space" and tail.t == "Str" then
                local localized
                if tail.text == "al." then
                    localized = "等"
                elseif tail.text == "al.," then
                    localized = "等,"
                end
                if localized then
                    -- Strip the trailing "et" and swallow the Space and "al.".
                    replacement = pandoc.Str(text:sub(1, -3) .. localized)
                    consumed = 3
                end
            end
        end

        output[#output + 1] = replacement
        pos = pos + consumed
    end

    el.content = output
    return el
end

--- Localize bibliography terms inside one element's inline content.
--
-- Works on any element exposing an inline `content` list (this file registers
-- it for Span and calls it for each Para of a `csl-entry` Div). Two passes:
--   1. `et al.` / `et al.,` (plain, or wrapped in Emph) becomes Str "等." /
--      "等," when the element two positions earlier is a Str with Chinese text.
--   2. `vol.`, `tran(s).` and `ed(s).` markers (case-insensitive) are rewritten
--      to "第N卷[X].", "译.", "编." or "N版." when the neighbouring non-empty
--      Str elements contain Chinese text.
--
-- All rewriting happens on a local table that is assigned to `elem.content`
-- once at the end, so the second pass can insert and remove freely.
--
-- @param elem element whose `content` is replaced
-- @return the same `elem`
function process_bibliography(elem)
    local source = elem.content
    local count = #source
    local inlines = {}
    local i = 1

    -- Pass 1: italic and non-italic `et al.`
    while i <= count do
        local current = source[i]
        local replacement, consumed = current, 1
        -- The candidate name sits two positions back: name, Space, "et al.".
        local prev = i > 2 and source[i - 2] or nil
        local after_chinese = prev and prev.t == "Str" and contains_chinese(prev.text)

        if current.t == "Emph" then
            if after_chinese and is_et_al_emph(current) then
                if current.content[3].text == "al.," then
                    replacement = pandoc.Str("等,")
                else
                    replacement = pandoc.Str("等.")
                end
            end
        elseif after_chinese and current.t == "Str" and current.text == "et"
            and i + 2 <= count and source[i + 1].t == "Space"
            and source[i + 2].t == "Str" then
            local tail = source[i + 2].text
            if tail == "al.," then
                replacement, consumed = pandoc.Str("等,"), 3
            elseif tail == "al." then
                replacement, consumed = pandoc.Str("等."), 3
            end
        end

        inlines[#inlines + 1] = replacement
        i = i + consumed
    end

    -- Pass 2: volume / translator / editor / edition markers.
    -- A while loop is used because entries are removed in place: the bound is
    -- re-read every iteration and the index is rewound after a removal so no
    -- shifted element is skipped.
    local idx = 1
    while idx <= #inlines do
        local item = inlines[idx]
        local next_idx = idx + 1

        if item.t == "Str" then
            local term = item.text:lower()

            if term == "vol." then
                local following, following_idx = find_next_str(inlines, idx)
                local preceding = find_prev_str(inlines, idx)
                if following and preceding and contains_chinese(preceding.text) then
                    -- Expect "<number>[<identifier>]" in the following Str.
                    local vol_num, identifier = following.text:match("([^%[]+)%[(.+)%]")
                    if vol_num and identifier then
                        inlines[idx] = pandoc.Str("第" .. vol_num .. "卷[" .. identifier .. "].")
                        -- Drop everything up to and including the consumed Str.
                        for remove_idx = following_idx, idx + 1, -1 do
                            table.remove(inlines, remove_idx)
                        end
                    end
                end
            elseif idx > 2 and (term == "tran." or term == "trans.") then
                local preceding = find_prev_str(inlines, idx)
                if preceding and contains_chinese(preceding.text) and preceding.text:match(",$") then
                    inlines[idx] = pandoc.Str("译.")
                end
            elseif idx > 2 and (term == "ed." or term == "eds.") then
                local preceding, preceding_idx = find_prev_str(inlines, idx)
                if preceding then
                    if contains_chinese(preceding.text) and preceding.text:match(",$") then
                        -- "<Chinese name>, ed." -> editor
                        inlines[idx] = pandoc.Str("编.")
                    else
                        -- "<Chinese text> <N> ed." -> edition number
                        local ed_num = extract_digits(preceding.text)
                        local before_number = find_prev_str(inlines, preceding_idx)
                        if ed_num and before_number and contains_chinese(before_number.text) then
                            inlines[preceding_idx] = pandoc.Str(ed_num .. "版.")
                            table.remove(inlines, idx)
                            -- The element that followed "ed." now sits at idx.
                            next_idx = idx
                            if inlines[idx - 1] and inlines[idx - 1].t == "Space" then
                                table.remove(inlines, idx - 1)
                                -- ...and one slot earlier once the Space is gone.
                                next_idx = idx - 1
                            end
                        end
                    end
                end
            end
        end

        idx = next_idx
    end

    elem.content = inlines
    return elem
end

--- Div handler: localize the paragraphs of a bibliography entry.
-- Only a Div carrying the `csl-entry` class is touched; each of its direct
-- Para children is passed to `process_bibliography`. Other Divs pass through.
-- @param el Div element
-- @return the same `el`
function process_div(el)
    if not el.classes:includes("csl-entry") then
        return el
    end

    for _, child in ipairs(el.content) do
        if child.t == "Para" then
            process_bibliography(child)
        end
    end
    return el
end

-- Handlers for the single filter this file returns, keyed by element type.
local localize_filter = {
    Cite = process_citation,
    Link = process_citation,
    Span = process_bibliography,
    Div = process_div,
}

return { localize_filter }