Files
2026-05-21 13:37:53 +08:00

56 lines
2.0 KiB
Lua
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
--- Remove spaces around Chinese characters except citations
--- Copyright: © 2024–Present Tom Ben
--- License: MIT License
--- Report whether a string begins with a non-ASCII byte.
-- Only the first byte of `char` is inspected. A byte value above 127 is used
-- as a cheap hint that the character is "potentially Chinese"; it does not
-- distinguish Chinese from any other non-ASCII text.
-- @param char string to inspect; nil, false, and "" are tolerated
-- @return boolean true when the first byte is greater than 127, false otherwise
local function is_non_ascii(char)
  -- Callers pass nil/false when there is no neighbouring string at all.
  if type(char) ~= 'string' then
    return false
  end
  -- string.byte yields no value for an empty string; comparing that against
  -- a number would raise an error, so treat "" as plain ASCII instead.
  local first_byte = string.byte(char)
  return first_byte ~= nil and first_byte > 127
end
--- Drop Space inlines that touch non-ASCII text inside a paragraph.
-- Walks `para.content` and rebuilds it. Every non-Space inline is kept.
-- A Space is kept unless the Str directly before it ends with, or the Str
-- directly after it starts with, a non-ASCII byte. As an exception, a Space
-- sitting between a Cite/Note inline and such a Str is kept.
-- @param para element exposing a `content` sequence of inlines (each with a
--             `t` tag and, for Str, a `text` field)
-- @return the same `para`, with `content` replaced by the filtered list
local function process_paragraph(para)
  local inlines = para.content
  local kept = {}

  -- Cite and Note inlines are both treated as citation-like neighbours.
  local function is_citation(node)
    return node and (node.t == 'Cite' or node.t == 'Note')
  end

  for idx, node in ipairs(inlines) do
    local keep = true

    if node.t == 'Space' then
      local before = inlines[idx - 1]
      local after = inlines[idx + 1]

      -- Edge characters facing the space; falsy when the neighbour is
      -- missing or is not a Str.
      local first_after = after and after.t == 'Str' and after.text:sub(1, 1)
      local last_before = before and before.t == 'Str' and before.text:sub(-1)

      local touches_chinese = (first_after and is_non_ascii(first_after))
        or (last_before and is_non_ascii(last_before))

      if touches_chinese then
        -- Only a citation on the opposite side rescues the space.
        keep = (is_citation(before) and first_after and is_non_ascii(first_after))
          or (is_citation(after) and last_before and is_non_ascii(last_before))
      end
    end

    if keep then
      kept[#kept + 1] = node
    end
  end

  para.content = kept
  return para
end
-- Hand Pandoc a filter list containing a single filter, which applies
-- process_paragraph to Para elements.
local paragraph_filter = { Para = process_paragraph }

return { paragraph_filter }