add _extensions
This commit is contained in:
@@ -0,0 +1,7 @@
|
||||
# Quarto extension manifest for the remove-spaces Lua filter.
title: Remove Spaces around Chinese Characters except Citations
author: Tom Ben
version: 1.0.0
# Minimum Quarto version this extension declares support for.
quarto-required: ">=1.5.0"
# Filters contributed by this extension, nested under `contributes`.
contributes:
  filters:
    - remove-spaces.lua
|
||||
@@ -0,0 +1,55 @@
|
||||
--- Remove spaces around Chinese characters except citations
|
||||
|
||||
--- Copyright: © 2024–Present Tom Ben
|
||||
--- License: MIT License
|
||||
|
||||
--- Report whether a string starts with a non-ASCII byte.
-- Used as a cheap stand-in for "might be a Chinese character": only the first
-- byte of `char` is inspected, and any value above 127 counts.
-- @param char string|nil|false  text to inspect (callers pass a single byte)
-- @return boolean  true when the first byte is greater than 127; false for
--   nil, false, the empty string, or ASCII input
local function is_non_ascii(char)
  if not char then
    return false
  end

  -- string.byte yields no value for an empty string, and comparing nil with a
  -- number raises an error; treat "no byte" as "not non-ASCII" instead.
  local first_byte = string.byte(char)
  return first_byte ~= nil and first_byte > 127
end
|
||||
|
||||
--- Drop Space inlines that touch non-ASCII text, except beside citations.
-- Walks `para.content` and rebuilds it. Every non-Space element is kept. A
-- Space is kept when neither neighbouring Str has a non-ASCII byte on the side
-- facing the space, or when one neighbour is a Cite/Note and the Str on the
-- other side faces the space with a non-ASCII byte. All other Spaces are
-- dropped.
-- @param para  element with a `content` sequence of inlines; each inline is
--   read through `.t`, and Str inlines through `.text`
-- @return the same `para`, with `content` replaced by the filtered sequence
local function process_paragraph(para)
  local inlines = para.content
  local kept = {}

  -- Byte of a Str neighbour at position `pos` (1 = first, -1 = last);
  -- falsy when the neighbour is missing or is not a Str.
  local function facing_char(node, pos)
    return node and node.t == 'Str' and node.text:sub(pos, pos)
  end

  -- Truthy when the neighbour is a citation or a note.
  local function is_cite_like(node)
    return node and (node.t == 'Cite' or node.t == 'Note')
  end

  for idx, node in ipairs(inlines) do
    local drop = false

    if node.t == 'Space' then
      local left, right = inlines[idx - 1], inlines[idx + 1]
      local right_char = facing_char(right, 1)
      local left_char = facing_char(left, -1)

      local touches_non_ascii = (right_char and is_non_ascii(right_char))
        or (left_char and is_non_ascii(left_char))

      if touches_non_ascii then
        -- A space separating a citation/note from non-ASCII text survives.
        local shielded_by_cite =
          (is_cite_like(left) and right_char and is_non_ascii(right_char))
          or (is_cite_like(right) and left_char and is_non_ascii(left_char))
        drop = not shielded_by_cite
      end
    end

    if not drop then
      kept[#kept + 1] = node
    end
  end

  para.content = kept
  return para
end
|
||||
|
||||
-- Hand the filter list back to Pandoc: one filter table whose Para entry is
-- process_paragraph.
local paragraph_filter = { Para = process_paragraph }

return { paragraph_filter }
|
||||
Reference in New Issue
Block a user