add _extensions

This commit is contained in:
2026-05-21 13:37:53 +08:00
parent 6a9a5fc90e
commit 61bd0bea2f
252 changed files with 33972 additions and 1 deletions
@@ -0,0 +1,72 @@
--- Emulate Pandoc's extension `east_asian_line_breaks` in Quarto
--- Ignore soft break adjacent to Chinese characters
--- Tracking Quarto issue: https://github.com/quarto-dev/quarto-cli/issues/8520
--- Copyright: © 2024–Present Tom Ben
--- License: MIT License
--- Report whether a string contains a character in the U+4000-U+9FFF range.
-- The pattern matches any three-byte UTF-8 sequence whose lead byte is
-- 0xE4-0xE9, which covers the CJK Unified Ideographs block. The search is
-- unanchored, so a longer string matches if any of its characters qualifies.
-- @param text string to inspect; nil is accepted and treated as "no match"
-- @return true if such a character is present, false otherwise
function is_chinese(text)
  -- Same nil guard as the sibling predicates, so a missing value is simply
  -- "not Chinese" instead of an "attempt to index a nil value" error.
  if text == nil then return false end
  -- Normalise to a boolean rather than leaking the match indices.
  return text:find("[\228-\233][\128-\191][\128-\191]") ~= nil
end
--- Report whether a string starts with an ASCII byte (0-127).
-- Only the first byte is examined, so callers are expected to pass a single
-- character.
-- @param char string to inspect; nil and "" are both treated as non-ASCII
-- @return true if the first byte is at most 127, false otherwise
function is_ascii(char)
  -- string.byte("") returns no value at all; comparing that nil with a number
  -- would raise, so the empty string is rejected together with nil.
  if char == nil or char == "" then return false end
  -- A byte is never negative, so only the upper bound needs checking.
  return string.byte(char) <= 127
end
-- Set of Chinese punctuation marks, keyed by the complete UTF-8 encoding of
-- each mark. Built once when the filter is loaded.
local chinese_punctuation = {}
do
  -- The fullwidth counterparts of ASCII punctuation are spelled as byte
  -- escapes so that width-folding normalisation in an editor or tool cannot
  -- silently turn them into their ASCII look-alikes.
  local marks = "\xEF\xBC\x8C" -- U+FF0C fullwidth comma
    .. "\xEF\xBC\x81"          -- U+FF01 fullwidth exclamation mark
    .. "\xEF\xBC\x9F"          -- U+FF1F fullwidth question mark
    .. "\xEF\xBC\x9B"          -- U+FF1B fullwidth semicolon
    .. "\xEF\xBC\x9A"          -- U+FF1A fullwidth colon
    .. "\xEF\xBC\x88"          -- U+FF08 fullwidth left parenthesis
    .. "\xEF\xBC\x89"          -- U+FF09 fullwidth right parenthesis
    .. "。“”‘’【】《》〈〉「」『』、"
  -- Split the list into whole UTF-8 characters (lead byte + continuation bytes).
  for mark in marks:gmatch("[\0-\x7F\xC2-\xF4][\x80-\xBF]*") do
    chinese_punctuation[mark] = true
  end
end

--- Report whether a single UTF-8 character is a Chinese punctuation mark.
-- The lookup is an exact match on the whole character. The empty string, a
-- stray continuation byte, a multi-character string and ASCII punctuation
-- such as "," all yield false.
-- @param char one UTF-8 character; nil is accepted and yields false
-- @return true if char is one of the listed marks, false otherwise
function is_chinese_punctuation(char)
  if char == nil then return false end
  return chinese_punctuation[char] == true
end
--- Report whether a string contains an alphanumeric character.
-- Uses Lua's %w character class; the match is unanchored, so any
-- alphanumeric character anywhere in the string is enough.
-- @param char string to inspect; nil yields false
-- @return true if an alphanumeric character is found, false otherwise
function is_alphanumeric(char)
  return char ~= nil and char:match("[%w]") ~= nil
end
-- Lua pattern for one UTF-8 encoded character: a lead byte followed by any
-- number of continuation bytes.
local UTF8_CHAR = "([\0-\x7F\xC2-\xF4][\x80-\xBF]*)"

-- Return the first UTF-8 character of s, or nil when nothing matches.
local function first_utf8_char(s)
  return (s:match(UTF8_CHAR))
end

-- Return the last UTF-8 character of s, or nil when nothing matches.
local function last_utf8_char(s)
  local last
  for ch in s:gmatch(UTF8_CHAR) do
    last = ch
  end
  return last
end

-- Decide whether the soft break between two characters should be dropped.
local function should_drop_break(prev_char, next_char)
  -- Rule 1: both neighbours are Chinese characters.
  -- Rule 2: the break follows Chinese punctuation.
  -- Rule 3: the break precedes Chinese punctuation.
  -- Anything else (e.g. Chinese next to ASCII alphanumerics) keeps its soft
  -- break, which preserves spacing between Chinese and English words.
  return (is_chinese(prev_char) and is_chinese(next_char))
    or is_chinese_punctuation(prev_char)
    or is_chinese_punctuation(next_char)
end

return {
  {
    --- Replace soft breaks next to Chinese text with empty strings.
    -- Only soft breaks whose two neighbours both carry a non-empty `text`
    -- field are considered.
    -- @param para the paragraph element being filtered
    -- @return the same paragraph, possibly with some soft breaks replaced
    Para = function(para)
      local inlines = para.content
      for idx, inline in ipairs(inlines) do
        if inline.t == 'SoftBreak' then
          local before, after = inlines[idx - 1], inlines[idx + 1]
          if before and after then
            local lhs = before.text
            local rhs = after.text
            -- Skip neighbours without text or with empty text.
            if lhs and rhs and #lhs > 0 and #rhs > 0 then
              local prev_char = last_utf8_char(lhs)
              local next_char = first_utf8_char(rhs)
              -- Both characters must actually have been extracted.
              if prev_char and next_char
                and should_drop_break(prev_char, next_char) then
                para.content[idx] = pandoc.Str("")
              end
            end
          end
        end
      end
      return para
    end
  }
}