add _extensions
This commit is contained in:
@@ -0,0 +1,72 @@
|
||||
--- Emulate Pandoc's extension `east_asian_line_breaks` in Quarto
--- Remove soft breaks between Chinese characters or next to Chinese punctuation
--- Tracking Quarto issue: https://github.com/quarto-dev/quarto-cli/issues/8520

--- Copyright: © 2024–Present Tom Ben
--- License: MIT License
|
||||
|
||||
--- Look for a Chinese character inside a string.
-- A character counts as Chinese here when it is a three-byte UTF-8 sequence
-- whose lead byte lies in 0xE4-0xE9 (code points U+4000 to U+9FFF).
-- @param text string to search
-- @return start and end byte positions of the first such sequence, or nil
--         when there is none (callers use the result as a truth value)
function is_chinese(text)
  -- lead byte \228-\233 followed by two continuation bytes \128-\191
  local cjk_sequence = "[\228-\233][\128-\191][\128-\191]"
  return text:find(cjk_sequence)
end
|
||||
|
||||
--- Report whether a string starts with an ASCII byte.
-- Only the first byte is inspected.
-- @param char string to test (may be nil)
-- @return true when the first byte is in the range 0-127, false otherwise,
--         including for nil and for the empty string
function is_ascii(char)
  if char == nil then return false end
  local first_byte = string.byte(char)
  -- string.byte returns no value for an empty string; comparing that nil
  -- with a number would raise an error, so treat it as "not ASCII".
  if first_byte == nil then return false end
  -- A byte value is never negative, so only the upper bound needs checking.
  return first_byte <= 127
end
|
||||
|
||||
do
  -- Set of punctuation marks, keyed by their UTF-8 encoding. The keys are
  -- written as byte escapes so the full-width forms cannot be confused with
  -- (or silently normalised to) their ASCII look-alikes.
  -- Only full-width / CJK forms are listed: ASCII punctuation must not match,
  -- otherwise a soft break after e.g. an English comma would be dropped and
  -- the words on either side would be joined.
  local punctuation_marks = {
    ["\239\188\140"] = true, -- U+FF0C fullwidth comma
    ["\227\128\130"] = true, -- U+3002 ideographic full stop
    ["\239\188\129"] = true, -- U+FF01 fullwidth exclamation mark
    ["\239\188\159"] = true, -- U+FF1F fullwidth question mark
    ["\239\188\155"] = true, -- U+FF1B fullwidth semicolon
    ["\239\188\154"] = true, -- U+FF1A fullwidth colon
    ["\226\128\156"] = true, -- U+201C left double quotation mark
    ["\226\128\157"] = true, -- U+201D right double quotation mark
    ["\226\128\152"] = true, -- U+2018 left single quotation mark
    ["\226\128\153"] = true, -- U+2019 right single quotation mark
    ["\239\188\136"] = true, -- U+FF08 fullwidth left parenthesis
    ["\239\188\137"] = true, -- U+FF09 fullwidth right parenthesis
    ["\227\128\144"] = true, -- U+3010 left black lenticular bracket
    ["\227\128\145"] = true, -- U+3011 right black lenticular bracket
    ["\227\128\138"] = true, -- U+300A left double angle bracket
    ["\227\128\139"] = true, -- U+300B right double angle bracket
    ["\227\128\136"] = true, -- U+3008 left angle bracket
    ["\227\128\137"] = true, -- U+3009 right angle bracket
    ["\227\128\140"] = true, -- U+300C left corner bracket
    ["\227\128\141"] = true, -- U+300D right corner bracket
    ["\227\128\142"] = true, -- U+300E left white corner bracket
    ["\227\128\143"] = true, -- U+300F right white corner bracket
    ["\227\128\129"] = true, -- U+3001 ideographic comma
  }

  --- Report whether a single UTF-8 character is a Chinese punctuation mark.
  -- The argument must be exactly one character; the empty string and
  -- multi-character strings are never reported as punctuation.
  -- @param char one UTF-8 encoded character (may be nil)
  -- @return true when char is one of the listed marks, false otherwise
  function is_chinese_punctuation(char)
    if char == nil then return false end
    -- Exact set lookup instead of a substring search, so "" and runs of
    -- several marks do not produce false positives.
    return punctuation_marks[char] == true
  end
end
|
||||
|
||||
--- Report whether a string contains an alphanumeric character.
-- Uses Lua's %w character class, so what counts as alphanumeric follows the
-- current locale.
-- @param char string to test (may be nil)
-- @return true when some character of char matches %w, false otherwise
--         (including for nil)
function is_alphanumeric(char)
  if char ~= nil then
    local matched = char:match("[%w]")
    return matched ~= nil
  end
  return false
end
|
||||
|
||||
-- One UTF-8 encoded character: an ASCII byte or a multi-byte lead byte,
-- followed by any continuation bytes.
local UTF8_CHAR = "([\0-\x7F\xC2-\xF4][\x80-\xBF]*)"

-- Return the last UTF-8 character of s, or nil when nothing matches.
local function last_utf8_char(s)
  local last
  for ch in s:gmatch(UTF8_CHAR) do
    last = ch
  end
  return last
end

-- Return the first UTF-8 character of s, or nil when nothing matches.
local function first_utf8_char(s)
  return (s:match(UTF8_CHAR))
end

-- Decide whether the soft break between two characters should be dropped.
local function joins_without_space(prev_char, next_char)
  -- Rule 1: both neighbours are Chinese characters.
  -- Rule 2: the break follows Chinese punctuation.
  -- Rule 3: the break precedes Chinese punctuation.
  -- Anything else (e.g. Chinese next to ASCII alphanumerics) keeps the soft
  -- break, which preserves spacing between Chinese and English words.
  return (is_chinese(prev_char) and is_chinese(next_char))
    or is_chinese_punctuation(prev_char)
    or is_chinese_punctuation(next_char)
end

return {
  {
    --- Drop soft breaks that sit between Chinese text in a paragraph.
    -- Every SoftBreak whose two neighbours both carry a non-empty `text`
    -- field is examined; when the characters on either side call for it,
    -- the SoftBreak is replaced by an empty Str.
    -- @param para the paragraph element handed over by pandoc
    -- @return the same paragraph, with its content modified in place
    Para = function(para)
      local inlines = para.content
      for pos, inline in ipairs(inlines) do
        if inline.t == 'SoftBreak' then
          local before, after = inlines[pos - 1], inlines[pos + 1]
          -- A break at the very start or end of the paragraph has only one
          -- neighbour and is left alone.
          if before and after then
            local p_text, n_text = before.text, after.text
            -- Neighbours without text (nil) or with empty text are skipped.
            if p_text and n_text and #p_text > 0 and #n_text > 0 then
              local prev_char = last_utf8_char(p_text)
              local next_char = first_utf8_char(n_text)
              if prev_char and next_char
                and joins_without_space(prev_char, next_char) then
                para.content[pos] = pandoc.Str("")
              end
            end
          end
        end
      end
      return para
    end
  }
}
|
||||
Reference in New Issue
Block a user