Files
su2026rwep/_extensions/drwater/autocorrect/autocorrect.py
T
2026-05-21 13:37:53 +08:00

62 lines
1.9 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# Improve copywriting, correct spaces, words, and punctuations between CJK and English with AutoCorrect
# Copyright: © 2024Present Tom Ben
# License: MIT License
import autocorrect_py as autocorrect
import json
import panflute as pf
from panflute import elements as pf_elements
# Allow typst and comment raw blocks until upstream panflute adds support.
ADDITIONAL_RAW_FORMATS = {'typst', 'comment'}
if hasattr(pf_elements, 'RAW_FORMATS'):
pf_elements.RAW_FORMATS = set(pf_elements.RAW_FORMATS)
pf_elements.RAW_FORMATS.update(ADDITIONAL_RAW_FORMATS)
def load_config():
# yaml-language-server: $schema=https://huacnlee.github.io/autocorrect/schema.json
config = {
# 0 - off, 1 - error, 2 - warning
"rules": {
# Add space between some punctuations
"space-punctuation": 0,
# Add space between brackets (), [] when near the CJK
"space-bracket": 0,
# Add space between ``, when near the CJK
"space-backticks": 0,
# Add space between dash `-`
"space-dash": 0,
# Convert to fullwidth
"fullwidth": 0,
# To remove space arouned the fullwidth quotes “”, ‘’
"no-space-fullwidth-quote": 0,
# Fullwidth alphanumeric characters to halfwidth
"halfwidth-word": 1,
# Fullwidth punctuations to halfwidth in English
"halfwidth-punctuation": 1,
# Spellcheck
"spellcheck": 0
}
}
config_str = json.dumps(config)
autocorrect.load_config(config_str)
def correct_text(elem, doc):
if isinstance(elem, pf.Str):
# Apply autocorrect formatting to each text node
corrected_text = autocorrect.format(elem.text)
return pf.Str(corrected_text)
def main(doc=None):
# Load autocorrect configuration
load_config()
return pf.run_filter(correct_text, doc=doc)
if __name__ == "__main__":
main()