# Improve copywriting: correct spacing, wording, and punctuation between CJK and English text with AutoCorrect

# Copyright: © 2024–Present Tom Ben
# License: MIT License

import autocorrect_py as autocorrect
import json
import panflute as pf
from panflute import elements as pf_elements

# Extend panflute's RAW_FORMATS with 'typst' and 'comment' so raw blocks in
# those formats are allowed until upstream panflute adds support. The patch is
# skipped entirely when the installed panflute exposes no RAW_FORMATS attribute.
ADDITIONAL_RAW_FORMATS = {'typst', 'comment'}
if hasattr(pf_elements, 'RAW_FORMATS'):
    # Rebind to a fresh set (whatever container type upstream used) that also
    # holds the extra formats.
    pf_elements.RAW_FORMATS = set(pf_elements.RAW_FORMATS) | ADDITIONAL_RAW_FORMATS


def load_config():
    """Configure AutoCorrect with this filter's rule set.

    The rules are serialized to JSON and passed to ``autocorrect.load_config``.
    Each rule maps to a severity: 0 - off, 1 - error, 2 - warning.
    Only the two halfwidth rules are switched on; every other rule is off.

    Config schema: https://huacnlee.github.io/autocorrect/schema.json
    """
    rules = {
        # Add space between some punctuations
        "space-punctuation": 0,
        # Add space between brackets (), [] when near the CJK
        "space-bracket": 0,
        # Add space between ``, when near the CJK
        "space-backticks": 0,
        # Add space between dash `-`
        "space-dash": 0,
        # Convert to fullwidth
        "fullwidth": 0,
        # Remove space around the fullwidth quotes “”, ‘’
        "no-space-fullwidth-quote": 0,
        # Fullwidth alphanumeric characters to halfwidth
        "halfwidth-word": 1,
        # Fullwidth punctuations to halfwidth in English
        "halfwidth-punctuation": 1,
        # Spellcheck
        "spellcheck": 0,
    }
    # autocorrect takes its configuration as a JSON string, not a dict.
    autocorrect.load_config(json.dumps({"rules": rules}))


def correct_text(elem, doc):
    """Filter action: run AutoCorrect formatting over a single text node.

    Args:
        elem: The element currently being visited.
        doc: The document being filtered (unused here).

    Returns:
        A new ``pf.Str`` holding the formatted text when ``elem`` is a
        ``pf.Str``; ``None`` for every other element type (presumably panflute
        then keeps the element as-is -- see panflute's filter docs).
    """
    if not isinstance(elem, pf.Str):
        return None
    return pf.Str(autocorrect.format(elem.text))


def main(doc=None):
    """Load the AutoCorrect configuration and run the filter.

    Args:
        doc: Forwarded to ``pf.run_filter`` as its ``doc`` keyword argument;
            defaults to ``None``.

    Returns:
        Whatever ``pf.run_filter`` returns.
    """
    # The configuration is loaded first so it is in place before any text
    # node is formatted.
    load_config()
    filtered = pf.run_filter(correct_text, doc=doc)
    return filtered


# Run the filter only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()