add _extensions

2026-05-21 13:37:53 +08:00
parent 6a9a5fc90e
commit 61bd0bea2f
252 changed files with 33972 additions and 1 deletions
@@ -0,0 +1,161 @@
+# Convert *.md files to *.qmd files and pre-process them
+# Randomize footnote identifiers in multiple Quarto files to avoid conflicts
+# Convert reference-style links to inline links
+# Remove line breaks within a straight angle quotation mark
+# Reformat display math equations in Ulysses
+
+# Copyright: © 2024–Present Tom Ben
+# License: MIT License
+
+import re
+import glob
+import os
+import random
+import string
+
+
+def get_md_files():
+    # Get all *.md files
+    return [f for f in glob.glob("contents/[0-9]*.md")]
+
+
+def randomize_footnote_identifiers(qmd_content):
+    # Find all existing footnote identifiers (numbers)
+    existing_ids = set(re.findall(r'\[\^(\d+)\]', qmd_content))
+
+    # Generate a unique random identifier for each existing footnote
+    unique_ids = {}
+    for id in existing_ids:
+        # Generate a random string of 5 characters
+        new_id = ''.join(random.choices(
+            string.ascii_letters + string.digits, k=5))
+        while new_id in unique_ids.values():
+            new_id = ''.join(random.choices(
+                string.ascii_letters + string.digits, k=5))
+        unique_ids[id] = new_id
+
+    # Replace all footnote references and definitions with new identifiers
+    for old_id, new_id in unique_ids.items():
+        qmd_content = re.sub(rf'\[\^{old_id}\]', f'[^{new_id}]', qmd_content)
+        qmd_content = re.sub(rf'\[\^{old_id}\]:', f'[^{new_id}]:', qmd_content)
+
+    return qmd_content
+
+
+def convert_reference_to_inline(qmd_content):
+    # Extract reference links
+    reference_links = {}
+    reference_pattern = re.compile(r'\n\[(\d+)\]:\s*(.*)')
+    for match in reference_pattern.findall(qmd_content):
+        reference_links[match[0]] = match[1]
+
+    # Remove the reference link definitions from the qmd_content
+    qmd_content = reference_pattern.sub('', qmd_content)
+
+    # Replace reference-style link usages with inline links
+    def replace_link(match):
+        text = match.group(1)
+        key = match.group(2)
+        url = reference_links.get(key, '')
+        return f'[{text}]({url})'
+
+    usage_pattern = re.compile(r'\[(.*?)\]\[(\d+)\]')
+    qmd_content = usage_pattern.sub(replace_link, qmd_content)
+
+    return qmd_content
+
+
+def remove_linebreaks_in_quotes(text):
+    # Regular expression pattern to find blocks within single Chinese quotes
+    pattern = r'「[^」]*?」'
+
+    # Function to replace newlines in the found quoted text
+    def replace_newlines(m):
+        # Remove all newlines within the quote block
+        return m.group(0).replace('\n', '')
+
+    # Use re.sub to replace the newline characters in each match
+    cleaned_text = re.sub(pattern, replace_newlines, text)
+
+    return cleaned_text
+
+
+def reformat_math_equations(content):
+    # Reformat display math with labels to block format
+    labeled_pattern = r"\$(.+?)\$ *(\{#.+?\})"
+
+    def replace_with_labeled_block(match):
+        equation = match.group(1).strip()
+        label = match.group(2).strip()
+        return f"$$\n{equation}\n$$ {label}"
+
+    content = re.sub(labeled_pattern, replace_with_labeled_block, content)
+
+    # Reformat display math without labels to block format
+    # Match `$$ ... $$` without label
+    display_pattern = r"(?<!\$)\$\$([^\$]+?)\$\$(?!\{#)"
+
+    def replace_with_display_block(match):
+        equation = match.group(1).strip()
+        return f"$$\n{equation}\n$$"
+
+    content = re.sub(display_pattern, replace_with_display_block, content)
+
+    return content
+
+
+def process_file(input_file, output_file):
+    with open(input_file, "r", encoding="utf-8") as f:
+        content = f.read()
+
+    # Remove links with `[@]` and a space before it
+    content = re.sub(r"\s*\[@\].*?[\]\)]", "", content)
+    # Remove square brackets enclosing the caption
+    content = re.sub(r"^\[(.*)\}\]$", r"\n  :\1}", content, flags=re.MULTILINE)
+    # Merge multiple adjacent citations into one
+    content = re.sub(r"\][\(\[].*?;\s*\[", "; ", content)
+    # Replace '{{\<...\>}}' with '{{<...>}}'
+    content = re.sub(r"\{\{\\<(.*)\\>}}", r"{{<\1>}}", content)
+    # Remove comment blocks to avoid errors of Python filter
+    content = re.sub(r"^```{=comment}.*?^```$", "",
+                     content, flags=re.DOTALL | re.MULTILINE)
+
+    # Randomize footnote identifiers
+    content = randomize_footnote_identifiers(content)
+    # Convert reference-style links to inline links
+    content = convert_reference_to_inline(content)
+    # Remove line breaks in quotes
+    content = remove_linebreaks_in_quotes(content)
+    # Reformat math equations
+    content = reformat_math_equations(content)
+
+    with open(output_file, "w", encoding="utf-8") as f:
+        f.write(content)
+
+
+def main():
+    md_files = get_md_files()
+
+    # Create contents_tmp directory if it doesn't exist
+    tmp_dir = "contents_tmp"
+    if not os.path.exists(tmp_dir):
+        os.makedirs(tmp_dir)
+
+    # Convert *.md files to *.qmd files in contents_tmp directory
+    qmd_files = [os.path.join(tmp_dir, os.path.basename(
+        f).replace(".md", ".qmd")) for f in md_files]
+
+    for md_file, qmd_file in zip(md_files, qmd_files):
+        process_file(md_file, qmd_file)
+
+    # Process existing .qmd files in contents directory and output to contents_tmp
+    os.chdir('contents')
+    existing_qmd_files = glob.glob('*.qmd')
+
+    for qmd_file in existing_qmd_files:
+        output_file = os.path.join('..', tmp_dir, qmd_file)
+        process_file(qmd_file, output_file)
+
+
+if __name__ == "__main__":
+    main()