add _extensions

This commit is contained in:
2026-05-21 13:37:53 +08:00
parent 6a9a5fc90e
commit 61bd0bea2f
252 changed files with 33972 additions and 1 deletions
+161
View File
@@ -0,0 +1,161 @@
# Convert *.md files to *.qmd files and pre-process them
# Randomize footnote identifiers in multiple Quarto files to avoid conflicts
# Convert reference-style links to inline links
# Remove line breaks inside corner-bracket quotation marks (「…」)
# Reformat display math equations in Ulysses
# Copyright: © 2024–Present Tom Ben
# License: MIT License
import re
import glob
import os
import random
import string
def get_md_files():
    """Return the numbered Markdown sources in ``contents/``.

    Only files whose name starts with a digit and ends in ``.md`` are
    selected (glob pattern ``contents/[0-9]*.md``), relative to the current
    working directory.

    Returns:
        A list of relative paths, sorted so that the processing order is
        deterministic (``glob.glob`` itself returns results in arbitrary
        filesystem order). Empty if nothing matches.
    """
    # glob.glob already returns a list; no need to copy it element by element
    return sorted(glob.glob("contents/[0-9]*.md"))
def randomize_footnote_identifiers(qmd_content):
    """Replace numeric footnote labels with random 5-character labels.

    Every ``[^<digits>]`` occurrence is rewritten; a footnote definition
    (``[^1]: text``) starts with the same ``[^1]`` token as its references,
    so one pattern covers both. All occurrences of the same numeric label
    receive the same new label, and different labels receive different ones.
    Footnotes whose label is not purely numeric are left untouched.

    Args:
        qmd_content: The document text.

    Returns:
        The text with numeric footnote labels replaced by random labels
        drawn from ASCII letters and digits.
    """
    numeric_footnote = re.compile(r'\[\^(\d+)\]')
    alphabet = string.ascii_letters + string.digits

    # Labels that are already in use anywhere in the document (numeric or
    # named) must never be handed out as a "new" label.
    taken = set(re.findall(r'\[\^([^\]\s]+)\]', qmd_content))
    new_labels = {}

    def replace_label(match):
        old_label = match.group(1)
        if old_label not in new_labels:
            candidate = ''.join(random.choices(alphabet, k=5))
            while candidate in taken:
                candidate = ''.join(random.choices(alphabet, k=5))
            taken.add(candidate)
            new_labels[old_label] = candidate
        return f'[^{new_labels[old_label]}]'

    # A single pass guarantees that freshly inserted labels are never scanned
    # again, so an all-digit random label cannot be mistaken for an old one.
    return numeric_footnote.sub(replace_label, qmd_content)
def convert_reference_to_inline(qmd_content):
    """Convert numbered reference-style links to inline links.

    Definitions of the form ``[<digits>]: <target>`` that start on their own
    line (preceded by a newline) are collected and removed from the text;
    usages of the form ``[text][<digits>]`` are then rewritten to
    ``[text](<target>)``. If a key is defined more than once, the last
    definition wins.

    A usage whose key has no definition is left exactly as written instead of
    being turned into a link with an empty target.

    Args:
        qmd_content: The document text.

    Returns:
        The text with resolved reference links inlined and their definitions
        removed.
    """
    definition_pattern = re.compile(r'\n\[(\d+)\]:\s*(.*)')
    usage_pattern = re.compile(r'\[(.*?)\]\[(\d+)\]')

    # findall yields (key, target) pairs; dict() keeps the last pair per key
    reference_links = dict(definition_pattern.findall(qmd_content))

    # Remove the reference link definitions from the text
    qmd_content = definition_pattern.sub('', qmd_content)

    def replace_link(match):
        key = match.group(2)
        if key not in reference_links:
            # Unknown reference: keep the original markup rather than
            # emitting a broken `[text]()` link
            return match.group(0)
        return f'[{match.group(1)}]({reference_links[key]})'

    return usage_pattern.sub(replace_link, qmd_content)
def remove_linebreaks_in_quotes(text):
    """Join lines inside 「…」 quotations.

    Each span that starts at a ``「`` and runs to the next ``」`` has every
    newline character deleted; text outside such spans is returned
    unchanged. An opening ``「`` with no later ``」`` matches nothing.

    Args:
        text: The document text.

    Returns:
        The text with newlines removed from inside each quoted span.
    """
    quoted_span = re.compile(r'「[^」]*?」')
    # The callback receives one whole quoted span and strips its newlines
    return quoted_span.sub(
        lambda span: span.group(0).replace('\n', ''),
        text,
    )
def reformat_math_equations(content):
    """Put display equations on their own lines between ``$$`` fences.

    Two rewrites are applied in order:

    1. Labelled equations: ``$eq$ {#label}`` or ``$$eq$$ {#label}`` becomes
       ``$$\\neq\\n$$ {#label}``. The equation body may not contain ``$`` or
       a newline, so an earlier inline ``$...$`` on the same line is never
       pulled into the labelled equation, and ``$$``-delimited input does not
       end up with nested delimiters.
    2. Display equations: ``$$eq$$`` (not immediately followed by ``{#``)
       becomes ``$$\\neq\\n$$`` with surrounding whitespace trimmed from the
       equation. Blocks produced by step 1 match again here and are
       rewritten to themselves.

    Args:
        content: The document text.

    Returns:
        The text with equations reformatted to block form.
    """
    # One or two `$` on each side; the lookbehind stops a match from starting
    # in the middle of a `$$` run.
    labeled_pattern = r"(?<!\$)\${1,2}([^$\n]+?)\${1,2} *(\{#.+?\})"

    def replace_with_labeled_block(match):
        equation = match.group(1).strip()
        label = match.group(2).strip()
        return f"$$\n{equation}\n$$ {label}"

    content = re.sub(labeled_pattern, replace_with_labeled_block, content)

    # Match `$$ ... $$` without a label directly attached
    display_pattern = r"(?<!\$)\$\$([^\$]+?)\$\$(?!\{#)"

    def replace_with_display_block(match):
        equation = match.group(1).strip()
        return f"$$\n{equation}\n$$"

    return re.sub(display_pattern, replace_with_display_block, content)
def process_file(input_file, output_file):
    """Pre-process one source file and write the result to ``output_file``.

    The file is read as UTF-8, passed through a fixed sequence of regex
    clean-ups and the helper transformations defined in this module, and
    written back as UTF-8. The order of the steps is significant and is kept
    as in the original script.

    Args:
        input_file: Path of the file to read.
        output_file: Path of the file to write (overwritten if it exists).

    Raises:
        OSError: If the input cannot be read or the output cannot be written.
    """
    with open(input_file, "r", encoding="utf-8") as f:
        content = f.read()

    # Remove links with `[@]` and a space before it
    content = re.sub(r"\s*\[@\].*?[\]\)]", "", content)

    # Remove square brackets enclosing the caption
    content = re.sub(r"^\[(.*)\}\]$", r"\n :\1}", content, flags=re.MULTILINE)

    # Merge multiple adjacent citations into one
    content = re.sub(r"\][\(\[].*?;\s*\[", "; ", content)

    # Replace '{{\<...\>}}' with '{{<...>}}'. The group is lazy so that two
    # escaped shortcodes on the same line are each un-escaped separately
    # instead of being treated as one span from the first `\<` to the last `\>`.
    content = re.sub(r"\{\{\\<(.*?)\\>}}", r"{{<\1>}}", content)

    # Remove comment blocks to avoid errors of Python filter
    content = re.sub(r"^```{=comment}.*?^```$", "",
                     content, flags=re.DOTALL | re.MULTILINE)

    # Randomize footnote identifiers
    content = randomize_footnote_identifiers(content)

    # Convert reference-style links to inline links
    content = convert_reference_to_inline(content)

    # Remove line breaks in quotes
    content = remove_linebreaks_in_quotes(content)

    # Reformat math equations
    content = reformat_math_equations(content)

    with open(output_file, "w", encoding="utf-8") as f:
        f.write(content)
def main():
    """Pre-process every source file in ``contents/`` into ``contents_tmp/``.

    * Each file returned by ``get_md_files()`` is processed and written to
      ``contents_tmp/<name>.qmd`` (only the final ``.md`` extension is
      replaced).
    * Each ``contents/*.qmd`` file is processed and written to
      ``contents_tmp/`` under the same file name.

    All paths are relative to the current working directory, which is left
    unchanged.

    Raises:
        FileNotFoundError: If the ``contents`` directory does not exist.
    """
    source_dir = "contents"
    tmp_dir = "contents_tmp"

    if not os.path.isdir(source_dir):
        raise FileNotFoundError(f"Source directory not found: {source_dir!r}")

    # exist_ok avoids the check-then-create race of a separate exists() test
    os.makedirs(tmp_dir, exist_ok=True)

    # Convert *.md files to *.qmd files in the temporary directory
    for md_file in get_md_files():
        # splitext swaps only the extension; str.replace would also rewrite
        # a ".md" that happens to appear in the middle of the file name
        stem, _ = os.path.splitext(os.path.basename(md_file))
        process_file(md_file, os.path.join(tmp_dir, stem + ".qmd"))

    # Process existing .qmd files without changing the working directory
    for qmd_file in glob.glob(os.path.join(source_dir, "*.qmd")):
        output_file = os.path.join(tmp_dir, os.path.basename(qmd_file))
        process_file(qmd_file, output_file)
# Run the conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()