add _extensions
This commit is contained in:
@@ -0,0 +1,161 @@
|
||||
# Convert *.md files to *.qmd files and pre-process them
|
||||
# Randomize footnote identifiers in multiple Quarto files to avoid conflicts
|
||||
# Convert reference-style links to inline links
|
||||
# Remove line breaks inside corner-bracket quotation marks (「…」)
|
||||
# Reformat display math equations in Ulysses
|
||||
|
||||
# Copyright: © 2024–Present Tom Ben
|
||||
# License: MIT License
|
||||
|
||||
import re
|
||||
import glob
|
||||
import os
|
||||
import random
|
||||
import string
|
||||
|
||||
|
||||
def get_md_files():
    """Return the paths of the numbered Markdown sources under ``contents/``.

    A file is included when its name starts with a digit and ends in
    ``.md``. Paths are relative to the current working directory and come
    back in whatever order ``glob`` yields them.
    """
    numbered_md_pattern = "contents/[0-9]*.md"
    return list(glob.glob(numbered_md_pattern))
|
||||
|
||||
|
||||
def randomize_footnote_identifiers(qmd_content):
    """Replace numeric footnote identifiers with random 5-character ones.

    Every ``[^N]`` marker whose ``N`` consists only of digits is rewritten,
    which covers both references and ``[^N]:`` definitions. Within one call
    the same number always maps to the same new identifier. Footnotes whose
    identifier is not purely numeric are left untouched.

    Args:
        qmd_content: Document text to process.

    Returns:
        The text with its numeric footnote identifiers replaced.
    """
    numeric_footnote = re.compile(r'\[\^(\d+)\]')
    alphabet = string.ascii_letters + string.digits

    # Labels already used in the document (numeric or named) must never be
    # handed out as a new identifier, otherwise two footnotes would merge.
    taken = set(re.findall(r'\[\^([^\]\s]+)\]', qmd_content))

    # Sorting gives a stable assignment order when the RNG is seeded.
    replacements = {}
    for old_id in sorted(set(numeric_footnote.findall(qmd_content))):
        new_id = ''.join(random.choices(alphabet, k=5))
        while new_id in taken:
            new_id = ''.join(random.choices(alphabet, k=5))
        taken.add(new_id)
        replacements[old_id] = new_id

    # A single pass guarantees freshly inserted identifiers are never
    # matched again, even if one happens to consist only of digits.
    return numeric_footnote.sub(
        lambda m: f'[^{replacements[m.group(1)]}]', qmd_content)
|
||||
|
||||
|
||||
def convert_reference_to_inline(qmd_content):
    """Convert numbered reference-style links into inline links.

    Definitions of the form ``[N]: target`` (``N`` all digits) are collected
    and removed from the text, and every usage ``[text][N]`` is rewritten to
    ``[text](target)``. A usage whose number has no definition is left
    exactly as written instead of being turned into an empty link.

    Args:
        qmd_content: Document text to process.

    Returns:
        The text with definitions removed and resolvable usages inlined.
    """
    # A definition starts either at the very beginning of the document or
    # right after a newline; the newline is consumed so that removing the
    # definition does not leave an empty line behind.
    definition_pattern = re.compile(r'(?:\A|\n)\[(\d+)\]:\s*(.*)')

    # Later definitions of the same number override earlier ones.
    reference_links = dict(definition_pattern.findall(qmd_content))

    # Remove the reference link definitions from the content
    qmd_content = definition_pattern.sub('', qmd_content)

    def replace_link(match):
        url = reference_links.get(match.group(2))
        if url is None:
            # Unknown reference: keep the original markup untouched.
            return match.group(0)
        return f'[{match.group(1)}]({url})'

    usage_pattern = re.compile(r'\[(.*?)\]\[(\d+)\]')
    return usage_pattern.sub(replace_link, qmd_content)
|
||||
|
||||
|
||||
def remove_linebreaks_in_quotes(text):
    """Join lines that were broken inside 「…」 quotation marks.

    Each span that starts with ``「`` and runs to the next ``」`` has all of
    its newline characters deleted; text outside such spans is unchanged.
    """
    quoted_span = re.compile(r'「[^」]*?」')
    return quoted_span.sub(
        lambda hit: hit.group(0).replace('\n', ''), text)
|
||||
|
||||
|
||||
def reformat_math_equations(content):
    """Rewrite math equations into block form with ``$$`` on its own lines.

    Two passes are applied in order:

    1. A one-line equation followed by a ``{#label}`` becomes
       ``$$\\n<equation>\\n$$ {#label}``. The equation may be delimited by
       either ``$...$`` or ``$$...$$``, so the delimiters never end up
       inside the rewritten block.
    2. Every ``$$...$$`` span not immediately followed by ``{#`` becomes
       ``$$\\n<equation>\\n$$``.

    Args:
        content: Document text to process.

    Returns:
        The text with matching equations reformatted.
    """
    # Reformat display math with labels to block format.
    # `\${1,2}` accepts both single- and double-dollar delimiters.
    labeled_pattern = r"\${1,2}(.+?)\${1,2} *(\{#.+?\})"

    def replace_with_labeled_block(match):
        equation = match.group(1).strip()
        label = match.group(2).strip()
        return f"$$\n{equation}\n$$ {label}"

    content = re.sub(labeled_pattern, replace_with_labeled_block, content)

    # Reformat display math without labels to block format.
    # Blocks produced by the first pass are matched here too and are
    # re-emitted unchanged.
    display_pattern = r"(?<!\$)\$\$([^\$]+?)\$\$(?!\{#)"

    def replace_with_display_block(match):
        equation = match.group(1).strip()
        return f"$$\n{equation}\n$$"

    return re.sub(display_pattern, replace_with_display_block, content)
|
||||
|
||||
|
||||
def process_file(input_file, output_file):
    """Pre-process one source file and write the result to ``output_file``.

    The input is read as UTF-8, a series of regex clean-ups is applied, then
    the footnote, reference-link, quotation and math helpers are run in that
    order, and the result is written as UTF-8.

    Args:
        input_file: Path of the file to read.
        output_file: Path of the file to write (overwritten if it exists).
    """
    with open(input_file, "r", encoding="utf-8") as f:
        content = f.read()

    # Remove links with `[@]` and a space before it
    content = re.sub(r"\s*\[@\].*?[\]\)]", "", content)
    # Remove square brackets enclosing the caption
    content = re.sub(r"^\[(.*)\}\]$", r"\n :\1}", content, flags=re.MULTILINE)
    # Merge multiple adjacent citations into one
    content = re.sub(r"\][\(\[].*?;\s*\[", "; ", content)
    # Replace '{{\<...\>}}' with '{{<...>}}'. The group is non-greedy so
    # that several shortcodes on one line are each unescaped on their own.
    content = re.sub(r"\{\{\\<(.*?)\\>}}", r"{{<\1>}}", content)
    # Remove comment blocks to avoid errors of Python filter
    content = re.sub(r"^```{=comment}.*?^```$", "",
                     content, flags=re.DOTALL | re.MULTILINE)

    # Randomize footnote identifiers
    content = randomize_footnote_identifiers(content)
    # Convert reference-style links to inline links
    content = convert_reference_to_inline(content)
    # Remove line breaks in quotes
    content = remove_linebreaks_in_quotes(content)
    # Reformat math equations
    content = reformat_math_equations(content)

    with open(output_file, "w", encoding="utf-8") as f:
        f.write(content)
|
||||
|
||||
|
||||
def main():
    """Pre-process every source under ``contents/`` into ``contents_tmp/``.

    Numbered ``*.md`` files returned by ``get_md_files`` are written with a
    ``.qmd`` extension; ``*.qmd`` files already in ``contents/`` are
    processed afterwards and keep their names. The working directory of the
    process is left unchanged.
    """
    tmp_dir = "contents_tmp"
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(tmp_dir, exist_ok=True)

    # Convert *.md files to *.qmd files in contents_tmp directory
    for md_file in get_md_files():
        # Swap only the extension; ".md" elsewhere in the name is kept.
        stem, _ = os.path.splitext(os.path.basename(md_file))
        process_file(md_file, os.path.join(tmp_dir, stem + ".qmd"))

    # Process existing .qmd files in contents directory and output to
    # contents_tmp (globbed by path rather than by changing directory).
    for qmd_file in glob.glob(os.path.join("contents", "*.qmd")):
        output_file = os.path.join(tmp_dir, os.path.basename(qmd_file))
        process_file(qmd_file, output_file)
|
||||
|
||||
|
||||
# Run the conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user