add _extensions
This commit is contained in:
@@ -0,0 +1,170 @@
|
||||
--[[
|
||||
capitalize-subtitle – Capitalize first letter after colons and em dashes in bibliography entries
|
||||
|
||||
This filter capitalizes the first letter after colons and em dashes in bibliography
|
||||
entries, following APA and similar styles that require subtitle capitalization.
|
||||
|
||||
It only processes paragraphs within bibliography divs
|
||||
Must be run after Citeproc
|
||||
|
||||
Copyright: © 2025–present Tom Ben
|
||||
License: MIT
|
||||
]]
|
||||
|
||||
-- Typographic quotes, dashes and hyphens treated as punctuation in addition to
-- the ASCII punctuation covered by %p.
-- NOTE: Lua patterns work on bytes, so when one of these UTF-8 strings is
-- spliced into a [...] class every individual byte becomes a class member.
local EXTRA_PUNCT = "“”‘’«»‹›„‟「」『』﹁﹂﹃﹄–—‐"
-- Apostrophes and hyphens that may appear inside a word.
local APOSTROPHES = "'’"
local HYPHENS = "%-‐"
-- Class body shared by both word patterns below: letters, apostrophes, hyphens.
local WORD_CHARS = "%a" .. APOSTROPHES .. HYPHENS
-- Class body for the whitespace/punctuation allowed between a mark and a word.
local AFTER_PUNCT_CLASS = "%s%p" .. EXTRA_PUNCT
-- Zero or more word characters (the tail of a word after its first letter).
local WORD_TAIL_PATTERN = "[" .. WORD_CHARS .. "]*"
-- Zero or more non-word characters running up to the end of the string.
local TRIGGER_TAIL_PATTERN = "[^" .. WORD_CHARS .. "]*$"
|
||||
|
||||
--- Tell whether a one-byte string counts as whitespace or punctuation.
-- A missing or empty value is reported as punctuation, so callers can treat
-- "nothing follows" like "a separator follows".
-- @param char string|nil  the character to classify
-- @return boolean
local function is_punctuation(char)
  local missing = not char or char == ""
  if missing then
    return true
  end
  -- ASCII whitespace/punctuation first, then the typographic extras.
  if char:find("[%s%p]") ~= nil then
    return true
  end
  local found_at = EXTRA_PUNCT:find(char, 1, true)
  return found_at ~= nil
end
|
||||
|
||||
-- Predicate for the colon mark: skip capitalization when nothing separates the
-- colon from the letter, or when the separator contains a slash (as in "://").
local function skip_after_colon(punct)
  return punct == "" or punct:find("/", 1, true)
end

-- Marks that trigger capitalization of the following word inside one string.
-- Each entry has a `mark` and an optional `skip(punct)` predicate that vetoes
-- the capitalization based on the characters between mark and letter.
local INTERNAL_MARKS = {
  { mark = ":", skip = skip_after_colon },
  { mark = "—" },
}
|
||||
|
||||
--- Capitalize the lowercase ASCII letter that follows a mark inside one string.
-- For every entry of INTERNAL_MARKS the text is searched for the mark, an
-- optional run of whitespace/punctuation, and a letter in a-z.
-- @param text string
-- @return string  the text with the matched letters upper-cased
local function capitalize_internal_marks(text)
  for _, entry in ipairs(INTERNAL_MARKS) do
    local mark, skip = entry.mark, entry.skip
    local pattern = mark .. "([" .. AFTER_PUNCT_CLASS .. "]*)([a-z])"

    local function replace(gap, first)
      -- The colon entry requires some separation (e.g. a space or a quote)
      -- so that URLs and protocols are left alone.
      if not (skip and skip(gap)) then
        first = first:upper()
      end
      return mark .. gap .. first
    end

    -- Parentheses drop gsub's second result (the substitution count).
    text = (text:gsub(pattern, replace))
  end

  return text
end
|
||||
|
||||
--- Apply a pending "capitalize the next word" request to a string.
-- Leading whitespace and punctuation are skipped; if the first word then starts
-- with a lowercase ASCII letter and is purely alphabetic (apostrophes and
-- hyphens allowed), its first letter is upper-cased.
-- @param str string|nil  text of the inline being examined
-- @return string|nil  the (possibly modified) text
-- @return boolean  true when the pending request was consumed by this string
--   (a word-like token was found, whether or not it was changed); false when
--   the string held only punctuation/whitespace and the request must carry on
local function capitalize_first(str)
  if not str or str == "" then
    return str, false
  end

  -- Separate leading punctuation/spaces (including common typographic quotes)
  -- from the word.
  local leading, remainder = str:match("^([%s%p" .. EXTRA_PUNCT .. "]*)(.*)")
  if not remainder or remainder == "" then
    return str, false
  end

  -- Already capitalized: nothing to change, but the request is used up.
  if remainder:match("^[A-Z]" .. WORD_TAIL_PATTERN) then
    return str, true
  end

  local word, suffix = remainder:match("^([a-z]" .. WORD_TAIL_PATTERN .. ")(.*)")
  if not word then
    -- Token starts with something other than an ASCII letter (digit,
    -- non-ASCII byte, ...): leave it alone; it consumes the request unless it
    -- is punctuation.
    return str, not is_punctuation(remainder:sub(1, 1))
  end

  -- Only capitalize when the word is purely alphabetic and is followed by
  -- punctuation/space or nothing. This avoids changing items like
  -- e2105061118. Such a token still occupies the "next word" position, so the
  -- request is consumed; otherwise it would leak to the following inline and
  -- capitalize an unrelated word.
  if suffix ~= "" and not is_punctuation(suffix:sub(1, 1)) then
    return str, true
  end

  return leading .. word:sub(1, 1):upper() .. word:sub(2) .. suffix, true
end
|
||||
|
||||
-- Forward declaration: rebuild_container and process_inlines call each other.
local process_inlines

-- Inline containers whose constructor takes just the content list, keyed by
-- element tag.
local SIMPLE_WRAPPERS = {}
for _, tag in ipairs({ "Emph", "Strong", "SmallCaps" }) do
  SIMPLE_WRAPPERS[tag] = pandoc[tag]
end
|
||||
|
||||
--- Process the children of a container inline and rebuild the container.
-- Only Emph, Strong, SmallCaps, Span and Quoted are rebuilt; for every other
-- element the first result is nil and the caller keeps the original element.
-- @param elem  inline element
-- @param capitalize_next boolean  whether a capitalization is pending
-- @return rebuilt element or nil
-- @return boolean  the pending flag after walking the children (unchanged
--   when the element has no `content`)
local function rebuild_container(elem, capitalize_next)
  local content = elem.content
  if not content then
    return nil, capitalize_next
  end

  local inner, pending = process_inlines(content, capitalize_next)

  local tag = elem.t
  local rebuilt = nil
  if SIMPLE_WRAPPERS[tag] then
    rebuilt = SIMPLE_WRAPPERS[tag](inner)
  elseif tag == "Span" then
    rebuilt = pandoc.Span(inner, elem.attr)
  elseif tag == "Quoted" then
    rebuilt = pandoc.Quoted(elem.quotetype, inner)
  end

  return rebuilt, pending
end
|
||||
|
||||
--- Walk a list of inlines and capitalize the word after a colon or em dash.
-- The pending-capitalization flag is threaded through the list so that a mark
-- ending one Str can affect the next Str, also across container boundaries.
-- @param inlines_list  list of inline elements
-- @param capitalize_next boolean|nil  whether a capitalization is pending
-- @return table    new list of inlines
-- @return boolean  pending flag after the last element
function process_inlines(inlines_list, capitalize_next)
  local out = {}
  local pending = capitalize_next or false

  for _, inline in ipairs(inlines_list) do
    local tag = inline.t

    if tag == "Str" then
      local text = capitalize_internal_marks(inline.text)

      if pending then
        local consumed
        text, consumed = capitalize_first(text)
        pending = not consumed
      end

      -- A mark followed only by non-word characters up to the end of this Str
      -- requests capitalization of whatever comes next.
      local ends_with_mark = text:find(":" .. TRIGGER_TAIL_PATTERN)
        or text:find("—" .. TRIGGER_TAIL_PATTERN)
      if ends_with_mark then
        pending = true
      end

      out[#out + 1] = pandoc.Str(text)
    elseif tag == "Space" or tag == "SoftBreak" or tag == "LineBreak" then
      -- Whitespace neither consumes nor cancels a pending request.
      out[#out + 1] = inline
    else
      local rebuilt
      rebuilt, pending = rebuild_container(inline, pending)
      if not rebuilt then
        -- Element kept as is: it also cancels any pending request.
        rebuilt = inline
        pending = false
      end
      out[#out + 1] = rebuilt
    end
  end

  return out, pending
end
|
||||
|
||||
--- Rebuild a paragraph with the word after each colon or em dash capitalized.
-- @param para  pandoc Para element
-- @return a new Para holding the processed inlines
local function process_para(para)
  -- Parentheses keep only the inline list and drop the trailing flag.
  local inlines = (process_inlines(para.content, false))
  return pandoc.Para(inlines)
end
|
||||
|
||||
--- Filter entry point: process paragraphs inside bibliography divs only.
-- A div qualifies when it carries one of the classes `references`,
-- `csl-bib-body` or `csl-entry`; any other div is left untouched (nothing is
-- returned).
function Div(div)
  for _, class in ipairs({ "references", "csl-bib-body", "csl-entry" }) do
    if div.classes:includes(class) then
      return pandoc.walk_block(div, { Para = process_para })
    end
  end
end
|
||||
@@ -0,0 +1,91 @@
|
||||
--- Modified from https://github.com/bcdavasconcelos/citetools/blob/main/_extensions/citetools/citation-backlinks.lua
|
||||
|
||||
--- Todo: even with link-citations and link-fields turned off, citation-backlinks still adds a single backlink to each reference in the final bibliography div.
|
||||
|
||||
--- citation-backlinks.lua – adds citation backlinks to the bibliography
|
||||
-- https://github.com/tarleb/citation-backlinks
|
||||
--- Copyright: © 2022–2024 John MacFarlane and Albert Krewinkel and Bernardo Vasconcelos
|
||||
--- License: MIT – see LICENSE for details
|
||||
|
||||
-- Fail early with a clear message when pandoc is too old for this filter.
PANDOC_VERSION:must_be_at_least("2.17")

-- cites: maps a citation item identifier to the array of cite identifiers
-- recorded for it.
-- cite_number: running counter used to build the next cite identifier.
local cites, cite_number = {}, 1
|
||||
|
||||
--- Pair an element with a format-specific label so it can be referenced later.
-- @param s string  label identifier
-- @param el        element to label
-- @return table    list of inlines: the element plus a raw label for LaTeX
--   (label first) and Typst (label last); the element alone for every other
--   output format
local function with_label(s, el)
  if FORMAT == "latex" then
    return { pandoc.RawInline("latex", "\\label{" .. s .. "}"), el }
  elseif FORMAT == "typst" then
    return { el, pandoc.RawInline("typst", " #label(\"" .. s .. "\")") }
  end
  -- Every other writer (docx, html, html5, epub, ...) gets the bare element.
  -- Previously formats outside docx/html fell through and returned nil, which
  -- left the caller building a Span from nil content.
  return { el }
end
|
||||
|
||||
--- Give every Cite element an identifier and record which items it cites.
-- The Cite is wrapped in a Span; outside Typst the Span carries the cite
-- identifier as its id.
function Cite(el)
  local cite_id = "cite_" .. cite_number
  cite_number = cite_number + 1

  for _, citation in ipairs(el.citations) do
    -- Record only citations whose mode is 'NormalCitation'; per the original
    -- note this is meant to leave out 'nocite' entries.
    if citation.mode == 'NormalCitation' then
      local seen = cites[citation.id]
      if not seen then
        seen = {}
        cites[citation.id] = seen
      end
      seen[#seen + 1] = cite_id
    end
  end

  local labelled = with_label(cite_id, el)
  if FORMAT == "typst" then
    return pandoc.Span(labelled)
  end
  return pandoc.Span(labelled, pandoc.Attr(cite_id))
end
|
||||
|
||||
--- Append inlines to the end of a list of blocks.
-- The inlines are added to the last block when it is a Para or Plain;
-- otherwise (including when the list is empty) they are appended as a new
-- Plain block.
-- @param blocks   list of blocks, modified in place
-- @param inlines  list of inlines to append
-- @return the same `blocks` list
function append_inline(blocks, inlines)
  local last = blocks[#blocks]
  -- `last` is nil for an empty list; guard before reading its tag.
  if last and (last.t == 'Para' or last.t == 'Plain') then
    -- append to last block
    last.content:extend(inlines)
  else
    -- append as additional block
    blocks[#blocks + 1] = pandoc.Plain(inlines)
  end
  return blocks
end
|
||||
|
||||
--- Append backlinks to a bibliography entry div.
-- A div is handled when its identifier contains `ref-`; the text after it is
-- used as the citation item id to look up in `cites`. Other divs are left
-- untouched (nothing is returned).
function Div(el)
  local citation_id = el.identifier:match("ref%-(.+)")
  if not citation_id then
    return
  end

  local is_latex = FORMAT == "latex"
  local backlinks = pandoc.Inlines { pandoc.Space(), pandoc.Str("[") }
  if is_latex then
    -- LaTeX output starts the list with a "go back" viewer action.
    table.insert(backlinks,
      pandoc.RawInline("latex", "\\Acrobatmenu{GoBack}{$\\hookleftarrow$}"))
  end

  local cite_ids = cites[citation_id] or {}
  for i, cite_id in ipairs(cite_ids) do
    -- More than the two opening inlines means something precedes this link.
    if #backlinks > 2 then
      table.insert(backlinks, pandoc.Str(","))
      table.insert(backlinks, pandoc.Space())
    end

    local marker
    if is_latex then
      marker = pandoc.RawInline("latex", "\\pageref{" .. cite_id .. "}")
    else
      marker = pandoc.Str(i)
    end
    table.insert(backlinks, pandoc.Link(marker, "#" .. cite_id))
  end

  -- Only emit the bracketed list when it holds more than the opening inlines.
  if #backlinks > 2 then
    append_inline(el.content, backlinks .. { pandoc.Str("]") })
  end
  return el
end
|
||||
@@ -0,0 +1,254 @@
|
||||
"""
|
||||
Citation Tools for Academic Writing
|
||||
|
||||
This script provides utilities for managing citations in academic writing:
|
||||
1. Extract citation keys from Markdown files and create a filtered bibliography
|
||||
2. Copy cited reference files to a specified directory for backup or sharing
|
||||
|
||||
Typical usage:
|
||||
python citation-tools.py --extract
|
||||
python citation-tools.py --copy
|
||||
|
||||
Copyright: © 2025–Present Tom Ben
|
||||
License: MIT License
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import argparse
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def extract_citation_keys(markdown_file):
    """Extract citation keys from a markdown file.

    A key is recognised as ``@`` (not preceded by an ASCII letter or digit, so
    e-mail addresses are ignored) followed by a letter, digit or underscore and
    then any run of letters, digits, underscores and *internal* punctuation
    (``:.#$%&-+?<>~/``), i.e. punctuation directly followed by another
    letter, digit or underscore. This covers both the bracketed
    ``[@key1; @key2]`` form and bare ``@key`` mentions, whatever character
    follows the key.

    Args:
        markdown_file: Path of the UTF-8 encoded markdown file to scan.

    Returns:
        A set of citation keys, excluding those starting with ``fig-`` or
        ``tbl-`` (figure and table references).
    """
    with open(markdown_file, 'r', encoding='utf-8') as f:
        content = f.read()

    # The earlier patterns only allowed letters, digits and '-' and required a
    # specific terminator, so keys such as `doe_2019` or `lee:2021` (and keys
    # followed by e.g. an apostrophe) were silently dropped.
    key_pattern = re.compile(
        r'(?<![a-zA-Z0-9])@'
        r'([a-zA-Z0-9_](?:[a-zA-Z0-9_]|[:.#$%&\-+?<>~/](?=[a-zA-Z0-9_]))*)')
    all_keys = set(key_pattern.findall(content))

    # Filter out figure and table references
    return {key for key in all_keys if not key.startswith(('fig-', 'tbl-'))}
|
||||
|
||||
|
||||
def load_csl_entries(csl_json_file):
    """Read a CSL JSON file and return its top-level list of entries.

    Raises:
        ValueError: If the top-level JSON value is not a list.
    """
    with open(csl_json_file, 'r', encoding='utf-8') as handle:
        entries = json.load(handle)

    if isinstance(entries, list):
        return entries

    raise ValueError(
        f"Expected a list of CSL JSON entries, got {type(entries)} instead.")
|
||||
|
||||
|
||||
def parse_file_field(file_field):
    """Split a CSL JSON ``file`` field into its individual paths.

    The field is split on ``;``; each piece is stripped of surrounding
    whitespace and empty pieces are dropped. Anything that is not a non-empty
    string yields an empty list.
    """
    if not isinstance(file_field, str) or not file_field:
        return []

    pieces = (piece.strip() for piece in file_field.split(';'))
    return [piece for piece in pieces if piece]
|
||||
|
||||
|
||||
def build_citation_file_index(entries):
    """Map each citation ID to the list of file paths attached to it.

    Entries without an ``id`` and entries whose ``file`` field yields no paths
    are left out of the result.
    """
    index = {}

    for entry in entries:
        citation_id = entry.get('id')
        attached = parse_file_field(entry.get('file')) if citation_id else []
        if attached:
            index[citation_id] = attached

    return index
|
||||
|
||||
|
||||
def extract_csl_json_entries(csl_json_file, citation_keys, remove_fields=None):
    """Return the cited CSL JSON entries serialised as a JSON string.

    Args:
        csl_json_file: Path to the full CSL JSON bibliography.
        citation_keys: Iterable of citation IDs to keep.
        remove_fields: Field names to drop from every kept entry
            (default: ``['file']``).

    Returns:
        A JSON array (indented by 2, non-ASCII kept as is, trailing newline)
        of the matching entries sorted by ``id``.
    """
    fields_to_drop = ['file'] if remove_fields is None else remove_fields
    wanted = set(citation_keys)

    def is_cited(entry):
        entry_id = entry.get('id')
        return bool(entry_id) and entry_id in wanted

    def without_dropped_fields(entry):
        return {name: value for name, value in entry.items()
                if name not in fields_to_drop}

    kept = [without_dropped_fields(entry)
            for entry in load_csl_entries(csl_json_file)
            if is_cited(entry)]
    kept = sorted(kept, key=lambda item: item.get('id', ''))

    return json.dumps(kept, ensure_ascii=False, indent=2) + '\n'
|
||||
|
||||
|
||||
def copy_cited_files(args):
    """Copy the files attached to cited references into ``args.output_dir``.

    Reads ``args.clean``, ``args.output_dir``, ``args.bib`` and
    ``args.content_dir``. For every cited key that has attached paths, the
    first path that exists on disk is copied and named ``<key><extension>``.
    A summary is printed at the end.

    Returns:
        The set of citation keys found in the Markdown files.
    """
    output_dir = args.output_dir

    # Start from an empty output directory if requested
    if args.clean and os.path.exists(output_dir):
        print(f"Cleaning output directory: {args.output_dir}")
        shutil.rmtree(output_dir)
    os.makedirs(output_dir, exist_ok=True)

    # Citation ID -> attached file paths
    citation_files = build_citation_file_index(load_csl_entries(args.bib))

    # Collect the citation keys of every Markdown file in the content directory
    markdown_files = list(Path(args.content_dir).glob('[0-9]*.md'))
    all_keys = set()
    for md_file in markdown_files:
        all_keys.update(extract_citation_keys(md_file))

    copied_count = 0
    missing_keys = []       # cited keys with no attached paths in the index
    not_found_pairs = []    # (key, path) where no listed path exists on disk

    for key in all_keys:
        if key not in citation_files:
            missing_keys.append(key)
            continue

        paths = citation_files[key]
        existing_path = next(
            (path for path in paths if os.path.exists(path)), None)
        # Fall back to the first listed path for reporting and the extension.
        source_path = existing_path or paths[0]
        extension = os.path.splitext(source_path)[1]
        dest_path = os.path.join(output_dir, f"{key}{extension}")

        if not (existing_path and os.path.exists(existing_path)):
            not_found_pairs.append((key, source_path))
            continue

        try:
            shutil.copy2(existing_path, dest_path)
            copied_count += 1
        except Exception as e:
            print(f"Error copying {key}: {e}")

    # Print simplified summary
    print(f"Markdown files in content directory: {len(markdown_files)}")
    print(f"Total unique citation keys found: {len(all_keys)}")
    print(f"Files successfully copied: {copied_count}")
    print(f"Citation keys without file paths: {len(missing_keys)}")
    print(
        f"Files not found (path exists in bibliography but file missing): {len(not_found_pairs)}")

    if missing_keys:
        print("\nCitation keys without file paths:")
        for key in sorted(missing_keys):
            print(f" - {key}")

    if not_found_pairs:
        print("\nCitation keys where file wasn't found:")
        for key, path in sorted(not_found_pairs):
            print(f" - {key}: {path}")

    return all_keys
|
||||
|
||||
|
||||
def extract_citations(args):
    """Write the bibliography entries cited in the Markdown files to a file.

    Reads ``args.content_dir``, ``args.bib``, ``args.remove_fields`` and
    ``args.output_bib`` and prints a short summary.

    Returns:
        The set of citation keys found in the Markdown files.
    """
    # Markdown files in the content directory whose name starts with a digit
    markdown_files = list(Path(args.content_dir).glob('[0-9]*.md'))

    # Union of the citation keys of all files
    all_keys = set()
    for md_file in markdown_files:
        all_keys |= set(extract_citation_keys(md_file))

    # Filtered CSL JSON, written to the output file
    json_content = extract_csl_json_entries(
        args.bib, all_keys, args.remove_fields)
    with open(args.output_bib, 'w', encoding='utf-8') as out:
        out.write(json_content)

    # Print simplified summary
    for line in (
        f"Markdown files in content directory: {len(markdown_files)}",
        f"Total unique citation keys found: {len(all_keys)}",
        f"Extracted citations to `{args.output_bib}`",
    ):
        print(line)

    return all_keys
|
||||
|
||||
|
||||
def _build_parser(project_root):
    """Create the argument parser; defaults are derived from ``project_root``."""
    default_bib = os.path.expanduser(
        "~/Library/CloudStorage/Dropbox/pkm/bibliography.json")
    default_content_dir = str(project_root / "contents")

    parser = argparse.ArgumentParser(
        description='Citation tools for extracting and copying cited references')

    # Command flags (used instead of subcommands)
    parser.add_argument(
        '--extract', action='store_true',
        help='Extract citations to a filtered CSL JSON file')
    parser.add_argument(
        '--copy', action='store_true',
        help='Copy cited files to a directory')

    # Arguments shared by both commands
    parser.add_argument(
        '--bib', default=default_bib,
        help=f'Path to bibliography.json file (default: {default_bib})')
    parser.add_argument(
        '--content_dir', default=default_content_dir,
        help=f'Path to content directory with Markdown files (default: {default_content_dir})')

    # Arguments specific to --extract
    parser.add_argument(
        '--output_bib', default=str(project_root / "citebib.json"),
        help=f'Path to output CSL JSON file (default: {project_root}/citebib.json)')
    parser.add_argument(
        '--remove_fields', nargs='+', default=['file'],
        help='Fields to remove from CSL JSON entries (default: file)')

    # Arguments specific to --copy
    parser.add_argument(
        '--output_dir', default=os.path.expanduser("~/Downloads/cited-docs"),
        help='Path to output directory for copied files (default: ~/Downloads/cited-docs)')
    parser.add_argument(
        '--clean', action='store_true',
        help='Clean the output directory before copying files')

    return parser


def main():
    """Parse command line arguments and execute the appropriate function.

    ``--extract`` takes precedence over ``--copy``; with neither flag the help
    text is printed.
    """
    # The project root is the script's directory, or its parent when the
    # script lives in a directory named `_extensions`.
    script_dir = Path(__file__).parent.resolve()
    if script_dir.name == "_extensions":
        project_root = script_dir.parent
    else:
        project_root = script_dir

    parser = _build_parser(project_root)
    args = parser.parse_args()

    if args.extract:
        extract_citations(args)
        return
    if args.copy:
        copy_cited_files(args)
        return
    parser.print_help()


if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,161 @@
|
||||
# Convert *.md files to *.qmd files and pre-process them
|
||||
# Randomize footnote identifiers in multiple Quarto files to avoid conflicts
|
||||
# Convert reference-style links to inline links
|
||||
# Remove line breaks inside corner-bracket quotation marks (「…」)
|
||||
# Reformat display math equations in Ulysses
|
||||
|
||||
# Copyright: © 2024–Present Tom Ben
|
||||
# License: MIT License
|
||||
|
||||
import re
|
||||
import glob
|
||||
import os
|
||||
import random
|
||||
import string
|
||||
|
||||
|
||||
def get_md_files():
    """Return the paths matching ``contents/[0-9]*.md`` as a list."""
    md_pattern = "contents/[0-9]*.md"
    return list(glob.glob(md_pattern))
|
||||
|
||||
|
||||
def randomize_footnote_identifiers(qmd_content):
    """Replace numeric footnote identifiers with random 5-character ones.

    Every ``[^<digits>]`` marker (references and definitions alike) is mapped
    to a random identifier made of ASCII letters and digits. The same numeric
    identifier always maps to the same new identifier within one call, and
    distinct numeric identifiers get distinct new ones.

    Args:
        qmd_content: Document text.

    Returns:
        The text with all numeric footnote identifiers replaced.
    """
    footnote_pattern = re.compile(r'\[\^(\d+)\]')
    alphabet = string.ascii_letters + string.digits

    # Sorted so the old -> new assignment is reproducible under a seeded RNG.
    replacements = {}
    used = set()
    for old_id in sorted(set(footnote_pattern.findall(qmd_content))):
        new_id = ''.join(random.choices(alphabet, k=5))
        while new_id in used:
            new_id = ''.join(random.choices(alphabet, k=5))
        used.add(new_id)
        replacements[old_id] = new_id

    # One pass over the text: a freshly inserted identifier can never be
    # picked up and replaced again, and definitions (`[^id]:`) are covered by
    # the same pattern as references.
    return footnote_pattern.sub(
        lambda match: f'[^{replacements[match.group(1)]}]', qmd_content)
|
||||
|
||||
|
||||
def convert_reference_to_inline(qmd_content):
    """Convert numbered reference-style links to inline links.

    Definitions of the form ``[<digits>]: <url>`` that start on a new line are
    collected and removed from the text; every usage ``[text][<digits>]`` whose
    number has a definition is rewritten as ``[text](<url>)``.

    Usages whose number has no definition are left exactly as written, so text
    that merely looks like a reference link (e.g. ``arr[i][0]``) is not turned
    into an empty link.

    Args:
        qmd_content: Document text.

    Returns:
        The text with definitions removed and resolvable usages inlined.
    """
    # Extract reference links
    reference_pattern = re.compile(r'\n\[(\d+)\]:\s*(.*)')
    reference_links = {
        key: url for key, url in reference_pattern.findall(qmd_content)}

    # Remove the reference link definitions from the content
    qmd_content = reference_pattern.sub('', qmd_content)

    def replace_link(match):
        key = match.group(2)
        if key not in reference_links:
            # No definition for this number: keep the original text.
            return match.group(0)
        return f'[{match.group(1)}]({reference_links[key]})'

    # Replace reference-style link usages with inline links
    usage_pattern = re.compile(r'\[(.*?)\]\[(\d+)\]')
    return usage_pattern.sub(replace_link, qmd_content)
|
||||
|
||||
|
||||
def remove_linebreaks_in_quotes(text):
    """Remove newlines that fall inside 「…」 quotation marks.

    Each span from an opening 「 to the next closing 」 has all of its newline
    characters deleted; text outside such spans is returned unchanged.
    """
    quoted_span = re.compile(r'「[^」]*?」')
    return quoted_span.sub(
        lambda match: ''.join(match.group(0).split('\n')), text)
|
||||
|
||||
|
||||
def reformat_math_equations(content):
    """Rewrite display math into block form with ``$$`` on separate lines.

    Two forms are handled:

    * ``$equation$ {#label}`` (single-dollar math followed by a label)
      becomes ``$$``, the equation and ``$$ {#label}`` on separate lines;
    * ``$$equation$$`` not directly followed by ``{#`` becomes ``$$``, the
      equation and ``$$`` on separate lines.

    Args:
        content: Document text.

    Returns:
        The text with the equations reformatted.
    """
    # The equation body must not contain `$` (or a newline). With `.+?` the
    # match could start at an earlier inline `$...$` on the same line and
    # swallow everything up to the labelled equation.
    labeled_pattern = r"\$([^$\n]+?)\$ *(\{#.+?\})"

    def replace_with_labeled_block(match):
        equation = match.group(1).strip()
        label = match.group(2).strip()
        return f"$$\n{equation}\n$$ {label}"

    content = re.sub(labeled_pattern, replace_with_labeled_block, content)

    # Display math without a label: `$$ ... $$` not preceded by another `$`
    # and not immediately followed by `{#`.
    display_pattern = r"(?<!\$)\$\$([^\$]+?)\$\$(?!\{#)"

    def replace_with_display_block(match):
        equation = match.group(1).strip()
        return f"$$\n{equation}\n$$"

    return re.sub(display_pattern, replace_with_display_block, content)
|
||||
|
||||
|
||||
def process_file(input_file, output_file):
    """Pre-process one Markdown/Quarto file and write the result.

    The input is read as UTF-8, run through a fixed sequence of regex clean-ups
    and the helper transformations below, and written to ``output_file``.

    Args:
        input_file: Path of the file to read.
        output_file: Path of the file to write (overwritten if present).
    """
    with open(input_file, "r", encoding="utf-8") as f:
        content = f.read()

    # Remove links with `[@]` and a space before it
    content = re.sub(r"\s*\[@\].*?[\]\)]", "", content)
    # Remove square brackets enclosing the caption
    content = re.sub(r"^\[(.*)\}\]$", r"\n :\1}", content, flags=re.MULTILINE)
    # Merge multiple adjacent citations into one
    content = re.sub(r"\][\(\[].*?;\s*\[", "; ", content)
    # Replace '{{\<...\>}}' with '{{<...>}}'. The group is non-greedy so that
    # two shortcodes on one line are each un-escaped on their own instead of
    # being matched as a single span.
    content = re.sub(r"\{\{\\<(.*?)\\>}}", r"{{<\1>}}", content)
    # Remove comment blocks to avoid errors of Python filter
    content = re.sub(r"^```{=comment}.*?^```$", "",
                     content, flags=re.DOTALL | re.MULTILINE)

    # Randomize footnote identifiers
    content = randomize_footnote_identifiers(content)
    # Convert reference-style links to inline links
    content = convert_reference_to_inline(content)
    # Remove line breaks in quotes
    content = remove_linebreaks_in_quotes(content)
    # Reformat math equations
    content = reformat_math_equations(content)

    with open(output_file, "w", encoding="utf-8") as f:
        f.write(content)
|
||||
|
||||
|
||||
def main():
    """Pre-process the files in ``contents`` into ``contents_tmp``.

    Every ``contents/[0-9]*.md`` file is processed and written as a ``.qmd``
    file of the same base name; every existing ``contents/*.qmd`` file is
    processed and written under its own name. Paths are relative to the
    current working directory, which is left unchanged.
    """
    md_files = get_md_files()

    # Create contents_tmp directory if it doesn't exist
    tmp_dir = "contents_tmp"
    os.makedirs(tmp_dir, exist_ok=True)

    # Convert *.md files to *.qmd files in contents_tmp directory.
    # splitext swaps only the final extension (a plain str.replace would also
    # rewrite a ".md" occurring earlier in the file name).
    for md_file in md_files:
        stem, _ = os.path.splitext(os.path.basename(md_file))
        process_file(md_file, os.path.join(tmp_dir, stem + ".qmd"))

    # Process existing .qmd files in contents directory and output to
    # contents_tmp. Globbing with the directory prefix avoids changing the
    # process-wide working directory.
    for qmd_file in glob.glob(os.path.join("contents", "*.qmd")):
        output_file = os.path.join(tmp_dir, os.path.basename(qmd_file))
        process_file(qmd_file, output_file)


if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,17 @@
|
||||
--- Get a BibTeX/BibLaTeX or CSL JSON file cited from a large database
|
||||
--- Source: https://pandoc.org/lua-filters.html#pandoc.utils.references
|
||||
--- https://fosstodon.org/@pandoc/109549882954402931
|
||||
|
||||
--- Copyright: © 2022–Present Albert Krewinkel
|
||||
--- License: MIT License
|
||||
|
||||
-- *Note*: For BibLaTeX, it is needed to change the following entry name:
|
||||
-- - journal -> journaltitle
|
||||
-- - address -> location
|
||||
-- - publisher -> institution (only for thesis)
|
||||
|
||||
--- Replace the bibliography reference with the cited entries themselves.
-- Stores the result of `pandoc.utils.references(doc)` in the `references`
-- metadata field and clears the `bibliography` field.
function Pandoc(doc)
  local cited = pandoc.utils.references(doc)
  doc.meta.references = cited
  doc.meta.bibliography = nil
  return doc
end
|
||||
@@ -0,0 +1,14 @@
|
||||
--- Remove hyperlinks from DOIs while preserving the DOI text
|
||||
--- Useful for styles without DOIs but you don't want to edit the bibliography file
|
||||
|
||||
--- Copyright: © 2024–Present Tom Ben
|
||||
--- License: MIT License
|
||||
|
||||
--- Unwrap DOI links, keeping their text.
-- A link whose target starts with `http://doi.org` or `https://doi.org` is
-- replaced by its content; every other link is returned unchanged.
function Link(el)
  local is_doi_link = el.target:match("^https?://doi%.org") ~= nil
  if not is_doi_link then
    return el
  end
  -- Return just the link text content without the link wrapper
  return el.content
end
|
||||
Reference in New Issue
Block a user