You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

162 lines
4.8 KiB

import difflib
import os
from collections.abc import Iterable
from pathlib import Path
from typing import Annotated
import typer
from scripts.doc_parsing_utils import (
extract_code_includes,
extract_header_permalinks,
extract_html_links,
extract_markdown_links,
extract_multiline_code_blocks,
replace_code_includes_with_placeholders,
replace_header_permalinks,
replace_html_links,
replace_markdown_links,
replace_multiline_code_blocks_in_text,
replace_placeholders_with_code_includes,
)
# Relative-path prefixes that get_all_paths() filters out of its result.
# The tuple form matters: it is handed directly to str.startswith().
non_translated_sections = (
    # Directory prefix: the trailing separator is the platform's own one.
    "reference" + os.sep,
    # Individual top-level pages.
    "release-notes.md",
    "fastapi-people.md",
    "external-links.md",
    "newsletter.md",
    "management-tasks.md",
    "management.md",
    "contributing.md",
)
# Typer application object; the commands below are registered on it.
cli = typer.Typer()


@cli.callback()
def callback():
    # Intentionally a no-op callback for the app.
    # NOTE(review): presumably registered so Typer always treats the app as
    # a group of explicit sub-commands - confirm before removing.
    # Documented with comments rather than a docstring because Typer uses a
    # callback's docstring as help text, which would change the CLI output.
    pass
def iter_all_lang_paths(lang_path_root: Path) -> Iterable[Path]:
    """
    Iterate on the markdown files to translate in order of priority.

    The order is:

    1. ``*.md`` files directly inside ``lang_path_root``;
    2. every ``*.md`` file (recursively) under each priority section, in the
       order ``learn``, ``tutorial``, ``advanced``, ``about``, ``how-to``;
    3. every remaining ``*.md`` file found recursively under
       ``lang_path_root``.

    Each file is yielded exactly once.

    Args:
        lang_path_root: Root directory of one language's markdown docs.

    Yields:
        Paths of markdown files, built on top of ``lang_path_root``.
    """
    first_dirs = [
        lang_path_root / "learn",
        lang_path_root / "tutorial",
        lang_path_root / "advanced",
        lang_path_root / "about",
        lang_path_root / "how-to",
    ]
    first_parent = lang_path_root

    yield from first_parent.glob("*.md")
    for dir_path in first_dirs:
        yield from dir_path.rglob("*.md")

    # Everything that was not already yielded above.
    priority_dirs = set(first_dirs)
    for path in lang_path_root.rglob("*.md"):
        if path.parent == first_parent:
            continue
        # Compare whole path components, not string prefixes: a raw
        # ``str.startswith`` check would also skip sibling directories such
        # as ``learning/`` or ``how-to-extra/`` and silently drop their files.
        if any(parent in priority_dirs for parent in path.parents):
            continue
        yield path
def get_all_paths(lang: str):
    """
    List the markdown pages of a language that should be processed.

    Pages are discovered under ``docs/<lang>/docs`` in the order produced by
    ``iter_all_lang_paths`` and returned as strings relative to that root.
    Any page whose relative path starts with one of the
    ``non_translated_sections`` prefixes is left out.
    """
    lang_docs_root = Path("docs") / lang / "docs"
    relative_pages = (
        str(found.relative_to(lang_docs_root))
        for found in iter_all_lang_paths(lang_docs_root)
    )
    return [
        page
        for page in relative_pages
        if not page.startswith(non_translated_sections)
    ]
@cli.command()
def fix_all(ctx: typer.Context, language: str):
    # Run fix_pages() on every page returned by get_all_paths(language),
    # one page at a time, so a ValueError on one page is reported and the
    # remaining pages are still processed. Other exception types propagate.
    # `ctx` is accepted but not used in this function.
    # Documented with comments rather than a docstring because Typer shows a
    # command's docstring as its --help text.
    lang_docs_root = Path("docs") / language / "docs"
    for relative_page in get_all_paths(language):
        doc_path = lang_docs_root / relative_page
        try:
            fix_pages(doc_paths=[doc_path])
        except ValueError as error:
            print(f"Error processing {doc_path}: {error}")
@cli.command()
def fix_pages(
    doc_paths: Annotated[
        list[Path],
        typer.Argument(help="List of paths to documents."),
    ],
):
    # For each given document, run a fixed sequence of fix-up passes that
    # take data extracted from the English counterpart and apply it to the
    # translated lines, then write the result back to the same file.
    # Each pass works on the output of the previous one, so order matters.
    # Documented with comments rather than a docstring because Typer shows a
    # command's docstring as its --help text.
    for translated_path in doc_paths:
        # The second path component is taken as the language code.
        language = translated_path.parts[1]
        if language == "en":
            print(f"Skipping English document: {translated_path}")
            continue

        # English counterpart: docs/en/<everything after the language code>.
        english_path = Path("docs") / "en" / Path(*translated_path.parts[2:])

        lines = translated_path.read_text(encoding="utf-8").splitlines()
        english_lines = english_path.read_text(encoding="utf-8").splitlines()

        # Pass 1: code includes (the only pass that also prints a diff).
        english_includes = extract_code_includes(english_lines)
        updated = replace_placeholders_with_code_includes(
            replace_code_includes_with_placeholders(lines), english_includes
        )
        if updated != lines:
            print(f"Fixing code includes in: {translated_path}")
            changes = difflib.unified_diff(
                lines, updated, fromfile="translation", tofile="fixed"
            )
            print("\n".join(changes))
            lines = updated

        # Pass 2: header permalinks.
        english_permalinks = extract_header_permalinks(english_lines)
        updated = replace_header_permalinks(lines, english_permalinks)
        if updated != lines:
            print(f"Fixing header permalinks in: {translated_path}")
            lines = updated

        # Pass 3: markdown links.
        english_md_links = extract_markdown_links(english_lines)
        updated = replace_markdown_links(lines, english_md_links, language)
        if updated != lines:
            print(f"Fixing markdown links in: {translated_path}")
            lines = updated

        # Pass 4: HTML links.
        english_html_links = extract_html_links(english_lines)
        updated = replace_html_links(lines, english_html_links, language)
        if updated != lines:
            print(f"Fixing HTML links in: {translated_path}")
            lines = updated

        # Pass 5: multiline code blocks.
        english_blocks = extract_multiline_code_blocks(english_lines)
        translated_blocks = extract_multiline_code_blocks(lines)
        updated = replace_multiline_code_blocks_in_text(
            lines, translated_blocks, english_blocks
        )
        if updated != lines:
            print(f"Fixing multiline code blocks in: {translated_path}")
            lines = updated

        # Write back; the extra empty entry makes the joined text end with a
        # newline (splitlines() dropped the final line terminator).
        translated_path.write_text("\n".join([*lines, ""]), encoding="utf-8")
if __name__ == "__main__":
cli()