From 2b6fee87d42ef87ec830aa40bd89bdb4685ebd75 Mon Sep 17 00:00:00 2001 From: Yurii Motov Date: Fri, 1 Aug 2025 12:35:02 +0200 Subject: [PATCH 1/4] Add examples of handling code blocks to avoid translating them --- scripts/translate.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/scripts/translate.py b/scripts/translate.py index 2fdc18ca0..268d9e0d6 100644 --- a/scripts/translate.py +++ b/scripts/translate.py @@ -34,6 +34,36 @@ The content is written in markdown, write the translation in markdown as well. D When there's an example of code, the console or a terminal, normally surrounded by triple backticks and a keyword like "console" or "bash" (e.g. ```console), do not translate the content, keep the original in English. +For example, if the original (English) content is: + +```bash +# Print greeting +echo "Hello, World!" +``` + +It should be exactly the same in the output document: + +```bash +# Print greeting +echo "Hello, World!" +``` + +If the original (English) content is: + +```console +$ fastapi run main.py + FastAPI Starting server + Searching for package file structure +``` + +It should be exactly the same in the output document: + +```console +$ fastapi run main.py + FastAPI Starting server + Searching for package file structure +``` + The original content will be surrounded by triple percentage signs (%) and you should translate it to the target language. Do not include the triple percentage signs in the translation. There are special blocks of notes, tips and others that look like: From 4c590c161061b5b3a51d169ba37eb3c4704ed790 Mon Sep 17 00:00:00 2001 From: Nils Lindemann Date: Mon, 4 Aug 2025 10:42:37 +0200 Subject: [PATCH 2/4] Add explicit encoding info Otherwise it raises on my Windows, as it tries to use the system encoding, which is not utf-8. 
--- scripts/translate.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/scripts/translate.py b/scripts/translate.py index 268d9e0d6..cddbd73f9 100644 --- a/scripts/translate.py +++ b/scripts/translate.py @@ -100,7 +100,7 @@ app = typer.Typer() @lru_cache def get_langs() -> dict[str, str]: - return yaml.safe_load(Path("docs/language_names.yml").read_text()) + return yaml.safe_load(Path("docs/language_names.yml").read_text(encoding="utf-8")) def generate_lang_path(*, lang: str, path: Path) -> Path: @@ -135,7 +135,7 @@ def translate_page( lang_path.mkdir(exist_ok=True) lang_prompt_path = lang_path / "llm-prompt.md" assert lang_prompt_path.exists(), f"Prompt file not found: {lang_prompt_path}" - lang_prompt_content = lang_prompt_path.read_text() + lang_prompt_content = lang_prompt_path.read_text(encoding="utf-8") en_docs_path = Path("docs/en/docs") assert str(en_path).startswith(str(en_docs_path)), ( @@ -143,11 +143,11 @@ def translate_page( ) out_path = generate_lang_path(lang=language, path=en_path) out_path.parent.mkdir(parents=True, exist_ok=True) - original_content = en_path.read_text() + original_content = en_path.read_text(encoding="utf-8") old_translation: str | None = None if out_path.exists(): print(f"Found existing translation: {out_path}") - old_translation = out_path.read_text() + old_translation = out_path.read_text(encoding="utf-8") print(f"Translating {en_path} to {language} ({language_name})") agent = Agent("openai:gpt-4o") @@ -184,7 +184,7 @@ def translate_page( result = agent.run_sync(prompt) out_content = f"{result.data.strip()}\n" print(f"Saving translation to {out_path}") - out_path.write_text(out_content) + out_path.write_text(out_content, encoding='utf-8', newline='\n') def iter_all_en_paths() -> Iterable[Path]: From 16ee06bb89e7dce402c86570fe63b9c1beddd2f3 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 4 Aug 2025 16:07:23 +0000 Subject: [PATCH 
3/4] =?UTF-8?q?=F0=9F=8E=A8=20[pre-commit.ci]=20Auto=20for?= =?UTF-8?q?mat=20from=20pre-commit.com=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scripts/translate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/translate.py b/scripts/translate.py index cddbd73f9..6dfeeadf2 100644 --- a/scripts/translate.py +++ b/scripts/translate.py @@ -184,7 +184,7 @@ def translate_page( result = agent.run_sync(prompt) out_content = f"{result.data.strip()}\n" print(f"Saving translation to {out_path}") - out_path.write_text(out_content, encoding='utf-8', newline='\n') + out_path.write_text(out_content, encoding="utf-8", newline="\n") def iter_all_en_paths() -> Iterable[Path]: From da9659b82efd8211e206e7ebae9ab29da937f728 Mon Sep 17 00:00:00 2001 From: Yurii Motov Date: Tue, 5 Aug 2025 11:14:00 +0200 Subject: [PATCH 4/4] Update general prompt to generate fixed anchors for headers --- scripts/translate.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/scripts/translate.py b/scripts/translate.py index 6dfeeadf2..cd73ba277 100644 --- a/scripts/translate.py +++ b/scripts/translate.py @@ -93,6 +93,24 @@ Source: Result: /// details | Vista previa + +For **every** Markdown header (all levels), add a custom anchor based on the original English header: + +- Convert the English header to lowercase, replace spaces and punctuation with hyphens (kebab-case). +- Keep this anchor identical across all translations — do not translate it. +- Even if the original English document doesn't have an anchor, always add the correct anchor in the translated version. +- Use these fixed anchors for all internal links across languages. + +Example: + +Source (English): + +## Alternative API docs + +Result (Spanish): + +## Documentación de la API alternativa + """ app = typer.Typer()