Browse Source

🔨 Tweak translation script (#15174)

pull/15234/merge
Motov Yurii 1 month ago
committed by GitHub
parent
commit
38039e12a8
No known key found for this signature in database. GPG Key ID: B5690EEEBB952194
  1. 4
      scripts/general-llm-prompt.md
  2. 96
      scripts/translate.py

4
scripts/general-llm-prompt.md

@ -6,6 +6,8 @@ The original content is written in Markdown, write the translation in Markdown a
The original content will be surrounded by triple percentage signs (%%%). Do not include the triple percentage signs in the translation. The original content will be surrounded by triple percentage signs (%%%). Do not include the triple percentage signs in the translation.
[placeholder_for_additional_instructions]
### Technical terms in English ### Technical terms in English
For technical terms in English that don't have a common translation term, use the original term in English. For technical terms in English that don't have a common translation term, use the original term in English.
@ -223,6 +225,8 @@ Result (German):
Use the following rules for links (apply both to Markdown-style links ([text](url)) and to HTML-style <a href="url">text</a> tags): Use the following rules for links (apply both to Markdown-style links ([text](url)) and to HTML-style <a href="url">text</a> tags):
- The order of links should match the order of links in the English source. Do not change the order of links. Rephrase the sentence if necessary.
- For relative URLs, only translate the link text. Do not translate the URL or its parts. - For relative URLs, only translate the link text. Do not translate the URL or its parts.
Example: Example:

96
scripts/translate.py

@ -57,39 +57,19 @@ def generate_en_path(*, lang: str, path: Path) -> Path:
return out_path return out_path
@app.command() def get_prompt(
def translate_page( lang_prompt_content: str,
*, old_translation: str | None,
language: Annotated[str, typer.Option(envvar="LANGUAGE")], language: str,
en_path: Annotated[Path, typer.Option(envvar="EN_PATH")], language_name: str,
) -> None: original_content: str,
assert language != "en", ( additional_instructions: str,
"`en` is the source language, choose another language as translation target" ) -> str:
general_prompt_with_additional_instructions = general_prompt.replace(
"[placeholder_for_additional_instructions]", additional_instructions
) )
langs = get_langs()
language_name = langs[language]
lang_path = Path(f"docs/{language}")
lang_path.mkdir(exist_ok=True)
lang_prompt_path = lang_path / "llm-prompt.md"
assert lang_prompt_path.exists(), f"Prompt file not found: {lang_prompt_path}"
lang_prompt_content = lang_prompt_path.read_text(encoding="utf-8")
en_docs_path = Path("docs/en/docs")
assert str(en_path).startswith(str(en_docs_path)), (
f"Path must be inside {en_docs_path}"
)
out_path = generate_lang_path(lang=language, path=en_path)
out_path.parent.mkdir(parents=True, exist_ok=True)
original_content = en_path.read_text(encoding="utf-8")
old_translation: str | None = None
if out_path.exists():
print(f"Found existing translation: {out_path}")
old_translation = out_path.read_text(encoding="utf-8")
print(f"Translating {en_path} to {language} ({language_name})")
agent = Agent("openai:gpt-5")
prompt_segments = [ prompt_segments = [
general_prompt, general_prompt_with_additional_instructions,
lang_prompt_content, lang_prompt_content,
] ]
if old_translation: if old_translation:
@ -119,12 +99,57 @@ def translate_page(
f"%%%\n{original_content}%%%", f"%%%\n{original_content}%%%",
] ]
) )
prompt = "\n\n".join(prompt_segments) return "\n\n".join(prompt_segments)
@app.command()
def translate_page(
*,
language: Annotated[str, typer.Option(envvar="LANGUAGE")],
en_path: Annotated[Path, typer.Option(envvar="EN_PATH")],
) -> None:
assert language != "en", (
"`en` is the source language, choose another language as translation target"
)
langs = get_langs()
language_name = langs[language]
lang_path = Path(f"docs/{language}")
lang_path.mkdir(exist_ok=True)
lang_prompt_path = lang_path / "llm-prompt.md"
assert lang_prompt_path.exists(), f"Prompt file not found: {lang_prompt_path}"
lang_prompt_content = lang_prompt_path.read_text(encoding="utf-8")
en_docs_path = Path("docs/en/docs")
assert str(en_path).startswith(str(en_docs_path)), (
f"Path must be inside {en_docs_path}"
)
out_path = generate_lang_path(lang=language, path=en_path)
out_path.parent.mkdir(parents=True, exist_ok=True)
original_content = en_path.read_text(encoding="utf-8")
old_translation: str | None = None
if out_path.exists():
print(f"Found existing translation: {out_path}")
old_translation = out_path.read_text(encoding="utf-8")
print(f"Translating {en_path} to {language} ({language_name})")
agent = Agent("openai:gpt-5")
MAX_ATTEMPTS = 3 MAX_ATTEMPTS = 3
additional_instructions = ""
for attempt_no in range(1, MAX_ATTEMPTS + 1): for attempt_no in range(1, MAX_ATTEMPTS + 1):
print(f"Running agent for {out_path} (attempt {attempt_no}/{MAX_ATTEMPTS})") print(f"Running agent for {out_path} (attempt {attempt_no}/{MAX_ATTEMPTS})")
result = agent.run_sync(prompt) prompt = get_prompt(
lang_prompt_content=lang_prompt_content,
old_translation=old_translation,
language=language,
language_name=language_name,
original_content=original_content,
additional_instructions=additional_instructions,
)
result = agent.run_sync(
prompt.replace(
"[placeholder_for_additional_instructions]", additional_instructions
)
)
out_content = f"{result.output.strip()}\n" out_content = f"{result.output.strip()}\n"
try: try:
check_translation( check_translation(
@ -139,6 +164,11 @@ def translate_page(
print( print(
f"Translation check failed on attempt {attempt_no}/{MAX_ATTEMPTS}: {e}" f"Translation check failed on attempt {attempt_no}/{MAX_ATTEMPTS}: {e}"
) )
additional_instructions = (
f"Current translation fails validation checks ({str(e)}). "
"Please, pay special attention to it."
)
old_translation = out_content
continue # Retry if not reached max attempts continue # Retry if not reached max attempts
else: # Max retry attempts reached else: # Max retry attempts reached
print(f"Translation failed for {out_path} after {MAX_ATTEMPTS} attempts") print(f"Translation failed for {out_path} after {MAX_ATTEMPTS} attempts")

Loading…
Cancel
Save