|
|
@ -57,39 +57,19 @@ def generate_en_path(*, lang: str, path: Path) -> Path: |
|
|
return out_path |
|
|
return out_path |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@app.command() |
|
|
def get_prompt( |
|
|
def translate_page( |
|
|
lang_prompt_content: str, |
|
|
*, |
|
|
old_translation: str | None, |
|
|
language: Annotated[str, typer.Option(envvar="LANGUAGE")], |
|
|
language: str, |
|
|
en_path: Annotated[Path, typer.Option(envvar="EN_PATH")], |
|
|
language_name: str, |
|
|
) -> None: |
|
|
original_content: str, |
|
|
assert language != "en", ( |
|
|
additional_instructions: str, |
|
|
"`en` is the source language, choose another language as translation target" |
|
|
) -> str: |
|
|
|
|
|
general_prompt_with_additional_instructions = general_prompt.replace( |
|
|
|
|
|
"[placeholder_for_additional_instructions]", additional_instructions |
|
|
) |
|
|
) |
|
|
langs = get_langs() |
|
|
|
|
|
language_name = langs[language] |
|
|
|
|
|
lang_path = Path(f"docs/{language}") |
|
|
|
|
|
lang_path.mkdir(exist_ok=True) |
|
|
|
|
|
lang_prompt_path = lang_path / "llm-prompt.md" |
|
|
|
|
|
assert lang_prompt_path.exists(), f"Prompt file not found: {lang_prompt_path}" |
|
|
|
|
|
lang_prompt_content = lang_prompt_path.read_text(encoding="utf-8") |
|
|
|
|
|
|
|
|
|
|
|
en_docs_path = Path("docs/en/docs") |
|
|
|
|
|
assert str(en_path).startswith(str(en_docs_path)), ( |
|
|
|
|
|
f"Path must be inside {en_docs_path}" |
|
|
|
|
|
) |
|
|
|
|
|
out_path = generate_lang_path(lang=language, path=en_path) |
|
|
|
|
|
out_path.parent.mkdir(parents=True, exist_ok=True) |
|
|
|
|
|
original_content = en_path.read_text(encoding="utf-8") |
|
|
|
|
|
old_translation: str | None = None |
|
|
|
|
|
if out_path.exists(): |
|
|
|
|
|
print(f"Found existing translation: {out_path}") |
|
|
|
|
|
old_translation = out_path.read_text(encoding="utf-8") |
|
|
|
|
|
print(f"Translating {en_path} to {language} ({language_name})") |
|
|
|
|
|
agent = Agent("openai:gpt-5") |
|
|
|
|
|
|
|
|
|
|
|
prompt_segments = [ |
|
|
prompt_segments = [ |
|
|
general_prompt, |
|
|
general_prompt_with_additional_instructions, |
|
|
lang_prompt_content, |
|
|
lang_prompt_content, |
|
|
] |
|
|
] |
|
|
if old_translation: |
|
|
if old_translation: |
|
|
@ -119,12 +99,57 @@ def translate_page( |
|
|
f"%%%\n{original_content}%%%", |
|
|
f"%%%\n{original_content}%%%", |
|
|
] |
|
|
] |
|
|
) |
|
|
) |
|
|
prompt = "\n\n".join(prompt_segments) |
|
|
return "\n\n".join(prompt_segments) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@app.command() |
|
|
|
|
|
def translate_page( |
|
|
|
|
|
*, |
|
|
|
|
|
language: Annotated[str, typer.Option(envvar="LANGUAGE")], |
|
|
|
|
|
en_path: Annotated[Path, typer.Option(envvar="EN_PATH")], |
|
|
|
|
|
) -> None: |
|
|
|
|
|
assert language != "en", ( |
|
|
|
|
|
"`en` is the source language, choose another language as translation target" |
|
|
|
|
|
) |
|
|
|
|
|
langs = get_langs() |
|
|
|
|
|
language_name = langs[language] |
|
|
|
|
|
lang_path = Path(f"docs/{language}") |
|
|
|
|
|
lang_path.mkdir(exist_ok=True) |
|
|
|
|
|
lang_prompt_path = lang_path / "llm-prompt.md" |
|
|
|
|
|
assert lang_prompt_path.exists(), f"Prompt file not found: {lang_prompt_path}" |
|
|
|
|
|
lang_prompt_content = lang_prompt_path.read_text(encoding="utf-8") |
|
|
|
|
|
|
|
|
|
|
|
en_docs_path = Path("docs/en/docs") |
|
|
|
|
|
assert str(en_path).startswith(str(en_docs_path)), ( |
|
|
|
|
|
f"Path must be inside {en_docs_path}" |
|
|
|
|
|
) |
|
|
|
|
|
out_path = generate_lang_path(lang=language, path=en_path) |
|
|
|
|
|
out_path.parent.mkdir(parents=True, exist_ok=True) |
|
|
|
|
|
original_content = en_path.read_text(encoding="utf-8") |
|
|
|
|
|
old_translation: str | None = None |
|
|
|
|
|
if out_path.exists(): |
|
|
|
|
|
print(f"Found existing translation: {out_path}") |
|
|
|
|
|
old_translation = out_path.read_text(encoding="utf-8") |
|
|
|
|
|
print(f"Translating {en_path} to {language} ({language_name})") |
|
|
|
|
|
agent = Agent("openai:gpt-5") |
|
|
|
|
|
|
|
|
MAX_ATTEMPTS = 3 |
|
|
MAX_ATTEMPTS = 3 |
|
|
|
|
|
additional_instructions = "" |
|
|
for attempt_no in range(1, MAX_ATTEMPTS + 1): |
|
|
for attempt_no in range(1, MAX_ATTEMPTS + 1): |
|
|
print(f"Running agent for {out_path} (attempt {attempt_no}/{MAX_ATTEMPTS})") |
|
|
print(f"Running agent for {out_path} (attempt {attempt_no}/{MAX_ATTEMPTS})") |
|
|
result = agent.run_sync(prompt) |
|
|
prompt = get_prompt( |
|
|
|
|
|
lang_prompt_content=lang_prompt_content, |
|
|
|
|
|
old_translation=old_translation, |
|
|
|
|
|
language=language, |
|
|
|
|
|
language_name=language_name, |
|
|
|
|
|
original_content=original_content, |
|
|
|
|
|
additional_instructions=additional_instructions, |
|
|
|
|
|
) |
|
|
|
|
|
result = agent.run_sync( |
|
|
|
|
|
prompt.replace( |
|
|
|
|
|
"[placeholder_for_additional_instructions]", additional_instructions |
|
|
|
|
|
) |
|
|
|
|
|
) |
|
|
out_content = f"{result.output.strip()}\n" |
|
|
out_content = f"{result.output.strip()}\n" |
|
|
try: |
|
|
try: |
|
|
check_translation( |
|
|
check_translation( |
|
|
@ -139,6 +164,11 @@ def translate_page( |
|
|
print( |
|
|
print( |
|
|
f"Translation check failed on attempt {attempt_no}/{MAX_ATTEMPTS}: {e}" |
|
|
f"Translation check failed on attempt {attempt_no}/{MAX_ATTEMPTS}: {e}" |
|
|
) |
|
|
) |
|
|
|
|
|
additional_instructions = ( |
|
|
|
|
|
f"Current translation fails validation checks ({str(e)}). " |
|
|
|
|
|
"Please, pay special attention to it." |
|
|
|
|
|
) |
|
|
|
|
|
old_translation = out_content |
|
|
continue # Retry if not reached max attempts |
|
|
continue # Retry if not reached max attempts |
|
|
else: # Max retry attempts reached |
|
|
else: # Max retry attempts reached |
|
|
print(f"Translation failed for {out_path} after {MAX_ATTEMPTS} attempts") |
|
|
print(f"Translation failed for {out_path} after {MAX_ATTEMPTS} attempts") |
|
|
|