Browse Source

Add explicit encoding info

Otherwise the script raises an error on my Windows machine, because the files are read and written using the system default encoding, which is not UTF-8 there.
pull/13968/head
Nils Lindemann 4 days ago
committed by Yurii Motov
parent
commit
4c590c1610
  1. 10
      scripts/translate.py

10
scripts/translate.py

@ -100,7 +100,7 @@ app = typer.Typer()
@lru_cache
def get_langs() -> dict[str, str]:
return yaml.safe_load(Path("docs/language_names.yml").read_text())
return yaml.safe_load(Path("docs/language_names.yml").read_text(encoding="utf-8"))
def generate_lang_path(*, lang: str, path: Path) -> Path:
@ -135,7 +135,7 @@ def translate_page(
lang_path.mkdir(exist_ok=True)
lang_prompt_path = lang_path / "llm-prompt.md"
assert lang_prompt_path.exists(), f"Prompt file not found: {lang_prompt_path}"
lang_prompt_content = lang_prompt_path.read_text()
lang_prompt_content = lang_prompt_path.read_text(encoding="utf-8")
en_docs_path = Path("docs/en/docs")
assert str(en_path).startswith(str(en_docs_path)), (
@ -143,11 +143,11 @@ def translate_page(
)
out_path = generate_lang_path(lang=language, path=en_path)
out_path.parent.mkdir(parents=True, exist_ok=True)
original_content = en_path.read_text()
original_content = en_path.read_text(encoding="utf-8")
old_translation: str | None = None
if out_path.exists():
print(f"Found existing translation: {out_path}")
old_translation = out_path.read_text()
old_translation = out_path.read_text(encoding="utf-8")
print(f"Translating {en_path} to {language} ({language_name})")
agent = Agent("openai:gpt-4o")
@ -184,7 +184,7 @@ def translate_page(
result = agent.run_sync(prompt)
out_content = f"{result.data.strip()}\n"
print(f"Saving translation to {out_path}")
out_path.write_text(out_content)
out_path.write_text(out_content, encoding='utf-8', newline='\n')
def iter_all_en_paths() -> Iterable[Path]:

Loading…
Cancel
Save