Browse Source

👷 Add CI to translate with LLMs (#13937)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
pull/13939/head
Sebastián Ramírez 7 days ago
committed by GitHub
parent
commit
6516a6c4a6
No known key found for this signature in database GPG Key ID: B5690EEEBB952194
  1. 77
      .github/workflows/translate.yml
  2. 96
      scripts/translate.py

77
.github/workflows/translate.yml

@ -0,0 +1,77 @@
name: Translate
on:
workflow_dispatch:
inputs:
debug_enabled:
description: Run with tmate debugging enabled (https://github.com/marketplace/actions/debugging-with-tmate)
required: false
default: "false"
command:
description: Command to run
type: choice
options:
- translate-page
- translate-lang
- update-outdated
- add-missing
- update-and-add
- remove-all-removable
lang:
description: Language to translate to as a letter code (e.g. "es" for Spanish)
type: string
required: false
default: ""
en_path:
description: File path in English to translate (e.g. docs/en/docs/index.md)
type: string
required: false
default: ""
env:
UV_SYSTEM_PYTHON: 1
jobs:
job:
if: github.repository_owner == 'fastapi'
runs-on: ubuntu-latest
permissions:
contents: write
steps:
- name: Dump GitHub context
env:
GITHUB_CONTEXT: ${{ toJson(github) }}
run: echo "$GITHUB_CONTEXT"
- uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: "3.11"
- name: Setup uv
uses: astral-sh/setup-uv@v6
with:
version: "0.4.15"
enable-cache: true
cache-dependency-glob: |
requirements**.txt
pyproject.toml
- name: Install Dependencies
run: uv pip install -r requirements-github-actions.txt -r requirements-translations.txt
# Allow debugging with tmate
- name: Setup tmate session
uses: mxschmitt/action-tmate@v3
if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.debug_enabled == 'true' }}
with:
limit-access-to-actor: true
env:
GITHUB_TOKEN: ${{ secrets.FASTAPI_TRANSLATIONS }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
- name: FastAPI Translate
run: |
python ./scripts/translate.py ${{ github.event.inputs.command }}
python ./scripts/translate.py make-pr
env:
GITHUB_TOKEN: ${{ secrets.FASTAPI_TRANSLATIONS }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
LANG: ${{ github.event.inputs.lang }}
EN_PATH: ${{ github.event.inputs.en_path }}

96
scripts/translate.py

@ -1,10 +1,13 @@
import secrets
import subprocess
from functools import lru_cache from functools import lru_cache
from pathlib import Path from pathlib import Path
from typing import Iterable from typing import Annotated, Iterable
import git import git
import typer import typer
import yaml import yaml
from github import Github
from pydantic_ai import Agent from pydantic_ai import Agent
from rich import print from rich import print
@ -89,7 +92,11 @@ def generate_en_path(*, lang: str, path: Path) -> Path:
@app.command() @app.command()
def translate_page(*, lang: str, path: Path) -> None: def translate_page(
*,
lang: Annotated[str, typer.Option(envvar="LANG")],
en_path: Annotated[Path, typer.Option(envvar="EN_PATH")],
) -> None:
langs = get_langs() langs = get_langs()
language = langs[lang] language = langs[lang]
lang_path = Path(f"docs/{lang}") lang_path = Path(f"docs/{lang}")
@ -99,17 +106,17 @@ def translate_page(*, lang: str, path: Path) -> None:
lang_prompt_content = lang_prompt_path.read_text() lang_prompt_content = lang_prompt_path.read_text()
en_docs_path = Path("docs/en/docs") en_docs_path = Path("docs/en/docs")
assert str(path).startswith(str(en_docs_path)), ( assert str(en_path).startswith(str(en_docs_path)), (
f"Path must be inside {en_docs_path}" f"Path must be inside {en_docs_path}"
) )
out_path = generate_lang_path(lang=lang, path=path) out_path = generate_lang_path(lang=lang, path=en_path)
out_path.parent.mkdir(parents=True, exist_ok=True) out_path.parent.mkdir(parents=True, exist_ok=True)
original_content = path.read_text() original_content = en_path.read_text()
old_translation: str | None = None old_translation: str | None = None
if out_path.exists(): if out_path.exists():
print(f"Found existing translation: {out_path}") print(f"Found existing translation: {out_path}")
old_translation = out_path.read_text() old_translation = out_path.read_text()
print(f"Translating {path} to {lang} ({language})") print(f"Translating {en_path} to {lang} ({language})")
agent = Agent("openai:gpt-4o") agent = Agent("openai:gpt-4o")
prompt_segments = [ prompt_segments = [
@ -173,7 +180,7 @@ def iter_en_paths_to_translate() -> Iterable[Path]:
@app.command() @app.command()
def translate_all(lang: str) -> None: def translate_lang(lang: Annotated[str, typer.Option(envvar="LANG")]) -> None:
paths_to_process = list(iter_en_paths_to_translate()) paths_to_process = list(iter_en_paths_to_translate())
print("Original paths:") print("Original paths:")
for p in paths_to_process: for p in paths_to_process:
@ -197,7 +204,7 @@ def translate_all(lang: str) -> None:
print(f"Total paths to process: {len(missing_paths)}") print(f"Total paths to process: {len(missing_paths)}")
for p in missing_paths: for p in missing_paths:
print(f"Translating: {p}") print(f"Translating: {p}")
translate_page(lang="es", path=p) translate_page(lang="es", en_path=p)
print(f"Done translating: {p}") print(f"Done translating: {p}")
@ -244,6 +251,18 @@ def remove_all_removable() -> None:
print("Done removing all removable paths") print("Done removing all removable paths")
@app.command()
def list_missing(lang: str) -> list[Path]:
missing_paths: list[Path] = []
en_lang_paths = list(iter_en_paths_to_translate())
for path in en_lang_paths:
lang_path = generate_lang_path(lang=lang, path=path)
if not lang_path.exists():
missing_paths.append(path)
print(missing_paths)
return missing_paths
@app.command() @app.command()
def list_outdated(lang: str) -> list[Path]: def list_outdated(lang: str) -> list[Path]:
dir_path = Path(__file__).absolute().parent.parent dir_path = Path(__file__).absolute().parent.parent
@ -254,7 +273,6 @@ def list_outdated(lang: str) -> list[Path]:
for path in en_lang_paths: for path in en_lang_paths:
lang_path = generate_lang_path(lang=lang, path=path) lang_path = generate_lang_path(lang=lang, path=path)
if not lang_path.exists(): if not lang_path.exists():
outdated_paths.append(path)
continue continue
en_commit_datetime = list(repo.iter_commits(paths=path, max_count=1))[ en_commit_datetime = list(repo.iter_commits(paths=path, max_count=1))[
0 0
@ -269,14 +287,70 @@ def list_outdated(lang: str) -> list[Path]:
@app.command() @app.command()
def update_outdated(lang: str) -> None: def update_outdated(lang: Annotated[str, typer.Option(envvar="LANG")]) -> None:
outdated_paths = list_outdated(lang) outdated_paths = list_outdated(lang)
for path in outdated_paths: for path in outdated_paths:
print(f"Updating lang: {lang} path: {path}") print(f"Updating lang: {lang} path: {path}")
translate_page(lang=lang, path=path) translate_page(lang=lang, en_path=path)
print(f"Done updating: {path}") print(f"Done updating: {path}")
print("Done updating all outdated paths") print("Done updating all outdated paths")
@app.command()
def add_missing(lang: Annotated[str, typer.Option(envvar="LANG")]) -> None:
missing_paths = list_missing(lang)
for path in missing_paths:
print(f"Adding lang: {lang} path: {path}")
translate_page(lang=lang, en_path=path)
print(f"Done adding: {path}")
print("Done adding all missing paths")
@app.command()
def update_and_add(lang: Annotated[str, typer.Option(envvar="LANG")]) -> None:
print(f"Updating outdated translations for {lang}")
update_outdated(lang=lang)
print(f"Adding missing translations for {lang}")
add_missing(lang=lang)
print(f"Done updating and adding for {lang}")
@app.command()
def make_pr(
*,
lang: Annotated[str, typer.Option(envvar="LANG")],
github_token: Annotated[str, typer.Option(envvar="GITHUB_TOKEN")],
github_repository: Annotated[str, typer.Option(envvar="GITHUB_REPOSITORY")],
) -> None:
print("Setting up GitHub Actions git user")
repo = git.Repo(Path(__file__).absolute().parent.parent)
if not repo.is_dirty(untracked_files=True):
print("Repository is clean, no changes to commit")
return
subprocess.run(["git", "config", "user.name", "github-actions"], check=True)
subprocess.run(
["git", "config", "user.email", "github-actions@github.com"], check=True
)
branch_name = f"translate-{lang}-{secrets.token_hex(4)}"
print(f"Creating a new branch {branch_name}")
subprocess.run(["git", "checkout", "-b", branch_name], check=True)
print("Adding updated files")
lang_path = Path(f"docs/{lang}")
subprocess.run(["git", "add", str(lang_path)], check=True)
print("Committing updated file")
message = f"🌐 Update translations - {lang}"
subprocess.run(["git", "commit", "-m", message], check=True)
print("Pushing branch")
subprocess.run(["git", "push", "origin", branch_name], check=True)
print("Creating PR")
g = Github(github_token)
gh_repo = g.get_repo(github_repository)
pr = gh_repo.create_pull(
title=message, body=message, base="master", head=branch_name
)
print(f"Created PR: {pr.number}")
print("Finished")
if __name__ == "__main__": if __name__ == "__main__":
app() app()

Loading…
Cancel
Save