Browse Source

👷 Add CI to translate with LLMs (#13937)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
pull/13939/head
Sebastián Ramírez 6 days ago
committed by GitHub
parent
commit
6516a6c4a6
No known key found for this signature in database GPG Key ID: B5690EEEBB952194
  1. 77
      .github/workflows/translate.yml
  2. 96
      scripts/translate.py

77
.github/workflows/translate.yml

@ -0,0 +1,77 @@
# Manually dispatched workflow: runs one scripts/translate.py command chosen
# from the `command` input, then runs the script's `make-pr` command.
name: Translate
on:
  workflow_dispatch:
    inputs:
      debug_enabled:
        description: Run with tmate debugging enabled (https://github.com/marketplace/actions/debugging-with-tmate)
        required: false
        default: "false"
      command:
        description: Command to run
        type: choice
        options:
          - translate-page
          - translate-lang
          - update-outdated
          - add-missing
          - update-and-add
          - remove-all-removable
      lang:
        description: Language to translate to as a letter code (e.g. "es" for Spanish)
        type: string
        required: false
        default: ""
      en_path:
        description: File path in English to translate (e.g. docs/en/docs/index.md)
        type: string
        required: false
        default: ""
env:
  UV_SYSTEM_PYTHON: 1
jobs:
  job:
    # Skip the job unless the repository is owned by the `fastapi` account.
    if: github.repository_owner == 'fastapi'
    runs-on: ubuntu-latest
    permissions:
      contents: write
    steps:
      - name: Dump GitHub context
        env:
          GITHUB_CONTEXT: ${{ toJson(github) }}
        run: echo "$GITHUB_CONTEXT"
      - uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"
      - name: Setup uv
        uses: astral-sh/setup-uv@v6
        with:
          version: "0.4.15"
          enable-cache: true
          cache-dependency-glob: |
            requirements**.txt
            pyproject.toml
      - name: Install Dependencies
        run: uv pip install -r requirements-github-actions.txt -r requirements-translations.txt
      # Allow debugging with tmate
      # NOTE(review): this step is given both secrets, so they are visible
      # inside the interactive debug session.
      - name: Setup tmate session
        uses: mxschmitt/action-tmate@v3
        if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.debug_enabled == 'true' }}
        with:
          limit-access-to-actor: true
        env:
          GITHUB_TOKEN: ${{ secrets.FASTAPI_TRANSLATIONS }}
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
      - name: FastAPI Translate
        # The selected command is handed to the shell through an environment
        # variable and expanded quoted, instead of being templated into the
        # script text, so the input value is never parsed as shell syntax.
        run: |
          python ./scripts/translate.py "$COMMAND"
          python ./scripts/translate.py make-pr
        env:
          GITHUB_TOKEN: ${{ secrets.FASTAPI_TRANSLATIONS }}
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          COMMAND: ${{ github.event.inputs.command }}
          # LANG and EN_PATH are read by scripts/translate.py as option env vars.
          LANG: ${{ github.event.inputs.lang }}
          EN_PATH: ${{ github.event.inputs.en_path }}

96
scripts/translate.py

@ -1,10 +1,13 @@
import secrets
import subprocess
from functools import lru_cache
from pathlib import Path
from typing import Iterable
from typing import Annotated, Iterable
import git
import typer
import yaml
from github import Github
from pydantic_ai import Agent
from rich import print
@ -89,7 +92,11 @@ def generate_en_path(*, lang: str, path: Path) -> Path:
@app.command()
def translate_page(*, lang: str, path: Path) -> None:
def translate_page(
*,
lang: Annotated[str, typer.Option(envvar="LANG")],
en_path: Annotated[Path, typer.Option(envvar="EN_PATH")],
) -> None:
langs = get_langs()
language = langs[lang]
lang_path = Path(f"docs/{lang}")
@ -99,17 +106,17 @@ def translate_page(*, lang: str, path: Path) -> None:
lang_prompt_content = lang_prompt_path.read_text()
en_docs_path = Path("docs/en/docs")
assert str(path).startswith(str(en_docs_path)), (
assert str(en_path).startswith(str(en_docs_path)), (
f"Path must be inside {en_docs_path}"
)
out_path = generate_lang_path(lang=lang, path=path)
out_path = generate_lang_path(lang=lang, path=en_path)
out_path.parent.mkdir(parents=True, exist_ok=True)
original_content = path.read_text()
original_content = en_path.read_text()
old_translation: str | None = None
if out_path.exists():
print(f"Found existing translation: {out_path}")
old_translation = out_path.read_text()
print(f"Translating {path} to {lang} ({language})")
print(f"Translating {en_path} to {lang} ({language})")
agent = Agent("openai:gpt-4o")
prompt_segments = [
@ -173,7 +180,7 @@ def iter_en_paths_to_translate() -> Iterable[Path]:
@app.command()
def translate_all(lang: str) -> None:
def translate_lang(lang: Annotated[str, typer.Option(envvar="LANG")]) -> None:
paths_to_process = list(iter_en_paths_to_translate())
print("Original paths:")
for p in paths_to_process:
@ -197,7 +204,7 @@ def translate_all(lang: str) -> None:
print(f"Total paths to process: {len(missing_paths)}")
for p in missing_paths:
print(f"Translating: {p}")
translate_page(lang="es", path=p)
translate_page(lang="es", en_path=p)
print(f"Done translating: {p}")
@ -244,6 +251,18 @@ def remove_all_removable() -> None:
print("Done removing all removable paths")
@app.command()
def list_missing(lang: str) -> list[Path]:
    # Collect every English source path (as yielded by
    # iter_en_paths_to_translate) whose counterpart for `lang` does not exist
    # on disk yet. The result is printed and also returned so that other
    # commands can call this function directly.
    missing_paths: list[Path] = [
        en_path
        for en_path in iter_en_paths_to_translate()
        if not generate_lang_path(lang=lang, path=en_path).exists()
    ]
    print(missing_paths)
    return missing_paths
@app.command()
def list_outdated(lang: str) -> list[Path]:
dir_path = Path(__file__).absolute().parent.parent
@ -254,7 +273,6 @@ def list_outdated(lang: str) -> list[Path]:
for path in en_lang_paths:
lang_path = generate_lang_path(lang=lang, path=path)
if not lang_path.exists():
outdated_paths.append(path)
continue
en_commit_datetime = list(repo.iter_commits(paths=path, max_count=1))[
0
@ -269,14 +287,70 @@ def list_outdated(lang: str) -> list[Path]:
@app.command()
def update_outdated(lang: str) -> None:
def update_outdated(lang: Annotated[str, typer.Option(envvar="LANG")]) -> None:
outdated_paths = list_outdated(lang)
for path in outdated_paths:
print(f"Updating lang: {lang} path: {path}")
translate_page(lang=lang, path=path)
translate_page(lang=lang, en_path=path)
print(f"Done updating: {path}")
print("Done updating all outdated paths")
@app.command()
def add_missing(lang: Annotated[str, typer.Option(envvar="LANG")]) -> None:
    # Translate every page that list_missing() reports for `lang`, one page at
    # a time, logging before and after each translation.
    # `lang` may be supplied through the LANG environment variable.
    for en_path in list_missing(lang):
        print(f"Adding lang: {lang} path: {en_path}")
        translate_page(lang=lang, en_path=en_path)
        print(f"Done adding: {en_path}")
    print("Done adding all missing paths")
@app.command()
def update_and_add(lang: Annotated[str, typer.Option(envvar="LANG")]) -> None:
    # Run the two maintenance passes for `lang` in a fixed order: refresh the
    # outdated translations first, then create the missing ones.
    # `lang` may be supplied through the LANG environment variable.
    stages = (
        (f"Updating outdated translations for {lang}", update_outdated),
        (f"Adding missing translations for {lang}", add_missing),
    )
    for announcement, run_stage in stages:
        print(announcement)
        run_stage(lang=lang)
    print(f"Done updating and adding for {lang}")
@app.command()
def make_pr(
    *,
    lang: Annotated[str, typer.Option(envvar="LANG")],
    github_token: Annotated[str, typer.Option(envvar="GITHUB_TOKEN")],
    github_repository: Annotated[str, typer.Option(envvar="GITHUB_REPOSITORY")],
) -> None:
    # Commit whatever changed under docs/{lang} to a fresh branch, push it to
    # `origin`, and open a pull request against `master`.
    #
    # Parameters (each can also come from the environment variable named in
    # its typer.Option):
    #   lang              - language code; selects docs/{lang} and names the branch.
    #   github_token      - token used to create the pull request.
    #   github_repository - repository name passed to Github.get_repo().
    #
    # Returns early, without creating a branch or PR, when the repository is
    # clean or when nothing under docs/{lang} ends up staged.
    # Raises subprocess.CalledProcessError if any git command fails.
    print("Setting up GitHub Actions git user")
    repo = git.Repo(Path(__file__).absolute().parent.parent)
    if not repo.is_dirty(untracked_files=True):
        print("Repository is clean, no changes to commit")
        return
    subprocess.run(["git", "config", "user.name", "github-actions"], check=True)
    subprocess.run(
        ["git", "config", "user.email", "github-actions@github.com"], check=True
    )
    print("Adding updated files")
    lang_path = Path(f"docs/{lang}")
    subprocess.run(["git", "add", str(lang_path)], check=True)
    # The dirty check above looks at the whole repository, but only
    # docs/{lang} is staged. If the only changes live elsewhere there is
    # nothing to commit, and `git commit` would exit non-zero. Detect that
    # before creating a branch: `git diff --cached --quiet` exits 0 when the
    # index matches HEAD and 1 when something is staged.
    staged_check = subprocess.run(["git", "diff", "--cached", "--quiet"])
    if staged_check.returncode == 0:
        print(f"No changes staged under {lang_path}, nothing to commit")
        return
    if staged_check.returncode != 1:
        # Any other exit status is a genuine git failure; surface it.
        staged_check.check_returncode()
    # Random suffix keeps repeated runs for the same language from colliding.
    branch_name = f"translate-{lang}-{secrets.token_hex(4)}"
    print(f"Creating a new branch {branch_name}")
    # The staged index carries over to the newly created branch.
    subprocess.run(["git", "checkout", "-b", branch_name], check=True)
    print("Committing updated file")
    message = f"🌐 Update translations - {lang}"
    subprocess.run(["git", "commit", "-m", message], check=True)
    print("Pushing branch")
    subprocess.run(["git", "push", "origin", branch_name], check=True)
    print("Creating PR")
    g = Github(github_token)
    gh_repo = g.get_repo(github_repository)
    # The commit message doubles as both the PR title and body.
    pr = gh_repo.create_pull(
        title=message, body=message, base="master", head=branch_name
    )
    print(f"Created PR: {pr.number}")
    print("Finished")
# Script entry point: invoke the `app` command-line application when this
# file is executed directly rather than imported.
if __name__ == "__main__":
    app()

Loading…
Cancel
Save