Browse Source

Refactor `replace_html_links`, improve error message

pull/14710/head
Yurii Motov 5 months ago
parent
commit
badefaba9f
  1. scripts/doc_parsing_utils.py (20 lines changed)
  2. scripts/translation_fixer.py (5 lines changed)

scripts/doc_parsing_utils.py (20 lines changed)

@@ -269,7 +269,7 @@ def extract_markdown_links(lines: list[str]) -> list[tuple[str, int]]:
 text=m.group("text"),
 title=m.group("title"),
 attributes=m.group("attrs"),
-full_match=m.group(0)
+full_match=m.group(0),
 )
 )
 return links
@@ -337,7 +337,9 @@ def replace_markdown_links(
 )
 line_no = link_info["line_no"] - 1
 modified_line = modified_text[line_no]
-modified_line = modified_line.replace(link_info["full_match"], replacement_link, 1)
+modified_line = modified_line.replace(
+    link_info["full_match"], replacement_link, 1
+)
 modified_text[line_no] = modified_line
 return modified_text
@@ -430,7 +432,10 @@ def _construct_html_link(
 def replace_html_links(
-    text: list[str], original_links: list[HtmlLinkInfo], lang_code: str
+    text: list[str],
+    links: list[HtmlLinkInfo],
+    original_links: list[HtmlLinkInfo],
+    lang_code: str,
 ) -> list[str]:
     """
     Replace HTML links in the given text with the links from the original document.
@@ -439,13 +444,12 @@ def replace_html_links(
     Fail if the number of links does not match the original.
     """
-    links = extract_html_links(text)
-    if len(links) > len(original_links):
+    if len(links) != len(original_links):
         raise ValueError(
-            "Number of HTML links exceeds number of HTML links in the original document"
+            "Number of HTML links does not match the number of HTML links in the "
+            "original document "
+            f"({len(links)} vs {len(original_links)})"
         )
-    elif len(links) < len(original_links):
-        raise ValueError("Number of HTML links is less than in the original document")
     modified_text = text.copy()
     for link_index, link in enumerate(links):

scripts/translation_fixer.py (5 lines changed)

@@ -131,7 +131,10 @@ def process_one_page(path: Path) -> bool:
 # Fix HTML links
 en_html_links = extract_html_links(en_doc_lines)
-fixed_doc_lines = replace_html_links(doc_lines, en_html_links, lang_code)
+doc_html_links = extract_html_links(doc_lines)
+fixed_doc_lines = replace_html_links(
+    doc_lines, doc_html_links, en_html_links, lang_code
+)
 if fixed_doc_lines != doc_lines:
     print(f"Fixing HTML links in: {path}")
     doc_lines = fixed_doc_lines

Loading…
Cancel
Save