address review: shorten comment, parametrize tests, add empty-data case

2 weeks ago · 866e577031
2 changed files with 25 additions and 53 deletions
--- a/fastapi/sse.py
+++ b/fastapi/sse.py
@@ -144,12 +144,8 @@ class ServerSentEvent(BaseModel):


 def _split_sse_lines(value: str) -> list[str]:
-    # SSE recognizes only `\n`, `\r\n`, and `\r` as line terminators
-    # (https://html.spec.whatwg.org/multipage/server-sent-events.html).
-    # `str.splitlines()` is wrong on two counts: it treats 8 extra characters
-    # (`\v`, `\f`, `\x1c`-`\x1e`, `\x85`, U+2028, U+2029) as line breaks, and
-    # it drops a trailing empty string, so e.g. `"hello\n"` would emit only
-    # one `data:` line instead of two.
+    # Split on SSE-spec line terminators only (\n, \r\n, \r), preserving
+    # trailing empty strings.
    return value.replace("\r\n", "\n").replace("\r", "\n").split("\n")


--- a/tests/test_sse.py
+++ b/tests/test_sse.py
@@ -318,52 +318,28 @@ def test_no_keepalive_when_fast(client: TestClient):
    assert ": ping\n" not in response.text


-# format_sse_event line-splitting tests
-#
-# These cover the splitlines() footgun: it drops trailing empty strings and
-# treats 8 extra characters as line breaks (vertical tab, form feed, FS/GS/RS,
-# NEL, LINE SEPARATOR, PARAGRAPH SEPARATOR). SSE only recognizes \n, \r\n, \r.
-
-
-def test_format_sse_event_preserves_trailing_newline():
-    # "Hello\n" should produce TWO data lines: "Hello" and "" (the trailing
-    # empty line). Pre-fix, splitlines() ate the trailing empty string.
-    assert format_sse_event(data_str="Hello\n") == b"data: Hello\ndata: \n\n"
-
-
-def test_format_sse_event_preserves_trailing_double_newline():
-    assert format_sse_event(data_str="Hello\n\n") == b"data: Hello\ndata: \ndata: \n\n"
-
-
-def test_format_sse_event_single_newline_data():
-    assert format_sse_event(data_str="\n") == b"data: \ndata: \n\n"
-
-
-def test_format_sse_event_crlf_normalizes_to_lf():
-    # \r\n is a valid SSE line terminator and should be normalized to \n
-    # for output, producing the same two data lines as \n input would.
-    assert (
-        format_sse_event(data_str="Hello\r\nWorld") == b"data: Hello\ndata: World\n\n"
-    )
-
-
-def test_format_sse_event_bare_cr_treated_as_line_break():
-    # Lone \r is also a valid SSE line terminator per the spec.
-    assert format_sse_event(data_str="Hello\rWorld") == b"data: Hello\ndata: World\n\n"
-
-
-def test_format_sse_event_unicode_line_separator_not_split():
-    # U+2028 LINE SEPARATOR is treated as a line break by str.splitlines()
-    # but is NOT a line terminator in the SSE spec. It must stay inside the
-    # data payload, not be promoted to a new "data:" line.
-    assert format_sse_event(data_str="A\u2028B") == "data: A\u2028B\n\n".encode()
-
-
-def test_format_sse_event_vertical_tab_not_split():
-    # \v is treated as a line break by splitlines() but not by SSE.
-    assert format_sse_event(data_str="A\vB") == b"data: A\x0bB\n\n"
+@pytest.mark.parametrize(
+    ("data", "expected_result"),
+    [
+        ("Hello\n", b"data: Hello\ndata: \n\n"),
+        ("Hello\n\n", b"data: Hello\ndata: \ndata: \n\n"),
+        ("\n", b"data: \ndata: \n\n"),
+        ("Hello\r\nWorld", b"data: Hello\ndata: World\n\n"),
+        ("Hello\rWorld", b"data: Hello\ndata: World\n\n"),
+        ("A\u2028B", "data: A\u2028B\n\n".encode()),
+        ("A\vB", b"data: A\x0bB\n\n"),
+    ],
+)
+def test_format_sse_event_splitlines_behavior_in_data(
+    data: str, expected_result: bytes
+) -> None:
+    assert format_sse_event(data_str=data) == expected_result
+
+
+def test_format_sse_event_splitlines_behavior_in_comment():
+    assert format_sse_event(comment="hi\n") == b": hi\n: \n\n"


-def test_format_sse_event_comment_preserves_trailing_newline():
-    # Same bug existed in the comment branch.
-    assert format_sse_event(comment="hi\n") == b": hi\n: \n\n"
+def test_format_sse_event_keeps_empty_data_line():
+    payload = format_sse_event(data_str="")
+    assert payload == b"data: \n\n"