From 7fe098a72ed9bd1a1841ff9031b443832ccd4a7c Mon Sep 17 00:00:00 2001 From: Zawwarsami16 Date: Thu, 14 May 2026 03:41:33 -0400 Subject: [PATCH] fix(sse): preserve trailing newlines + use spec-correct line splitting in format_sse_event MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit splitlines() drops trailing empty strings and treats 8 extra characters (\v, \f, \x1c-\x1e, \x85, U+2028, U+2029) as line breaks. SSE only recognizes \n, \r\n, and \r per the spec, and trailing empty data lines are part of the payload — silently dropping them corrupts the stream. Both the data: and the comment branch were affected. Adds 8 unit tests covering trailing-newline preservation, CRLF/CR normalization, and the splitlines() quirks (U+2028, vertical tab) staying inside the payload. Closes #15500 --- fastapi/sse.py | 14 +++++++++-- tests/test_sse.py | 64 ++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 75 insertions(+), 3 deletions(-) diff --git a/fastapi/sse.py b/fastapi/sse.py index 901d824964..a55c7acb52 100644 --- a/fastapi/sse.py +++ b/fastapi/sse.py @@ -143,6 +143,16 @@ class ServerSentEvent(BaseModel): return self +def _split_sse_lines(value: str) -> list[str]: + # SSE recognizes only `\n`, `\r\n`, and `\r` as line terminators + # (https://html.spec.whatwg.org/multipage/server-sent-events.html). + # `str.splitlines()` is wrong on two counts: it treats 8 extra characters + # (`\v`, `\f`, `\x1c`-`\x1e`, `\x85`, U+2028, U+2029) as line breaks, and + # it drops a trailing empty string, so e.g. `"hello\n"` would emit only + # one `data:` line instead of two. 
+ return value.replace("\r\n", "\n").replace("\r", "\n").split("\n") + + def format_sse_event( *, data_str: Annotated[ @@ -193,14 +203,14 @@ def format_sse_event( lines: list[str] = [] if comment is not None: - for line in comment.splitlines(): + for line in _split_sse_lines(comment): lines.append(f": {line}") if event is not None: lines.append(f"event: {event}") if data_str is not None: - for line in data_str.splitlines(): + for line in _split_sse_lines(data_str): lines.append(f"data: {line}") if id is not None: diff --git a/tests/test_sse.py b/tests/test_sse.py index 6dfec61838..23361e989f 100644 --- a/tests/test_sse.py +++ b/tests/test_sse.py @@ -6,7 +6,7 @@ import fastapi.routing import pytest from fastapi import APIRouter, FastAPI from fastapi.responses import EventSourceResponse -from fastapi.sse import ServerSentEvent +from fastapi.sse import ServerSentEvent, format_sse_event from fastapi.testclient import TestClient from pydantic import BaseModel @@ -316,3 +316,65 @@ def test_no_keepalive_when_fast(client: TestClient): assert response.status_code == 200 # KEEPALIVE_COMMENT is ": ping\n\n". assert ": ping\n" not in response.text + + +# format_sse_event line-splitting tests +# +# These cover the splitlines() footgun: it drops trailing empty strings and +# treats 8 extra characters as line breaks (vertical tab, form feed, FS/GS/RS, +# NEL, LINE SEPARATOR, PARAGRAPH SEPARATOR). SSE only recognizes \n, \r\n, \r. + + +def test_format_sse_event_preserves_trailing_newline(): + # "Hello\n" should produce TWO data lines: "Hello" and "" (the trailing + # empty line). Pre-fix, splitlines() ate the trailing empty string. 
+ assert format_sse_event(data_str="Hello\n") == b"data: Hello\ndata: \n\n" + + +def test_format_sse_event_preserves_trailing_double_newline(): + assert ( + format_sse_event(data_str="Hello\n\n") + == b"data: Hello\ndata: \ndata: \n\n" + ) + + +def test_format_sse_event_single_newline_data(): + assert format_sse_event(data_str="\n") == b"data: \ndata: \n\n" + + +def test_format_sse_event_crlf_normalizes_to_lf(): + # \r\n is a valid SSE line terminator and should be normalized to \n + # for output, producing the same two data lines as \n input would. + assert ( + format_sse_event(data_str="Hello\r\nWorld") + == b"data: Hello\ndata: World\n\n" + ) + + +def test_format_sse_event_bare_cr_treated_as_line_break(): + # Lone \r is also a valid SSE line terminator per the spec. + assert ( + format_sse_event(data_str="Hello\rWorld") + == b"data: Hello\ndata: World\n\n" + ) + + +def test_format_sse_event_unicode_line_separator_not_split(): + # U+2028 LINE SEPARATOR is treated as a line break by str.splitlines() + # but is NOT a line terminator in the SSE spec. It must stay inside the + # data payload, not be promoted to a new "data:" line. + assert ( + format_sse_event(data_str="A\u2028B") == "data: A\u2028B\n\n".encode() + ) + + +def test_format_sse_event_vertical_tab_not_split(): + # \v is treated as a line break by splitlines() but not by SSE. + assert ( + format_sse_event(data_str="A\vB") == b"data: A\x0bB\n\n" + ) + + +def test_format_sse_event_comment_preserves_trailing_newline(): + # Same bug existed in the comment branch. + assert format_sse_event(comment="hi\n") == b": hi\n: \n\n"