From 7fe098a72ed9bd1a1841ff9031b443832ccd4a7c Mon Sep 17 00:00:00 2001
From: Zawwarsami16 <zawwarsami16@gmail.com>
Date: Thu, 14 May 2026 03:41:33 -0400
Subject: [PATCH] fix(sse): preserve trailing newlines + use spec-correct line
 splitting in format_sse_event
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

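str.splitlines() omits the empty string that follows a trailing line
terminator and treats 8 extra characters (\v, \f, \x1c-\x1e, \x85,
U+2028, U+2029) as line breaks. SSE recognizes only \n, \r\n, and \r
per the spec, and trailing empty data lines are part of the payload —
silently dropping them changes the data the client receives.

Note: an empty string now produces a single empty `data:`/comment line,
where splitlines() previously produced no line at all.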

Both the data: and the comment branch were affected. Adds 8 unit tests
covering trailing-newline preservation, CRLF/CR normalization, and the
splitlines() quirks (U+2028, vertical tab) staying inside the payload.

Closes #15500
---
 fastapi/sse.py    | 14 +++++++++--
 tests/test_sse.py | 64 ++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 75 insertions(+), 3 deletions(-)

diff --git a/fastapi/sse.py b/fastapi/sse.py
index 901d824964..a55c7acb52 100644
--- a/fastapi/sse.py
+++ b/fastapi/sse.py
@@ -143,6 +143,16 @@ class ServerSentEvent(BaseModel):
         return self
 
 
+def _split_sse_lines(value: str) -> list[str]:
+    # SSE recognizes only `\n`, `\r\n`, and `\r` as line terminators
+    # (https://html.spec.whatwg.org/multipage/server-sent-events.html).
+    # `str.splitlines()` is wrong on two counts: it treats 8 extra characters
+    # (`\v`, `\f`, `\x1c`-`\x1e`, `\x85`, U+2028, U+2029) as line breaks, and
+    # it drops a trailing empty string, so e.g. `"hello\n"` would emit only
+    # one `data:` line instead of two.
+    return value.replace("\r\n", "\n").replace("\r", "\n").split("\n")
+
+
 def format_sse_event(
     *,
     data_str: Annotated[
@@ -193,14 +203,14 @@ def format_sse_event(
     lines: list[str] = []
 
     if comment is not None:
-        for line in comment.splitlines():
+        for line in _split_sse_lines(comment):
             lines.append(f": {line}")
 
     if event is not None:
         lines.append(f"event: {event}")
 
     if data_str is not None:
-        for line in data_str.splitlines():
+        for line in _split_sse_lines(data_str):
             lines.append(f"data: {line}")
 
     if id is not None:
diff --git a/tests/test_sse.py b/tests/test_sse.py
index 6dfec61838..23361e989f 100644
--- a/tests/test_sse.py
+++ b/tests/test_sse.py
@@ -6,7 +6,7 @@ import fastapi.routing
 import pytest
 from fastapi import APIRouter, FastAPI
 from fastapi.responses import EventSourceResponse
-from fastapi.sse import ServerSentEvent
+from fastapi.sse import ServerSentEvent, format_sse_event
 from fastapi.testclient import TestClient
 from pydantic import BaseModel
 
@@ -316,3 +316,65 @@ def test_no_keepalive_when_fast(client: TestClient):
     assert response.status_code == 200
     # KEEPALIVE_COMMENT is ": ping\n\n".
     assert ": ping\n" not in response.text
+
+
+# format_sse_event line-splitting tests
+#
+# These cover the splitlines() footgun: it drops trailing empty strings and
+# treats 8 extra characters as line breaks (vertical tab, form feed, FS/GS/RS,
+# NEL, LINE SEPARATOR, PARAGRAPH SEPARATOR). SSE only recognizes \n, \r\n, \r.
+
+
+def test_format_sse_event_preserves_trailing_newline():
+    # "Hello\n" should produce TWO data lines: "Hello" and "" (the trailing
+    # empty line). Pre-fix, splitlines() ate the trailing empty string.
+    assert format_sse_event(data_str="Hello\n") == b"data: Hello\ndata: \n\n"
+
+
+def test_format_sse_event_preserves_trailing_double_newline():
+    assert (
+        format_sse_event(data_str="Hello\n\n")
+        == b"data: Hello\ndata: \ndata: \n\n"
+    )
+
+
+def test_format_sse_event_single_newline_data():
+    assert format_sse_event(data_str="\n") == b"data: \ndata: \n\n"
+
+
+def test_format_sse_event_crlf_normalizes_to_lf():
+    # \r\n is a valid SSE line terminator and should be normalized to \n
+    # for output, producing the same two data lines as \n input would.
+    assert (
+        format_sse_event(data_str="Hello\r\nWorld")
+        == b"data: Hello\ndata: World\n\n"
+    )
+
+
+def test_format_sse_event_bare_cr_treated_as_line_break():
+    # Lone \r is also a valid SSE line terminator per the spec.
+    assert (
+        format_sse_event(data_str="Hello\rWorld")
+        == b"data: Hello\ndata: World\n\n"
+    )
+
+
+def test_format_sse_event_unicode_line_separator_not_split():
+    # U+2028 LINE SEPARATOR is treated as a line break by str.splitlines()
+    # but is NOT a line terminator in the SSE spec. It must stay inside the
+    # data payload, not be promoted to a new "data:" line.
+    assert (
+        format_sse_event(data_str="A\u2028B") == "data: A\u2028B\n\n".encode()
+    )
+
+
+def test_format_sse_event_vertical_tab_not_split():
+    # \v is treated as a line break by splitlines() but not by SSE.
+    assert (
+        format_sse_event(data_str="A\vB") == b"data: A\x0bB\n\n"
+    )
+
+
+def test_format_sse_event_comment_preserves_trailing_newline():
+    # Same bug existed in the comment branch.
+    assert format_sse_event(comment="hi\n") == b": hi\n: \n\n"