Browse Source

Replace email.message with string parsing for content-type detection

Replace the per-request email.message.Message allocation in
get_request_handler with a lightweight _is_json_content_type() helper
that uses simple string operations (split, find, lower).

Benchmarks show a ~5x speedup (80% time reduction) for content-type
checking on every JSON body request.

- Add _is_json_content_type() to fastapi/routing.py
- Remove import email.message (no longer needed)
- Add 26 parametrized unit tests for the new helper
- Add benchmark script comparing old vs new approach
pull/15629/head
Denny Biasiolli 1 week ago
parent
commit
ce1f5e8397
Failed to extract signature
  1. 39
      fastapi/routing.py
  2. 68
      tests/bench_content_type.py
  3. 76
      tests/test_is_json_content_type.py

39
fastapi/routing.py

@ -1,5 +1,4 @@
import contextlib
import email.message
import functools
import inspect
import json
@ -348,6 +347,36 @@ def _build_response_args(
return response_args
def _is_json_content_type(content_type: str) -> bool:
"""Check if a content-type header value indicates JSON content.
Matches ``application/json`` and ``application/*+json`` variants
(e.g. ``application/geo+json``, ``application/vnd.api+json``),
ignoring parameters like ``charset=utf-8``.
This replaces the previous ``email.message.Message`` approach which
carried significant per-request overhead.
>>> _is_json_content_type("application/json")
True
>>> _is_json_content_type("application/json; charset=utf-8")
True
>>> _is_json_content_type("application/geo+json")
True
>>> _is_json_content_type("text/plain")
False
"""
media_type = content_type.split(";", 1)[0].strip().lower()
slash = media_type.find("/")
if slash == -1:
return False
main_type = media_type[:slash]
if main_type != "application":
return False
subtype = media_type[slash + 1 :]
return subtype == "json" or subtype.endswith("+json")
def get_request_handler(
dependant: Dependant,
body_field: ModelField | None = None,
@ -414,12 +443,8 @@ def get_request_handler(
if not actual_strict_content_type:
json_body = await request.json()
else:
message = email.message.Message()
message["content-type"] = content_type_value
if message.get_content_maintype() == "application":
subtype = message.get_content_subtype()
if subtype == "json" or subtype.endswith("+json"):
json_body = await request.json()
if _is_json_content_type(content_type_value):
json_body = await request.json()
if json_body != Undefined:
body = json_body
else:

68
tests/bench_content_type.py

@ -0,0 +1,68 @@
"""Benchmark: email.message.Message vs string-parsing for content-type detection."""
import email.message
import timeit
from fastapi.routing import _is_json_content_type
# Header values fed to both implementations on every benchmark pass: plain
# and parameterised JSON, "+json" structured-suffix types, unrelated types,
# and two look-alikes ("not-really-json", "geo+json-seq").
CONTENT_TYPES = [
"application/json",
"application/json; charset=utf-8",
"application/geo+json",
"application/vnd.api+json",
"text/plain",
"application/xml",
"application/octet-stream",
"multipart/form-data; boundary=----",
"application/not-really-json",
"application/geo+json-seq",
]
# Number of timed passes over CONTENT_TYPES (the ``number`` given to timeit).
ITERATIONS = 100_000
def old_is_json(content_type: str) -> bool:
    """Baseline JSON check built on ``email.message.Message``.

    Kept only as the reference implementation the benchmark compares
    against: the header value is parsed by ``Message`` and accepted when
    the main type is ``application`` and the subtype is ``json`` or ends
    with ``+json``.
    """
    parsed = email.message.Message()
    parsed["content-type"] = content_type
    if parsed.get_content_maintype() != "application":
        return False
    sub = parsed.get_content_subtype()
    return sub == "json" or sub.endswith("+json")
def bench(func, label: str) -> float:
    """Time ``func`` over CONTENT_TYPES and print a one-line summary.

    ``func`` is called once per entry of ``CONTENT_TYPES`` in each of the
    ``ITERATIONS`` timed passes. The printed line shows ``label``, the
    total elapsed seconds and the resulting calls per second.

    Returns:
        Total elapsed time in seconds as reported by ``timeit.timeit``.
    """

    def sweep():
        for header_value in CONTENT_TYPES:
            func(header_value)

    seconds = timeit.timeit(sweep, number=ITERATIONS)
    total_calls = ITERATIONS * len(CONTENT_TYPES)
    throughput = total_calls / seconds
    print(f" {label:30s} {seconds:8.3f}s ({throughput:,.0f} ops/s)")
    return seconds
def main() -> None:
    """Check that both implementations agree, then benchmark and compare them.

    Raises:
        AssertionError: if the old and new implementations disagree on any
            entry of ``CONTENT_TYPES``.
    """
    # Verify both implementations agree on all inputs. An explicit raise is
    # used rather than ``assert`` so the check still runs under ``python -O``;
    # otherwise the timings below could compare functions that do not agree.
    for ct in CONTENT_TYPES:
        old_result = old_is_json(ct)
        new_result = _is_json_content_type(ct)
        if old_result != new_result:
            raise AssertionError(
                f"Mismatch on {ct!r}: old={old_result}, new={new_result}"
            )

    print(
        f"\nBenchmark: {ITERATIONS:,} iterations x {len(CONTENT_TYPES)} content-types\n"
    )
    old_time = bench(old_is_json, "email.message (old)")
    new_time = bench(_is_json_content_type, "string parsing (new)")
    # Ratio of the old time to the new time, and the relative time saved.
    speedup = old_time / new_time
    pct = (1 - new_time / old_time) * 100
    print(f"\n Speedup: {speedup:.1f}x faster ({pct:.1f}% reduction in time)")


if __name__ == "__main__":
    main()

76
tests/test_is_json_content_type.py

@ -0,0 +1,76 @@
import pytest
from fastapi.routing import _is_json_content_type
@pytest.mark.parametrize(
    "content_type",
    [
        pytest.param("application/json", id="plain"),
        pytest.param("application/JSON", id="upper-subtype"),
        pytest.param("Application/JSON", id="mixed-case"),
        pytest.param("APPLICATION/JSON", id="all-upper"),
        pytest.param("application/json; charset=utf-8", id="with-charset"),
        pytest.param("application/json;charset=utf-8", id="charset-no-space"),
        pytest.param("application/json ; charset=utf-8", id="charset-extra-space"),
        pytest.param("application/geo+json", id="geo+json"),
        pytest.param("application/vnd.api+json", id="vnd+json"),
        pytest.param("application/vnd.example.api+json", id="nested-vnd+json"),
        pytest.param(
            "application/vnd.api+json; charset=utf-8", id="vnd+json-with-charset"
        ),
        pytest.param(" application/json ", id="surrounding-whitespace"),
    ],
)
def test_json_content_types_accepted(content_type: str) -> None:
    """Each header value above must be recognised as JSON."""
    outcome = _is_json_content_type(content_type)
    assert outcome is True
@pytest.mark.parametrize(
    "content_type",
    [
        pytest.param("text/plain", id="text-plain"),
        pytest.param("text/html", id="text-html"),
        pytest.param("multipart/form-data", id="multipart"),
        pytest.param("application/xml", id="xml"),
        pytest.param("application/octet-stream", id="octet-stream"),
        pytest.param("application/not-really-json", id="not-really-json"),
        pytest.param("application/geo+json-seq", id="json-seq"),
        pytest.param("application/jsonl", id="jsonl"),
        pytest.param("application/x-ndjson", id="ndjson"),
        pytest.param("json", id="no-slash"),
        pytest.param("", id="empty"),
        pytest.param("application", id="no-subtype"),
        pytest.param("/json", id="no-maintype"),
        pytest.param("application/", id="trailing-slash"),
    ],
)
def test_non_json_content_types_rejected(content_type: str) -> None:
    """Each header value above must be rejected as non-JSON."""
    outcome = _is_json_content_type(content_type)
    assert outcome is False
Loading…
Cancel
Save