better pdf using headless chrome

This commit is contained in:
Waylon S. Walker 2024-10-15 15:53:32 -05:00
parent 16e207000f
commit 7f0934ac14
7 changed files with 143 additions and 26 deletions

View file

@ -7,23 +7,26 @@ venv:
run: run:
uv run -- uvicorn --reload --log-level debug src.fastapi_dynamic_response.main:app uv run -- uvicorn --reload --log-level debug src.fastapi_dynamic_response.main:app
run-workers:
uv run -- uvicorn --workers 6 --log-level debug src.fastapi_dynamic_response.main:app
get: get:
http GET :8000/example http GET :8000/example
get-plain: get-plain:
http GET :8000/exa Content-Type=text/plain http GET :8000/exa Content-Type:text/plain
get-rtf: get-rtf:
http GET :8000/example Content-Type=application/rtf http GET :8000/example Content-Type:application/rtf
get-json: get-json:
http GET :8000 Content-Type=application/json http GET :8000 Content-Type:application/json
get-html: get-html:
http GET :8000 Content-Type=text/html http GET :8000 Content-Type:text/html
get-md: get-md:
http GET :8000 Content-Type=application/markdown http GET :8000 Content-Type:application/markdown
livez: livez:

View file

@ -12,10 +12,15 @@ ACCEPT_TYPES = {
"application/markdown": "markdown", "application/markdown": "markdown",
"text/markdown": "markdown", "text/markdown": "markdown",
"text/x-markdown": "markdown", "text/x-markdown": "markdown",
"image/png": "png",
"application/pdf": "pdf",
"json": "JSON", "json": "JSON",
"html": "html", "html": "html",
"rtf": "rtf", "rtf": "rtf",
"plain": "text", "plain": "text",
"text": "text",
"markdown": "markdown", "markdown": "markdown",
"md": "markdown", "md": "markdown",
"png": "png",
"pdf": "pdf",
} }

View file

@ -8,8 +8,10 @@ from fastapi_dynamic_response.dependencies import get_content_type
from fastapi_dynamic_response.middleware import ( from fastapi_dynamic_response.middleware import (
Sitemap, Sitemap,
catch_exceptions_middleware, catch_exceptions_middleware,
log_request_state,
respond_based_on_content_type, respond_based_on_content_type,
set_prefers, set_prefers,
set_span_id,
) )
from fastapi_dynamic_response.zpages.router import router as zpages_router from fastapi_dynamic_response.zpages.router import router as zpages_router
@ -24,6 +26,8 @@ app = FastAPI(
debug=True, debug=True,
dependencies=[ dependencies=[
Depends(set_prefers), Depends(set_prefers),
Depends(set_span_id),
Depends(log_request_state),
], ],
) )
app.include_router(zpages_router) app.include_router(zpages_router)

View file

@ -3,6 +3,7 @@ from io import BytesIO
import json import json
import traceback import traceback
from typing import Any, Dict from typing import Any, Dict
from uuid import uuid4
from fastapi import Request, Response from fastapi import Request, Response
from fastapi.exceptions import ( from fastapi.exceptions import (
@ -17,11 +18,13 @@ from rich.markdown import Markdown
from rich.panel import Panel from rich.panel import Panel
from selenium import webdriver from selenium import webdriver
from selenium.webdriver.chrome.options import Options from selenium.webdriver.chrome.options import Options
from weasyprint import HTML as WEAZYHTML
import base64
from fastapi_dynamic_response.constant import ACCEPT_TYPES from fastapi_dynamic_response.constant import ACCEPT_TYPES
from fastapi_dynamic_response.globals import templates from fastapi_dynamic_response.globals import templates
console = Console()
class Prefers(BaseModel): class Prefers(BaseModel):
JSON: bool = False JSON: bool = False
@ -30,6 +33,8 @@ class Prefers(BaseModel):
text: bool = False text: bool = False
markdown: bool = False markdown: bool = False
partial: bool = False partial: bool = False
png: bool = False
pdf: bool = False
@property @property
def textlike(self) -> bool: def textlike(self) -> bool:
@ -37,19 +42,50 @@ class Prefers(BaseModel):
@model_validator(mode="after")
def check_one_true(self) -> "Prefers":
    """Validate that exactly one output-format flag is set.

    The format flags are JSON, html, rtf, text, markdown, png and pdf.
    ``partial`` is not a format flag and is not counted.

    Returns:
        The validated model instance itself.

    Raises:
        ValueError: if zero or more than one format flag is True.
    """
    format_flags = [
        self.JSON,
        self.html,
        self.rtf,
        self.text,
        self.markdown,
        self.png,
        self.pdf,
    ]
    if format_flags.count(True) != 1:
        # Keep the message in sync with the flags checked above.
        message = (
            "Exactly one of JSON, html, rtf, text, markdown, png, or pdf "
            "must be True."
        )
        raise ValueError(message)
    return self
def log_request_state(request: Request):
    """Log the request's span id, URL path and resolved preferences.

    Reads ``request.state.span_id`` and ``request.state.prefers``, so both
    must already have been set on the request state before this runs.
    """
    state = request.state
    console.log(state.span_id)
    console.log(request.url.path)
    console.log(state.prefers)
def set_span_id(request: Request):
    """Attach a freshly generated UUID4 to ``request.state.span_id``."""
    request.state.span_id = uuid4()
def set_prefers( def set_prefers(
request: Request, request: Request,
): ):
content_type = ( content_type = (
request.query_params.get("content_type") request.query_params.get(
or request.headers.get("content-type") "content-type",
or request.headers.get("accept", None) request.query_params.get(
"content_type",
request.query_params.get("accept"),
),
)
or request.headers.get(
"content-type",
request.headers.get(
"content_type",
request.headers.get("accept"),
),
)
).lower() ).lower()
if content_type == "*/*": if content_type == "*/*":
content_type = None content_type = None
@ -72,13 +108,9 @@ def set_prefers(
# if content_type in ACCEPT_TYPES: # if content_type in ACCEPT_TYPES:
for accept_type, accept_value in ACCEPT_TYPES.items(): for accept_type, accept_value in ACCEPT_TYPES.items():
if accept_type in content_type: if accept_type in content_type:
request.state.prefers = Prefers(**{ACCEPT_TYPES[accept_value]: True}) request.state.prefers = Prefers(**{accept_value: True})
print("content_type:", content_type)
print("prefers:", request.state.prefers)
return return
request.state.prefers = Prefers(JSON=True, partial=False) request.state.prefers = Prefers(JSON=True, partial=False)
print("prefers:", request.state.prefers)
print("content_type:", content_type)
class Sitemap: class Sitemap:
@ -133,6 +165,41 @@ def get_screenshot(html_content: str) -> BytesIO:
return buffer return buffer
def get_pdf(html_content: str, scale: float = 1.0) -> bytes:
    """Render an HTML string to PDF with headless Chrome.

    Args:
        html_content: The HTML document to print.
        scale: Scale factor passed to Chrome's ``Page.printToPDF``.

    Returns:
        The raw bytes of the generated PDF.
    """
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--disable-gpu")
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--window-size=1280x1024")
    chrome_options.add_argument("--disable-dev-shm-usage")  # Helps avoid memory issues

    # Base64-encode the document so characters that are significant in a URL
    # ('#', '%', '?', newlines, non-ASCII) cannot truncate or corrupt the page.
    encoded_html = base64.b64encode(html_content.encode("utf-8")).decode("ascii")

    driver = webdriver.Chrome(options=chrome_options)
    try:
        driver.get("data:text/html;charset=utf-8;base64," + encoded_html)

        # Generate PDF; the CDP command returns the document base64-encoded.
        pdf = driver.execute_cdp_cmd(
            "Page.printToPDF",
            {
                "printBackground": True,  # Include CSS backgrounds in the PDF
                "paperWidth": 8.27,  # A4 paper size width in inches
                "paperHeight": 11.69,  # A4 paper size height in inches
                "marginTop": 0,
                "marginBottom": 0,
                "marginLeft": 0,
                "marginRight": 0,
                "scale": scale,
            },
        )["data"]
    finally:
        # Always shut the browser down, even when navigation or printing fails,
        # so failed requests do not leave Chrome processes behind.
        driver.quit()

    return base64.b64decode(pdf)
def format_json_as_plain_text(data: dict) -> str: def format_json_as_plain_text(data: dict) -> str:
"""Convert JSON to human-readable plain text format with indentation and bullet points.""" """Convert JSON to human-readable plain text format with indentation and bullet points."""
@ -156,8 +223,8 @@ def format_json_as_plain_text(data: dict) -> str:
def format_json_as_rich_text(data: dict, template_name: str) -> str: def format_json_as_rich_text(data: dict, template_name: str) -> str:
"""Convert JSON to a human-readable rich text format using rich.""" """Convert JSON to a human-readable rich text format using rich."""
console = Console()
# pretty_data = Pretty(data, indent_guides=True) # pretty_data = Pretty(data, indent_guides=True)
console = Console()
template = templates.get_template(template_name) template = templates.get_template(template_name)
html_content = template.render(data=data) html_content = template.render(data=data)
@ -211,7 +278,7 @@ def handle_not_found(request: Request, call_next, data: str):
async def respond_based_on_content_type(request: Request, call_next): async def respond_based_on_content_type(request: Request, call_next):
requested_path = request.url.path requested_path = request.url.path
if requested_path in ["/docs", "/redoc", "/openapi.json"]: if requested_path in ["/docs", "/redoc", "/openapi.json", "/static/app.css"]:
return await call_next(request) return await call_next(request)
try: try:
@ -298,16 +365,21 @@ async def handle_response(request: Request, data: str):
rich_text_content = format_json_as_rich_text(json_data, template_name) rich_text_content = format_json_as_rich_text(json_data, template_name)
return PlainTextResponse(content=rich_text_content) return PlainTextResponse(content=rich_text_content)
elif content_type == "image/png": elif request.state.prefers.png:
template = templates.get_template(template_name) template = templates.get_template(template_name)
html_content = template.render(data=json_data) html_content = template.render(data=json_data)
screenshot = get_screenshot(html_content) screenshot = get_screenshot(html_content)
return Response(content=screenshot.getvalue(), media_type="image/png") return Response(content=screenshot.getvalue(), media_type="image/png")
elif content_type == "application/pdf": elif request.state.prefers.pdf:
template = templates.get_template(template_name) template = templates.get_template(template_name)
html_content = template.render(data=json_data) html_content = template.render(data=json_data)
pdf = WEAZYHTML(string=html_content).write_pdf() scale = float(
request.headers.get("scale", request.query_params.get("scale", 1.0))
)
console.log(f"Scale: {scale}")
pdf = get_pdf(html_content, scale)
return Response(content=pdf, media_type="application/pdf") return Response(content=pdf, media_type="application/pdf")
return JSONResponse(content=json_data) return JSONResponse(content=json_data)

View file

@ -593,6 +593,19 @@ video {
margin-right: auto; margin-right: auto;
} }
.my-4 {
margin-top: 1rem;
margin-bottom: 1rem;
}
.ml-8 {
margin-left: 2rem;
}
.mt-4 {
margin-top: 1rem;
}
.mt-auto { .mt-auto {
margin-top: auto; margin-top: auto;
} }
@ -609,6 +622,10 @@ video {
min-height: 100vh; min-height: 100vh;
} }
.list-disc {
list-style-type: disc;
}
.flex-col { .flex-col {
flex-direction: column; flex-direction: column;
} }
@ -649,6 +666,11 @@ video {
text-align: center; text-align: center;
} }
.text-2xl {
font-size: 1.5rem;
line-height: 2rem;
}
.text-xl { .text-xl {
font-size: 1.25rem; font-size: 1.25rem;
line-height: 1.75rem; line-height: 1.75rem;
@ -663,6 +685,16 @@ video {
color: rgb(229 231 235 / var(--tw-text-opacity)); color: rgb(229 231 235 / var(--tw-text-opacity));
} }
.text-gray-300 {
--tw-text-opacity: 1;
color: rgb(209 213 219 / var(--tw-text-opacity));
}
.text-gray-400 {
--tw-text-opacity: 1;
color: rgb(156 163 175 / var(--tw-text-opacity));
}
.text-teal-400 { .text-teal-400 {
--tw-text-opacity: 1; --tw-text-opacity: 1;
color: rgb(45 212 191 / var(--tw-text-opacity)); color: rgb(45 212 191 / var(--tw-text-opacity));

View file

@ -3,16 +3,16 @@
{% block title %}Another Example{% endblock %} {% block title %}Another Example{% endblock %}
{% block content %} {% block content %}
<h2>Example</h2> <h2 class='text-gray-400 font-bold text-2xl'>Example</h2>
<p> <p>
{{ data.message }} {{ data.message }}
</p> </p>
<h3>Items</h3> <h3 class='mt-4 text-gray-400 font-bold text-xl'>Items</h3>
<p> <p class='text-gray-300 my-4'>
there are {{ data.get('items', [])|length }} items in the list there are {{ data.get('items', [])|length }} items in the list
</p> </p>
<ul> <ul class='list-disc ml-8'>
{% for item in data.get('items', []) %} {% for item in data.get('items', []) %}
<li>{{ item }}</li> <li>{{ item }}</li>
{% endfor %} {% endfor %}

View file

@ -4,8 +4,9 @@
<meta charset="UTF-8"> <meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0"> <meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>{% block title %}FastAPI Dynamic Response{% endblock %}</title> <title>{% block title %}FastAPI Dynamic Response{% endblock %}</title>
<script src="https://cdn.tailwindcss.com"></script> <!-- <link href="/static/app.css" rel="stylesheet"> -->
<link href="/static/app.css" rel="stylesheet"> <link href="http://localhost:8000/static/app.css" rel="stylesheet">
</head> </head>
<body class="bg-gray-900 text-gray-200 min-h-screen flex flex-col"> <body class="bg-gray-900 text-gray-200 min-h-screen flex flex-col">
<header class="bg-gray-800 p-4"> <header class="bg-gray-800 p-4">