better pdf using headless chrome

This commit is contained in:
Waylon S. Walker 2024-10-15 15:53:32 -05:00
parent 16e207000f
commit 7f0934ac14
7 changed files with 143 additions and 26 deletions

View file

@ -7,23 +7,26 @@ venv:
run:
uv run -- uvicorn --reload --log-level debug src.fastapi_dynamic_response.main:app
run-workers:
uv run -- uvicorn --workers 6 --log-level debug src.fastapi_dynamic_response.main:app
get:
http GET :8000/example
get-plain:
http GET :8000/exa Content-Type=text/plain
http GET :8000/exa Content-Type:text/plain
get-rtf:
http GET :8000/example Content-Type=application/rtf
http GET :8000/example Content-Type:application/rtf
get-json:
http GET :8000 Content-Type=application/json
http GET :8000 Content-Type:application/json
get-html:
http GET :8000 Content-Type=text/html
http GET :8000 Content-Type:text/html
get-md:
http GET :8000 Content-Type=application/markdown
http GET :8000 Content-Type:application/markdown
livez:

View file

@ -12,10 +12,15 @@ ACCEPT_TYPES = {
"application/markdown": "markdown",
"text/markdown": "markdown",
"text/x-markdown": "markdown",
"image/png": "png",
"application/pdf": "pdf",
"json": "JSON",
"html": "html",
"rtf": "rtf",
"plain": "text",
"text": "text",
"markdown": "markdown",
"md": "markdown",
"png": "png",
"pdf": "pdf",
}

View file

@ -8,8 +8,10 @@ from fastapi_dynamic_response.dependencies import get_content_type
from fastapi_dynamic_response.middleware import (
Sitemap,
catch_exceptions_middleware,
log_request_state,
respond_based_on_content_type,
set_prefers,
set_span_id,
)
from fastapi_dynamic_response.zpages.router import router as zpages_router
@ -24,6 +26,8 @@ app = FastAPI(
debug=True,
dependencies=[
Depends(set_prefers),
Depends(set_span_id),
Depends(log_request_state),
],
)
app.include_router(zpages_router)

View file

@ -3,6 +3,7 @@ from io import BytesIO
import json
import traceback
from typing import Any, Dict
from uuid import uuid4
from fastapi import Request, Response
from fastapi.exceptions import (
@ -17,11 +18,13 @@ from rich.markdown import Markdown
from rich.panel import Panel
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from weasyprint import HTML as WEAZYHTML
import base64
from fastapi_dynamic_response.constant import ACCEPT_TYPES
from fastapi_dynamic_response.globals import templates
console = Console()
class Prefers(BaseModel):
JSON: bool = False
@ -30,6 +33,8 @@ class Prefers(BaseModel):
text: bool = False
markdown: bool = False
partial: bool = False
png: bool = False
pdf: bool = False
@property
def textlike(self) -> bool:
@ -37,19 +42,50 @@ class Prefers(BaseModel):
@model_validator(mode="after")
def check_one_true(self) -> Dict[str, Any]:
format_flags = [self.JSON, self.html, self.rtf, self.text, self.markdown]
format_flags = [
self.JSON,
self.html,
self.rtf,
self.text,
self.markdown,
self.png,
self.pdf,
]
if format_flags.count(True) != 1:
message = "Exactly one of JSON, html, rtf, text, or markdown must be True."
raise ValueError(message)
return self
def log_request_state(request: Request):
    """Log the span id, URL path and format preferences of a request.

    Reads ``span_id`` and ``prefers`` from ``request.state``, so both
    attributes must already have been set on the request before this runs.
    """
    state = request.state
    console.log(state.span_id)
    console.log(request.url.path)
    console.log(state.prefers)
def set_span_id(request: Request):
    """Attach a freshly generated random UUID to ``request.state.span_id``."""
    request.state.span_id = uuid4()
def set_prefers(
request: Request,
):
content_type = (
request.query_params.get("content_type")
or request.headers.get("content-type")
or request.headers.get("accept", None)
request.query_params.get(
"content-type",
request.query_params.get(
"content_type",
request.query_params.get("accept"),
),
)
or request.headers.get(
"content-type",
request.headers.get(
"content_type",
request.headers.get("accept"),
),
)
).lower()
if content_type == "*/*":
content_type = None
@ -72,13 +108,9 @@ def set_prefers(
# if content_type in ACCEPT_TYPES:
for accept_type, accept_value in ACCEPT_TYPES.items():
if accept_type in content_type:
request.state.prefers = Prefers(**{ACCEPT_TYPES[accept_value]: True})
print("content_type:", content_type)
print("prefers:", request.state.prefers)
request.state.prefers = Prefers(**{accept_value: True})
return
request.state.prefers = Prefers(JSON=True, partial=False)
print("prefers:", request.state.prefers)
print("content_type:", content_type)
class Sitemap:
@ -133,6 +165,41 @@ def get_screenshot(html_content: str) -> BytesIO:
return buffer
def get_pdf(html_content: str, scale: float = 1.0) -> bytes:
    """Render an HTML string to a PDF using headless Chrome.

    The HTML is loaded through a ``data:`` URL and printed with the Chrome
    DevTools ``Page.printToPDF`` command on an A4 page with zero margins and
    CSS backgrounds enabled.

    Args:
        html_content: The HTML document to render.
        scale: Scale factor forwarded to ``Page.printToPDF``.

    Returns:
        The raw bytes of the generated PDF document.
    """
    from urllib.parse import quote

    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--disable-gpu")
    chrome_options.add_argument("--no-sandbox")
    # Chrome expects "width,height"; the "1280x1024" form is not parsed.
    chrome_options.add_argument("--window-size=1280,1024")
    chrome_options.add_argument("--disable-dev-shm-usage")  # Helps avoid memory issues

    driver = webdriver.Chrome(options=chrome_options)
    try:
        # Percent-encode the markup: a raw "#" would start the URL fragment
        # and cut the document short, and a raw "%" would be mis-decoded.
        driver.get("data:text/html;charset=utf-8," + quote(html_content))

        # Generate PDF
        result = driver.execute_cdp_cmd(
            "Page.printToPDF",
            {
                "printBackground": True,  # Include CSS backgrounds in the PDF
                "paperWidth": 8.27,  # A4 paper size width in inches
                "paperHeight": 11.69,  # A4 paper size height in inches
                "marginTop": 0,
                "marginBottom": 0,
                "marginLeft": 0,
                "marginRight": 0,
                "scale": scale,
            },
        )
    finally:
        # Always shut the browser down, even when loading or printing fails,
        # so failed requests do not leave Chrome processes behind.
        driver.quit()

    # The DevTools command returns the PDF base64-encoded under "data".
    return base64.b64decode(result["data"])
def format_json_as_plain_text(data: dict) -> str:
"""Convert JSON to human-readable plain text format with indentation and bullet points."""
@ -156,8 +223,8 @@ def format_json_as_plain_text(data: dict) -> str:
def format_json_as_rich_text(data: dict, template_name: str) -> str:
"""Convert JSON to a human-readable rich text format using rich."""
console = Console()
# pretty_data = Pretty(data, indent_guides=True)
console = Console()
template = templates.get_template(template_name)
html_content = template.render(data=data)
@ -211,7 +278,7 @@ def handle_not_found(request: Request, call_next, data: str):
async def respond_based_on_content_type(request: Request, call_next):
requested_path = request.url.path
if requested_path in ["/docs", "/redoc", "/openapi.json"]:
if requested_path in ["/docs", "/redoc", "/openapi.json", "/static/app.css"]:
return await call_next(request)
try:
@ -298,16 +365,21 @@ async def handle_response(request: Request, data: str):
rich_text_content = format_json_as_rich_text(json_data, template_name)
return PlainTextResponse(content=rich_text_content)
elif content_type == "image/png":
elif request.state.prefers.png:
template = templates.get_template(template_name)
html_content = template.render(data=json_data)
screenshot = get_screenshot(html_content)
return Response(content=screenshot.getvalue(), media_type="image/png")
elif content_type == "application/pdf":
elif request.state.prefers.pdf:
template = templates.get_template(template_name)
html_content = template.render(data=json_data)
pdf = WEAZYHTML(string=html_content).write_pdf()
scale = float(
request.headers.get("scale", request.query_params.get("scale", 1.0))
)
console.log(f"Scale: {scale}")
pdf = get_pdf(html_content, scale)
return Response(content=pdf, media_type="application/pdf")
return JSONResponse(content=json_data)

View file

@ -593,6 +593,19 @@ video {
margin-right: auto;
}
.my-4 {
margin-top: 1rem;
margin-bottom: 1rem;
}
.ml-8 {
margin-left: 2rem;
}
.mt-4 {
margin-top: 1rem;
}
.mt-auto {
margin-top: auto;
}
@ -609,6 +622,10 @@ video {
min-height: 100vh;
}
.list-disc {
list-style-type: disc;
}
.flex-col {
flex-direction: column;
}
@ -649,6 +666,11 @@ video {
text-align: center;
}
.text-2xl {
font-size: 1.5rem;
line-height: 2rem;
}
.text-xl {
font-size: 1.25rem;
line-height: 1.75rem;
@ -663,6 +685,16 @@ video {
color: rgb(229 231 235 / var(--tw-text-opacity));
}
.text-gray-300 {
--tw-text-opacity: 1;
color: rgb(209 213 219 / var(--tw-text-opacity));
}
.text-gray-400 {
--tw-text-opacity: 1;
color: rgb(156 163 175 / var(--tw-text-opacity));
}
.text-teal-400 {
--tw-text-opacity: 1;
color: rgb(45 212 191 / var(--tw-text-opacity));

View file

@ -3,16 +3,16 @@
{% block title %}Another Example{% endblock %}
{% block content %}
<h2>Example</h2>
<h2 class='text-gray-400 font-bold text-2xl'>Example</h2>
<p>
{{ data.message }}
</p>
<h3>Items</h3>
<p>
<h3 class='mt-4 text-gray-400 font-bold text-xl'>Items</h3>
<p class='text-gray-300 my-4'>
there are {{ data.get('items', [])|length }} items in the list
</p>
<ul>
<ul class='list-disc ml-8'>
{% for item in data.get('items', []) %}
<li>{{ item }}</li>
{% endfor %}

View file

@ -4,8 +4,9 @@
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>{% block title %}FastAPI Dynamic Response{% endblock %}</title>
<script src="https://cdn.tailwindcss.com"></script>
<link href="/static/app.css" rel="stylesheet">
<!-- <link href="/static/app.css" rel="stylesheet"> -->
<link href="http://localhost:8000/static/app.css" rel="stylesheet">
</head>
<body class="bg-gray-900 text-gray-200 min-h-screen flex flex-col">
<header class="bg-gray-800 p-4">