Initial Commit for rss-link-app

Analyze links from RSS feeds
Waylon Walker 2025-09-03 20:22:39 -05:00
commit 060f998c59
8 changed files with 1837 additions and 0 deletions

.gitignore vendored Normal file

@@ -0,0 +1,966 @@
# Created by https://www.toptal.com/developers/gitignore/api/vim,node,data,emacs,python,pycharm,executable,sublimetext,visualstudio,visualstudiocode
# Edit at https://www.toptal.com/developers/gitignore?templates=vim,node,data,emacs,python,pycharm,executable,sublimetext,visualstudio,visualstudiocode
### Data ###
*.csv
*.dat
*.efx
*.gbr
*.key
*.pps
*.ppt
*.pptx
*.sdf
*.tax2010
*.vcf
*.xml
### Emacs ###
# -*- mode: gitignore; -*-
*~
\#*\#
/.emacs.desktop
/.emacs.desktop.lock
*.elc
auto-save-list
tramp
.\#*
# Org-mode
.org-id-locations
*_archive
# flymake-mode
*_flymake.*
# eshell files
/eshell/history
/eshell/lastdir
# elpa packages
/elpa/
# reftex files
*.rel
# AUCTeX auto folder
/auto/
# cask packages
.cask/
dist/
# Flycheck
flycheck_*.el
# server auth directory
/server/
# projectiles files
.projectile
# directory configuration
.dir-locals.el
# network security
/network-security.data
### Executable ###
*.app
*.bat
*.cgi
*.com
*.exe
*.gadget
*.jar
*.pif
*.vb
*.wsf
### Node ###
# Logs
logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
lerna-debug.log*
.pnpm-debug.log*
# Diagnostic reports (https://nodejs.org/api/report.html)
report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
# Runtime data
pids
*.pid
*.seed
*.pid.lock
# Directory for instrumented libs generated by jscoverage/JSCover
lib-cov
# Coverage directory used by tools like istanbul
coverage
*.lcov
# nyc test coverage
.nyc_output
# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
.grunt
# Bower dependency directory (https://bower.io/)
bower_components
# node-waf configuration
.lock-wscript
# Compiled binary addons (https://nodejs.org/api/addons.html)
build/Release
# Dependency directories
node_modules/
jspm_packages/
# Snowpack dependency directory (https://snowpack.dev/)
web_modules/
# TypeScript cache
*.tsbuildinfo
# Optional npm cache directory
.npm
# Optional eslint cache
.eslintcache
# Optional stylelint cache
.stylelintcache
# Microbundle cache
.rpt2_cache/
.rts2_cache_cjs/
.rts2_cache_es/
.rts2_cache_umd/
# Optional REPL history
.node_repl_history
# Output of 'npm pack'
*.tgz
# Yarn Integrity file
.yarn-integrity
# dotenv environment variable files
.env
.env.development.local
.env.test.local
.env.production.local
.env.local
# parcel-bundler cache (https://parceljs.org/)
.cache
.parcel-cache
# Next.js build output
.next
out
# Nuxt.js build / generate output
.nuxt
dist
# Gatsby files
.cache/
# Comment in the public line if your project uses Gatsby and not Next.js
# https://nextjs.org/blog/next-9-1#public-directory-support
# public
# vuepress build output
.vuepress/dist
# vuepress v2.x temp and cache directory
.temp
# Docusaurus cache and generated files
.docusaurus
# Serverless directories
.serverless/
# FuseBox cache
.fusebox/
# DynamoDB Local files
.dynamodb/
# TernJS port file
.tern-port
# Stores VSCode versions used for testing VSCode extensions
.vscode-test
# yarn v2
.yarn/cache
.yarn/unplugged
.yarn/build-state.yml
.yarn/install-state.gz
.pnp.*
### Node Patch ###
# Serverless Webpack directories
.webpack/
# Optional stylelint cache
# SvelteKit build / generate output
.svelte-kit
### PyCharm ###
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
# User-specific stuff
.idea/**/workspace.xml
.idea/**/tasks.xml
.idea/**/usage.statistics.xml
.idea/**/dictionaries
.idea/**/shelf
# AWS User-specific
.idea/**/aws.xml
# Generated files
.idea/**/contentModel.xml
# Sensitive or high-churn files
.idea/**/dataSources/
.idea/**/dataSources.ids
.idea/**/dataSources.local.xml
.idea/**/sqlDataSources.xml
.idea/**/dynamic.xml
.idea/**/uiDesigner.xml
.idea/**/dbnavigator.xml
# Gradle
.idea/**/gradle.xml
.idea/**/libraries
# Gradle and Maven with auto-import
# When using Gradle or Maven with auto-import, you should exclude module files,
# since they will be recreated, and may cause churn. Uncomment if using
# auto-import.
# .idea/artifacts
# .idea/compiler.xml
# .idea/jarRepositories.xml
# .idea/modules.xml
# .idea/*.iml
# .idea/modules
# *.iml
# *.ipr
# CMake
cmake-build-*/
# Mongo Explorer plugin
.idea/**/mongoSettings.xml
# File-based project format
*.iws
# IntelliJ
out/
# mpeltonen/sbt-idea plugin
.idea_modules/
# JIRA plugin
atlassian-ide-plugin.xml
# Cursive Clojure plugin
.idea/replstate.xml
# SonarLint plugin
.idea/sonarlint/
# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
fabric.properties
# Editor-based Rest Client
.idea/httpRequests
# Android studio 3.1+ serialized cache file
.idea/caches/build_file_checksums.ser
### PyCharm Patch ###
# Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721
# *.iml
# modules.xml
# .idea/misc.xml
# *.ipr
# Sonarlint plugin
# https://plugins.jetbrains.com/plugin/7973-sonarlint
.idea/**/sonarlint/
# SonarQube Plugin
# https://plugins.jetbrains.com/plugin/7238-sonarqube-community-plugin
.idea/**/sonarIssues.xml
# Markdown Navigator plugin
# https://plugins.jetbrains.com/plugin/7896-markdown-navigator-enhanced
.idea/**/markdown-navigator.xml
.idea/**/markdown-navigator-enh.xml
.idea/**/markdown-navigator/
# Cache file creation bug
# See https://youtrack.jetbrains.com/issue/JBR-2257
.idea/$CACHE_FILE$
# CodeStream plugin
# https://plugins.jetbrains.com/plugin/12206-codestream
.idea/codestream.xml
# Azure Toolkit for IntelliJ plugin
# https://plugins.jetbrains.com/plugin/8053-azure-toolkit-for-intellij
.idea/**/azureSettings.xml
### Python ###
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
### Python Patch ###
# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
poetry.toml
# ruff
.ruff_cache/
# LSP config files
pyrightconfig.json
### SublimeText ###
# Cache files for Sublime Text
*.tmlanguage.cache
*.tmPreferences.cache
*.stTheme.cache
# Workspace files are user-specific
*.sublime-workspace
# Project files should be checked into the repository, unless a significant
# proportion of contributors will probably not be using Sublime Text
# *.sublime-project
# SFTP configuration file
sftp-config.json
sftp-config-alt*.json
# Package control specific files
Package Control.last-run
Package Control.ca-list
Package Control.ca-bundle
Package Control.system-ca-bundle
Package Control.cache/
Package Control.ca-certs/
Package Control.merged-ca-bundle
Package Control.user-ca-bundle
oscrypto-ca-bundle.crt
bh_unicode_properties.cache
# Sublime-github package stores a github token in this file
# https://packagecontrol.io/packages/sublime-github
GitHub.sublime-settings
### Vim ###
# Swap
[._]*.s[a-v][a-z]
# Comment out the next line if you don't need vector files
# (gitignore has no trailing comments; a trailing "#" becomes part of the pattern)
!*.svg
[._]*.sw[a-p]
[._]s[a-rt-v][a-z]
[._]ss[a-gi-z]
[._]sw[a-p]
# Session
Session.vim
Sessionx.vim
# Temporary
.netrwhist
# Auto-generated tag files
tags
# Persistent undo
[._]*.un~
### VisualStudioCode ###
.vscode/*
!.vscode/settings.json
!.vscode/tasks.json
!.vscode/launch.json
!.vscode/extensions.json
!.vscode/*.code-snippets
# Local History for Visual Studio Code
.history/
# Built Visual Studio Code Extensions
*.vsix
### VisualStudioCode Patch ###
# Ignore all local history of files
.history
.ionide
### VisualStudio ###
## Ignore Visual Studio temporary files, build results, and
## files generated by popular Visual Studio add-ons.
##
## Get latest from https://github.com/github/gitignore/blob/main/VisualStudio.gitignore
# User-specific files
*.rsuser
*.suo
*.user
*.userosscache
*.sln.docstates
# User-specific files (MonoDevelop/Xamarin Studio)
*.userprefs
# Mono auto generated files
mono_crash.*
# Build results
[Dd]ebug/
[Dd]ebugPublic/
[Rr]elease/
[Rr]eleases/
x64/
x86/
[Ww][Ii][Nn]32/
[Aa][Rr][Mm]/
[Aa][Rr][Mm]64/
bld/
[Bb]in/
[Oo]bj/
[Ll]og/
[Ll]ogs/
# Visual Studio 2015/2017 cache/options directory
.vs/
# Uncomment if you have tasks that create the project's static files in wwwroot
#wwwroot/
# Visual Studio 2017 auto generated files
Generated\ Files/
# MSTest test Results
[Tt]est[Rr]esult*/
[Bb]uild[Ll]og.*
# NUnit
*.VisualState.xml
TestResult.xml
nunit-*.xml
# Build Results of an ATL Project
[Dd]ebugPS/
[Rr]eleasePS/
dlldata.c
# Benchmark Results
BenchmarkDotNet.Artifacts/
# .NET Core
project.lock.json
project.fragment.lock.json
artifacts/
# ASP.NET Scaffolding
ScaffoldingReadMe.txt
# StyleCop
StyleCopReport.xml
# Files built by Visual Studio
*_i.c
*_p.c
*_h.h
*.ilk
*.meta
*.obj
*.iobj
*.pch
*.pdb
*.ipdb
*.pgc
*.pgd
*.rsp
*.sbr
*.tlb
*.tli
*.tlh
*.tmp
*.tmp_proj
*_wpftmp.csproj
*.tlog
*.vspscc
*.vssscc
.builds
*.pidb
*.svclog
*.scc
# Chutzpah Test files
_Chutzpah*
# Visual C++ cache files
ipch/
*.aps
*.ncb
*.opendb
*.opensdf
*.cachefile
*.VC.db
*.VC.VC.opendb
# Visual Studio profiler
*.psess
*.vsp
*.vspx
*.sap
# Visual Studio Trace Files
*.e2e
# TFS 2012 Local Workspace
$tf/
# Guidance Automation Toolkit
*.gpState
# ReSharper is a .NET coding add-in
_ReSharper*/
*.[Rr]e[Ss]harper
*.DotSettings.user
# TeamCity is a build add-in
_TeamCity*
# DotCover is a Code Coverage Tool
*.dotCover
# AxoCover is a Code Coverage Tool
.axoCover/*
!.axoCover/settings.json
# Coverlet is a free, cross platform Code Coverage Tool
coverage*.json
coverage*.xml
coverage*.info
# Visual Studio code coverage results
*.coverage
*.coveragexml
# NCrunch
_NCrunch_*
.*crunch*.local.xml
nCrunchTemp_*
# MightyMoose
*.mm.*
AutoTest.Net/
# Web workbench (sass)
.sass-cache/
# Installshield output folder
[Ee]xpress/
# DocProject is a documentation generator add-in
DocProject/buildhelp/
DocProject/Help/*.HxT
DocProject/Help/*.HxC
DocProject/Help/*.hhc
DocProject/Help/*.hhk
DocProject/Help/*.hhp
DocProject/Help/Html2
DocProject/Help/html
# Click-Once directory
publish/
# Publish Web Output
*.[Pp]ublish.xml
*.azurePubxml
# Note: Comment the next line if you want to checkin your web deploy settings,
# but database connection strings (with potential passwords) will be unencrypted
*.pubxml
*.publishproj
# Microsoft Azure Web App publish settings. Comment the next line if you want to
# checkin your Azure Web App publish settings, but sensitive information contained
# in these scripts will be unencrypted
PublishScripts/
# NuGet Packages
*.nupkg
# NuGet Symbol Packages
*.snupkg
# The packages folder can be ignored because of Package Restore
**/[Pp]ackages/*
# except build/, which is used as an MSBuild target.
!**/[Pp]ackages/build/
# Uncomment if necessary however generally it will be regenerated when needed
#!**/[Pp]ackages/repositories.config
# NuGet v3's project.json files produces more ignorable files
*.nuget.props
*.nuget.targets
# Microsoft Azure Build Output
csx/
*.build.csdef
# Microsoft Azure Emulator
ecf/
rcf/
# Windows Store app package directories and files
AppPackages/
BundleArtifacts/
Package.StoreAssociation.xml
_pkginfo.txt
*.appx
*.appxbundle
*.appxupload
# Visual Studio cache files
# files ending in .cache can be ignored
*.[Cc]ache
# but keep track of directories ending in .cache
!?*.[Cc]ache/
# Others
ClientBin/
~$*
*.dbmdl
*.dbproj.schemaview
*.jfm
*.pfx
*.publishsettings
orleans.codegen.cs
# Including strong name files can present a security risk
# (https://github.com/github/gitignore/pull/2483#issue-259490424)
#*.snk
# Since there are multiple workflows, uncomment next line to ignore bower_components
# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
#bower_components/
# RIA/Silverlight projects
Generated_Code/
# Backup & report files from converting an old project file
# to a newer Visual Studio version. Backup files are not needed,
# because we have git ;-)
_UpgradeReport_Files/
Backup*/
UpgradeLog*.XML
UpgradeLog*.htm
ServiceFabricBackup/
*.rptproj.bak
# SQL Server files
*.mdf
*.ldf
*.ndf
# Business Intelligence projects
*.rdl.data
*.bim.layout
*.bim_*.settings
*.rptproj.rsuser
*- [Bb]ackup.rdl
*- [Bb]ackup ([0-9]).rdl
*- [Bb]ackup ([0-9][0-9]).rdl
# Microsoft Fakes
FakesAssemblies/
# GhostDoc plugin setting file
*.GhostDoc.xml
# Node.js Tools for Visual Studio
.ntvs_analysis.dat
# Visual Studio 6 build log
*.plg
# Visual Studio 6 workspace options file
*.opt
# Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
*.vbw
# Visual Studio 6 auto-generated project file (contains which files were open etc.)
*.vbp
# Visual Studio 6 workspace and project file (working project files containing files to include in project)
*.dsw
*.dsp
# Visual Studio 6 technical files
# Visual Studio LightSwitch build output
**/*.HTMLClient/GeneratedArtifacts
**/*.DesktopClient/GeneratedArtifacts
**/*.DesktopClient/ModelManifest.xml
**/*.Server/GeneratedArtifacts
**/*.Server/ModelManifest.xml
_Pvt_Extensions
# Paket dependency manager
.paket/paket.exe
paket-files/
# FAKE - F# Make
.fake/
# CodeRush personal settings
.cr/personal
# Python Tools for Visual Studio (PTVS)
*.pyc
# Cake - Uncomment if you are using it
# tools/**
# !tools/packages.config
# Tabs Studio
*.tss
# Telerik's JustMock configuration file
*.jmconfig
# BizTalk build output
*.btp.cs
*.btm.cs
*.odx.cs
*.xsd.cs
# OpenCover UI analysis results
OpenCover/
# Azure Stream Analytics local run output
ASALocalRun/
# MSBuild Binary and Structured Log
*.binlog
# NVidia Nsight GPU debugger configuration file
*.nvuser
# MFractors (Xamarin productivity tool) working folder
.mfractor/
# Local History for Visual Studio
.localhistory/
# Visual Studio History (VSHistory) files
.vshistory/
# BeatPulse healthcheck temp database
healthchecksdb
# Backup folder for Package Reference Convert tool in Visual Studio 2017
MigrationBackup/
# Ionide (cross platform F# VS Code tools) working folder
.ionide/
# Fody - auto-generated XML schema
FodyWeavers.xsd
# VS Code files for those working on multiple tools
*.code-workspace
# Local History for Visual Studio Code
# Windows Installer files from build outputs
*.cab
*.msi
*.msix
*.msm
*.msp
# JetBrains Rider
*.sln.iml
### VisualStudio Patch ###
# Additional files built by Visual Studio
# End of https://www.toptal.com/developers/gitignore/api/vim,node,data,emacs,python,pycharm,executable,sublimetext,visualstudio,visualstudiocode
*.db

README.md Normal file

@@ -0,0 +1,50 @@
# RSS Link Audit (FastAPI)
A FastAPI app that accepts an RSS/Atom feed URL, fetches each post's full HTML, extracts outbound links, groups them by hostname, **hunts for each host's RSS feed** (common endpoints + homepage discovery), and renders a stylish report using the **Royal Armory** palette.
## Features
- Input a feed URL via UI or JSON.
- Concurrent fetching (httpx + asyncio).
- Extract links from each post page.
- Group by hostname; count occurrences.
- Heuristic RSS discovery (a minimal sketch follows this list):
  - Probe common feed endpoints (e.g. `/feed`, `/rss.xml`, `/atom.xml`).
  - Parse homepage `<link rel="alternate" ...>` for RSS/Atom.
  - Scan homepage `<a>` tags for `rss|atom|feed`.
  - Validate candidates with `feedparser`.
- Report UI:
  - Per-host card with counts.
  - **Bar** visual for how many links a host has.
  - **Top links** (links mentioned more than once).
  - Links list truncated with a **More** button.
  - RSS/Atom badge if found.
- Streams progress and host cards to the UI over SSE; pages, links, and host feeds are cached in SQLite (SQLModel).
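The discovery flow, condensed to a minimal synchronous sketch (the app does this concurrently with `httpx.AsyncClient`; `find_feed` and `COMMON_PATHS` below are illustrative names, not the app's API):
```python
from typing import Optional
from urllib.parse import urljoin

import feedparser
import httpx
from bs4 import BeautifulSoup

COMMON_PATHS = ["/feed", "/rss.xml", "/atom.xml", "/index.xml"]  # abbreviated


def find_feed(base_url: str) -> Optional[str]:
    """Return the first URL that feedparser validates as a real feed."""

    def is_feed(url: str) -> bool:
        parsed = feedparser.parse(url)  # feedparser fetches the URL itself
        return not parsed.bozo and bool(parsed.feed or parsed.entries)

    # 1) Probe common feed endpoints.
    for path in COMMON_PATHS:
        candidate = urljoin(base_url, path)
        if is_feed(candidate):
            return candidate
    # 2) Fall back to homepage <link rel="alternate"> discovery.
    html = httpx.get(base_url, follow_redirects=True).text
    soup = BeautifulSoup(html, "lxml")
    for link in soup.find_all("link", rel="alternate", href=True):
        if "xml" in str(link.get("type", "")).lower():
            candidate = urljoin(base_url, link["href"])
            if is_feed(candidate):
                return candidate
    return None
```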
## Run locally
```bash
python -m venv .venv
source .venv/bin/activate # Windows: .venv\Scripts\activate
pip install -r requirements.txt
uvicorn main:app --reload
```
Open: http://127.0.0.1:8000
## API
```http
POST /api/analyze HTTP/1.1
Content-Type: application/json

{"feed_url": "https://example.com/feed.xml"}
```
Runs the full analysis synchronously and currently returns `{"ok": true}`; the run summary is persisted to the SQLite `FeedRun` table. For streamed per-host results, use the UI flow (`POST /start`, then listen on `/events/{job_id}` via SSE).
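A quick way to exercise it from Python, assuming the server from **Run locally** is up:
```python
import httpx

resp = httpx.post(
    "http://127.0.0.1:8000/api/analyze",
    json={"feed_url": "https://example.com/feed.xml"},
    timeout=None,  # the endpoint blocks until the analysis finishes
)
print(resp.json())  # {'ok': True} on success
```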
## Notes / Caveats
- Only static HTML is parsed (no JS rendering).
- Some sites block bots; results may vary.
- For large feeds, you may wish to trim the number of posts (e.g., slice `post_urls` in `run_analysis_job`; see the sketch below).
- Page, link, and host-feed lookups are cached in a local SQLite database (`cache.db`); consider an external cache (e.g., Redis) if you'll run this frequently across machines.
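A minimal sketch of that trim, assuming you edit `run_analysis_job` in `main.py` (the cap of 25 is an arbitrary example):
```python
# Inside run_analysis_job, right after the feed is parsed:
post_urls = await fetch_feed_entries(client, job.feed_url)
post_urls = post_urls[:25]  # hypothetical cap on posts per run
```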

main.py Normal file

@@ -0,0 +1,537 @@
# main.py (v1.2) — robust feed parsing, clearer SSE progress, normalized host caching, concurrent discovery
import asyncio
import json
import uuid
from collections import Counter
from dataclasses import dataclass, field
from typing import Dict, List, Optional, Set, Tuple
from urllib.parse import urljoin, urlparse
from contextlib import asynccontextmanager
import httpx
import feedparser
from bs4 import BeautifulSoup
from fastapi import FastAPI, Request, Form, HTTPException
from fastapi.responses import HTMLResponse, JSONResponse, StreamingResponse
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates
from sqlmodel import Field, SQLModel, create_engine, Session, select
from datetime import datetime, timezone
# ------------------------------
# Settings / Constants
# ------------------------------
REQUEST_TIMEOUT = httpx.Timeout(15.0, connect=8.0, read=15.0)
HEADERS = {
"User-Agent": "LinkAuditBot/1.2 (+https://example.com; contact: admin@example.com)"
}
COMMON_FEED_PATHS = [
"/feed", "/feed/", "/feed.xml",
"/rss", "/rss.xml", "/rss/",
"/atom", "/atom.xml",
"/index.xml",
"/blog/feed", "/blog/rss", "/blog/rss.xml", "/blog/index.xml",
"/feeds/posts/default?alt=rss", # Blogger
"/news/atom.xml", "/news/rss.xml",
"/.rss", "/?feed=rss2", # WP variants
"/category/news/feed", "/?feed=atom",
]
FEED_MIME_HINTS = {
"application/rss+xml",
"application/atom+xml",
"application/xml",
"text/xml",
}
DISCOVERY_CONCURRENCY = 10
# ------------------------------
# Database Models (SQLModel)
# ------------------------------
class PageCache(SQLModel, table=True):
url: str = Field(primary_key=True)
html: Optional[str] = None
fetched_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
class LinksCache(SQLModel, table=True):
url: str = Field(primary_key=True)
links_json: str # JSON list[str]
extracted_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
class HostFeedCache(SQLModel, table=True):
hostname: str = Field(primary_key=True) # normalized!
feed_url: Optional[str] = None
checked_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
class FeedRun(SQLModel, table=True):
id: str = Field(primary_key=True, default_factory=lambda: str(uuid.uuid4()))
feed_url: str
started_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
finished_at: Optional[datetime] = None
summary_json: Optional[str] = None # store last summary, if desired
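# SQLite engine and table creation run at import time; adequate for a
# single-process dev app, but move into a startup hook for multi-worker setups.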
engine = create_engine("sqlite:///cache.db", echo=False)
SQLModel.metadata.create_all(engine)
# ------------------------------
# Data models
# ------------------------------
@dataclass
class HostSummary:
hostname: str
count: int = 0
unique_links: Set[str] = field(default_factory=set)
link_counts: Counter = field(default_factory=Counter)
feed_url: Optional[str] = None
# ------------------------------
# Utilities
# ------------------------------
def now_utc() -> datetime:
return datetime.now(timezone.utc)
def normalize_host(host: str) -> str:
if not host:
return host
h = host.strip().lower().rstrip(".")
if h.startswith("www."):
h = h[4:]
return h
def is_http_url(href: str) -> bool:
try:
p = urlparse(href)
return p.scheme in ("http", "https")
except Exception:
return False
def absolutize(href: str, base_url: str) -> Optional[str]:
if not href:
return None
if href.startswith("#") or href.startswith("mailto:") or href.startswith("tel:"):
return None
try:
abs_url = urljoin(base_url, href)
if is_http_url(abs_url):
return abs_url
except Exception:
return None
return None
def extract_links_from_html(html: str, base_url: str) -> List[str]:
soup = BeautifulSoup(html, "lxml")
links: List[str] = []
for a in soup.find_all("a", href=True):
u = absolutize(a.get("href"), base_url)
if u:
links.append(u)
return links
# ------------------------------
# Networking
# ------------------------------
async def fetch_text(client: httpx.AsyncClient, url: str) -> Optional[str]:
try:
r = await client.get(url, headers=HEADERS, timeout=REQUEST_TIMEOUT, follow_redirects=True)
if r.status_code < 400:
            # Respect the server-declared charset; fall back to UTF-8 when the
            # header omits one (httpx has no requests-style apparent_encoding).
            if r.encoding is None:
                r.encoding = "utf-8"
return r.text
except Exception:
return None
return None
async def fetch_bytes(client: httpx.AsyncClient, url: str) -> Optional[Tuple[bytes, Optional[str]]]:
try:
r = await client.get(url, headers=HEADERS, timeout=REQUEST_TIMEOUT, follow_redirects=True)
if r.status_code < 400:
ctype = r.headers.get("content-type")
return r.content, ctype
except Exception:
return None
return None
async def fetch_head_ok(client: httpx.AsyncClient, url: str) -> Tuple[bool, Optional[str]]:
try:
r = await client.head(url, headers=HEADERS, timeout=REQUEST_TIMEOUT, follow_redirects=True)
if r.status_code < 400:
return True, r.headers.get("content-type")
except Exception:
pass
try:
r = await client.get(url, headers=HEADERS, timeout=REQUEST_TIMEOUT, follow_redirects=True)
if r.status_code < 400:
return True, r.headers.get("content-type")
except Exception:
pass
return False, None
# ------------------------------
# Cache helpers
# ------------------------------
def cache_get_page(url: str) -> Optional[PageCache]:
with Session(engine) as sess:
return sess.get(PageCache, url)
def cache_set_page(url: str, html: Optional[str]):
with Session(engine) as sess:
sess.merge(PageCache(url=url, html=html, fetched_at=now_utc()))
sess.commit()
def cache_get_links(url: str) -> Optional[List[str]]:
with Session(engine) as sess:
row = sess.get(LinksCache, url)
if not row:
return None
try:
return json.loads(row.links_json)
except Exception:
return None
def cache_set_links(url: str, links: List[str]):
with Session(engine) as sess:
sess.merge(LinksCache(url=url, links_json=json.dumps(links), extracted_at=now_utc()))
sess.commit()
def cache_get_host_feed(hostname: str) -> Optional[str]:
host_key = normalize_host(hostname)
with Session(engine) as sess:
row = sess.get(HostFeedCache, host_key)
return row.feed_url if row else None
def cache_set_host_feed(hostname: str, feed_url: Optional[str]):
host_key = normalize_host(hostname)
with Session(engine) as sess:
sess.merge(HostFeedCache(hostname=host_key, feed_url=feed_url, checked_at=now_utc()))
sess.commit()
# ------------------------------
# Cached fetch/extract
# ------------------------------
async def fetch_page_html(client: httpx.AsyncClient, url: str) -> Optional[str]:
cached = cache_get_page(url)
if cached and cached.html:
return cached.html
html = await fetch_text(client, url)
cache_set_page(url, html)
return html
async def get_links_for_page(client: httpx.AsyncClient, url: str) -> List[str]:
cached = cache_get_links(url)
if cached is not None:
return cached
html = await fetch_page_html(client, url)
if not html:
cache_set_links(url, [])
return []
links = extract_links_from_html(html, url)
cache_set_links(url, links)
return links
# ------------------------------
# Robust feed parsing
# ------------------------------
async def fetch_feed_entries(client: httpx.AsyncClient, feed_url: str) -> List[str]:
"""
Fetch feed as bytes and let feedparser infer encoding using headers.
Retry a couple fallbacks for mismatched declarations.
"""
got = await fetch_bytes(client, feed_url)
if not got:
raise ValueError("Could not download the feed.")
content, ctype = got
parsed = feedparser.parse(content)
if parsed.bozo == 0 and (parsed.feed or parsed.entries):
return _entries_to_urls(parsed)
    # Fallback 1: strip a UTF-8 byte-order mark, if present
    had_bom = content.startswith(b"\xef\xbb\xbf")
    cleaned = content[3:] if had_bom else content
    if had_bom:
parsed2 = feedparser.parse(cleaned)
if parsed2.bozo == 0 and (parsed2.feed or parsed2.entries):
return _entries_to_urls(parsed2)
# Fallback 2: replace us-ascii decl with utf-8
try:
cleaned2 = cleaned.replace(b'encoding="us-ascii"', b'encoding="utf-8"')
parsed3 = feedparser.parse(cleaned2)
if parsed3.bozo == 0 and (parsed3.feed or parsed3.entries):
return _entries_to_urls(parsed3)
except Exception:
pass
raise ValueError(f"Could not parse feed: {getattr(parsed, 'bozo_exception', 'unknown parse error')}")
def _entries_to_urls(parsed) -> List[str]:
urls: List[str] = []
for e in parsed.entries:
if getattr(e, "link", None):
urls.append(e.link)
elif getattr(e, "id", None) and is_http_url(e.id):
urls.append(e.id)
seen, out = set(), []
for u in urls:
if u not in seen:
seen.add(u)
out.append(u)
return out
# ------------------------------
# Feed discovery (normalized + concurrent)
# ------------------------------
async def discover_feed_for_host(client: httpx.AsyncClient, hostname: str) -> Optional[str]:
host_key = normalize_host(hostname)
cached = cache_get_host_feed(host_key)
if cached is not None:
return cached
    # normalize_host() already strips any leading "www.", so probe both the
    # bare host and its "www." variant over https and http.
    canon = host_key
    bases = [
        f"https://{canon}",
        f"http://{canon}",
        f"https://www.{canon}",
        f"http://www.{canon}",
    ]
async def try_candidate(url: str) -> Optional[str]:
ok, ctype = await fetch_head_ok(client, url)
if ok and (not ctype or any(mt in ctype for mt in FEED_MIME_HINTS)):
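            # NOTE: feedparser.parse(url) re-downloads the candidate
            # synchronously, blocking the event loop; wrapping it in
            # asyncio.to_thread would keep discovery fully async.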
parsed = feedparser.parse(url)
if parsed.bozo == 0 and (parsed.feed or parsed.entries):
return url
return None
tasks = []
for base in bases:
for path in COMMON_FEED_PATHS:
tasks.append(asyncio.create_task(try_candidate(base + path)))
    for t in asyncio.as_completed(tasks):
        res = await t
        if res:
            # Cancel the remaining probes before returning the first hit.
            for task in tasks:
                task.cancel()
            cache_set_host_feed(host_key, res)
            return res
for base in bases:
html = await fetch_page_html(client, base + "/")
if not html:
continue
soup = BeautifulSoup(html, "lxml")
for link in soup.find_all("link", rel=True, href=True):
rels = link.get("rel")
if isinstance(rels, list):
rels = {r.lower() for r in rels if r}
else:
rels = {str(rels).lower()}
typ = str(link.get("type", "")).lower()
href = link.get("href")
if "alternate" in rels and any(mt in typ for mt in ("rss", "atom", "xml")):
feed_url = urljoin(base + "/", href)
parsed = feedparser.parse(feed_url)
if parsed.bozo == 0 and (parsed.feed or parsed.entries):
cache_set_host_feed(host_key, feed_url)
return feed_url
for a in soup.find_all("a", href=True):
href = a.get("href", "")
if any(tok in href.lower() for tok in ("rss", "atom", "feed")):
feed_url = urljoin(base + "/", href)
ok, ctype = await fetch_head_ok(client, feed_url)
if ok:
parsed = feedparser.parse(feed_url)
if parsed.bozo == 0 and (parsed.feed or parsed.entries):
cache_set_host_feed(host_key, feed_url)
return feed_url
cache_set_host_feed(host_key, None)
return None
# ------------------------------
# SSE plumbing
# ------------------------------
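# Each Job owns an asyncio.Queue of pre-formatted SSE frames; run_analysis_job
# produces them and the /events/{job_id} generator is the lone consumer.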
class Job:
def __init__(self, feed_url: str):
self.id = str(uuid.uuid4())
self.feed_url = feed_url
self.queue: asyncio.Queue[str] = asyncio.Queue()
self.done = asyncio.Event()
async def emit(self, event: str, data: dict):
payload = {"event": event, "data": data, "ts": datetime.now(timezone.utc).isoformat()}
await self.queue.put(f"event: {event}\ndata: {json.dumps(payload)}\n\n")
async def finish(self):
self.done.set()
await self.queue.put("event: done\ndata: {}\n\n")
JOBS: Dict[str, Job] = {}
async def run_analysis_job(job: Job):
with Session(engine) as sess:
fr = FeedRun(feed_url=job.feed_url)
sess.add(fr)
sess.commit()
async with httpx.AsyncClient(http2=True) as client:
try:
await job.emit("status", {"stage": "feed", "message": "Downloading and parsing feed…"})
post_urls = await fetch_feed_entries(client, job.feed_url)
await job.emit("posts", {"count": len(post_urls)})
all_links: List[str] = []
for idx, post_url in enumerate(post_urls, start=1):
await job.emit("status", {"stage": "posts", "message": f"Fetching post {idx}/{len(post_urls)}"})
links = await get_links_for_page(client, post_url)
all_links.extend(links)
await job.emit("post_progress", {"current": idx, "total": len(post_urls), "post_url": post_url})
host_map: Dict[str, HostSummary] = {}
for link in all_links:
host = normalize_host(urlparse(link).netloc)
if not host:
continue
hs = host_map.setdefault(host, HostSummary(hostname=host))
hs.count += 1
hs.unique_links.add(link)
hs.link_counts[link] += 1
hosts_sorted = sorted(host_map.values(), key=lambda s: s.count, reverse=True)
await job.emit("hosts", {"count": len(hosts_sorted)})
sem = asyncio.Semaphore(DISCOVERY_CONCURRENCY)
max_count = max((h.count for h in hosts_sorted), default=1)
async def work(hs: HostSummary, idx: int, total: int):
async with sem:
await job.emit("status", {"stage": "discover", "message": f"Discovering feed for {hs.hostname} ({idx}/{total})"})
feed = await discover_feed_for_host(client, hs.hostname)
hs.feed_url = feed
host_dict = {
"hostname": hs.hostname,
"count": hs.count,
"unique_link_count": len(hs.unique_links),
"links": sorted(list(hs.unique_links)),
"top_links": [
{"url": url, "count": cnt}
for url, cnt in hs.link_counts.most_common()
if cnt > 1
],
"feed_url": hs.feed_url,
}
html = render_host_card(host_dict, max_count, index=idx)
await job.emit("host_card", {"html": html, "index": idx, "total": total})
tasks = [asyncio.create_task(work(hs, i, len(hosts_sorted))) for i, hs in enumerate(hosts_sorted, start=1)]
async def heartbeat():
while any(not t.done() for t in tasks):
await job.emit("status", {"stage": "discover", "message": "Still discovering host feeds…"})
await asyncio.sleep(3)
hb = asyncio.create_task(heartbeat())
await asyncio.gather(*tasks)
hb.cancel()
summary = {
"feed_url": job.feed_url,
"post_count": len(post_urls),
"hosts": [h.hostname for h in hosts_sorted],
"fetched_at": datetime.now(timezone.utc).isoformat(),
}
with Session(engine) as sess:
fr = sess.exec(select(FeedRun).where(FeedRun.feed_url == job.feed_url).order_by(FeedRun.started_at.desc())).first()
if fr:
fr.summary_json = json.dumps(summary)
fr.finished_at = datetime.now(timezone.utc)
sess.add(fr)
sess.commit()
await job.emit("summary", summary)
except Exception as e:
await job.emit("error", {"message": str(e)})
finally:
await job.finish()
# ------------------------------
# Template rendering for components
# ------------------------------
templates = Jinja2Templates(directory="templates")
def render_host_card(host: dict, max_count: int, index: int) -> str:
    # Jinja2Templates expects a `request` in the template context; outside a
    # real HTTP request, a minimal stand-in with a .state attribute suffices.
    class _DummyRequest:
        def __init__(self):
            self.state = type("State", (), {})()
    req = _DummyRequest()
html = templates.get_template("components/host_card.html").render(
request=req, host=host, max_count=max_count, index=index
)
return html
# ------------------------------
# FastAPI app + routes
# ------------------------------
app = FastAPI(title="RSS Link Audit", version="1.2.0")
app.mount("/static", StaticFiles(directory="static"), name="static")
@app.get("/", response_class=HTMLResponse)
async def index(request: Request):
return templates.TemplateResponse("index.html", {"request": request})
@app.post("/start", response_class=JSONResponse)
async def start(feed_url: str = Form(...)):
job = Job(feed_url)
JOBS[job.id] = job
asyncio.create_task(run_analysis_job(job))
return {"job_id": job.id}
@app.get("/events/{job_id}")
async def sse(job_id: str):
job = JOBS.get(job_id)
if not job:
raise HTTPException(404, "Job not found")
async def event_gen():
yield f"event: hello\ndata: {{\"job_id\":\"{job.id}\"}}\n\n"
while True:
try:
item = await asyncio.wait_for(job.queue.get(), timeout=30.0)
yield item
if job.done.is_set():
break
except asyncio.TimeoutError:
yield "event: ping\ndata: {}\n\n"
JOBS.pop(job.id, None)
return StreamingResponse(event_gen(), media_type="text/event-stream")
@app.post("/api/analyze", response_class=JSONResponse)
async def analyze_api(payload: Dict):
feed_url = payload.get("feed_url")
if not feed_url:
raise HTTPException(status_code=400, detail="Missing 'feed_url'")
job = Job(feed_url)
await run_analysis_job(job)
return JSONResponse(content={"ok": True})
@app.get("/healthz")
async def healthz():
return {"ok": True}

requirements.txt Normal file

@@ -0,0 +1,10 @@
fastapi
uvicorn[standard]
httpx[http2]  # main.py uses AsyncClient(http2=True), which needs the h2 extra
feedparser
beautifulsoup4
lxml
jinja2
sqlmodel
aiosqlite
sqlalchemy>=2.0

static/styles.css Normal file

@@ -0,0 +1,84 @@
/* Royal Armory Palette */
:root {
--ra-ink: #000030;
--ra-plum: #3f0a57;
--ra-magenta: #85106b;
--ra-ruby: #b02c2c;
--ra-bronze: #b8673e;
--ra-amber: #d9932b;
--ra-gold: #f0bd71;
--ra-cream: #ffe3ba;
--ra-bg: var(--ra-ink);
--ra-panel: #0b0b3f;
--ra-copper: #6f3b2b;
--ra-ruby-dark: #8c2323;
}
* { box-sizing: border-box; }
html, body {
margin: 0;
background: var(--ra-bg);
color: var(--ra-cream);
font-family: ui-sans-serif, system-ui, -apple-system, "Segoe UI", Roboto, Ubuntu, Cantarell, "Noto Sans", Arial, sans-serif;
line-height: 1.5;
}
a { color: var(--ra-gold); text-decoration: none; }
a:hover { text-decoration: underline; }
header, footer { background: linear-gradient(0deg, rgba(64,10,87,0.25), rgba(64,10,87,0.25)); }
main { padding: 1rem; }
.link { word-break: break-all; text-underline-offset: 3px; }
.bar-wrap { width: 100%; background: rgba(240,189,113,0.12); height: 12px; }
.bar { height: 12px; background: linear-gradient(90deg, var(--ra-amber), var(--ra-gold)); }
.btn-more { background: var(--ra-plum); padding: 6px 10px; border-radius: 10px; font-weight: 600; color: var(--ra-cream); }
.more-list[data-expanded="false"] { display: none; }
.more-list[data-expanded="true"] { display: block; }
/* utilities */
.max-w-3xl { max-width: 48rem; }
.max-w-5xl { max-width: 64rem; }
.mx-auto { margin-left: auto; margin-right: auto; }
.p-6 { padding: 1.5rem; }
.p-5 { padding: 1.25rem; }
.p-4 { padding: 1rem; }
.px-6 { padding-left: 1.5rem; padding-right: 1.5rem; }
.py-4 { padding-top: 1rem; padding-bottom: 1rem; }
.py-10{ padding-top: 2.5rem; padding-bottom: 2.5rem; }
.mt-2 { margin-top: 0.5rem; }
.mt-4 { margin-top: 1rem; }
.mt-6 { margin-top: 1.5rem; }
.mb-1 { margin-bottom: 0.25rem; }
.mb-2 { margin-bottom: 0.5rem; }
.space-y-1 > * + * { margin-top: 0.25rem; }
.space-y-6 > * + * { margin-top: 1.5rem; }
.rounded-2xl { border-radius: 1rem; }
.rounded-xl { border-radius: 0.75rem; }
.shadow { box-shadow: 0 10px 30px rgba(0,0,0,0.25); }
.font-bold { font-weight: 700; }
.font-semibold { font-weight: 600; }
.text-sm { font-size: 0.875rem; }
.text-xl { font-size: 1.25rem; }
.text-3xl { font-size: 1.875rem; }
.opacity-70 { opacity: 0.7; }
.opacity-80 { opacity: 0.8; }
.border { border-width: 1px; }
.border-b { border-bottom-width: 1px; }
.flex { display: flex; }
.items-center { align-items: center; }
.items-baseline { align-items: baseline; }
.justify-between { justify-content: space-between; }
.gap-2 { gap: 0.5rem; }
.gap-4 { gap: 1rem; }
.min-w-0 { min-width: 0; }
.shrink-0 { flex-shrink: 0; }
.break-all { word-break: break-all; }
.w-3 { width: 0.75rem; }
.h-3 { height: 0.75rem; }
input, button { border: none; }
button { cursor: pointer; }

templates/components/host_card.html Normal file

@@ -0,0 +1,62 @@
{% set pct = (100 * host.count // (max_count or 1)) %}
<article class="rounded-2xl bg-[var(--ra-panel)] border border-[var(--ra-copper)] overflow-hidden">
<header class="p-4 flex items-center justify-between gap-4">
<div class="min-w-0">
<h2 class="text-xl font-semibold break-all">{{ host.hostname }}</h2>
<div class="text-sm opacity-80">
<span class="mr-3">Links: <strong>{{ host.count }}</strong></span>
<span>Unique: <strong>{{ host.unique_link_count }}</strong></span>
</div>
</div>
{% if host.feed_url %}
<a href="{{ host.feed_url }}" target="_blank" rel="noopener"
class="shrink-0 px-3 py-1 rounded-lg bg-[var(--ra-amber)] text-[var(--ra-ink)] font-semibold hover:opacity-90">
RSS / Atom
</a>
{% endif %}
</header>
<div class="bar-wrap">
<div class="bar" style="width: {{ pct }}%"></div>
</div>
<div class="p-4 space-y-4">
{% if host.top_links %}
<div>
<div class="text-sm font-semibold mb-2">Top links (mentioned &gt; 1):</div>
<ul class="space-y-1 text-sm">
{% for tl in host.top_links %}
<li class="flex items-baseline gap-2">
<span class="inline-block px-2 py-0.5 rounded-md bg-[var(--ra-ruby)]">{{ tl.count }}</span>
<a class="link" href="{{ tl.url }}" target="_blank" rel="noopener">{{ tl.url }}</a>
</li>
{% endfor %}
</ul>
</div>
{% endif %}
{% set list_id = "links-" ~ index %}
{% set links = host.links %}
{% set preview = links[:8] %}
{% set remainder = links[8:] %}
<div>
<div class="text-sm font-semibold mb-2">Links:</div>
<ul class="space-y-1 text-sm">
{% for url in preview %}
<li><a class="link" href="{{ url }}" target="_blank" rel="noopener">{{ url }}</a></li>
{% endfor %}
</ul>
{% if remainder %}
<div id="{{ list_id }}" class="more-list" data-expanded="false">
<ul class="space-y-1 text-sm">
{% for url in remainder %}
<li><a class="link" href="{{ url }}" target="_blank" rel="noopener">{{ url }}</a></li>
{% endfor %}
</ul>
</div>
<button class="btn-more mt-2" data-more-btn data-target="{{ list_id }}">More</button>
{% endif %}
</div>
</div>
</article>

templates/index.html Normal file

@@ -0,0 +1,104 @@
{% extends "layout.html" %}
{% block content %}
<section class="mx-auto max-w-3xl p-6">
<h1 class="text-3xl font-bold mb-2">RSS Link Audit</h1>
<p class="mb-6 opacity-90">Paste a feed URL. This version uses <strong>SQLite/SQLModel caching</strong> and streams progress over <strong>SSE</strong>.</p>
<form id="feed-form" class="space-y-4 bg-[var(--ra-panel)] p-5 rounded-2xl shadow">
<label class="block">
<span class="block mb-2 font-semibold">Feed URL</span>
<input id="feed-input" type="url" name="feed_url" placeholder="https://example.com/feed.xml"
required
class="w-full p-3 rounded-xl bg-[var(--ra-ink)] text-[var(--ra-cream)] border border-[var(--ra-copper)] focus:outline-none focus:ring-2 focus:ring-[var(--ra-amber)]" />
</label>
<button class="px-4 py-2 rounded-xl font-semibold bg-[var(--ra-ruby)] hover:bg-[var(--ra-ruby-dark)]">
Analyze
</button>
</form>
<div id="status" class="mt-6 text-sm opacity-80"></div>
<section id="summary" class="mt-6"></section>
<section id="hosts" class="mt-4 space-y-6"></section>
</section>
<script>
const statusEl = document.getElementById('status');
const hostsEl = document.getElementById('hosts');
const summaryEl = document.getElementById('summary');
const form = document.getElementById('feed-form');
function setStatus(html) { statusEl.innerHTML = html; }
function appendHostCard(html) {
const div = document.createElement('div');
div.innerHTML = html;
hostsEl.appendChild(div.firstElementChild);
}
function setSummary(feed_url, post_count, host_count) {
summaryEl.innerHTML = `
<div class="rounded-2xl bg-[var(--ra-panel)] border border-[var(--ra-copper)] p-4">
<div class="font-semibold mb-1">Summary</div>
<div>Feed: <a class="underline" href="${feed_url}" target="_blank" rel="noopener">${feed_url}</a></div>
<div>Posts parsed: <strong>${post_count}</strong></div>
<div>Hosts found: <strong>${host_count}</strong></div>
</div>`;
}
form.addEventListener('submit', async (e) => {
e.preventDefault();
hostsEl.innerHTML = '';
summaryEl.innerHTML = '';
setStatus('Starting…');
const fd = new FormData(form);
const resp = await fetch('/start', { method: 'POST', body: fd });
if (!resp.ok) {
setStatus('Failed to start.');
return;
}
const { job_id } = await resp.json();
setStatus('Job started. Connecting…');
const es = new EventSource(`/events/${job_id}`);
let postCount = 0, hostsCount = 0, seenCards = 0;
es.addEventListener('hello', () => setStatus('Connected. Parsing feed…'));
es.addEventListener('status', (ev) => {
const d = JSON.parse(ev.data).data;
setStatus(`${d.message}`);
});
es.addEventListener('posts', (ev) => {
const data = JSON.parse(ev.data).data;
postCount = data.count || 0;
setStatus(`Posts: ${postCount}. Fetching pages…`);
});
es.addEventListener('post_progress', (ev) => {
const d = JSON.parse(ev.data).data;
setStatus(`Fetching posts ${d.current}/${d.total}…`);
});
es.addEventListener('hosts', (ev) => {
const data = JSON.parse(ev.data).data;
hostsCount = data.count || 0;
setStatus(`Found ${hostsCount} hosts. Discovering their feeds…`);
});
es.addEventListener('host_card', (ev) => {
const data = JSON.parse(ev.data).data;
appendHostCard(data.html);
seenCards = data.index;
setStatus(`Rendered ${seenCards}/${data.total} hosts… Still discovering feeds…`);
});
es.addEventListener('summary', (ev) => {
const data = JSON.parse(ev.data).data;
setSummary(data.feed_url, postCount, hostsCount);
});
es.addEventListener('error', (ev) => {
  // Native EventSource connection errors also fire here, with no payload;
  // only parse our server-sent "error" events, which carry JSON data.
  if (!ev.data) return;
  const data = JSON.parse(ev.data).data;
  setStatus('Error: ' + (data.message || 'Unknown'));
});
es.addEventListener('done', () => {
setStatus('Done.');
es.close();
});
});
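// Toggle handler for the "More" buttons rendered into host cards
// (delegated, since cards stream in over SSE after this script runs).
hostsEl.addEventListener('click', (e) => {
  const btn = e.target.closest('[data-more-btn]');
  if (!btn) return;
  const list = document.getElementById(btn.dataset.target);
  if (!list) return;
  const expanded = list.dataset.expanded === 'true';
  list.dataset.expanded = expanded ? 'false' : 'true';
  btn.textContent = expanded ? 'More' : 'Less';
});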
</script>
{% endblock %}

templates/layout.html Normal file

@@ -0,0 +1,24 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8"/>
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<title>RSS Link Audit</title>
<link rel="stylesheet" href="/static/styles.css"/>
</head>
<body>
<header class="px-6 py-4 border-b border-[var(--ra-copper)]">
<div class="max-w-5xl mx-auto flex items-center gap-4">
<div class="w-3 h-3 rounded-full bg-[var(--ra-gold)]"></div>
<a href="/" class="font-bold hover:underline">RSS Link Audit</a>
<span class="opacity-70 text-sm">with SQLite cache + SSE</span>
</div>
</header>
<main class="max-w-5xl mx-auto">
{% block content %}{% endblock %}
</main>
<footer class="px-6 py-10 text-sm opacity-70">
<div class="max-w-5xl mx-auto">Built with FastAPI • Palette: Royal Armory</div>
</footer>
</body>
</html>