Initial Commit for rss-link-app
Analyze links from rss feeds
This commit is contained in:
commit
060f998c59
8 changed files with 1837 additions and 0 deletions
966
.gitignore
vendored
Normal file
966
.gitignore
vendored
Normal file
|
|
@ -0,0 +1,966 @@
|
|||
# Created by https://www.toptal.com/developers/gitignore/api/vim,node,data,emacs,python,pycharm,executable,sublimetext,visualstudio,visualstudiocode
|
||||
# Edit at https://www.toptal.com/developers/gitignore?templates=vim,node,data,emacs,python,pycharm,executable,sublimetext,visualstudio,visualstudiocode
|
||||
|
||||
### Data ###
|
||||
*.csv
|
||||
*.dat
|
||||
*.efx
|
||||
*.gbr
|
||||
*.key
|
||||
*.pps
|
||||
*.ppt
|
||||
*.pptx
|
||||
*.sdf
|
||||
*.tax2010
|
||||
*.vcf
|
||||
*.xml
|
||||
|
||||
### Emacs ###
|
||||
# -*- mode: gitignore; -*-
|
||||
*~
|
||||
\#*\#
|
||||
/.emacs.desktop
|
||||
/.emacs.desktop.lock
|
||||
*.elc
|
||||
auto-save-list
|
||||
tramp
|
||||
.\#*
|
||||
|
||||
# Org-mode
|
||||
.org-id-locations
|
||||
*_archive
|
||||
|
||||
# flymake-mode
|
||||
*_flymake.*
|
||||
|
||||
# eshell files
|
||||
/eshell/history
|
||||
/eshell/lastdir
|
||||
|
||||
# elpa packages
|
||||
/elpa/
|
||||
|
||||
# reftex files
|
||||
*.rel
|
||||
|
||||
# AUCTeX auto folder
|
||||
/auto/
|
||||
|
||||
# cask packages
|
||||
.cask/
|
||||
dist/
|
||||
|
||||
# Flycheck
|
||||
flycheck_*.el
|
||||
|
||||
# server auth directory
|
||||
/server/
|
||||
|
||||
# projectiles files
|
||||
.projectile
|
||||
|
||||
# directory configuration
|
||||
.dir-locals.el
|
||||
|
||||
# network security
|
||||
/network-security.data
|
||||
|
||||
|
||||
### Executable ###
|
||||
*.app
|
||||
*.bat
|
||||
*.cgi
|
||||
*.com
|
||||
*.exe
|
||||
*.gadget
|
||||
*.jar
|
||||
*.pif
|
||||
*.vb
|
||||
*.wsf
|
||||
|
||||
### Node ###
|
||||
# Logs
|
||||
logs
|
||||
*.log
|
||||
npm-debug.log*
|
||||
yarn-debug.log*
|
||||
yarn-error.log*
|
||||
lerna-debug.log*
|
||||
.pnpm-debug.log*
|
||||
|
||||
# Diagnostic reports (https://nodejs.org/api/report.html)
|
||||
report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
|
||||
|
||||
# Runtime data
|
||||
pids
|
||||
*.pid
|
||||
*.seed
|
||||
*.pid.lock
|
||||
|
||||
# Directory for instrumented libs generated by jscoverage/JSCover
|
||||
lib-cov
|
||||
|
||||
# Coverage directory used by tools like istanbul
|
||||
coverage
|
||||
*.lcov
|
||||
|
||||
# nyc test coverage
|
||||
.nyc_output
|
||||
|
||||
# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
|
||||
.grunt
|
||||
|
||||
# Bower dependency directory (https://bower.io/)
|
||||
bower_components
|
||||
|
||||
# node-waf configuration
|
||||
.lock-wscript
|
||||
|
||||
# Compiled binary addons (https://nodejs.org/api/addons.html)
|
||||
build/Release
|
||||
|
||||
# Dependency directories
|
||||
node_modules/
|
||||
jspm_packages/
|
||||
|
||||
# Snowpack dependency directory (https://snowpack.dev/)
|
||||
web_modules/
|
||||
|
||||
# TypeScript cache
|
||||
*.tsbuildinfo
|
||||
|
||||
# Optional npm cache directory
|
||||
.npm
|
||||
|
||||
# Optional eslint cache
|
||||
.eslintcache
|
||||
|
||||
# Optional stylelint cache
|
||||
.stylelintcache
|
||||
|
||||
# Microbundle cache
|
||||
.rpt2_cache/
|
||||
.rts2_cache_cjs/
|
||||
.rts2_cache_es/
|
||||
.rts2_cache_umd/
|
||||
|
||||
# Optional REPL history
|
||||
.node_repl_history
|
||||
|
||||
# Output of 'npm pack'
|
||||
*.tgz
|
||||
|
||||
# Yarn Integrity file
|
||||
.yarn-integrity
|
||||
|
||||
# dotenv environment variable files
|
||||
.env
|
||||
.env.development.local
|
||||
.env.test.local
|
||||
.env.production.local
|
||||
.env.local
|
||||
|
||||
# parcel-bundler cache (https://parceljs.org/)
|
||||
.cache
|
||||
.parcel-cache
|
||||
|
||||
# Next.js build output
|
||||
.next
|
||||
out
|
||||
|
||||
# Nuxt.js build / generate output
|
||||
.nuxt
|
||||
dist
|
||||
|
||||
# Gatsby files
|
||||
.cache/
|
||||
# Comment in the public line in if your project uses Gatsby and not Next.js
|
||||
# https://nextjs.org/blog/next-9-1#public-directory-support
|
||||
# public
|
||||
|
||||
# vuepress build output
|
||||
.vuepress/dist
|
||||
|
||||
# vuepress v2.x temp and cache directory
|
||||
.temp
|
||||
|
||||
# Docusaurus cache and generated files
|
||||
.docusaurus
|
||||
|
||||
# Serverless directories
|
||||
.serverless/
|
||||
|
||||
# FuseBox cache
|
||||
.fusebox/
|
||||
|
||||
# DynamoDB Local files
|
||||
.dynamodb/
|
||||
|
||||
# TernJS port file
|
||||
.tern-port
|
||||
|
||||
# Stores VSCode versions used for testing VSCode extensions
|
||||
.vscode-test
|
||||
|
||||
# yarn v2
|
||||
.yarn/cache
|
||||
.yarn/unplugged
|
||||
.yarn/build-state.yml
|
||||
.yarn/install-state.gz
|
||||
.pnp.*
|
||||
|
||||
### Node Patch ###
|
||||
# Serverless Webpack directories
|
||||
.webpack/
|
||||
|
||||
# Optional stylelint cache
|
||||
|
||||
# SvelteKit build / generate output
|
||||
.svelte-kit
|
||||
|
||||
### PyCharm ###
|
||||
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
|
||||
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
|
||||
|
||||
# User-specific stuff
|
||||
.idea/**/workspace.xml
|
||||
.idea/**/tasks.xml
|
||||
.idea/**/usage.statistics.xml
|
||||
.idea/**/dictionaries
|
||||
.idea/**/shelf
|
||||
|
||||
# AWS User-specific
|
||||
.idea/**/aws.xml
|
||||
|
||||
# Generated files
|
||||
.idea/**/contentModel.xml
|
||||
|
||||
# Sensitive or high-churn files
|
||||
.idea/**/dataSources/
|
||||
.idea/**/dataSources.ids
|
||||
.idea/**/dataSources.local.xml
|
||||
.idea/**/sqlDataSources.xml
|
||||
.idea/**/dynamic.xml
|
||||
.idea/**/uiDesigner.xml
|
||||
.idea/**/dbnavigator.xml
|
||||
|
||||
# Gradle
|
||||
.idea/**/gradle.xml
|
||||
.idea/**/libraries
|
||||
|
||||
# Gradle and Maven with auto-import
|
||||
# When using Gradle or Maven with auto-import, you should exclude module files,
|
||||
# since they will be recreated, and may cause churn. Uncomment if using
|
||||
# auto-import.
|
||||
# .idea/artifacts
|
||||
# .idea/compiler.xml
|
||||
# .idea/jarRepositories.xml
|
||||
# .idea/modules.xml
|
||||
# .idea/*.iml
|
||||
# .idea/modules
|
||||
# *.iml
|
||||
# *.ipr
|
||||
|
||||
# CMake
|
||||
cmake-build-*/
|
||||
|
||||
# Mongo Explorer plugin
|
||||
.idea/**/mongoSettings.xml
|
||||
|
||||
# File-based project format
|
||||
*.iws
|
||||
|
||||
# IntelliJ
|
||||
out/
|
||||
|
||||
# mpeltonen/sbt-idea plugin
|
||||
.idea_modules/
|
||||
|
||||
# JIRA plugin
|
||||
atlassian-ide-plugin.xml
|
||||
|
||||
# Cursive Clojure plugin
|
||||
.idea/replstate.xml
|
||||
|
||||
# SonarLint plugin
|
||||
.idea/sonarlint/
|
||||
|
||||
# Crashlytics plugin (for Android Studio and IntelliJ)
|
||||
com_crashlytics_export_strings.xml
|
||||
crashlytics.properties
|
||||
crashlytics-build.properties
|
||||
fabric.properties
|
||||
|
||||
# Editor-based Rest Client
|
||||
.idea/httpRequests
|
||||
|
||||
# Android studio 3.1+ serialized cache file
|
||||
.idea/caches/build_file_checksums.ser
|
||||
|
||||
### PyCharm Patch ###
|
||||
# Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721
|
||||
|
||||
# *.iml
|
||||
# modules.xml
|
||||
# .idea/misc.xml
|
||||
# *.ipr
|
||||
|
||||
# Sonarlint plugin
|
||||
# https://plugins.jetbrains.com/plugin/7973-sonarlint
|
||||
.idea/**/sonarlint/
|
||||
|
||||
# SonarQube Plugin
|
||||
# https://plugins.jetbrains.com/plugin/7238-sonarqube-community-plugin
|
||||
.idea/**/sonarIssues.xml
|
||||
|
||||
# Markdown Navigator plugin
|
||||
# https://plugins.jetbrains.com/plugin/7896-markdown-navigator-enhanced
|
||||
.idea/**/markdown-navigator.xml
|
||||
.idea/**/markdown-navigator-enh.xml
|
||||
.idea/**/markdown-navigator/
|
||||
|
||||
# Cache file creation bug
|
||||
# See https://youtrack.jetbrains.com/issue/JBR-2257
|
||||
.idea/$CACHE_FILE$
|
||||
|
||||
# CodeStream plugin
|
||||
# https://plugins.jetbrains.com/plugin/12206-codestream
|
||||
.idea/codestream.xml
|
||||
|
||||
# Azure Toolkit for IntelliJ plugin
|
||||
# https://plugins.jetbrains.com/plugin/8053-azure-toolkit-for-intellij
|
||||
.idea/**/azureSettings.xml
|
||||
|
||||
### Python ###
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
|
||||
# C extensions
|
||||
*.so
|
||||
|
||||
# Distribution / packaging
|
||||
.Python
|
||||
build/
|
||||
develop-eggs/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
wheels/
|
||||
share/python-wheels/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
MANIFEST
|
||||
|
||||
# PyInstaller
|
||||
# Usually these files are written by a python script from a template
|
||||
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||
*.manifest
|
||||
*.spec
|
||||
|
||||
# Installer logs
|
||||
pip-log.txt
|
||||
pip-delete-this-directory.txt
|
||||
|
||||
# Unit test / coverage reports
|
||||
htmlcov/
|
||||
.tox/
|
||||
.nox/
|
||||
.coverage
|
||||
.coverage.*
|
||||
nosetests.xml
|
||||
coverage.xml
|
||||
*.cover
|
||||
*.py,cover
|
||||
.hypothesis/
|
||||
.pytest_cache/
|
||||
cover/
|
||||
|
||||
# Translations
|
||||
*.mo
|
||||
*.pot
|
||||
|
||||
# Django stuff:
|
||||
local_settings.py
|
||||
db.sqlite3
|
||||
db.sqlite3-journal
|
||||
|
||||
# Flask stuff:
|
||||
instance/
|
||||
.webassets-cache
|
||||
|
||||
# Scrapy stuff:
|
||||
.scrapy
|
||||
|
||||
# Sphinx documentation
|
||||
docs/_build/
|
||||
|
||||
# PyBuilder
|
||||
.pybuilder/
|
||||
target/
|
||||
|
||||
# Jupyter Notebook
|
||||
.ipynb_checkpoints
|
||||
|
||||
# IPython
|
||||
profile_default/
|
||||
ipython_config.py
|
||||
|
||||
# pyenv
|
||||
# For a library or package, you might want to ignore these files since the code is
|
||||
# intended to run in multiple environments; otherwise, check them in:
|
||||
# .python-version
|
||||
|
||||
# pipenv
|
||||
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
||||
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
||||
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
||||
# install all needed dependencies.
|
||||
#Pipfile.lock
|
||||
|
||||
# poetry
|
||||
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
||||
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
||||
# commonly ignored for libraries.
|
||||
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
||||
#poetry.lock
|
||||
|
||||
# pdm
|
||||
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
||||
#pdm.lock
|
||||
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
||||
# in version control.
|
||||
# https://pdm.fming.dev/#use-with-ide
|
||||
.pdm.toml
|
||||
|
||||
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
||||
__pypackages__/
|
||||
|
||||
# Celery stuff
|
||||
celerybeat-schedule
|
||||
celerybeat.pid
|
||||
|
||||
# SageMath parsed files
|
||||
*.sage.py
|
||||
|
||||
# Environments
|
||||
.venv
|
||||
env/
|
||||
venv/
|
||||
ENV/
|
||||
env.bak/
|
||||
venv.bak/
|
||||
|
||||
# Spyder project settings
|
||||
.spyderproject
|
||||
.spyproject
|
||||
|
||||
# Rope project settings
|
||||
.ropeproject
|
||||
|
||||
# mkdocs documentation
|
||||
/site
|
||||
|
||||
# mypy
|
||||
.mypy_cache/
|
||||
.dmypy.json
|
||||
dmypy.json
|
||||
|
||||
# Pyre type checker
|
||||
.pyre/
|
||||
|
||||
# pytype static type analyzer
|
||||
.pytype/
|
||||
|
||||
# Cython debug symbols
|
||||
cython_debug/
|
||||
|
||||
# PyCharm
|
||||
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
||||
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
||||
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
||||
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
||||
#.idea/
|
||||
|
||||
### Python Patch ###
|
||||
# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
|
||||
poetry.toml
|
||||
|
||||
# ruff
|
||||
.ruff_cache/
|
||||
|
||||
# LSP config files
|
||||
pyrightconfig.json
|
||||
|
||||
### SublimeText ###
|
||||
# Cache files for Sublime Text
|
||||
*.tmlanguage.cache
|
||||
*.tmPreferences.cache
|
||||
*.stTheme.cache
|
||||
|
||||
# Workspace files are user-specific
|
||||
*.sublime-workspace
|
||||
|
||||
# Project files should be checked into the repository, unless a significant
|
||||
# proportion of contributors will probably not be using Sublime Text
|
||||
# *.sublime-project
|
||||
|
||||
# SFTP configuration file
|
||||
sftp-config.json
|
||||
sftp-config-alt*.json
|
||||
|
||||
# Package control specific files
|
||||
Package Control.last-run
|
||||
Package Control.ca-list
|
||||
Package Control.ca-bundle
|
||||
Package Control.system-ca-bundle
|
||||
Package Control.cache/
|
||||
Package Control.ca-certs/
|
||||
Package Control.merged-ca-bundle
|
||||
Package Control.user-ca-bundle
|
||||
oscrypto-ca-bundle.crt
|
||||
bh_unicode_properties.cache
|
||||
|
||||
# Sublime-github package stores a github token in this file
|
||||
# https://packagecontrol.io/packages/sublime-github
|
||||
GitHub.sublime-settings
|
||||
|
||||
### Vim ###
|
||||
# Swap
|
||||
[._]*.s[a-v][a-z]
|
||||
# Comment out the next line if you don't need vector files
!*.svg
|
||||
[._]*.sw[a-p]
|
||||
[._]s[a-rt-v][a-z]
|
||||
[._]ss[a-gi-z]
|
||||
[._]sw[a-p]
|
||||
|
||||
# Session
|
||||
Session.vim
|
||||
Sessionx.vim
|
||||
|
||||
# Temporary
|
||||
.netrwhist
|
||||
# Auto-generated tag files
|
||||
tags
|
||||
# Persistent undo
|
||||
[._]*.un~
|
||||
|
||||
### VisualStudioCode ###
|
||||
.vscode/*
|
||||
!.vscode/settings.json
|
||||
!.vscode/tasks.json
|
||||
!.vscode/launch.json
|
||||
!.vscode/extensions.json
|
||||
!.vscode/*.code-snippets
|
||||
|
||||
# Local History for Visual Studio Code
|
||||
.history/
|
||||
|
||||
# Built Visual Studio Code Extensions
|
||||
*.vsix
|
||||
|
||||
### VisualStudioCode Patch ###
|
||||
# Ignore all local history of files
|
||||
.history
|
||||
.ionide
|
||||
|
||||
### VisualStudio ###
|
||||
## Ignore Visual Studio temporary files, build results, and
|
||||
## files generated by popular Visual Studio add-ons.
|
||||
##
|
||||
## Get latest from https://github.com/github/gitignore/blob/main/VisualStudio.gitignore
|
||||
|
||||
# User-specific files
|
||||
*.rsuser
|
||||
*.suo
|
||||
*.user
|
||||
*.userosscache
|
||||
*.sln.docstates
|
||||
|
||||
# User-specific files (MonoDevelop/Xamarin Studio)
|
||||
*.userprefs
|
||||
|
||||
# Mono auto generated files
|
||||
mono_crash.*
|
||||
|
||||
# Build results
|
||||
[Dd]ebug/
|
||||
[Dd]ebugPublic/
|
||||
[Rr]elease/
|
||||
[Rr]eleases/
|
||||
x64/
|
||||
x86/
|
||||
[Ww][Ii][Nn]32/
|
||||
[Aa][Rr][Mm]/
|
||||
[Aa][Rr][Mm]64/
|
||||
bld/
|
||||
[Bb]in/
|
||||
[Oo]bj/
|
||||
[Ll]og/
|
||||
[Ll]ogs/
|
||||
|
||||
# Visual Studio 2015/2017 cache/options directory
|
||||
.vs/
|
||||
# Uncomment if you have tasks that create the project's static files in wwwroot
|
||||
#wwwroot/
|
||||
|
||||
# Visual Studio 2017 auto generated files
|
||||
Generated\ Files/
|
||||
|
||||
# MSTest test Results
|
||||
[Tt]est[Rr]esult*/
|
||||
[Bb]uild[Ll]og.*
|
||||
|
||||
# NUnit
|
||||
*.VisualState.xml
|
||||
TestResult.xml
|
||||
nunit-*.xml
|
||||
|
||||
# Build Results of an ATL Project
|
||||
[Dd]ebugPS/
|
||||
[Rr]eleasePS/
|
||||
dlldata.c
|
||||
|
||||
# Benchmark Results
|
||||
BenchmarkDotNet.Artifacts/
|
||||
|
||||
# .NET Core
|
||||
project.lock.json
|
||||
project.fragment.lock.json
|
||||
artifacts/
|
||||
|
||||
# ASP.NET Scaffolding
|
||||
ScaffoldingReadMe.txt
|
||||
|
||||
# StyleCop
|
||||
StyleCopReport.xml
|
||||
|
||||
# Files built by Visual Studio
|
||||
*_i.c
|
||||
*_p.c
|
||||
*_h.h
|
||||
*.ilk
|
||||
*.meta
|
||||
*.obj
|
||||
*.iobj
|
||||
*.pch
|
||||
*.pdb
|
||||
*.ipdb
|
||||
*.pgc
|
||||
*.pgd
|
||||
*.rsp
|
||||
*.sbr
|
||||
*.tlb
|
||||
*.tli
|
||||
*.tlh
|
||||
*.tmp
|
||||
*.tmp_proj
|
||||
*_wpftmp.csproj
|
||||
*.tlog
|
||||
*.vspscc
|
||||
*.vssscc
|
||||
.builds
|
||||
*.pidb
|
||||
*.svclog
|
||||
*.scc
|
||||
|
||||
# Chutzpah Test files
|
||||
_Chutzpah*
|
||||
|
||||
# Visual C++ cache files
|
||||
ipch/
|
||||
*.aps
|
||||
*.ncb
|
||||
*.opendb
|
||||
*.opensdf
|
||||
*.cachefile
|
||||
*.VC.db
|
||||
*.VC.VC.opendb
|
||||
|
||||
# Visual Studio profiler
|
||||
*.psess
|
||||
*.vsp
|
||||
*.vspx
|
||||
*.sap
|
||||
|
||||
# Visual Studio Trace Files
|
||||
*.e2e
|
||||
|
||||
# TFS 2012 Local Workspace
|
||||
$tf/
|
||||
|
||||
# Guidance Automation Toolkit
|
||||
*.gpState
|
||||
|
||||
# ReSharper is a .NET coding add-in
|
||||
_ReSharper*/
|
||||
*.[Rr]e[Ss]harper
|
||||
*.DotSettings.user
|
||||
|
||||
# TeamCity is a build add-in
|
||||
_TeamCity*
|
||||
|
||||
# DotCover is a Code Coverage Tool
|
||||
*.dotCover
|
||||
|
||||
# AxoCover is a Code Coverage Tool
|
||||
.axoCover/*
|
||||
!.axoCover/settings.json
|
||||
|
||||
# Coverlet is a free, cross platform Code Coverage Tool
|
||||
coverage*.json
|
||||
coverage*.xml
|
||||
coverage*.info
|
||||
|
||||
# Visual Studio code coverage results
|
||||
*.coverage
|
||||
*.coveragexml
|
||||
|
||||
# NCrunch
|
||||
_NCrunch_*
|
||||
.*crunch*.local.xml
|
||||
nCrunchTemp_*
|
||||
|
||||
# MightyMoose
|
||||
*.mm.*
|
||||
AutoTest.Net/
|
||||
|
||||
# Web workbench (sass)
|
||||
.sass-cache/
|
||||
|
||||
# Installshield output folder
|
||||
[Ee]xpress/
|
||||
|
||||
# DocProject is a documentation generator add-in
|
||||
DocProject/buildhelp/
|
||||
DocProject/Help/*.HxT
|
||||
DocProject/Help/*.HxC
|
||||
DocProject/Help/*.hhc
|
||||
DocProject/Help/*.hhk
|
||||
DocProject/Help/*.hhp
|
||||
DocProject/Help/Html2
|
||||
DocProject/Help/html
|
||||
|
||||
# Click-Once directory
|
||||
publish/
|
||||
|
||||
# Publish Web Output
|
||||
*.[Pp]ublish.xml
|
||||
*.azurePubxml
|
||||
# Note: Comment the next line if you want to checkin your web deploy settings,
|
||||
# but database connection strings (with potential passwords) will be unencrypted
|
||||
*.pubxml
|
||||
*.publishproj
|
||||
|
||||
# Microsoft Azure Web App publish settings. Comment the next line if you want to
|
||||
# checkin your Azure Web App publish settings, but sensitive information contained
|
||||
# in these scripts will be unencrypted
|
||||
PublishScripts/
|
||||
|
||||
# NuGet Packages
|
||||
*.nupkg
|
||||
# NuGet Symbol Packages
|
||||
*.snupkg
|
||||
# The packages folder can be ignored because of Package Restore
|
||||
**/[Pp]ackages/*
|
||||
# except build/, which is used as an MSBuild target.
|
||||
!**/[Pp]ackages/build/
|
||||
# Uncomment if necessary however generally it will be regenerated when needed
|
||||
#!**/[Pp]ackages/repositories.config
|
||||
# NuGet v3's project.json files produces more ignorable files
|
||||
*.nuget.props
|
||||
*.nuget.targets
|
||||
|
||||
# Microsoft Azure Build Output
|
||||
csx/
|
||||
*.build.csdef
|
||||
|
||||
# Microsoft Azure Emulator
|
||||
ecf/
|
||||
rcf/
|
||||
|
||||
# Windows Store app package directories and files
|
||||
AppPackages/
|
||||
BundleArtifacts/
|
||||
Package.StoreAssociation.xml
|
||||
_pkginfo.txt
|
||||
*.appx
|
||||
*.appxbundle
|
||||
*.appxupload
|
||||
|
||||
# Visual Studio cache files
|
||||
# files ending in .cache can be ignored
|
||||
*.[Cc]ache
|
||||
# but keep track of directories ending in .cache
|
||||
!?*.[Cc]ache/
|
||||
|
||||
# Others
|
||||
ClientBin/
|
||||
~$*
|
||||
*.dbmdl
|
||||
*.dbproj.schemaview
|
||||
*.jfm
|
||||
*.pfx
|
||||
*.publishsettings
|
||||
orleans.codegen.cs
|
||||
|
||||
# Including strong name files can present a security risk
|
||||
# (https://github.com/github/gitignore/pull/2483#issue-259490424)
|
||||
#*.snk
|
||||
|
||||
# Since there are multiple workflows, uncomment next line to ignore bower_components
|
||||
# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
|
||||
#bower_components/
|
||||
|
||||
# RIA/Silverlight projects
|
||||
Generated_Code/
|
||||
|
||||
# Backup & report files from converting an old project file
|
||||
# to a newer Visual Studio version. Backup files are not needed,
|
||||
# because we have git ;-)
|
||||
_UpgradeReport_Files/
|
||||
Backup*/
|
||||
UpgradeLog*.XML
|
||||
UpgradeLog*.htm
|
||||
ServiceFabricBackup/
|
||||
*.rptproj.bak
|
||||
|
||||
# SQL Server files
|
||||
*.mdf
|
||||
*.ldf
|
||||
*.ndf
|
||||
|
||||
# Business Intelligence projects
|
||||
*.rdl.data
|
||||
*.bim.layout
|
||||
*.bim_*.settings
|
||||
*.rptproj.rsuser
|
||||
*- [Bb]ackup.rdl
|
||||
*- [Bb]ackup ([0-9]).rdl
|
||||
*- [Bb]ackup ([0-9][0-9]).rdl
|
||||
|
||||
# Microsoft Fakes
|
||||
FakesAssemblies/
|
||||
|
||||
# GhostDoc plugin setting file
|
||||
*.GhostDoc.xml
|
||||
|
||||
# Node.js Tools for Visual Studio
|
||||
.ntvs_analysis.dat
|
||||
|
||||
# Visual Studio 6 build log
|
||||
*.plg
|
||||
|
||||
# Visual Studio 6 workspace options file
|
||||
*.opt
|
||||
|
||||
# Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
|
||||
*.vbw
|
||||
|
||||
# Visual Studio 6 auto-generated project file (contains which files were open etc.)
|
||||
*.vbp
|
||||
|
||||
# Visual Studio 6 workspace and project file (working project files containing files to include in project)
|
||||
*.dsw
|
||||
*.dsp
|
||||
|
||||
# Visual Studio 6 technical files
|
||||
|
||||
# Visual Studio LightSwitch build output
|
||||
**/*.HTMLClient/GeneratedArtifacts
|
||||
**/*.DesktopClient/GeneratedArtifacts
|
||||
**/*.DesktopClient/ModelManifest.xml
|
||||
**/*.Server/GeneratedArtifacts
|
||||
**/*.Server/ModelManifest.xml
|
||||
_Pvt_Extensions
|
||||
|
||||
# Paket dependency manager
|
||||
.paket/paket.exe
|
||||
paket-files/
|
||||
|
||||
# FAKE - F# Make
|
||||
.fake/
|
||||
|
||||
# CodeRush personal settings
|
||||
.cr/personal
|
||||
|
||||
# Python Tools for Visual Studio (PTVS)
|
||||
*.pyc
|
||||
|
||||
# Cake - Uncomment if you are using it
|
||||
# tools/**
|
||||
# !tools/packages.config
|
||||
|
||||
# Tabs Studio
|
||||
*.tss
|
||||
|
||||
# Telerik's JustMock configuration file
|
||||
*.jmconfig
|
||||
|
||||
# BizTalk build output
|
||||
*.btp.cs
|
||||
*.btm.cs
|
||||
*.odx.cs
|
||||
*.xsd.cs
|
||||
|
||||
# OpenCover UI analysis results
|
||||
OpenCover/
|
||||
|
||||
# Azure Stream Analytics local run output
|
||||
ASALocalRun/
|
||||
|
||||
# MSBuild Binary and Structured Log
|
||||
*.binlog
|
||||
|
||||
# NVidia Nsight GPU debugger configuration file
|
||||
*.nvuser
|
||||
|
||||
# MFractors (Xamarin productivity tool) working folder
|
||||
.mfractor/
|
||||
|
||||
# Local History for Visual Studio
|
||||
.localhistory/
|
||||
|
||||
# Visual Studio History (VSHistory) files
|
||||
.vshistory/
|
||||
|
||||
# BeatPulse healthcheck temp database
|
||||
healthchecksdb
|
||||
|
||||
# Backup folder for Package Reference Convert tool in Visual Studio 2017
|
||||
MigrationBackup/
|
||||
|
||||
# Ionide (cross platform F# VS Code tools) working folder
|
||||
.ionide/
|
||||
|
||||
# Fody - auto-generated XML schema
|
||||
FodyWeavers.xsd
|
||||
|
||||
# VS Code files for those working on multiple tools
|
||||
*.code-workspace
|
||||
|
||||
# Local History for Visual Studio Code
|
||||
|
||||
# Windows Installer files from build outputs
|
||||
*.cab
|
||||
*.msi
|
||||
*.msix
|
||||
*.msm
|
||||
*.msp
|
||||
|
||||
# JetBrains Rider
|
||||
*.sln.iml
|
||||
|
||||
### VisualStudio Patch ###
|
||||
# Additional files built by Visual Studio
|
||||
|
||||
# End of https://www.toptal.com/developers/gitignore/api/vim,node,data,emacs,python,pycharm,executable,sublimetext,visualstudio,visualstudiocode
|
||||
|
||||
*.db
|
||||
50
README.md
Normal file
50
README.md
Normal file
|
|
@ -0,0 +1,50 @@
|
|||
# RSS Link Audit (FastAPI)
|
||||
|
||||
A FastAPI app that accepts an RSS/Atom feed URL, fetches each post’s full HTML, extracts outbound links, groups them by hostname, **hunts for each host’s RSS feed** (common endpoints + homepage discovery), and renders a stylish report using the **Royal Armory** palette.
|
||||
|
||||
## Features
|
||||
|
||||
- Input a feed URL via UI or JSON.
|
||||
- Concurrent fetching (httpx + asyncio).
|
||||
- Extract links from each post page.
|
||||
- Group by hostname; count occurrences.
|
||||
- Heuristic RSS discovery:
|
||||
- Probe common feed endpoints (e.g. `/feed`, `/rss.xml`, `/atom.xml`, etc.).
|
||||
- Parse homepage `<link rel="alternate" ...>` for RSS/Atom.
|
||||
- Scan homepage `<a>` tags for `rss|atom|feed`.
|
||||
- Validate candidates with `feedparser`.
|
||||
- Report UI:
|
||||
- Per-host card with counts.
|
||||
- **Bar** visual for how many links a host has.
|
||||
- **Top links** (if mentioned > 1).
|
||||
- Links list truncated with a **More** button.
|
||||
- RSS/Atom badge if found.
|
||||
|
||||
## Run locally
|
||||
|
||||
```bash
|
||||
python -m venv .venv
|
||||
source .venv/bin/activate # Windows: .venv\Scripts\activate
|
||||
pip install -r requirements.txt
|
||||
uvicorn main:app --reload
|
||||
```
|
||||
|
||||
Open: http://127.0.0.1:8000
|
||||
|
||||
## API
|
||||
|
||||
```
|
||||
POST /api/analyze
|
||||
Content-Type: application/json
|
||||
|
||||
{"feed_url": "https://example.com/feed.xml"}
|
||||
```
|
||||
|
||||
Returns JSON with the summarized data.
|
||||
|
||||
## Notes / Caveats
|
||||
|
||||
- Only static HTML is parsed (no JS rendering).
|
||||
- Some sites block bots; results may vary.
|
||||
- For large feeds, you may wish to trim the number of posts (e.g., slice `post_urls` in `analyze_feed`).
|
||||
- Consider adding caching (e.g., `aiocache`, Redis) if you’ll run this frequently.
|
||||
537
main.py
Normal file
537
main.py
Normal file
|
|
@ -0,0 +1,537 @@
|
|||
# main.py (v1.2) — robust feed parsing, clearer SSE progress, normalized host caching, concurrent discovery
|
||||
import asyncio
|
||||
import json
|
||||
import uuid
|
||||
from collections import Counter
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Dict, List, Optional, Set, Tuple
|
||||
from urllib.parse import urljoin, urlparse
|
||||
from contextlib import asynccontextmanager
|
||||
|
||||
import httpx
|
||||
import feedparser
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
from fastapi import FastAPI, Request, Form, HTTPException
|
||||
from fastapi.responses import HTMLResponse, JSONResponse, StreamingResponse
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
from fastapi.templating import Jinja2Templates
|
||||
|
||||
from sqlmodel import Field, SQLModel, create_engine, Session, select
|
||||
from datetime import datetime, timezone
|
||||
|
||||
|
||||
# ------------------------------
# Settings / Constants
# ------------------------------
# Per-request timeout budget: 15s overall/read, 8s to establish a connection.
REQUEST_TIMEOUT = httpx.Timeout(15.0, connect=8.0, read=15.0)

# Identify the crawler politely; some hosts reject requests without a User-Agent.
HEADERS = {
    "User-Agent": "LinkAuditBot/1.2 (+https://example.com; contact: admin@example.com)"
}

# Well-known feed locations probed during heuristic RSS discovery.
# Covers WordPress, Blogger, static-site generators (index.xml) and common blog layouts.
COMMON_FEED_PATHS = [
    "/feed", "/feed/", "/feed.xml",
    "/rss", "/rss.xml", "/rss/",
    "/atom", "/atom.xml",
    "/index.xml",
    "/blog/feed", "/blog/rss", "/blog/rss.xml", "/blog/index.xml",
    "/feeds/posts/default?alt=rss",  # Blogger
    "/news/atom.xml", "/news/rss.xml",
    "/.rss", "/?feed=rss2",  # WP variants
    "/category/news/feed", "/?feed=atom",
]

# Content-Type values that suggest a response body is an RSS/Atom feed.
FEED_MIME_HINTS = {
    "application/rss+xml",
    "application/atom+xml",
    "application/xml",
    "text/xml",
}

# Maximum number of feed-discovery probes run concurrently.
DISCOVERY_CONCURRENCY = 10
|
||||
|
||||
|
||||
# ------------------------------
|
||||
# Database Models (SQLModel)
|
||||
# ------------------------------
|
||||
class PageCache(SQLModel, table=True):
    """Raw-HTML cache: one row per fetched page URL."""
    url: str = Field(primary_key=True)
    # None records a fetch that yielded no body (e.g. failed request) —
    # presumably a negative-cache entry; confirm against the fetch path.
    html: Optional[str] = None
    # Timezone-aware UTC timestamp of when the page was fetched.
    fetched_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
|
||||
|
||||
class LinksCache(SQLModel, table=True):
    """Extracted-links cache: outbound links per page URL, JSON-encoded."""
    url: str = Field(primary_key=True)
    links_json: str  # JSON list[str]
    # Timezone-aware UTC timestamp of when link extraction ran.
    extracted_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
|
||||
|
||||
class HostFeedCache(SQLModel, table=True):
    """Feed-discovery result cache, keyed by NORMALIZED hostname (see normalize_host)."""
    hostname: str = Field(primary_key=True)  # normalized!
    # None appears to mean "discovery ran and found nothing" (negative cache) —
    # TODO confirm against the discovery code.
    feed_url: Optional[str] = None
    # Timezone-aware UTC timestamp of the last discovery attempt.
    checked_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
|
||||
|
||||
class FeedRun(SQLModel, table=True):
    """One analysis run of an input feed URL (history/audit record)."""
    id: str = Field(primary_key=True, default_factory=lambda: str(uuid.uuid4()))
    feed_url: str
    started_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
    # Remains None while the run is in progress.
    finished_at: Optional[datetime] = None
    summary_json: Optional[str] = None  # store last summary, if desired
|
||||
|
||||
|
||||
# SQLite-backed cache database; tables are created eagerly at import time.
engine = create_engine("sqlite:///cache.db", echo=False)
SQLModel.metadata.create_all(engine)
|
||||
|
||||
|
||||
# ------------------------------
|
||||
# Data models
|
||||
# ------------------------------
|
||||
@dataclass
class HostSummary:
    """Aggregated outbound-link statistics for a single hostname."""
    hostname: str
    # Total link occurrences attributed to this host (duplicates counted).
    count: int = 0
    # Distinct outbound URLs seen for this host.
    unique_links: Set[str] = field(default_factory=set)
    # Occurrence count per individual URL.
    link_counts: Counter = field(default_factory=Counter)
    # Discovered RSS/Atom feed for the host, if any.
    feed_url: Optional[str] = None
|
||||
|
||||
|
||||
# ------------------------------
|
||||
# Utilities
|
||||
# ------------------------------
|
||||
def now_utc() -> datetime:
    """Return the current moment as a timezone-aware UTC datetime."""
    return datetime.now(tz=timezone.utc)
|
||||
|
||||
def normalize_host(host: str) -> str:
    """Canonicalize a hostname: trim whitespace, lowercase, drop any
    trailing dot, and strip a leading 'www.' prefix.

    Falsy input (empty string, None) is returned unchanged.
    """
    if not host:
        return host
    cleaned = host.strip().lower().rstrip(".")
    return cleaned[4:] if cleaned.startswith("www.") else cleaned
|
||||
|
||||
def is_http_url(href: str) -> bool:
    """True when ``href`` parses with an http or https scheme."""
    try:
        return urlparse(href).scheme in ("http", "https")
    except Exception:
        # Unparseable input is simply "not an http URL".
        return False
|
||||
|
||||
def absolutize(href: str, base_url: str) -> Optional[str]:
    """Resolve ``href`` against ``base_url`` and return the absolute URL,
    or None when it is empty, a fragment, a mailto:/tel: reference, or
    does not resolve to an http(s) URL.
    """
    if not href or href.startswith(("#", "mailto:", "tel:")):
        return None
    try:
        resolved = urljoin(base_url, href)
        # Keep only web URLs; other schemes (javascript:, ftp:, …) are dropped.
        if urlparse(resolved).scheme in ("http", "https"):
            return resolved
    except Exception:
        return None
    return None
|
||||
|
||||
def extract_links_from_html(html: str, base_url: str) -> List[str]:
    """Collect every resolvable http(s) link from the anchor tags in ``html``,
    in document order (duplicates preserved)."""
    soup = BeautifulSoup(html, "lxml")
    candidates = (
        absolutize(anchor.get("href"), base_url)
        for anchor in soup.find_all("a", href=True)
    )
    return [url for url in candidates if url]
|
||||
|
||||
|
||||
# ------------------------------
|
||||
# Networking
|
||||
# ------------------------------
|
||||
async def fetch_text(client: httpx.AsyncClient, url: str) -> Optional[str]:
    """GET ``url`` and return the decoded body, or None on any network
    error or HTTP status >= 400.

    Redirects are followed. The server-declared charset is respected;
    only when no usable encoding is present do we fall back to UTF-8.
    """
    try:
        r = await client.get(url, headers=HEADERS, timeout=REQUEST_TIMEOUT, follow_redirects=True)
        if r.status_code < 400:
            # BUG FIX: httpx.Response has no `apparent_encoding` (that is a
            # `requests` API). The old fallback raised AttributeError, which
            # the bare except swallowed — silently dropping every page whose
            # response lacked a charset. Fall back to plain UTF-8 instead.
            if r.encoding is None:
                r.encoding = "utf-8"
            return r.text
    except Exception:
        # Best-effort fetch: treat network/protocol errors as "unavailable".
        return None
    return None
|
||||
|
||||
async def fetch_bytes(client: httpx.AsyncClient, url: str) -> Optional[Tuple[bytes, Optional[str]]]:
    """GET ``url`` and return (raw body, content-type header value),
    or None on any network error or HTTP status >= 400."""
    try:
        resp = await client.get(url, headers=HEADERS, timeout=REQUEST_TIMEOUT, follow_redirects=True)
    except Exception:
        return None
    if resp.status_code >= 400:
        return None
    return resp.content, resp.headers.get("content-type")
|
||||
|
||||
async def fetch_head_ok(client: httpx.AsyncClient, url: str) -> Tuple[bool, Optional[str]]:
    """Probe `url` with HEAD, then GET as a fallback (some servers reject HEAD).

    Returns (reachable, content-type header or None).
    """
    for method in ("HEAD", "GET"):
        try:
            resp = await client.request(
                method, url, headers=HEADERS, timeout=REQUEST_TIMEOUT, follow_redirects=True
            )
            if resp.status_code < 400:
                return True, resp.headers.get("content-type")
        except Exception:
            pass
    return False, None
|
||||
|
||||
|
||||
# ------------------------------
|
||||
# Cache helpers
|
||||
# ------------------------------
|
||||
def cache_get_page(url: str) -> Optional[PageCache]:
    """Return the cached page row for `url`, or None when not cached."""
    with Session(engine) as sess:
        return sess.get(PageCache, url)
|
||||
|
||||
def cache_set_page(url: str, html: Optional[str]):
    """Upsert the fetched HTML for `url` (html may be None for a failed fetch)."""
    with Session(engine) as sess:
        # merge() performs insert-or-update keyed on the URL primary key.
        sess.merge(PageCache(url=url, html=html, fetched_at=now_utc()))
        sess.commit()
|
||||
|
||||
def cache_get_links(url: str) -> Optional[List[str]]:
    """Return the cached outbound-link list for `url`.

    None means "not cached" — either no row exists or the stored JSON could
    not be decoded.
    """
    with Session(engine) as sess:
        row = sess.get(LinksCache, url)
        if row is None:
            return None
        try:
            return json.loads(row.links_json)
        except Exception:
            return None
|
||||
|
||||
def cache_set_links(url: str, links: List[str]):
    """Upsert the extracted link list for `url`, stored as a JSON array."""
    payload = json.dumps(links)
    with Session(engine) as sess:
        sess.merge(LinksCache(url=url, links_json=payload, extracted_at=now_utc()))
        sess.commit()
|
||||
|
||||
def cache_get_host_feed(hostname: str) -> Optional[str]:
    """Return the cached feed URL for a host (key is normalized first).

    NOTE(review): a host cached with feed_url=None is indistinguishable from
    an uncached host here, so negative discovery results are never honored.
    """
    key = normalize_host(hostname)
    with Session(engine) as sess:
        row = sess.get(HostFeedCache, key)
        return row.feed_url if row else None
|
||||
|
||||
def cache_set_host_feed(hostname: str, feed_url: Optional[str]):
    """Upsert the discovered feed URL (or None meaning "none found") for a host."""
    key = normalize_host(hostname)
    with Session(engine) as sess:
        sess.merge(HostFeedCache(hostname=key, feed_url=feed_url, checked_at=now_utc()))
        sess.commit()
|
||||
|
||||
|
||||
# ------------------------------
|
||||
# Cached fetch/extract
|
||||
# ------------------------------
|
||||
async def fetch_page_html(client: httpx.AsyncClient, url: str) -> Optional[str]:
    """Cached page fetch: return stored HTML when present, otherwise GET and store.

    A fetch failure is stored as a row with empty html, so it is retried on
    the next call (the cache hit requires non-empty html).
    """
    entry = cache_get_page(url)
    if entry is not None and entry.html:
        return entry.html
    fetched = await fetch_text(client, url)
    cache_set_page(url, fetched)
    return fetched
|
||||
|
||||
async def get_links_for_page(client: httpx.AsyncClient, url: str) -> List[str]:
    """Outbound links for one page, memoized in the links cache.

    An empty list is cached for pages that could not be fetched.
    """
    memo = cache_get_links(url)
    if memo is not None:
        return memo
    page_html = await fetch_page_html(client, url)
    extracted = extract_links_from_html(page_html, url) if page_html else []
    cache_set_links(url, extracted)
    return extracted
|
||||
|
||||
|
||||
# ------------------------------
|
||||
# Robust feed parsing
|
||||
# ------------------------------
|
||||
async def fetch_feed_entries(client: httpx.AsyncClient, feed_url: str) -> List[str]:
    """
    Fetch feed as bytes and let feedparser infer encoding using headers.
    Retry a couple fallbacks for mismatched declarations.

    Returns the deduplicated entry URLs; raises ValueError when the feed
    cannot be downloaded or parsed.
    """
    got = await fetch_bytes(client, feed_url)
    if not got:
        raise ValueError("Could not download the feed.")
    content, ctype = got

    parsed = feedparser.parse(content)
    if parsed.bozo == 0 and (parsed.feed or parsed.entries):
        return _entries_to_urls(parsed)
    # Fallback 1: strip a UTF-8 BOM.
    # BUG FIX: lstrip(b"\xef\xbb\xbf") treats the argument as a SET of byte
    # values and strips any leading run of them (potentially eating real
    # content), and the old `is not` change-check relied on CPython object
    # identity. removeprefix drops exactly one BOM and nothing else.
    cleaned = content.removeprefix(b"\xef\xbb\xbf")
    if cleaned != content:
        parsed2 = feedparser.parse(cleaned)
        if parsed2.bozo == 0 and (parsed2.feed or parsed2.entries):
            return _entries_to_urls(parsed2)
    # Fallback 2: replace us-ascii decl with utf-8
    try:
        cleaned2 = cleaned.replace(b'encoding="us-ascii"', b'encoding="utf-8"')
        parsed3 = feedparser.parse(cleaned2)
        if parsed3.bozo == 0 and (parsed3.feed or parsed3.entries):
            return _entries_to_urls(parsed3)
    except Exception:
        pass
    raise ValueError(f"Could not parse feed: {getattr(parsed, 'bozo_exception', 'unknown parse error')}")
|
||||
|
||||
def _entries_to_urls(parsed) -> List[str]:
|
||||
urls: List[str] = []
|
||||
for e in parsed.entries:
|
||||
if getattr(e, "link", None):
|
||||
urls.append(e.link)
|
||||
elif getattr(e, "id", None) and is_http_url(e.id):
|
||||
urls.append(e.id)
|
||||
seen, out = set(), []
|
||||
for u in urls:
|
||||
if u not in seen:
|
||||
seen.add(u)
|
||||
out.append(u)
|
||||
return out
|
||||
|
||||
|
||||
# ------------------------------
|
||||
# Feed discovery (normalized + concurrent)
|
||||
# ------------------------------
|
||||
async def discover_feed_for_host(client: httpx.AsyncClient, hostname: str) -> Optional[str]:
    """Find a working RSS/Atom feed URL for `hostname`.

    Strategy: probe common feed paths concurrently over https/http (and the
    www. variant), then scan each base page for <link rel="alternate"> tags,
    then for <a> hrefs that look feed-ish. The result (including None for
    "nothing found") is written to the host-feed cache.

    BUG FIXES:
    - After the first successful probe, the remaining candidate tasks were
      left running and never awaited (wasted requests plus "task was
      destroyed" warnings); they are now cancelled and reaped.
    - feedparser.parse(<url>) downloads the URL synchronously, stalling the
      event loop; those calls now run in a worker thread via asyncio.to_thread.

    NOTE(review): cache_get_host_feed() returns None both for "not cached"
    and "cached as no feed", so negative results are re-discovered each time.
    """
    host_key = normalize_host(hostname)
    cached = cache_get_host_feed(host_key)
    if cached is not None:
        return cached

    bases = [f"https://{host_key}", f"http://{host_key}"]
    if not host_key.startswith("www."):  # always true: normalize_host strips www.
        bases.append(f"https://www.{host_key}")
        bases.append(f"http://www.{host_key}")

    async def try_candidate(url: str) -> Optional[str]:
        # Cheap reachability/MIME check first, full parse only if plausible.
        ok, ctype = await fetch_head_ok(client, url)
        if ok and (not ctype or any(mt in ctype for mt in FEED_MIME_HINTS)):
            parsed = await asyncio.to_thread(feedparser.parse, url)
            if parsed.bozo == 0 and (parsed.feed or parsed.entries):
                return url
        return None

    tasks = [
        asyncio.create_task(try_candidate(base + path))
        for base in bases
        for path in COMMON_FEED_PATHS
    ]
    try:
        for fut in asyncio.as_completed(tasks):
            res = await fut
            if res:
                cache_set_host_feed(host_key, res)
                return res
    finally:
        # Cancel and await outstanding probes whether we succeeded or not.
        for t in tasks:
            t.cancel()
        await asyncio.gather(*tasks, return_exceptions=True)

    for base in bases:
        html = await fetch_page_html(client, base + "/")
        if not html:
            continue
        soup = BeautifulSoup(html, "lxml")
        # Preferred: explicit <link rel="alternate" type="...rss/atom/xml...">.
        for link in soup.find_all("link", rel=True, href=True):
            rels = link.get("rel")
            if isinstance(rels, list):
                rels = {r.lower() for r in rels if r}
            else:
                rels = {str(rels).lower()}
            typ = str(link.get("type", "")).lower()
            href = link.get("href")
            if "alternate" in rels and any(mt in typ for mt in ("rss", "atom", "xml")):
                feed_url = urljoin(base + "/", href)
                parsed = await asyncio.to_thread(feedparser.parse, feed_url)
                if parsed.bozo == 0 and (parsed.feed or parsed.entries):
                    cache_set_host_feed(host_key, feed_url)
                    return feed_url
        # Last resort: anchors whose href mentions rss/atom/feed.
        for a in soup.find_all("a", href=True):
            href = a.get("href", "")
            if any(tok in href.lower() for tok in ("rss", "atom", "feed")):
                feed_url = urljoin(base + "/", href)
                ok, ctype = await fetch_head_ok(client, feed_url)
                if ok:
                    parsed = await asyncio.to_thread(feedparser.parse, feed_url)
                    if parsed.bozo == 0 and (parsed.feed or parsed.entries):
                        cache_set_host_feed(host_key, feed_url)
                        return feed_url

    cache_set_host_feed(host_key, None)
    return None
|
||||
|
||||
|
||||
# ------------------------------
|
||||
# SSE plumbing
|
||||
# ------------------------------
|
||||
class Job:
    """One analysis run: feed URL, a queue of pre-formatted SSE frames, and a
    completion event consumed by the /events endpoint."""

    def __init__(self, feed_url: str):
        self.id = str(uuid.uuid4())
        self.feed_url = feed_url
        self.queue: asyncio.Queue[str] = asyncio.Queue()
        self.done = asyncio.Event()

    async def emit(self, event: str, data: dict):
        """Queue one SSE frame wrapping `data` with the event name and a UTC timestamp."""
        envelope = {"event": event, "data": data, "ts": datetime.now(timezone.utc).isoformat()}
        frame = f"event: {event}\ndata: {json.dumps(envelope)}\n\n"
        await self.queue.put(frame)

    async def finish(self):
        """Mark the job complete and queue the terminal 'done' frame."""
        self.done.set()
        await self.queue.put("event: done\ndata: {}\n\n")
|
||||
|
||||
|
||||
# In-memory registry of live jobs keyed by job id; entries are removed by the
# SSE endpoint when a stream ends (single-process only, no persistence).
JOBS: Dict[str, Job] = {}
|
||||
|
||||
|
||||
async def run_analysis_job(job: Job):
    """Execute one full analysis for job.feed_url, emitting SSE progress frames.

    Pipeline: record a FeedRun row -> download/parse the feed -> fetch each
    post and collect outbound links -> group links by normalized host ->
    concurrently discover a feed per host while rendering a card per host ->
    persist and emit a summary. Errors surface as an 'error' event; a 'done'
    frame is always queued by job.finish().
    """
    # Record the run start; the row is re-queried by URL (newest first) at the
    # end to attach the summary.
    with Session(engine) as sess:
        fr = FeedRun(feed_url=job.feed_url)
        sess.add(fr)
        sess.commit()

    async with httpx.AsyncClient(http2=True) as client:
        try:
            await job.emit("status", {"stage": "feed", "message": "Downloading and parsing feed…"})
            post_urls = await fetch_feed_entries(client, job.feed_url)
            await job.emit("posts", {"count": len(post_urls)})

            # Fetch every post sequentially; duplicates are kept so per-host
            # counts reflect total mentions across posts.
            all_links: List[str] = []
            for idx, post_url in enumerate(post_urls, start=1):
                await job.emit("status", {"stage": "posts", "message": f"Fetching post {idx}/{len(post_urls)}"})
                links = await get_links_for_page(client, post_url)
                all_links.extend(links)
                await job.emit("post_progress", {"current": idx, "total": len(post_urls), "post_url": post_url})

            # Aggregate links per normalized host (links with no netloc are dropped).
            host_map: Dict[str, HostSummary] = {}
            for link in all_links:
                host = normalize_host(urlparse(link).netloc)
                if not host:
                    continue
                hs = host_map.setdefault(host, HostSummary(hostname=host))
                hs.count += 1
                hs.unique_links.add(link)
                hs.link_counts[link] += 1

            # Most-mentioned hosts first; max_count scales the card bar widths.
            hosts_sorted = sorted(host_map.values(), key=lambda s: s.count, reverse=True)
            await job.emit("hosts", {"count": len(hosts_sorted)})

            sem = asyncio.Semaphore(DISCOVERY_CONCURRENCY)
            max_count = max((h.count for h in hosts_sorted), default=1)

            async def work(hs: HostSummary, idx: int, total: int):
                # Discover this host's feed, then render and emit its card.
                async with sem:
                    await job.emit("status", {"stage": "discover", "message": f"Discovering feed for {hs.hostname} ({idx}/{total})"})
                    feed = await discover_feed_for_host(client, hs.hostname)
                    hs.feed_url = feed
                    host_dict = {
                        "hostname": hs.hostname,
                        "count": hs.count,
                        "unique_link_count": len(hs.unique_links),
                        "links": sorted(list(hs.unique_links)),
                        # Only links mentioned more than once are surfaced.
                        "top_links": [
                            {"url": url, "count": cnt}
                            for url, cnt in hs.link_counts.most_common()
                            if cnt > 1
                        ],
                        "feed_url": hs.feed_url,
                    }
                    html = render_host_card(host_dict, max_count, index=idx)
                    await job.emit("host_card", {"html": html, "index": idx, "total": total})

            tasks = [asyncio.create_task(work(hs, i, len(hosts_sorted))) for i, hs in enumerate(hosts_sorted, start=1)]
            # Periodic pings while discovery tasks are outstanding, so the SSE
            # stream stays visibly alive during long discoveries.
            async def heartbeat():
                while any(not t.done() for t in tasks):
                    await job.emit("status", {"stage": "discover", "message": "Still discovering host feeds…"})
                    await asyncio.sleep(3)
            hb = asyncio.create_task(heartbeat())
            await asyncio.gather(*tasks)
            hb.cancel()

            summary = {
                "feed_url": job.feed_url,
                "post_count": len(post_urls),
                "hosts": [h.hostname for h in hosts_sorted],
                "fetched_at": datetime.now(timezone.utc).isoformat(),
            }
            # NOTE(review): re-querying by feed_url (newest first) could attach
            # the summary to a different run if the same feed is started twice
            # concurrently — consider keeping fr.id from the first session.
            with Session(engine) as sess:
                fr = sess.exec(select(FeedRun).where(FeedRun.feed_url == job.feed_url).order_by(FeedRun.started_at.desc())).first()
                if fr:
                    fr.summary_json = json.dumps(summary)
                    fr.finished_at = datetime.now(timezone.utc)
                    sess.add(fr)
                    sess.commit()

            await job.emit("summary", summary)
        except Exception as e:
            await job.emit("error", {"message": str(e)})
        finally:
            await job.finish()
|
||||
|
||||
|
||||
# ------------------------------
|
||||
# Template rendering for components
|
||||
# ------------------------------
|
||||
# Jinja environment used for full pages (index route) and for server-rendered
# host cards pushed over SSE (render_host_card).
templates = Jinja2Templates(directory="templates")
|
||||
|
||||
def render_host_card(host: dict, max_count: int, index: int) -> str:
    """Render components/host_card.html to an HTML string for SSE delivery.

    A stub stands in for the Request object Jinja2Templates normally receives;
    the card template does not dereference it. BUG FIX: dropped the unused
    `from fastapi import Request` import that the old body performed on every
    call.
    """

    class _StubRequest:
        def __init__(self):
            # Only `.state` existed on the previous dummy; keep that surface.
            self.state = type("s", (), {})()

    return templates.get_template("components/host_card.html").render(
        request=_StubRequest(), host=host, max_count=max_count, index=index
    )
|
||||
|
||||
|
||||
# ------------------------------
|
||||
# FastAPI app + routes
|
||||
# ------------------------------
|
||||
# Application instance; static assets (styles.css) are served under /static.
app = FastAPI(title="RSS Link Audit", version="1.2.0")
app.mount("/static", StaticFiles(directory="static"), name="static")
|
||||
|
||||
|
||||
@app.get("/", response_class=HTMLResponse)
async def index(request: Request):
    """Serve the single-page UI (templates/index.html)."""
    return templates.TemplateResponse("index.html", {"request": request})
|
||||
|
||||
|
||||
@app.post("/start", response_class=JSONResponse)
async def start(feed_url: str = Form(...)):
    """Create a Job for `feed_url`, launch it in the background, return its id.

    BUG FIX: keep a strong reference to the background task on the job.
    asyncio only holds weak references to tasks, so a bare create_task()
    result held by nobody can be garbage-collected mid-run.
    """
    job = Job(feed_url)
    JOBS[job.id] = job
    job.task = asyncio.create_task(run_analysis_job(job))
    return {"job_id": job.id}
|
||||
|
||||
|
||||
@app.get("/events/{job_id}")
async def sse(job_id: str):
    """SSE stream of a job's progress frames; 404 when the job id is unknown.

    BUG FIX: the old loop broke as soon as job.done was set, even while
    frames (including the terminal 'done' frame) were still queued behind
    the one just yielded, so clients could miss the summary/done events.
    The stream now ends only when the job is done AND the queue is drained.
    """
    job = JOBS.get(job_id)
    if not job:
        raise HTTPException(404, "Job not found")

    async def event_gen():
        yield f"event: hello\ndata: {{\"job_id\":\"{job.id}\"}}\n\n"
        while not (job.done.is_set() and job.queue.empty()):
            try:
                item = await asyncio.wait_for(job.queue.get(), timeout=30.0)
                yield item
            except asyncio.TimeoutError:
                # Keepalive so proxies do not close an idle connection.
                yield "event: ping\ndata: {}\n\n"
        JOBS.pop(job.id, None)

    return StreamingResponse(event_gen(), media_type="text/event-stream")
|
||||
|
||||
|
||||
@app.post("/api/analyze", response_class=JSONResponse)
async def analyze_api(payload: Dict):
    """Blocking analysis API: runs the whole job before replying.

    Expects a JSON body containing 'feed_url'; 400 when missing/empty.
    Progress events are discarded (no SSE consumer).
    """
    feed_url = payload.get("feed_url")
    if not feed_url:
        raise HTTPException(status_code=400, detail="Missing 'feed_url'")
    await run_analysis_job(Job(feed_url))
    return JSONResponse(content={"ok": True})
|
||||
|
||||
|
||||
@app.get("/healthz")
async def healthz():
    """Liveness probe: always returns {"ok": True}."""
    return {"ok": True}
|
||||
10
requirements.txt
Normal file
10
requirements.txt
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
fastapi
|
||||
uvicorn[standard]
|
||||
httpx
|
||||
feedparser
|
||||
beautifulsoup4
|
||||
lxml
|
||||
jinja2
|
||||
sqlmodel
|
||||
aiosqlite
|
||||
sqlalchemy>=2.0
|
||||
84
static/styles.css
Normal file
84
static/styles.css
Normal file
|
|
@ -0,0 +1,84 @@
|
|||
/* Royal Armory Palette */
|
||||
:root {
|
||||
--ra-ink: #000030;
|
||||
--ra-plum: #3f0a57;
|
||||
--ra-magenta: #85106b;
|
||||
--ra-ruby: #b02c2c;
|
||||
--ra-bronze: #b8673e;
|
||||
--ra-amber: #d9932b;
|
||||
--ra-gold: #f0bd71;
|
||||
--ra-cream: #ffe3ba;
|
||||
|
||||
--ra-bg: var(--ra-ink);
|
||||
--ra-panel: #0b0b3f;
|
||||
--ra-copper: #6f3b2b;
|
||||
--ra-ruby-dark: #8c2323;
|
||||
}
|
||||
|
||||
* { box-sizing: border-box; }
|
||||
html, body {
|
||||
margin: 0;
|
||||
background: var(--ra-bg);
|
||||
color: var(--ra-cream);
|
||||
font-family: ui-sans-serif, system-ui, -apple-system, Segoe UI, Roboto, Ubuntu, Cantarell, Noto Sans, Arial;
|
||||
line-height: 1.5;
|
||||
}
|
||||
|
||||
a { color: var(--ra-gold); text-decoration: none; }
|
||||
a:hover { text-decoration: underline; }
|
||||
|
||||
header, footer { background: linear-gradient(0deg, rgba(64,10,87,0.25), rgba(64,10,87,0.25)); }
|
||||
main { padding: 1rem; }
|
||||
|
||||
.link { word-break: break-all; text-underline-offset: 3px; }
|
||||
|
||||
.bar-wrap { width: 100%; background: rgba(240,189,113,0.12); height: 12px; }
|
||||
.bar { height: 12px; background: linear-gradient(90deg, var(--ra-amber), var(--ra-gold)); }
|
||||
|
||||
.btn-more { background: var(--ra-plum); padding: 6px 10px; border-radius: 10px; font-weight: 600; color: var(--ra-cream); }
|
||||
|
||||
.more-list[data-expanded="false"] { display: none; }
|
||||
.more-list[data-expanded="true"] { display: block; }
|
||||
|
||||
/* utilities */
|
||||
.max-w-3xl { max-width: 48rem; }
|
||||
.max-w-5xl { max-width: 64rem; }
|
||||
.mx-auto { margin-left: auto; margin-right: auto; }
|
||||
.p-6 { padding: 1.5rem; }
|
||||
.p-5 { padding: 1.25rem; }
|
||||
.p-4 { padding: 1rem; }
|
||||
.px-6 { padding-left: 1.5rem; padding-right: 1.5rem; }
|
||||
.py-4 { padding-top: 1rem; padding-bottom: 1rem; }
|
||||
.py-10{ padding-top: 2.5rem; padding-bottom: 2.5rem; }
|
||||
.mt-2 { margin-top: 0.5rem; }
|
||||
.mt-4 { margin-top: 1rem; }
|
||||
.mt-6 { margin-top: 1.5rem; }
|
||||
.mb-1 { margin-bottom: 0.25rem; }
|
||||
.mb-2 { margin-bottom: 0.5rem; }
|
||||
.space-y-1 > * + * { margin-top: 0.25rem; }
|
||||
.space-y-6 > * + * { margin-top: 1.5rem; }
|
||||
.rounded-2xl { border-radius: 1rem; }
|
||||
.rounded-xl { border-radius: 0.75rem; }
|
||||
.shadow { box-shadow: 0 10px 30px rgba(0,0,0,0.25); }
|
||||
.font-bold { font-weight: 700; }
|
||||
.font-semibold { font-weight: 600; }
|
||||
.text-sm { font-size: 0.875rem; }
|
||||
.text-xl { font-size: 1.25rem; }
|
||||
.text-3xl { font-size: 1.875rem; }
|
||||
.opacity-70 { opacity: 0.7; }
|
||||
.opacity-80 { opacity: 0.8; }
|
||||
.border { border-width: 1px; }
|
||||
.border-b { border-bottom-width: 1px; }
|
||||
.flex { display: flex; }
|
||||
.items-center { align-items: center; }
|
||||
.items-baseline { align-items: baseline; }
|
||||
.justify-between { justify-content: space-between; }
|
||||
.gap-2 { gap: 0.5rem; }
|
||||
.gap-4 { gap: 1rem; }
|
||||
.min-w-0 { min-width: 0; }
|
||||
.shrink-0 { flex-shrink: 0; }
|
||||
.break-all { word-break: break-all; }
|
||||
.w-3 { width: 0.75rem; }
|
||||
.h-3 { height: 0.75rem; }
|
||||
input, button { border: none; }
|
||||
button { cursor: pointer; }
|
||||
62
templates/components/host_card.html
Normal file
62
templates/components/host_card.html
Normal file
|
|
@ -0,0 +1,62 @@
|
|||
{% set pct = (100 * host.count // (max_count or 1)) %}
|
||||
<article class="rounded-2xl bg-[var(--ra-panel)] border border-[var(--ra-copper)] overflow-hidden">
|
||||
<header class="p-4 flex items-center justify-between gap-4">
|
||||
<div class="min-w-0">
|
||||
<h2 class="text-xl font-semibold break-all">{{ host.hostname }}</h2>
|
||||
<div class="text-sm opacity-80">
|
||||
<span class="mr-3">Links: <strong>{{ host.count }}</strong></span>
|
||||
<span>Unique: <strong>{{ host.unique_link_count }}</strong></span>
|
||||
</div>
|
||||
</div>
|
||||
{% if host.feed_url %}
|
||||
<a href="{{ host.feed_url }}" target="_blank" rel="noopener"
|
||||
class="shrink-0 px-3 py-1 rounded-lg bg-[var(--ra-amber)] text-[var(--ra-ink)] font-semibold hover:opacity-90">
|
||||
RSS / Atom
|
||||
</a>
|
||||
{% endif %}
|
||||
</header>
|
||||
|
||||
<div class="bar-wrap">
|
||||
<div class="bar" style="width: {{ pct }}%"></div>
|
||||
</div>
|
||||
|
||||
<div class="p-4 space-y-4">
|
||||
{% if host.top_links %}
|
||||
<div>
|
||||
<div class="text-sm font-semibold mb-2">Top links (mentioned > 1):</div>
|
||||
<ul class="space-y-1 text-sm">
|
||||
{% for tl in host.top_links %}
|
||||
<li class="flex items-baseline gap-2">
|
||||
<span class="inline-block px-2 py-0.5 rounded-md bg-[var(--ra-ruby)]">{{ tl.count }}</span>
|
||||
<a class="link" href="{{ tl.url }}" target="_blank" rel="noopener">{{ tl.url }}</a>
|
||||
</li>
|
||||
{% endfor %}
|
||||
</ul>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
{% set list_id = "links-" ~ index %}
|
||||
{% set links = host.links %}
|
||||
{% set preview = links[:8] %}
|
||||
{% set remainder = links[8:] %}
|
||||
<div>
|
||||
<div class="text-sm font-semibold mb-2">Links:</div>
|
||||
<ul class="space-y-1 text-sm">
|
||||
{% for url in preview %}
|
||||
<li><a class="link" href="{{ url }}" target="_blank" rel="noopener">{{ url }}</a></li>
|
||||
{% endfor %}
|
||||
</ul>
|
||||
|
||||
{% if remainder %}
|
||||
<div id="{{ list_id }}" class="more-list" data-expanded="false">
|
||||
<ul class="space-y-1 text-sm">
|
||||
{% for url in remainder %}
|
||||
<li><a class="link" href="{{ url }}" target="_blank" rel="noopener">{{ url }}</a></li>
|
||||
{% endfor %}
|
||||
</ul>
|
||||
</div>
|
||||
<button class="btn-more mt-2" data-more-btn data-target="{{ list_id }}">More</button>
|
||||
{% endif %}
|
||||
</div>
|
||||
</div>
|
||||
</article>
|
||||
104
templates/index.html
Normal file
104
templates/index.html
Normal file
|
|
@ -0,0 +1,104 @@
|
|||
{% extends "layout.html" %}
|
||||
{% block content %}
|
||||
<section class="mx-auto max-w-3xl p-6">
|
||||
<h1 class="text-3xl font-bold mb-2">RSS Link Audit</h1>
|
||||
<p class="mb-6 opacity-90">Paste a feed URL. This version uses <strong>SQLite/SQLModel caching</strong> and streams progress over <strong>SSE</strong>.</p>
|
||||
|
||||
<form id="feed-form" class="space-y-4 bg-[var(--ra-panel)] p-5 rounded-2xl shadow">
|
||||
<label class="block">
|
||||
<span class="block mb-2 font-semibold">Feed URL</span>
|
||||
<input id="feed-input" type="url" name="feed_url" placeholder="https://example.com/feed.xml"
|
||||
required
|
||||
class="w-full p-3 rounded-xl bg-[var(--ra-ink)] text-[var(--ra-cream)] border border-[var(--ra-copper)] focus:outline-none focus:ring-2 focus:ring-[var(--ra-amber)]" />
|
||||
</label>
|
||||
<button class="px-4 py-2 rounded-xl font-semibold bg-[var(--ra-ruby)] hover:bg-[var(--ra-ruby-dark)]">
|
||||
Analyze
|
||||
</button>
|
||||
</form>
|
||||
|
||||
<div id="status" class="mt-6 text-sm opacity-80"></div>
|
||||
|
||||
<section id="summary" class="mt-6"></section>
|
||||
<section id="hosts" class="mt-4 space-y-6"></section>
|
||||
</section>
|
||||
|
||||
<script>
|
||||
const statusEl = document.getElementById('status');
|
||||
const hostsEl = document.getElementById('hosts');
|
||||
const summaryEl = document.getElementById('summary');
|
||||
const form = document.getElementById('feed-form');
|
||||
|
||||
function setStatus(html) { statusEl.innerHTML = html; }
|
||||
function appendHostCard(html) {
|
||||
const div = document.createElement('div');
|
||||
div.innerHTML = html;
|
||||
hostsEl.appendChild(div.firstElementChild);
|
||||
}
|
||||
function setSummary(feed_url, post_count, host_count) {
|
||||
summaryEl.innerHTML = `
|
||||
<div class="rounded-2xl bg-[var(--ra-panel)] border border-[var(--ra-copper)] p-4">
|
||||
<div class="font-semibold mb-1">Summary</div>
|
||||
<div>Feed: <a class="underline" href="${feed_url}" target="_blank" rel="noopener">${feed_url}</a></div>
|
||||
<div>Posts parsed: <strong>${post_count}</strong></div>
|
||||
<div>Hosts found: <strong>${host_count}</strong></div>
|
||||
</div>`;
|
||||
}
|
||||
|
||||
form.addEventListener('submit', async (e) => {
|
||||
e.preventDefault();
|
||||
hostsEl.innerHTML = '';
|
||||
summaryEl.innerHTML = '';
|
||||
setStatus('Starting…');
|
||||
|
||||
const fd = new FormData(form);
|
||||
const resp = await fetch('/start', { method: 'POST', body: fd });
|
||||
if (!resp.ok) {
|
||||
setStatus('Failed to start.');
|
||||
return;
|
||||
}
|
||||
const { job_id } = await resp.json();
|
||||
setStatus('Job started. Connecting…');
|
||||
|
||||
const es = new EventSource(`/events/${job_id}`);
|
||||
let postCount = 0, hostsCount = 0, seenCards = 0;
|
||||
|
||||
es.addEventListener('hello', () => setStatus('Connected. Parsing feed…'));
|
||||
es.addEventListener('status', (ev) => {
|
||||
const d = JSON.parse(ev.data).data;
|
||||
setStatus(`${d.message}`);
|
||||
});
|
||||
es.addEventListener('posts', (ev) => {
|
||||
const data = JSON.parse(ev.data).data;
|
||||
postCount = data.count || 0;
|
||||
setStatus(`Posts: ${postCount}. Fetching pages…`);
|
||||
});
|
||||
es.addEventListener('post_progress', (ev) => {
|
||||
const d = JSON.parse(ev.data).data;
|
||||
setStatus(`Fetching posts ${d.current}/${d.total}…`);
|
||||
});
|
||||
es.addEventListener('hosts', (ev) => {
|
||||
const data = JSON.parse(ev.data).data;
|
||||
hostsCount = data.count || 0;
|
||||
setStatus(`Found ${hostsCount} hosts. Discovering their feeds…`);
|
||||
});
|
||||
es.addEventListener('host_card', (ev) => {
|
||||
const data = JSON.parse(ev.data).data;
|
||||
appendHostCard(data.html);
|
||||
seenCards = data.index;
|
||||
setStatus(`Rendered ${seenCards}/${data.total} hosts… Still discovering feeds…`);
|
||||
});
|
||||
es.addEventListener('summary', (ev) => {
|
||||
const data = JSON.parse(ev.data).data;
|
||||
setSummary(data.feed_url, postCount, hostsCount);
|
||||
});
|
||||
es.addEventListener('error', (ev) => {
|
||||
const data = JSON.parse(ev.data).data;
|
||||
setStatus('Error: ' + (data.message || 'Unknown'));
|
||||
});
|
||||
es.addEventListener('done', () => {
|
||||
setStatus('Done.');
|
||||
es.close();
|
||||
});
|
||||
});
|
||||
</script>
|
||||
{% endblock %}
|
||||
24
templates/layout.html
Normal file
24
templates/layout.html
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
<!doctype html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8"/>
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1"/>
|
||||
<title>RSS Link Audit</title>
|
||||
<link rel="stylesheet" href="/static/styles.css"/>
|
||||
</head>
|
||||
<body>
|
||||
<header class="px-6 py-4 border-b border-[var(--ra-copper)]">
|
||||
<div class="max-w-5xl mx-auto flex items-center gap-4">
|
||||
<div class="w-3 h-3 rounded-full bg-[var(--ra-gold)]"></div>
|
||||
<a href="/" class="font-bold hover:underline">RSS Link Audit</a>
|
||||
<span class="opacity-70 text-sm">with SQLite cache + SSE</span>
|
||||
</div>
|
||||
</header>
|
||||
<main class="max-w-5xl mx-auto">
|
||||
{% block content %}{% endblock %}
|
||||
</main>
|
||||
<footer class="px-6 py-10 text-sm opacity-70">
|
||||
<div class="max-w-5xl mx-auto">Built with FastAPI • Palette: Royal Armory</div>
|
||||
</footer>
|
||||
</body>
|
||||
</html>
|
||||
Loading…
Add table
Add a link
Reference in a new issue