"""Scan all notes, extract internal hyperlinks, and report any that point to non-existent documents."""
import httpx
# Base URL of the notes service queried by this script.
BASE = 'http://localhost:8021'

# Ask the service for the list of note keys. The explicit timeout matches the
# one used for the per-note fetches, and raise_for_status() turns an HTTP
# error into a clear failure instead of a confusing KeyError / decode error
# on the next line.
resp = httpx.post(BASE + '/', json={'op': 'keys'}, timeout=10)
resp.raise_for_status()
keys = resp.json()['keys']

# Print the inventory in sorted order so the listing is stable between runs.
print(f'Total notes: {len(keys)}')
for k in sorted(keys):
    print(f' {k}')
def extract_internal_links(obj):
    """Collect every internal (non-http) link href found anywhere in *obj*.

    The structure is walked through nested dicts and lists. Whenever a dict
    has a ``'link'`` entry that is itself a dict, that entry's ``'href'`` is
    collected if it is a non-empty string that does not start with an
    ``http://`` or ``https://`` scheme.

    Args:
        obj: Any value; only dicts and lists are descended into, everything
            else is ignored.

    Returns:
        A set of the internal href strings that were found (possibly empty).
    """
    links = set()
    # An explicit stack instead of recursion, so deeply nested documents
    # cannot hit the interpreter's recursion limit.
    pending = [obj]
    while pending:
        node = pending.pop()
        if isinstance(node, dict):
            link = node.get('link')
            if isinstance(link, dict):
                href = link.get('href', '')
                # Skip non-string hrefs (e.g. None or a number) rather than
                # crashing on .startswith; compare the scheme
                # case-insensitively because URL schemes are not
                # case-sensitive.
                if (
                    isinstance(href, str)
                    and href
                    and not href.lower().startswith(('http://', 'https://'))
                ):
                    links.add(href)
            # Every value is visited, including the 'link' dict itself, so
            # links nested inside a link are found as well.
            pending.extend(node.values())
        elif isinstance(node, list):
            pending.extend(node)
    return links
# Fetch every note and record the internal links it contains.
all_links = {}  # source_key -> set of hrefs
errors = []  # one human-readable message per note that could not be scanned
for key in keys:
    try:
        r = httpx.get(f'{BASE}/{key}', timeout=10)
        if r.status_code != 200:
            # Report the failed fetch instead of silently dropping the note,
            # so an unscanned note is visible in the error summary.
            errors.append(f'{key}: HTTP {r.status_code}')
            continue
        found = extract_internal_links(r.json())
    except Exception as e:
        # Deliberately broad: the scan is best-effort, and one bad note
        # (network failure, invalid JSON, ...) must not abort the others.
        errors.append(f'{key}: {e}')
        continue
    # Only notes that actually contain internal links are recorded.
    if found:
        all_links[key] = found

# Summarise what was found, in sorted order for stable output.
total = sum(len(v) for v in all_links.values())
print(f'Found {total} internal links across {len(all_links)} notes:')
for src in sorted(all_links):
    for href in sorted(all_links[src]):
        print(f' {src} -> {href}')

# List the notes that could not be scanned, if any.
if errors:
    print(f'\nErrors ({len(errors)}):')
    for e in errors:
        print(f' {e}')
# Cross-check every collected href against the set of known note keys.
keys_set = set(keys)

# source note -> sorted list of hrefs that match no existing key
broken = {}
for note, targets in all_links.items():
    missing = sorted(targets.difference(keys_set))
    if missing:
        broken[note] = missing

if not broken:
    print('No broken links found!')
else:
    total_broken = sum(map(len, broken.values()))
    print(f'Found {total_broken} broken links in {len(broken)} notes:\n')
    # Report by source note, alphabetically, for stable output.
    for note in sorted(broken):
        for missing_href in broken[note]:
            print(f' {note} -> {missing_href} [MISSING]')