Broken Link Finder

Scans all notes, extracts internal hyperlinks, and reports any that point to non-existent documents.

Cell 1: List all notes

import httpx

# Base URL of the server the notes are fetched from.
BASE = 'http://localhost:8021'

# Ask the server for the keys of all notes (same 10 s timeout as the
# per-note fetches use).
resp = httpx.post(BASE + '/', json={'op': 'keys'}, timeout=10)
# Stop here with the HTTP status if the request was rejected, instead of
# failing below with an unrelated KeyError / JSON decoding error.
resp.raise_for_status()
keys = resp.json()['keys']

print(f'Total notes: {len(keys)}')
for k in sorted(keys):
    print(f'  {k}')

Cell 2: Extract internal links

def extract_internal_links(obj):
    """Collect the internal (non-http) link hrefs found anywhere in *obj*.

    Nested dicts and lists are walked in full. Whenever a dict has a
    ``'link'`` key whose value is itself a dict, that dict's ``'href'`` is
    collected if it is a non-empty string that does not start with
    ``http://`` or ``https://``.

    Args:
        obj: Decoded JSON-like data (dict, list, or scalar). Anything that
            is neither a dict nor a list contributes no links.

    Returns:
        A set of the distinct internal href strings; empty if there are none.
    """
    external_prefixes = ('http://', 'https://')
    links = set()
    # Explicit stack instead of recursion: one result set is filled in place
    # rather than building and merging a new set at every nesting level.
    pending = [obj]
    while pending:
        node = pending.pop()
        if isinstance(node, dict):
            link = node.get('link')
            if isinstance(link, dict):
                href = link.get('href')
                # Only strings can be link targets. A malformed note whose
                # href is a number, list, dict or null is skipped instead of
                # raising AttributeError on .startswith().
                if (
                    isinstance(href, str)
                    and href
                    and not href.startswith(external_prefixes)
                ):
                    links.add(href)
            # The 'link' dict itself is among the values, so links nested
            # inside it are still discovered.
            pending.extend(node.values())
        elif isinstance(node, list):
            pending.extend(node)
    return links

all_links = {}  # source_key -> set of internal hrefs found in that note
errors = []     # "key: reason" entries for notes that could not be scanned

# One shared client reuses the connection for every note instead of opening
# a new one per request.
with httpx.Client(timeout=10) as client:
    for key in keys:
        try:
            r = client.get(f'{BASE}/{key}')
            if r.status_code != 200:
                # A note that cannot be fetched is never scanned; record it
                # so the report is not silently incomplete.
                errors.append(f'{key}: HTTP {r.status_code}')
                continue
            found = extract_internal_links(r.json())
        except Exception as e:
            # Best-effort scan: one unreachable or malformed note must not
            # abort the whole run, so log the failure and move on.
            errors.append(f'{key}: {e}')
            continue
        if found:
            all_links[key] = found

total = sum(len(v) for v in all_links.values())
print(f'Found {total} internal links across {len(all_links)} notes:')
for src in sorted(all_links):
    for href in sorted(all_links[src]):
        print(f'  {src}  ->  {href}')

if errors:
    print(f'\nErrors ({len(errors)}):')
    for e in errors:
        print(f'  {e}')

Cell 3: Find broken links

# A link is broken when its href is not the key of any existing note.
keys_set = set(keys)

# source_key -> sorted list of hrefs that have no matching note
broken = {}
for src, hrefs in all_links.items():
    missing = sorted(hrefs - keys_set)
    if missing:
        broken[src] = missing

if not broken:
    print('No broken links found!')
else:
    total_broken = sum(map(len, broken.values()))
    print(f'Found {total_broken} broken links in {len(broken)} notes:\n')
    for src in sorted(broken):
        for target in broken[src]:
            print(f'  {src}  ->  {target}  [MISSING]')
created 2026-05-29