#!/bin/sh
# Find dead (404) links in an html file
# Requires curl, grep, hxwls, parallel
# $Id: deadlink.sh,v 1.3 2024/06/24 09:20:23 oc45ujef Exp $
# https://wwwcip.cs.fau.de/~oc45ujef/misc/src/deadlink.sh
#
# Usage:  deadlink.sh SOURCE
#   SOURCE is one of:
#     -      read the HTML document from standard input
#     FILE   path of an existing regular file
#     URL    anything else; fetched with curl (redirects followed)
# Output: one line "404<TAB>url" per link whose final status code is 404.

# links SOURCE
# Feed the HTML document named by SOURCE (see Usage above) to hxwls and
# write whatever hxwls prints (the document's links) to stdout.
links() {
  # The whole if-chain is a single pipeline stage: whichever branch runs,
  # its output is what hxwls reads on stdin.
  if [ "$1" = "-" ]; then
    cat
  elif [ -f "$1" ]; then
    # "--" keeps a file name starting with "-" from being parsed as an option.
    cat -- "$1"
  else
    curl -Ls "$1"
  fi | hxwls -
}

# 1. Extract the links from the document.
# 2. Keep only lines starting with "http"; relative links and other
#    schemes (mailto:, ftp:, ...) are dropped.
# 3. parallel appends each remaining URL to the curl command, which sends a
#    HEAD request (-I), follows redirects (-L), discards the headers and
#    prints "status<TAB>url".
#    NOTE(review): the %{url} write-out variable presumably needs a
#    reasonably recent curl -- confirm the minimum version.
# 4. Keep only the lines whose status code is 404.
links "$1" \
  | grep -e '^http' \
  | parallel 'curl -ILs -o /dev/null -w "%{http_code}\t%{url}\n"' \
  | grep -e '^404'