#!/bin/sh
# Find dead (404) links in an HTML document given as a file, a URL, or "-" for stdin
# Requires curl, grep, hxwls, parallel
# $Id: deadlink.sh,v 1.3 2024/06/24 09:20:23 oc45ujef Exp $
# https://wwwcip.cs.fau.de/~oc45ujef/misc/src/deadlink.sh
# links SOURCE
# Write the links found in an HTML document to stdout by piping the
# document through hxwls.
#   SOURCE  "-" to read the document from standard input, the path of an
#           existing regular file, or anything else, which is handed to
#           curl as a URL (redirects are followed).
# Returns 2 and prints a usage message on stderr when SOURCE is missing or
# empty; otherwise the status is that of the final hxwls stage.
links(){
  if [ -z "${1:-}" ]
  then
    printf 'usage: %s FILE|URL|-\n' "${0##*/}" >&2
    return 2
  fi
  if [ "$1" = "-" ]
  then
    cat
  elif [ -f "$1" ]
  then
    # "--" stops option parsing so a file name starting with "-" is read
    # as a file instead of being rejected as an unknown option.
    cat -- "$1"
  else
    # -s hides the progress meter; -S still reports a failed fetch on
    # stderr instead of silently producing an empty document.
    curl -LsS "$1"
  fi | hxwls -
}
# Main pipeline:
#   1. list the links of the document named by the first script argument;
#   2. keep only the lines that start with "http";
#   3. let parallel run one curl per remaining line: a HEAD request (-I)
#      that follows redirects (-L), stays silent (-s), throws the headers
#      away and prints "<status>\t<url>" (assumes GNU parallel, which
#      appends the input line as the last argument; %{url} presumably
#      needs a reasonably recent curl -- verify on old systems);
#   4. print only the results whose status code is 404.
links "$1" |
  grep '^http' |
  parallel 'curl -ILs -o /dev/null -w "%{http_code}\t%{url}\n"' |
  grep '^404'