#!/usr/bin/env bash
#
# Print the ads found on a listings page, one ad per output line.
#
# Usage: <this script> URL
#
# The URL is rendered to text with elinks and the rendering is scanned twice:
#   1. The numbered entries after the "Visible links" heading are loaded into
#      a number -> link text table.
#   2. Ads are collected. An ad is announced by a line containing
#      "Overview image"; its body starts at the next line that begins with a
#      "[" link marker and continues until the next "Overview image" line or
#      a blank line. Body lines are joined with "; ". Scanning stops at the
#      first blank line seen after at least one ad has been printed.
#
# Output (stdout): "<ad text> (<link>)" per ad, or just "<ad text>" when the
# ad's link number is not in the table. If the number of ads printed differs
# from the total announced on the page ("N properties for rent"), the line
# "SCRIPT BROKEN, OFFERS MAY BE MISSED" is printed as well.
#
# Exit status: 2 on missing argument, 1 if elinks fails, 0 otherwise.
set -euo pipefail

if (( $# < 1 )); then
  printf 'usage: %s URL\n' "${0##*/}" >&2
  exit 2
fi
URL=$1

# Run elinks in a plain assignment so that its exit status is checked; inside
# a here-string a failure would go unnoticed and the script would carry on
# with empty input.
if ! page=$(elinks "$URL"); then
  printf '%s: elinks failed for %s\n' "${0##*/}" "$URL" >&2
  exit 1
fi
mapfile -t inputlines <<<"$page"

## Collect links
re_links_header='^[[:space:]]*Visible links$'
re_link_entry='^[[:space:]]*([0-9]+)\. '

declare -A links=()
inlinkssection=0
for line in "${inputlines[@]}"; do
  if [[ $line =~ $re_links_header ]]; then
    inlinkssection=1
  elif (( inlinkssection == 1 )) && [[ $line =~ $re_link_entry ]]; then
    # Key: the entry number. Value: everything after the first ". ", which
    # is the separator that follows the number.
    links[${BASH_REMATCH[1]}]=${line#*'. '}
  fi
done

## Collect ads
re_total='([0-9]+) *[Pp]roperties for rent'
re_body_start='^[[:space:]]*\[([0-9]*)'
re_link_marker='^[[:space:]]*\[[0-9]*\]'
nbsp=$'\xc2\xa0' # c2 a0 is UTF-8 for a non-breaking space

totalnum=
counted=0
accum=
# 0: outside
# 1: in overview images section
# 2: in body
state=0
adlink= # link for the ad currently being collected

# Print the ad accumulated so far (if a body was being collected and it is
# non-empty), count it, and clear the accumulator.
# Globals: state, adlink (read); accum, counted (read and written).
writeout() {
  if (( state == 2 )) && [[ -n $accum ]]; then
    if [[ -n $adlink ]]; then
      printf '%s (%s)\n' "$accum" "$adlink"
    else
      printf '%s\n' "$accum"
    fi
    counted=$((counted + 1))
  fi
  accum=
}

for line in "${inputlines[@]}"; do
  # The announced total: only the digits directly in front of the phrase.
  if [[ $line =~ $re_total ]]; then
    totalnum=${BASH_REMATCH[1]}
    continue
  fi

  if [[ $line == *'Overview image'* ]]; then
    # A new ad begins; flush the one before it.
    writeout
    state=1
  elif [[ -z ${line//[[:space:]]/} ]]; then
    # Blank line: ends the current ad, and ends the whole listing once at
    # least one ad has been printed.
    writeout
    state=0
    if (( counted > 0 )); then break; fi
  elif (( state == 1 )) && [[ $line =~ $re_body_start ]]; then
    state=2
    linknumber=${BASH_REMATCH[1]}
    # Start every ad without a link so an ad with an unknown link number
    # cannot inherit the previous ad's link. The lookup is skipped when the
    # marker has no digits, because an empty associative-array subscript is
    # an error.
    adlink=
    if [[ -n $linknumber ]]; then
      adlink=${links[$linknumber]:-}
    fi
    # Drop the leading "[N]" marker when present; otherwise keep the line.
    if [[ $line =~ $re_link_marker ]]; then
      accum=${line:${#BASH_REMATCH[0]}}
    else
      accum=$line
    fi
  elif (( state == 2 )); then
    # Trim leading whitespace, then one leading non-breaking space, then
    # trailing whitespace, and append to the ad text.
    part=${line#"${line%%[![:space:]]*}"}
    part=${part#"$nbsp"}
    part=${part%"${part##*[![:space:]]}"}
    accum="$accum; $part"
  fi
done
writeout

if [[ $counted != "$totalnum" ]]; then
  echo "SCRIPT BROKEN, OFFERS MAY BE MISSED"
fi