#!/usr/bin/env bash
#
# Print one line per rental offer found in the text rendering that elinks
# produces for the Edinburgh Letting Centre search page below (query string:
# per_page=60, address=Edinburgh, price_max=1200, bedrooms_min=1).
#
# Output (stdout): every offer as its text lines joined with "; ". If the
# number of offers printed differs from the "<N> Properties for rent" figure
# found on the page, the line "SCRIPT BROKEN, OFFERS MAY BE MISSED" is printed
# as well (exit status stays 0 in that case).
#
# Requires: elinks on PATH.

set -euo pipefail

readonly url='https://properties.edinburghlettingcentre.com/?per_page=60&address=Edinburgh&price_min&price_max=1200&bedrooms_min=1'

# c2 a0 is UTF-8 for a non-breaking space.
readonly nbsp=$'\xc2\xa0'
# ASCII whitespace that may make up a "blank" line (newlines are already
# stripped by mapfile -t).
readonly blank_chars=$' \t\r\v\f'

# Regexes are kept in variables so the shell does not re-interpret them.
readonly re_total='([0-9]+) *Properties for rent'
readonly re_body_start='^[[:space:]]*\['
readonly re_link_prefix='^[[:space:]]*\[[0-9]*\]'
readonly re_leading_ws='^[[:space:]]+'
readonly re_trailing_ws='[[:space:]]+$'

totalnum=
counted=0
accum=

# Parser state:
#   0: outside
#   1: in overview images section
#   2: in body
state=0

#######################################
# Print the offer collected so far (if any), count it, and reset the buffer.
# accum is only ever non-empty while state is 2, so checking accum suffices.
# Globals: accum (read, reset), counted (incremented)
# Outputs: the accumulated offer line on stdout
#######################################
emit_offer() {
  if [[ -n $accum ]]; then
    printf '%s\n' "$accum"
    counted=$((counted + 1))
  fi
  accum=
}

# Capture the exit status explicitly: a failing command substitution inside a
# here-string would otherwise go unnoticed even under `set -e`. Parsing still
# proceeds on whatever output was produced, so stdout stays as before.
fetch_status=0
page=$(elinks "$url") || fetch_status=$?
if ((fetch_status != 0)); then
  printf 'warning: elinks exited with status %d; output may be incomplete\n' \
    "$fetch_status" >&2
fi

mapfile -t inputlines <<<"$page"

for line in "${inputlines[@]}"; do
  # Header line carrying the advertised total: take only the digits that sit
  # directly in front of "Properties for rent".
  if [[ $line =~ $re_total ]]; then
    totalnum=${BASH_REMATCH[1]}
    continue
  fi

  if [[ $line == *'Overview image'* ]]; then
    # A new listing begins; flush the previous one.
    emit_offer
    state=1
  elif [[ -z ${line//[$blank_chars]/} ]]; then
    # Whitespace-only line ends the current offer.
    emit_offer
    state=0
    # The first blank line after at least one offer ends the scan.
    if ((counted > 0)); then
      break
    fi
  elif ((state == 1)) && [[ $line =~ $re_body_start ]]; then
    # First body line: drop the leading "[<digits>]" link marker if present.
    state=2
    accum=$line
    if [[ $line =~ $re_link_prefix ]]; then
      accum=${line:${#BASH_REMATCH[0]}}
    fi
  elif ((state == 2)); then
    # Continuation line: trim leading whitespace, then one leading
    # non-breaking space, then trailing whitespace, and append.
    part=$line
    if [[ $part =~ $re_leading_ws ]]; then
      part=${part:${#BASH_REMATCH[0]}}
    fi
    part=${part#"$nbsp"}
    if [[ $part =~ $re_trailing_ws ]]; then
      part=${part:0:${#part}-${#BASH_REMATCH[0]}}
    fi
    accum="$accum; $part"
  fi
done

# Flush an offer that ran up to the end of the input.
emit_offer

if [[ $counted != "$totalnum" ]]; then
  echo "SCRIPT BROKEN, OFFERS MAY BE MISSED"
fi