blob: eb4b5c9c0bcc720c46b9ea1f5825cc5cb45bab3e (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
|
#!/usr/bin/env bash
#
# Prints one line per rental offer found in the text that `elinks` writes to
# stdout for the umega.co.uk search-results URL below, then warns when the
# number of offers printed differs from the total the page itself announces.
#
# Output (stdout): one "; "-joined line per offer, followed by
# "SCRIPT BROKEN, OFFERS MAY BE MISSED" if the two counts disagree.
set -euo pipefail

readonly SEARCH_URL='https://www.umega.co.uk/properties/properties-to-rent/?per_page=60&address=Edinburgh&price_min=&price_max=1200&bedrooms_min=1&furnished=&pg=1&order=ASC&orderby=price'

#######################################
# Extracts the offers from a text rendering of the results page.
# Arguments: none
# Inputs:    page text on stdin
# Outputs:   one line per offer on stdout; the "SCRIPT BROKEN" warning on
#            stdout when the number of offers differs from the announced
#            "<N> properties for rent" total (or when no total was seen)
# Returns:   0
#######################################
extract_offers() {
  local line trimmed
  local total='' accum=''
  local counted=0
  # 0: outside
  # 1: in overview images section
  # 2: in body
  local state=0
  # The number must directly precede the phrase; thousands separators are
  # tolerated and stripped. Text earlier on the line is ignored.
  local count_re='([0-9][0-9,]*) *properties for rent'
  local bracket_re='^[[:space:]]*\['
  local link_re='^[[:space:]]*\[[0-9]*\]'
  # c2 a0 is UTF-8 for a non-breaking space
  local nbsp=$'\xc2\xa0'

  while IFS= read -r line || [[ -n $line ]]; do
    if [[ $line =~ $count_re ]]; then
      total=${BASH_REMATCH[1]//,/}
      continue
    fi

    if [[ $line == *'Overview image'* ]]; then
      # A new offer starts: flush the one collected so far.
      if ((state == 2)) && [[ -n $accum ]]; then
        printf '%s\n' "$accum"
        counted=$((counted + 1))
      fi
      accum=''
      state=1
    elif [[ -z ${line//[[:space:]]/} ]]; then
      # Whitespace-only line: the current offer (if any) ends here.
      if ((state == 2)) && [[ -n $accum ]]; then
        printf '%s\n' "$accum"
        counted=$((counted + 1))
      fi
      accum=''
      state=0
      # The first blank line after at least one offer ends the listing.
      if ((counted > 0)); then
        break
      fi
    elif ((state == 1)) && [[ $line =~ $bracket_re ]]; then
      # First body line: drop the leading "[<digits>]" marker when present.
      state=2
      accum=$line
      if [[ $line =~ $link_re ]]; then
        accum=${line:${#BASH_REMATCH[0]}}
      fi
    elif ((state == 2)); then
      # Continuation line: strip leading whitespace, one leading
      # non-breaking space, then trailing whitespace.
      trimmed=${line#"${line%%[![:space:]]*}"}
      trimmed=${trimmed#"$nbsp"}
      trimmed=${trimmed%"${trimmed##*[![:space:]]}"}
      accum+="; $trimmed"
    fi
  done

  # Input ended in the middle of an offer.
  if [[ -n $accum ]]; then
    printf '%s\n' "$accum"
    counted=$((counted + 1))
  fi

  if [[ "$counted" != "$total" ]]; then
    echo "SCRIPT BROKEN, OFFERS MAY BE MISSED"
  fi
}

# Fetch into a variable first so a failing elinks is actually noticed; the
# script still carries on with whatever text was received, so stdout and the
# exit status stay as callers know them (an empty page yields the warning).
page=''
if ! page=$(elinks "$SEARCH_URL"); then
  printf 'warning: elinks failed to fetch %s\n' "$SEARCH_URL" >&2
fi
extract_offers <<<"$page"
|