blob: 061476b191d4b0336d2740c4c58effe398b3576f (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
|
#!/usr/bin/env bash
#
# Scrape a property-listing page: render it to text with elinks, then print
# the ads found in it (the parsing happens further down in this file).
#
# Usage:    <script> URL
# Requires: elinks on PATH
set -euo pipefail

if (( $# < 1 )); then
  printf 'Usage: %s URL\n' "${0##*/}" >&2
  exit 1
fi
URL=$1

# Run elinks on its own first: a command substitution embedded in a
# here-string does not trip `set -e`, so a failed fetch would otherwise be
# parsed as an empty page.
if ! page=$(elinks "$URL"); then
  printf 'error: elinks could not render %s\n' "$URL" >&2
  exit 1
fi

# One array element per rendered line. No -t: every element keeps its trailing
# newline, and the code that consumes inputlines strips it.
mapfile inputlines <<<"$page"
## Collect links
# links[<number>] = text following "<number>. " on the numbered lines that come
# after the "Visible links" heading of the page dump.
declare -A links

#######################################
# Fill the global `links` map from the global `inputlines` array.
# Numbered lines that appear before the "Visible links" heading are ignored;
# once the heading has been seen, every "<ws><digits>. <text>" line is stored.
# Globals:   inputlines (read), links (written)
# Arguments: none
# Outputs:   nothing
#######################################
collect_links() {
  local i line
  local in_links=0
  # Kept in variables so the literal spaces need no escaping inside [[ =~ ]].
  local -r heading_re='^[[:space:]]*Visible links$'
  local -r entry_re='^[[:space:]]*([0-9]+)\. '

  for ((i = 0; i < ${#inputlines[@]}; i++)); do
    # Elements may still carry the newline mapfile left on them.
    line=${inputlines[i]%$'\n'}
    if [[ $line =~ $heading_re ]]; then
      in_links=1
    elif (( in_links )) && [[ $line =~ $entry_re ]]; then
      # BASH_REMATCH[0] is the "<ws><number>. " prefix; the rest is the link.
      links[${BASH_REMATCH[1]}]=${line#"${BASH_REMATCH[0]}"}
    fi
  done
}
collect_links
## Collect ads
# Shared parser state, read and updated by writeout and by the scan loop.
accum=''     # text gathered so far for the ad being collected
adlink=''    # link for the ad currently being collected
counted=0    # ads written out so far
totalnum=''  # count taken from the "<N> Properties for rent" line
# Position within the page:
#   0 = outside
#   1 = in overview images section
#   2 = in body
state=0
#######################################
# Emit the ad currently being collected, then clear the per-ad buffers.
# An ad is printed only while in the body state (state == 2) with non-empty
# text; its link, when known, is appended in parentheses.
# Globals:   state (read), accum (read, cleared), adlink (read, cleared),
#            counted (incremented once per printed ad)
# Arguments: none
# Outputs:   at most one line on stdout
#######################################
function writeout() {
  if [[ $state -eq 2 && -n $accum ]]; then
    if [[ -n $adlink ]]; then
      printf '%s\n' "$accum ($adlink)"
    else
      printf '%s\n' "$accum"
    fi
    counted=$((counted + 1))
  fi
  accum=
  # Forget the link as well; otherwise a following ad whose link number cannot
  # be resolved would be printed with this ad's link.
  adlink=
}
#######################################
# Walk the page dump line by line and hand each collected ad to writeout.
#   - "<N> Properties for rent" lines set totalnum.
#   - An "Overview image" line flushes the pending ad and enters state 1.
#   - A blank line flushes the pending ad and returns to state 0; once at
#     least one ad has been written, the first blank line ends the scan.
#   - In state 1, a line starting with "[" opens an ad body (state 2).
#   - In state 2, every other line is appended to the ad text with "; ".
# Globals:   inputlines, links (read); totalnum, state, accum, adlink
#            (written); counted (read; incremented by writeout)
# Arguments: none
# Outputs:   whatever writeout prints
#######################################
collect_ads() {
  local i line m linknumber
  local -r body_start_re='^[[:space:]]*\[([0-9]*)'
  local -r link_marker_re='^[[:space:]]*\[[0-9]*\]'

  for ((i = 0; i < ${#inputlines[@]}; i++)); do
    # Elements may still carry the newline mapfile left on them.
    line=${inputlines[i]%$'\n'}
    # echo >&2 "?$state <$line>"

    # Only fork sed for lines that can possibly match its pattern.
    if [[ $line == *[Pp]'roperties for rent'* ]]; then
      m=$(sed -n 's/[^0-9]*\([0-9]*\) *[Pp]roperties for rent.*/\1/p' <<<"$line")
      if [[ -n $m ]]; then
        totalnum=$m
        continue
      fi
    fi

    if [[ $line == *'Overview image'* ]]; then
      writeout
      state=1
    elif [[ -z ${line//[$' \t\n\v\f\r']/} ]]; then
      # Blank or whitespace-only line.
      writeout
      state=0
      if (( counted > 0 )); then break; fi
    elif (( state == 1 )) && [[ $line =~ $body_start_re ]]; then
      state=2
      linknumber=${BASH_REMATCH[1]}
      # Start without a link so that an unknown or missing link number does
      # not leave the previous ad's link attached to this ad.
      adlink=
      # An empty number (e.g. "[IMG]") is not a valid associative subscript.
      if [[ -n $linknumber && -n ${links[$linknumber]:+1} ]]; then
        adlink=${links[$linknumber]}
      fi
      # Drop the leading "[<n>]" marker; a line without one is kept whole.
      if [[ $line =~ $link_marker_re ]]; then
        accum=${line#"${BASH_REMATCH[0]}"}
      else
        accum=$line
      fi
    elif (( state == 2 )); then
      # c2 a0 is UTF-8 for a non-breaking space
      accum="$accum; $(sed 's/^\s*//; s/^\xc2\xa0//; s/\s*$//' <<<"$line")"
    fi
  done
}

collect_ads
# Flush an ad that was still open when the input ended.
writeout
# The number of ads printed must match the count announced by the page.
if [[ $counted != "$totalnum" ]]; then
  echo "SCRIPT BROKEN, OFFERS MAY BE MISSED"
fi
|