summaryrefslogtreecommitdiff
path: root/modules/vacancies/getters/umega-rent.sh
blob: eb4b5c9c0bcc720c46b9ea1f5825cc5cb45bab3e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
#!/usr/bin/env bash
set -euo pipefail

# Search-results page to scrape; the query string carries the search filters
# (location, price ceiling, minimum bedrooms) and the sort order.
listing_url='https://www.umega.co.uk/properties/properties-to-rent/?per_page=60&address=Edinburgh&price_min=&price_max=1200&bedrooms_min=1&furnished=&pg=1&order=ASC&orderby=price'

# Property count announced by the page itself; stays empty until the
# "... properties for rent" line has been seen.
totalnum=
# How many offers this script has printed so far.
counted=0

# Text of the offer currently being pieced together.
accum=

# Position of the line scanner:
#   0 = outside of any offer
#   1 = inside the overview images section
#   2 = inside the offer body
state=0

# Capture whatever elinks writes to stdout, one array element per line.
# Without -t every element keeps its trailing newline; the here-string
# guarantees at least one (possibly empty) element even when elinks prints
# nothing. NOTE(review): this relies on elinks emitting a text rendering of
# the page when its stdout is not a terminal -- confirm for the installed
# version.
readarray inputlines <<<"$(elinks "$listing_url")"

#######################################
# Print the offer collected so far (if any) and count it, then reset the
# collector. Nothing is printed unless the scanner is in the body state.
# Globals: state (read), accum (read/written), counted (written)
# Outputs: the collected offer text on stdout, one line
#######################################
flush_offer() {
  if [[ $state -eq 2 && -n $accum ]]; then
    printf '%s\n' "$accum"
    counted=$((counted + 1))
  fi
  accum=
}

#######################################
# Walk the captured page text line by line and print one line per offer.
#
# Scanner states (kept in the global `state`):
#   0 = outside of any offer
#   1 = inside the overview images section
#   2 = inside the offer body
#
# Globals:
#   inputlines (read)      - page text, one line per element
#   totalnum   (written)   - count taken from the "properties for rent" line
#   state, accum, counted  (read/written)
# Outputs: each completed offer on stdout, its body lines joined with "; "
# Returns: 0
#######################################
scan_listing_lines() {
  local i line total piece
  # A line that opens with "[" (after optional indentation).
  local -r marker_re='^[[:space:]]*\['
  # Optional indentation followed by a "[<digits>]" marker.
  local -r ref_re='^[[:space:]]*\[[0-9]*\]'
  # c2 a0 is UTF-8 for a non-breaking space
  local -r nbsp=$'\xc2\xa0'

  for ((i = 0; i < ${#inputlines[@]}; i++)); do
    # Elements may still carry the newline mapfile read them with.
    line=${inputlines[i]//$'\n'/}

    # The sed expression can only print something when the line contains the
    # literal phrase, so skip the fork for every other line.
    if [[ $line == *'properties for rent'* ]]; then
      total=$(sed -n 's/[^0-9]*\([0-9]*\) *properties for rent.*/\1/p' <<<"$line")
      if [[ -n $total ]]; then
        totalnum=$total
        continue
      fi
    fi

    if [[ $line == *'Overview image'* ]]; then
      # A new offer begins: emit the previous one first.
      flush_offer
      state=1
    elif [[ -z ${line//[[:space:]]/} ]]; then
      # Blank means whitespace only. A line made of brackets such as "[ ]"
      # is content and must not end the offer or the scan.
      flush_offer
      state=0
      # The first blank line after at least one offer ends the listing.
      if [[ $counted -gt 0 ]]; then break; fi
    elif [[ $state -eq 1 && $line =~ $marker_re ]]; then
      # First body line: drop the leading "[<digits>]" marker when present,
      # otherwise keep the line untouched.
      state=2
      if [[ $line =~ $ref_re ]]; then
        accum=${line:${#BASH_REMATCH[0]}}
      else
        accum=$line
      fi
    elif [[ $state -eq 2 ]]; then
      # Continuation line: trim leading whitespace, then one leading
      # non-breaking space, then trailing whitespace, and append.
      piece=${line#"${line%%[![:space:]]*}"}
      piece=${piece#"$nbsp"}
      piece=${piece%"${piece##*[![:space:]]}"}
      accum="$accum; $piece"
    fi
  done

  return 0
}

scan_listing_lines

#######################################
# Emit the offer still pending when the scan ended and check that the number
# of printed offers equals the number the page announced.
# Globals: accum, totalnum (read); counted (read/written)
# Outputs: the pending offer (if any) and, on a count mismatch, a warning
#          line -- both on stdout
#######################################
finish_listing() {
  if [[ -n $accum ]]; then
    printf '%s\n' "$accum"
    counted=$((counted + 1))
  fi

  # Both sides are quoted so the comparison is a literal string comparison;
  # an unquoted right-hand side would be treated as a glob pattern.
  if [[ "$counted" != "$totalnum" ]]; then
    echo "SCRIPT BROKEN, OFFERS MAY BE MISSED"
  fi
}

finish_listing