summary | refs | log | tree | commit | diff
path: root/modules/vacancies
diff options
context:
space:
mode:
author Tom Smeding <tom@tomsmeding.com> 2026-02-27 22:54:50 +0100
committer Tom Smeding <tom@tomsmeding.com> 2026-02-27 22:54:50 +0100
commit 90efd48e11f9ae6f6db64e2b101015d5b69f2fb5 (patch)
tree e79732a88daeaa77370e3431a9c5c25ed25cacf7 /modules/vacancies
parent efc5a694762ea88724f24e82eef24840d79819be (diff)
vacancies: Links in umega and elc getters
Diffstat (limited to 'modules/vacancies')
-rwxr-xr-x modules/vacancies/getter-helpers/umega-like.sh 83
-rwxr-xr-x modules/vacancies/getters/elc-rent.sh 50
-rwxr-xr-x modules/vacancies/getters/umega-rent.sh 50
3 files changed, 85 insertions, 98 deletions
diff --git a/modules/vacancies/getter-helpers/umega-like.sh b/modules/vacancies/getter-helpers/umega-like.sh
new file mode 100755
index 0000000..061476b
--- /dev/null
+++ b/modules/vacancies/getter-helpers/umega-like.sh
@@ -0,0 +1,83 @@
+#!/usr/bin/env bash
+# Shared getter helper: runs elinks on the listing URL given as $1 and prints
+# one line per ad found in its output.
+set -euo pipefail
+
+# Listing URL to process; abort with a usage message when it is missing.
+URL=${1:?usage: umega-like.sh <listing-url>}
+
+# elinks output, one array element per line. mapfile is used without -t, so
+# each element keeps its trailing newline.
+mapfile inputlines <<<"$(elinks "$URL")"
+
+
+## Collect links
+
+declare -A links
+inlinkssection=0
+for ((i=0; i<${#inputlines[@]}; i++)); do
+ line=$(tr -d $'\n' <<<"${inputlines[$i]}")
+ if grep '^\s*Visible links$' <<<"$line" >/dev/null; then
+ inlinkssection=1
+ elif [[ $inlinkssection -eq 1 ]] && grep '^\s*[0-9][0-9]*\. ' <<<"$line" >/dev/null; then
+ linknumber=$(sed 's/^\s*\([0-9]*\)\..*/\1/' <<<"$line")
+ linktext=$(sed 's/[^.]*\. //' <<<"$line")
+ links[$linknumber]=$linktext
+ fi
+done
+
+
+## Collect ads
+
+# Number of ads announced by the "N properties for rent" line; stays empty
+# until that line has been seen.
+totalnum=
+# Number of ads printed so far.
+counted=0
+
+# Text of the ad currently being collected.
+accum=
+
+# Parser state:
+# 0: outside
+# 1: in overview images section
+# 2: in body
+state=0
+
+adlink= # link for the ad currently being collected
+
+# Print the ad collected so far, followed by its link in parentheses when one
+# was found, and count it. Nothing is printed unless the parser is in the body
+# state (2) and the accumulator is non-empty.
+# Globals: state, accum, adlink (read); counted, accum, adlink (written)
+# Outputs: at most one line on stdout
+function writeout() {
+  if [[ $state -eq 2 && -n $accum ]]; then
+    if [[ -n $adlink ]]; then
+      printf '%s\n' "$accum ($adlink)"
+    else
+      printf '%s\n' "$accum"
+    fi
+    counted=$((counted + 1))
+  fi
+  accum=
+  # Forget the link together with the text; otherwise an ad whose link number
+  # cannot be resolved would be printed with the previous ad's link.
+  adlink=
+}
+
+# Walk the elinks output line by line. An ad starts at the first bracketed
+# line after an "Overview image" marker and continues until the next marker
+# or blank line.
+for ((i=0; i<${#inputlines[@]}; i++)); do
+  # Drop the newline that mapfile left on the element.
+  line=$(tr -d $'\n' <<<"${inputlines[$i]}")
+
+  # echo >&2 "?$state <$line>"
+  # The "N properties for rent" line supplies the expected number of ads.
+  m=$(sed -n 's/[^0-9]*\([0-9]*\) *[Pp]roperties for rent.*/\1/p' <<<"$line")
+  if [[ -n $m ]]; then
+    totalnum=$m
+    continue
+  fi
+
+  if grep -q 'Overview image' <<<"$line"; then
+    # echo >&2 "overview <$line>"
+    writeout
+    state=1
+  elif [[ -z "$(tr -d '[:space:]' <<<"$line")" ]]; then
+    # echo >&2 "empty <$line>"
+    writeout
+    state=0
+    # The first blank line after at least one printed ad ends the scan.
+    if [[ $counted -gt 0 ]]; then break; fi
+  elif [[ $state -eq 1 ]] && grep -q '^\s*\[' <<<"$line"; then
+    # echo >&2 "bodystart <$line>"
+    state=2
+    linknumber=$(sed 's/^\s*\[\([0-9]*\).*/\1/' <<<"$line")
+    # Start every ad without a link, so a failed lookup cannot reuse the link
+    # of the previous ad.
+    adlink=
+    # The bracket may hold no digits, leaving linknumber empty; bash rejects
+    # an empty associative-array subscript, so look up non-empty numbers only.
+    if [[ -n $linknumber && -n ${links[$linknumber]:+1} ]]; then
+      adlink=${links[$linknumber]}
+    fi
+    accum=$(sed 's/^\s*\[[0-9]*\]//' <<<"$line")
+  elif [[ $state -eq 2 ]]; then
+    # echo >&2 "bodycont <$line>"
+    # c2 a0 is UTF-8 for a non-breaking space
+    accum="$accum; $(sed 's/^\s*//; s/^\xc2\xa0//; s/\s*$//' <<<"$line")"
+  fi
+done
+
+# Flush whatever ad is still pending after the loop.
+writeout
+
+# The number of printed ads must equal the count announced by the page.
+[[ $counted == "$totalnum" ]] || echo "SCRIPT BROKEN, OFFERS MAY BE MISSED"
diff --git a/modules/vacancies/getters/elc-rent.sh b/modules/vacancies/getters/elc-rent.sh
index 624e99e..2d56c8a 100755
--- a/modules/vacancies/getters/elc-rent.sh
+++ b/modules/vacancies/getters/elc-rent.sh
@@ -1,52 +1,4 @@
#!/usr/bin/env bash
set -euo pipefail
-totalnum=
-counted=0
-
-accum=
-
-# 0: outside
-# 1: in overview images section
-# 2: in body
-state=0
-
-mapfile inputlines <<<"$(elinks 'https://properties.edinburghlettingcentre.com/?per_page=60&address=Edinburgh&price_min&price_max=1200&bedrooms_min=1')"
-
-for ((i=0; i<${#inputlines[@]}; i++)); do
- line=$(tr -d $'\n' <<<"${inputlines[$i]}")
-
- # echo >&2 "?$state <$line>"
- m=$(sed -n 's/[^0-9]*\([0-9]*\) *Properties for rent.*/\1/p' <<<"$line")
- if [[ -n $m ]]; then
- totalnum=$m
- continue
- fi
-
- if grep 'Overview image' <<<"$line" >/dev/null; then
- # echo >&2 "overview <$line>"
- if [[ $state -eq 2 && -n $accum ]]; then echo "$accum"; let counted+=1; fi
- accum=
- state=1
- elif [[ -z "$(tr -d '[[:space:]]' <<<"$line")" ]]; then
- # echo >&2 "empty <$line>"
- if [[ $state -eq 2 && -n $accum ]]; then echo "$accum"; let counted+=1; fi
- accum=
- state=0
- if [[ $counted -gt 0 ]]; then break; fi
- elif [[ $state -eq 1 ]] && grep '^\s*\[' <<<"$line" >/dev/null; then
- # echo >&2 "bodystart <$line>"
- state=2
- accum=$(sed 's/^\s*\[[0-9]*\]//' <<<"$line")
- elif [[ $state -eq 2 ]]; then
- # echo >&2 "bodycont <$line>"
- # c2 a0 is UTF-8 for a non-breaking space
- accum="$accum; $(sed 's/^\s*//; s/^\xc2\xa0//; s/\s*$//' <<<"$line")"
- fi
-done
-
-if [[ -n $accum ]]; then echo "$accum"; let counted+=1; fi
-
-if [[ $counted != $totalnum ]]; then
- echo "SCRIPT BROKEN, OFFERS MAY BE MISSED"
-fi
+exec "$(dirname "$0")"/../getter-helpers/umega-like.sh 'https://properties.edinburghlettingcentre.com/?per_page=60&address=Edinburgh&price_min&price_max=1200&bedrooms_min=1'
diff --git a/modules/vacancies/getters/umega-rent.sh b/modules/vacancies/getters/umega-rent.sh
index eb4b5c9..26fd099 100755
--- a/modules/vacancies/getters/umega-rent.sh
+++ b/modules/vacancies/getters/umega-rent.sh
@@ -1,52 +1,4 @@
#!/usr/bin/env bash
set -euo pipefail
-totalnum=
-counted=0
-
-accum=
-
-# 0: outside
-# 1: in overview images section
-# 2: in body
-state=0
-
-mapfile inputlines <<<"$(elinks 'https://www.umega.co.uk/properties/properties-to-rent/?per_page=60&address=Edinburgh&price_min=&price_max=1200&bedrooms_min=1&furnished=&pg=1&order=ASC&orderby=price')"
-
-for ((i=0; i<${#inputlines[@]}; i++)); do
- line=$(tr -d $'\n' <<<"${inputlines[$i]}")
-
- # echo >&2 "?$state <$line>"
- m=$(sed -n 's/[^0-9]*\([0-9]*\) *properties for rent.*/\1/p' <<<"$line")
- if [[ -n $m ]]; then
- totalnum=$m
- continue
- fi
-
- if grep 'Overview image' <<<"$line" >/dev/null; then
- # echo >&2 "overview <$line>"
- if [[ $state -eq 2 && -n $accum ]]; then echo "$accum"; let counted+=1; fi
- accum=
- state=1
- elif [[ -z "$(tr -d '[[:space:]]' <<<"$line")" ]]; then
- # echo >&2 "empty <$line>"
- if [[ $state -eq 2 && -n $accum ]]; then echo "$accum"; let counted+=1; fi
- accum=
- state=0
- if [[ $counted -gt 0 ]]; then break; fi
- elif [[ $state -eq 1 ]] && grep '^\s*\[' <<<"$line" >/dev/null; then
- # echo >&2 "bodystart <$line>"
- state=2
- accum=$(sed 's/^\s*\[[0-9]*\]//' <<<"$line")
- elif [[ $state -eq 2 ]]; then
- # echo >&2 "bodycont <$line>"
- # c2 a0 is UTF-8 for a non-breaking space
- accum="$accum; $(sed 's/^\s*//; s/^\xc2\xa0//; s/\s*$//' <<<"$line")"
- fi
-done
-
-if [[ -n $accum ]]; then echo "$accum"; let counted+=1; fi
-
-if [[ $counted != $totalnum ]]; then
- echo "SCRIPT BROKEN, OFFERS MAY BE MISSED"
-fi
+exec "$(dirname "$0")"/../getter-helpers/umega-like.sh 'https://www.umega.co.uk/properties/properties-to-rent/?per_page=60&address=Edinburgh&price_min=&price_max=1200&bedrooms_min=1&furnished=&pg=1&order=ASC&orderby=price'