summary refs log tree commit diff
path: root/modules/vacancies/getters
diff options
context:
space:
mode:
Diffstat (limited to 'modules/vacancies/getters')
-rwxr-xr-x modules/vacancies/getters/southside-rent.sh 58
1 files changed, 58 insertions, 0 deletions
diff --git a/modules/vacancies/getters/southside-rent.sh b/modules/vacancies/getters/southside-rent.sh
new file mode 100755
index 0000000..be157df
--- /dev/null
+++ b/modules/vacancies/getters/southside-rent.sh
@@ -0,0 +1,58 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+mapfile inputlines <<<"$(elinks 'https://southsidemanagement.com/search-results/?property_type=&maximum_rent=1000&minimum_bedrooms=1&maximum_bedrooms=&furnished=&department=residential-lettings')"
+
+
+## Collect links
+
+declare -A links
+inlinkssection=0
+for ((i=0; i<${#inputlines[@]}; i++)); do
+ line=$(tr -d $'\n' <<<"${inputlines[$i]}")
+ if grep '^\s*Visible links$' <<<"$line" >/dev/null; then
+ inlinkssection=1
+ elif [[ $inlinkssection -eq 1 ]] && grep '^\s*[0-9][0-9]*\. ' <<<"$line" >/dev/null; then
+ linknumber=$(sed 's/^\s*\([0-9]*\)\..*/\1/' <<<"$line")
+ linktext=$(sed 's/[^.]*\. //' <<<"$line")
+ links[$linknumber]=$linktext
+ fi
+done
+
+
+## Collect ads
+# Parser state for the ad scan:
+# 0: before search results
+# 1: in search results, outside any ad
+# 2: in ad
+state=0
+# accum: text of the ad being assembled; adlink: link that writeout appends in parentheses.
+accum=
+adlink=
+
+# Print the pending ad text in accum, followed by " (adlink)" when adlink is set, then clear accum. Prints nothing unless state is 2.
+writeout() {
+  local suffix="${adlink:+ ($adlink)}"
+  (( state == 2 )) && [[ -n $accum ]] && echo "${accum}${suffix}"
+  accum=
+}
+
+for ((i=0; i<${#inputlines[@]}; i++)); do
+ line=$(tr -d $'\n' <<<"${inputlines[$i]}")
+ if [[ $state -eq 0 && $line = *"Search Results"* ]]; then
+ state=1
+ elif [[ $state -ge 1 ]] && grep '^\s*•' <<<"$line" >/dev/null; then
+ [[ $line != *"["* ]] && break
+ writeout
+ state=2
+ linknumber=$(sed 's/^[^[]*\[\([0-9]*\)\].*/\1/' <<<"$line")
+ accum=$(sed 's/^[^]]*]//' <<<"$line")
+ elif [[ $state -eq 2 && $line = *REF:* ]]; then
+ writeout
+ state=1
+ elif [[ $state -eq 2 && $line != *"["* && $line != '' ]]; then
+ accum="$accum; $(sed 's/^\s*//; s/\s*$//' <<<"$line")"
+ fi
+done
+
+writeout