diff options
| -rwxr-xr-x | modules/vacancies/getter-helpers/umega-like.sh | 83 | ||||
| -rwxr-xr-x | modules/vacancies/getters/elc-rent.sh | 50 | ||||
| -rwxr-xr-x | modules/vacancies/getters/umega-rent.sh | 50 |
3 files changed, 85 insertions, 98 deletions
diff --git a/modules/vacancies/getter-helpers/umega-like.sh b/modules/vacancies/getter-helpers/umega-like.sh new file mode 100755 index 0000000..061476b --- /dev/null +++ b/modules/vacancies/getter-helpers/umega-like.sh @@ -0,0 +1,83 @@ +#!/usr/bin/env bash +set -euo pipefail + +URL=$1 + +mapfile inputlines <<<"$(elinks "$1")" + + +## Collect links + +declare -A links +inlinkssection=0 +for ((i=0; i<${#inputlines[@]}; i++)); do + line=$(tr -d $'\n' <<<"${inputlines[$i]}") + if grep '^\s*Visible links$' <<<"$line" >/dev/null; then + inlinkssection=1 + elif [[ $inlinkssection -eq 1 ]] && grep '^\s*[0-9][0-9]*\. ' <<<"$line" >/dev/null; then + linknumber=$(sed 's/^\s*\([0-9]*\)\..*/\1/' <<<"$line") + linktext=$(sed 's/[^.]*\. //' <<<"$line") + links[$linknumber]=$linktext + fi +done + + +## Collect ads + +totalnum= +counted=0 + +accum= + +# 0: outside +# 1: in overview images section +# 2: in body +state=0 + +adlink= # link for the ad currently being collected + +function writeout() { + if [[ $state -eq 2 && -n $accum ]]; then + if [[ -n $adlink ]]; then echo "$accum ($adlink)"; else echo "$accum"; fi + let counted+=1 + fi + accum= +} + +for ((i=0; i<${#inputlines[@]}; i++)); do + line=$(tr -d $'\n' <<<"${inputlines[$i]}") + + # echo >&2 "?$state <$line>" + m=$(sed -n 's/[^0-9]*\([0-9]*\) *[Pp]roperties for rent.*/\1/p' <<<"$line") + if [[ -n $m ]]; then + totalnum=$m + continue + fi + + if grep 'Overview image' <<<"$line" >/dev/null; then + # echo >&2 "overview <$line>" + writeout + state=1 + elif [[ -z "$(tr -d '[:space:]' <<<"$line")" ]]; then + # echo >&2 "empty <$line>" + writeout + state=0 + if [[ $counted -gt 0 ]]; then break; fi + elif [[ $state -eq 1 ]] && grep '^\s*\[' <<<"$line" >/dev/null; then + # echo >&2 "bodystart <$line>" + state=2 + linknumber=$(sed 's/^\s*\[\([0-9]*\).*/\1/' <<<"$line") + if [[ -n ${links[$linknumber]:+1} ]]; then adlink=${links[$linknumber]}; fi + accum=$(sed 's/^\s*\[[0-9]*\]//' <<<"$line") + elif [[ $state -eq 2 ]]; then + # echo >&2 "bodycont <$line>" + # c2 a0 is UTF-8 for a non-breaking space + accum="$accum; $(sed 's/^\s*//; s/^\xc2\xa0//; s/\s*$//' <<<"$line")" + fi +done + +writeout + +if [[ $counted != "$totalnum" ]]; then + echo "SCRIPT BROKEN, OFFERS MAY BE MISSED" +fi diff --git a/modules/vacancies/getters/elc-rent.sh b/modules/vacancies/getters/elc-rent.sh index 624e99e..2d56c8a 100755 --- a/modules/vacancies/getters/elc-rent.sh +++ b/modules/vacancies/getters/elc-rent.sh @@ -1,52 +1,4 @@ #!/usr/bin/env bash set -euo pipefail -totalnum= -counted=0 - -accum= - -# 0: outside -# 1: in overview images section -# 2: in body -state=0 - -mapfile inputlines <<<"$(elinks 'https://properties.edinburghlettingcentre.com/?per_page=60&address=Edinburgh&price_min&price_max=1200&bedrooms_min=1')" - -for ((i=0; i<${#inputlines[@]}; i++)); do - line=$(tr -d $'\n' <<<"${inputlines[$i]}") - - # echo >&2 "?$state <$line>" - m=$(sed -n 's/[^0-9]*\([0-9]*\) *Properties for rent.*/\1/p' <<<"$line") - if [[ -n $m ]]; then - totalnum=$m - continue - fi - - if grep 'Overview image' <<<"$line" >/dev/null; then - # echo >&2 "overview <$line>" - if [[ $state -eq 2 && -n $accum ]]; then echo "$accum"; let counted+=1; fi - accum= - state=1 - elif [[ -z "$(tr -d '[[:space:]]' <<<"$line")" ]]; then - # echo >&2 "empty <$line>" - if [[ $state -eq 2 && -n $accum ]]; then echo "$accum"; let counted+=1; fi - accum= - state=0 - if [[ $counted -gt 0 ]]; then break; fi - elif [[ $state -eq 1 ]] && grep '^\s*\[' <<<"$line" >/dev/null; then - # echo >&2 "bodystart <$line>" - state=2 - accum=$(sed 's/^\s*\[[0-9]*\]//' <<<"$line") - elif [[ $state -eq 2 ]]; then - # echo >&2 "bodycont <$line>" - # c2 a0 is UTF-8 for a non-breaking space - accum="$accum; $(sed 's/^\s*//; s/^\xc2\xa0//; s/\s*$//' <<<"$line")" - fi -done - -if [[ -n $accum ]]; then echo "$accum"; let counted+=1; fi - -if [[ $counted != $totalnum ]]; then - echo "SCRIPT BROKEN, OFFERS MAY BE MISSED" -fi +exec "$(dirname "$0")"/../getter-helpers/umega-like.sh 'https://properties.edinburghlettingcentre.com/?per_page=60&address=Edinburgh&price_min&price_max=1200&bedrooms_min=1' diff --git a/modules/vacancies/getters/umega-rent.sh b/modules/vacancies/getters/umega-rent.sh index eb4b5c9..26fd099 100755 --- a/modules/vacancies/getters/umega-rent.sh +++ b/modules/vacancies/getters/umega-rent.sh @@ -1,52 +1,4 @@ #!/usr/bin/env bash set -euo pipefail -totalnum= -counted=0 - -accum= - -# 0: outside -# 1: in overview images section -# 2: in body -state=0 - -mapfile inputlines <<<"$(elinks 'https://www.umega.co.uk/properties/properties-to-rent/?per_page=60&address=Edinburgh&price_min=&price_max=1200&bedrooms_min=1&furnished=&pg=1&order=ASC&orderby=price')" - -for ((i=0; i<${#inputlines[@]}; i++)); do - line=$(tr -d $'\n' <<<"${inputlines[$i]}") - - # echo >&2 "?$state <$line>" - m=$(sed -n 's/[^0-9]*\([0-9]*\) *properties for rent.*/\1/p' <<<"$line") - if [[ -n $m ]]; then - totalnum=$m - continue - fi - - if grep 'Overview image' <<<"$line" >/dev/null; then - # echo >&2 "overview <$line>" - if [[ $state -eq 2 && -n $accum ]]; then echo "$accum"; let counted+=1; fi - accum= - state=1 - elif [[ -z "$(tr -d '[[:space:]]' <<<"$line")" ]]; then - # echo >&2 "empty <$line>" - if [[ $state -eq 2 && -n $accum ]]; then echo "$accum"; let counted+=1; fi - accum= - state=0 - if [[ $counted -gt 0 ]]; then break; fi - elif [[ $state -eq 1 ]] && grep '^\s*\[' <<<"$line" >/dev/null; then - # echo >&2 "bodystart <$line>" - state=2 - accum=$(sed 's/^\s*\[[0-9]*\]//' <<<"$line") - elif [[ $state -eq 2 ]]; then - # echo >&2 "bodycont <$line>" - # c2 a0 is UTF-8 for a non-breaking space - accum="$accum; $(sed 's/^\s*//; s/^\xc2\xa0//; s/\s*$//' <<<"$line")" - fi -done - -if [[ -n $accum ]]; then echo "$accum"; let counted+=1; fi - -if [[ $counted != $totalnum ]]; then - echo "SCRIPT BROKEN, OFFERS MAY BE MISSED" -fi +exec "$(dirname "$0")"/../getter-helpers/umega-like.sh 'https://www.umega.co.uk/properties/properties-to-rent/?per_page=60&address=Edinburgh&price_min=&price_max=1200&bedrooms_min=1&furnished=&pg=1&order=ASC&orderby=price' |
