diff options
Diffstat (limited to 'modules/vacancies')
| -rwxr-xr-x | modules/vacancies/bak/aarhus.sh | 8 | ||||
| -rwxr-xr-x | modules/vacancies/bak/cambridge.sh | 9 | ||||
| -rwxr-xr-x | modules/vacancies/bak/chalmers.sh | 30 | ||||
| -rwxr-xr-x | modules/vacancies/bak/elc-rent.sh | 4 | ||||
| -rwxr-xr-x | modules/vacancies/bak/glasgow.sh | 8 | ||||
| -rwxr-xr-x | modules/vacancies/bak/mpg.sh | 8 | ||||
| -rwxr-xr-x | modules/vacancies/bak/umega-rent.sh | 4 | ||||
| -rwxr-xr-x | modules/vacancies/getter-helpers/umega-like.sh | 83 | ||||
| -rw-r--r-- | modules/vacancies/vacancies.js | 85 |
9 files changed, 239 insertions, 0 deletions
diff --git a/modules/vacancies/bak/aarhus.sh b/modules/vacancies/bak/aarhus.sh new file mode 100755 index 0000000..8acbe6f --- /dev/null +++ b/modules/vacancies/bak/aarhus.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash +set -euo pipefail + +curl -sL 'https://cs.au.dk/about-us/vacancies/' \ + | grep 'DYCON\.Em[^ ]*vacancies' \ + | head -1 \ + | sed 's/^[^[]*//; s/; *$//' \ + | jq -r '.[] | (.title + " [https://cs.au.dk" + .link + "]")' diff --git a/modules/vacancies/bak/cambridge.sh b/modules/vacancies/bak/cambridge.sh new file mode 100755 index 0000000..f5f9fe1 --- /dev/null +++ b/modules/vacancies/bak/cambridge.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash +set -euo pipefail + +curl -sL 'https://www.cam.ac.uk/jobs/term/Department-of-Computer-Science-and-Technology' | \ + sed ' +/<tbody>/,/<\/tbody>/!d +s|<a href="/jobs/term/[^"]*">[^<]*</a>||g +/<a href="\/jobs\//!d +s|^ *<a href="\([^"]*\)">\(.*\)</a>.*|\2 (https://www.cam.ac.uk\1)|' diff --git a/modules/vacancies/bak/chalmers.sh b/modules/vacancies/bak/chalmers.sh new file mode 100755 index 0000000..5583223 --- /dev/null +++ b/modules/vacancies/bak/chalmers.sh @@ -0,0 +1,30 @@ +#!/usr/bin/env bash +set -euo pipefail + +url='https://web103.reachmee.com/ext/I003/304/main?site=5&validator=a72aeedd63ec10de71e46f8d91d0d57c&lang=UK' + +script=' +0,/<div id="mainjoblist">/d +/<tbody>/,/<\/tbody>/!d + +/<\/tr>/ { + s/.*// + x + s/\n//g + s/^ | // + /Technical and Administrative staff/d + /PhD Student Positions/d + p + d +} + +/<td>/!d +/^\s*<\/td>\s*$/d +s|\s*<td>\(.*\)</td>\s*|\1| +s|.*Application deadline:.*display:\s*none">\([^<]*\)</span>.*|\1| +s|.*a href=.*reachmee.*/job.*job_id[^>]*>\([^<]*\)</a>.*|\1| +s/^/ | / +H +' + +curl -s "$url" | sed -n "$script" diff --git a/modules/vacancies/bak/elc-rent.sh b/modules/vacancies/bak/elc-rent.sh new file mode 100755 index 0000000..2d56c8a --- /dev/null +++ b/modules/vacancies/bak/elc-rent.sh @@ -0,0 +1,4 @@ +#!/usr/bin/env bash +set -euo pipefail + +exec "$(dirname 
"$0")"/../getter-helpers/umega-like.sh 'https://properties.edinburghlettingcentre.com/?per_page=60&address=Edinburgh&price_min&price_max=1200&bedrooms_min=1' diff --git a/modules/vacancies/bak/glasgow.sh b/modules/vacancies/bak/glasgow.sh new file mode 100755 index 0000000..c6beef5 --- /dev/null +++ b/modules/vacancies/bak/glasgow.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash +set -euo pipefail + +curl -sL 'https://www.jobs.gla.ac.uk/jobs/college-of-science-and-engineering/school-of-computing-science-1' \ + | sed ' +/a href="\/job\// !d +/>Read More</ d +s|^ *<a href="\([^"]*\)">\(.*\)</a>.*|\2 (https://www.jobs.gla.ac.uk\1)|' diff --git a/modules/vacancies/bak/mpg.sh b/modules/vacancies/bak/mpg.sh new file mode 100755 index 0000000..bf7f222 --- /dev/null +++ b/modules/vacancies/bak/mpg.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash +set -euo pipefail + +# <td headers="view-ocm-pp-project-title-table-column" class="views-field views-field-ocm-pp-project-title"><a href="/node/21090">Identifying novel ways of inducing effective angiogenesis and the development of arteries</a> </td> + + +curl -s 'https://postdocprogram.mpg.de/all-postdoc-positions' \ + | sed -n '/td.*project-title/ s/^ *<td[^>]*> *<a href="\([^"]*\)"[^>]*>\([^<]*\).*/\2 (https:\/\/postdocprogram.mpg.de\1)/p' diff --git a/modules/vacancies/bak/umega-rent.sh b/modules/vacancies/bak/umega-rent.sh new file mode 100755 index 0000000..26fd099 --- /dev/null +++ b/modules/vacancies/bak/umega-rent.sh @@ -0,0 +1,4 @@ +#!/usr/bin/env bash +set -euo pipefail + +exec "$(dirname "$0")"/../getter-helpers/umega-like.sh 'https://www.umega.co.uk/properties/properties-to-rent/?per_page=60&address=Edinburgh&price_min=&price_max=1200&bedrooms_min=1&furnished=&pg=1&order=ASC&orderby=price' diff --git a/modules/vacancies/getter-helpers/umega-like.sh b/modules/vacancies/getter-helpers/umega-like.sh new file mode 100755 index 0000000..061476b --- /dev/null +++ b/modules/vacancies/getter-helpers/umega-like.sh @@ -0,0 +1,83 @@ 
+#!/usr/bin/env bash +set -euo pipefail + +URL=$1 + +mapfile inputlines <<<"$(elinks "$1")" + + +## Collect links + +declare -A links +inlinkssection=0 +for ((i=0; i<${#inputlines[@]}; i++)); do + line=$(tr -d $'\n' <<<"${inputlines[$i]}") + if grep '^\s*Visible links$' <<<"$line" >/dev/null; then + inlinkssection=1 + elif [[ $inlinkssection -eq 1 ]] && grep '^\s*[0-9][0-9]*\. ' <<<"$line" >/dev/null; then + linknumber=$(sed 's/^\s*\([0-9]*\)\..*/\1/' <<<"$line") + linktext=$(sed 's/[^.]*\. //' <<<"$line") + links[$linknumber]=$linktext + fi +done + + +## Collect ads + +totalnum= +counted=0 + +accum= + +# 0: outside +# 1: in overview images section +# 2: in body +state=0 + +adlink= # link for the ad currently being collected + +function writeout() { + if [[ $state -eq 2 && -n $accum ]]; then + if [[ -n $adlink ]]; then echo "$accum ($adlink)"; else echo "$accum"; fi + let counted+=1 + fi + accum= +} + +for ((i=0; i<${#inputlines[@]}; i++)); do + line=$(tr -d $'\n' <<<"${inputlines[$i]}") + + # echo >&2 "?$state <$line>" + m=$(sed -n 's/[^0-9]*\([0-9]*\) *[Pp]roperties for rent.*/\1/p' <<<"$line") + if [[ -n $m ]]; then + totalnum=$m + continue + fi + + if grep 'Overview image' <<<"$line" >/dev/null; then + # echo >&2 "overview <$line>" + writeout + state=1 + elif [[ -z "$(tr -d '[:space:]' <<<"$line")" ]]; then + # echo >&2 "empty <$line>" + writeout + state=0 + if [[ $counted -gt 0 ]]; then break; fi + elif [[ $state -eq 1 ]] && grep '^\s*\[' <<<"$line" >/dev/null; then + # echo >&2 "bodystart <$line>" + state=2 + linknumber=$(sed 's/^\s*\[\([0-9]*\).*/\1/' <<<"$line") + if [[ -n ${links[$linknumber]:+1} ]]; then adlink=${links[$linknumber]}; fi + accum=$(sed 's/^\s*\[[0-9]*\]//' <<<"$line") + elif [[ $state -eq 2 ]]; then + # echo >&2 "bodycont <$line>" + # c2 a0 is UTF-8 for a non-breaking space + accum="$accum; $(sed 's/^\s*//; s/^\xc2\xa0//; s/\s*$//' <<<"$line")" + fi +done + +writeout + +if [[ $counted != "$totalnum" ]]; then + echo "SCRIPT BROKEN, OFFERS 
MAY BE MISSED" +fi diff --git a/modules/vacancies/vacancies.js b/modules/vacancies/vacancies.js new file mode 100644 index 0000000..0625c74 --- /dev/null +++ b/modules/vacancies/vacancies.js @@ -0,0 +1,85 @@ +const cmn = require("../$common.js"); +const fs = require("fs"); +const child_process = require("child_process"); +const persist = require("node-persist"); +const mkdirp = require("mkdirp"); + +const FIRST_FETCH_DELAY = 10 * 60 * 1000; // 10 minutes +const INTERVAL = 24 * 3600 * 1000; // 24 hours +// const FIRST_FETCH_DELAY = 2 * 1000; +// const INTERVAL = 20 * 1000; + +let moddir = null; + +mkdirp.sync(cmn.persistdir + "/vacancies"); +const DB = persist.create({ + dir: cmn.persistdir + "/vacancies", + continuous: false, + interval: false, +}); +DB.initSync(); + +function sendNotification(text, cb) { + cmn.statusbot.send("vacancies", text, cb); + // console.log("--> " + text); + // cb(200); +} + +function refreshSite(name) { + child_process.execFile(moddir + "/getters/" + name + ".sh", (err, stdout, _stderr) => { + if (err != null) { + sendNotification("Error getting <" + name + ">: " + err, () => {}); + return; + } + + const lines = stdout.split("\n").filter(s => s.length > 0); + let news = []; + + let prev = DB.getItemSync(name); + if (prev != null) { + prev = new Set(prev); + for (let line of lines) { + if (!prev.has(line)) news.push(line); + } + } else { + news = lines; + } + + if (news.length == 0) { + console.log("[vacancies] No news for <" + name + ">"); + return; + } + console.log("[vacancies] " + news.length + " news for <" + name + ">"); + + const message = news.map(s => "<" + name + "> " + s).join("\n"); + sendNotification(message, status => { + if (status == 200) { + DB.setItemSync(name, lines); + } else { + console.log("[vacancies] Failed sending: [[[" + message + "]]]"); + } + }); + }); +} + +function refreshAll() { + if (!fs.existsSync(moddir + "/getters")) return; + console.log("[vacancies] Refreshing"); + let i = 0; + for (let name of 
fs.readdirSync(moddir + "/getters")) { + if (!name.endsWith(".sh")) continue; + setTimeout(() => refreshSite(name.slice(0, -3)), 2000 * i); + i++; + } +} + +module.exports = (app, io, _moddir) => { + moddir = _moddir; + + setTimeout(() => { + refreshAll(); + setInterval(() => { + refreshAll(); + }, INTERVAL); + }, FIRST_FETCH_DELAY); // wait a while before the first fetch +}; |
