From 841519901e8b6df97733bad85f215a640f6d6d3f Mon Sep 17 00:00:00 2001 From: Tom Smeding Date: Sun, 22 Mar 2020 08:59:52 +0100 Subject: Cache http gets --- .gitignore | 1 + ichimoe-get.js | 70 ++++++++++++++++++++++++++++++++++++++++++++-------------- 2 files changed, 54 insertions(+), 17 deletions(-) diff --git a/.gitignore b/.gitignore index c2658d7..9f875e9 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ node_modules/ +.ichimoe-cache/ diff --git a/ichimoe-get.js b/ichimoe-get.js index f929db5..fcbca01 100755 --- a/ichimoe-get.js +++ b/ichimoe-get.js @@ -1,5 +1,7 @@ #!/usr/bin/env node const https = require("https"); +const fs = require("fs"); +const crypto = require("crypto"); const util = require("util"); const cheerio = require("cheerio"); @@ -65,6 +67,7 @@ function parseConjugationDiv($, div) { const items = $(div).children(); let formdesc = null; let alternatives = []; + const unknown = []; for (let i = 0; i < items.length; i++) { if (items[i].type != "tag") continue; @@ -80,10 +83,9 @@ function parseConjugationDiv($, div) { } } - return { - form: formdesc, - alternatives - }; + let result = {form: formdesc, alternatives}; + if (unknown.length) result.unknown = unknown; + return result; } function parseConjugationsDiv($, div) { @@ -137,7 +139,9 @@ function parseDefinitionsDD($, dd) { } function parseWordLI($, li) { - const romaji = $($(".gloss-rtext", li)[0]).text().trim(); + const div = $(".gloss-rtext", li)[0]; + const links = $(".info-link", div); + const romaji = links.length > 0 ? $(links[0]).text().trim() : $(div).text().trim(); const dl = $(".gloss-content > dl.alternatives", li); const alternatives = parseDL($, dl); return {romaji, alternatives}; @@ -157,6 +161,44 @@ function parseIchimoeHTML(html) { return words; } +function cachedHTTPSGet(url, cb) { + const id = crypto.createHash("sha256").update(url).digest("hex"); + const dir = __dirname + "/.ichimoe-cache"; + const path = dir + "/" + id + ".html"; + + function doGet() { + https.get(url, res => { + if (res.statusCode != 200) { + console.error(`Ichi.moe returned status code: ${res.statusCode}`); + console.error(res.headers); + process.exit(1); + } + + let body = ""; + res.on("data", data => body += data); + res.on("end", () => { + fs.writeFile(path, body, err => { + if (err) throw err; + cb(body); + }); + }); + }); + } + + fs.mkdir(dir, err => { + if (err && err.code != "EEXIST") throw err; + + fs.readFile(path, "utf8", (err, data) => { + if (err) { + if (err.code == "ENOENT") doGet(); + else throw err; + } else { + cb(data); + } + }); + }); +} + // const result = parseIchimoeHTML(require("fs").readFileSync("kioku.ichimoe.html")); // console.log(util.inspect(result, {depth: Infinity, colors: true})); // process.exit(); @@ -169,17 +211,11 @@ if (process.argv.length != 3) { const word = process.argv[2]; const url = `https://ichi.moe/cl/qr/?q=${encodeURIComponent(word)}&r=htr`; -https.get(url, res => { - if (res.statusCode != 200) { - console.error(`Ichi.moe returned status code: ${res.statusCode}`); - console.error(res.headers); - process.exit(1); - } - - let body = ""; - res.on("data", data => body += data); - res.on("end", () => { - const result = parseIchimoeHTML(body); +cachedHTTPSGet(url, body => { + const result = parseIchimoeHTML(body); + if (process.stdout.isTTY) { console.log(util.inspect(result, {depth: Infinity})); - }); + } else { + console.log(JSON.stringify(result)); + } }); -- cgit v1.2.3