summary refs log tree commit diff
diff options
context:
space:
mode:
author Tom Smeding <tom.smeding@gmail.com> 2020-03-22 08:59:52 +0100
committer Tom Smeding <tom.smeding@gmail.com> 2020-03-22 08:59:52 +0100
commit 841519901e8b6df97733bad85f215a640f6d6d3f (patch)
tree 097f7463a742a4fbb85c2f361a509dcbc35f1832
parent 0e556a7eedb779adfc36db2aecd378eccaaa2420 (diff)
Cache http gets
-rw-r--r-- .gitignore 1
-rwxr-xr-x ichimoe-get.js 70
2 files changed, 54 insertions, 17 deletions
diff --git a/.gitignore b/.gitignore
index c2658d7..9f875e9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,2 @@
node_modules/
+.ichimoe-cache/
diff --git a/ichimoe-get.js b/ichimoe-get.js
index f929db5..fcbca01 100755
--- a/ichimoe-get.js
+++ b/ichimoe-get.js
@@ -1,5 +1,7 @@
#!/usr/bin/env node
const https = require("https");
+const fs = require("fs");
+const crypto = require("crypto");
const util = require("util");
const cheerio = require("cheerio");
@@ -65,6 +67,7 @@ function parseConjugationDiv($, div) {
const items = $(div).children();
let formdesc = null;
let alternatives = [];
+ const unknown = [];
for (let i = 0; i < items.length; i++) {
if (items[i].type != "tag") continue;
@@ -80,10 +83,9 @@ function parseConjugationDiv($, div) {
}
}
- return {
- form: formdesc,
- alternatives
- };
+ let result = {form: formdesc, alternatives};
+ if (unknown.length) result.unknown = unknown;
+ return result;
}
function parseConjugationsDiv($, div) {
@@ -137,7 +139,9 @@ function parseDefinitionsDD($, dd) {
}
function parseWordLI($, li) {
- const romaji = $($(".gloss-rtext", li)[0]).text().trim();
+ const div = $(".gloss-rtext", li)[0];
+ const links = $(".info-link", div);
+ const romaji = links.length > 0 ? $(links[0]).text().trim() : $(div).text().trim();
const dl = $(".gloss-content > dl.alternatives", li);
const alternatives = parseDL($, dl);
return {romaji, alternatives};
@@ -157,6 +161,44 @@ function parseIchimoeHTML(html) {
return words;
}
+/**
+ * Fetch `url` over HTTPS, caching the response body on disk.
+ *
+ * The cache lives in `.ichimoe-cache/` next to this script; each entry is
+ * named after the SHA-256 hex digest of the URL. If a cached body exists it
+ * is passed to `cb` without touching the network; otherwise the URL is
+ * fetched, the body is written to the cache, and then passed to `cb`.
+ *
+ * A non-200 response or a failed request prints a message and exits the
+ * process with status 1. Unexpected filesystem errors are thrown.
+ *
+ * @param {string} url - URL to fetch; also used as the cache key.
+ * @param {function(string): void} cb - Called once with the response body.
+ */
+function cachedHTTPSGet(url, cb) {
+    // Hash the URL so that any query string maps to a fixed-length file name.
+    const id = crypto.createHash("sha256").update(url).digest("hex");
+    const dir = __dirname + "/.ichimoe-cache";
+    const path = dir + "/" + id + ".html";
+
+    // Cache miss: download the page, store it, then hand it to the caller.
+    function doGet() {
+        https.get(url, res => {
+            if (res.statusCode != 200) {
+                console.error(`Ichi.moe returned status code: ${res.statusCode}`);
+                console.error(res.headers);
+                process.exit(1);
+            }
+
+            // Let the stream decode UTF-8. Appending raw Buffer chunks to a
+            // string decodes each chunk separately, which corrupts multi-byte
+            // characters that straddle a chunk boundary.
+            res.setEncoding("utf8");
+
+            let body = "";
+            res.on("data", data => body += data);
+            res.on("end", () => {
+                fs.writeFile(path, body, err => {
+                    if (err) throw err;
+                    cb(body);
+                });
+            });
+        }).on("error", err => {
+            // Report connection-level failures (DNS, TLS, reset) the same way
+            // as a bad status code instead of leaving the event unhandled.
+            console.error(`Request to ichi.moe failed: ${err.message}`);
+            process.exit(1);
+        });
+    }
+
+    fs.mkdir(dir, err => {
+        // The directory already existing is expected on every run but the first.
+        if (err && err.code != "EEXIST") throw err;
+
+        fs.readFile(path, "utf8", (err, data) => {
+            if (err) {
+                // Only a missing cache file triggers a download.
+                if (err.code == "ENOENT") doGet();
+                else throw err;
+            } else {
+                cb(data);
+            }
+        });
+    });
+}
+
// const result = parseIchimoeHTML(require("fs").readFileSync("kioku.ichimoe.html"));
// console.log(util.inspect(result, {depth: Infinity, colors: true}));
// process.exit();
@@ -169,17 +211,11 @@ if (process.argv.length != 3) {
const word = process.argv[2];
const url = `https://ichi.moe/cl/qr/?q=${encodeURIComponent(word)}&r=htr`;
-https.get(url, res => {
- if (res.statusCode != 200) {
- console.error(`Ichi.moe returned status code: ${res.statusCode}`);
- console.error(res.headers);
- process.exit(1);
- }
-
- let body = "";
- res.on("data", data => body += data);
- res.on("end", () => {
- const result = parseIchimoeHTML(body);
+cachedHTTPSGet(url, body => {
+ const result = parseIchimoeHTML(body);
+ if (process.stdout.isTTY) {
console.log(util.inspect(result, {depth: Infinity}));
- });
+ } else {
+ console.log(JSON.stringify(result));
+ }
});