2023-05-22 16:46:29 +02:00
|
|
|
import cheerio from "npm:cheerio@1.0.0-rc.12";
|
2023-01-26 02:17:29 +01:00
|
|
|
import { ensureDir, exists } from "https://deno.land/std@0.173.0/fs/mod.ts";
|
|
|
|
|
2023-05-15 15:08:38 +02:00
|
|
|
// Directory in which loadHtmlUrl stores fetched page bodies, one file per
// URL, named by the hex SHA-256 digest of that URL.
const CACHE_DIR = "./cache/sync";
|
2023-01-23 04:37:12 +01:00
|
|
|
|
2023-02-06 16:26:09 +01:00
|
|
|
/**
 * Fetch a URL and parse the response body as JSON.
 *
 * @param {string | URL | Request} url - Anything `fetch` accepts as its first argument.
 * @param {RequestInit} [options] - Passed through unchanged to `fetch`.
 * @returns {Promise<any>} The parsed JSON payload.
 * @throws {Error} If the server answers with a non-2xx status.
 * @throws {SyntaxError} If the response body is not valid JSON.
 */
export async function loadJSONUrl(url, options) {
  const resp = await fetch(url, options);

  if (!resp.ok) {
    // Discard the unread body so the underlying resource is released
    // before we bail out.
    await resp.body?.cancel();
    throw new Error(
      `Request to ${url} failed: ${resp.status} ${resp.statusText}`,
    );
  }

  return resp.json();
}
|
|
|
|
|
|
|
|
/**
 * Fetch an HTML page and return it loaded into cheerio, backed by an
 * on-disk cache in CACHE_DIR.
 *
 * The cache file name is the hex-encoded SHA-256 digest of `url`. When that
 * file already exists its contents are used and no network request is made.
 * Otherwise the page is requested with a desktop Safari User-Agent header,
 * the body is written to the cache file, and the parsed document is returned.
 *
 * Only successful (2xx) responses are cached: storing an error page would
 * make every later call for the same URL return that error from disk.
 *
 * @param {string} url - Address of the page to load; also the cache key.
 * @returns {Promise<object>} The value returned by `cheerio.load` for the page body.
 * @throws {Error} If the server answers with a non-2xx status.
 */
export async function loadHtmlUrl(url) {
  await ensureDir(CACHE_DIR);

  // Cache key: SHA-256 of the URL string, rendered as lowercase hex.
  const digest = await crypto.subtle.digest(
    "SHA-256",
    new TextEncoder().encode(url),
  );
  const hash = Array.from(new Uint8Array(digest))
    .map((b) => b.toString(16).padStart(2, "0"))
    .join("");
  const cacheFn = `${CACHE_DIR}/${hash}`;

  if (await exists(cacheFn)) {
    console.log(`Cache found! ${hash}`);
    return cheerio.load(await Deno.readTextFile(cacheFn));
  }

  console.log(`Getting ${url}`);
  const resp = await fetch(url, {
    headers: {
      "User-Agent":
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.1 Safari/605.1.15",
    },
  });

  if (!resp.ok) {
    // Do not write error pages to the cache; discard the body and report.
    await resp.body?.cancel();
    throw new Error(
      `Request to ${url} failed: ${resp.status} ${resp.statusText}`,
    );
  }

  const output = await resp.text();
  await Deno.writeTextFile(cacheFn, output);
  return cheerio.load(output);
}
|
|
|
|
|
|
|
|
/**
 * Read an HTML file from disk and load it into cheerio.
 *
 * @param {string | URL} fn - Path of the file to read, as accepted by `Deno.readTextFile`.
 * @returns {Promise<object>} The value returned by `cheerio.load` for the file's contents.
 */
export async function loadHtmlLocal(fn) {
  const html = await Deno.readTextFile(fn);
  return cheerio.load(html);
}
|
2023-02-06 16:26:09 +01:00
|
|
|
|
|
|
|
/**
 * Turn a display string into a lowercase, dash-separated identifier.
 *
 * Steps, in order: trim surrounding whitespace, decompose to NFD, lowercase,
 * strip combining diacritical marks (U+0300–U+036F), replace every "." with
 * "-", replace each run of whitespace with a single "-", and finally drop
 * one trailing "-" if present.
 *
 * Consecutive dashes are not collapsed, so "Mr. Mime" becomes "mr--mime".
 *
 * @param {string} str - Text to convert.
 * @returns {string} The identifier form of `str`.
 */
export function formatId(str) {
  return str
    .trim()
    // Decompose first so accents become separate combining marks that the
    // range below can remove ("é" -> "e" + U+0301 -> "e").
    .normalize("NFD")
    .toLowerCase()
    .replace(/[\u0300-\u036F]/g, "")
    // Global: every dot is converted, not only the first one.
    .replace(/\./g, "-")
    .replace(/\s+/g, "-")
    .replace(/-$/, "");
}
|