diff --git a/backend/lib/repo.js b/backend/lib/repo.js
index 507f0f7..f066b80 100644
--- a/backend/lib/repo.js
+++ b/backend/lib/repo.js
@@ -3,8 +3,11 @@ import { timeout } from "./utils.js";
 import { join } from "https://deno.land/std@0.192.0/path/posix.ts";
 import { ensureDir } from "https://deno.land/std@0.192.0/fs/ensure_dir.ts";
 import * as fsize from "npm:filesize";
+import * as zstd from 'https://deno.land/x/zstd_wasm/deno/zstd.ts';
+import { Sha256 } from "https://deno.land/std@0.119.0/hash/sha256.ts";
 
 const filesize = fsize.filesize;
+await zstd.init();
 
 export async function saveRepo(ats, didInfo, job = null) {
   if (didInfo.skipRepo) {
@@ -73,14 +76,23 @@ export async function saveRepo(ats, didInfo, job = null) {
     return;
   }
 
+  repo.carHash = new Sha256().update(data).hex();
+
   // ensure db directory
   const dbPathBase = ats.env.ATSCAN_DB_PATH || "./db";
   const dbPath = join(dbPathBase, "repo");
   await ensureDir(dbPath);
 
   // write car file
-  const carFn = join(dbPath, `${did}.car`);
-  await Deno.writeFile(carFn, data);
+  //const carFn = join(dbPath, `${did}.car`);
+  //await Deno.writeFile(carFn, data);
+
+  // write compressed version
+  const compressed = zstd.compress(data);
+  repo.sizeCompressed = compressed.length;
+  repo.carCompressedHash = new Sha256().update(compressed).hex();
+  const carFnCompressed = join(dbPath, `${did}.car.zst`);
+  await Deno.writeFile(carFnCompressed, compressed);
 
   // write index file
   const indexFn = join(dbPath, `${did}.json`);
@@ -96,7 +108,7 @@
     await job.log(
       `[${did}@${pds}] displayName=${
         JSON.stringify(repo.profile?.displayName)
-      } [${filesize(repo.size)}] ${carFn}`,
+      } [${filesize(repo.size)}] compressed: ${carFnCompressed} [${filesize(repo.sizeCompressed)}]`,
     );
     await job.updateProgress(99);
   }
 }
diff --git a/backend/pds-crawler.js b/backend/pds-crawler.js
index 224e506..a76b40a 100644
--- a/backend/pds-crawler.js
+++ b/backend/pds-crawler.js
@@ -15,7 +15,7 @@ async function crawlUrl(ats, url, host = "local") {
   if (host === "local") {
     try {
       const [, ms] = await timeout(
-        TIMEOUT,
+        5000,
         fetch(url, {
           method: "OPTIONS",
           headers: {
@@ -38,7 +38,9 @@
          },
        }),
      );
-      if (res) {
+      if (!res.ok) {
+        err = res.status;
+      } else {
        data = await res.json();
      }
    } catch (e) {
@@ -77,7 +79,7 @@
 
 async function crawl(ats) {
   const arr = await ats.db.pds.find().toArray();
-  const results = pooledMap(25, arr.slice(0, 1000), async (i) => {
+  const results = pooledMap(10, arr.slice(0, 1000), async (i) => {
     let err = null;
 
     if (i.url.match(/^https?:\/\/(localhost|example.com)/)) {
@@ -89,8 +91,10 @@
     }
 
     const host = i.url.replace(/^https?:\/\//, "");
+    const dontHaveA = i.dns &&
+      (!i.dns.Answer || i.dns.Answer.filter((a) => a.type === 1).length === 0);
 
-    if (!i.dns) {
+    if (!i.dns || dontHaveA) {
       console.log("sending dns request: ", i.url);
       let dns = await
         (await fetch(`https://dns.google/resolve?name=${host}&type=A`))
diff --git a/backend/repo-crawler.js b/backend/repo-crawler.js
index 9d1a909..53e0ae3 100644
--- a/backend/repo-crawler.js
+++ b/backend/repo-crawler.js
@@ -78,9 +78,9 @@ async function traversePDSRepos(ats, item, cursor = null) {
 async function crawlNew(ats) {
   const pds = await ats.db.pds.find({}).toArray();
   const results = pooledMap(CONCURRENCY, _.shuffle(pds), async (item) => {
-    if (item.url === "https://bsky.social") {
+    /*if (item.url !== "https://bsky.social") {
       return null;
-    }
+    }*/
     if (!item.inspect.current || item.inspect.current.err) {
       return null;
     }
diff --git a/frontend/package.json b/frontend/package.json
index 3293dde..d815f7d 100644
--- a/frontend/package.json
+++ b/frontend/package.json
@@ -1,6 +1,6 @@
 {
   "name": "atscan-fe",
-  "version": "0.7.1-alpha",
+  "version": "0.7.2-alpha",
   "private": true,
   "scripts": {
     "dev": "vite dev",
diff --git a/frontend/src/routes/did:[id]/+page.svelte b/frontend/src/routes/did:[id]/+page.svelte
index 568f569..5ac8991 100644
--- a/frontend/src/routes/did:[id]/+page.svelte
+++ b/frontend/src/routes/did:[id]/+page.svelte
@@ -13,19 +13,32 @@
   const item = data.item;
   const asa = item.revs[item.revs.length - 1].operation?.alsoKnownAs;
   const handles = asa ? asa.map((h) => h.replace(/^at:\/\//, '')) : [];
-  let verifiedHandles = null;
+  let handleVerification = null;
 
-  export function checkDNSHandleUrl(host) {
-    return `https://dns.google/resolve?name=_atproto.${host}&type=TXT`;
-  }
-
-  async function checkDNSHandle(did, host) {
-    const resp = await fetch(checkDNSHandleUrl(host));
-    const out = await resp.json();
-    if (!out.Answer) {
-      return false;
+  async function verifyHandle(handle, did) {
+    const verifications = [];
+    // firstly we try DNS style
+    const dnsCheckUrl = `https://dns.google/resolve?name=_atproto.${handle}&type=TXT`;
+    const dnsResp = await fetch(dnsCheckUrl);
+    const dnsData = await dnsResp.json();
+    verifications.push({
+      verified:
+        dnsData.Answer && dnsData.Answer.find((a) => a.type === 16 && a.data === `did=${did}`),
+      type: 'dns',
+      url: dnsCheckUrl
+    });
+    // if DNS verification failed, then we try http based
+    if (!verifications[0].verified) {
+      const httpCheckUrl = `https://${handle}/.well-known/atproto-did`;
+      const httpResp = await fetch(httpCheckUrl);
+      const httpString = await httpResp.text();
+      verifications.push({
+        verified: httpString.includes(did),
+        type: 'http',
+        url: httpCheckUrl
+      });
     }
-    return out.Answer.find((a) => a.type === 16 && a.data === `did=${did}`);
+    return verifications;
   }
 
   function tableMapperValuesLocal(source, keys) {
@@ -45,20 +58,27 @@
           : null;
         if (handle) {
           val = `@${handle}`;
-          if (!handle.match(/\.bsky\.social$/) && i === source.length - 1) {
+          if (i === source.length - 1) {
             let hstr = null;
-            if (verifiedHandles === null) {
+            console.log(handleVerification);
+            if (handleVerification === null) {
               hstr = '';
-            } else if (verifiedHandles.includes(handle)) {
-              hstr =
-                '';
+            } else if (handleVerification && handleVerification.find((v) => v.verified)) {
+              const method = handleVerification.find((v) => v.verified);
+              hstr = ` ${method.type}`;
             } else {
               hstr = '';
             }
             if (hstr) {
-              val += `${hstr}`;
+              if (handleVerification) {
+                val += `${hstr}`;
+              } else {
+                val += `
+                  ${hstr}
+                `;
+              }
             }
           }
         }
@@ -117,15 +137,9 @@
         currentError = e.message;
       }
     }
 
-    if (handles && handles[0] && !handles[0].match(/\.bsky\.social$/)) {
-      const verified = await checkDNSHandle(item.did, handles[0]);
-      if (verified) {
-        verifiedHandles = [handles[0]];
-        historyTable = renderTable();
-      } else {
-        verifiedHandles = [];
-        historyTable = renderTable();
-      }
+    if (handles && handles[0]) {
+      handleVerification = await verifyHandle(handles[0], item.did);
+      historyTable = renderTable();
     }
   });
@@ -227,9 +241,18 @@
+
+          Hash
+          {item.repo?.carHash || '-'}
+
           Size
-          {filesize(item.repo?.size)}
+          {filesize(item.repo?.size)}
+          {#if item.repo?.sizeCompressed}
+            ({filesize(item.repo.sizeCompressed)} compressed)
+          {/if}
+
           Records
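
Note (not part of the patch): a minimal sketch of how the compressed repo written by the new saveRepo() path could be read back and checked against the hashes it records. It assumes the same zstd_wasm and std@0.119.0 Sha256 modules imported in backend/lib/repo.js, that zstd_wasm exposes a decompress() counterpart to the compress() call used above, and that the caller supplies the carHash / carCompressedHash values produced by saveRepo(); the loadCompressedRepo name and the `expected` parameter are illustrative only.

// Illustrative sketch, not part of the diff: read back `${did}.car.zst` and
// verify it against the carHash / carCompressedHash fields set in saveRepo().
// Assumes zstd_wasm also exports decompress(); adjust if the module differs.
import { join } from "https://deno.land/std@0.192.0/path/posix.ts";
import { Sha256 } from "https://deno.land/std@0.119.0/hash/sha256.ts";
import * as zstd from "https://deno.land/x/zstd_wasm/deno/zstd.ts";

await zstd.init();

export async function loadCompressedRepo(dbPath, did, expected) {
  // read the compressed CAR written by saveRepo()
  const compressed = await Deno.readFile(join(dbPath, `${did}.car.zst`));

  // check the compressed payload first (compare with repo.carCompressedHash)
  if (new Sha256().update(compressed).hex() !== expected.carCompressedHash) {
    throw new Error(`compressed CAR hash mismatch for ${did}`);
  }

  // decompress and check the hash of the original CAR (compare with repo.carHash)
  const data = zstd.decompress(compressed);
  if (new Sha256().update(data).hex() !== expected.carHash) {
    throw new Error(`CAR hash mismatch for ${did}`);
  }
  return data;
}

Recording separate hashes for the raw and compressed files, as the patch does, lets on-disk corruption be detected without decompressing every archive, while the raw-CAR hash still identifies the repo content independently of the compression settings.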