zrcadlo https://github.com/atscan/atscan
repo-crawler
This commit is contained in:
rodič
95585facb1
revize
6410cd4d6f
10
Makefile
10
Makefile
|
@ -20,8 +20,8 @@ index-daemon:
|
|||
api-ws:
|
||||
deno run --unstable --allow-net --allow-read --allow-env --allow-sys ./backend/api-ws.js
|
||||
|
||||
did-crawler:
|
||||
deno run --unstable --allow-net --allow-read --allow-env --allow-sys ./backend/did-crawler.js
|
||||
repo-crawler:
|
||||
deno run --unstable --allow-net --allow-read --allow-write --allow-env --allow-sys ./backend/repo-crawler.js
|
||||
|
||||
fe-rebuild:
|
||||
cd frontend && npm run build && pm2 restart atscan-fe
|
||||
|
@ -29,8 +29,12 @@ fe-rebuild:
|
|||
be-restart:
|
||||
pm2 restart atscan-api atscan-indexer atscan-pds-crawler atscan-plc-crawler
|
||||
|
||||
install:
|
||||
deno install --unstable -A -f -n ats ./cli/ats.js
|
||||
deno install --unstable -A -f -n ats-repo ./cli/ats-repo.js
|
||||
|
||||
format:
|
||||
cd backend && deno fmt
|
||||
cd backend && deno fmt **.js
|
||||
cd frontend && npm run format
|
||||
|
||||
fmt: format
|
||||
|
|
|
@ -1 +1,2 @@
|
|||
.env
|
||||
.env
|
||||
db
|
|
@ -1,15 +0,0 @@
|
|||
import * as bsky from "npm:@atproto/api@0.3.13";
|
||||
import "https://deno.land/std@0.192.0/dotenv/load.ts";
|
||||
|
||||
const { BskyAgent } = bsky.default;
|
||||
const agent = new BskyAgent({ service: "https://bsky.social" });
|
||||
|
||||
await agent.login({
|
||||
identifier: Deno.env.get("BLUESKY_USERNAME"),
|
||||
password: Deno.env.get("BLUESKY_PASSWORD"),
|
||||
});
|
||||
|
||||
const p = await agent.getProfiles({
|
||||
actors: ["did:plc:b5rrmme6ncenhe4lq53y7lpf", "tree.fail"],
|
||||
});
|
||||
console.log(p);
|
|
@ -8,9 +8,10 @@ async function index(ats) {
|
|||
const didsCount = await ats.db.did.countDocuments({
|
||||
"pds": { $in: [pds.url] },
|
||||
});
|
||||
console.log(`${pds.url}: ${didsCount}`);
|
||||
//console.log(`${pds.url}: ${didsCount}`);
|
||||
await ats.db.pds.updateOne({ url: pds.url }, { $set: { didsCount } });
|
||||
}
|
||||
console.log("indexer round finished");
|
||||
//console.log(await whoiser("dev.otaso-sky.blue"));
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,78 @@
|
|||
import * as repo from "npm:@atproto/repo@0.1.0";
|
||||
|
||||
const {
|
||||
MemoryBlockstore,
|
||||
readCarWithRoot,
|
||||
verifyCheckout,
|
||||
verifyFullHistory,
|
||||
} = repo.default;
|
||||
|
||||
const colsDefs = {
|
||||
"app.bsky.actor.profile": { ignore: true },
|
||||
"app.bsky.feed.like": { key: "like" },
|
||||
"app.bsky.feed.post": { key: "post" },
|
||||
"app.bsky.feed.repost": { key: "repost" },
|
||||
"app.bsky.graph.follow": { key: "follow" },
|
||||
"app.bsky.feed.generator": { key: "generator" },
|
||||
"app.bsky.graph.block": { key: "block" },
|
||||
};
|
||||
|
||||
/**
 * Decode a CAR archive without verifying anything about its contents.
 *
 * @param {Uint8Array} data - Raw bytes of the CAR file.
 * @returns {Promise<{root: unknown, blocks: unknown}>} The `root` and `blocks`
 *   values produced by `readCarWithRoot`.
 */
export async function readRaw(data) {
  const car = await readCarWithRoot(data);
  return { root: car.root, blocks: car.blocks };
}
|
||||
|
||||
/**
 * Decode a CAR archive and verify it against the given DID and signing key.
 *
 * The blocks are loaded into an in-memory blockstore, then both the current
 * checkout and the full commit history are verified, in that order.
 *
 * @param {Uint8Array} data - Raw bytes of the CAR file.
 * @param {string} did - DID the repository is expected to belong to.
 * @param {string} signingKey - Key passed to the verification routines.
 * @returns {Promise<{root: unknown, blocks: unknown, storage: unknown, checkout: unknown, history: unknown}>}
 */
export async function read(data, did, signingKey) {
  const raw = await readRaw(data);
  const storage = new MemoryBlockstore(raw.blocks);

  const checkout = await verifyCheckout(storage, raw.root, did, signingKey);
  const history = await verifyFullHistory(storage, raw.root, did, signingKey);

  return { root: raw.root, blocks: raw.blocks, storage, checkout, history };
}
|
||||
|
||||
/**
 * Verify a CAR archive and summarise its contents.
 *
 * @param {Uint8Array} data - Raw bytes of the CAR file.
 * @param {string} did - DID the repository is expected to belong to.
 * @param {string} signingKey - Key passed to the verification routines.
 * @returns {Promise<object>} Summary with `did`, `signingKey`, `size` (byte
 *   length of `data`), `root` (stringified), `commits` (history length),
 *   `profile` (plain-JSON copy of the `self` profile record, or `null`),
 *   `collections` (record count per known collection key) and `time`.
 * @throws {Error} When the checkout contains a collection that is not listed
 *   in `colsDefs`, or when verification in `read` fails.
 */
export async function inspect(data, did, signingKey) {
  // Pass the arguments explicitly instead of spreading `arguments`.
  const { root, checkout, history } = await read(data, did, signingKey);

  const collections = {};
  for (const [collection, records] of Object.entries(checkout.contents)) {
    // Own-property check so names such as "constructor" are not mistaken
    // for known collections via Object.prototype.
    const def = Object.hasOwn(colsDefs, collection)
      ? colsDefs[collection]
      : undefined;
    if (!def) {
      throw new Error(`Unknown collection: ${collection}`);
    }
    if (def.ignore) {
      continue;
    }
    collections[def.key] = Object.keys(records).length;
  }

  // Round-trip through JSON to obtain a plain, serialisable copy.
  const self = checkout.contents["app.bsky.actor.profile"]?.self;
  const profile = self ? JSON.parse(JSON.stringify(self)) : null;

  return {
    did,
    signingKey,
    size: data.length,
    root: root.toString(),
    commits: history.length,
    profile,
    collections,
    time: new Date(),
  };
}
|
|
@ -0,0 +1,12 @@
|
|||
/**
 * Race a promise against a deadline and measure how long it took.
 *
 * @param {number} ms - Deadline in milliseconds.
 * @param {Promise<T>|T} promise - Promise (or plain value) to wait for.
 * @returns {Promise<[T, number]>} Resolves with `[value, elapsedMs]` when the
 *   promise fulfils before the deadline.
 * @throws {Error} Rejects with `Error("timeout")` when the deadline passes
 *   first, or with the original reason when `promise` rejects.
 * @template T
 */
export function timeout(ms, promise) {
  return new Promise(function (resolve, reject) {
    const start = performance.now();
    const timer = setTimeout(function () {
      reject(new Error("timeout"));
    }, ms);

    // Always clear the timer once the wrapped promise settles; otherwise each
    // call leaves a pending timer behind that keeps the event loop alive.
    Promise.resolve(promise).then(
      (value) => {
        clearTimeout(timer);
        resolve([value, performance.now() - start]);
      },
      (reason) => {
        clearTimeout(timer);
        reject(reason);
      },
    );
  });
}
|
|
@ -1,22 +1,10 @@
|
|||
import { ATScan } from "./lib/atscan.js";
|
||||
import { pooledMap } from "https://deno.land/std/async/mod.ts";
|
||||
import { timeout } from "./lib/utils.js";
|
||||
import "https://deno.land/std@0.192.0/dotenv/load.ts";
|
||||
|
||||
const wait = 60 * 5;
|
||||
|
||||
function timeout(ms, promise) {
|
||||
return new Promise(function (resolve, reject) {
|
||||
const start = performance.now();
|
||||
setTimeout(function () {
|
||||
reject(new Error("timeout"));
|
||||
}, ms);
|
||||
promise.then((v) => {
|
||||
const end = performance.now();
|
||||
return resolve([v, end - start]);
|
||||
}, reject);
|
||||
});
|
||||
}
|
||||
|
||||
async function crawl(ats) {
|
||||
const arr = await ats.db.pds.find().toArray();
|
||||
const results = pooledMap(25, arr.slice(0, 1000), async (i) => {
|
||||
|
@ -99,7 +87,7 @@ async function crawl(ats) {
|
|||
}`,
|
||||
);
|
||||
});
|
||||
for await (const value of results) {}
|
||||
for await (const _ of results) {}
|
||||
}
|
||||
|
||||
if (Deno.args[0] === "daemon") {
|
||||
|
|
|
@ -0,0 +1,128 @@
|
|||
import { ensureDir } from "https://deno.land/std@0.192.0/fs/ensure_dir.ts";
|
||||
import { join } from "https://deno.land/std@0.192.0/path/posix.ts";
|
||||
import { pooledMap } from "https://deno.land/std/async/mod.ts";
|
||||
import { ATScan } from "./lib/atscan.js";
|
||||
import { inspect } from "./lib/car.js";
|
||||
import { timeout } from "./lib/utils.js";
|
||||
import _ from "npm:lodash";
|
||||
|
||||
const DB_PATH = "./backend/db/repo";
|
||||
await ensureDir(DB_PATH);
|
||||
|
||||
/**
 * Run one repository crawl round.
 *
 * Selects up to 10000 DID documents whose `repo.time` is at least one day old
 * or that have no `repo` field yet, shuffles them, and processes them four at
 * a time. For each DID the repo is downloaded from its first PDS via
 * `com.atproto.sync.getRepo`, inspected, written to `DB_PATH` as
 * `<did>.car` plus a `<did>.json` index, and the summary is stored in the
 * DID document's `repo` field. Failures are stored there as
 * `{ error, time }` instead.
 *
 * @param {object} ats - Initialised ATScan instance; only `ats.db.did` is used.
 * @returns {Promise<void>}
 */
async function crawl(ats) {
  const expiry = new Date();
  expiry.setDate(expiry.getDate() - 1);

  const dids = await ats.db.did.find({
    $or: [{ "repo.time": { $lte: expiry } }, { "repo": { $exists: false } }],
  }).limit(10000).toArray();

  // Record a failed attempt so the DID is not retried until it expires again.
  const saveError = (did, error) =>
    ats.db.did.updateOne({ did }, {
      $set: { repo: { error, time: new Date() } },
    });

  const results = pooledMap(4, _.shuffle(dids), async (didInfo) => {
    const did = didInfo.did;

    // Guard every step: one malformed document must not reject the pool and
    // abort the whole round.
    const signingKey = didInfo.revs?.at(-1)?.operation?.verificationMethods
      ?.atproto;
    if (!signingKey) {
      await saveError(did, "no signing key");
      return;
    }

    const pds = didInfo.pds?.[0];
    if (!pds) {
      await saveError(did, "no pds");
      return;
    }

    // fetch remote repo
    const url = `${pds}/xrpc/com.atproto.sync.getRepo?did=${did}`;
    let repoRes;
    try {
      [repoRes] = await timeout(5000, fetch(url));
    } catch (e) {
      console.error(e);
      await saveError(did, e.message);
      return;
    }

    if (!repoRes.ok) {
      let message;
      try {
        if ([403, 500].includes(repoRes.status)) {
          message = (await repoRes.json())?.message;
        } else {
          // Release the connection held by the unread body.
          await repoRes.body?.cancel();
        }
      } catch {
        // Best effort: fall back to the status code below.
      }
      // Never store a null error: the frontend treats a falsy `repo.error`
      // as a successfully indexed repository.
      message ??= `HTTP ${repoRes.status}`;
      console.error(url, message);
      await saveError(did, message);
      return;
    }

    // Download and inspect the CAR; network failures while reading the body
    // are recorded like inspection failures.
    let data;
    let repo;
    try {
      data = new Uint8Array(await repoRes.arrayBuffer());
      repo = await inspect(data, did, signingKey);
    } catch (e) {
      await saveError(did, e.message);
      return;
    }

    const carFn = join(DB_PATH, `${did}.car`);
    await Deno.writeFile(carFn, data);

    const indexFn = join(DB_PATH, `${did}.json`);
    await Deno.writeTextFile(
      indexFn,
      JSON.stringify(
        { did, signingKey, pds, root: repo.root, commits: repo.commits },
        null,
        2,
      ),
    );

    console.log(
      `[${did}@${pds}] displayName=${
        JSON.stringify(repo.profile?.displayName)
      }`,
    );

    await ats.db.did.updateOne({ did }, { $set: { repo } });
  });

  // Drain the iterator; results are only side effects.
  for await (const _done of results) {
    // nothing to do
  }
}
|
||||
|
||||
// Entry point. With the `daemon` argument the crawler performs an initial
// round and then keeps running on a timer; otherwise it runs one round and
// exits.
if (Deno.args[0] === "daemon") {
  const wait = 60;

  console.log("Initializing ATScan ..");
  const ats = new ATScan();
  ats.debug = true;
  await ats.init();
  console.log("repo-crawler daemon started");
  console.log("Performing initial crawl ..");
  // initial crawl
  await crawl(ats);
  console.log(`Initial crawl done`);
  ats.debug = false;
  console.log(`Processing events [wait=${wait}s] ..`);

  // A round can take longer than `wait` seconds. Skip ticks while a round is
  // still in progress so rounds never overlap, and log failures instead of
  // leaving the rejection unhandled.
  let running = false;
  setInterval(async () => {
    if (running) {
      return;
    }
    running = true;
    try {
      await crawl(ats);
    } catch (e) {
      console.error(e);
    } finally {
      running = false;
    }
  }, wait * 1000);
} else {
  const ats = new ATScan({ debug: true });
  await ats.init();
  await crawl(ats);
  Deno.exit();
}
|
|
@ -0,0 +1,4 @@
|
|||
import { request } from "npm:undici";
|
||||
|
||||
// Scratch check: request a sample endpoint through undici and print the
// HTTP status code of the response.
const { statusCode } = await request(
  "https://jsonplaceholder.typicode.com/todos/1",
);
console.log(statusCode);
|
|
@ -0,0 +1,48 @@
|
|||
|
||||
import { Command } from "https://deno.land/x/cliffy@v0.25.7/command/mod.ts";
|
||||
import { inspect, read } from "../backend/lib/car.js";
|
||||
|
||||
// `ats-repo` command-line tool.
//   car-inspect (ci)         - inspect a local CAR file
//   remote-car-inspect (rci) - look a DID up on api.atscan.net, download its
//                              repo from the first PDS and inspect it
await new Command()
  .name("ats-repo")
  .action(() => {
    console.log("Please specify command or use `-h`");
  })
  .command("car-inspect,ci", "Inspect CAR file")
  .option("--did <val:string>", "DID", { required: true })
  .option("--signing-key <val:string>", "Signing key", { required: true })
  .arguments("<file:string>", "Input CAR file")
  .example(
    "Inspect CAR file",
    "ats r ci --did did:plc:ixko5wwzamist35uptkjae7p --signing-key did:key:zQ3shXv3xDNbJfYiMtyNT3E6buJtgKwQTYpoJu6NJDU2EHyVj backend/db/repos/test.car",
  )
  .action(async ({ did, signingKey }, file) => {
    const out = await inspect(await Deno.readFile(file), did, signingKey);
    console.log(JSON.stringify(out, null, 2));
  })
  .command("remote-car-inspect,rci", "Inspect remote CAR file")
  .option("--checkout,-c", "Read checkout data")
  .option("--debug", "Debug")
  .arguments("<did:string>", "DID")
  .action(async ({ debug, checkout }, did) => {
    // get did info
    const didRes = await fetch(`https://api.atscan.net/${did}`);
    if (!didRes.ok) {
      await didRes.body?.cancel();
      console.error(`Error: ${didRes.status} ${didRes.statusText}`);
      return;
    }
    const didInfo = await didRes.json();

    // Report an incomplete DID record instead of crashing with a TypeError.
    const signingKey = didInfo.revs?.at(-1)?.operation?.verificationMethods
      ?.atproto;
    const pds = didInfo.pds?.[0];
    if (!signingKey || !pds) {
      console.error("Error: DID record has no signing key or PDS");
      return;
    }

    // fetch remote repo
    const repo = await fetch(
      `${pds}/xrpc/com.atproto.sync.getRepo?did=${did}`,
    );
    // An error response is not a CAR file; report it rather than handing the
    // error body to the CAR parser.
    if (!repo.ok) {
      await repo.body?.cancel();
      console.error(`Error: ${repo.status} ${repo.statusText}`);
      return;
    }
    const data = new Uint8Array(await repo.arrayBuffer());

    // load and validate repo
    let out;
    if (debug) {
      out = await read(data, did, signingKey);
    } else if (checkout) {
      const resp = await read(data, did, signingKey);
      delete resp.checkout.newCids;
      out = resp.checkout;
    } else {
      out = await inspect(data, did, signingKey);
    }
    console.log(JSON.stringify(out, null, 2));
  })
  .example("Inspect remote CAR", "ats r rci did:plc:naichbdds7i7cwbzwzvjraxm")
  .example(
    "Get all current data",
    "ats r rci did:plc:naichbdds7i7cwbzwzvjraxm -c",
  )
  .parse(Deno.args);
|
|
@ -0,0 +1,29 @@
|
|||
|
||||
import { Command } from "https://deno.land/x/cliffy@v0.25.7/command/mod.ts";
|
||||
//import { inspect } from "./lib/car.js";
|
||||
|
||||
// Top-level `ats` command. It prints a hint when called without a
// sub-command and registers `repo` (alias `r`) as an external executable
// sub-command.
const cli = new Command()
  .name("ats")
  .description("CLI for ATScan")
  .meta("deno", Deno.version.deno)
  .meta("v8", Deno.version.v8)
  .version("0.1.0")
  .usage("<command>")
  .action(() => {
    console.log("Please specify command or use `-h`");
  })
  .command("repo,r", "Repository tools")
  .executable();

await cli.parse(Deno.args);
|
||||
|
||||
//import * as bsky from "npm:@atproto/api";
|
||||
//import "https://deno.land/std@0.192.0/dotenv/load.ts";
|
||||
/*const { BskyAgent } = bsky.default;
|
||||
const agent = new BskyAgent({ service: "https://bsky.social" });
|
||||
|
||||
await agent.login({
|
||||
identifier: Deno.env.get("BLUESKY_USERNAME"),
|
||||
password: Deno.env.get("BLUESKY_PASSWORD"),
|
||||
});*/
|
||||
|
||||
/*const p = await agent.getProfiles({
|
||||
actors: ["did:plc:b5rrmme6ncenhe4lq53y7lpf", "tree.fail"],
|
||||
});*/
|
||||
//console.log(p);
|
|
@ -1,12 +1,12 @@
|
|||
{
|
||||
"name": "atscan-fe",
|
||||
"version": "0.3.5-alpha",
|
||||
"version": "0.4.4-alpha",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "atscan-fe",
|
||||
"version": "0.3.5-alpha",
|
||||
"version": "0.4.4-alpha",
|
||||
"dependencies": {
|
||||
"js-yaml": "^4.1.0"
|
||||
},
|
||||
|
@ -22,6 +22,7 @@
|
|||
"eslint": "^8.28.0",
|
||||
"eslint-config-prettier": "^8.5.0",
|
||||
"eslint-plugin-svelte": "^2.26.0",
|
||||
"filesize": "^10.0.7",
|
||||
"highlight.js": "^11.8.0",
|
||||
"lodash": "^4.17.21",
|
||||
"minidenticons": "^4.2.0",
|
||||
|
@ -1703,6 +1704,15 @@
|
|||
"node": "^10.12.0 || >=12.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/filesize": {
|
||||
"version": "10.0.7",
|
||||
"resolved": "https://registry.npmjs.org/filesize/-/filesize-10.0.7.tgz",
|
||||
"integrity": "sha512-iMRG7Qo9nayLoU3PNCiLizYtsy4W1ClrapeCwEgtiQelOAOuRJiw4QaLI+sSr8xr901dgHv+EYP2bCusGZgoiA==",
|
||||
"dev": true,
|
||||
"engines": {
|
||||
"node": ">= 10.4.0"
|
||||
}
|
||||
},
|
||||
"node_modules/fill-range": {
|
||||
"version": "7.0.1",
|
||||
"resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.0.1.tgz",
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"name": "atscan-fe",
|
||||
"version": "0.4.4-alpha",
|
||||
"version": "0.5.0-alpha",
|
||||
"private": true,
|
||||
"scripts": {
|
||||
"dev": "vite dev",
|
||||
|
@ -21,6 +21,7 @@
|
|||
"eslint": "^8.28.0",
|
||||
"eslint-config-prettier": "^8.5.0",
|
||||
"eslint-plugin-svelte": "^2.26.0",
|
||||
"filesize": "^10.0.7",
|
||||
"highlight.js": "^11.8.0",
|
||||
"lodash": "^4.17.21",
|
||||
"minidenticons": "^4.2.0",
|
||||
|
|
|
@ -2,6 +2,7 @@ import { formatDistanceToNow } from 'date-fns';
|
|||
import { minidenticon } from 'minidenticons';
|
||||
import { tableSourceValues } from '@skeletonlabs/skeleton';
|
||||
import numbro from 'numbro';
|
||||
import { filesize as _filesize } from 'filesize';
|
||||
|
||||
export function dateDistance(date) {
|
||||
return formatDistanceToNow(new Date(date));
|
||||
|
@ -48,3 +49,7 @@ export function getDIDProfileUrl(fed, item) {
|
|||
}
|
||||
return `${base}/profile/${item.did}`;
|
||||
}
|
||||
|
||||
/**
 * Format a size value for display by delegating to the `filesize` package.
 *
 * @param {number} size - Value handed unchanged to the library formatter.
 * @returns {*} Whatever the library formatter returns for `size`.
 */
export function filesize(size) {
	const formatted = _filesize(size);
	return formatted;
}
|
||||
|
|
|
@ -44,9 +44,11 @@
|
|||
val = val
|
||||
.map(
|
||||
(v) =>
|
||||
`<code class="code ${key === 'path' ? 'text-lg' : ''}">` + (key === 'example' ? `<a href="${
|
||||
data.config.api
|
||||
}${v}" target="_blank">${v}</a>` : v) + `</code>`
|
||||
`<code class="code ${key === 'path' ? 'text-lg' : ''}">` +
|
||||
(key === 'example'
|
||||
? `<a href="${data.config.api}${v}" target="_blank">${v}</a>`
|
||||
: v) +
|
||||
`</code>`
|
||||
)
|
||||
.join('<br />');
|
||||
}
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
<script>
|
||||
import { dateDistance, identicon, getDIDProfileUrl } from '$lib/utils.js';
|
||||
import { dateDistance, identicon, getDIDProfileUrl, filesize } from '$lib/utils.js';
|
||||
import { Table } from '@skeletonlabs/skeleton';
|
||||
import { tableMapperValues, tableSourceValues } from '@skeletonlabs/skeleton';
|
||||
import SourceSection from '$lib/components/SourceSection.svelte';
|
||||
|
@ -90,5 +90,46 @@
|
|||
<PDSTable sourceData={data.pds} {data} />
|
||||
{/if}
|
||||
|
||||
<h2 class="h2">Repository</h2>
|
||||
{#if item.repo && !item.repo.error}
|
||||
<div class="table-container">
|
||||
<!-- Native Table Element -->
|
||||
<table class="table table-hover">
|
||||
<tbody>
|
||||
<tr>
|
||||
<th class="text-right">Root</th>
|
||||
<td>{item.repo.root}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th class="text-right">Signing Key</th>
|
||||
<td>{item.repo.signingKey}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th class="text-right">Commits</th>
|
||||
<td>{item.repo.commits}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th class="text-right">Size</th>
|
||||
<td>{filesize(item.repo?.size)}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th class="text-right">Collections</th>
|
||||
<td
|
||||
>{Object.keys(item.repo?.collections)
|
||||
.map((c) => `${item.repo.collections[c]} ${c}`)
|
||||
.join(', ')}</td
|
||||
>
|
||||
</tr>
|
||||
<tr>
|
||||
<th class="text-right">Last indexed</th>
|
||||
<td>{dateDistance(item.repo?.time)} ago</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
{:else}
|
||||
<div>No repository info yet.</div>
|
||||
{/if}
|
||||
|
||||
<SourceSection {data} model="did" />
|
||||
</BasicPage>
|
||||
|
|
|
@ -40,7 +40,7 @@ module.exports = {
|
|||
//args : "daemon",
|
||||
interpreter: "deno",
|
||||
interpreterArgs: "run --unstable --allow-net --allow-read --allow-env --allow-sys",
|
||||
watch: true,
|
||||
//watch: true,
|
||||
ignore_watch: [ 'frontend' ],
|
||||
}],
|
||||
};
|
||||
|
|
Načítá se…
Odkázat v novém úkolu