Proof-of-concept "sipper" minimal pages for web crawlers

This commit is contained in:
Mononaut
2023-08-24 17:49:27 +09:00
parent c51159d275
commit eeefaa6374
9 changed files with 661 additions and 44 deletions

View File

@@ -30,6 +30,10 @@ interface IConfig {
MAX_PAGE_AGE?: number;
RENDER_TIMEOUT?: number;
};
API: {
MEMPOOL: string;
ESPLORA: string;
}
SYSLOG: {
ENABLED: boolean;
HOST: string;
@@ -53,6 +57,10 @@ const defaults: IConfig = {
'ENABLED': true,
'CLUSTER_SIZE': 1,
},
'API': {
'MEMPOOL': 'https://mempool.space/api/v1',
'ESPLORA': 'https://mempool.space/api',
},
'SYSLOG': {
'ENABLED': true,
'HOST': '127.0.0.1',
@@ -66,6 +74,7 @@ class Config implements IConfig {
SERVER: IConfig['SERVER'];
MEMPOOL: IConfig['MEMPOOL'];
PUPPETEER: IConfig['PUPPETEER'];
API: IConfig['API'];
SYSLOG: IConfig['SYSLOG'];
constructor() {
@@ -73,6 +82,7 @@ class Config implements IConfig {
this.SERVER = configs.SERVER;
this.MEMPOOL = configs.MEMPOOL;
this.PUPPETEER = configs.PUPPETEER;
this.API = configs.API;
this.SYSLOG = configs.SYSLOG;
}

View File

@@ -113,6 +113,8 @@ class Server {
}
setUpRoutes() {
this.app.set('view engine', 'ejs');
if (puppeteerEnabled) {
this.app.get('/unfurl/render*', async (req, res) => { return this.renderPreview(req, res) })
this.app.get('/render*', async (req, res) => { return this.renderPreview(req, res) })
@@ -122,6 +124,7 @@ class Server {
}
this.app.get('/unfurl*', (req, res) => { return this.renderHTML(req, res, true) })
this.app.get('/slurp*', (req, res) => { return this.renderHTML(req, res, false) })
this.app.get('/sip*', (req, res) => { return this.renderSip(req, res) })
this.app.get('*', (req, res) => { return this.renderHTML(req, res, false) })
}
@@ -371,6 +374,38 @@ class Server {
}
return html;
}
/**
 * Serve a minimal server-rendered ("sip") page for the requested path.
 *
 * The path is matched against the route tree in 'sip' mode. When the matched
 * route provides a sip definition, its data is fetched and its template is
 * rendered together with the canonical URL and Open Graph metadata; a failure
 * while doing so is logged and answered with an empty HTTP 500 response.
 * Routes without a sip definition are delegated to `renderHTML`.
 *
 * @param req - incoming request; the wildcard capture `req.params[0]` is the raw path
 * @param res - response used to render the template or send the error status
 */
async renderSip(req, res): Promise<void> {
  const startedAt = Date.now();
  const requestedPath = req.params[0];
  const { lang, path } = parseLanguageUrl(requestedPath);
  const route = matchRoute(this.network, path, 'sip');

  // Renderable routes point at a generated preview image; everything else
  // uses the route's static image, or its fallback image when there is none.
  const ogImageUrl = route.render
    ? `${config.SERVER.HOST}/render/${lang || 'en'}/preview${path}`
    : config.SERVER.HOST + (route.staticImg || route.fallbackImg);

  let ogTitle = 'The Mempool Open Source Project®';
  if (route.render) {
    const networkPrefix = this.network ? capitalize(this.network) + ' ' : '';
    const modePrefix = route.networkMode !== 'mainnet' ? capitalize(route.networkMode) + ' ' : '';
    ogTitle = `${networkPrefix}${modePrefix}${route.title}`;
  }

  const canonical = this.canonicalHost + requestedPath;

  // No sip definition for this route: serve the regular HTML response instead.
  if (!route.sip) {
    return this.renderHTML(req, res, false);
  }

  logger.info(`sipping "${req.url}"`);
  try {
    const data = await route.sip.getData(route.params);
    logger.info(`sip data fetched for "${req.url}" in ${Date.now() - startedAt}ms`);
    res.render(route.sip.template, { canonical, ogImageUrl, ogTitle, matchedRoute: route, data });
    logger.info(`sip returned "${req.url}" in ${Date.now() - startedAt}ms`);
  } catch (e) {
    const reason = e instanceof Error ? e.message : `${e}`;
    logger.err(`failed to sip ${req.url}: ` + reason);
    res.status(500).send();
  }
}
}
const server = new Server();

View File

@@ -1,9 +1,19 @@
import fetch from 'node-fetch-commonjs';
import config from './config';
/** Outcome of resolving a URL path against the route tree. */
interface Match {
  /** Title of the matched route, used when building the page title. */
  title: string;
  /** Copied from the matched route's `render` flag; `false` when nothing matched. */
  render: boolean;
  /** Network mode of the match; any value other than 'mainnet' is prefixed to the page title. */
  networkMode: string;
  /** Image path used for the preview when no static image is set. */
  fallbackImg: string;
  /** Image path that takes precedence over `fallbackImg` when present. */
  staticImg?: string;
  /** Path segments left over after the route was matched; handed to `sip.getData`. */
  params?: string[];
  /** Sip template definition of the matched route, if it has one. */
  sip?: SipTemplate;
}
/** Describes how to build a minimal server-rendered ("sip") page for a route. */
interface SipTemplate {
  /** Name of the view template that renders the page. */
  template: string;
  /**
   * Loads the data handed to the template as `data`.
   *
   * Declared as a method signature (rather than the untyped `Function`) so
   * calls are type-checked, while route definitions that declare `params` as
   * required remain assignable.
   *
   * @param params - path segments remaining after route matching, if any
   * @returns a promise for the template data; its shape is template-specific
   */
  getData(params?: string[]): Promise<unknown>;
}
const routes = {
@@ -19,18 +29,37 @@ const routes = {
title: "Mempool Accelerator",
fallbackImg: '/resources/previews/accelerator.jpg',
},
address: {
render: true,
params: 1,
getTitle(path) {
return `Address: ${path[0]}`;
}
},
block: {
  render: true,
  params: 1,
  getTitle(path) {
    return `Block: ${path[0]}`;
  },
  sip: {
    template: 'block',
    /**
     * Fetch the data needed to render the `block` sip template.
     *
     * @param params - route parameters; the first entry identifies the block.
     *   A 64-character value is used as the block id directly, anything else
     *   is resolved to an id through the Esplora `/block-height/` endpoint.
     * @returns the block from the mempool API together with its transaction
     *   ids from Esplora, or `undefined` when no parameters were supplied.
     * @throws Error when an upstream request fails or answers with a non-2xx status.
     */
    async getData (params: string[]) {
      if (!params?.length) {
        return undefined;
      }

      // Fail on non-2xx answers instead of parsing an upstream error body as
      // data (or, for the height lookup, using it as a block id).
      const request = async (url: string) => {
        const response = await fetch(url);
        if (!response.ok) {
          throw new Error(`request to ${url} failed with status ${response.status}`);
        }
        return response;
      };
      const requestJson = async (url: string) => (await request(url)).json();

      // The id originates from the request URL, so escape it before
      // interpolating it into upstream URLs.
      let blockId = params[0];
      if (blockId.length !== 64) {
        const heightUrl = config.API.ESPLORA + `/block-height/${encodeURIComponent(blockId)}`;
        blockId = await (await request(heightUrl)).text();
      }
      const encodedId = encodeURIComponent(blockId);

      // Start both requests before awaiting either so they run concurrently.
      const [block, transactions] = await Promise.all([
        requestJson(config.API.MEMPOOL + `/block/${encodedId}`),
        requestJson(config.API.ESPLORA + `/block/${encodedId}/txids`),
      ]);
      return {
        block,
        transactions,
      };
    }
  }
},
address: {
render: true,
params: 1,
getTitle(path) {
return `Address: ${path[0]}`;
}
},
blocks: {
@@ -162,7 +191,7 @@ const networks = {
}
};
export function matchRoute(network: string, path: string): Match {
export function matchRoute(network: string, path: string, matchFor: string = 'render'): Match {
const match: Match = {
render: false,
title: '',
@@ -183,7 +212,7 @@ export function matchRoute(network: string, path: string): Match {
match.fallbackImg = route.fallbackImg;
// traverse the route tree until we run out of route or tree, or hit a renderable match
while (route.routes && parts.length && route.routes[parts[0]]) {
while (!route[matchFor] && route.routes && parts.length && route.routes[parts[0]]) {
route = route.routes[parts[0]];
parts.shift();
if (route.fallbackImg) {
@@ -192,8 +221,10 @@ export function matchRoute(network: string, path: string): Match {
}
// enough route parts left for title & rendering
if (route.render && parts.length >= route.params) {
match.render = true;
if (route[matchFor] && parts.length >= route.params) {
match.render = route.render;
match.sip = route.sip;
match.params = parts;
}
// only set a static image for exact matches
if (!parts.length && route.staticImg) {