|
|
|
@ -25,36 +25,34 @@ const isInvalidLink = (link: string) => {
|
|
|
|
|
|
|
|
|
|
/**
 * Resolves the favicon URL declared by a parsed page.
 *
 * Looks up the first element matching `[rel=icon][type*=image]` and turns its
 * `href` into an absolute URL string.
 *
 * @param dom - Parsed document to search for the icon link.
 * @param domain - Origin prepended to `href` when `href` is not an absolute
 *   URL on its own (e.g. `"http://example.ton"`).
 * @returns The absolute favicon URL, or `undefined` when the page declares no
 *   icon or no valid URL can be built from `href` and `domain`.
 */
const getFaviconUrl = (dom: JSDOM, domain: string): string | undefined => {
  try {
    const node = dom.window.document.querySelector("[rel=icon][type*=image]") as HTMLLinkElement | null
    const href = node?.href

    if (!href) {
      return undefined
    }

    let url: URL
    try {
      // Absolute hrefs parse on their own.
      url = new URL(href)
    } catch {
      // NOTE(review): presumably href is relative here because the document
      // was created without a base URL — confirm against the JSDOM setup.
      url = new URL(domain + "/" + href)
      // Joining with "/" yields a doubled slash when href is root-relative;
      // collapse every run of slashes, not only exact pairs.
      url.pathname = url.pathname.replace(/\/{2,}/g, "/")
    }

    return url.toString()
  } catch {
    // A missing favicon must not abort parsing of the page itself.
    return undefined
  }
}
|
|
|
|
|
|
|
|
|
class Parser { |
|
|
|
|
constructor() {} |
|
|
|
|
parseUrl = async (url: string, domain: string) => { |
|
|
|
|
try { |
|
|
|
|
|
|
|
|
|
const { data, headers } = await axios.get(url, { |
|
|
|
|
proxy: getTonProxy(), |
|
|
|
|
}) |
|
|
|
|
|
|
|
|
|
const contentType = headers["content-type"].toLocaleLowerCase() |
|
|
|
|
|
|
|
|
|
if (!contentType.startsWith('text/html')) { |
|
|
|
|
if (!contentType.startsWith("text/html")) { |
|
|
|
|
return SHOULD_NOT_PARSE |
|
|
|
|
} |
|
|
|
|
|
|
|
|
@ -86,12 +84,12 @@ class Parser {
|
|
|
|
|
.querySelector("meta[name='description']") |
|
|
|
|
?.getAttribute("content") || "", |
|
|
|
|
url, |
|
|
|
|
faviconUrl: getFaviconUrl(dom, domain) |
|
|
|
|
faviconUrl: getFaviconUrl(dom, domain), |
|
|
|
|
}, |
|
|
|
|
subPages, |
|
|
|
|
} |
|
|
|
|
} catch (e) { |
|
|
|
|
console.log("Parse error ",e, url) |
|
|
|
|
console.log("Parse error ", e?.code, url) |
|
|
|
|
return SHOULD_NOT_PARSE |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|