Browse Source

fix favicons

main
matthew 2 years ago
parent
commit
b535b292df
  1. 22
      searching-front/services/modules/parser/index.ts

22
searching-front/services/modules/parser/index.ts

@ -25,36 +25,34 @@ const isInvalidLink = (link: string) => {
const getFaviconUrl = (dom: JSDOM, domain: string) => { const getFaviconUrl = (dom: JSDOM, domain: string) => {
try { try {
const node = dom.window.document.querySelector("[rel=icon][type*=image]") as HTMLLinkElement;; const node = dom.window.document.querySelector("[rel=icon][type*=image]") as HTMLLinkElement
const href = node?.href; const href = node?.href
let url; let url
if (href) { if (href) {
try { try {
url = new URL(href); url = new URL(href)
} catch (e) { } catch (e) {
url = new URL(domain+'/'+href); url = new URL(domain + "/" + href)
url.pathname = url.pathname.replaceAll('//','/'); url.pathname = url.pathname.replaceAll("//", "/")
} }
return url.toString(); return url.toString()
} }
} catch (e) { } catch (e) {
return undefined return undefined
} }
} }
class Parser { class Parser {
constructor() {} constructor() {}
parseUrl = async (url: string, domain: string) => { parseUrl = async (url: string, domain: string) => {
try { try {
const { data, headers } = await axios.get(url, { const { data, headers } = await axios.get(url, {
proxy: getTonProxy(), proxy: getTonProxy(),
}) })
const contentType = headers["content-type"].toLocaleLowerCase() const contentType = headers["content-type"].toLocaleLowerCase()
if (!contentType.startsWith('text/html')) { if (!contentType.startsWith("text/html")) {
return SHOULD_NOT_PARSE return SHOULD_NOT_PARSE
} }
@ -86,12 +84,12 @@ class Parser {
.querySelector("meta[name='description']") .querySelector("meta[name='description']")
?.getAttribute("content") || "", ?.getAttribute("content") || "",
url, url,
faviconUrl: getFaviconUrl(dom, domain) faviconUrl: getFaviconUrl(dom, domain),
}, },
subPages, subPages,
} }
} catch (e) { } catch (e) {
console.log("Parse error ",e, url) console.log("Parse error ", e?.code, url)
return SHOULD_NOT_PARSE return SHOULD_NOT_PARSE
} }
} }

Loading…
Cancel
Save