From 69b10e2b319b5cc1ee2ebd9b4fcf2e1517640b34 Mon Sep 17 00:00:00 2001 From: matthew Date: Fri, 7 Oct 2022 18:19:23 +0400 Subject: [PATCH] add influx --- docker-compose.yml | 18 +++++++++++++----- searching-front/services/main.ts | 2 +- .../services/modules/parser/index.ts | 2 +- searching-front/services/parser.ts | 10 +++++----- 4 files changed, 20 insertions(+), 12 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 29f30cf..cb4b03a 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -15,11 +15,18 @@ services: - "discovery.type=single-node" ports: - 9200:9200 - # proxy: - # platform: linux/x86_64 - # build: ./proxy - # volumes: - # - ./proxy:/app + influxdb: + image: influxdb:2.4-alpine + ports: + - '8086:8086' + volumes: + - influxdb-storage:/var/lib/influxdb + env_file: ./searching-front/.env.local + proxy: + platform: linux/x86_64 + build: ./proxy + volumes: + - ./proxy:/app front: container_name: front build: ./searching-front @@ -45,3 +52,4 @@ networks: volumes: dbdata: + influxdb-storage: diff --git a/searching-front/services/main.ts b/searching-front/services/main.ts index a12ccef..03be486 100644 --- a/searching-front/services/main.ts +++ b/searching-front/services/main.ts @@ -11,7 +11,7 @@ const run = async()=>{ console.timeEnd('watcher') console.log('Start parser'); console.time('watcher'); - // await parser(); + await parser(); console.timeEnd('watcher'); } diff --git a/searching-front/services/modules/parser/index.ts b/searching-front/services/modules/parser/index.ts index 2ed3d06..dc1101b 100644 --- a/searching-front/services/modules/parser/index.ts +++ b/searching-front/services/modules/parser/index.ts @@ -45,7 +45,7 @@ class Parser { dom.window.document.querySelectorAll("a").forEach(({ href }) => { if (isInnerLink(href)) { const url = new URL("ton://a.ton" + href) - if (!isInvalidLink(url.pathname)) { + if (!isInvalidLink(url.pathname) && [...subPagesSet].length < 50 ) { subPagesSet.add(url.pathname) } } diff --git a/searching-front/services/parser.ts b/searching-front/services/parser.ts index d897340..58a1a13 100644 --- a/searching-front/services/parser.ts +++ b/searching-front/services/parser.ts @@ -10,14 +10,14 @@ const findFirstNotIndexed = (subpages: SubPages = {}) => { return Object.entries(subpages).find(([url, isIndexed]) => !isIndexed)?.[0] } -const indexWebsite = async (domain: string, path: string, subpages: SubPages = {}) => { +const indexWebsite = async (domain: string, path: string, subpages: SubPages = {},count=0) => { const subpagesLength = Object.keys(subpages).length; - if (!subpages[path] && subpagesLength < 50) { + if (!subpages[path]) { const url = domain + path; const parseInfo = await Parser.parseUrl(url) subpages[path] = true let pages = {} - if (parseInfo !== SHOULD_NOT_PARSE) { + if (parseInfo !== SHOULD_NOT_PARSE && subpagesLength < 50) { await Elastic.index(parseInfo.elasticData) pages = { ...parseInfo.subPages, @@ -28,12 +28,12 @@ const indexWebsite = async (domain: string, path: string, subpages: SubPages = { } const firstNotIndexed = findFirstNotIndexed(pages) if (firstNotIndexed) { - return await indexWebsite(domain, firstNotIndexed, pages) + return await indexWebsite(domain, firstNotIndexed, pages, count +1) } } else { const firstNotIndexed = findFirstNotIndexed(subpages) if (firstNotIndexed) { - return await indexWebsite(domain, firstNotIndexed, subpages) + return await indexWebsite(domain, firstNotIndexed, subpages, count +1) } } }