Browse Source

add influx

main
matthew 2 years ago
parent
commit
69b10e2b31
  1. docker-compose.yml (18 changed lines)
  2. searching-front/services/main.ts (2 changed lines)
  3. searching-front/services/modules/parser/index.ts (2 changed lines)
  4. searching-front/services/parser.ts (10 changed lines)

18
docker-compose.yml

@ -15,11 +15,18 @@ services:
- "discovery.type=single-node" - "discovery.type=single-node"
ports: ports:
- 9200:9200 - 9200:9200
# proxy: influxdb:
# platform: linux/x86_64 image: influxdb:2.4-alpine
# build: ./proxy ports:
# volumes: - '8086:8086'
# - ./proxy:/app volumes:
- influxdb-storage:/var/lib/influxdb
env_file: ./searching-front/.env.local
proxy:
platform: linux/x86_64
build: ./proxy
volumes:
- ./proxy:/app
front: front:
container_name: front container_name: front
build: ./searching-front build: ./searching-front
@ -45,3 +52,4 @@ networks:
volumes: volumes:
dbdata: dbdata:
influxdb-storage:

2
searching-front/services/main.ts

@ -11,7 +11,7 @@ const run = async()=>{
console.timeEnd('watcher') console.timeEnd('watcher')
console.log('Start parser'); console.log('Start parser');
console.time('watcher'); console.time('watcher');
// await parser(); await parser();
console.timeEnd('watcher'); console.timeEnd('watcher');
} }

2
searching-front/services/modules/parser/index.ts

@ -45,7 +45,7 @@ class Parser {
dom.window.document.querySelectorAll("a").forEach(({ href }) => { dom.window.document.querySelectorAll("a").forEach(({ href }) => {
if (isInnerLink(href)) { if (isInnerLink(href)) {
const url = new URL("ton://a.ton" + href) const url = new URL("ton://a.ton" + href)
if (!isInvalidLink(url.pathname)) { if (!isInvalidLink(url.pathname) && [...subPagesSet].length < 50 ) {
subPagesSet.add(url.pathname) subPagesSet.add(url.pathname)
} }
} }

10
searching-front/services/parser.ts

@ -10,14 +10,14 @@ const findFirstNotIndexed = (subpages: SubPages = {}) => {
return Object.entries(subpages).find(([url, isIndexed]) => !isIndexed)?.[0] return Object.entries(subpages).find(([url, isIndexed]) => !isIndexed)?.[0]
} }
const indexWebsite = async (domain: string, path: string, subpages: SubPages = {}) => { const indexWebsite = async (domain: string, path: string, subpages: SubPages = {},count=0) => {
const subpagesLength = Object.keys(subpages).length; const subpagesLength = Object.keys(subpages).length;
if (!subpages[path] && subpagesLength < 50) { if (!subpages[path]) {
const url = domain + path; const url = domain + path;
const parseInfo = await Parser.parseUrl(url) const parseInfo = await Parser.parseUrl(url)
subpages[path] = true subpages[path] = true
let pages = {} let pages = {}
if (parseInfo !== SHOULD_NOT_PARSE) { if (parseInfo !== SHOULD_NOT_PARSE && subpagesLength < 50) {
await Elastic.index(parseInfo.elasticData) await Elastic.index(parseInfo.elasticData)
pages = { pages = {
...parseInfo.subPages, ...parseInfo.subPages,
@ -28,12 +28,12 @@ const indexWebsite = async (domain: string, path: string, subpages: SubPages = {
} }
const firstNotIndexed = findFirstNotIndexed(pages) const firstNotIndexed = findFirstNotIndexed(pages)
if (firstNotIndexed) { if (firstNotIndexed) {
return await indexWebsite(domain, firstNotIndexed, pages) return await indexWebsite(domain, firstNotIndexed, pages, count +1)
} }
} else { } else {
const firstNotIndexed = findFirstNotIndexed(subpages) const firstNotIndexed = findFirstNotIndexed(subpages)
if (firstNotIndexed) { if (firstNotIndexed) {
return await indexWebsite(domain, firstNotIndexed, subpages) return await indexWebsite(domain, firstNotIndexed, subpages, count +1)
} }
} }
} }

Loading…
Cancel
Save