Browse Source

add influx

main
matthew 2 years ago
parent
commit
69b10e2b31
  1. 18
      docker-compose.yml
  2. 2
      searching-front/services/main.ts
  3. 2
      searching-front/services/modules/parser/index.ts
  4. 10
      searching-front/services/parser.ts

18
docker-compose.yml

@@ -15,11 +15,18 @@ services:
- "discovery.type=single-node"
ports:
- 9200:9200
# proxy:
# platform: linux/x86_64
# build: ./proxy
# volumes:
# - ./proxy:/app
influxdb:
image: influxdb:2.4-alpine
ports:
- '8086:8086'
volumes:
- influxdb-storage:/var/lib/influxdb
env_file: ./searching-front/.env.local
proxy:
platform: linux/x86_64
build: ./proxy
volumes:
- ./proxy:/app
front:
container_name: front
build: ./searching-front
@@ -45,3 +52,4 @@ networks:
volumes:
dbdata:
influxdb-storage:

2
searching-front/services/main.ts

@@ -11,7 +11,7 @@ const run = async()=>{
console.timeEnd('watcher')
console.log('Start parser');
console.time('watcher');
// await parser();
await parser();
console.timeEnd('watcher');
}

2
searching-front/services/modules/parser/index.ts

@@ -45,7 +45,7 @@ class Parser {
dom.window.document.querySelectorAll("a").forEach(({ href }) => {
if (isInnerLink(href)) {
const url = new URL("ton://a.ton" + href)
if (!isInvalidLink(url.pathname)) {
if (!isInvalidLink(url.pathname) && [...subPagesSet].length < 50 ) {
subPagesSet.add(url.pathname)
}
}

10
searching-front/services/parser.ts

@@ -10,14 +10,14 @@ const findFirstNotIndexed = (subpages: SubPages = {}) => {
return Object.entries(subpages).find(([url, isIndexed]) => !isIndexed)?.[0]
}
const indexWebsite = async (domain: string, path: string, subpages: SubPages = {}) => {
const indexWebsite = async (domain: string, path: string, subpages: SubPages = {},count=0) => {
const subpagesLength = Object.keys(subpages).length;
if (!subpages[path] && subpagesLength < 50) {
if (!subpages[path]) {
const url = domain + path;
const parseInfo = await Parser.parseUrl(url)
subpages[path] = true
let pages = {}
if (parseInfo !== SHOULD_NOT_PARSE) {
if (parseInfo !== SHOULD_NOT_PARSE && subpagesLength < 50) {
await Elastic.index(parseInfo.elasticData)
pages = {
...parseInfo.subPages,
@@ -28,12 +28,12 @@ const indexWebsite = async (domain: string, path: string, subpages: SubPages = {
}
const firstNotIndexed = findFirstNotIndexed(pages)
if (firstNotIndexed) {
return await indexWebsite(domain, firstNotIndexed, pages)
return await indexWebsite(domain, firstNotIndexed, pages, count +1)
}
} else {
const firstNotIndexed = findFirstNotIndexed(subpages)
if (firstNotIndexed) {
return await indexWebsite(domain, firstNotIndexed, subpages)
return await indexWebsite(domain, firstNotIndexed, subpages, count +1)
}
}
}

Loading…
Cancel
Save