Browse Source

add watcher

main
matthew 2 years ago
parent
commit
c6628f0d03
  1. 16
      docker-compose.yml
  2. 6
      package-lock.json
  3. 1
      searching-front/.dockerignore
  4. 24
      searching-front/Dockerfile
  5. 24
      searching-front/Dockerfile.watcher
  6. 1
      searching-front/app/auth/components/Button/index.ts
  7. 2
      searching-front/db/migrations/20220930173504_a/migration.sql
  8. 13
      searching-front/db/migrations/20220930184658_asd/migration.sql
  9. 22
      searching-front/db/migrations/20220930185345_a/migration.sql
  10. 16
      searching-front/db/migrations/20220930225241_j/migration.sql
  11. 2
      searching-front/db/migrations/20221002132213_asd/migration.sql
  12. 1
      searching-front/db/schema.prisma
  13. 6
      searching-front/next.config.js
  14. 25574
      searching-front/package-lock.json
  15. 9
      searching-front/package.json
  16. 83
      searching-front/services/domain-watcher.ts
  17. 4
      searching-front/services/helpers.ts
  18. 20
      searching-front/services/main.ts
  19. 9
      searching-front/services/modules/elastic/index.ts
  20. 9
      searching-front/services/modules/parser/index.ts
  21. 33
      searching-front/services/parser.ts

16
docker-compose.yml

@ -34,6 +34,22 @@ services:
build: ./proxy
volumes:
- ./proxy:/app
front:
container_name: front
build: ./searching-front
restart: always
ports:
- 3000:3000
watcher:
container_name: watcher
depends_on:
- elasticsearch
build:
context: ./searching-front
dockerfile: Dockerfile.watcher
restart: always
ports:
- 3000:3000
networks:
es-net:

6
package-lock.json generated

@ -0,0 +1,6 @@
{
"name": "searching",
"lockfileVersion": 2,
"requires": true,
"packages": {}
}

1
searching-front/.dockerignore

@ -0,0 +1 @@
node_modules

24
searching-front/Dockerfile

@ -0,0 +1,24 @@
# Image for the Next.js (Blitz) front-end web server.
FROM node:16
WORKDIR /app
# Install dependencies based on the preferred package manager
COPY package.json package-lock.json* ./
# Omit --production flag for TypeScript devDependencies
RUN npm i
COPY . .
# Environment variables must be present at build time
# https://github.com/vercel/next.js/discussions/14030
# ARG ENV_VARIABLE
# ENV ENV_VARIABLE=${ENV_VARIABLE}
# ARG NEXT_PUBLIC_ENV_VARIABLE
# ENV NEXT_PUBLIC_ENV_VARIABLE=${NEXT_PUBLIC_ENV_VARIABLE}
# Uncomment the following line to disable telemetry at build time
# ENV NEXT_TELEMETRY_DISABLED 1
RUN npm run build
# Serve the production build (package.json "start" → blitz start).
CMD ["npm", "run", "start"]

24
searching-front/Dockerfile.watcher

@ -0,0 +1,24 @@
FROM node:16
WORKDIR /app
# Install dependencies based on the preferred package manager
COPY package.json package-lock.json* ./
# Omit --production flag for TypeScript devDependencies
RUN npm i
COPY . .
# Environment variables must be present at build time
# https://github.com/vercel/next.js/discussions/14030
# ARG ENV_VARIABLE
# ENV ENV_VARIABLE=${ENV_VARIABLE}
# ARG NEXT_PUBLIC_ENV_VARIABLE
# ENV NEXT_PUBLIC_ENV_VARIABLE=${NEXT_PUBLIC_ENV_VARIABLE}
# Uncomment the following line to disable telemetry at build time
# ENV NEXT_TELEMETRY_DISABLED 1
RUN npm run build
CMD ["npm", "run", "watcher"]

1
searching-front/app/auth/components/Button/index.ts

@ -0,0 +1 @@
export { default } from "./Button"

2
searching-front/db/migrations/20220930173504_a/migration.sql

@ -1,2 +0,0 @@
-- AlterTable
ALTER TABLE "Domain" ALTER COLUMN "lastParse" DROP NOT NULL;

13
searching-front/db/migrations/20220930184658_asd/migration.sql

@ -1,13 +0,0 @@
-- CreateTable
CREATE TABLE "NFTDomain" (
"id" SERIAL NOT NULL,
"createdAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"updatedAt" TIMESTAMP(3) NOT NULL,
"address" TEXT NOT NULL,
"available" BOOLEAN NOT NULL,
CONSTRAINT "NFTDomain_pkey" PRIMARY KEY ("id")
);
-- CreateIndex
CREATE UNIQUE INDEX "NFTDomain_address_key" ON "NFTDomain"("address");

22
searching-front/db/migrations/20220930185345_a/migration.sql

@ -1,22 +0,0 @@
/*
Warnings:
- You are about to drop the `NFTDomain` table. If the table is not empty, all the data it contains will be lost.
*/
-- DropTable
DROP TABLE "NFTDomain";
-- CreateTable
CREATE TABLE "NftDomain" (
"id" SERIAL NOT NULL,
"createdAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"updatedAt" TIMESTAMP(3) NOT NULL,
"address" TEXT NOT NULL,
"available" BOOLEAN NOT NULL,
CONSTRAINT "NftDomain_pkey" PRIMARY KEY ("id")
);
-- CreateIndex
CREATE UNIQUE INDEX "NftDomain_address_key" ON "NftDomain"("address");

16
searching-front/db/migrations/20220930173413_a/migration.sql → searching-front/db/migrations/20220930225241_j/migration.sql

@ -68,11 +68,22 @@ CREATE TABLE "Domain" (
"createdAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"updatedAt" TIMESTAMP(3) NOT NULL,
"address" TEXT NOT NULL,
"lastParse" TIMESTAMP(3) NOT NULL,
"lastParse" TIMESTAMP(3),
CONSTRAINT "Domain_pkey" PRIMARY KEY ("id")
);
-- CreateTable
CREATE TABLE "NftDomain" (
"id" SERIAL NOT NULL,
"createdAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"updatedAt" TIMESTAMP(3) NOT NULL,
"address" TEXT NOT NULL,
"available" BOOLEAN NOT NULL,
CONSTRAINT "NftDomain_pkey" PRIMARY KEY ("id")
);
-- CreateIndex
CREATE UNIQUE INDEX "User_email_key" ON "User"("email");
@ -91,6 +102,9 @@ CREATE UNIQUE INDEX "Webpage_path_key" ON "Webpage"("path");
-- CreateIndex
CREATE UNIQUE INDEX "Domain_address_key" ON "Domain"("address");
-- CreateIndex
CREATE UNIQUE INDEX "NftDomain_address_key" ON "NftDomain"("address");
-- AddForeignKey
ALTER TABLE "Session" ADD CONSTRAINT "Session_userId_fkey" FOREIGN KEY ("userId") REFERENCES "User"("id") ON DELETE SET NULL ON UPDATE CASCADE;

2
searching-front/db/migrations/20221002132213_asd/migration.sql

@ -0,0 +1,2 @@
-- AlterTable
-- Add a nullable "lastParse" timestamp so the parser can record when each
-- NFT domain was last indexed (written by services/parser.ts before indexing).
ALTER TABLE "NftDomain" ADD COLUMN "lastParse" TIMESTAMP(3);

1
searching-front/db/schema.prisma

@ -94,4 +94,5 @@ model NftDomain {
updatedAt DateTime @updatedAt
address String @unique
available Boolean
lastParse DateTime?
}

6
searching-front/next.config.js

@ -4,6 +4,10 @@ const { withBlitz } = require("@blitzjs/next")
/**
* @type {import('@blitzjs/next').BlitzConfig}
**/
const config = {}
const config = {
// NOTE(review): ignoreBuildErrors suppresses ALL TypeScript errors during
// `next build` — presumably added to unblock the Docker image build; confirm
// and remove once the services/ code type-checks cleanly.
typescript:{
ignoreBuildErrors: true
}
}
module.exports = withBlitz(config)

25574
searching-front/package-lock.json generated

File diff suppressed because it is too large Load Diff

9
searching-front/package.json

@ -2,7 +2,7 @@
"name": "searching-front",
"version": "1.0.0",
"scripts": {
"watcher": "ts-node-esm ./services/domain-watcher.ts",
"watcher": "ts-node-esm ./services/main.ts",
"parser": "ts-node-esm ./services/parser.ts",
"dev": "blitz dev",
"build": "blitz build",
@ -10,8 +10,7 @@
"studio": "blitz prisma studio",
"lint": "eslint --ignore-path .gitignore --ext .js,.ts,.tsx .",
"test": "jest",
"test:watch": "jest --watch",
"prepare": "husky install"
"test:watch": "jest --watch"
},
"prisma": {
"schema": "db/schema.prisma"
@ -63,8 +62,6 @@
},
"devDependencies": {
"@next/bundle-analyzer": "12.0.8",
"@testing-library/jest-dom": "5.16.3",
"@types/jest": "27.4.1",
"@types/jsdom": "20.0.0",
"@types/node": "17.0.16",
"@types/preview-email": "2.0.1",
@ -75,14 +72,12 @@
"eslint-config-next": "12.2.0",
"eslint-config-prettier": "8.5.0",
"husky": "7.0.4",
"jest": "27.5.1",
"lint-staged": "12.1.7",
"prettier": "^2.5.1",
"prettier-plugin-prisma": "3.8.0",
"pretty-quick": "3.1.3",
"preview-email": "3.0.7",
"prisma": "4.2.1",
"ts-jest": "28.0.7",
"typescript": "^4.5.3"
},
"private": true

83
searching-front/services/domain-watcher.ts

@ -14,46 +14,55 @@ import {
dotenv.config({ path: path.resolve(__dirname, "../.env.local") })
import db from "../db/index"
import axios from "axios"
import { getTonProxy } from "./helpers"
const nftApi = new NFTApi()
const DOMAIN_MOCKS = ["https://wolkonsky.com", "https://zhleb.ru"]
const DOMAINS_COLLECTION_ADDRESS = "EQC3dNlesgVD8YbAazcauIrXBPfiVhMMr5YYk2in0Mtsz0Bz"
// const main = async () => {
// DOMAIN_MOCKS.forEach(async (item) => {
// await db.domain.upsert({
// where: {
// address: item,
// },
// update: {},
// create: { address: item },
// })
// })
// }
interface SearchNFTItemsParams {
limit: number
offset: number
}
// Sleep helper: resolves to `true` once `time` milliseconds have elapsed.
const wait = (time: number) =>
  new Promise((done) => {
    setTimeout(() => {
      done(true)
    }, time)
  })
const searchNFTItems = async ({ limit, offset }: SearchNFTItemsParams) => {
try{
console.log(`Start search limit:${limit}, offset:${offset}`)
await wait(1000)
const { data } = await axios.get(
`https://tonapi.io/v1/nft/searchItems?collection=EQC3dNlesgVD8YbAazcauIrXBPfiVhMMr5YYk2in0Mtsz0Bz&include_on_sale=false&limit=${limit}&offset=${offset}`
)
console.log(`Success search: ${data.nft_items.length} items`)
`https://tonapi.io/v1/nft/searchItems?collection=EQC3dNlesgVD8YbAazcauIrXBPfiVhMMr5YYk2in0Mtsz0Bz&include_on_sale=false&limit=${limit}&offset=${offset}`,
{
headers:{
// 'Authorization': 'Bearer '+ '6c456b1e31217a79e121dcb9b506c280358d58bc86659bdbac1d737bfc3691fb',
}
}
)
return data.nft_items
} catch (e){
return searchNFTItems({ limit, offset })
}
}
const portion = 1000
const main = async () => {
console.time("DOMAINWATCH")
// Probe a TON site through the HTTP proxy (see getTonProxy); resolves with
// the domain name on success, rejects on network failure or empty body.
// Callers treat any rejection as "site unavailable".
const fetchTonSite = async (url: string) => {
  const urlToFetch = `http://${url}/`
  const response = await axios.get(urlToFetch, {
    proxy: getTonProxy(),
  })
  if (!response.data) {
    console.log("Error fetch")
    // Throw a real Error (not a bare string) so rejections carry a stack trace.
    throw new Error(`Empty response from ${urlToFetch}`)
  }
  return url
}
const main = async () => new Promise(async (resolve)=>{
// Receive typed array of owner nfts
let count = 0
while (true) {
// nftItems holds up to 1000 sites (one page of results)
const nftItems = await searchNFTItems({
limit: portion,
offset: count * portion,
@ -63,25 +72,27 @@ const main = async () => {
for (let i = 0; i < nftItems.length; i++) {
const nftDomainItem = nftItems[i]
if (nftDomainItem.dns) {
await db.nftDomain.upsert({
where: {
address: nftDomainItem.dns,
},
update: { available: false, address: nftDomainItem.dns },
create: { available: false, address: nftDomainItem.dns },
})
fetchTonSite(nftDomainItem.dns)
.then(async (dmn) => {
console.log("success dmn", dmn)
await db.nftDomain.upsert({
where: {
address: `http://${dmn}`,
},
update: { available: false, address: `http://${dmn}` },
create: { available: false, address: `http://${dmn}` },
})
})
.catch(() => {})
}
}
count++
continue
}
console.timeEnd("DOMAINWATCH")
break
}
}
main()
.then(() => console.log("finish domain watcher"))
.catch((e) => console.log("error in domain watcher", e))
console.log('Finish fetch nft')
setTimeout(()=>{resolve(true)}, 10000)
})
export default {}
export default main

4
searching-front/services/helpers.ts

@ -0,0 +1,4 @@
// Connection settings for the public TON HTTP proxy used to reach .ton sites.
export const getTonProxy = () => {
  return {
    host: "in1.ton.org",
    port: 8080,
  }
}

20
searching-front/services/main.ts

@ -0,0 +1,20 @@
import domainWatcher from './domain-watcher'
import parser from './parser'
// One full cycle: refresh the NFT domain list, then (re)index every site.
const run = async () => {
  console.log('Start domain watcher')
  await domainWatcher();
  console.log('Start parser')
  await parser();
}

const second = 1000;
const minute = 60 * second;
const hour = 60 * minute;

// Kick off immediately, then repeat every 3 hours.
// Attach .catch so a failed cycle is logged instead of raising an
// unhandled promise rejection (which can terminate the Node process).
run().catch((e) => console.log('error in watcher cycle', e));
setInterval(() => {
  run().catch((e) => console.log('error in watcher cycle', e))
}, 3 * hour)

9
searching-front/services/modules/elastic/index.ts

@ -63,7 +63,6 @@ class Elastic {
public createIndex = async (lang: Languages) => {
const indexName = getIndexNameByLang(lang)
const analyzer = getAnalyzerByLang(lang)
await this.client.indices.create({
index: indexName,
mappings: {
@ -90,8 +89,12 @@ class Elastic {
}
public initElastic = async () => {
;[Languages.EN, Languages.RU].forEach(async (item) => {
await this.createIndex(item)
;[Languages.EN, Languages.RU].forEach(async (lang) => {
const indexName = getIndexNameByLang(lang)
const alreadyExist = await this.client.indices.exists({index:indexName})
if(!alreadyExist){
await this.createIndex(lang)
}
})
}

9
searching-front/services/modules/parser/index.ts

@ -8,6 +8,7 @@ import textversion from "textversionjs"
import { ElasticIndexParams } from "../elastic"
import { htmlToText, SHOULD_NOT_PARSE } from "./helpers"
import { URL } from "url"
import { getTonProxy } from "../../helpers"
interface ParseUrlResult {
elasticData: ElasticIndexParams
@ -27,10 +28,13 @@ class Parser {
constructor() {}
parseUrl = async (url: string) => {
try {
const { data, headers } = await axios.get(url)
const { data, headers } = await axios.get(url,{
proxy: getTonProxy(),
})
const contentType = headers["content-type"].toLocaleLowerCase()
if (contentType !== "text/html; charset=utf-8") {
console.log(contentType)
if (!contentType.startsWith('text/html')) {
return SHOULD_NOT_PARSE
}
@ -40,6 +44,7 @@ class Parser {
// collect all links and put them into a set for further processing
dom.window.document.querySelectorAll("a").forEach(({ href }) => {
console.log(href)
if (isInnerLink(href)) {
const url = new URL("ton://a.ton" + href)
if (!isInvalidLink(url.pathname)) {

33
searching-front/services/parser.ts

@ -11,6 +11,7 @@ import db from "../db/index"
import Elastic from "./modules/elastic"
import Parser from "./modules/parser"
import { SHOULD_NOT_PARSE } from "./modules/parser/helpers"
import { Domain } from "domain"
type SubPages = Record<string, boolean>
@ -18,11 +19,11 @@ const findFirstNotIndexed = (subpages: SubPages = {}) => {
return Object.entries(subpages).find(([url, isIndexed]) => !isIndexed)?.[0]
}
const indexWebsite = async (domain: string, url: string, subpages: SubPages = {}) => {
if (!subpages[url]) {
const urlObj = new URL(domain + url)
const parseInfo = await Parser.parseUrl(urlObj.toString())
subpages[url] = true
const indexWebsite = async (domain: string, path: string, subpages: SubPages = {}) => {
if (!subpages[path]) {
const url = domain + path;
const parseInfo = await Parser.parseUrl(url)
subpages[path] = true
let pages = {}
if (parseInfo !== SHOULD_NOT_PARSE) {
await Elastic.index(parseInfo.elasticData)
@ -46,28 +47,24 @@ const indexWebsite = async (domain: string, url: string, subpages: SubPages = {}
}
}
const main = async () => {
// await Elastic.initElastic()
// await Elastic.createIndex()
const domains = await db.domain.findMany()
const main = async () => {
await Elastic.initElastic()
console.log('Success InitElastic')
const domains = await db.nftDomain.findMany()
console.log('Find domains', domains)
if (domains) {
console.time("index")
for (const domain of domains) {
console.time("index" + domain.address)
await db.domain.update({
await db.nftDomain.update({
where: { address: domain.address },
data: { lastParse: new Date() },
})
console.log('Start index domain: ',domain.address)
await indexWebsite(domain.address, "/")
console.timeEnd("index" + domain.address)
}
console.timeEnd("index")
}
console.log('Finish parse domains')
}
main()
.then(() => console.log("finish parser"))
.catch((e) => console.log("error in parserr", e))
export default {}
export default main

Loading…
Cancel
Save