77 lines
2.6 KiB
Python
77 lines
2.6 KiB
Python
import datetime
|
|
import json
|
|
import os
|
|
from pathlib import Path
|
|
|
|
from modules.civit.client import Client
|
|
from modules.civit.fetch import Fetch
|
|
from modules.civit.datamodel import *
|
|
from modules.shared.DatabaseAbstraction import Database
|
|
|
|
|
|
class Civit:
    """Tie the Civit client and fetcher to a database.

    The instance keeps the database handle, builds a ``Client`` and a
    ``Fetch`` for the given path, and can load previously fetched JSON
    files into the database through :meth:`from_fetch`.
    """

    def __init__(self, db: "Database", path, api_key=None):
        """Build the client and fetcher and initialise the entity storage.

        Args:
            db: Database object; this class only calls its ``cursor()``.
            path: Passed to ``Client`` and also kept as ``self.path``.
            api_key: Optional key forwarded to ``Client``.
        """
        self._db = db
        self.path = path
        self.client = Client(path, api_key)
        self.fetcher = Fetch(self.client)
        # NOTE(review): ``create`` presumably creates the backing storage for
        # each entity type if it is missing -- confirm in the datamodel.
        # Every call receives its own cursor, in the original order.
        for entity_cls in (Creator, Tag, Model):
            entity_cls.create(self._db.cursor())

    def save(self, e: "DataClassDatabase"):
        """Save ``e`` through a fresh cursor and return what ``e.save`` returns."""
        return e.save(self._db.cursor())

    def from_fetch(self, entity: str, entity_type: "type[DataClassDatabase]"):
        """Load every fetched JSON file of one entity kind into the database.

        The entity name is lower-cased and looked up in
        ``self.fetcher.entities`` to find the subdirectory of
        ``self.client.path`` that holds the files. Each ``*.json`` file may
        contain either a single object or an array of objects; every object
        is converted with ``entity_type.from_dict`` and stored with
        :meth:`save`. Progress is reported with ``print``.

        Files that cannot be read or decoded, and files whose top-level JSON
        value is not an object or an array, are reported and skipped instead
        of aborting the whole import.

        Args:
            entity: Entity name (case-insensitive). A falsy value makes the
                call a no-op.
            entity_type: Class providing ``from_dict``; the objects it
                returns must provide ``save(cursor)``.

        Raises:
            ValueError: If ``entity`` is not a key of ``self.fetcher.entities``.
        """
        if not entity:
            return
        entity = entity.lower()

        known_entities = self.fetcher.entities
        if entity not in known_entities:
            raise ValueError(f"Civit doesn't have entity type {entity}")
        directory_path = str(Path(self.client.path) / known_entities[entity])

        # os.listdir() returns names in arbitrary order; sort them so that
        # repeated imports process the files in the same sequence.
        files = sorted(os.listdir(directory_path))
        files_count = len(files)
        tp = datetime.datetime.now()

        # Walk through every file in the directory.
        for i, filename in enumerate(files, 1):
            # The elapsed time shown here is the time since the previous
            # file's progress line was printed.
            print(f'processing file {i} of {files_count} ({float(i) / float(files_count) * 100:.2f}%): {filename} Elapsed time {datetime.datetime.now() - tp}')
            tp = datetime.datetime.now()
            if not filename.endswith('.json'):
                continue

            file_path = os.path.join(directory_path, filename)
            data = self._load_records(file_path, filename)
            if not data:
                continue
            self._save_records(data, entity, entity_type)

    @staticmethod
    def _load_records(file_path, filename):
        """Return the list of records stored in one JSON file, or ``None`` to skip it."""
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except (json.JSONDecodeError, UnicodeDecodeError, OSError) as e:
            # UnicodeDecodeError covers files that are not valid UTF-8; it is
            # not a JSONDecodeError and would otherwise abort the import.
            print(f"Ошибка чтения файла {filename}: {e}")
            return None

        # A single object is treated as a one-element list.
        if isinstance(data, dict):
            return [data]
        # Empty array, null, 0, "" and similar: nothing to import.
        if not data:
            return None
        # Any other top-level value (string, number, true) cannot be a
        # collection of records; iterating it would fail or feed garbage
        # into from_dict.
        if not isinstance(data, list):
            print(f'skipping {filename}: expected a JSON object or array, got {type(data).__name__}')
            return None
        return data

    def _save_records(self, data, entity, entity_type):
        """Convert and save each record of ``data``, printing progress every 1000 records."""
        t = datetime.datetime.now()
        data_count = len(data)
        for j, d in enumerate(data, 1):
            self.save(entity_type.from_dict(d))
            if j % 1000 == 0:
                print(f'saved {j} {entity} of {data_count} ({float(j) / float(data_count) * 100:.2f}%). Elapsed time {datetime.datetime.now() - t}')
                t = datetime.datetime.now()
|
|
|
|
|
|
|
|
|
|
|
|
|