Common commit
This commit is contained in:
@@ -2,7 +2,7 @@ import datetime
|
||||
from dataclasses import dataclass, fields
|
||||
from typing import Optional, List, get_origin
|
||||
|
||||
from DataClassJson import DataClassJson
|
||||
from .DataClassJson import DataClassJson
|
||||
from modules.shared.DatabaseAbstraction import Cursor
|
||||
|
||||
types = {bool: 'INTEGER', int: 'INTEGER', float: 'REAL', str: "TEXT",
|
||||
@@ -22,9 +22,14 @@ class DataClassDatabase(DataClassJson):
|
||||
tmp_instance = cls()
|
||||
if not table_name: table_name = tmp_instance._table_name
|
||||
|
||||
pk_type = str
|
||||
for field in fields(tmp_instance):
|
||||
if field.name == tmp_instance._key_field:
|
||||
pk_type = field.type
|
||||
|
||||
result: list[str] = list()
|
||||
result.append(f'CREATE TABLE IF NOT EXISTS {table_name} (fk TEXT NOT NULL, pk TEXT NOT NULL, PRIMARY KEY(pk, fk));')
|
||||
result.append(f'CREATE TABLE IF NOT EXISTS {table_name}_archive (fk TEXT NOT NULL, pk TEXT NOT NULL, save_date TEXT NOT NULL, PRIMARY KEY(pk, fk, save_date));')
|
||||
result.append(f'CREATE TABLE IF NOT EXISTS "{table_name}" (fk INTEGER NOT NULL, pk {types.get(pk_type, 'INTEGER')} NOT NULL, PRIMARY KEY(pk, fk));')
|
||||
result.append(f'CREATE TABLE IF NOT EXISTS "{table_name}_archive" (fk INTEGER NOT NULL, pk {types.get(pk_type, 'INTEGER')} NOT NULL, save_date TEXT NOT NULL, PRIMARY KEY(pk, fk, save_date));')
|
||||
|
||||
excluded_fields = {f.name for f in fields(DataClassDatabase)}
|
||||
all_fields = [f for f in fields(cls) if f.name not in excluded_fields and not f.name.startswith('_')]
|
||||
@@ -35,10 +40,10 @@ class DataClassDatabase(DataClassJson):
|
||||
try: result.extend(inner_type.get_create_sqls())
|
||||
except Exception as e: raise RuntimeError('invalid forwarding type') from e
|
||||
elif field.type in { list, Optional[list], Optional[List] }:
|
||||
result.append(f'CREATE TABLE IF NOT EXISTS {table_name}_{field.name} (fk TEXT NOT NULL, data TEXT NOT NULL, PRIMARY KEY(data, fk));')
|
||||
result.append(f'CREATE TABLE IF NOT EXISTS "{table_name}_{field.name}" (fk TEXT NOT NULL, data TEXT NOT NULL, PRIMARY KEY(data, fk));')
|
||||
else:
|
||||
result.append(f'ALTER TABLE {table_name} ADD COLUMN {field.name} {types.get(field.type, 'TEXT')};')
|
||||
result.append(f'ALTER TABLE {table_name}_archive ADD COLUMN {field.name} {types.get(field.type, 'TEXT')};')
|
||||
result.append(f'ALTER TABLE "{table_name}" ADD COLUMN "{field.name}" {types.get(field.type, 'TEXT')};')
|
||||
result.append(f'ALTER TABLE "{table_name}_archive" ADD COLUMN "{field.name}" {types.get(field.type, 'TEXT')};')
|
||||
return result
|
||||
|
||||
@classmethod
|
||||
@@ -53,7 +58,7 @@ class DataClassDatabase(DataClassJson):
|
||||
params = list()
|
||||
instance = cls()
|
||||
|
||||
sql = f'SELECT pk, fk FROM {instance._table_name}'
|
||||
sql = f'SELECT pk, fk FROM "{instance._table_name}"'
|
||||
if pk or fk: sql += ' WHERE'
|
||||
if pk:
|
||||
params.append(pk)
|
||||
@@ -77,7 +82,7 @@ class DataClassDatabase(DataClassJson):
|
||||
def _load(cls, cur: Cursor, pk, fk, depth = 5):
|
||||
if not pk and not fk: return None
|
||||
instance = cls()
|
||||
res: dict = cur.fetchone(f'SELECT * FROM {instance._table_name} WHERE pk = ? AND fk = ?', [pk, fk])
|
||||
res: dict = cur.fetchone(f'SELECT * FROM "{instance._table_name}" WHERE pk = ? AND fk = ?', [pk, fk])
|
||||
if not res: return None
|
||||
rpk = res.pop('pk')
|
||||
rfk = res.pop('fk')
|
||||
@@ -93,7 +98,7 @@ class DataClassDatabase(DataClassJson):
|
||||
elif len(items) > 0: setattr(result, field.name, items[0])
|
||||
|
||||
elif field.type in {list, List, Optional[list], Optional[List]}:
|
||||
items = cur.fetchall(f'SELECT data from {instance._table_name}_{field.name} WHERE fk=?', [rpk])
|
||||
items = cur.fetchall(f'SELECT data from "{instance._table_name}_{field.name}" WHERE fk=?', [rpk])
|
||||
if items:
|
||||
items = [row['data'] for row in items]
|
||||
else:
|
||||
@@ -117,15 +122,15 @@ class DataClassDatabase(DataClassJson):
|
||||
|
||||
if prev and not self.equals_simple(prev):
|
||||
d = str(datetime.datetime.now())
|
||||
cur.execute(f'INSERT OR IGNORE INTO {prev._table_name}_archive (fk, pk, save_date) VALUES (?, ?, ?)', [fk, pk, d])
|
||||
cur.execute(f'INSERT OR IGNORE INTO "{prev._table_name}_archive" (fk, pk, save_date) VALUES (?, ?, ?)', [fk, pk, d])
|
||||
for field in prev.serializable_fields():
|
||||
attr = getattr(prev, field.name)
|
||||
if field.name in prev._forwarding: continue
|
||||
elif field.type in {list, List, Optional[list], Optional[List]} or isinstance(attr, list): continue
|
||||
else:
|
||||
cur.execute(f'UPDATE {prev._table_name}_archive SET {field.name}=? WHERE fk=? AND pk=? AND save_date=?', [attr, fk, pk, d])
|
||||
cur.execute(f'UPDATE "{prev._table_name}_archive" SET {field.name}=? WHERE fk=? AND pk=? AND save_date=?', [attr, fk, pk, d])
|
||||
|
||||
cur.execute(f'INSERT OR IGNORE INTO {self._table_name} (fk, pk) VALUES (?, ?)', [fk, pk])
|
||||
cur.execute(f'INSERT OR IGNORE INTO "{self._table_name}" (fk, pk) VALUES (?, ?)', [fk, pk])
|
||||
|
||||
for field in self.serializable_fields():
|
||||
attr = getattr(self, field.name)
|
||||
@@ -134,13 +139,13 @@ class DataClassDatabase(DataClassJson):
|
||||
if field.name in self._forwarding:
|
||||
if not isinstance(getattr(self, field.name), list): attr = [attr]
|
||||
for val in attr:
|
||||
val.save(cur, fk=pk)
|
||||
val.autosave(cur, fk=pk)
|
||||
continue
|
||||
elif field.type in {list, List, Optional[list], Optional[List]} or isinstance(attr, list):
|
||||
for val in attr: cur.execute(f'INSERT OR IGNORE INTO {self._table_name}_{field.name} VALUES (?, ?)', [pk, val])
|
||||
for val in attr: cur.execute(f'INSERT OR IGNORE INTO "{self._table_name}_{field.name}" VALUES (?, ?)', [pk, val])
|
||||
continue
|
||||
else:
|
||||
cur.execute(f'UPDATE {self._table_name} SET {field.name}=? WHERE fk=? AND pk=?', [attr, fk, pk])
|
||||
cur.execute(f'UPDATE "{self._table_name}" SET "{field.name}"=? WHERE fk=? AND pk=?', [attr, fk, pk])
|
||||
continue
|
||||
|
||||
|
||||
|
||||
249
modules/shared/DatamodelBuilder.py
Normal file
249
modules/shared/DatamodelBuilder.py
Normal file
@@ -0,0 +1,249 @@
|
||||
import datetime
|
||||
import json
|
||||
import time
|
||||
import os
|
||||
import warnings
|
||||
from collections import defaultdict, Counter
|
||||
from logging.config import valid_ident
|
||||
from traceback import print_tb
|
||||
from typing import Dict, List, Any, Tuple, Union
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
from modules.civit.client import Client
|
||||
|
||||
|
||||
class DatamodelBuilderSimple:
    """Analyze the structure of JSON documents stored in a directory.

    Two modes are provided:
      * ``analyze_directory`` — full analysis (field types, presence,
        most frequent values); keeps every value in memory.
      * ``analyze_directory_low_ram`` — streaming per-field occurrence
        counter that processes one file at a time.
    """

    def __init__(self):
        # field_path -> {'type', 'always_present', 'top_values',
        #                'total_count', 'presence_count'}
        self.field_analysis: Dict[str, Any] = {}
        # field_path -> occurrence count (low-RAM mode)
        self.field_analysis_low_ram: dict[str, int] = dict()

    @staticmethod
    def _get_json_files(directory_path: str) -> List[str]:
        """Return paths of all ``*.json`` files directly inside the directory."""
        return [os.path.join(directory_path, filename)
                for filename in os.listdir(directory_path)
                if filename.endswith('.json')]

    @staticmethod
    def _load_json_data(file_path: str) -> List[Dict]:
        """Load one JSON file and always return a list of documents.

        A top-level object is wrapped in a one-element list. Unreadable or
        malformed files are logged and yield an empty list (best-effort).
        """
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except (json.JSONDecodeError, IOError) as e:
            # runtime message kept as in the original source
            print(f"Ошибка чтения файла {file_path}: {e}")
            return []
        return data if isinstance(data, list) else [data]

    def _collect_all_entities(self, directory_path: str) -> List[Dict]:
        """Gather every document from every JSON file in the directory."""
        all_entities: List[Dict] = []
        for file_path in self._get_json_files(directory_path):
            all_entities.extend(self._load_json_data(file_path))
        return all_entities

    @staticmethod
    def _get_field_types(value: Any) -> str:
        """Map a JSON value to a coarse type name.

        ``bool`` must be tested before ``int`` because bool is an int subclass.
        """
        if isinstance(value, dict):
            return 'dict'
        if isinstance(value, list):
            return 'list'
        if isinstance(value, bool):
            return 'bool'
        if isinstance(value, int):
            return 'int'
        if isinstance(value, float):
            return 'float'
        if isinstance(value, str):
            return 'str'
        return 'unknown'

    @staticmethod
    def _get_main_type(types: List[str]) -> str:
        """Collapse a list of observed type names into a single label."""
        if not types:
            return 'unknown'
        # Any dict/list occurrence marks the field as a nested structure.
        if 'dict' in types or 'list' in types:
            return 'complex'
        return types[0] if len(set(types)) == 1 else 'mixed'

    @staticmethod
    def _is_hashable(value: Any) -> bool:
        """Return True if *value* can be used as a dict/Counter key."""
        try:
            hash(value)
        except TypeError:
            return False
        return True

    @classmethod
    def _serialize_value_for_counter(cls, value: Any) -> Any:
        """Return a hashable stand-in for *value*.

        Hashable values pass through unchanged; unhashable ones (lists,
        dicts) are replaced by their ``str()`` representation.
        """
        return value if cls._is_hashable(value) else str(value)

    def _analyze_fields_recursive(self, entity: Dict, parent_path: str,
                                  entity_id: int,
                                  field_types: Dict, field_presence: Dict,
                                  field_values: Dict):
        """Record type, presence and value of every field, recursively.

        *field_presence* maps field_path -> set of entity ids, so a field
        occurring several times inside one entity (e.g. in a list of dicts)
        is still counted once per top-level entity.
        """
        if not isinstance(entity, dict):
            return

        for key, value in entity.items():
            field_path = f"{parent_path}.{key}" if parent_path else key

            field_types[field_path].append(self._get_field_types(value))
            field_presence[field_path].add(entity_id)

            if value is not None:
                field_values[field_path].append(
                    self._serialize_value_for_counter(value))

            # Recurse into nested objects and into dicts inside lists.
            if isinstance(value, dict):
                self._analyze_fields_recursive(value, field_path, entity_id,
                                               field_types, field_presence,
                                               field_values)
            elif isinstance(value, list):
                for item in value:
                    if isinstance(item, dict):
                        self._analyze_fields_recursive(item, field_path,
                                                       entity_id, field_types,
                                                       field_presence,
                                                       field_values)

    def _analyze_entity_structure(self, entities: List[Dict], top_n: int) -> Dict[str, Any]:
        """Build the per-field summary for all entities.

        FIX: presence is now tracked per top-level entity (a set of entity
        ids) instead of per occurrence, so repeated fields inside one entity
        no longer inflate ``presence_count`` past ``total_count``.
        """
        if not entities:
            return {}

        field_types = defaultdict(list)
        field_presence = defaultdict(set)   # field_path -> entity ids
        field_values = defaultdict(list)

        for entity_id, entity in enumerate(entities):
            self._analyze_fields_recursive(entity, "", entity_id,
                                           field_types, field_presence,
                                           field_values)

        total_count = len(entities)
        result: Dict[str, Any] = {}
        for field_path, types in field_types.items():
            presence_count = len(field_presence[field_path])

            top_values: List[Any] = []
            if field_path in field_values:
                try:
                    counter = Counter(field_values[field_path])
                    top_values = [v for v, _ in counter.most_common(top_n)]
                except Exception:
                    # Analysis is best-effort; a counting failure on one
                    # field must not abort the whole run.
                    top_values = []

            result[field_path] = {
                'type': self._get_main_type(types),
                'always_present': presence_count == total_count,
                'top_values': top_values,
                'total_count': total_count,
                'presence_count': presence_count,
            }

        return result

    def analyze_directory(self, directory_path: str, top_n: int = 10) -> Dict[str, Any]:
        """Analyze every JSON file in a directory.

        Args:
            directory_path: directory containing the JSON files.
            top_n: how many most-frequent values to keep per field.

        Returns:
            Mapping of field path to its structural summary; also stored
            in ``self.field_analysis``.
        """
        entities = self._collect_all_entities(directory_path)
        self.field_analysis = self._analyze_entity_structure(entities, top_n)
        return self.field_analysis

    def analyze_directory_low_ram(self, directory_path: str, dump=None):
        """Count field occurrences file-by-file without retaining values.

        Results accumulate in ``self.field_analysis_low_ram``; they are
        printed and, if *dump* is a truthy path, also written one per line
        to that file.
        """
        json_files = self._get_json_files(directory_path)
        files_count = len(json_files)

        for i, file_path in enumerate(json_files, start=1):
            print(f'processing file {i} of {files_count}: {file_path}')
            for entity in self._load_json_data(file_path):
                self.analyze_recursive_low_ram(entity)

        sorted_items = sorted(self.field_analysis_low_ram.items(),
                              key=lambda item: item[1])
        result = [f'{item[0]} => {item[1]}' for item in sorted_items]

        if dump:
            with open(dump, 'w', encoding='utf-8') as f:
                f.writelines(line + '\n' for line in result)
        for res in result:
            print(res)

    def analyze_recursive_low_ram(self, entity: dict, prefix=''):
        """Increment per-field counters; recurse into dicts and lists of dicts."""
        for key, value in entity.items():
            items = value if isinstance(value, list) else [value]
            path = prefix + key
            for item in items:
                if isinstance(item, dict):
                    self.analyze_recursive_low_ram(item, prefix=path + '.')
                else:
                    self.field_analysis_low_ram[path] = \
                        self.field_analysis_low_ram.get(path, 0) + 1
|
||||
if __name__ == '__main__':
    # Interactive entry point: prompt for the source directory and an
    # optional dump file, then run the streaming (low-RAM) analysis.
    builder = DatamodelBuilderSimple()
    source_dir = input("Directory path: ")
    dump_path = input("Dump file path: ")
    builder.analyze_directory_low_ram(source_dir, dump_path)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
74
modules/shared/IncrementalCounter.py
Normal file
74
modules/shared/IncrementalCounter.py
Normal file
@@ -0,0 +1,74 @@
|
||||
import time
|
||||
from collections import deque
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Deque, Tuple
|
||||
|
||||
# Named time windows (in seconds) for the rolling statistics.
WINDOWS = {
    "5min": 5 * 60,
    "1h": 60 * 60,
}


@dataclass
class IncrementalCounter:
    """Counter that supports:
      * ``+=`` — increments the internal counter by 1
      * ``last_5min``, ``last_hour``, ``total`` — number of increments
        within the last 5 minutes, the last hour, and overall.
    """

    # Total number of increments ever made.
    _total: int = 0
    # Monotonic timestamps of each increment, in chronological order.
    _history: Deque[float] = field(default_factory=deque, init=False)

    # ---------- += operator ----------
    def __iadd__(self, other):
        """Increment by exactly 1 on any ``+=``; *other* is ignored.

        Returns self so expression chaining keeps working.
        """
        self._total += 1
        self._history.append(time.monotonic())
        # Drop entries older than the longest window (1h) to bound memory.
        self._purge_old_entries()
        return self

    # ---------- statistics properties ----------
    @property
    def total(self) -> int:
        """Total number of increments."""
        return self._total

    @property
    def last_5min(self) -> int:
        """Number of increments within the last 5 minutes."""
        return self._count_in_window(WINDOWS["5min"])

    @property
    def last_hour(self) -> int:
        """Number of increments within the last hour."""
        return self._count_in_window(WINDOWS["1h"])

    # ---------- helpers ----------
    def _purge_old_entries(self) -> None:
        """Remove history entries older than the longest window (1 hour)."""
        cutoff = time.monotonic() - WINDOWS["1h"]
        while self._history and self._history[0] < cutoff:
            self._history.popleft()

    def _count_in_window(self, seconds: float) -> int:
        """Count events that fall within the last *seconds* seconds.

        BUG FIX: the previous version popped every entry older than the
        *queried* window, so reading ``last_5min`` destroyed history still
        needed by ``last_hour``. Now only entries beyond the longest
        window are discarded; newer ones are merely counted.
        """
        self._purge_old_entries()
        cutoff = time.monotonic() - seconds
        return sum(1 for t in self._history if t >= cutoff)

    # ---------- user-facing repr ----------
    def __repr__(self):
        return (
            f"<IncrementalCounter total={self.total} "
            f"5min={self.last_5min} 1h={self.last_hour}>"
        )
|
||||
Reference in New Issue
Block a user