Common commit

This commit is contained in:
2025-10-16 18:42:32 +07:00
parent 124065d2ac
commit 40c320a1ac
21 changed files with 1916 additions and 379 deletions

View File

@@ -2,7 +2,7 @@ import datetime
from dataclasses import dataclass, fields
from typing import Optional, List, get_origin
from DataClassJson import DataClassJson
from .DataClassJson import DataClassJson
from modules.shared.DatabaseAbstraction import Cursor
types = {bool: 'INTEGER', int: 'INTEGER', float: 'REAL', str: "TEXT",
@@ -22,9 +22,14 @@ class DataClassDatabase(DataClassJson):
tmp_instance = cls()
if not table_name: table_name = tmp_instance._table_name
pk_type = str
for field in fields(tmp_instance):
if field.name == tmp_instance._key_field:
pk_type = field.type
result: list[str] = list()
result.append(f'CREATE TABLE IF NOT EXISTS {table_name} (fk TEXT NOT NULL, pk TEXT NOT NULL, PRIMARY KEY(pk, fk));')
result.append(f'CREATE TABLE IF NOT EXISTS {table_name}_archive (fk TEXT NOT NULL, pk TEXT NOT NULL, save_date TEXT NOT NULL, PRIMARY KEY(pk, fk, save_date));')
result.append(f'CREATE TABLE IF NOT EXISTS "{table_name}" (fk INTEGER NOT NULL, pk {types.get(pk_type, 'INTEGER')} NOT NULL, PRIMARY KEY(pk, fk));')
result.append(f'CREATE TABLE IF NOT EXISTS "{table_name}_archive" (fk INTEGER NOT NULL, pk {types.get(pk_type, 'INTEGER')} NOT NULL, save_date TEXT NOT NULL, PRIMARY KEY(pk, fk, save_date));')
excluded_fields = {f.name for f in fields(DataClassDatabase)}
all_fields = [f for f in fields(cls) if f.name not in excluded_fields and not f.name.startswith('_')]
@@ -35,10 +40,10 @@ class DataClassDatabase(DataClassJson):
try: result.extend(inner_type.get_create_sqls())
except Exception as e: raise RuntimeError('invalid forwarding type') from e
elif field.type in { list, Optional[list], Optional[List] }:
result.append(f'CREATE TABLE IF NOT EXISTS {table_name}_{field.name} (fk TEXT NOT NULL, data TEXT NOT NULL, PRIMARY KEY(data, fk));')
result.append(f'CREATE TABLE IF NOT EXISTS "{table_name}_{field.name}" (fk TEXT NOT NULL, data TEXT NOT NULL, PRIMARY KEY(data, fk));')
else:
result.append(f'ALTER TABLE {table_name} ADD COLUMN {field.name} {types.get(field.type, 'TEXT')};')
result.append(f'ALTER TABLE {table_name}_archive ADD COLUMN {field.name} {types.get(field.type, 'TEXT')};')
result.append(f'ALTER TABLE "{table_name}" ADD COLUMN "{field.name}" {types.get(field.type, 'TEXT')};')
result.append(f'ALTER TABLE "{table_name}_archive" ADD COLUMN "{field.name}" {types.get(field.type, 'TEXT')};')
return result
@classmethod
@@ -53,7 +58,7 @@ class DataClassDatabase(DataClassJson):
params = list()
instance = cls()
sql = f'SELECT pk, fk FROM {instance._table_name}'
sql = f'SELECT pk, fk FROM "{instance._table_name}"'
if pk or fk: sql += ' WHERE'
if pk:
params.append(pk)
@@ -77,7 +82,7 @@ class DataClassDatabase(DataClassJson):
def _load(cls, cur: Cursor, pk, fk, depth = 5):
if not pk and not fk: return None
instance = cls()
res: dict = cur.fetchone(f'SELECT * FROM {instance._table_name} WHERE pk = ? AND fk = ?', [pk, fk])
res: dict = cur.fetchone(f'SELECT * FROM "{instance._table_name}" WHERE pk = ? AND fk = ?', [pk, fk])
if not res: return None
rpk = res.pop('pk')
rfk = res.pop('fk')
@@ -93,7 +98,7 @@ class DataClassDatabase(DataClassJson):
elif len(items) > 0: setattr(result, field.name, items[0])
elif field.type in {list, List, Optional[list], Optional[List]}:
items = cur.fetchall(f'SELECT data from {instance._table_name}_{field.name} WHERE fk=?', [rpk])
items = cur.fetchall(f'SELECT data from "{instance._table_name}_{field.name}" WHERE fk=?', [rpk])
if items:
items = [row['data'] for row in items]
else:
@@ -117,15 +122,15 @@ class DataClassDatabase(DataClassJson):
if prev and not self.equals_simple(prev):
d = str(datetime.datetime.now())
cur.execute(f'INSERT OR IGNORE INTO {prev._table_name}_archive (fk, pk, save_date) VALUES (?, ?, ?)', [fk, pk, d])
cur.execute(f'INSERT OR IGNORE INTO "{prev._table_name}_archive" (fk, pk, save_date) VALUES (?, ?, ?)', [fk, pk, d])
for field in prev.serializable_fields():
attr = getattr(prev, field.name)
if field.name in prev._forwarding: continue
elif field.type in {list, List, Optional[list], Optional[List]} or isinstance(attr, list): continue
else:
cur.execute(f'UPDATE {prev._table_name}_archive SET {field.name}=? WHERE fk=? AND pk=? AND save_date=?', [attr, fk, pk, d])
cur.execute(f'UPDATE "{prev._table_name}_archive" SET {field.name}=? WHERE fk=? AND pk=? AND save_date=?', [attr, fk, pk, d])
cur.execute(f'INSERT OR IGNORE INTO {self._table_name} (fk, pk) VALUES (?, ?)', [fk, pk])
cur.execute(f'INSERT OR IGNORE INTO "{self._table_name}" (fk, pk) VALUES (?, ?)', [fk, pk])
for field in self.serializable_fields():
attr = getattr(self, field.name)
@@ -134,13 +139,13 @@ class DataClassDatabase(DataClassJson):
if field.name in self._forwarding:
if not isinstance(getattr(self, field.name), list): attr = [attr]
for val in attr:
val.save(cur, fk=pk)
val.autosave(cur, fk=pk)
continue
elif field.type in {list, List, Optional[list], Optional[List]} or isinstance(attr, list):
for val in attr: cur.execute(f'INSERT OR IGNORE INTO {self._table_name}_{field.name} VALUES (?, ?)', [pk, val])
for val in attr: cur.execute(f'INSERT OR IGNORE INTO "{self._table_name}_{field.name}" VALUES (?, ?)', [pk, val])
continue
else:
cur.execute(f'UPDATE {self._table_name} SET {field.name}=? WHERE fk=? AND pk=?', [attr, fk, pk])
cur.execute(f'UPDATE "{self._table_name}" SET "{field.name}"=? WHERE fk=? AND pk=?', [attr, fk, pk])
continue

View File

@@ -0,0 +1,249 @@
import datetime
import json
import time
import os
import warnings
from collections import defaultdict, Counter
from logging.config import valid_ident
from traceback import print_tb
from typing import Dict, List, Any, Tuple, Union
from pathlib import Path
from modules.civit.client import Client
class DatamodelBuilderSimple:
    """Build a simple data-model description from a directory of JSON files.

    Two analysis modes:
      * ``analyze_directory()``         - full in-memory analysis: per dotted
        field path, the observed type, presence frequency, and most common values;
      * ``analyze_directory_low_ram()`` - streaming pass that only counts how
        often each dotted field path occurs, one file at a time.
    """

    def __init__(self):
        # Result of analyze_directory(): field path -> analysis dict.
        self.field_analysis = {}
        # Result of the low-RAM pass: field path -> occurrence count.
        self.field_analysis_low_ram: dict[str, int] = dict()

    @staticmethod
    def _get_json_files(directory_path: str) -> List[str]:
        """Return paths of all *.json files directly inside directory_path."""
        json_files = []
        for filename in os.listdir(directory_path):
            if filename.endswith('.json'):
                json_files.append(os.path.join(directory_path, filename))
        return json_files

    @staticmethod
    def _load_json_data(file_path: str) -> List[Dict]:
        """Load one JSON file and normalize its content to a list of objects.

        A top-level object is wrapped into a one-element list. Unreadable or
        malformed files are reported and yield an empty list (best effort:
        the caller keeps aggregating the remaining files).
        """
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
                if isinstance(data, list):
                    return data
                else:
                    return [data]
        except (json.JSONDecodeError, IOError) as e:
            print(f"Ошибка чтения файла {file_path}: {e}")
            return []

    def _collect_all_entities(self, directory_path: str) -> List[Dict]:
        """Gather every entity from every JSON file in the directory."""
        all_entities = []
        for file_path in self._get_json_files(directory_path):
            all_entities.extend(self._load_json_data(file_path))
        return all_entities

    @staticmethod
    def _get_field_types(value: Any) -> str:
        """Map a JSON value to a short type tag.

        NOTE: bool is tested before int because bool is a subclass of int.
        """
        if isinstance(value, dict):
            return 'dict'
        elif isinstance(value, list):
            return 'list'
        elif isinstance(value, bool):
            return 'bool'
        elif isinstance(value, int):
            return 'int'
        elif isinstance(value, float):
            return 'float'
        elif isinstance(value, str):
            return 'str'
        else:
            return 'unknown'

    @staticmethod
    def _get_main_type(types: List[str]) -> str:
        """Collapse the observed type tags for one field into a single label.

        'complex' if any dict/list was seen, the single tag when uniform,
        'mixed' otherwise, 'unknown' for an empty list.
        """
        if not types:
            return 'unknown'
        if 'dict' in types or 'list' in types:
            return 'complex'
        unique_types = set(types)
        if len(unique_types) == 1:
            return types[0]
        else:
            return 'mixed'

    @staticmethod
    def _is_hashable(value: Any) -> bool:
        """Return True if value can be used as a Counter key."""
        try:
            hash(value)
            return True
        except TypeError:
            return False

    @classmethod
    def _serialize_value_for_counter(cls, value: Any) -> Any:
        """Return a Counter-safe key for value.

        Hashable values are returned unchanged; unhashable ones fall back to
        their str() representation.  (Fixed: the return annotation said
        ``-> str`` although hashable values are returned as-is.)
        """
        if cls._is_hashable(value):
            return value
        return str(value)

    def _analyze_fields_recursive(self, entity: Dict, parent_path: str,
                                  field_types: Dict, field_presence: Dict,
                                  field_values: Dict, top_n: int):
        """Walk one entity, recording type/presence/value per dotted field path.

        ``top_n`` is threaded through for interface stability; the actual
        truncation happens in _analyze_entity_structure.
        """
        if not isinstance(entity, dict):
            return
        for key, value in entity.items():
            field_path = f"{parent_path}.{key}" if parent_path else key
            field_types[field_path].append(self._get_field_types(value))
            field_presence[field_path].append(True)
            # None is treated as "absent" for value statistics.
            if value is not None:
                field_values[field_path].append(self._serialize_value_for_counter(value))
            # Recurse into nested objects, directly or inside lists.
            if isinstance(value, dict):
                self._analyze_fields_recursive(value, field_path, field_types,
                                               field_presence, field_values, top_n)
            elif isinstance(value, list):
                for item in value:
                    if isinstance(item, dict):
                        self._analyze_fields_recursive(item, field_path, field_types,
                                                       field_presence, field_values, top_n)

    def _analyze_entity_structure(self, entities: List[Dict], top_n: int) -> Dict[str, Any]:
        """Aggregate per-field statistics over all entities."""
        if not entities:
            return {}
        field_types = defaultdict(list)
        field_presence = defaultdict(list)
        field_values = defaultdict(list)
        for entity in entities:
            self._analyze_fields_recursive(entity, "", field_types, field_presence,
                                           field_values, top_n)
        result = {}
        for field_path, types in field_types.items():
            main_type = self._get_main_type(types)
            # NOTE(review): presence is counted per occurrence (including items
            # inside lists), so nested paths can exceed the entity count.
            presence_count = len(field_presence[field_path])
            total_count = len(entities)
            always_present = presence_count == total_count
            top_values = []
            if field_path in field_values:
                try:
                    value_counter = Counter(field_values[field_path])
                    top_values = [item[0] for item in value_counter.most_common(top_n)]
                except Exception:
                    # Best effort: leave top_values empty on failure.
                    top_values = []
            result[field_path] = {
                'type': main_type,
                'always_present': always_present,
                'top_values': top_values,
                'total_count': total_count,
                'presence_count': presence_count
            }
        return result

    def analyze_directory(self, directory_path: str, top_n: int = 10) -> Dict[str, Any]:
        """Analyze every JSON file in a directory.

        Args:
            directory_path: directory containing the *.json files.
            top_n: how many most-frequent values to keep per field.

        Returns:
            Mapping of dotted field path to its analysis dict; the result is
            also stored in ``self.field_analysis``.
        """
        entities = self._collect_all_entities(directory_path)
        self.field_analysis = self._analyze_entity_structure(entities, top_n)
        return self.field_analysis

    def analyze_directory_low_ram(self, directory_path: str, dump = None):
        """Streaming field-path frequency count over all JSON files.

        Processes one file at a time to keep memory usage low.  Results are
        printed sorted by count (ascending) and, if ``dump`` is a path,
        written there as one ``path => count`` line per field.
        """
        json_files = self._get_json_files(directory_path)
        files_count = len(json_files)
        for i, file_path in enumerate(json_files, start=1):
            print(f'processing file {i} of {files_count}: {file_path}')
            for entity in self._load_json_data(file_path):
                self.analyze_recursive_low_ram(entity)
        sorted_items = sorted(self.field_analysis_low_ram.items(), key=lambda item: item[1])
        result = [f'{item[0]} => {item[1]}' for item in sorted_items]
        if dump:
            # utf-8 keeps non-ASCII field names writable on any platform.
            with open(dump, 'w', encoding='utf-8') as f:
                for res in result:
                    f.write(res + '\n')
        for res in result:
            print(res)

    def analyze_recursive_low_ram(self, entity: dict, prefix = ''):
        """Count occurrences of each dotted field path in one entity.

        Dict values recurse with an extended prefix; list values are counted
        element-wise; scalars increment the path counter directly.
        """
        for key, value in entity.items():
            items = value if isinstance(value, list) else [value]
            for v in items:
                if isinstance(v, dict):
                    self.analyze_recursive_low_ram(v, prefix=prefix + key + '.')
                else:
                    path = prefix + key
                    self.field_analysis_low_ram[path] = self.field_analysis_low_ram.get(path, 0) + 1
if __name__ == '__main__':
    # Interactive entry point: prompt for the two paths, then run the
    # low-RAM analysis over the given directory.
    directory = input("Directory path: ")
    dump_path = input("Dump file path: ")
    DatamodelBuilderSimple().analyze_directory_low_ram(directory, dump_path)

View File

@@ -0,0 +1,74 @@
import time
from collections import deque
from dataclasses import dataclass, field
from typing import Deque, Tuple
# Window lengths in seconds; "1h" is the longest window and therefore the
# only horizon beyond which history may be discarded.
WINDOWS = {
    "5min": 5 * 60,
    "1h": 60 * 60,
}


@dataclass
class IncrementalCounter:
    """Counter that supports:
      * ``+=``  - increments the internal counter by 1 (operand ignored)
      * ``last_5min``, ``last_hour``, ``total`` - number of increments in
        the last 5 minutes, the last hour, and overall, respectively.
    """

    # Running total of every increment ever made.
    _total: int = 0
    # Monotonic timestamps of each increment, oldest first.
    _history: Deque[float] = field(default_factory=deque, init=False)

    # ---------- += operator ----------
    def __iadd__(self, other):
        """Any ``+=`` bumps the counter by exactly 1; ``other`` is ignored.

        Returns self so augmented assignment rebinds the same object.
        """
        self._total += 1
        # Only the event time is stored, not the operand.
        self._history.append(time.monotonic())
        # Drop entries older than the longest tracked window (1h).
        self._purge_old_entries()
        return self

    # ---------- statistics properties ----------
    @property
    def total(self) -> int:
        """Total number of increments."""
        return self._total

    @property
    def last_5min(self) -> int:
        """Number of increments within the last 5 minutes."""
        return self._count_in_window(WINDOWS["5min"])

    @property
    def last_hour(self) -> int:
        """Number of increments within the last hour."""
        return self._count_in_window(WINDOWS["1h"])

    # ---------- helpers ----------
    def _purge_old_entries(self) -> None:
        """Drop history entries older than the longest window (1 hour)."""
        cutoff = time.monotonic() - WINDOWS["1h"]
        while self._history and self._history[0] < cutoff:
            self._history.popleft()

    def _count_in_window(self, seconds: float) -> int:
        """Count events within the last ``seconds`` without losing history.

        BUG FIX: the previous version popped everything older than the
        *queried* window, so reading ``last_5min`` permanently deleted
        entries the ``last_hour`` window still needed.  Now only the 1h
        horizon is purged; shorter windows are counted non-destructively.
        """
        self._purge_old_entries()
        cutoff = time.monotonic() - seconds
        return sum(1 for ts in self._history if ts >= cutoff)

    # ---------- user-facing repr ----------
    def __repr__(self):
        return (
            f"<IncrementalCounter total={self.total} "
            f"5min={self.last_5min} 1h={self.last_hour}>"
        )