import datetime
import json
import time
import os
import warnings
from collections import defaultdict, Counter
from typing import Dict, List, Any, Tuple, Union
from pathlib import Path

from modules.civit.client import Client


class NetworkError(RuntimeError): pass
class ApiDataError(RuntimeError): pass
class CursorError(RuntimeError): pass


class EntityAnalyzer:

    def __init__(self):
        self.field_analysis = {}

    def _get_json_files(self, directory_path: str) -> List[str]:
        """Returns a list of all JSON files in the directory."""
        json_files = []
        for filename in os.listdir(directory_path):
            if filename.endswith('.json'):
                json_files.append(os.path.join(directory_path, filename))
        return json_files

    def _load_json_data(self, file_path: str) -> List[Dict]:
        """Loads data from a JSON file."""
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
                if isinstance(data, list):
                    return data
                else:
                    return [data]
        except (json.JSONDecodeError, IOError) as e:
            print(f"Error reading file {file_path}: {e}")
            return []

    def _collect_all_entities(self, directory_path: str) -> List[Dict]:
        """Collects all instances from all JSON files."""
        all_entities = []
        json_files = self._get_json_files(directory_path)

        for file_path in json_files:
            entities = self._load_json_data(file_path)
            all_entities.extend(entities)

        return all_entities

    def _get_field_types(self, value: Any) -> str:
        """Determines the type of a value."""
        if isinstance(value, dict):
            return 'dict'
        elif isinstance(value, list):
            return 'list'
        # bool must be checked before int, since bool is a subclass of int
        elif isinstance(value, bool):
            return 'bool'
        elif isinstance(value, int):
            return 'int'
        elif isinstance(value, float):
            return 'float'
        elif isinstance(value, str):
            return 'str'
        else:
            return 'unknown'

    def _get_main_type(self, types: List[str]) -> str:
        """Determines the main type from a list of observed types."""
        if not types:
            return 'unknown'

        # dict or list means a complex (nested) structure
        if 'dict' in types or 'list' in types:
            return 'complex'

        # Otherwise return the single observed type, or 'mixed' if several were seen
        unique_types = set(types)
        if len(unique_types) == 1:
            return types[0]
        else:
            return 'mixed'

    def _is_hashable(self, value: Any) -> bool:
        """Checks whether a value is hashable."""
        try:
            hash(value)
            return True
        except TypeError:
            return False

    def _serialize_value_for_counter(self, value: Any) -> Any:
        """Returns a hashable representation of a value for use in a Counter."""
        if self._is_hashable(value):
            return value
        else:
            # Fall back to the string representation for unhashable types
            return str(value)

    def _analyze_fields_recursive(self, entity: Dict, parent_path: str,
                                  field_types: Dict, field_presence: Dict,
                                  field_values: Dict, top_n: int):
        """Recursively analyzes the fields of an entity."""
        if not isinstance(entity, dict):
            return

        for key, value in entity.items():
            field_path = f"{parent_path}.{key}" if parent_path else key

            # Record the field type
            field_types[field_path].append(self._get_field_types(value))

            # Mark the field as present
            field_presence[field_path].append(True)

            # Store the value for frequency counting (handling unhashable types)
            if value is not None:
                serialized_value = self._serialize_value_for_counter(value)
                field_values[field_path].append(serialized_value)

            # Recurse into nested structures
            if isinstance(value, dict):
                self._analyze_fields_recursive(value, field_path, field_types,
                                               field_presence, field_values, top_n)
            elif isinstance(value, list):
                for item in value:
                    if isinstance(item, dict):
                        self._analyze_fields_recursive(item, field_path, field_types,
                                                       field_presence, field_values, top_n)

    def _analyze_entity_structure(self, entities: List[Dict], top_n: int) -> Dict[str, Any]:
        """Analyzes the structure of all entities."""
        if not entities:
            return {}

        # Collect all fields with their types, presence flags and values
        field_types = defaultdict(list)
        field_presence = defaultdict(list)
        field_values = defaultdict(list)

        for entity in entities:
            self._analyze_fields_recursive(entity, "", field_types, field_presence,
                                           field_values, top_n)

        # Build the final analysis
        result = {}
        for field_path, types in field_types.items():
            # Determine the main type
            main_type = self._get_main_type(types)

            # Count how often the field is present
            presence_count = len(field_presence[field_path])
            total_count = len(entities)
            always_present = presence_count == total_count

            # Collect the top N most frequent values
            top_values = []
            if field_path in field_values:
                try:
                    value_counter = Counter(field_values[field_path])
                    top_values = [item[0] for item in value_counter.most_common(top_n)]
                except Exception:
                    # On any counting error fall back to an empty list
                    top_values = []

            result[field_path] = {
                'type': main_type,
                'always_present': always_present,
                'top_values': top_values,
                'total_count': total_count,
                'presence_count': presence_count
            }

        return result

    def analyze_directory(self, directory_path: str, top_n: int = 10) -> Dict[str, Any]:
        """
        Main entry point for analyzing a directory.

        Args:
            directory_path: Path to the directory with JSON files
            top_n: Number of most frequent values to keep for each field

        Returns:
            Dictionary describing the data structure
        """
        # Step 1: collect all instances from the JSON files
        entities = self._collect_all_entities(directory_path)

        # Step 2: analyze the structure of the entities
        self.field_analysis = self._analyze_entity_structure(entities, top_n)

        return self.field_analysis
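
# A minimal usage sketch for EntityAnalyzer (hypothetical directory name; assumes the
# JSON dumps written by the Fetch crawlers below):
#
#     analyzer = EntityAnalyzer()
#     schema = analyzer.analyze_directory('data/fetch_models', top_n=5)
#     for field, info in schema.items():
#         print(field, info['type'], 'always' if info['always_present'] else 'optional')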


class Fetch:

    def __init__(self, client: Client, delay = 3):
        # Items are keyed by the entity's natural key (id for models/images,
        # username/name for creators/tags), so keys may be ints or strings.
        self.items: dict[Union[int, str], dict] = dict()
        self.cursor_state = 'normal'
        self.client = client
        self.path = client.path
        self.delay_time = delay

    @staticmethod
    def load_json_dir(directory_path):
        """
        Takes a directory path, finds every JSON file in it, reads the lists of
        dictionaries they contain and returns a single list with all dictionaries.

        Args:
            directory_path (str): Path to the directory with JSON files

        Returns:
            list: List of all dictionaries from all JSON files
        """
        all_dicts = []
        files = os.listdir(directory_path)

        # Walk over every file in the directory
        for filename in files:
            if filename.endswith('.json'):
                file_path = os.path.join(directory_path, filename)

                try:
                    with open(file_path, 'r', encoding='utf-8') as f:
                        data = json.load(f)

                        # The data is a list of dictionaries
                        if isinstance(data, list):
                            all_dicts.extend(data)
                        # The data is a single dictionary
                        elif isinstance(data, dict):
                            all_dicts.append(data)

                except (json.JSONDecodeError, IOError) as e:
                    print(f"Error reading file {filename}: {e}")
                    continue

        return all_dicts
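
    # Usage sketch (hypothetical path): Fetch.load_json_dir('data/fetch_tags') returns a
    # flat list of every dict found across the *.json files saved in that directory,
    # regardless of whether each file holds a list or a single object.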

    # Maps entity type -> subdirectory under client.path where pages are saved
    entities = {
        'creator': 'fetch_creators',
        'creators': 'fetch_creators',
        'tag': 'fetch_tags',
        'tags': 'fetch_tags',
        'model': 'fetch_models',
        'models': 'fetch_models',
        'image': 'fetch_images',
        'images': 'fetch_images',
    }

    # Maps entity type -> field used as the deduplication key in self.items
    keys = {
        'creator': 'username',
        'creators': 'username',
        'tag': 'name',
        'tags': 'name',
        'model': 'id',
        'models': 'id',
        'image': 'id',
        'images': 'id',
    }

    @classmethod
    def load(cls, client: Client, entity_type: str):
        if entity_type in cls.entities: subdir = cls.entities[entity_type]
        else: raise ValueError(f"Civit doesn't have entity type {entity_type}")
        res = cls.load_json_dir(str(Path(client.path) / subdir))
        return res

    @classmethod
    def datamodel(cls, client: Client, subdir, top = None):
        if not top: top = 10
        path = Path(client.path) / subdir
        datamodel = EntityAnalyzer().analyze_directory(path, top_n=top)
        return datamodel

    @classmethod
    def _save_json(cls, path, items):
        with open(path, 'w', encoding='utf-8') as f:
            json.dump(items, f, indent=2, ensure_ascii=False)

    @classmethod
    def _msg(cls, msg_type, arg1 = None, arg2 = None, arg3 = None, arg4 = None, arg5 = None, arg6 = None, arg7 = None):
        if msg_type == 'initial': msg = f"Fetching {arg1}..."
        elif msg_type == 'extend_warn': msg = "Warning! This fetch iteration has no effect"
        elif msg_type == 'paginated_progress': msg = f"{arg1}: Fetching page {arg2} of {arg3}"
        elif msg_type == 'network_warn': msg = f"Network error! {arg1}"
        elif msg_type == 'api_warn': msg = f"API data error! {arg1}"
        elif msg_type == 'cursor_warn': msg = f"Cursor slip error! {arg1}"
        else: return

        print(msg)

    def reset(self, entity):
        self._msg('initial', entity)
        if entity not in self.entities: raise RuntimeError(f'Unknown entity: {entity}')
        self.path = Path(self.client.path) / self.entities[entity]
        self.path.mkdir(exist_ok=True)

    def extend(self, entity: str, items: list[dict] = None, save: str = None):
        if entity not in self.keys: raise RuntimeError(f'Unknown entity: {entity}')
        prev_len = len(self.items)
        if items and len(items) > 0:
            # Deduplicate by the entity's natural key (id / username / name)
            for item in items: self.items[item[self.keys[entity]]] = item
        else: raise RuntimeError('Tried to extend with empty items')
        post_len = len(self.items)
        if prev_len == post_len:
            self._msg('extend_warn')
            raise RuntimeWarning('Warning! This fetch iteration has no effect')
        if save: self._save_json(self.path / save, items)

        return post_len - prev_len
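
    # Usage sketch (illustrative data, not from the API): extend() merges a page of
    # items into self.items keyed by the entity's natural key, so repeated ids are
    # collapsed and the return value counts only genuinely new items:
    #
    #     fetch.extend('models', [{'id': 1, 'name': 'a'}, {'id': 1, 'name': 'a'}])  # -> 1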

    def delay(self, mult = 1): time.sleep(self.delay_time * mult)

    @classmethod
    def crawler_paginated_parse_metadata(cls, page):
        metadata = page.get('metadata', None)
        if not metadata: raise RuntimeError("Unable to find metadata")
        total_pages = metadata.get('totalPages', None)
        current_page = metadata.get('currentPage', None)
        if not total_pages or not current_page: raise RuntimeError("Unable to parse metadata")
        print(f"Found! Total pages: {total_pages}")
        return total_pages, current_page

    def crawler_paginated(self, entity: str, save = True):
        self.reset(entity)
        # NOTE: url is not used below; get_creators_tags_raw is called without it
        url = self.client.config.base_url + 'api/v1/' + entity + '?limit=200'
        first_page = self.client.get_creators_tags_raw(entity)
        self.extend(entity, first_page.get('items', None), save='page_1.json' if save else None)
        total_pages, current_page = self.crawler_paginated_parse_metadata(first_page)

        for i in range(2, total_pages + 1):
            self.delay()
            self._msg('paginated_progress', entity, i, total_pages)
            page = self.client.get_creators_tags_raw(entity, page=i)
            self.extend(entity, page.get('items', None), save=f'page_{i}.json' if save else None)

        if save: self.to_file('all.json')
        return self.to_list()

    def to_list(self): return [value for key, value in self.items.items()]

    def to_file(self, filename, dirname = None):
        if not dirname: dirname = self.path
        self._save_json(Path(dirname) / filename, self.to_list())

    def request_get(self, url, json=True, timeout=10, **kwargs):
        try:
            response = self.client.session.get(url, timeout=timeout, **kwargs)
            response.raise_for_status()
            self.delay()
            if json:
                return response.json()
            else:
                return response
        except Exception as e:
            raise NetworkError from e

    @classmethod
    def crawler_cursor_parse_metadata(cls, page):
        metadata = page.get('metadata', None)
        if not metadata: raise RuntimeError("Unable to find metadata")
        next_page = metadata.get('nextPage', None)
        next_cursor = metadata.get('nextCursor', None)
        if not next_page or not next_cursor: raise RuntimeError("Unable to parse metadata")
        return next_page, next_cursor

    @staticmethod
    def cursor_strip(url):
        split = url.split('cursor=', maxsplit=1)
        if len(split) < 2: return url
        prefix = split[0] + 'cursor='
        suffix = split[1].split('%', maxsplit=1)[0]
        return prefix + suffix
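
    # Illustrative example (hypothetical URL): cursor_strip keeps only the part of the
    # cursor value before the first '%', dropping any URL-encoded compound suffix:
    #
    #     cursor_strip('https://example.com/api/v1/images?limit=200&cursor=12345%7C99')
    #     # -> 'https://example.com/api/v1/images?limit=200&cursor=12345'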

    def cursor_decrement(self, url): raise NotImplementedError
    def cursor_increment(self, url): raise NotImplementedError
    def cursor_state_reset(self, cursor = None):
        self.cursor_state = 'normal'
        return cursor

    def cursor_fix(self, url, increment = False):
        if self.cursor_state == 'normal':
            self.cursor_state = 'stripped'
            return self.cursor_strip(url)
        elif self.cursor_state == 'stripped':
            return self.cursor_increment(url) if increment else self.cursor_decrement(url)
        else: raise RuntimeWarning(f'Invalid cursor state: {self.cursor_state}')

    def crawler_cursor_request(self, url, counter = 50, reset = True):
        if reset: self.cursor_state_reset()
        while counter > 0:
            try:
                page = self.request_get(url)
                if not page.get('items', None) or len(page.get('items', None)) == 0: raise ApiDataError
                try:
                    next_page, next_cursor = self.crawler_cursor_parse_metadata(page)
                except RuntimeError:
                    return page
                if next_page == url: raise CursorError
                return page

            except NetworkError as e:
                self.delay(10)
                self._msg('network_warn', str(e))
                url = self.cursor_fix(url)
            except ApiDataError as e:
                self._msg('api_warn', str(e))
                url = self.cursor_fix(url)
            except CursorError as e:
                self._msg('cursor_warn', str(e))
                url = self.cursor_fix(url, increment=True)

            counter -= 1

        return dict()  # TODO handle this error

    @classmethod
    def crawler_cursor_avoid_slip(cls, client: Client, url, path, entity, slip_retries = 5, get_retries = 50, chill_time = 3):
        slip_counter = 0
        get_counter = 0
        page = None
        while True:
            try:
                page = client.make_get_request(url)
                if not page: raise ValueError
                page = page.json()
                if not page.get('items', None) or len(page.get('items', None)) == 0: raise ValueError
                try: next_page, next_cursor = cls.crawler_cursor_parse_metadata(page)
                except RuntimeError: return page
                if next_page == url: raise TypeError
                # raise ValueError
                return page

            except ValueError:
                get_counter = get_counter + 1
                with open(Path(path) / '_get_error.log', 'a') as file:
                    file.write(f'{url}\n')
                if get_counter >= get_retries: return page
                if entity == 'images':
                    print("Trying to avoid the images GET error by decreasing the cursor position by 1")
                    split = url.rsplit('=', maxsplit=1)
                    prefix = split[0] + '='
                    split = split[1].rsplit('%', maxsplit=1)
                    cursor = int(split[0])
                    cursor = cursor - 1

                    # suffix = '%' + split[1]
                    url = prefix + str(cursor)  # + suffix

                print('GET error detected. Waiting 30s for retry')
                time.sleep(30)

            except TypeError:
                slip_counter = slip_counter + 1
                with open(Path(path) / '_slip.log', 'a') as file:
                    file.write(f'{url}\n')
                if slip_counter >= slip_retries: break
                print('Slip error detected. Waiting 30s for retry')
                time.sleep(30)

        if entity not in {'models'}: raise RuntimeError("Slip detected! Avoiding failed: NotImplemented")

        split = url.rsplit('.', 1)
        prefix = split[0] + '.'
        split = split[1].split('%', 1)
        suffix = '%' + split[1]
        num = int(split[0])
        if num < 999:
            num = num + 1
        else:
            raise RuntimeError("Slip avoiding failed: Number overflow")
        url = prefix + f'{num:03d}' + suffix
        page = client.make_get_request(url).json()
        # This URL is cursor-based, so parse the cursor metadata (nextPage/nextCursor)
        next_page, next_cursor = cls.crawler_cursor_parse_metadata(page)
        if next_page != url: return page
        else: raise RuntimeError("Slip avoiding failed: Not effective")

    @classmethod
    def crawler_cursor(cls, client: Client, entity: str, params: dict, save = True):
        print(f"{datetime.datetime.now()} Fetching {entity}...")
        path = Path(client.path) / ('fetch_' + entity)
        items = dict()
        url = f'{client.client.config.base_url}/api/v1/{entity}{client.client.build_query_string(params)}'
        first_page = client.make_get_request(url)
        if not first_page:
            with open(Path(client.path) / 'bugs.log', 'a') as f: f.write(url + '\n')
            return items
        first_page = first_page.json()
        if first_page.get('items', None):
            for i in first_page.get('items', None): items[i['id']] = i
        if save:
            path.mkdir(exist_ok=True)
            cls._save_json(path / 'first.json', [value for key, value in items.items()])
        try: next_page, next_cursor = cls.crawler_cursor_parse_metadata(first_page)
        except RuntimeError: return items
        cc = 0
        while next_page:
            next_page = cls.cursor_strip(next_page)
            time.sleep(3)
            # with open(Path(client.path) / 'bugs.log', 'a') as f:
            #     f.write(next_page + '\n')
            page = cls.crawler_cursor_avoid_slip(client, next_page, path, entity)
            if not page: return items
            page_items = page.get('items', None)
            if page_items is None:
                with open(Path(client.path) / 'bugs.log', 'a') as f: f.write(next_page + '\n')
                return items
            l = len(items)
            for i in page_items: items[i['id']] = i
            print(f"{datetime.datetime.now()} Fetched {len(items) - l}/{len(page_items)} {entity} from page {next_page}")
            if len(items) - l == 0 and cc < 5:
                print("Trying to avoid images cursor corruption by requesting a new cursor")
                next_page = cls.cursor_strip(next_page)
                cc += 1
                continue
            else: cc = 0
            if save: cls._save_json(path / f'page_{next_cursor}.json', page_items)
            try: next_page, next_cursor = cls.crawler_cursor_parse_metadata(page)
            except RuntimeError: break

        if save: cls._save_json(path / 'all.json', items)
        # NOTE: the error paths above return the items dict, while the normal path returns a list
        return [value for key, value in items.items()]

    @classmethod
    def models(cls, client: Client, subdir='fetch_models', save=True):
        return cls.crawler_cursor(client, 'models', {'period': 'AllTime', 'sort': 'Oldest', 'nsfw': 'true'}, save)

    @classmethod
    def images(cls, client: Client, subdir='fetch_images', save=True, start_with = None):
        items = list()
        if not start_with: start_with = 0
        path = Path(client.path) / ('fetch_' + 'images')
        if save: path.mkdir(exist_ok=True)
        creators = [c.get('username', None) for c in cls.load(client, 'creators')]
        counter = 1 + int(start_with)

        c = ['nochekaiser881']
        c.extend(creators[int(start_with):])

        for username in creators[int(start_with):]:
            # for username in ['yonne']:
            time.sleep(3)
            if not username: continue
            page_items = cls.crawler_cursor(client, 'images', {
                'period': 'AllTime', 'sort': 'Oldest', 'nsfw': 'X', 'username': username, 'limit': '200', 'cursor': 0
            }, save=False)

            # page_items = cls._cursor_crawler(client, 'images', {
            #     'period': 'AllTime', 'sort': 'Most%20Reactions', 'nsfw': 'X', 'username': username, 'limit': '200', 'cursor': 0
            # }, save=False)

            if len(page_items) >= 1000:
                with open(path / '_1k.log', 'a') as f: f.write(username + '\n')
            if len(page_items) >= 5000:
                with open(path / '_5k.log', 'a') as f: f.write(username + '\n')
            if len(page_items) >= 10000:
                with open(path / '_10k.log', 'a') as f: f.write(username + '\n')
            if len(page_items) >= 25000:
                with open(path / '_25k.log', 'a') as f: f.write(username + '\n')

            if len(page_items) >= 45000:
                with open(path / '_giants_over_50k.log', 'a') as f: f.write(username + '\n')
                print(f'Giant {username} has more than {len(page_items)} images, starting deep scan')
                page_items_dict = dict()
                for item in page_items: page_items_dict[item['id']] = item
                print(f'Transferred {len(page_items_dict)} images of {len(page_items)}')
                for sort in ['Newest', 'Most%20Reactions', 'Most%20Comments', 'Most%20Collected', ]:
                    page_items = cls.crawler_cursor(client, 'images',
                                                    {'period': 'AllTime', 'sort': sort, 'nsfw': 'X',
                                                     'username': username, 'limit': '200'}, save=False)
                    l = len(page_items_dict)
                    for item in page_items: page_items_dict[item['id']] = item
                    print(f'Added {len(page_items_dict) - l} images by {sort} sort crawl. {len(page_items_dict)} images total')

                page_items = [value for key, value in page_items_dict.items()]

            l = len(items)
            # items.extend(page_items)
            print(f"Fetched {len(page_items)} images by {username} ({counter}/{len(creators)})")
            counter = counter + 1

            if save: cls._save_json(path / f'{username}.json', page_items)

        # if save: cls._save_json(path / 'aaa.json', items)
        return items

    def creators(self, save=True): return self.crawler_paginated('creators', save)
    def tags(self, save=True): return self.crawler_paginated('tags', save)
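

# A minimal end-to-end sketch (hypothetical wiring; Client construction depends on
# modules.civit.client and is not shown here):
#
#     client = Client(...)                           # project-specific setup
#     fetch = Fetch(client, delay=3)
#     creators = fetch.creators(save=True)           # paginated crawl -> fetch_creators/
#     tags = fetch.tags(save=True)                   # paginated crawl -> fetch_tags/
#     models = Fetch.models(client, save=True)       # cursor crawl -> fetch_models/
#     schema = Fetch.datamodel(client, 'fetch_models', top=5)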