Files
Bacruru Sakaguchi 1ddfe375e5 feat(core): add recursive loading and analyzer tools
Add recursive thread loading method to load threads with all messages and images from the database.

Add support for image captions in the database schema and Image model.

Introduce new analyzer tools for thread management:
- Load thread names from database
- Group threads by similarity
- Merge thread groups

Update main menu to include new Analyzer submenu and options to process threads without downloading images.
2026-02-18 18:19:33 +00:00

175 lines
5.5 KiB
Python
Executable File

"""
SQLite database module for storing forum data.
This module provides functionality to store parsed forum data in SQLite database.
"""
import sqlite3
from typing import Optional
from ..Datamodel import Thread, Message, Image
from .table import Table
from .thread_table import ThreadTable
from .message_table import MessageTable
from .image_table import ImageTable
class Database:
    """
    SQLite database for storing forum thread data.

    The class owns a single SQLite connection and three table helper
    objects built on top of it. It can be used as a context manager, in
    which case the connection is closed when the ``with`` block exits.

    Attributes:
        db_path: Path to the SQLite database file
        connection: SQLite database connection
        thread_table: Thread table object
        message_table: Message table object
        image_table: Image table object
    """

    def __init__(self, db_path: str):
        """
        Initialize the database and create tables.

        Args:
            db_path: Path to the SQLite database file
        """
        self.db_path = db_path
        self.connection = None
        self.thread_table = None
        self.message_table = None
        self.image_table = None
        self._create_tables()

    def _create_tables(self):
        """
        Open the connection and create database tables if they don't exist.

        Raises:
            Exception: Whatever the table helpers raise is propagated
                unchanged, after the freshly opened connection is closed.
        """
        self.connection = sqlite3.connect(self.db_path)
        try:
            # Create table objects
            self.thread_table = ThreadTable(self.connection)
            self.message_table = MessageTable(self.connection)
            self.image_table = ImageTable(self.connection)
            # Create tables
            self.thread_table.create()
            self.message_table.create()
            self.image_table.create()
        except Exception:
            # Do not leak an open connection when setup fails half-way.
            self.connection.close()
            self.connection = None
            raise

    def save_thread_recursive(self, thread: "Thread") -> int:
        """
        Recursively save a thread, all its messages, and all images.

        Each object is handed to its table's ``save`` method; a truthy
        result is counted as "saved", a falsy one is reported as already
        existing. Progress is printed for every object.

        Args:
            thread: Thread object to save

        Returns:
            Number of objects that were actually saved (not skipped)
        """
        saved_count = 0

        # Save thread
        if self.thread_table.save(thread):
            saved_count += 1
            print(f"✓ Saved thread: {thread.title}")
        else:
            print(f"⊘ Thread already exists: {thread.title}")

        # Save messages
        for message in thread.messages:
            if self.message_table.save(message):
                saved_count += 1
                print(f"✓ Saved message: {message.id}")
            else:
                print(f"⊘ Message already exists: {message.id}")

            # Images are saved even when their message was skipped.
            for image in message.images:
                if self.image_table.save(image):
                    saved_count += 1
                    print(f"✓ Saved image: {image.name}")
                else:
                    print(f"⊘ Image already exists: {image.name}")

        return saved_count

    def close(self):
        """Close the database connection if one has been opened."""
        if self.connection is not None:
            self.connection.close()

    def __enter__(self):
        """Context manager entry; returns this Database instance."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit; closes the connection, never suppresses errors."""
        self.close()

    def load_thread_recursive(self, thread_id: str) -> Optional["Thread"]:
        """
        Recursively load a thread, all its messages, and all images.

        Args:
            thread_id: Thread ID to load

        Returns:
            Thread object with all messages and images loaded, or None if not found
        """
        # Load thread
        thread = self.thread_table.load(thread_id)
        if thread is None:
            print(f"⊘ Thread not found: {thread_id}")
            return None
        print(f"✓ Loaded thread: {thread.title}")

        # Load messages for this thread
        thread.messages = []
        for message_id in self._get_message_ids_for_thread(thread_id):
            message = self.message_table.load(message_id)
            if not message:
                # Row vanished or could not be loaded; skip it and its images.
                continue
            thread.messages.append(message)
            print(f"✓ Loaded message: {message_id}")

            # Load images for this message.
            # NOTE(review): message.images is appended to without being
            # reset first; if MessageTable.load already fills it, images
            # would be duplicated - confirm against MessageTable.load.
            for image_url in self._get_image_urls_for_message(message_id):
                image = self.image_table.load(image_url, include_data=True)
                if image:
                    message.images.append(image)
                    print(f"✓ Loaded image: {image.name}")

        return thread

    def _fetch_first_column(self, sql: str, params: tuple) -> list:
        """
        Run a parameterized query and return the first column of every row.

        A short-lived local cursor is used and always closed, so no cursor
        state is left behind on the instance.

        Args:
            sql: SQL statement with ``?`` placeholders
            params: Values bound to the placeholders

        Returns:
            List holding the first column of each result row
        """
        cursor = self.connection.cursor()
        try:
            cursor.execute(sql, params)
            return [row[0] for row in cursor.fetchall()]
        finally:
            cursor.close()

    def _get_message_ids_for_thread(self, thread_id: str) -> list:
        """
        Get all message IDs for a thread from the database.

        Args:
            thread_id: Thread ID to get messages for

        Returns:
            List of message IDs
        """
        return self._fetch_first_column(
            'SELECT id FROM messages WHERE thread_id = ?',
            (thread_id,),
        )

    def _get_image_urls_for_message(self, message_id: str) -> list:
        """
        Get all image URLs for a message from the database.

        Args:
            message_id: Message ID to get images for

        Returns:
            List of image URLs
        """
        return self._fetch_first_column(
            'SELECT url FROM images WHERE message_id = ?',
            (message_id,),
        )