Add recursive thread loading method to load threads with all messages and images from the database.
Add support for image captions in the database schema and Image model.
Introduce new analyzer tools for thread management:
- Load thread names from database
- Group threads by similarity
- Merge thread groups
Update main menu to include new Analyzer submenu and options to process threads without downloading images.
175 lines
5.5 KiB
Python
Executable File
175 lines
5.5 KiB
Python
Executable File
"""
|
|
SQLite database module for storing forum data.

This module provides functionality to store parsed forum data in an SQLite database.
"""

import sqlite3
from contextlib import closing
from typing import Optional

from ..Datamodel import Thread, Message, Image
from .table import Table
from .thread_table import ThreadTable
from .message_table import MessageTable
from .image_table import ImageTable


class Database:
    """
    SQLite database for storing forum thread data.

    On construction a connection to ``db_path`` is opened, the three table
    helpers are built on top of that single connection, and each helper is
    asked to create its table. Instances can be used as context managers;
    the connection is closed when the ``with`` block exits.

    Attributes:
        db_path: Path to the SQLite database file
        connection: SQLite database connection (``None`` once closed)
        thread_table: Thread table object
        message_table: Message table object
        image_table: Image table object
    """

    def __init__(self, db_path: str):
        """
        Initialize the database and create tables.

        Args:
            db_path: Path to the SQLite database file
        """
        self.db_path = db_path
        self.connection = None
        self.thread_table = None
        self.message_table = None
        self.image_table = None
        self._create_tables()

    def _create_tables(self):
        """Open the connection and create database tables if they don't exist.

        Raises:
            Exception: Whatever the table helpers raise; the connection is
                closed again before the error propagates.
        """
        self.connection = sqlite3.connect(self.db_path)
        try:
            # Create table objects; all of them share the one connection.
            self.thread_table = ThreadTable(self.connection)
            self.message_table = MessageTable(self.connection)
            self.image_table = ImageTable(self.connection)

            # Create tables
            self.thread_table.create()
            self.message_table.create()
            self.image_table.create()
        except Exception:
            # Do not leak an open connection when setup fails half-way.
            self.connection.close()
            self.connection = None
            raise

    def save_thread_recursive(self, thread: "Thread") -> int:
        """
        Recursively save a thread, all its messages, and all images.

        Each object is handed to its table's ``save``; a truthy result is
        counted as "saved", a falsy one is reported as already existing.

        Args:
            thread: Thread object to save

        Returns:
            Number of objects that were actually saved (not skipped)
        """
        saved_count = 0

        # Save thread
        if self.thread_table.save(thread):
            saved_count += 1
            print(f"✓ Saved thread: {thread.title}")
        else:
            print(f"⊘ Thread already exists: {thread.title}")

        # Save messages
        for message in thread.messages:
            if self.message_table.save(message):
                saved_count += 1
                print(f"✓ Saved message: {message.id}")
            else:
                print(f"⊘ Message already exists: {message.id}")

            # Save images for this message (done even if the message itself
            # was skipped, so newly seen images are still stored).
            for image in message.images:
                if self.image_table.save(image):
                    saved_count += 1
                    print(f"✓ Saved image: {image.name}")
                else:
                    print(f"⊘ Image already exists: {image.name}")

        return saved_count

    def close(self):
        """Close the database connection. Calling it again is a no-op."""
        if self.connection is not None:
            self.connection.close()
            # Reset to the "not connected" state used by __init__.
            self.connection = None

    def __enter__(self):
        """Context manager entry."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit: close the connection, never suppress errors."""
        self.close()

    def load_thread_recursive(self, thread_id: str) -> Optional["Thread"]:
        """
        Recursively load a thread, all its messages, and all images.

        Args:
            thread_id: Thread ID to load

        Returns:
            Thread object with all messages and images loaded, or None if not found
        """
        # Load thread
        thread = self.thread_table.load(thread_id)
        if thread is None:
            print(f"⊘ Thread not found: {thread_id}")
            return None

        print(f"✓ Loaded thread: {thread.title}")

        # Load messages for this thread
        thread.messages = []
        for message_id in self._get_message_ids_for_thread(thread_id):
            message = self.message_table.load(message_id)
            if message is None:
                continue
            thread.messages.append(message)
            print(f"✓ Loaded message: {message_id}")

            # Start from an empty list, mirroring thread.messages above, so a
            # pre-populated or shared list on the loaded message cannot lead
            # to duplicated or cross-contaminated images.
            message.images = []
            for image_url in self._get_image_urls_for_message(message_id):
                image = self.image_table.load(image_url, include_data=True)
                if image is None:
                    continue
                message.images.append(image)
                print(f"✓ Loaded image: {image.name}")

        return thread

    def _get_message_ids_for_thread(self, thread_id: str) -> list:
        """
        Get all message IDs for a thread from the database.

        Args:
            thread_id: Thread ID to get messages for

        Returns:
            List of message IDs
        """
        # NOTE(review): the query has no ORDER BY, so the order of the IDs is
        # whatever SQLite returns — confirm callers do not rely on it.
        with closing(self.connection.cursor()) as cursor:
            cursor.execute(
                "SELECT id FROM messages WHERE thread_id = ?",
                (thread_id,),
            )
            return [row[0] for row in cursor.fetchall()]

    def _get_image_urls_for_message(self, message_id: str) -> list:
        """
        Get all image URLs for a message from the database.

        Args:
            message_id: Message ID to get images for

        Returns:
            List of image URLs
        """
        with closing(self.connection.cursor()) as cursor:
            cursor.execute(
                "SELECT url FROM images WHERE message_id = ?",
                (message_id,),
            )
            return [row[0] for row in cursor.fetchall()]