from sqlalchemy import create_engine, Column, Integer, String, DateTime, Text, event, text from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.orm import sessionmaker from datetime import datetime import sqlite3 SQLALCHEMY_DATABASE_URL = "sqlite:///./browser_history.db" # Create engine with custom configuration engine = create_engine( SQLALCHEMY_DATABASE_URL, connect_args={ "timeout": 30, # Connection timeout in seconds "check_same_thread": False, # Allow multi-threaded access }, # Update pool configuration for better concurrency pool_size=5, # Increase pool size to handle concurrent requests max_overflow=10, # Allow some overflow connections pool_timeout=30, # Connection timeout from pool pool_recycle=3600, # Recycle connections every hour ) SessionLocal = sessionmaker( autocommit=False, autoflush=False, bind=engine, expire_on_commit=False # Prevent unnecessary reloads ) Base = declarative_base() @event.listens_for(engine, "connect") def set_sqlite_pragma(dbapi_connection, connection_record): """Configure SQLite for better performance""" if isinstance(dbapi_connection, sqlite3.Connection): cursor = dbapi_connection.cursor() # Enable WAL mode for better write performance and concurrency cursor.execute("PRAGMA journal_mode=WAL") # Set page size to 4KB for better performance cursor.execute("PRAGMA page_size=4096") # Set cache size to 32MB (-32000 pages * 4KB per page = ~32MB) cursor.execute("PRAGMA cache_size=-32000") # Enable memory-mapped I/O for better performance cursor.execute("PRAGMA mmap_size=268435456") # 256MB # Set synchronous mode to NORMAL for better write performance cursor.execute("PRAGMA synchronous=NORMAL") # Enable foreign key support cursor.execute("PRAGMA foreign_keys=ON") cursor.close() class HistoryEntry(Base): __tablename__ = "history" id = Column(Integer, primary_key=True) url = Column(String, index=True) # Add index for URL lookups title = Column(String) visit_time = Column(DateTime, index=True) # Add index for time-based queries domain = Column(String, index=True) # Add index for domain filtering markdown_content = Column(Text, nullable=True) last_content_update = Column(DateTime, nullable=True) __table_args__ = ( # Composite index for common query patterns {'sqlite_with_rowid': True} # Ensure we have rowids for better performance ) class Bookmark(Base): __tablename__ = "bookmarks" id = Column(Integer, primary_key=True) url = Column(String, index=True) title = Column(String, nullable=True) added_time = Column(DateTime, index=True) folder = Column(String, index=True) domain = Column(String, index=True) __table_args__ = ( # Composite index for common query patterns {'sqlite_with_rowid': True} # Ensure we have rowids for better performance ) # Create tables Base.metadata.create_all(bind=engine) # Initialize FTS tables for full-text search def init_fts(): """Initialize Full Text Search tables""" conn = engine.raw_connection() cursor = conn.cursor() # Create FTS table with content and title columns cursor.execute(""" CREATE VIRTUAL TABLE IF NOT EXISTS history_fts USING fts5( title, markdown_content, domain, -- Add domain for filtering visit_time UNINDEXED, -- Add visit_time but don't index it content='history', content_rowid='id', tokenize='trigram' ) """) # Update triggers to include domain and visit_time cursor.execute(""" CREATE TRIGGER IF NOT EXISTS history_ai AFTER INSERT ON history BEGIN INSERT INTO history_fts(rowid, title, markdown_content, domain, visit_time) VALUES (new.id, new.title, new.markdown_content, new.domain, new.visit_time); END; """) cursor.execute(""" CREATE TRIGGER IF NOT EXISTS history_ad AFTER DELETE ON history BEGIN INSERT INTO history_fts(history_fts, rowid, title, markdown_content, domain, visit_time) VALUES('delete', old.id, old.title, old.markdown_content, old.domain, old.visit_time); END; """) cursor.execute(""" CREATE TRIGGER IF NOT EXISTS history_au AFTER UPDATE ON history BEGIN INSERT INTO history_fts(history_fts, rowid, title, markdown_content, domain, visit_time) VALUES('delete', old.id, old.title, old.markdown_content, old.domain, old.visit_time); INSERT INTO history_fts(rowid, title, markdown_content, domain, visit_time) VALUES (new.id, new.title, new.markdown_content, new.domain, new.visit_time); END; """) conn.commit() cursor.close() conn.close() # Initialize FTS tables init_fts() def reindex_fts(): """Reindex the FTS tables""" conn = engine.raw_connection() cursor = conn.cursor() cursor.execute("INSERT INTO history_fts(history_fts) VALUES('rebuild')") conn.commit() cursor.close() conn.close() def get_db(): """Get database session""" db = SessionLocal() try: yield db finally: db.close() def get_last_processed_timestamp(source): """ Get last processed timestamp for a source (e.g., 'chrome_history', 'chrome_bookmarks') """ db = next(get_db()) try: result = db.execute( text('SELECT last_timestamp FROM last_processed WHERE source = :source'), {'source': source} ).fetchone() return result[0] if result else 0 finally: db.close() def update_last_processed_timestamp(source, timestamp): """ Update last processed timestamp for a source """ db = next(get_db()) try: db.execute( text(''' INSERT OR REPLACE INTO last_processed (source, last_timestamp) VALUES (:source, :timestamp) '''), {'source': source, 'timestamp': timestamp} ) db.commit() finally: db.close() def create_tables(): db = next(get_db()) try: db.execute( text(''' CREATE TABLE IF NOT EXISTS last_processed ( source TEXT PRIMARY KEY, last_timestamp INTEGER ) ''') ) db.commit() finally: db.close() def search_history(query, domain=None, start_date=None, end_date=None, db=None): """ Search history using FTS5 with proper ranking """ if db is None: db = next(get_db()) try: # Build the FTS query fts_query = f'"{query}"' # Exact phrase if domain: fts_query += f' AND domain:"{domain}"' # Build date filter conditions date_conditions = [] params = {'query': query} if start_date: date_conditions.append("visit_time >= :start_date") params['start_date'] = start_date if end_date: date_conditions.append("visit_time <= :end_date") params['end_date'] = end_date date_filter = f"AND {' AND '.join(date_conditions)}" if date_conditions else "" # Execute the search query sql_query = f""" SELECT h.*, bm25(history_fts) as rank, highlight(history_fts, 0, '', '') as title_highlight, highlight(history_fts, 1, '', '') as content_highlight FROM history_fts JOIN history h ON history_fts.rowid = h.id WHERE history_fts MATCH :query {date_filter} ORDER BY rank, visit_time DESC LIMIT 100 """ results = db.execute(text(sql_query), params).fetchall() return results except Exception as e: print(f"Search error: {e}") return [] def recreate_fts_tables(): """Drop and recreate the FTS tables""" conn = engine.raw_connection() cursor = conn.cursor() try: # Drop existing FTS table and triggers cursor.execute("DROP TRIGGER IF EXISTS history_ai") cursor.execute("DROP TRIGGER IF EXISTS history_ad") cursor.execute("DROP TRIGGER IF EXISTS history_au") cursor.execute("DROP TABLE IF EXISTS history_fts") # Recreate FTS tables and triggers init_fts() # Reindex all existing content cursor.execute(""" INSERT INTO history_fts(rowid, title, markdown_content, domain, visit_time) SELECT id, title, markdown_content, domain, visit_time FROM history """) conn.commit() print("Successfully recreated FTS tables and reindexed content") except Exception as e: conn.rollback() print(f"Error recreating FTS tables: {e}") finally: cursor.close() conn.close()