diff --git a/app/database.py b/app/database.py index 8dacfaa..2ed03a0 100644 --- a/app/database.py +++ b/app/database.py @@ -95,14 +95,14 @@ def init_fts(): conn = engine.raw_connection() cursor = conn.cursor() - # Create FTS table for history content + # Update FTS table configuration to use trigrams for better partial matching cursor.execute(""" CREATE VIRTUAL TABLE IF NOT EXISTS history_fts USING fts5( title, markdown_content, content='history', content_rowid='id', - tokenize='porter unicode61' + tokenize='trigram' ) """) @@ -137,6 +137,15 @@ def init_fts(): # Initialize FTS tables init_fts() +def reindex_fts(): + """Reindex the FTS tables""" + conn = engine.raw_connection() + cursor = conn.cursor() + cursor.execute("INSERT INTO history_fts(history_fts) VALUES('rebuild')") + conn.commit() + cursor.close() + conn.close() + def get_db(): """Get database session""" db = SessionLocal() diff --git a/app/main.py b/app/main.py index 46912c5..1b403e8 100644 --- a/app/main.py +++ b/app/main.py @@ -115,43 +115,50 @@ async def search_history( """Search history with optimized full-text search""" try: if search_term: - # Optimize FTS query with content-focused ranking + # Modified query to handle title-only searches better fts_query = """ WITH RECURSIVE ranked_results AS ( - SELECT - h.*, - rank * ( - CASE - -- Boost exact phrase matches in content - WHEN h.markdown_content LIKE :exact_pattern THEN 3.0 - -- Boost title matches but less than content - WHEN h.title LIKE :like_pattern THEN 1.5 - -- Base score for other matches - ELSE 1.0 - END - ) + ( - -- Additional boost for recent entries + SELECT DISTINCT h.*, + CASE + -- Boost exact title matches highest + WHEN h.title LIKE :exact_pattern THEN 4.0 + -- Boost title prefix matches + WHEN h.title LIKE :prefix_pattern THEN 3.0 + -- Boost title contains matches + WHEN h.title LIKE :like_pattern THEN 2.0 + -- Lower boost for content matches + WHEN h.markdown_content IS NOT NULL AND ( + h.markdown_content LIKE :exact_pattern OR + h.markdown_content LIKE :prefix_pattern OR + h.markdown_content LIKE :like_pattern + ) THEN 1.0 + ELSE 0.5 + END * ( CAST(strftime('%s', h.visit_time) AS INTEGER) / - CAST(strftime('%s', 'now') AS INTEGER) * 0.5 + CAST(strftime('%s', 'now') AS INTEGER) * 0.5 + 1 ) as final_rank FROM history h - INNER JOIN history_fts f ON h.id = f.rowid - WHERE history_fts MATCH :search - AND (:domain IS NULL OR h.domain = :domain) - AND (:start_date IS NULL OR h.visit_time >= :start_date) - AND (:end_date IS NULL OR h.visit_time <= :end_date) + LEFT JOIN history_fts f ON h.id = f.rowid + WHERE + h.title LIKE :like_pattern + OR (h.markdown_content IS NOT NULL AND history_fts MATCH :search) + AND (:domain IS NULL OR h.domain = :domain) + AND (:start_date IS NULL OR h.visit_time >= :start_date) + AND (:end_date IS NULL OR h.visit_time <= :end_date) ) SELECT * FROM ranked_results + WHERE final_rank > 0 ORDER BY final_rank DESC LIMIT 100 """ - # Prepare parameters with exact phrase matching + # Prepare search patterns for different matching strategies params = { - 'search': search_term, - 'like_pattern': f'%{search_term}%', - 'exact_pattern': f'%{search_term}%', # For exact phrase matching + 'search': f'{search_term}*', # Wildcard suffix matching + 'like_pattern': f'%{search_term}%', # Contains matching + 'exact_pattern': search_term, # Exact matching + 'prefix_pattern': f'{search_term}%', # Prefix matching 'domain': domain, 'start_date': start_date, 'end_date': end_date