Update FTS to support partial and title matches

This commit is contained in:
2025-01-27 10:51:29 -06:00
parent 1134a814d2
commit 20cfdf75e4
2 changed files with 42 additions and 26 deletions

View File

@@ -95,14 +95,14 @@ def init_fts():
conn = engine.raw_connection() conn = engine.raw_connection()
cursor = conn.cursor() cursor = conn.cursor()
# Create FTS table for history content # Update FTS table configuration to use trigrams for better partial matching
cursor.execute(""" cursor.execute("""
CREATE VIRTUAL TABLE IF NOT EXISTS history_fts USING fts5( CREATE VIRTUAL TABLE IF NOT EXISTS history_fts USING fts5(
title, title,
markdown_content, markdown_content,
content='history', content='history',
content_rowid='id', content_rowid='id',
tokenize='porter unicode61' tokenize='trigram'
) )
""") """)
@@ -137,6 +137,15 @@ def init_fts():
# Initialize FTS tables # Initialize FTS tables
init_fts() init_fts()
def reindex_fts():
    """Rebuild the ``history_fts`` full-text index.

    Runs the FTS5 special ``'rebuild'`` command against ``history_fts`` on a
    raw DB-API connection obtained from the module-level ``engine`` and
    commits the result.

    The cursor and the connection are always closed, even if the rebuild or
    the commit raises; the exception then propagates to the caller.

    NOTE(review): 'rebuild' regenerates the index contents only. If the table
    was created earlier with a different ``tokenize`` setting, it presumably
    has to be dropped and recreated for a new tokenizer to apply -- confirm.
    """
    conn = engine.raw_connection()
    try:
        cursor = conn.cursor()
        try:
            # FTS5 special command: INSERT INTO <tbl>(<tbl>) VALUES('rebuild')
            cursor.execute("INSERT INTO history_fts(history_fts) VALUES('rebuild')")
            conn.commit()
        finally:
            # Release the cursor whether or not the rebuild succeeded.
            cursor.close()
    finally:
        # Always close the raw connection so a failure does not leak it.
        conn.close()
def get_db(): def get_db():
"""Get database session""" """Get database session"""
db = SessionLocal() db = SessionLocal()

View File

@@ -115,43 +115,50 @@ async def search_history(
"""Search history with optimized full-text search""" """Search history with optimized full-text search"""
try: try:
if search_term: if search_term:
# Optimize FTS query with content-focused ranking # Modified query to handle title-only searches better
fts_query = """ fts_query = """
WITH RECURSIVE WITH RECURSIVE
ranked_results AS ( ranked_results AS (
SELECT SELECT DISTINCT h.*,
h.*,
rank * (
CASE CASE
-- Boost exact phrase matches in content -- Boost exact title matches highest
WHEN h.markdown_content LIKE :exact_pattern THEN 3.0 WHEN h.title LIKE :exact_pattern THEN 4.0
-- Boost title matches but less than content -- Boost title prefix matches
WHEN h.title LIKE :like_pattern THEN 1.5 WHEN h.title LIKE :prefix_pattern THEN 3.0
-- Base score for other matches -- Boost title contains matches
ELSE 1.0 WHEN h.title LIKE :like_pattern THEN 2.0
END -- Lower boost for content matches
) + ( WHEN h.markdown_content IS NOT NULL AND (
-- Additional boost for recent entries h.markdown_content LIKE :exact_pattern OR
h.markdown_content LIKE :prefix_pattern OR
h.markdown_content LIKE :like_pattern
) THEN 1.0
ELSE 0.5
END * (
CAST(strftime('%s', h.visit_time) AS INTEGER) / CAST(strftime('%s', h.visit_time) AS INTEGER) /
CAST(strftime('%s', 'now') AS INTEGER) * 0.5 CAST(strftime('%s', 'now') AS INTEGER) * 0.5 + 1
) as final_rank ) as final_rank
FROM history h FROM history h
INNER JOIN history_fts f ON h.id = f.rowid LEFT JOIN history_fts f ON h.id = f.rowid
WHERE history_fts MATCH :search WHERE
h.title LIKE :like_pattern
OR (h.markdown_content IS NOT NULL AND history_fts MATCH :search)
AND (:domain IS NULL OR h.domain = :domain) AND (:domain IS NULL OR h.domain = :domain)
AND (:start_date IS NULL OR h.visit_time >= :start_date) AND (:start_date IS NULL OR h.visit_time >= :start_date)
AND (:end_date IS NULL OR h.visit_time <= :end_date) AND (:end_date IS NULL OR h.visit_time <= :end_date)
) )
SELECT * FROM ranked_results SELECT * FROM ranked_results
WHERE final_rank > 0
ORDER BY final_rank DESC ORDER BY final_rank DESC
LIMIT 100 LIMIT 100
""" """
# Prepare parameters with exact phrase matching # Prepare search patterns for different matching strategies
params = { params = {
'search': search_term, 'search': f'{search_term}*', # Wildcard suffix matching
'like_pattern': f'%{search_term}%', 'like_pattern': f'%{search_term}%', # Contains matching
'exact_pattern': f'%{search_term}%', # For exact phrase matching 'exact_pattern': search_term, # Exact matching
'prefix_pattern': f'{search_term}%', # Prefix matching
'domain': domain, 'domain': domain,
'start_date': start_date, 'start_date': start_date,
'end_date': end_date 'end_date': end_date