mirror of
https://github.com/Zetaphor/browser-recall.git
synced 2025-12-06 10:29:38 +00:00
Update FTS to support partial and title matches
This commit is contained in:
@@ -95,14 +95,14 @@ def init_fts():
|
||||
conn = engine.raw_connection()
|
||||
cursor = conn.cursor()
|
||||
|
||||
# Create FTS table for history content
|
||||
# Update FTS table configuration to use trigrams for better partial matching
|
||||
cursor.execute("""
|
||||
CREATE VIRTUAL TABLE IF NOT EXISTS history_fts USING fts5(
|
||||
title,
|
||||
markdown_content,
|
||||
content='history',
|
||||
content_rowid='id',
|
||||
tokenize='porter unicode61'
|
||||
tokenize='trigram'
|
||||
)
|
||||
""")
|
||||
|
||||
@@ -137,6 +137,15 @@ def init_fts():
|
||||
# Initialize FTS tables
|
||||
init_fts()
|
||||
|
||||
def reindex_fts():
    """Rebuild the ``history_fts`` full-text index.

    Runs the FTS5 ``'rebuild'`` special command against ``history_fts`` on a
    raw DB-API connection obtained from ``engine`` and commits the result.

    The cursor and the connection are always closed, even when the rebuild
    or the commit raises, so a failed reindex does not leave them open. Any
    exception from the database driver propagates to the caller unchanged.
    """
    conn = engine.raw_connection()
    try:
        cursor = conn.cursor()
        try:
            # FTS5 special command: writing 'rebuild' into the column named
            # after the table discards the index and regenerates it.
            cursor.execute("INSERT INTO history_fts(history_fts) VALUES('rebuild')")
            conn.commit()
        finally:
            cursor.close()
    finally:
        conn.close()
|
||||
|
||||
def get_db():
|
||||
"""Get database session"""
|
||||
db = SessionLocal()
|
||||
|
||||
55
app/main.py
55
app/main.py
@@ -115,43 +115,50 @@ async def search_history(
|
||||
"""Search history with optimized full-text search"""
|
||||
try:
|
||||
if search_term:
|
||||
# Optimize FTS query with content-focused ranking
|
||||
# Modified query to handle title-only searches better
|
||||
fts_query = """
|
||||
WITH RECURSIVE
|
||||
ranked_results AS (
|
||||
SELECT
|
||||
h.*,
|
||||
rank * (
|
||||
CASE
|
||||
-- Boost exact phrase matches in content
|
||||
WHEN h.markdown_content LIKE :exact_pattern THEN 3.0
|
||||
-- Boost title matches but less than content
|
||||
WHEN h.title LIKE :like_pattern THEN 1.5
|
||||
-- Base score for other matches
|
||||
ELSE 1.0
|
||||
END
|
||||
) + (
|
||||
-- Additional boost for recent entries
|
||||
SELECT DISTINCT h.*,
|
||||
CASE
|
||||
-- Boost exact title matches highest
|
||||
WHEN h.title LIKE :exact_pattern THEN 4.0
|
||||
-- Boost title prefix matches
|
||||
WHEN h.title LIKE :prefix_pattern THEN 3.0
|
||||
-- Boost title contains matches
|
||||
WHEN h.title LIKE :like_pattern THEN 2.0
|
||||
-- Lower boost for content matches
|
||||
WHEN h.markdown_content IS NOT NULL AND (
|
||||
h.markdown_content LIKE :exact_pattern OR
|
||||
h.markdown_content LIKE :prefix_pattern OR
|
||||
h.markdown_content LIKE :like_pattern
|
||||
) THEN 1.0
|
||||
ELSE 0.5
|
||||
END * (
|
||||
CAST(strftime('%s', h.visit_time) AS INTEGER) /
|
||||
CAST(strftime('%s', 'now') AS INTEGER) * 0.5
|
||||
CAST(strftime('%s', 'now') AS INTEGER) * 0.5 + 1
|
||||
) as final_rank
|
||||
FROM history h
|
||||
INNER JOIN history_fts f ON h.id = f.rowid
|
||||
WHERE history_fts MATCH :search
|
||||
AND (:domain IS NULL OR h.domain = :domain)
|
||||
AND (:start_date IS NULL OR h.visit_time >= :start_date)
|
||||
AND (:end_date IS NULL OR h.visit_time <= :end_date)
|
||||
LEFT JOIN history_fts f ON h.id = f.rowid
|
||||
WHERE
|
||||
h.title LIKE :like_pattern
|
||||
OR (h.markdown_content IS NOT NULL AND history_fts MATCH :search)
|
||||
AND (:domain IS NULL OR h.domain = :domain)
|
||||
AND (:start_date IS NULL OR h.visit_time >= :start_date)
|
||||
AND (:end_date IS NULL OR h.visit_time <= :end_date)
|
||||
)
|
||||
SELECT * FROM ranked_results
|
||||
WHERE final_rank > 0
|
||||
ORDER BY final_rank DESC
|
||||
LIMIT 100
|
||||
"""
|
||||
|
||||
# Prepare parameters with exact phrase matching
|
||||
# Prepare search patterns for different matching strategies
|
||||
params = {
|
||||
'search': search_term,
|
||||
'like_pattern': f'%{search_term}%',
|
||||
'exact_pattern': f'%{search_term}%', # For exact phrase matching
|
||||
'search': f'{search_term}*', # Wildcard suffix matching
|
||||
'like_pattern': f'%{search_term}%', # Contains matching
|
||||
'exact_pattern': search_term, # Exact matching
|
||||
'prefix_pattern': f'{search_term}%', # Prefix matching
|
||||
'domain': domain,
|
||||
'start_date': start_date,
|
||||
'end_date': end_date
|
||||
|
||||
Reference in New Issue
Block a user