mirror of
https://github.com/Zetaphor/browser-recall.git
synced 2025-12-06 02:19:37 +00:00
Update FTS to support partial and title matches
This commit is contained in:
@@ -95,14 +95,14 @@ def init_fts():
|
|||||||
conn = engine.raw_connection()
|
conn = engine.raw_connection()
|
||||||
cursor = conn.cursor()
|
cursor = conn.cursor()
|
||||||
|
|
||||||
# Create FTS table for history content
|
# Update FTS table configuration to use trigrams for better partial matching
|
||||||
cursor.execute("""
|
cursor.execute("""
|
||||||
CREATE VIRTUAL TABLE IF NOT EXISTS history_fts USING fts5(
|
CREATE VIRTUAL TABLE IF NOT EXISTS history_fts USING fts5(
|
||||||
title,
|
title,
|
||||||
markdown_content,
|
markdown_content,
|
||||||
content='history',
|
content='history',
|
||||||
content_rowid='id',
|
content_rowid='id',
|
||||||
tokenize='porter unicode61'
|
tokenize='trigram'
|
||||||
)
|
)
|
||||||
""")
|
""")
|
||||||
|
|
||||||
@@ -137,6 +137,15 @@ def init_fts():
|
|||||||
# Initialize FTS tables
|
# Initialize FTS tables
|
||||||
init_fts()
|
init_fts()
|
||||||
|
|
||||||
|
def reindex_fts():
    """Rebuild the ``history_fts`` full-text index.

    Runs the FTS5 ``'rebuild'`` special command against ``history_fts`` on a
    raw DB-API connection obtained from ``engine`` and commits the result.

    The cursor and the connection are always released, even when the rebuild
    or the commit raises, so a failed reindex does not leak a connection.
    Any database error is propagated to the caller unchanged.
    """
    conn = engine.raw_connection()
    try:
        cursor = conn.cursor()
        try:
            # FTS5 special command: the table name is used as the column name.
            cursor.execute("INSERT INTO history_fts(history_fts) VALUES('rebuild')")
            conn.commit()
        finally:
            cursor.close()
    finally:
        conn.close()
|
||||||
|
|
||||||
def get_db():
|
def get_db():
|
||||||
"""Get database session"""
|
"""Get database session"""
|
||||||
db = SessionLocal()
|
db = SessionLocal()
|
||||||
|
|||||||
55
app/main.py
55
app/main.py
@@ -115,43 +115,50 @@ async def search_history(
|
|||||||
"""Search history with optimized full-text search"""
|
"""Search history with optimized full-text search"""
|
||||||
try:
|
try:
|
||||||
if search_term:
|
if search_term:
|
||||||
# Optimize FTS query with content-focused ranking
|
# Modified query to handle title-only searches better
|
||||||
fts_query = """
|
fts_query = """
|
||||||
WITH RECURSIVE
|
WITH RECURSIVE
|
||||||
ranked_results AS (
|
ranked_results AS (
|
||||||
SELECT
|
SELECT DISTINCT h.*,
|
||||||
h.*,
|
CASE
|
||||||
rank * (
|
-- Boost exact title matches highest
|
||||||
CASE
|
WHEN h.title LIKE :exact_pattern THEN 4.0
|
||||||
-- Boost exact phrase matches in content
|
-- Boost title prefix matches
|
||||||
WHEN h.markdown_content LIKE :exact_pattern THEN 3.0
|
WHEN h.title LIKE :prefix_pattern THEN 3.0
|
||||||
-- Boost title matches but less than content
|
-- Boost title contains matches
|
||||||
WHEN h.title LIKE :like_pattern THEN 1.5
|
WHEN h.title LIKE :like_pattern THEN 2.0
|
||||||
-- Base score for other matches
|
-- Lower boost for content matches
|
||||||
ELSE 1.0
|
WHEN h.markdown_content IS NOT NULL AND (
|
||||||
END
|
h.markdown_content LIKE :exact_pattern OR
|
||||||
) + (
|
h.markdown_content LIKE :prefix_pattern OR
|
||||||
-- Additional boost for recent entries
|
h.markdown_content LIKE :like_pattern
|
||||||
|
) THEN 1.0
|
||||||
|
ELSE 0.5
|
||||||
|
END * (
|
||||||
CAST(strftime('%s', h.visit_time) AS INTEGER) /
|
CAST(strftime('%s', h.visit_time) AS INTEGER) /
|
||||||
CAST(strftime('%s', 'now') AS INTEGER) * 0.5
|
CAST(strftime('%s', 'now') AS INTEGER) * 0.5 + 1
|
||||||
) as final_rank
|
) as final_rank
|
||||||
FROM history h
|
FROM history h
|
||||||
INNER JOIN history_fts f ON h.id = f.rowid
|
LEFT JOIN history_fts f ON h.id = f.rowid
|
||||||
WHERE history_fts MATCH :search
|
WHERE
|
||||||
AND (:domain IS NULL OR h.domain = :domain)
|
h.title LIKE :like_pattern
|
||||||
AND (:start_date IS NULL OR h.visit_time >= :start_date)
|
OR (h.markdown_content IS NOT NULL AND history_fts MATCH :search)
|
||||||
AND (:end_date IS NULL OR h.visit_time <= :end_date)
|
AND (:domain IS NULL OR h.domain = :domain)
|
||||||
|
AND (:start_date IS NULL OR h.visit_time >= :start_date)
|
||||||
|
AND (:end_date IS NULL OR h.visit_time <= :end_date)
|
||||||
)
|
)
|
||||||
SELECT * FROM ranked_results
|
SELECT * FROM ranked_results
|
||||||
|
WHERE final_rank > 0
|
||||||
ORDER BY final_rank DESC
|
ORDER BY final_rank DESC
|
||||||
LIMIT 100
|
LIMIT 100
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# Prepare parameters with exact phrase matching
|
# Prepare search patterns for different matching strategies
|
||||||
params = {
|
params = {
|
||||||
'search': search_term,
|
'search': f'{search_term}*', # Wildcard suffix matching
|
||||||
'like_pattern': f'%{search_term}%',
|
'like_pattern': f'%{search_term}%', # Contains matching
|
||||||
'exact_pattern': f'%{search_term}%', # For exact phrase matching
|
'exact_pattern': search_term, # Exact matching
|
||||||
|
'prefix_pattern': f'{search_term}%', # Prefix matching
|
||||||
'domain': domain,
|
'domain': domain,
|
||||||
'start_date': start_date,
|
'start_date': start_date,
|
||||||
'end_date': end_date
|
'end_date': end_date
|
||||||
|
|||||||
Reference in New Issue
Block a user