FTS tweaks

This commit is contained in:
2025-01-26 01:06:02 -06:00
parent 687bbb198e
commit 28098ca69a
2 changed files with 61 additions and 37 deletions

View File

@@ -13,10 +13,11 @@ engine = create_engine(
         "timeout": 30,  # Connection timeout in seconds
         "check_same_thread": False,  # Allow multi-threaded access
     },
-    # Enable write-ahead logging and set a larger pool size
-    pool_size=1,  # Single connection pool since we're using one connection
-    max_overflow=0,  # Prevent additional connections
-    pool_recycle=3600,  # Recycle connection every hour
+    # Update pool configuration for better concurrency
+    pool_size=5,  # Increase pool size to handle concurrent requests
+    max_overflow=10,  # Allow some overflow connections
+    pool_timeout=30,  # Connection timeout from pool
+    pool_recycle=3600,  # Recycle connections every hour
 )
 SessionLocal = sessionmaker(

View File

@@ -16,14 +16,14 @@ from fastapi.templating import Jinja2Templates
 from fastapi.staticfiles import StaticFiles
 from fastapi import Request
 import browser_history
 from .database import (
     get_db,
     HistoryEntry,
     Bookmark,
     get_last_processed_timestamp,
     update_last_processed_timestamp,
-    create_tables
+    create_tables,
+    engine
 )
 from .scheduler import HistoryScheduler
 from .page_info import PageInfo
@@ -106,60 +106,83 @@ def serialize_bookmark(bookmark):
 @app.get("/history/search")
 async def search_history(
     domain: Optional[str] = Query(None),
-    start_date: Optional[datetime] = Query(None),
-    end_date: Optional[datetime] = Query(None),
+    start_date: Optional[str] = Query(None),
+    end_date: Optional[str] = Query(None),
     search_term: Optional[str] = Query(None),
     include_content: bool = Query(False),
     db: Session = Depends(get_db)
 ):
     """Search history with optimized full-text search"""
     try:
-        # If there's a full-text search term, use the FTS table
         if search_term:
-            # Use raw SQL for FTS query to leverage SQLite's optimization
+            # Optimize FTS query with content-focused ranking
             fts_query = """
-                SELECT h.* FROM history h
-                INNER JOIN history_fts f ON h.id = f.rowid
-                WHERE history_fts MATCH :search
-                AND (:domain IS NULL OR h.domain = :domain)
-                AND (:start_date IS NULL OR h.visit_time >= :start_date)
-                AND (:end_date IS NULL OR h.visit_time <= :end_date)
-                ORDER BY rank
-                LIMIT 1000
+                WITH RECURSIVE
+                ranked_results AS (
+                    SELECT
+                        h.*,
+                        rank * (
+                            CASE
+                                -- Boost exact phrase matches in content
+                                WHEN h.markdown_content LIKE :exact_pattern THEN 3.0
+                                -- Boost title matches but less than content
+                                WHEN h.title LIKE :like_pattern THEN 1.5
+                                -- Base score for other matches
+                                ELSE 1.0
+                            END
+                        ) + (
+                            -- Additional boost for recent entries
+                            CAST(strftime('%s', h.visit_time) AS INTEGER) /
+                            CAST(strftime('%s', 'now') AS INTEGER) * 0.5
+                        ) as final_rank
+                    FROM history h
+                    INNER JOIN history_fts f ON h.id = f.rowid
+                    WHERE history_fts MATCH :search
+                    AND (:domain IS NULL OR h.domain = :domain)
+                    AND (:start_date IS NULL OR h.visit_time >= :start_date)
+                    AND (:end_date IS NULL OR h.visit_time <= :end_date)
+                )
+                SELECT * FROM ranked_results
+                ORDER BY final_rank DESC
+                LIMIT 100
             """
-            results = db.execute(
-                text(fts_query),
-                {
-                    'search': search_term,
-                    'domain': domain,
-                    'start_date': start_date,
-                    'end_date': end_date
-                }
-            ).all()
-            # Return serialized results directly
-            return [serialize_history_entry(row, include_content) for row in results]
+            # Prepare parameters with exact phrase matching
+            params = {
+                'search': search_term,
+                'like_pattern': f'%{search_term}%',
+                'exact_pattern': f'%{search_term}%',  # For exact phrase matching
+                'domain': domain,
+                'start_date': start_date,
+                'end_date': end_date
+            }
+            # Execute with connection context manager
+            with engine.connect() as connection:
+                results = connection.execute(text(fts_query), params).all()
+                return [serialize_history_entry(row, include_content) for row in results]
         else:
-            # Start with base query
+            # Optimize non-FTS query
             query = db.query(HistoryEntry)
-            # Apply filters
             if domain:
                 query = query.filter(HistoryEntry.domain == domain)
             if start_date:
                 query = query.filter(HistoryEntry.visit_time >= start_date)
             if end_date:
                 query = query.filter(HistoryEntry.visit_time <= end_date)
-            # Execute query with limit for better performance
-            entries = query.limit(1000).all()
+            # Add index hints and limit
+            query = query.with_hint(HistoryEntry, 'INDEXED BY ix_history_visit_time', 'sqlite')
+            entries = query.order_by(HistoryEntry.visit_time.desc()).limit(100).all()
             return [serialize_history_entry(entry, include_content) for entry in entries]
     except Exception as e:
-        print(f"Search error: {e}")
-        raise HTTPException(status_code=500, detail="Search operation failed")
+        logger.error(f"Search error: {str(e)}", exc_info=True)
+        raise HTTPException(
+            status_code=500,
+            detail={"message": "Search operation failed", "error": str(e)}
+        )

 @app.get("/bookmarks/search")
 async def search_bookmarks(