From 28098ca69adf8fb05a7c3661791122ae9ed39718 Mon Sep 17 00:00:00 2001
From: Zetaphor
Date: Sun, 26 Jan 2025 01:06:02 -0600
Subject: [PATCH] FTS tweaks

---
 app/database.py |  9 ++---
 app/main.py     | 89 +++++++++++++++++++++++++++++++------------------
 2 files changed, 61 insertions(+), 37 deletions(-)

diff --git a/app/database.py b/app/database.py
index 38a930c..8dacfaa 100644
--- a/app/database.py
+++ b/app/database.py
@@ -13,10 +13,11 @@ engine = create_engine(
         "timeout": 30,  # Connection timeout in seconds
         "check_same_thread": False,  # Allow multi-threaded access
     },
-    # Enable write-ahead logging and set a larger pool size
-    pool_size=1,  # Single connection pool since we're using one connection
-    max_overflow=0,  # Prevent additional connections
-    pool_recycle=3600,  # Recycle connection every hour
+    # Update pool configuration for better concurrency
+    pool_size=5,  # Increase pool size to handle concurrent requests
+    max_overflow=10,  # Allow some overflow connections
+    pool_timeout=30,  # Connection timeout from pool
+    pool_recycle=3600,  # Recycle connections every hour
 )
 
 SessionLocal = sessionmaker(
diff --git a/app/main.py b/app/main.py
index 6df2bfa..46912c5 100644
--- a/app/main.py
+++ b/app/main.py
@@ -16,14 +16,14 @@ from fastapi.templating import Jinja2Templates
 from fastapi.staticfiles import StaticFiles
 from fastapi import Request
 import browser_history
-
 from .database import (
     get_db,
     HistoryEntry,
     Bookmark,
     get_last_processed_timestamp,
     update_last_processed_timestamp,
-    create_tables
+    create_tables,
+    engine
 )
 from .scheduler import HistoryScheduler
 from .page_info import PageInfo
@@ -106,60 +106,83 @@ def serialize_bookmark(bookmark):
 @app.get("/history/search")
 async def search_history(
     domain: Optional[str] = Query(None),
-    start_date: Optional[datetime] = Query(None),
-    end_date: Optional[datetime] = Query(None),
+    start_date: Optional[str] = Query(None),
+    end_date: Optional[str] = Query(None),
     search_term: Optional[str] = Query(None),
     include_content: bool = Query(False),
     db: Session = Depends(get_db)
 ):
     """Search history with optimized full-text search"""
     try:
-        # If there's a full-text search term, use the FTS table
         if search_term:
-            # Use raw SQL for FTS query to leverage SQLite's optimization
+            # Optimize FTS query with content-focused ranking
             fts_query = """
-                SELECT h.* FROM history h
-                INNER JOIN history_fts f ON h.id = f.rowid
-                WHERE history_fts MATCH :search
-                AND (:domain IS NULL OR h.domain = :domain)
-                AND (:start_date IS NULL OR h.visit_time >= :start_date)
-                AND (:end_date IS NULL OR h.visit_time <= :end_date)
-                ORDER BY rank
-                LIMIT 1000
+                WITH RECURSIVE
+                ranked_results AS (
+                    SELECT
+                        h.*,
+                        rank * (
+                            CASE
+                                -- Boost exact phrase matches in content
+                                WHEN h.markdown_content LIKE :exact_pattern THEN 3.0
+                                -- Boost title matches but less than content
+                                WHEN h.title LIKE :like_pattern THEN 1.5
+                                -- Base score for other matches
+                                ELSE 1.0
+                            END
+                        ) + (
+                            -- Additional boost for recent entries
+                            CAST(strftime('%s', h.visit_time) AS INTEGER) /
+                            CAST(strftime('%s', 'now') AS INTEGER) * 0.5
+                        ) as final_rank
+                    FROM history h
+                    INNER JOIN history_fts f ON h.id = f.rowid
+                    WHERE history_fts MATCH :search
+                    AND (:domain IS NULL OR h.domain = :domain)
+                    AND (:start_date IS NULL OR h.visit_time >= :start_date)
+                    AND (:end_date IS NULL OR h.visit_time <= :end_date)
+                )
+                SELECT * FROM ranked_results
+                ORDER BY final_rank DESC
+                LIMIT 100
             """
-            results = db.execute(
-                text(fts_query),
-                {
-                    'search': search_term,
-                    'domain': domain,
-                    'start_date': start_date,
-                    'end_date': end_date
-                }
-            ).all()
-
-            # Return serialized results directly
-            return [serialize_history_entry(row, include_content) for row in results]
+            # Prepare parameters with exact phrase matching
+            params = {
+                'search': search_term,
+                'like_pattern': f'%{search_term}%',
+                'exact_pattern': f'%{search_term}%',  # For exact phrase matching
+                'domain': domain,
+                'start_date': start_date,
+                'end_date': end_date
+            }
+
+            # Execute with connection context manager
+            with engine.connect() as connection:
+                results = connection.execute(text(fts_query), params).all()
+                return [serialize_history_entry(row, include_content) for row in results]
         else:
-            # Start with base query
+            # Optimize non-FTS query
             query = db.query(HistoryEntry)
 
-            # Apply filters
             if domain:
                 query = query.filter(HistoryEntry.domain == domain)
-
             if start_date:
                 query = query.filter(HistoryEntry.visit_time >= start_date)
-
             if end_date:
                 query = query.filter(HistoryEntry.visit_time <= end_date)
 
-            # Execute query with limit for better performance
-            entries = query.limit(1000).all()
+            # Add index hints and limit
+            query = query.with_hint(HistoryEntry, 'INDEXED BY ix_history_visit_time', 'sqlite')
+            entries = query.order_by(HistoryEntry.visit_time.desc()).limit(100).all()
 
             return [serialize_history_entry(entry, include_content) for entry in entries]
 
     except Exception as e:
-        print(f"Search error: {e}")
-        raise HTTPException(status_code=500, detail="Search operation failed")
+        logger.error(f"Search error: {str(e)}", exc_info=True)
+        raise HTTPException(
+            status_code=500,
+            detail={"message": "Search operation failed", "error": str(e)}
+        )
 
 @app.get("/bookmarks/search")
 async def search_bookmarks(
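
A note on the app/database.py hunk: the pool sizing keywords only take effect because SQLAlchemy's default QueuePool is in use for file-based SQLite (the default as of SQLAlchemy 2.0; earlier versions used NullPool for file databases, which rejects pool sizing arguments). As a minimal, self-contained sketch of the resulting engine setup, with the database URL and session options assumed since the patch does not show them:

    from sqlalchemy import create_engine
    from sqlalchemy.orm import sessionmaker

    engine = create_engine(
        "sqlite:///./app.db",            # assumed path, not shown in the patch
        connect_args={
            "timeout": 30,               # connection timeout in seconds
            "check_same_thread": False,  # allow multi-threaded access
        },
        pool_size=5,        # base pool for concurrent requests
        max_overflow=10,    # temporary extra connections beyond pool_size
        pool_timeout=30,    # seconds to wait for a free pooled connection
        pool_recycle=3600,  # reopen connections older than an hour
    )

    SessionLocal = sessionmaker(bind=engine)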
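
The ranking logic inside the new fts_query is easier to inspect outside a triple-quoted string. Below is a runnable sketch of the same idea (an FTS5 MATCH joined back to the content table, with a CASE-based boost on the bm25 rank) against a throwaway in-memory database; it assumes an SQLite build with FTS5, which stock CPython ships. Table and column names follow the patch; the sample rows and patterns are invented. One caveat the sketch makes explicit: FTS5's default rank is a negative bm25 score (more negative means a better match), so after a positive multiplicative boost the best matches come first in ascending order, whereas the patch sorts final_rank descending.

    import sqlite3

    con = sqlite3.connect(":memory:")
    con.executescript("""
        CREATE TABLE history (
            id INTEGER PRIMARY KEY, title TEXT, markdown_content TEXT,
            domain TEXT, visit_time TEXT
        );
        CREATE VIRTUAL TABLE history_fts USING fts5(title, markdown_content);
        INSERT INTO history VALUES
            (1, 'FTS guide', 'a deep dive into sqlite full text search',
             'example.com', '2025-01-25 10:00:00'),
            (2, 'Changelog', 'sqlite mentioned once in passing',
             'example.com', '2025-01-20 10:00:00');
        -- keep the FTS rowids aligned with history.id, as the join expects
        INSERT INTO history_fts(rowid, title, markdown_content)
            SELECT id, title, markdown_content FROM history;
    """)

    rows = con.execute("""
        SELECT h.id, h.title,
               -- f.rank is bm25: negative, and more negative = better, so a
               -- positive boost multiplier makes good matches more negative
               f.rank * CASE
                   WHEN h.markdown_content LIKE :exact_pattern THEN 3.0
                   WHEN h.title LIKE :like_pattern THEN 1.5
                   ELSE 1.0
               END AS final_rank
        FROM history h
        INNER JOIN history_fts f ON h.id = f.rowid
        WHERE history_fts MATCH :search
        ORDER BY final_rank ASC  -- ascending: best (most negative) first
    """, {
        "search": "sqlite",
        "exact_pattern": "%full text search%",
        "like_pattern": "%sqlite%",
    }).fetchall()

    print(rows)  # entry 1 should sort ahead of entry 2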
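
Finally, a hypothetical client call against the updated endpoint, mainly to show the signature change: start_date and end_date are now accepted as plain strings rather than parsed datetimes, so they flow straight into SQLite's text comparison against visit_time. Host, port, and the date format are assumptions.

    import httpx

    resp = httpx.get(
        "http://localhost:8000/history/search",  # assumed host and port
        params={
            "search_term": "sqlite fts",
            "domain": "example.com",
            # passed through as strings; must match visit_time's stored format
            "start_date": "2025-01-01 00:00:00",
            "end_date": "2025-01-31 23:59:59",
            "include_content": "true",
        },
    )
    resp.raise_for_status()
    for entry in resp.json():
        print(entry)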