mirror of
https://github.com/Zetaphor/browser-recall.git
synced 2025-12-06 02:19:37 +00:00
FTS tweaks
This commit is contained in:
@@ -13,10 +13,11 @@ engine = create_engine(
|
|||||||
"timeout": 30, # Connection timeout in seconds
|
"timeout": 30, # Connection timeout in seconds
|
||||||
"check_same_thread": False, # Allow multi-threaded access
|
"check_same_thread": False, # Allow multi-threaded access
|
||||||
},
|
},
|
||||||
# Enable write-ahead logging and set a larger pool size
|
# Update pool configuration for better concurrency
|
||||||
pool_size=1, # Single connection pool since we're using one connection
|
pool_size=5, # Increase pool size to handle concurrent requests
|
||||||
max_overflow=0, # Prevent additional connections
|
max_overflow=10, # Allow some overflow connections
|
||||||
pool_recycle=3600, # Recycle connection every hour
|
pool_timeout=30, # Connection timeout from pool
|
||||||
|
pool_recycle=3600, # Recycle connections every hour
|
||||||
)
|
)
|
||||||
|
|
||||||
SessionLocal = sessionmaker(
|
SessionLocal = sessionmaker(
|
||||||
|
|||||||
89
app/main.py
89
app/main.py
@@ -16,14 +16,14 @@ from fastapi.templating import Jinja2Templates
|
|||||||
from fastapi.staticfiles import StaticFiles
|
from fastapi.staticfiles import StaticFiles
|
||||||
from fastapi import Request
|
from fastapi import Request
|
||||||
import browser_history
|
import browser_history
|
||||||
|
|
||||||
from .database import (
|
from .database import (
|
||||||
get_db,
|
get_db,
|
||||||
HistoryEntry,
|
HistoryEntry,
|
||||||
Bookmark,
|
Bookmark,
|
||||||
get_last_processed_timestamp,
|
get_last_processed_timestamp,
|
||||||
update_last_processed_timestamp,
|
update_last_processed_timestamp,
|
||||||
create_tables
|
create_tables,
|
||||||
|
engine
|
||||||
)
|
)
|
||||||
from .scheduler import HistoryScheduler
|
from .scheduler import HistoryScheduler
|
||||||
from .page_info import PageInfo
|
from .page_info import PageInfo
|
||||||
@@ -106,60 +106,83 @@ def serialize_bookmark(bookmark):
|
|||||||
@app.get("/history/search")
|
@app.get("/history/search")
|
||||||
async def search_history(
|
async def search_history(
|
||||||
domain: Optional[str] = Query(None),
|
domain: Optional[str] = Query(None),
|
||||||
start_date: Optional[datetime] = Query(None),
|
start_date: Optional[str] = Query(None),
|
||||||
end_date: Optional[datetime] = Query(None),
|
end_date: Optional[str] = Query(None),
|
||||||
search_term: Optional[str] = Query(None),
|
search_term: Optional[str] = Query(None),
|
||||||
include_content: bool = Query(False),
|
include_content: bool = Query(False),
|
||||||
db: Session = Depends(get_db)
|
db: Session = Depends(get_db)
|
||||||
):
|
):
|
||||||
"""Search history with optimized full-text search"""
|
"""Search history with optimized full-text search"""
|
||||||
try:
|
try:
|
||||||
# If there's a full-text search term, use the FTS table
|
|
||||||
if search_term:
|
if search_term:
|
||||||
# Use raw SQL for FTS query to leverage SQLite's optimization
|
# Optimize FTS query with content-focused ranking
|
||||||
fts_query = """
|
fts_query = """
|
||||||
SELECT h.* FROM history h
|
WITH RECURSIVE
|
||||||
INNER JOIN history_fts f ON h.id = f.rowid
|
ranked_results AS (
|
||||||
WHERE history_fts MATCH :search
|
SELECT
|
||||||
AND (:domain IS NULL OR h.domain = :domain)
|
h.*,
|
||||||
AND (:start_date IS NULL OR h.visit_time >= :start_date)
|
rank * (
|
||||||
AND (:end_date IS NULL OR h.visit_time <= :end_date)
|
CASE
|
||||||
ORDER BY rank
|
-- Boost exact phrase matches in content
|
||||||
LIMIT 1000
|
WHEN h.markdown_content LIKE :exact_pattern THEN 3.0
|
||||||
|
-- Boost title matches but less than content
|
||||||
|
WHEN h.title LIKE :like_pattern THEN 1.5
|
||||||
|
-- Base score for other matches
|
||||||
|
ELSE 1.0
|
||||||
|
END
|
||||||
|
) + (
|
||||||
|
-- Additional boost for recent entries
|
||||||
|
CAST(strftime('%s', h.visit_time) AS INTEGER) /
|
||||||
|
CAST(strftime('%s', 'now') AS INTEGER) * 0.5
|
||||||
|
) as final_rank
|
||||||
|
FROM history h
|
||||||
|
INNER JOIN history_fts f ON h.id = f.rowid
|
||||||
|
WHERE history_fts MATCH :search
|
||||||
|
AND (:domain IS NULL OR h.domain = :domain)
|
||||||
|
AND (:start_date IS NULL OR h.visit_time >= :start_date)
|
||||||
|
AND (:end_date IS NULL OR h.visit_time <= :end_date)
|
||||||
|
)
|
||||||
|
SELECT * FROM ranked_results
|
||||||
|
ORDER BY final_rank DESC
|
||||||
|
LIMIT 100
|
||||||
"""
|
"""
|
||||||
results = db.execute(
|
|
||||||
text(fts_query),
|
|
||||||
{
|
|
||||||
'search': search_term,
|
|
||||||
'domain': domain,
|
|
||||||
'start_date': start_date,
|
|
||||||
'end_date': end_date
|
|
||||||
}
|
|
||||||
).all()
|
|
||||||
|
|
||||||
# Return serialized results directly
|
# Prepare parameters with exact phrase matching
|
||||||
return [serialize_history_entry(row, include_content) for row in results]
|
params = {
|
||||||
|
'search': search_term,
|
||||||
|
'like_pattern': f'%{search_term}%',
|
||||||
|
'exact_pattern': f'%{search_term}%', # Same substring pattern as like_pattern; LIKE matches the whole term anywhere in the content
|
||||||
|
'domain': domain,
|
||||||
|
'start_date': start_date,
|
||||||
|
'end_date': end_date
|
||||||
|
}
|
||||||
|
|
||||||
|
# Execute with connection context manager
|
||||||
|
with engine.connect() as connection:
|
||||||
|
results = connection.execute(text(fts_query), params).all()
|
||||||
|
return [serialize_history_entry(row, include_content) for row in results]
|
||||||
else:
|
else:
|
||||||
# Start with base query
|
# Optimize non-FTS query
|
||||||
query = db.query(HistoryEntry)
|
query = db.query(HistoryEntry)
|
||||||
|
|
||||||
# Apply filters
|
|
||||||
if domain:
|
if domain:
|
||||||
query = query.filter(HistoryEntry.domain == domain)
|
query = query.filter(HistoryEntry.domain == domain)
|
||||||
|
|
||||||
if start_date:
|
if start_date:
|
||||||
query = query.filter(HistoryEntry.visit_time >= start_date)
|
query = query.filter(HistoryEntry.visit_time >= start_date)
|
||||||
|
|
||||||
if end_date:
|
if end_date:
|
||||||
query = query.filter(HistoryEntry.visit_time <= end_date)
|
query = query.filter(HistoryEntry.visit_time <= end_date)
|
||||||
|
|
||||||
# Execute query with limit for better performance
|
# Add index hints and limit
|
||||||
entries = query.limit(1000).all()
|
query = query.with_hint(HistoryEntry, 'INDEXED BY ix_history_visit_time', 'sqlite')
|
||||||
|
entries = query.order_by(HistoryEntry.visit_time.desc()).limit(100).all()
|
||||||
return [serialize_history_entry(entry, include_content) for entry in entries]
|
return [serialize_history_entry(entry, include_content) for entry in entries]
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Search error: {e}")
|
logger.error(f"Search error: {str(e)}", exc_info=True)
|
||||||
raise HTTPException(status_code=500, detail="Search operation failed")
|
raise HTTPException(
|
||||||
|
status_code=500,
|
||||||
|
detail={"message": "Search operation failed", "error": str(e)}
|
||||||
|
)
|
||||||
|
|
||||||
@app.get("/bookmarks/search")
|
@app.get("/bookmarks/search")
|
||||||
async def search_bookmarks(
|
async def search_bookmarks(
|
||||||
|
|||||||
Reference in New Issue
Block a user