All the things

This commit is contained in:
2025-01-25 22:42:04 -06:00
parent d556823350
commit 0db1065d10
16 changed files with 678 additions and 55 deletions

View File

@@ -1,15 +1,31 @@
from fastapi import FastAPI, Depends, Query
from fastapi import FastAPI, Depends, Query, WebSocket, WebSocketDisconnect
from sqlalchemy.orm import Session
from datetime import datetime
from typing import List
from datetime import datetime, timezone
from typing import List, Optional
import asyncio
from fastapi import WebSocketDisconnect
from urllib.parse import urlparse
import pytz
from fastapi.middleware.cors import CORSMiddleware
import iso8601
from .database import get_db, HistoryEntry, Bookmark
from .scheduler import HistoryScheduler
from .page_info import PageInfo
from .page_reader import PageReader
# Module-level FastAPI application; the route and WebSocket decorators in this
# file register their handlers on it.
app = FastAPI()
# Scheduler instance; its update_history() coroutine is launched as a
# background task from the startup handler.
scheduler = HistoryScheduler()
# Register CORS middleware with every origin, method and header allowed.
# NOTE(review): the original note said this exists to allow WebSocket
# connections; CORS headers normally apply to HTTP requests, so confirm whether
# the /ws endpoint actually depends on this middleware.
# NOTE(review): a wildcard origin combined with allow_credentials=True is
# advised against by the FastAPI CORS docs -- confirm and list explicit origins
# before deploying.
app.add_middleware(
CORSMiddleware,
allow_origins=["*"], # In production, replace the wildcard with the specific allowed domains
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
@app.on_event("startup")
async def startup_event():
# Initial bookmark fetch
@@ -17,12 +33,37 @@ async def startup_event():
# Start the background task
asyncio.create_task(scheduler.update_history())
def serialize_history_entry(entry, include_content: bool = False) -> dict:
    """Serialize a HistoryEntry object to a JSON-friendly dictionary.

    Args:
        entry: Object exposing ``id``, ``url``, ``title``, ``visit_time`` and
            ``domain`` attributes. ``markdown_content`` is only read when
            ``include_content`` is true.
        include_content: When true, the entry's ``markdown_content`` is added
            under the ``"markdown_content"`` key.

    Returns:
        A dict with the entry's fields. ``visit_time`` is rendered with
        ``isoformat()``, or is ``None`` when the entry has no visit time.
    """
    visit_time = entry.visit_time
    result = {
        "id": entry.id,
        "url": entry.url,
        "title": entry.title,
        "visit_time": visit_time.isoformat() if visit_time else None,
        "domain": entry.domain,
    }
    # The page body can be large, so it is only attached on request.
    if include_content:
        result["markdown_content"] = entry.markdown_content
    return result
def serialize_bookmark(bookmark) -> dict:
    """Serialize a Bookmark object to a JSON-friendly dictionary.

    Args:
        bookmark: Object exposing ``id``, ``url``, ``title``, ``added_time``,
            ``folder`` and ``domain`` attributes.

    Returns:
        A dict with the bookmark's fields. ``added_time`` is rendered with
        ``isoformat()``, or is ``None`` when the bookmark has no added time.
    """
    added_time = bookmark.added_time
    return {
        "id": bookmark.id,
        "url": bookmark.url,
        "title": bookmark.title,
        "added_time": added_time.isoformat() if added_time else None,
        "folder": bookmark.folder,
        "domain": bookmark.domain,
    }
@app.get("/history/search")
async def search_history(
domain: str = Query(None),
start_date: datetime = Query(None),
end_date: datetime = Query(None),
search_term: str = Query(None),
domain: Optional[str] = Query(None),
start_date: Optional[datetime] = Query(None),
end_date: Optional[datetime] = Query(None),
search_term: Optional[str] = Query(None),
include_content: bool = Query(False),
db: Session = Depends(get_db)
):
query = db.query(HistoryEntry)
@@ -37,15 +78,19 @@ async def search_history(
query = query.filter(HistoryEntry.visit_time <= end_date)
if search_term:
query = query.filter(HistoryEntry.title.ilike(f"%{search_term}%"))
query = query.filter(
(HistoryEntry.title.ilike(f"%{search_term}%")) |
(HistoryEntry.markdown_content.ilike(f"%{search_term}%"))
)
return query.all()
entries = query.all()
return [serialize_history_entry(entry, include_content) for entry in entries]
@app.get("/bookmarks/search")
async def search_bookmarks(
domain: str = Query(None),
folder: str = Query(None),
search_term: str = Query(None),
domain: Optional[str] = Query(None),
folder: Optional[str] = Query(None),
search_term: Optional[str] = Query(None),
db: Session = Depends(get_db)
):
query = db.query(Bookmark)
@@ -59,4 +104,73 @@ async def search_bookmarks(
if search_term:
query = query.filter(Bookmark.title.ilike(f"%{search_term}%"))
return query.all()
bookmarks = query.all()
return [serialize_bookmark(bookmark) for bookmark in bookmarks]
@app.websocket("/ws")
async def websocket_endpoint(websocket: WebSocket, db: Session = Depends(get_db)):
    """Receive page snapshots over a WebSocket and store them as history entries.

    Every incoming JSON message is read for the keys ``url``, ``html`` and
    ``timestamp`` (parsed with ``iso8601.parse_date``). The HTML is converted
    to markdown, a new ``HistoryEntry`` row is added and committed, and a
    ``{"status": "success", ...}`` message is sent back.

    The loop ends when the client disconnects or when any message fails to
    process; in the failure case the session is rolled back and a
    ``{"status": "error", ...}`` message is sent on a best-effort basis.

    Args:
        websocket: The client connection.
        db: Database session supplied by the ``get_db`` dependency.
    """
    print("WebSocket endpoint called")
    page_reader = PageReader()
    # Everything after the reader is created runs inside the try block so that
    # the reader is closed even when accepting the connection fails.
    try:
        print("New WebSocket connection established")
        await websocket.accept()
        print("WebSocket connection accepted")

        while True:
            print("Waiting for message...")
            data = await websocket.receive_json()
            print(f"Received message for URL: {data['url']}")
            print(f"HTML content length: {len(data['html'])}")
            print(f"Timestamp: {data['timestamp']}")

            # Parse the ISO timestamp correctly
            timestamp = iso8601.parse_date(data['timestamp'])
            page_info = PageInfo(
                url=data['url'],
                html=data['html'],
                timestamp=timestamp
            )
            print(f"Created PageInfo object for: {page_info.url}")

            # Convert HTML to markdown
            print("Converting HTML to markdown...")
            markdown_content = page_reader.html_to_markdown(page_info.html)
            print(f"Markdown conversion complete, length: {len(markdown_content) if markdown_content else 0}")

            # Always insert a new history entry; no existing row is looked up
            # or updated here.
            domain = urlparse(page_info.url).netloc
            print(f"Creating history entry for domain: {domain}")
            history_entry = HistoryEntry(
                url=page_info.url,
                visit_time=page_info.timestamp,
                domain=domain,
                markdown_content=markdown_content,
                last_content_update=datetime.now(timezone.utc)
            )

            print("Saving to database...")
            db.add(history_entry)
            db.commit()
            print("Database save complete")

            # Send confirmation back to client
            await websocket.send_json({
                "status": "success",
                "message": f"Processed page: {page_info.url}"
            })
    except WebSocketDisconnect:
        print("Client disconnected")
    except Exception as e:
        print(f"Error handling message: {e}")
        # Discard any half-finished transaction so the session is not left in
        # a failed state when the dependency tears it down.
        try:
            db.rollback()
        except Exception as rollback_error:
            print(f"Error rolling back session: {rollback_error}")
        # Send error back to client if possible; the socket may already be
        # closed (or never accepted), so a failure here is only reported.
        try:
            await websocket.send_json({
                "status": "error",
                "message": str(e)
            })
        except Exception as send_error:
            print(f"Could not send error to client: {send_error}")
    finally:
        print("Cleaning up resources")
        page_reader.close()