mirror of
https://github.com/Zetaphor/browser-recall.git
synced 2025-12-06 10:29:38 +00:00
65 lines
2.2 KiB
Python
# Standard library
import os

# Third-party
import uvicorn
from crawl4ai import AsyncWebCrawler
from fastapi import FastAPI, WebSocket

# Local modules
from database import Database
from logger import Logger

# Ensure the log directory exists before the Logger tries to write to it.
os.makedirs('logs', exist_ok=True)

# Application-wide singletons shared by the WebSocket handler below.
app = FastAPI()
logger = Logger()
db = Database()
@app.websocket("/ws")
async def websocket_endpoint(websocket: WebSocket):
    """Accept a WebSocket connection and persist crawled page data.

    For each JSON message received (expected shape:
    ``{"url": ..., "timestamp": ...}``): crawl the URL to extract its
    title and markdown content, store the result via ``db.add_history``,
    and echo an acknowledgement back to the client. If crawling fails,
    the raw payload is stored instead so no message is silently dropped.
    """
    # Function-scope import keeps the module-level import list unchanged.
    from fastapi import WebSocketDisconnect

    await websocket.accept()
    logger.info("New WebSocket connection established")

    # Create crawler instance outside the loop for reuse
    async with AsyncWebCrawler() as crawler:
        try:
            while True:
                data = await websocket.receive_json()
                url = data["url"]

                # Crawl the URL to get title and content; fall back to the
                # raw payload on failure so the message is still recorded.
                try:
                    result = await crawler.arun(url=url)
                    # arun returns a result container; the first entry holds
                    # this URL's metadata and rendered markdown.
                    crawl_result = result[0]
                    # Fall back to the last URL path segment when the page
                    # has no <title>.
                    title = crawl_result.metadata.get('title') or url.split("/")[-1]
                    content = crawl_result.markdown
                    logger.info(f"Crawling result: {result}")
                except Exception as crawl_error:
                    logger.error(f"Crawling error for {url}: {str(crawl_error)}")
                    title = url.split("/")[-1]
                    content = str(data)

                # Store received data with crawled information
                db.add_history(
                    url=url,
                    title=title,
                    content=content
                )

                logger.info(f"Processed URL: {url} - {title}")
                await websocket.send_json({
                    "status": "received",
                    "data": {
                        "url": url,
                        "title": title,
                        # .get(): a client that omits "timestamp" should not
                        # crash the whole connection (original used ["..."]).
                        "timestamp": data.get("timestamp")
                    }
                })
        except WebSocketDisconnect:
            # Normal client hang-up: the socket is already closed, so do NOT
            # call websocket.close() (doing so can raise inside the handler).
            logger.info("Client disconnected")
        except Exception as e:
            logger.error(f"WebSocket error: {str(e)}")
            await websocket.close()
        finally:
            logger.info("WebSocket connection closed")
# Script entry point: start the server only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    host, port = "0.0.0.0", 8523
    logger.info("Starting WebSocket server...")
    # Bind on all interfaces so clients on other machines can connect.
    uvicorn.run(app, host=host, port=port)