mirror of
https://github.com/Zetaphor/browser-recall.git
synced 2025-12-06 10:29:38 +00:00
Initial commit
This commit is contained in:
18
app/browser.py
Normal file
18
app/browser.py
Normal file
@@ -0,0 +1,18 @@
|
||||
from datetime import datetime
|
||||
from typing import List, Tuple
|
||||
from browser_history import get_history, get_bookmarks
|
||||
from urllib.parse import urlparse
|
||||
|
||||
class BrowserHistoryCollector:
    """Thin wrapper around the ``browser_history`` package."""

    @staticmethod
    def get_domain(url: str) -> str:
        """Return the network-location (domain) component of *url*."""
        return urlparse(url).netloc

    def fetch_history(self) -> List[Tuple[datetime, str, str]]:
        """Return browser history as (visit_time, url, title) tuples."""
        result = get_history()
        rows = []
        for row in result.histories:
            # Each raw row begins with (datetime, url, title); ignore extras.
            rows.append((row[0], row[1], row[2]))
        return rows

    def fetch_bookmarks(self) -> List[Tuple[datetime, str, str, str]]:
        """Return bookmarks as (added_time, url, title, folder) tuples."""
        return get_bookmarks().bookmarks
|
||||
38
app/database.py
Normal file
38
app/database.py
Normal file
@@ -0,0 +1,38 @@
|
||||
from sqlalchemy import create_engine, Column, Integer, String, DateTime
|
||||
from sqlalchemy.ext.declarative import declarative_base
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
|
||||
# SQLite database file in the current working directory.
SQLALCHEMY_DATABASE_URL = "sqlite:///./browser_history.db"

engine = create_engine(SQLALCHEMY_DATABASE_URL)
# Session factory with explicit commit/flush control (both disabled).
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)

# Declarative base shared by all ORM models in this module.
Base = declarative_base()
|
||||
|
||||
class HistoryEntry(Base):
    """ORM model for one browser-history visit."""

    __tablename__ = "history_entries"

    id = Column(Integer, primary_key=True, index=True)
    url = Column(String, index=True)
    title = Column(String, nullable=True)  # may be missing from raw history
    visit_time = Column(DateTime, index=True)
    domain = Column(String, index=True)  # netloc extracted from url
|
||||
|
||||
class Bookmark(Base):
    """ORM model for one browser bookmark."""

    __tablename__ = "bookmarks"

    id = Column(Integer, primary_key=True, index=True)
    url = Column(String, index=True)
    title = Column(String, nullable=True)  # may be missing on some bookmarks
    added_time = Column(DateTime, index=True)
    folder = Column(String, index=True)  # bookmark folder the entry lives in
    domain = Column(String, index=True)  # netloc extracted from url
|
||||
|
||||
# Create the tables on import if they do not already exist.
Base.metadata.create_all(bind=engine)
|
||||
|
||||
def get_db():
    """FastAPI dependency: yield a DB session, always closing it afterwards."""
    session = SessionLocal()
    try:
        yield session
    finally:
        # Release the connection even if the request handler raised.
        session.close()
|
||||
62
app/main.py
Normal file
62
app/main.py
Normal file
@@ -0,0 +1,62 @@
|
||||
from fastapi import FastAPI, Depends, Query
|
||||
from sqlalchemy.orm import Session
|
||||
from datetime import datetime
|
||||
from typing import List
|
||||
import asyncio
|
||||
|
||||
from .database import get_db, HistoryEntry, Bookmark
|
||||
from .scheduler import HistoryScheduler
|
||||
|
||||
# FastAPI application instance.
app = FastAPI()
# Single scheduler shared by the startup hook below.
scheduler = HistoryScheduler()
|
||||
|
||||
@app.on_event("startup")
async def startup_event():
    """Populate bookmarks once, then launch the periodic history updater."""
    # Initial bookmark fetch
    await scheduler.update_bookmarks()
    # Start the background task. Keep a strong reference on app.state:
    # the event loop only holds a weak reference to tasks, so an
    # unreferenced task can be garbage-collected mid-flight.
    app.state.history_task = asyncio.create_task(scheduler.update_history())
|
||||
|
||||
@app.get("/history/search")
async def search_history(
    domain: str = Query(None),
    start_date: datetime = Query(None),
    end_date: datetime = Query(None),
    search_term: str = Query(None),
    db: Session = Depends(get_db)
):
    """Search history entries, applying only the filters that were supplied."""
    conditions = []
    if domain:
        conditions.append(HistoryEntry.domain == domain)
    if start_date:
        conditions.append(HistoryEntry.visit_time >= start_date)
    if end_date:
        conditions.append(HistoryEntry.visit_time <= end_date)
    if search_term:
        # Case-insensitive substring match on the page title.
        conditions.append(HistoryEntry.title.ilike(f"%{search_term}%"))

    query = db.query(HistoryEntry)
    for condition in conditions:
        query = query.filter(condition)
    return query.all()
|
||||
|
||||
@app.get("/bookmarks/search")
async def search_bookmarks(
    domain: str = Query(None),
    folder: str = Query(None),
    search_term: str = Query(None),
    db: Session = Depends(get_db)
):
    """Search bookmarks, applying only the filters that were supplied."""
    conditions = []
    if domain:
        conditions.append(Bookmark.domain == domain)
    if folder:
        conditions.append(Bookmark.folder == folder)
    if search_term:
        # Case-insensitive substring match on the bookmark title.
        conditions.append(Bookmark.title.ilike(f"%{search_term}%"))

    query = db.query(Bookmark)
    for condition in conditions:
        query = query.filter(condition)
    return query.all()
|
||||
16
app/page_info.py
Normal file
16
app/page_info.py
Normal file
@@ -0,0 +1,16 @@
|
||||
import asyncio
|
||||
import aiohttp
|
||||
from bs4 import BeautifulSoup
|
||||
from typing import Optional
|
||||
|
||||
class PageInfoFetcher:
    """Best-effort fetcher for remote page metadata."""

    async def get_page_title(self, url: str) -> Optional[str]:
        """Fetch *url* and return its <title> text, or None on failure.

        Network errors, timeouts, non-200 responses, and unparseable
        bodies all yield None rather than raising.
        """
        try:
            async with aiohttp.ClientSession() as session:
                async with session.get(url, timeout=5) as response:
                    if response.status != 200:
                        return None
                    html = await response.text()
                    soup = BeautifulSoup(html, 'html.parser')
                    return soup.title.string if soup.title else None
        # `except Exception` rather than a bare `except:` so that
        # asyncio.CancelledError and KeyboardInterrupt still propagate —
        # a bare except makes the task uncancellable.
        except Exception:
            return None
|
||||
85
app/scheduler.py
Normal file
85
app/scheduler.py
Normal file
@@ -0,0 +1,85 @@
|
||||
from fastapi import BackgroundTasks
|
||||
from datetime import datetime, timedelta
|
||||
import asyncio
|
||||
from .database import SessionLocal, HistoryEntry, Bookmark
|
||||
from .browser import BrowserHistoryCollector
|
||||
from .page_info import PageInfoFetcher
|
||||
from sqlalchemy import func
|
||||
|
||||
class HistoryScheduler:
    """Periodically syncs browser history and bookmarks into the database."""

    def __init__(self):
        self.browser_collector = BrowserHistoryCollector()
        self.page_fetcher = PageInfoFetcher()
        self.last_history_update = None

    async def update_bookmarks(self):
        """Fetch current bookmarks and insert any new ones.

        A bookmark is inserted when its URL is unseen, or when the same
        URL reappears under a different folder.
        """
        bookmarks = self.browser_collector.fetch_bookmarks()

        db = SessionLocal()
        try:
            # Map of stored URLs -> (added_time, folder), used for dedup.
            existing_urls = {
                url: (added_time, folder)
                for url, added_time, folder in
                db.query(Bookmark.url, Bookmark.added_time, Bookmark.folder).all()
            }

            new_entries = []
            for added_time, url, title, folder in bookmarks:
                # Only add if URL doesn't exist or if it's in a different folder
                if (url not in existing_urls or
                        existing_urls[url][1] != folder):
                    domain = self.browser_collector.get_domain(url)
                    entry = Bookmark(
                        url=url,
                        title=title,
                        added_time=added_time,
                        folder=folder,
                        domain=domain
                    )
                    new_entries.append(entry)

            if new_entries:
                db.bulk_save_objects(new_entries)
                db.commit()
        finally:
            db.close()

    async def update_history(self):
        """Background loop: sync new history (and bookmarks) every 5 minutes.

        Each cycle is guarded so a transient failure (locked browser DB,
        network error, ...) is logged and retried next cycle instead of
        killing the task permanently and silently.
        """
        import logging  # local import keeps the module's imports unchanged
        logger = logging.getLogger(__name__)

        while True:
            db = SessionLocal()
            try:
                # Get the latest timestamp from our database
                latest_entry = db.query(func.max(HistoryEntry.visit_time)).scalar()

                # Fetch new history
                history = self.browser_collector.fetch_history()

                # Filter to only get entries newer than our latest entry
                new_entries = []
                for visit_time, url, title in history:
                    if not latest_entry or visit_time > latest_entry:
                        domain = self.browser_collector.get_domain(url)
                        if not title:
                            # Fall back to fetching the live page's <title>.
                            title = await self.page_fetcher.get_page_title(url)

                        entry = HistoryEntry(
                            url=url,
                            title=title,
                            visit_time=visit_time,
                            domain=domain
                        )
                        new_entries.append(entry)

                if new_entries:
                    db.bulk_save_objects(new_entries)
                    db.commit()

                # Update bookmarks
                await self.update_bookmarks()
            except Exception:
                # Keep the loop alive; the next cycle retries from scratch.
                logger.exception("history/bookmark sync cycle failed")
            finally:
                db.close()

            # Wait for 5 minutes before next update
            await asyncio.sleep(300)
|
||||
Reference in New Issue
Block a user