browser-recall/app/scheduler.py

from fastapi import BackgroundTasks
from datetime import datetime, timedelta
import asyncio
from .database import SessionLocal, HistoryEntry, Bookmark
from .browser import BrowserHistoryCollector
from .page_reader import PageReader
from sqlalchemy import func
from sqlalchemy.orm import Session
import pytz
from .config import Config
from .database import get_db
from urllib.parse import urlparse

class HistoryScheduler:
    def __init__(self):
        self.browser_collector = BrowserHistoryCollector()
        self.page_reader = PageReader()
        self.last_history_update = None
        self.content_update_interval = timedelta(hours=24)  # Update content daily
        self.config = Config()

    def _normalize_datetime(self, dt: datetime) -> datetime:
        """Convert datetime to UTC if it has timezone, or make it timezone-aware if it doesn't"""
        if dt is None:
            return None

        # If datetime is naive (no timezone), assume it's in UTC
        if dt.tzinfo is None:
            return pytz.UTC.localize(dt)

        # If datetime has timezone, convert to UTC
        return dt.astimezone(pytz.UTC)

    async def update_bookmarks(self):
        """Update bookmarks from browser"""
        try:
            db = next(get_db())
            bookmarks = self.browser_collector.fetch_bookmarks()

            for added_time, url, title, folder in bookmarks:  # Unpack the tuple
                # Extract domain and check if it should be ignored
                domain = urlparse(url).netloc
                if self.config.is_domain_ignored(domain):
                    continue

                # Normalize the datetime
                added_time = self._normalize_datetime(added_time)

                # Process the bookmark only if domain is not ignored
                bookmark_entry = Bookmark(
                    url=url,
                    title=title,
                    added_time=added_time,
                    folder=folder,
                    domain=domain
                )
                db.add(bookmark_entry)

            db.commit()

        except Exception as e:
            print(f"Error updating bookmarks: {e}")
        finally:
            db.close()

    async def update_history(self):
        """Background task to update history periodically"""
        while True:
            try:
                db = next(get_db())
                history_entries = self.browser_collector.fetch_history()

                for visit_time, url, title in history_entries:  # Unpack the tuple
                    # Extract domain and check if it should be ignored
                    domain = urlparse(url).netloc
                    if self.config.is_domain_ignored(domain):
                        continue

                    # Normalize the datetime
                    visit_time = self._normalize_datetime(visit_time)

                    # Process the entry only if domain is not ignored
                    history_entry = HistoryEntry(
                        url=url,
                        title=title,
                        visit_time=visit_time,
                        domain=domain
                    )
                    db.add(history_entry)

                db.commit()

            except Exception as e:
                print(f"Error updating history: {e}")
            finally:
                db.close()

            await asyncio.sleep(300)  # Wait 5 minutes before next update

    async def close(self):
        """Cleanup resources"""
        await self.page_reader.close()