All the things

2025-01-25 22:42:04 -06:00
parent d556823350
commit 0db1065d10
16 changed files with 678 additions and 55 deletions

.env.example (new file, +6 lines)

@@ -0,0 +1,6 @@
# Meilisearch Configuration
MEILISEARCH_HOST=http://localhost:7700
# Generate a master key using: openssl rand -hex 32
MEILISEARCH_MASTER_KEY=your_master_key_here
# Example master key: 6d99b335033595ea62d02a5641b94e04e80c33c1e1f1f789c84445ff5
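Nothing in this commit reads these variables yet. A minimal sketch of how they might be loaded, assuming python-dotenv (not listed in requirements.txt) and plain os.environ access:

# Hypothetical loader for the Meilisearch settings above (not part of this commit)
import os
from dotenv import load_dotenv  # assumes python-dotenv is installed

load_dotenv()  # read .env from the working directory

MEILISEARCH_HOST = os.getenv("MEILISEARCH_HOST", "http://localhost:7700")
MEILISEARCH_MASTER_KEY = os.getenv("MEILISEARCH_MASTER_KEY")
if not MEILISEARCH_MASTER_KEY:
    raise RuntimeError("MEILISEARCH_MASTER_KEY is not set; see .env.example")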

app/__init__.py (new file, +1 line)

@@ -0,0 +1 @@
# This file can be empty; it just marks the directory as a Python package

app/config.py (new file, +92 lines)

@@ -0,0 +1,92 @@
import yaml
from pathlib import Path
from typing import Set
import fnmatch
class ReaderConfig:
    def __init__(self):
        self.excluded_patterns: Set[str] = set()
        self._load_config()

    def _load_config(self):
        config_path = Path("config/reader_config.yaml")
        if not config_path.exists():
            print("Warning: reader_config.yaml not found, creating default config")
            self._create_default_config(config_path)

        try:
            with open(config_path, 'r') as f:
                config = yaml.safe_load(f)
                self.excluded_patterns = set(config.get('excluded_domains', []))
        except Exception as e:
            print(f"Error loading config: {e}")
            self.excluded_patterns = set()

    def _create_default_config(self, config_path: Path):
        config_path.parent.mkdir(parents=True, exist_ok=True)
        default_config = {
            'excluded_domains': [
                'localhost',
                '127.0.0.1',
                '192.168.*.*',
                '10.*.*.*'
            ]
        }
        with open(config_path, 'w') as f:
            yaml.safe_dump(default_config, f, default_flow_style=False)

    def is_domain_excluded(self, domain: str) -> bool:
        """
        Check if a domain matches any exclusion pattern.
        Supports glob-style wildcards (* and ?).

        Examples:
        - '*.example.com' matches any subdomain of example.com
        - 'reddit-*.com' matches reddit-video.com, reddit-static.com, etc.
        - '192.168.*.*' matches any IP in the 192.168.0.0/16 subnet
        """
        domain = domain.lower()

        # Check each pattern
        for pattern in self.excluded_patterns:
            pattern = pattern.lower()

            # Handle IP address patterns specially
            if any(c.isdigit() for c in pattern):
                if self._match_ip_pattern(domain, pattern):
                    return True

            # Handle domain patterns
            if fnmatch.fnmatch(domain, pattern):
                return True

            # Also check if the pattern matches when prepended with a dot
            # This handles cases like 'example.com' matching 'subdomain.example.com'
            if fnmatch.fnmatch(domain, f"*.{pattern}"):
                return True

        return False

    def _match_ip_pattern(self, domain: str, pattern: str) -> bool:
        """
        Special handling for IP address patterns.
        Handles cases like '192.168.*.*' matching '192.168.1.1'.
        """
        # Skip if the domain isn't IP-like
        if not any(c.isdigit() for c in domain):
            return False

        # Split into octets
        domain_parts = domain.split('.')
        pattern_parts = pattern.split('.')

        # Must have the same number of parts
        if len(domain_parts) != len(pattern_parts):
            return False

        # Check each octet
        for domain_part, pattern_part in zip(domain_parts, pattern_parts):
            if pattern_part == '*':
                continue
            if domain_part != pattern_part:
                return False

        return True
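Illustrative usage of the exclusion check, assuming it runs from the repository root so config/reader_config.yaml (or the generated default) is found:

# Example usage (not part of the commit)
from app.config import ReaderConfig

config = ReaderConfig()
print(config.is_domain_excluded("localhost"))      # True: listed directly
print(config.is_domain_excluded("192.168.1.10"))   # True: matches 192.168.*.*
print(config.is_domain_excluded("example.com"))    # False: no pattern matches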

app/database.py (modified)

@@ -1,6 +1,7 @@
-from sqlalchemy import create_engine, Column, Integer, String, DateTime
+from sqlalchemy import create_engine, Column, Integer, String, DateTime, Text
 from sqlalchemy.ext.declarative import declarative_base
 from sqlalchemy.orm import sessionmaker
+from datetime import datetime

 SQLALCHEMY_DATABASE_URL = "sqlite:///./browser_history.db"
@@ -10,13 +11,15 @@ SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
 Base = declarative_base()

 class HistoryEntry(Base):
-    __tablename__ = "history_entries"
+    __tablename__ = "history"

-    id = Column(Integer, primary_key=True, index=True)
-    url = Column(String, index=True)
-    title = Column(String, nullable=True)
-    visit_time = Column(DateTime, index=True)
-    domain = Column(String, index=True)
+    id = Column(Integer, primary_key=True)
+    url = Column(String)
+    title = Column(String)
+    visit_time = Column(DateTime)
+    domain = Column(String)
+    markdown_content = Column(Text, nullable=True)
+    last_content_update = Column(DateTime, nullable=True)

 class Bookmark(Base):
     __tablename__ = "bookmarks"
@@ -28,6 +31,37 @@ class Bookmark(Base):
     folder = Column(String, index=True)
     domain = Column(String, index=True)

+class BlacklistedDomain(Base):
+    __tablename__ = "blacklisted_domains"
+
+    id = Column(Integer, primary_key=True)
+    domain = Column(String, unique=True, index=True)
+    reason = Column(String, nullable=True)
+    added_time = Column(DateTime, default=datetime.utcnow)
+
+    @classmethod
+    def is_blacklisted(cls, db: SessionLocal, domain: str) -> bool:
+        """Check if a domain is blacklisted"""
+        return db.query(cls).filter(cls.domain == domain.lower()).first() is not None
+
+    @classmethod
+    def add_to_blacklist(cls, db: SessionLocal, domain: str, reason: str = None):
+        """Add a domain to the blacklist"""
+        try:
+            blacklist_entry = cls(
+                domain=domain.lower(),
+                reason=reason
+            )
+            db.add(blacklist_entry)
+            db.commit()
+        except:
+            db.rollback()
+            # If entry already exists, just update the reason
+            existing = db.query(cls).filter(cls.domain == domain.lower()).first()
+            if existing and reason:
+                existing.reason = reason
+                db.commit()
+
 Base.metadata.create_all(bind=engine)

 def get_db():
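For illustration, the new blacklist model might be used like this; the import path assumes the models above live in app/database.py, as the relative imports elsewhere in the commit suggest:

# Hypothetical usage of BlacklistedDomain (not part of the diff)
from app.database import SessionLocal, BlacklistedDomain

db = SessionLocal()
try:
    BlacklistedDomain.add_to_blacklist(db, "ads.example.com", reason="tracking")
    # Lookups are lowercased, so case does not matter:
    print(BlacklistedDomain.is_blacklisted(db, "ADS.example.com"))  # True
finally:
    db.close()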

app/main.py (modified)

@@ -1,15 +1,31 @@
-from fastapi import FastAPI, Depends, Query
+from fastapi import FastAPI, Depends, Query, WebSocket, WebSocketDisconnect
 from sqlalchemy.orm import Session
-from datetime import datetime
+from datetime import datetime, timezone
-from typing import List
+from typing import List, Optional
 import asyncio
+from fastapi import WebSocketDisconnect
+from urllib.parse import urlparse
+import pytz
+from fastapi.middleware.cors import CORSMiddleware
+import iso8601

 from .database import get_db, HistoryEntry, Bookmark
 from .scheduler import HistoryScheduler
+from .page_info import PageInfo
+from .page_reader import PageReader

 app = FastAPI()
 scheduler = HistoryScheduler()

+# Add CORS middleware to allow WebSocket connections
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],  # In production, specify your domains
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)

 @app.on_event("startup")
 async def startup_event():
     # Initial bookmark fetch
@@ -17,12 +33,37 @@ async def startup_event():
     # Start the background task
     asyncio.create_task(scheduler.update_history())

+def serialize_history_entry(entry, include_content: bool = False):
+    """Serialize a HistoryEntry object to a dictionary"""
+    result = {
+        "id": entry.id,
+        "url": entry.url,
+        "title": entry.title,
+        "visit_time": entry.visit_time.isoformat() if entry.visit_time else None,
+        "domain": entry.domain,
+    }
+    if include_content:
+        result["markdown_content"] = entry.markdown_content
+    return result
+
+def serialize_bookmark(bookmark):
+    """Serialize a Bookmark object to a dictionary"""
+    return {
+        "id": bookmark.id,
+        "url": bookmark.url,
+        "title": bookmark.title,
+        "added_time": bookmark.added_time.isoformat() if bookmark.added_time else None,
+        "folder": bookmark.folder,
+        "domain": bookmark.domain,
+    }
+
 @app.get("/history/search")
 async def search_history(
-    domain: str = Query(None),
+    domain: Optional[str] = Query(None),
-    start_date: datetime = Query(None),
+    start_date: Optional[datetime] = Query(None),
-    end_date: datetime = Query(None),
+    end_date: Optional[datetime] = Query(None),
-    search_term: str = Query(None),
+    search_term: Optional[str] = Query(None),
+    include_content: bool = Query(False),
     db: Session = Depends(get_db)
 ):
     query = db.query(HistoryEntry)
@@ -37,15 +78,19 @@ async def search_history(
         query = query.filter(HistoryEntry.visit_time <= end_date)

     if search_term:
-        query = query.filter(HistoryEntry.title.ilike(f"%{search_term}%"))
+        query = query.filter(
+            (HistoryEntry.title.ilike(f"%{search_term}%")) |
+            (HistoryEntry.markdown_content.ilike(f"%{search_term}%"))
+        )

-    return query.all()
+    entries = query.all()
+    return [serialize_history_entry(entry, include_content) for entry in entries]

 @app.get("/bookmarks/search")
 async def search_bookmarks(
-    domain: str = Query(None),
+    domain: Optional[str] = Query(None),
-    folder: str = Query(None),
+    folder: Optional[str] = Query(None),
-    search_term: str = Query(None),
+    search_term: Optional[str] = Query(None),
     db: Session = Depends(get_db)
 ):
     query = db.query(Bookmark)
@@ -59,4 +104,73 @@ async def search_bookmarks(
     if search_term:
         query = query.filter(Bookmark.title.ilike(f"%{search_term}%"))

-    return query.all()
+    bookmarks = query.all()
+    return [serialize_bookmark(bookmark) for bookmark in bookmarks]
+
+@app.websocket("/ws")
+async def websocket_endpoint(websocket: WebSocket, db: Session = Depends(get_db)):
+    print("WebSocket endpoint called")
+    page_reader = PageReader()
+    print("New WebSocket connection established")
+    await websocket.accept()
+    print("WebSocket connection accepted")
+
+    try:
+        while True:
+            print("Waiting for message...")
+            data = await websocket.receive_json()
+            print(f"Received message for URL: {data['url']}")
+            print(f"HTML content length: {len(data['html'])}")
+            print(f"Timestamp: {data['timestamp']}")
+
+            # Parse the ISO timestamp correctly
+            timestamp = iso8601.parse_date(data['timestamp'])
+
+            page_info = PageInfo(
+                url=data['url'],
+                html=data['html'],
+                timestamp=timestamp
+            )
+            print(f"Created PageInfo object for: {page_info.url}")
+
+            # Convert HTML to markdown
+            print("Converting HTML to markdown...")
+            markdown_content = page_reader.html_to_markdown(page_info.html)
+            print(f"Markdown conversion complete, length: {len(markdown_content) if markdown_content else 0}")
+
+            # Update or create history entry
+            domain = urlparse(page_info.url).netloc
+            print(f"Creating history entry for domain: {domain}")
+            history_entry = HistoryEntry(
+                url=page_info.url,
+                visit_time=page_info.timestamp,
+                domain=domain,
+                markdown_content=markdown_content,
+                last_content_update=datetime.now(timezone.utc)
+            )
+
+            print("Saving to database...")
+            db.add(history_entry)
+            db.commit()
+            print("Database save complete")
+
+            # Send confirmation back to client
+            await websocket.send_json({
+                "status": "success",
+                "message": f"Processed page: {page_info.url}"
+            })
+    except WebSocketDisconnect:
+        print("Client disconnected")
+    except Exception as e:
+        print(f"Error handling message: {e}")
+        # Send error back to client if possible
+        try:
+            await websocket.send_json({
+                "status": "error",
+                "message": str(e)
+            })
+        except:
+            pass
+    finally:
+        print("Cleaning up resources")
+        page_reader.close()
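The commit ships no test client for the new endpoint. A minimal sketch of exercising it from Python, assuming the API is served by uvicorn on port 8523 (the port extension/content.js connects to) and that the websockets package from requirements.txt plus an HTTP client such as requests are available:

# Hypothetical end-to-end check (not part of the commit)
import asyncio
import json
from datetime import datetime, timezone

import requests
import websockets

async def send_page():
    async with websockets.connect("ws://localhost:8523/ws") as ws:
        await ws.send(json.dumps({
            "url": "https://example.com/",
            "html": "<html><body><h1>Hello</h1></body></html>",
            "timestamp": datetime.now(timezone.utc).isoformat(),
        }))
        print(await ws.recv())  # expect {"status": "success", ...}

asyncio.run(send_page())

# The stored markdown is then searchable via the extended REST endpoint:
resp = requests.get(
    "http://localhost:8523/history/search",
    params={"search_term": "Hello", "include_content": "true"},
)
print(resp.json())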

app/page_info.py (modified)

@@ -1,16 +1,8 @@
-import asyncio
-import aiohttp
-from bs4 import BeautifulSoup
-from typing import Optional
+from dataclasses import dataclass
+from datetime import datetime

-class PageInfoFetcher:
-    async def get_page_title(self, url: str) -> Optional[str]:
-        try:
-            async with aiohttp.ClientSession() as session:
-                async with session.get(url, timeout=5) as response:
-                    if response.status == 200:
-                        html = await response.text()
-                        soup = BeautifulSoup(html, 'html.parser')
-                        return soup.title.string if soup.title else None
-        except:
-            return None
+@dataclass
+class PageInfo:
+    url: str
+    html: str
+    timestamp: datetime

app/page_reader.py (new file, +101 lines)

@@ -0,0 +1,101 @@
import re
from markdownify import markdownify as md
from bs4 import BeautifulSoup
from typing import Optional
from urllib.parse import urlparse
from .config import ReaderConfig
import logging
from .database import SessionLocal, BlacklistedDomain
# Setup logging with less verbose output
logging.basicConfig(
level=logging.WARNING,
format='%(levelname)s: %(message)s'
)
logger = logging.getLogger(__name__)
# Patterns for cleaning
SCRIPT_PATTERN = r"<[ ]*script.*?\/[ ]*script[ ]*>"
STYLE_PATTERN = r"<[ ]*style.*?\/[ ]*style[ ]*>"
META_PATTERN = r"<[ ]*meta.*?>"
COMMENT_PATTERN = r"<[ ]*!--.*?--[ ]*>"
LINK_PATTERN = r"<[ ]*link.*?>"
BASE64_IMG_PATTERN = r'<img[^>]+src="data:image/[^;]+;base64,[^"]+"[^>]*>'
SVG_PATTERN = r"(<svg[^>]*>)(.*?)(<\/svg>)"
class PageReader:
    def __init__(self):
        self.config = ReaderConfig()
        self.db = SessionLocal()

    def clean_html(self, html: str) -> str:
        """Clean HTML by removing unwanted elements and patterns."""
        if not html:
            return ""

        # First use regex to remove problematic patterns
        html = re.sub(SCRIPT_PATTERN, "", html, flags=re.IGNORECASE | re.MULTILINE | re.DOTALL)
        html = re.sub(STYLE_PATTERN, "", html, flags=re.IGNORECASE | re.MULTILINE | re.DOTALL)
        html = re.sub(META_PATTERN, "", html, flags=re.IGNORECASE | re.MULTILINE | re.DOTALL)
        html = re.sub(COMMENT_PATTERN, "", html, flags=re.IGNORECASE | re.MULTILINE | re.DOTALL)
        html = re.sub(LINK_PATTERN, "", html, flags=re.IGNORECASE | re.MULTILINE | re.DOTALL)
        html = re.sub(SVG_PATTERN, "", html, flags=re.IGNORECASE | re.MULTILINE | re.DOTALL)
        html = re.sub(BASE64_IMG_PATTERN, "", html)

        try:
            # Use BeautifulSoup to remove additional elements we want to strip
            soup = BeautifulSoup(html, 'html.parser')

            # Remove unwanted elements
            elements_to_remove = [
                'canvas', 'img', 'picture', 'audio', 'video',
                'iframe', 'embed', 'object', 'param', 'track',
                'map', 'area', 'source'
            ]
            for element in elements_to_remove:
                for tag in soup.find_all(element):
                    tag.decompose()

            return str(soup)
        except Exception as e:
            logger.error(f"Error cleaning HTML: {e}")
            return ""

    def clean_whitespace(self, text: str) -> str:
        """Clean excessive whitespace from text."""
        if not text:
            return ""

        try:
            # Replace 3 or more newlines with 2 newlines
            cleaned = re.sub(r'\n{3,}', '\n\n', text)
            # Remove trailing whitespace from each line
            cleaned = '\n'.join(line.rstrip() for line in cleaned.splitlines())
            return cleaned.strip()
        except Exception as e:
            logger.error(f"Error cleaning whitespace: {e}")
            return ""

    def html_to_markdown(self, html: str) -> Optional[str]:
        """Convert HTML to markdown."""
        try:
            cleaned_html = self.clean_html(html)
            if not cleaned_html:
                return None

            return self.clean_whitespace(md(cleaned_html,
                                            heading_style="ATX",
                                            bullets="-",
                                            autolinks=True,
                                            strip=['form'],
                                            escape_asterisks=True,
                                            escape_underscores=True))
        except Exception as e:
            logger.error(f"Error converting to markdown: {e}")
            return None

    def close(self):
        """Cleanup resources"""
        self.db.close()
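A short sketch of the conversion path, using an inline HTML snippet rather than a live page:

# Example usage (not part of the commit)
from app.page_reader import PageReader

reader = PageReader()
html = ("<html><head><script>alert('x')</script></head>"
        "<body><h1>Title</h1><p>Some <b>bold</b> text.</p><img src='x.png'></body></html>")
print(reader.html_to_markdown(html))
# Roughly:
# # Title
#
# Some **bold** text.
reader.close()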

app/scheduler.py (modified)

@@ -3,14 +3,29 @@ from datetime import datetime, timedelta
 import asyncio
 from .database import SessionLocal, HistoryEntry, Bookmark
 from .browser import BrowserHistoryCollector
-from .page_info import PageInfoFetcher
+from .page_reader import PageReader
 from sqlalchemy import func
+from sqlalchemy.orm import Session
+import pytz

 class HistoryScheduler:
     def __init__(self):
         self.browser_collector = BrowserHistoryCollector()
-        self.page_fetcher = PageInfoFetcher()
+        self.page_reader = PageReader()
         self.last_history_update = None
+        self.content_update_interval = timedelta(hours=24)  # Update content daily
+
+    def _normalize_datetime(self, dt: datetime) -> datetime:
+        """Convert datetime to UTC if it has timezone, or make it timezone-aware if it doesn't"""
+        if dt is None:
+            return None
+        # If datetime is naive (no timezone), assume it's in UTC
+        if dt.tzinfo is None:
+            return pytz.UTC.localize(dt)
+        # If datetime has timezone, convert to UTC
+        return dt.astimezone(pytz.UTC)

     async def update_bookmarks(self):
         bookmarks = self.browser_collector.fetch_bookmarks()
@@ -26,6 +41,9 @@ class HistoryScheduler:
         new_entries = []
         for added_time, url, title, folder in bookmarks:
+            # Normalize the datetime
+            added_time = self._normalize_datetime(added_time)
+
             # Only add if URL doesn't exist or if it's in a different folder
             if (url not in existing_urls or
                     existing_urls[url][1] != folder):
@@ -51,6 +69,8 @@
         try:
             # Get the latest timestamp from our database
             latest_entry = db.query(func.max(HistoryEntry.visit_time)).scalar()
+            if latest_entry:
+                latest_entry = self._normalize_datetime(latest_entry)

             # Fetch new history
             history = self.browser_collector.fetch_history()
@@ -58,11 +78,11 @@
             # Filter to only get entries newer than our latest entry
             new_entries = []
             for visit_time, url, title in history:
+                # Normalize the datetime
+                visit_time = self._normalize_datetime(visit_time)
                 if not latest_entry or visit_time > latest_entry:
                     domain = self.browser_collector.get_domain(url)
-                    if not title:
-                        title = await self.page_fetcher.get_page_title(url)
                     entry = HistoryEntry(
                         url=url,
                         title=title,
@@ -83,3 +103,7 @@
             # Wait for 5 minutes before next update
             await asyncio.sleep(300)
+
+    async def close(self):
+        """Cleanup resources"""
+        await self.page_reader.close()
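For reference, the normalization added above behaves like this (illustrative values, not part of the commit):

# Illustration of _normalize_datetime semantics
from datetime import datetime
import pytz

naive = datetime(2025, 1, 25, 22, 42)                     # no tzinfo
aware = pytz.timezone("America/Chicago").localize(naive)  # 22:42 at UTC-6 in January

# Naive datetimes are assumed to already be in UTC:
#   pytz.UTC.localize(naive)   -> 2025-01-25 22:42:00+00:00
# Aware datetimes are converted to UTC:
#   aware.astimezone(pytz.UTC) -> 2025-01-26 04:42:00+00:00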

app/websocket_server.py (new file, +33 lines)

@@ -0,0 +1,33 @@
import asyncio
import websockets
import json
from page_info import PageInfo
from datetime import datetime
async def handle_websocket(websocket, path):
    try:
        async for message in websocket:
            data = json.loads(message)
            page_info = PageInfo(
                url=data['url'],
                html=data['html'],
                timestamp=datetime.fromisoformat(data['timestamp'])
            )
            print(f"Received page content from: {page_info.url}")
            # Here you can process the page_info object as needed
    except websockets.exceptions.ConnectionClosed:
        print("Client disconnected")
    except Exception as e:
        print(f"Error handling message: {e}")

async def start_server():
    server = await websockets.serve(handle_websocket, "localhost", 8765)
    print("WebSocket server started on ws://localhost:8765")
    await server.wait_closed()

def run_server():
    asyncio.run(start_server())

if __name__ == "__main__":
    run_server()

config/reader_config.yaml (new file, +15 lines)

@@ -0,0 +1,15 @@
# Domains to exclude from content reading
excluded_domains:
# Local sites
- localhost
- 127.0.0.1
# IP ranges
- 192.168.*.*
- 10.*.*.*
- 172.16.*.*
# Example wildcard patterns
# - *.local
# - reddit-*.com
# - *.githubusercontent.com

extension/background.js (new file, +47 lines)

@@ -0,0 +1,47 @@
console.log("Background script loaded");
async function isContentScriptReady(tabId) {
    try {
        await browser.tabs.sendMessage(tabId, { type: "PING" });
        return true;
    } catch (error) {
        return false;
    }
}

async function waitForContentScript(tabId, maxAttempts = 10) {
    console.log(`Waiting for content script in tab ${tabId}`);
    for (let i = 0; i < maxAttempts; i++) {
        if (await isContentScriptReady(tabId)) {
            console.log(`Content script ready in tab ${tabId}`);
            return true;
        }
        console.log(`Attempt ${i + 1}: Content script not ready, waiting...`);
        await new Promise(resolve => setTimeout(resolve, 500));
    }
    console.log(`Content script not ready after ${maxAttempts} attempts`);
    return false;
}

async function sendMessageToTab(tabId) {
    try {
        console.log(`Checking content script status for tab ${tabId}`);
        if (await waitForContentScript(tabId)) {
            console.log(`Sending GET_PAGE_CONTENT message to tab ${tabId}`);
            await browser.tabs.sendMessage(tabId, {
                type: "GET_PAGE_CONTENT"
            });
            console.log(`Successfully sent message to tab ${tabId}`);
        }
    } catch (error) {
        console.error(`Error sending message to tab ${tabId}:`, error);
    }
}

browser.webNavigation.onCompleted.addListener(async (details) => {
    console.log("Navigation completed", details);
    if (details.frameId === 0) { // Only handle main frame navigation
        console.log(`Main frame navigation detected for tab ${details.tabId}`);
        await sendMessageToTab(details.tabId);
    }
});

extension/content.js (new file, +132 lines)

@@ -0,0 +1,132 @@
console.log("Content script starting initialization...");
// Function to log WebSocket state
function getWebSocketState(ws) {
    const states = {
        0: 'CONNECTING',
        1: 'OPEN',
        2: 'CLOSING',
        3: 'CLOSED'
    };
    return states[ws.readyState] || 'UNKNOWN';
}

class WebSocketClient {
    constructor() {
        console.log("WebSocketClient constructor called");
        this.messageQueue = [];
        this.connect();
        this.reconnectAttempts = 0;
        this.maxReconnectAttempts = 5;
    }

    connect() {
        console.log('Attempting to connect to WebSocket server...');
        try {
            this.ws = new WebSocket('ws://localhost:8523/ws');
            console.log('WebSocket instance created');

            this.ws.addEventListener('open', () => {
                console.log('WebSocket connection opened successfully');
                this.reconnectAttempts = 0;
                // Process any queued messages
                this.processQueue();
            });

            this.ws.addEventListener('error', (event) => {
                console.error('WebSocket error occurred:', event);
            });

            this.ws.addEventListener('close', (event) => {
                console.log('WebSocket connection closed:', event.code, event.reason);
                this.tryReconnect();
            });

            this.ws.addEventListener('message', (event) => {
                console.log('Received message from server:', event.data);
            });
        } catch (error) {
            console.error('Error creating WebSocket:', error);
        }
    }

    processQueue() {
        console.log(`Processing message queue (${this.messageQueue.length} messages)`);
        while (this.messageQueue.length > 0) {
            const data = this.messageQueue.shift();
            this.sendMessage(data);
        }
    }

    tryReconnect() {
        if (this.reconnectAttempts < this.maxReconnectAttempts) {
            this.reconnectAttempts++;
            console.log(`Attempting to reconnect (${this.reconnectAttempts}/${this.maxReconnectAttempts})...`);
            setTimeout(() => this.connect(), 2000 * this.reconnectAttempts);
        } else {
            console.log('Max reconnection attempts reached');
        }
    }

    sendMessage(data) {
        console.log('sendMessage called, WebSocket state:', getWebSocketState(this.ws));
        if (this.ws.readyState === WebSocket.OPEN) {
            try {
                console.log('Preparing to send data:', {
                    url: data.url,
                    timestamp: data.timestamp,
                    htmlLength: data.html.length
                });
                this.ws.send(JSON.stringify(data));
                console.log('Data sent successfully');
                return true;
            } catch (error) {
                console.error('Error sending data:', error);
                return false;
            }
        } else {
            console.log('WebSocket not ready, queueing message');
            this.messageQueue.push(data);
            return true;
        }
    }
}

console.log("Creating WebSocketClient instance...");
const wsClient = new WebSocketClient();

console.log("Setting up message listener...");
browser.runtime.onMessage.addListener((message, sender, sendResponse) => {
    console.log('Message received from background script:', message);

    if (message.type === "PING") {
        console.log('Received PING, responding...');
        return Promise.resolve({ status: "ready" });
    }

    if (message.type === "GET_PAGE_CONTENT") {
        console.log('Processing GET_PAGE_CONTENT message');
        const pageContent = {
            url: window.location.href,
            html: document.documentElement.outerHTML,
            timestamp: new Date().toISOString().replace(/\.\d{3}Z$/, 'Z')
        };
        console.log('Created page content object for:', pageContent.url);
        wsClient.sendMessage(pageContent);
    }
    return true;
});

// Send initial page content
console.log('Sending initial page content...');
const pageContent = {
    url: window.location.href,
    html: document.documentElement.outerHTML,
    timestamp: new Date().toISOString().replace(/\.\d{3}Z$/, 'Z')
};
wsClient.sendMessage(pageContent);

console.log("Content script initialization complete for:", window.location.href);

extension/manifest.json (new file, +35 lines)

@@ -0,0 +1,35 @@
{
    "manifest_version": 2,
    "name": "Page Content Sender",
    "version": "1.0",
    "description": "Sends page content via WebSocket when a page loads",
    "permissions": [
        "webNavigation",
        "activeTab",
        "<all_urls>",
        "tabs"
    ],
    "background": {
        "scripts": [
            "background.js"
        ],
        "persistent": true
    },
    "content_scripts": [
        {
            "matches": [
                "<all_urls>"
            ],
            "js": [
                "content.js"
            ],
            "run_at": "document_idle",
            "all_frames": false
        }
    ],
    "browser_specific_settings": {
        "gecko": {
            "id": "page-content-sender@example.com"
        }
    }
}

View File

@@ -82,9 +82,3 @@ def html_to_markdown(url: str) -> str:
     # Clean up excessive whitespace
     return clean_whitespace(markdown)
-
-if __name__ == "__main__":
-    # Example usage
-    url = "https://reddit.com"
-    markdown_content = html_to_markdown(url)
-    print(markdown_content)

requirements.txt (modified)

@@ -1,8 +1,10 @@
-fastapi==0.109.2
-uvicorn==0.27.1
-sqlalchemy==2.0.27
-browser-history==0.4.1
-aiohttp==3.9.3
-beautifulsoup4==4.12.3
-httpx==0.27.0
-markdownify==0.11.6
+fastapi
+uvicorn
+sqlalchemy
+browser-history
+beautifulsoup4
+markdownify
+pyyaml
+pytz
+websockets==11.0.3
+iso8601==2.1.0

terminal (new file, +1 line)

@@ -0,0 +1 @@
rm app/websocket_server.py