import yaml
from pathlib import Path
from typing import Set
import fnmatch

class ReaderConfig:
    """Load the reader's exclusion list from YAML and answer exclusion queries.

    The configuration file is expected to contain a mapping with an
    ``excluded_domains`` list.  Each entry is either a glob-style host name
    pattern (``*`` and ``?`` wildcards) or an IPv4 pattern such as
    ``192.168.*.*``.  If the file does not exist, a default one is written
    first and then loaded.
    """

    # Location used when the constructor is called without an explicit path.
    DEFAULT_CONFIG_PATH = Path("config/reader_config.yaml")

    def __init__(self, config_path: Path = DEFAULT_CONFIG_PATH):
        """
        Create the configuration and immediately load it from disk.

        Args:
            config_path: Location of the YAML file (a ``Path`` or a plain
                string).  Defaults to ``config/reader_config.yaml``.
        """
        self._config_path = Path(config_path)
        self.excluded_patterns: Set[str] = set()
        self._load_config()

    def _load_config(self):
        """
        Populate ``excluded_patterns`` from the YAML file.

        A missing file is replaced by a freshly written default config.
        Any failure while reading or parsing is reported on stdout and
        leaves ``excluded_patterns`` empty.
        """
        config_path = self._config_path
        if not config_path.exists():
            print("Warning: reader_config.yaml not found, creating default config")
            self._create_default_config(config_path)

        try:
            with open(config_path, 'r', encoding='utf-8') as f:
                config = yaml.safe_load(f)
            self.excluded_patterns = self._parse_patterns(config)
        except Exception as e:
            # Deliberately broad: a broken config must never prevent the
            # reader from starting; it just runs without exclusions.
            print(f"Error loading config: {e}")
            self.excluded_patterns = set()

    @staticmethod
    def _parse_patterns(config) -> Set[str]:
        """
        Extract the exclusion patterns from a parsed YAML document.

        Args:
            config: The value returned by ``yaml.safe_load``.  ``None``
                (an empty file) is treated as "no exclusions".

        Returns:
            The set of non-empty patterns, each converted to ``str``.

        Raises:
            ValueError: If the document is neither empty nor a mapping.
        """
        if config is None:
            return set()
        if not isinstance(config, dict):
            raise ValueError("top-level YAML value must be a mapping")

        entries = config.get('excluded_domains') or []
        if isinstance(entries, str):
            # A bare string would otherwise be split into single characters.
            entries = [entries]

        patterns = set()
        for entry in entries:
            if entry is None:
                continue
            # YAML may parse unquoted entries as numbers; matching needs text.
            text = str(entry).strip()
            if text:
                patterns.add(text)
        return patterns

    def _create_default_config(self, config_path: Path):
        """
        Write a default config that excludes loopback and private addresses.

        Args:
            config_path: Destination file; missing parent directories are
                created.
        """
        config_path.parent.mkdir(parents=True, exist_ok=True)
        default_config = {
            'excluded_domains': [
                'localhost',
                '127.0.0.1',
                '192.168.*.*',
                '10.*.*.*'
            ]
        }
        with open(config_path, 'w', encoding='utf-8') as f:
            yaml.safe_dump(default_config, f, default_flow_style=False)

    def is_domain_excluded(self, domain: str) -> bool:
        """
        Check if a domain matches any exclusion pattern.

        Matching is case-insensitive and supports glob-style wildcards
        (* and ?).  A host-name pattern also matches its subdomains.
        IPv4 patterns are compared octet by octet and therefore never match
        ordinary host names.

        Examples:
            - '*.example.com' matches any subdomain of example.com
            - 'example.com' matches example.com and sub.example.com
            - 'reddit-*.com' matches reddit-video.com, reddit-static.com, etc.
            - '192.168.*.*' matches any IP in the 192.168.0.0/16 subnet,
              but not a host name such as '192.168.cdn.example'

        Args:
            domain: Host name or IPv4 address to test.

        Returns:
            True if at least one pattern excludes ``domain``.
        """
        domain = domain.lower()

        for pattern in self.excluded_patterns:
            pattern = pattern.lower()

            # IPv4 patterns get strict octet matching only: a glob '*' would
            # span dots and wrongly exclude hosts like '10.ads.example.org'.
            if self._is_ip_pattern(pattern):
                if self._match_ip_pattern(domain, pattern):
                    return True
                continue

            # Literal equality first, so patterns containing glob
            # metacharacters (e.g. '[::1]') still match themselves.
            if domain == pattern:
                return True

            # fnmatchcase: both sides are already lower-cased, and this
            # avoids the platform-dependent normalisation done by fnmatch().
            if fnmatch.fnmatchcase(domain, pattern):
                return True
            # Also check if the pattern matches when prepended with a dot
            # This handles cases like 'example.com' matching 'subdomain.example.com'
            if fnmatch.fnmatchcase(domain, f"*.{pattern}"):
                return True

        return False

    @staticmethod
    def _is_octet(part: str) -> bool:
        """Return True if ``part`` is a non-empty run of ASCII digits."""
        return part != "" and all(ch in "0123456789" for ch in part)

    def _is_ip_pattern(self, pattern: str) -> bool:
        """
        Return True if ``pattern`` looks like an IPv4 address pattern.

        That means exactly four dot-separated fields, each either ``*`` or
        all digits, with at least one numeric field (so ``*.*.*.*`` remains
        an ordinary glob).
        """
        parts = pattern.split('.')
        if len(parts) != 4:
            return False
        if not all(part == '*' or self._is_octet(part) for part in parts):
            return False
        return any(part != '*' for part in parts)

    def _match_ip_pattern(self, domain: str, pattern: str) -> bool:
        """
        Special handling for IP address patterns.
        Handles cases like '192.168.*.*' matching '192.168.1.1'

        A ``*`` field matches exactly one numeric octet; any other field
        must be equal to the corresponding field of ``domain``.
        """
        # Skip if domain isn't IP-like
        if not any(c.isdigit() for c in domain):
            return False

        # Split into octets
        domain_parts = domain.split('.')
        pattern_parts = pattern.split('.')

        # Must have same number of parts
        if len(domain_parts) != len(pattern_parts):
            return False

        # Check each octet
        for domain_part, pattern_part in zip(domain_parts, pattern_parts):
            if pattern_part == '*':
                # A wildcard stands for one octet, not for arbitrary text.
                if not self._is_octet(domain_part):
                    return False
            elif domain_part != pattern_part:
                return False

        return True