mirror of
https://github.com/Zetaphor/browser-recall.git
synced 2025-12-06 10:29:38 +00:00
All the things
This commit is contained in:
92
app/config.py
Normal file
92
app/config.py
Normal file
@@ -0,0 +1,92 @@
|
||||
import yaml
|
||||
from pathlib import Path
|
||||
from typing import Set
|
||||
import fnmatch
|
||||
|
||||
class ReaderConfig:
    """Exclusion rules loaded from ``config/reader_config.yaml``.

    The YAML file is expected to hold an ``excluded_domains`` list of
    glob-style patterns. If the file does not exist, a default one is written
    first. Loading is best-effort: any failure is printed and leaves
    ``excluded_patterns`` as an empty set.
    """

    # ASCII digits only, so Unicode digit characters are never treated as
    # part of an IPv4 octet.
    _DIGITS = frozenset("0123456789")

    def __init__(self):
        # Raw patterns as read from the config; lowercased at match time.
        self.excluded_patterns: Set[str] = set()
        self._load_config()

    def _load_config(self):
        """Populate ``excluded_patterns`` from the YAML config file.

        Creates the default config when the file is missing. Creation happens
        inside the same ``try`` as loading, so a failure to write the default
        file degrades to an empty pattern set instead of raising from the
        constructor.
        """
        config_path = Path("config/reader_config.yaml")
        try:
            if not config_path.exists():
                print("Warning: reader_config.yaml not found, creating default config")
                self._create_default_config(config_path)

            with open(config_path, 'r', encoding='utf-8') as f:
                config = yaml.safe_load(f)
            self.excluded_patterns = self._normalize_patterns(config)
        except Exception as e:
            # Deliberately broad: a broken config must not stop the caller
            # from constructing a ReaderConfig.
            print(f"Error loading config: {e}")
            self.excluded_patterns = set()

    @staticmethod
    def _normalize_patterns(config) -> Set[str]:
        """Turn the parsed YAML document into a clean set of pattern strings.

        Handles shapes that ``yaml.safe_load`` can legitimately return:
        ``None`` for an empty file, a non-mapping document, a missing or null
        ``excluded_domains`` key, a single scalar instead of a list, and
        non-string entries (coerced with ``str``). Blank and null entries are
        dropped.
        """
        if not isinstance(config, dict):
            return set()

        raw = config.get('excluded_domains')
        if raw is None:
            return set()
        # A bare string must stay one pattern; iterating it would yield one
        # pattern per character.
        if isinstance(raw, str) or not hasattr(raw, '__iter__'):
            raw = [raw]

        patterns = set()
        for entry in raw:
            if entry is None:
                continue
            text = str(entry).strip()
            if text:
                patterns.add(text)
        return patterns

    def _create_default_config(self, config_path: Path):
        """Write a default config excluding localhost and private IPv4 ranges.

        Parent directories of ``config_path`` are created as needed.
        """
        config_path.parent.mkdir(parents=True, exist_ok=True)
        default_config = {
            'excluded_domains': [
                'localhost',
                '127.0.0.1',
                '192.168.*.*',
                '10.*.*.*',
            ]
        }
        with open(config_path, 'w', encoding='utf-8') as f:
            yaml.safe_dump(default_config, f, default_flow_style=False)

    def is_domain_excluded(self, domain: str) -> bool:
        """Check if a domain matches any exclusion pattern.

        Matching is case-insensitive and supports glob-style wildcards
        (``*`` and ``?``). A domain pattern also matches its subdomains.

        Examples:
            - '*.example.com' matches any subdomain of example.com
            - 'example.com' matches example.com and sub.example.com
            - 'reddit-*.com' matches reddit-video.com, reddit-static.com, etc.
            - '192.168.*.*' matches any IP in the 192.168.0.0/16 subnet

        Four-part patterns made only of numbers and ``*`` are treated as IPv4
        patterns and compared octet by octet against dotted-quad addresses
        only, so '192.168.*.*' does not match a hostname such as
        '192.168.evil.com'.

        Args:
            domain: Host name or IPv4 address to test. ``None`` is treated as
                an empty string.

        Returns:
            True if any pattern in ``excluded_patterns`` matches.
        """
        domain = (domain or "").strip().lower()

        for pattern in self.excluded_patterns:
            # str() guards against non-string entries assigned directly to
            # excluded_patterns.
            pattern = str(pattern).lower()

            if self._is_ip_pattern(pattern):
                if self._match_ip_pattern(domain, pattern):
                    return True
                # Never glob an IPv4 pattern: '*' in fnmatch spans dots and
                # would match arbitrary hostnames.
                continue

            # fnmatchcase: both sides are already lowercased, and unlike
            # fnmatch() it applies no OS-dependent normalisation.
            if fnmatch.fnmatchcase(domain, pattern):
                return True
            # 'example.com' should also cover 'subdomain.example.com'.
            if fnmatch.fnmatchcase(domain, f"*.{pattern}"):
                return True

        return False

    @classmethod
    def _is_octet(cls, part: str) -> bool:
        """Return True if ``part`` is a non-empty run of ASCII digits."""
        return bool(part) and set(part) <= cls._DIGITS

    @classmethod
    def _is_ipv4(cls, value: str) -> bool:
        """Return True if ``value`` looks like a dotted-quad IPv4 address.

        Only the shape is checked (four all-digit parts); octets are not
        range-checked.
        """
        parts = value.split('.')
        return len(parts) == 4 and all(cls._is_octet(p) for p in parts)

    @classmethod
    def _is_ip_pattern(cls, pattern: str) -> bool:
        """Return True if ``pattern`` is unambiguously an IPv4 pattern.

        That means exactly four parts, each a number or ``*``, with at least
        one number. Anything else (e.g. '10.*' or '163.*') is left to glob
        matching.
        """
        parts = pattern.split('.')
        if len(parts) != 4:
            return False
        if not all(p == '*' or cls._is_octet(p) for p in parts):
            return False
        return any(p != '*' for p in parts)

    def _match_ip_pattern(self, domain: str, pattern: str) -> bool:
        """Match an IPv4 address against an octet pattern.

        Handles cases like '192.168.*.*' matching '192.168.1.1'. Each ``*``
        stands for exactly one octet; other parts must be equal as strings.

        Returns:
            False if ``domain`` is not a dotted-quad address or the number of
            parts differs; otherwise whether every octet matches.
        """
        if not self._is_ipv4(domain):
            return False

        domain_parts = domain.split('.')
        pattern_parts = pattern.split('.')
        if len(domain_parts) != len(pattern_parts):
            return False

        return all(
            pattern_part == '*' or pattern_part == domain_part
            for domain_part, pattern_part in zip(domain_parts, pattern_parts)
        )
|
||||
Reference in New Issue
Block a user