mirror of
https://github.com/Zetaphor/browser-recall.git
synced 2025-12-06 10:29:38 +00:00
Fix exclusion
This commit is contained in:
@@ -3,15 +3,14 @@ excluded_domains:
|
|||||||
- "identity.getpostman.com"
|
- "identity.getpostman.com"
|
||||||
- "localhost"
|
- "localhost"
|
||||||
- "127.0.0.1"
|
- "127.0.0.1"
|
||||||
- "*.namecheap.com*"
|
- "namecheap.com"
|
||||||
- "us.ovhcloud.com*"
|
- "us.ovhcloud.com"
|
||||||
- "*.linode.com*"
|
- "linode.com"
|
||||||
- "github.com/settings/*"
|
- "github.com/settings/"
|
||||||
- "*.secure.backblaze.com*"
|
- "secure.backblaze.com"
|
||||||
- "*.login.*"
|
- "login."
|
||||||
- "*.identity.*"
|
- "identity."
|
||||||
- "*.auth.*"
|
- ".auth."
|
||||||
- "192.168.*.*"
|
- "192.168."
|
||||||
- "10.*.*.*"
|
- "172.16."
|
||||||
- "172.16.*.*"
|
- "0.0.0."
|
||||||
- "0.0.0.*"
|
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
import yaml
|
import yaml
|
||||||
from fnmatch import fnmatch
|
from fnmatch import fnmatch
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
class DomainExclusions:
|
class DomainExclusions:
|
||||||
def __init__(self, config_path="config/history_config.yaml"):
|
def __init__(self, config_path="config/history_config.yaml"):
|
||||||
@@ -14,42 +15,43 @@ class DomainExclusions:
|
|||||||
|
|
||||||
# Handle both direct list and dict with 'excluded_domains' key
|
# Handle both direct list and dict with 'excluded_domains' key
|
||||||
if isinstance(config, list):
|
if isinstance(config, list):
|
||||||
self.excluded_domains = config
|
loaded_patterns = config
|
||||||
|
elif isinstance(config, dict):
|
||||||
|
loaded_patterns = config.get('excluded_domains', [])
|
||||||
else:
|
else:
|
||||||
self.excluded_domains = config.get('excluded_domains', [])
|
loaded_patterns = [] # Handle other invalid config types
|
||||||
|
|
||||||
|
# Basic validation/cleaning of patterns
|
||||||
|
self.excluded_domains = [
|
||||||
|
str(p).strip() for p in loaded_patterns if p and isinstance(p, str)
|
||||||
|
]
|
||||||
|
# Optional: Warn if some patterns were ignored
|
||||||
|
# if len(self.excluded_domains) != len(loaded_patterns):
|
||||||
|
# print(f"Warning: Some invalid patterns were ignored in {config_path}")
|
||||||
|
|
||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
print(f"Warning: Configuration file {config_path} not found. No domains will be excluded.")
|
print(f"Warning: Configuration file {config_path} not found. No domains will be excluded.")
|
||||||
|
self.excluded_domains = [] # Ensure it's empty on error
|
||||||
except yaml.YAMLError as e:
|
except yaml.YAMLError as e:
|
||||||
print(f"Error parsing YAML configuration: {e}")
|
print(f"Error parsing YAML configuration: {e}")
|
||||||
self.excluded_domains = []
|
self.excluded_domains = []
|
||||||
|
except Exception as e: # Catch other potential errors
|
||||||
|
print(f"An unexpected error occurred during config loading: {e}")
|
||||||
|
self.excluded_domains = []
|
||||||
|
|
||||||
def is_excluded(self, domain):
|
def is_excluded(self, url_string):
|
||||||
"""
|
if not url_string or not isinstance(url_string, str):
|
||||||
Check if a domain matches any of the excluded domain patterns.
|
return True
|
||||||
"""
|
|
||||||
# Strip protocol (http:// or https://) if present
|
|
||||||
domain = domain.lower().strip('/')
|
|
||||||
if '://' in domain:
|
|
||||||
domain = domain.split('://', 1)[1]
|
|
||||||
|
|
||||||
# Strip query parameters if present
|
input_url = url_string.strip()
|
||||||
if '?' in domain:
|
|
||||||
domain = domain.split('?', 1)[0]
|
|
||||||
|
|
||||||
# Split domain and path
|
# If the url starts with www, remove it
|
||||||
if '/' in domain:
|
if input_url.startswith('www.'):
|
||||||
domain = domain.split('/', 1)[0]
|
input_url = input_url[4:]
|
||||||
|
|
||||||
for pattern in self.excluded_domains:
|
for pattern in self.excluded_domains:
|
||||||
pattern = pattern.lower().strip('/')
|
if pattern in input_url:
|
||||||
if '/' in pattern:
|
|
||||||
pattern = pattern.split('/', 1)[0]
|
|
||||||
|
|
||||||
# Remove trailing wildcard if present
|
|
||||||
if pattern.endswith('*'):
|
|
||||||
pattern = pattern.rstrip('*').rstrip('.')
|
|
||||||
|
|
||||||
# Use fnmatch for proper wildcard pattern matching
|
|
||||||
if fnmatch(domain, pattern):
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
# If no patterns matched
|
||||||
return False
|
return False
|
||||||
Reference in New Issue
Block a user