|
11 | 11 | import zlib
|
12 | 12 | import gzip
|
13 | 13 | from time import time
|
| 14 | +from fnmatch import fnmatch |
14 | 15 | from html import escape as html_escape
|
15 | 16 | import threading
|
16 | 17 | from urllib.parse import urljoin, urlsplit, urlunsplit, quote_plus
|
|
44 | 45 | errprint('Can Not Create Local File Cache: ', e, ' local file cache is disabled automatically.')
|
45 | 46 | local_cache_enable = False
|
46 | 47 |
|
47 |
| -__VERSION__ = '0.18.6-dev' |
| 48 | +__VERSION__ = '0.19.0-dev' |
48 | 49 | __author__ = 'Aploium <[email protected]>'
|
49 | 50 |
|
50 | 51 | # ########## Basic Init #############
|
|
91 | 92 | if not isinstance(target_static_domains, set):
|
92 | 93 | target_static_domains = set()
|
93 | 94 |
|
# With the automatic whitelist feature switched off, leave no glob patterns
# behind, so is_domain_match_glob_whitelist() has nothing to match against.
if not enable_automatic_domains_whitelist:
    domains_whitelist_auto_add_glob_list = ()
| 97 | + |
94 | 98 | if not enable_individual_sites_isolation:
|
95 | 99 | isolated_domains = set()
|
96 | 100 | else:
|
|
210 | 214 | #
|
211 | 215 |
|
212 | 216 | # ########## Begin Utils #############
|
@lru_cache(maxsize=8192)
def is_domain_match_glob_whitelist(domain):
    """Tell whether `domain` matches any glob in domains_whitelist_auto_add_glob_list.

    Each pattern is tested with fnmatch(); results are memoized per domain
    by the lru_cache decorator (up to 8192 distinct domains).

    :param domain: the domain name to test
    :return: True if at least one glob pattern matches, otherwise False
    """
    return any(
        fnmatch(domain, pattern)
        for pattern in domains_whitelist_auto_add_glob_list
    )
| 223 | + |
| 224 | + |
def try_match_and_add_domain_to_rewrite_white_list(domain):
    """Add `domain` to the rewrite whitelist if it matches a whitelist glob.

    A domain that is already in external_domains_set, or that equals
    target_domain, is accepted without any change. Otherwise the domain is
    checked with is_domain_match_glob_whitelist(); on a match it is appended
    to external_domains, added to external_domains_set and
    allowed_domains_set, and recorded in 'automatic_domains_whitelist.log'.

    :param domain: domain name to check; None or an empty value is rejected
    :return: True if the domain is (now) whitelisted, False otherwise
    """
    # The global declaration must precede every use of these names in the
    # function: Python 3.6+ raises SyntaxError ("name ... is used prior to
    # global declaration") when it follows a read such as the membership
    # test below.
    global external_domains, external_domains_set, allowed_domains_set

    if not domain:  # covers both None and the empty string
        return False
    if domain in external_domains_set or domain == target_domain:
        return True
    if not is_domain_match_glob_whitelist(domain):
        return False

    infoprint('A domain:', domain, 'was added to whitelist')

    # external_domains is kept as a tuple, so rebuild it with the new entry.
    external_domains = tuple(external_domains) + (domain,)
    external_domains_set.add(domain)
    allowed_domains_set.add(domain)

    # write log (best effort: a failed write must not undo the whitelisting)
    try:
        with open('automatic_domains_whitelist.log', 'a', encoding='utf-8') as fp:
            fp.write(domain + '\n')
    except Exception:
        # Not a bare except, so KeyboardInterrupt/SystemExit still propagate.
        traceback.print_exc()

    return True
| 250 | + |
213 | 251 |
|
214 | 252 | def current_line_number():
|
215 | 253 | """Returns the current line number in our program."""
|
@@ -580,6 +618,10 @@ def regex_url_reassemble(match_obj):
|
580 | 618 | # dbgprint('returned_un_touch', whole_match_string)
|
581 | 619 | return whole_match_string
|
582 | 620 |
|
| 621 | + # v0.19.0+ Automatic Domains Whitelist (Experimental) |
| 622 | + if enable_automatic_domains_whitelist: |
| 623 | + try_match_and_add_domain_to_rewrite_white_list(match_domain) |
| 624 | + |
583 | 625 | remote_domain, _is_remote_https, remote_path = extract_real_domain_from_url_may_have_extdomains()
|
584 | 626 | # dbgprint('remote_path:', remote_path, 'remote_domain:', remote_domain, 'match_domain', match_domain, v=5)
|
585 | 627 | # dbgprint(match_obj.groups(), v=5)
|
|
0 commit comments