@@ -445,84 +445,78 @@ process_domains() {
445445
446446 log " Classifying domains from: $input "
447447
448- # Create all required directories and files
449448 mkdir -p " ${output_dir} /{second,regional,other}"
450449
451450 local second_level=" ${output_dir} /second.txt"
452451 local regional=" ${output_dir} /regional.txt"
453452 local other=" ${output_dir} /other.txt"
454453 local base_domains=" ${output_dir} /base_domains.tmp"
454+ local domain_registry=" ${output_dir} /domain_registry.tmp"
455455
456- # Create all files from scratch
457456 : > " $second_level "
458457 : > " $regional "
459458 : > " $other "
460459 : > " $base_domains "
460+ : > " $domain_registry "
461461
462- # Check that all files are created successfully
463- for file in " $second_level " " $regional " " $other " " $base_domains " ; do
464- if [[ ! -f " $file " ]]; then
465- log " ERROR: Failed to create file $file "
466- return 1
467- fi
468- done
469-
470- log " Directories and files prepared for classification"
471-
472- # First pass - find all second-level and regional domains
462+ # First pass - register all domains and their levels
473463 while IFS= read -r domain; do
474464 local parts
475465 IFS=' .' read -ra parts <<< " $domain"
476466 local levels=${# parts[@]}
477467
478- # Limit to 4th level
468+ # Limit to 4th level but preserve original structure
479469 if [[ $levels -gt 4 ]]; then
480470 domain=" ${parts[-4]} .${parts[-3]} .${parts[-2]} .${parts[-1]} "
471+ levels=4
481472 fi
482473
474+ echo " $domain $levels " >> " $domain_registry "
475+ done < " $input "
476+
477+ # Second pass - classify domains
478+ while IFS=' ' read -r domain levels; do
479+ local parts
480+ IFS=' .' read -ra parts <<< " $domain"
481+
483482 if [[ $levels -eq 2 ]]; then
483+ # Second-level domains
484484 echo " $domain " >> " $second_level "
485485 echo " $domain " >> " $base_domains "
486- else
486+ elif [[ $levels -eq 3 ]] ; then
487487 local base_domain=" ${parts[-2]} .${parts[-1]} "
488488 if grep -Fxq " $base_domain " " $PUBLIC_SUFFIX_FILE " ; then
489- if [[ $levels -eq 3 ]]; then
490- echo " $domain " >> " $regional "
489+ # Regional domain
490+ echo " $domain " >> " $regional "
491+ echo " $domain " >> " $base_domains "
492+ else
493+ # Check if base domain exists
494+ if ! grep -Fxq " $base_domain " " $second_level " ; then
495+ # Keep third-level domain as is
496+ echo " $domain " >> " $other "
491497 echo " $domain " >> " $base_domains "
492498 fi
493499 fi
494- fi
495- done < " $input "
496-
497- # Check that files are not empty after first pass
498- if [[ ! -s " $base_domains " ]]; then
499- log " WARNING: No base domains found in $input "
500- return 1
501- fi
502-
503- # Second pass - filter subdomains
504- while IFS= read -r domain; do
505- local parts
506- IFS=' .' read -ra parts <<< " $domain"
507- local skip=false
508-
509- # Skip already processed domains
510- if grep -Fxq " $domain " " $base_domains " ; then
511- continue
512- fi
500+ elif [[ $levels -eq 4 ]]; then
501+ local base_domain=" ${parts[-2]} .${parts[-1]} "
502+ local third_level=" ${parts[-3]} .${parts[-2]} .${parts[-1]} "
513503
514- # Check if domain is a subdomain of already known domains
515- while IFS= read -r base; do
516- if [[ " $domain " == * " .$base " ]]; then
517- skip=true
518- break
504+ if grep -Fxq " $base_domain " " $PUBLIC_SUFFIX_FILE " ; then
505+ # Regional subdomain
506+ if ! grep -Fxq " $third_level " " $regional " ; then
507+ echo " $domain " >> " $other "
508+ echo " $domain " >> " $base_domains "
509+ fi
510+ else
511+ # Check if parent domains exist
512+ if ! grep -Fxq " $base_domain " " $second_level " && \
513+ ! grep -Fxq " $third_level " " $other " ; then
514+ echo " $domain " >> " $other "
515+ echo " $domain " >> " $base_domains "
516+ fi
519517 fi
520- done < " $base_domains "
521-
522- [[ $skip == true ]] && continue
523-
524- echo " $domain " >> " $other "
525- done < " $input "
518+ fi
519+ done < " $domain_registry "
526520
527521 # Sort and remove duplicates
528522 for file in " $second_level " " $regional " " $other " ; do
@@ -531,12 +525,8 @@ process_domains() {
531525 fi
532526 done
533527
534- # Check results before deleting temporary files
535- if [[ -f " $base_domains " ]]; then
536- rm -f " $base_domains "
537- else
538- log " WARNING: File $base_domains not found during deletion attempt"
539- fi
528+ # Cleanup temporary files
529+ rm -f " $base_domains " " $domain_registry "
540530
541531 # Statistics
542532 local second_count=0 regional_count=0 other_count=0
@@ -549,13 +539,7 @@ process_domains() {
549539 log " - Regional domains: $regional_count "
550540 log " - Other domains: $other_count "
551541
552- # Check operation success
553- if [[ $second_count -eq 0 && $regional_count -eq 0 && $other_count -eq 0 ]]; then
554- log " ERROR: No domains found after classification"
555- return 1
556- fi
557-
558- return 0
542+ return $(( second_count + regional_count + other_count > 0 ? 0 : 1 ))
559543}
560544
561545# Function to prepare domains for DNS check
@@ -582,13 +566,15 @@ apply_whitelist() {
582566 if [[ ! -f " $input " || ! -f " $whitelist " ]]; then
583567 log " ERROR: One of the files does not exist"
584568 return 1
585- fi
569+ fi
586570
587- # Create temporary file for exclusion patterns
571+ # Create temporary files
588572 local whitelist_pattern=" ${TMP_DIR} /whitelist_pattern.txt"
573+ local whitelist_domains=" ${TMP_DIR} /whitelist_domains.txt"
589574 true > " $whitelist_pattern "
575+ true > " $whitelist_domains "
590576
591- # Process whitelist
577+ # Process whitelist and create exclusion patterns
592578 while IFS= read -r domain; do
593579 local parts
594580 IFS=' .' read -ra parts <<< " ${domain//./ }"
@@ -597,14 +583,27 @@ apply_whitelist() {
597583
598584 if [[ $levels -eq 2 ]]; then
599585 # Second-level domain
586+ echo " $domain " >> " $whitelist_domains "
600587 echo " ^${domain} $" >> " $whitelist_pattern "
601588 echo " \.${domain} $" >> " $whitelist_pattern "
602589 elif [[ $levels -eq 3 ]]; then
603- # Check if domain is regional
604590 base_domain=" ${parts[-2]} .${parts[-1]} "
605591 if grep -Fxq " $base_domain " " $PUBLIC_SUFFIX_FILE " ; then
592+ # Regional domain
593+ echo " $domain " >> " $whitelist_domains "
606594 echo " ^${domain} $" >> " $whitelist_pattern "
607595 echo " \.${domain} $" >> " $whitelist_pattern "
596+ else
597+ # Third-level domain
598+ echo " $domain " >> " $whitelist_domains "
599+ echo " ^${domain} $" >> " $whitelist_pattern "
600+ fi
601+ elif [[ $levels -eq 4 ]]; then
602+ # Check if it's a regional subdomain
603+ base_domain=" ${parts[-2]} .${parts[-1]} "
604+ if grep -Fxq " $base_domain " " $PUBLIC_SUFFIX_FILE " ; then
605+ echo " $domain " >> " $whitelist_domains "
606+ echo " ^${domain} $" >> " $whitelist_pattern "
608607 fi
609608 fi
610609 done < " $whitelist "
@@ -619,7 +618,7 @@ apply_whitelist() {
619618 local removed=$(( $(wc - l < "$input ") - $(wc - l < "$output ")) )
620619 log " Domains removed by whitelist: $removed "
621620
622- rm -f " $whitelist_pattern "
621+ rm -f " $whitelist_pattern " " $whitelist_domains "
623622}
624623
625624# Function to check intersections between lists
0 commit comments