@@ -369,6 +369,39 @@ validate_domain() {
369369}
370370export -f validate_domain
371371
# Function to extract domains from various formats
#
# Reads a raw list and writes the sorted, de-duplicated values to an output
# file. Two line formats are recognised:
#   * rule lines  - DOMAIN-SUFFIX,<value>[,extra...], DOMAIN,<value>[,...],
#                   DOMAIN-KEYWORD,<value>[,...]; only <value> is kept
#   * plain lines - a bare domain name such as "example.com"
# Empty lines, comment lines (optional whitespace followed by '#') and lines
# matching neither format are ignored.
#
# Globals:   TMP_DIR (read) - directory in which the scratch file is created
# Arguments: $1 - input file to read
#            $2 - output file to (over)write
# Returns:   0 on success; 1 if the input is unreadable or the scratch file
#            cannot be created
extract_domains() {
    local input=$1
    local output=$2
    local rule_re='^(DOMAIN-SUFFIX|DOMAIN-KEYWORD|DOMAIN),([^,]+)(,.*)?$'
    local domain_re='^[a-zA-Z0-9][a-zA-Z0-9.-]*\.[a-zA-Z]{2,}$'
    local line temp_file count

    log "Extracting domains from various formats..."

    if [[ ! -r "$input" ]]; then
        log "ERROR: Cannot read input file: $input"
        # Leave an empty output behind so later steps see a well-defined file.
        : > "$output"
        return 1
    fi

    # Unique scratch file: a fixed name would collide if two extractions ran
    # at the same time.
    temp_file=$(mktemp "${TMP_DIR}/extracted_domains.XXXXXX") || {
        log "ERROR: Cannot create temporary file in ${TMP_DIR}"
        return 1
    }

    # '|| [[ -n $line ]]' keeps a final line that lacks a trailing newline.
    while IFS= read -r line || [[ -n "$line" ]]; do
        # Drop the carriage return left by CRLF line endings; otherwise the
        # '$'-anchored patterns below never match.
        line=${line%$'\r'}

        # Skip empty lines and comments
        [[ -z "$line" || "$line" =~ ^[[:space:]]*# ]] && continue

        if [[ "$line" =~ $rule_re ]]; then
            # Capture stops at the next comma so a trailing policy/option
            # field is not glued onto the value.
            printf '%s\n' "${BASH_REMATCH[2]}" >> "$temp_file"
        elif [[ "$line" =~ $domain_re ]]; then
            printf '%s\n' "$line" >> "$temp_file"
        fi
    done < "$input"

    # Sort and remove duplicates. The scratch file is kept (rather than
    # piping the loop into sort) so that input and output may be the same path.
    sort -u "$temp_file" > "$output"
    rm -f "$temp_file"

    count=$(wc -l < "$output")
    count=${count//[[:space:]]/}   # some wc implementations pad the number
    log "Extracted $count unique domains"
}
403+
404+
372405# Function for initial filtering
373406initial_filter () {
374407 local input=$1
@@ -848,29 +881,110 @@ update_gists() {
848881
# Function to check if update is needed
#
# Downloads every URL listed in SOURCES_FILE, SOURCESSPECIAL_FILE and
# WHITELIST_FILE (each file is optional), records one "<url> <md5>" line per
# download and compares that list with the one saved by the previous run.
#
# Globals:  TMP_DIR, SOURCES_FILE, SOURCESSPECIAL_FILE, WHITELIST_FILE (read)
# Files:    ${TMP_DIR}/downloads/current_md5  - checksums of this run
#           ${TMP_DIR}/downloads/previous_md5 - checksums of the last run that
#                                               reported an update
# Returns:  0 if an update is needed (content changed or no previous checksums)
#           1 if nothing changed, or if any source could not be downloaded
# NOTE(review): previous_md5 is refreshed here, before the caller has actually
# performed the update - confirm that a failed update should not be retried.
check_updates_needed() {
    local temp_dir="${TMP_DIR}/downloads"
    local current_md5="${temp_dir}/current_md5"
    local previous_md5="${temp_dir}/previous_md5"
    local update_needed=false
    local process_failed=false
    local -a list_files=("${SOURCES_FILE:-}" "${SOURCESSPECIAL_FILE:-}" "${WHITELIST_FILE:-}")
    local -a list_labels=("main sources" "special sources" "whitelist")
    local idx source line

    log "Checking for updates..."
    mkdir -p "$temp_dir"
    : > "$current_md5"

    # Function to download and check content from a single source.
    # Relies on temp_dir/current_md5 of the enclosing call (bash dynamic
    # scoping). Appends "<url> <md5>" to current_md5 and returns 0 on success,
    # returns 1 if the download failed or produced an empty file.
    download_and_check() {
        local source=$1
        local name temp_file md5_sum

        name=$(printf '%s\n' "$source" | md5sum | cut -d' ' -f1)
        temp_file="${temp_dir}/${name}"

        log "Downloading: $source"
        # -f: treat HTTP errors (404, 500, ...) as failures instead of saving
        # and hashing the server's error page as if it were list content.
        if curl -fsSL --max-time 30 --retry 3 --retry-delay 2 "$source" -o "$temp_file" \
            && [[ -s "$temp_file" ]]; then
            md5_sum=$(md5sum "$temp_file" | cut -d' ' -f1)
            echo "${source} ${md5_sum}" >> "$current_md5"
            return 0
        fi
        log "WARNING: Failed to download $source"
        return 1
    }

    # Process all sources and calculate MD5
    for idx in "${!list_files[@]}"; do
        [[ -f "${list_files[idx]}" ]] || continue
        log "Processing ${list_labels[idx]}..."
        # '|| [[ -n $source ]]' keeps a last line without trailing newline.
        while IFS= read -r source || [[ -n "$source" ]]; do
            source=${source%$'\r'}   # tolerate CRLF list files
            [[ -z "$source" || "$source" == "#"* ]] && continue
            download_and_check "$source" || process_failed=true
        done < "${list_files[idx]}"
    done

    # Check if we failed to process any sources
    if [[ "$process_failed" == true ]]; then
        log "ERROR: Failed to process one or more sources"
        # Discard the partial results but keep previous_md5: removing it
        # would make the next successful run report a spurious update.
        find "$temp_dir" -type f ! -name "previous_md5" -delete
        return 1
    fi

    # Debug output for current MD5 sums
    log "Current MD5 sums:"
    while IFS= read -r line; do
        log "$line"
    done < "$current_md5"

    # Compare current and previous MD5
    if [[ -f "$previous_md5" ]]; then
        log "Previous MD5 sums:"
        while IFS= read -r line; do
            log "$line"
        done < "$previous_md5"

        if ! cmp -s "$current_md5" "$previous_md5"; then
            update_needed=true
            log "Changes detected in source content"
            # Show what changed. Process substitution (not a pipeline) so
            # diff's exit status 1 cannot trip 'set -o pipefail'/'set -e'.
            log "Changes:"
            while IFS= read -r line; do
                log "$line"
            done < <(diff "$previous_md5" "$current_md5")
        fi
    else
        update_needed=true
        log "No previous MD5 found, update needed"
    fi

    if [[ "$update_needed" == true ]]; then
        # Save current MD5 as previous for next run
        cp "$current_md5" "$previous_md5"
        log "MD5 checksums updated"
    else
        log "No changes detected in sources"
    fi

    # Keep the MD5 files for the next run; only remove downloaded content
    find "$temp_dir" -type f ! -name "*md5" -delete

    if [[ "$update_needed" == true ]]; then
        return 0
    fi
    return 1
}
875989
876990# Helper function to restore backups
@@ -927,6 +1041,7 @@ log "Starting main processing..."
9271041
9281042 # Process main list
9291043 log " Processing main list..."
1044+ extract_domains " $main_raw " " ${TMP_DIR} /main_extracted.txt"
9301045 initial_filter " $main_raw " " ${TMP_DIR} /main_initial.txt"
9311046 if ! process_domains " ${TMP_DIR} /main_initial.txt" " ${TMP_DIR} /main" ; then
9321047 log " ERROR: Failed to process main domain list"
@@ -937,6 +1052,7 @@ log "Starting main processing..."
9371052
9381053 # Process special list
9391054 log " Processing special list..."
1055+ extract_domains " $special_raw " " ${TMP_DIR} /special_extracted.txt"
9401056 initial_filter " $special_raw " " ${TMP_DIR} /special_initial.txt"
9411057 if ! process_domains " ${TMP_DIR} /special_initial.txt" " ${TMP_DIR} /special" ; then
9421058 log " ERROR: Failed to process special domain list"
@@ -948,6 +1064,7 @@ log "Starting main processing..."
9481064 # Apply whitelist if exists
9491065 if [[ -f " $whitelist_raw " ]]; then
9501066 log " Applying whitelist..."
1067+ extract_domains " $whitelist_raw " " ${TMP_DIR} /whitelist_extracted.txt"
9511068 initial_filter " $whitelist_raw " " ${TMP_DIR} /whitelist.txt"
9521069 apply_whitelist " ${TMP_DIR} /main_filtered.txt" " ${TMP_DIR} /whitelist.txt" " ${TMP_DIR} /main_filtered_clean.txt"
9531070 apply_whitelist " ${TMP_DIR} /special_filtered.txt" " ${TMP_DIR} /whitelist.txt" " ${TMP_DIR} /special_filtered_clean.txt"
0 commit comments