#! /bin/bash
#
# Download the MP3 samples linked from https://www.dialectsarchive.com.
#
# Usage: <script> DESTINATION
#
# For each continent page the script extracts the country links, for each
# country page it extracts the dialect sample entries, and for each sample it
# downloads the first .mp3 URL found on the sample page to
#   DESTINATION/<continent>/<country>/<sample page name>: <sample info>.mp3
# Samples whose target file already exists are skipped, so a run can be
# repeated to pick up where a previous one stopped.
#
# Requires: wget, sed, awk and a grep that supports -P (PCRE) and -o.

set -u

readonly url="https://www.dialectsarchive.com"
readonly continents=(
  africa asia australia-oceania caribbean central-america
  europe middle-east north-america south-america
)

# Print a diagnostic on stderr without stopping the run.
warn() {
  printf '%s\n' "$*" >&2
}

# Write the body of the page at URL $1 to stdout; non-zero status on failure.
fetch_page() {
  wget -q -O - "$1"
}

destination="${1:-}"
if [[ -z "$destination" ]]; then
  # Without a destination every path would be created directly under "/".
  printf 'Usage: %s DESTINATION\n' "${0##*/}" >&2
  exit 2
fi

for continent in "${continents[@]}"; do
  if ! countries_html="$(fetch_page "$url/$continent")"; then
    warn "Could not fetch $url/$continent"
    continue
  fi

  # Country links live between the "Please select a" prompt and the next
  # line containing "clear". Lines are read one by one (instead of relying on
  # IFS word splitting) so URLs are never split or glob-expanded.
  while IFS= read -r country_url; do
    [[ -n "$country_url" ]] || continue

    if ! country_html="$(fetch_page "$country_url")"; then
      warn "Could not fetch $country_url"
      continue
    fi
    country="$(basename "$country_url")"

    directory="$destination/$continent/$country"
    mkdir -p "$directory" || continue

    # A sample entry is a line holding a link to "<country>-<number>"
    # followed by the speaker's sex ("male"/"female").
    while IFS= read -r dialect_line; do
      [[ -n "$dialect_line" ]] || continue

      # Only the first URL on the entry line is used as the sample page.
      dialect_url="$(
        printf '%s\n' "$dialect_line" \
          | grep -io --color=never 'https[^"]*' \
          | head -n 1
      )"
      if [[ -z "$dialect_url" ]]; then
        warn "No sample URL found in: $dialect_line"
        continue
      fi

      # Text pieces that are followed by a tag, trimmed of surrounding
      # non-word characters, with "/" replaced so the result is usable in a
      # file name.
      # NOTE(review): the lookbehind "(?<=)" is empty and therefore matches
      # at every position; the pattern looks like it lost its anchor text.
      # Confirm the intended expression before relying on the file names.
      dialect_info="$(
        printf '%s\n' "$dialect_line" \
          | grep -Po --color=never "(?<=)[^<]+(?=<)" \
          | sed -e 's/\W*$//' -e 's/^\W*//' -e 's/\//_/g'
      )"

      filename="$(basename "$dialect_url"): $dialect_info.mp3"
      filepath="$directory/$filename"

      if [[ -f "$filepath" ]]; then
        printf '%s\n' "Skipping existing file $filepath"
        continue
      fi

      if ! sample_html="$(fetch_page "$dialect_url")"; then
        warn "Could not fetch $dialect_url"
        continue
      fi

      dialect_download="$(
        printf '%s\n' "$sample_html" \
          | grep -io 'https[^"]*\.mp3' \
          | head -n 1
      )"
      if [[ -z "$dialect_download" ]]; then
        warn "No .mp3 link found on $dialect_url"
        continue
      fi

      printf '%s\n' "Downloading $filename to $directory"

      # wget creates its -O target even when the transfer fails. Download to
      # a temporary name and rename only on success, so a failed download is
      # not mistaken for an existing file on the next run.
      partial="$filepath.part"
      if wget -q -O "$partial" "$dialect_download"; then
        mv -f -- "$partial" "$filepath"
      else
        rm -f -- "$partial"
        warn "Download failed: $dialect_download"
      fi
    done < <(
      printf '%s\n' "$country_html" \
        | grep -iP --color=never "$url/$country-\d+\">[^>]*>.*?(fe)?male"
    )
  done < <(
    printf '%s\n' "$countries_html" \
      | sed -n -e '/Please select a/,/clear/{p;}' \
      | grep -io --color=never "$url/[^\"]*" \
      | awk '!seen[$0]++'
  )
done