#!/bin/bash
#
# For every .mkv file in the current directory, collect the audio languages,
# the subtitle languages and whether the video is H.265, then pass them to a
# Python helper script (according to the original author's note, that script
# updates an Excel sheet).
#
# Requires: bash 4+ (associative arrays), ffprobe, python3.
# Environment:
#   SUB_DUB_SCRIPT  path of the Python helper
#                   (default: /home/honney/.bin/sub_dub.py)

# Python helper that receives: <name> <audio langs> <subtitle langs> <codec>.
SUB_DUB_SCRIPT="${SUB_DUB_SCRIPT:-/home/honney/.bin/sub_dub.py}"

# Language code mapping: ISO 639-1 and ISO 639-2 (B and T) codes -> name.
# Values must not contain commas, because the extracted languages are joined
# into one comma-separated list.
declare -A LANG_CODES=(
  ["eng"]="English" ["en"]="English"
  ["spa"]="Spanish" ["es"]="Spanish"
  ["fra"]="French" ["fr"]="French"
  ["deu"]="German" ["ger"]="German" ["de"]="German"
  ["jpn"]="Japanese" ["ja"]="Japanese"
  ["ita"]="Italian" ["it"]="Italian"
  ["por"]="Portuguese" ["pt"]="Portuguese"
  ["rus"]="Russian" ["ru"]="Russian"
  ["chi"]="Chinese" ["zh"]="Chinese"
  ["kor"]="Korean" ["ko"]="Korean"
  ["dut"]="Dutch" ["nld"]="Dutch" ["nl"]="Dutch"
  ["swe"]="Swedish" ["sv"]="Swedish"
  ["fin"]="Finnish" ["fi"]="Finnish"
  ["pol"]="Polish" ["pl"]="Polish"
  ["ara"]="Arabic" ["ar"]="Arabic"
  ["hin"]="Hindi" ["hi"]="Hindi"
  ["tur"]="Turkish" ["tr"]="Turkish"
  ["und"]="Undefined"
  # translate_language strips whitespace before the lookup, so it never
  # reaches this key; it is kept for any direct user of the table.
  [" "]="Undefined"
  ["ab"]="Abkhazian" ["abk"]="Abkhazian"
  ["aa"]="Afar" ["aar"]="Afar"
  ["af"]="Afrikaans" ["afr"]="Afrikaans"
  ["ak"]="Akan" ["aka"]="Akan"
  ["twi"]="Twi"
  ["fat"]="Fanti"
  ["sq"]="Albanian" ["sqi"]="Albanian" ["alb"]="Albanian"
  ["am"]="Amharic" ["amh"]="Amharic"
  ["arb"]="Arabic"
  ["an"]="Aragonese" ["arg"]="Aragonese"
  ["hy"]="Armenian" ["hye"]="Armenian" ["arm"]="Armenian"
  ["as"]="Assamese" ["asm"]="Assamese"
  ["av"]="Avaric" ["ava"]="Avaric"
  ["ae"]="Avestan" ["ave"]="Avestan"
  ["ay"]="Aymara" ["aym"]="Aymara"
  ["az"]="Azerbaijani" ["aze"]="Azerbaijani"
  ["bm"]="Bambara" ["bam"]="Bambara"
  ["ba"]="Bashkir" ["bak"]="Bashkir"
  ["eu"]="Basque" ["eus"]="Basque" ["baq"]="Basque"
  ["be"]="Belarusian" ["bel"]="Belarusian"
  ["bn"]="Bengali" ["ben"]="Bengali"
  ["bi"]="Bislama" ["bis"]="Bislama"
  ["bs"]="Bosnian" ["bos"]="Bosnian"
  ["br"]="Breton" ["bre"]="Breton"
  ["bg"]="Bulgarian" ["bul"]="Bulgarian"
  ["my"]="Burmese" ["mya"]="Burmese" ["bur"]="Burmese"
  ["ca"]="Catalan" ["cat"]="Catalan"
  ["ch"]="Chamorro" ["cha"]="Chamorro"
  ["ce"]="Chechen" ["che"]="Chechen"
  ["ny"]="Chichewa" ["nya"]="Chichewa"
  ["zho"]="Chinese"
  ["cu"]="Church Slavonic" ["chu"]="Church Slavonic"
  ["cv"]="Chuvash" ["chv"]="Chuvash"
  ["kw"]="Cornish" ["cor"]="Cornish"
  ["co"]="Corsican" ["cos"]="Corsican"
  ["cr"]="Cree" ["cre"]="Cree"
  ["hr"]="Croatian" ["hrv"]="Croatian"
  ["cs"]="Czech" ["ces"]="Czech" ["cze"]="Czech"
  ["da"]="Danish" ["dan"]="Danish"
  ["dv"]="Divehi" ["div"]="Divehi"
  ["dz"]="Dzongkha" ["dzo"]="Dzongkha"
  ["eo"]="Esperanto" ["epo"]="Esperanto"
  ["et"]="Estonian" ["est"]="Estonian"
  ["ee"]="Ewe" ["ewe"]="Ewe"
  ["fo"]="Faroese" ["fao"]="Faroese"
  ["fj"]="Fijian" ["fij"]="Fijian"
  ["fre"]="French"
  ["fy"]="Western Frisian" ["fry"]="Western Frisian"
  ["ff"]="Fulah" ["ful"]="Fulah"
  ["gd"]="Gaelic" ["gla"]="Gaelic"
  ["gl"]="Galician" ["glg"]="Galician"
  ["lg"]="Ganda" ["lug"]="Ganda"
  ["ka"]="Georgian" ["kat"]="Georgian" ["geo"]="Georgian"
  ["el"]="Greek" ["ell"]="Greek" ["gre"]="Greek"
  ["kl"]="Kalaallisut" ["kal"]="Kalaallisut"
  ["gn"]="Guarani" ["grn"]="Guarani"
  ["gu"]="Gujarati" ["guj"]="Gujarati"
  ["ht"]="Haitian Creole" ["hat"]="Haitian Creole"
  ["ha"]="Hausa" ["hau"]="Hausa"
  ["he"]="Hebrew" ["heb"]="Hebrew"
  ["hz"]="Herero" ["her"]="Herero"
  ["ho"]="Hiri Motu" ["hmo"]="Hiri Motu"
  ["hu"]="Hungarian" ["hun"]="Hungarian"
  ["is"]="Icelandic" ["isl"]="Icelandic" ["ice"]="Icelandic"
  ["io"]="Ido" ["ido"]="Ido"
  ["ig"]="Igbo" ["ibo"]="Igbo"
  ["id"]="Indonesian" ["ind"]="Indonesian"
  ["ia"]="Interlingua" ["ina"]="Interlingua"
  ["ie"]="Interlingue" ["ile"]="Interlingue"
  ["iu"]="Inuktitut" ["iku"]="Inuktitut"
  ["ik"]="Inupiaq" ["ipk"]="Inupiaq"
  ["ga"]="Irish" ["gle"]="Irish"
  ["jv"]="Javanese" ["jav"]="Javanese"
  ["kn"]="Kannada" ["kan"]="Kannada"
  ["kr"]="Kanuri" ["kau"]="Kanuri"
  ["ks"]="Kashmiri" ["kas"]="Kashmiri"
  ["kk"]="Kazakh" ["kaz"]="Kazakh"
  ["km"]="Central Khmer" ["khm"]="Central Khmer"
  ["ki"]="Kikuyu" ["kik"]="Kikuyu"
  ["rw"]="Kinyarwanda" ["kin"]="Kinyarwanda"
  ["ky"]="Kyrgyz" ["kir"]="Kyrgyz"
  ["kv"]="Komi" ["kom"]="Komi"
  ["kg"]="Kongo" ["kon"]="Kongo"
  ["kj"]="Kuanyama" ["kua"]="Kuanyama"
  ["ku"]="Kurdish" ["kur"]="Kurdish"
  ["lo"]="Lao" ["lao"]="Lao"
  ["la"]="Latin" ["lat"]="Latin"
  ["lv"]="Latvian" ["lav"]="Latvian"
  ["li"]="Limburgan" ["lim"]="Limburgan"
  ["ln"]="Lingala" ["lin"]="Lingala"
  ["lt"]="Lithuanian" ["lit"]="Lithuanian"
  ["lu"]="Luba-Katanga" ["lub"]="Luba-Katanga"
  ["lb"]="Luxembourgish" ["ltz"]="Luxembourgish"
  ["mk"]="Macedonian" ["mkd"]="Macedonian" ["mac"]="Macedonian"
  ["mg"]="Malagasy" ["mlg"]="Malagasy"
  ["ms"]="Malay" ["msa"]="Malay" ["may"]="Malay"
  ["ml"]="Malayalam" ["mal"]="Malayalam"
  ["mt"]="Maltese" ["mlt"]="Maltese"
  ["gv"]="Manx" ["glv"]="Manx"
  ["mi"]="Maori" ["mri"]="Maori" ["mao"]="Maori"
  ["mr"]="Marathi" ["mar"]="Marathi"
  ["mh"]="Marshallese" ["mah"]="Marshallese"
  ["mn"]="Mongolian" ["mon"]="Mongolian"
  ["na"]="Nauru" ["nau"]="Nauru"
  ["nv"]="Navajo" ["nav"]="Navajo"
  ["nd"]="North Ndebele" ["nde"]="North Ndebele"
  ["nr"]="South Ndebele" ["nbl"]="South Ndebele"
  ["ng"]="Ndonga" ["ndo"]="Ndonga"
  ["ne"]="Nepali" ["nep"]="Nepali"
  ["no"]="Norwegian" ["nor"]="Norwegian"
  ["nb"]="Norwegian Bokmål" ["nob"]="Norwegian Bokmål"
  ["nn"]="Norwegian Nynorsk" ["nno"]="Norwegian Nynorsk"
  ["oc"]="Occitan" ["oci"]="Occitan"
  ["oj"]="Ojibwa" ["oji"]="Ojibwa"
  ["or"]="Oriya" ["ori"]="Oriya"
  ["om"]="Oromo" ["orm"]="Oromo"
  ["os"]="Ossetian" ["oss"]="Ossetian"
  ["pi"]="Pali" ["pli"]="Pali"
  ["ps"]="Pashto" ["pus"]="Pashto"
  ["fa"]="Persian" ["fas"]="Persian" ["per"]="Persian"
  ["pa"]="Punjabi" ["pan"]="Punjabi"
  ["qu"]="Quechua" ["que"]="Quechua"
  ["ro"]="Romanian" ["ron"]="Romanian" ["rum"]="Romanian"
  ["rm"]="Romansh" ["roh"]="Romansh"
  ["rn"]="Rundi" ["run"]="Rundi"
  ["se"]="Northern Sami" ["sme"]="Northern Sami"
  ["sm"]="Samoan" ["smo"]="Samoan"
  ["sg"]="Sango" ["sag"]="Sango"
  ["sa"]="Sanskrit" ["san"]="Sanskrit"
  ["sc"]="Sardinian" ["srd"]="Sardinian"
  ["sr"]="Serbian" ["srp"]="Serbian"
  ["sn"]="Shona" ["sna"]="Shona"
  ["sd"]="Sindhi" ["snd"]="Sindhi"
  ["si"]="Sinhala" ["sin"]="Sinhala"
  ["sk"]="Slovak" ["slk"]="Slovak" ["slo"]="Slovak"
  ["sl"]="Slovenian" ["slv"]="Slovenian"
  ["so"]="Somali" ["som"]="Somali"
  ["st"]="Southern Sotho" ["sot"]="Southern Sotho"
  ["su"]="Sundanese" ["sun"]="Sundanese"
  ["sw"]="Swahili" ["swa"]="Swahili"
  ["ss"]="Swati" ["ssw"]="Swati"
  ["tl"]="Tagalog" ["tgl"]="Tagalog"
  ["ty"]="Tahitian" ["tah"]="Tahitian"
  ["tg"]="Tajik" ["tgk"]="Tajik"
  ["ta"]="Tamil" ["tam"]="Tamil"
  ["tt"]="Tatar" ["tat"]="Tatar"
  ["te"]="Telugu" ["tel"]="Telugu"
  ["th"]="Thai" ["tha"]="Thai"
  ["bo"]="Tibetan" ["bod"]="Tibetan" ["tib"]="Tibetan"
  ["ti"]="Tigrinya" ["tir"]="Tigrinya"
  ["to"]="Tongan" ["ton"]="Tongan"
  ["ts"]="Tsonga" ["tso"]="Tsonga"
  ["tn"]="Tswana" ["tsn"]="Tswana"
  ["tk"]="Turkmen" ["tuk"]="Turkmen"
  ["ug"]="Uighur" ["uig"]="Uighur"
  ["uk"]="Ukrainian" ["ukr"]="Ukrainian"
  ["ur"]="Urdu" ["urd"]="Urdu"
  ["uz"]="Uzbek" ["uzb"]="Uzbek"
  ["ve"]="Venda" ["ven"]="Venda"
  ["vi"]="Vietnamese" ["vie"]="Vietnamese"
  ["vo"]="Volapük" ["vol"]="Volapük"
  ["wa"]="Walloon" ["wln"]="Walloon"
  ["cy"]="Welsh" ["cym"]="Welsh" ["wel"]="Welsh"
  ["wo"]="Wolof" ["wol"]="Wolof"
  ["xh"]="Xhosa" ["xho"]="Xhosa"
  ["ii"]="Sichuan Yi" ["iii"]="Sichuan Yi"
  ["yi"]="Yiddish" ["yid"]="Yiddish"
  ["yo"]="Yoruba" ["yor"]="Yoruba"
  ["za"]="Zhuang" ["zha"]="Zhuang"
  ["zu"]="Zulu" ["zul"]="Zulu"
)

#######################################
# Translate a language code into its language name.
# Globals:   LANG_CODES (read)
# Arguments: $1 - language code; surrounding/embedded whitespace and letter
#                 case are ignored
# Outputs:   the language name, or "Undefined" when the code is empty or
#            not present in LANG_CODES
#######################################
translate_language() {
  local code=$1

  # Normalize without spawning external processes: drop all whitespace,
  # then lowercase.
  code=${code//[[:space:]]/}
  code=${code,,}

  # ${array[key]+set} expands to "set" only when the key exists, so an
  # unknown code is told apart from a known one without reading its value.
  if [[ -n $code && -n ${LANG_CODES[$code]+set} ]]; then
    printf '%s\n' "${LANG_CODES[$code]}"
  else
    printf 'Undefined\n'
  fi
}

#######################################
# List the translated language of every stream of one kind in a media file.
# Arguments: $1 - media file
#            $2 - ffprobe stream selector ("a" for audio, "s" for subtitles)
# Outputs:   language names joined with ", " in stream order; a stream
#            without a language tag is reported as "Undefined"
#######################################
_stream_languages() {
  local file=$1 selector=$2
  local index code joined=""

  # Ask ffprobe for "<index>[,<language>]" on one CSV line per stream. The
  # stream index is always printed, so a stream without a language tag still
  # yields a line and cannot shift the languages of the streams after it.
  while IFS=, read -r index code _; do
    index=${index//[[:space:]]/}
    # Ignore anything that is not a stream record (e.g. blank lines).
    [[ $index =~ ^[0-9]+$ ]] || continue
    joined+="${joined:+, }$(translate_language "$code")"
  done < <(ffprobe -v quiet -select_streams "$selector" \
    -show_entries stream=index:stream_tags=language \
    -of csv=p=0 "$file")

  printf '%s\n' "$joined"
}

#######################################
# List the audio languages of a media file.
# Arguments: $1 - media file (any further arguments are accepted and ignored)
# Outputs:   comma-separated language names, one per audio stream
#######################################
extract_audio_languages() {
  local file=$1
  _stream_languages "$file" a
}

#######################################
# List the subtitle languages of a media file.
# Arguments: $1 - media file (any further arguments are accepted and ignored)
# Outputs:   comma-separated language names, one per subtitle stream
#######################################
extract_subtitle_languages() {
  local file=$1
  _stream_languages "$file" s
}

#######################################
# Detect whether a media file contains an H.265 (HEVC) video stream.
# Arguments: $1 - media file
# Outputs:   "h265" if any video stream's codec name is hevc, else "other"
#######################################
check_h265_codec() {
  local file=$1

  # One codec name per line; match a whole line so that only a codec called
  # exactly "hevc" counts.
  if ffprobe -v quiet -select_streams v -show_entries stream=codec_name \
    -of default=noprint_wrappers=1:nokey=1 "$file" | grep -qix 'hevc'; then
    printf 'h265\n'
  else
    printf 'other\n'
  fi
}

#######################################
# Process all .mkv files in the current directory.
# Globals:   SUB_DUB_SCRIPT (read)
#######################################
main() {
  local file filename audio_languages subtitle_languages codec

  for file in *.mkv; do
    # Also skips the literal "*.mkv" left behind when nothing matches.
    [[ -f "$file" ]] || continue

    # File name without the .mkv extension.
    filename=${file%.mkv}

    audio_languages=$(extract_audio_languages "$file" "Audio")
    subtitle_languages=$(extract_subtitle_languages "$file" "Subtitle")
    codec=$(check_h265_codec "$file")

    python3 "$SUB_DUB_SCRIPT" \
      "$filename" "$audio_languages" "$subtitle_languages" "$codec"
  done
}

main "$@"