Files
custom_scripts/sub_dub
2025-11-17 00:41:24 +01:00

283 lines
11 KiB
Bash
Executable File

#!/bin/bash
# Language code mapping.
# Keys are lowercase two- and three-letter language codes (ISO 639 style,
# including both the bibliographic and terminologic three-letter variants
# where they differ); values are the English display names.
# Values must not contain a comma: the extract_* functions join the names
# with ", " into a single list, so an embedded comma would look like two
# separate languages.
declare -A LANG_CODES=(
  ["eng"]="English" ["en"]="English"
  ["spa"]="Spanish" ["es"]="Spanish"
  ["fra"]="French" ["fr"]="French"
  ["deu"]="German" ["ger"]="German" ["de"]="German"
  ["jpn"]="Japanese" ["ja"]="Japanese"
  ["ita"]="Italian" ["it"]="Italian"
  ["por"]="Portuguese" ["pt"]="Portuguese"
  ["rus"]="Russian" ["ru"]="Russian"
  ["chi"]="Chinese" ["zh"]="Chinese"
  ["kor"]="Korean" ["ko"]="Korean"
  ["dut"]="Dutch" ["nl"]="Dutch"
  ["swe"]="Swedish" ["sv"]="Swedish"
  ["fin"]="Finnish" ["fi"]="Finnish"
  ["pol"]="Polish" ["pl"]="Polish"
  ["ara"]="Arabic" ["ar"]="Arabic"
  ["hin"]="Hindi" ["hi"]="Hindi"
  ["tur"]="Turkish" ["tr"]="Turkish"
  ["und"]="Undefined" [" "]="Undefined"
  ["ab"]="Abkhazian" ["abk"]="Abkhazian"
  ["aa"]="Afar" ["aar"]="Afar"
  ["af"]="Afrikaans" ["afr"]="Afrikaans"
  ["ak"]="Akan" ["aka"]="Akan"
  ["twi"]="Twi"
  ["fat"]="Fanti"
  ["sq"]="Albanian" ["sqi"]="Albanian" ["alb"]="Albanian"
  ["am"]="Amharic"
  ["amh"]="Amharic"
  ["arb"]="Arabic"
  ["an"]="Aragonese" ["arg"]="Aragonese"
  ["hy"]="Armenian" ["hye"]="Armenian" ["arm"]="Armenian"
  ["as"]="Assamese" ["asm"]="Assamese"
  ["av"]="Avaric" ["ava"]="Avaric"
  ["ae"]="Avestan" ["ave"]="Avestan"
  ["ay"]="Aymara" ["aym"]="Aymara"
  ["az"]="Azerbaijani" ["aze"]="Azerbaijani"
  ["bm"]="Bambara" ["bam"]="Bambara"
  ["ba"]="Bashkir" ["bak"]="Bashkir"
  ["eu"]="Basque" ["eus"]="Basque" ["baq"]="Basque"
  ["be"]="Belarusian" ["bel"]="Belarusian"
  ["bn"]="Bengali" ["ben"]="Bengali"
  ["bi"]="Bislama" ["bis"]="Bislama"
  ["bs"]="Bosnian" ["bos"]="Bosnian"
  ["br"]="Breton" ["bre"]="Breton"
  ["bg"]="Bulgarian" ["bul"]="Bulgarian"
  ["my"]="Burmese" ["mya"]="Burmese"
  ["ca"]="Catalan" ["cat"]="Catalan"
  ["ch"]="Chamorro" ["cha"]="Chamorro"
  ["ce"]="Chechen" ["che"]="Chechen"
  ["ny"]="Chichewa" ["nya"]="Chichewa" ["zho"]="Chinese"
  ["cu"]="Church Slavonic" ["chu"]="Church Slavonic"
  ["cv"]="Chuvash" ["chv"]="Chuvash"
  ["kw"]="Cornish" ["cor"]="Cornish"
  ["co"]="Corsican" ["cos"]="Corsican"
  ["cr"]="Cree" ["cre"]="Cree"
  ["hr"]="Croatian" ["hrv"]="Croatian"
  ["cs"]="Czech" ["ces"]="Czech" ["cze"]="Czech"
  ["da"]="Danish" ["dan"]="Danish"
  ["dv"]="Divehi" ["div"]="Divehi"
  ["dz"]="Dzongkha" ["dzo"]="Dzongkha"
  ["eo"]="Esperanto" ["epo"]="Esperanto"
  ["et"]="Estonian" ["est"]="Estonian"
  ["ee"]="Ewe" ["ewe"]="Ewe"
  ["fo"]="Faroese" ["fao"]="Faroese"
  ["fj"]="Fijian" ["fij"]="Fijian"
  ["fre"]="French"
  ["fy"]="Western Frisian" ["fry"]="Western Frisian"
  ["ff"]="Fulah" ["ful"]="Fulah"
  # Same name as "gla"; the previous value contained a comma.
  ["gd"]="Gaelic"
  ["gla"]="Gaelic"
  ["gl"]="Galician" ["glg"]="Galician"
  ["lg"]="Ganda" ["lug"]="Ganda"
  ["ka"]="Georgian" ["kat"]="Georgian" ["geo"]="Georgian"
  ["el"]="Greek" ["ell"]="Greek" ["gre"]="Greek"
  ["kl"]="Kalaallisut" ["kal"]="Kalaallisut"
  ["gn"]="Guarani" ["grn"]="Guarani"
  ["gu"]="Gujarati" ["guj"]="Gujarati"
  ["ht"]="Haitian Creole" ["hat"]="Haitian Creole"
  ["ha"]="Hausa" ["hau"]="Hausa"
  ["he"]="Hebrew" ["heb"]="Hebrew"
  ["hz"]="Herero" ["her"]="Herero"
  ["ho"]="Hiri Motu" ["hmo"]="Hiri Motu"
  ["hu"]="Hungarian" ["hun"]="Hungarian"
  ["is"]="Icelandic" ["isl"]="Icelandic" ["ice"]="Icelandic"
  ["io"]="Ido" ["ido"]="Ido"
  ["ig"]="Igbo" ["ibo"]="Igbo"
  ["id"]="Indonesian" ["ind"]="Indonesian"
  ["ia"]="Interlingua" ["ina"]="Interlingua"
  ["ie"]="Interlingue" ["ile"]="Interlingue"
  ["iu"]="Inuktitut" ["iku"]="Inuktitut"
  ["ik"]="Inupiaq" ["ipk"]="Inupiaq"
  ["ga"]="Irish" ["gle"]="Irish"
  ["jv"]="Javanese" ["jav"]="Javanese"
  ["kn"]="Kannada" ["kan"]="Kannada"
  ["kr"]="Kanuri" ["kau"]="Kanuri"
  ["ks"]="Kashmiri" ["kas"]="Kashmiri"
  ["kk"]="Kazakh" ["kaz"]="Kazakh"
  ["km"]="Central Khmer" ["khm"]="Central Khmer"
  ["ki"]="Kikuyu" ["kik"]="Kikuyu"
  ["rw"]="Kinyarwanda" ["kin"]="Kinyarwanda"
  ["ky"]="Kyrgyz" ["kir"]="Kyrgyz"
  ["kv"]="Komi" ["kom"]="Komi"
  ["kg"]="Kongo" ["kon"]="Kongo"
  ["kj"]="Kuanyama" ["kua"]="Kuanyama"
  ["ku"]="Kurdish" ["kur"]="Kurdish"
  ["lo"]="Lao" ["lao"]="Lao"
  ["la"]="Latin" ["lat"]="Latin"
  ["lv"]="Latvian" ["lav"]="Latvian"
  ["li"]="Limburgan" ["lim"]="Limburgan"
  ["ln"]="Lingala" ["lin"]="Lingala"
  ["lt"]="Lithuanian" ["lit"]="Lithuanian"
  ["lu"]="Luba-Katanga" ["lub"]="Luba-Katanga"
  ["lb"]="Luxembourgish" ["ltz"]="Luxembourgish"
  ["mk"]="Macedonian" ["mkd"]="Macedonian" ["mac"]="Macedonian"
  ["mg"]="Malagasy" ["mlg"]="Malagasy"
  ["ms"]="Malay" ["msa"]="Malay"
  ["ml"]="Malayalam" ["mal"]="Malayalam"
  ["mt"]="Maltese" ["mlt"]="Maltese"
  ["gv"]="Manx" ["glv"]="Manx"
  ["mi"]="Maori" ["mri"]="Maori" ["mao"]="Maori"
  ["mr"]="Marathi" ["mar"]="Marathi"
  ["mh"]="Marshallese" ["mah"]="Marshallese"
  ["mn"]="Mongolian" ["mon"]="Mongolian"
  ["na"]="Nauru" ["nau"]="Nauru"
  ["nv"]="Navajo" ["nav"]="Navajo"
  ["nd"]="North Ndebele" ["nde"]="North Ndebele"
  ["nr"]="South Ndebele" ["nbl"]="South Ndebele"
  ["ng"]="Ndonga" ["ndo"]="Ndonga"
  ["ne"]="Nepali" ["nep"]="Nepali"
  ["no"]="Norwegian" ["nor"]="Norwegian"
  ["nb"]="Norwegian Bokmål" ["nob"]="Norwegian Bokmål"
  ["nn"]="Norwegian Nynorsk" ["nno"]="Norwegian Nynorsk"
  ["oc"]="Occitan" ["oci"]="Occitan"
  ["oj"]="Ojibwa" ["oji"]="Ojibwa"
  ["or"]="Oriya" ["ori"]="Oriya"
  ["om"]="Oromo" ["orm"]="Oromo"
  ["os"]="Ossetian" ["oss"]="Ossetian"
  ["pi"]="Pali" ["pli"]="Pali"
  ["ps"]="Pashto" ["pus"]="Pashto"
  ["fa"]="Persian" ["fas"]="Persian" ["per"]="Persian"
  ["pa"]="Punjabi" ["pan"]="Punjabi"
  ["qu"]="Quechua" ["que"]="Quechua"
  ["ro"]="Romanian" ["ron"]="Romanian" ["rum"]="Romanian"
  ["rm"]="Romansh" ["roh"]="Romansh"
  ["rn"]="Rundi" ["run"]="Rundi"
  ["se"]="Northern Sami" ["sme"]="Northern Sami"
  ["sm"]="Samoan" ["smo"]="Samoan"
  ["sg"]="Sango" ["sag"]="Sango"
  ["sa"]="Sanskrit" ["san"]="Sanskrit"
  ["sc"]="Sardinian" ["srd"]="Sardinian"
  ["sr"]="Serbian" ["srp"]="Serbian"
  ["sn"]="Shona" ["sna"]="Shona"
  ["sd"]="Sindhi" ["snd"]="Sindhi" ["si"]="Sinhala" ["sin"]="Sinhala"
  ["sk"]="Slovak" ["slk"]="Slovak" ["slo"]="Slovak"
  ["sl"]="Slovenian" ["slv"]="Slovenian"
  ["so"]="Somali" ["som"]="Somali"
  ["st"]="Southern Sotho" ["sot"]="Southern Sotho"
  ["su"]="Sundanese" ["sun"]="Sundanese"
  ["sw"]="Swahili" ["swa"]="Swahili"
  ["ss"]="Swati" ["ssw"]="Swati"
  ["tl"]="Tagalog" ["tgl"]="Tagalog"
  ["ty"]="Tahitian" ["tah"]="Tahitian"
  ["tg"]="Tajik" ["tgk"]="Tajik"
  ["ta"]="Tamil" ["tam"]="Tamil"
  ["tt"]="Tatar" ["tat"]="Tatar"
  ["te"]="Telugu" ["tel"]="Telugu"
  ["th"]="Thai" ["tha"]="Thai"
  ["bo"]="Tibetan" ["bod"]="Tibetan" ["tib"]="Tibetan"
  ["ti"]="Tigrinya" ["tir"]="Tigrinya"
  ["to"]="Tongan" ["ton"]="Tongan" ["ts"]="Tsonga" ["tso"]="Tsonga"
  ["tn"]="Tswana" ["tsn"]="Tswana"
  ["tk"]="Turkmen" ["tuk"]="Turkmen"
  ["ug"]="Uighur" ["uig"]="Uighur"
  ["uk"]="Ukrainian" ["ukr"]="Ukrainian"
  ["ur"]="Urdu" ["urd"]="Urdu"
  ["uz"]="Uzbek" ["uzb"]="Uzbek"
  ["ve"]="Venda" ["ven"]="Venda"
  ["vi"]="Vietnamese" ["vie"]="Vietnamese"
  ["vo"]="Volapük" ["vol"]="Volapük"
  ["wa"]="Walloon" ["wln"]="Walloon"
  ["cy"]="Welsh" ["cym"]="Welsh" ["wel"]="Welsh"
  ["wo"]="Wolof" ["wol"]="Wolof"
  ["xh"]="Xhosa" ["xho"]="Xhosa"
  ["ii"]="Sichuan Yi" ["iii"]="Sichuan Yi"
  ["yi"]="Yiddish" ["yid"]="Yiddish"
  ["yo"]="Yoruba" ["yor"]="Yoruba"
  ["za"]="Zhuang" ["zha"]="Zhuang"
  ["zu"]="Zulu" ["zul"]="Zulu"
)
# Translate a language code into its English name.
# Globals:   LANG_CODES (read)
# Arguments: $1 - language code; case and surrounding whitespace are ignored
# Outputs:   the mapped name on stdout, or "Undefined" when the code is
#            empty or has no entry in LANG_CODES
translate_language() {
  local raw="$1" key
  # Normalise first: lowercase, then drop every whitespace character.
  key=$(tr '[:upper:]' '[:lower:]' <<< "$raw" | tr -d '[:space:]')
  # The emptiness check must come first: an empty subscript is not valid
  # for an associative array lookup.
  if [[ -n "$key" && -n "${LANG_CODES[$key]+_}" ]]; then
    printf '%s\n' "${LANG_CODES[$key]}"
    return
  fi
  printf '%s\n' "Undefined"
}
# Print the languages of a media file's audio streams as English names,
# joined with ", " in stream order.
# Globals:   file (read only as a fallback when no argument is given)
# Arguments: $1 - path of the media file to probe
# Outputs:   one line on stdout; empty when the file has no audio stream.
#            A stream without a language tag is reported through
#            translate_language's fallback name.
extract_audio_languages() {
  local file="${1:-${file-}}"
  local languages="" separator="" stream_index lang_code
  # Ask ffprobe for one CSV row per stream: "<index>" or "<index>,<lang>".
  # Including the stream index guarantees a row even for a stream that has
  # no language tag, so an untagged stream can no longer shift the values
  # of the streams that follow it.
  while IFS=',' read -r stream_index lang_code || [[ -n "$stream_index" ]]; do
    [[ -n "$stream_index" ]] || continue
    languages+="${separator}$(translate_language "$lang_code")"
    separator=", "
  done < <(ffprobe -v quiet -select_streams a \
    -show_entries stream=index:stream_tags=language \
    -of csv=p=0 "$file")
  printf '%s\n' "$languages"
}
# Print the languages of a media file's subtitle streams as English names,
# joined with ", " in stream order.
# Globals:   file (read only as a fallback when no argument is given)
# Arguments: $1 - path of the media file to probe
# Outputs:   one line on stdout; empty when the file has no subtitle stream.
#            A stream without a language tag is reported through
#            translate_language's fallback name instead of being dropped.
extract_subtitle_languages() {
  local file="${1:-${file-}}"
  local languages="" separator="" stream_index lang_code
  # One CSV row per stream: "<index>" or "<index>,<lang>". Requesting the
  # index makes ffprobe emit a row for untagged streams too, so the number
  # of reported languages always matches the number of subtitle streams.
  while IFS=',' read -r stream_index lang_code || [[ -n "$stream_index" ]]; do
    [[ -n "$stream_index" ]] || continue
    # translate_language normalises case and whitespace itself.
    languages+="${separator}$(translate_language "$lang_code")"
    separator=", "
  done < <(ffprobe -v quiet -select_streams s \
    -show_entries stream=index:stream_tags=language \
    -of csv=p=0 "$file")
  printf '%s\n' "$languages"
}
# Report whether a media file contains an HEVC (h265) video stream.
# Arguments: $1 - path of the media file
# Outputs:   "h265" when ffmpeg's report for the file contains
#            "Video: hevc" (matched case-insensitively), otherwise "other"
check_h265_codec() {
  local file="$1"
  local verdict="other"
  # ffmpeg writes its stream report to stderr, hence the 2>&1.
  if ffmpeg -i "$file" 2>&1 | grep -qi 'Video: hevc'; then
    verdict="h265"
  fi
  echo "$verdict"
}
# Process all .mkv files in the current directory: for each one, collect
# the audio languages, subtitle languages and video codec class, then hand
# them to the Python updater together with the file name (without ".mkv").
#
# The updater path defaults to the original hard-coded location and can be
# overridden through the SUB_DUB_PY environment variable.
sub_dub_py="${SUB_DUB_PY:-/home/honney/.bin/sub_dub.py}"

# The "./" prefix keeps a file name that begins with "-" from being parsed
# as an option by basename, ffprobe or ffmpeg.
for file in ./*.mkv; do
  # An unmatched glob stays literal, so skip anything that is not a file.
  [[ -f "$file" ]] || continue
  # echo "Processing: $file"

  # File name without directory and extension
  filename=$(basename -- "$file" .mkv)

  # Extract and translate audio and subtitle languages
  audio_languages=$(extract_audio_languages "$file" "Audio")
  subtitle_languages=$(extract_subtitle_languages "$file" "Subtitle")
  # Debugging: Ensure languages are detected
  # echo "Audio Languages: $audio_languages"
  # echo "Subtitle Languages: $subtitle_languages"

  # Detect codec ("h265" or "other")
  codec=$(check_h265_codec "$file")

  # Call Python script to update Excel
  python3 "$sub_dub_py" "$filename" "$audio_languages" "$subtitle_languages" "$codec"
done
# echo "Processing completed."