diff --git a/check_video/__init__.py b/check_video/__init__.py new file mode 100644 index 0000000..c6035b7 --- /dev/null +++ b/check_video/__init__.py @@ -0,0 +1 @@ +from .check_video import check_video_ext, normalize_language, normalize_lang_code \ No newline at end of file diff --git a/check_video/__pycache__/__init__.cpython-314.pyc b/check_video/__pycache__/__init__.cpython-314.pyc new file mode 100644 index 0000000..926c9bc Binary files /dev/null and b/check_video/__pycache__/__init__.cpython-314.pyc differ diff --git a/check_video/__pycache__/check_video.cpython-314.pyc b/check_video/__pycache__/check_video.cpython-314.pyc new file mode 100644 index 0000000..f3cb7c5 Binary files /dev/null and b/check_video/__pycache__/check_video.cpython-314.pyc differ diff --git a/check_video/__pycache__/languages.cpython-314.pyc b/check_video/__pycache__/languages.cpython-314.pyc new file mode 100644 index 0000000..9bf1b3b Binary files /dev/null and b/check_video/__pycache__/languages.cpython-314.pyc differ diff --git a/check_video/check_video.py b/check_video/check_video.py new file mode 100644 index 0000000..15ccf41 --- /dev/null +++ b/check_video/check_video.py @@ -0,0 +1,42 @@ +from languages import LANG_CODES, REVERSE_LANG_CODES + +extensions=("mp4", "mkv", "avi", "mov", "wmv", "flv", "webm", "lrv", "gif") + +def check_video_ext(extension: str) -> bool: + if extension.lower().strip('.') in extensions: + return True + return False + +def normalize_language(lang: str) -> str: + """ + Input: 'ger', 'DE', 'German', 'GERMAN' + Output: 'German' + """ + if not lang: + return "Undefined" + + # 1. Clean the input + query = lang.strip().lower() + + # 2. Check if it's already a code (e.g., 'de' or 'ger') + if query in LANG_CODES: + return LANG_CODES[query] + + # 3. 
Check if it's a full name (e.g., 'german') + # We need a case-insensitive check against the names in REVERSE_LANG_CODES + for full_name in REVERSE_LANG_CODES: + if full_name.lower() == query: + return full_name + + return "Undefined" + +def normalize_lang_code(lang: str) -> str: + """ + Input: 'ger', 'DE', 'German', 'GERMAN' + Output: 'de' (the canonical media code (ISO 639-1)) + """ + # First, get the standard full name + full_name = normalize_language(lang) + + # Then, look up that full name in the reverse table + return REVERSE_LANG_CODES.get(full_name, "und") \ No newline at end of file diff --git a/ff b/ff index 6319e22..78c45cc 100755 --- a/ff +++ b/ff @@ -3,10 +3,7 @@ import sys import os import subprocess import json - -# cmd = "ls ../Videos/OBS/*.mkv" -# result = subprocess.run(cmd, shell=True, capture_output=True, text=True) -# print(result.stdout) +from check_video import check_video_ext, normalize_language, normalize_lang_code color = True try: @@ -18,8 +15,6 @@ except ImportError: print("For nicer output install termcolor:\npip install termcolor") color = False -EXT=[".mp4", ".mkv", ".avi", ".mov", ".wmv", ".flv", ".webm", ".lrv", ".gif"] - NORMAL_STYLE = ("white", None, []) ERROR_STYLE = ("red", None, ["bold"]) WARN_STYLE = ("yellow", None, ["bold"]) @@ -78,7 +73,7 @@ def get_interlace_label(fo): return "Progressive" # Default assumption for modern web video class video_lines: - def __init__(self, stream, duration): + def __init__(self, stream): if stream.get("index"): self.id = stream.get("index") else: @@ -92,7 +87,7 @@ class video_lines: if stream.get("name"): self.duration = seconds_to_hms(stream.get("duration")) else: - self.duration = duration + self.duration = None if stream.get("codec_name"): self.codec = stream.get("codec_name") @@ -160,89 +155,8 @@ class video_lines: string += f" [{self.field_order}]" return string -LANG_CODES = { - "eng": "English", "en": "English", "spa": "Spanish", "es": "Spanish", - "fra": "French", "fr": "French", 
"deu": "German", "ger": "German", "de": "German", - "jpn": "Japanese", "ja": "Japanese", "ita": "Italian", "it": "Italian", - "por": "Portuguese", "pt": "Portuguese", "rus": "Russian", "ru": "Russian", - "chi": "Chinese", "zho": "Chinese", "zh": "Chinese", "kor": "Korean", "ko": "Korean", - "dut": "Dutch", "nl": "Dutch", "swe": "Swedish", "sv": "Swedish", - "fin": "Finnish", "fi": "Finnish", "pol": "Polish", "pl": "Polish", - "ara": "Arabic", "ar": "Arabic", "hin": "Hindi", "hi": "Hindi", - "tur": "Turkish", "tr": "Turkish", "und": "Undefined", " ": "Undefined", - "ab": "Abkhazian", "abk": "Abkhazian", "aa": "Afar", "aar": "Afar", - "af": "Afrikaans", "afr": "Afrikaans", "ak": "Akan", "aka": "Akan", - "twi": "Twi", "fat": "Fanti", "sq": "Albanian", "sqi": "Albanian", "alb": "Albanian", - "am": "Amharic", "amh": "Amharic", "arb": "Arabic", "an": "Aragonese", "arg": "Aragonese", - "hy": "Armenian", "hye": "Armenian", "arm": "Armenian", "as": "Assamese", "asm": "Assamese", - "av": "Avaric", "ava": "Avaric", "ae": "Avestan", "ave": "Avestan", "ay": "Aymara", "aym": "Aymara", - "az": "Azerbaijani", "aze": "Azerbaijani", "bm": "Bambara", "bam": "Bambara", - "ba": "Bashkir", "bak": "Bashkir", "eu": "Basque", "eus": "Basque", "baq": "Basque", - "be": "Belarusian", "bel": "Belarusian", "bn": "Bengali", "ben": "Bengali", - "bi": "Bislama", "bis": "Bislama", "bs": "Bosnian", "bos": "Bosnian", - "br": "Breton", "bre": "Breton", "bg": "Bulgarian", "bul": "Bulgarian", - "my": "Burmese", "mya": "Burmese", "ca": "Catalan", "cat": "Catalan", - "ch": "Chamorro", "cha": "Chamorro", "ce": "Chechen", "che": "Chechen", - "ny": "Chichewa", "nya": "Chichewa", "cu": "Church Slavonic", "chu": "Church Slavonic", - "cv": "Chuvash", "chv": "Chuvash", "kw": "Cornish", "cor": "Cornish", - "co": "Corsican", "cos": "Corsican", "cr": "Cree", "cre": "Cree", - "hr": "Croatian", "hrv": "Croatian", "cs": "Czech", "ces": "Czech", "cze": "Czech", - "da": "Danish", "dan": "Danish", "dv": "Divehi", "div": 
"Divehi", "dz": "Dzongkha", "dzo": "Dzongkha", - "eo": "Esperanto", "epo": "Esperanto", "et": "Estonian", "est": "Estonian", - "ee": "Ewe", "ewe": "Ewe", "fo": "Faroese", "fao": "Faroese", "fj": "Fijian", "fij": "Fijian", - "fre": "French", "fy": "Western Frisian", "fry": "Western Frisian", "ff": "Fulah", "ful": "Fulah", - "gd": "Gaelic, Scottish Gaelic", "gla": "Gaelic", "gl": "Galician", "glg": "Galician", - "lg": "Ganda", "lug": "Ganda", "ka": "Georgian", "kat": "Georgian", "geo": "Georgian", - "el": "Greek", "ell": "Greek", "gre": "Greek", "kl": "Kalaallisut", "kal": "Kalaallisut", - "gn": "Guarani", "grn": "Guarani", "gu": "Gujarati", "guj": "Gujarati", - "ht": "Haitian Creole", "hat": "Haitian Creole", "ha": "Hausa", "hau": "Hausa", - "he": "Hebrew", "heb": "Hebrew", "hz": "Herero", "her": "Herero", "ho": "Hiri Motu", "hmo": "Hiri Motu", - "hu": "Hungarian", "hun": "Hungarian", "is": "Icelandic", "isl": "Icelandic", "ice": "Icelandic", - "io": "Ido", "ido": "Ido", "ig": "Igbo", "ibo": "Igbo", "id": "Indonesian", "ind": "Indonesian", - "ia": "Interlingua", "ina": "Interlingua", "ie": "Interlingue", "ile": "Interlingue", - "iu": "Inuktitut", "iku": "Inuktitut", "ik": "Inupiaq", "ipk": "Inupiaq", - "ga": "Irish", "gle": "Irish", "jv": "Javanese", "jav": "Javanese", - "kn": "Kannada", "kan": "Kannada", "kr": "Kanuri", "kau": "Kanuri", - "ks": "Kashmiri", "kas": "Kashmiri", "kk": "Kazakh", "kaz": "Kazakh", - "km": "Central Khmer", "khm": "Central Khmer", "ki": "Kikuyu", "kik": "Kikuyu", - "rw": "Kinyarwanda", "kin": "Kinyarwanda", "ky": "Kyrgyz", "kir": "Kyrgyz", - "kv": "Komi", "kom": "Komi", "kg": "Kongo", "kon": "Kongo", "kj": "Kuanyama", "kua": "Kuanyama", - "ku": "Kurdish", "kur": "Kurdish", "lo": "Lao", "lao": "Lao", "la": "Latin", "lat": "Latin", - "lv": "Latvian", "lav": "Latvian", "li": "Limburgan", "lim": "Limburgan", - "ln": "Lingala", "lin": "Lingala", "lt": "Lithuanian", "lit": "Lithuanian", - "lu": "Luba-Katanga", "lub": "Luba-Katanga", "lb": 
"Luxembourgish", "ltz": "Luxembourgish", - "mk": "Macedonian", "mkd": "Macedonian", "mac": "Macedonian", "mg": "Malagasy", "mlg": "Malagasy", - "ms": "Malay", "msa": "Malay", "ml": "Malayalam", "mal": "Malayalam", "mt": "Maltese", "mlt": "Maltese", - "gv": "Manx", "glv": "Manx", "mi": "Maori", "mri": "Maori", "mao": "Maori", - "mr": "Marathi", "mar": "Marathi", "mh": "Marshallese", "mah": "Marshallese", - "mn": "Mongolian", "mon": "Mongolian", "na": "Nauru", "nau": "Nauru", "nv": "Navajo", "nav": "Navajo", - "nd": "North Ndebele", "nde": "North Ndebele", "nr": "South Ndebele", "nbl": "South Ndebele", - "ng": "Ndonga", "ndo": "Ndonga", "ne": "Nepali", "nep": "Nepali", "no": "Norwegian", "nor": "Norwegian", - "nb": "Norwegian Bokmål", "nob": "Norwegian Bokmål", "nn": "Norwegian Nynorsk", "nno": "Norwegian Nynorsk", - "oc": "Occitan", "oci": "Occitan", "oj": "Ojibwa", "oji": "Ojibwa", "or": "Oriya", "ori": "Oriya", - "om": "Oromo", "orm": "Oromo", "os": "Ossetian", "oss": "Ossetian", "pi": "Pali", "pli": "Pali", - "ps": "Pashto", "pus": "Pashto", "fa": "Persian", "fas": "Persian", "per": "Persian", - "pa": "Punjabi", "pan": "Punjabi", "qu": "Quechua", "que": "Quechua", "ro": "Romanian", "ron": "Romanian", "rum": "Romanian", - "rm": "Romansh", "roh": "Romansh", "rn": "Rundi", "run": "Rundi", "se": "Northern Sami", "sme": "Northern Sami", - "sm": "Samoan", "smo": "Samoan", "sg": "Sango", "sag": "Sango", "sa": "Sanskrit", "san": "Sanskrit", - "sc": "Sardinian", "srd": "Sardinian", "sr": "Serbian", "srp": "Serbian", "sn": "Shona", "sna": "Shona", - "sd": "Sindhi", "snd": "Sindhi", "si": "Sinhala", "sin": "Sinhala", "sk": "Slovak", "slk": "Slovak", "slo": "Slovak", - "sl": "Slovenian", "slv": "Slovenian", "so": "Somali", "som": "Somali", "st": "Southern Sotho", "sot": "Southern Sotho", - "su": "Sundanese", "sun": "Sundanese", "sw": "Swahili", "swa": "Swahili", "ss": "Swati", "ssw": "Swati", - "tl": "Tagalog", "tgl": "Tagalog", "ty": "Tahitian", "tah": "Tahitian", "tg": 
"Tajik", "tgk": "Tajik", - "ta": "Tamil", "tam": "Tamil", "tt": "Tatar", "tat": "Tatar", "te": "Telugu", "tel": "Telugu", - "th": "Thai", "tha": "Thai", "bo": "Tibetan", "bod": "Tibetan", "tib": "Tibetan", - "ti": "Tigrinya", "tir": "Tigrinya", "to": "Tongan", "ton": "Tongan", "ts": "Tsonga", "tso": "Tsonga", - "tn": "Tswana", "tsn": "Tswana", "tk": "Turkmen", "tuk": "Turkmen", "ug": "Uighur", "uig": "Uighur", - "uk": "Ukrainian", "ukr": "Ukrainian", "ur": "Urdu", "urd": "Urdu", "uz": "Uzbek", "uzb": "Uzbek", - "ve": "Venda", "ven": "Venda", "vi": "Vietnamese", "vie": "Vietnamese", "vo": "Volapük", "vol": "Volapük", - "wa": "Walloon", "wln": "Walloon", "cy": "Welsh", "cym": "Welsh", "wel": "Welsh", "wo": "Wolof", "wol": "Wolof", - "xh": "Xhosa", "xho": "Xhosa", "ii": "Sichuan Yi", "iii": "Sichuan Yi", "yi": "Yiddish", "yid": "Yiddish", - "yo": "Yoruba", "yor": "Yoruba", "za": "Zhuang", "zha": "Zhuang", "zu": "Zulu", "zul": "Zulu" -} - class audio_lines: - def __init__(self, stream, file_duration): + def __init__(self, stream): # 1. Basic ID self.id = stream.get("index") @@ -251,14 +165,14 @@ class audio_lines: # 3. Language (usually in tags) raw_lang = stream.get("tags", {}).get("language", "und").lower() - self.language = LANG_CODES.get(raw_lang, raw_lang.capitalize()) + self.language = normalize_language(raw_lang) # 4. Duration (fallback to file duration if stream duration is missing) stream_dur = stream.get("duration") if stream_dur: self.duration = seconds_to_hms(float(stream_dur)) else: - self.duration = file_duration + self.duration = None # 5. 
Codec self.codec = stream.get("codec_name", "") @@ -315,17 +229,17 @@ class audio_lines: return string class subtitles: - def __init__(self, stream, file_duration): + def __init__(self, stream): self.id = stream.get("index") self.name = stream.get("tags", {}).get("title", "") # Language translation raw_lang = stream.get("tags", {}).get("language", "und") - self.language = LANG_CODES.get(raw_lang, raw_lang.capitalize()) + self.language = normalize_language(raw_lang) # Duration logic stream_dur = stream.get("duration") - self.duration = seconds_to_hms(float(stream_dur)) if stream_dur else file_duration + self.duration = seconds_to_hms(float(stream_dur)) if stream_dur else None # Codec (e.g., srt, ass, subrip) self.codec = stream.get("codec_name", "") @@ -359,72 +273,51 @@ class subtitles: return " ".join(parts) -def get_video_lines(file): - cmd = ["ffprobe", - "-v", "error", - "-select_streams", "v", # video streams only - "-show_entries", - "stream=index,codec_name,width,height,r_frame_rate,bit_rate,duration,nb_frames,pix_fmt,field_order,time_base,display_aspect_ratio,color_space,color_transfer,color_primaries,bits_per_raw_sample:stream_tags=title", - "-of", "json", - file - ] - - result = subprocess.run(cmd, capture_output=True, text=True) - info = json.loads(result.stdout) - streams = info.get("streams", []) - return streams - -def get_audio_lines(file): - cmd = [ - "ffprobe", - "-v", "error", - "-select_streams", "a", # Select audio streams only - "-show_entries", - # Entries mapped to your requirements: - "stream=index,codec_name,sample_rate,channels,bits_per_sample,bits_per_raw_sample,bit_rate,duration" + - ":stream_tags=language,title", - "-of", "json", - file - ] - - try: - result = subprocess.run(cmd, capture_output=True, text=True, check=True) - info = json.loads(result.stdout) - return info.get("streams", []) - except subprocess.CalledProcessError as e: - print(f"Error running ffprobe: {e.stderr}") - return [] - -def get_subtitle_lines(file): - cmd = [ - 
"ffprobe", - "-v", "error", - "-select_streams", "s", # Subtitle streams only - "-show_entries", - "stream=index,codec_name,duration:stream_tags=language,title:stream_disposition=forced,default", - "-of", "json", - file - ] - - try: - result = subprocess.run(cmd, capture_output=True, text=True, check=True) - info = json.loads(result.stdout) - return info.get("streams", []) - except subprocess.CalledProcessError: - return [] - -def get_video_duration(file_path): +def get_media_info(file): cmd = [ "ffprobe", "-v", "error", - "-show_entries", "format=duration", - "-of", "default=noprint_wrappers=1:nokey=1", - file_path + "-show_entries", + ( + "format=duration:" + "stream=index,codec_type,codec_name," + "width,height,r_frame_rate,bit_rate,duration,nb_frames," + "pix_fmt,field_order,time_base,display_aspect_ratio," + "color_space,color_transfer,color_primaries,bits_per_raw_sample," + "sample_rate,channels,bits_per_sample," + "stream_disposition=forced,default:" + "stream_tags=language,title" + ), + "-of", "json", + file ] - result = subprocess.run(cmd, capture_output=True, text=True) - duration = float(result.stdout.strip()) - return duration + try: + result = subprocess.run(cmd, capture_output=True, text=True, check=True) + info = json.loads(result.stdout) + except subprocess.CalledProcessError as e: + print(f"Error running ffprobe: {e.stderr}") + return None, [], [], [] + + # Container / file duration (string seconds, per ffprobe convention) + duration = None + if "format" in info: + duration = info["format"].get("duration") + + video_streams = [] + audio_streams = [] + subtitle_streams = [] + + for stream in info.get("streams", []): + stream_type = stream.get("codec_type") + if stream_type == "video": + video_streams.append(stream) + elif stream_type == "audio": + audio_streams.append(stream) + elif stream_type == "subtitle": + subtitle_streams.append(stream) + + return float(duration), video_streams, audio_streams, subtitle_streams def seconds_to_hms(seconds): h = 
int(seconds // 3600) @@ -432,42 +325,34 @@ def seconds_to_hms(seconds): s = int(seconds % 60) return f"{h:02}:{m:02}:{s:02}" - -def get_stream_bitrate(file_path, stream=None): - # Get duration in seconds - duration = get_video_duration(file_path) - - # Get file size in bits - size_bits = os.path.getsize(file_path) * 8 - - # Approximate average bitrate - avg_bitrate = size_bits / duration if duration > 0 else 0 - return avg_bitrate # bits per second - +def get_stream_bitrate(file_size, duration): + return int((file_size * 8)/duration/1000000) if duration > 0 else 0 class video_file: def __init__(self, path, base_tab=""): self.base_tab = base_tab # \t self.path = path # folder/25.mkv self.name = os.path.basename(path) # 25.mkv - self.size = human_readable_size(os.path.getsize(path)) # 198MB - self.duration = seconds_to_hms(get_video_duration(path)) - self.bitrate = int((os.path.getsize(path) * 8)/get_video_duration(path))/1000000 if get_video_duration(path) > 0 else 0 - # self. = get_video_lines(path) + self.size = os.path.getsize(path) + self.videos = [] self.audios = [] self.subtitles = [] - for vl in get_video_lines(path): - video_line = video_lines(vl, self.duration) + self.duration, videos, audios, subtitles = get_media_info(path) + + for vl in videos: + video_line = video_lines(vl) self.videos.append(video_line) - for al in get_audio_lines(path): - audio_line = audio_lines(al, self.duration) + for al in audios: + audio_line = audio_lines(al) self.videos.append(audio_line) - for st in get_subtitle_lines(path): - subtitle = subtitles(st, self.duration) + for st in subtitles: + subtitle = subtitles(st) self.videos.append(subtitle) - + self.bitrate = get_stream_bitrate(self.size, self.duration) + self.size = human_readable_size(self.size) # 198MB + self.duration = seconds_to_hms(self.duration) def print(self): if self.base_tab == "\t": @@ -525,6 +410,8 @@ def handle_files(files, all_files): all_files.extend(grouped) + + def handle_folders(dirs, all_files): if(dirs 
!= []): dirs.sort(key=lambda f: os.path.dirname(f)) @@ -533,7 +420,7 @@ def handle_folders(dirs, all_files): for file in os.scandir(dir): if file.is_file(): file = file.path - if os.path.splitext(file)[1].lower() in EXT: + if check_video_ext(os.path.splitext(file)[1]): dir_files.append(file) else: np(f"{file} is not a compatabile Video file", WARN_STYLE) diff --git a/languages/__init__.py b/languages/__init__.py new file mode 100644 index 0000000..8aeadb5 --- /dev/null +++ b/languages/__init__.py @@ -0,0 +1 @@ +from .languages import LANG_CODES, REVERSE_LANG_CODES \ No newline at end of file diff --git a/languages/__pycache__/__init__.cpython-314.pyc b/languages/__pycache__/__init__.cpython-314.pyc new file mode 100644 index 0000000..c8a4242 Binary files /dev/null and b/languages/__pycache__/__init__.cpython-314.pyc differ diff --git a/languages/__pycache__/languages.cpython-314.pyc b/languages/__pycache__/languages.cpython-314.pyc new file mode 100644 index 0000000..f9844a1 Binary files /dev/null and b/languages/__pycache__/languages.cpython-314.pyc differ diff --git a/languages/languages.py b/languages/languages.py new file mode 100644 index 0000000..e520fc3 --- /dev/null +++ b/languages/languages.py @@ -0,0 +1,562 @@ +LANG_CODES = { + " ": "Undefined", + "aa": "Afar", + "aar": "Afar", + "ab": "Abkhazian", + "abk": "Abkhazian", + "ae": "Avestan", + "af": "Afrikaans", + "afr": "Afrikaans", + "ak": "Akan", + "aka": "Akan", + "alb": "Albanian", + "am": "Amharic", + "amh": "Amharic", + "an": "Aragonese", + "ar": "Arabic", + "ara": "Arabic", + "arb": "Arabic", + "arg": "Aragonese", + "arm": "Armenian", + "as": "Assamese", + "asm": "Assamese", + "av": "Avaric", + "ava": "Avaric", + "ave": "Avestan", + "ay": "Aymara", + "aym": "Aymara", + "az": "Azerbaijani", + "aze": "Azerbaijani", + "ba": "Bashkir", + "bak": "Bashkir", + "bam": "Bambara", + "baq": "Basque", + "be": "Belarusian", + "bel": "Belarusian", + "bg": "Bulgarian", + "bi": "Bislama", + "bis": "Bislama", + "bm": 
"Bambara", + "bn": "Bengali", + "bo": "Tibetan", + "bod": "Tibetan", + "bos": "Bosnian", + "br": "Breton", + "bre": "Breton", + "bs": "Bosnian", + "bul": "Bulgarian", + "ca": "Catalan", + "cat": "Catalan", + "ce": "Chechen", + "ces": "Czech", + "ch": "Chamorro", + "cha": "Chamorro", + "che": "Chechen", + "chi": "Chinese", + "chu": "Church Slavonic", + "chv": "Chuvash", + "cmn": "Mandarin Chinese", + "co": "Corsican", + "cor": "Cornish", + "cos": "Corsican", + "cr": "Cree", + "cre": "Cree", + "cs": "Czech", + "cu": "Church Slavonic", + "cv": "Chuvash", + "cw": "Cornish", + "cy": "Welsh", + "cym": "Welsh", + "cze": "Czech", + "da": "Danish", + "dan": "Danish", + "de": "German", + "deu": "German", + "div": "Divehi", + "dv": "Divehi", + "dz": "Dzongkha", + "dzo": "Dzongkha", + "ee": "Ewe", + "el": "Greek", + "ell": "Greek", + "en": "English", + "eng": "English", + "eo": "Esperanto", + "epo": "Esperanto", + "es": "Spanish", + "est": "Estonian", + "et": "Estonian", + "eu": "Basque", + "eus": "Basque", + "ewe": "Ewe", + "fa": "Persian", + "fao": "Faroese", + "fas": "Persian", + "fat": "Fanti", + "ff": "Fulah", + "fi": "Finnish", + "fil": "Filipino", + "fin": "Finnish", + "fj": "Fijian", + "fij": "Fijian", + "fo": "Faroese", + "fr": "French", + "fra": "French", + "fre": "French", + "fry": "Western Frisian", + "ful": "Fulah", + "fy": "Western Frisian", + "ga": "Irish", + "gd": "Scottish Gaelic", + "geo": "Georgian", + "ger": "German", + "gl": "Galician", + "gla": "Scottish Gaelic", + "glg": "Galician", + "glv": "Manx", + "gn": "Guarani", + "gre": "Greek", + "grn": "Guarani", + "gu": "Gujarati", + "guj": "Gujarati", + "gv": "Manx", + "ha": "Hausa", + "hat": "Haitian Creole", + "hau": "Hausa", + "he": "Hebrew", + "heb": "Hebrew", + "her": "Herero", + "hi": "Hindi", + "hin": "Hindi", + "hmo": "Hiri Motu", + "ho": "Hiri Motu", + "hr": "Croatian", + "hrv": "Croatian", + "ht": "Haitian Creole", + "hu": "Hungarian", + "hun": "Hungarian", + "hy": "Armenian", + "hye": "Armenian", + 
"hz": "Herero", + "ia": "Interlingua", + "ice": "Icelandic", + "id": "Indonesian", + "ido": "Ido", + "ie": "Interlingue", + "ig": "Igbo", + "ii": "Sichuan Yi", + "ik": "Inupiaq", + "iku": "Inuktitut", + "ile": "Interlingue", + "ina": "Interlingua", + "ind": "Indonesian", + "io": "Ido", + "ipk": "Inupiaq", + "is": "Icelandic", + "isl": "Icelandic", + "it": "Italian", + "ita": "Italian", + "iu": "Inuktitut", + "ja": "Japanese", + "jav": "Javanese", + "jpn": "Japanese", + "jv": "Javanese", + "ka": "Georgian", + "kal": "Kalaallisut", + "kan": "Kannada", + "kas": "Kashmiri", + "kat": "Georgian", + "kau": "Kanuri", + "kaz": "Kazakh", + "kg": "Kongo", + "khm": "Central Khmer", + "ki": "Kikuyu", + "kik": "Kikuyu", + "kin": "Kinyarwanda", + "kir": "Kyrgyz", + "kj": "Kuanyama", + "kk": "Kazakh", + "kl": "Kalaallisut", + "km": "Central Khmer", + "kn": "Kannada", + "ko": "Korean", + "kom": "Komi", + "kon": "Kongo", + "kor": "Korean", + "kr": "Kanuri", + "ks": "Kashmiri", + "ku": "Kurdish", + "kua": "Kuanyama", + "kur": "Kurdish", + "kv": "Komi", + "kw": "Cornish", + "ky": "Kyrgyz", + "la": "Latin", + "lao": "Lao", + "lat": "Latin", + "lb": "Luxembourgish", + "lg": "Ganda", + "li": "Limburgan", + "lim": "Limburgan", + "lin": "Lingala", + "lit": "Lithuanian", + "lo": "Lao", + "lt": "Lithuanian", + "ltz": "Luxembourgish", + "lu": "Luba-Katanga", + "lub": "Luba-Katanga", + "lug": "Ganda", + "lv": "Latvian", + "mac": "Macedonian", + "mao": "Maori", + "may": "Malay", + "mg": "Malagasy", + "mh": "Marshallese", + "mi": "Maori", + "mk": "Macedonian", + "mkd": "Macedonian", + "ml": "Malayalam", + "mlg": "Malagasy", + "mlt": "Maltese", + "mn": "Mongolian", + "mon": "Mongolian", + "mr": "Marathi", + "mri": "Maori", + "ms": "Malay", + "msa": "Malay", + "mt": "Maltese", + "my": "Burmese", + "mya": "Burmese", + "na": "Nauru", + "nau": "Nauru", + "nav": "Navajo", + "nb": "Norwegian Bokmål", + "nd": "North Ndebele", + "nde": "North Ndebele", + "ndo": "Ndonga", + "ne": "Nepali", + "nep": 
"Nepali", + "ng": "Ndonga", + "nl": "Dutch", + "nn": "Norwegian Nynorsk", + "nno": "Norwegian Nynorsk", + "no": "Norwegian", + "nob": "Norwegian Bokmål", + "nor": "Norwegian", + "nr": "South Ndebele", + "nv": "Navajo", + "ny": "Chichewa", + "nya": "Chichewa", + "oc": "Occitan", + "oci": "Occitan", + "oj": "Ojibwa", + "oji": "Ojibwa", + "om": "Oromo", + "or": "Oriya", + "ori": "Oriya", + "orm": "Oromo", + "os": "Ossetian", + "oss": "Ossetian", + "pa": "Punjabi", + "pan": "Punjabi", + "per": "Persian", + "pi": "Pali", + "pl": "Polish", + "pli": "Pali", + "pol": "Polish", + "por": "Portuguese", + "ps": "Pashto", + "pt": "Portuguese", + "pus": "Pashto", + "qu": "Quechua", + "que": "Quechua", + "rm": "Romansh", + "rn": "Rundi", + "ro": "Romanian", + "roh": "Romansh", + "ron": "Romanian", + "ru": "Russian", + "rum": "Romanian", + "run": "Rundi", + "rus": "Russian", + "rw": "Kinyarwanda", + "sa": "Sanskrit", + "sag": "Sango", + "san": "Sanskrit", + "sc": "Sardinian", + "sd": "Sindhi", + "se": "Northern Sami", + "sg": "Sango", + "si": "Sinhala", + "sin": "Sinhala", + "sk": "Slovak", + "sl": "Slovenian", + "slk": "Slovak", + "slo": "Slovak", + "slv": "Slovenian", + "sm": "Samoan", + "sme": "Northern Sami", + "smo": "Samoan", + "sn": "Shona", + "sna": "Shona", + "so": "Somali", + "som": "Somali", + "sq": "Albanian", + "sqi": "Albanian", + "sr": "Serbian", + "srd": "Sardinian", + "srp": "Serbian", + "ss": "Swati", + "ssw": "Swati", + "st": "Southern Sotho", + "su": "Sundanese", + "sun": "Sundanese", + "sv": "Swedish", + "sw": "Swahili", + "swa": "Swahili", + "swe": "Swedish", + "ta": "Tamil", + "tam": "Tamil", + "tat": "Tatar", + "te": "Telugu", + "tel": "Telugu", + "tg": "Tajik", + "tgk": "Tajik", + "tgl": "Tagalog", + "th": "Thai", + "tha": "Thai", + "tib": "Tibetan", + "ti": "Tigrinya", + "tir": "Tigrinya", + "tk": "Turkmen", + "tl": "Tagalog", + "tn": "Tswana", + "to": "Tongan", + "ton": "Tongan", + "tr": "Turkish", + "ts": "Tsonga", + "tsn": "Tswana", + "tso": "Tsonga", 
+ "tt": "Tatar", + "tuk": "Turkmen", + "ty": "Tahitian", + "ug": "Uighur", + "uig": "Uighur", + "uk": "Ukrainian", + "ukr": "Ukrainian", + "und": "Undefined", + "ur": "Urdu", + "urd": "Urdu", + "uz": "Uzbek", + "uzb": "Uzbek", + "ve": "Venda", + "ven": "Venda", + "vi": "Vietnamese", + "vie": "Vietnamese", + "vo": "Volapük", + "vol": "Volapük", + "wa": "Walloon", + "wel": "Welsh", + "wln": "Walloon", + "wo": "Wolof", + "wol": "Wolof", + "xh": "Xhosa", + "xho": "Xhosa", + "yi": "Yiddish", + "yid": "Yiddish", + "yo": "Yoruba", + "yor": "Yoruba", + "yue": "Cantonese", + "za": "Zhuang", + "zha": "Zhuang", + "zh": "Chinese", + "zho": "Chinese", + "zu": "Zulu", + "zul": "Zulu", +} + +REVERSE_LANG_CODES = { + "Afar": "aa", + "Abkhazian": "ab", + "Avestan": "ae", + "Afrikaans": "af", + "Akan": "ak", + "Amharic": "am", + "Aragonese": "an", + "Arabic": "ar", + "Assamese": "as", + "Avaric": "av", + "Aymara": "ay", + "Azerbaijani": "az", + "Bashkir": "ba", + "Bambara": "bm", + "Belarusian": "be", + "Bulgarian": "bg", + "Bislama": "bi", + "Bengali": "bn", + "Tibetan": "bo", + "Breton": "br", + "Bosnian": "bs", + "Catalan": "ca", + "Chechen": "ce", + "Chamorro": "ch", + "Chinese": "zh", + "Church Slavonic": "cu", + "Chuvash": "cv", + "Cornish": "kw", + "Corsican": "co", + "Cree": "cr", + "Czech": "cs", + "Danish": "da", + "German": "de", + "Divehi": "dv", + "Dzongkha": "dz", + "Ewe": "ee", + "Greek": "el", + "English": "en", + "Esperanto": "eo", + "Spanish": "es", + "Estonian": "et", + "Basque": "eu", + "Persian": "fa", + "Faroese": "fo", + "Fanti": "fat", + "Fulah": "ff", + "Finnish": "fi", + "Filipino": "fil", + "Fijian": "fj", + "French": "fr", + "Western Frisian": "fy", + "Irish": "ga", + "Scottish Gaelic": "gd", + "Galician": "gl", + "Manx": "gv", + "Guarani": "gn", + "Gujarati": "gu", + "Hausa": "ha", + "Haitian Creole": "ht", + "Hebrew": "he", + "Herero": "hz", + "Hindi": "hi", + "Hiri Motu": "ho", + "Croatian": "hr", + "Hungarian": "hu", + "Armenian": "hy", + 
"Interlingua": "ia", + "Indonesian": "id", + "Ido": "io", + "Interlingue": "ie", + "Igbo": "ig", + "Sichuan Yi": "ii", + "Inupiaq": "ik", + "Inuktitut": "iu", + "Icelandic": "is", + "Italian": "it", + "Japanese": "ja", + "Javanese": "jv", + "Georgian": "ka", + "Kalaallisut": "kl", + "Kannada": "kn", + "Kashmiri": "ks", + "Kanuri": "kr", + "Kazakh": "kk", + "Kongo": "kg", + "Central Khmer": "km", + "Kikuyu": "ki", + "Kinyarwanda": "rw", + "Kyrgyz": "ky", + "Kuanyama": "kj", + "Lao": "lo", + "Latin": "la", + "Luxembourgish": "lb", + "Ganda": "lg", + "Limburgan": "li", + "Lingala": "ln", + "Lithuanian": "lt", + "Luba-Katanga": "lu", + "Latvian": "lv", + "Macedonian": "mk", + "Malagasy": "mg", + "Marshallese": "mh", + "Maori": "mi", + "Malayalam": "ml", + "Maltese": "mt", + "Mongolian": "mn", + "Marathi": "mr", + "Malay": "ms", + "Burmese": "my", + "Nauru": "na", + "Navajo": "nv", + "Norwegian Bokmål": "nb", + "North Ndebele": "nd", + "Ndonga": "ng", + "Nepali": "ne", + "Norwegian Nynorsk": "nn", + "Norwegian": "no", + "South Ndebele": "nr", + "Chichewa": "ny", + "Occitan": "oc", + "Ojibwa": "oj", + "Oromo": "om", + "Oriya": "or", + "Ossetian": "os", + "Punjabi": "pa", + "Pali": "pi", + "Polish": "pl", + "Portuguese": "pt", + "Pashto": "ps", + "Quechua": "qu", + "Romansh": "rm", + "Rundi": "rn", + "Romanian": "ro", + "Russian": "ru", + "Sanskrit": "sa", + "Sango": "sg", + "Sardinian": "sc", + "Sindhi": "sd", + "Northern Sami": "se", + "Sinhala": "si", + "Slovak": "sk", + "Slovenian": "sl", + "Samoan": "sm", + "Shona": "sn", + "Somali": "so", + "Albanian": "sq", + "Serbian": "sr", + "Swati": "ss", + "Southern Sotho": "st", + "Sundanese": "su", + "Swedish": "sv", + "Swahili": "sw", + "Tamil": "ta", + "Tatar": "tt", + "Telugu": "te", + "Tajik": "tg", + "Tagalog": "tl", + "Thai": "th", + "Tigrinya": "ti", + "Turkmen": "tk", + "Tswana": "tn", + "Tongan": "to", + "Turkish": "tr", + "Tsonga": "ts", + "Tahitian": "ty", + "Uighur": "ug", + "Ukrainian": "uk", + "Urdu": "ur", + 
"Uzbek": "uz", + "Venda": "ve", + "Vietnamese": "vi", + "Volapük": "vo", + "Walloon": "wa", + "Welsh": "cy", + "Wolof": "wo", + "Xhosa": "xh", + "Yiddish": "yi", + "Yoruba": "yo", + "Cantonese": "yue", + "Mandarin Chinese": "cmn", + "Zhuang": "za", + "Zulu": "zu", + "Undefined": "und" +} \ No newline at end of file diff --git a/set_1_ger b/set_1_ger new file mode 100755 index 0000000..d3622e9 --- /dev/null +++ b/set_1_ger @@ -0,0 +1,47 @@ +#!/usr/bin/env python3 +import sys +import pathlib +import subprocess +from check_video import check_video_ext, normalize_language, normalize_lang_code + +def collect_files(paths): + files = [] + for p in paths: + p = pathlib.Path(p) + if p.is_dir(): + files += [f for f in p.iterdir() if check_video_ext(f.suffix)] + elif check_video_ext(p.suffix): + files.append(p) + return files + +def set_audio_lang(file_path, lang_code): + """ + Sets the first audio track language to the given ISO 639-1 code using mkvpropedit. + """ + try: + # Target first audio track (track ID 1) + subprocess.run([ + "mkvpropedit", str(file_path), + "--edit", "track:a1", + "--set", f"language={lang_code}" + ], check=True) + print(f"Set first audio track of '{file_path}' to {lang_code}") + except subprocess.CalledProcessError as e: + print(f"Failed to set language for '{file_path}': {e}") + +def main(): + if len(sys.argv) == 1: + targets = ["."] + lang = "German" + else: + targets = sys.argv[1:] + lang = "German" # Hardcoded to German + + lang = normalize_language(lang) + lang_code = normalize_lang_code(lang) + + for f in collect_files(targets): + set_audio_lang(f, lang_code) + +if __name__ == "__main__": + main() diff --git a/set_audio_lang b/set_audio_lang new file mode 100755 index 0000000..88d103c --- /dev/null +++ b/set_audio_lang @@ -0,0 +1,112 @@ +#!/usr/bin/env python3 +import subprocess +import sys +import json +import pathlib + +VIDEO_EXTS = {".mkv", ".mp4", ".avi", ".mov"} + +LANG_CODES = { + "eng": "English", "en": "English", "english": "English", 
CANONICAL = {
    "English": "eng",
    "Spanish": "spa",
    "French": "fra",
    "German": "deu",
}


def normalize_language(value):
    """Map a language code or name to its canonical three-letter code.

    *value* is looked up in ``LANG_CODES`` case-insensitively, ignoring
    surrounding whitespace, and the matching ``CANONICAL`` code is returned.

    Raises:
        ValueError: if *value* is not a known code or name.
    """
    key = value.strip().lower()
    if key not in LANG_CODES:
        raise ValueError(f"Unknown language: {value}")
    return CANONICAL[LANG_CODES[key]]


def probe(file):
    """Return ffprobe's JSON description of the streams in *file* as a dict.

    Raises:
        OSError: ffprobe could not be started.
        subprocess.CalledProcessError: ffprobe exited with an error.
        ValueError: ffprobe's output was not valid JSON.
    """
    cmd = [
        "ffprobe",
        "-v", "error",
        "-print_format", "json",
        "-show_streams",
        str(file),
    ]
    return json.loads(subprocess.check_output(cmd))


def collect_files(paths):
    """Return the video files named by *paths*.

    Directories are scanned one level deep (not recursively) for regular
    files whose lower-cased suffix is in ``VIDEO_EXTS``, in sorted order.
    Any other path is kept when its suffix is in ``VIDEO_EXTS``.
    """
    files = []
    for p in paths:
        p = pathlib.Path(p)
        if p.is_dir():
            files += sorted(
                f for f in p.iterdir()
                if f.is_file() and f.suffix.lower() in VIDEO_EXTS
            )
        elif p.suffix.lower() in VIDEO_EXTS:
            files.append(p)
    return files


def preserve_original(file):
    """Rename *file* to ``<stem>_original<suffix>`` unless that name is taken.

    When the ``_original`` file already exists, *file* is left untouched so
    an earlier backup is never overwritten.
    """
    original = file.with_name(f"{file.stem}_original{file.suffix}")
    if original.exists():
        print(f"Original already preserved: {original.name}")
        return
    file.rename(original)
    print(f"Preserved original as: {original.name}")


def set_audio_lang(file, lang):
    """Write a copy of *file* whose first audio stream is tagged *lang*.

    The copy is named ``<stem>_dub-<lang><suffix>``; all streams are copied
    without re-encoding.  When the first audio stream already carries a
    language tag the user is asked before continuing.  After a successful
    run the source file is renamed via ``preserve_original``.

    Args:
        file: ``pathlib.Path`` of the source video.
        lang: language code to store in the stream metadata.

    Returns:
        True when the tagged copy was written, False when the file was
        skipped or a tool failed.  Failures are reported on stdout and never
        raised, so one bad file does not abort a batch.
    """
    try:
        data = probe(file)
    except (OSError, subprocess.CalledProcessError, ValueError) as e:
        print(f"ERROR probing {file.name}: {e}")
        return False

    audio_streams = [
        s for s in data.get("streams", [])
        if s.get("codec_type") == "audio"
    ]
    if not audio_streams:
        print(f"No audio streams in {file.name}")
        return False

    current = audio_streams[0].get("tags", {}).get("language")
    if current:
        ans = input(f"{file.name}: audio already '{current}'. Overwrite? [y/N] ")
        if ans.strip().lower() != "y":
            return False

    out = file.with_name(f"{file.stem}_dub-{lang}{file.suffix}")
    print(f"{file.name} -> {out.name}")

    cmd = [
        "ffmpeg", "-y",
        "-i", str(file),
        "-map", "0",
        "-c", "copy",
        "-metadata:s:a:0", f"language={lang}",
        str(out),
    ]

    # Capture stderr as text and inspect the return code ourselves.  With
    # check=True and stderr discarded, a failure raised before the error
    # branch ran and there was no stderr text to print.
    try:
        result = subprocess.run(
            cmd,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.PIPE,
            text=True,
        )
    except OSError as e:
        print(f"ERROR processing {file.name}: {e}")
        return False

    if result.returncode != 0:
        print(f"ERROR processing {file.name}")
        print(result.stderr.strip())
        return False

    preserve_original(file)
    return True


def main():
    """Command-line entry point.

    Usage: ``set_audio_lang [TARGET ...] [LANG]``.  The last argument is the
    language; without targets the current directory is used; without any
    argument the language is asked for interactively.
    """
    args = sys.argv[1:]
    if args:
        *targets, lang = args
    else:
        targets, lang = [], input("Language: ")
    targets = targets or ["."]

    try:
        lang = normalize_language(lang)
    except ValueError as e:
        # Exit with a short message instead of a traceback.
        sys.exit(str(e))

    for f in collect_files(targets):
        set_audio_lang(f, lang)


if __name__ == "__main__":
    main()
def preserve_original(file):
    """Rename *file* to ``<stem>_original<suffix>`` unless that name is taken.

    When the ``_original`` file already exists nothing happens, so an
    earlier backup is never overwritten.
    """
    original = file.with_name(f"{file.stem}_original{file.suffix}")
    if original.exists():
        return
    file.rename(original)
    print(f"preserve: {file.name} -> {original.name}")


def set_sub_lang(file, lang):
    """Write a copy of *file* with one subtitle stream tagged *lang*.

    Lists the subtitle streams of *file*, asks which one to tag and writes
    ``<stem>_sub-<lang><suffix>`` with every stream copied unchanged and the
    chosen subtitle stream's ``language`` metadata set.  After a successful
    run the source file is renamed via ``preserve_original``.

    Args:
        file: ``pathlib.Path`` of the source video.
        lang: language code to store in the stream metadata.

    A blank answer skips the file.  An answer that is not one of the listed
    indices is reported and the file is skipped, rather than crashing or
    silently selecting a different stream.
    """
    data = probe(file)
    subs = [s for s in data["streams"] if s["codec_type"] == "subtitle"]

    if not subs:
        return

    for i, s in enumerate(subs):
        cur = s.get("tags", {}).get("language", "unset")
        print(f"{i}: subtitle ({cur})")

    sel = input(f"{file.name}: select subtitle index (blank = skip): ").strip()
    if sel == "":
        return

    try:
        idx = int(sel)
    except ValueError:
        print(f"Invalid subtitle index: {sel!r}")
        return
    # Reject negatives explicitly: subs[-1] would pick the last stream in
    # Python, but "-metadata:s:s:-1" is not a valid ffmpeg stream specifier.
    if not 0 <= idx < len(subs):
        print(f"Subtitle index out of range: {idx}")
        return

    current = subs[idx].get("tags", {}).get("language")

    if current:
        ans = input(f"Overwrite existing '{current}'? [y/N] ")
        if ans.strip().lower() != "y":
            return

    out = file.with_name(f"{file.stem}_sub-{lang}{file.suffix}")
    print(f"{file.name} -> {out.name}")

    cmd = [
        "ffmpeg", "-y",
        "-i", str(file),
        "-map", "0",
        "-c", "copy",
        f"-metadata:s:s:{idx}", f"language={lang}",
        str(out),
    ]

    try:
        result = subprocess.run(
            cmd,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.PIPE,
            text=True,
        )
    except OSError as e:
        # ffmpeg missing or not executable.
        print(f"ERROR processing {file.name}: {e}")
        return

    if result.returncode != 0:
        print(f"ERROR processing {file.name}")
        print(result.stderr.strip())
        return

    preserve_original(file)


def main():
    """Command-line entry point.

    Usage: ``set_subtitle_lang [TARGET ...] [LANG]``.  The last argument is
    the language; without targets the current directory is used; without any
    argument the language is asked for interactively.
    """
    args = sys.argv[1:]
    if args:
        *targets, lang = args
    else:
        targets, lang = [], input("Subtitle language: ")
    targets = targets or ["."]

    try:
        lang = normalize_language(lang)
    except ValueError as e:
        # Exit with a short message instead of a traceback.
        sys.exit(str(e))

    for f in collect_files(targets):
        set_sub_lang(f, lang)


if __name__ == "__main__":
    main()