Repaired a lot of Media Management

This commit is contained in:
Hannes
2026-02-12 00:24:39 +01:00
parent 0582996bf2
commit 3b6f5f360e
14 changed files with 958 additions and 181 deletions

1
check_video/__init__.py Normal file
View File

@@ -0,0 +1 @@
from .check_video import check_video_ext, normalize_language, normalize_lang_code

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,42 @@
from languages import LANG_CODES, REVERSE_LANG_CODES
# Extensions (lowercase, without the dot) that count as video files.
extensions = ("mp4", "mkv", "avi", "mov", "wmv", "flv", "webm", "lrv", "gif")


def check_video_ext(extension: str) -> bool:
    """Report whether ``extension`` is one of the known video extensions.

    The comparison ignores case and any leading or trailing dots, so
    ``".MKV"`` and ``"mkv"`` are treated alike.
    """
    bare = extension.lower().strip('.')
    return bare in extensions
def normalize_language(lang: str) -> str:
    """Turn a language code or language name into its display name.

    Examples: 'ger', 'DE', 'German' and 'GERMAN' all give 'German'.
    Empty input and anything that cannot be matched give 'Undefined'.
    """
    if not lang:
        return "Undefined"

    needle = lang.strip().lower()

    # Codes are looked up directly in the code table.
    try:
        return LANG_CODES[needle]
    except KeyError:
        pass

    # Otherwise compare case-insensitively against the full names that
    # REVERSE_LANG_CODES is keyed by.
    return next(
        (name for name in REVERSE_LANG_CODES if name.lower() == needle),
        "Undefined",
    )
def normalize_lang_code(lang: str) -> str:
    """Turn a language code or language name into its canonical code.

    Examples: 'ger', 'DE', 'German' and 'GERMAN' all give 'de'.
    The input is first resolved to a display name with
    normalize_language(); the result is whatever code REVERSE_LANG_CODES
    lists for that name, or 'und' when the name has no entry there.
    """
    return REVERSE_LANG_CODES.get(normalize_language(lang), "und")

223
ff
View File

@@ -3,10 +3,7 @@ import sys
import os
import subprocess
import json
# cmd = "ls ../Videos/OBS/*.mkv"
# result = subprocess.run(cmd, shell=True, capture_output=True, text=True)
# print(result.stdout)
from check_video import check_video_ext, normalize_language, normalize_lang_code
color = True
try:
@@ -18,8 +15,6 @@ except ImportError:
print("For nicer output install termcolor:\npip install termcolor")
color = False
EXT=[".mp4", ".mkv", ".avi", ".mov", ".wmv", ".flv", ".webm", ".lrv", ".gif"]
NORMAL_STYLE = ("white", None, [])
ERROR_STYLE = ("red", None, ["bold"])
WARN_STYLE = ("yellow", None, ["bold"])
@@ -78,7 +73,7 @@ def get_interlace_label(fo):
return "Progressive" # Default assumption for modern web video
class video_lines:
def __init__(self, stream, duration):
def __init__(self, stream):
if stream.get("index"):
self.id = stream.get("index")
else:
@@ -92,7 +87,7 @@ class video_lines:
if stream.get("name"):
self.duration = seconds_to_hms(stream.get("duration"))
else:
self.duration = duration
self.duration = None
if stream.get("codec_name"):
self.codec = stream.get("codec_name")
@@ -160,89 +155,8 @@ class video_lines:
string += f" [{self.field_order}]"
return string
LANG_CODES = {
"eng": "English", "en": "English", "spa": "Spanish", "es": "Spanish",
"fra": "French", "fr": "French", "deu": "German", "ger": "German", "de": "German",
"jpn": "Japanese", "ja": "Japanese", "ita": "Italian", "it": "Italian",
"por": "Portuguese", "pt": "Portuguese", "rus": "Russian", "ru": "Russian",
"chi": "Chinese", "zho": "Chinese", "zh": "Chinese", "kor": "Korean", "ko": "Korean",
"dut": "Dutch", "nl": "Dutch", "swe": "Swedish", "sv": "Swedish",
"fin": "Finnish", "fi": "Finnish", "pol": "Polish", "pl": "Polish",
"ara": "Arabic", "ar": "Arabic", "hin": "Hindi", "hi": "Hindi",
"tur": "Turkish", "tr": "Turkish", "und": "Undefined", " ": "Undefined",
"ab": "Abkhazian", "abk": "Abkhazian", "aa": "Afar", "aar": "Afar",
"af": "Afrikaans", "afr": "Afrikaans", "ak": "Akan", "aka": "Akan",
"twi": "Twi", "fat": "Fanti", "sq": "Albanian", "sqi": "Albanian", "alb": "Albanian",
"am": "Amharic", "amh": "Amharic", "arb": "Arabic", "an": "Aragonese", "arg": "Aragonese",
"hy": "Armenian", "hye": "Armenian", "arm": "Armenian", "as": "Assamese", "asm": "Assamese",
"av": "Avaric", "ava": "Avaric", "ae": "Avestan", "ave": "Avestan", "ay": "Aymara", "aym": "Aymara",
"az": "Azerbaijani", "aze": "Azerbaijani", "bm": "Bambara", "bam": "Bambara",
"ba": "Bashkir", "bak": "Bashkir", "eu": "Basque", "eus": "Basque", "baq": "Basque",
"be": "Belarusian", "bel": "Belarusian", "bn": "Bengali", "ben": "Bengali",
"bi": "Bislama", "bis": "Bislama", "bs": "Bosnian", "bos": "Bosnian",
"br": "Breton", "bre": "Breton", "bg": "Bulgarian", "bul": "Bulgarian",
"my": "Burmese", "mya": "Burmese", "ca": "Catalan", "cat": "Catalan",
"ch": "Chamorro", "cha": "Chamorro", "ce": "Chechen", "che": "Chechen",
"ny": "Chichewa", "nya": "Chichewa", "cu": "Church Slavonic", "chu": "Church Slavonic",
"cv": "Chuvash", "chv": "Chuvash", "kw": "Cornish", "cor": "Cornish",
"co": "Corsican", "cos": "Corsican", "cr": "Cree", "cre": "Cree",
"hr": "Croatian", "hrv": "Croatian", "cs": "Czech", "ces": "Czech", "cze": "Czech",
"da": "Danish", "dan": "Danish", "dv": "Divehi", "div": "Divehi", "dz": "Dzongkha", "dzo": "Dzongkha",
"eo": "Esperanto", "epo": "Esperanto", "et": "Estonian", "est": "Estonian",
"ee": "Ewe", "ewe": "Ewe", "fo": "Faroese", "fao": "Faroese", "fj": "Fijian", "fij": "Fijian",
"fre": "French", "fy": "Western Frisian", "fry": "Western Frisian", "ff": "Fulah", "ful": "Fulah",
"gd": "Gaelic, Scottish Gaelic", "gla": "Gaelic", "gl": "Galician", "glg": "Galician",
"lg": "Ganda", "lug": "Ganda", "ka": "Georgian", "kat": "Georgian", "geo": "Georgian",
"el": "Greek", "ell": "Greek", "gre": "Greek", "kl": "Kalaallisut", "kal": "Kalaallisut",
"gn": "Guarani", "grn": "Guarani", "gu": "Gujarati", "guj": "Gujarati",
"ht": "Haitian Creole", "hat": "Haitian Creole", "ha": "Hausa", "hau": "Hausa",
"he": "Hebrew", "heb": "Hebrew", "hz": "Herero", "her": "Herero", "ho": "Hiri Motu", "hmo": "Hiri Motu",
"hu": "Hungarian", "hun": "Hungarian", "is": "Icelandic", "isl": "Icelandic", "ice": "Icelandic",
"io": "Ido", "ido": "Ido", "ig": "Igbo", "ibo": "Igbo", "id": "Indonesian", "ind": "Indonesian",
"ia": "Interlingua", "ina": "Interlingua", "ie": "Interlingue", "ile": "Interlingue",
"iu": "Inuktitut", "iku": "Inuktitut", "ik": "Inupiaq", "ipk": "Inupiaq",
"ga": "Irish", "gle": "Irish", "jv": "Javanese", "jav": "Javanese",
"kn": "Kannada", "kan": "Kannada", "kr": "Kanuri", "kau": "Kanuri",
"ks": "Kashmiri", "kas": "Kashmiri", "kk": "Kazakh", "kaz": "Kazakh",
"km": "Central Khmer", "khm": "Central Khmer", "ki": "Kikuyu", "kik": "Kikuyu",
"rw": "Kinyarwanda", "kin": "Kinyarwanda", "ky": "Kyrgyz", "kir": "Kyrgyz",
"kv": "Komi", "kom": "Komi", "kg": "Kongo", "kon": "Kongo", "kj": "Kuanyama", "kua": "Kuanyama",
"ku": "Kurdish", "kur": "Kurdish", "lo": "Lao", "lao": "Lao", "la": "Latin", "lat": "Latin",
"lv": "Latvian", "lav": "Latvian", "li": "Limburgan", "lim": "Limburgan",
"ln": "Lingala", "lin": "Lingala", "lt": "Lithuanian", "lit": "Lithuanian",
"lu": "Luba-Katanga", "lub": "Luba-Katanga", "lb": "Luxembourgish", "ltz": "Luxembourgish",
"mk": "Macedonian", "mkd": "Macedonian", "mac": "Macedonian", "mg": "Malagasy", "mlg": "Malagasy",
"ms": "Malay", "msa": "Malay", "ml": "Malayalam", "mal": "Malayalam", "mt": "Maltese", "mlt": "Maltese",
"gv": "Manx", "glv": "Manx", "mi": "Maori", "mri": "Maori", "mao": "Maori",
"mr": "Marathi", "mar": "Marathi", "mh": "Marshallese", "mah": "Marshallese",
"mn": "Mongolian", "mon": "Mongolian", "na": "Nauru", "nau": "Nauru", "nv": "Navajo", "nav": "Navajo",
"nd": "North Ndebele", "nde": "North Ndebele", "nr": "South Ndebele", "nbl": "South Ndebele",
"ng": "Ndonga", "ndo": "Ndonga", "ne": "Nepali", "nep": "Nepali", "no": "Norwegian", "nor": "Norwegian",
"nb": "Norwegian Bokmål", "nob": "Norwegian Bokmål", "nn": "Norwegian Nynorsk", "nno": "Norwegian Nynorsk",
"oc": "Occitan", "oci": "Occitan", "oj": "Ojibwa", "oji": "Ojibwa", "or": "Oriya", "ori": "Oriya",
"om": "Oromo", "orm": "Oromo", "os": "Ossetian", "oss": "Ossetian", "pi": "Pali", "pli": "Pali",
"ps": "Pashto", "pus": "Pashto", "fa": "Persian", "fas": "Persian", "per": "Persian",
"pa": "Punjabi", "pan": "Punjabi", "qu": "Quechua", "que": "Quechua", "ro": "Romanian", "ron": "Romanian", "rum": "Romanian",
"rm": "Romansh", "roh": "Romansh", "rn": "Rundi", "run": "Rundi", "se": "Northern Sami", "sme": "Northern Sami",
"sm": "Samoan", "smo": "Samoan", "sg": "Sango", "sag": "Sango", "sa": "Sanskrit", "san": "Sanskrit",
"sc": "Sardinian", "srd": "Sardinian", "sr": "Serbian", "srp": "Serbian", "sn": "Shona", "sna": "Shona",
"sd": "Sindhi", "snd": "Sindhi", "si": "Sinhala", "sin": "Sinhala", "sk": "Slovak", "slk": "Slovak", "slo": "Slovak",
"sl": "Slovenian", "slv": "Slovenian", "so": "Somali", "som": "Somali", "st": "Southern Sotho", "sot": "Southern Sotho",
"su": "Sundanese", "sun": "Sundanese", "sw": "Swahili", "swa": "Swahili", "ss": "Swati", "ssw": "Swati",
"tl": "Tagalog", "tgl": "Tagalog", "ty": "Tahitian", "tah": "Tahitian", "tg": "Tajik", "tgk": "Tajik",
"ta": "Tamil", "tam": "Tamil", "tt": "Tatar", "tat": "Tatar", "te": "Telugu", "tel": "Telugu",
"th": "Thai", "tha": "Thai", "bo": "Tibetan", "bod": "Tibetan", "tib": "Tibetan",
"ti": "Tigrinya", "tir": "Tigrinya", "to": "Tongan", "ton": "Tongan", "ts": "Tsonga", "tso": "Tsonga",
"tn": "Tswana", "tsn": "Tswana", "tk": "Turkmen", "tuk": "Turkmen", "ug": "Uighur", "uig": "Uighur",
"uk": "Ukrainian", "ukr": "Ukrainian", "ur": "Urdu", "urd": "Urdu", "uz": "Uzbek", "uzb": "Uzbek",
"ve": "Venda", "ven": "Venda", "vi": "Vietnamese", "vie": "Vietnamese", "vo": "Volapük", "vol": "Volapük",
"wa": "Walloon", "wln": "Walloon", "cy": "Welsh", "cym": "Welsh", "wel": "Welsh", "wo": "Wolof", "wol": "Wolof",
"xh": "Xhosa", "xho": "Xhosa", "ii": "Sichuan Yi", "iii": "Sichuan Yi", "yi": "Yiddish", "yid": "Yiddish",
"yo": "Yoruba", "yor": "Yoruba", "za": "Zhuang", "zha": "Zhuang", "zu": "Zulu", "zul": "Zulu"
}
class audio_lines:
def __init__(self, stream, file_duration):
def __init__(self, stream):
# 1. Basic ID
self.id = stream.get("index")
@@ -251,14 +165,14 @@ class audio_lines:
# 3. Language (usually in tags)
raw_lang = stream.get("tags", {}).get("language", "und").lower()
self.language = LANG_CODES.get(raw_lang, raw_lang.capitalize())
self.language = normalize_language(raw_lang)
# 4. Duration (fallback to file duration if stream duration is missing)
stream_dur = stream.get("duration")
if stream_dur:
self.duration = seconds_to_hms(float(stream_dur))
else:
self.duration = file_duration
self.duration = None
# 5. Codec
self.codec = stream.get("codec_name", "")
@@ -315,17 +229,17 @@ class audio_lines:
return string
class subtitles:
def __init__(self, stream, file_duration):
def __init__(self, stream):
self.id = stream.get("index")
self.name = stream.get("tags", {}).get("title", "")
# Language translation
raw_lang = stream.get("tags", {}).get("language", "und")
self.language = LANG_CODES.get(raw_lang, raw_lang.capitalize())
self.language = normalize_language(raw_lang)
# Duration logic
stream_dur = stream.get("duration")
self.duration = seconds_to_hms(float(stream_dur)) if stream_dur else file_duration
self.duration = seconds_to_hms(float(stream_dur)) if stream_dur else None
# Codec (e.g., srt, ass, subrip)
self.codec = stream.get("codec_name", "")
@@ -359,30 +273,21 @@ class subtitles:
return " ".join(parts)
def get_video_lines(file):
cmd = ["ffprobe",
"-v", "error",
"-select_streams", "v", # video streams only
"-show_entries",
"stream=index,codec_name,width,height,r_frame_rate,bit_rate,duration,nb_frames,pix_fmt,field_order,time_base,display_aspect_ratio,color_space,color_transfer,color_primaries,bits_per_raw_sample:stream_tags=title",
"-of", "json",
file
]
result = subprocess.run(cmd, capture_output=True, text=True)
info = json.loads(result.stdout)
streams = info.get("streams", [])
return streams
def get_audio_lines(file):
def get_media_info(file):
cmd = [
"ffprobe",
"-v", "error",
"-select_streams", "a", # Select audio streams only
"-show_entries",
# Entries mapped to your requirements:
"stream=index,codec_name,sample_rate,channels,bits_per_sample,bits_per_raw_sample,bit_rate,duration" +
":stream_tags=language,title",
(
"format=duration:"
"stream=index,codec_type,codec_name,"
"width,height,r_frame_rate,bit_rate,duration,nb_frames,"
"pix_fmt,field_order,time_base,display_aspect_ratio,"
"color_space,color_transfer,color_primaries,bits_per_raw_sample,"
"sample_rate,channels,bits_per_sample,"
"stream_disposition=forced,default:"
"stream_tags=language,title"
),
"-of", "json",
file
]
@@ -390,41 +295,29 @@ def get_audio_lines(file):
try:
result = subprocess.run(cmd, capture_output=True, text=True, check=True)
info = json.loads(result.stdout)
return info.get("streams", [])
except subprocess.CalledProcessError as e:
print(f"Error running ffprobe: {e.stderr}")
return []
return None, [], [], []
def get_subtitle_lines(file):
cmd = [
"ffprobe",
"-v", "error",
"-select_streams", "s", # Subtitle streams only
"-show_entries",
"stream=index,codec_name,duration:stream_tags=language,title:stream_disposition=forced,default",
"-of", "json",
file
]
# Container / file duration (string seconds, per ffprobe convention)
duration = None
if "format" in info:
duration = info["format"].get("duration")
try:
result = subprocess.run(cmd, capture_output=True, text=True, check=True)
info = json.loads(result.stdout)
return info.get("streams", [])
except subprocess.CalledProcessError:
return []
video_streams = []
audio_streams = []
subtitle_streams = []
def get_video_duration(file_path):
cmd = [
"ffprobe",
"-v", "error",
"-show_entries", "format=duration",
"-of", "default=noprint_wrappers=1:nokey=1",
file_path
]
for stream in info.get("streams", []):
stream_type = stream.get("codec_type")
if stream_type == "video":
video_streams.append(stream)
elif stream_type == "audio":
audio_streams.append(stream)
elif stream_type == "subtitle":
subtitle_streams.append(stream)
result = subprocess.run(cmd, capture_output=True, text=True)
duration = float(result.stdout.strip())
return duration
return float(duration), video_streams, audio_streams, subtitle_streams
def seconds_to_hms(seconds):
h = int(seconds // 3600)
@@ -432,42 +325,34 @@ def seconds_to_hms(seconds):
s = int(seconds % 60)
return f"{h:02}:{m:02}:{s:02}"
def get_stream_bitrate(file_path, stream=None):
# Get duration in seconds
duration = get_video_duration(file_path)
# Get file size in bits
size_bits = os.path.getsize(file_path) * 8
# Approximate average bitrate
avg_bitrate = size_bits / duration if duration > 0 else 0
return avg_bitrate # bits per second
def get_stream_bitrate(file_size, duration):
    """Return the average bitrate in whole megabits per second.

    Args:
        file_size: Size of the file in bytes.
        duration: Length of the file in seconds; may be None when probing
            the file failed.

    Returns:
        ``file_size * 8 / duration / 1000000`` truncated to an int, or 0
        when the duration is missing, zero or negative.
    """
    # A missing duration must not reach the comparison: None > 0 raises
    # TypeError.
    if not duration or duration <= 0:
        return 0
    return int((file_size * 8) / duration / 1000000)
class video_file:
def __init__(self, path, base_tab=""):
self.base_tab = base_tab # \t
self.path = path # folder/25.mkv
self.name = os.path.basename(path) # 25.mkv
self.size = human_readable_size(os.path.getsize(path)) # 198MB
self.duration = seconds_to_hms(get_video_duration(path))
self.bitrate = int((os.path.getsize(path) * 8)/get_video_duration(path))/1000000 if get_video_duration(path) > 0 else 0
# self. = get_video_lines(path)
self.size = os.path.getsize(path)
self.videos = []
self.audios = []
self.subtitles = []
for vl in get_video_lines(path):
video_line = video_lines(vl, self.duration)
self.duration, videos, audios, subtitles = get_media_info(path)
for vl in videos:
video_line = video_lines(vl)
self.videos.append(video_line)
for al in get_audio_lines(path):
audio_line = audio_lines(al, self.duration)
for al in audios:
audio_line = audio_lines(al)
self.videos.append(audio_line)
for st in get_subtitle_lines(path):
subtitle = subtitles(st, self.duration)
for st in subtitles:
subtitle = subtitles(st)
self.videos.append(subtitle)
self.bitrate = get_stream_bitrate(self.size, self.duration)
self.size = human_readable_size(self.size) # 198MB
self.duration = seconds_to_hms(self.duration)
def print(self):
if self.base_tab == "\t":
@@ -525,6 +410,8 @@ def handle_files(files, all_files):
all_files.extend(grouped)
def handle_folders(dirs, all_files):
if(dirs != []):
dirs.sort(key=lambda f: os.path.dirname(f))
@@ -533,7 +420,7 @@ def handle_folders(dirs, all_files):
for file in os.scandir(dir):
if file.is_file():
file = file.path
if os.path.splitext(file)[1].lower() in EXT:
if check_video_ext(os.path.splitext(file)[1]):
dir_files.append(file)
else:
np(f"{file} is not a compatabile Video file", WARN_STYLE)

1
languages/__init__.py Normal file
View File

@@ -0,0 +1 @@
from .languages import LANG_CODES, REVERSE_LANG_CODES

Binary file not shown.

Binary file not shown.

562
languages/languages.py Normal file
View File

@@ -0,0 +1,562 @@
# Maps a lowercase language code to its English display name.
# Keys are two- and three-letter codes; where a language has two different
# three-letter codes (e.g. "deu"/"ger", "nld"/"dut") both are listed so that
# either spelling found in a media tag resolves to the same name.
LANG_CODES = {
    " ": "Undefined",
    "aa": "Afar",
    "aar": "Afar",
    "ab": "Abkhazian",
    "abk": "Abkhazian",
    "ae": "Avestan",
    "af": "Afrikaans",
    "afr": "Afrikaans",
    "ak": "Akan",
    "aka": "Akan",
    "alb": "Albanian",
    "am": "Amharic",
    "amh": "Amharic",
    "an": "Aragonese",
    "ar": "Arabic",
    "ara": "Arabic",
    "arb": "Arabic",
    "arg": "Aragonese",
    "arm": "Armenian",
    "as": "Assamese",
    "asm": "Assamese",
    "av": "Avaric",
    "ava": "Avaric",
    "ave": "Avestan",
    "ay": "Aymara",
    "aym": "Aymara",
    "az": "Azerbaijani",
    "aze": "Azerbaijani",
    "ba": "Bashkir",
    "bak": "Bashkir",
    "bam": "Bambara",
    "baq": "Basque",
    "be": "Belarusian",
    "bel": "Belarusian",
    "ben": "Bengali",
    "bg": "Bulgarian",
    "bi": "Bislama",
    "bis": "Bislama",
    "bm": "Bambara",
    "bn": "Bengali",
    "bo": "Tibetan",
    "bod": "Tibetan",
    "bos": "Bosnian",
    "br": "Breton",
    "bre": "Breton",
    "bs": "Bosnian",
    "bul": "Bulgarian",
    "ca": "Catalan",
    "cat": "Catalan",
    "ce": "Chechen",
    "ces": "Czech",
    "ch": "Chamorro",
    "cha": "Chamorro",
    "che": "Chechen",
    "chi": "Chinese",
    "chu": "Church Slavonic",
    "chv": "Chuvash",
    "cmn": "Mandarin Chinese",
    "co": "Corsican",
    "cor": "Cornish",
    "cos": "Corsican",
    "cr": "Cree",
    "cre": "Cree",
    "cs": "Czech",
    "cu": "Church Slavonic",
    "cv": "Chuvash",
    # NOTE(review): "cw" does not look like a standard code for Cornish
    # ("kw"/"cor" are listed too); kept so existing lookups keep working.
    "cw": "Cornish",
    "cy": "Welsh",
    "cym": "Welsh",
    "cze": "Czech",
    "da": "Danish",
    "dan": "Danish",
    "de": "German",
    "deu": "German",
    "div": "Divehi",
    "dut": "Dutch",
    "dv": "Divehi",
    "dz": "Dzongkha",
    "dzo": "Dzongkha",
    "ee": "Ewe",
    "el": "Greek",
    "ell": "Greek",
    "en": "English",
    "eng": "English",
    "eo": "Esperanto",
    "epo": "Esperanto",
    "es": "Spanish",
    "est": "Estonian",
    "et": "Estonian",
    "eu": "Basque",
    "eus": "Basque",
    "ewe": "Ewe",
    "fa": "Persian",
    "fao": "Faroese",
    "fas": "Persian",
    "fat": "Fanti",
    "ff": "Fulah",
    "fi": "Finnish",
    "fil": "Filipino",
    "fin": "Finnish",
    "fj": "Fijian",
    "fij": "Fijian",
    "fo": "Faroese",
    "fr": "French",
    "fra": "French",
    "fre": "French",
    "fry": "Western Frisian",
    "ful": "Fulah",
    "fy": "Western Frisian",
    "ga": "Irish",
    "gd": "Scottish Gaelic",
    "geo": "Georgian",
    "ger": "German",
    "gl": "Galician",
    "gla": "Scottish Gaelic",
    "gle": "Irish",
    "glg": "Galician",
    "glv": "Manx",
    "gn": "Guarani",
    "gre": "Greek",
    "grn": "Guarani",
    "gu": "Gujarati",
    "guj": "Gujarati",
    "gv": "Manx",
    "ha": "Hausa",
    "hat": "Haitian Creole",
    "hau": "Hausa",
    "he": "Hebrew",
    "heb": "Hebrew",
    "her": "Herero",
    "hi": "Hindi",
    "hin": "Hindi",
    "hmo": "Hiri Motu",
    "ho": "Hiri Motu",
    "hr": "Croatian",
    "hrv": "Croatian",
    "ht": "Haitian Creole",
    "hu": "Hungarian",
    "hun": "Hungarian",
    "hy": "Armenian",
    "hye": "Armenian",
    "hz": "Herero",
    "ia": "Interlingua",
    "ibo": "Igbo",
    "ice": "Icelandic",
    "id": "Indonesian",
    "ido": "Ido",
    "ie": "Interlingue",
    "ig": "Igbo",
    "ii": "Sichuan Yi",
    "iii": "Sichuan Yi",
    "ik": "Inupiaq",
    "iku": "Inuktitut",
    "ile": "Interlingue",
    "ina": "Interlingua",
    "ind": "Indonesian",
    "io": "Ido",
    "ipk": "Inupiaq",
    "is": "Icelandic",
    "isl": "Icelandic",
    "it": "Italian",
    "ita": "Italian",
    "iu": "Inuktitut",
    "ja": "Japanese",
    "jav": "Javanese",
    "jpn": "Japanese",
    "jv": "Javanese",
    "ka": "Georgian",
    "kal": "Kalaallisut",
    "kan": "Kannada",
    "kas": "Kashmiri",
    "kat": "Georgian",
    "kau": "Kanuri",
    "kaz": "Kazakh",
    "kg": "Kongo",
    "khm": "Central Khmer",
    "ki": "Kikuyu",
    "kik": "Kikuyu",
    "kin": "Kinyarwanda",
    "kir": "Kyrgyz",
    "kj": "Kuanyama",
    "kk": "Kazakh",
    "kl": "Kalaallisut",
    "km": "Central Khmer",
    "kn": "Kannada",
    "ko": "Korean",
    "kom": "Komi",
    "kon": "Kongo",
    "kor": "Korean",
    "kr": "Kanuri",
    "ks": "Kashmiri",
    "ku": "Kurdish",
    "kua": "Kuanyama",
    "kur": "Kurdish",
    "kv": "Komi",
    "kw": "Cornish",
    "ky": "Kyrgyz",
    "la": "Latin",
    "lao": "Lao",
    "lat": "Latin",
    "lav": "Latvian",
    "lb": "Luxembourgish",
    "lg": "Ganda",
    "li": "Limburgan",
    "lim": "Limburgan",
    "lin": "Lingala",
    "lit": "Lithuanian",
    "ln": "Lingala",
    "lo": "Lao",
    "lt": "Lithuanian",
    "ltz": "Luxembourgish",
    "lu": "Luba-Katanga",
    "lub": "Luba-Katanga",
    "lug": "Ganda",
    "lv": "Latvian",
    "mac": "Macedonian",
    "mah": "Marshallese",
    "mal": "Malayalam",
    "mao": "Maori",
    "mar": "Marathi",
    "may": "Malay",
    "mg": "Malagasy",
    "mh": "Marshallese",
    "mi": "Maori",
    "mk": "Macedonian",
    "mkd": "Macedonian",
    "ml": "Malayalam",
    "mlg": "Malagasy",
    "mlt": "Maltese",
    "mn": "Mongolian",
    "mon": "Mongolian",
    "mr": "Marathi",
    "mri": "Maori",
    "ms": "Malay",
    "msa": "Malay",
    "mt": "Maltese",
    "my": "Burmese",
    "mya": "Burmese",
    "na": "Nauru",
    "nau": "Nauru",
    "nav": "Navajo",
    "nb": "Norwegian Bokmål",
    "nbl": "South Ndebele",
    "nd": "North Ndebele",
    "nde": "North Ndebele",
    "ndo": "Ndonga",
    "ne": "Nepali",
    "nep": "Nepali",
    "ng": "Ndonga",
    "nl": "Dutch",
    "nld": "Dutch",
    "nn": "Norwegian Nynorsk",
    "nno": "Norwegian Nynorsk",
    "no": "Norwegian",
    "nob": "Norwegian Bokmål",
    "nor": "Norwegian",
    "nr": "South Ndebele",
    "nv": "Navajo",
    "ny": "Chichewa",
    "nya": "Chichewa",
    "oc": "Occitan",
    "oci": "Occitan",
    "oj": "Ojibwa",
    "oji": "Ojibwa",
    "om": "Oromo",
    "or": "Oriya",
    "ori": "Oriya",
    "orm": "Oromo",
    "os": "Ossetian",
    "oss": "Ossetian",
    "pa": "Punjabi",
    "pan": "Punjabi",
    "per": "Persian",
    "pi": "Pali",
    "pl": "Polish",
    "pli": "Pali",
    "pol": "Polish",
    "por": "Portuguese",
    "ps": "Pashto",
    "pt": "Portuguese",
    "pus": "Pashto",
    "qu": "Quechua",
    "que": "Quechua",
    "rm": "Romansh",
    "rn": "Rundi",
    "ro": "Romanian",
    "roh": "Romansh",
    "ron": "Romanian",
    "ru": "Russian",
    "rum": "Romanian",
    "run": "Rundi",
    "rus": "Russian",
    "rw": "Kinyarwanda",
    "sa": "Sanskrit",
    "sag": "Sango",
    "san": "Sanskrit",
    "sc": "Sardinian",
    "sd": "Sindhi",
    "se": "Northern Sami",
    "sg": "Sango",
    "si": "Sinhala",
    "sin": "Sinhala",
    "sk": "Slovak",
    "sl": "Slovenian",
    "slk": "Slovak",
    "slo": "Slovak",
    "slv": "Slovenian",
    "sm": "Samoan",
    "sme": "Northern Sami",
    "smo": "Samoan",
    "sn": "Shona",
    "sna": "Shona",
    "snd": "Sindhi",
    "so": "Somali",
    "som": "Somali",
    "sot": "Southern Sotho",
    "spa": "Spanish",
    "sq": "Albanian",
    "sqi": "Albanian",
    "sr": "Serbian",
    "srd": "Sardinian",
    "srp": "Serbian",
    "ss": "Swati",
    "ssw": "Swati",
    "st": "Southern Sotho",
    "su": "Sundanese",
    "sun": "Sundanese",
    "sv": "Swedish",
    "sw": "Swahili",
    "swa": "Swahili",
    "swe": "Swedish",
    "ta": "Tamil",
    "tah": "Tahitian",
    "tam": "Tamil",
    "tat": "Tatar",
    "te": "Telugu",
    "tel": "Telugu",
    "tg": "Tajik",
    "tgk": "Tajik",
    "tgl": "Tagalog",
    "th": "Thai",
    "tha": "Thai",
    "tib": "Tibetan",
    "ti": "Tigrinya",
    "tir": "Tigrinya",
    "tk": "Turkmen",
    "tl": "Tagalog",
    "tn": "Tswana",
    "to": "Tongan",
    "ton": "Tongan",
    "tr": "Turkish",
    "ts": "Tsonga",
    "tsn": "Tswana",
    "tso": "Tsonga",
    "tt": "Tatar",
    "tuk": "Turkmen",
    "tur": "Turkish",
    "tw": "Twi",
    "twi": "Twi",
    "ty": "Tahitian",
    "ug": "Uighur",
    "uig": "Uighur",
    "uk": "Ukrainian",
    "ukr": "Ukrainian",
    "und": "Undefined",
    "ur": "Urdu",
    "urd": "Urdu",
    "uz": "Uzbek",
    "uzb": "Uzbek",
    "ve": "Venda",
    "ven": "Venda",
    "vi": "Vietnamese",
    "vie": "Vietnamese",
    "vo": "Volapük",
    "vol": "Volapük",
    "wa": "Walloon",
    "wel": "Welsh",
    "wln": "Walloon",
    "wo": "Wolof",
    "wol": "Wolof",
    "xh": "Xhosa",
    "xho": "Xhosa",
    "yi": "Yiddish",
    "yid": "Yiddish",
    "yo": "Yoruba",
    "yor": "Yoruba",
    "yue": "Cantonese",
    "za": "Zhuang",
    "zha": "Zhuang",
    "zh": "Chinese",
    "zho": "Chinese",
    "zu": "Zulu",
    "zul": "Zulu",
}
# Maps an English display name to the single code used when writing that
# language back out. Most entries are two-letter codes; a few names only have
# a three-letter code here (e.g. "Fanti", "Filipino", "Cantonese").
REVERSE_LANG_CODES = {
    "Afar": "aa",
    "Abkhazian": "ab",
    "Avestan": "ae",
    "Afrikaans": "af",
    "Akan": "ak",
    "Amharic": "am",
    "Aragonese": "an",
    "Arabic": "ar",
    "Assamese": "as",
    "Avaric": "av",
    "Aymara": "ay",
    "Azerbaijani": "az",
    "Bashkir": "ba",
    "Bambara": "bm",
    "Belarusian": "be",
    "Bulgarian": "bg",
    "Bislama": "bi",
    "Bengali": "bn",
    "Tibetan": "bo",
    "Breton": "br",
    "Bosnian": "bs",
    "Catalan": "ca",
    "Chechen": "ce",
    "Chamorro": "ch",
    "Chinese": "zh",
    "Church Slavonic": "cu",
    "Chuvash": "cv",
    "Cornish": "kw",
    "Corsican": "co",
    "Cree": "cr",
    "Czech": "cs",
    "Danish": "da",
    "German": "de",
    "Divehi": "dv",
    "Dutch": "nl",
    "Dzongkha": "dz",
    "Ewe": "ee",
    "Greek": "el",
    "English": "en",
    "Esperanto": "eo",
    "Spanish": "es",
    "Estonian": "et",
    "Basque": "eu",
    "Persian": "fa",
    "Faroese": "fo",
    "Fanti": "fat",
    "Fulah": "ff",
    "Finnish": "fi",
    "Filipino": "fil",
    "Fijian": "fj",
    "French": "fr",
    "Western Frisian": "fy",
    "Irish": "ga",
    "Scottish Gaelic": "gd",
    "Galician": "gl",
    "Manx": "gv",
    "Guarani": "gn",
    "Gujarati": "gu",
    "Hausa": "ha",
    "Haitian Creole": "ht",
    "Hebrew": "he",
    "Herero": "hz",
    "Hindi": "hi",
    "Hiri Motu": "ho",
    "Croatian": "hr",
    "Hungarian": "hu",
    "Armenian": "hy",
    "Interlingua": "ia",
    "Indonesian": "id",
    "Ido": "io",
    "Interlingue": "ie",
    "Igbo": "ig",
    "Sichuan Yi": "ii",
    "Inupiaq": "ik",
    "Inuktitut": "iu",
    "Icelandic": "is",
    "Italian": "it",
    "Japanese": "ja",
    "Javanese": "jv",
    "Georgian": "ka",
    "Kalaallisut": "kl",
    "Kannada": "kn",
    "Kashmiri": "ks",
    "Kanuri": "kr",
    "Kazakh": "kk",
    "Kongo": "kg",
    "Korean": "ko",
    "Central Khmer": "km",
    "Kikuyu": "ki",
    "Kinyarwanda": "rw",
    "Komi": "kv",
    "Kyrgyz": "ky",
    "Kuanyama": "kj",
    "Kurdish": "ku",
    "Lao": "lo",
    "Latin": "la",
    "Luxembourgish": "lb",
    "Ganda": "lg",
    "Limburgan": "li",
    "Lingala": "ln",
    "Lithuanian": "lt",
    "Luba-Katanga": "lu",
    "Latvian": "lv",
    "Macedonian": "mk",
    "Malagasy": "mg",
    "Marshallese": "mh",
    "Maori": "mi",
    "Malayalam": "ml",
    "Maltese": "mt",
    "Mongolian": "mn",
    "Marathi": "mr",
    "Malay": "ms",
    "Burmese": "my",
    "Nauru": "na",
    "Navajo": "nv",
    "Norwegian Bokmål": "nb",
    "North Ndebele": "nd",
    "Ndonga": "ng",
    "Nepali": "ne",
    "Norwegian Nynorsk": "nn",
    "Norwegian": "no",
    "South Ndebele": "nr",
    "Chichewa": "ny",
    "Occitan": "oc",
    "Ojibwa": "oj",
    "Oromo": "om",
    "Oriya": "or",
    "Ossetian": "os",
    "Punjabi": "pa",
    "Pali": "pi",
    "Polish": "pl",
    "Portuguese": "pt",
    "Pashto": "ps",
    "Quechua": "qu",
    "Romansh": "rm",
    "Rundi": "rn",
    "Romanian": "ro",
    "Russian": "ru",
    "Sanskrit": "sa",
    "Sango": "sg",
    "Sardinian": "sc",
    "Sindhi": "sd",
    "Northern Sami": "se",
    "Sinhala": "si",
    "Slovak": "sk",
    "Slovenian": "sl",
    "Samoan": "sm",
    "Shona": "sn",
    "Somali": "so",
    "Albanian": "sq",
    "Serbian": "sr",
    "Swati": "ss",
    "Southern Sotho": "st",
    "Sundanese": "su",
    "Swedish": "sv",
    "Swahili": "sw",
    "Tamil": "ta",
    "Tatar": "tt",
    "Telugu": "te",
    "Tajik": "tg",
    "Tagalog": "tl",
    "Thai": "th",
    "Tigrinya": "ti",
    "Turkmen": "tk",
    "Tswana": "tn",
    "Tongan": "to",
    "Turkish": "tr",
    "Tsonga": "ts",
    "Twi": "tw",
    "Tahitian": "ty",
    "Uighur": "ug",
    "Ukrainian": "uk",
    "Urdu": "ur",
    "Uzbek": "uz",
    "Venda": "ve",
    "Vietnamese": "vi",
    "Volapük": "vo",
    "Walloon": "wa",
    "Welsh": "cy",
    "Wolof": "wo",
    "Xhosa": "xh",
    "Yiddish": "yi",
    "Yoruba": "yo",
    "Cantonese": "yue",
    "Mandarin Chinese": "cmn",
    "Zhuang": "za",
    "Zulu": "zu",
    "Undefined": "und"
}

47
set_1_ger Executable file
View File

@@ -0,0 +1,47 @@
#!/usr/bin/env python3
import sys
import pathlib
import subprocess
from check_video import check_video_ext, normalize_language, normalize_lang_code
def collect_files(paths):
    """Gather the video files named by ``paths``.

    Each entry may be a directory or a single path. A directory contributes
    its direct children whose suffix passes check_video_ext(); any other
    entry is kept when its own suffix passes. Returns a list of
    pathlib.Path objects in the order they were found.
    """
    found = []
    for raw in paths:
        entry = pathlib.Path(raw)
        if entry.is_dir():
            found.extend(
                child for child in entry.iterdir()
                if check_video_ext(child.suffix)
            )
            continue
        if check_video_ext(entry.suffix):
            found.append(entry)
    return found
def set_audio_lang(file_path, lang_code):
    """Tag the first audio track of ``file_path`` with ``lang_code``.

    Runs mkvpropedit on the file, editing track selector ``a1`` (the first
    audio track). A non-zero exit from mkvpropedit is reported on stdout
    rather than raised.
    """
    command = [
        "mkvpropedit", str(file_path),
        "--edit", "track:a1",
        "--set", f"language={lang_code}"
    ]
    try:
        subprocess.run(command, check=True)
    except subprocess.CalledProcessError as e:
        print(f"Failed to set language for '{file_path}': {e}")
    else:
        print(f"Set first audio track of '{file_path}' to {lang_code}")
def main():
    """Tag the first audio track of every collected video as German.

    Command-line arguments are the files or directories to process; with
    no arguments the current directory is used. The language is fixed.
    """
    targets = sys.argv[1:] or ["."]
    # The language is hardcoded to German for this script.
    lang = normalize_language("German")
    lang_code = normalize_lang_code(lang)

    for video in collect_files(targets):
        set_audio_lang(video, lang_code)


if __name__ == "__main__":
    main()

112
set_audio_lang Executable file
View File

@@ -0,0 +1,112 @@
#!/usr/bin/env python3
import subprocess
import sys
import json
import pathlib
# File suffixes (lowercase, with the dot) that this script will process.
VIDEO_EXTS = {".mkv", ".mp4", ".avi", ".mov"}

# Accepted spellings (lowercase codes and names) -> display name.
LANG_CODES = {
    "eng": "English", "en": "English", "english": "English",
    "spa": "Spanish", "es": "Spanish", "spanish": "Spanish",
    "fra": "French", "fre": "French", "fr": "French", "french": "French",
    "deu": "German", "ger": "German", "de": "German", "german": "German",
}

# Display name -> the three-letter code written into the file.
CANONICAL = {
    "English": "eng",
    "Spanish": "spa",
    "French": "fra",
    "German": "deu",
}


def normalize_language(value):
    """Return the canonical three-letter code for a language spelling.

    ``value`` may be any key of LANG_CODES in any letter case; surrounding
    whitespace (e.g. from an interactive prompt) is ignored.

    Raises:
        ValueError: if the spelling is not in LANG_CODES.
    """
    key = value.strip().lower()
    if key not in LANG_CODES:
        raise ValueError(f"Unknown language: {value}")
    return CANONICAL[LANG_CODES[key]]
def probe(file):
    """Run ffprobe on ``file`` and return its parsed JSON stream report.

    ffprobe is asked for ``-show_streams`` in JSON form; a non-zero exit
    raises subprocess.CalledProcessError via check_output().
    """
    raw = subprocess.check_output(
        ["ffprobe", "-v", "error", "-print_format", "json", "-show_streams", file]
    )
    return json.loads(raw)
def collect_files(paths):
    """Gather paths whose suffix is listed in VIDEO_EXTS.

    A directory entry contributes its direct children with a matching
    suffix; any other entry is kept when its own suffix matches. Suffixes
    are compared in lowercase. Returns a list of pathlib.Path objects.
    """
    found = []
    for raw in paths:
        entry = pathlib.Path(raw)
        if entry.is_dir():
            for child in entry.iterdir():
                if child.suffix.lower() in VIDEO_EXTS:
                    found.append(child)
        elif entry.suffix.lower() in VIDEO_EXTS:
            found.append(entry)
    return found
def preserve_original(file):
    """Rename ``file`` to ``<stem>_original<suffix>`` next to itself.

    When a file with that name already exists nothing is renamed. Either
    outcome is reported on stdout.
    """
    backup = file.with_name(f"{file.stem}_original{file.suffix}")
    if not backup.exists():
        file.rename(backup)
        print(f"Preserved original as: {backup.name}")
    else:
        print(f"Original already preserved: {backup.name}")
def set_audio_lang(file, lang):
    """Write a copy of ``file`` whose first audio stream is tagged ``lang``.

    The file is probed first; files without audio streams are skipped, and
    if the first audio stream already carries a language tag the user is
    asked before it is overwritten. ffmpeg then stream-copies everything
    into ``<stem>_dub-<lang><suffix>``. On success the input file is renamed
    via preserve_original(); on failure ffmpeg's error output is printed and
    the input file is left untouched.

    Args:
        file: pathlib.Path of the input video.
        lang: Language code to store in the stream metadata.
    """
    data = probe(file)
    audio_streams = [s for s in data["streams"] if s["codec_type"] == "audio"]

    if not audio_streams:
        print(f"No audio streams in {file.name}")
        return

    current = audio_streams[0].get("tags", {}).get("language")
    if current:
        ans = input(f"{file.name}: audio already '{current}'. Overwrite? [y/N] ")
        if ans.lower() != "y":
            return

    out = file.with_name(f"{file.stem}_dub-{lang}{file.suffix}")
    print(f"{file.name} -> {out.name}")

    cmd = [
        "ffmpeg", "-y",
        "-i", str(file),
        "-map", "0",
        "-c", "copy",
        "-metadata:s:a:0", f"language={lang}",
        str(out)
    ]

    # Capture stderr as text and inspect the return code ourselves: with
    # check=True the error branch below could never run, and with stderr
    # discarded there would be nothing to print in it.
    result = subprocess.run(
        cmd,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.PIPE,
        text=True
    )

    if result.returncode != 0:
        print(f"ERROR processing {file.name}")
        print(result.stderr.strip())
        return

    preserve_original(file)
def main():
    """Parse the command line and tag every collected video.

    Usage forms:
      * no arguments      -> current directory, language asked interactively
      * one argument      -> current directory, argument is the language
      * several arguments -> all but the last are targets, last is the language
    """
    args = sys.argv[1:]
    if not args:
        targets = ["."]
        lang = input("Language: ")
    else:
        *targets, lang = args
        if not targets:
            targets = ["."]

    lang = normalize_language(lang)

    for video in collect_files(targets):
        set_audio_lang(video, lang)


if __name__ == "__main__":
    main()

125
set_subtitle_lang[untested] Normal file
View File

@@ -0,0 +1,125 @@
#!/usr/bin/env python3
import subprocess
import sys
import json
import pathlib
# File suffixes (lowercase, with the dot) that this script will process.
VIDEO_EXTS = {".mkv", ".mp4"}

# Accepted spellings (lowercase codes and names) -> display name.
LANG_CODES = {
    "eng": "English", "en": "English", "english": "English",
    "spa": "Spanish", "es": "Spanish", "spanish": "Spanish",
    "fra": "French", "fre": "French", "fr": "French", "french": "French",
    "deu": "German", "ger": "German", "de": "German", "german": "German",
}

# Display name -> the three-letter code written into the file.
CANONICAL = {
    "English": "eng",
    "Spanish": "spa",
    "French": "fra",
    "German": "deu",
}


def normalize_language(value):
    """Return the canonical three-letter code for a language spelling.

    ``value`` may be any key of LANG_CODES in any letter case; surrounding
    whitespace (e.g. from an interactive prompt) is ignored.

    Raises:
        ValueError: if the spelling is not in LANG_CODES.
    """
    key = value.strip().lower()
    if key not in LANG_CODES:
        raise ValueError(f"Unknown language: {value}")
    return CANONICAL[LANG_CODES[key]]
def probe(file):
    """Run ffprobe on ``file`` and return its parsed JSON stream report.

    ffprobe is asked for ``-show_streams`` in JSON form; a non-zero exit
    raises subprocess.CalledProcessError via check_output().
    """
    ffprobe_args = ["-v", "error", "-print_format", "json", "-show_streams"]
    report = subprocess.check_output(["ffprobe", *ffprobe_args, str(file)])
    return json.loads(report)
def collect_files(paths):
    """Gather paths whose suffix is listed in VIDEO_EXTS.

    A directory entry contributes its direct children with a matching
    suffix; any other entry is kept when its own suffix matches. Suffixes
    are compared in lowercase. Returns a list of pathlib.Path objects.
    """
    def wanted(candidate):
        return candidate.suffix.lower() in VIDEO_EXTS

    found = []
    for raw in paths:
        entry = pathlib.Path(raw)
        if entry.is_dir():
            found += [child for child in entry.iterdir() if wanted(child)]
        elif wanted(entry):
            found.append(entry)
    return found
def preserve_original(file):
    """Rename ``file`` to ``<stem>_original<suffix>`` next to itself.

    Does nothing, silently, when a file with that name already exists.
    """
    backup = file.with_name(f"{file.stem}_original{file.suffix}")
    if not backup.exists():
        file.rename(backup)
        # ``file`` still holds the old path, so this prints old -> new.
        print(f"preserve: {file.name} -> {backup.name}")
def set_sub_lang(file, lang):
    """Write a copy of ``file`` with one subtitle stream tagged ``lang``.

    The subtitle streams are listed and the user picks one by index (blank
    skips the file). If the chosen stream already has a language tag the
    user is asked before it is overwritten. ffmpeg then stream-copies
    everything into ``<stem>_sub-<lang><suffix>``. On success the input file
    is renamed via preserve_original(); on failure ffmpeg's error output is
    printed and the input file is left untouched.

    Args:
        file: pathlib.Path of the input video.
        lang: Language code to store in the stream metadata.
    """
    data = probe(file)
    subs = [s for s in data["streams"] if s["codec_type"] == "subtitle"]

    if not subs:
        return

    for i, s in enumerate(subs):
        cur = s.get("tags", {}).get("language", "unset")
        print(f"{i}: subtitle ({cur})")

    sel = input(f"{file.name}: select subtitle index (blank = skip): ").strip()
    if sel == "":
        return

    # Reject anything that is not one of the indices just listed; a negative
    # number would index from the end of the list and produce an invalid
    # ffmpeg stream specifier.
    try:
        idx = int(sel)
    except ValueError:
        print(f"Invalid subtitle index: {sel!r}")
        return
    if not 0 <= idx < len(subs):
        print(f"Subtitle index out of range: {idx}")
        return

    current = subs[idx].get("tags", {}).get("language")
    if current:
        ans = input(f"Overwrite existing '{current}'? [y/N] ")
        if ans.lower() != "y":
            return

    out = file.with_name(f"{file.stem}_sub-{lang}{file.suffix}")
    print(f"{file.name} -> {out.name}")

    cmd = [
        "ffmpeg", "-y",
        "-i", str(file),
        "-map", "0",
        "-c", "copy",
        f"-metadata:s:s:{idx}", f"language={lang}",
        str(out)
    ]

    result = subprocess.run(
        cmd,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.PIPE,
        text=True
    )

    if result.returncode != 0:
        print(f"ERROR processing {file.name}")
        print(result.stderr.strip())
        return

    preserve_original(file)
def main():
    """Parse the command line and tag subtitles in every collected video.

    Usage forms:
      * no arguments      -> current directory, language asked interactively
      * one argument      -> current directory, argument is the language
      * several arguments -> all but the last are targets, last is the language
    """
    args = sys.argv[1:]
    if not args:
        targets = ["."]
        lang = input("Subtitle language: ")
    else:
        *targets, lang = args
        if not targets:
            targets = ["."]

    lang = normalize_language(lang)

    for video in collect_files(targets):
        set_sub_lang(video, lang)


if __name__ == "__main__":
    main()

View File

@@ -1,3 +1,3 @@
#!/bin/bash
# Turn every space in $PATH into a newline and number the resulting lines.
echo $PATH | tr " " "\n" | nl
# Turn every colon in $PATH into a newline, so each colon-separated PATH
# entry is printed on its own numbered line.
echo $PATH | tr ":" "\n" | nl