import copy
import os
import re
from packaging.version import Version
import romsearch
from ..util import (
centred_string,
left_aligned_string,
setup_logger,
get_file_time,
load_yml,
load_json,
match_retool_search_terms,
get_short_name,
get_sanitized_version,
)
# Default value for each supported regex "type", used to initialise a key in the
# file dictionary before any match has been found.
# NOTE: the "list" default is a mutable object shared by everyone reading this
# dict, so it has to be copied before being stored or modified.
DICT_DEFAULT_VALS = {"bool": False, "str": "", "list": []}
# Regex keys for which the compilation title position is passed on, so that only
# the matching "+"-separated part of a tag is used
USE_TITLE_POS = [
"languages",
]
def find_pattern(regex, search_str, group_number=0):
    """Search a string with a regex and return one group of the first match

    Args:
        regex: Regex pattern (string or compiled pattern)
        search_str (str): String to search within
        group_number (int): Match group to return. Defaults to 0, the whole match

    Returns:
        The requested group of the first match, or None if nothing matches
    """

    found = re.search(regex, search_str)
    return found.group(group_number) if found else None
def get_pattern_val(
    regex,
    tag,
    regex_type,
    pattern_mappings=None,
    title_pos=None,
    use_title_pos=False,
):
    """Extract a typed value from a tag using a regex pattern

    Args:
        regex: Regex pattern
        tag: Found tag
        regex_type: Regex pattern type. Can be str, bool, list
        pattern_mappings: Mapping from readable value to the regex that identifies
            it. Only used for lists. Defaults to None
        title_pos: Position of title for compilations. Defaults to None
        use_title_pos: Use title_pos? Defaults to False

    Returns:
        None if the pattern isn't found in the tag. Otherwise True for "bool",
        the matched text (without surrounding brackets) for "str", or a list of
        values for "list"

    Raises:
        ValueError: If the pattern is found but regex_type is not one of
            'bool', 'str', or 'list'
    """

    matched = find_pattern(regex, tag)
    if matched is None:
        return None

    matched = matched.strip("()")

    # For compilations, "+" separates the per-title values, so pick out the
    # part belonging to this title (title_pos is 1-based)
    if use_title_pos and title_pos is not None and "+" in matched:
        matched = matched.split("+")[title_pos - 1]

    if regex_type == "bool":
        return True
    if regex_type == "str":
        return matched
    if regex_type != "list":
        raise ValueError("regex_type should be one of 'bool', 'str', or 'list'")

    # Without mappings, the list is just the comma-separated entries
    if pattern_mappings is None:
        return [part.strip() for part in matched.split(",")]

    # With mappings, keep every readable name whose pattern is present
    return [
        name
        for name in pattern_mappings
        if re.search(pattern_mappings[name], matched)
    ]
def apply_filters(file_dict):
    """Apply any filters we may have

    Currently this only re-stores the "flag_as_superset" value when one is
    present; the dictionary is otherwise left as it is.

    Args:
        file_dict (dict): Dictionary of file properties

    Returns:
        dict: The same dictionary that was passed in
    """

    superset_flag = file_dict.get("flag_as_superset")
    if superset_flag is None:
        return file_dict

    file_dict["flag_as_superset"] = superset_flag
    return file_dict
def is_ra_subset(name):
    """Tell whether a name carries a RetroAchievements "[Subset...]" marker

    Args:
        name (str): Name to check

    Returns:
        bool: True if the marker is found anywhere in the name
    """

    return find_pattern("\\[Subset.*\\]", name) is not None
def check_match(i, j, checks_passed=None):
    """Compare two bools/strings/lists and report whether they are compatible

    Bools and strings have to be equal. Lists are compared as sets: if both are
    non-empty they must share at least one item, and if both hold more than one
    distinct item the smaller one must be fully contained in the other.

    Args:
        i: Input 1. Its type decides how the comparison is made
        j: Input 2
        checks_passed: If not None, will inherit this as initial state, so an
            earlier failure is carried through. Else, will default to True

    Returns:
        bool: False if the comparison fails, otherwise the initial state

    Raises:
        ValueError: If checks_passed is not a bool, or i is of an unsupported type
    """

    if checks_passed is None:
        checks_passed = True
    if not isinstance(checks_passed, bool):
        raise ValueError("checks_passed should be a boolean value")

    # Simple values just need to be equal
    if isinstance(i, (bool, str)):
        return checks_passed if i == j else False

    if not isinstance(i, list):
        t = type(i)
        raise ValueError(f"Do not know how to check against type {t}")

    left = set(i)
    right = set(j)
    shared = left & right

    # An empty overlap is only a failure when neither side is empty
    if left and right and not shared:
        checks_passed = False

    # With multiple items on both sides, the smaller side must match completely
    if len(left) > 1 and len(right) > 1:
        if len(shared) < min(len(left), len(right)):
            checks_passed = False

    return checks_passed
def set_english_friendly(file_dict):
    """Add English to the languages of a file flagged as English-friendly

    Args:
        file_dict (dict): Dictionary of file properties. Needs a "languages"
            list whenever "english_friendly" is set

    Returns:
        dict: The same dictionary, modified in place where needed
    """

    if file_dict.get("english_friendly", False):
        languages = file_dict["languages"]
        if "English" not in languages:
            languages.append("English")

    return file_dict
[docs]
class ROMParser:
def __init__(
    self,
    platform,
    game,
    dat=None,
    retool=None,
    ra_hashes=None,
    config_file=None,
    config=None,
    platform_config=None,
    default_config=None,
    regex_config=None,
    logger=None,
    log_line_sep="=",
    log_line_length=100,
):
    """ROM parser tool

    This works per-game, per-platform, so both have to be given here. Any
    configuration or data dictionary that isn't passed in is loaded from disk.

    Args:
        platform (str): Platform name
        game (str): Game name
        dat (dict): Parsed dat dictionary. Defaults to None, which will try to load the dat file if it exists
        retool (dict): Retool dictionary. Defaults to None, which will try to load the file if it exists
        ra_hashes (dict): RA hash dictionary. Defaults to None, which will try to load the file if it exists
        config_file (str, optional): path to config file. Defaults to None.
        config (dict, optional): configuration dictionary. Defaults to None.
        platform_config (dict, optional): platform configuration dictionary. Defaults to None.
        default_config (dict, optional): default configuration dictionary. Defaults to None.
        regex_config (dict, optional): regex configuration dictionary. Defaults to None.
        logger (logging.Logger, optional): logger instance. Defaults to None.
        log_line_sep (str, optional): String repeated to draw separator lines in the log. Defaults to "="
        log_line_length (int, optional): Line length of log. Defaults to 100

    Raises:
        ValueError: If platform is None, if neither config_file nor config is
            given, if raw_dir is missing from the config, or if a directory
            needed to locate the dat/retool/RA hash files is missing

    TODO:
        For the RetroAchievements, there are hacks and unlicensed stuff that seems to work differently
    """

    if platform is None:
        raise ValueError("platform must be specified")
    self.platform = platform

    if config is None:
        if config_file is None:
            raise ValueError("config_file or config must be specified")
        config = load_yml(config_file)
    self.config = config

    self.game = game

    # Build a per-platform, per-game logger if we haven't been handed one
    if logger is None:
        fallback_log_dir = os.path.join(os.getcwd(), "logs")
        log_dir = self.config.get("dirs", {}).get("log_dir", fallback_log_dir)
        logger_add_dir = str(os.path.join(platform, game))
        log_level = self.config.get("logger", {}).get("level", "info")
        logger = setup_logger(
            log_level=log_level,
            script_name="ROMParser",
            log_dir=log_dir,
            additional_dir=logger_add_dir,
        )
    self.logger = logger

    # The packaged configs live alongside the module
    configs_dir = os.path.join(os.path.dirname(romsearch.__file__), "configs")

    if default_config is None:
        default_config = load_yml(os.path.join(configs_dir, "defaults.yml"))
    self.default_config = default_config

    self.ra_file_exts = self.default_config.get("ra_file_exts", [])
    self.ra_labels = self.default_config.get("ra_labels", [])
    self.ra_patch_checks = self.default_config.get("ra_patch_checks", [])

    if regex_config is None:
        regex_config = load_yml(os.path.join(configs_dir, "regex.yml"))
    self.regex_config = regex_config

    if platform_config is None:
        platform_config = load_yml(
            os.path.join(configs_dir, "platforms", f"{platform}.yml")
        )
    self.platform_config = platform_config

    dirs_config = self.config.get("dirs", {})

    self.raw_dir = dirs_config.get("raw_dir", None)
    if not self.raw_dir:
        raise ValueError("raw_dir must be specified in config.yml")

    parser_config = self.config.get("romparser", {})
    self.use_dat = parser_config.get("use_dat", True)
    self.use_retool = parser_config.get("use_retool", True)
    self.use_ra_hashes = parser_config.get("use_ra_hashes", False)
    self.use_filename = parser_config.get("use_filename", True)
    self.dry_run = parser_config.get("dry_run", False)

    # Parsed dat: load from disk if wanted but not supplied. A missing file
    # simply leaves this as None
    self.dat = dat
    if self.use_dat and self.dat is None:
        dat_dir = dirs_config.get("parsed_dat_dir", None)
        if dat_dir is None:
            raise ValueError("parsed_dat_dir must be specified in config.yml")
        dat_file = os.path.join(dat_dir, f"{platform} (dat parsed).json")
        if os.path.exists(dat_file):
            self.dat = load_json(dat_file)

    # Retool: same idea, but when loading from disk only the "variants"
    # section of the file is kept
    self.retool = retool
    if self.use_retool and self.retool is None:
        dat_dir = dirs_config.get("parsed_dat_dir", None)
        if dat_dir is None:
            raise ValueError("parsed_dat_dir must be specified in config.yml")
        retool_file = os.path.join(dat_dir, f"{platform} (retool).json")
        if os.path.exists(retool_file):
            retool = load_json(retool_file)
            self.retool = retool["variants"]

    # RA hashes: the lookup dictionary (ra_dict) is built later, on first use
    self.ra_hashes = ra_hashes
    self.ra_dict = None
    if self.use_ra_hashes and self.ra_hashes is None:
        ra_hash_dir = dirs_config.get("ra_hash_dir", None)
        if ra_hash_dir is None:
            raise ValueError("ra_hash_dir must be specified in config.yml")
        ra_hash_file = os.path.join(ra_hash_dir, f"{platform}.json")
        if os.path.exists(ra_hash_file):
            self.ra_hashes = load_json(ra_hash_file)

    self.hash_method = self.platform_config.get("ra_hash_method", None)

    self.log_line_sep = log_line_sep
    self.log_line_length = log_line_length
[docs]
def run(
    self,
    files,
):
    """Run the ROM parser over every file of the game

    Args:
        files (dict): Dictionary of file properties, keyed by filename

    Returns:
        dict: Deep copy of files, with each entry updated with its parsed info
    """

    parsed_games = copy.deepcopy(files)

    separator = self.log_line_sep * self.log_line_length
    self.logger.debug(f"{separator}")
    self.logger.debug(
        centred_string(
            f"Running ROMParser for {self.game}", total_length=self.log_line_length
        )
    )
    self.logger.debug(f"{separator}")

    for name, properties in files.items():
        parsed = self.parse_file(
            f=name,
            file_dict=copy.deepcopy(properties),
            # The potential title position, for compilations
            title_pos=properties.get("title_pos", None),
        )
        parsed_games[name].update(parsed)

    return parsed_games
[docs]
def parse_file(
    self,
    f=None,
    file_dict=None,
    title_pos=None,
):
    """Parse useful info out of a specific file, and log what was found

    Runs each enabled parsing stage (filename, retool, dat, RA hashes) in turn,
    finalises the result, and adds the file modification time.

    Args:
        f (str): Filename. Will only use this if something more suitable isn't found
        file_dict (dict): Dictionary of file properties
        title_pos (int, optional): Title position for compilations. Defaults to None.

    Returns:
        dict: The file dictionary with everything that was parsed

    Raises:
        ValueError: If a value in the final dictionary is not a bool, str,
            list, int or None
    """

    if file_dict is None:
        file_dict = {}

    if self.use_filename:
        file_dict = self.parse_filename(
            f=f,
            file_dict=file_dict,
            title_pos=title_pos,
        )

    if self.use_retool:
        file_dict = self.parse_retool(file_dict=file_dict)

    if self.use_dat:
        file_dict = self.parse_dat(
            f=f,
            file_dict=file_dict,
        )

    # Apply any filters that wouldn't have been applied here
    file_dict = apply_filters(file_dict)

    # Start from "no achievements", and let the RA parsing say otherwise
    file_dict["has_cheevos"] = False
    file_dict["patch_file"] = ""
    if self.use_ra_hashes:
        file_dict = self.parse_ra_hashes(
            file_dict=file_dict,
        )

    # Any last minute finalisations (these modify the dictionary in place)
    self.finalise_file_dict(file_dict)

    # File modification time
    rom_path = os.path.join(
        self.raw_dir, self.platform, file_dict.get("original_name", f)
    )
    file_dict["file_mod_time"] = get_file_time(
        rom_path,
        datetime_format=self.default_config["datetime_format"],
    )

    # And note that this thing has been parsed
    file_dict["is_parsed"] = True

    # Everything below is just logging the tags out in a nice readable way
    width = self.log_line_length

    def log_left(text):
        self.logger.debug(left_aligned_string(text, total_length=width))

    self.logger.debug(centred_string(f"{f}:", total_length=width))

    # Sort the tags by type. bool has to be tested before int, since a bool
    # is also an int
    tagged, not_tagged, unset = [], [], []
    strings, numbers, lists = {}, {}, {}
    for key in file_dict:
        value = file_dict[key]
        if isinstance(value, bool):
            (tagged if value else not_tagged).append(key)
        elif isinstance(value, str):
            strings[key] = value
        elif isinstance(value, list):
            lists[key] = value
        elif isinstance(value, int):
            numbers[key] = value
        elif value is None:
            unset.append(key)
        else:
            raise ValueError(
                f"{file_dict[key]} is not something I know how to parse"
            )

    # Strings, skipping the empty ones
    log_left("String tags:")
    for tag, value in strings.items():
        if value == "":
            continue
        log_left(f"-> {tag}: {value}")

    # Lists, skipping the empty ones
    log_left("List tags:")
    for tag, values in lists.items():
        if not values:
            continue
        joined = ", ".join(str(i) for i in values)
        log_left(f"-> {tag}: {joined}")

    # Numbers
    log_left("Number tags:")
    for tag, value in numbers.items():
        log_left(f"-> {tag}: {value}")

    # Bools and Nones only need their names listing
    for heading, names in (
        ("Tagged:", tagged),
        ("Not tagged:", not_tagged),
        ("None:", unset),
    ):
        log_left(heading)
        for tag in names:
            log_left(f"-> {tag}")

    self.logger.debug(f"{'-' * width}")

    return file_dict
[docs]
def parse_dat(
    self,
    f=None,
    file_dict=None,
):
    """Parse info out of the dat file

    Looks the file up in the parsed dat, flags which of the configured dat
    categories it belongs to, and collects the configured checksums.

    Args:
        f (str): Fallback filename, used if the dictionary has no "original_name"
        file_dict (dict): Dictionary of file info

    Returns:
        dict: The file dictionary, with category flags and checksum lists added.
            Returned unchanged (after a warning) if there is no dat, or no
            entry for this file
    """

    if file_dict is None:
        file_dict = {}

    f = file_dict.get("original_name", f)
    separator = f"{self.log_line_sep * self.log_line_length}"

    if self.dat is None:
        self.logger.warning(separator)
        self.logger.warning(
            centred_string(
                f"No dat file found for {self.platform}. Skipping",
                total_length=self.log_line_length,
            )
        )
        self.logger.warning(separator)
        return file_dict

    # Remember there aren't zips in the dat entries. This has to remove the
    # ".zip" suffix only: rstrip(".zip") would strip any trailing run of the
    # characters ".", "z", "i", "p" and mangle names like "Tetris Quiz.zip"
    dat_entry = self.dat.get(f.removesuffix(".zip"), None)
    if not dat_entry:
        self.logger.warning(separator)
        self.logger.warning(
            centred_string(
                f"No dat entry found for {f}. Skipping",
                total_length=self.log_line_length,
            )
        )
        self.logger.warning(separator)
        return file_dict

    # Flag each category, keeping anything already flagged as True
    dat_val = dat_entry.get("category", "")
    for dat_cat in self.default_config.get("dat_categories", []):
        cat_val = dat_val == dat_cat
        dat_cat_dict = dat_cat.lower().replace(" ", "_")
        if dat_cat_dict in file_dict:
            file_dict[dat_cat_dict] = file_dict[dat_cat_dict] | cat_val
        else:
            file_dict[dat_cat_dict] = cat_val

    # Because sometimes we have multiple files within the ROM, treat a single
    # entry as a list of one, then loop over and append them all
    rom_entries = dat_entry.get("rom", [])
    if isinstance(rom_entries, dict):
        rom_entries = [rom_entries]

    # Get the checksums out
    for checksum in self.default_config.get("dat_checksums", []):
        for rom_entry in rom_entries:
            if checksum in rom_entry:
                file_dict.setdefault(checksum, []).append(rom_entry[checksum])

    return file_dict
[docs]
def parse_ra_hashes(
    self,
    file_dict=None,
):
    """See if we can find ROMs that support RetroAchievements

    Note that this requires a bunch of parsing to have already occurred

    Args:
        file_dict (dict): Dictionary of file info. Defaults to None, in which
            case there is nothing to match against

    Returns:
        dict: The file dictionary after hash matching. An empty dictionary if
            none was given, and the dictionary untouched if the platform has no
            RA hash method
    """

    # If we don't have a dictionary already, then this won't work
    if file_dict is None:
        return {}

    if self.hash_method is not None:
        return self.match_hashes(file_dict)

    self.logger.warning(
        centred_string(
            f"RA hash method not defined for {self.platform}",
            total_length=self.log_line_length,
        )
    )
    return file_dict
[docs]
def match_hashes(
    self,
    file_dict,
):
    """Get whether ROM has cheevos by various potential hash methods

    The lookups are tried in order, stopping at the first that finds something:
    a direct match, then (for the "custom" hash method only) a match on parsed
    names against unpatched entries, then a match on parsed names against
    entries that need a patch.

    Args:
        file_dict (dict): Dictionary of ROM descriptions

    Returns:
        dict: The file dictionary, with "has_cheevos" and "patch_file" set
    """

    if self.hash_method not in ["md5", "custom"]:
        self.logger.warning(
            centred_string(
                f"Cannot currently handle {self.hash_method} hash method",
                total_length=self.log_line_length,
            )
        )
        # Every path has to hand back the file dictionary: callers assign the
        # result straight back to it, so a (bool, str) tuple here would break
        # everything downstream
        file_dict["has_cheevos"] = False
        file_dict["patch_file"] = ""
        return file_dict

    # Get the hash dict, if we don't already have it
    if self.ra_dict is None:
        self.ra_dict = self.get_ra_dict()

    # Get the potential RA match by name (this won't include potentially patched ROMs)
    has_cheevos, patch_file = self.get_ra_match(
        file_dict=file_dict,
    )

    # If we're on a custom hash, and we haven't found anything, now look
    # via parsing the names
    if not has_cheevos and self.hash_method == "custom":
        has_cheevos, patch_file = self.get_parsed_match(
            file_dict=file_dict,
            want_patched_files=False,
        )

    # If we still haven't, now look through files to see if we just need
    # a patch (i.e. the hash will change, but we have the file)
    if not has_cheevos:
        has_cheevos, patch_file = self.get_parsed_match(
            file_dict=file_dict,
            want_patched_files=True,
        )

    file_dict["has_cheevos"] = has_cheevos
    file_dict["patch_file"] = patch_file

    return file_dict
[docs]
def get_ra_dict(
    self,
):
    """Get a big dictionary of RA hashes with useful info

    Returns:
        dict: Keyed by lowercased MD5. Each value holds "name" (what files get
            matched against: the lowercased MD5 for the "md5" hash method, the
            ROM name without file extension for "custom"), "full_name",
            "dir_name" (the name up to the first " (") and "patch_url".
            Empty if no RA hashes have been loaded

    Raises:
        ValueError: If the hash method isn't supported, or an MD5 is defined
            more than once
    """

    # Pull out the particular key we need
    if self.hash_method == "md5":
        key = "MD5"
    elif self.hash_method == "custom":
        key = "Name"
    else:
        raise ValueError(f"Cannot currently handle {self.hash_method} hash method")

    # Because of inconsistencies between naming schemes, just pull a huge dictionary out here rather than try
    # to be clever
    ra_dict = {}

    # The hashes may never have been loaded (e.g. no hash file on disk), in
    # which case there's simply nothing to match against
    if not self.ra_hashes:
        return ra_dict

    for r, ra_entry in self.ra_hashes.items():

        # If the RA list is a subset, then skip
        if is_ra_subset(r):
            continue

        for h in ra_entry["Hashes"]:

            # If for some weird reason there's no ID name, just skip
            id_name = h[key]
            if id_name is None:
                continue

            # Use the md5 as the unique key, and then name as the thing we'll match to.
            # Ensure we lowercase the hash, just to be sure
            md5 = h["MD5"].lower()

            # Also just pull out the ROM name, since we need that later
            rom_name = h["Name"].strip()

            if key == "MD5":
                # Ensure we also lowercase the hash here
                id_name = id_name.lower()
            else:
                # If we're dealing with names, there might be file extensions
                # to strip. Only the suffix itself must go: rstrip(ext) would
                # eat any trailing characters that happen to be in ext
                for ext in self.ra_file_exts:
                    id_name = id_name.removesuffix(ext)

            # FIXME: Here as a catch-all, hopefully won't be a problem
            if md5 in ra_dict:
                raise ValueError(f"Hash {md5} multiply defined")

            ra_dict[md5] = {
                "name": id_name,
                "full_name": rom_name,
                "dir_name": rom_name.split(" (")[0],
                "patch_url": h["PatchUrl"],
            }

    return ra_dict
[docs]
def get_ra_match(
    self,
    file_dict,
):
    """Match a file to RetroAchievements supported files

    For the "md5" hash method the file's MD5 checksums are compared, for
    "custom" the original filename without its ".zip" suffix.

    Args:
        file_dict (dict): Dictionary of ROM descriptions

    Returns:
        tuple: (has_cheevos, patch_file). patch_file is the patch URL of the
            matched entry, or an empty string if there is no match or no patch
    """

    has_cheevos = False
    patch_file = ""

    # Pull out the particular key we need
    if self.hash_method == "md5":
        # The RA hashes are stored lowercased, so compare like with like
        match_list = [m.lower() for m in file_dict.get("md5", None) or []]
    elif self.hash_method == "custom":
        # Remove only the ".zip" suffix: rstrip(".zip") would strip any
        # trailing ".", "z", "i" or "p" characters from the name itself
        match_list = [file_dict["original_name"].removesuffix(".zip")]
    else:
        self.logger.warning(
            centred_string(
                f"Cannot currently handle {self.hash_method} hash method",
                total_length=self.log_line_length,
            )
        )
        return has_cheevos, patch_file

    # If we've got nothing, don't waste time
    if not match_list:
        return has_cheevos, patch_file

    if self.ra_dict is None:
        self.ra_dict = self.get_ra_dict()

    # Again, if there's nothing here just return
    if not self.ra_dict:
        return has_cheevos, patch_file

    # If several entries match, the last one found decides the patch file
    for m in match_list:
        for ra_entry in self.ra_dict.values():
            if m == ra_entry["name"]:
                has_cheevos = True
                patch_file = ra_entry["patch_url"]

    if patch_file is None:
        patch_file = ""

    return has_cheevos, patch_file
[docs]
def get_parsed_match(
    self,
    file_dict,
    want_patched_files=True,
):
    """Match a file to RetroAchievements supported files by comparing parsed tags

    An RA entry is a match if its name up to the first bracket equals the
    file's, and every key in the configured patch checks (plus any regex keys
    grouped under those checks) agrees between the two.

    Args:
        file_dict (dict): Dictionary of ROM descriptions
        want_patched_files (bool): Whether we're looking for hashes with patches or not. Defaults to True

    Returns:
        tuple: (has_cheevos, patch_file). If more than one patch file could
            apply, a warning is logged and (False, "") is returned
    """

    has_cheevos = False
    patch_file = ""

    if self.ra_dict is None:
        self.ra_dict = self.get_ra_dict()

    # Again, if there's nothing here just return
    if len(self.ra_dict) == 0:
        return has_cheevos, patch_file

    for r in self.ra_dict:
        ra_entry = self.ra_dict[r]
        patch_url = ra_entry["patch_url"]

        # Only look at entries with a patch if we want patches, and only at
        # entries without if we don't
        if bool(want_patched_files) != (patch_url is not None):
            continue

        # If we do have a patch file, make sure it's not a translation
        if want_patched_files and "Translation" in patch_url:
            continue

        # Start by ensuring the names up to the first bracket at least match
        if file_dict["dir_name"] != ra_entry["dir_name"]:
            continue

        # Make sure we're not parsing this every time
        if not ra_entry.get("is_parsed", False):
            r_parsed = self.parse_filename(f=ra_entry["full_name"])
            r_parsed["is_parsed"] = True
            ra_entry.update(r_parsed)

        # If we're a superset, then ensure the short names also match, since
        # we need to be more stringent
        if file_dict.get("is_superset", False):
            ra_dict_short_name = get_short_name(
                ra_entry["full_name"],
                regex_config=self.regex_config,
                default_config=self.default_config,
            )
            if file_dict["short_name"] != ra_dict_short_name:
                continue

        # Compare against a copy, so that anything forced in below is only
        # used for this file and doesn't leak into the cached RA entry (which
        # would make later matches depend on the order files are parsed in)
        r_checked = dict(ra_entry)

        # Force some version info in here, if the RA name doesn't have it
        if r_checked["version_no"] == "" and file_dict["version_no"] != "":
            f_sanitized = get_sanitized_version(file_dict["version_no"])
            if Version(f_sanitized) == Version("1"):
                r_checked["version_no"] = file_dict["version_no"]

        # Now, make sure all the useful checks pass, stopping at the first failure
        ra_checks_passed = True
        for check in self.ra_patch_checks:
            ra_checks_passed = check_match(file_dict[check], r_checked[check])
            if not ra_checks_passed:
                break

            # After this first pass, also see if any of the regex checks are grouped,
            # and double-check the sublevel below. This is because we could have e.g.
            # mismatched modern types (like a GameCube version vs a Wii U Virtual Console
            # version), which inevitably won't match hashes
            if check in self.regex_config:
                continue

            for r_c, r_c_config in self.regex_config.items():
                # A regex key can belong to one group (str) or several (list)
                r_c_group = r_c_config.get("group", None)
                if r_c_group is None:
                    continue
                if isinstance(r_c_group, str):
                    r_c_group = [r_c_group]
                if check not in r_c_group:
                    continue

                ra_checks_passed = check_match(file_dict[r_c], r_checked[r_c])
                if not ra_checks_passed:
                    break

            if not ra_checks_passed:
                break

        if not ra_checks_passed:
            continue

        # If we seem to have multiple patch files defined,
        # then raise a warning and assume there isn't a patch
        if patch_file != "":
            self.logger.warning(
                centred_string(
                    f"Multiple potential patch files found for {file_dict['original_name']}",
                    total_length=self.log_line_length,
                )
            )
            return False, ""

        has_cheevos = True
        patch_file = patch_url if patch_url is not None else ""

    return has_cheevos, patch_file
[docs]
def finalise_file_dict(
    self,
    file_dict,
):
    """Do any last minute finalisation to the file dict

    In order: sets the game category, fills in implicit languages, and adds
    English for English-friendly files.

    Args:
        file_dict (dict): Dictionary of file properties

    Returns:
        dict: The finalised file dictionary
    """

    with_category = self.set_game_category(file_dict)
    with_languages = self.set_implicit_languages(with_category)
    return set_english_friendly(with_languages)
[docs]
def set_game_category(
    self,
    file_dict,
):
    """Flag the file as a game if none of the dat categories apply

    Any dat category missing from the dictionary is added as False first.

    Args:
        file_dict (dict): Dictionary of file properties

    Returns:
        dict: The same dictionary, with "games" set to True if every dat
            category is False
    """

    category_keys = [
        d.lower().replace(" ", "_")
        for d in self.default_config.get("dat_categories", [])
    ]

    for category_key in category_keys:
        if category_key not in file_dict:
            file_dict[category_key] = False

    # Only an actual False counts as "not in this category"
    in_some_category = any(
        file_dict[category_key] is not False for category_key in category_keys
    )
    if not in_some_category:
        file_dict["games"] = True

    return file_dict
[docs]
def set_implicit_languages(
    self,
    file_dict,
):
    """Fill in languages from the regions, if no languages are set

    Args:
        file_dict (dict): Dictionary of file properties. Needs "languages" and
            "regions" lists

    Returns:
        dict: The same dictionary, with the language implied by each region
            added when the languages list was empty
    """

    implied_languages = self.default_config.get("implied_languages", {})

    # Explicit languages always win
    if file_dict["languages"]:
        return file_dict

    from_regions = [
        implied_languages[region]
        for region in file_dict["regions"]
        if region in implied_languages
    ]
    file_dict["languages"].extend(from_regions)

    return file_dict
[docs]
def parse_filename(
    self,
    f=None,
    file_dict=None,
    title_pos=None,
):
    """Parse info out of filename

    The filename is split into its bracketed tags, and every entry of the regex
    config is searched for, either in those tags or in the whole filename. What
    is found is stored under the regex key, and folded into any group(s) the
    key belongs to.

    Args:
        f (str): filename. Defaults to None, which will pull the full
            name out of the dict
        file_dict (dict): Existing file dictionary. Defaults to None, which
            will create an empty one
        title_pos (int): Title position for compilations. Defaults to None

    Returns:
        dict: The file dictionary with an entry per regex key and group

    Raises:
        ValueError: If there is no filename to parse, if a regex entry has an
            unknown type or flags, or if two str values compete for one group
    """

    if file_dict is None:
        file_dict = {}

    if f is None:
        if "full_name" not in file_dict:
            raise ValueError(
                "Either f needs to be defined, or full_name needs to be in the file dictionary"
            )
        # Pull the filename out, which is the full name
        f = file_dict["full_name"]

    # Split file into tags. Only the ".zip" suffix is removed: rstrip(".zip")
    # would also eat trailing ".", "z", "i" or "p" characters of the name
    tags = [f"({x}" for x in f.removesuffix(".zip").split(" (")][1:]

    for regex_key, regex_entry in self.regex_config.items():

        # Are we potentially using the title position?
        use_title_pos = regex_key in USE_TITLE_POS

        # Is this something problematic we should be skipping?
        ignore_names = regex_entry.get("ignore_names", [])
        if any(re.match(ignore_name, f) is not None for ignore_name in ignore_names):
            continue

        regex_type = regex_entry.get("type", "bool")
        search_tags = regex_entry.get("search_tags", True)
        group = regex_entry.get("group", None)
        regex_flags = regex_entry.get("flags", "I")
        transform_pattern = regex_entry.get("transform_pattern", None)
        transform_repl = regex_entry.get("transform_repl", None)

        if regex_type not in DICT_DEFAULT_VALS:
            raise ValueError(
                f"regex_type should be one of {list(DICT_DEFAULT_VALS.keys())}"
            )
        dict_default_val = DICT_DEFAULT_VALS[regex_type]

        if regex_key not in file_dict:
            file_dict[regex_key] = copy.deepcopy(dict_default_val)

        if regex_flags == "NOFLAG":
            regex_flags = re.NOFLAG
        elif regex_flags == "I":
            regex_flags = re.I
        else:
            raise ValueError("regex_flags should be one of 'NOFLAG', 'I'")

        pattern = regex_entry["pattern"]

        # For lists, the "[regex_key]" placeholder in the pattern is filled in
        # with every option from the default config. If those options are a
        # dict, it also maps the readable names back from what was matched
        pattern_mappings = None
        if regex_type == "list":
            list_options = self.default_config[regex_key]
            if isinstance(list_options, dict):
                str_to_join = list(list_options.values())
                pattern_mappings = list_options
            else:
                str_to_join = list_options

            pattern = pattern.replace(f"[{regex_key}]", "|".join(str_to_join))

        regex = re.compile(pattern, flags=regex_flags)

        if search_tags:
            # Take the first tag that matches, then stop looking
            for tag in tags:
                pattern_string = get_pattern_val(
                    regex,
                    tag,
                    regex_type,
                    pattern_mappings=pattern_mappings,
                    title_pos=title_pos,
                    use_title_pos=use_title_pos,
                )
                if pattern_string is None:
                    continue

                if transform_pattern is not None:
                    pattern_string = re.sub(
                        transform_pattern, transform_repl, pattern_string
                    )
                file_dict[regex_key] = pattern_string
                break
        else:
            # Otherwise, search the whole filename
            pattern_string = get_pattern_val(
                regex,
                f,
                regex_type,
                pattern_mappings=pattern_mappings,
                title_pos=title_pos,
                use_title_pos=use_title_pos,
            )
            if pattern_string is not None:
                file_dict[regex_key] = pattern_string

        # Update groups, if needed
        if group is None:
            continue

        # We can have multiple groups per-tag, so take that into account
        if isinstance(group, str):
            group = [group]

        for g in group:
            if g not in file_dict:
                # This has to be a copy: the list default is shared, and
                # extending it in place below would leak values into every
                # later file
                file_dict[g] = copy.deepcopy(dict_default_val)

            if regex_type == "bool":
                file_dict[g] = file_dict[g] | file_dict[regex_key]
            elif regex_type == "str":
                if file_dict[g] and file_dict[regex_key]:
                    raise ValueError(
                        "Can't combine multiple groups with type str"
                    )
                file_dict[g] += file_dict[regex_key]
            elif regex_type == "list":
                file_dict[g].extend(file_dict[regex_key])
            else:
                raise ValueError(
                    f"regex_type should be one of {list(DICT_DEFAULT_VALS.keys())}"
                )

    return file_dict