# Source code for romsearch.modules.dupeparser
import copy
import os
import requests
import romsearch
from ..util import (
centred_string,
setup_logger,
load_yml,
load_json,
save_json,
)
# Presumably the keys that mark an entry as a clone of another — TODO confirm;
# nothing in the code visible in this module reads this list.
ID_CLONE_KEYS = ["cloneof", "cloneofid"]
# [docs]
class DupeParser:
    """Build a per-platform dictionary of duplicate ROM titles from the retool clonelist."""

    # Seconds passed as ``timeout`` to ``requests.get`` so a stalled server
    # cannot hang the run indefinitely.
    request_timeout = 30

    def __init__(
        self,
        platform=None,
        config_file=None,
        config=None,
        default_config=None,
        regex_config=None,
        logger=None,
        log_line_sep="=",
        log_line_length=100,
    ):
        """Tool for figuring out a list of dupes

        Args:
            platform (str, optional): Platform name. Defaults to None, which will throw a ValueError.
            config_file (str, optional): Path to config file. Defaults to None
            config (dict, optional): Configuration dictionary. Defaults to None
            default_config (dict, optional): Default configuration dictionary. Defaults to None,
                in which case the package's ``configs/defaults.yml`` is loaded
            regex_config (dict, optional): Configuration dictionary for regex search. Defaults to
                None, in which case the package's ``configs/regex.yml`` is loaded
            logger (logging.Logger, optional): Logger instance. Defaults to None, in which case
                one is created via ``setup_logger``
            log_line_sep (str, optional): Character repeated to draw separator lines in the log.
                Defaults to "="
            log_line_length (int, optional): Line length of log. Defaults to 100

        Raises:
            ValueError: If ``platform`` is missing, if neither ``config_file`` nor ``config`` is
                given, if retool is enabled without ``dirs.parsed_dat_dir``, or if
                ``dirs.dupe_dir`` is missing from the config.
        """

        if platform is None:
            raise ValueError("platform must be specified")
        self.platform = platform

        if config_file is None and config is None:
            raise ValueError("config_file or config must be specified")
        if config is None:
            config = load_yml(config_file)
        self.config = config

        if logger is None:
            log_dir = self.config.get("dirs", {}).get(
                "log_dir", os.path.join(os.getcwd(), "logs")
            )
            log_level = self.config.get("logger", {}).get("level", "info")
            logger = setup_logger(
                log_level=log_level,
                script_name="DupeParser",
                log_dir=log_dir,
                additional_dir=platform,
            )
        self.logger = logger

        self.use_retool = self.config.get("dupeparser", {}).get("use_retool", True)

        # The retool clonelist is cached inside parsed_dat_dir, so it is
        # mandatory whenever retool is switched on.
        self.parsed_dat_dir = self.config.get("dirs", {}).get("parsed_dat_dir", None)
        if self.use_retool and self.parsed_dat_dir is None:
            raise ValueError("Must specify parsed_dat_dir if using retool files")

        self.dupe_dir = self.config.get("dirs", {}).get("dupe_dir", None)
        if self.dupe_dir is None:
            raise ValueError("dupe_dir should be specified in config file")

        # Pull in platform config that we need
        mod_dir = os.path.dirname(romsearch.__file__)

        retool_config_file = os.path.join(
            mod_dir, "configs", "clonelists", "retool.yml"
        )
        retool_config = load_yml(retool_config_file)
        self.retool_url = retool_config.get("url", None)
        self.retool_platform_file = retool_config.get(platform, None)

        if default_config is None:
            default_file = os.path.join(mod_dir, "configs", "defaults.yml")
            default_config = load_yml(default_file)
        self.default_config = default_config

        if regex_config is None:
            regex_file = os.path.join(mod_dir, "configs", "regex.yml")
            regex_config = load_yml(regex_file)
        self.regex_config = regex_config

        self.log_line_sep = log_line_sep
        self.log_line_length = log_line_length

    def _separator(self):
        """Return the separator line written around log sections."""
        return f"{self.log_line_sep * self.log_line_length}"

    def run(self):
        """Run the dupe parser

        Returns:
            ``False`` when retool is enabled but the retool URL or the
            platform's clonelist file name is not configured; otherwise the
            tuple ``(dupe_dict, retool_dict)`` from ``get_dupe_dict``. The dupe
            dictionary is also saved to ``<dupe_dir>/<platform> (dupes).json``.
        """

        retool_unconfigured = (
            self.retool_platform_file is None or self.retool_url is None
        )
        if retool_unconfigured and self.use_retool:
            self.logger.warning(self._separator())
            self.logger.warning(
                centred_string(
                    "retool config for the platform needs to be present "
                    "if using retool",
                    total_length=self.log_line_length,
                )
            )
            self.logger.warning(self._separator())
            return False

        self.logger.info(self._separator())
        self.logger.info(
            centred_string("Running DupeParser", total_length=self.log_line_length)
        )
        self.logger.info(self._separator())

        dupe_dict, retool_dict = self.get_dupe_dict()

        # Save out the dupe dict. Only the retool path creates dupe_dir, so
        # make sure it exists here for the case where retool is disabled.
        os.makedirs(self.dupe_dir, exist_ok=True)
        out_file = os.path.join(self.dupe_dir, f"{self.platform} (dupes).json")
        save_json(dupe_dict, out_file)

        self.logger.info(self._separator())

        return dupe_dict, retool_dict

    def get_dupe_dict(self):
        """Collect dupes from every enabled source (currently only retool)

        Returns:
            tuple: ``(dupe_dict, retool_dict)`` where ``dupe_dict`` is sorted
            by group name and ``retool_dict`` is the raw list of retool
            variants, or ``None`` when retool is disabled.
        """

        dupe_dict = {}

        # Retool dupes
        retool_dict = None
        if self.use_retool:
            dupe_dict, retool_dict = self.get_retool_dupes(dupe_dict)

        dupe_dict = dict(sorted(dupe_dict.items()))

        return dupe_dict, retool_dict

    def get_retool_dupes(self, dupe_dict=None):
        """Get dupes from the retool curated list

        Args:
            dupe_dict (dict, optional): Existing dupe dictionary to extend in
                place. Defaults to None, which starts from an empty dict.

        Returns:
            tuple: ``(dupe_dict, retool_dupes)``. ``dupe_dict`` maps a group
            name to ``{search_term: info}``; ``retool_dupes`` is the variant
            list returned by ``get_retool_dupe_dict``.
        """

        if dupe_dict is None:
            dupe_dict = {}

        retool_dupes = self.get_retool_dupe_dict()

        # Lower-cased group name -> the spelling already used as a key, so
        # groups differing only by case are merged. setdefault keeps the
        # first spelling encountered.
        known_groups = {}
        for key in dupe_dict:
            known_groups.setdefault(key.lower(), key)

        for retool_dupe in retool_dupes:

            # If we don't have titles or compilations within the dupe dict, skip
            # NOTE(review): a variant carrying only "supersets" is skipped here
            # even though supersets are parsed below — confirm this is intended.
            if "titles" not in retool_dupe and "compilations" not in retool_dupe:
                continue

            # Resolve the group to the spelling already present (if any).
            group = retool_dupe["group"]
            final_group = known_groups.setdefault(group.lower(), group)

            # Always write through final_group: indexing with the raw group
            # name raises KeyError whenever the casing differs from the
            # existing key.
            entries = dupe_dict.setdefault(final_group, {})

            # Pull out individual titles
            for title in retool_dupe.get("titles", []):
                entries[title["searchTerm"]] = {
                    "name_type": title.get("nameType", None),
                    "priority": title.get("priority", 1),
                    "filters": title.get("filters", None),
                }

            # Check for compilations. If we have them, pull them out and
            # potentially the title position
            for compilation in retool_dupe.get("compilations", []):
                entries[compilation["searchTerm"]] = {
                    "name_type": compilation.get("nameType", None),
                    "is_compilation": True,
                    "priority": compilation.get("priority", 1),
                    "title_pos": compilation.get("titlePosition", None),
                    "filters": compilation.get("filters", None),
                }

            # Check for supersets. If we have them, pull them out and
            # potentially the title position
            for superset in retool_dupe.get("supersets", []):
                entries[superset["searchTerm"]] = {
                    "name_type": superset.get("nameType", None),
                    "is_superset": True,
                    "priority": superset.get("priority", 1),
                    "english_friendly": superset.get("englishFriendly", False),
                    "title_pos": superset.get("titlePosition", None),
                    "filters": superset.get("filters", None),
                }

        return dupe_dict, retool_dupes

    def download_retool_dupe(
        self,
        out_file=None,
        just_date=False,
    ):
        """Download the retool curated list, optionally just returning the last modified date

        Args:
            out_file (str, optional): Path the clonelist text is written to.
                Required unless ``just_date`` is True.
            just_date (bool, optional): If True, nothing is written and the
                clonelist's ``description.lastUpdated`` value is returned.
                Defaults to False.

        Returns:
            The ``lastUpdated`` value when ``just_date`` is True, otherwise
            ``True`` once the file has been written.

        Raises:
            ValueError: If a file should be saved but ``out_file`` is None.
            requests.HTTPError: If the server answers with an error status.
        """

        # Validate before touching the network so a bad call fails fast.
        if not just_date and out_file is None:
            raise ValueError(
                "Should specify an out_file to save the retool dupe list to"
            )

        retool_url = f"{self.retool_url}/{self.retool_platform_file}"
        with requests.get(retool_url, timeout=self.request_timeout) as r:
            # Fail on HTTP errors instead of trying to parse an error page.
            r.raise_for_status()
            # Parsing also guards against caching a non-JSON payload.
            retool_dict = r.json()
            retool_full_file = r.text

        if just_date:
            return retool_dict["description"]["lastUpdated"]

        with open(out_file, "w", encoding="utf-8") as f:
            f.write(retool_full_file)

        return True

    def get_retool_dupe_dict(self):
        """Pull the retool duplicates out of the clonelist file

        The clonelist is cached as ``<parsed_dat_dir>/<platform> (retool).json``.
        It is downloaded when missing and re-downloaded when its
        ``description.lastUpdated`` differs from the remote copy's.

        Returns:
            The ``"variants"`` entry of the clonelist.
        """

        os.makedirs(self.dupe_dir, exist_ok=True)

        retool_dupe_file = os.path.join(
            self.parsed_dat_dir, f"{self.platform} (retool).json"
        )
        if not os.path.exists(retool_dupe_file):
            os.makedirs(self.parsed_dat_dir, exist_ok=True)

            self.logger.info(
                centred_string(
                    "No retool dupe file found. Downloading",
                    total_length=self.log_line_length,
                )
            )
            self.download_retool_dupe(retool_dupe_file)

        retool_dupes = load_json(retool_dupe_file)

        # Check if there's a more updated file, if so download it
        local_file_time = retool_dupes["description"]["lastUpdated"]
        remote_file_time = self.download_retool_dupe(just_date=True)

        if local_file_time != remote_file_time:
            self.logger.info(
                centred_string(
                    "More up-to-date dupe file found. Will download",
                    total_length=self.log_line_length,
                )
            )
            self.download_retool_dupe(retool_dupe_file)
            # Only a refreshed file needs to be read again.
            retool_dupes = load_json(retool_dupe_file)

        self.logger.info(
            centred_string(
                f"Using retool clonelist {retool_dupe_file}",
                total_length=self.log_line_length,
            )
        )

        return retool_dupes["variants"]