import os
import numpy as np
import requests
import romsearch
from ..util import (
centred_string,
setup_logger,
load_yml,
get_parent_name,
get_short_name,
load_json,
save_json,
)
# Keys of a parsed dat entry that flag it as a clone of another entry.
# get_dat_dupes checks them in this order.
ID_CLONE_KEYS = ["cloneof", "cloneofid"]
[docs]
class DupeParser:
def __init__(
    self,
    platform=None,
    config_file=None,
    config=None,
    default_config=None,
    regex_config=None,
    logger=None,
    log_line_sep="=",
    log_line_length=100,
):
    """Set up a tool for figuring out a list of dupes

    Args:
        platform (str, optional): Platform name. Defaults to None, which
            raises a ValueError.
        config_file (str, optional): Path to a yml config file. Only read
            when ``config`` is None. Defaults to None
        config (dict, optional): Configuration dictionary. Used in
            preference to ``config_file``. Defaults to None
        default_config (dict, optional): Default configuration dictionary.
            Defaults to None, in which case ``configs/defaults.yml`` from
            the romsearch package directory is loaded
        regex_config (dict, optional): Configuration dictionary for regex
            search. Defaults to None, in which case ``configs/regex.yml``
            from the romsearch package directory is loaded
        logger (logging.Logger, optional): Logger instance. Defaults to
            None, in which case one is created with ``setup_logger``
        log_line_sep (str, optional): String repeated ``log_line_length``
            times to draw separator lines in the log. Defaults to "="
        log_line_length (int, optional): Line length of log. Defaults to 100

    Raises:
        ValueError: If ``platform`` is None, if neither ``config_file`` nor
            ``config`` is given, if dat files are in use but the config has
            no ``dirs.parsed_dat_dir``, or if the config has no
            ``dirs.dupe_dir``.

    TODO:
        - At some point, we might want to consider adding in the retool supersets
    """

    if platform is None:
        raise ValueError("platform must be specified")
    self.platform = platform

    # An explicit config dict wins; the file is only a fallback
    if config is None:
        if config_file is None:
            raise ValueError("config_file or config must be specified")
        config = load_yml(config_file)
    self.config = config

    if logger is None:
        fallback_log_dir = os.path.join(os.getcwd(), "logs")
        log_dir = self.config.get("dirs", {}).get("log_dir", fallback_log_dir)
        log_level = self.config.get("logger", {}).get("level", "info")
        logger = setup_logger(
            log_level=log_level,
            script_name="DupeParser",
            log_dir=log_dir,
            additional_dir=platform,
        )
    self.logger = logger

    # Which sources of dupe information to use
    parser_settings = self.config.get("dupeparser", {})
    self.use_dat = parser_settings.get("use_dat", True)
    self.use_retool = parser_settings.get("use_retool", True)

    # Directories for the parsed dats (input) and the dupe files (output)
    dir_settings = self.config.get("dirs", {})

    self.parsed_dat_dir = dir_settings.get("parsed_dat_dir")
    if self.use_dat and self.parsed_dat_dir is None:
        raise ValueError("Must specify parsed_dat_dir if using dat files")

    self.dupe_dir = dir_settings.get("dupe_dir")
    if self.dupe_dir is None:
        raise ValueError("dupe_dir should be specified in config file")

    # Pull in the configs shipped alongside the romsearch package
    configs_dir = os.path.join(os.path.dirname(romsearch.__file__), "configs")

    retool_config = load_yml(
        os.path.join(configs_dir, "clonelists", "retool.yml")
    )
    self.retool_url = retool_config.get("url")
    # Looked up by platform name; None when retool has nothing for this platform
    self.retool_platform_file = retool_config.get(platform)

    if default_config is None:
        default_config = load_yml(os.path.join(configs_dir, "defaults.yml"))
    self.default_config = default_config

    if regex_config is None:
        regex_config = load_yml(os.path.join(configs_dir, "regex.yml"))
    self.regex_config = regex_config

    self.log_line_sep = log_line_sep
    self.log_line_length = log_line_length
[docs]
def run(self):
    """Run the dupe parser

    Builds the dupe dictionary and saves it as
    ``<platform> (dupes).json`` inside ``self.dupe_dir``.

    Returns:
        False if retool is enabled but either the retool URL or the retool
        entry for this platform is missing (only a warning is logged and
        nothing is written). Otherwise a tuple ``(dupe_dict, retool_dict)``
        as returned by ``get_dupe_dict``.
    """

    separator = f"{self.log_line_sep * self.log_line_length}"

    # Bail out early if we've been asked to use retool but can't
    retool_unconfigured = (
        self.retool_platform_file is None or self.retool_url is None
    )
    if retool_unconfigured and self.use_retool:
        message = centred_string(
            "retool config for the platform needs to be present "
            "if using retool",
            total_length=self.log_line_length,
        )
        self.logger.warning(separator)
        self.logger.warning(message)
        self.logger.warning(separator)
        return False

    banner = centred_string("Running DupeParser", total_length=self.log_line_length)
    self.logger.info(separator)
    self.logger.info(banner)
    self.logger.info(separator)

    dupe_dict, retool_dict = self.get_dupe_dict()

    # Only the dupe dict is written to disk; the retool dict is just returned
    save_json(
        dupe_dict,
        os.path.join(self.dupe_dir, f"{self.platform} (dupes).json"),
    )

    self.logger.info(separator)

    return dupe_dict, retool_dict
[docs]
def get_dupe_dict(self):
    """Loop through potentially both the dat files and the retool config file to get out dupes

    Returns:
        tuple: ``(dupe_dict, retool_dict)``. ``dupe_dict`` is sorted by key.
        ``retool_dict`` is whatever ``get_retool_dupes`` hands back as its
        second value, or None when retool is not in use.
    """

    dupe_dict = {}
    retool_dict = None

    # Prefer retool dupes first, so they're already in place for the dat pass
    if self.use_retool:
        dupe_dict, retool_dict = self.get_retool_dupes(dupe_dict)

    if self.use_dat:
        dat_dupes = self.get_dat_dupes(dupe_dict)
        # Guard against a None result (e.g. the parsed dat is missing), so
        # the dupes gathered so far aren't lost and the sort below can't fail
        if dat_dupes is not None:
            dupe_dict = dat_dupes

    dupe_dict = dict(sorted(dupe_dict.items()))

    return dupe_dict, retool_dict
[docs]
def get_dat_dupes(self, dupe_dict=None):
    """Get dupes from the dat that we've already parsed to JSON

    Reads ``<platform> (dat parsed).json`` from ``self.parsed_dat_dir`` and,
    for every entry carrying one of ``ID_CLONE_KEYS``, registers the entry's
    short name as a dupe (priority 1) under its parent's name.

    Args:
        dupe_dict (dict, optional): Existing dupe dictionary to add to. It
            is updated in place. Defaults to None, which starts from an
            empty dictionary.

    Returns:
        dict: The dupe dictionary. If the parsed dat file doesn't exist, a
        warning is logged and ``dupe_dict`` is returned unchanged.

    Raises:
        NotImplementedError: If an entry has a "cloneof" key, since only
            "cloneofid" is currently handled.
        ValueError: If ``ID_CLONE_KEYS`` contains a key this method doesn't
            know how to parse.
    """

    if dupe_dict is None:
        dupe_dict = {}

    json_dat = os.path.join(
        self.parsed_dat_dir, f"{self.platform} (dat parsed).json"
    )

    if not os.path.exists(json_dat):
        separator = f"{self.log_line_sep * self.log_line_length}"
        self.logger.warning(separator)
        self.logger.warning(
            centred_string(
                f"No dat file found for {self.platform}",
                total_length=self.log_line_length,
            )
        )
        self.logger.warning(separator)
        # Hand back what we were given, so dupes from other sources survive
        return dupe_dict

    self.logger.info(
        centred_string(
            f"Using parsed dat file {json_dat}", total_length=self.log_line_length
        )
    )

    dat_dict = load_json(json_dat)

    # Index entries by ID once, rather than scanning the whole dat for every
    # clone. setdefault keeps the first entry seen for an ID; entries
    # without an "id" are simply left out of the index
    entries_by_id = {}
    for entry in dat_dict.values():
        if "id" in entry:
            entries_by_id.setdefault(entry["id"], entry)

    for clone_name, clone_entry in dat_dict.items():
        for id_clone_key in ID_CLONE_KEYS:

            if id_clone_key not in clone_entry:
                continue

            clone_key = clone_entry[id_clone_key]

            # If it's an ID, find that ID
            if id_clone_key == "cloneofid":
                # Sometimes, IDs are missing from the dat so just move on
                parent_entry = entries_by_id.get(clone_key)
                if parent_entry is None:
                    continue
                parent_name = parent_entry["name"]
            elif id_clone_key == "cloneof":
                # TODO
                raise NotImplementedError("Only current implemented for cloneofid")
            else:
                raise ValueError(f"Only know how to parse {ID_CLONE_KEYS}")

            # Get short names here
            parent_game_name = get_short_name(
                parent_name,
                default_config=self.default_config,
                regex_config=self.regex_config,
            )
            clone_short_name = get_short_name(
                clone_name,
                default_config=self.default_config,
                regex_config=self.regex_config,
            )

            # If the names are the same, just skip
            if parent_game_name == clone_short_name:
                continue

            found_parent_name = get_parent_name(
                game_name=parent_game_name,
                dupe_dict=dupe_dict,
            )

            if found_parent_name not in dupe_dict:
                dupe_dict[found_parent_name] = {}

            # Don't overwrite priority if it's already set
            if clone_short_name not in dupe_dict[found_parent_name]:
                dupe_dict[found_parent_name][clone_short_name] = {"priority": 1}

    return dupe_dict