#!/usr/local/bin/python3
#remv, a complex renamer

import hashlib
import os
import re
import string
import sys

#default value of every supported key=value option
config = {
    'match':'(.*)',
    'format':'{}',
    'change':' ',
    'operation':'move',
    'to':'_',
    'del':'{}[]()',
    'dir':'.',
    'lower':True,
    'dryrun':'yes',
    'seqfill':2,
    'load':'none',
    'verbose':'no',
}

#one line of help text per option, printed by usage()
config_help = {
    'match':'match files in regex format, "(?P<name>)" re groups correspond with "{name}" format groups',
    'format':'move file to this name in format string style, "{name}" groups come from "(?P<name>)" match groups. special name {hash[-type][-size]} will result in the file hash',
    'change':'characters to change',
    'operation':'move/link operation to preform',
    'to':'each character in change converts to the character found here',
    'del':'delete characters found here',
    'dir':'directory to match files from',
    'lower':'true/false - convert to lowercase',
    'dryrun':'yes/no - do not actually move files',
    'seqfill':'digits - special matchgroup "seq" will get zero filled to this number of digits',
    'load':'filename - load config from filename',
    'verbose':'print additional debug information',
}


def usage(config, config_help):
    '''print an example invocation followed by every option and its default

    config       dict of option name -> default value
    config_help  dict of option name -> help text, options without an entry
                 are listed with 'no help here'
    '''
    print('remv "match=Asshats that put spaces in filenames (?P<seq>[0-9]+).mkv" "format=no_spaces_here_{seq}.mkv"')
    print()
    print('move files with advanced reguler expression based matching and formating')
    print('all options are key=value (like dd)')
    print()
    print('Options (key=default)')
    print()
    for item in sorted(config):
        kv_text = ' "{}={}"'.format(item, config[item])
        #pad short entries so the help text lines up, long entries are left as is
        print('{} - {}'.format(kv_text.ljust(20), config_help.get(item, 'no help here')))


def parse_args(argv, format_map):
    '''parse key=value pairs into a dictionary

    argv        a list of key=value pairs
    format_map  map a config key to a function that will correctly format that key

    single args will parse to arg:True
    the single arg = will interrupt the normal parsing and append all
    remaining args into the 'parameters' key
    '''
    config = {}
    for arg in argv:
        key = False
        value = True
        if 'parameters' in config:
            #after the lone '=' everything is collected verbatim
            #(bare args are deliberately not also stored as arg:True here)
            config['parameters'].append(arg)
        elif arg == '=':
            config['parameters'] = []
        elif '=' in arg:
            key, sep, value = arg.partition('=')
            if key in format_map:
                value = format_map[key](value)
        else:
            key = arg
        if key is not False:
            config[key] = value
    return config


def file_hash(fname, hash_name):
    '''return the hex digest of the file fname

    fname      path of the file to read
    hash_name  algorithm name passed to hashlib.new()
    '''
    digest = hashlib.new(hash_name)
    buffer_size = 1000000
    with open(fname, 'rb') as h_file:
        #read in chunks so the whole file is never held in memory
        for chunk in iter(lambda: h_file.read(buffer_size), b''):
            digest.update(chunk)
    #could encode with base64... but it interacts poorly with limit so just using hexdigest
    return digest.hexdigest()


def move_listing(in_list, in_exp, out_exp, translate_table, seq_fill = 2, verbose = None, directory = None):
    '''generate (from, to) based on parameters

    in_list          list of items to convert
    in_exp           python style regex, applied with re.match
    out_exp          'format{style}string'
    translate_table  dict to use in str.translate
    seq_fill         zfill digits for the special match group 'seq'
    verbose          print junk
    directory        directory the names in in_list live in, only used to
                     open files for {hash...} fields, None means the names
                     are opened as given

    items that do not match in_exp are skipped
    '''
    in_re = re.compile(in_exp)
    #names containing ' ' or '-' are left out, '-' is the separator of {hash-type-size}
    hash_names = sorted(h for h in hashlib.algorithms_available if ' ' not in h and '-' not in h)
    hash_avail = '|'.join(re.escape(h) for h in hash_names)
    if verbose:
        print('hash available:', hash_avail)
    hash_re = re.compile(r'\{(hash(-(?P<hash>' + hash_avail + r'))?(-(?P<limit>[0-9]+))?)\}')
    for in_name in in_list:
        in_match = in_re.match(in_name)
        if not in_match:
            if verbose:
                print('no match:', in_name, in_exp)
            continue
        match_group = in_match.groupdict()
        match_list = in_match.groups()
        if verbose:
            print('match list:', match_list)
            print('match group:', match_group)
        #digest cache so a file is read once per algorithm
        digests = {}
        for hash_match in hash_re.finditer(out_exp):
            #the full field text, e.g. 'hash-md5-8', is the format key
            hash_key = hash_match.group(1)
            hash_args = hash_match.groupdict()
            hash_name = hash_args.get('hash')
            if hash_name is None:
                hash_name = 'md5'
            hash_limit = hash_args.get('limit')
            if verbose:
                print('hash', hash_match, hash_key, hash_name, hash_limit)
            if hash_name not in digests:
                hash_path = os.path.join(directory, in_name) if directory else in_name
                digests[hash_name] = file_hash(hash_path, hash_name)
            match_group[hash_key] = digests[hash_name]
            if hash_limit is not None:
                match_group[hash_key] = match_group[hash_key][:int(hash_limit)]
        #special match group 'seq', an optional group that did not take part is None
        seq = match_group.get('seq')
        if isinstance(seq, str) and seq.isdigit():
            match_group['seq'] = seq.zfill(seq_fill)
        #when no groups are specified use whole match
        if not match_list:
            match_list = [in_match.group(0)]
        #apply the regex match to the format
        out_format = out_exp.format(*match_list, **match_group)
        #apply the translate table last
        out_name = out_format.translate(translate_table)
        if verbose:
            print('out_format:', out_format)
        yield (in_name, out_name)


def _re_apply(label, action, directory, in_exp, out_exp, translate_table, list_only, seq_fill, verbose):
    '''shared body of re_mv() and re_ln()

    label   command name shown in the printed line ('mv' or 'ln')
    action  function(path_from, path_to) that performs the operation
    '''
    in_list = os.listdir(directory)
    listing = move_listing(in_list, in_exp, out_exp, translate_table, seq_fill, verbose, directory)
    for fname_from, fname_to in listing:
        fpath_from = os.path.join(directory, fname_from)
        #the destination is used as given, it is not joined with directory
        skip = os.path.exists(fname_to)
        if skip:
            print('Skip: ', end='')
        print(label, '"{}"'.format(fpath_from), '"{}"'.format(fname_to))
        if not (list_only or skip):
            action(fpath_from, fname_to)


def re_mv(directory, in_exp, out_exp, translate_table, list_only = None, seq_fill = 2, verbose = None):
    '''move files based on parameters, wrapper around move_listing()

    directory        directory to list files
    in_exp           python style regex to match files
    out_exp          format string to rename files to
    translate_table  str.translate style table
    list_only        don't actually rename any files only print operation
    seq_fill         digits for zfill
    verbose          print additional junk

    an existing destination is never overwritten, the file is reported as
    'Skip: ' instead
    '''
    _re_apply('mv', os.rename, directory, in_exp, out_exp, translate_table, list_only, seq_fill, verbose)


def re_ln(directory, in_exp, out_exp, translate_table, list_only = None, seq_fill = 2, verbose = None):
    '''link files based on parameters, wrapper around move_listing()

    directory        directory to list files
    in_exp           python style regex to match files
    out_exp          format string to name the links
    translate_table  str.translate style table
    list_only        don't actually link any files only print operation
    seq_fill         digits for zfill
    verbose          print additional junk

    an existing destination is never overwritten, the file is reported as
    'Skip: ' instead
    '''
    _re_apply('ln', os.link, directory, in_exp, out_exp, translate_table, list_only, seq_fill, verbose)


def build_translate_table(from_string, to_string, delete_string, do_lower):
    '''build the dict handed to str.translate

    from_string    characters to replace
    to_string      replacement for the character at the same position in
                   from_string, extra characters on either side are ignored
    delete_string  characters to remove
    do_lower       when true map ascii uppercase to lowercase

    later rules win: delete overrides replace, replace overrides lowercase
    '''
    translate_table = {}
    if do_lower:
        translate_table.update(
            {ord(upper): lower for upper, lower in zip(string.ascii_uppercase, string.ascii_lowercase)}
        )
    translate_table.update({ord(src): dst for src, dst in zip(from_string, to_string)})
    translate_table.update({ord(c): None for c in delete_string})
    return translate_table


def main():
    '''command line entry point: read options, then run the selected operation'''
    def istrue(text):
        return text in ('yes', 'true')

    config_format = {
        'lower':istrue,
        'seqfill':int,
    }
    operation_map = {
        #map operation name to function
        'move':re_mv,
        'link':re_ln,
    }
    argv_config = parse_args(sys.argv[1:], config_format)
    if 'help' in argv_config:
        usage(config, config_help)
        sys.exit()
    if 'load' in argv_config:
        load_config = dict(argv_config)
        del argv_config['load']
        loaded = set()
        #a loaded file may itself name another file to load
        while 'load' in load_config:
            load_fname = load_config.pop('load')
            if not isinstance(load_fname, str):
                #a bare 'load' parses to True, open(True) would open a file descriptor
                sys.exit('remv: load needs a filename, use load=filename')
            if load_fname in loaded:
                sys.exit('remv: config file "{}" is loaded more than once'.format(load_fname))
            loaded.add(load_fname)
            with open(load_fname, 'r') as config_file:
                #blank lines would otherwise become an empty option name
                lines = [line for line in config_file.read().split('\n') if line.strip()]
            load_config.update(parse_args(lines, config_format))
            if 'help' in load_config:
                usage(config, config_help)
                sys.exit()
        #options given on the command line win over loaded ones
        load_config.update(argv_config)
        argv_config = load_config
    config.update(argv_config)
    verbose = config['verbose'] == 'yes'
    if verbose:
        for item in config:
            print('config:', item, config[item])
    #anything other than dryrun=no will make it a dry run
    dry_run = config['dryrun'] != 'no'
    try:
        mv_command = operation_map[config['operation']]
    except KeyError:
        sys.exit('remv: unknown operation "{}", expected one of: {}'.format(
            config['operation'], ', '.join(sorted(operation_map))))
    translate_table = build_translate_table(config['change'], config['to'], config['del'], config['lower'])
    if verbose:
        print('translate_table:', translate_table)
    mv_command(config['dir'], config['match'], config['format'], translate_table, dry_run, config['seqfill'], verbose)


if __name__ == '__main__':
    main()