#!/usr/bin/env python3
# Module     : multirep.py
# Synopsis   : Multiple replacements
# Programmer : Simon Brunning - simon@brunningonline.net
# Date       : 20/09/2000
# Notes      : Thanks to Fredrik Lundh for the MultiReplace class, which I
#              have butchered.
"""Perform multiple replacements.

Takes a list of delimited from and to values from a replacements file,
and replaces the from values with the to values in the target file(s).

Usage   : multirep.py [options] replacementsfile targetfile(s)
Options : -h = help
          -d = delimiter (defaults to tabs, s = spaces, c = commas)
          -p = prefix for new files (defaults to no prefix)
          -c = case insensitive match
          -w = replace whole words only
          -q = quiet mode - no output
          -v = verbose mode - lists all changes made
          -l = list all replacements to be made"""

import getopt
import glob
import os
import re
import sys

# Files are handled byte-transparently: Latin-1 maps every byte value to
# exactly one code point, so decoding and re-encoding never alters bytes
# that were not touched by a replacement.
_DEFAULT_ENCODING = 'latin-1'

# Values accepted by the -d option; anything else leaves the default (tab).
_DELIMITERS = {'s': ' ', 'c': ','}


def _showUsage(pause):
    """Print a blank line and the usage text; optionally wait for Enter."""
    print()
    print(__doc__)
    if pause:
        input("Hit enter to continue...")


def multirep(arguments):
    """Run the command-line tool.

    arguments -- the command-line arguments without the program name:
                 options, then the replacements file, then one or more
                 target file names or glob patterns.

    Each matched target file is rewritten (or written to a prefixed copy
    when -p is given).  Usage problems are reported on stdout and the
    function returns without raising.  getopt.GetoptError (bad option) and
    OSError (unreadable file) are not caught here.
    """
    # Split arguments list into options and arguments
    options, arguments = getopt.getopt(arguments, '?hd:p:wcqvl')

    # Set defaults
    delimiter = '\t'
    prefix = ''
    wholeWords = False
    caseInsensitive = False
    quiet = False
    verbose = False
    listReplacements = False

    # Options - override defaults and show help
    for option, value in options:
        flag = option[-1]
        if flag in '?h':
            # Help was asked for: show it and stop instead of carrying on
            # and processing files.
            _showUsage(pause=True)
            return
        elif flag == 'd':    # Specify delimiter
            delimiter = _DELIMITERS.get(value, delimiter)
        elif flag == 'p':    # Specify prefix
            prefix = value
        elif flag == 'w':    # Whole words
            wholeWords = True
        elif flag == 'c':    # Case insensitive
            caseInsensitive = True
        elif flag == 'q':    # Quiet
            quiet = True
        elif flag == 'v':    # Verbose
            verbose = True
        elif flag == 'l':    # List replacements
            listReplacements = True

    # No replacements file given
    if not arguments:
        _showUsage(pause=True)
        return

    # Build replacement function from replacements file
    replacerClass = VerboseMultiReplacer if verbose else MultiReplacer
    try:
        replacer = replacerClass(arguments[0], delimiter, wholeWords,
                                 caseInsensitive)
    except ValueError:
        # Raised when a line of the file has no delimiter in it
        print()
        print('Invalid replacements file.')
        return

    if listReplacements:
        print("Replacements to be made:")
        print()
        for replacement in sorted(replacer.replacements.items()):
            print('Replacing "%s"\t with "%s"' % replacement)
        print()

    # Expand remaining arguments into target file list
    patterns = arguments[1:]
    if not patterns:
        _showUsage(pause=False)
        return
    targetFiles = [name for pattern in patterns
                   for name in glob.glob(pattern)]

    # Perform replacement on each file
    processedCount = 0
    for targetFile in targetFiles:
        replaceFile(targetFile, replacer, prefix)
        if not quiet:
            print("File %s processed." % targetFile)
        processedCount += 1
    if not quiet:
        print()
        print("%s files processed." % processedCount)


def replaceFile(infilename, replacer, prefix='', encoding=_DEFAULT_ENCODING):
    """Apply replacer to the contents of one file and write the result.

    infilename -- path of the file to read.
    replacer   -- callable taking the file's text and returning new text.
    prefix     -- prepended to the base name to form the output name, in
                  the same directory; '' overwrites the input file.
    encoding   -- codec used to decode the input and encode the output.
                  The Latin-1 default round-trips arbitrary bytes.

    Raises UnicodeEncodeError if the replaced text contains characters the
    encoding cannot represent; OSError propagates from the file operations.
    """
    # Binary mode plus an explicit decode keeps line endings untouched
    with open(infilename, 'rb') as infile:
        oldtext = infile.read().decode(encoding)

    newtext = replacer(oldtext)

    outfilename = os.path.join(os.path.dirname(infilename),
                               prefix + os.path.basename(infilename))
    with open(outfilename, 'wb') as outfile:
        outfile.write(newtext.encode(encoding))


class MultiReplacer(object):
    """Callable that performs several string replacements in one pass.

    Attributes:
      replacements -- dict mapping each from value to its to value.
      charMap      -- ordinal -> character dict for str.translate when every
                      replacement is single character to single character
                      (and whole-word matching is off); otherwise None.
      reObject     -- compiled alternation of all from values when charMap
                      is None; otherwise None.
    """

    # Subclasses that need replaceMatch called for every replacement set
    # this to False to force the regular expression path.
    useCharMap = True

    def __init__(self, replacements, delimiter='\t', wholeWords=None,
                 caseInsensitive=None, encoding=_DEFAULT_ENCODING):
        """Build the replacer.

        replacements    -- a mapping (or iterable of pairs) of from -> to
                           values, or the path of a replacements file with
                           one delimited from/to pair per line.
        delimiter       -- separator between from and to values in the file.
        wholeWords      -- true to replace only at word boundaries.
        caseInsensitive -- true to match from values regardless of case.
        encoding        -- codec used to read the replacements file.

        Raises ValueError if a non-blank line of the file does not contain
        the delimiter.
        """
        self.caseInsensitive = bool(caseInsensitive)

        # Build replacements dictionary - may come in as a mapping or a file
        self.replacements = {}
        isPath = (isinstance(replacements, (str, bytes))
                  or hasattr(replacements, '__fspath__'))
        if isPath:
            self._loadFile(replacements, delimiter, encoding)
        else:
            self.replacements.update(replacements)

        # Lower-cased view of the table, so that text matched in a different
        # case from its key can still be resolved.  First key wins on clash.
        self._foldedReplacements = {}
        if self.caseInsensitive:
            for fromValue, toValue in self.replacements.items():
                self._foldedReplacements.setdefault(fromValue.lower(),
                                                    toValue)

        # Build char to char mapping...
        self.charMap = None
        self.reObject = None
        if self.useCharMap and not wholeWords:
            self.charMap = self._buildCharMap()
            if self.charMap is not None:
                return

        # String to string mapping - use a regular expression.  Alternation
        # takes the first branch that matches, so longer from values go
        # first; otherwise 'ab' would always win over 'abc'.  Empty from
        # values are dropped because they would match at every position.
        fromVals = sorted((key for key in self.replacements if key),
                          key=lambda key: (-len(key), key))
        if fromVals:
            alternation = '|'.join(map(re.escape, fromVals))
        else:
            alternation = r'(?!)'    # nothing to replace: never matches
        if wholeWords:
            rePattern = r'\b(' + alternation + r')\b'
        else:
            rePattern = alternation
        self.reObject = re.compile(rePattern,
                                   re.I if self.caseInsensitive else 0)

    def _loadFile(self, path, delimiter, encoding):
        """Read from/to pairs from the replacements file into the table."""
        with open(path, 'r', encoding=encoding) as replacementsFile:
            for line in replacementsFile:
                line = line.rstrip('\r\n')    # Strip newlines
                if not line:
                    continue                  # Tolerate blank lines
                # Unpacking raises ValueError when the delimiter is missing
                fromValue, toValue = line.split(delimiter)[:2]
                self.replacements[fromValue] = toValue

    def _buildCharMap(self):
        """Return a translate table, or None if any pair is not char-to-char."""
        exact = {}
        folded = {}
        for fromValue, toValue in self.replacements.items():
            if len(fromValue) != 1 or len(toValue) != 1:
                return None
            exact[ord(fromValue)] = toValue
            if self.caseInsensitive:
                for variant in (fromValue.upper(), fromValue.lower()):
                    # Some characters change length when case-converted;
                    # those variants cannot go into a per-character table.
                    if len(variant) == 1:
                        folded.setdefault(ord(variant), toValue)
        # An exactly-cased key takes precedence over a case variant
        folded.update(exact)
        return folded

    def _replacementFor(self, text):
        """Return the to value for matched text, or text itself if unknown."""
        try:
            return self.replacements[text]
        except KeyError:
            pass
        if self.caseInsensitive:
            return self._foldedReplacements.get(text.lower(), text)
        # Returning the text unchanged guarantees nothing is ever deleted
        return text

    def __call__(self, string):
        """Return string with all replacements applied."""
        # Char to char mapping
        if self.charMap is not None:
            return string.translate(self.charMap)
        # String to string mapping
        return self.reObject.sub(self.replaceMatch, string)

    def replaceMatch(self, match):
        """Return the replacement text for one regular expression match."""
        return self._replacementFor(match.group(0))


class VerboseMultiReplacer(MultiReplacer):
    """MultiReplacer that prints every replacement it makes."""

    # The translate path never calls replaceMatch, so always use the regexp
    useCharMap = False

    def replaceMatch(self, match):
        """Print the change being made, then return the replacement text."""
        replacement = super(VerboseMultiReplacer, self).replaceMatch(match)
        print('"%s" replaced with "%s"' % (match.group(0), replacement))
        return replacement


if __name__ == '__main__':
    multirep(sys.argv[1:])