# multirep.py

#!/usr/bin/env python
# Module     : multirep.py
# Synopsis   : Multiple replacements
# Programmer : Simon Brunning - simon@brunningonline.net
# Date       : 20/09/2000
# Notes      : Thanks to Fredrik Lundh for the MultiReplace class, which I have butchered.
'''Perform multiple replacements.
Takes a list of delimited from and to values from a replacements file,
and replaces the from values with the to values in the target file(s).

Usage  : multirep.py [options] replacementsfile targetfile(s)
Options: -h = help
         -d = delimiter (defaults to tabs, s = spaces, c = commas)
         -p = prefix for new files (defaults to no prefix)
         -c = case insensitive match
         -w = replace whole words only
         -q = quiet mode - no output
         -v = verbose mode - lists all changes made
              (Will not work for single character replacements)
         -l = list all replacements to be made'''

def multirep(arguments):
    '''Command-line driver: parse options, build a replacer, rewrite files.

    arguments : list of command-line arguments without the program name,
                i.e. [options] replacementsfile targetfile(s).

    Prints the module usage text and returns early when help is requested,
    when the options cannot be parsed, or when the replacements file or the
    target files are missing from the argument list.  Always returns None.
    '''
    import getopt, glob, sys

    # Split arguments list into options and arguments.  Unknown options or
    # a missing option value are reported with the usage text rather than
    # escaping as a traceback.
    try:
        options, arguments = getopt.getopt(arguments, '?hd:p:wcqvl')
    except getopt.GetoptError:
        print('')
        print('Invalid options: %s' % sys.exc_info()[1])
        print(__doc__)
        return

    # Set defaults
    delimiter = '\t'
    prefix = ''
    wholeWords = False
    caseInsensitive = False
    quiet = False
    verbose = False
    listReplacements = False

    # Options - override defaults and show help
    for option, value in options:
        flag = option[-1]
        if flag in '?h':
            print('')
            print(__doc__)
            raw_input("Hit enter to continue...")
            return  # Help was asked for - do not go on to process files
        elif flag == 'd':  # Specify delimiter
            # Any value other than 's' or 'c' leaves the tab default in place.
            if value == 's':
                delimiter = ' '
            elif value == 'c':
                delimiter = ','
        elif flag == 'p':  # Specify prefix
            prefix = value
        elif flag == 'w':  # Whole words
            wholeWords = True
        elif flag == 'c':  # Case insensitive
            caseInsensitive = True
        elif flag == 'q':  # Quiet
            quiet = True
        elif flag == 'v':  # Verbose
            verbose = True
        elif flag == 'l':  # List replacements
            listReplacements = True

    # No replacements file named - show usage
    if not arguments:
        print('')
        print(__doc__)
        raw_input("Hit enter to continue...")
        return

    # Build replacement function from replacements file
    if verbose:
        replacerClass = VerboseMultiReplacer
    else:
        replacerClass = MultiReplacer
    try:
        replacer = replacerClass(arguments[0], delimiter, wholeWords, caseInsensitive)
    except (ValueError, IndexError):
        # Raised while parsing a malformed line of the replacements file.
        print('')
        print('Invalid replacements file.')
        return

    if listReplacements:
        print("Replacements to be made:")
        print('')
        replacementsList = list(replacer.replacements.items())
        replacementsList.sort()
        for replacement in replacementsList:
            print('Replacing "%s"\t with "%s"' % replacement)
        print('')

    # Expand remaining arguments into target file list
    patterns = arguments[1:]
    if not patterns:
        # No target files named at all - show usage
        print('')
        print(__doc__)
        return
    targetFiles = []
    for pattern in patterns:
        targetFiles.extend(glob.glob(pattern))

    # Perform replacement on each file
    processedCount = 0
    for targetFile in targetFiles:
        replaceFile(targetFile, replacer, prefix)
        processedCount += 1
        if not quiet:
            print("File %s processed." % targetFile)
    if not quiet:
        print('')
        print("%s files processed." % processedCount)

def replaceFile(infilename, replacer, prefix=''):
    '''Run replacer over the contents of one file and write the result.

    infilename : path of the file to read (opened in binary mode).
    replacer   : callable taking the old file contents and returning the new.
    prefix     : prepended to the base name of infilename to form the output
                 file name, in the same directory.  With the default empty
                 prefix the input file itself is overwritten.
    '''
    import os

    # Read everything up front; the input may be the same file as the output.
    infile = open(infilename, 'rb')
    try:
        oldtext = infile.read()
    finally:
        infile.close()  # Closed even if the read fails

    newtext = replacer(oldtext)

    directory, basename = os.path.split(infilename)
    outfilename = os.path.join(directory, prefix + basename)
    outfile = open(outfilename, 'wb')
    try:
        outfile.write(newtext)
    finally:
        outfile.close()  # Closed even if the write fails

class MultiReplacer(object):
    '''Callable that applies a set of from -> to replacements to a string.

    replacements    : either a mapping (anything dict.update accepts) of
                      from values to to values, or the name of a file with
                      one "from<delimiter>to" pair per line.
    delimiter       : field separator used when reading a replacements file.
    wholeWords      : if true, only replace matches that sit between
                      regular expression word boundaries.
    caseInsensitive : if true, from values match regardless of case.

    Raises ValueError if a line of the replacements file does not contain
    the delimiter.

    Attributes:
    replacements : dictionary of from value -> to value.
    charMap      : 256 character translation table, or None.  Only built
                   when wholeWords is off and every from and to value is a
                   single character.
    reObject     : compiled regular expression matching any from value.
                   Only set when charMap is None.
    '''

    def __init__(self, replacements, delimiter='\t', wholeWords=None, caseInsensitive=None):
        import re

        # Build replacements dictionary - may come in as a mapping or as a file
        self.replacements = {}
        try:
            # replacements is a mapping
            self.replacements.update(replacements)
        except (AttributeError, ValueError):
            # replacements is a file name
            replacementsFile = open(replacements, 'r')
            try:
                for line in replacementsFile:
                    fromValue, toValue = line.split(delimiter)[:2]  # Split line
                    # Strip newlines.  rstrip copes with an empty to value,
                    # so a from value can be replaced with nothing.
                    self.replacements[fromValue] = toValue.rstrip('\r\n')
            finally:
                replacementsFile.close()

        # Case insensitive matches can differ in case from the from value
        # they matched, so keep a lower-cased index to look them up with.
        self._foldedReplacements = {}
        if caseInsensitive:
            for fromValue, toValue in self.replacements.items():
                self._foldedReplacements[fromValue.lower()] = toValue

        # Build char to char mapping...
        self.charMap = None
        if not wholeWords:
            charMap = [chr(code) for code in range(256)]
            for fromValue, toValue in self.replacements.items():
                if len(fromValue) != 1 or len(toValue) != 1:
                    break  # Not purely char to char - fall through to regexp
                if caseInsensitive:
                    charMap[ord(fromValue.upper())] = toValue
                    charMap[ord(fromValue.lower())] = toValue
                else:
                    charMap[ord(fromValue)] = toValue
            else:
                self.charMap = "".join(charMap)
                return

        # String to string mapping - use a regular expression.
        # Alternation takes the first alternative that matches, so put the
        # longest from values first; otherwise a short from value would
        # shadow any longer one that starts with it.
        decorated = [(-len(fromValue), fromValue) for fromValue in self.replacements.keys()]
        decorated.sort()
        fromVals = [fromValue for negLength, fromValue in decorated]

        # Build regexp pattern
        rePattern = '|'.join([re.escape(fromValue) for fromValue in fromVals])
        if wholeWords:
            rePattern = r'\b(' + rePattern + r')\b'

        # Compile regexp
        if caseInsensitive:
            self.reObject = re.compile(rePattern, re.I)
        else:
            self.reObject = re.compile(rePattern)

    def __call__(self, string):
        '''Return string with all replacements applied.'''
        # Char to char mapping
        if self.charMap:
            return string.translate(self.charMap)

        # String to string mapping
        return self.reObject.sub(self.replaceMatch, string)

    def replaceMatch(self, match):
        '''Return the to value for the text matched by a regexp match object.'''
        item = match.group(0)
        try:
            return self.replacements[item]
        except KeyError:
            # Matched with different case from the from value
            return self._foldedReplacements.get(item.lower())

class VerboseMultiReplacer(MultiReplacer):
    '''MultiReplacer that prints every regular expression replacement it makes.

    Only replaceMatch is overridden, so nothing is printed when the base
    class uses its char to char translation table instead of a regexp.
    '''

    def replaceMatch(self, match):
        '''Print the matched text and its replacement, then return the replacement.'''
        replacement = super(VerboseMultiReplacer, self).replaceMatch(match)
        matchedText = match.group(0)
        print('"%s" replaced with "%s"' % (matchedText, replacement))
        return replacement
        
if __name__ == '__main__':
    # Run as a script: hand everything after the program name to multirep.
    import sys
    commandLineArguments = sys.argv[1:]
    multirep(commandLineArguments)