"""Split UK postcodes into an (outcode, incode) pair, optionally validating them."""

import re

# Postcode area prefixes accepted in strict mode.  The order is kept as-is
# because it determines the text of the exported pattern constants below.
POSTAL_ZONES = [
    'AB', 'AL', 'B', 'BA', 'BB', 'BD', 'BH', 'BL', 'BN', 'BR', 'BS', 'BT',
    'CA', 'CB', 'CF', 'CH', 'CM', 'CO', 'CR', 'CT', 'CV', 'CW', 'DA', 'DD',
    'DE', 'DG', 'DH', 'DL', 'DN', 'DT', 'DY', 'E', 'EC', 'EH', 'EN', 'EX',
    'FK', 'FY', 'G', 'GL', 'GY', 'GU', 'HA', 'HD', 'HG', 'HP', 'HR', 'HS',
    'HU', 'HX', 'IG', 'IM', 'IP', 'IV', 'JE', 'KA', 'KT', 'KW', 'KY', 'L',
    'LA', 'LD', 'LE', 'LL', 'LN', 'LS', 'LU', 'M', 'ME', 'MK', 'ML', 'N',
    'NE', 'NG', 'NN', 'NP', 'NR', 'NW', 'OL', 'OX', 'PA', 'PE', 'PH', 'PL',
    'PO', 'PR', 'RG', 'RH', 'RM', 'S', 'SA', 'SE', 'SG', 'SK', 'SL', 'SM',
    'SN', 'SO', 'SP', 'SR', 'SS', 'ST', 'SW', 'SY', 'TA', 'TD', 'TF', 'TN',
    'TQ', 'TR', 'TS', 'TW', 'UB', 'W', 'WA', 'WC', 'WD', 'WF', 'WN', 'WR',
    'WS', 'WV', 'YO', 'ZE',
]

# Letters allowed directly after the district digit of an outcode.
# NOTE(review): the pattern below applies this list after one-letter areas
# too (where the letter is in third position) -- confirm whether outcodes
# such as 'W1D' or 'N1C' are meant to be rejected in strict mode.
FOURTH_POS_CHARS = ['A', 'B', 'E', 'H', 'M', 'N', 'P', 'R', 'V', 'W', 'X', 'Y']

# Letters allowed in the two trailing positions of an incode.
INCODE_CHARS = ['A', 'B', 'D', 'E', 'F', 'G', 'H', 'J', 'L', 'N', 'P', 'Q',
                'R', 'S', 'T', 'U', 'W', 'X', 'Y', 'Z']

# Outcode: a known area followed by digit+letter, or by one or two digits.
OUTCODE_PATTERN = (r'((?:' +
                   '|'.join(POSTAL_ZONES) +
                   r')(?:\d[' +
                   ''.join(FOURTH_POS_CHARS) +
                   r']|\d{1,2}))')

# Incode: one digit followed by two letters from INCODE_CHARS.
INCODE_PATTERN = (r'(\d[' +
                  ''.join(INCODE_CHARS) +
                  r'][' +
                  ''.join(INCODE_CHARS) +
                  r'])')

# Unanchored at the end, so it can also be used to find a postcode prefix.
POSTCODE_PATTERN = OUTCODE_PATTERN + r'\s?' + INCODE_PATTERN
STANDALONE_OUTCODE_PATTERN = OUTCODE_PATTERN + r'\s*$'

# Compiled once for parse_uk_postcode().  The full-postcode regex carries the
# same end anchor as the standalone-outcode pattern so that strict parsing
# cannot succeed on a valid prefix followed by extra characters.
_STRICT_POSTCODE_RE = re.compile(POSTCODE_PATTERN + r'\s*$')
_STRICT_OUTCODE_RE = re.compile(STANDALONE_OUTCODE_PATTERN)


def parse_uk_postcode(postcode, strict=True, incode_mandatory=True):
    """Split a UK postcode string into an ``(outcode, incode)`` tuple.

    Spaces are removed and the text is upper-cased before parsing, so
    ``'cr0 2yr'`` and ``'CR02YR'`` give the same result.

    Args:
        postcode: The postcode text to parse.
        strict: When true, the outcode must start with an entry of
            ``POSTAL_ZONES`` and the whole string must match the postcode
            (or outcode-only) pattern; ``GIR 0AA`` / ``GIR`` are accepted as
            special cases.  When false, no validation is done: inputs of up
            to four characters are treated as an outcode only, anything
            longer is split before its last three characters.
        incode_mandatory: When true, an input consisting of an outcode only
            is rejected; when false, it is returned with ``''`` as incode.

    Returns:
        A 2-tuple ``(outcode, incode)`` of upper-case strings; ``incode`` is
        ``''`` when only an outcode was supplied and that is permitted.

    Raises:
        ValueError: ``'Incode mandatory'`` if only an outcode was given while
            ``incode_mandatory`` is true, or ``'Invalid postcode'`` if strict
            validation fails.
    """
    postcode = postcode.replace(' ', '').upper()

    if not strict:
        # Lenient mode: purely positional split, no character validation.
        if len(postcode) > 4:
            return postcode[:-3], postcode[-3:]
        if incode_mandatory:
            raise ValueError('Incode mandatory')
        return postcode, ''

    full_match = _STRICT_POSTCODE_RE.match(postcode)
    if full_match:
        return full_match.group(1, 2)

    outcode_match = _STRICT_OUTCODE_RE.match(postcode)
    if outcode_match:
        if incode_mandatory:
            raise ValueError('Incode mandatory')
        return outcode_match.group(1), ''

    # 'GIR' is not in POSTAL_ZONES, so this one code is checked literally.
    if postcode == 'GIR0AA':
        return 'GIR', '0AA'
    if postcode == 'GIR':
        if incode_mandatory:
            raise ValueError('Incode mandatory')
        return 'GIR', ''

    raise ValueError('Invalid postcode')


def _self_test():
    """Run the built-in test table, print a report, return the failure count."""
    # Each row: (input, strict, incode_mandatory, expected result), where the
    # expected result is either the returned tuple or the text 'ValueError'.
    test_data = [
        ('cr0 2yr', False, False, ('CR0', '2YR')),
        ('cr02yr', False, False, ('CR0', '2YR')),
        ('dn16 9aa', False, False, ('DN16', '9AA')),
        ('dn169aa', False, False, ('DN16', '9AA')),
        ('ec1a 1hq', False, False, ('EC1A', '1HQ')),
        ('ec1a1hq', False, False, ('EC1A', '1HQ')),
        ('m2 5bq', False, False, ('M2', '5BQ')),
        ('m25bq', False, False, ('M2', '5BQ')),
        ('m34 4ab', False, False, ('M34', '4AB')),
        ('m344ab', False, False, ('M34', '4AB')),
        ('sw19 2et', False, False, ('SW19', '2ET')),
        ('sw192et', False, False, ('SW19', '2ET')),
        ('w1a 4zz', False, False, ('W1A', '4ZZ')),
        ('w1a4zz', False, False, ('W1A', '4ZZ')),
        ('cr0', False, False, ('CR0', '')),
        ('sw19', False, False, ('SW19', '')),
        ('xx0 2yr', False, False, ('XX0', '2YR')),
        ('3r0 2yr', False, False, ('3R0', '2YR')),
        ('20 2yr', False, False, ('20', '2YR')),
        ('3r0 ayr', False, False, ('3R0', 'AYR')),
        ('3r0 22r', False, False, ('3R0', '22R')),
        ('w1i 4zz', False, False, ('W1I', '4ZZ')),
        ('3r0', False, False, ('3R0', '')),
        ('ec1c 1hq', False, False, ('EC1C', '1HQ')),
        ('m344cb', False, False, ('M34', '4CB')),
        ('gir 0aa', False, False, ('GIR', '0AA')),
        ('gir', False, False, ('GIR', '')),

        ('cr0 2yr', False, True, ('CR0', '2YR')),
        ('cr02yr', False, True, ('CR0', '2YR')),
        ('dn16 9aa', False, True, ('DN16', '9AA')),
        ('dn169aa', False, True, ('DN16', '9AA')),
        ('ec1a 1hq', False, True, ('EC1A', '1HQ')),
        ('ec1a1hq', False, True, ('EC1A', '1HQ')),
        ('m2 5bq', False, True, ('M2', '5BQ')),
        ('m25bq', False, True, ('M2', '5BQ')),
        ('m34 4ab', False, True, ('M34', '4AB')),
        ('m344ab', False, True, ('M34', '4AB')),
        ('sw19 2et', False, True, ('SW19', '2ET')),
        ('sw192et', False, True, ('SW19', '2ET')),
        ('w1a 4zz', False, True, ('W1A', '4ZZ')),
        ('w1a4zz', False, True, ('W1A', '4ZZ')),
        ('cr0', False, True, 'ValueError'),
        ('sw19', False, True, 'ValueError'),
        ('xx0 2yr', False, True, ('XX0', '2YR')),
        ('3r0 2yr', False, True, ('3R0', '2YR')),
        ('20 2yr', False, True, ('20', '2YR')),
        ('3r0 ayr', False, True, ('3R0', 'AYR')),
        ('3r0 22r', False, True, ('3R0', '22R')),
        ('w1i 4zz', False, True, ('W1I', '4ZZ')),
        ('3r0', False, True, 'ValueError'),
        ('ec1c 1hq', False, True, ('EC1C', '1HQ')),
        ('m344cb', False, True, ('M34', '4CB')),
        ('gir 0aa', False, True, ('GIR', '0AA')),
        ('gir', False, True, 'ValueError'),

        ('cr0 2yr', True, False, ('CR0', '2YR')),
        ('cr02yr', True, False, ('CR0', '2YR')),
        ('dn16 9aa', True, False, ('DN16', '9AA')),
        ('dn169aa', True, False, ('DN16', '9AA')),
        ('ec1a 1hq', True, False, ('EC1A', '1HQ')),
        ('ec1a1hq', True, False, ('EC1A', '1HQ')),
        ('m2 5bq', True, False, ('M2', '5BQ')),
        ('m25bq', True, False, ('M2', '5BQ')),
        ('m34 4ab', True, False, ('M34', '4AB')),
        ('m344ab', True, False, ('M34', '4AB')),
        ('sw19 2et', True, False, ('SW19', '2ET')),
        ('sw192et', True, False, ('SW19', '2ET')),
        ('w1a 4zz', True, False, ('W1A', '4ZZ')),
        ('w1a4zz', True, False, ('W1A', '4ZZ')),
        ('cr0', True, False, ('CR0', '')),
        ('sw19', True, False, ('SW19', '')),
        ('xx0 2yr', True, False, 'ValueError'),
        ('3r0 2yr', True, False, 'ValueError'),
        ('20 2yr', True, False, 'ValueError'),
        ('3r0 ayr', True, False, 'ValueError'),
        ('3r0 22r', True, False, 'ValueError'),
        ('w1i 4zz', True, False, 'ValueError'),
        ('3r0', True, False, 'ValueError'),
        ('ec1c 1hq', True, False, 'ValueError'),
        ('m344cb', True, False, 'ValueError'),
        ('gir 0aa', True, False, ('GIR', '0AA')),
        ('gir', True, False, ('GIR', '')),

        ('cr0 2yr', True, True, ('CR0', '2YR')),
        ('cr02yr', True, True, ('CR0', '2YR')),
        ('dn16 9aa', True, True, ('DN16', '9AA')),
        ('dn169aa', True, True, ('DN16', '9AA')),
        ('ec1a 1hq', True, True, ('EC1A', '1HQ')),
        ('ec1a1hq', True, True, ('EC1A', '1HQ')),
        ('m2 5bq', True, True, ('M2', '5BQ')),
        ('m25bq', True, True, ('M2', '5BQ')),
        ('m34 4ab', True, True, ('M34', '4AB')),
        ('m344ab', True, True, ('M34', '4AB')),
        ('sw19 2et', True, True, ('SW19', '2ET')),
        ('sw192et', True, True, ('SW19', '2ET')),
        ('w1a 4zz', True, True, ('W1A', '4ZZ')),
        ('w1a4zz', True, True, ('W1A', '4ZZ')),
        ('cr0', True, True, 'ValueError'),
        ('sw19', True, True, 'ValueError'),
        ('xx0 2yr', True, True, 'ValueError'),
        ('3r0 2yr', True, True, 'ValueError'),
        ('20 2yr', True, True, 'ValueError'),
        ('3r0 ayr', True, True, 'ValueError'),
        ('3r0 22r', True, True, 'ValueError'),
        ('w1i 4zz', True, True, 'ValueError'),
        ('3r0', True, True, 'ValueError'),
        ('ec1c 1hq', True, True, 'ValueError'),
        ('m344cb', True, True, 'ValueError'),
        ('gir 0aa', True, True, ('GIR', '0AA')),
        ('gir', True, True, 'ValueError'),

        # Regression: strict mode must reject a valid postcode followed by
        # extra characters instead of silently ignoring them.
        ('cr0 2yr x', True, True, 'ValueError'),
        ('cr0 2yr x', True, False, 'ValueError'),
    ]

    failures = 0
    for postcode, strict, incode_mandatory, required_result in test_data:
        try:
            actual_result = parse_uk_postcode(postcode, strict, incode_mandatory)
        except ValueError:
            actual_result = 'ValueError'
        if actual_result != required_result:
            failures += 1
            # Single-argument print() emits the same text on Python 2 and 3.
            print('Failed: %r != %r for input postcode = %r, strict = %r, '
                  'incode_mandatory = %r'
                  % (actual_result, required_result, postcode, strict,
                     incode_mandatory))

    if failures:
        print('%d failures' % failures)
    else:
        print('Passed!')
    return failures


if __name__ == '__main__':
    _self_test()