# $Header$
# 
# (c) Greg Baker 2005
# This is an updated version of my previous AustralianPostcodes program.
#
# The main class is AustralianPostcodes (its results are Location objects).
# It needs to be passed an argument which is an iterable object yielding the
# lines of the Australian postcodes reference book in CSV form, with a header
# row that names at least the "Pcode", "Locality" and "State" columns.  The
# book is best downloaded from the AusPost website:
# http://www.auspost.com.au/postcodes
#
# Just instantiate it, and then pretend that it is a dictionary. 
# The postcode keys are <type 'int'>. The return result is a list of
# Location objects, each of which has methods "postcode", "locality" and "state"
#
# Unfortunately, it is very, very slow.

import csv
import string
import logging
# Module-level logger; AustralianPostcodes.read_datafile() reports the start
# and end of the (slow) datafile parse through it at INFO level.
log = logging.getLogger("AustralianPostcodes")

class Location:
    """One row of the postcode book: a locality, its state and its postcode."""

    def __init__(self, header_line, data_line):
        """Build a location from one CSV data row.

        header_line -- list of column names (the first row of the file);
                       it must include "Pcode", "Locality" and "State".
        data_line   -- list of values for this row, in the same column order.

        Raises KeyError if one of the required columns is absent from the
        row, and ValueError if the postcode is not a decimal integer.
        """
        # Pair each column name with its value; any other columns are ignored.
        row = dict(zip(header_line, data_line))
        # int() replaces the deprecated string.atoi(); a leading zero such as
        # "0800" is still read as decimal 800.
        self.__postcode = int(row["Pcode"])
        self.__locality = row["Locality"]
        self.__state = row["State"]

    def postcode(self):
        """Return the postcode as an int."""
        return self.__postcode

    def locality(self):
        """Return the locality name exactly as it appears in the data."""
        return self.__locality

    def name(self):
        """Alias for locality()."""
        return self.__locality

    def full_name(self):
        """Return "<locality> <state>" using the state as stored (e.g. NSW)."""
        return self.__locality + " " + self.__state

    def really_full_name(self):
        """Return "<locality> <state>" with the state spelt out in full.

        The long form is looked up in the module-level state_abbreviations
        table; KeyError is raised if the state is not listed there.
        """
        return self.__locality + " " + state_abbreviations[self.__state]

    def state(self):
        """Return the state exactly as it appears in the data."""
        return self.__state

    def __repr__(self):
        # repr() is the portable spelling of the old backtick operator.
        return self.__locality + " " + self.__state + " " + repr(self.__postcode)

    # str() and repr() have always produced the same text for a Location.
    __str__ = __repr__

# Maps each state/territory abbreviation, as used in the "State" column of
# the datafile, to its full upper-case name.
state_abbreviations = {
    "NSW": "NEW SOUTH WALES",
    "QLD": "QUEENSLAND",
    "ACT": "AUSTRALIAN CAPITAL TERRITORY",
    "NT": "NORTHERN TERRITORY",
    "SA": "SOUTH AUSTRALIA",
    "WA": "WESTERN AUSTRALIA",
    "TAS": "TASMANIA",
    "VIC": "VICTORIA",
}
    



class AustralianPostcodes:
    """Lazily-loaded, dictionary-like index over the AusPost postcode book.

    Nothing is parsed until the first lookup.  Indexing accepts:

      * an int postcode                      -> pc[2000]
      * a locality name (any case)           -> pc["Sydney"]
      * a name followed by a state           -> pc["Castle Hill NSW"]
      * a (state, locality) tuple, either
        way round                            -> pc["NSW", "Castle Hill"]

    Every successful lookup returns a list of Location objects; a failed
    one raises KeyError.
    """

    def __init__(self, auspost_datafile):
        """Remember the data source without reading it.

        auspost_datafile -- iterable of CSV lines (for instance an open
        file); it is handed to csv.reader unchanged.
        """
        self.csv_reader = csv.reader(auspost_datafile)
        # postcode (int) -> list of Location
        self.by_postcode = {}
        # locality name -> list of Location (one entry per state it occurs in)
        self.by_name = {}
        # state -> {locality name -> list of Location}; each state is
        # reachable under its abbreviation and under its full name.
        self.by_state_and_name = {}
        # Becomes True once read_datafile() has run to completion.
        self.ready = False

    def read_datafile(self):
        """Parse the whole datafile and fill the three indexes.

        The first row is taken as the column header.  An empty datafile
        simply leaves the indexes empty.

        Raises ValueError (carrying the offending state) if a row names a
        state that is not listed in state_abbreviations.
        """
        log.info("Preparing to read postcodes datafile; will take some time.")
        header = None
        for row in self.csv_reader:
            if header is None:
                # First row: column names, not data.
                header = row
                continue
            place = Location(header, row)
            pcode = place.postcode()
            locality = place.locality()
            state = place.state()
            self.by_postcode.setdefault(pcode, []).append(place)
            if state not in self.by_state_and_name:
                self.by_state_and_name[state] = {}
                # Put in the full names for states as well: both keys share
                # one and the same inner dictionary.
                if state in state_abbreviations:
                    full_state = state_abbreviations[state]
                    self.by_state_and_name[full_state] = self.by_state_and_name[state]
                else:
                    raise ValueError(state)
            self.by_state_and_name[state].setdefault(locality, []).append(place)
            self.by_name.setdefault(locality, []).append(place)
        self.ready = True
        log.info("Finished reading postcodes datafile.")

    def all_locations(self, state=None):
        """Return every known location, grouped by locality name.

        state -- optional state abbreviation or full state name (any case);
                 when given, only that state's localities are returned.

        Returns a list whose items are lists of Location objects.  Raises
        KeyError if the state is unknown.
        """
        if not self.ready:
            self.read_datafile()
        if state is None:
            return list(self.by_name.values())
        return list(self.by_state_and_name[state.upper()].values())

    def search_for_placenames(self, str):
        """Find the localities mentioned somewhere inside free text.

        str -- the text to scan.  (The parameter shadows the builtin; the
               name is kept so that keyword callers continue to work.)

        The text is upper-cased, the characters ,.-@/;: and tab are turned
        into spaces, and runs of spaces are collapsed.  A locality matches
        only when its name appears as a run of whole words.  If any match
        is followed by its state (abbreviated or in full), matches without
        a state are discarded.  A name contained in a longer matching name
        ("SYDNEY" inside "NORTH SYDNEY") is dropped in favour of the longer.

        Returns a flat list of Location objects, possibly empty.
        """
        if not self.ready:
            self.read_datafile()
        # Upcase the string, and ditch obvious punctuation.
        text = str.upper()
        for punctuation in ',.-\t@/;:':
            text = text.replace(punctuation, ' ')
        while '  ' in text:
            text = text.replace('  ', ' ')
        # Padding both ends lets one test cover "starts with", "ends with"
        # and "in the middle" while still insisting on whole words.
        padded = ' ' + text + ' '

        results = {}
        state_matched = False
        for (name, locations) in self.by_name.items():
            if (' ' + name + ' ') not in padded:
                # Absent, or merely part of a longer word.
                continue
            # If the text says "BOUGHT IN CASTLE HILL NSW", we don't want to
            # include the location "CASTLE HILL QLD 4810".
            did_fit = [place for place in locations
                       if place.full_name() in text
                       or place.really_full_name() in text]
            if did_fit:
                state_matched = True
                results[name] = (True, did_fit)
            else:
                # Fine; the text doesn't include the state name at all.
                results[name] = (False, locations)

        if state_matched:
            # If one name matched a state, ditch any that didn't.
            for name in list(results.keys()):
                if not results[name][0]:
                    del results[name]

        # Now, tidy up duplications like "SYDNEY" and "NORTH SYDNEY" both
        # matching -- clearly we just want the longest one.
        names = list(results.keys())
        for shorter in names:
            for longer in names:
                if shorter != longer and shorter in longer:
                    results.pop(shorter, None)

        answer = []
        for (state_matching, where) in results.values():
            answer.extend(where)
        return answer

    def __getitem__(self, what):
        """Look up locations by postcode, by name, or by state and name.

        what -- an int postcode, a locality name optionally followed by a
                space and a state, or a 2-tuple holding a state and a
                locality in either order.  Text is matched case-insensitively.

        Returns the list of matching Location objects.  Raises KeyError
        (carrying the original key) when nothing matches.
        """
        if not self.ready:
            self.read_datafile()
        if isinstance(what, int):
            return self.by_postcode[what]
        if isinstance(what, tuple):
            (first, second) = what
            first = first.upper()
            second = second.upper()
            # Try (state, locality) first, then (locality, state).
            for (state, locality) in ((first, second), (second, first)):
                in_state = self.by_state_and_name.get(state)
                if in_state is not None and locality in in_state:
                    return in_state[locality]
            raise KeyError(what)
        wanted = what.upper()
        if wanted in self.by_name:
            return self.by_name[wanted]
        # maybe it includes a state in the string (presumably with a space)
        for state in list(self.by_state_and_name.keys()):
            suffix = ' ' + state
            if suffix in wanted:
                # Everything before the last " <state>" is the locality.
                locality = wanted[:wanted.rindex(suffix)]
                in_state = self.by_state_and_name[state]
                if locality in in_state:
                    return in_state[locality]
        raise KeyError(what)
		
	    
		
	
	    
	

