1 package net.sourceforge.jgeocoder.tiger;
2 import static net.sourceforge.jgeocoder.AddressComponent.CITY;
3 import static net.sourceforge.jgeocoder.AddressComponent.COUNTY;
4 import static net.sourceforge.jgeocoder.AddressComponent.LAT;
5 import static net.sourceforge.jgeocoder.AddressComponent.LON;
6 import static net.sourceforge.jgeocoder.AddressComponent.POSTDIR;
7 import static net.sourceforge.jgeocoder.AddressComponent.PREDIR;
8 import static net.sourceforge.jgeocoder.AddressComponent.STATE;
9 import static net.sourceforge.jgeocoder.AddressComponent.TLID;
10 import static net.sourceforge.jgeocoder.AddressComponent.TYPE;
11 import static net.sourceforge.jgeocoder.AddressComponent.ZIP;
12
13 import java.io.File;
14 import java.util.ArrayList;
15 import java.util.Collections;
16 import java.util.EnumMap;
17 import java.util.HashSet;
18 import java.util.List;
19 import java.util.Map;
20 import java.util.Set;
21
22 import net.sourceforge.jgeocoder.AddressComponent;
23 import net.sourceforge.jgeocoder.CommonUtils;
24 import net.sourceforge.jgeocoder.GeocodeAcuracy;
25 import net.sourceforge.jgeocoder.JGeocodeAddress;
26 import net.sourceforge.jgeocoder.us.AddressParser;
27 import net.sourceforge.jgeocoder.us.AddressStandardizer;
28
29 import org.apache.commons.collections.CollectionUtils;
30 import org.apache.commons.logging.Log;
31 import org.apache.commons.logging.LogFactory;
32
33 import com.sleepycat.je.DatabaseException;
34 import com.sleepycat.persist.EntityCursor;
35 /***
36 * TODO javadocs me
37 * @author jliang
38 *
39 */
40 public class JGeocoder{
41 private static final Log LOGGER = LogFactory.getLog(JGeocoder.class);
42 private ZipCodesDb _zipDb;
43 private ZipCodeDAO _zipDao;
44 private TigerLineDao _tigerDao;
45 public JGeocoder(){
46 this(JGeocoderConfig.DEFAULT);
47 }
48
49 private TigerLineHit getTigerLineHitByZip(Map<AddressComponent, String> normalizedAddr, String zip) throws TigerQueryFailedException, DatabaseException{
50 if(zip == null || !_zipDao.fillInCSByZip(normalizedAddr, zip)){
51 return null;
52 }
53 normalizedAddr.put(ZIP, zip);
54 return _tigerDao.getTigerLineHit(normalizedAddr);
55 }
56
57 private List<ZipCode> getZips(String city, String state) throws DatabaseException{
58 if(city == null || state == null){
59 return Collections.emptyList();
60 }
61 List<ZipCode> ret = new ArrayList<ZipCode>();
62 Location loc = new Location();
63 loc.setCity(city.replaceAll("//s+", ""));
64 loc.setState(state);
65 EntityCursor<ZipCode> zips = null;
66 try{
67 zips = _zipDao.getZipCodeByLocation().subIndex(loc).entities();
68 for(ZipCode zip : zips){
69 ret.add(zip);
70 }
71 }finally{
72 if(zips != null){
73 zips.close();
74 }
75 }
76 return ret;
77 }
78
79
80 private TigerLineHit getTigerLineHit(Map<AddressComponent, String> normalizedAddr) throws DatabaseException{
81 Map<AddressComponent, String> myMap = new EnumMap<AddressComponent, String>(normalizedAddr);
82 TigerLineHit hit = null;
83 Set<String> attemptedZips = new HashSet<String>();
84 try {
85 hit = getTigerLineHitByZip(normalizedAddr, normalizedAddr.get(ZIP));
86 if(normalizedAddr.get(ZIP)!=null){
87 attemptedZips.add(normalizedAddr.get(ZIP));
88 }
89 if(hit != null){
90 return hit;
91 }
92 if(myMap.get(CITY)==null || myMap.get(STATE) == null){
93 myMap.put(CITY, normalizedAddr.get(CITY));
94 myMap.put(STATE, normalizedAddr.get(STATE));
95 }
96 List<TigerLineHit> zipHits = new ArrayList<TigerLineHit>();
97
98 for(ZipCode zipcode : getZips(myMap.get(CITY), myMap.get(STATE))){
99 if(!attemptedZips.contains(zipcode.getZip())){
100 hit = getTigerLineHitByZip(myMap, zipcode.getZip());
101 if(hit != null){
102 zipHits.add(hit);
103 }
104 attemptedZips.add(zipcode.getZip());
105 }
106 }
107 if(CollectionUtils.isNotEmpty(zipHits)){
108 hit = TigerLineDao.findBest(myMap, zipHits);
109 }else{
110 County county = _zipDao.getCounty(normalizedAddr.get(CITY), normalizedAddr.get(STATE));
111 if(county != null){
112 for(String s : county.getZips()){
113 if(!attemptedZips.contains(s)){
114 hit = getTigerLineHitByZip(myMap, s);
115 }
116 if(hit != null){
117 zipHits.add(hit);
118 }
119 attemptedZips.add(s);
120 }
121 }
122 if(CollectionUtils.isNotEmpty(zipHits)){
123 hit = TigerLineDao.findBest(myMap, zipHits);
124 }
125 }
126 if(hit != null){
127 String zip = CommonUtils.nvl(hit.zipL, hit.zipR);
128 _zipDao.fillInCSByZip(myMap, zip);
129 normalizedAddr.putAll(myMap);
130 return hit;
131 }
132
133 } catch (TigerQueryFailedException e) {
134 LOGGER.warn("Tiger/Line DB query failed, street level geocoding will be skipped: "+e.getMessage());
135 if(LOGGER.isDebugEnabled()){
136 LOGGER.debug("", e);
137 }
138 return null;
139 }
140 return null;
141 }
142
143 public JGeocodeAddress geocodeAddress(String addrLine){
144 JGeocodeAddress ret = new JGeocodeAddress();
145 Map<AddressComponent, String> m = AddressParser.parseAddress(addrLine);
146 ret.setParsedAddr(m);
147 if(m == null) return ret;
148
149 m = AddressStandardizer.normalizeParsedAddress(m);
150 ret.setNormalizedAddr(m);
151
152 if(m.get(ZIP) == null &&
153 (m.get(STATE) == null || m.get(CITY)==null)){
154 return ret;
155 }
156
157 GeocodeAcuracy acuracy = GeocodeAcuracy.STREET;
158 m = new EnumMap<AddressComponent, String>(m);
159 TigerLineHit hit = null;
160 try {
161 hit = getTigerLineHit(m);
162 } catch (DatabaseException e) {
163 throw new RuntimeException("Unable to query tiger/line database "+e.getMessage());
164 }
165 if(hit != null){
166 acuracy = GeocodeAcuracy.STREET;
167 Geo geo = Geocoder.geocodeFromHit(Integer.parseInt(hit.streetNum), hit);
168 m.put(ZIP, String.valueOf(geo.zip));
169 m.put(PREDIR, hit.fedirp);
170 m.put(POSTDIR, hit.fedirs);
171 m.put(TYPE, hit.fetype);
172 m.put(TLID, String.valueOf(hit.tlid));
173 m.put(LAT, String.valueOf(geo.lat));
174 m.put(LON, String.valueOf(geo.lon));
175 ret.setGeocodedAddr(m);
176 }else if(_zipDao.geocodeByZip(m)){
177 acuracy = GeocodeAcuracy.ZIP;
178 ret.setGeocodedAddr(m);
179 }else if(_zipDao.geocodeByCityState(m)){
180 acuracy = GeocodeAcuracy.CITY_STATE;
181 ret.setGeocodedAddr(m);
182 }else{
183 return ret;
184 }
185
186 if(ret.getGeocodedAddr()!=null &&
187 ret.getGeocodedAddr().get(COUNTY) == null &&
188 ret.getGeocodedAddr().get(ZIP) != null){
189 try {
190 _zipDao.fillInCSByZip(ret.getGeocodedAddr(), ret.getGeocodedAddr().get(ZIP));
191 } catch (DatabaseException e) {
192 LOGGER.warn("Unable to query zip code", e);
193 }
194 }
195
196 ret.setAcuracy(acuracy);
197 return ret;
198 }
199
200 public JGeocoder(JGeocoderConfig config){
201 _zipDb = new ZipCodesDb();
202 _tigerDao = new TigerLineDao(config.getTigerDataSource());
203 try {
204 _zipDb.init(new File(config.getJgeocoderDataHome()), false, false);
205 _zipDao = new ZipCodeDAO(_zipDb.getStore());
206 } catch (Exception e) {
207 throw new RuntimeException("Unable to create zip db, make sure your system property 'jgeocoder.data.home' is correct"
208 +e.getMessage());
209 }
210
211 }
212
213 public void cleanup(){
214 if(_zipDb != null){
215 try {
216 _zipDb.shutdown();
217 } catch (DatabaseException e) {
218 throw new RuntimeException("Unable to shutdown zip db, "+e.getMessage());
219 }
220 _zipDb = null;
221 }
222 }
223
224 @Override
225 protected void finalize() throws Throwable {
226 super.finalize();
227 cleanup();
228 }
229 }