View Javadoc

1   package net.sourceforge.jgeocoder.tiger;
2   import static net.sourceforge.jgeocoder.AddressComponent.CITY;
3   import static net.sourceforge.jgeocoder.AddressComponent.COUNTY;
4   import static net.sourceforge.jgeocoder.AddressComponent.LAT;
5   import static net.sourceforge.jgeocoder.AddressComponent.LON;
6   import static net.sourceforge.jgeocoder.AddressComponent.POSTDIR;
7   import static net.sourceforge.jgeocoder.AddressComponent.PREDIR;
8   import static net.sourceforge.jgeocoder.AddressComponent.STATE;
9   import static net.sourceforge.jgeocoder.AddressComponent.TLID;
10  import static net.sourceforge.jgeocoder.AddressComponent.TYPE;
11  import static net.sourceforge.jgeocoder.AddressComponent.ZIP;
12  
13  import java.io.File;
14  import java.util.ArrayList;
15  import java.util.Collections;
16  import java.util.EnumMap;
17  import java.util.HashSet;
18  import java.util.List;
19  import java.util.Map;
20  import java.util.Set;
21  
22  import net.sourceforge.jgeocoder.AddressComponent;
23  import net.sourceforge.jgeocoder.CommonUtils;
24  import net.sourceforge.jgeocoder.GeocodeAcuracy;
25  import net.sourceforge.jgeocoder.JGeocodeAddress;
26  import net.sourceforge.jgeocoder.us.AddressParser;
27  import net.sourceforge.jgeocoder.us.AddressStandardizer;
28  
29  import org.apache.commons.collections.CollectionUtils;
30  import org.apache.commons.logging.Log;
31  import org.apache.commons.logging.LogFactory;
32  
33  import com.sleepycat.je.DatabaseException;
34  import com.sleepycat.persist.EntityCursor;
35  /***
36   * TODO javadocs me
37   * @author jliang
38   *
39   */
40  public class JGeocoder{
41    private static final Log LOGGER = LogFactory.getLog(JGeocoder.class);
42    private ZipCodesDb _zipDb;
43    private ZipCodeDAO _zipDao;
44    private TigerLineDao _tigerDao;
45    public JGeocoder(){
46      this(JGeocoderConfig.DEFAULT);
47    }
48  
49    private TigerLineHit getTigerLineHitByZip(Map<AddressComponent, String> normalizedAddr, String zip) throws TigerQueryFailedException, DatabaseException{
50        if(zip == null || !_zipDao.fillInCSByZip(normalizedAddr, zip)){
51            return null;
52        }
53        normalizedAddr.put(ZIP, zip);
54        return _tigerDao.getTigerLineHit(normalizedAddr);
55    }
56    
57    private List<ZipCode> getZips(String city, String state) throws DatabaseException{
58        if(city == null || state == null){
59            return Collections.emptyList();
60        }
61        List<ZipCode> ret = new ArrayList<ZipCode>();
62        Location loc = new Location();
63        loc.setCity(city.replaceAll("//s+", ""));
64        loc.setState(state);
65        EntityCursor<ZipCode> zips = null;
66        try{
67          zips = _zipDao.getZipCodeByLocation().subIndex(loc).entities();
68          for(ZipCode zip : zips){
69            ret.add(zip);
70          }
71        }finally{
72          if(zips != null){
73            zips.close();
74          }
75        }
76        return ret;
77    }
78    
79    
80    private TigerLineHit getTigerLineHit(Map<AddressComponent, String> normalizedAddr) throws DatabaseException{
81      Map<AddressComponent, String> myMap = new EnumMap<AddressComponent, String>(normalizedAddr);
82      TigerLineHit hit = null;
83      Set<String> attemptedZips = new HashSet<String>();
84      try { //try the parsed zip
85        hit = getTigerLineHitByZip(normalizedAddr, normalizedAddr.get(ZIP));
86        if(normalizedAddr.get(ZIP)!=null){
87            attemptedZips.add(normalizedAddr.get(ZIP));
88        }
89        if(hit != null){
90            return hit;
91        }
92        if(myMap.get(CITY)==null || myMap.get(STATE) == null){ //use the zip's city, state if the input does not have one
93            myMap.put(CITY, normalizedAddr.get(CITY));
94            myMap.put(STATE, normalizedAddr.get(STATE));
95        }
96        List<TigerLineHit> zipHits = new ArrayList<TigerLineHit>();
97        
98        for(ZipCode zipcode : getZips(myMap.get(CITY), myMap.get(STATE))){
99            if(!attemptedZips.contains(zipcode.getZip())){
100               hit = getTigerLineHitByZip(myMap, zipcode.getZip());
101               if(hit != null){
102                   zipHits.add(hit);
103               }
104               attemptedZips.add(zipcode.getZip());
105           }
106       }
107       if(CollectionUtils.isNotEmpty(zipHits)){
108           hit = TigerLineDao.findBest(myMap, zipHits);
109       }else{
110           County county = _zipDao.getCounty(normalizedAddr.get(CITY), normalizedAddr.get(STATE));
111           if(county != null){
112               for(String s : county.getZips()){
113                   if(!attemptedZips.contains(s)){
114                       hit = getTigerLineHitByZip(myMap, s);
115                   }
116                   if(hit != null){
117                       zipHits.add(hit);
118                   }
119                   attemptedZips.add(s); //
120               }
121           }
122           if(CollectionUtils.isNotEmpty(zipHits)){
123               hit = TigerLineDao.findBest(myMap, zipHits);
124           }
125       }
126       if(hit != null){
127           String zip = CommonUtils.nvl(hit.zipL, hit.zipR);
128           _zipDao.fillInCSByZip(myMap, zip);
129           normalizedAddr.putAll(myMap);
130           return hit;
131       }
132       
133     } catch (TigerQueryFailedException e) {
134         LOGGER.warn("Tiger/Line DB query failed, street level geocoding will be skipped: "+e.getMessage());
135         if(LOGGER.isDebugEnabled()){
136             LOGGER.debug("", e);
137         }
138         return null;
139     }
140     return null;
141   }
142   
143   public JGeocodeAddress geocodeAddress(String addrLine){
144     JGeocodeAddress ret = new JGeocodeAddress();
145     Map<AddressComponent, String> m  = AddressParser.parseAddress(addrLine);
146     ret.setParsedAddr(m);
147     if(m == null) return ret;//FIXME: throw exception instead
148     
149     m = AddressStandardizer.normalizeParsedAddress(m);
150     ret.setNormalizedAddr(m);
151     
152     if(m.get(ZIP) == null &&  //if zip is missing
153         (m.get(STATE) == null || m.get(CITY)==null)){ //city or state is missing 
154       return ret;
155     }
156     
157     GeocodeAcuracy acuracy = GeocodeAcuracy.STREET;
158     m = new EnumMap<AddressComponent, String>(m);
159     TigerLineHit hit = null;
160     try {
161       hit = getTigerLineHit(m);
162     } catch (DatabaseException e) {
163       throw new RuntimeException("Unable to query tiger/line database "+e.getMessage());
164     }
165     if(hit != null){
166       acuracy = GeocodeAcuracy.STREET;
167       Geo geo = Geocoder.geocodeFromHit(Integer.parseInt(hit.streetNum), hit);
168       m.put(ZIP, String.valueOf(geo.zip));
169       m.put(PREDIR, hit.fedirp);
170       m.put(POSTDIR, hit.fedirs);
171       m.put(TYPE, hit.fetype);
172       m.put(TLID, String.valueOf(hit.tlid));
173       m.put(LAT, String.valueOf(geo.lat));
174       m.put(LON, String.valueOf(geo.lon));
175       ret.setGeocodedAddr(m);
176     }else if(_zipDao.geocodeByZip(m)){
177       acuracy = GeocodeAcuracy.ZIP;
178       ret.setGeocodedAddr(m);
179     }else if(_zipDao.geocodeByCityState(m)){
180       acuracy = GeocodeAcuracy.CITY_STATE;
181       ret.setGeocodedAddr(m);
182     }else{
183       return ret;
184     }
185     
186     if(ret.getGeocodedAddr()!=null && 
187        ret.getGeocodedAddr().get(COUNTY) == null &&
188        ret.getGeocodedAddr().get(ZIP) != null){
189       try {
190         _zipDao.fillInCSByZip(ret.getGeocodedAddr(), ret.getGeocodedAddr().get(ZIP));
191       } catch (DatabaseException e) {
192         LOGGER.warn("Unable to query zip code", e);
193       }
194     }
195     
196     ret.setAcuracy(acuracy);
197     return ret;
198   }
199   
200   public JGeocoder(JGeocoderConfig config){
201     _zipDb = new ZipCodesDb();
202     _tigerDao = new TigerLineDao(config.getTigerDataSource());
203     try {
204       _zipDb.init(new File(config.getJgeocoderDataHome()), false, false);
205       _zipDao = new ZipCodeDAO(_zipDb.getStore());
206     } catch (Exception e) {
207       throw new RuntimeException("Unable to create zip db, make sure your system property 'jgeocoder.data.home' is correct"
208           +e.getMessage());
209     }
210     
211   }
212   
213   public void cleanup(){
214     if(_zipDb != null){
215       try {
216         _zipDb.shutdown();
217       } catch (DatabaseException e) {
218         throw new RuntimeException("Unable to shutdown zip db, "+e.getMessage());
219       }
220       _zipDb = null;
221     }
222   }
223   
224   @Override
225   protected void finalize() throws Throwable {
226     super.finalize();
227     cleanup();
228   }
229 }