View Javadoc

1   package net.sourceforge.jgeocoder.us;
2   
3   import static net.sourceforge.jgeocoder.AddressComponent.CITY;
4   import static net.sourceforge.jgeocoder.AddressComponent.LINE2;
5   import static net.sourceforge.jgeocoder.AddressComponent.NAME;
6   import static net.sourceforge.jgeocoder.AddressComponent.NUMBER;
7   import static net.sourceforge.jgeocoder.AddressComponent.POSTDIR;
8   import static net.sourceforge.jgeocoder.AddressComponent.POSTDIR2;
9   import static net.sourceforge.jgeocoder.AddressComponent.PREDIR;
10  import static net.sourceforge.jgeocoder.AddressComponent.PREDIR2;
11  import static net.sourceforge.jgeocoder.AddressComponent.STATE;
12  import static net.sourceforge.jgeocoder.AddressComponent.STREET;
13  import static net.sourceforge.jgeocoder.AddressComponent.STREET2;
14  import static net.sourceforge.jgeocoder.AddressComponent.TYPE;
15  import static net.sourceforge.jgeocoder.AddressComponent.TYPE2;
16  import static net.sourceforge.jgeocoder.AddressComponent.ZIP;
17  import static net.sourceforge.jgeocoder.us.AddressRegexLibrary.LINE2A_GROUPED;
18  import static net.sourceforge.jgeocoder.us.Data.getDIRECTIONAL_MAP;
19  import static net.sourceforge.jgeocoder.us.Data.getNUMBER_MAP;
20  import static net.sourceforge.jgeocoder.us.Data.getSTATE_CODE_MAP;
21  import static net.sourceforge.jgeocoder.us.Data.getSTREET_TYPE_MAP;
22  import static net.sourceforge.jgeocoder.us.Data.getUNIT_MAP;
23  import static net.sourceforge.jgeocoder.us.RegexLibrary.TXT_NUM_0_99;
24  import static net.sourceforge.jgeocoder.us.Utils.nvl;
25  
26  import java.util.EnumMap;
27  import java.util.Map;
28  import java.util.regex.Matcher;
29  import java.util.regex.Pattern;
30  
31  import net.sourceforge.jgeocoder.AddressComponent;
32  
33  import org.apache.commons.lang.StringUtils;
34  
35  //TODO might want to consider synonym resolutions for common city names
36  /***
37   * TODO javadocs me
38   * @author jliang
39   *
40   */
41  public class AddressStandardizer{
42    
43    /***
44     * Turn input map into one line of format
45     * 
46     * {name, num predir street type postdir, line2, city, state, zip}
47     * 
48     * @param parsedAddr
49     * @return
50     */
51    public static String toSingleLine(Map<AddressComponent, String> parsedAddr){
52      if(parsedAddr == null){
53        return null;
54      }
55      StringBuilder sb = new StringBuilder();
56      appendIfNotNull(sb, parsedAddr.get(NAME), ", ");
57      appendIfNotNull(sb, parsedAddr.get(NUMBER), " ");
58      appendIfNotNull(sb, parsedAddr.get(PREDIR), " ");
59      appendIfNotNull(sb, parsedAddr.get(STREET), " ");
60      if(parsedAddr.get(STREET2) != null){
61        appendIfNotNull(sb, parsedAddr.get(TYPE2), " ");
62        appendIfNotNull(sb, parsedAddr.get(POSTDIR2), " ");
63        sb.append("& ");
64        appendIfNotNull(sb, parsedAddr.get(PREDIR2), " ");
65        appendIfNotNull(sb, parsedAddr.get(STREET2), " ");
66      }
67      appendIfNotNull(sb, parsedAddr.get(TYPE), " ");
68      appendIfNotNull(sb, parsedAddr.get(POSTDIR), " ");
69      if(StringUtils.isNotBlank(sb.toString())){
70        sb.append(", ");
71      }
72      appendIfNotNull(sb, parsedAddr.get(LINE2), ", ");
73      appendIfNotNull(sb, parsedAddr.get(CITY), ", ");
74      appendIfNotNull(sb, parsedAddr.get(STATE), " ");
75      appendIfNotNull(sb, parsedAddr.get(ZIP), " ");
76      return sb.toString().replaceAll(" ,", ",");
77    }
78    
79    private static void appendIfNotNull(StringBuilder sb, String s, String suffix){
80      if(s != null){
81        sb.append(s).append(suffix);
82      }
83    }
84    
85    /***
86     * Normalize the input parsedAddr map into a standardize format
87     * 
88     * @param parsedAddr
89     * @return normalized address in a map
90     */
91    public static Map<AddressComponent, String>  normalizeParsedAddress(Map<AddressComponent, String> parsedAddr){
92      Map<AddressComponent, String> ret = new EnumMap<AddressComponent, String>(AddressComponent.class);
93      //just take the digits from the number component
94      for(Map.Entry<AddressComponent, String> e : parsedAddr.entrySet()){
95        String v = StringUtils.upperCase(e.getValue());
96        switch (e.getKey()) {
97          case PREDIR: ret.put(PREDIR, normalizeDir(v)); break;
98          case POSTDIR: ret.put(POSTDIR, normalizeDir(v)); break;
99          case TYPE: ret.put(TYPE, normalizeStreetType(v)); break;
100         case PREDIR2: ret.put(PREDIR2, normalizeDir(v)); break;
101         case POSTDIR2: ret.put(POSTDIR2, normalizeDir(v)); break;
102         case TYPE2: ret.put(TYPE2, normalizeStreetType(v)); break;
103         case NUMBER: ret.put(NUMBER, normalizeNum(v)); break;
104         case STATE: ret.put(STATE, normalizeState(v)); break;
105         case ZIP: ret.put(ZIP, normalizeZip(v)); break;
106         case LINE2: ret.put(LINE2, normalizeLine2(v)); break;
107         case CITY: ret.put(CITY, saintAbbrExpansion(v)); break;
108         case STREET: ret.put(STREET, normalizeOrdinal(saintAbbrExpansion(v))); break;
109         case STREET2: ret.put(STREET2, normalizeOrdinal(saintAbbrExpansion(v))); break;
110         default: ret.put(e.getKey(), v); break;
111       }
112     }
113     ret.put(CITY, resolveCityAlias(ret.get(CITY), ret.get(STATE)));
114     return ret;
115   }
116   //oh man... what had i got myself into...
117   //XXX this class is tightly coupled with the regex library classes
118   private static final Pattern TXT_NUM = Pattern.compile("^//W*("+TXT_NUM_0_99+")//W*");
119   private static final Pattern DIGIT = Pattern.compile("(.*?//d+)//W*(.+)?");
120   private static String normalizeNum(String num){
121     if(num == null) return null;
122     Matcher m = TXT_NUM.matcher(num);
123     String ret = null;
124     if(m.matches()){
125       ret = m.group(1);
126       if(ret.contains("-") || ret.contains(" ")){//it's a 2 part number
127         String[] pair = ret.split("[ -]");
128         String pre = getNUMBER_MAP().get(pair[0]).substring(0, 1);
129         ret = pre+getNUMBER_MAP().get(pair[1]);
130       }else{
131         ret = getNUMBER_MAP().get(ret);
132       }
133     }else{
134       m = DIGIT.matcher(num);
135       if(m.matches()){
136         ret = m.group(2) == null? m.group(1): m.group(1)+"-"+m.group(2);
137       }
138     }
139     return nvl(ret, num) ;
140   }
141 
142   private static String normalizeDir(String dir){
143     if(dir == null) return null;
144     dir = dir.replace(" ", "");
145     return dir.length() > 2 ? getDIRECTIONAL_MAP().get(dir): dir;
146   }
147   
148   private static String normalizeStreetType(String type){
149     return nvl(getSTREET_TYPE_MAP().get(type), type);
150   }
151   
152   public static String normalizeState(String state){
153     return nvl(getSTATE_CODE_MAP().get(state), state);
154   }
155   private static final Pattern LINE2A = Pattern.compile("//W*(?:"+LINE2A_GROUPED+")//W*");
156   private static String normalizeLine2(String line2){
157     if(line2 == null) return null;
158     Matcher m = LINE2A.matcher(line2);
159     if(m.matches()){
160       for(Map.Entry<String, String> e : getUNIT_MAP().entrySet()){
161         if(line2.startsWith(e.getKey()+" ")){
162           line2 = line2.replaceFirst(e.getKey()+" ", e.getValue()+" ");
163           break;
164         }
165       }
166     }
167     return line2;
168   }
169   
170   
171   private static String normalizeZip(String zip){
172     return StringUtils.length(zip) > 5 ? zip.substring(0, 5) : zip;
173   }
174   
175   private static String resolveCityAlias(String city, String state){
176     return AliasResolver.resolveCityAlias(city, state);
177   }
178   
179   //TODO: document this craziness  
180   private static String saintAbbrExpansion(String city){
181     String exp = null;
182     if((exp = Data.getSAINT_NAME_MAP().get(city))!=null){
183       return exp;
184     }
185     return city;
186   }
187   
188   private static String normalizeOrdinal(String street){
189     String ordinal = null;
190     if((ordinal = Data.getORDINAL_MAP().get(street))!=null){
191       return ordinal;
192     }
193     return street;
194   }
195   
196 
197 }