1 package net.sourceforge.jgeocoder.us;
2
3 import static net.sourceforge.jgeocoder.AddressComponent.CITY;
4 import static net.sourceforge.jgeocoder.AddressComponent.LINE2;
5 import static net.sourceforge.jgeocoder.AddressComponent.NAME;
6 import static net.sourceforge.jgeocoder.AddressComponent.NUMBER;
7 import static net.sourceforge.jgeocoder.AddressComponent.POSTDIR;
8 import static net.sourceforge.jgeocoder.AddressComponent.POSTDIR2;
9 import static net.sourceforge.jgeocoder.AddressComponent.PREDIR;
10 import static net.sourceforge.jgeocoder.AddressComponent.PREDIR2;
11 import static net.sourceforge.jgeocoder.AddressComponent.STATE;
12 import static net.sourceforge.jgeocoder.AddressComponent.STREET;
13 import static net.sourceforge.jgeocoder.AddressComponent.STREET2;
14 import static net.sourceforge.jgeocoder.AddressComponent.TYPE;
15 import static net.sourceforge.jgeocoder.AddressComponent.TYPE2;
16 import static net.sourceforge.jgeocoder.AddressComponent.ZIP;
17 import static net.sourceforge.jgeocoder.us.AddressRegexLibrary.LINE2A_GROUPED;
18 import static net.sourceforge.jgeocoder.us.Data.getDIRECTIONAL_MAP;
19 import static net.sourceforge.jgeocoder.us.Data.getNUMBER_MAP;
20 import static net.sourceforge.jgeocoder.us.Data.getSTATE_CODE_MAP;
21 import static net.sourceforge.jgeocoder.us.Data.getSTREET_TYPE_MAP;
22 import static net.sourceforge.jgeocoder.us.Data.getUNIT_MAP;
23 import static net.sourceforge.jgeocoder.us.RegexLibrary.TXT_NUM_0_99;
24 import static net.sourceforge.jgeocoder.us.Utils.nvl;
25
26 import java.util.EnumMap;
27 import java.util.Map;
28 import java.util.regex.Matcher;
29 import java.util.regex.Pattern;
30
31 import net.sourceforge.jgeocoder.AddressComponent;
32
33 import org.apache.commons.lang.StringUtils;
34
35
36 /***
37 * TODO javadocs me
38 * @author jliang
39 *
40 */
41 public class AddressStandardizer{
42
43 /***
44 * Turn input map into one line of format
45 *
46 * {name, num predir street type postdir, line2, city, state, zip}
47 *
48 * @param parsedAddr
49 * @return
50 */
51 public static String toSingleLine(Map<AddressComponent, String> parsedAddr){
52 if(parsedAddr == null){
53 return null;
54 }
55 StringBuilder sb = new StringBuilder();
56 appendIfNotNull(sb, parsedAddr.get(NAME), ", ");
57 appendIfNotNull(sb, parsedAddr.get(NUMBER), " ");
58 appendIfNotNull(sb, parsedAddr.get(PREDIR), " ");
59 appendIfNotNull(sb, parsedAddr.get(STREET), " ");
60 if(parsedAddr.get(STREET2) != null){
61 appendIfNotNull(sb, parsedAddr.get(TYPE2), " ");
62 appendIfNotNull(sb, parsedAddr.get(POSTDIR2), " ");
63 sb.append("& ");
64 appendIfNotNull(sb, parsedAddr.get(PREDIR2), " ");
65 appendIfNotNull(sb, parsedAddr.get(STREET2), " ");
66 }
67 appendIfNotNull(sb, parsedAddr.get(TYPE), " ");
68 appendIfNotNull(sb, parsedAddr.get(POSTDIR), " ");
69 if(StringUtils.isNotBlank(sb.toString())){
70 sb.append(", ");
71 }
72 appendIfNotNull(sb, parsedAddr.get(LINE2), ", ");
73 appendIfNotNull(sb, parsedAddr.get(CITY), ", ");
74 appendIfNotNull(sb, parsedAddr.get(STATE), " ");
75 appendIfNotNull(sb, parsedAddr.get(ZIP), " ");
76 return sb.toString().replaceAll(" ,", ",");
77 }
78
79 private static void appendIfNotNull(StringBuilder sb, String s, String suffix){
80 if(s != null){
81 sb.append(s).append(suffix);
82 }
83 }
84
85 /***
86 * Normalize the input parsedAddr map into a standardize format
87 *
88 * @param parsedAddr
89 * @return normalized address in a map
90 */
91 public static Map<AddressComponent, String> normalizeParsedAddress(Map<AddressComponent, String> parsedAddr){
92 Map<AddressComponent, String> ret = new EnumMap<AddressComponent, String>(AddressComponent.class);
93
94 for(Map.Entry<AddressComponent, String> e : parsedAddr.entrySet()){
95 String v = StringUtils.upperCase(e.getValue());
96 switch (e.getKey()) {
97 case PREDIR: ret.put(PREDIR, normalizeDir(v)); break;
98 case POSTDIR: ret.put(POSTDIR, normalizeDir(v)); break;
99 case TYPE: ret.put(TYPE, normalizeStreetType(v)); break;
100 case PREDIR2: ret.put(PREDIR2, normalizeDir(v)); break;
101 case POSTDIR2: ret.put(POSTDIR2, normalizeDir(v)); break;
102 case TYPE2: ret.put(TYPE2, normalizeStreetType(v)); break;
103 case NUMBER: ret.put(NUMBER, normalizeNum(v)); break;
104 case STATE: ret.put(STATE, normalizeState(v)); break;
105 case ZIP: ret.put(ZIP, normalizeZip(v)); break;
106 case LINE2: ret.put(LINE2, normalizeLine2(v)); break;
107 case CITY: ret.put(CITY, saintAbbrExpansion(v)); break;
108 case STREET: ret.put(STREET, normalizeOrdinal(saintAbbrExpansion(v))); break;
109 case STREET2: ret.put(STREET2, normalizeOrdinal(saintAbbrExpansion(v))); break;
110 default: ret.put(e.getKey(), v); break;
111 }
112 }
113 ret.put(CITY, resolveCityAlias(ret.get(CITY), ret.get(STATE)));
114 return ret;
115 }
116
117
118 private static final Pattern TXT_NUM = Pattern.compile("^//W*("+TXT_NUM_0_99+")//W*");
119 private static final Pattern DIGIT = Pattern.compile("(.*?//d+)//W*(.+)?");
120 private static String normalizeNum(String num){
121 if(num == null) return null;
122 Matcher m = TXT_NUM.matcher(num);
123 String ret = null;
124 if(m.matches()){
125 ret = m.group(1);
126 if(ret.contains("-") || ret.contains(" ")){
127 String[] pair = ret.split("[ -]");
128 String pre = getNUMBER_MAP().get(pair[0]).substring(0, 1);
129 ret = pre+getNUMBER_MAP().get(pair[1]);
130 }else{
131 ret = getNUMBER_MAP().get(ret);
132 }
133 }else{
134 m = DIGIT.matcher(num);
135 if(m.matches()){
136 ret = m.group(2) == null? m.group(1): m.group(1)+"-"+m.group(2);
137 }
138 }
139 return nvl(ret, num) ;
140 }
141
142 private static String normalizeDir(String dir){
143 if(dir == null) return null;
144 dir = dir.replace(" ", "");
145 return dir.length() > 2 ? getDIRECTIONAL_MAP().get(dir): dir;
146 }
147
148 private static String normalizeStreetType(String type){
149 return nvl(getSTREET_TYPE_MAP().get(type), type);
150 }
151
152 public static String normalizeState(String state){
153 return nvl(getSTATE_CODE_MAP().get(state), state);
154 }
155 private static final Pattern LINE2A = Pattern.compile("//W*(?:"+LINE2A_GROUPED+")//W*");
156 private static String normalizeLine2(String line2){
157 if(line2 == null) return null;
158 Matcher m = LINE2A.matcher(line2);
159 if(m.matches()){
160 for(Map.Entry<String, String> e : getUNIT_MAP().entrySet()){
161 if(line2.startsWith(e.getKey()+" ")){
162 line2 = line2.replaceFirst(e.getKey()+" ", e.getValue()+" ");
163 break;
164 }
165 }
166 }
167 return line2;
168 }
169
170
171 private static String normalizeZip(String zip){
172 return StringUtils.length(zip) > 5 ? zip.substring(0, 5) : zip;
173 }
174
175 private static String resolveCityAlias(String city, String state){
176 return AliasResolver.resolveCityAlias(city, state);
177 }
178
179
180 private static String saintAbbrExpansion(String city){
181 String exp = null;
182 if((exp = Data.getSAINT_NAME_MAP().get(city))!=null){
183 return exp;
184 }
185 return city;
186 }
187
188 private static String normalizeOrdinal(String street){
189 String ordinal = null;
190 if((ordinal = Data.getORDINAL_MAP().get(street))!=null){
191 return ordinal;
192 }
193 return street;
194 }
195
196
197 }