1 package net.sourceforge.jgeocoder.us;
2 import static net.sourceforge.jgeocoder.us.RegexLibrary.ADDR_UNIT;
3 import static net.sourceforge.jgeocoder.us.RegexLibrary.DIRECTIONS;
4 import static net.sourceforge.jgeocoder.us.RegexLibrary.ORDINAL_ALL;
5 import static net.sourceforge.jgeocoder.us.RegexLibrary.STREET_DESIGNATOR;
6 import static net.sourceforge.jgeocoder.us.RegexLibrary.TXT_NUM_0_9;
7 import static net.sourceforge.jgeocoder.us.RegexLibrary.TXT_NUM_10_19;
8 import static net.sourceforge.jgeocoder.us.RegexLibrary.TXT_ORDINAL_0_99;
9 import static net.sourceforge.jgeocoder.us.RegexLibrary.US_STATES;
10 import static net.sourceforge.jgeocoder.us.Utils.compile;
11 import net.sourceforge.jgeocoder.us.Utils.NamedGroupPattern;
12 /***
13 * TODO javadocs me
14 * @author jliang
15 *
16 */
17 class AddressRegexLibrary{
18
19 private static final String NUMBER =
20 "(?://p{Alpha})?//d+(?:[- ][//p{Alpha}&&[^NSEW]]" +
21 "(?!//s+(?:st|street|ave|aven|avenu|avenue|blvd|boulv|boulevard|boulv|plz|plaza|plza)))?" +
22 "|//d+-?//d*(?:-?//p{Alpha})?|"+TXT_NUM_0_9+"|" +TXT_NUM_10_19;
23
24 private static final String FRACTION = "//d+/////d+";
25
26 private static final String LINE1A =
27 "(?P<street>"+DIRECTIONS+")//W+" +
28 "(?P<type>"+STREET_DESIGNATOR+")//b";
29
30 private static final String LINE1B =
31 "(?:(?P<predir>"+DIRECTIONS+")//W+)?" +
32 "(?:" +
33 "(?P<street>[^,]+)" +
34 "(?:[^//w,]+(?P<type>"+STREET_DESIGNATOR+")//b)" +
35 "(?:[^//w,]+(?P<postdir>"+DIRECTIONS+")//b)?" +
36 "|" +
37 "(?P<street>[^,]*//d)" +
38 "(?:(?P<postdir>"+DIRECTIONS+")//b)" +
39 "|" +
40 "(?P<street>[^,]+?)" +
41 "(?:[^//w,]+(?P<type>"+STREET_DESIGNATOR+")//b)?" +
42 "(?:[^//w,]+(?P<postdir>"+DIRECTIONS+")//b)?" +
43 ")";
44
45 private static final String LINE1A2 =
46 "(?P<street2>"+DIRECTIONS+")//W+" +
47 "(?P<type2>"+STREET_DESIGNATOR+")//b";
48
49 private static final String LINE1B2 =
50 "(?:(?P<predir2>"+DIRECTIONS+")//W+)?" +
51 "(?:" +
52 "(?P<street2>[^,]+)" +
53 "(?:[^//w,]+(?P<type2>"+STREET_DESIGNATOR+")//b)" +
54 "(?:[^//w,]+(?P<postdir2>"+DIRECTIONS+")//b)?" +
55 "|" +
56 "(?P<street2>[^,]*//d)" +
57 "(?:(?P<postdir2>"+DIRECTIONS+")//b)" +
58 "|" +
59 "(?P<street2>[^,]+?)" +
60 "(?:[^//w,]+(?P<type2>"+STREET_DESIGNATOR+")//b)?" +
61 "(?:[^//w,]+(?P<postdir2>"+DIRECTIONS+")//b)?" +
62 ")";
63
64 private static final String LINE1 =
65 "(?P<number>(?:" + NUMBER + ")(?://W+"+FRACTION+")?)//W+" +
66 "(?:" + LINE1B + "|" + LINE1A + ")";
67
68
69 private static final String UNIT_NUMBER =
70 "(?://b//p{Alpha}{1}//s+|//p{Alpha}*[-/]?)?" +
71 "(?://d+|//b//p{Alpha}//b(?=//s|$))" +
72 "(?:[ ]*//p{Alpha}//b|-//w+)?";
73 private static final String ZIP = "//d{5}(?:[- ]//d{3,4})?";
74 private static final String NOT_STATE_OR_ZIP = "(?![^,]*//W+(?://b(?:"+US_STATES+")//b(?://W*$|(?:"+ZIP+")//W*$))|(?://b(?:"+ZIP+")//b//W*$))";
75 private static final String LINE2A = "(?:"+ADDR_UNIT+")[s]?//W*?(?:"+UNIT_NUMBER+")";
76 public static final String LINE2A_GROUPED = "("+ADDR_UNIT+")[s]?//W*?("+UNIT_NUMBER+")";
77 private static final String LINE2B = "(?:(?:"+TXT_ORDINAL_0_99+"|"+ORDINAL_ALL+")//W*(?:"+ADDR_UNIT+")[s]?)";
78 private static final String LINE2 = "(?:(?P<line2>"+LINE2A+"|"+LINE2B+"|[^,]*?"+NOT_STATE_OR_ZIP+")//W+)??";
79
80 private static final String LASTLINE =
81 "(?:" +
82 "(?P<city>[^//d,]+?)//W+" +
83 "//b(?P<state>(?:"+US_STATES+")//b)?//W*" +
84 ")?" +
85 "(?P<zip>"+ZIP+")?";
86
87 private static final String ADDR_NAME = "//W*(?:(?P<name>[^,]+)//W+)??";
88
89 private static final String STREET_ADDRESS =
90 ADDR_NAME + LINE1 + "(?P<tlid>//W+)"+ LINE2 + LASTLINE +"//W*";
91
92 private static final String CORNER = "(?://band//b|//bat//b|&|//@)";
93
94 private static final String INTERSECTION = ADDR_NAME +
95 "(?:" + LINE1A + "|" + LINE1B + ")" + "//W*//s+" + CORNER + "//s+" +
96 "(?:" + LINE1A2 + "|" + LINE1B2 + ")" + "//W+" + LASTLINE +"//W*";
97
98 public static final NamedGroupPattern P_CSZ = compile("(?i:"+LASTLINE+")");
99 public static final NamedGroupPattern P_STREET_ADDRESS = compile("(?i:"+STREET_ADDRESS+")");
100 public static final NamedGroupPattern P_INTERSECTION = compile("(?i:"+INTERSECTION+")");
101 public static final NamedGroupPattern P_CORNER = compile("(?i:"+CORNER+")");
102
103
104 }