View Javadoc

1   package net.sourceforge.jgeocoder.us;
2   import static net.sourceforge.jgeocoder.us.RegexLibrary.ADDR_UNIT;
3   import static net.sourceforge.jgeocoder.us.RegexLibrary.DIRECTIONS;
4   import static net.sourceforge.jgeocoder.us.RegexLibrary.ORDINAL_ALL;
5   import static net.sourceforge.jgeocoder.us.RegexLibrary.STREET_DESIGNATOR;
6   import static net.sourceforge.jgeocoder.us.RegexLibrary.TXT_NUM_0_9;
7   import static net.sourceforge.jgeocoder.us.RegexLibrary.TXT_NUM_10_19;
8   import static net.sourceforge.jgeocoder.us.RegexLibrary.TXT_ORDINAL_0_99;
9   import static net.sourceforge.jgeocoder.us.RegexLibrary.US_STATES;
10  import static net.sourceforge.jgeocoder.us.Utils.compile;
11  import net.sourceforge.jgeocoder.us.Utils.NamedGroupPattern;
12  /***
13   * TODO javadocs me
14   * @author jliang
15   *
16   */
17  class AddressRegexLibrary{
18    
19    private static final String NUMBER =
20      "(?://p{Alpha})?//d+(?:[- ][//p{Alpha}&&[^NSEW]]" +
21      "(?!//s+(?:st|street|ave|aven|avenu|avenue|blvd|boulv|boulevard|boulv|plz|plaza|plza)))?" +
22      "|//d+-?//d*(?:-?//p{Alpha})?|"+TXT_NUM_0_9+"|" +TXT_NUM_10_19; 
23    
24    private static final String FRACTION = "//d+/////d+";
25    
26    private static final String LINE1A = 
27      "(?P<street>"+DIRECTIONS+")//W+" + 
28      "(?P<type>"+STREET_DESIGNATOR+")//b";
29    
30    private static final String LINE1B = 
31      "(?:(?P<predir>"+DIRECTIONS+")//W+)?" +
32      "(?:" +
33        "(?P<street>[^,]+)" +
34        "(?:[^//w,]+(?P<type>"+STREET_DESIGNATOR+")//b)" +
35        "(?:[^//w,]+(?P<postdir>"+DIRECTIONS+")//b)?" +
36       "|" +
37         "(?P<street>[^,]*//d)" +
38         "(?:(?P<postdir>"+DIRECTIONS+")//b)" +
39       "|" +
40         "(?P<street>[^,]+?)" +
41         "(?:[^//w,]+(?P<type>"+STREET_DESIGNATOR+")//b)?" +
42         "(?:[^//w,]+(?P<postdir>"+DIRECTIONS+")//b)?" +       
43      ")";
44    
45    private static final String LINE1A2 = 
46      "(?P<street2>"+DIRECTIONS+")//W+" + 
47      "(?P<type2>"+STREET_DESIGNATOR+")//b";
48    
49    private static final String LINE1B2 = 
50      "(?:(?P<predir2>"+DIRECTIONS+")//W+)?" +
51      "(?:" +
52        "(?P<street2>[^,]+)" +
53        "(?:[^//w,]+(?P<type2>"+STREET_DESIGNATOR+")//b)" +
54        "(?:[^//w,]+(?P<postdir2>"+DIRECTIONS+")//b)?" +
55       "|" +
56         "(?P<street2>[^,]*//d)" +
57         "(?:(?P<postdir2>"+DIRECTIONS+")//b)" +
58       "|" +
59         "(?P<street2>[^,]+?)" +
60         "(?:[^//w,]+(?P<type2>"+STREET_DESIGNATOR+")//b)?" +
61         "(?:[^//w,]+(?P<postdir2>"+DIRECTIONS+")//b)?" +       
62      ")";
63    
64    private static final String LINE1 =
65      "(?P<number>(?:" + NUMBER + ")(?://W+"+FRACTION+")?)//W+" + 
66      "(?:" + LINE1B + "|" + LINE1A + ")";
67    
68    //A, 2A, 22, A2, 2-a, 2/a, etc...
69    private static final String UNIT_NUMBER = 
70      "(?://b//p{Alpha}{1}//s+|//p{Alpha}*[-/]?)?" +
71      "(?://d+|//b//p{Alpha}//b(?=//s|$))" +
72      "(?:[ ]*//p{Alpha}//b|-//w+)?";
73    private static final String ZIP = "//d{5}(?:[- ]//d{3,4})?";
74    private static final String NOT_STATE_OR_ZIP = "(?![^,]*//W+(?://b(?:"+US_STATES+")//b(?://W*$|(?:"+ZIP+")//W*$))|(?://b(?:"+ZIP+")//b//W*$))";
75    private static final String LINE2A = "(?:"+ADDR_UNIT+")[s]?//W*?(?:"+UNIT_NUMBER+")";
76    public static final String LINE2A_GROUPED = "("+ADDR_UNIT+")[s]?//W*?("+UNIT_NUMBER+")";
77    private static final String LINE2B = "(?:(?:"+TXT_ORDINAL_0_99+"|"+ORDINAL_ALL+")//W*(?:"+ADDR_UNIT+")[s]?)";
78    private static final String LINE2 = "(?:(?P<line2>"+LINE2A+"|"+LINE2B+"|[^,]*?"+NOT_STATE_OR_ZIP+")//W+)??";
79    
80    private static final String LASTLINE = 
81      "(?:" +
82        "(?P<city>[^//d,]+?)//W+" +  //city                                              
83        "//b(?P<state>(?:"+US_STATES+")//b)?//W*" + //state                                          
84      ")?" +
85      "(?P<zip>"+ZIP+")?";      //zip
86  
87    private static final String ADDR_NAME =  "//W*(?:(?P<name>[^,]+)//W+)??"; 
88    
89    private static final String STREET_ADDRESS = 
90     ADDR_NAME + LINE1 + "(?P<tlid>//W+)"+ LINE2 + LASTLINE +"//W*"; //the group name is a hack
91    
92    private static final String CORNER = "(?://band//b|//bat//b|&|//@)";
93  
94    private static final String INTERSECTION = ADDR_NAME +
95    "(?:" + LINE1A + "|" + LINE1B + ")" + "//W*//s+" + CORNER + "//s+" +
96    "(?:" + LINE1A2 + "|" + LINE1B2 + ")" + "//W+" + LASTLINE +"//W*";
97    
98    public static final NamedGroupPattern P_CSZ = compile("(?i:"+LASTLINE+")");
99    public static final NamedGroupPattern P_STREET_ADDRESS = compile("(?i:"+STREET_ADDRESS+")");
100   public static final NamedGroupPattern P_INTERSECTION = compile("(?i:"+INTERSECTION+")");
101   public static final NamedGroupPattern P_CORNER = compile("(?i:"+CORNER+")");
102   
103 
104 }