00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017 #include <config.h>
00018
00019 #include <isc/file.h>
00020 #include <isc/regex.h>
00021 #include <isc/string.h>
00022
/*
 * FAIL(x): abandon validation by jumping to the "error" label of the
 * enclosing function.
 *
 * When VALREGEX_REPORT_REASON is non-zero the message 'x' is first stored
 * in a variable named "reason", which the enclosing function must declare;
 * otherwise 'x' is discarded and only the jump is performed.
 */
#if !VALREGEX_REPORT_REASON
#define FAIL(x) goto error
#else
#define FAIL(x) do { reason = (x); goto error; } while(0)
#endif
00028
00029
00030
00031
00032 int
00033 isc_regex_validate(const char *c) {
00034 enum {
00035 none, parse_bracket, parse_bound,
00036 parse_ce, parse_ec, parse_cc
00037 } state = none;
00038
00039 const char *cc[] = {
00040 ":alnum:", ":digit:", ":punct:", ":alpha:", ":graph:",
00041 ":space:", ":blank:", ":lower:", ":upper:", ":cntrl:",
00042 ":print:", ":xdigit:"
00043 };
00044 isc_boolean_t seen_comma = ISC_FALSE;
00045 isc_boolean_t seen_high = ISC_FALSE;
00046 isc_boolean_t seen_char = ISC_FALSE;
00047 isc_boolean_t seen_ec = ISC_FALSE;
00048 isc_boolean_t seen_ce = ISC_FALSE;
00049 isc_boolean_t have_atom = ISC_FALSE;
00050 int group = 0;
00051 int range = 0;
00052 int sub = 0;
00053 isc_boolean_t empty_ok = ISC_FALSE;
00054 isc_boolean_t neg = ISC_FALSE;
00055 isc_boolean_t was_multiple = ISC_FALSE;
00056 unsigned int low = 0;
00057 unsigned int high = 0;
00058 const char *ccname = NULL;
00059 int range_start = 0;
00060 #if VALREGEX_REPORT_REASON
00061 const char *reason = "";
00062 #endif
00063
00064 if (c == NULL || *c == 0)
00065 FAIL("empty string");
00066
00067 while (c != NULL && *c != 0) {
00068 switch (state) {
00069 case none:
00070 switch (*c) {
00071 case '\\':
00072 ++c;
00073 switch (*c) {
00074 case '1': case '2': case '3':
00075 case '4': case '5': case '6':
00076 case '7': case '8': case '9':
00077 if ((*c - '0') > sub)
00078 FAIL("bad back reference");
00079 have_atom = ISC_TRUE;
00080 was_multiple = ISC_FALSE;
00081 break;
00082 case 0:
00083 FAIL("escaped end-of-string");
00084 default:
00085 goto literal;
00086 }
00087 ++c;
00088 break;
00089 case '[':
00090 ++c;
00091 neg = ISC_FALSE;
00092 was_multiple = ISC_FALSE;
00093 seen_char = ISC_FALSE;
00094 state = parse_bracket;
00095 break;
00096 case '{':
00097 switch (c[1]) {
00098 case '0': case '1': case '2': case '3':
00099 case '4': case '5': case '6': case '7':
00100 case '8': case '9':
00101 if (!have_atom)
00102 FAIL("no atom");
00103 if (was_multiple)
00104 FAIL("was multiple");
00105 seen_comma = ISC_FALSE;
00106 seen_high = ISC_FALSE;
00107 low = high = 0;
00108 state = parse_bound;
00109 break;
00110 default:
00111 goto literal;
00112 }
00113 ++c;
00114 have_atom = ISC_TRUE;
00115 was_multiple = ISC_TRUE;
00116 break;
00117 case '}':
00118 goto literal;
00119 case '(':
00120 have_atom = ISC_FALSE;
00121 was_multiple = ISC_FALSE;
00122 empty_ok = ISC_TRUE;
00123 ++group;
00124 ++sub;
00125 ++c;
00126 break;
00127 case ')':
00128 if (group && !have_atom && !empty_ok)
00129 FAIL("empty alternative");
00130 have_atom = ISC_TRUE;
00131 was_multiple = ISC_FALSE;
00132 if (group != 0)
00133 --group;
00134 ++c;
00135 break;
00136 case '|':
00137 if (!have_atom)
00138 FAIL("no atom");
00139 have_atom = ISC_FALSE;
00140 empty_ok = ISC_FALSE;
00141 was_multiple = ISC_FALSE;
00142 ++c;
00143 break;
00144 case '^':
00145 case '$':
00146 have_atom = ISC_TRUE;
00147 was_multiple = ISC_TRUE;
00148 ++c;
00149 break;
00150 case '+':
00151 case '*':
00152 case '?':
00153 if (was_multiple)
00154 FAIL("was multiple");
00155 if (!have_atom)
00156 FAIL("no atom");
00157 have_atom = ISC_TRUE;
00158 was_multiple = ISC_TRUE;
00159 ++c;
00160 break;
00161 case '.':
00162 default:
00163 literal:
00164 have_atom = ISC_TRUE;
00165 was_multiple = ISC_FALSE;
00166 ++c;
00167 break;
00168 }
00169 break;
00170 case parse_bound:
00171 switch (*c) {
00172 case '0': case '1': case '2': case '3': case '4':
00173 case '5': case '6': case '7': case '8': case '9':
00174 if (!seen_comma) {
00175 low = low * 10 + *c - '0';
00176 if (low > 255)
00177 FAIL("lower bound too big");
00178 } else {
00179 seen_high = ISC_TRUE;
00180 high = high * 10 + *c - '0';
00181 if (high > 255)
00182 FAIL("upper bound too big");
00183 }
00184 ++c;
00185 break;
00186 case ',':
00187 if (seen_comma)
00188 FAIL("multiple commas");
00189 seen_comma = ISC_TRUE;
00190 ++c;
00191 break;
00192 default:
00193 case '{':
00194 FAIL("non digit/comma");
00195 case '}':
00196 if (seen_high && low > high)
00197 FAIL("bad parse bound");
00198 seen_comma = ISC_FALSE;
00199 state = none;
00200 ++c;
00201 break;
00202 }
00203 break;
00204 case parse_bracket:
00205 switch (*c) {
00206 case '^':
00207 if (seen_char || neg) goto inside;
00208 neg = ISC_TRUE;
00209 ++c;
00210 break;
00211 case '-':
00212 if (range == 2) goto inside;
00213 if (!seen_char) goto inside;
00214 if (range == 1)
00215 FAIL("bad range");
00216 range = 2;
00217 ++c;
00218 break;
00219 case '[':
00220 ++c;
00221 switch (*c) {
00222 case '.':
00223 if (range != 0) --range;
00224 ++c;
00225 state = parse_ce;
00226 seen_ce = ISC_FALSE;
00227 break;
00228 case '=':
00229 if (range == 2)
00230 FAIL("equivalence class in range");
00231 ++c;
00232 state = parse_ec;
00233 seen_ec = ISC_FALSE;
00234 break;
00235 case ':':
00236 if (range == 2)
00237 FAIL("character class in range");
00238 ccname = c;
00239 ++c;
00240 state = parse_cc;
00241 break;
00242 }
00243 seen_char = ISC_TRUE;
00244 break;
00245 case ']':
00246 if (!c[1] && !seen_char)
00247 FAIL("unfinished brace");
00248 if (!seen_char)
00249 goto inside;
00250 ++c;
00251 range = 0;
00252 have_atom = ISC_TRUE;
00253 state = none;
00254 break;
00255 default:
00256 inside:
00257 seen_char = ISC_TRUE;
00258 if (range == 2 && (*c & 0xff) < range_start)
00259 FAIL("out of order range");
00260 if (range != 0)
00261 --range;
00262 range_start = *c & 0xff;
00263 ++c;
00264 break;
00265 };
00266 break;
00267 case parse_ce:
00268 switch (*c) {
00269 case '.':
00270 ++c;
00271 switch (*c) {
00272 case ']':
00273 if (!seen_ce)
00274 FAIL("empty ce");
00275 ++c;
00276 state = parse_bracket;
00277 break;
00278 default:
00279 if (seen_ce)
00280 range_start = 256;
00281 else
00282 range_start = '.';
00283 seen_ce = ISC_TRUE;
00284 break;
00285 }
00286 break;
00287 default:
00288 if (seen_ce)
00289 range_start = 256;
00290 else
00291 range_start = *c;
00292 seen_ce = ISC_TRUE;
00293 ++c;
00294 break;
00295 }
00296 break;
00297 case parse_ec:
00298 switch (*c) {
00299 case '=':
00300 ++c;
00301 switch (*c) {
00302 case ']':
00303 if (!seen_ec)
00304 FAIL("no ec");
00305 ++c;
00306 state = parse_bracket;
00307 break;
00308 default:
00309 seen_ec = ISC_TRUE;
00310 break;
00311 }
00312 break;
00313 default:
00314 seen_ec = ISC_TRUE;
00315 ++c;
00316 break;
00317 }
00318 break;
00319 case parse_cc:
00320 switch (*c) {
00321 case ':':
00322 ++c;
00323 switch (*c) {
00324 case ']': {
00325 unsigned int i;
00326 isc_boolean_t found = ISC_FALSE;
00327 for (i = 0;
00328 i < sizeof(cc)/sizeof(*cc);
00329 i++)
00330 {
00331 unsigned int len;
00332 len = strlen(cc[i]);
00333 if (len !=
00334 (unsigned int)(c - ccname))
00335 continue;
00336 if (strncmp(cc[i], ccname, len))
00337 continue;
00338 found = ISC_TRUE;
00339 }
00340 if (!found)
00341 FAIL("unknown cc");
00342 ++c;
00343 state = parse_bracket;
00344 break;
00345 }
00346 default:
00347 break;
00348 }
00349 break;
00350 default:
00351 ++c;
00352 break;
00353 }
00354 break;
00355 }
00356 }
00357 if (group != 0)
00358 FAIL("group open");
00359 if (state != none)
00360 FAIL("incomplete");
00361 if (!have_atom)
00362 FAIL("no atom");
00363 return (sub);
00364
00365 error:
00366 #if VALREGEX_REPORT_REASON
00367 fprintf(stderr, "%s\n", reason);
00368 #endif
00369 return (-1);
00370 }