00001 /* 00002 * Copyright (C) 2004, 2005, 2007, 2008 Internet Systems Consortium, Inc. ("ISC") 00003 * Copyright (C) 1998-2002 Internet Software Consortium. 00004 * 00005 * Permission to use, copy, modify, and/or distribute this software for any 00006 * purpose with or without fee is hereby granted, provided that the above 00007 * copyright notice and this permission notice appear in all copies. 00008 * 00009 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH 00010 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY 00011 * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT, 00012 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM 00013 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE 00014 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR 00015 * PERFORMANCE OF THIS SOFTWARE. 00016 */ 00017 00018 /* $Id: lex.h,v 1.37 2008/05/30 23:47:01 tbox Exp $ */ 00019 00020 #ifndef ISC_LEX_H 00021 #define ISC_LEX_H 1 00022 00023 /***** 00024 ***** Module Info 00025 *****/ 00026 00027 /*! \file isc/lex.h 00028 * \brief The "lex" module provides a lightweight tokenizer. It can operate 00029 * on files or buffers, and can handle "include". It is designed for 00030 * parsing of DNS master files and the BIND configuration file, but 00031 * should be general enough to tokenize other things, e.g. HTTP. 00032 * 00033 * \li MP: 00034 * No synchronization is provided. Clients must ensure exclusive 00035 * access. 00036 * 00037 * \li Reliability: 00038 * No anticipated impact. 00039 * 00040 * \li Resources: 00041 * TBS 00042 * 00043 * \li Security: 00044 * No anticipated impact. 00045 * 00046 * \li Standards: 00047 * None. 00048 */ 00049 00050 /*** 00051 *** Imports 00052 ***/ 00053 00054 #include <stdio.h> 00055 00056 #include <isc/lang.h> 00057 #include <isc/region.h> 00058 #include <isc/types.h> 00059 00060 ISC_LANG_BEGINDECLS 00061 00062 /*** 00063 *** Options 00064 ***/ 00065 00066 /*@{*/ 00067 /*! 00068 * Various options for isc_lex_gettoken(). 00069 */ 00070 00071 #define ISC_LEXOPT_EOL 0x01 /*%< Want end-of-line token. */ 00072 #define ISC_LEXOPT_EOF 0x02 /*%< Want end-of-file token. */ 00073 #define ISC_LEXOPT_INITIALWS 0x04 /*%< Want initial whitespace. */ 00074 #define ISC_LEXOPT_NUMBER 0x08 /*%< Recognize numbers. */ 00075 #define ISC_LEXOPT_QSTRING 0x10 /*%< Recognize qstrings. */ 00076 /*@}*/ 00077 00078 /*@{*/ 00079 /*! 00080 * The ISC_LEXOPT_DNSMULTILINE option handles the processing of '(' and ')' in 00081 * the DNS master file format. If this option is set, then the 00082 * ISC_LEXOPT_INITIALWS and ISC_LEXOPT_EOL options will be ignored when 00083 * the paren count is > 0. To use this option, '(' and ')' must be special 00084 * characters. 00085 */ 00086 #define ISC_LEXOPT_DNSMULTILINE 0x20 /*%< Handle '(' and ')'. */ 00087 #define ISC_LEXOPT_NOMORE 0x40 /*%< Want "no more" token. */ 00088 00089 #define ISC_LEXOPT_CNUMBER 0x80 /*%< Recognize octal and hex. */ 00090 #define ISC_LEXOPT_ESCAPE 0x100 /*%< Recognize escapes. */ 00091 #define ISC_LEXOPT_QSTRINGMULTILINE 0x200 /*%< Allow multiline "" strings */ 00092 #define ISC_LEXOPT_OCTAL 0x400 /*%< Expect a octal number. */ 00093 /*@}*/ 00094 /*@{*/ 00095 /*! 00096 * Various commenting styles, which may be changed at any time with 00097 * isc_lex_setcomments(). 00098 */ 00099 00100 #define ISC_LEXCOMMENT_C 0x01 00101 #define ISC_LEXCOMMENT_CPLUSPLUS 0x02 00102 #define ISC_LEXCOMMENT_SHELL 0x04 00103 #define ISC_LEXCOMMENT_DNSMASTERFILE 0x08 00104 /*@}*/ 00105 00106 /*** 00107 *** Types 00108 ***/ 00109 00110 /*! Lex */ 00111 00112 typedef char isc_lexspecials_t[256]; 00113 00114 /* Tokens */ 00115 00116 typedef enum { 00117 isc_tokentype_unknown = 0, 00118 isc_tokentype_string = 1, 00119 isc_tokentype_number = 2, 00120 isc_tokentype_qstring = 3, 00121 isc_tokentype_eol = 4, 00122 isc_tokentype_eof = 5, 00123 isc_tokentype_initialws = 6, 00124 isc_tokentype_special = 7, 00125 isc_tokentype_nomore = 8 00126 } isc_tokentype_t; 00127 00128 typedef union { 00129 char as_char; 00130 unsigned long as_ulong; 00131 isc_region_t as_region; 00132 isc_textregion_t as_textregion; 00133 void * as_pointer; 00134 } isc_tokenvalue_t; 00135 00136 typedef struct isc_token { 00137 isc_tokentype_t type; 00138 isc_tokenvalue_t value; 00139 } isc_token_t; 00140 00141 /*** 00142 *** Functions 00143 ***/ 00144 00145 isc_result_t 00146 isc_lex_create(isc_mem_t *mctx, size_t max_token, isc_lex_t **lexp); 00147 /*%< 00148 * Create a lexer. 00149 * 00150 * 'max_token' is a hint of the number of bytes in the largest token. 00151 * 00152 * Requires: 00153 *\li '*lexp' is a valid lexer. 00154 * 00155 *\li max_token > 0. 00156 * 00157 * Ensures: 00158 *\li On success, *lexp is attached to the newly created lexer. 00159 * 00160 * Returns: 00161 *\li #ISC_R_SUCCESS 00162 *\li #ISC_R_NOMEMORY 00163 */ 00164 00165 void 00166 isc_lex_destroy(isc_lex_t **lexp); 00167 /*%< 00168 * Destroy the lexer. 00169 * 00170 * Requires: 00171 *\li '*lexp' is a valid lexer. 00172 * 00173 * Ensures: 00174 *\li *lexp == NULL 00175 */ 00176 00177 unsigned int 00178 isc_lex_getcomments(isc_lex_t *lex); 00179 /*%< 00180 * Return the current lexer commenting styles. 00181 * 00182 * Requires: 00183 *\li 'lex' is a valid lexer. 00184 * 00185 * Returns: 00186 *\li The commenting sytles which are currently allowed. 00187 */ 00188 00189 void 00190 isc_lex_setcomments(isc_lex_t *lex, unsigned int comments); 00191 /*%< 00192 * Set allowed lexer commenting styles. 00193 * 00194 * Requires: 00195 *\li 'lex' is a valid lexer. 00196 * 00197 *\li 'comments' has meaningful values. 00198 */ 00199 00200 void 00201 isc_lex_getspecials(isc_lex_t *lex, isc_lexspecials_t specials); 00202 /*%< 00203 * Put the current list of specials into 'specials'. 00204 * 00205 * Requires: 00206 *\li 'lex' is a valid lexer. 00207 */ 00208 00209 void 00210 isc_lex_setspecials(isc_lex_t *lex, isc_lexspecials_t specials); 00211 /*!< 00212 * The characters in 'specials' are returned as tokens. Along with 00213 * whitespace, they delimit strings and numbers. 00214 * 00215 * Note: 00216 *\li Comment processing takes precedence over special character 00217 * recognition. 00218 * 00219 * Requires: 00220 *\li 'lex' is a valid lexer. 00221 */ 00222 00223 isc_result_t 00224 isc_lex_openfile(isc_lex_t *lex, const char *filename); 00225 /*%< 00226 * Open 'filename' and make it the current input source for 'lex'. 00227 * 00228 * Requires: 00229 *\li 'lex' is a valid lexer. 00230 * 00231 *\li filename is a valid C string. 00232 * 00233 * Returns: 00234 *\li #ISC_R_SUCCESS 00235 *\li #ISC_R_NOMEMORY Out of memory 00236 *\li #ISC_R_NOTFOUND File not found 00237 *\li #ISC_R_NOPERM No permission to open file 00238 *\li #ISC_R_FAILURE Couldn't open file, not sure why 00239 *\li #ISC_R_UNEXPECTED 00240 */ 00241 00242 isc_result_t 00243 isc_lex_openstream(isc_lex_t *lex, FILE *stream); 00244 /*%< 00245 * Make 'stream' the current input source for 'lex'. 00246 * 00247 * Requires: 00248 *\li 'lex' is a valid lexer. 00249 * 00250 *\li 'stream' is a valid C stream. 00251 * 00252 * Returns: 00253 *\li #ISC_R_SUCCESS 00254 *\li #ISC_R_NOMEMORY Out of memory 00255 */ 00256 00257 isc_result_t 00258 isc_lex_openbuffer(isc_lex_t *lex, isc_buffer_t *buffer); 00259 /*%< 00260 * Make 'buffer' the current input source for 'lex'. 00261 * 00262 * Requires: 00263 *\li 'lex' is a valid lexer. 00264 * 00265 *\li 'buffer' is a valid buffer. 00266 * 00267 * Returns: 00268 *\li #ISC_R_SUCCESS 00269 *\li #ISC_R_NOMEMORY Out of memory 00270 */ 00271 00272 isc_result_t 00273 isc_lex_close(isc_lex_t *lex); 00274 /*%< 00275 * Close the most recently opened object (i.e. file or buffer). 00276 * 00277 * Returns: 00278 *\li #ISC_R_SUCCESS 00279 *\li #ISC_R_NOMORE No more input sources 00280 */ 00281 00282 isc_result_t 00283 isc_lex_gettoken(isc_lex_t *lex, unsigned int options, isc_token_t *tokenp); 00284 /*%< 00285 * Get the next token. 00286 * 00287 * Requires: 00288 *\li 'lex' is a valid lexer. 00289 * 00290 *\li 'lex' has an input source. 00291 * 00292 *\li 'options' contains valid options. 00293 * 00294 *\li '*tokenp' is a valid pointer. 00295 * 00296 * Returns: 00297 *\li #ISC_R_SUCCESS 00298 *\li #ISC_R_UNEXPECTEDEND 00299 *\li #ISC_R_NOMEMORY 00300 * 00301 * These two results are returned only if their corresponding lexer 00302 * options are not set. 00303 * 00304 *\li #ISC_R_EOF End of input source 00305 *\li #ISC_R_NOMORE No more input sources 00306 */ 00307 00308 isc_result_t 00309 isc_lex_getmastertoken(isc_lex_t *lex, isc_token_t *token, 00310 isc_tokentype_t expect, isc_boolean_t eol); 00311 /*%< 00312 * Get the next token from a DNS master file type stream. This is a 00313 * convenience function that sets appropriate options and handles quoted 00314 * strings and end of line correctly for master files. It also ungets 00315 * unexpected tokens. 00316 * 00317 * Requires: 00318 *\li 'lex' is a valid lexer. 00319 * 00320 *\li 'token' is a valid pointer 00321 * 00322 * Returns: 00323 * 00324 * \li any return code from isc_lex_gettoken(). 00325 */ 00326 00327 isc_result_t 00328 isc_lex_getoctaltoken(isc_lex_t *lex, isc_token_t *token, isc_boolean_t eol); 00329 /*%< 00330 * Get the next token from a DNS master file type stream. This is a 00331 * convenience function that sets appropriate options and handles end 00332 * of line correctly for master files. It also ungets unexpected tokens. 00333 * 00334 * Requires: 00335 *\li 'lex' is a valid lexer. 00336 * 00337 *\li 'token' is a valid pointer 00338 * 00339 * Returns: 00340 * 00341 * \li any return code from isc_lex_gettoken(). 00342 */ 00343 00344 void 00345 isc_lex_ungettoken(isc_lex_t *lex, isc_token_t *tokenp); 00346 /*%< 00347 * Unget the current token. 00348 * 00349 * Requires: 00350 *\li 'lex' is a valid lexer. 00351 * 00352 *\li 'lex' has an input source. 00353 * 00354 *\li 'tokenp' points to a valid token. 00355 * 00356 *\li There is no ungotten token already. 00357 */ 00358 00359 void 00360 isc_lex_getlasttokentext(isc_lex_t *lex, isc_token_t *tokenp, isc_region_t *r); 00361 /*%< 00362 * Returns a region containing the text of the last token returned. 00363 * 00364 * Requires: 00365 *\li 'lex' is a valid lexer. 00366 * 00367 *\li 'lex' has an input source. 00368 * 00369 *\li 'tokenp' points to a valid token. 00370 * 00371 *\li A token has been gotten and not ungotten. 00372 */ 00373 00374 char * 00375 isc_lex_getsourcename(isc_lex_t *lex); 00376 /*%< 00377 * Return the input source name. 00378 * 00379 * Requires: 00380 *\li 'lex' is a valid lexer. 00381 * 00382 * Returns: 00383 * \li source name or NULL if no current source. 00384 *\li result valid while current input source exists. 00385 */ 00386 00387 00388 unsigned long 00389 isc_lex_getsourceline(isc_lex_t *lex); 00390 /*%< 00391 * Return the input source line number. 00392 * 00393 * Requires: 00394 *\li 'lex' is a valid lexer. 00395 * 00396 * Returns: 00397 *\li Current line number or 0 if no current source. 00398 */ 00399 00400 isc_result_t 00401 isc_lex_setsourcename(isc_lex_t *lex, const char *name); 00402 /*%< 00403 * Assigns a new name to the input source. 00404 * 00405 * Requires: 00406 * 00407 * \li 'lex' is a valid lexer. 00408 * 00409 * Returns: 00410 * \li #ISC_R_SUCCESS 00411 * \li #ISC_R_NOMEMORY 00412 * \li #ISC_R_NOTFOUND - there are no sources. 00413 */ 00414 00415 isc_boolean_t 00416 isc_lex_isfile(isc_lex_t *lex); 00417 /*%< 00418 * Return whether the current input source is a file. 00419 * 00420 * Requires: 00421 *\li 'lex' is a valid lexer. 00422 * 00423 * Returns: 00424 * \li #ISC_TRUE if the current input is a file, 00425 *\li #ISC_FALSE otherwise. 00426 */ 00427 00428 00429 ISC_LANG_ENDDECLS 00430 00431 #endif /* ISC_LEX_H */