lex.h

Go to the documentation of this file.
00001 /*
00002  * Copyright (C) 2004, 2005, 2007, 2008  Internet Systems Consortium, Inc. ("ISC")
00003  * Copyright (C) 1998-2002  Internet Software Consortium.
00004  *
00005  * Permission to use, copy, modify, and/or distribute this software for any
00006  * purpose with or without fee is hereby granted, provided that the above
00007  * copyright notice and this permission notice appear in all copies.
00008  *
00009  * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
00010  * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
00011  * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
00012  * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
00013  * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
00014  * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
00015  * PERFORMANCE OF THIS SOFTWARE.
00016  */
00017 
00018 /* $Id: lex.h,v 1.37 2008/05/30 23:47:01 tbox Exp $ */
00019 
00020 #ifndef ISC_LEX_H
00021 #define ISC_LEX_H 1
00022 
00023 /*****
00024  ***** Module Info
00025  *****/
00026 
00027 /*! \file isc/lex.h
00028  * \brief The "lex" module provides a lightweight tokenizer.  It can operate
00029  * on files or buffers, and can handle "include".  It is designed for
00030  * parsing of DNS master files and the BIND configuration file, but
00031  * should be general enough to tokenize other things, e.g. HTTP.
00032  *
00033  * \li MP:
00034  *      No synchronization is provided.  Clients must ensure exclusive
00035  *      access.
00036  *
00037  * \li Reliability:
00038  *      No anticipated impact.
00039  *
00040  * \li Resources:
00041  *      TBS
00042  *
00043  * \li Security:
00044  *      No anticipated impact.
00045  *
00046  * \li Standards:
00047  *      None.
00048  */
00049 
00050 /***
00051  *** Imports
00052  ***/
00053 
00054 #include <stdio.h>
00055 
00056 #include <isc/lang.h>
00057 #include <isc/region.h>
00058 #include <isc/types.h>
00059 
00060 ISC_LANG_BEGINDECLS
00061 
00062 /***
00063  *** Options
00064  ***/
00065 
00066 /*@{*/
00067 /*!
00068  * Various options for isc_lex_gettoken().
00069  */
00070 
00071 #define ISC_LEXOPT_EOL                  0x01    /*%< Want end-of-line token. */
00072 #define ISC_LEXOPT_EOF                  0x02    /*%< Want end-of-file token. */
00073 #define ISC_LEXOPT_INITIALWS            0x04    /*%< Want initial whitespace. */
00074 #define ISC_LEXOPT_NUMBER               0x08    /*%< Recognize numbers. */
00075 #define ISC_LEXOPT_QSTRING              0x10    /*%< Recognize qstrings. */
00076 /*@}*/
00077 
00078 /*@{*/
00079 /*!
00080  * The ISC_LEXOPT_DNSMULTILINE option handles the processing of '(' and ')' in
00081  * the DNS master file format.  If this option is set, then the
00082  * ISC_LEXOPT_INITIALWS and ISC_LEXOPT_EOL options will be ignored when
00083  * the paren count is > 0.  To use this option, '(' and ')' must be special
00084  * characters.
00085  */
00086 #define ISC_LEXOPT_DNSMULTILINE         0x20    /*%< Handle '(' and ')'. */
00087 #define ISC_LEXOPT_NOMORE               0x40    /*%< Want "no more" token. */
00088 
00089 #define ISC_LEXOPT_CNUMBER              0x80    /*%< Recognize octal and hex. */
00090 #define ISC_LEXOPT_ESCAPE               0x100   /*%< Recognize escapes. */
00091 #define ISC_LEXOPT_QSTRINGMULTILINE     0x200   /*%< Allow multiline "" strings. */
00092 #define ISC_LEXOPT_OCTAL                0x400   /*%< Expect an octal number. */
00093 /*@}*/
00094 /*@{*/
00095 /*!
00096  * Various commenting styles, which may be changed at any time with
00097  * isc_lex_setcomments().
00098  */
00099 
00100 #define ISC_LEXCOMMENT_C                0x01    /*%< C-style comments. */
00101 #define ISC_LEXCOMMENT_CPLUSPLUS        0x02    /*%< C++-style comments. */
00102 #define ISC_LEXCOMMENT_SHELL            0x04    /*%< Shell-style comments. */
00103 #define ISC_LEXCOMMENT_DNSMASTERFILE    0x08    /*%< DNS master file style comments. */
00104 /*@}*/
00105 
00106 /***
00107  *** Types
00108  ***/
00109 
00110 /*! Lex */
00111 
00112 typedef char isc_lexspecials_t[256]; /*%< Table of special characters for isc_lex_setspecials(); NOTE(review): presumably indexed by character value, nonzero meaning "special" -- confirm in lex.c. */
00113 
00114 /* Tokens */
00115 
00116 typedef enum {
00117         isc_tokentype_unknown = 0,      /*%< Unknown token type. */
00118         isc_tokentype_string = 1,       /*%< String. */
00119         isc_tokentype_number = 2,       /*%< Number (see ISC_LEXOPT_NUMBER). */
00120         isc_tokentype_qstring = 3,      /*%< Quoted string (see ISC_LEXOPT_QSTRING). */
00121         isc_tokentype_eol = 4,          /*%< End of line (see ISC_LEXOPT_EOL). */
00122         isc_tokentype_eof = 5,          /*%< End of file (see ISC_LEXOPT_EOF). */
00123         isc_tokentype_initialws = 6,    /*%< Initial whitespace (see ISC_LEXOPT_INITIALWS). */
00124         isc_tokentype_special = 7,      /*%< Special character (see isc_lex_setspecials()). */
00125         isc_tokentype_nomore = 8        /*%< "No more" token (see ISC_LEXOPT_NOMORE). */
00126 } isc_tokentype_t;
00127 
00128 typedef union {
00129         char                            as_char;        /*%< NOTE(review): presumably set for isc_tokentype_special -- confirm in lex.c. */
00130         unsigned long                   as_ulong;       /*%< NOTE(review): presumably set for isc_tokentype_number -- confirm in lex.c. */
00131         isc_region_t                    as_region;
00132         isc_textregion_t                as_textregion;  /*%< NOTE(review): presumably set for string and qstring tokens -- confirm in lex.c. */
00133         void *                          as_pointer;
00134 } isc_tokenvalue_t; /*%< Value carried by an isc_token_t. */
00135 
00136 typedef struct isc_token {
00137         isc_tokentype_t                 type;   /*%< Kind of token. */
00138         isc_tokenvalue_t                value;  /*%< Token value; which union member is meaningful presumably depends on 'type' -- confirm in lex.c. */
00139 } isc_token_t;
00140 
00141 /***
00142  *** Functions
00143  ***/
00144 
00145 isc_result_t
00146 isc_lex_create(isc_mem_t *mctx, size_t max_token, isc_lex_t **lexp);
00147 /*%<
00148  * Create a lexer.
00149  *
00150  * 'max_token' is a hint of the number of bytes in the largest token.
00151  *
00152  * Requires:
00153  *\li   'lexp' is a valid pointer and '*lexp' is NULL.
00154  *
00155  *\li   max_token > 0.
00156  *
00157  * Ensures:
00158  *\li   On success, *lexp is attached to the newly created lexer.
00159  *
00160  * Returns:
00161  *\li   #ISC_R_SUCCESS
00162  *\li   #ISC_R_NOMEMORY
00163  */
00164 
00165 void
00166 isc_lex_destroy(isc_lex_t **lexp);
00167 /*%<
00168  * Destroy the lexer.
00169  *
00170  * Requires:
00171  *\li   '*lexp' is a valid lexer.
00172  *
00173  * Ensures:
00174  *\li   *lexp == NULL
00175  */
00176 
00177 unsigned int
00178 isc_lex_getcomments(isc_lex_t *lex);
00179 /*%<
00180  * Return the current lexer commenting styles.
00181  *
00182  * Requires:
00183  *\li   'lex' is a valid lexer.
00184  *
00185  * Returns:
00186  *\li   The commenting styles which are currently allowed.
00187  */
00188 
00189 void
00190 isc_lex_setcomments(isc_lex_t *lex, unsigned int comments);
00191 /*%<
00192  * Set allowed lexer commenting styles.
00193  *
00194  * Requires:
00195  *\li   'lex' is a valid lexer.
00196  *
00197  *\li   'comments' has meaningful values.
00198  */
00199 
00200 void
00201 isc_lex_getspecials(isc_lex_t *lex, isc_lexspecials_t specials);
00202 /*%<
00203  * Put the current list of specials into 'specials'.
00204  *
00205  * Requires:
00206  *\li   'lex' is a valid lexer.
00207  */
00208 
00209 void
00210 isc_lex_setspecials(isc_lex_t *lex, isc_lexspecials_t specials);
00211 /*%<
00212  * The characters in 'specials' are returned as tokens.  Along with
00213  * whitespace, they delimit strings and numbers.
00214  *
00215  * Note:
00216  *\li   Comment processing takes precedence over special character
00217  *      recognition.
00218  *
00219  * Requires:
00220  *\li   'lex' is a valid lexer.
00221  */
00222 
00223 isc_result_t
00224 isc_lex_openfile(isc_lex_t *lex, const char *filename);
00225 /*%<
00226  * Open 'filename' and make it the current input source for 'lex'.
00227  *
00228  * Requires:
00229  *\li   'lex' is a valid lexer.
00230  *
00231  *\li   filename is a valid C string.
00232  *
00233  * Returns:
00234  *\li   #ISC_R_SUCCESS
00235  *\li   #ISC_R_NOMEMORY                 Out of memory
00236  *\li   #ISC_R_NOTFOUND                 File not found
00237  *\li   #ISC_R_NOPERM                   No permission to open file
00238  *\li   #ISC_R_FAILURE                  Couldn't open file, not sure why
00239  *\li   #ISC_R_UNEXPECTED
00240  */
00241 
00242 isc_result_t
00243 isc_lex_openstream(isc_lex_t *lex, FILE *stream);
00244 /*%<
00245  * Make 'stream' the current input source for 'lex'.
00246  *
00247  * Requires:
00248  *\li   'lex' is a valid lexer.
00249  *
00250  *\li   'stream' is a valid C stream.
00251  *
00252  * Returns:
00253  *\li   #ISC_R_SUCCESS
00254  *\li   #ISC_R_NOMEMORY                 Out of memory
00255  */
00256 
00257 isc_result_t
00258 isc_lex_openbuffer(isc_lex_t *lex, isc_buffer_t *buffer);
00259 /*%<
00260  * Make 'buffer' the current input source for 'lex'.
00261  *
00262  * Requires:
00263  *\li   'lex' is a valid lexer.
00264  *
00265  *\li   'buffer' is a valid buffer.
00266  *
00267  * Returns:
00268  *\li   #ISC_R_SUCCESS
00269  *\li   #ISC_R_NOMEMORY                 Out of memory
00270  */
00271 
00272 isc_result_t
00273 isc_lex_close(isc_lex_t *lex);
00274 /*%<
00275  * Close the most recently opened input source (file, stream, or buffer).
00276  *
00277  * Returns:
00278  *\li   #ISC_R_SUCCESS
00279  *\li   #ISC_R_NOMORE                   No more input sources
00280  */
00281 
00282 isc_result_t
00283 isc_lex_gettoken(isc_lex_t *lex, unsigned int options, isc_token_t *tokenp);
00284 /*%<
00285  * Get the next token.
00286  *
00287  * Requires:
00288  *\li   'lex' is a valid lexer.
00289  *
00290  *\li   'lex' has an input source.
00291  *
00292  *\li   'options' contains valid options.
00293  *
00294  *\li   'tokenp' is a valid pointer.
00295  *
00296  * Returns:
00297  *\li   #ISC_R_SUCCESS
00298  *\li   #ISC_R_UNEXPECTEDEND
00299  *\li   #ISC_R_NOMEMORY
00300  *
00301  *      The following two results are returned only if their corresponding
00302  *      lexer options (ISC_LEXOPT_EOF and ISC_LEXOPT_NOMORE) are not set.
00303  *
00304  *\li   #ISC_R_EOF                      End of input source
00305  *\li   #ISC_R_NOMORE                   No more input sources
00306  */
00307 
00308 isc_result_t
00309 isc_lex_getmastertoken(isc_lex_t *lex, isc_token_t *token,
00310                        isc_tokentype_t expect, isc_boolean_t eol);
00311 /*%<
00312  * Get the next token from a DNS master file type stream.  This is a
00313  * convenience function that sets appropriate options and handles quoted
00314  * strings and end of line correctly for master files.  It also ungets
00315  * unexpected tokens.
00316  *
00317  * Requires:
00318  *\li   'lex' is a valid lexer.
00319  *
00320  *\li   'token' is a valid pointer
00321  *
00322  * Returns:
00323  *
00324  * \li  any return code from isc_lex_gettoken().
00325  */
00326 
00327 isc_result_t
00328 isc_lex_getoctaltoken(isc_lex_t *lex, isc_token_t *token, isc_boolean_t eol);
00329 /*%<
00330  * Get the next token from a DNS master file type stream.  This is a
00331  * convenience function that sets appropriate options and handles end
00332  * of line correctly for master files.  It also ungets unexpected tokens.
00333  *
00334  * Requires:
00335  *\li   'lex' is a valid lexer.
00336  *
00337  *\li   'token' is a valid pointer
00338  *
00339  * Returns:
00340  *
00341  * \li  any return code from isc_lex_gettoken().
00342  */
00343 
00344 void
00345 isc_lex_ungettoken(isc_lex_t *lex, isc_token_t *tokenp);
00346 /*%<
00347  * Unget the current token.
00348  *
00349  * Requires:
00350  *\li   'lex' is a valid lexer.
00351  *
00352  *\li   'lex' has an input source.
00353  *
00354  *\li   'tokenp' points to a valid token.
00355  *
00356  *\li   There is no ungotten token already.
00357  */
00358 
00359 void
00360 isc_lex_getlasttokentext(isc_lex_t *lex, isc_token_t *tokenp, isc_region_t *r);
00361 /*%<
00362  * Returns a region containing the text of the last token returned.
00363  *
00364  * Requires:
00365  *\li   'lex' is a valid lexer.
00366  *
00367  *\li   'lex' has an input source.
00368  *
00369  *\li   'tokenp' points to a valid token.
00370  *
00371  *\li   A token has been gotten and not ungotten.
00372  */
00373 
00374 char *
00375 isc_lex_getsourcename(isc_lex_t *lex);
00376 /*%<
00377  * Return the input source name.
00378  *
00379  * Requires:
00380  *\li   'lex' is a valid lexer.
00381  *
00382  * Returns:
00383  *\li   Source name, or NULL if there is no current source.
00384  *\li   The result is valid while the current input source exists.
00385  */
00386 
00387 
00388 unsigned long
00389 isc_lex_getsourceline(isc_lex_t *lex);
00390 /*%<
00391  * Return the input source line number.
00392  *
00393  * Requires:
00394  *\li   'lex' is a valid lexer.
00395  *
00396  * Returns:
00397  *\li   Current line number or 0 if no current source.
00398  */
00399 
00400 isc_result_t
00401 isc_lex_setsourcename(isc_lex_t *lex, const char *name);
00402 /*%<
00403  * Assigns a new name to the input source.
00404  *
00405  * Requires:
00406  *
00407  * \li  'lex' is a valid lexer.
00408  *
00409  * Returns:
00410  * \li  #ISC_R_SUCCESS
00411  * \li  #ISC_R_NOMEMORY
00412  * \li  #ISC_R_NOTFOUND - there are no sources.
00413  */
00414 
00415 isc_boolean_t
00416 isc_lex_isfile(isc_lex_t *lex);
00417 /*%<
00418  * Return whether the current input source is a file.
00419  *
00420  * Requires:
00421  *\li   'lex' is a valid lexer.
00422  *
00423  * Returns:
00424  * \li  #ISC_TRUE if the current input is a file,
00425  *\li   #ISC_FALSE otherwise.
00426  */
00427 
00428 
00429 ISC_LANG_ENDDECLS
00430 
00431 #endif /* ISC_LEX_H */

Generated on Tue Apr 28 17:41:04 2015 by Doxygen 1.5.4 for BIND9 Internals 9.11.0pre-alpha