journal.c

Go to the documentation of this file.
00001 /*
00002  * Copyright (C) 2004, 2005, 2007-2011, 2013-2015  Internet Systems Consortium, Inc. ("ISC")
00003  * Copyright (C) 1999-2002  Internet Software Consortium.
00004  *
00005  * Permission to use, copy, modify, and/or distribute this software for any
00006  * purpose with or without fee is hereby granted, provided that the above
00007  * copyright notice and this permission notice appear in all copies.
00008  *
00009  * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
00010  * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
00011  * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
00012  * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
00013  * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
00014  * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
00015  * PERFORMANCE OF THIS SOFTWARE.
00016  */
00017 
00018 /* $Id: journal.c,v 1.120 2011/12/22 07:32:41 each Exp $ */
00019 
00020 #include <config.h>
00021 
00022 #include <stdlib.h>
00023 #include <unistd.h>
00024 #include <errno.h>
00025 
00026 #include <isc/file.h>
00027 #include <isc/mem.h>
00028 #include <isc/stdio.h>
00029 #include <isc/string.h>
00030 #include <isc/util.h>
00031 
00032 #include <dns/compress.h>
00033 #include <dns/db.h>
00034 #include <dns/dbiterator.h>
00035 #include <dns/diff.h>
00036 #include <dns/fixedname.h>
00037 #include <dns/journal.h>
00038 #include <dns/log.h>
00039 #include <dns/rdataset.h>
00040 #include <dns/rdatasetiter.h>
00041 #include <dns/result.h>
00042 #include <dns/soa.h>
00043 
00044 /*! \file
00045  * \brief Journaling.
00046  *
00047  * A journal file consists of
00048  *
00049  *   \li A fixed-size header of type journal_rawheader_t.
00050  *
00051  *   \li The index.  This is an unordered array of index entries
00052  *     of type journal_rawpos_t giving the locations
00053  *     of some arbitrary subset of the journal's addressable
00054  *     transactions.  The index entries are used as hints to
00055  *     speed up the process of locating a transaction with a given
00056  *     serial number.  Unused index entries have an "offset"
00057  *     field of zero.  The size of the index can vary between
00058  *     journal files, but does not change during the lifetime
00059  *     of a file.  The size can be zero.
00060  *
00061  *   \li The journal data.  This  consists of one or more transactions.
00062  *     Each transaction begins with a transaction header of type
00063  *     journal_rawxhdr_t.  The transaction header is followed by a
00064  *     sequence of RRs, similar in structure to an IXFR difference
00065  *     sequence (RFC1995).  That is, the pre-transaction SOA,
00066  *     zero or more other deleted RRs, the post-transaction SOA,
00067  *     and zero or more other added RRs.  Unlike in IXFR, each RR
00068  *     is prefixed with a 32-bit length.
00069  *
00070  *     The journal data part grows as new transactions are
00071  *     appended to the file.  Only those transactions
00072  *     whose serial number is current-(2^31-1) to current
00073  *     are considered "addressable" and may be pointed
00074  *     to from the header or index.  They may be preceded
00075  *     by old transactions that are no longer addressable,
00076  *     and they may be followed by transactions that were
00077  *     appended to the journal but never committed by updating
00078  *     the "end" position in the header.  The latter will
00079  *     be overwritten when new transactions are added.
00080  */
/*%
 * When true, accept IXFR difference sequences where the
 * SOA serial number does not change (BIND 8 sends such
 * sequences).
 *
 * This is a file-local flag initialized to ISC_TRUE; it is not yet
 * wired to any configuration option (hence the XXX marker below).
 */
static isc_boolean_t bind8_compat = ISC_TRUE; /* XXX config */
00087 
00088 /**************************************************************************/
00089 /*
00090  * Miscellaneous utilities.
00091  */
00092 
/*%
 * Leading arguments passed to isc_log_write() by the journal code in
 * this file; the caller appends the log level, format and arguments.
 */
#define JOURNAL_COMMON_LOGARGS \
        dns_lctx, DNS_LOGCATEGORY_GENERAL, DNS_LOGMODULE_JOURNAL

/*%
 * JOURNAL_COMMON_LOGARGS followed by the debug log level 'n', so that
 * only the format and its arguments remain to be supplied.
 */
#define JOURNAL_DEBUG_LOGARGS(n) \
        JOURNAL_COMMON_LOGARGS, ISC_LOG_DEBUG(n)
00098 
/*%
 * Store 'code' in the local variable 'result' and jump to the local
 * label 'failure' unless it is ISC_R_SUCCESS.
 *
 * It would be non-sensical (or at least obtuse) to use FAIL() with an
 * ISC_R_SUCCESS code, but the test is there to keep the Solaris compiler
 * from complaining about "end-of-loop code not reached".
 *
 * The enclosing function must declare 'result' and define 'failure:'.
 */
#define FAIL(code) \
        do { result = (code);                                   \
                if (result != ISC_R_SUCCESS) goto failure;      \
        } while (0)

/*%
 * Evaluate 'op' once, store its value in the local variable 'result',
 * and jump to the local label 'failure' if it is not ISC_R_SUCCESS.
 * Same requirements on the enclosing function as FAIL().
 */
#define CHECK(op) \
        do { result = (op);                                     \
                if (result != ISC_R_SUCCESS) goto failure;      \
        } while (0)
00113 
/*%
 * Bit in the on-disk header's 'flags' byte that mirrors the in-core
 * 'serialset' boolean (see journal_header_encode()/_decode()).
 */
#define JOURNAL_SERIALSET       0x01U

/*
 * Forward declaration; the definition is expected later in this file
 * (not visible from this part of the source).
 */
static isc_result_t index_to_disk(dns_journal_t *);
00117 
00118 static inline isc_uint32_t
00119 decode_uint32(unsigned char *p) {
00120         return ((p[0] << 24) +
00121                 (p[1] << 16) +
00122                 (p[2] <<  8) +
00123                 (p[3] <<  0));
00124 }
00125 
00126 static inline void
00127 encode_uint32(isc_uint32_t val, unsigned char *p) {
00128         p[0] = (isc_uint8_t)(val >> 24);
00129         p[1] = (isc_uint8_t)(val >> 16);
00130         p[2] = (isc_uint8_t)(val >>  8);
00131         p[3] = (isc_uint8_t)(val >>  0);
00132 }
00133 
00134 isc_result_t
00135 dns_db_createsoatuple(dns_db_t *db, dns_dbversion_t *ver, isc_mem_t *mctx,
00136                       dns_diffop_t op, dns_difftuple_t **tp)
00137 {
00138         isc_result_t result;
00139         dns_dbnode_t *node;
00140         dns_rdataset_t rdataset;
00141         dns_rdata_t rdata = DNS_RDATA_INIT;
00142         dns_fixedname_t fixed;
00143         dns_name_t *zonename;
00144 
00145         dns_fixedname_init(&fixed);
00146         zonename = dns_fixedname_name(&fixed);
00147         dns_name_copy(dns_db_origin(db), zonename, NULL);
00148 
00149         node = NULL;
00150         result = dns_db_findnode(db, zonename, ISC_FALSE, &node);
00151         if (result != ISC_R_SUCCESS)
00152                 goto nonode;
00153 
00154         dns_rdataset_init(&rdataset);
00155         result = dns_db_findrdataset(db, node, ver, dns_rdatatype_soa, 0,
00156                                      (isc_stdtime_t)0, &rdataset, NULL);
00157         if (result != ISC_R_SUCCESS)
00158                 goto freenode;
00159 
00160         result = dns_rdataset_first(&rdataset);
00161         if (result != ISC_R_SUCCESS)
00162                 goto freenode;
00163 
00164         dns_rdataset_current(&rdataset, &rdata);
00165         dns_rdataset_getownercase(&rdataset, zonename);
00166 
00167         result = dns_difftuple_create(mctx, op, zonename, rdataset.ttl,
00168                                       &rdata, tp);
00169 
00170         dns_rdataset_disassociate(&rdataset);
00171         dns_db_detachnode(db, &node);
00172         return (result);
00173 
00174  freenode:
00175         dns_db_detachnode(db, &node);
00176  nonode:
00177         UNEXPECTED_ERROR(__FILE__, __LINE__, "missing SOA");
00178         return (result);
00179 }
00180 
00181 /* Journaling */
00182 
/*%
 * On-disk representation of a "pointer" to a journal entry.
 * These are used in the journal header to locate the beginning
 * and end of the journal, and in the journal index to locate
 * other transactions.
 *
 * Both fields are 4-byte arrays read and written with
 * decode_uint32()/encode_uint32(), i.e. big-endian.
 */
typedef struct {
        unsigned char   serial[4];  /*%< SOA serial before update. */
        /*
         * XXXRTH  Should offset be 8 bytes?
         * XXXDCL ... probably, since isc_offset_t is 8 bytes on many OSs.
         * XXXAG  ... but we will not be able to seek >2G anyway on many
         *            platforms as long as we are using fseek() rather
         *            than lseek().
         */
        unsigned char   offset[4];  /*%< Offset from beginning of file. */
} journal_rawpos_t;
00200 
00201 
/*%
 * The header is of a fixed size, with some spare room for future
 * extensions.  journal_file_create() asserts that
 * sizeof(journal_rawheader_t) equals this value.
 */
#define JOURNAL_HEADER_SIZE 64 /* Bytes. */
00207 
/*%
 * The on-disk representation of the journal header.
 * All numbers are stored in big-endian order.
 *
 * The union with 'pad' forces the type to occupy exactly
 * JOURNAL_HEADER_SIZE bytes regardless of how many of them the
 * fields in 'h' actually use.
 */
typedef union {
        struct {
                /*% File format version ID. */
                unsigned char           format[16];
                /*% Position of the first addressable transaction */
                journal_rawpos_t        begin;
                /*% Position of the next (yet nonexistent) transaction. */
                journal_rawpos_t        end;
                /*% Number of index entries following the header. */
                unsigned char           index_size[4];
                /*% Source serial number. */
                unsigned char           sourceserial[4];
                /*% Flag bits; JOURNAL_SERIALSET is the one used by the
                 *  header encode/decode routines in this file. */
                unsigned char           flags;
        } h;
        /* Pad the header to a fixed size. */
        unsigned char pad[JOURNAL_HEADER_SIZE];
} journal_rawheader_t;
00229 
/*%
 * The on-disk representation of the transaction header.
 * There is one of these at the beginning of each transaction.
 * Each field is a 4-byte big-endian value (see journal_read_xhdr()
 * and journal_write_xhdr()).
 */
typedef struct {
        unsigned char   size[4];        /*%< In bytes, excluding header. */
        unsigned char   serial0[4];     /*%< SOA serial before update. */
        unsigned char   serial1[4];     /*%< SOA serial after update. */
} journal_rawxhdr_t;
00239 
/*%
 * The on-disk representation of the RR header.
 * There is one of these at the beginning of each RR.
 * The single field is a 4-byte big-endian value (see
 * journal_read_rrhdr()).
 */
typedef struct {
        unsigned char   size[4];        /*%< In bytes, excluding header. */
} journal_rawrrhdr_t;
00247 
/*%
 * The in-core representation of a journal position, i.e. the decoded
 * form of journal_rawpos_t (see journal_pos_decode()/_encode()).
 */
typedef struct {
        isc_uint32_t    serial;         /*%< SOA serial at this position. */
        isc_offset_t    offset;         /*%< Offset from beginning of file;
                                         *   zero means "not valid"
                                         *   (see POS_VALID()). */
} journal_pos_t;
00255 
/*% True when 'pos' (a journal_pos_t lvalue) has a non-zero offset. */
#define POS_VALID(pos)          ((pos).offset != 0)
/*% Mark 'pos' as unused by zeroing both its offset and its serial. */
#define POS_INVALIDATE(pos)     ((pos).offset = 0, (pos).serial = 0)
00258 
/*%
 * The in-core representation of the journal header; the decoded form
 * of journal_rawheader_t (see journal_header_decode()/_encode()).
 */
typedef struct {
        unsigned char   format[16];     /*%< File format version ID. */
        journal_pos_t   begin;          /*%< First addressable transaction. */
        journal_pos_t   end;            /*%< Next (not yet existing) one. */
        isc_uint32_t    index_size;     /*%< Number of index entries. */
        isc_uint32_t    sourceserial;   /*%< Source serial number. */
        isc_boolean_t   serialset;      /*%< Mirrors JOURNAL_SERIALSET. */
} journal_header_t;
00267 
/*%
 * The in-core representation of the transaction header
 * (decoded form of journal_rawxhdr_t).
 */

typedef struct {
        isc_uint32_t    size;           /*%< In bytes, excluding header. */
        isc_uint32_t    serial0;        /*%< SOA serial before update. */
        isc_uint32_t    serial1;        /*%< SOA serial after update. */
} journal_xhdr_t;
00277 
/*%
 * The in-core representation of the RR header
 * (decoded form of journal_rawrrhdr_t).
 */
typedef struct {
        isc_uint32_t    size;           /*%< In bytes, excluding header. */
} journal_rrhdr_t;
00284 
00285 
/*%
 * Initial contents to store in the header of a newly created
 * journal file.
 *
 * The header starts with the magic string ";BIND LOG V9\n"
 * to identify the file as a BIND 9 journal file.  An ASCII
 * identification string is used rather than a binary magic
 * number to be consistent with BIND 8 (BIND 8 journal files
 * are ASCII text files).
 *
 * journal_open() compares the full 16-byte 'format' field of an
 * existing file against this one, so the zero bytes following the
 * string are part of the signature as well.
 */

static journal_header_t
initial_journal_header = { ";BIND LOG V9\n", { 0, 0 }, { 0, 0 }, 0, 0, 0 };
00299 
/*% True when header 'h' (a pointer) has identical begin and end offsets. */
#define JOURNAL_EMPTY(h) ((h)->begin.offset == (h)->end.offset)
00301 
/*%
 * State of an open journal.  journal_open() starts in
 * JOURNAL_STATE_INVALID and, once the header and index have been read,
 * switches to JOURNAL_STATE_WRITE or JOURNAL_STATE_READ depending on
 * whether the journal was opened writable.
 *
 * NOTE(review): the code that enters JOURNAL_STATE_TRANSACTION and
 * JOURNAL_STATE_INLINE is not visible in this part of the file.
 */
typedef enum {
        JOURNAL_STATE_INVALID,
        JOURNAL_STATE_READ,
        JOURNAL_STATE_WRITE,
        JOURNAL_STATE_TRANSACTION,
        JOURNAL_STATE_INLINE
} journal_state_t;
00309 
/*%
 * In-core state of an open journal file: the file handle, the decoded
 * header and index, plus the writer's transaction state ('x') and the
 * reader's iteration state ('it').  Instances are created by
 * journal_open().
 */
struct dns_journal {
        unsigned int            magic;          /*%< JOUR */
        isc_mem_t               *mctx;          /*%< Memory context */
        journal_state_t         state;
        char                    *filename;      /*%< Journal file name */
        FILE *                  fp;             /*%< File handle */
        isc_offset_t            offset;         /*%< Current file offset; journal_open() leaves it at -1 so that an explicit seek is required */
        journal_header_t        header;         /*%< In-core journal header */
        unsigned char           *rawindex;      /*%< In-core buffer for journal index in on-disk format */
        journal_pos_t           *index;         /*%< In-core journal index */

        /*% Current transaction state (when writing). */
        struct {
                unsigned int    n_soa;          /*%< Number of SOAs seen */
                journal_pos_t   pos[2];         /*%< Begin/end position */
        } x;

        /*% Iteration state (when reading). */
        struct {
                /* These define the part of the journal we iterate over. */
                journal_pos_t bpos;             /*%< Position before first, */
                journal_pos_t epos;             /*%< and after last transaction */
                /* The rest is iterator state. */
                isc_uint32_t current_serial;    /*%< Current SOA serial */
                isc_buffer_t source;            /*%< Data from disk */
                isc_buffer_t target;            /*%< Data from _fromwire check */
                dns_decompress_t dctx;          /*%< Dummy decompression ctx */
                dns_name_t name;                /*%< Current domain name */
                dns_rdata_t rdata;              /*%< Current rdata */
                isc_uint32_t ttl;               /*%< Current TTL */
                unsigned int xsize;             /*%< Size of transaction data */
                unsigned int xpos;              /*%< Current position in it */
                isc_result_t result;            /*%< Result of last call */
        } it;
};
00345 
/*% Magic value stored in dns_journal.magic while the object is usable. */
#define DNS_JOURNAL_MAGIC       ISC_MAGIC('J', 'O', 'U', 'R')
/*% Check that 't' carries the journal magic value. */
#define DNS_JOURNAL_VALID(t)    ISC_MAGIC_VALID(t, DNS_JOURNAL_MAGIC)
00348 
00349 static void
00350 journal_pos_decode(journal_rawpos_t *raw, journal_pos_t *cooked) {
00351         cooked->serial = decode_uint32(raw->serial);
00352         cooked->offset = decode_uint32(raw->offset);
00353 }
00354 
00355 static void
00356 journal_pos_encode(journal_rawpos_t *raw, journal_pos_t *cooked) {
00357         encode_uint32(cooked->serial, raw->serial);
00358         encode_uint32(cooked->offset, raw->offset);
00359 }
00360 
00361 static void
00362 journal_header_decode(journal_rawheader_t *raw, journal_header_t *cooked) {
00363         INSIST(sizeof(cooked->format) == sizeof(raw->h.format));
00364         memmove(cooked->format, raw->h.format, sizeof(cooked->format));
00365         journal_pos_decode(&raw->h.begin, &cooked->begin);
00366         journal_pos_decode(&raw->h.end, &cooked->end);
00367         cooked->index_size = decode_uint32(raw->h.index_size);
00368         cooked->sourceserial = decode_uint32(raw->h.sourceserial);
00369         cooked->serialset = ISC_TF(raw->h.flags & JOURNAL_SERIALSET);
00370 }
00371 
00372 static void
00373 journal_header_encode(journal_header_t *cooked, journal_rawheader_t *raw) {
00374         unsigned char flags = 0;
00375 
00376         INSIST(sizeof(cooked->format) == sizeof(raw->h.format));
00377         memset(raw->pad, 0, sizeof(raw->pad));
00378         memmove(raw->h.format, cooked->format, sizeof(raw->h.format));
00379         journal_pos_encode(&raw->h.begin, &cooked->begin);
00380         journal_pos_encode(&raw->h.end, &cooked->end);
00381         encode_uint32(cooked->index_size, raw->h.index_size);
00382         encode_uint32(cooked->sourceserial, raw->h.sourceserial);
00383         if (cooked->serialset)
00384                 flags |= JOURNAL_SERIALSET;
00385         raw->h.flags = flags;
00386 }
00387 
00388 /*
00389  * Journal file I/O subroutines, with error checking and reporting.
00390  */
00391 static isc_result_t
00392 journal_seek(dns_journal_t *j, isc_uint32_t offset) {
00393         isc_result_t result;
00394 
00395         result = isc_stdio_seek(j->fp, (off_t)offset, SEEK_SET);
00396         if (result != ISC_R_SUCCESS) {
00397                 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
00398                               "%s: seek: %s", j->filename,
00399                               isc_result_totext(result));
00400                 return (ISC_R_UNEXPECTED);
00401         }
00402         j->offset = offset;
00403         return (ISC_R_SUCCESS);
00404 }
00405 
00406 static isc_result_t
00407 journal_read(dns_journal_t *j, void *mem, size_t nbytes) {
00408         isc_result_t result;
00409 
00410         result = isc_stdio_read(mem, 1, nbytes, j->fp, NULL);
00411         if (result != ISC_R_SUCCESS) {
00412                 if (result == ISC_R_EOF)
00413                         return (ISC_R_NOMORE);
00414                 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
00415                               "%s: read: %s",
00416                               j->filename, isc_result_totext(result));
00417                 return (ISC_R_UNEXPECTED);
00418         }
00419         j->offset += (isc_offset_t)nbytes;
00420         return (ISC_R_SUCCESS);
00421 }
00422 
00423 static isc_result_t
00424 journal_write(dns_journal_t *j, void *mem, size_t nbytes) {
00425         isc_result_t result;
00426 
00427         result = isc_stdio_write(mem, 1, nbytes, j->fp, NULL);
00428         if (result != ISC_R_SUCCESS) {
00429                 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
00430                               "%s: write: %s",
00431                               j->filename, isc_result_totext(result));
00432                 return (ISC_R_UNEXPECTED);
00433         }
00434         j->offset += (isc_offset_t)nbytes;
00435         return (ISC_R_SUCCESS);
00436 }
00437 
00438 static isc_result_t
00439 journal_fsync(dns_journal_t *j) {
00440         isc_result_t result;
00441         result = isc_stdio_flush(j->fp);
00442         if (result != ISC_R_SUCCESS) {
00443                 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
00444                               "%s: flush: %s",
00445                               j->filename, isc_result_totext(result));
00446                 return (ISC_R_UNEXPECTED);
00447         }
00448         result = isc_stdio_sync(j->fp);
00449         if (result != ISC_R_SUCCESS) {
00450                 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
00451                               "%s: fsync: %s",
00452                               j->filename, isc_result_totext(result));
00453                 return (ISC_R_UNEXPECTED);
00454         }
00455         return (ISC_R_SUCCESS);
00456 }
00457 
00458 /*
00459  * Read/write a transaction header at the current file position.
00460  */
00461 
00462 static isc_result_t
00463 journal_read_xhdr(dns_journal_t *j, journal_xhdr_t *xhdr) {
00464         journal_rawxhdr_t raw;
00465         isc_result_t result;
00466         result = journal_read(j, &raw, sizeof(raw));
00467         if (result != ISC_R_SUCCESS)
00468                 return (result);
00469         xhdr->size = decode_uint32(raw.size);
00470         xhdr->serial0 = decode_uint32(raw.serial0);
00471         xhdr->serial1 = decode_uint32(raw.serial1);
00472         return (ISC_R_SUCCESS);
00473 }
00474 
00475 static isc_result_t
00476 journal_write_xhdr(dns_journal_t *j, isc_uint32_t size,
00477                    isc_uint32_t serial0, isc_uint32_t serial1)
00478 {
00479         journal_rawxhdr_t raw;
00480         encode_uint32(size, raw.size);
00481         encode_uint32(serial0, raw.serial0);
00482         encode_uint32(serial1, raw.serial1);
00483         return (journal_write(j, &raw, sizeof(raw)));
00484 }
00485 
00486 
00487 /*
00488  * Read an RR header at the current file position.
00489  */
00490 
00491 static isc_result_t
00492 journal_read_rrhdr(dns_journal_t *j, journal_rrhdr_t *rrhdr) {
00493         journal_rawrrhdr_t raw;
00494         isc_result_t result;
00495         result = journal_read(j, &raw, sizeof(raw));
00496         if (result != ISC_R_SUCCESS)
00497                 return (result);
00498         rrhdr->size = decode_uint32(raw.size);
00499         return (ISC_R_SUCCESS);
00500 }
00501 
00502 static isc_result_t
00503 journal_file_create(isc_mem_t *mctx, const char *filename) {
00504         FILE *fp = NULL;
00505         isc_result_t result;
00506         journal_header_t header;
00507         journal_rawheader_t rawheader;
00508         int index_size = 56; /* XXX configurable */
00509         int size;
00510         void *mem; /* Memory for temporary index image. */
00511 
00512         INSIST(sizeof(journal_rawheader_t) == JOURNAL_HEADER_SIZE);
00513 
00514         result = isc_stdio_open(filename, "wb", &fp);
00515         if (result != ISC_R_SUCCESS) {
00516                 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
00517                               "%s: create: %s",
00518                               filename, isc_result_totext(result));
00519                 return (ISC_R_UNEXPECTED);
00520         }
00521 
00522         header = initial_journal_header;
00523         header.index_size = index_size;
00524         journal_header_encode(&header, &rawheader);
00525 
00526         size = sizeof(journal_rawheader_t) +
00527                 index_size * sizeof(journal_rawpos_t);
00528 
00529         mem = isc_mem_get(mctx, size);
00530         if (mem == NULL) {
00531                 (void)isc_stdio_close(fp);
00532                 (void)isc_file_remove(filename);
00533                 return (ISC_R_NOMEMORY);
00534         }
00535         memset(mem, 0, size);
00536         memmove(mem, &rawheader, sizeof(rawheader));
00537 
00538         result = isc_stdio_write(mem, 1, (size_t) size, fp, NULL);
00539         if (result != ISC_R_SUCCESS) {
00540                 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
00541                                  "%s: write: %s",
00542                                  filename, isc_result_totext(result));
00543                 (void)isc_stdio_close(fp);
00544                 (void)isc_file_remove(filename);
00545                 isc_mem_put(mctx, mem, size);
00546                 return (ISC_R_UNEXPECTED);
00547         }
00548         isc_mem_put(mctx, mem, size);
00549 
00550         result = isc_stdio_close(fp);
00551         if (result != ISC_R_SUCCESS) {
00552                 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
00553                                  "%s: close: %s",
00554                                  filename, isc_result_totext(result));
00555                 (void)isc_file_remove(filename);
00556                 return (ISC_R_UNEXPECTED);
00557         }
00558 
00559         return (ISC_R_SUCCESS);
00560 }
00561 
00562 static isc_result_t
00563 journal_open(isc_mem_t *mctx, const char *filename, isc_boolean_t writable,
00564              isc_boolean_t create, dns_journal_t **journalp)
00565 {
00566         FILE *fp = NULL;
00567         isc_result_t result;
00568         journal_rawheader_t rawheader;
00569         dns_journal_t *j;
00570 
00571         INSIST(journalp != NULL && *journalp == NULL);
00572         j = isc_mem_get(mctx, sizeof(*j));
00573         if (j == NULL)
00574                 return (ISC_R_NOMEMORY);
00575 
00576         j->mctx = NULL;
00577         isc_mem_attach(mctx, &j->mctx);
00578         j->state = JOURNAL_STATE_INVALID;
00579         j->fp = NULL;
00580         j->filename = isc_mem_strdup(mctx, filename);
00581         j->index = NULL;
00582         j->rawindex = NULL;
00583 
00584         if (j->filename == NULL)
00585                 FAIL(ISC_R_NOMEMORY);
00586 
00587         result = isc_stdio_open(j->filename, writable ? "rb+" : "rb", &fp);
00588 
00589         if (result == ISC_R_FILENOTFOUND) {
00590                 if (create) {
00591                         isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_DEBUG(1),
00592                                       "journal file %s does not exist, "
00593                                       "creating it", j->filename);
00594                         CHECK(journal_file_create(mctx, filename));
00595                         /*
00596                          * Retry.
00597                          */
00598                         result = isc_stdio_open(j->filename, "rb+", &fp);
00599                 } else {
00600                         FAIL(ISC_R_NOTFOUND);
00601                 }
00602         }
00603         if (result != ISC_R_SUCCESS) {
00604                 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
00605                               "%s: open: %s",
00606                               j->filename, isc_result_totext(result));
00607                 FAIL(ISC_R_UNEXPECTED);
00608         }
00609 
00610         j->fp = fp;
00611 
00612         /*
00613          * Set magic early so that seek/read can succeed.
00614          */
00615         j->magic = DNS_JOURNAL_MAGIC;
00616 
00617         CHECK(journal_seek(j, 0));
00618         CHECK(journal_read(j, &rawheader, sizeof(rawheader)));
00619 
00620         if (memcmp(rawheader.h.format, initial_journal_header.format,
00621                    sizeof(initial_journal_header.format)) != 0) {
00622                 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
00623                                  "%s: journal format not recognized",
00624                                  j->filename);
00625                 FAIL(ISC_R_UNEXPECTED);
00626         }
00627         journal_header_decode(&rawheader, &j->header);
00628 
00629         /*
00630          * If there is an index, read the raw index into a dynamically
00631          * allocated buffer and then convert it into a cooked index.
00632          */
00633         if (j->header.index_size != 0) {
00634                 unsigned int i;
00635                 unsigned int rawbytes;
00636                 unsigned char *p;
00637 
00638                 rawbytes = j->header.index_size * sizeof(journal_rawpos_t);
00639                 j->rawindex = isc_mem_get(mctx, rawbytes);
00640                 if (j->rawindex == NULL)
00641                         FAIL(ISC_R_NOMEMORY);
00642 
00643                 CHECK(journal_read(j, j->rawindex, rawbytes));
00644 
00645                 j->index = isc_mem_get(mctx, j->header.index_size *
00646                                        sizeof(journal_pos_t));
00647                 if (j->index == NULL)
00648                         FAIL(ISC_R_NOMEMORY);
00649 
00650                 p = j->rawindex;
00651                 for (i = 0; i < j->header.index_size; i++) {
00652                         j->index[i].serial = decode_uint32(p);
00653                         p += 4;
00654                         j->index[i].offset = decode_uint32(p);
00655                         p += 4;
00656                 }
00657                 INSIST(p == j->rawindex + rawbytes);
00658         }
00659         j->offset = -1; /* Invalid, must seek explicitly. */
00660 
00661         /*
00662          * Initialize the iterator.
00663          */
00664         dns_name_init(&j->it.name, NULL);
00665         dns_rdata_init(&j->it.rdata);
00666 
00667         /*
00668          * Set up empty initial buffers for unchecked and checked
00669          * wire format RR data.  They will be reallocated
00670          * later.
00671          */
00672         isc_buffer_init(&j->it.source, NULL, 0);
00673         isc_buffer_init(&j->it.target, NULL, 0);
00674         dns_decompress_init(&j->it.dctx, -1, DNS_DECOMPRESS_NONE);
00675 
00676         j->state =
00677                 writable ? JOURNAL_STATE_WRITE : JOURNAL_STATE_READ;
00678 
00679         *journalp = j;
00680         return (ISC_R_SUCCESS);
00681 
00682  failure:
00683         j->magic = 0;
00684         if (j->index != NULL) {
00685                 isc_mem_put(j->mctx, j->index, j->header.index_size *
00686                             sizeof(journal_rawpos_t));
00687                 j->index = NULL;
00688         }
00689         if (j->filename != NULL)
00690                 isc_mem_free(j->mctx, j->filename);
00691         if (j->fp != NULL)
00692                 (void)isc_stdio_close(j->fp);
00693         isc_mem_putanddetach(&j->mctx, j, sizeof(*j));
00694         return (result);
00695 }
00696 
00697 isc_result_t
00698 dns_journal_open(isc_mem_t *mctx, const char *filename, unsigned int mode,
00699                  dns_journal_t **journalp)
00700 {
00701         isc_result_t result;
00702         size_t namelen;
00703         char backup[1024];
00704         isc_boolean_t writable, create;
00705 
00706         create = ISC_TF(mode & DNS_JOURNAL_CREATE);
00707         writable = ISC_TF(mode & (DNS_JOURNAL_WRITE|DNS_JOURNAL_CREATE));
00708 
00709         result = journal_open(mctx, filename, writable, create, journalp);
00710         if (result == ISC_R_NOTFOUND) {
00711                 namelen = strlen(filename);
00712                 if (namelen > 4U && strcmp(filename + namelen - 4, ".jnl") == 0)
00713                         namelen -= 4;
00714 
00715                 result = isc_string_printf(backup, sizeof(backup), "%.*s.jbk",
00716                                            (int)namelen, filename);
00717                 if (result != ISC_R_SUCCESS)
00718                         return (result);
00719                 result = journal_open(mctx, backup, writable, writable,
00720                                       journalp);
00721         }
00722         return (result);
00723 }
00724 
00725 /*
00726  * A comparison function defining the sorting order for
00727  * entries in the IXFR-style journal file.
00728  *
00729  * The IXFR format requires that deletions are sorted before
00730  * additions, and within either one, SOA records are sorted
00731  * before others.
00732  *
00733  * Also sort the non-SOA records by type as a courtesy to the
00734  * server receiving the IXFR - it may help reduce the amount of
00735  * rdataset merging it has to do.
00736  */
00737 static int
00738 ixfr_order(const void *av, const void *bv) {
00739         dns_difftuple_t const * const *ap = av;
00740         dns_difftuple_t const * const *bp = bv;
00741         dns_difftuple_t const *a = *ap;
00742         dns_difftuple_t const *b = *bp;
00743         int r;
00744         int bop = 0, aop = 0;
00745 
00746         switch (a->op) {
00747         case DNS_DIFFOP_DEL:
00748         case DNS_DIFFOP_DELRESIGN:
00749                 aop = 1;
00750                 break;
00751         case DNS_DIFFOP_ADD:
00752         case DNS_DIFFOP_ADDRESIGN:
00753                 aop = 0;
00754                 break;
00755         default:
00756                 INSIST(0);
00757         }
00758 
00759         switch (b->op) {
00760         case DNS_DIFFOP_DEL:
00761         case DNS_DIFFOP_DELRESIGN:
00762                 bop = 1;
00763                 break;
00764         case DNS_DIFFOP_ADD:
00765         case DNS_DIFFOP_ADDRESIGN:
00766                 bop = 0;
00767                 break;
00768         default:
00769                 INSIST(0);
00770         }
00771 
00772         r = bop - aop;
00773         if (r != 0)
00774                 return (r);
00775 
00776         r = (b->rdata.type == dns_rdatatype_soa) -
00777                 (a->rdata.type == dns_rdatatype_soa);
00778         if (r != 0)
00779                 return (r);
00780 
00781         r = (a->rdata.type - b->rdata.type);
00782         return (r);
00783 }
00784 
00785 /*
00786  * Advance '*pos' to the next journal transaction.
00787  *
00788  * Requires:
00789  *      *pos refers to a valid journal transaction.
00790  *
00791  * Ensures:
00792  *      When ISC_R_SUCCESS is returned,
00793  *      *pos refers to the next journal transaction.
00794  *
00795  * Returns one of:
00796  *
00797  *    ISC_R_SUCCESS
00798  *    ISC_R_NOMORE      *pos pointed at the last transaction
00799  *    Other results due to file errors are possible.
00800  */
00801 static isc_result_t
00802 journal_next(dns_journal_t *j, journal_pos_t *pos) {
00803         isc_result_t result;
00804         journal_xhdr_t xhdr;
00805         REQUIRE(DNS_JOURNAL_VALID(j));
00806 
00807         result = journal_seek(j, pos->offset);
00808         if (result != ISC_R_SUCCESS)
00809                 return (result);
00810 
00811         if (pos->serial == j->header.end.serial)
00812                 return (ISC_R_NOMORE);
00813         /*
00814          * Read the header of the current transaction.
00815          * This will return ISC_R_NOMORE if we are at EOF.
00816          */
00817         result = journal_read_xhdr(j, &xhdr);
00818         if (result != ISC_R_SUCCESS)
00819                 return (result);
00820 
00821         /*
00822          * Check serial number consistency.
00823          */
00824         if (xhdr.serial0 != pos->serial) {
00825                 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
00826                               "%s: journal file corrupt: "
00827                               "expected serial %u, got %u",
00828                               j->filename, pos->serial, xhdr.serial0);
00829                 return (ISC_R_UNEXPECTED);
00830         }
00831 
00832         /*
00833          * Check for offset wraparound.
00834          */
00835         if ((isc_offset_t)(pos->offset + sizeof(journal_rawxhdr_t) + xhdr.size)
00836             < pos->offset) {
00837                 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
00838                               "%s: offset too large", j->filename);
00839                 return (ISC_R_UNEXPECTED);
00840         }
00841 
00842         pos->offset += sizeof(journal_rawxhdr_t) + xhdr.size;
00843         pos->serial = xhdr.serial1;
00844         return (ISC_R_SUCCESS);
00845 }
00846 
00847 /*
00848  * If the index of the journal 'j' contains an entry "better"
00849  * than '*best_guess', replace '*best_guess' with it.
00850  *
00851  * "Better" means having a serial number closer to 'serial'
00852  * but not greater than 'serial'.
00853  */
00854 static void
00855 index_find(dns_journal_t *j, isc_uint32_t serial, journal_pos_t *best_guess) {
00856         unsigned int i;
00857         if (j->index == NULL)
00858                 return;
00859         for (i = 0; i < j->header.index_size; i++) {
00860                 if (POS_VALID(j->index[i]) &&
00861                     DNS_SERIAL_GE(serial, j->index[i].serial) &&
00862                     DNS_SERIAL_GT(j->index[i].serial, best_guess->serial))
00863                         *best_guess = j->index[i];
00864         }
00865 }
00866 
00867 /*
00868  * Add a new index entry.  If there is no room, make room by removing
00869  * the odd-numbered entries and compacting the others into the first
00870  * half of the index.  This decimates old index entries exponentially
00871  * over time, so that the index always contains a much larger fraction
00872  * of recent serial numbers than of old ones.  This is deliberate -
00873  * most index searches are for outgoing IXFR, and IXFR tends to request
00874  * recent versions more often than old ones.
00875  */
00876 static void
00877 index_add(dns_journal_t *j, journal_pos_t *pos) {
00878         unsigned int i;
00879         if (j->index == NULL)
00880                 return;
00881         /*
00882          * Search for a vacant position.
00883          */
00884         for (i = 0; i < j->header.index_size; i++) {
00885                 if (! POS_VALID(j->index[i]))
00886                         break;
00887         }
00888         if (i == j->header.index_size) {
00889                 unsigned int k = 0;
00890                 /*
00891                  * Found no vacant position.  Make some room.
00892                  */
00893                 for (i = 0; i < j->header.index_size; i += 2) {
00894                         j->index[k++] = j->index[i];
00895                 }
00896                 i = k; /* 'i' identifies the first vacant position. */
00897                 while (k < j->header.index_size) {
00898                         POS_INVALIDATE(j->index[k]);
00899                         k++;
00900                 }
00901         }
00902         INSIST(i < j->header.index_size);
00903         INSIST(! POS_VALID(j->index[i]));
00904 
00905         /*
00906          * Store the new index entry.
00907          */
00908         j->index[i] = *pos;
00909 }
00910 
00911 /*
00912  * Invalidate any existing index entries that could become
00913  * ambiguous when a new transaction with number 'serial' is added.
00914  */
00915 static void
00916 index_invalidate(dns_journal_t *j, isc_uint32_t serial) {
00917         unsigned int i;
00918         if (j->index == NULL)
00919                 return;
00920         for (i = 0; i < j->header.index_size; i++) {
00921                 if (! DNS_SERIAL_GT(serial, j->index[i].serial))
00922                         POS_INVALIDATE(j->index[i]);
00923         }
00924 }
00925 
00926 /*
00927  * Try to find a transaction with initial serial number 'serial'
00928  * in the journal 'j'.
00929  *
00930  * If found, store its position at '*pos' and return ISC_R_SUCCESS.
00931  *
00932  * If 'serial' is current (= the ending serial number of the
00933  * last transaction in the journal), set '*pos' to
00934  * the position immediately following the last transaction and
00935  * return ISC_R_SUCCESS.
00936  *
00937  * If 'serial' is within the range of addressable serial numbers
00938  * covered by the journal but that particular serial number is missing
00939  * (from the journal, not just from the index), return ISC_R_NOTFOUND.
00940  *
00941  * If 'serial' is outside the range of addressable serial numbers
00942  * covered by the journal, return ISC_R_RANGE.
00943  *
00944  */
00945 static isc_result_t
00946 journal_find(dns_journal_t *j, isc_uint32_t serial, journal_pos_t *pos) {
00947         isc_result_t result;
00948         journal_pos_t current_pos;
00949         REQUIRE(DNS_JOURNAL_VALID(j));
00950 
00951         if (DNS_SERIAL_GT(j->header.begin.serial, serial))
00952                 return (ISC_R_RANGE);
00953         if (DNS_SERIAL_GT(serial, j->header.end.serial))
00954                 return (ISC_R_RANGE);
00955         if (serial == j->header.end.serial) {
00956                 *pos = j->header.end;
00957                 return (ISC_R_SUCCESS);
00958         }
00959 
00960         current_pos = j->header.begin;
00961         index_find(j, serial, &current_pos);
00962 
00963         while (current_pos.serial != serial) {
00964                 if (DNS_SERIAL_GT(current_pos.serial, serial))
00965                         return (ISC_R_NOTFOUND);
00966                 result = journal_next(j, &current_pos);
00967                 if (result != ISC_R_SUCCESS)
00968                         return (result);
00969         }
00970         *pos = current_pos;
00971         return (ISC_R_SUCCESS);
00972 }
00973 
00974 isc_result_t
00975 dns_journal_begin_transaction(dns_journal_t *j) {
00976         isc_uint32_t offset;
00977         isc_result_t result;
00978         journal_rawxhdr_t hdr;
00979 
00980         REQUIRE(DNS_JOURNAL_VALID(j));
00981         REQUIRE(j->state == JOURNAL_STATE_WRITE ||
00982                 j->state == JOURNAL_STATE_INLINE);
00983 
00984         /*
00985          * Find the file offset where the new transaction should
00986          * be written, and seek there.
00987          */
00988         if (JOURNAL_EMPTY(&j->header)) {
00989                 offset = sizeof(journal_rawheader_t) +
00990                         j->header.index_size * sizeof(journal_rawpos_t);
00991         } else {
00992                 offset = j->header.end.offset;
00993         }
00994         j->x.pos[0].offset = offset;
00995         j->x.pos[1].offset = offset; /* Initial value, will be incremented. */
00996         j->x.n_soa = 0;
00997 
00998         CHECK(journal_seek(j, offset));
00999 
01000         /*
01001          * Write a dummy transaction header of all zeroes to reserve
01002          * space.  It will be filled in when the transaction is
01003          * finished.
01004          */
01005         memset(&hdr, 0, sizeof(hdr));
01006         CHECK(journal_write(j, &hdr, sizeof(hdr)));
01007         j->x.pos[1].offset = j->offset;
01008 
01009         j->state = JOURNAL_STATE_TRANSACTION;
01010         result = ISC_R_SUCCESS;
01011  failure:
01012         return (result);
01013 }
01014 
01015 isc_result_t
01016 dns_journal_writediff(dns_journal_t *j, dns_diff_t *diff) {
01017         dns_difftuple_t *t;
01018         isc_buffer_t buffer;
01019         void *mem = NULL;
01020         unsigned int size;
01021         isc_result_t result;
01022         isc_region_t used;
01023 
01024         REQUIRE(DNS_DIFF_VALID(diff));
01025         REQUIRE(j->state == JOURNAL_STATE_TRANSACTION);
01026 
01027         isc_log_write(JOURNAL_DEBUG_LOGARGS(3), "writing to journal");
01028         (void)dns_diff_print(diff, NULL);
01029 
01030         /*
01031          * Pass 1: determine the buffer size needed, and
01032          * keep track of SOA serial numbers.
01033          */
01034         size = 0;
01035         for (t = ISC_LIST_HEAD(diff->tuples); t != NULL;
01036              t = ISC_LIST_NEXT(t, link))
01037         {
01038                 if (t->rdata.type == dns_rdatatype_soa) {
01039                         if (j->x.n_soa < 2)
01040                                 j->x.pos[j->x.n_soa].serial =
01041                                         dns_soa_getserial(&t->rdata);
01042                         j->x.n_soa++;
01043                 }
01044                 size += sizeof(journal_rawrrhdr_t);
01045                 size += t->name.length; /* XXX should have access macro? */
01046                 size += 10;
01047                 size += t->rdata.length;
01048         }
01049 
01050         mem = isc_mem_get(j->mctx, size);
01051         if (mem == NULL)
01052                 return (ISC_R_NOMEMORY);
01053 
01054         isc_buffer_init(&buffer, mem, size);
01055 
01056         /*
01057          * Pass 2.  Write RRs to buffer.
01058          */
01059         for (t = ISC_LIST_HEAD(diff->tuples); t != NULL;
01060              t = ISC_LIST_NEXT(t, link))
01061         {
01062                 /*
01063                  * Write the RR header.
01064                  */
01065                 isc_buffer_putuint32(&buffer, t->name.length + 10 +
01066                                      t->rdata.length);
01067                 /*
01068                  * Write the owner name, RR header, and RR data.
01069                  */
01070                 isc_buffer_putmem(&buffer, t->name.ndata, t->name.length);
01071                 isc_buffer_putuint16(&buffer, t->rdata.type);
01072                 isc_buffer_putuint16(&buffer, t->rdata.rdclass);
01073                 isc_buffer_putuint32(&buffer, t->ttl);
01074                 INSIST(t->rdata.length < 65536);
01075                 isc_buffer_putuint16(&buffer, (isc_uint16_t)t->rdata.length);
01076                 INSIST(isc_buffer_availablelength(&buffer) >= t->rdata.length);
01077                 isc_buffer_putmem(&buffer, t->rdata.data, t->rdata.length);
01078         }
01079 
01080         isc_buffer_usedregion(&buffer, &used);
01081         INSIST(used.length == size);
01082 
01083         j->x.pos[1].offset += used.length;
01084 
01085         /*
01086          * Write the buffer contents to the journal file.
01087          */
01088         CHECK(journal_write(j, used.base, used.length));
01089 
01090         result = ISC_R_SUCCESS;
01091 
01092  failure:
01093         if (mem != NULL)
01094                 isc_mem_put(j->mctx, mem, size);
01095         return (result);
01096 
01097 }
01098 
01099 isc_result_t
01100 dns_journal_commit(dns_journal_t *j) {
01101         isc_result_t result;
01102         journal_rawheader_t rawheader;
01103 
01104         REQUIRE(DNS_JOURNAL_VALID(j));
01105         REQUIRE(j->state == JOURNAL_STATE_TRANSACTION ||
01106                 j->state == JOURNAL_STATE_INLINE);
01107 
01108         /*
01109          * Just write out a updated header.
01110          */
01111         if (j->state == JOURNAL_STATE_INLINE) {
01112                 CHECK(journal_fsync(j));
01113                 journal_header_encode(&j->header, &rawheader);
01114                 CHECK(journal_seek(j, 0));
01115                 CHECK(journal_write(j, &rawheader, sizeof(rawheader)));
01116                 CHECK(journal_fsync(j));
01117                 j->state = JOURNAL_STATE_WRITE;
01118                 return (ISC_R_SUCCESS);
01119         }
01120 
01121         /*
01122          * Perform some basic consistency checks.
01123          */
01124         if (j->x.n_soa != 2) {
01125                 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
01126                               "%s: malformed transaction: %d SOAs",
01127                               j->filename, j->x.n_soa);
01128                 return (ISC_R_UNEXPECTED);
01129         }
01130         if (! (DNS_SERIAL_GT(j->x.pos[1].serial, j->x.pos[0].serial) ||
01131                (bind8_compat &&
01132                 j->x.pos[1].serial == j->x.pos[0].serial)))
01133         {
01134                 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
01135                               "%s: malformed transaction: serial number "
01136                               "would decrease", j->filename);
01137                 return (ISC_R_UNEXPECTED);
01138         }
01139         if (! JOURNAL_EMPTY(&j->header)) {
01140                 if (j->x.pos[0].serial != j->header.end.serial) {
01141                         isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
01142                                          "malformed transaction: "
01143                                          "%s last serial %u != "
01144                                          "transaction first serial %u",
01145                                          j->filename,
01146                                          j->header.end.serial,
01147                                          j->x.pos[0].serial);
01148                         return (ISC_R_UNEXPECTED);
01149                 }
01150         }
01151 
01152         /*
01153          * Some old journal entries may become non-addressable
01154          * when we increment the current serial number.  Purge them
01155          * by stepping header.begin forward to the first addressable
01156          * transaction.  Also purge them from the index.
01157          */
01158         if (! JOURNAL_EMPTY(&j->header)) {
01159                 while (! DNS_SERIAL_GT(j->x.pos[1].serial,
01160                                        j->header.begin.serial)) {
01161                         CHECK(journal_next(j, &j->header.begin));
01162                 }
01163                 index_invalidate(j, j->x.pos[1].serial);
01164         }
01165 #ifdef notyet
01166         if (DNS_SERIAL_GT(last_dumped_serial, j->x.pos[1].serial)) {
01167                 force_dump(...);
01168         }
01169 #endif
01170 
01171         /*
01172          * Commit the transaction data to stable storage.
01173          */
01174         CHECK(journal_fsync(j));
01175 
01176         if (j->state == JOURNAL_STATE_TRANSACTION) {
01177                 isc_offset_t offset;
01178                 offset = (j->x.pos[1].offset - j->x.pos[0].offset) -
01179                                  sizeof(journal_rawxhdr_t);
01180                 /*
01181                  * Update the transaction header.
01182                  */
01183                 CHECK(journal_seek(j, j->x.pos[0].offset));
01184                 CHECK(journal_write_xhdr(j, offset, j->x.pos[0].serial,
01185                                          j->x.pos[1].serial));
01186         }
01187 
01188         /*
01189          * Update the journal header.
01190          */
01191         if (JOURNAL_EMPTY(&j->header))
01192                 j->header.begin = j->x.pos[0];
01193         j->header.end = j->x.pos[1];
01194         journal_header_encode(&j->header, &rawheader);
01195         CHECK(journal_seek(j, 0));
01196         CHECK(journal_write(j, &rawheader, sizeof(rawheader)));
01197 
01198         /*
01199          * Update the index.
01200          */
01201         index_add(j, &j->x.pos[0]);
01202 
01203         /*
01204          * Convert the index into on-disk format and write
01205          * it to disk.
01206          */
01207         CHECK(index_to_disk(j));
01208 
01209         /*
01210          * Commit the header to stable storage.
01211          */
01212         CHECK(journal_fsync(j));
01213 
01214         /*
01215          * We no longer have a transaction open.
01216          */
01217         j->state = JOURNAL_STATE_WRITE;
01218 
01219         result = ISC_R_SUCCESS;
01220 
01221  failure:
01222         return (result);
01223 }
01224 
01225 isc_result_t
01226 dns_journal_write_transaction(dns_journal_t *j, dns_diff_t *diff) {
01227         isc_result_t result;
01228         CHECK(dns_diff_sort(diff, ixfr_order));
01229         CHECK(dns_journal_begin_transaction(j));
01230         CHECK(dns_journal_writediff(j, diff));
01231         CHECK(dns_journal_commit(j));
01232         result = ISC_R_SUCCESS;
01233  failure:
01234         return (result);
01235 }
01236 
01237 void
01238 dns_journal_destroy(dns_journal_t **journalp) {
01239         dns_journal_t *j = *journalp;
01240         REQUIRE(DNS_JOURNAL_VALID(j));
01241 
01242         j->it.result = ISC_R_FAILURE;
01243         dns_name_invalidate(&j->it.name);
01244         dns_decompress_invalidate(&j->it.dctx);
01245         if (j->rawindex != NULL)
01246                 isc_mem_put(j->mctx, j->rawindex, j->header.index_size *
01247                             sizeof(journal_rawpos_t));
01248         if (j->index != NULL)
01249                 isc_mem_put(j->mctx, j->index, j->header.index_size *
01250                             sizeof(journal_pos_t));
01251         if (j->it.target.base != NULL)
01252                 isc_mem_put(j->mctx, j->it.target.base, j->it.target.length);
01253         if (j->it.source.base != NULL)
01254                 isc_mem_put(j->mctx, j->it.source.base, j->it.source.length);
01255         if (j->filename != NULL)
01256                 isc_mem_free(j->mctx, j->filename);
01257         if (j->fp != NULL)
01258                 (void)isc_stdio_close(j->fp);
01259         j->magic = 0;
01260         isc_mem_putanddetach(&j->mctx, j, sizeof(*j));
01261         *journalp = NULL;
01262 }
01263 
01264 /*
01265  * Roll the open journal 'j' into the database 'db'.
01266  * A new database version will be created.
01267  */
01268 
01269 /* XXX Share code with incoming IXFR? */
01270 
01271 static isc_result_t
01272 roll_forward(dns_journal_t *j, dns_db_t *db, unsigned int options) {
01273         isc_buffer_t source;            /* Transaction data from disk */
01274         isc_buffer_t target;            /* Ditto after _fromwire check */
01275         isc_uint32_t db_serial;         /* Database SOA serial */
01276         isc_uint32_t end_serial;        /* Last journal SOA serial */
01277         isc_result_t result;
01278         dns_dbversion_t *ver = NULL;
01279         journal_pos_t pos;
01280         dns_diff_t diff;
01281         unsigned int n_soa = 0;
01282         unsigned int n_put = 0;
01283         dns_diffop_t op;
01284 
01285         REQUIRE(DNS_JOURNAL_VALID(j));
01286         REQUIRE(DNS_DB_VALID(db));
01287 
01288         dns_diff_init(j->mctx, &diff);
01289 
01290         /*
01291          * Set up empty initial buffers for unchecked and checked
01292          * wire format transaction data.  They will be reallocated
01293          * later.
01294          */
01295         isc_buffer_init(&source, NULL, 0);
01296         isc_buffer_init(&target, NULL, 0);
01297 
01298         /*
01299          * Create the new database version.
01300          */
01301         CHECK(dns_db_newversion(db, &ver));
01302 
01303         /*
01304          * Get the current database SOA serial number.
01305          */
01306         CHECK(dns_db_getsoaserial(db, ver, &db_serial));
01307 
01308         /*
01309          * Locate a journal entry for the current database serial.
01310          */
01311         CHECK(journal_find(j, db_serial, &pos));
01312         /*
01313          * XXX do more drastic things, like marking zone stale,
01314          * if this fails?
01315          */
01316         /*
01317          * XXXRTH  The zone code should probably mark the zone as bad and
01318          *         scream loudly into the log if this is a dynamic update
01319          *         log reply that failed.
01320          */
01321 
01322         end_serial = dns_journal_last_serial(j);
01323         if (db_serial == end_serial)
01324                 CHECK(DNS_R_UPTODATE);
01325 
01326         CHECK(dns_journal_iter_init(j, db_serial, end_serial));
01327 
01328         for (result = dns_journal_first_rr(j);
01329              result == ISC_R_SUCCESS;
01330              result = dns_journal_next_rr(j))
01331         {
01332                 dns_name_t *name;
01333                 isc_uint32_t ttl;
01334                 dns_rdata_t *rdata;
01335                 dns_difftuple_t *tuple = NULL;
01336 
01337                 name = NULL;
01338                 rdata = NULL;
01339                 dns_journal_current_rr(j, &name, &ttl, &rdata);
01340 
01341                 if (rdata->type == dns_rdatatype_soa) {
01342                         n_soa++;
01343                         if (n_soa == 2)
01344                                 db_serial = j->it.current_serial;
01345                 }
01346 
01347                 if (n_soa == 3)
01348                         n_soa = 1;
01349                 if (n_soa == 0) {
01350                         isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
01351                                          "%s: journal file corrupt: missing "
01352                                          "initial SOA", j->filename);
01353                         FAIL(ISC_R_UNEXPECTED);
01354                 }
01355                 if ((options & DNS_JOURNALOPT_RESIGN) != 0)
01356                         op = (n_soa == 1) ? DNS_DIFFOP_DELRESIGN :
01357                                             DNS_DIFFOP_ADDRESIGN;
01358                 else
01359                         op = (n_soa == 1) ? DNS_DIFFOP_DEL : DNS_DIFFOP_ADD;
01360 
01361                 CHECK(dns_difftuple_create(diff.mctx, op, name, ttl, rdata,
01362                                            &tuple));
01363                 dns_diff_append(&diff, &tuple);
01364 
01365                 if (++n_put > 100)  {
01366                         isc_log_write(JOURNAL_DEBUG_LOGARGS(3),
01367                                       "%s: applying diff to database (%u)",
01368                                       j->filename, db_serial);
01369                         (void)dns_diff_print(&diff, NULL);
01370                         CHECK(dns_diff_apply(&diff, db, ver));
01371                         dns_diff_clear(&diff);
01372                         n_put = 0;
01373                 }
01374         }
01375         if (result == ISC_R_NOMORE)
01376                 result = ISC_R_SUCCESS;
01377         CHECK(result);
01378 
01379         if (n_put != 0) {
01380                 isc_log_write(JOURNAL_DEBUG_LOGARGS(3),
01381                               "%s: applying final diff to database (%u)",
01382                               j->filename, db_serial);
01383                 (void)dns_diff_print(&diff, NULL);
01384                 CHECK(dns_diff_apply(&diff, db, ver));
01385                 dns_diff_clear(&diff);
01386         }
01387 
01388  failure:
01389         if (ver != NULL)
01390                 dns_db_closeversion(db, &ver, result == ISC_R_SUCCESS ?
01391                                     ISC_TRUE : ISC_FALSE);
01392 
01393         if (source.base != NULL)
01394                 isc_mem_put(j->mctx, source.base, source.length);
01395         if (target.base != NULL)
01396                 isc_mem_put(j->mctx, target.base, target.length);
01397 
01398         dns_diff_clear(&diff);
01399 
01400         INSIST(ver == NULL);
01401 
01402         return (result);
01403 }
01404 
01405 isc_result_t
01406 dns_journal_rollforward(isc_mem_t *mctx, dns_db_t *db, unsigned int options,
01407                         const char *filename)
01408 {
01409         dns_journal_t *j;
01410         isc_result_t result;
01411 
01412         REQUIRE(DNS_DB_VALID(db));
01413         REQUIRE(filename != NULL);
01414 
01415         j = NULL;
01416         result = dns_journal_open(mctx, filename, DNS_JOURNAL_READ, &j);
01417         if (result == ISC_R_NOTFOUND) {
01418                 isc_log_write(JOURNAL_DEBUG_LOGARGS(3),
01419                               "no journal file, but that's OK");
01420                 return (DNS_R_NOJOURNAL);
01421         }
01422         if (result != ISC_R_SUCCESS)
01423                 return (result);
01424         if (JOURNAL_EMPTY(&j->header))
01425                 result = DNS_R_UPTODATE;
01426         else
01427                 result = roll_forward(j, db, options);
01428 
01429         dns_journal_destroy(&j);
01430 
01431         return (result);
01432 }
01433 
01434 isc_result_t
01435 dns_journal_print(isc_mem_t *mctx, const char *filename, FILE *file) {
01436         dns_journal_t *j;
01437         isc_buffer_t source;            /* Transaction data from disk */
01438         isc_buffer_t target;            /* Ditto after _fromwire check */
01439         isc_uint32_t start_serial;              /* Database SOA serial */
01440         isc_uint32_t end_serial;        /* Last journal SOA serial */
01441         isc_result_t result;
01442         dns_diff_t diff;
01443         unsigned int n_soa = 0;
01444         unsigned int n_put = 0;
01445 
01446         REQUIRE(filename != NULL);
01447 
01448         j = NULL;
01449         result = dns_journal_open(mctx, filename, DNS_JOURNAL_READ, &j);
01450         if (result == ISC_R_NOTFOUND) {
01451                 isc_log_write(JOURNAL_DEBUG_LOGARGS(3), "no journal file");
01452                 return (DNS_R_NOJOURNAL);
01453         }
01454 
01455         if (result != ISC_R_SUCCESS) {
01456                 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
01457                               "journal open failure: %s: %s",
01458                               isc_result_totext(result), filename);
01459                 return (result);
01460         }
01461 
01462         if (j->header.serialset)
01463                 fprintf(file, "Source serial = %u\n", j->header.sourceserial);
01464         dns_diff_init(j->mctx, &diff);
01465 
01466         /*
01467          * Set up empty initial buffers for unchecked and checked
01468          * wire format transaction data.  They will be reallocated
01469          * later.
01470          */
01471         isc_buffer_init(&source, NULL, 0);
01472         isc_buffer_init(&target, NULL, 0);
01473 
01474         start_serial = dns_journal_first_serial(j);
01475         end_serial = dns_journal_last_serial(j);
01476 
01477         CHECK(dns_journal_iter_init(j, start_serial, end_serial));
01478 
01479         for (result = dns_journal_first_rr(j);
01480              result == ISC_R_SUCCESS;
01481              result = dns_journal_next_rr(j))
01482         {
01483                 dns_name_t *name;
01484                 isc_uint32_t ttl;
01485                 dns_rdata_t *rdata;
01486                 dns_difftuple_t *tuple = NULL;
01487 
01488                 name = NULL;
01489                 rdata = NULL;
01490                 dns_journal_current_rr(j, &name, &ttl, &rdata);
01491 
01492                 if (rdata->type == dns_rdatatype_soa)
01493                         n_soa++;
01494 
01495                 if (n_soa == 3)
01496                         n_soa = 1;
01497                 if (n_soa == 0) {
01498                         isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
01499                                       "%s: journal file corrupt: missing "
01500                                       "initial SOA", j->filename);
01501                         FAIL(ISC_R_UNEXPECTED);
01502                 }
01503                 CHECK(dns_difftuple_create(diff.mctx, n_soa == 1 ?
01504                                            DNS_DIFFOP_DEL : DNS_DIFFOP_ADD,
01505                                            name, ttl, rdata, &tuple));
01506                 dns_diff_append(&diff, &tuple);
01507 
01508                 if (++n_put > 100)  {
01509                         result = dns_diff_print(&diff, file);
01510                         dns_diff_clear(&diff);
01511                         n_put = 0;
01512                         if (result != ISC_R_SUCCESS)
01513                                 break;
01514                 }
01515         }
01516         if (result == ISC_R_NOMORE)
01517                 result = ISC_R_SUCCESS;
01518         CHECK(result);
01519 
01520         if (n_put != 0) {
01521                 result = dns_diff_print(&diff, file);
01522                 dns_diff_clear(&diff);
01523         }
01524         goto cleanup;
01525 
01526  failure:
01527         isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
01528                       "%s: cannot print: journal file corrupt", j->filename);
01529 
01530  cleanup:
01531         if (source.base != NULL)
01532                 isc_mem_put(j->mctx, source.base, source.length);
01533         if (target.base != NULL)
01534                 isc_mem_put(j->mctx, target.base, target.length);
01535 
01536         dns_diff_clear(&diff);
01537         dns_journal_destroy(&j);
01538 
01539         return (result);
01540 }
01541 
01542 /**************************************************************************/
01543 /*
01544  * Miscellaneous accessors.
01545  */
01546 isc_uint32_t
01547 dns_journal_first_serial(dns_journal_t *j) {
01548         return (j->header.begin.serial);
01549 }
01550 
01551 isc_uint32_t
01552 dns_journal_last_serial(dns_journal_t *j) {
01553         return (j->header.end.serial);
01554 }
01555 
01556 void
01557 dns_journal_set_sourceserial(dns_journal_t *j, isc_uint32_t sourceserial) {
01558 
01559         REQUIRE(j->state == JOURNAL_STATE_WRITE ||
01560                 j->state == JOURNAL_STATE_INLINE ||
01561                 j->state == JOURNAL_STATE_TRANSACTION);
01562 
01563         j->header.sourceserial = sourceserial;
01564         j->header.serialset = ISC_TRUE;
01565         if (j->state == JOURNAL_STATE_WRITE)
01566                 j->state = JOURNAL_STATE_INLINE;
01567 }
01568 
01569 isc_boolean_t
01570 dns_journal_get_sourceserial(dns_journal_t *j, isc_uint32_t *sourceserial) {
01571         REQUIRE(sourceserial != NULL);
01572 
01573         if (!j->header.serialset)
01574                 return (ISC_FALSE);
01575         *sourceserial = j->header.sourceserial;
01576         return (ISC_TRUE);
01577 }
01578 
01579 /**************************************************************************/
01580 /*
01581  * Iteration support.
01582  *
01583  * When serving an outgoing IXFR, we transmit a part of the journal starting
01584  * at the serial number in the IXFR request and ending at the serial
01585  * number that is current when the IXFR request arrives.  The ending
01586  * serial number is not necessarily at the end of the journal:
01587  * the journal may grow while the IXFR is in progress, but we stop
01588  * when we reach the serial number that was current when the IXFR started.
01589  */
01590 
01591 static isc_result_t read_one_rr(dns_journal_t *j);
01592 
01593 /*
01594  * Make sure the buffer 'b' has at least 'size' bytes
01595  * allocated, and clear it.
01596  *
01597  * Requires:
01598  *      Either b->base is NULL, or it points to b->length bytes of memory
01599  *      previously allocated by isc_mem_get().
01600  */
01601 
01602 static isc_result_t
01603 size_buffer(isc_mem_t *mctx, isc_buffer_t *b, unsigned size) {
01604         if (b->length < size) {
01605                 void *mem = isc_mem_get(mctx, size);
01606                 if (mem == NULL)
01607                         return (ISC_R_NOMEMORY);
01608                 if (b->base != NULL)
01609                         isc_mem_put(mctx, b->base, b->length);
01610                 b->base = mem;
01611                 b->length = size;
01612         }
01613         isc_buffer_clear(b);
01614         return (ISC_R_SUCCESS);
01615 }
01616 
01617 isc_result_t
01618 dns_journal_iter_init(dns_journal_t *j,
01619                       isc_uint32_t begin_serial, isc_uint32_t end_serial)
01620 {
01621         isc_result_t result;
01622 
01623         CHECK(journal_find(j, begin_serial, &j->it.bpos));
01624         INSIST(j->it.bpos.serial == begin_serial);
01625 
01626         CHECK(journal_find(j, end_serial, &j->it.epos));
01627         INSIST(j->it.epos.serial == end_serial);
01628 
01629         result = ISC_R_SUCCESS;
01630  failure:
01631         j->it.result = result;
01632         return (j->it.result);
01633 }
01634 
01635 
01636 isc_result_t
01637 dns_journal_first_rr(dns_journal_t *j) {
01638         isc_result_t result;
01639 
01640         /*
01641          * Seek to the beginning of the first transaction we are
01642          * interested in.
01643          */
01644         CHECK(journal_seek(j, j->it.bpos.offset));
01645         j->it.current_serial = j->it.bpos.serial;
01646 
01647         j->it.xsize = 0;  /* We have no transaction data yet... */
01648         j->it.xpos = 0;   /* ...and haven't used any of it. */
01649 
01650         return (read_one_rr(j));
01651 
01652  failure:
01653         return (result);
01654 }
01655 
01656 static isc_result_t
01657 read_one_rr(dns_journal_t *j) {
01658         isc_result_t result;
01659 
01660         dns_rdatatype_t rdtype;
01661         dns_rdataclass_t rdclass;
01662         unsigned int rdlen;
01663         isc_uint32_t ttl;
01664         journal_xhdr_t xhdr;
01665         journal_rrhdr_t rrhdr;
01666 
01667         INSIST(j->offset <= j->it.epos.offset);
01668         if (j->offset == j->it.epos.offset)
01669                 return (ISC_R_NOMORE);
01670         if (j->it.xpos == j->it.xsize) {
01671                 /*
01672                  * We are at a transaction boundary.
01673                  * Read another transaction header.
01674                  */
01675                 CHECK(journal_read_xhdr(j, &xhdr));
01676                 if (xhdr.size == 0) {
01677                         isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
01678                                       "%s: journal corrupt: empty transaction",
01679                                       j->filename);
01680                         FAIL(ISC_R_UNEXPECTED);
01681                 }
01682                 if (xhdr.serial0 != j->it.current_serial) {
01683                         isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
01684                                          "%s: journal file corrupt: "
01685                                          "expected serial %u, got %u",
01686                                          j->filename,
01687                                          j->it.current_serial, xhdr.serial0);
01688                         FAIL(ISC_R_UNEXPECTED);
01689                 }
01690                 j->it.xsize = xhdr.size;
01691                 j->it.xpos = 0;
01692         }
01693         /*
01694          * Read an RR.
01695          */
01696         CHECK(journal_read_rrhdr(j, &rrhdr));
01697         /*
01698          * Perform a sanity check on the journal RR size.
01699          * The smallest possible RR has a 1-byte owner name
01700          * and a 10-byte header.  The largest possible
01701          * RR has 65535 bytes of data, a header, and a maximum-
01702          * size owner name, well below 70 k total.
01703          */
01704         if (rrhdr.size < 1+10 || rrhdr.size > 70000) {
01705                 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
01706                                  "%s: journal corrupt: impossible RR size "
01707                                  "(%d bytes)", j->filename, rrhdr.size);
01708                 FAIL(ISC_R_UNEXPECTED);
01709         }
01710 
01711         CHECK(size_buffer(j->mctx, &j->it.source, rrhdr.size));
01712         CHECK(journal_read(j, j->it.source.base, rrhdr.size));
01713         isc_buffer_add(&j->it.source, rrhdr.size);
01714 
01715         /*
01716          * The target buffer is made the same size
01717          * as the source buffer, with the assumption that when
01718          * no compression in present, the output of dns_*_fromwire()
01719          * is no larger than the input.
01720          */
01721         CHECK(size_buffer(j->mctx, &j->it.target, rrhdr.size));
01722 
01723         /*
01724          * Parse the owner name.  We don't know where it
01725          * ends yet, so we make the entire "remaining"
01726          * part of the buffer "active".
01727          */
01728         isc_buffer_setactive(&j->it.source,
01729                              j->it.source.used - j->it.source.current);
01730         CHECK(dns_name_fromwire(&j->it.name, &j->it.source,
01731                                 &j->it.dctx, 0, &j->it.target));
01732 
01733         /*
01734          * Check that the RR header is there, and parse it.
01735          */
01736         if (isc_buffer_remaininglength(&j->it.source) < 10)
01737                 FAIL(DNS_R_FORMERR);
01738 
01739         rdtype = isc_buffer_getuint16(&j->it.source);
01740         rdclass = isc_buffer_getuint16(&j->it.source);
01741         ttl = isc_buffer_getuint32(&j->it.source);
01742         rdlen = isc_buffer_getuint16(&j->it.source);
01743 
01744         /*
01745          * Parse the rdata.
01746          */
01747         if (isc_buffer_remaininglength(&j->it.source) != rdlen)
01748                 FAIL(DNS_R_FORMERR);
01749         isc_buffer_setactive(&j->it.source, rdlen);
01750         dns_rdata_reset(&j->it.rdata);
01751         CHECK(dns_rdata_fromwire(&j->it.rdata, rdclass,
01752                                  rdtype, &j->it.source, &j->it.dctx,
01753                                  0, &j->it.target));
01754         j->it.ttl = ttl;
01755 
01756         j->it.xpos += sizeof(journal_rawrrhdr_t) + rrhdr.size;
01757         if (rdtype == dns_rdatatype_soa) {
01758                 /* XXX could do additional consistency checks here */
01759                 j->it.current_serial = dns_soa_getserial(&j->it.rdata);
01760         }
01761 
01762         result = ISC_R_SUCCESS;
01763 
01764  failure:
01765         j->it.result = result;
01766         return (result);
01767 }
01768 
01769 isc_result_t
01770 dns_journal_next_rr(dns_journal_t *j) {
01771         j->it.result = read_one_rr(j);
01772         return (j->it.result);
01773 }
01774 
01775 void
01776 dns_journal_current_rr(dns_journal_t *j, dns_name_t **name, isc_uint32_t *ttl,
01777                    dns_rdata_t **rdata)
01778 {
01779         REQUIRE(j->it.result == ISC_R_SUCCESS);
01780         *name = &j->it.name;
01781         *ttl = j->it.ttl;
01782         *rdata = &j->it.rdata;
01783 }
01784 
01785 /**************************************************************************/
01786 /*
01787  * Generating diffs from databases
01788  */
01789 
01790 /*
01791  * Construct a diff containing all the RRs at the current name of the
01792  * database iterator 'dbit' in database 'db', version 'ver'.
01793  * Set '*name' to the current name, and append the diff to 'diff'.
01794  * All new tuples will have the operation 'op'.
01795  *
01796  * Requires: 'name' must have buffer large enough to hold the name.
01797  * Typically, a dns_fixedname_t would be used.
01798  */
01799 static isc_result_t
01800 get_name_diff(dns_db_t *db, dns_dbversion_t *ver, isc_stdtime_t now,
01801               dns_dbiterator_t *dbit, dns_name_t *name, dns_diffop_t op,
01802               dns_diff_t *diff)
01803 {
01804         isc_result_t result;
01805         dns_dbnode_t *node = NULL;
01806         dns_rdatasetiter_t *rdsiter = NULL;
01807         dns_difftuple_t *tuple = NULL;
01808 
01809         result = dns_dbiterator_current(dbit, &node, name);
01810         if (result != ISC_R_SUCCESS)
01811                 return (result);
01812 
01813         result = dns_db_allrdatasets(db, node, ver, now, &rdsiter);
01814         if (result != ISC_R_SUCCESS)
01815                 goto cleanup_node;
01816 
01817         for (result = dns_rdatasetiter_first(rdsiter);
01818              result == ISC_R_SUCCESS;
01819              result = dns_rdatasetiter_next(rdsiter))
01820         {
01821                 dns_rdataset_t rdataset;
01822 
01823                 dns_rdataset_init(&rdataset);
01824                 dns_rdatasetiter_current(rdsiter, &rdataset);
01825 
01826                 for (result = dns_rdataset_first(&rdataset);
01827                      result == ISC_R_SUCCESS;
01828                      result = dns_rdataset_next(&rdataset))
01829                 {
01830                         dns_rdata_t rdata = DNS_RDATA_INIT;
01831                         dns_rdataset_current(&rdataset, &rdata);
01832                         result = dns_difftuple_create(diff->mctx, op, name,
01833                                                       rdataset.ttl, &rdata,
01834                                                       &tuple);
01835                         if (result != ISC_R_SUCCESS) {
01836                                 dns_rdataset_disassociate(&rdataset);
01837                                 goto cleanup_iterator;
01838                         }
01839                         dns_diff_append(diff, &tuple);
01840                 }
01841                 dns_rdataset_disassociate(&rdataset);
01842                 if (result != ISC_R_NOMORE)
01843                         goto cleanup_iterator;
01844         }
01845         if (result != ISC_R_NOMORE)
01846                 goto cleanup_iterator;
01847 
01848         result = ISC_R_SUCCESS;
01849 
01850  cleanup_iterator:
01851         dns_rdatasetiter_destroy(&rdsiter);
01852 
01853  cleanup_node:
01854         dns_db_detachnode(db, &node);
01855 
01856         return (result);
01857 }
01858 
01859 /*
01860  * Comparison function for use by dns_diff_subtract when sorting
01861  * the diffs to be subtracted.  The sort keys are the rdata type
01862  * and the rdata itself.  The owner name is ignored, because
01863  * it is known to be the same for all tuples.
01864  */
01865 static int
01866 rdata_order(const void *av, const void *bv) {
01867         dns_difftuple_t const * const *ap = av;
01868         dns_difftuple_t const * const *bp = bv;
01869         dns_difftuple_t const *a = *ap;
01870         dns_difftuple_t const *b = *bp;
01871         int r;
01872         r = (b->rdata.type - a->rdata.type);
01873         if (r != 0)
01874                 return (r);
01875         r = dns_rdata_compare(&a->rdata, &b->rdata);
01876         return (r);
01877 }
01878 
01879 static isc_result_t
01880 dns_diff_subtract(dns_diff_t diff[2], dns_diff_t *r) {
01881         isc_result_t result;
01882         dns_difftuple_t *p[2];
01883         int i, t;
01884         isc_boolean_t append;
01885 
01886         CHECK(dns_diff_sort(&diff[0], rdata_order));
01887         CHECK(dns_diff_sort(&diff[1], rdata_order));
01888 
01889         for (;;) {
01890                 p[0] = ISC_LIST_HEAD(diff[0].tuples);
01891                 p[1] = ISC_LIST_HEAD(diff[1].tuples);
01892                 if (p[0] == NULL && p[1] == NULL)
01893                         break;
01894 
01895                 for (i = 0; i < 2; i++)
01896                         if (p[!i] == NULL) {
01897                                 ISC_LIST_UNLINK(diff[i].tuples, p[i], link);
01898                                 ISC_LIST_APPEND(r->tuples, p[i], link);
01899                                 goto next;
01900                         }
01901                 t = rdata_order(&p[0], &p[1]);
01902                 if (t < 0) {
01903                         ISC_LIST_UNLINK(diff[0].tuples, p[0], link);
01904                         ISC_LIST_APPEND(r->tuples, p[0], link);
01905                         goto next;
01906                 }
01907                 if (t > 0) {
01908                         ISC_LIST_UNLINK(diff[1].tuples, p[1], link);
01909                         ISC_LIST_APPEND(r->tuples, p[1], link);
01910                         goto next;
01911                 }
01912                 INSIST(t == 0);
01913                 /*
01914                  * Identical RRs in both databases; skip them both
01915                  * if the ttl differs.
01916                  */
01917                 append = ISC_TF(p[0]->ttl != p[1]->ttl);
01918                 for (i = 0; i < 2; i++) {
01919                         ISC_LIST_UNLINK(diff[i].tuples, p[i], link);
01920                         if (append) {
01921                                 ISC_LIST_APPEND(r->tuples, p[i], link);
01922                         } else {
01923                                 dns_difftuple_free(&p[i]);
01924                         }
01925                 }
01926         next: ;
01927         }
01928         result = ISC_R_SUCCESS;
01929  failure:
01930         return (result);
01931 }
01932 
01933 static isc_result_t
01934 diff_namespace(dns_db_t *dba, dns_dbversion_t *dbvera,
01935                dns_db_t *dbb, dns_dbversion_t *dbverb,
01936                unsigned int options, dns_diff_t *resultdiff)
01937 {
01938         dns_db_t *db[2];
01939         dns_dbversion_t *ver[2];
01940         dns_dbiterator_t *dbit[2] = { NULL, NULL };
01941         isc_boolean_t have[2] = { ISC_FALSE, ISC_FALSE };
01942         dns_fixedname_t fixname[2];
01943         isc_result_t result, itresult[2];
01944         dns_diff_t diff[2];
01945         int i, t;
01946 
01947         db[0] = dba, db[1] = dbb;
01948         ver[0] = dbvera, ver[1] = dbverb;
01949 
01950         dns_diff_init(resultdiff->mctx, &diff[0]);
01951         dns_diff_init(resultdiff->mctx, &diff[1]);
01952 
01953         dns_fixedname_init(&fixname[0]);
01954         dns_fixedname_init(&fixname[1]);
01955 
01956         result = dns_db_createiterator(db[0], options, &dbit[0]);
01957         if (result != ISC_R_SUCCESS)
01958                 return (result);
01959         result = dns_db_createiterator(db[1], options, &dbit[1]);
01960         if (result != ISC_R_SUCCESS)
01961                 goto cleanup_iterator;
01962 
01963         itresult[0] = dns_dbiterator_first(dbit[0]);
01964         itresult[1] = dns_dbiterator_first(dbit[1]);
01965 
01966         for (;;) {
01967                 for (i = 0; i < 2; i++) {
01968                         if (! have[i] && itresult[i] == ISC_R_SUCCESS) {
01969                                 CHECK(get_name_diff(db[i], ver[i], 0, dbit[i],
01970                                             dns_fixedname_name(&fixname[i]),
01971                                             i == 0 ?
01972                                             DNS_DIFFOP_ADD :
01973                                             DNS_DIFFOP_DEL,
01974                                             &diff[i]));
01975                                 itresult[i] = dns_dbiterator_next(dbit[i]);
01976                                 have[i] = ISC_TRUE;
01977                         }
01978                 }
01979 
01980                 if (! have[0] && ! have[1]) {
01981                         INSIST(ISC_LIST_EMPTY(diff[0].tuples));
01982                         INSIST(ISC_LIST_EMPTY(diff[1].tuples));
01983                         break;
01984                 }
01985 
01986                 for (i = 0; i < 2; i++) {
01987                         if (! have[!i]) {
01988                                 ISC_LIST_APPENDLIST(resultdiff->tuples,
01989                                                     diff[i].tuples, link);
01990                                 INSIST(ISC_LIST_EMPTY(diff[i].tuples));
01991                                 have[i] = ISC_FALSE;
01992                                 goto next;
01993                         }
01994                 }
01995 
01996                 t = dns_name_compare(dns_fixedname_name(&fixname[0]),
01997                                      dns_fixedname_name(&fixname[1]));
01998                 if (t < 0) {
01999                         ISC_LIST_APPENDLIST(resultdiff->tuples,
02000                                             diff[0].tuples, link);
02001                         INSIST(ISC_LIST_EMPTY(diff[0].tuples));
02002                         have[0] = ISC_FALSE;
02003                         continue;
02004                 }
02005                 if (t > 0) {
02006                         ISC_LIST_APPENDLIST(resultdiff->tuples,
02007                                             diff[1].tuples, link);
02008                         INSIST(ISC_LIST_EMPTY(diff[1].tuples));
02009                         have[1] = ISC_FALSE;
02010                         continue;
02011                 }
02012                 INSIST(t == 0);
02013                 CHECK(dns_diff_subtract(diff, resultdiff));
02014                 INSIST(ISC_LIST_EMPTY(diff[0].tuples));
02015                 INSIST(ISC_LIST_EMPTY(diff[1].tuples));
02016                 have[0] = have[1] = ISC_FALSE;
02017         next: ;
02018         }
02019         if (itresult[0] != ISC_R_NOMORE)
02020                 FAIL(itresult[0]);
02021         if (itresult[1] != ISC_R_NOMORE)
02022                 FAIL(itresult[1]);
02023 
02024         INSIST(ISC_LIST_EMPTY(diff[0].tuples));
02025         INSIST(ISC_LIST_EMPTY(diff[1].tuples));
02026 
02027  failure:
02028         dns_dbiterator_destroy(&dbit[1]);
02029 
02030  cleanup_iterator:
02031         dns_dbiterator_destroy(&dbit[0]);
02032         dns_diff_clear(&diff[0]);
02033         dns_diff_clear(&diff[1]);
02034         return (result);
02035 }
02036 
02037 /*
02038  * Compare the databases 'dba' and 'dbb' and generate a journal
02039  * entry containing the changes to make 'dba' from 'dbb' (note
02040  * the order).  This journal entry will consist of a single,
02041  * possibly very large transaction.
02042  */
02043 isc_result_t
02044 dns_db_diff(isc_mem_t *mctx, dns_db_t *dba, dns_dbversion_t *dbvera,
02045             dns_db_t *dbb, dns_dbversion_t *dbverb, const char *filename)
02046 {
02047         isc_result_t result;
02048         dns_diff_t diff;
02049 
02050         dns_diff_init(mctx, &diff);
02051 
02052         result = dns_db_diffx(&diff, dba, dbvera, dbb, dbverb, filename);
02053 
02054         dns_diff_clear(&diff);
02055 
02056         return (result);
02057 }
02058 
02059 isc_result_t
02060 dns_db_diffx(dns_diff_t *diff, dns_db_t *dba, dns_dbversion_t *dbvera,
02061              dns_db_t *dbb, dns_dbversion_t *dbverb, const char *filename)
02062 {
02063         isc_result_t result;
02064         dns_journal_t *journal = NULL;
02065 
02066         if (filename != NULL) {
02067                 result = dns_journal_open(diff->mctx, filename,
02068                                           DNS_JOURNAL_CREATE, &journal);
02069                 if (result != ISC_R_SUCCESS)
02070                         return (result);
02071         }
02072 
02073         CHECK(diff_namespace(dba, dbvera, dbb, dbverb, DNS_DB_NONSEC3, diff));
02074         CHECK(diff_namespace(dba, dbvera, dbb, dbverb, DNS_DB_NSEC3ONLY, diff));
02075 
02076         if (journal != NULL) {
02077                 if (ISC_LIST_EMPTY(diff->tuples))
02078                         isc_log_write(JOURNAL_DEBUG_LOGARGS(3), "no changes");
02079                 else
02080                         CHECK(dns_journal_write_transaction(journal, diff));
02081         }
02082 
02083  failure:
02084         if (journal != NULL)
02085                 dns_journal_destroy(&journal);
02086         return (result);
02087 }
02088 
/*
 * Compact the journal file 'filename'.
 *
 * The tail of the existing journal, from a chosen starting position up
 * to the journal's end, is copied into a new file (the original name
 * with a trailing ".jnl" replaced by ".jnw"), a fresh header and index
 * are written there, and the new file is then renamed over 'filename'.
 * A ".jbk" name is used as an intermediate backup when a direct rename
 * fails with EEXIST.
 *
 * Returns:
 *      ISC_R_SUCCESS   compaction done, or nothing needed doing (empty
 *                      journal, or end offset already below the
 *                      adjusted 'target_size')
 *      ISC_R_RANGE     'serial' lies outside the journal's
 *                      begin..end serial range
 *      ISC_R_NOMEMORY  the copy buffer could not be allocated
 *      ISC_R_FAILURE   a rename() call failed
 *      other           whatever a failing helper call returned
 */
02089 isc_result_t
02090 dns_journal_compact(isc_mem_t *mctx, char *filename, isc_uint32_t serial,
02091                     isc_uint32_t target_size)
02092 {
02093         unsigned int i;
02094         journal_pos_t best_guess;
02095         journal_pos_t current_pos;
02096         dns_journal_t *j = NULL;
02097         dns_journal_t *new = NULL;
02098         journal_rawheader_t rawheader;
02099         unsigned int copy_length;
02100         size_t namelen;
02101         char *buf = NULL;
02102         unsigned int size = 0;
02103         isc_result_t result;
02104         unsigned int indexend;
02105         char newname[1024];
02106         char backup[1024];
02107         isc_boolean_t is_backup = ISC_FALSE;
02108
              /*
               * Derive the work-file and backup names from 'filename',
               * dropping a trailing ".jnl" if there is one.
               */
02109         namelen = strlen(filename);
02110         if (namelen > 4U && strcmp(filename + namelen - 4, ".jnl") == 0)
02111                 namelen -= 4;
02112
02113         result = isc_string_printf(newname, sizeof(newname), "%.*s.jnw",
02114                                    (int)namelen, filename);
02115         if (result != ISC_R_SUCCESS)
02116                 return (result);
02117
02118         result = isc_string_printf(backup, sizeof(backup), "%.*s.jbk",
02119                                    (int)namelen, filename);
02120         if (result != ISC_R_SUCCESS)
02121                 return (result);
02122
              /*
               * Open the journal; if it is not found, fall back to the
               * backup file and remember that we did so.
               */
02123         result = journal_open(mctx, filename, ISC_FALSE, ISC_FALSE, &j);
02124         if (result == ISC_R_NOTFOUND) {
02125                 is_backup = ISC_TRUE;
02126                 result = journal_open(mctx, backup, ISC_FALSE, ISC_FALSE, &j);
02127         }
02128         if (result != ISC_R_SUCCESS)
02129                 return (result);
02130
02131         if (JOURNAL_EMPTY(&j->header)) {
02132                 dns_journal_destroy(&j);
02133                 return (ISC_R_SUCCESS);
02134         }
02135
              /* 'serial' must lie within the journal's serial range. */
02136         if (DNS_SERIAL_GT(j->header.begin.serial, serial) ||
02137             DNS_SERIAL_GT(serial, j->header.end.serial)) {
02138                 dns_journal_destroy(&j);
02139                 return (ISC_R_RANGE);
02140         }
02141
02142         /*
02143          * Cope with very small target sizes.
02144          */
02145         indexend = sizeof(journal_rawheader_t) +
02146                    j->header.index_size * sizeof(journal_rawpos_t);
02147         if (target_size < indexend * 2)
02148                 target_size = target_size/2 + indexend;
02149
02150         /*
02151          * See if there is any work to do.
02152          */
02153         if ((isc_uint32_t) j->header.end.offset < target_size) {
02154                 dns_journal_destroy(&j);
02155                 return (ISC_R_SUCCESS);
02156         }
02157
              /*
               * From here on every error exit goes through 'failure',
               * which removes the work file and releases both journals.
               */
02158         CHECK(journal_open(mctx, newname, ISC_TRUE, ISC_TRUE, &new));
02159
02160         /*
02161          * Remove overhead so space test below can succeed.
02162          */
02163         if (target_size >= indexend)
02164                 target_size -= indexend;
02165
02166         /*
02167          * Find if we can create enough free space.
02168          */
              /*
               * Among the valid index entries whose serial is not after
               * 'serial', pick the one with the largest offset that still
               * leaves at least target_size / 2 bytes up to the end.
               */
02169         best_guess = j->header.begin;
02170         for (i = 0; i < j->header.index_size; i++) {
02171                 if (POS_VALID(j->index[i]) &&
02172                     DNS_SERIAL_GE(serial, j->index[i].serial) &&
02173                     ((isc_uint32_t)(j->header.end.offset - j->index[i].offset)
02174                      >= target_size / 2) &&
02175                     j->index[i].offset > best_guess.offset)
02176                         best_guess = j->index[i];
02177         }
02178
              /*
               * Refine the guess by stepping forward with journal_next()
               * (presumably one transaction per call -- confirm against
               * its definition) for as long as the same conditions hold.
               */
02179         current_pos = best_guess;
02180         while (current_pos.serial != serial) {
02181                 CHECK(journal_next(j, &current_pos));
02182                 if (current_pos.serial == j->header.end.serial)
02183                         break;
02184
02185                 if (DNS_SERIAL_GE(serial, current_pos.serial) &&
02186                    ((isc_uint32_t)(j->header.end.offset - current_pos.offset)
02187                      >= (target_size / 2)) &&
02188                     current_pos.offset > best_guess.offset)
02189                         best_guess = current_pos;
02190                 else
02191                         break;
02192         }
02193
02194         INSIST(best_guess.serial != j->header.end.serial);
02195         if (best_guess.serial != serial)
02196                 CHECK(journal_next(j, &best_guess));
02197
02198         /*
02199          * We should now be roughly half target_size provided
02200          * we did not reach 'serial'.  If not we will just copy
02201          * all uncommitted deltas regardless of the size.
02202          */
02203         copy_length = j->header.end.offset - best_guess.offset;
02204
02205         if (copy_length != 0) {
02206                 /*
02207                  * Copy best_guess to end into space just freed.
02208                  */
                      /* The copy buffer is at most 64 KiB. */
02209                 size = 64*1024;
02210                 if (copy_length < size)
02211                         size = copy_length;
02212                 buf = isc_mem_get(mctx, size);
02213                 if (buf == NULL) {
02214                         result = ISC_R_NOMEMORY;
02215                         goto failure;
02216                 }
02217
02218                 CHECK(journal_seek(j, best_guess.offset));
02219                 CHECK(journal_seek(new, indexend));
02220                 for (i = 0; i < copy_length; i += size) {
02221                         unsigned int len = (copy_length - i) > size ? size :
02222                                                          (copy_length - i);
02223                         CHECK(journal_read(j, buf, len));
02224                         CHECK(journal_write(new, buf, len));
02225                 }
02226
02227                 CHECK(journal_fsync(new));
02228
02229                 /*
02230                  * Compute new header.
02231                  */
02232                 new->header.begin.serial = best_guess.serial;
02233                 new->header.begin.offset = indexend;
02234                 new->header.end.serial = j->header.end.serial;
02235                 new->header.end.offset = indexend + copy_length;
02236                 new->header.sourceserial = j->header.sourceserial;
02237                 new->header.serialset = j->header.serialset;
02238
02239                 /*
02240                  * Update the journal header.
02241                  */
02242                 journal_header_encode(&new->header, &rawheader);
02243                 CHECK(journal_seek(new, 0));
02244                 CHECK(journal_write(new, &rawheader, sizeof(rawheader)));
02245                 CHECK(journal_fsync(new));
02246
02247                 /*
02248                  * Build new index.
02249                  */
02250                 current_pos = new->header.begin;
02251                 while (current_pos.serial != new->header.end.serial) {
02252                         index_add(new, &current_pos);
02253                         CHECK(journal_next(new, &current_pos));
02254                 }
02255
02256                 /*
02257                  * Write index.
02258                  */
02259                 CHECK(index_to_disk(new));
02260                 CHECK(journal_fsync(new));
02261
                      /*
                       * 'indexend' is not read again after this point;
                       * POST() presumably just marks the store as
                       * intentional for static analysers -- confirm.
                       */
02262                 indexend = new->header.end.offset;
02263                 POST(indexend);
02264         }
02265
02266         /*
02267          * Close both journals before trying to rename files (this is
02268          * necessary on WIN32).
02269          */
02270         dns_journal_destroy(&j);
02271         dns_journal_destroy(&new);
02272
02273         /*
02274          * With a UFS file system this should just succeed and be atomic.
02275          * Any IXFR outs will just continue and the old journal will be
02276          * removed on final close.
02277          *
02278          * With MSDOS / NTFS we need to do a two stage rename, triggered
02279          * by EEXIST.  (If any IXFR's are running in other threads, however,
02280          * this will fail, and the journal will not be compacted.  But
02281          * if so, hopefully they'll be finished by the next time we
02282          * compact.)
02283          */
02284         if (rename(newname, filename) == -1) {
02285                 if (errno == EEXIST && !is_backup) {
02286                         result = isc_file_remove(backup);
02287                         if (result != ISC_R_SUCCESS &&
02288                             result != ISC_R_FILENOTFOUND)
02289                                 goto failure;
02290                         if (rename(filename, backup) == -1)
02291                                 goto maperrno;
02292                         if (rename(newname, filename) == -1)
02293                                 goto maperrno;
02294                         (void)isc_file_remove(backup);
02295                 } else {
                              /*
                               * Despite the label's name, errno is not
                               * translated here: every rename failure is
                               * reported as ISC_R_FAILURE.
                               */
02296  maperrno:
02297                         result = ISC_R_FAILURE;
02298                         goto failure;
02299                 }
02300         }
02301
02302         result = ISC_R_SUCCESS;
02303
              /*
               * Common exit, also reached on success.  The work file is
               * removed unconditionally and the outcome of that removal
               * is ignored.
               */
02304  failure:
02305         (void)isc_file_remove(newname);
02306         if (buf != NULL)
02307                 isc_mem_put(mctx, buf, size);
02308         if (j != NULL)
02309                 dns_journal_destroy(&j);
02310         if (new != NULL)
02311                 dns_journal_destroy(&new);
02312         return (result);
02313 }
02314 
02315 static isc_result_t
02316 index_to_disk(dns_journal_t *j) {
02317         isc_result_t result = ISC_R_SUCCESS;
02318 
02319         if (j->header.index_size != 0) {
02320                 unsigned int i;
02321                 unsigned char *p;
02322                 unsigned int rawbytes;
02323 
02324                 rawbytes = j->header.index_size * sizeof(journal_rawpos_t);
02325 
02326                 p = j->rawindex;
02327                 for (i = 0; i < j->header.index_size; i++) {
02328                         encode_uint32(j->index[i].serial, p);
02329                         p += 4;
02330                         encode_uint32(j->index[i].offset, p);
02331                         p += 4;
02332                 }
02333                 INSIST(p == j->rawindex + rawbytes);
02334 
02335                 CHECK(journal_seek(j, sizeof(journal_rawheader_t)));
02336                 CHECK(journal_write(j, j->rawindex, rawbytes));
02337         }
02338 failure:
02339         return (result);
02340 }

Generated on Tue Apr 28 17:40:57 2015 by Doxygen 1.5.4 for BIND9 Internals 9.11.0pre-alpha