rbtdb.c

Go to the documentation of this file.
00001 /*
00002  * Copyright (C) 2004-2015  Internet Systems Consortium, Inc. ("ISC")
00003  * Copyright (C) 1999-2003  Internet Software Consortium.
00004  *
00005  * Permission to use, copy, modify, and/or distribute this software for any
00006  * purpose with or without fee is hereby granted, provided that the above
00007  * copyright notice and this permission notice appear in all copies.
00008  *
00009  * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
00010  * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
00011  * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
00012  * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
00013  * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
00014  * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
00015  * PERFORMANCE OF THIS SOFTWARE.
00016  */
00017 
00018 /*! \file */
00019 
00020 /*
00021  * Principal Author: Bob Halley
00022  */
00023 
00024 #include <config.h>
00025 
00026 /* #define inline */
00027 
00028 #ifdef HAVE_INTTYPES_H
00029 #include <inttypes.h> /* uintptr_t */
00030 #endif
00031 
00032 #include <isc/crc64.h>
00033 #include <isc/event.h>
00034 #include <isc/heap.h>
00035 #include <isc/file.h>
00036 #include <isc/hex.h>
00037 #include <isc/mem.h>
00038 #include <isc/mutex.h>
00039 #include <isc/once.h>
00040 #include <isc/platform.h>
00041 #include <isc/print.h>
00042 #include <isc/random.h>
00043 #include <isc/refcount.h>
00044 #include <isc/rwlock.h>
00045 #include <isc/serial.h>
00046 #include <isc/socket.h>
00047 #include <isc/stdio.h>
00048 #include <isc/string.h>
00049 #include <isc/task.h>
00050 #include <isc/time.h>
00051 #include <isc/util.h>
00052 
00053 #include <dns/acache.h>
00054 #include <dns/callbacks.h>
00055 #include <dns/db.h>
00056 #include <dns/dbiterator.h>
00057 #include <dns/events.h>
00058 #include <dns/fixedname.h>
00059 #include <dns/lib.h>
00060 #include <dns/log.h>
00061 #include <dns/masterdump.h>
00062 #include <dns/nsec.h>
00063 #include <dns/nsec3.h>
00064 #include <dns/rbt.h>
00065 #include <dns/rpz.h>
00066 #include <dns/rdata.h>
00067 #include <dns/rdataset.h>
00068 #include <dns/rdatasetiter.h>
00069 #include <dns/rdataslab.h>
00070 #include <dns/rdatastruct.h>
00071 #include <dns/result.h>
00072 #include <dns/stats.h>
00073 #include <dns/version.h>
00074 #include <dns/view.h>
00075 #include <dns/zone.h>
00076 #include <dns/zonekey.h>
00077 
00078 #ifndef WIN32
00079 #include <sys/mman.h>
00080 #else
00081 #define PROT_READ       0x01
00082 #define PROT_WRITE      0x02
00083 #define MAP_PRIVATE     0x0002
00084 #define MAP_FAILED      ((void *)-1)
00085 #endif
00086 
00087 #ifdef DNS_RBTDB_VERSION64
00088 #include "rbtdb64.h"
00089 #else
00090 #include "rbtdb.h"
00091 #endif
00092 
00093 #ifdef DNS_RBTDB_VERSION64
00094 #define RBTDB_MAGIC                     ISC_MAGIC('R', 'B', 'D', '8')
00095 #else
00096 #define RBTDB_MAGIC                     ISC_MAGIC('R', 'B', 'D', '4')
00097 #endif
00098 
/*
 * Evaluate 'op', store its value in the enclosing function's 'result'
 * variable, and jump to that function's 'failure' label unless the value
 * is ISC_R_SUCCESS.  Any function using CHECK() must therefore declare
 * 'result' and provide a 'failure:' label.
 */
00099 #define CHECK(op) \
00100         do { result = (op); \
00101                 if (result != ISC_R_SUCCESS) goto failure; \
00102         } while (0)
00103 
00104 /*
00105  * This is the map file header for RBTDB images.  It is populated, and then
00106  * written, as the LAST thing done to the file.  Writing this last (with
00107  * zeros in the header area initially) will ensure that the header is only
00108  * valid when the RBTDB image is also valid.
00109  */
00110 typedef struct rbtdb_file_header rbtdb_file_header_t;
00111 
00112 /* Header length, always the same size regardless of structure size */
00113 #define RBTDB_HEADER_LENGTH     1024
00114 
/*
 * Fixed-layout header of an RBTDB map-file image.  The version string is
 * stored twice: version1 is the first member and version2 the last.
 * NOTE(review): presumably the duplicate lets a reader detect a partially
 * written header -- confirm against the code that validates it.
 */
00115 struct rbtdb_file_header {
00116         char version1[32];
00117         isc_uint32_t ptrsize;           /* NOTE(review): presumably the writer's pointer size -- confirm */
00118         unsigned int bigendian:1;       /* NOTE(review): presumably the writer's byte order -- confirm */
        /*
         * NOTE(review): these presumably locate the three trees (cf. the
         * tree/nsec/nsec3 members of struct dns_rbtdb) inside the image;
         * confirm whether they are file offsets.
         */
00119         isc_uint64_t tree;
00120         isc_uint64_t nsec;
00121         isc_uint64_t nsec3;
00122 
00123         char version2[32];              /* repeated; must match version1 */
00124 };
00125 
00126 
00127 /*%
00128  * Note that "impmagic" is not the first four bytes of the struct, so
00129  * ISC_MAGIC_VALID cannot be used.
 *
 * The check is a NULL test followed by a comparison of common.impmagic
 * with RBTDB_MAGIC.  The macro argument is expanded twice, so it must not
 * have side effects.
00130  */
00131 #define VALID_RBTDB(rbtdb)      ((rbtdb) != NULL && \
00132                                  (rbtdb)->common.impmagic == RBTDB_MAGIC)
00133 
00134 #ifdef DNS_RBTDB_VERSION64
00135 typedef isc_uint64_t                    rbtdb_serial_t;
00136 /*%
00137  * Make casting easier in symbolic debuggers by using different names
00138  * for the 64 bit version.
00139  */
00140 #define dns_rbtdb_t dns_rbtdb64_t
00141 #define rdatasetheader_t rdatasetheader64_t
00142 #define rbtdb_version_t rbtdb_version64_t
00143 
00144 #define once once64
00145 #define FILE_VERSION FILE_VERSION64
00146 #define init_count init_count64
00147 
00148 #define cache_methods cache_methods64
00149 #define dbiterator_methods dbiterator_methods64
00150 #define rdataset_methods rdataset_methods64
00151 #define rdatasetiter_methods rdatasetiter_methods64
00152 #define slab_methods slab_methods64
00153 #define zone_methods zone_methods64
00154 
00155 #define acache_callback acache_callback64
00156 #define acache_cancelentry acache_cancelentry64
00157 #define activeempty activeempty64
00158 #define activeemtpynode activeemtpynode64
00159 #define add32 add64
00160 #define add_changed add_changed64
00161 #define add_empty_wildcards add_empty_wildcards64
00162 #define add_wildcard_magic add_wildcard_magic64
00163 #define addrdataset addrdataset64
00164 #define allrdatasets allrdatasets64
00165 #define attach attach64
00166 #define attachnode attachnode64
00167 #define attachversion attachversion64
00168 #define beginload beginload64
00169 #define bind_rdataset bind_rdataset64
00170 #define cache_find cache_find64
00171 #define cache_findrdataset cache_findrdataset64
00172 #define cache_findzonecut cache_findzonecut64
00173 #define cache_zonecut_callback cache_zonecut_callback64
00174 #define cleanup_dead_nodes cleanup_dead_nodes64
00175 #define cleanup_dead_nodes_callback cleanup_dead_nodes_callback64
00176 #define closeversion closeversion64
00177 #define createiterator createiterator64
00178 #define currentversion currentversion64
00179 #define dbiterator_current dbiterator_current64
00180 #define dbiterator_destroy dbiterator_destroy64
00181 #define dbiterator_first dbiterator_first64
00182 #define dbiterator_last dbiterator_last64
00183 #define dbiterator_next dbiterator_next64
00184 #define dbiterator_origin dbiterator_origin64
00185 #define dbiterator_pause dbiterator_pause64
00186 #define dbiterator_prev dbiterator_prev64
00187 #define dbiterator_seek dbiterator_seek64
00188 #define decrement_reference decrement_reference64
00189 #define delete_callback delete_callback64
00190 #define delete_node delete_node64
00191 #define deleterdataset deleterdataset64
00192 #define deserialize32 deserialize64
00193 #define detach detach64
00194 #define detachnode detachnode64
00195 #define dump dump64
00196 #define endload endload64
00197 #define expire_header expire_header64
00198 #define expirenode expirenode64
00199 #define find_closest_nsec find_closest_nsec64
00200 #define find_coveringnsec find_coveringnsec64
00201 #define find_deepest_zonecut find_deepest_zonecut64
00202 #define findnode findnode64
00203 #define findnodeintree findnodeintree64
00204 #define findnsec3node findnsec3node64
00205 #define flush_deletions flush_deletions64
00206 #define free_acachearray free_acachearray64
00207 #define free_noqname free_noqname64
00208 #define free_rbtdb free_rbtdb64
00209 #define free_rbtdb_callback free_rbtdb_callback64
00210 #define free_rdataset free_rdataset64
00211 #define getnsec3parameters getnsec3parameters64
00212 #define getoriginnode getoriginnode64
00213 #define getrrsetstats getrrsetstats64
00214 #define getsigningtime getsigningtime64
00215 #define hashsize hashsize64
00216 #define init_file_version init_file_version64
00217 #define isdnssec isdnssec64
00218 #define ispersistent ispersistent64
00219 #define issecure issecure64
00220 #define iszonesecure iszonesecure64
00221 #define loading_addrdataset loading_addrdataset64
00222 #define loadnode loadnode64
00223 #define matchparams matchparams64
00224 #define maybe_free_rbtdb maybe_free_rbtdb64
00225 #define new_reference new_reference64
00226 #define newversion newversion64
00227 #define nodecount nodecount64
00228 #define overmem overmem64
00229 #define previous_closest_nsec previous_closest_nsec64
00230 #define printnode printnode64
00231 #define prune_tree prune_tree64
00232 #define rbt_datafixer rbt_datafixer64
00233 #define rbt_datawriter rbt_datawriter64
00234 #define rdataset_clearprefetch rdataset_clearprefetch64
00235 #define rdataset_clone rdataset_clone64
00236 #define rdataset_count rdataset_count64
00237 #define rdataset_current rdataset_current64
00238 #define rdataset_disassociate rdataset_disassociate64
00239 #define rdataset_expire rdataset_expire64
00240 #define rdataset_first rdataset_first64
00241 #define rdataset_getadditional rdataset_getadditional64
00242 #define rdataset_getclosest rdataset_getclosest64
00243 #define rdataset_getnoqname rdataset_getnoqname64
00244 #define rdataset_getownercase rdataset_getownercase64
00245 #define rdataset_next rdataset_next64
00246 #define rdataset_putadditional rdataset_putadditional64
00247 #define rdataset_setadditional rdataset_setadditional64
00248 #define rdataset_setownercase rdataset_setownercase64
00249 #define rdataset_settrust rdataset_settrust64
00250 #define rdatasetiter_current rdatasetiter_current64
00251 #define rdatasetiter_destroy rdatasetiter_destroy64
00252 #define rdatasetiter_first rdatasetiter_first64
00253 #define rdatasetiter_next rdatasetiter_next64
00254 #define reactivate_node reactivate_node64
00255 #define resign_delete resign_delete64
00256 #define resign_insert resign_insert64
00257 #define resign_sooner resign_sooner64
00258 #define resigned resigned64
00259 #define rpz_attach rpz_attach64
00260 #define rpz_ready rpz_ready64
00261 #define serialize serialize64
00262 #define set_index set_index64
00263 #define set_ttl set_ttl64
00264 #define setcachestats setcachestats64
00265 #define setownercase setownercase64
00266 #define setsigningtime setsigningtime64
00267 #define settask settask64
00268 #define setup_delegation setup_delegation64
00269 #define subtractrdataset subtractrdataset64
00270 #define ttl_sooner ttl_sooner64
00271 #define update_cachestats update_cachestats64
00272 #define update_header update_header64
00273 #define update_newheader update_newheader64
00274 #define update_rrsetstats update_rrsetstats64
00275 #define zone_find zone_find64
00276 #define zone_findrdataset zone_findrdataset64
00277 #define zone_findzonecut zone_findzonecut64
00278 #define zone_zonecut_callback zone_zonecut_callback64
00279 
00280 #else
00281 typedef isc_uint32_t                    rbtdb_serial_t;
00282 #endif
00283 
00284 typedef isc_uint32_t                    rbtdb_rdatatype_t;
00285 
/*
 * An rbtdb_rdatatype_t packs two 16-bit rdata types into one 32-bit value:
 * the "base" type in the low 16 bits and the "ext" type in the high 16 bits.
 *
 * RBTDB_RDATATYPE_BASE(type)   extracts the low half.
 * RBTDB_RDATATYPE_EXT(type)    extracts the high half.
 * RBTDB_RDATATYPE_VALUE(b, e)  builds the packed value from base 'b' and
 *                              ext 'e'.
 *
 * 'e' is widened to rbtdb_rdatatype_t BEFORE it is shifted.  A 16-bit
 * argument would otherwise be promoted to (signed) int, and shifting any
 * value >= 0x8000 left by 16 overflows int, which is undefined behaviour.
 */
#define RBTDB_RDATATYPE_BASE(type)      ((dns_rdatatype_t)((type) & 0xFFFF))
#define RBTDB_RDATATYPE_EXT(type)       ((dns_rdatatype_t)((type) >> 16))
#define RBTDB_RDATATYPE_VALUE(b, e) \
        ((rbtdb_rdatatype_t)(((rbtdb_rdatatype_t)(e) << 16) | \
                             (rbtdb_rdatatype_t)(b)))
00289 
/*
 * Pre-packed rbtdb_rdatatype_t constants.  Each SIG* value has
 * dns_rdatatype_rrsig as its base type and the named type (NSEC, NSEC3,
 * NS, CNAME, DNAME, DS) as its extension.  NCACHEANY has base 0 and
 * extension dns_rdatatype_any.
 */
00290 #define RBTDB_RDATATYPE_SIGNSEC \
00291                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec)
00292 #define RBTDB_RDATATYPE_SIGNSEC3 \
00293                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec3)
00294 #define RBTDB_RDATATYPE_SIGNS \
00295                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_ns)
00296 #define RBTDB_RDATATYPE_SIGCNAME \
00297                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_cname)
00298 #define RBTDB_RDATATYPE_SIGDNAME \
00299                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_dname)
00300 #define RBTDB_RDATATYPE_SIGDDS \
00301                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_ds)
00302 #define RBTDB_RDATATYPE_NCACHEANY \
00303                 RBTDB_RDATATYPE_VALUE(0, dns_rdatatype_any)
00304 
00305 /*
00306  * We use rwlock for DB lock only when ISC_RWLOCK_USEATOMIC is defined.
00307  * Using rwlock is effective with regard to lookup performance only when
00308  * it is implemented in an efficient way.
00309  * Otherwise, it is generally wise to stick to the simple locking since rwlock
00310  * would require more memory or can even make lookups slower due to its own
00311  * overhead (when it internally calls mutex locks).
00312  */
00313 #ifdef ISC_RWLOCK_USEATOMIC
00314 #define DNS_RBTDB_USERWLOCK 1
00315 #else
00316 #define DNS_RBTDB_USERWLOCK 0
00317 #endif
00318 
/*
 * Wrappers for the database lock.  'l' is the lock and 't' the requested
 * lock type.  In the mutex variant 't' is accepted but ignored, so callers
 * are written the same way for both variants.
 */
00319 #if DNS_RBTDB_USERWLOCK
00320 #define RBTDB_INITLOCK(l)       isc_rwlock_init((l), 0, 0)
00321 #define RBTDB_DESTROYLOCK(l)    isc_rwlock_destroy(l)
00322 #define RBTDB_LOCK(l, t)        RWLOCK((l), (t))
00323 #define RBTDB_UNLOCK(l, t)      RWUNLOCK((l), (t))
00324 #else
00325 #define RBTDB_INITLOCK(l)       isc_mutex_init(l)
00326 #define RBTDB_DESTROYLOCK(l)    DESTROYLOCK(l)
00327 #define RBTDB_LOCK(l, t)        LOCK(l)
00328 #define RBTDB_UNLOCK(l, t)      UNLOCK(l)
00329 #endif
00330 
00331 /*
00332  * Since node locking is sensitive to both performance and memory footprint,
00333  * we need some trick here.  If we have both high-performance rwlock and
00334  * high performance and small-memory reference counters, we use rwlock for
00335  * node lock and isc_refcount for node references.  In this case, we don't have
00336  * to protect the access to the counters by locks.
00337  * Otherwise, we simply use ordinary mutex lock for node locking, and use
00338  * simple integers as reference counters which is protected by the lock.
00339  * In most cases, we can simply use wrapper macros such as NODE_LOCK and
00340  * NODE_UNLOCK.  In some other cases, however, we need to protect reference
00341  * counters first and then protect other parts of a node as read-only data.
00342  * Special additional macros, NODE_STRONGLOCK(), NODE_WEAKLOCK(), etc, are also
00343  * provided for these special cases.  When we can use the efficient backend
00344  * routines, we should only protect the "other members" by NODE_WEAKLOCK(read).
00345  * Otherwise, we should use NODE_STRONGLOCK() to protect the entire critical
00346  * section including the access to the reference counter.
00347  * Note that we cannot use NODE_LOCK()/NODE_UNLOCK() wherever the protected
00348  * section is also protected by NODE_STRONGLOCK().
00349  */
00350 #if defined(ISC_RWLOCK_USEATOMIC) && defined(DNS_RBT_USEISCREFCOUNT)
/*
 * rwlock variant: the STRONG macros expand to nothing and the WEAK macros
 * take or release the real rwlock.
 */
00351 typedef isc_rwlock_t nodelock_t;
00352 
00353 #define NODE_INITLOCK(l)        isc_rwlock_init((l), 0, 0)
00354 #define NODE_DESTROYLOCK(l)     isc_rwlock_destroy(l)
00355 #define NODE_LOCK(l, t)         RWLOCK((l), (t))
00356 #define NODE_UNLOCK(l, t)       RWUNLOCK((l), (t))
00357 #define NODE_TRYUPGRADE(l)      isc_rwlock_tryupgrade(l)
00358 
00359 #define NODE_STRONGLOCK(l)      ((void)0)
00360 #define NODE_STRONGUNLOCK(l)    ((void)0)
00361 #define NODE_WEAKLOCK(l, t)     NODE_LOCK(l, t)
00362 #define NODE_WEAKUNLOCK(l, t)   NODE_UNLOCK(l, t)
00363 #define NODE_WEAKDOWNGRADE(l)   isc_rwlock_downgrade(l)
00364 #else
/*
 * Mutex variant: the lock type argument 't' is ignored, NODE_TRYUPGRADE()
 * always evaluates to ISC_R_SUCCESS, the STRONG macros take or release the
 * mutex, and the WEAK macros (including the downgrade) expand to nothing.
 */
00365 typedef isc_mutex_t nodelock_t;
00366 
00367 #define NODE_INITLOCK(l)        isc_mutex_init(l)
00368 #define NODE_DESTROYLOCK(l)     DESTROYLOCK(l)
00369 #define NODE_LOCK(l, t)         LOCK(l)
00370 #define NODE_UNLOCK(l, t)       UNLOCK(l)
00371 #define NODE_TRYUPGRADE(l)      ISC_R_SUCCESS
00372 
00373 #define NODE_STRONGLOCK(l)      LOCK(l)
00374 #define NODE_STRONGUNLOCK(l)    UNLOCK(l)
00375 #define NODE_WEAKLOCK(l, t)     ((void)0)
00376 #define NODE_WEAKUNLOCK(l, t)   ((void)0)
00377 #define NODE_WEAKDOWNGRADE(l)   ((void)0)
00378 #endif
00379 
00380 /*%
00381  * Whether to rate-limit updating the LRU to avoid possible thread contention.
00382  * Our performance measurement has shown the cost is marginal, so it's defined
00383  * to be 0 by default either with or without threads.
00384  */
00385 #ifndef DNS_RBTDB_LIMITLRUUPDATE
00386 #define DNS_RBTDB_LIMITLRUUPDATE 0
00387 #endif
00388 
00389 /*
00390  * Allow clients with a virtual time of up to 5 minutes in the past to see
00391  * records that would otherwise have expired.
00392  */
00393 #define RBTDB_VIRTUAL 300
00394 
/*
 * Auxiliary record referenced from rdatasetheader_t through its 'noqname'
 * and 'closest' pointers.
 * NOTE(review): 'neg' and 'negsig' are untyped here; presumably they hold
 * the negative-proof rdataset and its signature, with 'type' naming the
 * proof's rdata type -- confirm against the code that fills them in.
 */
00395 struct noqname {
00396         dns_name_t      name;
00397         void *          neg;
00398         void *          negsig;
00399         dns_rdatatype_t type;
00400 };
00401 
00402 typedef struct acachectl acachectl_t;
00403 
/*
 * Per-rdataset header.  Headers for the different types at a node are
 * chained through 'next'; older versions of the same rdataset hang off
 * 'down'.  The member order is left untouched because the structure
 * carries mmap-related flags (is_mmapped, *_is_relative).
 */
00404 typedef struct rdatasetheader {
00405         /*%
00406          * Locked by the owning node's lock.
00407          */
00408         rbtdb_serial_t                  serial;
00409         dns_ttl_t                       rdh_ttl;
00410         rbtdb_rdatatype_t               type;
00411         isc_uint16_t                    attributes;     /* RDATASET_ATTR_* bit flags */
00412         dns_trust_t                     trust;
00413         struct noqname                  *noqname;
00414         struct noqname                  *closest;
00415         unsigned int                    is_mmapped : 1;
00416         unsigned int                    next_is_relative : 1;
00417         unsigned int                    node_is_relative : 1;
00418         /*%
00419          * We don't use the LIST macros, because the LIST structure has
00420          * both head and tail pointers, and is doubly linked.
00421          */
00422 
00423         struct rdatasetheader           *next;
00424         /*%<
00425          * If this is the top header for an rdataset, 'next' points
00426          * to the top header for the next rdataset (i.e., the next type).
00427          * Otherwise, it points up to the header whose down pointer points
00428          * at this header.
00429          */
00430 
00431         struct rdatasetheader           *down;
00432         /*%<
00433          * Points to the header for the next older version of
00434          * this rdataset.
00435          */
00436 
00437         isc_uint32_t                    count;
00438         /*%<
00439          * Monotonically increased every time this rdataset is bound so that
00440          * it is used as the base of the starting point in DNS responses
00441          * when the "cyclic" rrset-order is required.  Since the ordering
00442          * should not be so crucial, no lock is set for the counter for
00443          * performance reasons.
00444          */
00445 
00446         acachectl_t                     *additional_auth;
00447         acachectl_t                     *additional_glue;
00448 
00449         dns_rbtnode_t                   *node;
00450         isc_stdtime_t                   last_used;
00451         ISC_LINK(struct rdatasetheader) link;
00452 
00453         unsigned int                    heap_index;
00454         /*%<
00455          * Used for TTL-based cache cleaning.
00456          */
00457         isc_stdtime_t                   resign;
00458         /*%
00459          * Case vector.  If the bit is set then the corresponding
00460          * character in the owner name needs to be AND'd with ~0x20
00461          * (bit 0x20 cleared), rendering that character upper case.
00462          */
00463         unsigned char                   upper[32];
00464 } rdatasetheader_t;
00465 
/* List-head types for rdataset headers and for tree nodes. */
00466 typedef ISC_LIST(rdatasetheader_t)      rdatasetheaderlist_t;
00467 typedef ISC_LIST(dns_rbtnode_t)         rbtnodelist_t;
00468 
/*
 * Bit flags kept in rdatasetheader_t.attributes.  Each flag occupies a
 * distinct bit, and all of them fit in the 16-bit attributes field.
 */
00469 #define RDATASET_ATTR_NONEXISTENT       0x0001
00470 #define RDATASET_ATTR_STALE             0x0002
00471 #define RDATASET_ATTR_IGNORE            0x0004
00472 #define RDATASET_ATTR_RETAIN            0x0008
00473 #define RDATASET_ATTR_NXDOMAIN          0x0010
00474 #define RDATASET_ATTR_RESIGN            0x0020
00475 #define RDATASET_ATTR_STATCOUNT         0x0040
00476 #define RDATASET_ATTR_OPTOUT            0x0080
00477 #define RDATASET_ATTR_NEGATIVE          0x0100
00478 #define RDATASET_ATTR_PREFETCH          0x0200
00479 #define RDATASET_ATTR_CASESET           0x0400
00480 
/*
 * Argument block reached through acachectl.cbarg.  It names the database,
 * node and rdataset header it belongs to.
 * NOTE(review): presumably handed to the acache callback so the entry can
 * be located again; 'count' is presumably an index -- confirm.
 */
00481 typedef struct acache_cbarg {
00482         dns_rdatasetadditional_t        type;
00483         unsigned int                    count;
00484         dns_db_t                        *db;
00485         dns_dbnode_t                    *node;
00486         rdatasetheader_t                *header;
00487 } acache_cbarg_t;
00488 
/*
 * Pairs an additional-cache entry with its callback argument.  The
 * additional_auth and additional_glue members of rdatasetheader_t point
 * at objects of this type.
 */
00489 struct acachectl {
00490         dns_acacheentry_t               *entry;
00491         acache_cbarg_t                  *cbarg;
00492 };
00493 
00494 /*
00495  * XXX
00496  * When the cache pre-expires data (due to low memory or other
00497  * situations) before the rdataset's TTL has expired, it MUST
00498  * respect the RETAIN bit and not expire the data until its TTL has
00499  * expired.
00500  */
00501 
00502 #undef IGNORE                   /* WIN32 winbase.h defines this. */
00503 
/*
 * Predicates on a header's attribute bits.  EXISTS() is true when
 * RDATASET_ATTR_NONEXISTENT is clear; every other macro is true when the
 * flag of the same name is set.  'header' must be a pointer to a structure
 * with an 'attributes' member.
 */
00504 #define EXISTS(header) \
00505         (((header)->attributes & RDATASET_ATTR_NONEXISTENT) == 0)
00506 #define NONEXISTENT(header) \
00507         (((header)->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
00508 #define IGNORE(header) \
00509         (((header)->attributes & RDATASET_ATTR_IGNORE) != 0)
00510 #define RETAIN(header) \
00511         (((header)->attributes & RDATASET_ATTR_RETAIN) != 0)
00512 #define NXDOMAIN(header) \
00513         (((header)->attributes & RDATASET_ATTR_NXDOMAIN) != 0)
00514 #define RESIGN(header) \
00515         (((header)->attributes & RDATASET_ATTR_RESIGN) != 0)
00516 #define OPTOUT(header) \
00517         (((header)->attributes & RDATASET_ATTR_OPTOUT) != 0)
00518 #define NEGATIVE(header) \
00519         (((header)->attributes & RDATASET_ATTR_NEGATIVE) != 0)
00520 #define PREFETCH(header) \
00521         (((header)->attributes & RDATASET_ATTR_PREFETCH) != 0)
00522 #define CASESET(header) \
00523         (((header)->attributes & RDATASET_ATTR_CASESET) != 0)
00524 
/* NOTE(review): presumably the bucket count for non-cache databases -- confirm. */
00525 #define DEFAULT_NODE_LOCK_COUNT         7       /*%< Should be prime. */
00526 
00527 /*%
00528  * Number of buckets for cache DB entries (locks, LRU lists, TTL heaps).
00529  * There is a tradeoff issue about configuring this value: if this is too
00530  * small, it may cause heavier contention between threads; if this is too large,
00531  * LRU purge algorithm won't work well (entries tend to be purged prematurely).
00532  * The default value should work well for most environments, but this can
00533  * also be configurable at compilation time via the
00534  * DNS_RBTDB_CACHE_NODE_LOCK_COUNT variable.  This value must be larger than
00535  * 1 due to the assumption of overmem_purge().
00536  */
00537 #ifdef DNS_RBTDB_CACHE_NODE_LOCK_COUNT
/* A build-time override of 1 or less is rejected at compile time. */
00538 #if DNS_RBTDB_CACHE_NODE_LOCK_COUNT <= 1
00539 #error "DNS_RBTDB_CACHE_NODE_LOCK_COUNT must be larger than 1"
00540 #else
00541 #define DEFAULT_CACHE_NODE_LOCK_COUNT DNS_RBTDB_CACHE_NODE_LOCK_COUNT
00542 #endif
00543 #else
00544 #define DEFAULT_CACHE_NODE_LOCK_COUNT   16
00545 #endif  /* DNS_RBTDB_CACHE_NODE_LOCK_COUNT */
00546 
/*
 * One node-lock bucket: the lock itself, a reference counter and an
 * 'exiting' flag.  struct dns_rbtdb holds these through its node_locks
 * pointer, with node_lock_count giving the number of buckets.
 */
00547 typedef struct {
00548         nodelock_t                      lock;
00549         /* Protected in the refcount routines. */
00550         isc_refcount_t                  references;
00551         /* Locked by lock. */
00552         isc_boolean_t                   exiting;
00553 } rbtdb_nodelock_t;
00554 
/*
 * Element of a version's changed_list (see rbtdb_version_t): a node
 * pointer plus a 'dirty' flag.
 * NOTE(review): presumably 'dirty' marks nodes needing cleanup when the
 * version is closed -- confirm.
 */
00555 typedef struct rbtdb_changed {
00556         dns_rbtnode_t *                 node;
00557         isc_boolean_t                   dirty;
00558         ISC_LINK(struct rbtdb_changed)  link;
00559 } rbtdb_changed_t;
00560 
00561 typedef ISC_LIST(rbtdb_changed_t)       rbtdb_changedlist_t;
00562 
/*
 * Value type of rbtdb_version_t.secure.
 * NOTE(review): presumably the DNSSEC state of a zone version (unsigned,
 * partially signed, fully signed) -- confirm.
 */
00563 typedef enum {
00564         dns_db_insecure,
00565         dns_db_partial,
00566         dns_db_secure
00567 } dns_db_secure_t;
00568 
00569 typedef struct dns_rbtdb dns_rbtdb_t;
00570 
00571 /* Reason for expiring a record from cache */
/* Passed as the 'reason' argument of expire_header(). */
00572 typedef enum {
00573         expire_lru,
00574         expire_ttl,
00575         expire_flush
00576 } expire_t;
00577 
/*
 * One version of the database, identified by 'serial'.  Versions are
 * linked through 'link'; struct dns_rbtdb keeps a list of them
 * (open_versions) as well as current_version and future_version pointers.
 */
00578 typedef struct rbtdb_version {
00579         /* Not locked */
00580         rbtdb_serial_t                  serial;
00581         dns_rbtdb_t *                   rbtdb;
00582         /*
00583          * Protected in the refcount routines.
00584          * XXXJT: should we change the lock policy based on the refcount
00585          * performance?
00586          */
00587         isc_refcount_t                  references;
00588         /* Locked by database lock. */
00589         isc_boolean_t                   writer;
00590         isc_boolean_t                   commit_ok;
00591         rbtdb_changedlist_t             changed_list;
00592         rdatasetheaderlist_t            resigned_list;
00593         ISC_LINK(struct rbtdb_version)  link;
00594         dns_db_secure_t                 secure;
00595         isc_boolean_t                   havensec3;
00596         /* NSEC3 parameters */
00597         dns_hash_t                      hash;
00598         isc_uint8_t                     flags;
00599         isc_uint16_t                    iterations;
00600         isc_uint8_t                     salt_length;
00601         unsigned char                   salt[DNS_NSEC3_SALTSIZE];
00602 } rbtdb_version_t;
00603 
00604 typedef ISC_LIST(rbtdb_version_t)       rbtdb_versionlist_t;
00605 
/*
 * The database object.  'common' is the first member, and VALID_RBTDB()
 * validates an instance through common.impmagic.  The comments inside the
 * structure state which lock protects each group of members.
 */
00606 struct dns_rbtdb {
00607         /* Unlocked. */
00608         dns_db_t                        common;
00609         /* Locks the data in this struct */
00610 #if DNS_RBTDB_USERWLOCK
00611         isc_rwlock_t                    lock;
00612 #else
00613         isc_mutex_t                     lock;
00614 #endif
00615         /* Locks the tree structure (prevents nodes appearing/disappearing) */
00616         isc_rwlock_t                    tree_lock;
00617         /* Locks for individual tree nodes */
00618         unsigned int                    node_lock_count;
00619         rbtdb_nodelock_t *              node_locks;
00620         dns_rbtnode_t *                 origin_node;
00621         dns_stats_t *                   rrsetstats; /* cache DB only */
00622         isc_stats_t *                   cachestats; /* cache DB only */
00623         /* Locked by lock. */
00624         unsigned int                    active;
00625         isc_refcount_t                  references;
00626         unsigned int                    attributes;
00627         rbtdb_serial_t                  current_serial;
00628         rbtdb_serial_t                  least_serial;
00629         rbtdb_serial_t                  next_serial;
00630         rbtdb_version_t *               current_version;
00631         rbtdb_version_t *               future_version;
00632         rbtdb_versionlist_t             open_versions;
00633         isc_task_t *                    task;
00634         dns_dbnode_t                    *soanode;
00635         dns_dbnode_t                    *nsnode;
00636 
00637         /*
00638          * This is a linked list used to implement the LRU cache.  There will
00639          * be node_lock_count linked lists here.  Nodes in bucket 1 will be
00640          * placed on the linked list rdatasets[1].
00641          */
00642         rdatasetheaderlist_t            *rdatasets;
00643 
00644         /*%
00645          * Temporary storage for stale cache nodes and dynamically deleted
00646          * nodes that await being cleaned up.
00647          */
00648         rbtnodelist_t                   *deadnodes;
00649 
00650         /*
00651          * Heaps.  These are used for TTL based expiry in a cache,
00652          * or for zone resigning in a zone DB.  hmctx is the memory
00653          * context to use for the heap (which differs from the main
00654          * database memory context in the case of a cache).
00655          */
00656         isc_mem_t *                     hmctx;
00657         isc_heap_t                      **heaps;
00658 
00659         /*
00660          * Base values for the mmap() code.
00661          */
00662         void *                          mmap_location;
00663         size_t                          mmap_size;
00664 
00665         /* Locked by tree_lock. */
00666         dns_rbt_t *                     tree;
00667         dns_rbt_t *                     nsec;
00668         dns_rbt_t *                     nsec3;
00669         dns_rpz_zones_t                 *rpzs;
00670         dns_rpz_num_t                   rpz_num;
00671         dns_rpz_zones_t                 *load_rpzs;
00672 
00673         /* Unlocked */
00674         unsigned int                    quantum;
00675 };
00676 
/* NOTE(review): presumably bit flags for dns_rbtdb.attributes -- confirm. */
00677 #define RBTDB_ATTR_LOADED               0x01
00678 #define RBTDB_ATTR_LOADING              0x02
00679 
00680 /*%
00681  * Search Context
 *
 * Bundles the database, version and serial being searched with the
 * caller's options, the node chain, and the zone-cut state (node, name,
 * rdataset and signature rdataset headers).
 * NOTE(review): presumably filled in by the find routines and passed to
 * their tree-walk callbacks -- confirm.
00682  */
00683 typedef struct {
00684         dns_rbtdb_t *           rbtdb;
00685         rbtdb_version_t *       rbtversion;
00686         rbtdb_serial_t          serial;
00687         unsigned int            options;
00688         dns_rbtnodechain_t      chain;
00689         isc_boolean_t           copy_name;
00690         isc_boolean_t           need_cleanup;
00691         isc_boolean_t           wild;
00692         dns_rbtnode_t *         zonecut;
00693         rdatasetheader_t *      zonecut_rdataset;
00694         rdatasetheader_t *      zonecut_sigrdataset;
00695         dns_fixedname_t         zonecut_name;
00696         isc_stdtime_t           now;
00697 } rbtdb_search_t;
00698 
00699 /*%
00700  * Load Context
 *
 * Holds the database being loaded and a timestamp.
 * NOTE(review): presumably 'now' is captured once when loading starts --
 * confirm.
00701  */
00702 typedef struct {
00703         dns_rbtdb_t *           rbtdb;
00704         isc_stdtime_t           now;
00705 } rbtdb_load_t;
00706 
00707 static void delete_callback(void *data, void *arg);
00708 static void rdataset_disassociate(dns_rdataset_t *rdataset);
00709 static isc_result_t rdataset_first(dns_rdataset_t *rdataset);
00710 static isc_result_t rdataset_next(dns_rdataset_t *rdataset);
/*
 * Forward declarations of file-local (static) helpers.  Several of the
 * rdataset_* functions declared here are installed in the
 * 'rdataset_methods' table that follows; the remaining ones
 * (header expiry/update, overmem purging, re-signing heap maintenance,
 * tree pruning) are only declared here so they can be referenced before
 * their definitions.
 */
00711 static void rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata);
00712 static void rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target);
00713 static unsigned int rdataset_count(dns_rdataset_t *rdataset);
00714 static isc_result_t rdataset_getnoqname(dns_rdataset_t *rdataset,
00715                                         dns_name_t *name,
00716                                         dns_rdataset_t *neg,
00717                                         dns_rdataset_t *negsig);
00718 static isc_result_t rdataset_getclosest(dns_rdataset_t *rdataset,
00719                                         dns_name_t *name,
00720                                         dns_rdataset_t *neg,
00721                                         dns_rdataset_t *negsig);
00722 static isc_result_t rdataset_getadditional(dns_rdataset_t *rdataset,
00723                                            dns_rdatasetadditional_t type,
00724                                            dns_rdatatype_t qtype,
00725                                            dns_acache_t *acache,
00726                                            dns_zone_t **zonep,
00727                                            dns_db_t **dbp,
00728                                            dns_dbversion_t **versionp,
00729                                            dns_dbnode_t **nodep,
00730                                            dns_name_t *fname,
00731                                            dns_message_t *msg,
00732                                            isc_stdtime_t now);
00733 static isc_result_t rdataset_setadditional(dns_rdataset_t *rdataset,
00734                                            dns_rdatasetadditional_t type,
00735                                            dns_rdatatype_t qtype,
00736                                            dns_acache_t *acache,
00737                                            dns_zone_t *zone,
00738                                            dns_db_t *db,
00739                                            dns_dbversion_t *version,
00740                                            dns_dbnode_t *node,
00741                                            dns_name_t *fname);
00742 static isc_result_t rdataset_putadditional(dns_acache_t *acache,
00743                                            dns_rdataset_t *rdataset,
00744                                            dns_rdatasetadditional_t type,
00745                                            dns_rdatatype_t qtype);
00746 static inline isc_boolean_t need_headerupdate(rdatasetheader_t *header,
00747                                               isc_stdtime_t now);
00748 static void update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
00749                           isc_stdtime_t now);
00750 static void expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
00751                           isc_boolean_t tree_locked, expire_t reason);
00752 static void overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
00753                           isc_stdtime_t now, isc_boolean_t tree_locked);
00754 static isc_result_t resign_insert(dns_rbtdb_t *rbtdb, int idx,
00755                                   rdatasetheader_t *newheader);
00756 static void resign_delete(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
00757                           rdatasetheader_t *header);
00758 static void prune_tree(isc_task_t *task, isc_event_t *event);
00759 static void rdataset_settrust(dns_rdataset_t *rdataset, dns_trust_t trust);
00760 static void rdataset_expire(dns_rdataset_t *rdataset);
00761 static void rdataset_clearprefetch(dns_rdataset_t *rdataset);
00762 static void rdataset_setownercase(dns_rdataset_t *rdataset,
00763                                   const dns_name_t *name);
00764 static void rdataset_getownercase(const dns_rdataset_t *rdataset,
00765                                   dns_name_t *name);
00766 
/*
 * Full method table, populated with the rdataset_* helpers of this file.
 * The initializer is positional, so the order of entries must match the
 * member order of dns_rdatasetmethods_t.  Two slots are deliberately left
 * NULL.
 * NOTE(review): dns_rdatasetmethods_t is not visible here; confirm which
 * operations the two NULL slots correspond to before relying on them.
 */
00767 static dns_rdatasetmethods_t rdataset_methods = {
00768         rdataset_disassociate,
00769         rdataset_first,
00770         rdataset_next,
00771         rdataset_current,
00772         rdataset_clone,
00773         rdataset_count,
00774         NULL,
00775         rdataset_getnoqname,
00776         NULL,
00777         rdataset_getclosest,
00778         rdataset_getadditional,
00779         rdataset_setadditional,
00780         rdataset_putadditional,
00781         rdataset_settrust,
00782         rdataset_expire,
00783         rdataset_clearprefetch,
00784         rdataset_setownercase,
00785         rdataset_getownercase
00786 };
00787 
/*
 * Reduced method table: only the first six entries (disassociate, first,
 * next, current, clone, count) are provided; every remaining slot is NULL.
 * It has the same number of entries as 'rdataset_methods' because both
 * initialize the same positional structure.
 */
00788 static dns_rdatasetmethods_t slab_methods = {
00789         rdataset_disassociate,
00790         rdataset_first,
00791         rdataset_next,
00792         rdataset_current,
00793         rdataset_clone,
00794         rdataset_count,
00795         NULL,
00796         NULL,
00797         NULL,
00798         NULL,
00799         NULL,
00800         NULL,
00801         NULL,
00802         NULL,
00803         NULL,
00804         NULL,
00805         NULL,
00806         NULL
00807 };
00808 
/*
 * Rdataset iterator: forward declarations of the four file-local
 * callbacks, followed by the positional method table that exposes them
 * (destroy, first, next, current).
 */
00809 static void rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp);
00810 static isc_result_t rdatasetiter_first(dns_rdatasetiter_t *iterator);
00811 static isc_result_t rdatasetiter_next(dns_rdatasetiter_t *iterator);
00812 static void rdatasetiter_current(dns_rdatasetiter_t *iterator,
00813                                  dns_rdataset_t *rdataset);
00814 
00815 static dns_rdatasetitermethods_t rdatasetiter_methods = {
00816         rdatasetiter_destroy,
00817         rdatasetiter_first,
00818         rdatasetiter_next,
00819         rdatasetiter_current
00820 };
00821 
/*
 * Concrete rdataset iterator.  'common' is the first member, so a pointer
 * to this structure can also be used as a dns_rdatasetiter_t pointer.
 */
00822 typedef struct rbtdb_rdatasetiter {
00823         dns_rdatasetiter_t              common;
00824         rdatasetheader_t *              current;        /* presumably the header the iterator is positioned on -- confirm */
00825 } rbtdb_rdatasetiter_t;
00826 
/*
 * Database iterator: forward declarations of the nine file-local
 * callbacks, followed by the positional method table that exposes them
 * in the same order (destroy, first, last, seek, prev, next, current,
 * pause, origin).
 */
00827 static void             dbiterator_destroy(dns_dbiterator_t **iteratorp);
00828 static isc_result_t     dbiterator_first(dns_dbiterator_t *iterator);
00829 static isc_result_t     dbiterator_last(dns_dbiterator_t *iterator);
00830 static isc_result_t     dbiterator_seek(dns_dbiterator_t *iterator,
00831                                         dns_name_t *name);
00832 static isc_result_t     dbiterator_prev(dns_dbiterator_t *iterator);
00833 static isc_result_t     dbiterator_next(dns_dbiterator_t *iterator);
00834 static isc_result_t     dbiterator_current(dns_dbiterator_t *iterator,
00835                                            dns_dbnode_t **nodep,
00836                                            dns_name_t *name);
00837 static isc_result_t     dbiterator_pause(dns_dbiterator_t *iterator);
00838 static isc_result_t     dbiterator_origin(dns_dbiterator_t *iterator,
00839                                           dns_name_t *name);
00840 
00841 static dns_dbiteratormethods_t dbiterator_methods = {
00842         dbiterator_destroy,
00843         dbiterator_first,
00844         dbiterator_last,
00845         dbiterator_seek,
00846         dbiterator_prev,
00847         dbiterator_next,
00848         dbiterator_current,
00849         dbiterator_pause,
00850         dbiterator_origin
00851 };
00852 
/* Number of slots in the 'deletions' array of rbtdb_dbiterator_t. */
00853 #define DELETION_BATCH_MAX 64
00854 
00855 /*
00856  * If 'paused' is ISC_TRUE, then the tree lock is not being held.
00857  */
/*
 * Concrete database iterator.  'common' is the first member, so a pointer
 * to this structure can also be used as a dns_dbiterator_t pointer.
 * NOTE(review): the per-field remarks below are inferred from the member
 * names and types only; the code that uses them is outside this view.
 */
00858 typedef struct rbtdb_dbiterator {
00859         dns_dbiterator_t                common;
00860         isc_boolean_t                   paused;
00861         isc_boolean_t                   new_origin;
00862         isc_rwlocktype_t                tree_locked;    /* a lock *type*, not a boolean */
00863         isc_result_t                    result;
00864         dns_fixedname_t                 name;
00865         dns_fixedname_t                 origin;
00866         dns_rbtnodechain_t              chain;
00867         dns_rbtnodechain_t              nsec3chain;
00868         dns_rbtnodechain_t              *current;       /* presumably points at 'chain' or 'nsec3chain' -- confirm */
00869         dns_rbtnode_t                   *node;
00870         dns_rbtnode_t                   *deletions[DELETION_BATCH_MAX];
00871         int                             delete;         /* presumably the number of used 'deletions' slots -- confirm */
00872         isc_boolean_t                   nsec3only;
00873         isc_boolean_t                   nonsec3;
00874 } rbtdb_dbiterator_t;
00875 
00876 
/*
 * Database-kind predicates: each tests a single DNS_DBATTR_* bit in
 * 'rbtdb->common.attributes' and evaluates to non-zero when it is set.
 * The argument is evaluated exactly once.
 */
00877 #define IS_STUB(rbtdb)  (((rbtdb)->common.attributes & DNS_DBATTR_STUB)  != 0)
00878 #define IS_CACHE(rbtdb) (((rbtdb)->common.attributes & DNS_DBATTR_CACHE) != 0)
00879 
/*
 * Forward declarations of file-local helpers that are referenced before
 * their definitions (free_rbtdb() is called by free_rbtdb_callback() and
 * maybe_free_rbtdb(); overmem() is called by free_rbtdb()).
 */
00880 static void free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log,
00881                        isc_event_t *event);
00882 static void overmem(dns_db_t *db, isc_boolean_t over);
00883 static void setnsec3parameters(dns_db_t *db, rbtdb_version_t *version);
00884 static void setownercase(rdatasetheader_t *header, const dns_name_t *name);
00885 
/*
 * 32-byte, initially all-zero buffer (the one-character initializer
 * leaves the remainder zero-filled).
 * NOTE(review): presumably filled in elsewhere with a file-format version
 * string -- confirm against the code that writes it.
 */
00886 /* Pad to 32 bytes */
00887 static char FILE_VERSION[32] = "\0";
00888 
00889 /*%
00890  * 'init_count' is used to initialize 'newheader->count', which in turn
00891  * is used to determine where in the cycle rrset-order cyclic starts.
00892  * We don't lock this as we don't care about simultaneous updates.
00893  *
00894  * Note:
00895  *      Both init_count and header->count can be ISC_UINT32_MAX.
00896  *      The count on the returned rdataset however can't be as
00897  *      that indicates that the database does not implement cyclic
00898  *      processing.
00899  */
00900 static unsigned int init_count;
00901 
00902 /*
00903  * Locking
00904  *
00905  * If a routine is going to lock more than one lock in this module, then
00906  * the locking must be done in the following order:
00907  *
00908  *      Tree Lock
00909  *
00910  *      Node Lock       (Only one from the set may be locked at one time by
00911  *                       any caller)
00912  *
00913  *      Database Lock
00914  *
00915  * Failure to follow this hierarchy can result in deadlock.
00916  */
00917 
00918 /*
00919  * Deleting Nodes
00920  *
00921  * For zone databases the node for the origin of the zone MUST NOT be deleted.
00922  */
00923 
00924 /*
00925  * Debugging routines
00926  */
00927 #ifdef DEBUG
00928 static void
00929 hexdump(const char *desc, unsigned char *data, size_t size) {
00930         char hexdump[BUFSIZ * 2 + 1];
00931         isc_buffer_t b;
00932         isc_region_t r;
00933         isc_result_t result;
00934         size_t bytes;
00935 
00936         fprintf(stderr, "%s: ", desc);
00937         do {
00938                 isc_buffer_init(&b, hexdump, sizeof(hexdump));
00939                 r.base = data;
00940                 r.length = bytes = (size > BUFSIZ) ? BUFSIZ : size;
00941                 result = isc_hex_totext(&r, 0, "", &b);
00942                 RUNTIME_CHECK(result == ISC_R_SUCCESS);
00943                 isc_buffer_putuint8(&b, 0);
00944                 fprintf(stderr, "%s", hexdump);
00945                 data += bytes;
00946                 size -= bytes;
00947         } while (size > 0);
00948         fprintf(stderr, "\n");
00949 }
00950 #endif
00951 
00952 
00953 /*
00954  * DB Routines
00955  */
00956 
00957 static void
00958 attach(dns_db_t *source, dns_db_t **targetp) {
00959         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)source;
00960 
00961         REQUIRE(VALID_RBTDB(rbtdb));
00962 
00963         isc_refcount_increment(&rbtdb->references, NULL);
00964 
00965         *targetp = source;
00966 }
00967 
00968 static void
00969 free_rbtdb_callback(isc_task_t *task, isc_event_t *event) {
00970         dns_rbtdb_t *rbtdb = event->ev_arg;
00971 
00972         UNUSED(task);
00973 
00974         free_rbtdb(rbtdb, ISC_TRUE, event);
00975 }
00976 
00977 static void
00978 update_cachestats(dns_rbtdb_t *rbtdb, isc_result_t result) {
00979         INSIST(IS_CACHE(rbtdb));
00980 
00981         if (rbtdb->cachestats == NULL)
00982                 return;
00983 
00984         switch (result) {
00985         case ISC_R_SUCCESS:
00986         case DNS_R_CNAME:
00987         case DNS_R_DNAME:
00988         case DNS_R_DELEGATION:
00989         case DNS_R_NCACHENXDOMAIN:
00990         case DNS_R_NCACHENXRRSET:
00991                 isc_stats_increment(rbtdb->cachestats,
00992                                     dns_cachestatscounter_hits);
00993                 break;
00994         default:
00995                 isc_stats_increment(rbtdb->cachestats,
00996                                     dns_cachestatscounter_misses);
00997         }
00998 }
00999 
01000 static void
01001 update_rrsetstats(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
01002                   isc_boolean_t increment)
01003 {
01004         dns_rdatastatstype_t statattributes = 0;
01005         dns_rdatastatstype_t base = 0;
01006         dns_rdatastatstype_t type;
01007 
01008         /* At the moment we count statistics only for cache DB */
01009         INSIST(IS_CACHE(rbtdb));
01010 
01011         if (NEGATIVE(header)) {
01012                 if (NXDOMAIN(header))
01013                         statattributes = DNS_RDATASTATSTYPE_ATTR_NXDOMAIN;
01014                 else {
01015                         statattributes = DNS_RDATASTATSTYPE_ATTR_NXRRSET;
01016                         base = RBTDB_RDATATYPE_EXT(header->type);
01017                 }
01018         } else
01019                 base = RBTDB_RDATATYPE_BASE(header->type);
01020 
01021         type = DNS_RDATASTATSTYPE_VALUE(base, statattributes);
01022         if (increment)
01023                 dns_rdatasetstats_increment(rbtdb->rrsetstats, type);
01024         else
01025                 dns_rdatasetstats_decrement(rbtdb->rrsetstats, type);
01026 }
01027 
01028 static void
01029 set_ttl(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, dns_ttl_t newttl) {
01030         int idx;
01031         isc_heap_t *heap;
01032         dns_ttl_t oldttl;
01033 
01034 
01035         if (!IS_CACHE(rbtdb)) {
01036                 header->rdh_ttl = newttl;
01037                 return;
01038         }
01039 
01040         oldttl = header->rdh_ttl;
01041         header->rdh_ttl = newttl;
01042 
01043         /*
01044          * It's possible the rbtdb is not a cache.  If this is the case,
01045          * we will not have a heap, and we move on.  If we do, though,
01046          * we might need to adjust things.
01047          */
01048         if (header->heap_index == 0 || newttl == oldttl)
01049                 return;
01050         idx = header->node->locknum;
01051         if (rbtdb->heaps == NULL || rbtdb->heaps[idx] == NULL)
01052             return;
01053         heap = rbtdb->heaps[idx];
01054 
01055         if (newttl < oldttl)
01056                 isc_heap_increased(heap, header->heap_index);
01057         else
01058                 isc_heap_decreased(heap, header->heap_index);
01059 }
01060 
01061 /*%
01062  * These functions allow the heap code to rank the priority of each
01063  * element.  It returns ISC_TRUE if v1 happens "sooner" than v2.
01064  */
01065 static isc_boolean_t
01066 ttl_sooner(void *v1, void *v2) {
01067         rdatasetheader_t *h1 = v1;
01068         rdatasetheader_t *h2 = v2;
01069 
01070         if (h1->rdh_ttl < h2->rdh_ttl)
01071                 return (ISC_TRUE);
01072         return (ISC_FALSE);
01073 }
01074 
01075 static isc_boolean_t
01076 resign_sooner(void *v1, void *v2) {
01077         rdatasetheader_t *h1 = v1;
01078         rdatasetheader_t *h2 = v2;
01079 
01080         if (isc_serial_lt(h1->resign, h2->resign))
01081                 return (ISC_TRUE);
01082         return (ISC_FALSE);
01083 }
01084 
01085 /*%
01086  * This function sets the heap index into the header.
01087  */
01088 static void
01089 set_index(void *what, unsigned int idx) {
01090         rdatasetheader_t *h = what;
01091 
01092         h->heap_index = idx;
01093 }
01094 
01095 /*%
01096  * Work out how many nodes can be deleted in the time between two
01097  * requests to the nameserver.  Smooth the resulting number and use it
01098  * as a estimate for the number of nodes to be deleted in the next
01099  * iteration.
01100  */
01101 static unsigned int
01102 adjust_quantum(unsigned int old, isc_time_t *start) {
01103         unsigned int pps = dns_pps;     /* packets per second */
01104         unsigned int interval;
01105         isc_uint64_t usecs;
01106         isc_time_t end;
01107         unsigned int new;
01108 
01109         if (pps < 100)
01110                 pps = 100;
01111         isc_time_now(&end);
01112 
01113         interval = 1000000 / pps;       /* interval in usec */
01114         if (interval == 0)
01115                 interval = 1;
01116         usecs = isc_time_microdiff(&end, start);
01117         if (usecs == 0) {
01118                 /*
01119                  * We were unable to measure the amount of time taken.
01120                  * Double the nodes deleted next time.
01121                  */
01122                 old *= 2;
01123                 if (old > 1000)
01124                         old = 1000;
01125                 return (old);
01126         }
01127         new = old * interval;
01128         new /= (unsigned int)usecs;
01129         if (new == 0)
01130                 new = 1;
01131         else if (new > 1000)
01132                 new = 1000;
01133 
01134         /* Smooth */
01135         new = (new + old * 3) / 4;
01136 
01137         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, DNS_LOGMODULE_CACHE,
01138                       ISC_LOG_DEBUG(1), "adjust_quantum -> %d", new);
01139 
01140         return (new);
01141 }
01142 
/*
 * Destroy 'rbtdb' and release everything it owns.
 *
 * The work is done in this order: release the current version, empty the
 * dead-node lists, destroy the three trees ('tree', 'nsec', 'nsec3'),
 * then free the auxiliary arrays, statistics, RPZ references, locks and
 * finally the rbtdb structure itself.
 *
 * Tree destruction may be incremental: when dns_rbt_destroy2() returns
 * ISC_R_QUOTA the function posts an event to 'rbtdb->task' and returns;
 * free_rbtdb_callback() then calls this function again with that event.
 *
 * 'log'   - when true, a debug message "done free_rbtdb(<origin>)" is
 *           written once the trees are gone.
 * 'event' - NULL on the initial call; the rescheduling event when
 *           re-entered from free_rbtdb_callback().  It is freed here once
 *           all trees have been destroyed.
 */
01143 static void
01144 free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log, isc_event_t *event) {
01145         unsigned int i;
01146         isc_ondestroy_t ondest;
01147         isc_result_t result;
01148         char buf[DNS_NAME_FORMATSIZE];
01149         dns_rbt_t **treep;
01150         isc_time_t start;
01151 
        /*
         * NOTE(review): the value -1 is neither ISC_TRUE nor ISC_FALSE;
         * presumably a special request understood by overmem() -- confirm
         * against its definition.
         */
01152         if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
01153                 overmem((dns_db_t *)rbtdb, (isc_boolean_t)-1);
01154 
01155         REQUIRE(rbtdb->current_version != NULL || EMPTY(rbtdb->open_versions));
01156         REQUIRE(rbtdb->future_version == NULL);
01157 
        /*
         * Drop the database's own reference to the current version; it
         * must have been the last one.
         */
01158         if (rbtdb->current_version != NULL) {
01159                 unsigned int refs;
01160 
01161                 isc_refcount_decrement(&rbtdb->current_version->references,
01162                                        &refs);
01163                 INSIST(refs == 0);
01164                 UNLINK(rbtdb->open_versions, rbtdb->current_version, link);
01165                 isc_refcount_destroy(&rbtdb->current_version->references);
01166                 isc_mem_put(rbtdb->common.mctx, rbtdb->current_version,
01167                             sizeof(rbtdb_version_t));
01168         }
01169 
01170         /*
01171          * We assume the number of remaining dead nodes is reasonably small;
01172          * the overhead of unlinking all nodes here should be negligible.
01173          */
01174         for (i = 0; i < rbtdb->node_lock_count; i++) {
01175                 dns_rbtnode_t *node;
01176 
01177                 node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
01178                 while (node != NULL) {
01179                         ISC_LIST_UNLINK(rbtdb->deadnodes[i], node, deadlink);
01180                         node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
01181                 }
01182         }
01183 
        /*
         * Initial (non-callback) entry: pick the starting quantum, 100 if
         * a task exists to continue the work later, otherwise 0.
         * NOTE(review): 0 presumably means "no limit" to
         * dns_rbt_destroy2() -- confirm.
         */
01184         if (event == NULL)
01185                 rbtdb->quantum = (rbtdb->task != NULL) ? 100 : 0;
01186 
01187         for (;;) {
01188                 /*
01189                  * pick the next tree to (start to) destroy
01190                  */
01191                 treep = &rbtdb->tree;
01192                 if (*treep == NULL) {
01193                         treep = &rbtdb->nsec;
01194                         if (*treep == NULL) {
01195                                 treep = &rbtdb->nsec3;
01196                                 /*
01197                                  * we're finished after clear cutting
01198                                  */
01199                                 if (*treep == NULL)
01200                                         break;
01201                         }
01202                 }
01203 
01204                 isc_time_now(&start);
01205                 result = dns_rbt_destroy2(treep, rbtdb->quantum);
01206                 if (result == ISC_R_QUOTA) {
                        /*
                         * The tree was not finished within the quantum:
                         * re-tune the quantum from the time this batch
                         * took and hand the rest of the work to the task.
                         */
01207                         INSIST(rbtdb->task != NULL);
01208                         if (rbtdb->quantum != 0)
01209                                 rbtdb->quantum = adjust_quantum(rbtdb->quantum,
01210                                                                 &start);
01211                         if (event == NULL)
01212                                 event = isc_event_allocate(rbtdb->common.mctx,
01213                                                            NULL,
01214                                                          DNS_EVENT_FREESTORAGE,
01215                                                            free_rbtdb_callback,
01216                                                            rbtdb,
01217                                                            sizeof(isc_event_t));
                        /*
                         * No event could be allocated, so rescheduling is
                         * impossible; keep destroying synchronously.
                         */
01218                         if (event == NULL)
01219                                 continue;
01220                         isc_task_send(rbtdb->task, &event);
01221                         return;
01222                 }
01223                 INSIST(result == ISC_R_SUCCESS && *treep == NULL);
01224         }
01225 
        /* All trees are gone; the rescheduling event is no longer needed. */
01226         if (event != NULL)
01227                 isc_event_free(&event);
01228         if (log) {
01229                 if (dns_name_dynamic(&rbtdb->common.origin))
01230                         dns_name_format(&rbtdb->common.origin, buf,
01231                                         sizeof(buf));
01232                 else
01233                         strcpy(buf, "<UNKNOWN>");
01234                 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
01235                               DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
01236                               "done free_rbtdb(%s)", buf);
01237         }
01238         if (dns_name_dynamic(&rbtdb->common.origin))
01239                 dns_name_free(&rbtdb->common.origin, rbtdb->common.mctx);
01240         for (i = 0; i < rbtdb->node_lock_count; i++) {
01241                 isc_refcount_destroy(&rbtdb->node_locks[i].references);
01242                 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
01243         }
01244 
01245         /*
01246          * Clean up LRU / re-signing order lists.
01247          */
01248         if (rbtdb->rdatasets != NULL) {
01249                 for (i = 0; i < rbtdb->node_lock_count; i++)
01250                         INSIST(ISC_LIST_EMPTY(rbtdb->rdatasets[i]));
01251                 isc_mem_put(rbtdb->common.mctx, rbtdb->rdatasets,
01252                             rbtdb->node_lock_count *
01253                             sizeof(rdatasetheaderlist_t));
01254         }
01255         /*
01256          * Clean up dead node buckets.
01257          */
01258         if (rbtdb->deadnodes != NULL) {
01259                 for (i = 0; i < rbtdb->node_lock_count; i++)
01260                         INSIST(ISC_LIST_EMPTY(rbtdb->deadnodes[i]));
01261                 isc_mem_put(rbtdb->common.mctx, rbtdb->deadnodes,
01262                     rbtdb->node_lock_count * sizeof(rbtnodelist_t));
01263         }
01264         /*
01265          * Clean up heap objects.
01266          */
        /* Note: the heap array is returned to 'hmctx', not 'common.mctx'. */
01267         if (rbtdb->heaps != NULL) {
01268                 for (i = 0; i < rbtdb->node_lock_count; i++)
01269                         isc_heap_destroy(&rbtdb->heaps[i]);
01270                 isc_mem_put(rbtdb->hmctx, rbtdb->heaps,
01271                             rbtdb->node_lock_count * sizeof(isc_heap_t *));
01272         }
01273 
01274         if (rbtdb->rrsetstats != NULL)
01275                 dns_stats_detach(&rbtdb->rrsetstats);
01276         if (rbtdb->cachestats != NULL)
01277                 isc_stats_detach(&rbtdb->cachestats);
01278 
01279         if (rbtdb->load_rpzs != NULL) {
01280                 /*
01281                  * We must be cleaning up after a failed zone loading.
01282                  */
01283                 REQUIRE(rbtdb->rpzs != NULL &&
01284                         rbtdb->rpz_num < rbtdb->rpzs->p.num_zones);
01285                 dns_rpz_detach_rpzs(&rbtdb->load_rpzs);
01286         }
01287         if (rbtdb->rpzs != NULL) {
01288                 REQUIRE(rbtdb->rpz_num < rbtdb->rpzs->p.num_zones);
01289                 dns_rpz_detach_rpzs(&rbtdb->rpzs);
01290         }
01291 
01292         isc_mem_put(rbtdb->common.mctx, rbtdb->node_locks,
01293                     rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
01294         isc_rwlock_destroy(&rbtdb->tree_lock);
01295         isc_refcount_destroy(&rbtdb->references);
01296         if (rbtdb->task != NULL)
01297                 isc_task_detach(&rbtdb->task);
01298 
01299         RBTDB_DESTROYLOCK(&rbtdb->lock);
01300         rbtdb->common.magic = 0;
01301         rbtdb->common.impmagic = 0;
        /*
         * Copy the on-destroy list out of the structure: the notification
         * is delivered after the rbtdb memory has been released below.
         */
01302         ondest = rbtdb->common.ondest;
01303         isc_mem_detach(&rbtdb->hmctx);
01304 
01305         if (rbtdb->mmap_location != NULL)
01306                 isc_file_munmap(rbtdb->mmap_location,
01307                                 (size_t) rbtdb->mmap_size);
01308 
01309         isc_mem_putanddetach(&rbtdb->common.mctx, rbtdb, sizeof(*rbtdb));
01310         isc_ondestroy_notify(&ondest, rbtdb);
01311 }
01312 
01313 static inline void
01314 maybe_free_rbtdb(dns_rbtdb_t *rbtdb) {
01315         isc_boolean_t want_free = ISC_FALSE;
01316         unsigned int i;
01317         unsigned int inactive = 0;
01318 
01319         /* XXX check for open versions here */
01320 
01321         if (rbtdb->soanode != NULL)
01322                 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->soanode);
01323         if (rbtdb->nsnode != NULL)
01324                 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->nsnode);
01325 
01326         /*
01327          * Even though there are no external direct references, there still
01328          * may be nodes in use.
01329          */
01330         for (i = 0; i < rbtdb->node_lock_count; i++) {
01331                 NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
01332                 rbtdb->node_locks[i].exiting = ISC_TRUE;
01333                 NODE_UNLOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
01334                 if (isc_refcount_current(&rbtdb->node_locks[i].references)
01335                     == 0) {
01336                         inactive++;
01337                 }
01338         }
01339 
01340         if (inactive != 0) {
01341                 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
01342                 rbtdb->active -= inactive;
01343                 if (rbtdb->active == 0)
01344                         want_free = ISC_TRUE;
01345                 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
01346                 if (want_free) {
01347                         char buf[DNS_NAME_FORMATSIZE];
01348                         if (dns_name_dynamic(&rbtdb->common.origin))
01349                                 dns_name_format(&rbtdb->common.origin, buf,
01350                                                 sizeof(buf));
01351                         else
01352                                 strcpy(buf, "<UNKNOWN>");
01353                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
01354                                       DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
01355                                       "calling free_rbtdb(%s)", buf);
01356                         free_rbtdb(rbtdb, ISC_TRUE, NULL);
01357                 }
01358         }
01359 }
01360 
01361 static void
01362 detach(dns_db_t **dbp) {
01363         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(*dbp);
01364         unsigned int refs;
01365 
01366         REQUIRE(VALID_RBTDB(rbtdb));
01367 
01368         isc_refcount_decrement(&rbtdb->references, &refs);
01369 
01370         if (refs == 0)
01371                 maybe_free_rbtdb(rbtdb);
01372 
01373         *dbp = NULL;
01374 }
01375 
01376 static void
01377 currentversion(dns_db_t *db, dns_dbversion_t **versionp) {
01378         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
01379         rbtdb_version_t *version;
01380         unsigned int refs;
01381 
01382         REQUIRE(VALID_RBTDB(rbtdb));
01383 
01384         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
01385         version = rbtdb->current_version;
01386         isc_refcount_increment(&version->references, &refs);
01387         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
01388 
01389         *versionp = (dns_dbversion_t *)version;
01390 }
01391 
01392 static inline rbtdb_version_t *
01393 allocate_version(isc_mem_t *mctx, rbtdb_serial_t serial,
01394                  unsigned int references, isc_boolean_t writer)
01395 {
01396         isc_result_t result;
01397         rbtdb_version_t *version;
01398 
01399         version = isc_mem_get(mctx, sizeof(*version));
01400         if (version == NULL)
01401                 return (NULL);
01402         version->serial = serial;
01403         result = isc_refcount_init(&version->references, references);
01404         if (result != ISC_R_SUCCESS) {
01405                 isc_mem_put(mctx, version, sizeof(*version));
01406                 return (NULL);
01407         }
01408         version->writer = writer;
01409         version->commit_ok = ISC_FALSE;
01410         ISC_LIST_INIT(version->changed_list);
01411         ISC_LIST_INIT(version->resigned_list);
01412         ISC_LINK_INIT(version, link);
01413 
01414         return (version);
01415 }
01416 
01417 static isc_result_t
01418 newversion(dns_db_t *db, dns_dbversion_t **versionp) {
01419         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
01420         rbtdb_version_t *version;
01421 
01422         REQUIRE(VALID_RBTDB(rbtdb));
01423         REQUIRE(versionp != NULL && *versionp == NULL);
01424         REQUIRE(rbtdb->future_version == NULL);
01425 
01426         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
01427         RUNTIME_CHECK(rbtdb->next_serial != 0);         /* XXX Error? */
01428         version = allocate_version(rbtdb->common.mctx, rbtdb->next_serial, 1,
01429                                    ISC_TRUE);
01430         if (version != NULL) {
01431                 version->rbtdb = rbtdb;
01432                 version->commit_ok = ISC_TRUE;
01433                 version->secure = rbtdb->current_version->secure;
01434                 version->havensec3 = rbtdb->current_version->havensec3;
01435                 if (version->havensec3) {
01436                         version->flags = rbtdb->current_version->flags;
01437                         version->iterations =
01438                                 rbtdb->current_version->iterations;
01439                         version->hash = rbtdb->current_version->hash;
01440                         version->salt_length =
01441                                 rbtdb->current_version->salt_length;
01442                         memmove(version->salt, rbtdb->current_version->salt,
01443                                 version->salt_length);
01444                 } else {
01445                         version->flags = 0;
01446                         version->iterations = 0;
01447                         version->hash = 0;
01448                         version->salt_length = 0;
01449                         memset(version->salt, 0, sizeof(version->salt));
01450                 }
01451                 rbtdb->next_serial++;
01452                 rbtdb->future_version = version;
01453         }
01454         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
01455 
01456         if (version == NULL)
01457                 return (ISC_R_NOMEMORY);
01458 
01459         *versionp = version;
01460 
01461         return (ISC_R_SUCCESS);
01462 }
01463 
01464 static void
01465 attachversion(dns_db_t *db, dns_dbversion_t *source,
01466               dns_dbversion_t **targetp)
01467 {
01468         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
01469         rbtdb_version_t *rbtversion = source;
01470         unsigned int refs;
01471 
01472         REQUIRE(VALID_RBTDB(rbtdb));
01473         INSIST(rbtversion != NULL && rbtversion->rbtdb == rbtdb);
01474 
01475         isc_refcount_increment(&rbtversion->references, &refs);
01476         INSIST(refs > 1);
01477 
01478         *targetp = rbtversion;
01479 }
01480 
01481 static rbtdb_changed_t *
01482 add_changed(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
01483             dns_rbtnode_t *node)
01484 {
01485         rbtdb_changed_t *changed;
01486         unsigned int refs;
01487 
01488         /*
01489          * Caller must be holding the node lock if its reference must be
01490          * protected by the lock.
01491          */
01492 
01493         changed = isc_mem_get(rbtdb->common.mctx, sizeof(*changed));
01494 
01495         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
01496 
01497         REQUIRE(version->writer);
01498 
01499         if (changed != NULL) {
01500                 dns_rbtnode_refincrement(node, &refs);
01501                 INSIST(refs != 0);
01502                 changed->node = node;
01503                 changed->dirty = ISC_FALSE;
01504                 ISC_LIST_INITANDAPPEND(version->changed_list, changed, link);
01505         } else
01506                 version->commit_ok = ISC_FALSE;
01507 
01508         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
01509 
01510         return (changed);
01511 }
01512 
01513 static void
01514 free_acachearray(isc_mem_t *mctx, rdatasetheader_t *header,
01515                  acachectl_t *array)
01516 {
01517         unsigned int count;
01518         unsigned int i;
01519         unsigned char *raw;     /* RDATASLAB */
01520 
01521         /*
01522          * The caller must be holding the corresponding node lock.
01523          */
01524 
01525         if (array == NULL)
01526                 return;
01527 
01528         raw = (unsigned char *)header + sizeof(*header);
01529         count = raw[0] * 256 + raw[1];
01530 
01531         /*
01532          * Sanity check: since an additional cache entry has a reference to
01533          * the original DB node (in the callback arg), there should be no
01534          * acache entries when the node can be freed.
01535          */
01536         for (i = 0; i < count; i++)
01537                 INSIST(array[i].entry == NULL && array[i].cbarg == NULL);
01538 
01539         isc_mem_put(mctx, array, count * sizeof(acachectl_t));
01540 }
01541 
01542 static inline void
01543 free_noqname(isc_mem_t *mctx, struct noqname **noqname) {
01544 
01545         if (dns_name_dynamic(&(*noqname)->name))
01546                 dns_name_free(&(*noqname)->name, mctx);
01547         if ((*noqname)->neg != NULL)
01548                 isc_mem_put(mctx, (*noqname)->neg,
01549                             dns_rdataslab_size((*noqname)->neg, 0));
01550         if ((*noqname)->negsig != NULL)
01551                 isc_mem_put(mctx, (*noqname)->negsig,
01552                             dns_rdataslab_size((*noqname)->negsig, 0));
01553         isc_mem_put(mctx, *noqname, sizeof(**noqname));
01554         *noqname = NULL;
01555 }
01556 
01557 static inline void
01558 init_rdataset(dns_rbtdb_t *rbtdb, rdatasetheader_t *h) {
01559         ISC_LINK_INIT(h, link);
01560         h->heap_index = 0;
01561         h->is_mmapped = 0;
01562         h->next_is_relative = 0;
01563         h->node_is_relative = 0;
01564 
01565 #if TRACE_HEADER
01566         if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
01567                 fprintf(stderr, "initialized header: %p\n", h);
01568 #else
01569         UNUSED(rbtdb);
01570 #endif
01571 }
01572 
01573 /*
01574  * Update the copied values of 'next' and 'node' if they are relative.
01575  */
01576 static void
01577 update_newheader(rdatasetheader_t *new, rdatasetheader_t *old) {
01578         char *p;
01579 
01580         if (old->next_is_relative) {
01581                 p = (char *) old;
01582                 p += (uintptr_t)old->next;
01583                 new->next = (rdatasetheader_t *)p;
01584         }
01585         if (old->node_is_relative) {
01586                 p = (char *) old;
01587                 p += (uintptr_t)old->node;
01588                 new->node = (dns_rbtnode_t *)p;
01589         }
01590         if (CASESET(old)) {
01591                 memmove(new->upper, old->upper, sizeof(old->upper));
01592                 new->attributes |= RDATASET_ATTR_CASESET;
01593         }
01594 }
01595 
01596 static inline rdatasetheader_t *
01597 new_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx) {
01598         rdatasetheader_t *h;
01599 
01600         h = isc_mem_get(mctx, sizeof(*h));
01601         if (h == NULL)
01602                 return (NULL);
01603 
01604 #if TRACE_HEADER
01605         if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
01606                 fprintf(stderr, "allocated header: %p\n", h);
01607 #endif
01608         memset(h->upper, 0xeb, sizeof(h->upper));
01609         init_rdataset(rbtdb, h);
01610         h->rdh_ttl = 0;
01611         return (h);
01612 }
01613 
01614 static inline void
01615 free_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *rdataset) {
01616         unsigned int size;
01617         int idx;
01618 
01619         if (EXISTS(rdataset) &&
01620             (rdataset->attributes & RDATASET_ATTR_STATCOUNT) != 0) {
01621                 update_rrsetstats(rbtdb, rdataset, ISC_FALSE);
01622         }
01623 
01624         idx = rdataset->node->locknum;
01625         if (ISC_LINK_LINKED(rdataset, link)) {
01626                 INSIST(IS_CACHE(rbtdb));
01627                 ISC_LIST_UNLINK(rbtdb->rdatasets[idx], rdataset, link);
01628         }
01629 
01630         if (rdataset->heap_index != 0)
01631                 isc_heap_delete(rbtdb->heaps[idx], rdataset->heap_index);
01632         rdataset->heap_index = 0;
01633 
01634         if (rdataset->noqname != NULL)
01635                 free_noqname(mctx, &rdataset->noqname);
01636         if (rdataset->closest != NULL)
01637                 free_noqname(mctx, &rdataset->closest);
01638 
01639         free_acachearray(mctx, rdataset, rdataset->additional_auth);
01640         free_acachearray(mctx, rdataset, rdataset->additional_glue);
01641 
01642         if ((rdataset->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
01643                 size = sizeof(*rdataset);
01644         else
01645                 size = dns_rdataslab_size((unsigned char *)rdataset,
01646                                           sizeof(*rdataset));
01647 
01648         if (rdataset->is_mmapped == 1)
01649                 return;
01650 
01651         isc_mem_put(mctx, rdataset, size);
01652 }
01653 
01654 static inline void
01655 rollback_node(dns_rbtnode_t *node, rbtdb_serial_t serial) {
01656         rdatasetheader_t *header, *dcurrent;
01657         isc_boolean_t make_dirty = ISC_FALSE;
01658 
01659         /*
01660          * Caller must hold the node lock.
01661          */
01662 
01663         /*
01664          * We set the IGNORE attribute on rdatasets with serial number
01665          * 'serial'.  When the reference count goes to zero, these rdatasets
01666          * will be cleaned up; until that time, they will be ignored.
01667          */
01668         for (header = node->data; header != NULL; header = header->next) {
01669                 if (header->serial == serial) {
01670                         header->attributes |= RDATASET_ATTR_IGNORE;
01671                         make_dirty = ISC_TRUE;
01672                 }
01673                 for (dcurrent = header->down;
01674                      dcurrent != NULL;
01675                      dcurrent = dcurrent->down) {
01676                         if (dcurrent->serial == serial) {
01677                                 dcurrent->attributes |= RDATASET_ATTR_IGNORE;
01678                                 make_dirty = ISC_TRUE;
01679                         }
01680                 }
01681         }
01682         if (make_dirty)
01683                 node->dirty = 1;
01684 }
01685 
01686 static inline void
01687 mark_stale_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header) {
01688 
01689         /*
01690          * If we are already stale there is nothing to do.
01691          */
01692         if ((header->attributes & RDATASET_ATTR_STALE) != 0)
01693                 return;
01694 
01695         header->attributes |= RDATASET_ATTR_STALE;
01696         header->node->dirty = 1;
01697 
01698         /*
01699          * If we have not been counted then there is nothing to do.
01700          */
01701         if ((header->attributes & RDATASET_ATTR_STATCOUNT) == 0)
01702                 return;
01703 
01704         if (EXISTS(header))
01705                 update_rrsetstats(rbtdb, header, ISC_TRUE);
01706 }
01707 
01708 static inline void
01709 clean_stale_headers(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *top)
01710 {
01711         rdatasetheader_t *d, *down_next;
01712 
01713         for (d = top->down; d != NULL; d = down_next) {
01714                 down_next = d->down;
01715                 free_rdataset(rbtdb, mctx, d);
01716         }
01717         top->down = NULL;
01718 }
01719 
01720 static inline void
01721 clean_cache_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
01722         rdatasetheader_t *current, *top_prev, *top_next;
01723         isc_mem_t *mctx = rbtdb->common.mctx;
01724 
01725         /*
01726          * Caller must be holding the node lock.
01727          */
01728 
01729         top_prev = NULL;
01730         for (current = node->data; current != NULL; current = top_next) {
01731                 top_next = current->next;
01732                 clean_stale_headers(rbtdb, mctx, current);
01733                 /*
01734                  * If current is nonexistent or stale, we can clean it up.
01735                  */
01736                 if ((current->attributes &
01737                      (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0) {
01738                         if (top_prev != NULL)
01739                                 top_prev->next = current->next;
01740                         else
01741                                 node->data = current->next;
01742                         free_rdataset(rbtdb, mctx, current);
01743                 } else
01744                         top_prev = current;
01745         }
01746         node->dirty = 0;
01747 }
01748 
01749 static inline void
01750 clean_zone_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
01751                 rbtdb_serial_t least_serial)
01752 {
01753         rdatasetheader_t *current, *dcurrent, *down_next, *dparent;
01754         rdatasetheader_t *top_prev, *top_next;
01755         isc_mem_t *mctx = rbtdb->common.mctx;
01756         isc_boolean_t still_dirty = ISC_FALSE;
01757 
01758         /*
01759          * Caller must be holding the node lock.
01760          */
01761         REQUIRE(least_serial != 0);
01762 
01763         top_prev = NULL;
01764         for (current = node->data; current != NULL; current = top_next) {
01765                 top_next = current->next;
01766 
01767                 /*
01768                  * First, we clean up any instances of multiple rdatasets
01769                  * with the same serial number, or that have the IGNORE
01770                  * attribute.
01771                  */
01772                 dparent = current;
01773                 for (dcurrent = current->down;
01774                      dcurrent != NULL;
01775                      dcurrent = down_next) {
01776                         down_next = dcurrent->down;
01777                         INSIST(dcurrent->serial <= dparent->serial);
01778                         if (dcurrent->serial == dparent->serial ||
01779                             IGNORE(dcurrent)) {
01780                                 if (down_next != NULL)
01781                                         down_next->next = dparent;
01782                                 dparent->down = down_next;
01783                                 free_rdataset(rbtdb, mctx, dcurrent);
01784                         } else
01785                                 dparent = dcurrent;
01786                 }
01787 
01788                 /*
01789                  * We've now eliminated all IGNORE datasets with the possible
01790                  * exception of current, which we now check.
01791                  */
01792                 if (IGNORE(current)) {
01793                         down_next = current->down;
01794                         if (down_next == NULL) {
01795                                 if (top_prev != NULL)
01796                                         top_prev->next = current->next;
01797                                 else
01798                                         node->data = current->next;
01799                                 free_rdataset(rbtdb, mctx, current);
01800                                 /*
01801                                  * current no longer exists, so we can
01802                                  * just continue with the loop.
01803                                  */
01804                                 continue;
01805                         } else {
01806                                 /*
01807                                  * Pull up current->down, making it the new
01808                                  * current.
01809                                  */
01810                                 if (top_prev != NULL)
01811                                         top_prev->next = down_next;
01812                                 else
01813                                         node->data = down_next;
01814                                 down_next->next = top_next;
01815                                 free_rdataset(rbtdb, mctx, current);
01816                                 current = down_next;
01817                         }
01818                 }
01819 
01820                 /*
01821                  * We now try to find the first down node less than the
01822                  * least serial.
01823                  */
01824                 dparent = current;
01825                 for (dcurrent = current->down;
01826                      dcurrent != NULL;
01827                      dcurrent = down_next) {
01828                         down_next = dcurrent->down;
01829                         if (dcurrent->serial < least_serial)
01830                                 break;
01831                         dparent = dcurrent;
01832                 }
01833 
01834                 /*
01835                  * If there is a such an rdataset, delete it and any older
01836                  * versions.
01837                  */
01838                 if (dcurrent != NULL) {
01839                         do {
01840                                 down_next = dcurrent->down;
01841                                 INSIST(dcurrent->serial <= least_serial);
01842                                 free_rdataset(rbtdb, mctx, dcurrent);
01843                                 dcurrent = down_next;
01844                         } while (dcurrent != NULL);
01845                         dparent->down = NULL;
01846                 }
01847 
01848                 /*
01849                  * Note.  The serial number of 'current' might be less than
01850                  * least_serial too, but we cannot delete it because it is
01851                  * the most recent version, unless it is a NONEXISTENT
01852                  * rdataset.
01853                  */
01854                 if (current->down != NULL) {
01855                         still_dirty = ISC_TRUE;
01856                         top_prev = current;
01857                 } else {
01858                         /*
01859                          * If this is a NONEXISTENT rdataset, we can delete it.
01860                          */
01861                         if (NONEXISTENT(current)) {
01862                                 if (top_prev != NULL)
01863                                         top_prev->next = current->next;
01864                                 else
01865                                         node->data = current->next;
01866                                 free_rdataset(rbtdb, mctx, current);
01867                         } else
01868                                 top_prev = current;
01869                 }
01870         }
01871         if (!still_dirty)
01872                 node->dirty = 0;
01873 }
01874 
01875 static void
01876 delete_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node)
01877 {
01878         dns_rbtnode_t *nsecnode;
01879         dns_fixedname_t fname;
01880         dns_name_t *name;
01881         isc_result_t result = ISC_R_UNEXPECTED;
01882 
01883         INSIST(!ISC_LINK_LINKED(node, deadlink));
01884 
01885         switch (node->nsec) {
01886         case DNS_RBT_NSEC_NORMAL:
01887                 if (rbtdb->rpzs != NULL && node->rpz) {
01888                         dns_fixedname_init(&fname);
01889                         name = dns_fixedname_name(&fname);
01890                         dns_rbt_fullnamefromnode(node, name);
01891                         dns_rpz_delete(rbtdb->rpzs, rbtdb->rpz_num, name);
01892                 }
01893                 result = dns_rbt_deletenode(rbtdb->tree, node, ISC_FALSE);
01894                 break;
01895         case DNS_RBT_NSEC_HAS_NSEC:
01896                 dns_fixedname_init(&fname);
01897                 name = dns_fixedname_name(&fname);
01898                 dns_rbt_fullnamefromnode(node, name);
01899                 /*
01900                  * Delete the corresponding node from the auxiliary NSEC
01901                  * tree before deleting from the main tree.
01902                  */
01903                 nsecnode = NULL;
01904                 result = dns_rbt_findnode(rbtdb->nsec, name, NULL, &nsecnode,
01905                                           NULL, DNS_RBTFIND_EMPTYDATA,
01906                                           NULL, NULL);
01907                 if (result != ISC_R_SUCCESS) {
01908                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
01909                                       DNS_LOGMODULE_CACHE, ISC_LOG_WARNING,
01910                                       "delete_node: "
01911                                       "dns_rbt_findnode(nsec): %s",
01912                                       isc_result_totext(result));
01913                 } else {
01914                         result = dns_rbt_deletenode(rbtdb->nsec, nsecnode,
01915                                                     ISC_FALSE);
01916                         if (result != ISC_R_SUCCESS) {
01917                                 isc_log_write(dns_lctx,
01918                                               DNS_LOGCATEGORY_DATABASE,
01919                                               DNS_LOGMODULE_CACHE,
01920                                               ISC_LOG_WARNING,
01921                                               "delete_node(): "
01922                                               "dns_rbt_deletenode(nsecnode): %s",
01923                                               isc_result_totext(result));
01924                         }
01925                 }
01926                 if (rbtdb->rpzs != NULL && node->rpz)
01927                         dns_rpz_delete(rbtdb->rpzs, rbtdb->rpz_num, name);
01928                 result = dns_rbt_deletenode(rbtdb->tree, node, ISC_FALSE);
01929                 break;
01930         case DNS_RBT_NSEC_NSEC:
01931                 result = dns_rbt_deletenode(rbtdb->nsec, node, ISC_FALSE);
01932                 break;
01933         case DNS_RBT_NSEC_NSEC3:
01934                 result = dns_rbt_deletenode(rbtdb->nsec3, node, ISC_FALSE);
01935                 break;
01936         }
01937         if (result != ISC_R_SUCCESS) {
01938                 isc_log_write(dns_lctx,
01939                               DNS_LOGCATEGORY_DATABASE,
01940                               DNS_LOGMODULE_CACHE,
01941                               ISC_LOG_WARNING,
01942                               "delete_node(): "
01943                               "dns_rbt_deletenode: %s",
01944                               isc_result_totext(result));
01945         }
01946 }
01947 
01948 /*%
01949  * Clean up dead nodes.  These are nodes which have no references, and
01950  * have no data.  They are dead but we could not or chose not to delete
01951  * them when we deleted all the data at that node because we did not want
01952  * to wait for the tree write lock.
01953  *
01954  * The caller must hold a tree write lock and bucketnum'th node (write) lock.
01955  */
01956 static void
01957 cleanup_dead_nodes(dns_rbtdb_t *rbtdb, int bucketnum) {
01958         dns_rbtnode_t *node;
01959         int count = 10;         /* XXXJT: should be adjustable */
01960 
01961         node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
01962         while (node != NULL && count > 0) {
01963                 ISC_LIST_UNLINK(rbtdb->deadnodes[bucketnum], node, deadlink);
01964 
01965                 /*
01966                  * Since we're holding a tree write lock, it should be
01967                  * impossible for this node to be referenced by others.
01968                  */
01969                 INSIST(dns_rbtnode_refcurrent(node) == 0 &&
01970                        node->data == NULL);
01971 
01972                 delete_node(rbtdb, node);
01973 
01974                 node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
01975                 count--;
01976         }
01977 }
01978 
01979 /*
01980  * Caller must be holding the node lock.
01981  */
01982 static inline void
01983 new_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
01984         unsigned int lockrefs, noderefs;
01985         isc_refcount_t *lockref;
01986 
01987         INSIST(!ISC_LINK_LINKED(node, deadlink));
01988         dns_rbtnode_refincrement0(node, &noderefs);
01989         if (noderefs == 1) {    /* this is the first reference to the node */
01990                 lockref = &rbtdb->node_locks[node->locknum].references;
01991                 isc_refcount_increment0(lockref, &lockrefs);
01992                 INSIST(lockrefs != 0);
01993         }
01994         INSIST(noderefs != 0);
01995 }
01996 
01997 /*
01998  * This function is assumed to be called when a node is newly referenced
01999  * and can be in the deadnode list.  In that case the node must be retrieved
02000  * from the list because it is going to be used.  In addition, if the caller
02001  * happens to hold a write lock on the tree, it's a good chance to purge dead
02002  * nodes.
02003  * Note: while a new reference is gained in multiple places, there are only very
02004  * few cases where the node can be in the deadnode list (only empty nodes can
02005  * have been added to the list).
02006  */
02007 static inline void
02008 reactivate_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
02009                 isc_rwlocktype_t treelocktype)
02010 {
02011         isc_rwlocktype_t locktype = isc_rwlocktype_read;
02012         nodelock_t *nodelock = &rbtdb->node_locks[node->locknum].lock;
02013         isc_boolean_t maybe_cleanup = ISC_FALSE;
02014 
02015         POST(locktype);
02016 
02017         NODE_STRONGLOCK(nodelock);
02018         NODE_WEAKLOCK(nodelock, locktype);
02019 
02020         /*
02021          * Check if we can possibly cleanup the dead node.  If so, upgrade
02022          * the node lock below to perform the cleanup.
02023          */
02024         if (!ISC_LIST_EMPTY(rbtdb->deadnodes[node->locknum]) &&
02025             treelocktype == isc_rwlocktype_write) {
02026                 maybe_cleanup = ISC_TRUE;
02027         }
02028 
02029         if (ISC_LINK_LINKED(node, deadlink) || maybe_cleanup) {
02030                 /*
02031                  * Upgrade the lock and test if we still need to unlink.
02032                  */
02033                 NODE_WEAKUNLOCK(nodelock, locktype);
02034                 locktype = isc_rwlocktype_write;
02035                 POST(locktype);
02036                 NODE_WEAKLOCK(nodelock, locktype);
02037                 if (ISC_LINK_LINKED(node, deadlink))
02038                         ISC_LIST_UNLINK(rbtdb->deadnodes[node->locknum],
02039                                         node, deadlink);
02040                 if (maybe_cleanup)
02041                         cleanup_dead_nodes(rbtdb, node->locknum);
02042         }
02043 
02044         new_reference(rbtdb, node);
02045 
02046         NODE_WEAKUNLOCK(nodelock, locktype);
02047         NODE_STRONGUNLOCK(nodelock);
02048 }
02049 
02050 /*
02051  * Caller must be holding the node lock; either the "strong", read or write
02052  * lock.  Note that the lock must be held even when node references are
02053  * atomically modified; in that case the decrement operation itself does not
02054  * have to be protected, but we must avoid a race condition where multiple
02055  * threads are decreasing the reference to zero simultaneously and at least
02056  * one of them is going to free the node.
02057  * This function returns ISC_TRUE if and only if the node reference decreases
02058  * to zero.
 *
 * Arguments, as used by the code below:
 *
 *   least_serial  Passed to clean_zone_node() when a dirty zone node is
 *                 cleaned.  If it is 0, rbtdb->least_serial is read (under
 *                 the database read lock) and used instead.
 *   nlock         If isc_rwlocktype_read, the node lock is released and
 *                 re-taken as a write lock on the slow path, and downgraded
 *                 again before returning.
 *   tlock         State of the tree lock on entry.  Unless it is already
 *                 isc_rwlocktype_write, a non-blocking attempt is made to
 *                 obtain the tree write lock (try-upgrade if read, try-lock
 *                 otherwise); whatever was obtained is undone on exit.
 *   pruning       When true, no pruning event is dispatched for the node;
 *                 it is deleted directly if the tree write lock is held.
 *
 * Return value nuance: when a pruning event is dispatched, a fresh
 * reference is taken on the node on behalf of that event, and ISC_FALSE is
 * returned even though the caller's reference was the last one.
02059  */
02060 static isc_boolean_t
02061 decrement_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
02062                     rbtdb_serial_t least_serial,
02063                     isc_rwlocktype_t nlock, isc_rwlocktype_t tlock,
02064                     isc_boolean_t pruning)
02065 {
02066         isc_result_t result;
02067         isc_boolean_t write_locked;
02068         rbtdb_nodelock_t *nodelock;
02069         unsigned int refs, nrefs;
02070         int bucket = node->locknum;
02071         isc_boolean_t no_reference = ISC_TRUE;
02072 
02073         nodelock = &rbtdb->node_locks[bucket];
02074 
              /*
               * A node is kept in the tree while it has data, has a
               * 'down' pointer, or is the database's origin node.
               */
02075 #define KEEP_NODE(n, r) \
02076         ((n)->data != NULL || (n)->down != NULL || (n) == (r)->origin_node)
02077 
02078         /* Handle easy and typical case first. */
              /*
               * Nothing to clean and the node stays in the tree: only the
               * counters need adjusting, so no lock upgrade is required.
               */
02079         if (!node->dirty && KEEP_NODE(node, rbtdb)) {
02080                 dns_rbtnode_refdecrement(node, &nrefs);
02081                 INSIST((int)nrefs >= 0);
02082                 if (nrefs == 0) {
                              /* Last reference: drop the bucket's count too. */
02083                         isc_refcount_decrement(&nodelock->references, &refs);
02084                         INSIST((int)refs >= 0);
02085                 }
02086                 return ((nrefs == 0) ? ISC_TRUE : ISC_FALSE);
02087         }
02088 
02089         /* Upgrade the lock? */
02090         if (nlock == isc_rwlocktype_read) {
02091                 NODE_WEAKUNLOCK(&nodelock->lock, isc_rwlocktype_read);
02092                 NODE_WEAKLOCK(&nodelock->lock, isc_rwlocktype_write);
02093         }
02094 
02095         dns_rbtnode_refdecrement(node, &nrefs);
02096         INSIST((int)nrefs >= 0);
02097         if (nrefs > 0) {
                      /* The node is still referenced; leave it untouched. */
02098                 /* Restore the lock? */
02099                 if (nlock == isc_rwlocktype_read)
02100                         NODE_WEAKDOWNGRADE(&nodelock->lock);
02101                 return (ISC_FALSE);
02102         }
02103 
              /* From here on the node has no references left. */
02104         if (node->dirty) {
02105                 if (IS_CACHE(rbtdb))
02106                         clean_cache_node(rbtdb, node);
02107                 else {
02108                         if (least_serial == 0) {
02109                                 /*
02110                                  * Caller doesn't know the least serial.
02111                                  * Get it.
02112                                  */
02113                                 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
02114                                 least_serial = rbtdb->least_serial;
02115                                 RBTDB_UNLOCK(&rbtdb->lock,
02116                                              isc_rwlocktype_read);
02117                         }
02118                         clean_zone_node(rbtdb, node, least_serial);
02119                 }
02120         }
02121 
02122         /*
02123          * Attempt to switch to a write lock on the tree.  If this fails,
02124          * we will add this node to a linked list of nodes in this locking
02125          * bucket which we will free later.
02126          */
02127         if (tlock != isc_rwlocktype_write) {
02128                 /*
02129                  * Locking hierarchy notwithstanding, we don't need to free
02130                  * the node lock before acquiring the tree write lock because
02131                  * we only do a trylock.
02132                  */
02133                 if (tlock == isc_rwlocktype_read)
02134                         result = isc_rwlock_tryupgrade(&rbtdb->tree_lock);
02135                 else
02136                         result = isc_rwlock_trylock(&rbtdb->tree_lock,
02137                                                     isc_rwlocktype_write);
02138                 RUNTIME_CHECK(result == ISC_R_SUCCESS ||
02139                               result == ISC_R_LOCKBUSY);
02140 
02141                 write_locked = ISC_TF(result == ISC_R_SUCCESS);
02142         } else
02143                 write_locked = ISC_TRUE;
02144 
              /* The node is unreferenced: drop it from the bucket's count. */
02145         isc_refcount_decrement(&nodelock->references, &refs);
02146         INSIST((int)refs >= 0);
02147 
              /* After cleaning, the node may still have to stay in the tree. */
02148         if (KEEP_NODE(node, rbtdb))
02149                 goto restore_locks;
02150 
02151 #undef KEEP_NODE
02152 
02153         if (write_locked) {
02154                 /*
02155                  * We can now delete the node.
02156                  */
02157 
02158                 /*
02159                  * If this node is the only one in the level it's in, deleting
02160                  * this node may recursively make its parent the only node in
02161                  * the parent level; if so, and if no one is currently using
02162                  * the parent node, this is almost the only opportunity to
02163                  * clean it up.  But the recursive cleanup is not that trivial
02164                  * since the child and parent may be in different lock buckets,
02165                  * which would cause a lock order reversal problem.  To avoid
02166                  * the trouble, we'll dispatch a separate event for batch
02167                  * cleaning.  We need to check whether we're deleting the node
02168                  * as a result of pruning to avoid infinite dispatching.
02169                  * Note: pruning happens only when a task has been set for the
02170                  * rbtdb.  If the user of the rbtdb chooses not to set a task,
02171                  * it's their responsibility to purge stale leaves (e.g. by
02172                  * periodic walk-through).
02173                  */
02174                 if (!pruning && node->parent != NULL &&
02175                     node->parent->down == node && node->left == NULL &&
02176                     node->right == NULL && rbtdb->task != NULL) {
02177                         isc_event_t *ev;
02178                         dns_db_t *db;
02179 
02180                         ev = isc_event_allocate(rbtdb->common.mctx, NULL,
02181                                                 DNS_EVENT_RBTPRUNE,
02182                                                 prune_tree, node,
02183                                                 sizeof(isc_event_t));
02184                         if (ev != NULL) {
                                      /*
                                       * The event carries its own reference
                                       * to the node and an attached handle to
                                       * the database (as the event sender).
                                       */
02185                                 new_reference(rbtdb, node);
02186                                 db = NULL;
02187                                 attach((dns_db_t *)rbtdb, &db);
02188                                 ev->ev_sender = db;
02189                                 isc_task_send(rbtdb->task, &ev);
02190                                 no_reference = ISC_FALSE;
02191                         } else {
02192                                 /*
02193                                  * XXX: this is a weird situation.  We could
02194                                  * ignore this error case, but then the stale
02195                                  * node will unlikely be purged except via a
02196                                  * rare condition such as manual cleanup.  So
02197                                  * we queue it in the deadnodes list, hoping
02198                                  * the memory shortage is temporary and the node
02199                                  * will be deleted later.
02200                                  */
02201                                 isc_log_write(dns_lctx,
02202                                               DNS_LOGCATEGORY_DATABASE,
02203                                               DNS_LOGMODULE_CACHE,
02204                                               ISC_LOG_INFO,
02205                                               "decrement_reference: failed to "
02206                                               "allocate pruning event");
02207                                 INSIST(node->data == NULL);
02208                                 INSIST(!ISC_LINK_LINKED(node, deadlink));
02209                                 ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node,
02210                                                 deadlink);
02211                         }
02212                 } else {
02213                         if (isc_log_wouldlog(dns_lctx, ISC_LOG_DEBUG(1))) {
02214                                 char printname[DNS_NAME_FORMATSIZE];
02215 
02216                                 isc_log_write(dns_lctx,
02217                                               DNS_LOGCATEGORY_DATABASE,
02218                                               DNS_LOGMODULE_CACHE,
02219                                               ISC_LOG_DEBUG(1),
02220                                               "decrement_reference: "
02221                                               "delete from rbt: %p %s",
02222                                               node,
02223                                               dns_rbt_formatnodename(node,
02224                                                         printname,
02225                                                         sizeof(printname)));
02226                         }
02227 
02228                         delete_node(rbtdb, node);
02229                 }
02230         } else {
                      /*
                       * The tree write lock could not be obtained: queue the
                       * node on this bucket's deadnodes list instead of
                       * deleting it now.
                       */
02231                 INSIST(node->data == NULL);
02232                 INSIST(!ISC_LINK_LINKED(node, deadlink));
02233                 ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node, deadlink);
02234         }
02235 
02236  restore_locks:
02237         /* Restore the lock? */
02238         if (nlock == isc_rwlocktype_read)
02239                 NODE_WEAKDOWNGRADE(&nodelock->lock);
02240 
02241         /*
02242          * Relock a read lock, or unlock the write lock if no lock was held.
02243          */
02244         if (tlock == isc_rwlocktype_none)
02245                 if (write_locked)
02246                         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
02247 
02248         if (tlock == isc_rwlocktype_read)
02249                 if (write_locked)
02250                         isc_rwlock_downgrade(&rbtdb->tree_lock);
02251 
02252         return (no_reference);
02253 }
02254 
02255 /*
02256  * Prune the tree by recursively cleaning-up single leaves.  In the worst
02257  * case, the number of iterations is the number of tree levels, which is at
02258  * most the maximum number of domain name labels, i.e., 127.  In practice, this
02259  * should be much smaller (only a few times), and even the worst case would be
02260  * acceptable for a single event.
       *
       * This is a task event action.  'event->ev_sender' is the database and
       * 'event->ev_arg' is the node at which pruning starts; the event itself
       * is freed here.  The tree lock and the node lock covering the node being
       * examined are held for writing during the whole walk.  Each visited node
       * has a reference dropped through decrement_reference(); if that leaves
       * the parent without a 'down' subtree, a reference is taken on the parent
       * and the walk continues there.  On exit the database is detached
       * (NOTE(review): presumably balancing a reference taken when the event
       * was queued -- confirm at the sender).
02261  */
02262 static void
02263 prune_tree(isc_task_t *task, isc_event_t *event) {
02264         dns_rbtdb_t *rbtdb = event->ev_sender;
02265         dns_rbtnode_t *node = event->ev_arg;
02266         dns_rbtnode_t *parent;
02267         unsigned int locknum;
02268 
02269         UNUSED(task);
02270 
              /* Everything needed from the event has been copied out above. */
02271         isc_event_free(&event);
02272 
02273         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
              /* 'locknum' always identifies the node lock currently held. */
02274         locknum = node->locknum;
02275         NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
02276         do {
                      /*
                       * Save the parent before dropping the reference; 'node'
                       * is not dereferenced again after the call below.
                       */
02277                 parent = node->parent;
02278                 decrement_reference(rbtdb, node, 0, isc_rwlocktype_write,
02279                                     isc_rwlocktype_write, ISC_TRUE);
02280 
02281                 if (parent != NULL && parent->down == NULL) {
02282                         /*
02283                          * node was the only down child of the parent and has
02284                          * just been removed.  We'll then need to examine the
02285                          * parent.  Keep the lock if possible; otherwise,
02286                          * release the old lock and acquire one for the parent.
02287                          */
02288                         if (parent->locknum != locknum) {
02289                                 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
02290                                             isc_rwlocktype_write);
02291                                 locknum = parent->locknum;
02292                                 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
02293                                           isc_rwlocktype_write);
02294                         }
02295 
02296                         /*
02297                          * We need to gain a reference to the node before
02298                          * decrementing it in the next iteration.  In addition,
02299                          * if the node is in the dead-nodes list, extract it
02300                          * from the list beforehand as we do in
02301                          * reactivate_node().
02302                          */
02303                         if (ISC_LINK_LINKED(parent, deadlink))
02304                                 ISC_LIST_UNLINK(rbtdb->deadnodes[locknum],
02305                                                 parent, deadlink);
02306                         new_reference(rbtdb, parent);
02307                 } else
                              /* Nothing further to prune; terminate the walk. */
02308                         parent = NULL;
02309 
02310                 node = parent;
02311         } while (node != NULL);
02312         NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
02313         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
02314 
02315         detach((dns_db_t **)&rbtdb);
02316 }
02317 
02318 static inline void
02319 make_least_version(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
02320                    rbtdb_changedlist_t *cleanup_list)
02321 {
02322         /*
02323          * Caller must be holding the database lock.
02324          */
02325 
02326         rbtdb->least_serial = version->serial;
02327         *cleanup_list = version->changed_list;
02328         ISC_LIST_INIT(version->changed_list);
02329 }
02330 
02331 static inline void
02332 cleanup_nondirty(rbtdb_version_t *version, rbtdb_changedlist_t *cleanup_list) {
02333         rbtdb_changed_t *changed, *next_changed;
02334 
02335         /*
02336          * If the changed record is dirty, then
02337          * an update created multiple versions of
02338          * a given rdataset.  We keep this list
02339          * until we're the least open version, at
02340          * which point it's safe to get rid of any
02341          * older versions.
02342          *
02343          * If the changed record isn't dirty, then
02344          * we don't need it anymore since we're
02345          * committing and not rolling back.
02346          *
02347          * The caller must be holding the database lock.
02348          */
02349         for (changed = HEAD(version->changed_list);
02350              changed != NULL;
02351              changed = next_changed) {
02352                 next_changed = NEXT(changed, link);
02353                 if (!changed->dirty) {
02354                         UNLINK(version->changed_list,
02355                                changed, link);
02356                         APPEND(*cleanup_list,
02357                                changed, link);
02358                 }
02359         }
02360 }
02361 
02362 static void
02363 iszonesecure(dns_db_t *db, rbtdb_version_t *version, dns_dbnode_t *origin) {
02364         dns_rdataset_t keyset;
02365         dns_rdataset_t nsecset, signsecset;
02366         isc_boolean_t haszonekey = ISC_FALSE;
02367         isc_boolean_t hasnsec = ISC_FALSE;
02368         isc_result_t result;
02369 
02370         dns_rdataset_init(&keyset);
02371         result = dns_db_findrdataset(db, origin, version, dns_rdatatype_dnskey,
02372                                      0, 0, &keyset, NULL);
02373         if (result == ISC_R_SUCCESS) {
02374                 result = dns_rdataset_first(&keyset);
02375                 while (result == ISC_R_SUCCESS) {
02376                         dns_rdata_t keyrdata = DNS_RDATA_INIT;
02377                         dns_rdataset_current(&keyset, &keyrdata);
02378                         if (dns_zonekey_iszonekey(&keyrdata)) {
02379                                 haszonekey = ISC_TRUE;
02380                                 break;
02381                         }
02382                         result = dns_rdataset_next(&keyset);
02383                 }
02384                 dns_rdataset_disassociate(&keyset);
02385         }
02386         if (!haszonekey) {
02387                 version->secure = dns_db_insecure;
02388                 version->havensec3 = ISC_FALSE;
02389                 return;
02390         }
02391 
02392         dns_rdataset_init(&nsecset);
02393         dns_rdataset_init(&signsecset);
02394         result = dns_db_findrdataset(db, origin, version, dns_rdatatype_nsec,
02395                                      0, 0, &nsecset, &signsecset);
02396         if (result == ISC_R_SUCCESS) {
02397                 if (dns_rdataset_isassociated(&signsecset)) {
02398                         hasnsec = ISC_TRUE;
02399                         dns_rdataset_disassociate(&signsecset);
02400                 }
02401                 dns_rdataset_disassociate(&nsecset);
02402         }
02403 
02404         setnsec3parameters(db, version);
02405 
02406         /*
02407          * Do we have a valid NSEC/NSEC3 chain?
02408          */
02409         if (version->havensec3 || hasnsec)
02410                 version->secure = dns_db_secure;
02411         else
02412                 version->secure = dns_db_insecure;
02413 }
02414 
02415 /*%<
02416  * Walk the origin node looking for NSEC3PARAM records.
02417  * Cache the nsec3 parameters.
       *
       * 'version->havensec3' is cleared first and set only if a usable
       * NSEC3PARAM record is found, in which case the record's hash, salt,
       * salt length, iterations and flags are copied into 'version'.  A record
       * is usable when its flags are zero and its hash algorithm is either
       * DNS_NSEC3_UNKNOWNALG or one accepted by dns_nsec3_supportedhash().
       * The search stops at the first usable record whose hash is not
       * DNS_NSEC3_UNKNOWNALG.
       *
       * The tree lock and the origin node's lock are held for reading while
       * the node's headers are examined.
02418  */
02419 static void
02420 setnsec3parameters(dns_db_t *db, rbtdb_version_t *version) {
02421         dns_rbtnode_t *node;
02422         dns_rdata_nsec3param_t nsec3param;
02423         dns_rdata_t rdata = DNS_RDATA_INIT;
02424         isc_region_t region;
02425         isc_result_t result;
02426         rdatasetheader_t *header, *header_next;
02427         unsigned char *raw;             /* RDATASLAB */
02428         unsigned int count, length;
02429         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
02430 
02431         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
02432         version->havensec3 = ISC_FALSE;
02433         node = rbtdb->origin_node;
02434         NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
02435                   isc_rwlocktype_read);
02436         for (header = node->data;
02437              header != NULL;
02438              header = header_next) {
                      /*
                       * Save the sibling now: 'header' is reused below to walk
                       * the 'down' chain and may end up NULL.
                       */
02439                 header_next = header->next;
                      /*
                       * Follow the 'down' chain to the first header whose
                       * serial is not newer than this version's and which is
                       * not marked IGNORE.  If that header is NONEXISTENT,
                       * treat it as not found.
                       */
02440                 do {
02441                         if (header->serial <= version->serial &&
02442                             !IGNORE(header)) {
02443                                 if (NONEXISTENT(header))
02444                                         header = NULL;
02445                                 break;
02446                         } else
02447                                 header = header->down;
02448                 } while (header != NULL);
02449 
02450                 if (header != NULL &&
02451                     (header->type == dns_rdatatype_nsec3param)) {
02452                         /*
02453                          * Find A NSEC3PARAM with a supported algorithm.
02454                          */
                              /*
                               * The raw data directly follows the header and
                               * starts with a two-byte big-endian record count.
                               * With DNS_RDATASET_FIXED, a further 4 bytes per
                               * record are skipped before the first record.
                               */
02455                         raw = (unsigned char *)header + sizeof(*header);
02456                         count = raw[0] * 256 + raw[1]; /* count */
02457 #if DNS_RDATASET_FIXED
02458                         raw += count * 4 + 2;
02459 #else
02460                         raw += 2;
02461 #endif
02462                         while (count-- > 0U) {
                                      /*
                                       * Each record begins with a two-byte
                                       * big-endian length; the rdata starts 4
                                       * bytes (DNS_RDATASET_FIXED) or 2 bytes
                                       * after the start of the record.
                                       */
02463                                 length = raw[0] * 256 + raw[1];
02464 #if DNS_RDATASET_FIXED
02465                                 raw += 4;
02466 #else
02467                                 raw += 2;
02468 #endif
02469                                 region.base = raw;
02470                                 region.length = length;
                                      /* Advance to the next record up front. */
02471                                 raw += length;
02472                                 dns_rdata_fromregion(&rdata,
02473                                                      rbtdb->common.rdclass,
02474                                                      dns_rdatatype_nsec3param,
02475                                                      &region);
02476                                 result = dns_rdata_tostruct(&rdata,
02477                                                             &nsec3param,
02478                                                             NULL);
02479                                 INSIST(result == ISC_R_SUCCESS);
                                      /* Make 'rdata' reusable for the next record. */
02480                                 dns_rdata_reset(&rdata);
02481 
                                      /*
                                       * Skip hash algorithms that are neither
                                       * the unknown test algorithm nor supported.
                                       */
02482                                 if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG &&
02483                                     !dns_nsec3_supportedhash(nsec3param.hash))
02484                                         continue;
02485 
                                      /* Only records with no flags set are used. */
02486                                 if (nsec3param.flags != 0)
02487                                         continue;
02488 
02489                                 memmove(version->salt, nsec3param.salt,
02490                                         nsec3param.salt_length);
02491                                 version->hash = nsec3param.hash;
02492                                 version->salt_length = nsec3param.salt_length;
02493                                 version->iterations = nsec3param.iterations;
02494                                 version->flags = nsec3param.flags;
02495                                 version->havensec3 = ISC_TRUE;
02496                                 /*
02497                                  * Look for a better algorithm than the
02498                                  * unknown test algorithm.
02499                                  */
02500                                 if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG)
02501                                         goto unlock;
02502                         }
02503                 }
02504         }
02505  unlock:
02506         NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
02507                     isc_rwlocktype_read);
02508         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
02509 }
02510 
02511 static void
02512 cleanup_dead_nodes_callback(isc_task_t *task, isc_event_t *event) {
02513         dns_rbtdb_t *rbtdb = event->ev_arg;
02514         isc_boolean_t again = ISC_FALSE;
02515         unsigned int locknum;
02516         unsigned int refs;
02517 
02518         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
02519         for (locknum = 0; locknum < rbtdb->node_lock_count; locknum++) {
02520                 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
02521                           isc_rwlocktype_write);
02522                 cleanup_dead_nodes(rbtdb, locknum);
02523                 if (ISC_LIST_HEAD(rbtdb->deadnodes[locknum]) != NULL)
02524                         again = ISC_TRUE;
02525                 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
02526                             isc_rwlocktype_write);
02527         }
02528         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
02529         if (again)
02530                 isc_task_send(task, &event);
02531         else {
02532                 isc_event_free(&event);
02533                 isc_refcount_decrement(&rbtdb->references, &refs);
02534                 if (refs == 0)
02535                         maybe_free_rbtdb(rbtdb);
02536         }
02537 }
02538 
/*
 * Close the version '*versionp' of 'db', committing it if 'commit' is true
 * and rolling it back otherwise.  '*versionp' is set to NULL on return.
 *
 * One reference to the version is dropped.  If references remain, nothing
 * else happens (apart from checking that a commit was not requested on a
 * writer version).  Otherwise, under the database write lock:
 *
 *   - a committed writer version replaces the current version;
 *   - a rolled-back writer version is discarded;
 *   - a non-writer version is removed from the open list and, unless it is
 *     the current version, its pending cleanups are either taken over
 *     for processing or passed on to the next greater version.
 *
 * After the database lock is released, any discarded version structure is
 * freed, re-signed headers are committed or reinserted into the re-signing
 * heap, and the nodes on the cleanup list are rolled back (on rollback) and
 * dereferenced.
 */
02539 static void
02540 closeversion(dns_db_t *db, dns_dbversion_t **versionp, isc_boolean_t commit) {
02541         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
02542         rbtdb_version_t *version, *cleanup_version, *least_greater;
02543         isc_boolean_t rollback = ISC_FALSE;
02544         rbtdb_changedlist_t cleanup_list;
02545         rdatasetheaderlist_t resigned_list;
02546         rbtdb_changed_t *changed, *next_changed;
02547         rbtdb_serial_t serial, least_serial;
02548         dns_rbtnode_t *rbtnode;
02549         unsigned int refs;
02550         rdatasetheader_t *header;
02551 
02552         REQUIRE(VALID_RBTDB(rbtdb));
02553         version = (rbtdb_version_t *)*versionp;
02554         INSIST(version->rbtdb == rbtdb);
02555 
02556         cleanup_version = NULL;
02557         ISC_LIST_INIT(cleanup_list);
02558         ISC_LIST_INIT(resigned_list);
02559 
              /* Drop the caller's reference; 'refs' is tested for leftovers. */
02560         isc_refcount_decrement(&version->references, &refs);
02561         if (refs > 0) {         /* typical and easy case first */
02562                 if (commit) {
02563                         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
02564                         INSIST(!version->writer);
02565                         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
02566                 }
02567                 goto end;
02568         }
02569 
02570         /*
02571          * Update the zone's secure status in version before making
02572          * it the current version.
02573          */
02574         if (version->writer && commit && !IS_CACHE(rbtdb))
02575                 iszonesecure(db, version, rbtdb->origin_node);
02576 
02577         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
              /* Saved for rollback_node() after the lock is released. */
02578         serial = version->serial;
02579         if (version->writer) {
02580                 if (commit) {
02581                         unsigned cur_ref;
02582                         rbtdb_version_t *cur_version;
02583 
02584                         INSIST(version->commit_ok);
02585                         INSIST(version == rbtdb->future_version);
02586                         /*
02587                          * The current version is going to be replaced.
02588                          * Release the (likely last) reference to it from the
02589                          * DB itself and unlink it from the open list.
02590                          */
02591                         cur_version = rbtdb->current_version;
02592                         isc_refcount_decrement(&cur_version->references,
02593                                                &cur_ref);
02594                         if (cur_ref == 0) {
02595                                 if (cur_version->serial == rbtdb->least_serial)
02596                                         INSIST(EMPTY(cur_version->changed_list));
02597                                 UNLINK(rbtdb->open_versions,
02598                                        cur_version, link);
02599                         }
02600                         if (EMPTY(rbtdb->open_versions)) {
02601                                 /*
02602                                  * We're going to become the least open
02603                                  * version.
02604                                  */
02605                                 make_least_version(rbtdb, version,
02606                                                    &cleanup_list);
02607                         } else {
02608                                 /*
02609                                  * Some other open version is the
02610                                  * least version.  We can't cleanup
02611                                  * records that were changed in this
02612                                  * version because the older versions
02613                                  * may still be in use by an open
02614                                  * version.
02615                                  *
02616                                  * We can, however, discard the
02617                                  * changed records for things that
02618                                  * we've added that didn't exist in
02619                                  * prior versions.
02620                                  */
02621                                 cleanup_nondirty(version, &cleanup_list);
02622                         }
02623                         /*
02624                          * If the (soon to be former) current version
02625                          * isn't being used by anyone, we can clean
02626                          * it up.
02627                          */
02628                         if (cur_ref == 0) {
02629                                 cleanup_version = cur_version;
02630                                 APPENDLIST(version->changed_list,
02631                                            cleanup_version->changed_list,
02632                                            link);
02633                         }
02634                         /*
02635                          * Become the current version.
02636                          */
02637                         version->writer = ISC_FALSE;
02638                         rbtdb->current_version = version;
02639                         rbtdb->current_serial = version->serial;
02640                         rbtdb->future_version = NULL;
02641 
02642                         /*
02643                          * Keep the current version in the open list, and
02644                          * gain a reference for the DB itself (see the DB
02645                          * creation function below).  This must be the only
02646                          * case where we need to increment the counter from
02647                          * zero and need to use isc_refcount_increment0().
02648                          */
02649                         isc_refcount_increment0(&version->references,
02650                                                 &cur_ref);
02651                         INSIST(cur_ref == 1);
02652                         PREPEND(rbtdb->open_versions,
02653                                 rbtdb->current_version, link);
                              /*
                               * Take over the re-signed headers; they are
                               * processed after the database lock is dropped.
                               */
02654                         resigned_list = version->resigned_list;
02655                         ISC_LIST_INIT(version->resigned_list);
02656                 } else {
02657                         /*
02658                          * We're rolling back this transaction.
02659                          */
02660                         cleanup_list = version->changed_list;
02661                         ISC_LIST_INIT(version->changed_list);
02662                         resigned_list = version->resigned_list;
02663                         ISC_LIST_INIT(version->resigned_list);
02664                         rollback = ISC_TRUE;
02665                         cleanup_version = version;
02666                         rbtdb->future_version = NULL;
02667                 }
02668         } else {
02669                 if (version != rbtdb->current_version) {
02670                         /*
02671                          * There are no external or internal references
02672                          * to this version and it can be cleaned up.
02673                          */
02674                         cleanup_version = version;
02675 
02676                         /*
02677                          * Find the version with the least serial
02678                          * number greater than ours.
02679                          */
02680                         least_greater = PREV(version, link);
02681                         if (least_greater == NULL)
02682                                 least_greater = rbtdb->current_version;
02683 
02684                         INSIST(version->serial < least_greater->serial);
02685                         /*
02686                          * Is this the least open version?
02687                          */
02688                         if (version->serial == rbtdb->least_serial) {
02689                                 /*
02690                                  * Yes.  Install the new least open
02691                                  * version.
02692                                  */
02693                                 make_least_version(rbtdb,
02694                                                    least_greater,
02695                                                    &cleanup_list);
02696                         } else {
02697                                 /*
02698                                  * Add any unexecuted cleanups to
02699                                  * those of the least greater version.
02700                                  */
02701                                 APPENDLIST(least_greater->changed_list,
02702                                            version->changed_list,
02703                                            link);
02704                         }
02705                 } else if (version->serial == rbtdb->least_serial)
02706                         INSIST(EMPTY(version->changed_list));
02707                 UNLINK(rbtdb->open_versions, version, link);
02708         }
              /* Snapshot taken under the lock for use in the cleanup below. */
02709         least_serial = rbtdb->least_serial;
02710         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
02711 
              /*
               * Free the version being discarded, if any.  Its changed list
               * must already have been emptied above.
               */
02712         if (cleanup_version != NULL) {
02713                 INSIST(EMPTY(cleanup_version->changed_list));
02714                 isc_mem_put(rbtdb->common.mctx, cleanup_version,
02715                             sizeof(*cleanup_version));
02716         }
02717 
02718         /*
02719          * Commit/rollback re-signed headers.
02720          */
02721         for (header = HEAD(resigned_list);
02722              header != NULL;
02723              header = HEAD(resigned_list)) {
02724                 nodelock_t *lock;
02725 
02726                 ISC_LIST_UNLINK(resigned_list, header, link);
02727 
02728                 lock = &rbtdb->node_locks[header->node->locknum].lock;
02729                 NODE_LOCK(lock, isc_rwlocktype_write);
                      /*
                       * On rollback, put headers that are not marked IGNORE
                       * back into the re-signing heap; a failure is only
                       * logged.
                       */
02730                 if (rollback && !IGNORE(header)) {
02731                         isc_result_t result;
02732                         result = resign_insert(rbtdb, header->node->locknum,
02733                                                header);
02734                         if (result != ISC_R_SUCCESS)
02735                                 isc_log_write(dns_lctx,
02736                                               DNS_LOGCATEGORY_DATABASE,
02737                                               DNS_LOGMODULE_ZONE, ISC_LOG_ERROR,
02738                                               "Unable to reinsert header to "
02739                                               "re-signing heap: %s\n",
02740                                 dns_result_totext(result));
02741                 }
02742                 decrement_reference(rbtdb, header->node, least_serial,
02743                                     isc_rwlocktype_write, isc_rwlocktype_none,
02744                                     ISC_FALSE);
02745                 NODE_UNLOCK(lock, isc_rwlocktype_write);
02746         }
02747 
02748         if (!EMPTY(cleanup_list)) {
02749                 isc_event_t *event = NULL;
02750                 isc_rwlocktype_t tlock = isc_rwlocktype_none;
02751 
                      /*
                       * If the database has a task, dead-node cleanup is
                       * deferred to cleanup_dead_nodes_callback(); otherwise
                       * (or if the event cannot be allocated) it is done
                       * inline below.
                       */
02752                 if (rbtdb->task != NULL)
02753                         event = isc_event_allocate(rbtdb->common.mctx, NULL,
02754                                                    DNS_EVENT_RBTDEADNODES,
02755                                                    cleanup_dead_nodes_callback,
02756                                                    rbtdb, sizeof(isc_event_t));
02757                 if (event == NULL) {
02758                         /*
02759                          * We acquire a tree write lock here in order to make
02760                          * sure that stale nodes will be removed in
02761                          * decrement_reference().  If we didn't have the lock,
02762                          * those nodes could miss the chance to be removed
02763                          * until the server stops.  The write lock is
02764                          * expensive, but this event should be rare enough
02765                          * to justify the cost.
02766                          */
02767                         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
02768                         tlock = isc_rwlocktype_write;
02769                 }
02770 
02771                 for (changed = HEAD(cleanup_list);
02772                      changed != NULL;
02773                      changed = next_changed) {
02774                         nodelock_t *lock;
02775 
                              /* 'changed' is freed at the end of this pass. */
02776                         next_changed = NEXT(changed, link);
02777                         rbtnode = changed->node;
02778                         lock = &rbtdb->node_locks[rbtnode->locknum].lock;
02779 
02780                         NODE_LOCK(lock, isc_rwlocktype_write);
02781                         /*
02782                          * This is a good opportunity to purge any dead nodes,
02783                          * so use it.
02784                          */
02785                         if (event == NULL)
02786                                 cleanup_dead_nodes(rbtdb, rbtnode->locknum);
02787 
02788                         if (rollback)
02789                                 rollback_node(rbtnode, serial);
02790                         decrement_reference(rbtdb, rbtnode, least_serial,
02791                                             isc_rwlocktype_write, tlock,
02792                                             ISC_FALSE);
02793 
02794                         NODE_UNLOCK(lock, isc_rwlocktype_write);
02795 
02796                         isc_mem_put(rbtdb->common.mctx, changed,
02797                                     sizeof(*changed));
02798                 }
02799                 if (event != NULL) {
                              /*
                               * Hold a database reference for the queued
                               * event; cleanup_dead_nodes_callback() drops it
                               * once no dead nodes remain.
                               */
02800                         isc_refcount_increment(&rbtdb->references, NULL);
02801                         isc_task_send(rbtdb->task, &event);
02802                 } else
02803                         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
02804         }
02805 
02806  end:
02807         *versionp = NULL;
02808 }
02809 
02810 /*
02811  * Add the necessary magic for the wildcard name 'name'
02812  * to be found in 'rbtdb'.
02813  *
02814  * In order for wildcard matching to work correctly in
02815  * zone_find(), we must ensure that a node for the wildcarding
02816  * level exists in the database, and has its 'find_callback'
02817  * and 'wild' bits set.
02818  *
02819  * E.g. if the wildcard name is "*.sub.example." then we
02820  * must ensure that "sub.example." exists and is marked as
02821  * a wildcard level.
02822  */
02823 static isc_result_t
02824 add_wildcard_magic(dns_rbtdb_t *rbtdb, dns_name_t *name) {
02825         isc_result_t result;
02826         dns_name_t foundname;
02827         dns_offsets_t offsets;
02828         unsigned int n;
02829         dns_rbtnode_t *node = NULL;
02830 
02831         dns_name_init(&foundname, offsets);
02832         n = dns_name_countlabels(name);
02833         INSIST(n >= 2);
02834         n--;
02835         dns_name_getlabelsequence(name, 1, n, &foundname);
02836         result = dns_rbt_addnode(rbtdb->tree, &foundname, &node);
02837         if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
02838                 return (result);
02839         if (result == ISC_R_SUCCESS)
02840                 node->nsec = DNS_RBT_NSEC_NORMAL;
02841         node->find_callback = 1;
02842         node->wild = 1;
02843         return (ISC_R_SUCCESS);
02844 }
02845 
02846 static isc_result_t
02847 add_empty_wildcards(dns_rbtdb_t *rbtdb, dns_name_t *name) {
02848         isc_result_t result;
02849         dns_name_t foundname;
02850         dns_offsets_t offsets;
02851         unsigned int n, l, i;
02852 
02853         dns_name_init(&foundname, offsets);
02854         n = dns_name_countlabels(name);
02855         l = dns_name_countlabels(&rbtdb->common.origin);
02856         i = l + 1;
02857         while (i < n) {
02858                 dns_rbtnode_t *node = NULL;     /* dummy */
02859                 dns_name_getlabelsequence(name, n - i, i, &foundname);
02860                 if (dns_name_iswildcard(&foundname)) {
02861                         result = add_wildcard_magic(rbtdb, &foundname);
02862                         if (result != ISC_R_SUCCESS)
02863                                 return (result);
02864                         result = dns_rbt_addnode(rbtdb->tree, &foundname,
02865                                                  &node);
02866                         if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
02867                                 return (result);
02868                         if (result == ISC_R_SUCCESS)
02869                                 node->nsec = DNS_RBT_NSEC_NORMAL;
02870                 }
02871                 i++;
02872         }
02873         return (ISC_R_SUCCESS);
02874 }
02875 
02876 static isc_result_t
02877 findnodeintree(dns_rbtdb_t *rbtdb, dns_rbt_t *tree, dns_name_t *name,
02878                isc_boolean_t create, dns_dbnode_t **nodep)
02879 {
02880         dns_rbtnode_t *node = NULL;
02881         dns_name_t nodename;
02882         isc_result_t result;
02883         isc_rwlocktype_t locktype = isc_rwlocktype_read;
02884 
02885         INSIST(tree == rbtdb->tree || tree == rbtdb->nsec3);
02886 
02887         dns_name_init(&nodename, NULL);
02888         RWLOCK(&rbtdb->tree_lock, locktype);
02889         result = dns_rbt_findnode(tree, name, NULL, &node, NULL,
02890                                   DNS_RBTFIND_EMPTYDATA, NULL, NULL);
02891         if (result != ISC_R_SUCCESS) {
02892                 RWUNLOCK(&rbtdb->tree_lock, locktype);
02893                 if (!create) {
02894                         if (result == DNS_R_PARTIALMATCH)
02895                                 result = ISC_R_NOTFOUND;
02896                         return (result);
02897                 }
02898                 /*
02899                  * It would be nice to try to upgrade the lock instead of
02900                  * unlocking then relocking.
02901                  */
02902                 locktype = isc_rwlocktype_write;
02903                 RWLOCK(&rbtdb->tree_lock, locktype);
02904                 node = NULL;
02905                 result = dns_rbt_addnode(tree, name, &node);
02906                 if (result == ISC_R_SUCCESS) {
02907                         dns_rbt_namefromnode(node, &nodename);
02908 #ifdef DNS_RBT_USEHASH
02909                         node->locknum = node->hashval % rbtdb->node_lock_count;
02910 #else
02911                         node->locknum = dns_name_hash(&nodename, ISC_TRUE) %
02912                                 rbtdb->node_lock_count;
02913 #endif
02914                         if (tree == rbtdb->tree) {
02915                                 add_empty_wildcards(rbtdb, name);
02916 
02917                                 if (dns_name_iswildcard(name)) {
02918                                         result = add_wildcard_magic(rbtdb, name);
02919                                         if (result != ISC_R_SUCCESS) {
02920                                                 RWUNLOCK(&rbtdb->tree_lock, locktype);
02921                                                 return (result);
02922                                         }
02923                                 }
02924                         }
02925                         if (tree == rbtdb->nsec3)
02926                                 node->nsec = DNS_RBT_NSEC_NSEC3;
02927                 } else if (result != ISC_R_EXISTS) {
02928                         RWUNLOCK(&rbtdb->tree_lock, locktype);
02929                         return (result);
02930                 }
02931         }
02932 
02933         if (tree == rbtdb->nsec3)
02934                 INSIST(node->nsec == DNS_RBT_NSEC_NSEC3);
02935 
02936         reactivate_node(rbtdb, node, locktype);
02937 
02938         /*
02939          * Always try to add the policy zone data, because this node might
02940          * already have been implicitly created by the previous addition of
02941          * a longer domain.  A common example is adding *.example.com
02942          * (implicitly creating example.com) followed by explicitly adding
02943          * example.com.
02944          */
02945         if (create && rbtdb->rpzs != NULL && tree == rbtdb->tree) {
02946                 dns_fixedname_t fnamef;
02947                 dns_name_t *fname;
02948 
02949                 dns_fixedname_init(&fnamef);
02950                 fname = dns_fixedname_name(&fnamef);
02951                 dns_rbt_fullnamefromnode(node, fname);
02952                 result = dns_rpz_add(rbtdb->rpzs, rbtdb->rpz_num, fname);
02953                 if (result == ISC_R_SUCCESS)
02954                         node->rpz = 1;
02955                 if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS) {
02956                         /*
02957                          * It is too late to give up, so merely complain.
02958                          */
02959                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_RPZ,
02960                                       DNS_LOGMODULE_RBTDB, DNS_RPZ_ERROR_LEVEL,
02961                                       "dns_rpz_add(): %s",
02962                                       isc_result_totext(result));
02963                 }
02964         }
02965 
02966         RWUNLOCK(&rbtdb->tree_lock, locktype);
02967 
02968         *nodep = (dns_dbnode_t *)node;
02969 
02970         return (ISC_R_SUCCESS);
02971 }
02972 
02973 static isc_result_t
02974 findnode(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
02975          dns_dbnode_t **nodep)
02976 {
02977         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
02978 
02979         REQUIRE(VALID_RBTDB(rbtdb));
02980 
02981         return (findnodeintree(rbtdb, rbtdb->tree, name, create, nodep));
02982 }
02983 
02984 static isc_result_t
02985 findnsec3node(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
02986               dns_dbnode_t **nodep)
02987 {
02988         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
02989 
02990         REQUIRE(VALID_RBTDB(rbtdb));
02991 
02992         return (findnodeintree(rbtdb, rbtdb->nsec3, name, create, nodep));
02993 }
02994 
02995 static isc_result_t
02996 zone_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
02997         rbtdb_search_t *search = arg;
02998         rdatasetheader_t *header, *header_next;
02999         rdatasetheader_t *dname_header, *sigdname_header, *ns_header;
03000         rdatasetheader_t *found;
03001         isc_result_t result;
03002         dns_rbtnode_t *onode;
03003 
03004         /*
03005          * We only want to remember the topmost zone cut, since it's the one
03006          * that counts, so we'll just continue if we've already found a
03007          * zonecut.
03008          */
03009         if (search->zonecut != NULL)
03010                 return (DNS_R_CONTINUE);
03011 
03012         found = NULL;
03013         result = DNS_R_CONTINUE;
03014         onode = search->rbtdb->origin_node;
03015 
03016         NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
03017                   isc_rwlocktype_read);
03018 
03019         /*
03020          * Look for an NS or DNAME rdataset active in our version.
03021          */
03022         ns_header = NULL;
03023         dname_header = NULL;
03024         sigdname_header = NULL;
03025         for (header = node->data; header != NULL; header = header_next) {
03026                 header_next = header->next;
03027                 if (header->type == dns_rdatatype_ns ||
03028                     header->type == dns_rdatatype_dname ||
03029                     header->type == RBTDB_RDATATYPE_SIGDNAME) {
03030                         do {
03031                                 if (header->serial <= search->serial &&
03032                                     !IGNORE(header)) {
03033                                         /*
03034                                          * Is this a "this rdataset doesn't
03035                                          * exist" record?
03036                                          */
03037                                         if (NONEXISTENT(header))
03038                                                 header = NULL;
03039                                         break;
03040                                 } else
03041                                         header = header->down;
03042                         } while (header != NULL);
03043                         if (header != NULL) {
03044                                 if (header->type == dns_rdatatype_dname)
03045                                         dname_header = header;
03046                                 else if (header->type ==
03047                                            RBTDB_RDATATYPE_SIGDNAME)
03048                                         sigdname_header = header;
03049                                 else if (node != onode ||
03050                                          IS_STUB(search->rbtdb)) {
03051                                         /*
03052                                          * We've found an NS rdataset that
03053                                          * isn't at the origin node.  We check
03054                                          * that they're not at the origin node,
03055                                          * because otherwise we'd erroneously
03056                                          * treat the zone top as if it were
03057                                          * a delegation.
03058                                          */
03059                                         ns_header = header;
03060                                 }
03061                         }
03062                 }
03063         }
03064 
03065         /*
03066          * Did we find anything?
03067          */
03068         if (!IS_CACHE(search->rbtdb) && !IS_STUB(search->rbtdb) &&
03069             ns_header != NULL) {
03070                 /*
03071                  * Note that NS has precedence over DNAME if both exist
03072                  * in a zone.  Otherwise DNAME take precedence over NS.
03073                  */
03074                 found = ns_header;
03075                 search->zonecut_sigrdataset = NULL;
03076         } else if (dname_header != NULL) {
03077                 found = dname_header;
03078                 search->zonecut_sigrdataset = sigdname_header;
03079         } else if (ns_header != NULL) {
03080                 found = ns_header;
03081                 search->zonecut_sigrdataset = NULL;
03082         }
03083 
03084         if (found != NULL) {
03085                 /*
03086                  * We increment the reference count on node to ensure that
03087                  * search->zonecut_rdataset will still be valid later.
03088                  */
03089                 new_reference(search->rbtdb, node);
03090                 search->zonecut = node;
03091                 search->zonecut_rdataset = found;
03092                 search->need_cleanup = ISC_TRUE;
03093                 /*
03094                  * Since we've found a zonecut, anything beneath it is
03095                  * glue and is not subject to wildcard matching, so we
03096                  * may clear search->wild.
03097                  */
03098                 search->wild = ISC_FALSE;
03099                 if ((search->options & DNS_DBFIND_GLUEOK) == 0) {
03100                         /*
03101                          * If the caller does not want to find glue, then
03102                          * this is the best answer and the search should
03103                          * stop now.
03104                          */
03105                         result = DNS_R_PARTIALMATCH;
03106                 } else {
03107                         dns_name_t *zcname;
03108 
03109                         /*
03110                          * The search will continue beneath the zone cut.
03111                          * This may or may not be the best match.  In case it
03112                          * is, we need to remember the node name.
03113                          */
03114                         zcname = dns_fixedname_name(&search->zonecut_name);
03115                         RUNTIME_CHECK(dns_name_copy(name, zcname, NULL) ==
03116                                       ISC_R_SUCCESS);
03117                         search->copy_name = ISC_TRUE;
03118                 }
03119         } else {
03120                 /*
03121                  * There is no zonecut at this node which is active in this
03122                  * version.
03123                  *
03124                  * If this is a "wild" node and the caller hasn't disabled
03125                  * wildcard matching, remember that we've seen a wild node
03126                  * in case we need to go searching for wildcard matches
03127                  * later on.
03128                  */
03129                 if (node->wild && (search->options & DNS_DBFIND_NOWILD) == 0)
03130                         search->wild = ISC_TRUE;
03131         }
03132 
03133         NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
03134                     isc_rwlocktype_read);
03135 
03136         return (result);
03137 }
03138 
03139 static inline void
03140 bind_rdataset(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
03141               rdatasetheader_t *header, isc_stdtime_t now,
03142               dns_rdataset_t *rdataset)
03143 {
03144         unsigned char *raw;     /* RDATASLAB */
03145 
03146         /*
03147          * Caller must be holding the node reader lock.
03148          * XXXJT: technically, we need a writer lock, since we'll increment
03149          * the header count below.  However, since the actual counter value
03150          * doesn't matter, we prioritize performance here.  (We may want to
03151          * use atomic increment when available).
03152          */
03153 
03154         if (rdataset == NULL)
03155                 return;
03156 
03157         new_reference(rbtdb, node);
03158 
03159         INSIST(rdataset->methods == NULL);      /* We must be disassociated. */
03160 
03161         rdataset->methods = &rdataset_methods;
03162         rdataset->rdclass = rbtdb->common.rdclass;
03163         rdataset->type = RBTDB_RDATATYPE_BASE(header->type);
03164         rdataset->covers = RBTDB_RDATATYPE_EXT(header->type);
03165         rdataset->ttl = header->rdh_ttl - now;
03166         rdataset->trust = header->trust;
03167         if (NEGATIVE(header))
03168                 rdataset->attributes |= DNS_RDATASETATTR_NEGATIVE;
03169         if (NXDOMAIN(header))
03170                 rdataset->attributes |= DNS_RDATASETATTR_NXDOMAIN;
03171         if (OPTOUT(header))
03172                 rdataset->attributes |= DNS_RDATASETATTR_OPTOUT;
03173         if (PREFETCH(header))
03174                 rdataset->attributes |= DNS_RDATASETATTR_PREFETCH;
03175         rdataset->private1 = rbtdb;
03176         rdataset->private2 = node;
03177         raw = (unsigned char *)header + sizeof(*header);
03178         rdataset->private3 = raw;
03179         rdataset->count = header->count++;
03180         if (rdataset->count == ISC_UINT32_MAX)
03181                 rdataset->count = 0;
03182 
03183         /*
03184          * Reset iterator state.
03185          */
03186         rdataset->privateuint4 = 0;
03187         rdataset->private5 = NULL;
03188 
03189         /*
03190          * Add noqname proof.
03191          */
03192         rdataset->private6 = header->noqname;
03193         if (rdataset->private6 != NULL)
03194                 rdataset->attributes |=  DNS_RDATASETATTR_NOQNAME;
03195         rdataset->private7 = header->closest;
03196         if (rdataset->private7 != NULL)
03197                 rdataset->attributes |=  DNS_RDATASETATTR_CLOSEST;
03198 
03199         /*
03200          * Copy out re-signing information.
03201          */
03202         if (RESIGN(header)) {
03203                 rdataset->attributes |=  DNS_RDATASETATTR_RESIGN;
03204                 rdataset->resign = header->resign;
03205         } else
03206                 rdataset->resign = 0;
03207 }
03208 
03209 static inline isc_result_t
03210 setup_delegation(rbtdb_search_t *search, dns_dbnode_t **nodep,
03211                  dns_name_t *foundname, dns_rdataset_t *rdataset,
03212                  dns_rdataset_t *sigrdataset)
03213 {
03214         isc_result_t result;
03215         dns_name_t *zcname;
03216         rbtdb_rdatatype_t type;
03217         dns_rbtnode_t *node;
03218 
03219         /*
03220          * The caller MUST NOT be holding any node locks.
03221          */
03222 
03223         node = search->zonecut;
03224         type = search->zonecut_rdataset->type;
03225 
03226         /*
03227          * If we have to set foundname, we do it before anything else.
03228          * If we were to set foundname after we had set nodep or bound the
03229          * rdataset, then we'd have to undo that work if dns_name_copy()
03230          * failed.  By setting foundname first, there's nothing to undo if
03231          * we have trouble.
03232          */
03233         if (foundname != NULL && search->copy_name) {
03234                 zcname = dns_fixedname_name(&search->zonecut_name);
03235                 result = dns_name_copy(zcname, foundname, NULL);
03236                 if (result != ISC_R_SUCCESS)
03237                         return (result);
03238         }
03239         if (nodep != NULL) {
03240                 /*
03241                  * Note that we don't have to increment the node's reference
03242                  * count here because we're going to use the reference we
03243                  * already have in the search block.
03244                  */
03245                 *nodep = node;
03246                 search->need_cleanup = ISC_FALSE;
03247         }
03248         if (rdataset != NULL) {
03249                 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
03250                           isc_rwlocktype_read);
03251                 bind_rdataset(search->rbtdb, node, search->zonecut_rdataset,
03252                               search->now, rdataset);
03253                 if (sigrdataset != NULL && search->zonecut_sigrdataset != NULL)
03254                         bind_rdataset(search->rbtdb, node,
03255                                       search->zonecut_sigrdataset,
03256                                       search->now, sigrdataset);
03257                 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
03258                             isc_rwlocktype_read);
03259         }
03260 
03261         if (type == dns_rdatatype_dname)
03262                 return (DNS_R_DNAME);
03263         return (DNS_R_DELEGATION);
03264 }
03265 
03266 static inline isc_boolean_t
03267 valid_glue(rbtdb_search_t *search, dns_name_t *name, rbtdb_rdatatype_t type,
03268            dns_rbtnode_t *node)
03269 {
03270         unsigned char *raw;     /* RDATASLAB */
03271         unsigned int count, size;
03272         dns_name_t ns_name;
03273         isc_boolean_t valid = ISC_FALSE;
03274         dns_offsets_t offsets;
03275         isc_region_t region;
03276         rdatasetheader_t *header;
03277 
03278         /*
03279          * No additional locking is required.
03280          */
03281 
03282         /*
03283          * Valid glue types are A, AAAA, A6.  NS is also a valid glue type
03284          * if it occurs at a zone cut, but is not valid below it.
03285          */
03286         if (type == dns_rdatatype_ns) {
03287                 if (node != search->zonecut) {
03288                         return (ISC_FALSE);
03289                 }
03290         } else if (type != dns_rdatatype_a &&
03291                    type != dns_rdatatype_aaaa &&
03292                    type != dns_rdatatype_a6) {
03293                 return (ISC_FALSE);
03294         }
03295 
03296         header = search->zonecut_rdataset;
03297         raw = (unsigned char *)header + sizeof(*header);
03298         count = raw[0] * 256 + raw[1];
03299 #if DNS_RDATASET_FIXED
03300         raw += 2 + (4 * count);
03301 #else
03302         raw += 2;
03303 #endif
03304 
03305         while (count > 0) {
03306                 count--;
03307                 size = raw[0] * 256 + raw[1];
03308 #if DNS_RDATASET_FIXED
03309                 raw += 4;
03310 #else
03311                 raw += 2;
03312 #endif
03313                 region.base = raw;
03314                 region.length = size;
03315                 raw += size;
03316                 /*
03317                  * XXX Until we have rdata structures, we have no choice but
03318                  * to directly access the rdata format.
03319                  */
03320                 dns_name_init(&ns_name, offsets);
03321                 dns_name_fromregion(&ns_name, &region);
03322                 if (dns_name_compare(&ns_name, name) == 0) {
03323                         valid = ISC_TRUE;
03324                         break;
03325                 }
03326         }
03327 
03328         return (valid);
03329 }
03330 
03331 static inline isc_boolean_t
03332 activeempty(rbtdb_search_t *search, dns_rbtnodechain_t *chain,
03333             dns_name_t *name)
03334 {
03335         dns_fixedname_t fnext;
03336         dns_fixedname_t forigin;
03337         dns_name_t *next;
03338         dns_name_t *origin;
03339         dns_name_t prefix;
03340         dns_rbtdb_t *rbtdb;
03341         dns_rbtnode_t *node;
03342         isc_result_t result;
03343         isc_boolean_t answer = ISC_FALSE;
03344         rdatasetheader_t *header;
03345 
03346         rbtdb = search->rbtdb;
03347 
03348         dns_name_init(&prefix, NULL);
03349         dns_fixedname_init(&fnext);
03350         next = dns_fixedname_name(&fnext);
03351         dns_fixedname_init(&forigin);
03352         origin = dns_fixedname_name(&forigin);
03353 
03354         result = dns_rbtnodechain_next(chain, NULL, NULL);
03355         while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
03356                 node = NULL;
03357                 result = dns_rbtnodechain_current(chain, &prefix,
03358                                                   origin, &node);
03359                 if (result != ISC_R_SUCCESS)
03360                         break;
03361                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
03362                           isc_rwlocktype_read);
03363                 for (header = node->data;
03364                      header != NULL;
03365                      header = header->next) {
03366                         if (header->serial <= search->serial &&
03367                             !IGNORE(header) && EXISTS(header))
03368                                 break;
03369                 }
03370                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
03371                             isc_rwlocktype_read);
03372                 if (header != NULL)
03373                         break;
03374                 result = dns_rbtnodechain_next(chain, NULL, NULL);
03375         }
03376         if (result == ISC_R_SUCCESS)
03377                 result = dns_name_concatenate(&prefix, origin, next, NULL);
03378         if (result == ISC_R_SUCCESS && dns_name_issubdomain(next, name))
03379                 answer = ISC_TRUE;
03380         return (answer);
03381 }
03382 
03383 static inline isc_boolean_t
03384 activeemtpynode(rbtdb_search_t *search, dns_name_t *qname, dns_name_t *wname) {
03385         dns_fixedname_t fnext;
03386         dns_fixedname_t forigin;
03387         dns_fixedname_t fprev;
03388         dns_name_t *next;
03389         dns_name_t *origin;
03390         dns_name_t *prev;
03391         dns_name_t name;
03392         dns_name_t rname;
03393         dns_name_t tname;
03394         dns_rbtdb_t *rbtdb;
03395         dns_rbtnode_t *node;
03396         dns_rbtnodechain_t chain;
03397         isc_boolean_t check_next = ISC_TRUE;
03398         isc_boolean_t check_prev = ISC_TRUE;
03399         isc_boolean_t answer = ISC_FALSE;
03400         isc_result_t result;
03401         rdatasetheader_t *header;
03402         unsigned int n;
03403 
03404         rbtdb = search->rbtdb;
03405 
03406         dns_name_init(&name, NULL);
03407         dns_name_init(&tname, NULL);
03408         dns_name_init(&rname, NULL);
03409         dns_fixedname_init(&fnext);
03410         next = dns_fixedname_name(&fnext);
03411         dns_fixedname_init(&fprev);
03412         prev = dns_fixedname_name(&fprev);
03413         dns_fixedname_init(&forigin);
03414         origin = dns_fixedname_name(&forigin);
03415 
03416         /*
03417          * Find if qname is at or below a empty node.
03418          * Use our own copy of the chain.
03419          */
03420 
03421         chain = search->chain;
03422         do {
03423                 node = NULL;
03424                 result = dns_rbtnodechain_current(&chain, &name,
03425                                                   origin, &node);
03426                 if (result != ISC_R_SUCCESS)
03427                         break;
03428                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
03429                           isc_rwlocktype_read);
03430                 for (header = node->data;
03431                      header != NULL;
03432                      header = header->next) {
03433                         if (header->serial <= search->serial &&
03434                             !IGNORE(header) && EXISTS(header))
03435                                 break;
03436                 }
03437                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
03438                             isc_rwlocktype_read);
03439                 if (header != NULL)
03440                         break;
03441                 result = dns_rbtnodechain_prev(&chain, NULL, NULL);
03442         } while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN);
03443         if (result == ISC_R_SUCCESS)
03444                 result = dns_name_concatenate(&name, origin, prev, NULL);
03445         if (result != ISC_R_SUCCESS)
03446                 check_prev = ISC_FALSE;
03447 
03448         result = dns_rbtnodechain_next(&chain, NULL, NULL);
03449         while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
03450                 node = NULL;
03451                 result = dns_rbtnodechain_current(&chain, &name,
03452                                                   origin, &node);
03453                 if (result != ISC_R_SUCCESS)
03454                         break;
03455                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
03456                           isc_rwlocktype_read);
03457                 for (header = node->data;
03458                      header != NULL;
03459                      header = header->next) {
03460                         if (header->serial <= search->serial &&
03461                             !IGNORE(header) && EXISTS(header))
03462                                 break;
03463                 }
03464                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
03465                             isc_rwlocktype_read);
03466                 if (header != NULL)
03467                         break;
03468                 result = dns_rbtnodechain_next(&chain, NULL, NULL);
03469         }
03470         if (result == ISC_R_SUCCESS)
03471                 result = dns_name_concatenate(&name, origin, next, NULL);
03472         if (result != ISC_R_SUCCESS)
03473                 check_next = ISC_FALSE;
03474 
03475         dns_name_clone(qname, &rname);
03476 
03477         /*
03478          * Remove the wildcard label to find the terminal name.
03479          */
03480         n = dns_name_countlabels(wname);
03481         dns_name_getlabelsequence(wname, 1, n - 1, &tname);
03482 
03483         do {
03484                 if ((check_prev && dns_name_issubdomain(prev, &rname)) ||
03485                     (check_next && dns_name_issubdomain(next, &rname))) {
03486                         answer = ISC_TRUE;
03487                         break;
03488                 }
03489                 /*
03490                  * Remove the left hand label.
03491                  */
03492                 n = dns_name_countlabels(&rname);
03493                 dns_name_getlabelsequence(&rname, 1, n - 1, &rname);
03494         } while (!dns_name_equal(&rname, &tname));
03495         return (answer);
03496 }
03497 
03498 static inline isc_result_t
03499 find_wildcard(rbtdb_search_t *search, dns_rbtnode_t **nodep,
03500               dns_name_t *qname)
03501 {
03502         unsigned int i, j;
03503         dns_rbtnode_t *node, *level_node, *wnode;
03504         rdatasetheader_t *header;
03505         isc_result_t result = ISC_R_NOTFOUND;
03506         dns_name_t name;
03507         dns_name_t *wname;
03508         dns_fixedname_t fwname;
03509         dns_rbtdb_t *rbtdb;
03510         isc_boolean_t done, wild, active;
03511         dns_rbtnodechain_t wchain;
03512 
03513         /*
03514          * Caller must be holding the tree lock and MUST NOT be holding
03515          * any node locks.
03516          */
03517 
03518         /*
03519          * Examine each ancestor level.  If the level's wild bit
03520          * is set, then construct the corresponding wildcard name and
03521          * search for it.  If the wildcard node exists, and is active in
03522          * this version, we're done.  If not, then we next check to see
03523          * if the ancestor is active in this version.  If so, then there
03524          * can be no possible wildcard match and again we're done.  If not,
03525          * continue the search.
03526          */
03527 
03528         rbtdb = search->rbtdb;
03529         i = search->chain.level_matches;
03530         done = ISC_FALSE;
03531         node = *nodep;
03532         do {
03533                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
03534                           isc_rwlocktype_read);
03535 
03536                 /*
03537                  * First we try to figure out if this node is active in
03538                  * the search's version.  We do this now, even though we
03539                  * may not need the information, because it simplifies the
03540                  * locking and code flow.
03541                  */
03542                 for (header = node->data;
03543                      header != NULL;
03544                      header = header->next) {
03545                         if (header->serial <= search->serial &&
03546                             !IGNORE(header) && EXISTS(header))
03547                                 break;
03548                 }
03549                 if (header != NULL)
03550                         active = ISC_TRUE;
03551                 else
03552                         active = ISC_FALSE;
03553 
03554                 if (node->wild)
03555                         wild = ISC_TRUE;
03556                 else
03557                         wild = ISC_FALSE;
03558 
03559                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
03560                             isc_rwlocktype_read);
03561 
03562                 if (wild) {
03563                         /*
03564                          * Construct the wildcard name for this level.
03565                          */
03566                         dns_name_init(&name, NULL);
03567                         dns_rbt_namefromnode(node, &name);
03568                         dns_fixedname_init(&fwname);
03569                         wname = dns_fixedname_name(&fwname);
03570                         result = dns_name_concatenate(dns_wildcardname, &name,
03571                                                       wname, NULL);
03572                         j = i;
03573                         while (result == ISC_R_SUCCESS && j != 0) {
03574                                 j--;
03575                                 level_node = search->chain.levels[j];
03576                                 dns_name_init(&name, NULL);
03577                                 dns_rbt_namefromnode(level_node, &name);
03578                                 result = dns_name_concatenate(wname,
03579                                                               &name,
03580                                                               wname,
03581                                                               NULL);
03582                         }
03583                         if (result != ISC_R_SUCCESS)
03584                                 break;
03585 
03586                         wnode = NULL;
03587                         dns_rbtnodechain_init(&wchain, NULL);
03588                         result = dns_rbt_findnode(rbtdb->tree, wname,
03589                                                   NULL, &wnode, &wchain,
03590                                                   DNS_RBTFIND_EMPTYDATA,
03591                                                   NULL, NULL);
03592                         if (result == ISC_R_SUCCESS) {
03593                                 nodelock_t *lock;
03594 
03595                                 /*
03596                                  * We have found the wildcard node.  If it
03597                                  * is active in the search's version, we're
03598                                  * done.
03599                                  */
03600                                 lock = &rbtdb->node_locks[wnode->locknum].lock;
03601                                 NODE_LOCK(lock, isc_rwlocktype_read);
03602                                 for (header = wnode->data;
03603                                      header != NULL;
03604                                      header = header->next) {
03605                                         if (header->serial <= search->serial &&
03606                                             !IGNORE(header) && EXISTS(header))
03607                                                 break;
03608                                 }
03609                                 NODE_UNLOCK(lock, isc_rwlocktype_read);
03610                                 if (header != NULL ||
03611                                     activeempty(search, &wchain, wname)) {
03612                                         if (activeemtpynode(search, qname,
03613                                                             wname)) {
03614                                                 return (ISC_R_NOTFOUND);
03615                                         }
03616                                         /*
03617                                          * The wildcard node is active!
03618                                          *
03619                                          * Note: result is still ISC_R_SUCCESS
03620                                          * so we don't have to set it.
03621                                          */
03622                                         *nodep = wnode;
03623                                         break;
03624                                 }
03625                         } else if (result != ISC_R_NOTFOUND &&
03626                                    result != DNS_R_PARTIALMATCH) {
03627                                 /*
03628                                  * An error has occurred.  Bail out.
03629                                  */
03630                                 break;
03631                         }
03632                 }
03633 
03634                 if (active) {
03635                         /*
03636                          * The level node is active.  Any wildcarding
03637                          * present at higher levels has no
03638                          * effect and we're done.
03639                          */
03640                         result = ISC_R_NOTFOUND;
03641                         break;
03642                 }
03643 
03644                 if (i > 0) {
03645                         i--;
03646                         node = search->chain.levels[i];
03647                 } else
03648                         done = ISC_TRUE;
03649         } while (!done);
03650 
03651         return (result);
03652 }
03653 
03654 static isc_boolean_t
03655 matchparams(rdatasetheader_t *header, rbtdb_search_t *search)
03656 {
03657         dns_rdata_t rdata = DNS_RDATA_INIT;
03658         dns_rdata_nsec3_t nsec3;
03659         unsigned char *raw;                     /* RDATASLAB */
03660         unsigned int rdlen, count;
03661         isc_region_t region;
03662         isc_result_t result;
03663 
03664         REQUIRE(header->type == dns_rdatatype_nsec3);
03665 
03666         raw = (unsigned char *)header + sizeof(*header);
03667         count = raw[0] * 256 + raw[1]; /* count */
03668 #if DNS_RDATASET_FIXED
03669         raw += count * 4 + 2;
03670 #else
03671         raw += 2;
03672 #endif
03673         while (count-- > 0) {
03674                 rdlen = raw[0] * 256 + raw[1];
03675 #if DNS_RDATASET_FIXED
03676                 raw += 4;
03677 #else
03678                 raw += 2;
03679 #endif
03680                 region.base = raw;
03681                 region.length = rdlen;
03682                 dns_rdata_fromregion(&rdata, search->rbtdb->common.rdclass,
03683                                      dns_rdatatype_nsec3, &region);
03684                 raw += rdlen;
03685                 result = dns_rdata_tostruct(&rdata, &nsec3, NULL);
03686                 INSIST(result == ISC_R_SUCCESS);
03687                 if (nsec3.hash == search->rbtversion->hash &&
03688                     nsec3.iterations == search->rbtversion->iterations &&
03689                     nsec3.salt_length == search->rbtversion->salt_length &&
03690                     memcmp(nsec3.salt, search->rbtversion->salt,
03691                            nsec3.salt_length) == 0)
03692                         return (ISC_TRUE);
03693                 dns_rdata_reset(&rdata);
03694         }
03695         return (ISC_FALSE);
03696 }
03697 
03698 /*
03699  * Find node of the NSEC/NSEC3 record that is 'name'.
03700  */
03701 static inline isc_result_t
03702 previous_closest_nsec(dns_rdatatype_t type, rbtdb_search_t *search,
03703                     dns_name_t *name, dns_name_t *origin,
03704                     dns_rbtnode_t **nodep, dns_rbtnodechain_t *nsecchain,
03705                     isc_boolean_t *firstp)
03706 {
03707         dns_fixedname_t ftarget;
03708         dns_name_t *target;
03709         dns_rbtnode_t *nsecnode;
03710         isc_result_t result;
03711 
03712         REQUIRE(nodep != NULL && *nodep == NULL);
03713 
03714         if (type == dns_rdatatype_nsec3) {
03715                 result = dns_rbtnodechain_prev(&search->chain, NULL, NULL);
03716                 if (result != ISC_R_SUCCESS && result != DNS_R_NEWORIGIN)
03717                         return (result);
03718                 result = dns_rbtnodechain_current(&search->chain, name, origin,
03719                                                   nodep);
03720                 return (result);
03721         }
03722 
03723         dns_fixedname_init(&ftarget);
03724         target = dns_fixedname_name(&ftarget);
03725 
03726         for (;;) {
03727                 if (*firstp) {
03728                         /*
03729                          * Construct the name of the second node to check.
03730                          * It is the first node sought in the NSEC tree.
03731                          */
03732                         *firstp = ISC_FALSE;
03733                         dns_rbtnodechain_init(nsecchain, NULL);
03734                         result = dns_name_concatenate(name, origin,
03735                                                       target, NULL);
03736                         if (result != ISC_R_SUCCESS)
03737                                 return (result);
03738                         nsecnode = NULL;
03739                         result = dns_rbt_findnode(search->rbtdb->nsec,
03740                                                   target, NULL,
03741                                                   &nsecnode, nsecchain,
03742                                                   DNS_RBTFIND_NOOPTIONS,
03743                                                   NULL, NULL);
03744                         if (result == ISC_R_SUCCESS) {
03745                                 /*
03746                                  * Since this was the first loop, finding the
03747                                  * name in the NSEC tree implies that the first
03748                                  * node checked in the main tree had an
03749                                  * unacceptable NSEC record.
03750                                  * Try the previous node in the NSEC tree.
03751                                  */
03752                                 result = dns_rbtnodechain_prev(nsecchain,
03753                                                                name, origin);
03754                                 if (result == DNS_R_NEWORIGIN)
03755                                         result = ISC_R_SUCCESS;
03756                         } else if (result == ISC_R_NOTFOUND ||
03757                                    result == DNS_R_PARTIALMATCH) {
03758                                 result = dns_rbtnodechain_current(nsecchain,
03759                                                         name, origin, NULL);
03760                                 if (result == ISC_R_NOTFOUND)
03761                                         result = ISC_R_NOMORE;
03762                         }
03763                 } else {
03764                         /*
03765                          * This is a second or later trip through the auxiliary
03766                          * tree for the name of a third or earlier NSEC node in
03767                          * the main tree.  Previous trips through the NSEC tree
03768                          * must have found nodes in the main tree with NSEC
03769                          * records.  Perhaps they lacked signature records.
03770                          */
03771                         result = dns_rbtnodechain_prev(nsecchain, name, origin);
03772                         if (result == DNS_R_NEWORIGIN)
03773                                 result = ISC_R_SUCCESS;
03774                 }
03775                 if (result != ISC_R_SUCCESS)
03776                         return (result);
03777 
03778                 /*
03779                  * Construct the name to seek in the main tree.
03780                  */
03781                 result = dns_name_concatenate(name, origin, target, NULL);
03782                 if (result != ISC_R_SUCCESS)
03783                         return (result);
03784 
03785                 *nodep = NULL;
03786                 result = dns_rbt_findnode(search->rbtdb->tree, target, NULL,
03787                                           nodep, &search->chain,
03788                                           DNS_RBTFIND_NOOPTIONS, NULL, NULL);
03789                 if (result == ISC_R_SUCCESS)
03790                         return (result);
03791 
03792                 /*
03793                  * There should always be a node in the main tree with the
03794                  * same name as the node in the auxiliary NSEC tree, except for
03795                  * nodes in the auxiliary tree that are awaiting deletion.
03796                  */
03797                 if (result != DNS_R_PARTIALMATCH && result != ISC_R_NOTFOUND) {
03798                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
03799                                       DNS_LOGMODULE_CACHE, ISC_LOG_ERROR,
03800                                       "previous_closest_nsec(): %s",
03801                                       isc_result_totext(result));
03802                         return (DNS_R_BADDB);
03803                 }
03804         }
03805 }
03806 
03807 /*
03808  * Find the NSEC/NSEC3 which is or before the current point on the
03809  * search chain.  For NSEC3 records only NSEC3 records that match the
03810  * current NSEC3PARAM record are considered.
03811  */
03812 static inline isc_result_t
03813 find_closest_nsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
03814                   dns_name_t *foundname, dns_rdataset_t *rdataset,
03815                   dns_rdataset_t *sigrdataset, dns_rbt_t *tree,
03816                   dns_db_secure_t secure)
03817 {
03818         dns_rbtnode_t *node, *prevnode;
03819         rdatasetheader_t *header, *header_next, *found, *foundsig;
03820         dns_rbtnodechain_t nsecchain;
03821         isc_boolean_t empty_node;
03822         isc_result_t result;
03823         dns_fixedname_t fname, forigin;
03824         dns_name_t *name, *origin;
03825         dns_rdatatype_t type;
03826         rbtdb_rdatatype_t sigtype;
03827         isc_boolean_t wraps;
03828         isc_boolean_t first = ISC_TRUE;
03829         isc_boolean_t need_sig = ISC_TF(secure == dns_db_secure);
03830 
03831         if (tree == search->rbtdb->nsec3) {
03832                 type = dns_rdatatype_nsec3;
03833                 sigtype = RBTDB_RDATATYPE_SIGNSEC3;
03834                 wraps = ISC_TRUE;
03835         } else {
03836                 type = dns_rdatatype_nsec;
03837                 sigtype = RBTDB_RDATATYPE_SIGNSEC;
03838                 wraps = ISC_FALSE;
03839         }
03840 
03841         /*
03842          * Use the auxiliary tree only starting with the second node in the
03843          * hope that the original node will be right much of the time.
03844          */
03845         dns_fixedname_init(&fname);
03846         name = dns_fixedname_name(&fname);
03847         dns_fixedname_init(&forigin);
03848         origin = dns_fixedname_name(&forigin);
03849  again:
03850         node = NULL;
03851         prevnode = NULL;
03852         result = dns_rbtnodechain_current(&search->chain, name, origin, &node);
03853         if (result != ISC_R_SUCCESS)
03854                 return (result);
03855         do {
03856                 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
03857                           isc_rwlocktype_read);
03858                 found = NULL;
03859                 foundsig = NULL;
03860                 empty_node = ISC_TRUE;
03861                 for (header = node->data;
03862                      header != NULL;
03863                      header = header_next) {
03864                         header_next = header->next;
03865                         /*
03866                          * Look for an active, extant NSEC or RRSIG NSEC.
03867                          */
03868                         do {
03869                                 if (header->serial <= search->serial &&
03870                                     !IGNORE(header)) {
03871                                         /*
03872                                          * Is this a "this rdataset doesn't
03873                                          * exist" record?
03874                                          */
03875                                         if (NONEXISTENT(header))
03876                                                 header = NULL;
03877                                         break;
03878                                 } else
03879                                         header = header->down;
03880                         } while (header != NULL);
03881                         if (header != NULL) {
03882                                 /*
03883                                  * We now know that there is at least one
03884                                  * active rdataset at this node.
03885                                  */
03886                                 empty_node = ISC_FALSE;
03887                                 if (header->type == type) {
03888                                         found = header;
03889                                         if (foundsig != NULL)
03890                                                 break;
03891                                 } else if (header->type == sigtype) {
03892                                         foundsig = header;
03893                                         if (found != NULL)
03894                                                 break;
03895                                 }
03896                         }
03897                 }
03898                 if (!empty_node) {
03899                         if (found != NULL && search->rbtversion->havensec3 &&
03900                             found->type == dns_rdatatype_nsec3 &&
03901                             !matchparams(found, search)) {
03902                                 empty_node = ISC_TRUE;
03903                                 found = NULL;
03904                                 foundsig = NULL;
03905                                 result = previous_closest_nsec(type, search,
03906                                                                name, origin,
03907                                                                &prevnode, NULL,
03908                                                                NULL);
03909                         } else if (found != NULL &&
03910                                    (foundsig != NULL || !need_sig)) {
03911                                 /*
03912                                  * We've found the right NSEC/NSEC3 record.
03913                                  *
03914                                  * Note: for this to really be the right
03915                                  * NSEC record, it's essential that the NSEC
03916                                  * records of any nodes obscured by a zone
03917                                  * cut have been removed; we assume this is
03918                                  * the case.
03919                                  */
03920                                 result = dns_name_concatenate(name, origin,
03921                                                               foundname, NULL);
03922                                 if (result == ISC_R_SUCCESS) {
03923                                         if (nodep != NULL) {
03924                                                 new_reference(search->rbtdb,
03925                                                               node);
03926                                                 *nodep = node;
03927                                         }
03928                                         bind_rdataset(search->rbtdb, node,
03929                                                       found, search->now,
03930                                                       rdataset);
03931                                         if (foundsig != NULL)
03932                                                 bind_rdataset(search->rbtdb,
03933                                                               node,
03934                                                               foundsig,
03935                                                               search->now,
03936                                                               sigrdataset);
03937                                 }
03938                         } else if (found == NULL && foundsig == NULL) {
03939                                 /*
03940                                  * This node is active, but has no NSEC or
03941                                  * RRSIG NSEC.  That means it's glue or
03942                                  * other obscured zone data that isn't
03943                                  * relevant for our search.  Treat the
03944                                  * node as if it were empty and keep looking.
03945                                  */
03946                                 empty_node = ISC_TRUE;
03947                                 result = previous_closest_nsec(type, search,
03948                                                                name, origin,
03949                                                                &prevnode,
03950                                                                &nsecchain,
03951                                                                &first);
03952                         } else {
03953                                 /*
03954                                  * We found an active node, but either the
03955                                  * NSEC or the RRSIG NSEC is missing.  This
03956                                  * shouldn't happen.
03957                                  */
03958                                 result = DNS_R_BADDB;
03959                         }
03960                 } else {
03961                         /*
03962                          * This node isn't active.  We've got to keep
03963                          * looking.
03964                          */
03965                         result = previous_closest_nsec(type, search,
03966                                                        name, origin, &prevnode,
03967                                                        &nsecchain, &first);
03968                 }
03969                 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
03970                             isc_rwlocktype_read);
03971                 node = prevnode;
03972                 prevnode = NULL;
03973         } while (empty_node && result == ISC_R_SUCCESS);
03974 
03975         if (!first)
03976                 dns_rbtnodechain_invalidate(&nsecchain);
03977 
03978         if (result == ISC_R_NOMORE && wraps) {
03979                 result = dns_rbtnodechain_last(&search->chain, tree,
03980                                                NULL, NULL);
03981                 if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
03982                         wraps = ISC_FALSE;
03983                         goto again;
03984                 }
03985         }
03986 
03987         /*
03988          * If the result is ISC_R_NOMORE, then we got to the beginning of
03989          * the database and didn't find a NSEC record.  This shouldn't
03990          * happen.
03991          */
03992         if (result == ISC_R_NOMORE)
03993                 result = DNS_R_BADDB;
03994 
03995         return (result);
03996 }
03997 
03998 static isc_result_t
03999 zone_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
04000           dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
04001           dns_dbnode_t **nodep, dns_name_t *foundname,
04002           dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
04003 {
04004         dns_rbtnode_t *node = NULL;
04005         isc_result_t result;
04006         rbtdb_search_t search;
04007         isc_boolean_t cname_ok = ISC_TRUE;
04008         isc_boolean_t close_version = ISC_FALSE;
04009         isc_boolean_t maybe_zonecut = ISC_FALSE;
04010         isc_boolean_t at_zonecut = ISC_FALSE;
04011         isc_boolean_t wild;
04012         isc_boolean_t empty_node;
04013         rdatasetheader_t *header, *header_next, *found, *nsecheader;
04014         rdatasetheader_t *foundsig, *cnamesig, *nsecsig;
04015         rbtdb_rdatatype_t sigtype;
04016         isc_boolean_t active;
04017         dns_rbtnodechain_t chain;
04018         nodelock_t *lock;
04019         dns_rbt_t *tree;
04020 
04021         search.rbtdb = (dns_rbtdb_t *)db;
04022 
04023         REQUIRE(VALID_RBTDB(search.rbtdb));
04024         INSIST(version == NULL ||
04025                ((rbtdb_version_t *)version)->rbtdb == (dns_rbtdb_t *)db);
04026 
04027         /*
04028          * We don't care about 'now'.
04029          */
04030         UNUSED(now);
04031 
04032         /*
04033          * If the caller didn't supply a version, attach to the current
04034          * version.
04035          */
04036         if (version == NULL) {
04037                 currentversion(db, &version);
04038                 close_version = ISC_TRUE;
04039         }
04040 
04041         search.rbtversion = version;
04042         search.serial = search.rbtversion->serial;
04043         search.options = options;
04044         search.copy_name = ISC_FALSE;
04045         search.need_cleanup = ISC_FALSE;
04046         search.wild = ISC_FALSE;
04047         search.zonecut = NULL;
04048         dns_fixedname_init(&search.zonecut_name);
04049         dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
04050         search.now = 0;
04051 
04052         /*
04053          * 'wild' will be true iff. we've matched a wildcard.
04054          */
04055         wild = ISC_FALSE;
04056 
04057         RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
04058 
04059         /*
04060          * Search down from the root of the tree.  If, while going down, we
04061          * encounter a callback node, zone_zonecut_callback() will search the
04062          * rdatasets at the zone cut for active DNAME or NS rdatasets.
04063          */
04064         tree =  (options & DNS_DBFIND_FORCENSEC3) != 0 ? search.rbtdb->nsec3 :
04065                                                          search.rbtdb->tree;
04066         result = dns_rbt_findnode(tree, name, foundname, &node,
04067                                   &search.chain, DNS_RBTFIND_EMPTYDATA,
04068                                   zone_zonecut_callback, &search);
04069 
04070         if (result == DNS_R_PARTIALMATCH) {
04071         partial_match:
04072                 if (search.zonecut != NULL) {
04073                     result = setup_delegation(&search, nodep, foundname,
04074                                               rdataset, sigrdataset);
04075                     goto tree_exit;
04076                 }
04077 
04078                 if (search.wild) {
04079                         /*
04080                          * At least one of the levels in the search chain
04081                          * potentially has a wildcard.  For each such level,
04082                          * we must see if there's a matching wildcard active
04083                          * in the current version.
04084                          */
04085                         result = find_wildcard(&search, &node, name);
04086                         if (result == ISC_R_SUCCESS) {
04087                                 result = dns_name_copy(name, foundname, NULL);
04088                                 if (result != ISC_R_SUCCESS)
04089                                         goto tree_exit;
04090                                 wild = ISC_TRUE;
04091                                 goto found;
04092                         }
04093                         else if (result != ISC_R_NOTFOUND)
04094                                 goto tree_exit;
04095                 }
04096 
04097                 chain = search.chain;
04098                 active = activeempty(&search, &chain, name);
04099 
04100                 /*
04101                  * If we're here, then the name does not exist, is not
04102                  * beneath a zonecut, and there's no matching wildcard.
04103                  */
04104                 if ((search.rbtversion->secure == dns_db_secure &&
04105                      !search.rbtversion->havensec3) ||
04106                     (search.options & DNS_DBFIND_FORCENSEC) != 0 ||
04107                     (search.options & DNS_DBFIND_FORCENSEC3) != 0)
04108                 {
04109                         result = find_closest_nsec(&search, nodep, foundname,
04110                                                    rdataset, sigrdataset, tree,
04111                                                    search.rbtversion->secure);
04112                         if (result == ISC_R_SUCCESS)
04113                                 result = active ? DNS_R_EMPTYNAME :
04114                                                   DNS_R_NXDOMAIN;
04115                 } else
04116                         result = active ? DNS_R_EMPTYNAME : DNS_R_NXDOMAIN;
04117                 goto tree_exit;
04118         } else if (result != ISC_R_SUCCESS)
04119                 goto tree_exit;
04120 
04121  found:
04122         /*
04123          * We have found a node whose name is the desired name, or we
04124          * have matched a wildcard.
04125          */
04126 
04127         if (search.zonecut != NULL) {
04128                 /*
04129                  * If we're beneath a zone cut, we don't want to look for
04130                  * CNAMEs because they're not legitimate zone glue.
04131                  */
04132                 cname_ok = ISC_FALSE;
04133         } else {
04134                 /*
04135                  * The node may be a zone cut itself.  If it might be one,
04136                  * make sure we check for it later.
04137                  *
04138                  * DS records live above the zone cut in an ordinary zone, so
04139                  * we want to ignore any referral.
04140                  *
04141                  * Stub zones don't have anything "above" the delegation, so
04142                  * we always return a referral.
04143                  */
04144                 if (node->find_callback &&
04145                     ((node != search.rbtdb->origin_node &&
04146                       !dns_rdatatype_atparent(type)) ||
04147                      IS_STUB(search.rbtdb)))
04148                         maybe_zonecut = ISC_TRUE;
04149         }
04150 
04151         /*
04152          * Certain DNSSEC types are not subject to CNAME matching
04153          * (RFC4035, section 2.5 and RFC3007).
04154          *
04155          * We don't check for RRSIG, because we don't store RRSIG records
04156          * directly.
04157          */
04158         if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
04159                 cname_ok = ISC_FALSE;
04160 
04161         /*
04162          * We now go looking for rdata...
04163          */
04164 
04165         lock = &search.rbtdb->node_locks[node->locknum].lock;
04166         NODE_LOCK(lock, isc_rwlocktype_read);
04167 
04168         found = NULL;
04169         foundsig = NULL;
04170         sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
04171         nsecheader = NULL;
04172         nsecsig = NULL;
04173         cnamesig = NULL;
04174         empty_node = ISC_TRUE;
04175         for (header = node->data; header != NULL; header = header_next) {
04176                 header_next = header->next;
04177                 /*
04178                  * Look for an active, extant rdataset.
04179                  */
04180                 do {
04181                         if (header->serial <= search.serial &&
04182                             !IGNORE(header)) {
04183                                 /*
04184                                  * Is this a "this rdataset doesn't
04185                                  * exist" record?
04186                                  */
04187                                 if (NONEXISTENT(header))
04188                                         header = NULL;
04189                                 break;
04190                         } else
04191                                 header = header->down;
04192                 } while (header != NULL);
04193                 if (header != NULL) {
04194                         /*
04195                          * We now know that there is at least one active
04196                          * rdataset at this node.
04197                          */
04198                         empty_node = ISC_FALSE;
04199 
04200                         /*
04201                          * Do special zone cut handling, if requested.
04202                          */
04203                         if (maybe_zonecut &&
04204                             header->type == dns_rdatatype_ns) {
04205                                 /*
04206                                  * We increment the reference count on node to
04207                                  * ensure that search->zonecut_rdataset will
04208                                  * still be valid later.
04209                                  */
04210                                 new_reference(search.rbtdb, node);
04211                                 search.zonecut = node;
04212                                 search.zonecut_rdataset = header;
04213                                 search.zonecut_sigrdataset = NULL;
04214                                 search.need_cleanup = ISC_TRUE;
04215                                 maybe_zonecut = ISC_FALSE;
04216                                 at_zonecut = ISC_TRUE;
04217                                 /*
04218                                  * It is not clear if KEY should still be
04219                                  * allowed at the parent side of the zone
04220                                  * cut or not.  It is needed for RFC3007
04221                                  * validated updates.
04222                                  */
04223                                 if ((search.options & DNS_DBFIND_GLUEOK) == 0
04224                                     && type != dns_rdatatype_nsec
04225                                     && type != dns_rdatatype_key) {
04226                                         /*
04227                                          * Glue is not OK, but any answer we
04228                                          * could return would be glue.  Return
04229                                          * the delegation.
04230                                          */
04231                                         found = NULL;
04232                                         break;
04233                                 }
04234                                 if (found != NULL && foundsig != NULL)
04235                                         break;
04236                         }
04237 
04238 
04239                         /*
04240                          * If the NSEC3 record doesn't match the chain
04241                          * we are using behave as if it isn't here.
04242                          */
04243                         if (header->type == dns_rdatatype_nsec3 &&
04244                            !matchparams(header, &search)) {
04245                                 NODE_UNLOCK(lock, isc_rwlocktype_read);
04246                                 goto partial_match;
04247                         }
04248                         /*
04249                          * If we found a type we were looking for,
04250                          * remember it.
04251                          */
04252                         if (header->type == type ||
04253                             type == dns_rdatatype_any ||
04254                             (header->type == dns_rdatatype_cname &&
04255                              cname_ok)) {
04256                                 /*
04257                                  * We've found the answer!
04258                                  */
04259                                 found = header;
04260                                 if (header->type == dns_rdatatype_cname &&
04261                                     cname_ok) {
04262                                         /*
04263                                          * We may be finding a CNAME instead
04264                                          * of the desired type.
04265                                          *
04266                                          * If we've already got the CNAME RRSIG,
04267                                          * use it, otherwise change sigtype
04268                                          * so that we find it.
04269                                          */
04270                                         if (cnamesig != NULL)
04271                                                 foundsig = cnamesig;
04272                                         else
04273                                                 sigtype =
04274                                                     RBTDB_RDATATYPE_SIGCNAME;
04275                                 }
04276                                 /*
04277                                  * If we've got all we need, end the search.
04278                                  */
04279                                 if (!maybe_zonecut && foundsig != NULL)
04280                                         break;
04281                         } else if (header->type == sigtype) {
04282                                 /*
04283                                  * We've found the RRSIG rdataset for our
04284                                  * target type.  Remember it.
04285                                  */
04286                                 foundsig = header;
04287                                 /*
04288                                  * If we've got all we need, end the search.
04289                                  */
04290                                 if (!maybe_zonecut && found != NULL)
04291                                         break;
04292                         } else if (header->type == dns_rdatatype_nsec &&
04293                                    !search.rbtversion->havensec3) {
04294                                 /*
04295                                  * Remember a NSEC rdataset even if we're
04296                                  * not specifically looking for it, because
04297                                  * we might need it later.
04298                                  */
04299                                 nsecheader = header;
04300                         } else if (header->type == RBTDB_RDATATYPE_SIGNSEC &&
04301                                    !search.rbtversion->havensec3) {
04302                                 /*
04303                                  * If we need the NSEC rdataset, we'll also
04304                                  * need its signature.
04305                                  */
04306                                 nsecsig = header;
04307                         } else if (cname_ok &&
04308                                    header->type == RBTDB_RDATATYPE_SIGCNAME) {
04309                                 /*
04310                                  * If we get a CNAME match, we'll also need
04311                                  * its signature.
04312                                  */
04313                                 cnamesig = header;
04314                         }
04315                 }
04316         }
04317 
04318         if (empty_node) {
04319                 /*
04320                  * We have an exact match for the name, but there are no
04321                  * active rdatasets in the desired version.  That means that
04322                  * this node doesn't exist in the desired version, and that
04323                  * we really have a partial match.
04324                  */
04325                 if (!wild) {
04326                         NODE_UNLOCK(lock, isc_rwlocktype_read);
04327                         goto partial_match;
04328                 }
04329         }
04330 
04331         /*
04332          * If we didn't find what we were looking for...
04333          */
04334         if (found == NULL) {
04335                 if (search.zonecut != NULL) {
04336                         /*
04337                          * We were trying to find glue at a node beneath a
04338                          * zone cut, but didn't.
04339                          *
04340                          * Return the delegation.
04341                          */
04342                         NODE_UNLOCK(lock, isc_rwlocktype_read);
04343                         result = setup_delegation(&search, nodep, foundname,
04344                                                   rdataset, sigrdataset);
04345                         goto tree_exit;
04346                 }
04347                 /*
04348                  * The desired type doesn't exist.
04349                  */
04350                 result = DNS_R_NXRRSET;
04351                 if (search.rbtversion->secure == dns_db_secure &&
04352                     !search.rbtversion->havensec3 &&
04353                     (nsecheader == NULL || nsecsig == NULL)) {
04354                         /*
04355                          * The zone is secure but there's no NSEC,
04356                          * or the NSEC has no signature!
04357                          */
04358                         if (!wild) {
04359                                 result = DNS_R_BADDB;
04360                                 goto node_exit;
04361                         }
04362 
04363                         NODE_UNLOCK(lock, isc_rwlocktype_read);
04364                         result = find_closest_nsec(&search, nodep, foundname,
04365                                                    rdataset, sigrdataset,
04366                                                    search.rbtdb->tree,
04367                                                    search.rbtversion->secure);
04368                         if (result == ISC_R_SUCCESS)
04369                                 result = DNS_R_EMPTYWILD;
04370                         goto tree_exit;
04371                 }
04372                 if ((search.options & DNS_DBFIND_FORCENSEC) != 0 &&
04373                     nsecheader == NULL)
04374                 {
04375                         /*
04376                          * There's no NSEC record, and we were told
04377                          * to find one.
04378                          */
04379                         result = DNS_R_BADDB;
04380                         goto node_exit;
04381                 }
04382                 if (nodep != NULL) {
04383                         new_reference(search.rbtdb, node);
04384                         *nodep = node;
04385                 }
04386                 if ((search.rbtversion->secure == dns_db_secure &&
04387                      !search.rbtversion->havensec3) ||
04388                     (search.options & DNS_DBFIND_FORCENSEC) != 0)
04389                 {
04390                         bind_rdataset(search.rbtdb, node, nsecheader,
04391                                       0, rdataset);
04392                         if (nsecsig != NULL)
04393                                 bind_rdataset(search.rbtdb, node,
04394                                               nsecsig, 0, sigrdataset);
04395                 }
04396                 if (wild)
04397                         foundname->attributes |= DNS_NAMEATTR_WILDCARD;
04398                 goto node_exit;
04399         }
04400 
04401         /*
04402          * We found what we were looking for, or we found a CNAME.
04403          */
04404 
04405         if (type != found->type &&
04406             type != dns_rdatatype_any &&
04407             found->type == dns_rdatatype_cname) {
04408                 /*
04409                  * We weren't doing an ANY query and we found a CNAME instead
04410                  * of the type we were looking for, so we need to indicate
04411                  * that result to the caller.
04412                  */
04413                 result = DNS_R_CNAME;
04414         } else if (search.zonecut != NULL) {
04415                 /*
04416                  * If we're beneath a zone cut, we must indicate that the
04417                  * result is glue, unless we're actually at the zone cut
04418                  * and the type is NSEC or KEY.
04419                  */
04420                 if (search.zonecut == node) {
04421                         /*
04422                          * It is not clear if KEY should still be
04423                          * allowed at the parent side of the zone
04424                          * cut or not.  It is needed for RFC3007
04425                          * validated updates.
04426                          */
04427                         if (type == dns_rdatatype_nsec ||
04428                             type == dns_rdatatype_nsec3 ||
04429                             type == dns_rdatatype_key)
04430                                 result = ISC_R_SUCCESS;
04431                         else if (type == dns_rdatatype_any)
04432                                 result = DNS_R_ZONECUT;
04433                         else
04434                                 result = DNS_R_GLUE;
04435                 } else
04436                         result = DNS_R_GLUE;
04437                 /*
04438                  * We might have found data that isn't glue, but was occluded
04439                  * by a dynamic update.  If the caller cares about this, they
04440                  * will have told us to validate glue.
04441                  *
04442                  * XXX We should cache the glue validity state!
04443                  */
04444                 if (result == DNS_R_GLUE &&
04445                     (search.options & DNS_DBFIND_VALIDATEGLUE) != 0 &&
04446                     !valid_glue(&search, foundname, type, node)) {
04447                         NODE_UNLOCK(lock, isc_rwlocktype_read);
04448                         result = setup_delegation(&search, nodep, foundname,
04449                                                   rdataset, sigrdataset);
04450                     goto tree_exit;
04451                 }
04452         } else {
04453                 /*
04454                  * An ordinary successful query!
04455                  */
04456                 result = ISC_R_SUCCESS;
04457         }
04458 
04459         if (nodep != NULL) {
04460                 if (!at_zonecut)
04461                         new_reference(search.rbtdb, node);
04462                 else
04463                         search.need_cleanup = ISC_FALSE;
04464                 *nodep = node;
04465         }
04466 
04467         if (type != dns_rdatatype_any) {
04468                 bind_rdataset(search.rbtdb, node, found, 0, rdataset);
04469                 if (foundsig != NULL)
04470                         bind_rdataset(search.rbtdb, node, foundsig, 0,
04471                                       sigrdataset);
04472         }
04473 
04474         if (wild)
04475                 foundname->attributes |= DNS_NAMEATTR_WILDCARD;
04476 
04477  node_exit:
04478         NODE_UNLOCK(lock, isc_rwlocktype_read);
04479 
04480  tree_exit:
04481         RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
04482 
04483         /*
04484          * If we found a zonecut but aren't going to use it, we have to
04485          * let go of it.
04486          */
04487         if (search.need_cleanup) {
04488                 node = search.zonecut;
04489                 INSIST(node != NULL);
04490                 lock = &(search.rbtdb->node_locks[node->locknum].lock);
04491 
04492                 NODE_LOCK(lock, isc_rwlocktype_read);
04493                 decrement_reference(search.rbtdb, node, 0,
04494                                     isc_rwlocktype_read, isc_rwlocktype_none,
04495                                     ISC_FALSE);
04496                 NODE_UNLOCK(lock, isc_rwlocktype_read);
04497         }
04498 
04499         if (close_version)
04500                 closeversion(db, &version, ISC_FALSE);
04501 
04502         dns_rbtnodechain_reset(&search.chain);
04503 
04504         return (result);
04505 }
04506 
04507 static isc_result_t
04508 zone_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
04509                  isc_stdtime_t now, dns_dbnode_t **nodep,
04510                  dns_name_t *foundname,
04511                  dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
04512 {
              /*
               * This implementation never performs a lookup: every
               * parameter is marked unused and FATAL_ERROR() is invoked
               * unconditionally.
               *
               * NOTE(review): presumably "find zone cut" is only meaningful
               * for cache databases and this function exists so the zone
               * database has an entry for the operation -- confirm against
               * the table that references it.
               */
04513         UNUSED(db);
04514         UNUSED(name);
04515         UNUSED(options);
04516         UNUSED(now);
04517         UNUSED(nodep);
04518         UNUSED(foundname);
04519         UNUSED(rdataset);
04520         UNUSED(sigrdataset);
04521 
04522         FATAL_ERROR(__FILE__, __LINE__, "zone_findzonecut() called!");
04523 
              /*
               * The return below gives the function a value on every path;
               * it only executes if FATAL_ERROR() returns, which the
               * NOTREACHED note says is not expected.
               */
04524         /* NOTREACHED */
04525         return (ISC_R_NOTIMPLEMENTED);
04526 }
04527 
04528 static isc_result_t
04529 cache_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
04530         rbtdb_search_t *search = arg;
04531         rdatasetheader_t *header, *header_prev, *header_next;
04532         rdatasetheader_t *dname_header, *sigdname_header;
04533         isc_result_t result;
04534         nodelock_t *lock;
04535         isc_rwlocktype_t locktype;
04536 
04537         /*
               * Inspect 'node' for a usable DNAME rdataset and, if one is
               * found, record it in the search state as the zone cut.
               *
               * 'arg' is the rbtdb_search_t for the search in progress;
               * 'name' is not used.
               *
               * While scanning the node's header list, expired headers are
               * freed or marked stale, but only when the node lock can be
               * held for writing.
               *
               * Returns DNS_R_PARTIALMATCH when a DNAME was recorded as the
               * zone cut, DNS_R_CONTINUE otherwise.
               *
               * NOTE(review): presumably this is invoked by the tree search
               * for nodes on the path to the query name; the registration
               * is not visible in this function -- confirm at the caller.
               */
04538 
04539         REQUIRE(search->zonecut == NULL);
04540 
04541         /*
04542          * Keep compiler silent.
04543          */
04544         UNUSED(name);
04545 
              /*
               * Begin with a read lock on the node lock selected by
               * node->locknum.  'locktype' tracks the mode currently held so
               * that the unlock at the end matches it.
               */
04546         lock = &(search->rbtdb->node_locks[node->locknum].lock);
04547         locktype = isc_rwlocktype_read;
04548         NODE_LOCK(lock, locktype);
04549 
04550         /*
04551          * Look for a DNAME or RRSIG DNAME rdataset.
               *
               * 'header_prev' is the last header left linked in the list;
               * it is what gets re-pointed when a stale header is unlinked.
04552          */
04553         dname_header = NULL;
04554         sigdname_header = NULL;
04555         header_prev = NULL;
04556         for (header = node->data; header != NULL; header = header_next) {
                      /*
                       * Fetch the successor now: 'header' may be freed
                       * below.
                       */
04557                 header_next = header->next;
04558                 if (header->rdh_ttl <  search->now) {
04559                         /*
04560                          * This rdataset is stale.  If no one else is
04561                          * using the node, we can clean it up right
04562                          * now, otherwise we mark it as stale, and
04563                          * the node as dirty, so it will get cleaned
04564                          * up later.
04565                          */
04566                         if ((header->rdh_ttl <  search->now - RBTDB_VIRTUAL) &&
04567                             (locktype == isc_rwlocktype_write ||
04568                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
04569                                 /*
04570                                  * We update the node's status only when we
04571                                  * can get write access; otherwise, we leave
04572                                  * others to this work.  Periodical cleaning
04573                                  * will eventually take the job as the last
04574                                  * resort.
04575                                  * We won't downgrade the lock, since other
04576                                  * rdatasets are probably stale, too.
04577                                  */
04578                                 locktype = isc_rwlocktype_write;
04579 
04580                                 if (dns_rbtnode_refcurrent(node) == 0) {
04581                                         isc_mem_t *mctx;
04582 
04583                                         /*
04584                                          * header->down can be non-NULL if the
04585                                          * refcount has just decremented to 0
04586                                          * but decrement_reference() has not
04587                                          * performed clean_cache_node(), in
04588                                          * which case we need to purge the
04589                                          * stale headers first.
04590                                          */
04591                                         mctx = search->rbtdb->common.mctx;
04592                                         clean_stale_headers(search->rbtdb,
04593                                                             mctx,
04594                                                             header);
                                              /*
                                               * Unlink 'header' from the
                                               * node's list before freeing
                                               * it; 'header_prev' is left
                                               * unchanged.
                                               */
04595                                         if (header_prev != NULL)
04596                                                 header_prev->next =
04597                                                         header->next;
04598                                         else
04599                                                 node->data = header->next;
04600                                         free_rdataset(search->rbtdb, mctx,
04601                                                       header);
04602                                 } else {
04603                                         mark_stale_header(search->rbtdb,
04604                                                           header);
04605                                         header_prev = header;
04606                                 }
04607                         } else
04608                                 header_prev = header;
04609                 } else if (header->type == dns_rdatatype_dname &&
04610                            EXISTS(header)) {
04611                         dname_header = header;
04612                         header_prev = header;
04613                 } else if (header->type == RBTDB_RDATATYPE_SIGDNAME &&
04614                          EXISTS(header)) {
04615                         sigdname_header = header;
04616                         header_prev = header;
04617                 } else
04618                         header_prev = header;
04619         }
04620 
              /*
               * Use the DNAME only if its trust level is not pending, unless
               * the caller allowed pending data with DNS_DBFIND_PENDINGOK.
               */
04621         if (dname_header != NULL &&
04622             (!DNS_TRUST_PENDING(dname_header->trust) ||
04623              (search->options & DNS_DBFIND_PENDINGOK) != 0)) {
04624                 /*
04625                  * We increment the reference count on node to ensure that
04626                  * search->zonecut_rdataset will still be valid later.
04627                  */
04628                 new_reference(search->rbtdb, node);
04629                 INSIST(!ISC_LINK_LINKED(node, deadlink));
04630                 search->zonecut = node;
04631                 search->zonecut_rdataset = dname_header;
04632                 search->zonecut_sigrdataset = sigdname_header;
04633                 search->need_cleanup = ISC_TRUE;
04634                 result = DNS_R_PARTIALMATCH;
04635         } else
04636                 result = DNS_R_CONTINUE;
04637 
              /*
               * Release in whatever mode we ended up holding (read, or
               * write if the upgrade above succeeded).
               */
04638         NODE_UNLOCK(lock, locktype);
04639 
04640         return (result);
04641 }
04642 
04643 static inline isc_result_t
04644 find_deepest_zonecut(rbtdb_search_t *search, dns_rbtnode_t *node,
04645                      dns_dbnode_t **nodep, dns_name_t *foundname,
04646                      dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
04647 {
04648         unsigned int i;
04649         dns_rbtnode_t *level_node;
04650         rdatasetheader_t *header, *header_prev, *header_next;
04651         rdatasetheader_t *found, *foundsig;
04652         isc_result_t result = ISC_R_NOTFOUND;
04653         dns_name_t name;
04654         dns_rbtdb_t *rbtdb;
04655         isc_boolean_t done;
04656         nodelock_t *lock;
04657         isc_rwlocktype_t locktype;
04658 
04659         /*
               * Starting at 'node' and then moving up through the levels
               * recorded in search->chain, find the first node that holds
               * an existing, unexpired NS rdataset.
               *
               * On a hit:
               *   - 'foundname', if non-NULL, receives the node's name
               *     followed by the names of the remaining chain levels;
               *   - 'nodep', if non-NULL, receives a new reference to the
               *     node;
               *   - 'rdataset' is bound to the NS rdataset and, when an
               *     RRSIG NS was also seen, 'sigrdataset' is bound to it;
               *   - DNS_R_DELEGATION is returned.
               *
               * If building 'foundname' fails, that failure code is
               * returned, '*nodep' (when 'nodep' was supplied) is set to
               * NULL and nothing is bound.
               *
               * If no level holds an NS rdataset, ISC_R_NOTFOUND is
               * returned.
               *
               * Expired headers met along the way are freed or marked
               * stale when the node lock can be upgraded to a write lock.
               *
04660          * Caller must be holding the tree lock.
04661          */
04662 
04663         rbtdb = search->rbtdb;
              /*
               * 'i' is the number of chain levels not yet consumed;
               * search->chain.levels[i - 1] is the next one to use.
               */
04664         i = search->chain.level_matches;
04665         done = ISC_FALSE;
04666         do {
04667                 locktype = isc_rwlocktype_read;
04668                 lock = &rbtdb->node_locks[node->locknum].lock;
04669                 NODE_LOCK(lock, locktype);
04670 
04671                 /*
04672                  * Look for NS and RRSIG NS rdatasets.
04673                  */
04674                 found = NULL;
04675                 foundsig = NULL;
04676                 header_prev = NULL;
04677                 for (header = node->data;
04678                      header != NULL;
04679                      header = header_next) {
                              /*
                               * Fetch the successor now: 'header' may be
                               * freed below.
                               */
04680                         header_next = header->next;
04681                         if (header->rdh_ttl <  search->now) {
04682                                 /*
04683                                  * This rdataset is stale.  If no one else is
04684                                  * using the node, we can clean it up right
04685                                  * now, otherwise we mark it as stale, and
04686                                  * the node as dirty, so it will get cleaned
04687                                  * up later.
04688                                  */
04689                                 if ((header->rdh_ttl <  search->now -
04690                                                     RBTDB_VIRTUAL) &&
04691                                     (locktype == isc_rwlocktype_write ||
04692                                      NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
04693                                         /*
04694                                          * We update the node's status only
04695                                          * when we can get write access.
04696                                          */
04697                                         locktype = isc_rwlocktype_write;
04698 
04699                                         if (dns_rbtnode_refcurrent(node)
04700                                             == 0) {
04701                                                 isc_mem_t *m;
04702 
04703                                                 m = search->rbtdb->common.mctx;
04704                                                 clean_stale_headers(
04705                                                         search->rbtdb,
04706                                                         m, header);
04707                                                 if (header_prev != NULL)
04708                                                         header_prev->next =
04709                                                                 header->next;
04710                                                 else
04711                                                         node->data =
04712                                                                 header->next;
04713                                                 free_rdataset(rbtdb, m,
04714                                                               header);
04715                                         } else {
04716                                                 mark_stale_header(rbtdb,
04717                                                                   header);
04718                                                 header_prev = header;
04719                                         }
04720                                 } else
04721                                         header_prev = header;
04722                         } else if (EXISTS(header)) {
04723                                 /*
04724                                  * We've found an extant rdataset.  See if
04725                                  * we're interested in it.
04726                                  */
04727                                 if (header->type == dns_rdatatype_ns) {
04728                                         found = header;
04729                                         if (foundsig != NULL)
04730                                                 break;
04731                                 } else if (header->type ==
04732                                            RBTDB_RDATATYPE_SIGNS) {
04733                                         foundsig = header;
04734                                         if (found != NULL)
04735                                                 break;
04736                                 }
04737                                 header_prev = header;
04738                         } else
04739                                 header_prev = header;
04740                 }
04741 
04742                 if (found != NULL) {
04743                         /*
04744                          * If we have to set foundname, we do it before
04745                          * anything else.  If we were to set foundname after
04746                          * we had set nodep or bound the rdataset, then we'd
04747                          * have to undo that work if dns_name_concatenate()
04748                          * failed.  By setting foundname first, there's
04749                          * nothing to undo if we have trouble.
04750                          */
04751                         if (foundname != NULL) {
04752                                 dns_name_init(&name, NULL);
04753                                 dns_rbt_namefromnode(node, &name);
04754                                 result = dns_name_copy(&name, foundname, NULL);
04755                                 while (result == ISC_R_SUCCESS && i > 0) {
04756                                         i--;
04757                                         level_node = search->chain.levels[i];
04758                                         dns_name_init(&name, NULL);
04759                                         dns_rbt_namefromnode(level_node,
04760                                                              &name);
04761                                         result =
04762                                                 dns_name_concatenate(foundname,
04763                                                                      &name,
04764                                                                      foundname,
04765                                                                      NULL);
04766                                 }
04767                                 if (result != ISC_R_SUCCESS) {
                                              /*
                                               * 'nodep' is optional (it is
                                               * NULL-checked below), so only
                                               * clear it when the caller
                                               * supplied one.
                                               */
04768                                         if (nodep != NULL)
                                                      *nodep = NULL;
04769                                         goto node_exit;
04770                                 }
04771                         }
04772                         result = DNS_R_DELEGATION;
04773                         if (nodep != NULL) {
04774                                 new_reference(search->rbtdb, node);
04775                                 *nodep = node;
04776                         }
04777                         bind_rdataset(search->rbtdb, node, found, search->now,
04778                                       rdataset);
04779                         if (foundsig != NULL)
04780                                 bind_rdataset(search->rbtdb, node, foundsig,
04781                                               search->now, sigrdataset);
                              /*
                               * update_header() is only called with the
                               * write lock held.  If we hold the read lock it
                               * is dropped and the write lock taken, so
                               * need_headerupdate() is evaluated again once
                               * the write lock is held.
                               */
04782                         if (need_headerupdate(found, search->now) ||
04783                             (foundsig != NULL &&
04784                              need_headerupdate(foundsig, search->now))) {
04785                                 if (locktype != isc_rwlocktype_write) {
04786                                         NODE_UNLOCK(lock, locktype);
04787                                         NODE_LOCK(lock, isc_rwlocktype_write);
04788                                         locktype = isc_rwlocktype_write;
04789                                         POST(locktype);
04790                                 }
04791                                 if (need_headerupdate(found, search->now))
04792                                         update_header(search->rbtdb, found,
04793                                                       search->now);
04794                                 if (foundsig != NULL &&
04795                                     need_headerupdate(foundsig, search->now)) {
04796                                         update_header(search->rbtdb, foundsig,
04797                                                       search->now);
04798                                 }
04799                         }
04800                 }
04801 
04802         node_exit:
04803                 NODE_UNLOCK(lock, locktype);
04804 
                      /*
                       * Move up one chain level only when this node had no
                       * NS rdataset.  A hit, or a failure while building
                       * 'foundname' ('found' is non-NULL in both cases),
                       * ends the walk, as does running out of levels.
                       */
04805                 if (found == NULL && i > 0) {
04806                         i--;
04807                         node = search->chain.levels[i];
04808                 } else
04809                         done = ISC_TRUE;
04810 
04811         } while (!done);
04812 
04813         return (result);
04814 }
04815 
04816 static isc_result_t
04817 find_coveringnsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
04818                   isc_stdtime_t now, dns_name_t *foundname,
04819                   dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
04820 {
04821         dns_rbtnode_t *node;
04822         rdatasetheader_t *header, *header_next, *header_prev;
04823         rdatasetheader_t *found, *foundsig;
04824         isc_boolean_t empty_node;
04825         isc_result_t result;
04826         dns_fixedname_t fname, forigin;
04827         dns_name_t *name, *origin;
04828         rbtdb_rdatatype_t matchtype, sigmatchtype;
04829         nodelock_t *lock;
04830         isc_rwlocktype_t locktype;
04831 
04832         matchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_nsec, 0);
04833         sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig,
04834                                              dns_rdatatype_nsec);
04835 
04836         do {
04837                 node = NULL;
04838                 dns_fixedname_init(&fname);
04839                 name = dns_fixedname_name(&fname);
04840                 dns_fixedname_init(&forigin);
04841                 origin = dns_fixedname_name(&forigin);
04842                 result = dns_rbtnodechain_current(&search->chain, name,
04843                                                   origin, &node);
04844                 if (result != ISC_R_SUCCESS)
04845                         return (result);
04846                 locktype = isc_rwlocktype_read;
04847                 lock = &(search->rbtdb->node_locks[node->locknum].lock);
04848                 NODE_LOCK(lock, locktype);
04849                 found = NULL;
04850                 foundsig = NULL;
04851                 empty_node = ISC_TRUE;
04852                 header_prev = NULL;
04853                 for (header = node->data;
04854                      header != NULL;
04855                      header = header_next) {
04856                         header_next = header->next;
04857                         if (header->rdh_ttl <  now) {
04858                                 /*
04859                                  * This rdataset is stale.  If no one else is
04860                                  * using the node, we can clean it up right
04861                                  * now, otherwise we mark it as stale, and the
04862                                  * node as dirty, so it will get cleaned up
04863                                  * later.
04864                                  */
04865                                 if ((header->rdh_ttl <  now - RBTDB_VIRTUAL) &&
04866                                     (locktype == isc_rwlocktype_write ||
04867                                      NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
04868                                         /*
04869                                          * We update the node's status only
04870                                          * when we can get write access.
04871                                          */
04872                                         locktype = isc_rwlocktype_write;
04873 
04874                                         if (dns_rbtnode_refcurrent(node)
04875                                             == 0) {
04876                                                 isc_mem_t *m;
04877 
04878                                                 m = search->rbtdb->common.mctx;
04879                                                 clean_stale_headers(
04880                                                         search->rbtdb,
04881                                                         m, header);
04882                                                 if (header_prev != NULL)
04883                                                         header_prev->next =
04884                                                                 header->next;
04885                                                 else
04886                                                         node->data = header->next;
04887                                                 free_rdataset(search->rbtdb, m,
04888                                                               header);
04889                                         } else {
04890                                                 mark_stale_header(search->rbtdb,
04891                                                                   header);
04892                                                 header_prev = header;
04893                                         }
04894                                 } else
04895                                         header_prev = header;
04896                                 continue;
04897                         }
04898                         if (NONEXISTENT(header) ||
04899                             RBTDB_RDATATYPE_BASE(header->type) == 0) {
04900                                 header_prev = header;
04901                                 continue;
04902                         }
04903                         empty_node = ISC_FALSE;
04904                         if (header->type == matchtype)
04905                                 found = header;
04906                         else if (header->type == sigmatchtype)
04907                                 foundsig = header;
04908                         header_prev = header;
04909                 }
04910                 if (found != NULL) {
04911                         result = dns_name_concatenate(name, origin,
04912                                                       foundname, NULL);
04913                         if (result != ISC_R_SUCCESS)
04914                                 goto unlock_node;
04915                         bind_rdataset(search->rbtdb, node, found,
04916                                       now, rdataset);
04917                         if (foundsig != NULL)
04918                                 bind_rdataset(search->rbtdb, node, foundsig,
04919                                               now, sigrdataset);
04920                         new_reference(search->rbtdb, node);
04921                         *nodep = node;
04922                         result = DNS_R_COVERINGNSEC;
04923                 } else if (!empty_node) {
04924                         result = ISC_R_NOTFOUND;
04925                 } else
04926                         result = dns_rbtnodechain_prev(&search->chain, NULL,
04927                                                        NULL);
04928  unlock_node:
04929                 NODE_UNLOCK(lock, locktype);
04930         } while (empty_node && result == ISC_R_SUCCESS);
04931         return (result);
04932 }
04933 
04934 /*
04935  * Connect this RBTDB to the response policy zone summary data for the view.
04936  */
04937 static void
04938 rpz_attach(dns_db_t *db, dns_rpz_zones_t *rpzs, dns_rpz_num_t rpz_num) {
04939         dns_rbtdb_t * rbtdb;
04940 
04941         rbtdb = (dns_rbtdb_t *)db;
04942         REQUIRE(VALID_RBTDB(rbtdb));
04943 
04944         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
04945         REQUIRE(rbtdb->rpzs == NULL && rbtdb->rpz_num == DNS_RPZ_INVALID_NUM);
04946         dns_rpz_attach_rpzs(rpzs, &rbtdb->rpzs);
04947         rbtdb->rpz_num = rpz_num;
04948         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
04949 }
04950 
04951 /*
04952  * Enable this RBTDB as a response policy zone.
04953  */
04954 static isc_result_t
04955 rpz_ready(dns_db_t *db) {
04956         dns_rbtdb_t * rbtdb;
04957         isc_result_t result;
04958 
04959         rbtdb = (dns_rbtdb_t *)db;
04960         REQUIRE(VALID_RBTDB(rbtdb));
04961 
04962         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
04963         if (rbtdb->rpzs == NULL) {
04964                 INSIST(rbtdb->rpz_num == DNS_RPZ_INVALID_NUM);
04965                 result = ISC_R_SUCCESS;
04966         } else {
04967                 result = dns_rpz_ready(rbtdb->rpzs, &rbtdb->load_rpzs,
04968                                        rbtdb->rpz_num);
04969         }
04970         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
04971         return (result);
04972 }
04973 
/*
 * Look up 'name' / 'type' in a cache database.
 *
 * Requires a valid rbtdb and version == NULL.  A 'now' of 0 is replaced
 * by the current time.
 *
 * The tree lock is held for reading for the whole search.  After
 * dns_rbt_findnode():
 *   - partial match: optionally try find_coveringnsec(), then fall back
 *     to setup_delegation() (when the callback recorded a zone cut) or
 *     find_deepest_zonecut();
 *   - exact match: the node's header list is scanned under the node lock.
 *     Expired headers are freed or marked stale on the way; live ones are
 *     classified (answer, RRSIG, negative entry, NS, NS RRSIG, CNAME RRSIG).
 *
 * Results assigned in this function:
 *   ISC_R_SUCCESS          ordinary answer
 *   DNS_R_CNAME            a CNAME was found in place of 'type'
 *   DNS_R_NCACHENXDOMAIN,
 *   DNS_R_NCACHENXRRSET    a negative cache entry was found
 *   DNS_R_DELEGATION       no acceptable answer, but an NS rdataset
 *                          exists at the node
 * Any other value is whatever dns_rbt_findnode(), find_coveringnsec(),
 * setup_delegation() or find_deepest_zonecut() returned.
 *
 * An answer whose trust level is additional, glue or pending is accepted
 * only when the matching DNS_DBFIND_*OK option is set; otherwise it is
 * treated as "not found".
 *
 * The final result is always passed to update_cachestats().
 */
04974 static isc_result_t
04975 cache_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
04976            dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
04977            dns_dbnode_t **nodep, dns_name_t *foundname,
04978            dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
04979 {
04980         dns_rbtnode_t *node = NULL;
04981         isc_result_t result;
04982         rbtdb_search_t search;
04983         isc_boolean_t cname_ok = ISC_TRUE;
04984         isc_boolean_t empty_node;
04985         nodelock_t *lock;
04986         isc_rwlocktype_t locktype;
04987         rdatasetheader_t *header, *header_prev, *header_next;
04988         rdatasetheader_t *found, *nsheader;
04989         rdatasetheader_t *foundsig, *nssig, *cnamesig;
04990         rdatasetheader_t *update, *updatesig;
04991         rbtdb_rdatatype_t sigtype, negtype;
04992 
04993         UNUSED(version);
04994 
04995         search.rbtdb = (dns_rbtdb_t *)db;
04996 
04997         REQUIRE(VALID_RBTDB(search.rbtdb));
04998         REQUIRE(version == NULL);
04999 
05000         if (now == 0)
05001                 isc_stdtime_get(&now);
05002 
05003         search.rbtversion = NULL;
05004         search.serial = 1;
05005         search.options = options;
05006         search.copy_name = ISC_FALSE;
05007         search.need_cleanup = ISC_FALSE;
05008         search.wild = ISC_FALSE;
05009         search.zonecut = NULL;
05010         dns_fixedname_init(&search.zonecut_name);
05011         dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
05012         search.now = now;
05013         update = NULL;
05014         updatesig = NULL;
05015 
05016         RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
05017 
05018         /*
05019          * Search down from the root of the tree.  If, while going down, we
05020          * encounter a callback node, cache_zonecut_callback() will search the
05021          * rdatasets at the zone cut for a DNAME rdataset.
05022          */
05023         result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
05024                                   &search.chain, DNS_RBTFIND_EMPTYDATA,
05025                                   cache_zonecut_callback, &search);
05026 
05027         if (result == DNS_R_PARTIALMATCH) {
05028                 if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0) {
05029                         result = find_coveringnsec(&search, nodep, now,
05030                                                    foundname, rdataset,
05031                                                    sigrdataset);
05032                         if (result == DNS_R_COVERINGNSEC)
05033                                 goto tree_exit;
05034                 }
05035                 if (search.zonecut != NULL) {
05036                     result = setup_delegation(&search, nodep, foundname,
05037                                               rdataset, sigrdataset);
05038                     goto tree_exit;
05039                 } else {
                      /*
                       * Also reached by goto from the exact-match path
                       * below, after the node lock has been released.
                       */
05040                 find_ns:
05041                         result = find_deepest_zonecut(&search, node, nodep,
05042                                                       foundname, rdataset,
05043                                                       sigrdataset);
05044                         goto tree_exit;
05045                 }
05046         } else if (result != ISC_R_SUCCESS)
05047                 goto tree_exit;
05048 
05049         /*
05050          * Certain DNSSEC types are not subject to CNAME matching
05051          * (RFC4035, section 2.5 and RFC3007).
05052          *
05053          * We don't check for RRSIG, because we don't store RRSIG records
05054          * directly.
05055          */
05056         if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
05057                 cname_ok = ISC_FALSE;
05058 
05059         /*
05060          * We now go looking for rdata...
05061          */
05062 
05063         lock = &(search.rbtdb->node_locks[node->locknum].lock);
05064         locktype = isc_rwlocktype_read;
05065         NODE_LOCK(lock, locktype);
05066 
05067         found = NULL;
05068         foundsig = NULL;
05069         sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
05070         negtype = RBTDB_RDATATYPE_VALUE(0, type);
05071         nsheader = NULL;
05072         nssig = NULL;
05073         cnamesig = NULL;
05074         empty_node = ISC_TRUE;
05075         header_prev = NULL;
05076         for (header = node->data; header != NULL; header = header_next) {
                      /* Fetch the successor first: 'header' may be freed below. */
05077                 header_next = header->next;
05078                 if (header->rdh_ttl <  now) {
05079                         /*
05080                          * This rdataset is stale.  If no one else is using the
05081                          * node, we can clean it up right now, otherwise we
05082                          * mark it as stale, and the node as dirty, so it will
05083                          * get cleaned up later.
05084                          */
05085                         if ((header->rdh_ttl <  now - RBTDB_VIRTUAL) &&
05086                             (locktype == isc_rwlocktype_write ||
05087                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
05088                                 /*
05089                                  * We update the node's status only when we
05090                                  * can get write access.
05091                                  */
05092                                 locktype = isc_rwlocktype_write;
05093 
05094                                 if (dns_rbtnode_refcurrent(node) == 0) {
05095                                         isc_mem_t *mctx;
05096 
05097                                         mctx = search.rbtdb->common.mctx;
05098                                         clean_stale_headers(search.rbtdb, mctx,
05099                                                             header);
                                              /*
                                               * Unlink 'header'; header_prev is
                                               * left unchanged because 'header'
                                               * is about to be freed.
                                               */
05100                                         if (header_prev != NULL)
05101                                                 header_prev->next =
05102                                                         header->next;
05103                                         else
05104                                                 node->data = header->next;
05105                                         free_rdataset(search.rbtdb, mctx,
05106                                                       header);
05107                                 } else {
05108                                         mark_stale_header(search.rbtdb, header);
05109                                         header_prev = header;
05110                                 }
05111                         } else
05112                                 header_prev = header;
05113                 } else if (EXISTS(header)) {
05114                         /*
05115                          * We now know that there is at least one active
05116                          * non-stale rdataset at this node.
05117                          */
05118                         empty_node = ISC_FALSE;
05119 
05120                         /*
05121                          * If we found a type we were looking for, remember
05122                          * it.
05123                          */
05124                         if (header->type == type ||
05125                             (type == dns_rdatatype_any &&
05126                              RBTDB_RDATATYPE_BASE(header->type) != 0) ||
05127                             (cname_ok && header->type ==
05128                              dns_rdatatype_cname)) {
05129                                 /*
05130                                  * We've found the answer.
05131                                  */
05132                                 found = header;
05133                                 if (header->type == dns_rdatatype_cname &&
05134                                     cname_ok &&
05135                                     cnamesig != NULL) {
05136                                         /*
05137                                          * If we've already got the
05138                                          * CNAME RRSIG, use it.
05139                                          */
05140                                         foundsig = cnamesig;
05141                                 }
05142                         } else if (header->type == sigtype) {
05143                                 /*
05144                                  * We've found the RRSIG rdataset for our
05145                                  * target type.  Remember it.
05146                                  */
05147                                 foundsig = header;
05148                         } else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
05149                                    header->type == negtype) {
05150                                 /*
05151                                  * We've found a negative cache entry.
05152                                  */
05153                                 found = header;
05154                         } else if (header->type == dns_rdatatype_ns) {
05155                                 /*
05156                                  * Remember a NS rdataset even if we're
05157                                  * not specifically looking for it, because
05158                                  * we might need it later.
05159                                  */
05160                                 nsheader = header;
05161                         } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
05162                                 /*
05163                                  * If we need the NS rdataset, we'll also
05164                                  * need its signature.
05165                                  */
05166                                 nssig = header;
05167                         } else if (cname_ok &&
05168                                    header->type == RBTDB_RDATATYPE_SIGCNAME) {
05169                                 /*
05170                                  * If we get a CNAME match, we'll also need
05171                                  * its signature.
05172                                  */
05173                                 cnamesig = header;
05174                         }
05175                         header_prev = header;
05176                 } else
05177                         header_prev = header;
05178         }
05179 
05180         if (empty_node) {
05181                 /*
05182                  * We have an exact match for the name, but there are no
05183                  * extant rdatasets.  That means that this node doesn't
05184                  * meaningfully exist, and that we really have a partial match.
05185                  */
05186                 NODE_UNLOCK(lock, locktype);
05187                 goto find_ns;
05188         }
05189 
05190         /*
05191          * If we didn't find what we were looking for...
05192          */
05193         if (found == NULL ||
05194             (DNS_TRUST_ADDITIONAL(found->trust) &&
05195              ((options & DNS_DBFIND_ADDITIONALOK) == 0)) ||
05196             (found->trust == dns_trust_glue &&
05197              ((options & DNS_DBFIND_GLUEOK) == 0)) ||
05198             (DNS_TRUST_PENDING(found->trust) &&
05199              ((options & DNS_DBFIND_PENDINGOK) == 0))) {
05200                 /*
05201                  * If there is an NS rdataset at this node, then this is the
05202                  * deepest zone cut.
05203                  */
05204                 if (nsheader != NULL) {
05205                         if (nodep != NULL) {
05206                                 new_reference(search.rbtdb, node);
05207                                 INSIST(!ISC_LINK_LINKED(node, deadlink));
05208                                 *nodep = node;
05209                         }
05210                         bind_rdataset(search.rbtdb, node, nsheader, search.now,
05211                                       rdataset);
05212                         if (need_headerupdate(nsheader, search.now))
05213                                 update = nsheader;
05214                         if (nssig != NULL) {
05215                                 bind_rdataset(search.rbtdb, node, nssig,
05216                                               search.now, sigrdataset);
05217                                 if (need_headerupdate(nssig, search.now))
05218                                         updatesig = nssig;
05219                         }
05220                         result = DNS_R_DELEGATION;
05221                         goto node_exit;
05222                 }
05223 
05224                 /*
05225                  * Go find the deepest zone cut.
05226                  */
05227                 NODE_UNLOCK(lock, locktype);
05228                 goto find_ns;
05229         }
05230 
05231         /*
05232          * We found what we were looking for, or we found a CNAME.
05233          */
05234 
05235         if (nodep != NULL) {
05236                 new_reference(search.rbtdb, node);
05237                 INSIST(!ISC_LINK_LINKED(node, deadlink));
05238                 *nodep = node;
05239         }
05240 
05241         if (NEGATIVE(found)) {
05242                 /*
05243                  * We found a negative cache entry.
05244                  */
05245                 if (NXDOMAIN(found))
05246                         result = DNS_R_NCACHENXDOMAIN;
05247                 else
05248                         result = DNS_R_NCACHENXRRSET;
05249         } else if (type != found->type &&
05250                    type != dns_rdatatype_any &&
05251                    found->type == dns_rdatatype_cname) {
05252                 /*
05253                  * We weren't doing an ANY query and we found a CNAME instead
05254                  * of the type we were looking for, so we need to indicate
05255                  * that result to the caller.
05256                  */
05257                 result = DNS_R_CNAME;
05258         } else {
05259                 /*
05260                  * An ordinary successful query!
05261                  */
05262                 result = ISC_R_SUCCESS;
05263         }
05264 
              /*
               * A positive answer to an ANY query binds no rdataset here;
               * only the result code (and *nodep, if requested) is returned.
               */
05265         if (type != dns_rdatatype_any || result == DNS_R_NCACHENXDOMAIN ||
05266             result == DNS_R_NCACHENXRRSET) {
05267                 bind_rdataset(search.rbtdb, node, found, search.now,
05268                               rdataset);
05269                 if (need_headerupdate(found, search.now))
05270                         update = found;
05271                 if (!NEGATIVE(found) && foundsig != NULL) {
05272                         bind_rdataset(search.rbtdb, node, foundsig, search.now,
05273                                       sigrdataset);
05274                         if (need_headerupdate(foundsig, search.now))
05275                                 updatesig = foundsig;
05276                 }
05277         }
05278 
05279  node_exit:
              /*
               * update_header() is called with the node write lock held.  If
               * only the read lock is held it is released and the write lock
               * taken, so there is a window with no lock at all.
               */
05280         if ((update != NULL || updatesig != NULL) &&
05281             locktype != isc_rwlocktype_write) {
05282                 NODE_UNLOCK(lock, locktype);
05283                 NODE_LOCK(lock, isc_rwlocktype_write);
05284                 locktype = isc_rwlocktype_write;
05285                 POST(locktype);
05286         }
              /*
               * NOTE(review): need_headerupdate() is evaluated a second time
               * here, presumably because the header may have been updated
               * while the lock was dropped above -- confirm.
               */
05287         if (update != NULL && need_headerupdate(update, search.now))
05288                 update_header(search.rbtdb, update, search.now);
05289         if (updatesig != NULL && need_headerupdate(updatesig, search.now))
05290                 update_header(search.rbtdb, updatesig, search.now);
05291 
05292         NODE_UNLOCK(lock, locktype);
05293 
05294  tree_exit:
05295         RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
05296 
05297         /*
05298          * If we found a zonecut but aren't going to use it, we have to
05299          * let go of it.
05300          */
05301         if (search.need_cleanup) {
05302                 node = search.zonecut;
05303                 INSIST(node != NULL);
05304                 lock = &(search.rbtdb->node_locks[node->locknum].lock);
05305 
05306                 NODE_LOCK(lock, isc_rwlocktype_read);
05307                 decrement_reference(search.rbtdb, node, 0,
05308                                     isc_rwlocktype_read, isc_rwlocktype_none,
05309                                     ISC_FALSE);
05310                 NODE_UNLOCK(lock, isc_rwlocktype_read);
05311         }
05312 
05313         dns_rbtnodechain_reset(&search.chain);
05314 
05315         update_cachestats(search.rbtdb, result);
05316         return (result);
05317 }
05318 
/*
 * Find the NS rdataset for the zone cut that applies to 'name' in a
 * cache database.
 *
 * A 'now' of 0 is replaced by the current time.  DNS_DBFIND_NOEXACT in
 * 'options' is translated to DNS_RBTFIND_NOEXACT for the tree search.
 *
 * The tree lock is held for reading for the whole search.  On an exact
 * match the node's header list is scanned under the node lock for a live
 * NS rdataset (and its RRSIG); expired headers are freed or marked stale
 * on the way.  If the match was only partial, or the node holds no NS
 * rdataset, the answer comes from find_deepest_zonecut().
 *
 * When an NS rdataset is found at the node, 'rdataset' (and 'sigrdataset'
 * if an RRSIG is present) is bound, and *nodep receives a reference when
 * 'nodep' is non-NULL.
 *
 * Returns 'result' as left by dns_rbt_findnode() or
 * find_deepest_zonecut(), except that DNS_R_DELEGATION is reported to the
 * caller as ISC_R_SUCCESS.
 */
05319 static isc_result_t
05320 cache_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
05321                   isc_stdtime_t now, dns_dbnode_t **nodep,
05322                   dns_name_t *foundname,
05323                   dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
05324 {
05325         dns_rbtnode_t *node = NULL;
05326         nodelock_t *lock;
05327         isc_result_t result;
05328         rbtdb_search_t search;
05329         rdatasetheader_t *header, *header_prev, *header_next;
05330         rdatasetheader_t *found, *foundsig;
05331         unsigned int rbtoptions = DNS_RBTFIND_EMPTYDATA;
05332         isc_rwlocktype_t locktype;
05333 
05334         search.rbtdb = (dns_rbtdb_t *)db;
05335 
05336         REQUIRE(VALID_RBTDB(search.rbtdb));
05337 
05338         if (now == 0)
05339                 isc_stdtime_get(&now);
05340 
05341         search.rbtversion = NULL;
05342         search.serial = 1;
05343         search.options = options;
05344         search.copy_name = ISC_FALSE;
05345         search.need_cleanup = ISC_FALSE;
05346         search.wild = ISC_FALSE;
05347         search.zonecut = NULL;
05348         dns_fixedname_init(&search.zonecut_name);
05349         dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
05350         search.now = now;
05351 
05352         if ((options & DNS_DBFIND_NOEXACT) != 0)
05353                 rbtoptions |= DNS_RBTFIND_NOEXACT;
05354 
05355         RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
05356 
05357         /*
05358          * Search down from the root of the tree.
05359          */
05360         result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
05361                                   &search.chain, rbtoptions, NULL, &search);
05362 
05363         if (result == DNS_R_PARTIALMATCH) {
              /*
               * Also reached by goto from below when the exact-match node
               * holds no NS rdataset; the node lock has been released by
               * then.
               */
05364         find_ns:
05365                 result = find_deepest_zonecut(&search, node, nodep, foundname,
05366                                               rdataset, sigrdataset);
05367                 goto tree_exit;
05368         } else if (result != ISC_R_SUCCESS)
05369                 goto tree_exit;
05370 
05371         /*
05372          * We now go looking for an NS rdataset at the node.
05373          */
05374 
05375         lock = &(search.rbtdb->node_locks[node->locknum].lock);
05376         locktype = isc_rwlocktype_read;
05377         NODE_LOCK(lock, locktype);
05378 
05379         found = NULL;
05380         foundsig = NULL;
05381         header_prev = NULL;
05382         for (header = node->data; header != NULL; header = header_next) {
                      /* Fetch the successor first: 'header' may be freed below. */
05383                 header_next = header->next;
05384                 if (header->rdh_ttl <  now) {
05385                         /*
05386                          * This rdataset is stale.  If no one else is using the
05387                          * node, we can clean it up right now, otherwise we
05388                          * mark it as stale, and the node as dirty, so it will
05389                          * get cleaned up later.
05390                          */
05391                         if ((header->rdh_ttl < now - RBTDB_VIRTUAL) &&
05392                             (locktype == isc_rwlocktype_write ||
05393                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
05394                                 /*
05395                                  * We update the node's status only when we
05396                                  * can get write access.
05397                                  */
05398                                 locktype = isc_rwlocktype_write;
05399 
05400                                 if (dns_rbtnode_refcurrent(node) == 0) {
05401                                         isc_mem_t *mctx;
05402 
05403                                         mctx = search.rbtdb->common.mctx;
05404                                         clean_stale_headers(search.rbtdb, mctx,
05405                                                             header);
                                              /*
                                               * Unlink 'header'; header_prev is
                                               * left unchanged because 'header'
                                               * is about to be freed.
                                               */
05406                                         if (header_prev != NULL)
05407                                                 header_prev->next =
05408                                                         header->next;
05409                                         else
05410                                                 node->data = header->next;
05411                                         free_rdataset(search.rbtdb, mctx,
05412                                                       header);
05413                                 } else {
05414                                         mark_stale_header(search.rbtdb, header);
05415                                         header_prev = header;
05416                                 }
05417                         } else
05418                                 header_prev = header;
05419                 } else if (EXISTS(header)) {
05420                         /*
05421                          * If we found a type we were looking for, remember
05422                          * it.
05423                          */
05424                         if (header->type == dns_rdatatype_ns) {
05425                                 /*
05426                                  * Remember a NS rdataset even if we're
05427                                  * not specifically looking for it, because
05428                                  * we might need it later.
05429                                  */
05430                                 found = header;
05431                         } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
05432                                 /*
05433                                  * If we need the NS rdataset, we'll also
05434                                  * need its signature.
05435                                  */
05436                                 foundsig = header;
05437                         }
05438                         header_prev = header;
05439                 } else
05440                         header_prev = header;
05441         }
05442 
05443         if (found == NULL) {
05444                 /*
05445                  * No NS records here.
05446                  */
05447                 NODE_UNLOCK(lock, locktype);
05448                 goto find_ns;
05449         }
05450 
05451         if (nodep != NULL) {
05452                 new_reference(search.rbtdb, node);
05453                 INSIST(!ISC_LINK_LINKED(node, deadlink));
05454                 *nodep = node;
05455         }
05456 
05457         bind_rdataset(search.rbtdb, node, found, search.now, rdataset);
05458         if (foundsig != NULL)
05459                 bind_rdataset(search.rbtdb, node, foundsig, search.now,
05460                               sigrdataset);
05461 
05462         if (need_headerupdate(found, search.now) ||
05463             (foundsig != NULL &&  need_headerupdate(foundsig, search.now))) {
                      /*
                       * update_header() is called with the node write lock
                       * held.  If only the read lock is held it is released
                       * and the write lock taken, so need_headerupdate() is
                       * evaluated again afterwards.
                       */
05464                 if (locktype != isc_rwlocktype_write) {
05465                         NODE_UNLOCK(lock, locktype);
05466                         NODE_LOCK(lock, isc_rwlocktype_write);
05467                         locktype = isc_rwlocktype_write;
05468                         POST(locktype);
05469                 }
05470                 if (need_headerupdate(found, search.now))
05471                         update_header(search.rbtdb, found, search.now);
05472                 if (foundsig != NULL &&
05473                     need_headerupdate(foundsig, search.now)) {
05474                         update_header(search.rbtdb, foundsig, search.now);
05475                 }
05476         }
05477 
05478         NODE_UNLOCK(lock, locktype);
05479 
05480  tree_exit:
05481         RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
05482 
              /*
               * NOTE(review): need_cleanup was initialised to ISC_FALSE and
               * no callback was passed to dns_rbt_findnode(), so presumably
               * nothing can have set it -- confirm.
               */
05483         INSIST(!search.need_cleanup);
05484 
05485         dns_rbtnodechain_reset(&search.chain);
05486 
              /* Callers of this function see a delegation as plain success. */
05487         if (result == DNS_R_DELEGATION)
05488                 result = ISC_R_SUCCESS;
05489 
05490         return (result);
05491 }
05492 
05493 static void
05494 attachnode(dns_db_t *db, dns_dbnode_t *source, dns_dbnode_t **targetp) {
05495         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
05496         dns_rbtnode_t *node = (dns_rbtnode_t *)source;
05497         unsigned int refs;
05498 
05499         REQUIRE(VALID_RBTDB(rbtdb));
05500         REQUIRE(targetp != NULL && *targetp == NULL);
05501 
05502         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
05503         dns_rbtnode_refincrement(node, &refs);
05504         INSIST(refs != 0);
05505         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
05506 
05507         *targetp = source;
05508 }
05509 
05510 static void
05511 detachnode(dns_db_t *db, dns_dbnode_t **targetp) {
05512         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
05513         dns_rbtnode_t *node;
05514         isc_boolean_t want_free = ISC_FALSE;
05515         isc_boolean_t inactive = ISC_FALSE;
05516         rbtdb_nodelock_t *nodelock;
05517 
05518         REQUIRE(VALID_RBTDB(rbtdb));
05519         REQUIRE(targetp != NULL && *targetp != NULL);
05520 
05521         node = (dns_rbtnode_t *)(*targetp);
05522         nodelock = &rbtdb->node_locks[node->locknum];
05523 
05524         NODE_LOCK(&nodelock->lock, isc_rwlocktype_read);
05525 
05526         if (decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
05527                                 isc_rwlocktype_none, ISC_FALSE)) {
05528                 if (isc_refcount_current(&nodelock->references) == 0 &&
05529                     nodelock->exiting) {
05530                         inactive = ISC_TRUE;
05531                 }
05532         }
05533 
05534         NODE_UNLOCK(&nodelock->lock, isc_rwlocktype_read);
05535 
05536         *targetp = NULL;
05537 
05538         if (inactive) {
05539                 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
05540                 rbtdb->active--;
05541                 if (rbtdb->active == 0)
05542                         want_free = ISC_TRUE;
05543                 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
05544                 if (want_free) {
05545                         char buf[DNS_NAME_FORMATSIZE];
05546                         if (dns_name_dynamic(&rbtdb->common.origin))
05547                                 dns_name_format(&rbtdb->common.origin, buf,
05548                                                 sizeof(buf));
05549                         else
05550                                 strcpy(buf, "<UNKNOWN>");
05551                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
05552                                       DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
05553                                       "calling free_rbtdb(%s)", buf);
05554                         free_rbtdb(rbtdb, ISC_TRUE, NULL);
05555                 }
05556         }
05557 }
05558 
05559 static isc_result_t
05560 expirenode(dns_db_t *db, dns_dbnode_t *node, isc_stdtime_t now) {
05561         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
05562         dns_rbtnode_t *rbtnode = node;
05563         rdatasetheader_t *header;
05564         isc_boolean_t force_expire = ISC_FALSE;
05565         /*
05566          * These are the category and module used by the cache cleaner.
05567          */
05568         isc_boolean_t log = ISC_FALSE;
05569         isc_logcategory_t *category = DNS_LOGCATEGORY_DATABASE;
05570         isc_logmodule_t *module = DNS_LOGMODULE_CACHE;
05571         int level = ISC_LOG_DEBUG(2);
05572         char printname[DNS_NAME_FORMATSIZE];
05573 
05574         REQUIRE(VALID_RBTDB(rbtdb));
05575 
05576         /*
05577          * Caller must hold a tree lock.
05578          */
05579 
05580         if (now == 0)
05581                 isc_stdtime_get(&now);
05582 
05583         if (isc_mem_isovermem(rbtdb->common.mctx)) {
05584                 isc_uint32_t val;
05585 
05586                 isc_random_get(&val);
05587                 /*
05588                  * XXXDCL Could stand to have a better policy, like LRU.
05589                  */
05590                 force_expire = ISC_TF(rbtnode->down == NULL && val % 4 == 0);
05591 
05592                 /*
05593                  * Note that 'log' can be true IFF overmem is also true.
05594                  * overmem can currently only be true for cache
05595                  * databases -- hence all of the "overmem cache" log strings.
05596                  */
05597                 log = ISC_TF(isc_log_wouldlog(dns_lctx, level));
05598                 if (log)
05599                         isc_log_write(dns_lctx, category, module, level,
05600                                       "overmem cache: %s %s",
05601                                       force_expire ? "FORCE" : "check",
05602                                       dns_rbt_formatnodename(rbtnode,
05603                                                            printname,
05604                                                            sizeof(printname)));
05605         }
05606 
05607         /*
05608          * We may not need write access, but this code path is not performance
05609          * sensitive, so it should be okay to always lock as a writer.
05610          */
05611         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
05612                   isc_rwlocktype_write);
05613 
05614         for (header = rbtnode->data; header != NULL; header = header->next)
05615                 if (header->rdh_ttl <= now - RBTDB_VIRTUAL) {
05616                         /*
05617                          * We don't check if refcurrent(rbtnode) == 0 and try
05618                          * to free like we do in cache_find(), because
05619                          * refcurrent(rbtnode) must be non-zero.  This is so
05620                          * because 'node' is an argument to the function.
05621                          */
05622                         mark_stale_header(rbtdb, header);
05623                         if (log)
05624                                 isc_log_write(dns_lctx, category, module,
05625                                               level, "overmem cache: stale %s",
05626                                               printname);
05627                 } else if (force_expire) {
05628                         if (! RETAIN(header)) {
05629                                 set_ttl(rbtdb, header, 0);
05630                                 mark_stale_header(rbtdb, header);
05631                         } else if (log) {
05632                                 isc_log_write(dns_lctx, category, module,
05633                                               level, "overmem cache: "
05634                                               "reprieve by RETAIN() %s",
05635                                               printname);
05636                         }
05637                 } else if (isc_mem_isovermem(rbtdb->common.mctx) && log)
05638                         isc_log_write(dns_lctx, category, module, level,
05639                                       "overmem cache: saved %s", printname);
05640 
05641         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
05642                     isc_rwlocktype_write);
05643 
05644         return (ISC_R_SUCCESS);
05645 }
05646 
05647 static void
05648 overmem(dns_db_t *db, isc_boolean_t over) {
05649         /* This is an empty callback.  See adb.c:water() */
05650 
05651         UNUSED(db);
05652         UNUSED(over);
05653 
05654         return;
05655 }
05656 
05657 static void
05658 printnode(dns_db_t *db, dns_dbnode_t *node, FILE *out) {
05659         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
05660         dns_rbtnode_t *rbtnode = node;
05661         isc_boolean_t first;
05662 
05663         REQUIRE(VALID_RBTDB(rbtdb));
05664 
05665         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
05666                   isc_rwlocktype_read);
05667 
05668         fprintf(out, "node %p, %u references, locknum = %u\n",
05669                 rbtnode, dns_rbtnode_refcurrent(rbtnode),
05670                 rbtnode->locknum);
05671         if (rbtnode->data != NULL) {
05672                 rdatasetheader_t *current, *top_next;
05673 
05674                 for (current = rbtnode->data; current != NULL;
05675                      current = top_next) {
05676                         top_next = current->next;
05677                         first = ISC_TRUE;
05678                         fprintf(out, "\ttype %u", current->type);
05679                         do {
05680                                 if (!first)
05681                                         fprintf(out, "\t");
05682                                 first = ISC_FALSE;
05683                                 fprintf(out,
05684                                         "\tserial = %lu, ttl = %u, "
05685                                         "trust = %u, attributes = %u, "
05686                                         "resign = %u\n",
05687                                         (unsigned long)current->serial,
05688                                         current->rdh_ttl,
05689                                         current->trust,
05690                                         current->attributes,
05691                                         current->resign);
05692                                 current = current->down;
05693                         } while (current != NULL);
05694                 }
05695         } else
05696                 fprintf(out, "(empty)\n");
05697 
05698         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
05699                     isc_rwlocktype_read);
05700 }
05701 
05702 static isc_result_t
05703 createiterator(dns_db_t *db, unsigned int options, dns_dbiterator_t **iteratorp)
05704 {
05705         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
05706         rbtdb_dbiterator_t *rbtdbiter;
05707 
05708         REQUIRE(VALID_RBTDB(rbtdb));
05709 
05710         rbtdbiter = isc_mem_get(rbtdb->common.mctx, sizeof(*rbtdbiter));
05711         if (rbtdbiter == NULL)
05712                 return (ISC_R_NOMEMORY);
05713 
05714         rbtdbiter->common.methods = &dbiterator_methods;
05715         rbtdbiter->common.db = NULL;
05716         dns_db_attach(db, &rbtdbiter->common.db);
05717         rbtdbiter->common.relative_names =
05718                         ISC_TF((options & DNS_DB_RELATIVENAMES) != 0);
05719         rbtdbiter->common.magic = DNS_DBITERATOR_MAGIC;
05720         rbtdbiter->common.cleaning = ISC_FALSE;
05721         rbtdbiter->paused = ISC_TRUE;
05722         rbtdbiter->tree_locked = isc_rwlocktype_none;
05723         rbtdbiter->result = ISC_R_SUCCESS;
05724         dns_fixedname_init(&rbtdbiter->name);
05725         dns_fixedname_init(&rbtdbiter->origin);
05726         rbtdbiter->node = NULL;
05727         rbtdbiter->delete = 0;
05728         rbtdbiter->nsec3only = ISC_TF((options & DNS_DB_NSEC3ONLY) != 0);
05729         rbtdbiter->nonsec3 = ISC_TF((options & DNS_DB_NONSEC3) != 0);
05730         memset(rbtdbiter->deletions, 0, sizeof(rbtdbiter->deletions));
05731         dns_rbtnodechain_init(&rbtdbiter->chain, db->mctx);
05732         dns_rbtnodechain_init(&rbtdbiter->nsec3chain, db->mctx);
05733         if (rbtdbiter->nsec3only)
05734                 rbtdbiter->current = &rbtdbiter->nsec3chain;
05735         else
05736                 rbtdbiter->current = &rbtdbiter->chain;
05737 
05738         *iteratorp = (dns_dbiterator_t *)rbtdbiter;
05739 
05740         return (ISC_R_SUCCESS);
05741 }
05742 
05743 static isc_result_t
05744 zone_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
05745                   dns_rdatatype_t type, dns_rdatatype_t covers,
05746                   isc_stdtime_t now, dns_rdataset_t *rdataset,
05747                   dns_rdataset_t *sigrdataset)
05748 {
05749         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
05750         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
05751         rdatasetheader_t *header, *header_next, *found, *foundsig;
05752         rbtdb_serial_t serial;
05753         rbtdb_version_t *rbtversion = version;
05754         isc_boolean_t close_version = ISC_FALSE;
05755         rbtdb_rdatatype_t matchtype, sigmatchtype;
05756 
05757         REQUIRE(VALID_RBTDB(rbtdb));
05758         REQUIRE(type != dns_rdatatype_any);
05759         INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb);
05760 
05761         if (rbtversion == NULL) {
05762                 currentversion(db, (dns_dbversion_t **) (void *)(&rbtversion));
05763                 close_version = ISC_TRUE;
05764         }
05765         serial = rbtversion->serial;
05766         now = 0;
05767 
05768         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
05769                   isc_rwlocktype_read);
05770 
05771         found = NULL;
05772         foundsig = NULL;
05773         matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
05774         if (covers == 0)
05775                 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
05776         else
05777                 sigmatchtype = 0;
05778 
05779         for (header = rbtnode->data; header != NULL; header = header_next) {
05780                 header_next = header->next;
05781                 do {
05782                         if (header->serial <= serial &&
05783                             !IGNORE(header)) {
05784                                 /*
05785                                  * Is this a "this rdataset doesn't
05786                                  * exist" record?
05787                                  */
05788                                 if (NONEXISTENT(header))
05789                                         header = NULL;
05790                                 break;
05791                         } else
05792                                 header = header->down;
05793                 } while (header != NULL);
05794                 if (header != NULL) {
05795                         /*
05796                          * We have an active, extant rdataset.  If it's a
05797                          * type we're looking for, remember it.
05798                          */
05799                         if (header->type == matchtype) {
05800                                 found = header;
05801                                 if (foundsig != NULL)
05802                                         break;
05803                         } else if (header->type == sigmatchtype) {
05804                                 foundsig = header;
05805                                 if (found != NULL)
05806                                         break;
05807                         }
05808                 }
05809         }
05810         if (found != NULL) {
05811                 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
05812                 if (foundsig != NULL)
05813                         bind_rdataset(rbtdb, rbtnode, foundsig, now,
05814                                       sigrdataset);
05815         }
05816 
05817         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
05818                     isc_rwlocktype_read);
05819 
05820         if (close_version)
05821                 closeversion(db, (dns_dbversion_t **) (void *)(&rbtversion),
05822                              ISC_FALSE);
05823 
05824         if (found == NULL)
05825                 return (ISC_R_NOTFOUND);
05826 
05827         return (ISC_R_SUCCESS);
05828 }
05829 
05830 static isc_result_t
05831 cache_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
05832                    dns_rdatatype_t type, dns_rdatatype_t covers,
05833                    isc_stdtime_t now, dns_rdataset_t *rdataset,
05834                    dns_rdataset_t *sigrdataset)
05835 {
05836         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
05837         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
05838         rdatasetheader_t *header, *header_next, *found, *foundsig;
05839         rbtdb_rdatatype_t matchtype, sigmatchtype, negtype;
05840         isc_result_t result;
05841         nodelock_t *lock;
05842         isc_rwlocktype_t locktype;
05843 
05844         REQUIRE(VALID_RBTDB(rbtdb));
05845         REQUIRE(type != dns_rdatatype_any);
05846 
05847         UNUSED(version);
05848 
05849         result = ISC_R_SUCCESS;
05850 
05851         if (now == 0)
05852                 isc_stdtime_get(&now);
05853 
05854         lock = &rbtdb->node_locks[rbtnode->locknum].lock;
05855         locktype = isc_rwlocktype_read;
05856         NODE_LOCK(lock, locktype);
05857 
05858         found = NULL;
05859         foundsig = NULL;
05860         matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
05861         negtype = RBTDB_RDATATYPE_VALUE(0, type);
05862         if (covers == 0)
05863                 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
05864         else
05865                 sigmatchtype = 0;
05866 
05867         for (header = rbtnode->data; header != NULL; header = header_next) {
05868                 header_next = header->next;
05869                 if (header->rdh_ttl < now) {
05870                         if ((header->rdh_ttl < now - RBTDB_VIRTUAL) &&
05871                             (locktype == isc_rwlocktype_write ||
05872                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
05873                                 /*
05874                                  * We update the node's status only when we
05875                                  * can get write access.
05876                                  */
05877                                 locktype = isc_rwlocktype_write;
05878 
05879                                 /*
05880                                  * We don't check if refcurrent(rbtnode) == 0
05881                                  * and try to free like we do in cache_find(),
05882                                  * because refcurrent(rbtnode) must be
05883                                  * non-zero.  This is so because 'node' is an
05884                                  * argument to the function.
05885                                  */
05886                                 mark_stale_header(rbtdb, header);
05887                         }
05888                 } else if (EXISTS(header)) {
05889                         if (header->type == matchtype)
05890                                 found = header;
05891                         else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
05892                                  header->type == negtype)
05893                                 found = header;
05894                         else if (header->type == sigmatchtype)
05895                                 foundsig = header;
05896                 }
05897         }
05898         if (found != NULL) {
05899                 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
05900                 if (!NEGATIVE(found) && foundsig != NULL)
05901                         bind_rdataset(rbtdb, rbtnode, foundsig, now,
05902                                       sigrdataset);
05903         }
05904 
05905         NODE_UNLOCK(lock, locktype);
05906 
05907         if (found == NULL)
05908                 return (ISC_R_NOTFOUND);
05909 
05910         if (NEGATIVE(found)) {
05911                 /*
05912                  * We found a negative cache entry.
05913                  */
05914                 if (NXDOMAIN(found))
05915                         result = DNS_R_NCACHENXDOMAIN;
05916                 else
05917                         result = DNS_R_NCACHENXRRSET;
05918         }
05919 
05920         update_cachestats(rbtdb, result);
05921 
05922         return (result);
05923 }
05924 
05925 static isc_result_t
05926 allrdatasets(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
05927              isc_stdtime_t now, dns_rdatasetiter_t **iteratorp)
05928 {
05929         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
05930         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
05931         rbtdb_version_t *rbtversion = version;
05932         rbtdb_rdatasetiter_t *iterator;
05933         unsigned int refs;
05934 
05935         REQUIRE(VALID_RBTDB(rbtdb));
05936 
05937         iterator = isc_mem_get(rbtdb->common.mctx, sizeof(*iterator));
05938         if (iterator == NULL)
05939                 return (ISC_R_NOMEMORY);
05940 
05941         if ((db->attributes & DNS_DBATTR_CACHE) == 0) {
05942                 now = 0;
05943                 if (rbtversion == NULL)
05944                         currentversion(db,
05945                                  (dns_dbversion_t **) (void *)(&rbtversion));
05946                 else {
05947                         INSIST(rbtversion->rbtdb == rbtdb);
05948 
05949                         isc_refcount_increment(&rbtversion->references,
05950                                                &refs);
05951                         INSIST(refs > 1);
05952                 }
05953         } else {
05954                 if (now == 0)
05955                         isc_stdtime_get(&now);
05956                 rbtversion = NULL;
05957         }
05958 
05959         iterator->common.magic = DNS_RDATASETITER_MAGIC;
05960         iterator->common.methods = &rdatasetiter_methods;
05961         iterator->common.db = db;
05962         iterator->common.node = node;
05963         iterator->common.version = (dns_dbversion_t *)rbtversion;
05964         iterator->common.now = now;
05965 
05966         NODE_STRONGLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
05967 
05968         dns_rbtnode_refincrement(rbtnode, &refs);
05969         INSIST(refs != 0);
05970 
05971         iterator->current = NULL;
05972 
05973         NODE_STRONGUNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
05974 
05975         *iteratorp = (dns_rdatasetiter_t *)iterator;
05976 
05977         return (ISC_R_SUCCESS);
05978 }
05979 
05980 static isc_boolean_t
05981 cname_and_other_data(dns_rbtnode_t *node, rbtdb_serial_t serial) {
05982         rdatasetheader_t *header, *header_next;
05983         isc_boolean_t cname, other_data;
05984         dns_rdatatype_t rdtype;
05985 
05986         /*
05987          * The caller must hold the node lock.
05988          */
05989 
05990         /*
05991          * Look for CNAME and "other data" rdatasets active in our version.
05992          */
05993         cname = ISC_FALSE;
05994         other_data = ISC_FALSE;
05995         for (header = node->data; header != NULL; header = header_next) {
05996                 header_next = header->next;
05997                 if (header->type == dns_rdatatype_cname) {
05998                         /*
05999                          * Look for an active extant CNAME.
06000                          */
06001                         do {
06002                                 if (header->serial <= serial &&
06003                                     !IGNORE(header)) {
06004                                         /*
06005                                          * Is this a "this rdataset doesn't
06006                                          * exist" record?
06007                                          */
06008                                         if (NONEXISTENT(header))
06009                                                 header = NULL;
06010                                         break;
06011                                 } else
06012                                         header = header->down;
06013                         } while (header != NULL);
06014                         if (header != NULL)
06015                                 cname = ISC_TRUE;
06016                 } else {
06017                         /*
06018                          * Look for active extant "other data".
06019                          *
06020                          * "Other data" is any rdataset whose type is not
06021                          * KEY, NSEC, SIG or RRSIG.
06022                          */
06023                         rdtype = RBTDB_RDATATYPE_BASE(header->type);
06024                         if (rdtype != dns_rdatatype_key &&
06025                             rdtype != dns_rdatatype_sig &&
06026                             rdtype != dns_rdatatype_nsec &&
06027                             rdtype != dns_rdatatype_rrsig) {
06028                                 /*
06029                                  * Is it active and extant?
06030                                  */
06031                                 do {
06032                                         if (header->serial <= serial &&
06033                                             !IGNORE(header)) {
06034                                                 /*
06035                                                  * Is this a "this rdataset
06036                                                  * doesn't exist" record?
06037                                                  */
06038                                                 if (NONEXISTENT(header))
06039                                                         header = NULL;
06040                                                 break;
06041                                         } else
06042                                                 header = header->down;
06043                                 } while (header != NULL);
06044                                 if (header != NULL)
06045                                         other_data = ISC_TRUE;
06046                         }
06047                 }
06048         }
06049 
06050         if (cname && other_data)
06051                 return (ISC_TRUE);
06052 
06053         return (ISC_FALSE);
06054 }
06055 
06056 static isc_result_t
06057 resign_insert(dns_rbtdb_t *rbtdb, int idx, rdatasetheader_t *newheader) {
06058         isc_result_t result;
06059 
06060         INSIST(!IS_CACHE(rbtdb));
06061         INSIST(newheader->heap_index == 0);
06062         INSIST(!ISC_LINK_LINKED(newheader, link));
06063 
06064         result = isc_heap_insert(rbtdb->heaps[idx], newheader);
06065         return (result);
06066 }
06067 
06068 static void
06069 resign_delete(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
06070               rdatasetheader_t *header)
06071 {
06072         /*
06073          * Remove the old header from the heap
06074          */
06075         if (header != NULL && header->heap_index != 0) {
06076                 isc_heap_delete(rbtdb->heaps[header->node->locknum],
06077                                 header->heap_index);
06078                 header->heap_index = 0;
06079                 if (version != NULL) {
06080                         new_reference(rbtdb, header->node);
06081                         ISC_LIST_APPEND(version->resigned_list, header, link);
06082                 }
06083         }
06084 }
06085 
06086 static isc_result_t
06087 add32(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, rbtdb_version_t *rbtversion,
06088       rdatasetheader_t *newheader, unsigned int options, isc_boolean_t loading,
06089       dns_rdataset_t *addedrdataset, isc_stdtime_t now)
06090 {
06091         rbtdb_changed_t *changed = NULL;
06092         rdatasetheader_t *topheader, *topheader_prev, *header, *sigheader;
06093         unsigned char *merged;
06094         isc_result_t result;
06095         isc_boolean_t header_nx;
06096         isc_boolean_t newheader_nx;
06097         isc_boolean_t merge;
06098         dns_rdatatype_t rdtype, covers;
06099         rbtdb_rdatatype_t negtype, sigtype;
06100         dns_trust_t trust;
06101         int idx;
06102 
06103         /*
06104          * Add an rdatasetheader_t to a node.
06105          */
06106 
06107         /*
06108          * Caller must be holding the node lock.
06109          */
06110 
06111         if ((options & DNS_DBADD_MERGE) != 0) {
06112                 REQUIRE(rbtversion != NULL);
06113                 merge = ISC_TRUE;
06114         } else
06115                 merge = ISC_FALSE;
06116 
06117         if ((options & DNS_DBADD_FORCE) != 0)
06118                 trust = dns_trust_ultimate;
06119         else
06120                 trust = newheader->trust;
06121 
06122         if (rbtversion != NULL && !loading) {
06123                 /*
06124                  * We always add a changed record, even if no changes end up
06125                  * being made to this node, because it's harmless and
06126                  * simplifies the code.
06127                  */
06128                 changed = add_changed(rbtdb, rbtversion, rbtnode);
06129                 if (changed == NULL) {
06130                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
06131                         return (ISC_R_NOMEMORY);
06132                 }
06133         }
06134 
06135         newheader_nx = NONEXISTENT(newheader) ? ISC_TRUE : ISC_FALSE;
06136         topheader_prev = NULL;
06137         sigheader = NULL;
06138         negtype = 0;
06139         if (rbtversion == NULL && !newheader_nx) {
06140                 rdtype = RBTDB_RDATATYPE_BASE(newheader->type);
06141                 covers = RBTDB_RDATATYPE_EXT(newheader->type);
06142                 sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, covers);
06143                 if (NEGATIVE(newheader)) {
06144                         /*
06145                          * We're adding a negative cache entry.
06146                          */
06147                         for (topheader = rbtnode->data;
06148                              topheader != NULL;
06149                              topheader = topheader->next) {
06150                                 /*
06151                                  * If we're adding an negative cache entry
06152                                  * which covers all types (NXDOMAIN,
06153                                  * NODATA(QTYPE=ANY)).
06154                                  *
06155                                  * We make all other data stale so that the
06156                                  * only rdataset that can be found at this
06157                                  * node is the negative cache entry.
06158                                  *
06159                                  * Otherwise look for any RRSIGs of the
06160                                  * given type so they can be marked stale
06161                                  * later.
06162                                  */
06163                                 if (covers == dns_rdatatype_any) {
06164                                         set_ttl(rbtdb, topheader, 0);
06165                                         mark_stale_header(rbtdb, topheader);
06166                                 } else if (topheader->type == sigtype)
06167                                         sigheader = topheader;
06168                         }
06169                         if (covers == dns_rdatatype_any)
06170                                 goto find_header;
06171                         negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
06172                 } else {
06173                         /*
06174                          * We're adding something that isn't a
06175                          * negative cache entry.  Look for an extant
06176                          * non-stale NXDOMAIN/NODATA(QTYPE=ANY) negative
06177                          * cache entry.  If we're adding an RRSIG, also
06178                          * check for an extant non-stale NODATA ncache
06179                          * entry which covers the same type as the RRSIG.
06180                          */
06181                         for (topheader = rbtnode->data;
06182                              topheader != NULL;
06183                              topheader = topheader->next) {
06184                                 if ((topheader->type ==
06185                                         RBTDB_RDATATYPE_NCACHEANY) ||
06186                                         (newheader->type == sigtype &&
06187                                         topheader->type ==
06188                                         RBTDB_RDATATYPE_VALUE(0, covers))) {
06189                                                 break;
06190                                         }
06191                         }
06192                         if (topheader != NULL && EXISTS(topheader) &&
06193                             topheader->rdh_ttl >= now) {
06194                                 /*
06195                                  * Found one.
06196                                  */
06197                                 if (trust < topheader->trust) {
06198                                         /*
06199                                          * The NXDOMAIN/NODATA(QTYPE=ANY)
06200                                          * is more trusted.
06201                                          */
06202                                         free_rdataset(rbtdb,
06203                                                       rbtdb->common.mctx,
06204                                                       newheader);
06205                                         if (addedrdataset != NULL)
06206                                                 bind_rdataset(rbtdb, rbtnode,
06207                                                               topheader, now,
06208                                                               addedrdataset);
06209                                         return (DNS_R_UNCHANGED);
06210                                 }
06211                                 /*
06212                                  * The new rdataset is better.  Expire the
06213                                  * ncache entry.
06214                                  */
06215                                 set_ttl(rbtdb, topheader, 0);
06216                                 mark_stale_header(rbtdb, topheader);
06217                                 topheader = NULL;
06218                                 goto find_header;
06219                         }
06220                         negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
06221                 }
06222         }
06223 
06224         for (topheader = rbtnode->data;
06225              topheader != NULL;
06226              topheader = topheader->next) {
06227                 if (topheader->type == newheader->type ||
06228                     topheader->type == negtype)
06229                         break;
06230                 topheader_prev = topheader;
06231         }
06232 
06233  find_header:
06234         /*
06235          * If header isn't NULL, we've found the right type.  There may be
06236          * IGNORE rdatasets between the top of the chain and the first real
06237          * data.  We skip over them.
06238          */
06239         header = topheader;
06240         while (header != NULL && IGNORE(header))
06241                 header = header->down;
06242         if (header != NULL) {
06243                 header_nx = NONEXISTENT(header) ? ISC_TRUE : ISC_FALSE;
06244 
06245                 /*
06246                  * Deleting an already non-existent rdataset has no effect.
06247                  */
06248                 if (header_nx && newheader_nx) {
06249                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
06250                         return (DNS_R_UNCHANGED);
06251                 }
06252 
06253                 /*
06254                  * Trying to add an rdataset with lower trust to a cache DB
06255                  * has no effect, provided that the cache data isn't stale.
06256                  */
06257                 if (rbtversion == NULL && trust < header->trust &&
06258                     (header->rdh_ttl >= now || header_nx)) {
06259                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
06260                         if (addedrdataset != NULL)
06261                                 bind_rdataset(rbtdb, rbtnode, header, now,
06262                                               addedrdataset);
06263                         return (DNS_R_UNCHANGED);
06264                 }
06265 
06266                 /*
06267                  * Don't merge if a nonexistent rdataset is involved.
06268                  */
06269                 if (merge && (header_nx || newheader_nx))
06270                         merge = ISC_FALSE;
06271 
06272                 /*
06273                  * If 'merge' is ISC_TRUE, we'll try to create a new rdataset
06274                  * that is the union of 'newheader' and 'header'.
06275                  */
06276                 if (merge) {
06277                         unsigned int flags = 0;
06278                         INSIST(rbtversion->serial >= header->serial);
06279                         merged = NULL;
06280                         result = ISC_R_SUCCESS;
06281 
06282                         if ((options & DNS_DBADD_EXACT) != 0)
06283                                 flags |= DNS_RDATASLAB_EXACT;
06284                         if ((options & DNS_DBADD_EXACTTTL) != 0 &&
06285                              newheader->rdh_ttl != header->rdh_ttl)
06286                                         result = DNS_R_NOTEXACT;
06287                         else if (newheader->rdh_ttl != header->rdh_ttl)
06288                                 flags |= DNS_RDATASLAB_FORCE;
06289                         if (result == ISC_R_SUCCESS)
06290                                 result = dns_rdataslab_merge(
06291                                              (unsigned char *)header,
06292                                              (unsigned char *)newheader,
06293                                              (unsigned int)(sizeof(*newheader)),
06294                                              rbtdb->common.mctx,
06295                                              rbtdb->common.rdclass,
06296                                              (dns_rdatatype_t)header->type,
06297                                              flags, &merged);
06298                         if (result == ISC_R_SUCCESS) {
06299                                 /*
06300                                  * If 'header' has the same serial number as
06301                                  * we do, we could clean it up now if we knew
06302                                  * that our caller had no references to it.
06303                                  * We don't know this, however, so we leave it
06304                                  * alone.  It will get cleaned up when
06305                                  * clean_zone_node() runs.
06306                                  */
06307                                 free_rdataset(rbtdb, rbtdb->common.mctx,
06308                                               newheader);
06309                                 newheader = (rdatasetheader_t *)merged;
06310                                 init_rdataset(rbtdb, newheader);
06311                                 update_newheader(newheader, header);
06312                                 if (loading && RESIGN(newheader) &&
06313                                     RESIGN(header) &&
06314                                     header->resign < newheader->resign)
06315                                         newheader->resign = header->resign;
06316                         } else {
06317                                 free_rdataset(rbtdb, rbtdb->common.mctx,
06318                                               newheader);
06319                                 return (result);
06320                         }
06321                 }
06322                 /*
06323                  * Don't replace existing NS, A and AAAA RRsets
06324                  * in the cache if they already exist.  This
06325                  * prevents named being locked to old servers.
06326                  * Don't lower trust of existing record if the
06327                  * update is forced.
06328                  */
06329                 if (IS_CACHE(rbtdb) && header->rdh_ttl >= now &&
06330                     header->type == dns_rdatatype_ns &&
06331                     !header_nx && !newheader_nx &&
06332                     header->trust >= newheader->trust &&
06333                     dns_rdataslab_equalx((unsigned char *)header,
06334                                          (unsigned char *)newheader,
06335                                          (unsigned int)(sizeof(*newheader)),
06336                                          rbtdb->common.rdclass,
06337                                          (dns_rdatatype_t)header->type)) {
06338                         /*
06339                          * Honour the new ttl if it is less than the
06340                          * older one.
06341                          */
06342                         if (header->rdh_ttl > newheader->rdh_ttl)
06343                                 set_ttl(rbtdb, header, newheader->rdh_ttl);
06344                         if (header->noqname == NULL &&
06345                             newheader->noqname != NULL) {
06346                                 header->noqname = newheader->noqname;
06347                                 newheader->noqname = NULL;
06348                         }
06349                         if (header->closest == NULL &&
06350                             newheader->closest != NULL) {
06351                                 header->closest = newheader->closest;
06352                                 newheader->closest = NULL;
06353                         }
06354                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
06355                         if (addedrdataset != NULL)
06356                                 bind_rdataset(rbtdb, rbtnode, header, now,
06357                                               addedrdataset);
06358                         return (ISC_R_SUCCESS);
06359                 }
06360                 /*
06361                  * If we will be replacing an NS RRset, force its TTL
06362                  * to be no more than the current NS RRset's TTL.  This
06363                  * ensures the delegations that are withdrawn are honoured.
06364                  */
06365                 if (IS_CACHE(rbtdb) && header->rdh_ttl >= now &&
06366                     header->type == dns_rdatatype_ns &&
06367                     !header_nx && !newheader_nx &&
06368                     header->trust <= newheader->trust) {
06369                         if (newheader->rdh_ttl > header->rdh_ttl) {
06370                                 newheader->rdh_ttl = header->rdh_ttl;
06371                         }
06372                 }
06373                 if (IS_CACHE(rbtdb) && header->rdh_ttl >= now &&
06374                     (options & DNS_DBADD_PREFETCH) == 0 &&
06375                     (header->type == dns_rdatatype_a ||
06376                      header->type == dns_rdatatype_aaaa ||
06377                      header->type == dns_rdatatype_ds ||
06378                      header->type == RBTDB_RDATATYPE_SIGDDS) &&
06379                     !header_nx && !newheader_nx &&
06380                     header->trust >= newheader->trust &&
06381                     dns_rdataslab_equal((unsigned char *)header,
06382                                         (unsigned char *)newheader,
06383                                         (unsigned int)(sizeof(*newheader)))) {
06384                         /*
06385                          * Honour the new ttl if it is less than the
06386                          * older one.
06387                          */
06388                         if (header->rdh_ttl > newheader->rdh_ttl)
06389                                 set_ttl(rbtdb, header, newheader->rdh_ttl);
06390                         if (header->noqname == NULL &&
06391                             newheader->noqname != NULL) {
06392                                 header->noqname = newheader->noqname;
06393                                 newheader->noqname = NULL;
06394                         }
06395                         if (header->closest == NULL &&
06396                             newheader->closest != NULL) {
06397                                 header->closest = newheader->closest;
06398                                 newheader->closest = NULL;
06399                         }
06400                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
06401                         if (addedrdataset != NULL)
06402                                 bind_rdataset(rbtdb, rbtnode, header, now,
06403                                               addedrdataset);
06404                         return (ISC_R_SUCCESS);
06405                 }
06406                 INSIST(rbtversion == NULL ||
06407                        rbtversion->serial >= topheader->serial);
06408                 if (topheader_prev != NULL)
06409                         topheader_prev->next = newheader;
06410                 else
06411                         rbtnode->data = newheader;
06412                 newheader->next = topheader->next;
06413                 if (loading) {
06414                         /*
06415                          * There are no other references to 'header' when
06416                          * loading, so we MAY clean up 'header' now.
06417                          * Since we don't generate changed records when
06418                          * loading, we MUST clean up 'header' now.
06419                          */
06420                         newheader->down = NULL;
06421                         free_rdataset(rbtdb, rbtdb->common.mctx, header);
06422 
06423                         idx = newheader->node->locknum;
06424                         if (IS_CACHE(rbtdb)) {
06425                                 ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
06426                                                  newheader, link);
06427                                 INSIST(rbtdb->heaps != NULL);
06428                                 (void)isc_heap_insert(rbtdb->heaps[idx],
06429                                                       newheader);
06430                         } else if (RESIGN(newheader)) {
06431                                 result = resign_insert(rbtdb, idx, newheader);
06432                                 if (result != ISC_R_SUCCESS)
06433                                         return (result);
06434                         }
06435                 } else {
06436                         newheader->down = topheader;
06437                         topheader->next = newheader;
06438                         rbtnode->dirty = 1;
06439                         if (changed != NULL)
06440                                 changed->dirty = ISC_TRUE;
06441                         if (rbtversion == NULL) {
06442                                 set_ttl(rbtdb, header, 0);
06443                                 mark_stale_header(rbtdb, header);
06444                                 if (sigheader != NULL) {
06445                                         set_ttl(rbtdb, sigheader, 0);
06446                                         mark_stale_header(rbtdb, sigheader);
06447                                 }
06448                         }
06449                         idx = newheader->node->locknum;
06450                         if (IS_CACHE(rbtdb)) {
06451                                 ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
06452                                                  newheader, link);
06453                                 /*
06454                                  * XXXMLG We don't check the return value
06455                                  * here.  If it fails, we will not do TTL
06456                                  * based expiry on this node.  However, we
06457                                  * will do it on the LRU side, so memory
06458                                  * will not leak... for long.
06459                                  */
06460                                 INSIST(rbtdb->heaps != NULL);
06461                                 (void)isc_heap_insert(rbtdb->heaps[idx],
06462                                                       newheader);
06463                         } else if (RESIGN(newheader)) {
06464                                 resign_delete(rbtdb, rbtversion, header);
06465                                 result = resign_insert(rbtdb, idx, newheader);
06466                                 if (result != ISC_R_SUCCESS)
06467                                         return (result);
06468                         }
06469                 }
06470         } else {
06471                 /*
06472                  * No non-IGNORED rdatasets of the given type exist at
06473                  * this node.
06474                  */
06475 
06476                 /*
06477                  * If we're trying to delete the type, don't bother.
06478                  */
06479                 if (newheader_nx) {
06480                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
06481                         return (DNS_R_UNCHANGED);
06482                 }
06483 
06484                 if (topheader != NULL) {
06485                         /*
06486                          * We have a list of rdatasets of the given type,
06487                          * but they're all marked IGNORE.  We simply insert
06488                          * the new rdataset at the head of the list.
06489                          *
06490                          * Ignored rdatasets cannot occur during loading, so
06491                          * we INSIST on it.
06492                          */
06493                         INSIST(!loading);
06494                         INSIST(rbtversion == NULL ||
06495                                rbtversion->serial >= topheader->serial);
06496                         if (topheader_prev != NULL)
06497                                 topheader_prev->next = newheader;
06498                         else
06499                                 rbtnode->data = newheader;
06500                         newheader->next = topheader->next;
06501                         newheader->down = topheader;
06502                         topheader->next = newheader;
06503                         rbtnode->dirty = 1;
06504                         if (changed != NULL)
06505                                 changed->dirty = ISC_TRUE;
06506                 } else {
06507                         /*
06508                          * No rdatasets of the given type exist at the node.
06509                          */
06510                         newheader->next = rbtnode->data;
06511                         newheader->down = NULL;
06512                         rbtnode->data = newheader;
06513                 }
06514                 idx = newheader->node->locknum;
06515                 if (IS_CACHE(rbtdb)) {
06516                         ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
06517                                          newheader, link);
06518                         isc_heap_insert(rbtdb->heaps[idx], newheader);
06519                 } else if (RESIGN(newheader)) {
06520                         resign_delete(rbtdb, rbtversion, header);
06521                         result = resign_insert(rbtdb, idx, newheader);
06522                         if (result != ISC_R_SUCCESS)
06523                                 return (result);
06524                 }
06525         }
06526 
06527         /*
06528          * Check if the node now contains CNAME and other data.
06529          */
06530         if (rbtversion != NULL &&
06531             cname_and_other_data(rbtnode, rbtversion->serial))
06532                 return (DNS_R_CNAMEANDOTHER);
06533 
06534         if (addedrdataset != NULL)
06535                 bind_rdataset(rbtdb, rbtnode, newheader, now, addedrdataset);
06536 
06537         return (ISC_R_SUCCESS);
06538 }
06539 
06540 static inline isc_boolean_t
06541 delegating_type(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
06542                 rbtdb_rdatatype_t type)
06543 {
06544         if (IS_CACHE(rbtdb)) {
06545                 if (type == dns_rdatatype_dname)
06546                         return (ISC_TRUE);
06547                 else
06548                         return (ISC_FALSE);
06549         } else if (type == dns_rdatatype_dname ||
06550                    (type == dns_rdatatype_ns &&
06551                     (node != rbtdb->origin_node || IS_STUB(rbtdb))))
06552                 return (ISC_TRUE);
06553         return (ISC_FALSE);
06554 }
06555 
06556 static inline isc_result_t
06557 addnoqname(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
06558            dns_rdataset_t *rdataset)
06559 {
06560         struct noqname *noqname;
06561         isc_mem_t *mctx = rbtdb->common.mctx;
06562         dns_name_t name;
06563         dns_rdataset_t neg, negsig;
06564         isc_result_t result;
06565         isc_region_t r;
06566 
06567         dns_name_init(&name, NULL);
06568         dns_rdataset_init(&neg);
06569         dns_rdataset_init(&negsig);
06570 
06571         result = dns_rdataset_getnoqname(rdataset, &name, &neg, &negsig);
06572         RUNTIME_CHECK(result == ISC_R_SUCCESS);
06573 
06574         noqname = isc_mem_get(mctx, sizeof(*noqname));
06575         if (noqname == NULL) {
06576                 result = ISC_R_NOMEMORY;
06577                 goto cleanup;
06578         }
06579         dns_name_init(&noqname->name, NULL);
06580         noqname->neg = NULL;
06581         noqname->negsig = NULL;
06582         noqname->type = neg.type;
06583         result = dns_name_dup(&name, mctx, &noqname->name);
06584         if (result != ISC_R_SUCCESS)
06585                 goto cleanup;
06586         result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0);
06587         if (result != ISC_R_SUCCESS)
06588                 goto cleanup;
06589         noqname->neg = r.base;
06590         result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0);
06591         if (result != ISC_R_SUCCESS)
06592                 goto cleanup;
06593         noqname->negsig = r.base;
06594         dns_rdataset_disassociate(&neg);
06595         dns_rdataset_disassociate(&negsig);
06596         newheader->noqname = noqname;
06597         return (ISC_R_SUCCESS);
06598 
06599 cleanup:
06600         dns_rdataset_disassociate(&neg);
06601         dns_rdataset_disassociate(&negsig);
06602         if (noqname != NULL)
06603                 free_noqname(mctx, &noqname);
06604         return(result);
06605 }
06606 
06607 static inline isc_result_t
06608 addclosest(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
06609            dns_rdataset_t *rdataset)
06610 {
06611         struct noqname *closest;
06612         isc_mem_t *mctx = rbtdb->common.mctx;
06613         dns_name_t name;
06614         dns_rdataset_t neg, negsig;
06615         isc_result_t result;
06616         isc_region_t r;
06617 
06618         dns_name_init(&name, NULL);
06619         dns_rdataset_init(&neg);
06620         dns_rdataset_init(&negsig);
06621 
06622         result = dns_rdataset_getclosest(rdataset, &name, &neg, &negsig);
06623         RUNTIME_CHECK(result == ISC_R_SUCCESS);
06624 
06625         closest = isc_mem_get(mctx, sizeof(*closest));
06626         if (closest == NULL) {
06627                 result = ISC_R_NOMEMORY;
06628                 goto cleanup;
06629         }
06630         dns_name_init(&closest->name, NULL);
06631         closest->neg = NULL;
06632         closest->negsig = NULL;
06633         closest->type = neg.type;
06634         result = dns_name_dup(&name, mctx, &closest->name);
06635         if (result != ISC_R_SUCCESS)
06636                 goto cleanup;
06637         result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0);
06638         if (result != ISC_R_SUCCESS)
06639                 goto cleanup;
06640         closest->neg = r.base;
06641         result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0);
06642         if (result != ISC_R_SUCCESS)
06643                 goto cleanup;
06644         closest->negsig = r.base;
06645         dns_rdataset_disassociate(&neg);
06646         dns_rdataset_disassociate(&negsig);
06647         newheader->closest = closest;
06648         return (ISC_R_SUCCESS);
06649 
06650  cleanup:
06651         dns_rdataset_disassociate(&neg);
06652         dns_rdataset_disassociate(&negsig);
06653         if (closest != NULL)
06654                 free_noqname(mctx, &closest);
06655         return(result);
06656 }
06657 
/*
 * Forward declaration of the zone database method table.  addrdataset() and
 * subtractrdataset() below compare a database's 'common.methods' pointer
 * against &zone_methods to decide whether zone-only preconditions apply.
 * NOTE(review): the initialised definition is presumably later in this
 * file -- confirm.
 */
06658 static dns_dbmethods_t zone_methods;
06659 
06660 static isc_result_t
06661 addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
06662             isc_stdtime_t now, dns_rdataset_t *rdataset, unsigned int options,
06663             dns_rdataset_t *addedrdataset)
06664 {
06665         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
06666         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
06667         rbtdb_version_t *rbtversion = version;
06668         isc_region_t region;
06669         rdatasetheader_t *newheader;
06670         rdatasetheader_t *header;
06671         isc_result_t result;
06672         isc_boolean_t delegating;
06673         isc_boolean_t newnsec;
06674         isc_boolean_t tree_locked = ISC_FALSE;
06675         isc_boolean_t cache_is_overmem = ISC_FALSE;
06676         dns_fixedname_t fixed;
06677         dns_name_t *name;
06678 
06679         REQUIRE(VALID_RBTDB(rbtdb));
06680         INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb);
06681 
06682         if (rbtdb->common.methods == &zone_methods)
06683                 REQUIRE(((rbtnode->nsec == DNS_RBT_NSEC_NSEC3 &&
06684                           (rdataset->type == dns_rdatatype_nsec3 ||
06685                            rdataset->covers == dns_rdatatype_nsec3)) ||
06686                          (rbtnode->nsec != DNS_RBT_NSEC_NSEC3 &&
06687                            rdataset->type != dns_rdatatype_nsec3 &&
06688                            rdataset->covers != dns_rdatatype_nsec3)));
06689 
06690         if (rbtversion == NULL) {
06691                 if (now == 0)
06692                         isc_stdtime_get(&now);
06693         } else
06694                 now = 0;
06695 
06696         result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
06697                                             &region, sizeof(rdatasetheader_t));
06698         if (result != ISC_R_SUCCESS)
06699                 return (result);
06700 
06701         dns_fixedname_init(&fixed);
06702         name = dns_fixedname_name(&fixed);
06703         dns_rbt_fullnamefromnode(node, name);
06704         dns_rdataset_getownercase(rdataset, name);
06705 
06706         newheader = (rdatasetheader_t *)region.base;
06707         init_rdataset(rbtdb, newheader);
06708         setownercase(newheader, name);
06709         set_ttl(rbtdb, newheader, rdataset->ttl + now);
06710         newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
06711                                                 rdataset->covers);
06712         newheader->attributes = 0;
06713         newheader->noqname = NULL;
06714         newheader->closest = NULL;
06715         newheader->count = init_count++;
06716         newheader->trust = rdataset->trust;
06717         newheader->additional_auth = NULL;
06718         newheader->additional_glue = NULL;
06719         newheader->last_used = now;
06720         newheader->node = rbtnode;
06721         if (rbtversion != NULL) {
06722                 newheader->serial = rbtversion->serial;
06723                 now = 0;
06724 
06725                 if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
06726                         newheader->attributes |= RDATASET_ATTR_RESIGN;
06727                         newheader->resign = rdataset->resign;
06728                 } else
06729                         newheader->resign = 0;
06730         } else {
06731                 newheader->serial = 1;
06732                 newheader->resign = 0;
06733                 if ((rdataset->attributes & DNS_RDATASETATTR_PREFETCH) != 0)
06734                         newheader->attributes |= RDATASET_ATTR_PREFETCH;
06735                 if ((rdataset->attributes & DNS_RDATASETATTR_NEGATIVE) != 0)
06736                         newheader->attributes |= RDATASET_ATTR_NEGATIVE;
06737                 if ((rdataset->attributes & DNS_RDATASETATTR_NXDOMAIN) != 0)
06738                         newheader->attributes |= RDATASET_ATTR_NXDOMAIN;
06739                 if ((rdataset->attributes & DNS_RDATASETATTR_OPTOUT) != 0)
06740                         newheader->attributes |= RDATASET_ATTR_OPTOUT;
06741                 if ((rdataset->attributes & DNS_RDATASETATTR_NOQNAME) != 0) {
06742                         result = addnoqname(rbtdb, newheader, rdataset);
06743                         if (result != ISC_R_SUCCESS) {
06744                                 free_rdataset(rbtdb, rbtdb->common.mctx,
06745                                               newheader);
06746                                 return (result);
06747                         }
06748                 }
06749                 if ((rdataset->attributes & DNS_RDATASETATTR_CLOSEST) != 0) {
06750                         result = addclosest(rbtdb, newheader, rdataset);
06751                         if (result != ISC_R_SUCCESS) {
06752                                 free_rdataset(rbtdb, rbtdb->common.mctx,
06753                                               newheader);
06754                                 return (result);
06755                         }
06756                 }
06757         }
06758 
06759         /*
06760          * If we're adding a delegation type (e.g. NS or DNAME for a zone,
06761          * just DNAME for the cache), then we need to set the callback bit
06762          * on the node.
06763          */
06764         if (delegating_type(rbtdb, rbtnode, rdataset->type))
06765                 delegating = ISC_TRUE;
06766         else
06767                 delegating = ISC_FALSE;
06768 
06769         /*
06770          * Add to the auxiliary NSEC tree if we're adding an NSEC record.
06771          */
06772         if (rbtnode->nsec != DNS_RBT_NSEC_HAS_NSEC &&
06773             rdataset->type == dns_rdatatype_nsec)
06774                 newnsec = ISC_TRUE;
06775         else
06776                 newnsec = ISC_FALSE;
06777 
06778         /*
06779          * If we're adding a delegation type, adding to the auxiliary NSEC tree,
06780          * or the DB is a cache in an overmem state, hold an exclusive lock on
06781          * the tree.  In the latter case the lock does not necessarily have to
06782          * be acquired but it will help purge stale entries more effectively.
06783          */
06784         if (IS_CACHE(rbtdb) && isc_mem_isovermem(rbtdb->common.mctx))
06785                 cache_is_overmem = ISC_TRUE;
06786         if (delegating || newnsec || cache_is_overmem) {
06787                 tree_locked = ISC_TRUE;
06788                 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
06789         }
06790 
06791         if (cache_is_overmem)
06792                 overmem_purge(rbtdb, rbtnode->locknum, now, tree_locked);
06793 
06794         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
06795                   isc_rwlocktype_write);
06796 
06797         if (rbtdb->rrsetstats != NULL) {
06798                 newheader->attributes |= RDATASET_ATTR_STATCOUNT;
06799                 update_rrsetstats(rbtdb, newheader, ISC_TRUE);
06800         }
06801 
06802         if (IS_CACHE(rbtdb)) {
06803                 if (tree_locked)
06804                         cleanup_dead_nodes(rbtdb, rbtnode->locknum);
06805 
06806                 header = isc_heap_element(rbtdb->heaps[rbtnode->locknum], 1);
06807                 if (header && header->rdh_ttl < now - RBTDB_VIRTUAL)
06808                         expire_header(rbtdb, header, tree_locked,
06809                                       expire_ttl);
06810 
06811                 /*
06812                  * If we've been holding a write lock on the tree just for
06813                  * cleaning, we can release it now.  However, we still need the
06814                  * node lock.
06815                  */
06816                 if (tree_locked && !delegating && !newnsec) {
06817                         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
06818                         tree_locked = ISC_FALSE;
06819                 }
06820         }
06821 
06822         result = ISC_R_SUCCESS;
06823         if (newnsec) {
06824                 dns_rbtnode_t *nsecnode;
06825 
06826                 dns_rbt_fullnamefromnode(rbtnode, name);
06827                 nsecnode = NULL;
06828                 result = dns_rbt_addnode(rbtdb->nsec, name, &nsecnode);
06829                 if (result == ISC_R_SUCCESS) {
06830                         nsecnode->nsec = DNS_RBT_NSEC_NSEC;
06831                         rbtnode->nsec = DNS_RBT_NSEC_HAS_NSEC;
06832                 } else if (result == ISC_R_EXISTS) {
06833                         rbtnode->nsec = DNS_RBT_NSEC_HAS_NSEC;
06834                         result = ISC_R_SUCCESS;
06835                 }
06836         }
06837 
06838         if (result == ISC_R_SUCCESS)
06839                 result = add32(rbtdb, rbtnode, rbtversion, newheader, options,
06840                                ISC_FALSE, addedrdataset, now);
06841         if (result == ISC_R_SUCCESS && delegating)
06842                 rbtnode->find_callback = 1;
06843 
06844         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
06845                     isc_rwlocktype_write);
06846 
06847         if (tree_locked)
06848                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
06849 
06850         /*
06851          * Update the zone's secure status.  If version is non-NULL
06852          * this is deferred until closeversion() is called.
06853          */
06854         if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
06855                 iszonesecure(db, version, rbtdb->origin_node);
06856 
06857         return (result);
06858 }
06859 
/*
 * Subtract the rdata in 'rdataset' from the rdataset of the same type (and
 * covered type) stored at 'node', recording the change in 'version'.
 *
 * 'version' must be non-NULL and belong to this database.  For zone
 * databases the node's NSEC3 marking must agree with the rdataset's type.
 *
 * Outcomes, as implemented below:
 *  - ISC_R_SUCCESS: some rdata remains.  The reduced slab is linked in
 *    front of the old top header and, if 'newrdataset' is non-NULL, bound
 *    to it.
 *  - DNS_R_NXRRSET: the subtraction removed all of the rdata.  A
 *    NONEXISTENT header is linked in instead; if DNS_DBSUB_WANTOLD is set
 *    and 'newrdataset' is non-NULL, the pre-existing header is bound to
 *    'newrdataset'.
 *  - DNS_R_NOTEXACT: DNS_DBSUB_EXACT was requested and the TTLs differ, or
 *    no live rdataset of this type was found at the node.
 *  - DNS_R_UNCHANGED: no live rdataset of this type was found and
 *    DNS_DBSUB_EXACT was not requested.
 *  - ISC_R_NOMEMORY, or any other failure returned by
 *    dns_rdataslab_fromrdataset() / dns_rdataslab_subtract(), passed
 *    through unchanged.
 *
 * The node lock is held in write mode from just before add_changed() until
 * the 'unlock' label.
 */
06860 static isc_result_t
06861 subtractrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
06862                  dns_rdataset_t *rdataset, unsigned int options,
06863                  dns_rdataset_t *newrdataset)
06864 {
06865         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
06866         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
06867         rbtdb_version_t *rbtversion = version;
06868         rdatasetheader_t *topheader, *topheader_prev, *header, *newheader;
06869         unsigned char *subresult;
06870         isc_region_t region;
06871         isc_result_t result;
06872         rbtdb_changed_t *changed;
06873 
06874         REQUIRE(VALID_RBTDB(rbtdb));
06875         REQUIRE(rbtversion != NULL && rbtversion->rbtdb == rbtdb);
06876 
              /*
               * In a zone database, NSEC3-related rdatasets may only be
               * subtracted at NSEC3 nodes, and all other rdatasets only at
               * non-NSEC3 nodes.
               */
06877         if (rbtdb->common.methods == &zone_methods)
06878                 REQUIRE(((rbtnode->nsec == DNS_RBT_NSEC_NSEC3 &&
06879                           (rdataset->type == dns_rdatatype_nsec3 ||
06880                            rdataset->covers == dns_rdatatype_nsec3)) ||
06881                          (rbtnode->nsec != DNS_RBT_NSEC_NSEC3 &&
06882                            rdataset->type != dns_rdatatype_nsec3 &&
06883                            rdataset->covers != dns_rdatatype_nsec3)));
06884 
              /*
               * Convert the caller's rdataset into a slab whose base is used
               * as an rdatasetheader_t, then fill in that header.
               */
06885         result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
06886                                             &region, sizeof(rdatasetheader_t));
06887         if (result != ISC_R_SUCCESS)
06888                 return (result);
06889         newheader = (rdatasetheader_t *)region.base;
06890         init_rdataset(rbtdb, newheader);
06891         set_ttl(rbtdb, newheader, rdataset->ttl);
06892         newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
06893                                                 rdataset->covers);
06894         newheader->attributes = 0;
06895         newheader->serial = rbtversion->serial;
06896         newheader->trust = 0;
06897         newheader->noqname = NULL;
06898         newheader->closest = NULL;
06899         newheader->count = init_count++;
06900         newheader->additional_auth = NULL;
06901         newheader->additional_glue = NULL;
06902         newheader->last_used = 0;
06903         newheader->node = rbtnode;
06904         if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
06905                 newheader->attributes |= RDATASET_ATTR_RESIGN;
06906                 newheader->resign = rdataset->resign;
06907         } else
06908                 newheader->resign = 0;
06909 
06910         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
06911                   isc_rwlocktype_write);
06912 
06913         changed = add_changed(rbtdb, rbtversion, rbtnode);
06914         if (changed == NULL) {
06915                 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
06916                 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
06917                             isc_rwlocktype_write);
06918                 return (ISC_R_NOMEMORY);
06919         }
06920 
              /*
               * Find the top-of-chain header with the matching type, keeping
               * track of its predecessor so the replacement can be spliced in.
               */
06921         topheader_prev = NULL;
06922         for (topheader = rbtnode->data;
06923              topheader != NULL;
06924              topheader = topheader->next) {
06925                 if (topheader->type == newheader->type)
06926                         break;
06927                 topheader_prev = topheader;
06928         }
06929         /*
06930          * If topheader isn't NULL, we've found the right type.  There may be
06931          * IGNORE rdatasets between the top of the chain and the first real
06932          * data.  We skip over them.
06933          */
06934         header = topheader;
06935         while (header != NULL && IGNORE(header))
06936                 header = header->down;
06937         if (header != NULL && EXISTS(header)) {
06938                 unsigned int flags = 0;
06939                 subresult = NULL;
06940                 result = ISC_R_SUCCESS;
06941                 if ((options & DNS_DBSUB_EXACT) != 0) {
06942                         flags |= DNS_RDATASLAB_EXACT;
06943                         if (newheader->rdh_ttl != header->rdh_ttl)
06944                                 result = DNS_R_NOTEXACT;
06945                 }
06946                 if (result == ISC_R_SUCCESS)
06947                         result = dns_rdataslab_subtract(
06948                                         (unsigned char *)header,
06949                                         (unsigned char *)newheader,
06950                                         (unsigned int)(sizeof(*newheader)),
06951                                         rbtdb->common.mctx,
06952                                         rbtdb->common.rdclass,
06953                                         (dns_rdatatype_t)header->type,
06954                                         flags, &subresult);
06955                 if (result == ISC_R_SUCCESS) {
                              /*
                               * The request slab is no longer needed; the
                               * subtraction result becomes the new header.
                               */
06956                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
06957                         newheader = (rdatasetheader_t *)subresult;
06958                         init_rdataset(rbtdb, newheader);
06959                         update_newheader(newheader, header);
06960                         /*
06961                          * We have to set the serial since the rdataslab
06962                          * subtraction routine copies the reserved portion of
06963                          * header, not newheader.
06964                          */
06965                         newheader->serial = rbtversion->serial;
06966                         /*
06967                          * XXXJT: dns_rdataslab_subtract() copied the pointers
06968                          * to additional info.  We need to clear these fields
06969                          * to avoid having duplicated references.
06970                          */
06971                         newheader->additional_auth = NULL;
06972                         newheader->additional_glue = NULL;
06973                 } else if (result == DNS_R_NXRRSET) {
06974                         /*
06975                          * This subtraction would remove all of the rdata;
06976                          * add a nonexistent header instead.
06977                          */
06978                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
06979                         newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
06980                         if (newheader == NULL) {
06981                                 result = ISC_R_NOMEMORY;
06982                                 goto unlock;
06983                         }
06984                         init_rdataset(rbtdb, newheader);
06985                         set_ttl(rbtdb, newheader, 0);
06986                         newheader->type = topheader->type;
06987                         newheader->attributes = RDATASET_ATTR_NONEXISTENT;
06988                         newheader->trust = 0;
06989                         newheader->serial = rbtversion->serial;
06990                         newheader->noqname = NULL;
06991                         newheader->closest = NULL;
06992                         newheader->count = 0;
06993                         newheader->additional_auth = NULL;
06994                         newheader->additional_glue = NULL;
06995                         newheader->node = rbtnode;
06996                         newheader->resign = 0;
06997                         newheader->last_used = 0;
06998                 } else {
                              /*
                               * Any other result (including DNS_R_NOTEXACT from
                               * the TTL check above) is returned to the caller
                               * with the node left untouched.
                               */
06999                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
07000                         goto unlock;
07001                 }
07002 
07003                 /*
07004                  * If we're here, we want to link newheader in front of
07005                  * topheader.
07006                  */
07007                 INSIST(rbtversion->serial >= topheader->serial);
07008                 if (topheader_prev != NULL)
07009                         topheader_prev->next = newheader;
07010                 else
07011                         rbtnode->data = newheader;
07012                 newheader->next = topheader->next;
07013                 newheader->down = topheader;
07014                 topheader->next = newheader;
07015                 rbtnode->dirty = 1;
07016                 changed->dirty = ISC_TRUE;
07017                 resign_delete(rbtdb, rbtversion, header);
07018         } else {
07019                 /*
07020                  * The rdataset doesn't exist, so we don't need to do anything
07021                  * to satisfy the deletion request.
07022                  */
07023                 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
07024                 if ((options & DNS_DBSUB_EXACT) != 0)
07025                         result = DNS_R_NOTEXACT;
07026                 else
07027                         result = DNS_R_UNCHANGED;
07028         }
07029 
              /*
               * Hand the caller either the reduced rdataset (on success) or,
               * when everything was removed and DNS_DBSUB_WANTOLD is set, the
               * rdataset as it was before the subtraction.
               */
07030         if (result == ISC_R_SUCCESS && newrdataset != NULL)
07031                 bind_rdataset(rbtdb, rbtnode, newheader, 0, newrdataset);
07032 
07033         if (result == DNS_R_NXRRSET && newrdataset != NULL &&
07034             (options & DNS_DBSUB_WANTOLD) != 0)
07035                 bind_rdataset(rbtdb, rbtnode, header, 0, newrdataset);
07036 
07037  unlock:
07038         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
07039                     isc_rwlocktype_write);
07040 
07041         /*
07042          * Update the zone's secure status.  If version is non-NULL
07043          * this is deferred until closeversion() is called.
07044          *
07044          * NOTE(review): 'version' is REQUIREd to be non-NULL at the top of
07044          * this function, so this branch looks unreachable here -- confirm.
07044          */
07045         if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
07046                 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
07047 
07048         return (result);
07049 }
07050 
07051 static isc_result_t
07052 deleterdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
07053                dns_rdatatype_t type, dns_rdatatype_t covers)
07054 {
07055         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
07056         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
07057         rbtdb_version_t *rbtversion = version;
07058         isc_result_t result;
07059         rdatasetheader_t *newheader;
07060 
07061         REQUIRE(VALID_RBTDB(rbtdb));
07062         INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb);
07063 
07064         if (type == dns_rdatatype_any)
07065                 return (ISC_R_NOTIMPLEMENTED);
07066         if (type == dns_rdatatype_rrsig && covers == 0)
07067                 return (ISC_R_NOTIMPLEMENTED);
07068 
07069         newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
07070         if (newheader == NULL)
07071                 return (ISC_R_NOMEMORY);
07072         init_rdataset(rbtdb, newheader);
07073         set_ttl(rbtdb, newheader, 0);
07074         newheader->type = RBTDB_RDATATYPE_VALUE(type, covers);
07075         newheader->attributes = RDATASET_ATTR_NONEXISTENT;
07076         newheader->trust = 0;
07077         newheader->noqname = NULL;
07078         newheader->closest = NULL;
07079         newheader->additional_auth = NULL;
07080         newheader->additional_glue = NULL;
07081         if (rbtversion != NULL)
07082                 newheader->serial = rbtversion->serial;
07083         else
07084                 newheader->serial = 0;
07085         newheader->count = 0;
07086         newheader->last_used = 0;
07087         newheader->node = rbtnode;
07088 
07089         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
07090                   isc_rwlocktype_write);
07091 
07092         result = add32(rbtdb, rbtnode, rbtversion, newheader, DNS_DBADD_FORCE,
07093                        ISC_FALSE, NULL, 0);
07094 
07095         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
07096                     isc_rwlocktype_write);
07097 
07098         /*
07099          * Update the zone's secure status.  If version is non-NULL
07100          * this is deferred until closeversion() is called.
07101          */
07102         if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
07103                 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
07104 
07105         return (result);
07106 }
07107 
07108 /*
07109  * load a non-NSEC3 node in the main tree and optionally to the auxiliary NSEC
07110  */
07111 static isc_result_t
07112 loadnode(dns_rbtdb_t *rbtdb, dns_name_t *name, dns_rbtnode_t **nodep,
07113          isc_boolean_t hasnsec)
07114 {
07115         isc_result_t noderesult, nsecresult, tmpresult;
07116         dns_rbtnode_t *nsecnode = NULL, *node = NULL;
07117 
07118         noderesult = dns_rbt_addnode(rbtdb->tree, name, &node);
07119         if (rbtdb->rpzs != NULL && noderesult == ISC_R_SUCCESS) {
07120                 noderesult = dns_rpz_add(rbtdb->load_rpzs, rbtdb->rpz_num,
07121                                          name);
07122                 if (noderesult == ISC_R_SUCCESS) {
07123                         node->rpz = 1;
07124                 } else  {
07125                         /*
07126                          * Remove the node we just added above.
07127                          */
07128                         tmpresult = dns_rbt_deletenode(rbtdb->tree, node,
07129                                                        ISC_FALSE);
07130                         if (tmpresult != ISC_R_SUCCESS)
07131                                 isc_log_write(dns_lctx,
07132                                               DNS_LOGCATEGORY_DATABASE,
07133                                               DNS_LOGMODULE_CACHE,
07134                                               ISC_LOG_WARNING,
07135                                               "loading_addrdataset: "
07136                                               "dns_rbt_deletenode: %s after "
07137                                               "dns_rbt_addnode(NSEC): %s",
07138                                               isc_result_totext(tmpresult),
07139                                               isc_result_totext(ISC_R_SUCCESS));
07140                 }
07141         }
07142         if (!hasnsec)
07143                 goto done;
07144         if (noderesult == ISC_R_EXISTS) {
07145                 /*
07146                  * Add a node to the auxiliary NSEC tree for an old node
07147                  * just now getting an NSEC record.
07148                  */
07149                 if (node->nsec == DNS_RBT_NSEC_HAS_NSEC)
07150                         goto done;
07151         } else if (noderesult != ISC_R_SUCCESS)
07152                 goto done;
07153 
07154         /*
07155          * Build the auxiliary tree for NSECs as we go.
07156          * This tree speeds searches for closest NSECs that would otherwise
07157          * need to examine many irrelevant nodes in large TLDs.
07158          *
07159          * Add nodes to the auxiliary tree after corresponding nodes have
07160          * been added to the main tree.
07161          */
07162         nsecresult = dns_rbt_addnode(rbtdb->nsec, name, &nsecnode);
07163         if (nsecresult == ISC_R_SUCCESS) {
07164                 nsecnode->nsec = DNS_RBT_NSEC_NSEC;
07165                 node->nsec = DNS_RBT_NSEC_HAS_NSEC;
07166                 goto done;
07167         }
07168 
07169         if (nsecresult == ISC_R_EXISTS) {
07170 #if 1 /* 0 */
07171                 isc_log_write(dns_lctx,
07172                               DNS_LOGCATEGORY_DATABASE,
07173                               DNS_LOGMODULE_CACHE,
07174                               ISC_LOG_WARNING,
07175                               "addnode: NSEC node already exists");
07176 #endif
07177                 node->nsec = DNS_RBT_NSEC_HAS_NSEC;
07178                 goto done;
07179         }
07180 
07181         if (noderesult == ISC_R_SUCCESS) {
07182                 /*
07183                  * Clean rpz entries added above.
07184                  */
07185                 if (rbtdb->rpzs != NULL && node->rpz)
07186                         dns_rpz_delete(rbtdb->load_rpzs, rbtdb->rpz_num, name);
07187                 /*
07188                  * Remove the node we just added above.
07189                  */
07190                 tmpresult = dns_rbt_deletenode(rbtdb->tree, node, ISC_FALSE);
07191                 if (tmpresult != ISC_R_SUCCESS)
07192                         isc_log_write(dns_lctx,
07193                                       DNS_LOGCATEGORY_DATABASE,
07194                                       DNS_LOGMODULE_CACHE,
07195                                       ISC_LOG_WARNING,
07196                                       "loading_addrdataset: "
07197                                       "dns_rbt_deletenode: %s after "
07198                                       "dns_rbt_addnode(NSEC): %s",
07199                                       isc_result_totext(tmpresult),
07200                                       isc_result_totext(noderesult));
07201         }
07202 
07203         /*
07204          * Set the error condition to be returned.
07205          */
07206         noderesult = nsecresult;
07207 
07208  done:
07209         if (noderesult == ISC_R_SUCCESS || noderesult == ISC_R_EXISTS)
07210                 *nodep = node;
07211 
07212         return (noderesult);
07213 }
07214 
07215 static isc_result_t
07216 loading_addrdataset(void *arg, dns_name_t *name, dns_rdataset_t *rdataset) {
07217         rbtdb_load_t *loadctx = arg;
07218         dns_rbtdb_t *rbtdb = loadctx->rbtdb;
07219         dns_rbtnode_t *node;
07220         isc_result_t result;
07221         isc_region_t region;
07222         rdatasetheader_t *newheader;
07223 
07224         /*
07225          * This routine does no node locking.  See comments in
07226          * 'load' below for more information on loading and
07227          * locking.
07228          */
07229 
07230 
07231         /*
07232          * SOA records are only allowed at top of zone.
07233          */
07234         if (rdataset->type == dns_rdatatype_soa &&
07235             !IS_CACHE(rbtdb) && !dns_name_equal(name, &rbtdb->common.origin))
07236                 return (DNS_R_NOTZONETOP);
07237 
07238         if (rdataset->type != dns_rdatatype_nsec3 &&
07239             rdataset->covers != dns_rdatatype_nsec3)
07240                 add_empty_wildcards(rbtdb, name);
07241 
07242         if (dns_name_iswildcard(name)) {
07243                 /*
07244                  * NS record owners cannot legally be wild cards.
07245                  */
07246                 if (rdataset->type == dns_rdatatype_ns)
07247                         return (DNS_R_INVALIDNS);
07248                 /*
07249                  * NSEC3 record owners cannot legally be wild cards.
07250                  */
07251                 if (rdataset->type == dns_rdatatype_nsec3)
07252                         return (DNS_R_INVALIDNSEC3);
07253                 result = add_wildcard_magic(rbtdb, name);
07254                 if (result != ISC_R_SUCCESS)
07255                         return (result);
07256         }
07257 
07258         node = NULL;
07259         if (rdataset->type == dns_rdatatype_nsec3 ||
07260             rdataset->covers == dns_rdatatype_nsec3) {
07261                 result = dns_rbt_addnode(rbtdb->nsec3, name, &node);
07262                 if (result == ISC_R_SUCCESS)
07263                         node->nsec = DNS_RBT_NSEC_NSEC3;
07264         } else if (rdataset->type == dns_rdatatype_nsec) {
07265                 result = loadnode(rbtdb, name, &node, ISC_TRUE);
07266         } else {
07267                 result = loadnode(rbtdb, name, &node, ISC_FALSE);
07268         }
07269         if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
07270                 return (result);
07271         if (result == ISC_R_SUCCESS) {
07272                 dns_name_t foundname;
07273                 dns_name_init(&foundname, NULL);
07274                 dns_rbt_namefromnode(node, &foundname);
07275 #ifdef DNS_RBT_USEHASH
07276                 node->locknum = node->hashval % rbtdb->node_lock_count;
07277 #else
07278                 node->locknum = dns_name_hash(&foundname, ISC_TRUE) %
07279                         rbtdb->node_lock_count;
07280 #endif
07281         }
07282 
07283         result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
07284                                             &region,
07285                                             sizeof(rdatasetheader_t));
07286         if (result != ISC_R_SUCCESS)
07287                 return (result);
07288         newheader = (rdatasetheader_t *)region.base;
07289         init_rdataset(rbtdb, newheader);
07290         set_ttl(rbtdb, newheader,
07291                 rdataset->ttl + loadctx->now); /* XXX overflow check */
07292         newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
07293                                                 rdataset->covers);
07294         newheader->attributes = 0;
07295         newheader->trust = rdataset->trust;
07296         newheader->serial = 1;
07297         newheader->noqname = NULL;
07298         newheader->closest = NULL;
07299         newheader->count = init_count++;
07300         newheader->additional_auth = NULL;
07301         newheader->additional_glue = NULL;
07302         newheader->last_used = 0;
07303         newheader->node = node;
07304         setownercase(newheader, name);
07305 
07306         if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
07307                 newheader->attributes |= RDATASET_ATTR_RESIGN;
07308                 newheader->resign = rdataset->resign;
07309         } else
07310                 newheader->resign = 0;
07311 
07312         result = add32(rbtdb, node, rbtdb->current_version, newheader,
07313                        DNS_DBADD_MERGE, ISC_TRUE, NULL, 0);
07314         if (result == ISC_R_SUCCESS &&
07315             delegating_type(rbtdb, node, rdataset->type))
07316                 node->find_callback = 1;
07317         else if (result == DNS_R_UNCHANGED)
07318                 result = ISC_R_SUCCESS;
07319 
07320         return (result);
07321 }
07322 
07323 static isc_result_t
07324 rbt_datafixer(dns_rbtnode_t *rbtnode, void *base, size_t filesize,
07325               void *arg, isc_uint64_t *crc)
07326 {
07327         isc_result_t result;
07328         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *) arg;
07329         rdatasetheader_t *header;
07330         unsigned char *limit = ((unsigned char *) base) + filesize;
07331         unsigned char *p;
07332         size_t size;
07333 
07334         REQUIRE(rbtnode != NULL);
07335 
07336         for (header = rbtnode->data; header != NULL; header = header->next) {
07337                 p = (unsigned char *) header;
07338 
07339                 size = dns_rdataslab_size(p, sizeof(*header));
07340                 isc_crc64_update(crc, p, size);
07341 #ifdef DEBUG
07342                 hexdump("hashing header", p, sizeof(rdatasetheader_t));
07343                 hexdump("hashing slab", p + sizeof(rdatasetheader_t),
07344                         size - sizeof(rdatasetheader_t));
07345 #endif
07346                 header->serial = 1;
07347                 header->is_mmapped = 1;
07348                 header->node = rbtnode;
07349                 header->node_is_relative = 0;
07350 
07351                 if (rbtdb != NULL && RESIGN(header) && header->resign != 0) {
07352                         int idx = header->node->locknum;
07353                         result = isc_heap_insert(rbtdb->heaps[idx], header);
07354                         if (result != ISC_R_SUCCESS)
07355                                 return (result);
07356                 }
07357 
07358                 if (header->next != NULL) {
07359                         size_t cooked = dns_rbt_serialize_align(size);
07360                         if ((uintptr_t)header->next !=
07361                                     (p - (unsigned char *)base) + cooked)
07362                                 return (ISC_R_INVALIDFILE);
07363                         header->next = (rdatasetheader_t *)(p + cooked);
07364                         header->next_is_relative = 0;
07365                         if ((header->next < (rdatasetheader_t *) base) ||
07366                             (header->next > (rdatasetheader_t *) limit))
07367                                 return (ISC_R_INVALIDFILE);
07368                 }
07369         }
07370 
07371         return (ISC_R_SUCCESS);
07372 }
07373 
07374 /*
07375  * Load the RBT database from the image in 'f'
07376  */
07377 static isc_result_t
07378 deserialize32(void *arg, FILE *f, off_t offset) {
07379         isc_result_t result;
07380         rbtdb_load_t *loadctx = arg;
07381         dns_rbtdb_t *rbtdb = loadctx->rbtdb;
07382         rbtdb_file_header_t *header;
07383         int fd;
07384         off_t filesize = 0;
07385         char *base;
07386         dns_rbt_t *temporary_rbt = NULL;
07387         int protect, flags;
07388 
07389         REQUIRE(VALID_RBTDB(rbtdb));
07390 
07391         /*
07392          * TODO CKB: since this is read-write (had to be to add nodes later)
07393          * we will need to lock the file or the nodes in it before modifying
07394          * the nodes in the file.
07395          */
07396 
07397         /* Map in the whole file in one go */
07398         fd = fileno(f);
07399         isc_file_getsizefd(fd, &filesize);
07400         protect = PROT_READ|PROT_WRITE;
07401         flags = MAP_PRIVATE;
07402 #ifdef MAP_FILE
07403         flags |= MAP_FILE;
07404 #endif
07405 
07406         base = isc_file_mmap(NULL, filesize, protect, flags, fd, 0);
07407         if (base == NULL || base == MAP_FAILED)
07408                 return (ISC_R_FAILURE);
07409 
07410         header = (rbtdb_file_header_t *)(base + offset);
07411 
07412         rbtdb->mmap_location = base;
07413         rbtdb->mmap_size = (size_t) filesize;
07414         rbtdb->origin_node = NULL;
07415 
07416         if (header->tree != 0) {
07417                 result = dns_rbt_deserialize_tree(base, filesize,
07418                                                   (off_t) header->tree,
07419                                                   rbtdb->common.mctx,
07420                                                   delete_callback, rbtdb,
07421                                                   rbt_datafixer, rbtdb,
07422                                                   &rbtdb->origin_node,
07423                                                   &temporary_rbt);
07424                 if (temporary_rbt != NULL) {
07425                         dns_rbt_destroy(&rbtdb->tree);
07426                         rbtdb->tree = temporary_rbt;
07427                         temporary_rbt = NULL;
07428 
07429                         rbtdb->origin_node =
07430                                 (dns_rbtnode_t *)(header->tree + base + 1024);
07431                 }
07432                 if (result != ISC_R_SUCCESS)
07433                         return (result);
07434         }
07435 
07436         if (header->nsec != 0) {
07437                 result = dns_rbt_deserialize_tree(base, filesize,
07438                                                   (off_t) header->nsec,
07439                                                   rbtdb->common.mctx,
07440                                                   delete_callback, rbtdb,
07441                                                   rbt_datafixer, rbtdb,
07442                                                   NULL, &temporary_rbt);
07443                 if (temporary_rbt != NULL) {
07444                         dns_rbt_destroy(&rbtdb->nsec);
07445                         rbtdb->nsec = temporary_rbt;
07446                         temporary_rbt = NULL;
07447                 }
07448                 if (result != ISC_R_SUCCESS)
07449                         return (result);
07450         }
07451 
07452         if (header->nsec3 != 0) {
07453                 result = dns_rbt_deserialize_tree(base, filesize,
07454                                                   (off_t) header->nsec3,
07455                                                   rbtdb->common.mctx,
07456                                                   delete_callback, rbtdb,
07457                                                   rbt_datafixer, rbtdb,
07458                                                   NULL, &temporary_rbt);
07459                 if (temporary_rbt != NULL) {
07460                         dns_rbt_destroy(&rbtdb->nsec3);
07461                         rbtdb->nsec3 = temporary_rbt;
07462                         temporary_rbt = NULL;
07463                 }
07464                 if (result != ISC_R_SUCCESS)
07465                         return (result);
07466         }
07467 
07468         return (ISC_R_SUCCESS);
07469 }
07470 
07471 static isc_result_t
07472 beginload(dns_db_t *db, dns_rdatacallbacks_t *callbacks) {
07473         rbtdb_load_t *loadctx;
07474         dns_rbtdb_t *rbtdb;
07475         rbtdb = (dns_rbtdb_t *)db;
07476 
07477         REQUIRE(DNS_CALLBACK_VALID(callbacks));
07478         REQUIRE(VALID_RBTDB(rbtdb));
07479 
07480         loadctx = isc_mem_get(rbtdb->common.mctx, sizeof(*loadctx));
07481         if (loadctx == NULL)
07482                 return (ISC_R_NOMEMORY);
07483 
07484         loadctx->rbtdb = rbtdb;
07485         if (IS_CACHE(rbtdb))
07486                 isc_stdtime_get(&loadctx->now);
07487         else
07488                 loadctx->now = 0;
07489 
07490         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
07491 
07492         if (rbtdb->rpzs != NULL) {
07493                 isc_result_t result;
07494 
07495                 result = dns_rpz_beginload(&rbtdb->load_rpzs,
07496                                            rbtdb->rpzs, rbtdb->rpz_num);
07497                 if (result != ISC_R_SUCCESS) {
07498                         isc_mem_put(rbtdb->common.mctx, loadctx,
07499                                     sizeof(*loadctx));
07500                         return (result);
07501                 }
07502         }
07503 
07504         REQUIRE((rbtdb->attributes & (RBTDB_ATTR_LOADED|RBTDB_ATTR_LOADING))
07505                 == 0);
07506         rbtdb->attributes |= RBTDB_ATTR_LOADING;
07507 
07508         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
07509 
07510         callbacks->add = loading_addrdataset;
07511         callbacks->add_private = loadctx;
07512         callbacks->deserialize = deserialize32;
07513         callbacks->deserialize_private = loadctx;
07514 
07515         return (ISC_R_SUCCESS);
07516 }
07517 
07518 static isc_result_t
07519 endload(dns_db_t *db, dns_rdatacallbacks_t *callbacks) {
07520         rbtdb_load_t *loadctx;
07521         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
07522 
07523         REQUIRE(VALID_RBTDB(rbtdb));
07524         REQUIRE(DNS_CALLBACK_VALID(callbacks));
07525         loadctx = callbacks->add_private;
07526         REQUIRE(loadctx != NULL);
07527         REQUIRE(loadctx->rbtdb == rbtdb);
07528 
07529         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
07530 
07531         REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADING) != 0);
07532         REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADED) == 0);
07533 
07534         rbtdb->attributes &= ~RBTDB_ATTR_LOADING;
07535         rbtdb->attributes |= RBTDB_ATTR_LOADED;
07536 
07537         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
07538 
07539         /*
07540          * If there's a KEY rdataset at the zone origin containing a
07541          * zone key, we consider the zone secure.
07542          */
07543         if (! IS_CACHE(rbtdb) && rbtdb->origin_node != NULL)
07544                 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
07545 
07546         callbacks->add = NULL;
07547         callbacks->add_private = NULL;
07548         callbacks->deserialize = NULL;
07549         callbacks->deserialize_private = NULL;
07550 
07551         isc_mem_put(rbtdb->common.mctx, loadctx, sizeof(*loadctx));
07552 
07553         return (ISC_R_SUCCESS);
07554 }
07555 
07556 /*
07557  * helper function to handle writing out the rdataset data pointed to
07558  * by the void *data pointer in the dns_rbtnode
07559  */
07560 static isc_result_t
07561 rbt_datawriter(FILE *rbtfile, unsigned char *data, void *arg,
07562                isc_uint64_t *crc)
07563 {
07564         rbtdb_version_t *version = (rbtdb_version_t *) arg;
07565         rbtdb_serial_t serial;
07566         rdatasetheader_t newheader;
07567         rdatasetheader_t *header = (rdatasetheader_t *) data, *next;
07568         off_t where;
07569         size_t cooked, size;
07570         unsigned char *p;
07571         isc_result_t result = ISC_R_SUCCESS;
07572         char pad[sizeof(char *)];
07573         uintptr_t off;
07574 
07575         REQUIRE(rbtfile != NULL);
07576         REQUIRE(data != NULL);
07577         REQUIRE(version != NULL);
07578 
07579         serial = version->serial;
07580 
07581         for (; header != NULL; header = next) {
07582                 next = header->next;
07583                 do {
07584                         if (header->serial <= serial && !IGNORE(header)) {
07585                                 if (NONEXISTENT(header))
07586                                         header = NULL;
07587                                 break;
07588                         } else
07589                                 header = header->down;
07590                 } while (header != NULL);
07591 
07592                 if (header == NULL)
07593                         continue;
07594 
07595                 CHECK(isc_stdio_tell(rbtfile, &where));
07596                 size = dns_rdataslab_size((unsigned char *) header,
07597                                           sizeof(rdatasetheader_t));
07598 
07599                 p = (unsigned char *) header;
07600                 memmove(&newheader, p, sizeof(rdatasetheader_t));
07601                 newheader.down = NULL;
07602                 newheader.next = NULL;
07603                 off = where;
07604                 if ((off_t)off != where)
07605                         return (ISC_R_RANGE);
07606                 newheader.node = (dns_rbtnode_t *) off;
07607                 newheader.node_is_relative = 1;
07608                 newheader.serial = 1;
07609 
07610                 /*
07611                  * Round size up to the next pointer sized offset so it
07612                  * will be properly aligned when read back in.
07613                  */
07614                 cooked = dns_rbt_serialize_align(size);
07615                 if (next != NULL) {
07616                         newheader.next = (rdatasetheader_t *) (off + cooked);
07617                         newheader.next_is_relative = 1;
07618                 }
07619 
07620 #ifdef DEBUG
07621                 hexdump("writing header", (unsigned char *) &newheader,
07622                         sizeof(rdatasetheader_t));
07623                 hexdump("writing slab", p + sizeof(rdatasetheader_t),
07624                         size - sizeof(rdatasetheader_t));
07625 #endif
07626                 isc_crc64_update(crc, (unsigned char *) &newheader,
07627                                  sizeof(rdatasetheader_t));
07628                 CHECK(isc_stdio_write(&newheader, sizeof(rdatasetheader_t), 1,
07629                                       rbtfile, NULL));
07630 
07631                 isc_crc64_update(crc, p + sizeof(rdatasetheader_t),
07632                                  size - sizeof(rdatasetheader_t));
07633                 CHECK(isc_stdio_write(p + sizeof(rdatasetheader_t),
07634                                       size - sizeof(rdatasetheader_t), 1,
07635                                       rbtfile, NULL));
07636                 /*
07637                  * Pad to force alignment.
07638                  */
07639                 if (size != (size_t) cooked) {
07640                         memset(pad, 0, sizeof(pad));
07641                         CHECK(isc_stdio_write(pad, cooked - size, 1,
07642                                               rbtfile, NULL));
07643                 }
07644         }
07645 
07646  failure:
07647         return (result);
07648 }
07649 
07650 /*
07651  * Write out a zeroed header as a placeholder.  Doing this ensures
07652  * that the file will not read while it is partially written, should
07653  * writing fail or be interrupted.
07654  */
07655 static isc_result_t
07656 rbtdb_zero_header(FILE *rbtfile) {
07657         char buffer[RBTDB_HEADER_LENGTH];
07658         isc_result_t result;
07659 
07660         memset(buffer, 0, RBTDB_HEADER_LENGTH);
07661         result = isc_stdio_write(buffer, 1, RBTDB_HEADER_LENGTH, rbtfile, NULL);
07662         fflush(rbtfile);
07663 
07664         return (result);
07665 }
07666 
07667 static isc_once_t once = ISC_ONCE_INIT;
07668 
07669 static void
07670 init_file_version(void) {
07671         int n;
07672 
07673         memset(FILE_VERSION, 0, sizeof(FILE_VERSION));
07674         n = snprintf(FILE_VERSION, sizeof(FILE_VERSION),
07675                  "RBTDB Image %s %s", dns_major, dns_mapapi);
07676         INSIST(n > 0 && (unsigned int)n < sizeof(FILE_VERSION));
07677 }
07678 
07679 /*
07680  * Write the file header out, recording the locations of the three
07681  * RBT's used in the rbtdb: tree, nsec, and nsec3, and including NodeDump
07682  * version information and any information stored in the rbtdb object
07683  * itself that should be stored here.
07684  */
07685 static isc_result_t
07686 rbtdb_write_header(FILE *rbtfile, off_t tree_location, off_t nsec_location,
07687                    off_t nsec3_location)
07688 {
07689         rbtdb_file_header_t header;
07690         isc_result_t result;
07691 
07692         RUNTIME_CHECK(isc_once_do(&once, init_file_version) == ISC_R_SUCCESS);
07693 
07694         memset(&header, 0, sizeof(rbtdb_file_header_t));
07695         memmove(header.version1, FILE_VERSION, sizeof(header.version1));
07696         memmove(header.version2, FILE_VERSION, sizeof(header.version2));
07697         header.ptrsize = (isc_uint32_t) sizeof(void *);
07698         header.bigendian = (1 == htonl(1)) ? 1 : 0;
07699         header.tree = (isc_uint64_t) tree_location;
07700         header.nsec = (isc_uint64_t) nsec_location;
07701         header.nsec3 = (isc_uint64_t) nsec3_location;
07702         result = isc_stdio_write(&header, 1, sizeof(rbtdb_file_header_t),
07703                               rbtfile, NULL);
07704         fflush(rbtfile);
07705 
07706         return (result);
07707 }
07708 
07709 static isc_result_t
07710 serialize(dns_db_t *db, dns_dbversion_t *ver, FILE *rbtfile) {
07711         rbtdb_version_t *version = (rbtdb_version_t *) ver;
07712         dns_rbtdb_t *rbtdb;
07713         isc_result_t result;
07714         off_t tree_location, nsec_location, nsec3_location, header_location;
07715 
07716         rbtdb = (dns_rbtdb_t *)db;
07717 
07718         REQUIRE(VALID_RBTDB(rbtdb));
07719         REQUIRE(rbtfile != NULL);
07720 
07721         /* Ensure we're writing to a plain file */
07722         CHECK(isc_file_isplainfilefd(fileno(rbtfile)));
07723 
07724         /*
07725          * first, write out a zeroed header to store rbtdb information
07726          *
07727          * then for each of the three trees, store the current position
07728          * in the file and call dns_rbt_serialize_tree
07729          *
07730          * finally, write out the rbtdb header, storing the locations of the
07731          * rbtheaders
07732          *
07733          * NOTE: need to do something better with the return codes, &= will
07734          * not work.
07735          */
07736         CHECK(isc_stdio_tell(rbtfile, &header_location));
07737         CHECK(rbtdb_zero_header(rbtfile));
07738         CHECK(dns_rbt_serialize_tree(rbtfile, rbtdb->tree, rbt_datawriter,
07739                                      version, &tree_location));
07740         CHECK(dns_rbt_serialize_tree(rbtfile, rbtdb->nsec, rbt_datawriter,
07741                                      version, &nsec_location));
07742         CHECK(dns_rbt_serialize_tree(rbtfile, rbtdb->nsec3, rbt_datawriter,
07743                                      version, &nsec3_location));
07744 
07745         CHECK(isc_stdio_seek(rbtfile, header_location, SEEK_SET));
07746         CHECK(rbtdb_write_header(rbtfile, tree_location, nsec_location,
07747                                  nsec3_location));
07748  failure:
07749         return (result);
07750 }
07751 
07752 static isc_result_t
07753 dump(dns_db_t *db, dns_dbversion_t *version, const char *filename,
07754      dns_masterformat_t masterformat)
07755 {
07756         dns_rbtdb_t *rbtdb;
07757         rbtdb_version_t *rbtversion = version;
07758 
07759         rbtdb = (dns_rbtdb_t *)db;
07760 
07761         REQUIRE(VALID_RBTDB(rbtdb));
07762         INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb);
07763 
07764         return (dns_master_dump2(rbtdb->common.mctx, db, version,
07765                                  &dns_master_style_default,
07766                                  filename, masterformat));
07767 }
07768 
07769 static void
07770 delete_callback(void *data, void *arg) {
07771         dns_rbtdb_t *rbtdb = arg;
07772         rdatasetheader_t *current, *next;
07773         unsigned int locknum;
07774 
07775         current = data;
07776         locknum = current->node->locknum;
07777         NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
07778         while (current != NULL) {
07779                 next = current->next;
07780                 free_rdataset(rbtdb, rbtdb->common.mctx, current);
07781                 current = next;
07782         }
07783         NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
07784 }
07785 
07786 static isc_boolean_t
07787 issecure(dns_db_t *db) {
07788         dns_rbtdb_t *rbtdb;
07789         isc_boolean_t secure;
07790 
07791         rbtdb = (dns_rbtdb_t *)db;
07792 
07793         REQUIRE(VALID_RBTDB(rbtdb));
07794 
07795         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
07796         secure = ISC_TF(rbtdb->current_version->secure == dns_db_secure);
07797         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
07798 
07799         return (secure);
07800 }
07801 
07802 static isc_boolean_t
07803 isdnssec(dns_db_t *db) {
07804         dns_rbtdb_t *rbtdb;
07805         isc_boolean_t dnssec;
07806 
07807         rbtdb = (dns_rbtdb_t *)db;
07808 
07809         REQUIRE(VALID_RBTDB(rbtdb));
07810 
07811         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
07812         dnssec = ISC_TF(rbtdb->current_version->secure != dns_db_insecure);
07813         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
07814 
07815         return (dnssec);
07816 }
07817 
07818 static unsigned int
07819 nodecount(dns_db_t *db) {
07820         dns_rbtdb_t *rbtdb;
07821         unsigned int count;
07822 
07823         rbtdb = (dns_rbtdb_t *)db;
07824 
07825         REQUIRE(VALID_RBTDB(rbtdb));
07826 
07827         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
07828         count = dns_rbt_nodecount(rbtdb->tree);
07829         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
07830 
07831         return (count);
07832 }
07833 
07834 static unsigned int
07835 hashsize(dns_db_t *db) {
07836         dns_rbtdb_t *rbtdb;
07837         unsigned int count;
07838 
07839         rbtdb = (dns_rbtdb_t *)db;
07840 
07841         REQUIRE(VALID_RBTDB(rbtdb));
07842 
07843         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
07844         count = dns_rbt_hashsize(rbtdb->tree);
07845         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
07846 
07847         return (count);
07848 }
07849 
07850 static void
07851 settask(dns_db_t *db, isc_task_t *task) {
07852         dns_rbtdb_t *rbtdb;
07853 
07854         rbtdb = (dns_rbtdb_t *)db;
07855 
07856         REQUIRE(VALID_RBTDB(rbtdb));
07857 
07858         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
07859         if (rbtdb->task != NULL)
07860                 isc_task_detach(&rbtdb->task);
07861         if (task != NULL)
07862                 isc_task_attach(task, &rbtdb->task);
07863         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
07864 }
07865 
07866 static isc_boolean_t
07867 ispersistent(dns_db_t *db) {
07868         UNUSED(db);
07869         return (ISC_FALSE);
07870 }
07871 
07872 static isc_result_t
07873 getoriginnode(dns_db_t *db, dns_dbnode_t **nodep) {
07874         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
07875         dns_rbtnode_t *onode;
07876         isc_result_t result = ISC_R_SUCCESS;
07877 
07878         REQUIRE(VALID_RBTDB(rbtdb));
07879         REQUIRE(nodep != NULL && *nodep == NULL);
07880 
07881         /* Note that the access to origin_node doesn't require a DB lock */
07882         onode = (dns_rbtnode_t *)rbtdb->origin_node;
07883         if (onode != NULL) {
07884                 NODE_STRONGLOCK(&rbtdb->node_locks[onode->locknum].lock);
07885                 new_reference(rbtdb, onode);
07886                 NODE_STRONGUNLOCK(&rbtdb->node_locks[onode->locknum].lock);
07887 
07888                 *nodep = rbtdb->origin_node;
07889         } else {
07890                 INSIST(IS_CACHE(rbtdb));
07891                 result = ISC_R_NOTFOUND;
07892         }
07893 
07894         return (result);
07895 }
07896 
07897 static isc_result_t
07898 getnsec3parameters(dns_db_t *db, dns_dbversion_t *version, dns_hash_t *hash,
07899                    isc_uint8_t *flags, isc_uint16_t *iterations,
07900                    unsigned char *salt, size_t *salt_length)
07901 {
07902         dns_rbtdb_t *rbtdb;
07903         isc_result_t result = ISC_R_NOTFOUND;
07904         rbtdb_version_t *rbtversion = version;
07905 
07906         rbtdb = (dns_rbtdb_t *)db;
07907 
07908         REQUIRE(VALID_RBTDB(rbtdb));
07909         INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb);
07910 
07911         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
07912 
07913         if (rbtversion == NULL)
07914                 rbtversion = rbtdb->current_version;
07915 
07916         if (rbtversion->havensec3) {
07917                 if (hash != NULL)
07918                         *hash = rbtversion->hash;
07919                 if (salt != NULL && salt_length != NULL) {
07920                         REQUIRE(*salt_length >= rbtversion->salt_length);
07921                         memmove(salt, rbtversion->salt,
07922                                 rbtversion->salt_length);
07923                 }
07924                 if (salt_length != NULL)
07925                         *salt_length = rbtversion->salt_length;
07926                 if (iterations != NULL)
07927                         *iterations = rbtversion->iterations;
07928                 if (flags != NULL)
07929                         *flags = rbtversion->flags;
07930                 result = ISC_R_SUCCESS;
07931         }
07932         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
07933 
07934         return (result);
07935 }
07936 
07937 static isc_result_t
07938 setsigningtime(dns_db_t *db, dns_rdataset_t *rdataset, isc_stdtime_t resign) {
07939         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
07940         isc_stdtime_t oldresign;
07941         isc_result_t result = ISC_R_SUCCESS;
07942         rdatasetheader_t *header;
07943 
07944         REQUIRE(VALID_RBTDB(rbtdb));
07945         REQUIRE(!IS_CACHE(rbtdb));
07946         REQUIRE(rdataset != NULL);
07947 
07948         header = rdataset->private3;
07949         header--;
07950 
07951         NODE_LOCK(&rbtdb->node_locks[header->node->locknum].lock,
07952                   isc_rwlocktype_write);
07953 
07954         oldresign = header->resign;
07955         header->resign = resign;
07956         if (header->heap_index != 0) {
07957                 INSIST(RESIGN(header));
07958                 if (resign == 0) {
07959                         isc_heap_delete(rbtdb->heaps[header->node->locknum],
07960                                         header->heap_index);
07961                         header->heap_index = 0;
07962                 } else if (resign < oldresign)
07963                         isc_heap_increased(rbtdb->heaps[header->node->locknum],
07964                                            header->heap_index);
07965                 else if (resign > oldresign)
07966                         isc_heap_decreased(rbtdb->heaps[header->node->locknum],
07967                                            header->heap_index);
07968         } else if (resign && header->heap_index == 0) {
07969                 header->attributes |= RDATASET_ATTR_RESIGN;
07970                 result = resign_insert(rbtdb, header->node->locknum, header);
07971         }
07972         NODE_UNLOCK(&rbtdb->node_locks[header->node->locknum].lock,
07973                     isc_rwlocktype_write);
07974         return (result);
07975 }
07976 
07977 static isc_result_t
07978 getsigningtime(dns_db_t *db, dns_rdataset_t *rdataset,
07979                dns_name_t *foundname)
07980 {
07981         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
07982         rdatasetheader_t *header = NULL, *this;
07983         unsigned int i;
07984         isc_result_t result = ISC_R_NOTFOUND;
07985         unsigned int locknum;
07986 
07987         REQUIRE(VALID_RBTDB(rbtdb));
07988 
07989         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
07990 
07991         for (i = 0; i < rbtdb->node_lock_count; i++) {
07992                 NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_read);
07993                 this = isc_heap_element(rbtdb->heaps[i], 1);
07994                 if (this == NULL) {
07995                         NODE_UNLOCK(&rbtdb->node_locks[i].lock,
07996                                     isc_rwlocktype_read);
07997                         continue;
07998                 }
07999                 if (header == NULL)
08000                         header = this;
08001                 else if (isc_serial_lt(this->resign, header->resign)) {
08002                         locknum = header->node->locknum;
08003                         NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
08004                                     isc_rwlocktype_read);
08005                         header = this;
08006                 } else
08007                         NODE_UNLOCK(&rbtdb->node_locks[i].lock,
08008                                     isc_rwlocktype_read);
08009         }
08010 
08011         if (header == NULL)
08012                 goto unlock;
08013 
08014         bind_rdataset(rbtdb, header->node, header, 0, rdataset);
08015 
08016         if (foundname != NULL)
08017                 dns_rbt_fullnamefromnode(header->node, foundname);
08018 
08019         NODE_UNLOCK(&rbtdb->node_locks[header->node->locknum].lock,
08020                     isc_rwlocktype_read);
08021 
08022         result = ISC_R_SUCCESS;
08023 
08024  unlock:
08025         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
08026 
08027         return (result);
08028 }
08029 
08030 static void
08031 resigned(dns_db_t *db, dns_rdataset_t *rdataset, dns_dbversion_t *version)
08032 {
08033         rbtdb_version_t *rbtversion = (rbtdb_version_t *)version;
08034         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
08035         dns_rbtnode_t *node;
08036         rdatasetheader_t *header;
08037 
08038         REQUIRE(VALID_RBTDB(rbtdb));
08039         REQUIRE(rdataset != NULL);
08040         REQUIRE(rdataset->methods == &rdataset_methods);
08041         REQUIRE(rbtdb->future_version == rbtversion);
08042         REQUIRE(rbtversion != NULL);
08043         REQUIRE(rbtversion->writer);
08044         REQUIRE(rbtversion->rbtdb == rbtdb);
08045 
08046         node = rdataset->private2;
08047         INSIST(node != NULL);
08048         header = rdataset->private3;
08049         INSIST(header != NULL);
08050         header--;
08051 
08052         if (header->heap_index == 0)
08053                 return;
08054 
08055         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
08056         NODE_LOCK(&rbtdb->node_locks[node->locknum].lock,
08057                   isc_rwlocktype_write);
08058         /*
08059          * Delete from heap and save to re-signed list so that it can
08060          * be restored if we backout of this change.
08061          */
08062         resign_delete(rbtdb, rbtversion, header);
08063         NODE_UNLOCK(&rbtdb->node_locks[node->locknum].lock,
08064                     isc_rwlocktype_write);
08065         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
08066 }
08067 
08068 static isc_result_t
08069 setcachestats(dns_db_t *db, isc_stats_t *stats) {
08070         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
08071 
08072         REQUIRE(VALID_RBTDB(rbtdb));
08073         REQUIRE(IS_CACHE(rbtdb)); /* current restriction */
08074         REQUIRE(stats != NULL);
08075 
08076         isc_stats_attach(stats, &rbtdb->cachestats);
08077         return (ISC_R_SUCCESS);
08078 }
08079 
08080 static dns_stats_t *
08081 getrrsetstats(dns_db_t *db) {
08082         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
08083 
08084         REQUIRE(VALID_RBTDB(rbtdb));
08085         REQUIRE(IS_CACHE(rbtdb)); /* current restriction */
08086 
08087         return (rbtdb->rrsetstats);
08088 }
08089 
/*
 * Method table for zone and stub databases.  The create function below
 * installs it in common.methods whenever the requested type is not
 * dns_dbtype_cache.
 *
 * The initializer is positional.  A NULL entry means this table supplies
 * no function for that slot.  Compared with cache_methods, this table
 * provides serialize, the NSEC3 lookups, the signing-time functions and
 * the rpz hooks, and leaves the two slots that cache_methods fills with
 * getrrsetstats and setcachestats empty.
 *
 * NOTE(review): the slot order is presumed to follow the member order of
 * dns_dbmethods_t -- confirm against its declaration before inserting or
 * reordering entries, and keep cache_methods in step.
 */
08090 static dns_dbmethods_t zone_methods = {
08091         attach,
08092         detach,
08093         beginload,
08094         endload,
08095         serialize,
08096         dump,
08097         currentversion,
08098         newversion,
08099         attachversion,
08100         closeversion,
08101         findnode,
08102         zone_find,
08103         zone_findzonecut,
08104         attachnode,
08105         detachnode,
08106         expirenode,
08107         printnode,
08108         createiterator,
08109         zone_findrdataset,
08110         allrdatasets,
08111         addrdataset,
08112         subtractrdataset,
08113         deleterdataset,
08114         issecure,
08115         nodecount,
08116         ispersistent,
08117         overmem,
08118         settask,
08119         getoriginnode,
08120         NULL,
08121         getnsec3parameters,
08122         findnsec3node,
08123         setsigningtime,
08124         getsigningtime,
08125         resigned,
08126         isdnssec,
08127         NULL,
08128         rpz_attach,
08129         rpz_ready,
08130         NULL,
08131         NULL,
08132         NULL,
08133         hashsize
08134 };
08135 
/*
 * Method table for cache databases.  The create function below installs
 * it in common.methods when the requested type is dns_dbtype_cache.
 *
 * The initializer is positional.  A NULL entry means this table supplies
 * no function for that slot.  Compared with zone_methods, this table has
 * no serialize, NSEC3, signing-time or rpz entries; it uses the cache_*
 * variants of find, findzonecut and findrdataset, and it additionally
 * provides getrrsetstats and setcachestats.
 *
 * NOTE(review): the slot order is presumed to follow the member order of
 * dns_dbmethods_t -- confirm against its declaration before inserting or
 * reordering entries, and keep zone_methods in step.
 */
08136 static dns_dbmethods_t cache_methods = {
08137         attach,
08138         detach,
08139         beginload,
08140         endload,
08141         NULL,
08142         dump,
08143         currentversion,
08144         newversion,
08145         attachversion,
08146         closeversion,
08147         findnode,
08148         cache_find,
08149         cache_findzonecut,
08150         attachnode,
08151         detachnode,
08152         expirenode,
08153         printnode,
08154         createiterator,
08155         cache_findrdataset,
08156         allrdatasets,
08157         addrdataset,
08158         subtractrdataset,
08159         deleterdataset,
08160         issecure,
08161         nodecount,
08162         ispersistent,
08163         overmem,
08164         settask,
08165         getoriginnode,
08166         NULL,
08167         NULL,
08168         NULL,
08169         NULL,
08170         NULL,
08171         NULL,
08172         isdnssec,
08173         getrrsetstats,
08174         NULL,
08175         NULL,
08176         NULL,
08177         NULL,
08178         setcachestats,
08179         hashsize
08180 };
08181 
08182 isc_result_t
08183 #ifdef DNS_RBTDB_VERSION64
08184 dns_rbtdb64_create
08185 #else
08186 dns_rbtdb_create
08187 #endif
08188                 (isc_mem_t *mctx, dns_name_t *origin, dns_dbtype_t type,
08189                  dns_rdataclass_t rdclass, unsigned int argc, char *argv[],
08190                  void *driverarg, dns_db_t **dbp)
08191 {
08192         dns_rbtdb_t *rbtdb;
08193         isc_result_t result;
08194         int i;
08195         dns_name_t name;
08196         isc_boolean_t (*sooner)(void *, void *);
08197         isc_mem_t *hmctx = mctx;
08198 
08199         /* Keep the compiler happy. */
08200         UNUSED(driverarg);
08201 
08202         rbtdb = isc_mem_get(mctx, sizeof(*rbtdb));
08203         if (rbtdb == NULL)
08204                 return (ISC_R_NOMEMORY);
08205 
08206         /*
08207          * If argv[0] exists, it points to a memory context to use for heap
08208          */
08209         if (argc != 0)
08210                 hmctx = (isc_mem_t *) argv[0];
08211 
08212         memset(rbtdb, '\0', sizeof(*rbtdb));
08213         dns_name_init(&rbtdb->common.origin, NULL);
08214         rbtdb->common.attributes = 0;
08215         if (type == dns_dbtype_cache) {
08216                 rbtdb->common.methods = &cache_methods;
08217                 rbtdb->common.attributes |= DNS_DBATTR_CACHE;
08218         } else if (type == dns_dbtype_stub) {
08219                 rbtdb->common.methods = &zone_methods;
08220                 rbtdb->common.attributes |= DNS_DBATTR_STUB;
08221         } else
08222                 rbtdb->common.methods = &zone_methods;
08223         rbtdb->common.rdclass = rdclass;
08224         rbtdb->common.mctx = NULL;
08225 
08226         result = RBTDB_INITLOCK(&rbtdb->lock);
08227         if (result != ISC_R_SUCCESS)
08228                 goto cleanup_rbtdb;
08229 
08230         result = isc_rwlock_init(&rbtdb->tree_lock, 0, 0);
08231         if (result != ISC_R_SUCCESS)
08232                 goto cleanup_lock;
08233 
08234         /*
08235          * Initialize node_lock_count in a generic way to support future
08236          * extension which allows the user to specify this value on creation.
08237          * Note that when specified for a cache DB it must be larger than 1
08238          * as commented with the definition of DEFAULT_CACHE_NODE_LOCK_COUNT.
08239          */
08240         if (rbtdb->node_lock_count == 0) {
08241                 if (IS_CACHE(rbtdb))
08242                         rbtdb->node_lock_count = DEFAULT_CACHE_NODE_LOCK_COUNT;
08243                 else
08244                         rbtdb->node_lock_count = DEFAULT_NODE_LOCK_COUNT;
08245         } else if (rbtdb->node_lock_count < 2 && IS_CACHE(rbtdb)) {
08246                 result = ISC_R_RANGE;
08247                 goto cleanup_tree_lock;
08248         }
08249         INSIST(rbtdb->node_lock_count < (1 << DNS_RBT_LOCKLENGTH));
08250         rbtdb->node_locks = isc_mem_get(mctx, rbtdb->node_lock_count *
08251                                         sizeof(rbtdb_nodelock_t));
08252         if (rbtdb->node_locks == NULL) {
08253                 result = ISC_R_NOMEMORY;
08254                 goto cleanup_tree_lock;
08255         }
08256 
08257         rbtdb->cachestats = NULL;
08258         rbtdb->rrsetstats = NULL;
08259         if (IS_CACHE(rbtdb)) {
08260                 result = dns_rdatasetstats_create(mctx, &rbtdb->rrsetstats);
08261                 if (result != ISC_R_SUCCESS)
08262                         goto cleanup_node_locks;
08263                 rbtdb->rdatasets = isc_mem_get(mctx, rbtdb->node_lock_count *
08264                                                sizeof(rdatasetheaderlist_t));
08265                 if (rbtdb->rdatasets == NULL) {
08266                         result = ISC_R_NOMEMORY;
08267                         goto cleanup_rrsetstats;
08268                 }
08269                 for (i = 0; i < (int)rbtdb->node_lock_count; i++)
08270                         ISC_LIST_INIT(rbtdb->rdatasets[i]);
08271         } else
08272                 rbtdb->rdatasets = NULL;
08273 
08274         /*
08275          * Create the heaps.
08276          */
08277         rbtdb->heaps = isc_mem_get(hmctx, rbtdb->node_lock_count *
08278                                    sizeof(isc_heap_t *));
08279         if (rbtdb->heaps == NULL) {
08280                 result = ISC_R_NOMEMORY;
08281                 goto cleanup_rdatasets;
08282         }
08283         for (i = 0; i < (int)rbtdb->node_lock_count; i++)
08284                 rbtdb->heaps[i] = NULL;
08285         sooner = IS_CACHE(rbtdb) ? ttl_sooner : resign_sooner;
08286         for (i = 0; i < (int)rbtdb->node_lock_count; i++) {
08287                 result = isc_heap_create(hmctx, sooner, set_index, 0,
08288                                          &rbtdb->heaps[i]);
08289                 if (result != ISC_R_SUCCESS)
08290                         goto cleanup_heaps;
08291         }
08292 
08293         /*
08294          * Create deadnode lists.
08295          */
08296         rbtdb->deadnodes = isc_mem_get(mctx, rbtdb->node_lock_count *
08297                                        sizeof(rbtnodelist_t));
08298         if (rbtdb->deadnodes == NULL) {
08299                 result = ISC_R_NOMEMORY;
08300                 goto cleanup_heaps;
08301         }
08302         for (i = 0; i < (int)rbtdb->node_lock_count; i++)
08303                 ISC_LIST_INIT(rbtdb->deadnodes[i]);
08304 
08305         rbtdb->active = rbtdb->node_lock_count;
08306 
08307         for (i = 0; i < (int)(rbtdb->node_lock_count); i++) {
08308                 result = NODE_INITLOCK(&rbtdb->node_locks[i].lock);
08309                 if (result == ISC_R_SUCCESS) {
08310                         result = isc_refcount_init(&rbtdb->node_locks[i].references, 0);
08311                         if (result != ISC_R_SUCCESS)
08312                                 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
08313                 }
08314                 if (result != ISC_R_SUCCESS) {
08315                         while (i-- > 0) {
08316                                 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
08317                                 isc_refcount_decrement(&rbtdb->node_locks[i].references, NULL);
08318                                 isc_refcount_destroy(&rbtdb->node_locks[i].references);
08319                         }
08320                         goto cleanup_deadnodes;
08321                 }
08322                 rbtdb->node_locks[i].exiting = ISC_FALSE;
08323         }
08324 
08325         /*
08326          * Attach to the mctx.  The database will persist so long as there
08327          * are references to it, and attaching to the mctx ensures that our
08328          * mctx won't disappear out from under us.
08329          */
08330         isc_mem_attach(mctx, &rbtdb->common.mctx);
08331         isc_mem_attach(hmctx, &rbtdb->hmctx);
08332 
08333         /*
08334          * Must be initialized before free_rbtdb() is called.
08335          */
08336         isc_ondestroy_init(&rbtdb->common.ondest);
08337 
08338         /*
08339          * Make a copy of the origin name.
08340          */
08341         result = dns_name_dupwithoffsets(origin, mctx, &rbtdb->common.origin);
08342         if (result != ISC_R_SUCCESS) {
08343                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
08344                 return (result);
08345         }
08346 
08347         /*
08348          * Make the Red-Black Trees.
08349          */
08350         result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->tree);
08351         if (result != ISC_R_SUCCESS) {
08352                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
08353                 return (result);
08354         }
08355 
08356         result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->nsec);
08357         if (result != ISC_R_SUCCESS) {
08358                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
08359                 return (result);
08360         }
08361 
08362         result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->nsec3);
08363         if (result != ISC_R_SUCCESS) {
08364                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
08365                 return (result);
08366         }
08367 
08368         /*
08369          * In order to set the node callback bit correctly in zone databases,
08370          * we need to know if the node has the origin name of the zone.
08371          * In loading_addrdataset() we could simply compare the new name
08372          * to the origin name, but this is expensive.  Also, we don't know the
08373          * node name in addrdataset(), so we need another way of knowing the
08374          * zone's top.
08375          *
08376          * We now explicitly create a node for the zone's origin, and then
08377          * we simply remember the node's address.  This is safe, because
08378          * the top-of-zone node can never be deleted, nor can its address
08379          * change.
08380          */
08381         if (!IS_CACHE(rbtdb)) {
08382                 dns_rbtnode_t *nsec3node;
08383 
08384                 rbtdb->origin_node = NULL;
08385                 result = dns_rbt_addnode(rbtdb->tree, &rbtdb->common.origin,
08386                                          &rbtdb->origin_node);
08387                 if (result != ISC_R_SUCCESS) {
08388                         INSIST(result != ISC_R_EXISTS);
08389                         free_rbtdb(rbtdb, ISC_FALSE, NULL);
08390                         return (result);
08391                 }
08392                 rbtdb->origin_node->nsec = DNS_RBT_NSEC_NORMAL;
08393                 /*
08394                  * We need to give the origin node the right locknum.
08395                  */
08396                 dns_name_init(&name, NULL);
08397                 dns_rbt_namefromnode(rbtdb->origin_node, &name);
08398 #ifdef DNS_RBT_USEHASH
08399                 rbtdb->origin_node->locknum =
08400                         rbtdb->origin_node->hashval %
08401                         rbtdb->node_lock_count;
08402 #else
08403                 rbtdb->origin_node->locknum =
08404                         dns_name_hash(&name, ISC_TRUE) %
08405                         rbtdb->node_lock_count;
08406 #endif
08407                 /*
08408                  * Add an apex node to the NSEC3 tree so that NSEC3 searches
08409                  * return partial matches when there is only a single NSEC3
08410                  * record in the tree.
08411                  */
08412                 nsec3node = NULL;
08413                 result = dns_rbt_addnode(rbtdb->nsec3, &rbtdb->common.origin,
08414                                          &nsec3node);
08415                 if (result != ISC_R_SUCCESS) {
08416                         INSIST(result != ISC_R_EXISTS);
08417                         free_rbtdb(rbtdb, ISC_FALSE, NULL);
08418                         return (result);
08419                 }
08420                 nsec3node->nsec = DNS_RBT_NSEC_NSEC3;
08421                 /*
08422                  * We need to give the nsec3 origin node the right locknum.
08423                  */
08424                 dns_name_init(&name, NULL);
08425                 dns_rbt_namefromnode(nsec3node, &name);
08426 #ifdef DNS_RBT_USEHASH
08427                 nsec3node->locknum = nsec3node->hashval %
08428                         rbtdb->node_lock_count;
08429 #else
08430                 nsec3node->locknum = dns_name_hash(&name, ISC_TRUE) %
08431                         rbtdb->node_lock_count;
08432 #endif
08433         }
08434 
08435         /*
08436          * Misc. Initialization.
08437          */
08438         result = isc_refcount_init(&rbtdb->references, 1);
08439         if (result != ISC_R_SUCCESS) {
08440                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
08441                 return (result);
08442         }
08443         rbtdb->attributes = 0;
08444         rbtdb->task = NULL;
08445         rbtdb->rpzs = NULL;
08446         rbtdb->load_rpzs = NULL;
08447         rbtdb->rpz_num = DNS_RPZ_INVALID_NUM;
08448 
08449         /*
08450          * Version Initialization.
08451          */
08452         rbtdb->current_serial = 1;
08453         rbtdb->least_serial = 1;
08454         rbtdb->next_serial = 2;
08455         rbtdb->current_version = allocate_version(mctx, 1, 1, ISC_FALSE);
08456         if (rbtdb->current_version == NULL) {
08457                 isc_refcount_decrement(&rbtdb->references, NULL);
08458                 isc_refcount_destroy(&rbtdb->references);
08459                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
08460                 return (ISC_R_NOMEMORY);
08461         }
08462         rbtdb->current_version->rbtdb = rbtdb;
08463         rbtdb->current_version->secure = dns_db_insecure;
08464         rbtdb->current_version->havensec3 = ISC_FALSE;
08465         rbtdb->current_version->flags = 0;
08466         rbtdb->current_version->iterations = 0;
08467         rbtdb->current_version->hash = 0;
08468         rbtdb->current_version->salt_length = 0;
08469         memset(rbtdb->current_version->salt, 0,
08470                sizeof(rbtdb->current_version->salt));
08471         rbtdb->future_version = NULL;
08472         ISC_LIST_INIT(rbtdb->open_versions);
08473         /*
08474          * Keep the current version in the open list so that list operation
08475          * won't happen in normal lookup operations.
08476          */
08477         PREPEND(rbtdb->open_versions, rbtdb->current_version, link);
08478 
08479         rbtdb->common.magic = DNS_DB_MAGIC;
08480         rbtdb->common.impmagic = RBTDB_MAGIC;
08481 
08482         *dbp = (dns_db_t *)rbtdb;
08483 
08484         return (ISC_R_SUCCESS);
08485 
08486  cleanup_deadnodes:
08487         isc_mem_put(mctx, rbtdb->deadnodes,
08488                     rbtdb->node_lock_count * sizeof(rbtnodelist_t));
08489 
08490  cleanup_heaps:
08491         if (rbtdb->heaps != NULL) {
08492                 for (i = 0 ; i < (int)rbtdb->node_lock_count ; i++)
08493                         if (rbtdb->heaps[i] != NULL)
08494                                 isc_heap_destroy(&rbtdb->heaps[i]);
08495                 isc_mem_put(hmctx, rbtdb->heaps,
08496                             rbtdb->node_lock_count * sizeof(isc_heap_t *));
08497         }
08498 
08499  cleanup_rdatasets:
08500         if (rbtdb->rdatasets != NULL)
08501                 isc_mem_put(mctx, rbtdb->rdatasets, rbtdb->node_lock_count *
08502                             sizeof(rdatasetheaderlist_t));
08503  cleanup_rrsetstats:
08504         if (rbtdb->rrsetstats != NULL)
08505                 dns_stats_detach(&rbtdb->rrsetstats);
08506 
08507  cleanup_node_locks:
08508         isc_mem_put(mctx, rbtdb->node_locks,
08509                     rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
08510 
08511  cleanup_tree_lock:
08512         isc_rwlock_destroy(&rbtdb->tree_lock);
08513 
08514  cleanup_lock:
08515         RBTDB_DESTROYLOCK(&rbtdb->lock);
08516 
08517  cleanup_rbtdb:
08518         isc_mem_put(mctx, rbtdb,  sizeof(*rbtdb));
08519         return (result);
08520 }
08521 
08522 
08523 /*
08524  * Slabbed Rdataset Methods
08525  */
08526 
08527 static void
08528 rdataset_disassociate(dns_rdataset_t *rdataset) {
08529         dns_db_t *db = rdataset->private1;
08530         dns_dbnode_t *node = rdataset->private2;
08531 
08532         detachnode(db, &node);
08533 }
08534 
08535 static isc_result_t
08536 rdataset_first(dns_rdataset_t *rdataset) {
08537         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
08538         unsigned int count;
08539 
08540         count = raw[0] * 256 + raw[1];
08541         if (count == 0) {
08542                 rdataset->private5 = NULL;
08543                 return (ISC_R_NOMORE);
08544         }
08545 
08546 #if DNS_RDATASET_FIXED
08547         if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0)
08548                 raw += 2 + (4 * count);
08549         else
08550 #endif
08551                 raw += 2;
08552 
08553         /*
08554          * The privateuint4 field is the number of rdata beyond the
08555          * cursor position, so we decrement the total count by one
08556          * before storing it.
08557          *
08558          * If DNS_RDATASETATTR_LOADORDER is not set 'raw' points to the
08559          * first record.  If DNS_RDATASETATTR_LOADORDER is set 'raw' points
08560          * to the first entry in the offset table.
08561          */
08562         count--;
08563         rdataset->privateuint4 = count;
08564         rdataset->private5 = raw;
08565 
08566         return (ISC_R_SUCCESS);
08567 }
08568 
08569 static isc_result_t
08570 rdataset_next(dns_rdataset_t *rdataset) {
08571         unsigned int count;
08572         unsigned int length;
08573         unsigned char *raw;     /* RDATASLAB */
08574 
08575         count = rdataset->privateuint4;
08576         if (count == 0)
08577                 return (ISC_R_NOMORE);
08578         count--;
08579         rdataset->privateuint4 = count;
08580 
08581         /*
08582          * Skip forward one record (length + 4) or one offset (4).
08583          */
08584         raw = rdataset->private5;
08585 #if DNS_RDATASET_FIXED
08586         if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0) {
08587 #endif
08588                 length = raw[0] * 256 + raw[1];
08589                 raw += length;
08590 #if DNS_RDATASET_FIXED
08591         }
08592         rdataset->private5 = raw + 4;           /* length(2) + order(2) */
08593 #else
08594         rdataset->private5 = raw + 2;           /* length(2) */
08595 #endif
08596 
08597         return (ISC_R_SUCCESS);
08598 }
08599 
08600 static void
08601 rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata) {
08602         unsigned char *raw = rdataset->private5;        /* RDATASLAB */
08603 #if DNS_RDATASET_FIXED
08604         unsigned int offset;
08605 #endif
08606         unsigned int length;
08607         isc_region_t r;
08608         unsigned int flags = 0;
08609 
08610         REQUIRE(raw != NULL);
08611 
08612         /*
08613          * Find the start of the record if not already in private5
08614          * then skip the length and order fields.
08615          */
08616 #if DNS_RDATASET_FIXED
08617         if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) != 0) {
08618                 offset = (raw[0] << 24) + (raw[1] << 16) +
08619                          (raw[2] << 8) + raw[3];
08620                 raw = rdataset->private3;
08621                 raw += offset;
08622         }
08623 #endif
08624         length = raw[0] * 256 + raw[1];
08625 #if DNS_RDATASET_FIXED
08626         raw += 4;
08627 #else
08628         raw += 2;
08629 #endif
08630         if (rdataset->type == dns_rdatatype_rrsig) {
08631                 if (*raw & DNS_RDATASLAB_OFFLINE)
08632                         flags |= DNS_RDATA_OFFLINE;
08633                 length--;
08634                 raw++;
08635         }
08636         r.length = length;
08637         r.base = raw;
08638         dns_rdata_fromregion(rdata, rdataset->rdclass, rdataset->type, &r);
08639         rdata->flags |= flags;
08640 }
08641 
08642 static void
08643 rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target) {
08644         dns_db_t *db = source->private1;
08645         dns_dbnode_t *node = source->private2;
08646         dns_dbnode_t *cloned_node = NULL;
08647 
08648         attachnode(db, node, &cloned_node);
08649         INSIST(!ISC_LINK_LINKED(target, link));
08650         *target = *source;
08651         ISC_LINK_INIT(target, link);
08652 
08653         /*
08654          * Reset iterator state.
08655          */
08656         target->privateuint4 = 0;
08657         target->private5 = NULL;
08658 }
08659 
08660 static unsigned int
08661 rdataset_count(dns_rdataset_t *rdataset) {
08662         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
08663         unsigned int count;
08664 
08665         count = raw[0] * 256 + raw[1];
08666 
08667         return (count);
08668 }
08669 
08670 static isc_result_t
08671 rdataset_getnoqname(dns_rdataset_t *rdataset, dns_name_t *name,
08672                     dns_rdataset_t *nsec, dns_rdataset_t *nsecsig)
08673 {
08674         dns_db_t *db = rdataset->private1;
08675         dns_dbnode_t *node = rdataset->private2;
08676         dns_dbnode_t *cloned_node;
08677         struct noqname *noqname = rdataset->private6;
08678 
08679         cloned_node = NULL;
08680         attachnode(db, node, &cloned_node);
08681         nsec->methods = &slab_methods;
08682         nsec->rdclass = db->rdclass;
08683         nsec->type = noqname->type;
08684         nsec->covers = 0;
08685         nsec->ttl = rdataset->ttl;
08686         nsec->trust = rdataset->trust;
08687         nsec->private1 = rdataset->private1;
08688         nsec->private2 = rdataset->private2;
08689         nsec->private3 = noqname->neg;
08690         nsec->privateuint4 = 0;
08691         nsec->private5 = NULL;
08692         nsec->private6 = NULL;
08693         nsec->private7 = NULL;
08694 
08695         cloned_node = NULL;
08696         attachnode(db, node, &cloned_node);
08697         nsecsig->methods = &slab_methods;
08698         nsecsig->rdclass = db->rdclass;
08699         nsecsig->type = dns_rdatatype_rrsig;
08700         nsecsig->covers = noqname->type;
08701         nsecsig->ttl = rdataset->ttl;
08702         nsecsig->trust = rdataset->trust;
08703         nsecsig->private1 = rdataset->private1;
08704         nsecsig->private2 = rdataset->private2;
08705         nsecsig->private3 = noqname->negsig;
08706         nsecsig->privateuint4 = 0;
08707         nsecsig->private5 = NULL;
08708         nsec->private6 = NULL;
08709         nsec->private7 = NULL;
08710 
08711         dns_name_clone(&noqname->name, name);
08712 
08713         return (ISC_R_SUCCESS);
08714 }
08715 
08716 static isc_result_t
08717 rdataset_getclosest(dns_rdataset_t *rdataset, dns_name_t *name,
08718                     dns_rdataset_t *nsec, dns_rdataset_t *nsecsig)
08719 {
08720         dns_db_t *db = rdataset->private1;
08721         dns_dbnode_t *node = rdataset->private2;
08722         dns_dbnode_t *cloned_node;
08723         struct noqname *closest = rdataset->private7;
08724 
08725         cloned_node = NULL;
08726         attachnode(db, node, &cloned_node);
08727         nsec->methods = &slab_methods;
08728         nsec->rdclass = db->rdclass;
08729         nsec->type = closest->type;
08730         nsec->covers = 0;
08731         nsec->ttl = rdataset->ttl;
08732         nsec->trust = rdataset->trust;
08733         nsec->private1 = rdataset->private1;
08734         nsec->private2 = rdataset->private2;
08735         nsec->private3 = closest->neg;
08736         nsec->privateuint4 = 0;
08737         nsec->private5 = NULL;
08738         nsec->private6 = NULL;
08739         nsec->private7 = NULL;
08740 
08741         cloned_node = NULL;
08742         attachnode(db, node, &cloned_node);
08743         nsecsig->methods = &slab_methods;
08744         nsecsig->rdclass = db->rdclass;
08745         nsecsig->type = dns_rdatatype_rrsig;
08746         nsecsig->covers = closest->type;
08747         nsecsig->ttl = rdataset->ttl;
08748         nsecsig->trust = rdataset->trust;
08749         nsecsig->private1 = rdataset->private1;
08750         nsecsig->private2 = rdataset->private2;
08751         nsecsig->private3 = closest->negsig;
08752         nsecsig->privateuint4 = 0;
08753         nsecsig->private5 = NULL;
08754         nsec->private6 = NULL;
08755         nsec->private7 = NULL;
08756 
08757         dns_name_clone(&closest->name, name);
08758 
08759         return (ISC_R_SUCCESS);
08760 }
08761 
08762 static void
08763 rdataset_settrust(dns_rdataset_t *rdataset, dns_trust_t trust) {
08764         dns_rbtdb_t *rbtdb = rdataset->private1;
08765         dns_rbtnode_t *rbtnode = rdataset->private2;
08766         rdatasetheader_t *header = rdataset->private3;
08767 
08768         header--;
08769         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
08770                   isc_rwlocktype_write);
08771         header->trust = rdataset->trust = trust;
08772         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
08773                   isc_rwlocktype_write);
08774 }
08775 
08776 static void
08777 rdataset_expire(dns_rdataset_t *rdataset) {
08778         dns_rbtdb_t *rbtdb = rdataset->private1;
08779         dns_rbtnode_t *rbtnode = rdataset->private2;
08780         rdatasetheader_t *header = rdataset->private3;
08781 
08782         header--;
08783         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
08784                   isc_rwlocktype_write);
08785         expire_header(rbtdb, header, ISC_FALSE, expire_flush);
08786         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
08787                   isc_rwlocktype_write);
08788 }
08789 
08790 static void
08791 rdataset_clearprefetch(dns_rdataset_t *rdataset) {
08792         dns_rbtdb_t *rbtdb = rdataset->private1;
08793         dns_rbtnode_t *rbtnode = rdataset->private2;
08794         rdatasetheader_t *header = rdataset->private3;
08795 
08796         header--;
08797         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
08798                   isc_rwlocktype_write);
08799         header->attributes &= ~RDATASET_ATTR_PREFETCH;
08800         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
08801                   isc_rwlocktype_write);
08802 }
08803 
08804 /*
08805  * Rdataset Iterator Methods
08806  */
08807 
08808 static void
08809 rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp) {
08810         rbtdb_rdatasetiter_t *rbtiterator;
08811 
08812         rbtiterator = (rbtdb_rdatasetiter_t *)(*iteratorp);
08813 
08814         if (rbtiterator->common.version != NULL)
08815                 closeversion(rbtiterator->common.db,
08816                              &rbtiterator->common.version, ISC_FALSE);
08817         detachnode(rbtiterator->common.db, &rbtiterator->common.node);
08818         isc_mem_put(rbtiterator->common.db->mctx, rbtiterator,
08819                     sizeof(*rbtiterator));
08820 
08821         *iteratorp = NULL;
08822 }
08823 
08824 static isc_result_t
08825 rdatasetiter_first(dns_rdatasetiter_t *iterator) {
08826         rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
08827         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
08828         dns_rbtnode_t *rbtnode = rbtiterator->common.node;
08829         rbtdb_version_t *rbtversion = rbtiterator->common.version;
08830         rdatasetheader_t *header, *top_next;
08831         rbtdb_serial_t serial;
08832         isc_stdtime_t now;
08833 
08834         if (IS_CACHE(rbtdb)) {
08835                 serial = 1;
08836                 now = rbtiterator->common.now;
08837         } else {
08838                 serial = rbtversion->serial;
08839                 now = 0;
08840         }
08841 
08842         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
08843                   isc_rwlocktype_read);
08844 
08845         for (header = rbtnode->data; header != NULL; header = top_next) {
08846                 top_next = header->next;
08847                 do {
08848                         if (header->serial <= serial && !IGNORE(header)) {
08849                                 /*
08850                                  * Is this a "this rdataset doesn't exist"
08851                                  * record?  Or is it too old in the cache?
08852                                  *
08853                                  * Note: unlike everywhere else, we
08854                                  * check for now > header->rdh_ttl instead
08855                                  * of now >= header->rdh_ttl.  This allows
08856                                  * ANY and RRSIG queries for 0 TTL
08857                                  * rdatasets to work.
08858                                  */
08859                                 if (NONEXISTENT(header) ||
08860                                     (now != 0 && now > header->rdh_ttl))
08861                                         header = NULL;
08862                                 break;
08863                         } else
08864                                 header = header->down;
08865                 } while (header != NULL);
08866                 if (header != NULL)
08867                         break;
08868         }
08869 
08870         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
08871                     isc_rwlocktype_read);
08872 
08873         rbtiterator->current = header;
08874 
08875         if (header == NULL)
08876                 return (ISC_R_NOMORE);
08877 
08878         return (ISC_R_SUCCESS);
08879 }
08880 
08881 static isc_result_t
08882 rdatasetiter_next(dns_rdatasetiter_t *iterator) {
08883         rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
08884         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
08885         dns_rbtnode_t *rbtnode = rbtiterator->common.node;
08886         rbtdb_version_t *rbtversion = rbtiterator->common.version;
08887         rdatasetheader_t *header, *top_next;
08888         rbtdb_serial_t serial;
08889         isc_stdtime_t now;
08890         rbtdb_rdatatype_t type, negtype;
08891         dns_rdatatype_t rdtype, covers;
08892 
08893         header = rbtiterator->current;
08894         if (header == NULL)
08895                 return (ISC_R_NOMORE);
08896 
08897         if (IS_CACHE(rbtdb)) {
08898                 serial = 1;
08899                 now = rbtiterator->common.now;
08900         } else {
08901                 serial = rbtversion->serial;
08902                 now = 0;
08903         }
08904 
08905         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
08906                   isc_rwlocktype_read);
08907 
08908         type = header->type;
08909         rdtype = RBTDB_RDATATYPE_BASE(header->type);
08910         if (NEGATIVE(header)) {
08911                 covers = RBTDB_RDATATYPE_EXT(header->type);
08912                 negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
08913         } else
08914                 negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
08915         for (header = header->next; header != NULL; header = top_next) {
08916                 top_next = header->next;
08917                 /*
08918                  * If not walking back up the down list.
08919                  */
08920                 if (header->type != type && header->type != negtype) {
08921                         do {
08922                                 if (header->serial <= serial &&
08923                                     !IGNORE(header)) {
08924                                         /*
08925                                          * Is this a "this rdataset doesn't
08926                                          * exist" record?
08927                                          *
08928                                          * Note: unlike everywhere else, we
08929                                          * check for now > header->ttl instead
08930                                          * of now >= header->ttl.  This allows
08931                                          * ANY and RRSIG queries for 0 TTL
08932                                          * rdatasets to work.
08933                                          */
08934                                         if ((header->attributes &
08935                                              RDATASET_ATTR_NONEXISTENT) != 0 ||
08936                                             (now != 0 && now > header->rdh_ttl))
08937                                                 header = NULL;
08938                                         break;
08939                                 } else
08940                                         header = header->down;
08941                         } while (header != NULL);
08942                         if (header != NULL)
08943                                 break;
08944                 }
08945         }
08946 
08947         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
08948                     isc_rwlocktype_read);
08949 
08950         rbtiterator->current = header;
08951 
08952         if (header == NULL)
08953                 return (ISC_R_NOMORE);
08954 
08955         return (ISC_R_SUCCESS);
08956 }
08957 
08958 static void
08959 rdatasetiter_current(dns_rdatasetiter_t *iterator, dns_rdataset_t *rdataset) {
08960         rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
08961         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
08962         dns_rbtnode_t *rbtnode = rbtiterator->common.node;
08963         rdatasetheader_t *header;
08964 
08965         header = rbtiterator->current;
08966         REQUIRE(header != NULL);
08967 
08968         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
08969                   isc_rwlocktype_read);
08970 
08971         bind_rdataset(rbtdb, rbtnode, header, rbtiterator->common.now,
08972                       rdataset);
08973 
08974         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
08975                     isc_rwlocktype_read);
08976 }
08977 
08978 
08979 /*
08980  * Database Iterator Methods
08981  */
08982 
08983 static inline void
08984 reference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
08985         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
08986         dns_rbtnode_t *node = rbtdbiter->node;
08987 
08988         if (node == NULL)
08989                 return;
08990 
08991         INSIST(rbtdbiter->tree_locked != isc_rwlocktype_none);
08992         reactivate_node(rbtdb, node, rbtdbiter->tree_locked);
08993 }
08994 
08995 static inline void
08996 dereference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
08997         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
08998         dns_rbtnode_t *node = rbtdbiter->node;
08999         nodelock_t *lock;
09000 
09001         if (node == NULL)
09002                 return;
09003 
09004         lock = &rbtdb->node_locks[node->locknum].lock;
09005         NODE_LOCK(lock, isc_rwlocktype_read);
09006         decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
09007                             rbtdbiter->tree_locked, ISC_FALSE);
09008         NODE_UNLOCK(lock, isc_rwlocktype_read);
09009 
09010         rbtdbiter->node = NULL;
09011 }
09012 
09013 static void
09014 flush_deletions(rbtdb_dbiterator_t *rbtdbiter) {
09015         dns_rbtnode_t *node;
09016         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
09017         isc_boolean_t was_read_locked = ISC_FALSE;
09018         nodelock_t *lock;
09019         int i;
09020 
09021         if (rbtdbiter->delete != 0) {
09022                 /*
09023                  * Note that "%d node of %d in tree" can report things like
09024                  * "flush_deletions: 59 nodes of 41 in tree".  This means
09025                  * That some nodes appear on the deletions list more than
09026                  * once.  Only the last occurence will actually be deleted.
09027                  */
09028                 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
09029                               DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
09030                               "flush_deletions: %d nodes of %d in tree",
09031                               rbtdbiter->delete,
09032                               dns_rbt_nodecount(rbtdb->tree));
09033 
09034                 if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
09035                         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
09036                         was_read_locked = ISC_TRUE;
09037                 }
09038                 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
09039                 rbtdbiter->tree_locked = isc_rwlocktype_write;
09040 
09041                 for (i = 0; i < rbtdbiter->delete; i++) {
09042                         node = rbtdbiter->deletions[i];
09043                         lock = &rbtdb->node_locks[node->locknum].lock;
09044 
09045                         NODE_LOCK(lock, isc_rwlocktype_read);
09046                         decrement_reference(rbtdb, node, 0,
09047                                             isc_rwlocktype_read,
09048                                             rbtdbiter->tree_locked, ISC_FALSE);
09049                         NODE_UNLOCK(lock, isc_rwlocktype_read);
09050                 }
09051 
09052                 rbtdbiter->delete = 0;
09053 
09054                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
09055                 if (was_read_locked) {
09056                         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
09057                         rbtdbiter->tree_locked = isc_rwlocktype_read;
09058 
09059                 } else {
09060                         rbtdbiter->tree_locked = isc_rwlocktype_none;
09061                 }
09062         }
09063 }
09064 
09065 static inline void
09066 resume_iteration(rbtdb_dbiterator_t *rbtdbiter) {
09067         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
09068 
09069         REQUIRE(rbtdbiter->paused);
09070         REQUIRE(rbtdbiter->tree_locked == isc_rwlocktype_none);
09071 
09072         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
09073         rbtdbiter->tree_locked = isc_rwlocktype_read;
09074 
09075         rbtdbiter->paused = ISC_FALSE;
09076 }
09077 
09078 static void
09079 dbiterator_destroy(dns_dbiterator_t **iteratorp) {
09080         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)(*iteratorp);
09081         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
09082         dns_db_t *db = NULL;
09083 
09084         if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
09085                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
09086                 rbtdbiter->tree_locked = isc_rwlocktype_none;
09087         } else
09088                 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_none);
09089 
09090         dereference_iter_node(rbtdbiter);
09091 
09092         flush_deletions(rbtdbiter);
09093 
09094         dns_db_attach(rbtdbiter->common.db, &db);
09095         dns_db_detach(&rbtdbiter->common.db);
09096 
09097         dns_rbtnodechain_reset(&rbtdbiter->chain);
09098         dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
09099         isc_mem_put(db->mctx, rbtdbiter, sizeof(*rbtdbiter));
09100         dns_db_detach(&db);
09101 
09102         *iteratorp = NULL;
09103 }
09104 
09105 static isc_result_t
09106 dbiterator_first(dns_dbiterator_t *iterator) {
09107         isc_result_t result;
09108         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
09109         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
09110         dns_name_t *name, *origin;
09111 
09112         if (rbtdbiter->result != ISC_R_SUCCESS &&
09113             rbtdbiter->result != ISC_R_NOMORE)
09114                 return (rbtdbiter->result);
09115 
09116         if (rbtdbiter->paused)
09117                 resume_iteration(rbtdbiter);
09118 
09119         dereference_iter_node(rbtdbiter);
09120 
09121         name = dns_fixedname_name(&rbtdbiter->name);
09122         origin = dns_fixedname_name(&rbtdbiter->origin);
09123         dns_rbtnodechain_reset(&rbtdbiter->chain);
09124         dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
09125 
09126         if (rbtdbiter->nsec3only) {
09127                 rbtdbiter->current = &rbtdbiter->nsec3chain;
09128                 result = dns_rbtnodechain_first(rbtdbiter->current,
09129                                                 rbtdb->nsec3, name, origin);
09130         } else {
09131                 rbtdbiter->current = &rbtdbiter->chain;
09132                 result = dns_rbtnodechain_first(rbtdbiter->current,
09133                                                 rbtdb->tree, name, origin);
09134                 if (!rbtdbiter->nonsec3 && result == ISC_R_NOTFOUND) {
09135                         rbtdbiter->current = &rbtdbiter->nsec3chain;
09136                         result = dns_rbtnodechain_first(rbtdbiter->current,
09137                                                         rbtdb->nsec3, name,
09138                                                         origin);
09139                 }
09140         }
09141         if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
09142                 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
09143                                                   NULL, &rbtdbiter->node);
09144                 if (result == ISC_R_SUCCESS) {
09145                         rbtdbiter->new_origin = ISC_TRUE;
09146                         reference_iter_node(rbtdbiter);
09147                 }
09148         } else {
09149                 INSIST(result == ISC_R_NOTFOUND);
09150                 result = ISC_R_NOMORE; /* The tree is empty. */
09151         }
09152 
09153         rbtdbiter->result = result;
09154 
09155         return (result);
09156 }
09157 
09158 static isc_result_t
09159 dbiterator_last(dns_dbiterator_t *iterator) {
09160         isc_result_t result;
09161         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
09162         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
09163         dns_name_t *name, *origin;
09164 
09165         if (rbtdbiter->result != ISC_R_SUCCESS &&
09166             rbtdbiter->result != ISC_R_NOMORE)
09167                 return (rbtdbiter->result);
09168 
09169         if (rbtdbiter->paused)
09170                 resume_iteration(rbtdbiter);
09171 
09172         dereference_iter_node(rbtdbiter);
09173 
09174         name = dns_fixedname_name(&rbtdbiter->name);
09175         origin = dns_fixedname_name(&rbtdbiter->origin);
09176         dns_rbtnodechain_reset(&rbtdbiter->chain);
09177         dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
09178 
09179         result = ISC_R_NOTFOUND;
09180         if (rbtdbiter->nsec3only && !rbtdbiter->nonsec3) {
09181                 rbtdbiter->current = &rbtdbiter->nsec3chain;
09182                 result = dns_rbtnodechain_last(rbtdbiter->current,
09183                                                rbtdb->nsec3, name, origin);
09184         }
09185         if (!rbtdbiter->nsec3only && result == ISC_R_NOTFOUND) {
09186                 rbtdbiter->current = &rbtdbiter->chain;
09187                 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree,
09188                                                name, origin);
09189         }
09190         if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
09191                 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
09192                                                   NULL, &rbtdbiter->node);
09193                 if (result == ISC_R_SUCCESS) {
09194                         rbtdbiter->new_origin = ISC_TRUE;
09195                         reference_iter_node(rbtdbiter);
09196                 }
09197         } else {
09198                 INSIST(result == ISC_R_NOTFOUND);
09199                 result = ISC_R_NOMORE; /* The tree is empty. */
09200         }
09201 
09202         rbtdbiter->result = result;
09203 
09204         return (result);
09205 }
09206 
09207 static isc_result_t
09208 dbiterator_seek(dns_dbiterator_t *iterator, dns_name_t *name) {
09209         isc_result_t result, tresult;
09210         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
09211         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
09212         dns_name_t *iname, *origin;
09213 
09214         if (rbtdbiter->result != ISC_R_SUCCESS &&
09215             rbtdbiter->result != ISC_R_NOTFOUND &&
09216             rbtdbiter->result != ISC_R_NOMORE)
09217                 return (rbtdbiter->result);
09218 
09219         if (rbtdbiter->paused)
09220                 resume_iteration(rbtdbiter);
09221 
09222         dereference_iter_node(rbtdbiter);
09223 
09224         iname = dns_fixedname_name(&rbtdbiter->name);
09225         origin = dns_fixedname_name(&rbtdbiter->origin);
09226         dns_rbtnodechain_reset(&rbtdbiter->chain);
09227         dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
09228 
09229         if (rbtdbiter->nsec3only) {
09230                 rbtdbiter->current = &rbtdbiter->nsec3chain;
09231                 result = dns_rbt_findnode(rbtdb->nsec3, name, NULL,
09232                                           &rbtdbiter->node,
09233                                           rbtdbiter->current,
09234                                           DNS_RBTFIND_EMPTYDATA, NULL, NULL);
09235         } else if (rbtdbiter->nonsec3) {
09236                 rbtdbiter->current = &rbtdbiter->chain;
09237                 result = dns_rbt_findnode(rbtdb->tree, name, NULL,
09238                                           &rbtdbiter->node,
09239                                           rbtdbiter->current,
09240                                           DNS_RBTFIND_EMPTYDATA, NULL, NULL);
09241         } else {
09242                 /*
09243                  * Stay on main chain if not found on either chain.
09244                  */
09245                 rbtdbiter->current = &rbtdbiter->chain;
09246                 result = dns_rbt_findnode(rbtdb->tree, name, NULL,
09247                                           &rbtdbiter->node,
09248                                           rbtdbiter->current,
09249                                           DNS_RBTFIND_EMPTYDATA, NULL, NULL);
09250                 if (result == DNS_R_PARTIALMATCH) {
09251                         dns_rbtnode_t *node = NULL;
09252                         tresult = dns_rbt_findnode(rbtdb->nsec3, name, NULL,
09253                                                   &node, &rbtdbiter->nsec3chain,
09254                                                   DNS_RBTFIND_EMPTYDATA,
09255                                                   NULL, NULL);
09256                         if (tresult == ISC_R_SUCCESS) {
09257                                 rbtdbiter->node = node;
09258                                 rbtdbiter->current = &rbtdbiter->nsec3chain;
09259                                 result = tresult;
09260                         }
09261                 }
09262         }
09263 
09264 #if 1
09265         if (result == ISC_R_SUCCESS) {
09266                 result = dns_rbtnodechain_current(rbtdbiter->current, iname,
09267                                                   origin, NULL);
09268                 if (result == ISC_R_SUCCESS) {
09269                         rbtdbiter->new_origin = ISC_TRUE;
09270                         reference_iter_node(rbtdbiter);
09271                 }
09272         } else if (result == DNS_R_PARTIALMATCH) {
09273                 result = ISC_R_NOTFOUND;
09274                 rbtdbiter->node = NULL;
09275         }
09276 
09277         rbtdbiter->result = result;
09278 #else
09279         if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH) {
09280                 isc_result_t tresult;
09281                 tresult = dns_rbtnodechain_current(rbtdbiter->current, iname,
09282                                                    origin, NULL);
09283                 if (tresult == ISC_R_SUCCESS) {
09284                         rbtdbiter->new_origin = ISC_TRUE;
09285                         reference_iter_node(rbtdbiter);
09286                 } else {
09287                         result = tresult;
09288                         rbtdbiter->node = NULL;
09289                 }
09290         } else
09291                 rbtdbiter->node = NULL;
09292 
09293         rbtdbiter->result = (result == DNS_R_PARTIALMATCH) ?
09294                             ISC_R_SUCCESS : result;
09295 #endif
09296 
09297         return (result);
09298 }
09299 
09300 static isc_result_t
09301 dbiterator_prev(dns_dbiterator_t *iterator) {
09302         isc_result_t result;
09303         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
09304         dns_name_t *name, *origin;
09305         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
09306 
09307         REQUIRE(rbtdbiter->node != NULL);
09308 
09309         if (rbtdbiter->result != ISC_R_SUCCESS)
09310                 return (rbtdbiter->result);
09311 
09312         if (rbtdbiter->paused)
09313                 resume_iteration(rbtdbiter);
09314 
09315         name = dns_fixedname_name(&rbtdbiter->name);
09316         origin = dns_fixedname_name(&rbtdbiter->origin);
09317         result = dns_rbtnodechain_prev(rbtdbiter->current, name, origin);
09318         if (result == ISC_R_NOMORE && !rbtdbiter->nsec3only &&
09319             !rbtdbiter->nonsec3 &&
09320             &rbtdbiter->nsec3chain == rbtdbiter->current) {
09321                 rbtdbiter->current = &rbtdbiter->chain;
09322                 dns_rbtnodechain_reset(rbtdbiter->current);
09323                 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree,
09324                                                name, origin);
09325                 if (result == ISC_R_NOTFOUND)
09326                         result = ISC_R_NOMORE;
09327         }
09328 
09329         dereference_iter_node(rbtdbiter);
09330 
09331         if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
09332                 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
09333                 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
09334                                                   NULL, &rbtdbiter->node);
09335         }
09336 
09337         if (result == ISC_R_SUCCESS)
09338                 reference_iter_node(rbtdbiter);
09339 
09340         rbtdbiter->result = result;
09341 
09342         return (result);
09343 }
09344 
09345 static isc_result_t
09346 dbiterator_next(dns_dbiterator_t *iterator) {
09347         isc_result_t result;
09348         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
09349         dns_name_t *name, *origin;
09350         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
09351 
09352         REQUIRE(rbtdbiter->node != NULL);
09353 
09354         if (rbtdbiter->result != ISC_R_SUCCESS)
09355                 return (rbtdbiter->result);
09356 
09357         if (rbtdbiter->paused)
09358                 resume_iteration(rbtdbiter);
09359 
09360         name = dns_fixedname_name(&rbtdbiter->name);
09361         origin = dns_fixedname_name(&rbtdbiter->origin);
09362         result = dns_rbtnodechain_next(rbtdbiter->current, name, origin);
09363         if (result == ISC_R_NOMORE && !rbtdbiter->nsec3only &&
09364             !rbtdbiter->nonsec3 && &rbtdbiter->chain == rbtdbiter->current) {
09365                 rbtdbiter->current = &rbtdbiter->nsec3chain;
09366                 dns_rbtnodechain_reset(rbtdbiter->current);
09367                 result = dns_rbtnodechain_first(rbtdbiter->current,
09368                                                 rbtdb->nsec3, name, origin);
09369                 if (result == ISC_R_NOTFOUND)
09370                         result = ISC_R_NOMORE;
09371         }
09372 
09373         dereference_iter_node(rbtdbiter);
09374 
09375         if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
09376                 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
09377                 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
09378                                                   NULL, &rbtdbiter->node);
09379         }
09380         if (result == ISC_R_SUCCESS)
09381                 reference_iter_node(rbtdbiter);
09382 
09383         rbtdbiter->result = result;
09384 
09385         return (result);
09386 }
09387 
09388 static isc_result_t
09389 dbiterator_current(dns_dbiterator_t *iterator, dns_dbnode_t **nodep,
09390                    dns_name_t *name)
09391 {
09392         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
09393         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
09394         dns_rbtnode_t *node = rbtdbiter->node;
09395         isc_result_t result;
09396         dns_name_t *nodename = dns_fixedname_name(&rbtdbiter->name);
09397         dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
09398 
09399         REQUIRE(rbtdbiter->result == ISC_R_SUCCESS);
09400         REQUIRE(rbtdbiter->node != NULL);
09401 
09402         if (rbtdbiter->paused)
09403                 resume_iteration(rbtdbiter);
09404 
09405         if (name != NULL) {
09406                 if (rbtdbiter->common.relative_names)
09407                         origin = NULL;
09408                 result = dns_name_concatenate(nodename, origin, name, NULL);
09409                 if (result != ISC_R_SUCCESS)
09410                         return (result);
09411                 if (rbtdbiter->common.relative_names && rbtdbiter->new_origin)
09412                         result = DNS_R_NEWORIGIN;
09413         } else
09414                 result = ISC_R_SUCCESS;
09415 
09416         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
09417         new_reference(rbtdb, node);
09418         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
09419 
09420         *nodep = rbtdbiter->node;
09421 
09422         if (iterator->cleaning && result == ISC_R_SUCCESS) {
09423                 isc_result_t expire_result;
09424 
09425                 /*
09426                  * If the deletion array is full, flush it before trying
09427                  * to expire the current node.  The current node can't
09428                  * fully deleted while the iteration cursor is still on it.
09429                  */
09430                 if (rbtdbiter->delete == DELETION_BATCH_MAX)
09431                         flush_deletions(rbtdbiter);
09432 
09433                 expire_result = expirenode(iterator->db, *nodep, 0);
09434 
09435                 /*
09436                  * expirenode() currently always returns success.
09437                  */
09438                 if (expire_result == ISC_R_SUCCESS && node->down == NULL) {
09439                         unsigned int refs;
09440 
09441                         rbtdbiter->deletions[rbtdbiter->delete++] = node;
09442                         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
09443                         dns_rbtnode_refincrement(node, &refs);
09444                         INSIST(refs != 0);
09445                         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
09446                 }
09447         }
09448 
09449         return (result);
09450 }
09451 
09452 static isc_result_t
09453 dbiterator_pause(dns_dbiterator_t *iterator) {
09454         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
09455         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
09456 
09457         if (rbtdbiter->result != ISC_R_SUCCESS &&
09458             rbtdbiter->result != ISC_R_NOMORE)
09459                 return (rbtdbiter->result);
09460 
09461         if (rbtdbiter->paused)
09462                 return (ISC_R_SUCCESS);
09463 
09464         rbtdbiter->paused = ISC_TRUE;
09465 
09466         if (rbtdbiter->tree_locked != isc_rwlocktype_none) {
09467                 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_read);
09468                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
09469                 rbtdbiter->tree_locked = isc_rwlocktype_none;
09470         }
09471 
09472         flush_deletions(rbtdbiter);
09473 
09474         return (ISC_R_SUCCESS);
09475 }
09476 
09477 static isc_result_t
09478 dbiterator_origin(dns_dbiterator_t *iterator, dns_name_t *name) {
09479         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
09480         dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
09481 
09482         if (rbtdbiter->result != ISC_R_SUCCESS)
09483                 return (rbtdbiter->result);
09484 
09485         return (dns_name_copy(origin, name, NULL));
09486 }
09487 
09488 /*%
09489  * Additional cache routines.
09490  */
09491 static isc_result_t
09492 rdataset_getadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
09493                        dns_rdatatype_t qtype, dns_acache_t *acache,
09494                        dns_zone_t **zonep, dns_db_t **dbp,
09495                        dns_dbversion_t **versionp, dns_dbnode_t **nodep,
09496                        dns_name_t *fname, dns_message_t *msg,
09497                        isc_stdtime_t now)
09498 {
09499         dns_rbtdb_t *rbtdb = rdataset->private1;
09500         dns_rbtnode_t *rbtnode = rdataset->private2;
09501         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
09502         unsigned int current_count = rdataset->privateuint4;
09503         unsigned int count;
09504         rdatasetheader_t *header;
09505         nodelock_t *nodelock;
09506         unsigned int total_count;
09507         acachectl_t *acarray;
09508         dns_acacheentry_t *entry;
09509         isc_result_t result;
09510 
09511         UNUSED(qtype); /* we do not use this value at least for now */
09512         UNUSED(acache);
09513 
09514         header = (struct rdatasetheader *)(raw - sizeof(*header));
09515 
09516         total_count = raw[0] * 256 + raw[1];
09517         INSIST(total_count > current_count);
09518         count = total_count - current_count - 1;
09519 
09520         acarray = NULL;
09521 
09522         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
09523         NODE_LOCK(nodelock, isc_rwlocktype_read);
09524 
09525         switch (type) {
09526         case dns_rdatasetadditional_fromauth:
09527                 acarray = header->additional_auth;
09528                 break;
09529         case dns_rdatasetadditional_fromcache:
09530                 acarray = NULL;
09531                 break;
09532         case dns_rdatasetadditional_fromglue:
09533                 acarray = header->additional_glue;
09534                 break;
09535         default:
09536                 INSIST(0);
09537         }
09538 
09539         if (acarray == NULL) {
09540                 if (type != dns_rdatasetadditional_fromcache)
09541                         dns_acache_countquerymiss(acache);
09542                 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
09543                 return (ISC_R_NOTFOUND);
09544         }
09545 
09546         if (acarray[count].entry == NULL) {
09547                 dns_acache_countquerymiss(acache);
09548                 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
09549                 return (ISC_R_NOTFOUND);
09550         }
09551 
09552         entry = NULL;
09553         dns_acache_attachentry(acarray[count].entry, &entry);
09554 
09555         NODE_UNLOCK(nodelock, isc_rwlocktype_read);
09556 
09557         result = dns_acache_getentry(entry, zonep, dbp, versionp,
09558                                      nodep, fname, msg, now);
09559 
09560         dns_acache_detachentry(&entry);
09561 
09562         return (result);
09563 }
09564 
09565 static void
09566 acache_callback(dns_acacheentry_t *entry, void **arg) {
09567         dns_rbtdb_t *rbtdb;
09568         dns_rbtnode_t *rbtnode;
09569         nodelock_t *nodelock;
09570         acachectl_t *acarray = NULL;
09571         acache_cbarg_t *cbarg;
09572         unsigned int count;
09573 
09574         REQUIRE(arg != NULL);
09575         cbarg = *arg;
09576 
09577         /*
09578          * The caller must hold the entry lock.
09579          */
09580 
09581         rbtdb = (dns_rbtdb_t *)cbarg->db;
09582         rbtnode = (dns_rbtnode_t *)cbarg->node;
09583 
09584         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
09585         NODE_LOCK(nodelock, isc_rwlocktype_write);
09586 
09587         switch (cbarg->type) {
09588         case dns_rdatasetadditional_fromauth:
09589                 acarray = cbarg->header->additional_auth;
09590                 break;
09591         case dns_rdatasetadditional_fromglue:
09592                 acarray = cbarg->header->additional_glue;
09593                 break;
09594         default:
09595                 INSIST(0);
09596         }
09597 
09598         count = cbarg->count;
09599         if (acarray != NULL && acarray[count].entry == entry) {
09600                 acarray[count].entry = NULL;
09601                 INSIST(acarray[count].cbarg == cbarg);
09602                 acarray[count].cbarg = NULL;
09603                 isc_mem_put(rbtdb->common.mctx, cbarg, sizeof(acache_cbarg_t));
09604                 dns_acache_detachentry(&entry);
09605         }
09606 
09607         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
09608 
09609         dns_db_detachnode((dns_db_t *)rbtdb, (dns_dbnode_t **)(void*)&rbtnode);
09610         dns_db_detach((dns_db_t **)(void*)&rbtdb);
09611 
09612         *arg = NULL;
09613 }
09614 
09615 static void
09616 acache_cancelentry(isc_mem_t *mctx, dns_acacheentry_t *entry,
09617                       acache_cbarg_t **cbargp)
09618 {
09619         acache_cbarg_t *cbarg;
09620 
09621         REQUIRE(mctx != NULL);
09622         REQUIRE(entry != NULL);
09623         REQUIRE(cbargp != NULL && *cbargp != NULL);
09624 
09625         cbarg = *cbargp;
09626 
09627         if (dns_acache_cancelentry(entry)) {
09628                 dns_db_detachnode(cbarg->db, &cbarg->node);
09629                 dns_db_detach(&cbarg->db);
09630         }
09631 
09632         isc_mem_put(mctx, cbarg, sizeof(acache_cbarg_t));
09633 
09634         *cbargp = NULL;
09635 }
09636 
09637 static isc_result_t
09638 rdataset_setadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
09639                        dns_rdatatype_t qtype, dns_acache_t *acache,
09640                        dns_zone_t *zone, dns_db_t *db,
09641                        dns_dbversion_t *version, dns_dbnode_t *node,
09642                        dns_name_t *fname)
09643 {
09644         dns_rbtdb_t *rbtdb = rdataset->private1;
09645         dns_rbtnode_t *rbtnode = rdataset->private2;
09646         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
09647         unsigned int current_count = rdataset->privateuint4;
09648         rdatasetheader_t *header;
09649         unsigned int total_count, count;
09650         nodelock_t *nodelock;
09651         isc_result_t result;
09652         acachectl_t *acarray;
09653         dns_acacheentry_t *newentry, *oldentry = NULL;
09654         acache_cbarg_t *newcbarg, *oldcbarg = NULL;
09655 
09656         UNUSED(qtype);
09657 
09658         if (type == dns_rdatasetadditional_fromcache)
09659                 return (ISC_R_SUCCESS);
09660 
09661         header = (struct rdatasetheader *)(raw - sizeof(*header));
09662 
09663         total_count = raw[0] * 256 + raw[1];
09664         INSIST(total_count > current_count);
09665         count = total_count - current_count - 1; /* should be private data */
09666 
09667         newcbarg = isc_mem_get(rbtdb->common.mctx, sizeof(*newcbarg));
09668         if (newcbarg == NULL)
09669                 return (ISC_R_NOMEMORY);
09670         newcbarg->type = type;
09671         newcbarg->count = count;
09672         newcbarg->header = header;
09673         newcbarg->db = NULL;
09674         dns_db_attach((dns_db_t *)rbtdb, &newcbarg->db);
09675         newcbarg->node = NULL;
09676         dns_db_attachnode((dns_db_t *)rbtdb, (dns_dbnode_t *)rbtnode,
09677                           &newcbarg->node);
09678         newentry = NULL;
09679         result = dns_acache_createentry(acache, (dns_db_t *)rbtdb,
09680                                         acache_callback, newcbarg, &newentry);
09681         if (result != ISC_R_SUCCESS)
09682                 goto fail;
09683 
09684         /* Set cache data in the new entry. */
09685         result = dns_acache_setentry(acache, newentry, zone, db,
09686                                      version, node, fname);
09687         if (result != ISC_R_SUCCESS)
09688                 goto fail;
09689 
09690         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
09691         NODE_LOCK(nodelock, isc_rwlocktype_write);
09692 
09693         acarray = NULL;
09694         switch (type) {
09695         case dns_rdatasetadditional_fromauth:
09696                 acarray = header->additional_auth;
09697                 break;
09698         case dns_rdatasetadditional_fromglue:
09699                 acarray = header->additional_glue;
09700                 break;
09701         default:
09702                 INSIST(0);
09703         }
09704 
09705         if (acarray == NULL) {
09706                 unsigned int i;
09707 
09708                 acarray = isc_mem_get(rbtdb->common.mctx, total_count *
09709                                       sizeof(acachectl_t));
09710 
09711                 if (acarray == NULL) {
09712                         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
09713                         goto fail;
09714                 }
09715 
09716                 for (i = 0; i < total_count; i++) {
09717                         acarray[i].entry = NULL;
09718                         acarray[i].cbarg = NULL;
09719                 }
09720         }
09721         switch (type) {
09722         case dns_rdatasetadditional_fromauth:
09723                 header->additional_auth = acarray;
09724                 break;
09725         case dns_rdatasetadditional_fromglue:
09726                 header->additional_glue = acarray;
09727                 break;
09728         default:
09729                 INSIST(0);
09730         }
09731 
09732         if (acarray[count].entry != NULL) {
09733                 /*
09734                  * Swap the entry.  Delay cleaning-up the old entry since
09735                  * it would require a node lock.
09736                  */
09737                 oldentry = acarray[count].entry;
09738                 INSIST(acarray[count].cbarg != NULL);
09739                 oldcbarg = acarray[count].cbarg;
09740         }
09741         acarray[count].entry = newentry;
09742         acarray[count].cbarg = newcbarg;
09743 
09744         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
09745 
09746         if (oldentry != NULL) {
09747                 acache_cancelentry(rbtdb->common.mctx, oldentry, &oldcbarg);
09748                 dns_acache_detachentry(&oldentry);
09749         }
09750 
09751         return (ISC_R_SUCCESS);
09752 
09753  fail:
09754         if (newcbarg != NULL) {
09755                 if (newentry != NULL) {
09756                         acache_cancelentry(rbtdb->common.mctx, newentry,
09757                                            &newcbarg);
09758                         dns_acache_detachentry(&newentry);
09759                 } else {
09760                         dns_db_detachnode((dns_db_t *)rbtdb, &newcbarg->node);
09761                         dns_db_detach(&newcbarg->db);
09762                         isc_mem_put(rbtdb->common.mctx, newcbarg,
09763                             sizeof(*newcbarg));
09764                 }
09765         }
09766 
09767         return (result);
09768 }
09769 
09770 static isc_result_t
09771 rdataset_putadditional(dns_acache_t *acache, dns_rdataset_t *rdataset,
09772                        dns_rdatasetadditional_t type, dns_rdatatype_t qtype)
09773 {
09774         dns_rbtdb_t *rbtdb = rdataset->private1;
09775         dns_rbtnode_t *rbtnode = rdataset->private2;
09776         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
09777         unsigned int current_count = rdataset->privateuint4;
09778         rdatasetheader_t *header;
09779         nodelock_t *nodelock;
09780         unsigned int total_count, count;
09781         acachectl_t *acarray;
09782         dns_acacheentry_t *entry;
09783         acache_cbarg_t *cbarg;
09784 
09785         UNUSED(qtype);          /* we do not use this value at least for now */
09786         UNUSED(acache);
09787 
09788         if (type == dns_rdatasetadditional_fromcache)
09789                 return (ISC_R_SUCCESS);
09790 
09791         header = (struct rdatasetheader *)(raw - sizeof(*header));
09792 
09793         total_count = raw[0] * 256 + raw[1];
09794         INSIST(total_count > current_count);
09795         count = total_count - current_count - 1;
09796 
09797         acarray = NULL;
09798         entry = NULL;
09799 
09800         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
09801         NODE_LOCK(nodelock, isc_rwlocktype_write);
09802 
09803         switch (type) {
09804         case dns_rdatasetadditional_fromauth:
09805                 acarray = header->additional_auth;
09806                 break;
09807         case dns_rdatasetadditional_fromglue:
09808                 acarray = header->additional_glue;
09809                 break;
09810         default:
09811                 INSIST(0);
09812         }
09813 
09814         if (acarray == NULL) {
09815                 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
09816                 return (ISC_R_NOTFOUND);
09817         }
09818 
09819         entry = acarray[count].entry;
09820         if (entry == NULL) {
09821                 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
09822                 return (ISC_R_NOTFOUND);
09823         }
09824 
09825         acarray[count].entry = NULL;
09826         cbarg = acarray[count].cbarg;
09827         acarray[count].cbarg = NULL;
09828 
09829         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
09830 
09831         if (entry != NULL) {
09832                 if (cbarg != NULL)
09833                         acache_cancelentry(rbtdb->common.mctx, entry, &cbarg);
09834                 dns_acache_detachentry(&entry);
09835         }
09836 
09837         return (ISC_R_SUCCESS);
09838 }
09839 
09840 static void
09841 setownercase(rdatasetheader_t *header, const dns_name_t *name) {
09842         unsigned int i;
09843 
09844         /*
09845          * We do not need to worry about label lengths as they are all
09846          * less than or equal to 63.
09847          */
09848         memset(header->upper, 0, sizeof(header->upper));
09849         for (i = 0; i < name->length; i++)
09850                 if (name->ndata[i] >= 0x41 && name->ndata[i] <= 0x5a)
09851                         header->upper[i/8] |= 1 << (i%8);
09852         header->attributes |= RDATASET_ATTR_CASESET;
09853 }
09854 
09855 static void
09856 rdataset_setownercase(dns_rdataset_t *rdataset, const dns_name_t *name) {
09857         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
09858         rdatasetheader_t *header;
09859 
09860         header = (struct rdatasetheader *)(raw - sizeof(*header));
09861         setownercase(header, name);
09862 }
09863 
09864 static void
09865 rdataset_getownercase(const dns_rdataset_t *rdataset, dns_name_t *name) {
09866         const unsigned char *raw = rdataset->private3;        /* RDATASLAB */
09867         const rdatasetheader_t *header;
09868         unsigned int i;
09869 
09870         header = (const struct rdatasetheader *)(raw - sizeof(*header));
09871 
09872         if (!CASESET(header))
09873                 return;
09874 
09875         for (i = 0; i < name->length; i++) {
09876                 /*
09877                  * Set the case bit if it does not match the recorded bit.
09878                  */
09879                 if (name->ndata[i] >= 0x61 && name->ndata[i] <= 0x7a &&
09880                     (header->upper[i/8] & (1 << (i%8))) != 0)
09881                         name->ndata[i] &= ~0x20; /* clear the lower case bit */
09882                 else if (name->ndata[i] >= 0x41 && name->ndata[i] <= 0x5a &&
09883                          (header->upper[i/8] & (1 << (i%8))) == 0)
09884                         name->ndata[i] |= 0x20; /* set the lower case bit */
09885         }
09886 }
09887 
09888 /*%
09889  * Routines for LRU-based cache management.
09890  */
09891 
09892 /*%
09893  * See if a given cache entry that is being reused needs to be updated
09894  * in the LRU-list.  From the LRU management point of view, this function is
09895  * expected to return true for almost all cases.  When used with threads,
09896  * however, this may cause a non-negligible performance penalty because a
09897  * writer lock will have to be acquired before updating the list.
09898  * If DNS_RBTDB_LIMITLRUUPDATE is defined to be non 0 at compilation time, this
09899  * function returns true if the entry has not been updated for some period of
09900  * time.  We differentiate the NS or glue address case and the others since
09901  * experiments have shown that the former tends to be accessed relatively
09902  * infrequently and the cost of cache miss is higher (e.g., a missing NS records
09903  * may cause external queries at a higher level zone, involving more
09904  * transactions).
09905  *
09906  * Caller must hold the node (read or write) lock.
09907  */
09908 static inline isc_boolean_t
09909 need_headerupdate(rdatasetheader_t *header, isc_stdtime_t now) {
09910         if ((header->attributes &
09911              (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0)
09912                 return (ISC_FALSE);
09913 
09914 #if DNS_RBTDB_LIMITLRUUPDATE
09915         if (header->type == dns_rdatatype_ns ||
09916             (header->trust == dns_trust_glue &&
09917              (header->type == dns_rdatatype_a ||
09918               header->type == dns_rdatatype_aaaa))) {
09919                 /*
09920                  * Glue records are updated if at least 60 seconds have passed
09921                  * since the previous update time.
09922                  */
09923                 return (header->last_used + 60 <= now);
09924         }
09925 
09926         /* Other records are updated if 5 minutes have passed. */
09927         return (header->last_used + 300 <= now);
09928 #else
09929         UNUSED(now);
09930 
09931         return (ISC_TRUE);
09932 #endif
09933 }
09934 
09935 /*%
09936  * Update the timestamp of a given cache entry and move it to the head
09937  * of the corresponding LRU list.
09938  *
09939  * Caller must hold the node (write) lock.
09940  *
09941  * Note that the we do NOT touch the heap here, as the TTL has not changed.
09942  */
09943 static void
09944 update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
09945               isc_stdtime_t now)
09946 {
09947         INSIST(IS_CACHE(rbtdb));
09948 
09949         /* To be checked: can we really assume this? XXXMLG */
09950         INSIST(ISC_LINK_LINKED(header, link));
09951 
09952         ISC_LIST_UNLINK(rbtdb->rdatasets[header->node->locknum], header, link);
09953         header->last_used = now;
09954         ISC_LIST_PREPEND(rbtdb->rdatasets[header->node->locknum], header, link);
09955 }
09956 
09957 /*%
09958  * Purge some expired and/or stale (i.e. unused for some period) cache entries
09959  * under an overmem condition.  To recover from this condition quickly, up to
09960  * 2 entries will be purged.  This process is triggered while adding a new
09961  * entry, and we specifically avoid purging entries in the same LRU bucket as
09962  * the one to which the new entry will belong.  Otherwise, we might purge
09963  * entries of the same name of different RR types while adding RRsets from a
09964  * single response (consider the case where we're adding A and AAAA glue records
09965  * of the same NS name).
09966  */
09967 static void
09968 overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
09969               isc_stdtime_t now, isc_boolean_t tree_locked)
09970 {
09971         rdatasetheader_t *header, *header_prev;
09972         unsigned int locknum;
09973         int purgecount = 2;
09974 
09975         for (locknum = (locknum_start + 1) % rbtdb->node_lock_count;
09976              locknum != locknum_start && purgecount > 0;
09977              locknum = (locknum + 1) % rbtdb->node_lock_count) {
09978                 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
09979                           isc_rwlocktype_write);
09980 
09981                 header = isc_heap_element(rbtdb->heaps[locknum], 1);
09982                 if (header && header->rdh_ttl < now - RBTDB_VIRTUAL) {
09983                         expire_header(rbtdb, header, tree_locked,
09984                                       expire_ttl);
09985                         purgecount--;
09986                 }
09987 
09988                 for (header = ISC_LIST_TAIL(rbtdb->rdatasets[locknum]);
09989                      header != NULL && purgecount > 0;
09990                      header = header_prev) {
09991                         header_prev = ISC_LIST_PREV(header, link);
09992                         /*
09993                          * Unlink the entry at this point to avoid checking it
09994                          * again even if it's currently used someone else and
09995                          * cannot be purged at this moment.  This entry won't be
09996                          * referenced any more (so unlinking is safe) since the
09997                          * TTL was reset to 0.
09998                          */
09999                         ISC_LIST_UNLINK(rbtdb->rdatasets[locknum], header,
10000                                         link);
10001                         expire_header(rbtdb, header, tree_locked,
10002                                       expire_lru);
10003                         purgecount--;
10004                 }
10005 
10006                 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
10007                                     isc_rwlocktype_write);
10008         }
10009 }
10010 
10011 static void
10012 expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
10013               isc_boolean_t tree_locked, expire_t reason)
10014 {
10015         set_ttl(rbtdb, header, 0);
10016         mark_stale_header(rbtdb, header);
10017 
10018         /*
10019          * Caller must hold the node (write) lock.
10020          */
10021 
10022         if (dns_rbtnode_refcurrent(header->node) == 0) {
10023                 /*
10024                  * If no one else is using the node, we can clean it up now.
10025                  * We first need to gain a new reference to the node to meet a
10026                  * requirement of decrement_reference().
10027                  */
10028                 new_reference(rbtdb, header->node);
10029                 decrement_reference(rbtdb, header->node, 0,
10030                                     isc_rwlocktype_write,
10031                                     tree_locked ? isc_rwlocktype_write :
10032                                     isc_rwlocktype_none, ISC_FALSE);
10033 
10034                 if (rbtdb->cachestats == NULL)
10035                         return;
10036 
10037                 switch (reason) {
10038                 case expire_ttl:
10039                         isc_stats_increment(rbtdb->cachestats,
10040                                             dns_cachestatscounter_deletettl);
10041                         break;
10042                 case expire_lru:
10043                         isc_stats_increment(rbtdb->cachestats,
10044                                             dns_cachestatscounter_deletelru);
10045                         break;
10046                 default:
10047                         break;
10048                 }
10049 
10050         }
10051 }

Generated on Tue Apr 28 17:40:59 2015 by Doxygen 1.5.4 for BIND9 Internals 9.11.0pre-alpha