rbtdb.c

Go to the documentation of this file.
00001 /*
00002  * Copyright (C) 2004-2015  Internet Systems Consortium, Inc. ("ISC")
00003  * Copyright (C) 1999-2003  Internet Software Consortium.
00004  *
00005  * Permission to use, copy, modify, and/or distribute this software for any
00006  * purpose with or without fee is hereby granted, provided that the above
00007  * copyright notice and this permission notice appear in all copies.
00008  *
00009  * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
00010  * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
00011  * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
00012  * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
00013  * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
00014  * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
00015  * PERFORMANCE OF THIS SOFTWARE.
00016  */
00017 
00018 /*! \file */
00019 
00020 /*
00021  * Principal Author: Bob Halley
00022  */
00023 
00024 #include <config.h>
00025 
00026 /* #define inline */
00027 
00028 #ifdef HAVE_INTTYPES_H
00029 #include <inttypes.h> /* uintptr_t */
00030 #endif
00031 
00032 #include <isc/crc64.h>
00033 #include <isc/event.h>
00034 #include <isc/heap.h>
00035 #include <isc/file.h>
00036 #include <isc/hex.h>
00037 #include <isc/mem.h>
00038 #include <isc/mutex.h>
00039 #include <isc/once.h>
00040 #include <isc/platform.h>
00041 #include <isc/print.h>
00042 #include <isc/random.h>
00043 #include <isc/refcount.h>
00044 #include <isc/rwlock.h>
00045 #include <isc/serial.h>
00046 #include <isc/socket.h>
00047 #include <isc/stdio.h>
00048 #include <isc/string.h>
00049 #include <isc/task.h>
00050 #include <isc/time.h>
00051 #include <isc/util.h>
00052 
00053 #include <dns/acache.h>
00054 #include <dns/callbacks.h>
00055 #include <dns/db.h>
00056 #include <dns/dbiterator.h>
00057 #include <dns/events.h>
00058 #include <dns/fixedname.h>
00059 #include <dns/lib.h>
00060 #include <dns/log.h>
00061 #include <dns/masterdump.h>
00062 #include <dns/nsec.h>
00063 #include <dns/nsec3.h>
00064 #include <dns/rbt.h>
00065 #include <dns/rpz.h>
00066 #include <dns/rdata.h>
00067 #include <dns/rdataset.h>
00068 #include <dns/rdatasetiter.h>
00069 #include <dns/rdataslab.h>
00070 #include <dns/rdatastruct.h>
00071 #include <dns/result.h>
00072 #include <dns/stats.h>
00073 #include <dns/version.h>
00074 #include <dns/view.h>
00075 #include <dns/zone.h>
00076 #include <dns/zonekey.h>
00077 
/*
 * Memory-mapping constants.  Builds where WIN32 is not defined take them
 * from <sys/mman.h>; the WIN32 build does not include that header, so the
 * four constants below are defined locally instead.
 */
00078 #ifndef WIN32
00079 #include <sys/mman.h>
00080 #else
00081 #define PROT_READ       0x01
00082 #define PROT_WRITE      0x02
00083 #define MAP_PRIVATE     0x0002
00084 #define MAP_FAILED      ((void *)-1)
00085 #endif
00086 
00087 #ifdef DNS_RBTDB_VERSION64
00088 #include "rbtdb64.h"
00089 #else
00090 #include "rbtdb.h"
00091 #endif
00092 
/*
 * Implementation magic number that VALID_RBTDB() compares against
 * common.impmagic.  The last character differs between the two builds:
 * '8' when DNS_RBTDB_VERSION64 is defined, '4' otherwise.
 */
00093 #ifdef DNS_RBTDB_VERSION64
00094 #define RBTDB_MAGIC                     ISC_MAGIC('R', 'B', 'D', '8')
00095 #else
00096 #define RBTDB_MAGIC                     ISC_MAGIC('R', 'B', 'D', '4')
00097 #endif
00098 
/*
 * Evaluate 'op', store its value in the caller's 'result' variable and
 * jump to the caller's 'failure' label unless the value is ISC_R_SUCCESS.
 * A function that uses CHECK() must therefore provide both 'result' and
 * a 'failure:' label.  The do { } while (0) wrapper makes the macro behave
 * as a single statement.
 */
00099 #define CHECK(op) \
00100         do { result = (op); \
00101                 if (result != ISC_R_SUCCESS) goto failure; \
00102         } while (0)
00103 
00104 /*
00105  * This is the map file header for RBTDB images.  It is populated, and then
00106  * written, as the LAST thing done to the file.  Writing this last (with
00107  * zeros in the header area initially) will ensure that the header is only
00108  * valid when the RBTDB image is also valid.
00109  */
00110 typedef struct rbtdb_file_header rbtdb_file_header_t;
00111 
00112 /* Header length, always the same size regardless of structure size */
00113 #define RBTDB_HEADER_LENGTH     1024
00114 
/*
 * Layout of the map file header.
 *
 * NOTE(review): the code that fills in and validates these fields is not
 * part of this section of the file, so the per-field notes below are
 * assumptions to be confirmed against the serialize/deserialize code.
 */
00115 struct rbtdb_file_header {
              /* Version string; repeated in version2 at the end. */
00116         char version1[32];
              /* presumably the pointer size of the writing host - TODO confirm */
00117         isc_uint32_t ptrsize;
              /* presumably set when the writing host is big-endian - TODO confirm */
00118         unsigned int bigendian:1;
              /*
               * presumably locate the three trees (cf. the tree, nsec and
               * nsec3 members of struct dns_rbtdb) inside the image
               * - TODO confirm
               */
00119         isc_uint64_t tree;
00120         isc_uint64_t nsec;
00121         isc_uint64_t nsec3;
00122 
00123         char version2[32];              /* repeated; must match version1 */
00124 };
00125 
00126 
/*
 * True when 'rbtdb' is non-NULL and its common.impmagic equals
 * RBTDB_MAGIC.  The argument is expanded twice, so it must not have
 * side effects.
 */
00127 /*%
00128  * Note that "impmagic" is not the first four bytes of the struct, so
00129  * ISC_MAGIC_VALID cannot be used.
00130  */
00131 #define VALID_RBTDB(rbtdb)      ((rbtdb) != NULL && \
00132                                  (rbtdb)->common.impmagic == RBTDB_MAGIC)
00133 
/*
 * Serial number type and, for the DNS_RBTDB_VERSION64 build, a table of
 * identifier renames.
 *
 * With DNS_RBTDB_VERSION64 defined the serial is 64 bits wide and every
 * identifier listed below is given a distinct "64" name by the
 * preprocessor (note that add32 and deserialize32 become add64 and
 * deserialize64 rather than gaining a suffix).  Without it the serial is
 * 32 bits wide and no renaming takes place.
 *
 * NOTE(review): "activeemtpynode" looks like a misspelling of
 * "activeemptynode".  The rename only takes effect if the function is
 * spelled the same way where it is defined and called, so check those
 * sites before correcting the spelling here.
 */
00134 #ifdef DNS_RBTDB_VERSION64
00135 typedef isc_uint64_t                    rbtdb_serial_t;
00136 /*%
00137  * Make casting easier in symbolic debuggers by using different names
00138  * for the 64 bit version.
00139  */
00140 #define dns_rbtdb_t dns_rbtdb64_t
00141 #define rdatasetheader_t rdatasetheader64_t
00142 #define rbtdb_version_t rbtdb_version64_t
00143 
00144 #define once once64
00145 #define FILE_VERSION FILE_VERSION64
00146 #define init_count init_count64
00147 
00148 #define cache_methods cache_methods64
00149 #define dbiterator_methods dbiterator_methods64
00150 #define rdataset_methods rdataset_methods64
00151 #define rdatasetiter_methods rdatasetiter_methods64
00152 #define slab_methods slab_methods64
00153 #define zone_methods zone_methods64
00154 
00155 #define acache_callback acache_callback64
00156 #define acache_cancelentry acache_cancelentry64
00157 #define activeempty activeempty64
00158 #define activeemtpynode activeemtpynode64
00159 #define add32 add64
00160 #define add_changed add_changed64
00161 #define add_empty_wildcards add_empty_wildcards64
00162 #define add_wildcard_magic add_wildcard_magic64
00163 #define addrdataset addrdataset64
00164 #define allrdatasets allrdatasets64
00165 #define attach attach64
00166 #define attachnode attachnode64
00167 #define attachversion attachversion64
00168 #define beginload beginload64
00169 #define bind_rdataset bind_rdataset64
00170 #define cache_find cache_find64
00171 #define cache_findrdataset cache_findrdataset64
00172 #define cache_findzonecut cache_findzonecut64
00173 #define cache_zonecut_callback cache_zonecut_callback64
00174 #define cleanup_dead_nodes cleanup_dead_nodes64
00175 #define cleanup_dead_nodes_callback cleanup_dead_nodes_callback64
00176 #define closeversion closeversion64
00177 #define createiterator createiterator64
00178 #define currentversion currentversion64
00179 #define dbiterator_current dbiterator_current64
00180 #define dbiterator_destroy dbiterator_destroy64
00181 #define dbiterator_first dbiterator_first64
00182 #define dbiterator_last dbiterator_last64
00183 #define dbiterator_next dbiterator_next64
00184 #define dbiterator_origin dbiterator_origin64
00185 #define dbiterator_pause dbiterator_pause64
00186 #define dbiterator_prev dbiterator_prev64
00187 #define dbiterator_seek dbiterator_seek64
00188 #define decrement_reference decrement_reference64
00189 #define delete_callback delete_callback64
00190 #define delete_node delete_node64
00191 #define deleterdataset deleterdataset64
00192 #define deserialize32 deserialize64
00193 #define detach detach64
00194 #define detachnode detachnode64
00195 #define dump dump64
00196 #define endload endload64
00197 #define expire_header expire_header64
00198 #define expirenode expirenode64
00199 #define find_closest_nsec find_closest_nsec64
00200 #define find_coveringnsec find_coveringnsec64
00201 #define find_deepest_zonecut find_deepest_zonecut64
00202 #define findnode findnode64
00203 #define findnodeintree findnodeintree64
00204 #define findnsec3node findnsec3node64
00205 #define flush_deletions flush_deletions64
00206 #define free_acachearray free_acachearray64
00207 #define free_noqname free_noqname64
00208 #define free_rbtdb free_rbtdb64
00209 #define free_rbtdb_callback free_rbtdb_callback64
00210 #define free_rdataset free_rdataset64
00211 #define getnsec3parameters getnsec3parameters64
00212 #define getoriginnode getoriginnode64
00213 #define getrrsetstats getrrsetstats64
00214 #define getsigningtime getsigningtime64
00215 #define hashsize hashsize64
00216 #define init_file_version init_file_version64
00217 #define isdnssec isdnssec64
00218 #define ispersistent ispersistent64
00219 #define issecure issecure64
00220 #define iszonesecure iszonesecure64
00221 #define loading_addrdataset loading_addrdataset64
00222 #define loadnode loadnode64
00223 #define matchparams matchparams64
00224 #define maybe_free_rbtdb maybe_free_rbtdb64
00225 #define new_reference new_reference64
00226 #define newversion newversion64
00227 #define nodecount nodecount64
00228 #define overmem overmem64
00229 #define previous_closest_nsec previous_closest_nsec64
00230 #define printnode printnode64
00231 #define prune_tree prune_tree64
00232 #define rbt_datafixer rbt_datafixer64
00233 #define rbt_datawriter rbt_datawriter64
00234 #define rdataset_clearprefetch rdataset_clearprefetch64
00235 #define rdataset_clone rdataset_clone64
00236 #define rdataset_count rdataset_count64
00237 #define rdataset_current rdataset_current64
00238 #define rdataset_disassociate rdataset_disassociate64
00239 #define rdataset_expire rdataset_expire64
00240 #define rdataset_first rdataset_first64
00241 #define rdataset_getadditional rdataset_getadditional64
00242 #define rdataset_getclosest rdataset_getclosest64
00243 #define rdataset_getnoqname rdataset_getnoqname64
00244 #define rdataset_getownercase rdataset_getownercase64
00245 #define rdataset_next rdataset_next64
00246 #define rdataset_putadditional rdataset_putadditional64
00247 #define rdataset_setadditional rdataset_setadditional64
00248 #define rdataset_setownercase rdataset_setownercase64
00249 #define rdataset_settrust rdataset_settrust64
00250 #define rdatasetiter_current rdatasetiter_current64
00251 #define rdatasetiter_destroy rdatasetiter_destroy64
00252 #define rdatasetiter_first rdatasetiter_first64
00253 #define rdatasetiter_next rdatasetiter_next64
00254 #define reactivate_node reactivate_node64
00255 #define resign_delete resign_delete64
00256 #define resign_insert resign_insert64
00257 #define resign_sooner resign_sooner64
00258 #define resigned resigned64
00259 #define rpz_attach rpz_attach64
00260 #define rpz_ready rpz_ready64
00261 #define serialize serialize64
00262 #define set_index set_index64
00263 #define set_ttl set_ttl64
00264 #define setcachestats setcachestats64
00265 #define setownercase setownercase64
00266 #define setsigningtime setsigningtime64
00267 #define settask settask64
00268 #define setup_delegation setup_delegation64
00269 #define subtractrdataset subtractrdataset64
00270 #define ttl_sooner ttl_sooner64
00271 #define update_cachestats update_cachestats64
00272 #define update_header update_header64
00273 #define update_newheader update_newheader64
00274 #define update_rrsetstats update_rrsetstats64
00275 #define zone_find zone_find64
00276 #define zone_findrdataset zone_findrdataset64
00277 #define zone_findzonecut zone_findzonecut64
00278 #define zone_zonecut_callback zone_zonecut_callback64
00279 
00280 #else
00281 typedef isc_uint32_t                    rbtdb_serial_t;
00282 #endif
00283 
/*
 * 32-bit composite type key.  The RBTDB_RDATATYPE_BASE/EXT/VALUE macros
 * treat the low 16 bits as the "base" type and the high 16 bits as the
 * "ext" type.
 */
00284 typedef isc_uint32_t                    rbtdb_rdatatype_t;
00285 
/*
 * Helpers for the composite rbtdb_rdatatype_t key, which holds a "base"
 * type in its low 16 bits and an "ext" type in its high 16 bits.
 *
 * RBTDB_RDATATYPE_BASE(type)   - the low 16 bits, as a dns_rdatatype_t.
 * RBTDB_RDATATYPE_EXT(type)    - the high 16 bits, as a dns_rdatatype_t.
 * RBTDB_RDATATYPE_VALUE(b, e)  - the key built from base 'b' and ext 'e'.
 *
 * RBTDB_RDATATYPE_VALUE converts both operands to rbtdb_rdatatype_t
 * before shifting and or-ing.  A 16-bit 'e' would otherwise be promoted to
 * (signed) int, and shifting a value of 0x8000 or more left by 16 bits
 * overflows int, which is undefined behaviour.
 */
#define RBTDB_RDATATYPE_BASE(type)      ((dns_rdatatype_t)((type) & 0xFFFF))
#define RBTDB_RDATATYPE_EXT(type)       ((dns_rdatatype_t)((type) >> 16))
#define RBTDB_RDATATYPE_VALUE(b, e) \
        (((rbtdb_rdatatype_t)(e) << 16) | (rbtdb_rdatatype_t)(b))
00289 
/*
 * Pre-built composite keys.  RBTDB_RDATATYPE_VALUE takes (base, ext), so
 * the first six have dns_rdatatype_rrsig as base and nsec, nsec3, ns,
 * cname, dname or ds as ext (presumably the type the signature covers
 * - TODO confirm).  RBTDB_RDATATYPE_NCACHEANY has base 0 and ext
 * dns_rdatatype_any.
 */
00290 #define RBTDB_RDATATYPE_SIGNSEC \
00291                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec)
00292 #define RBTDB_RDATATYPE_SIGNSEC3 \
00293                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec3)
00294 #define RBTDB_RDATATYPE_SIGNS \
00295                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_ns)
00296 #define RBTDB_RDATATYPE_SIGCNAME \
00297                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_cname)
00298 #define RBTDB_RDATATYPE_SIGDNAME \
00299                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_dname)
00300 #define RBTDB_RDATATYPE_SIGDDS \
00301                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_ds)
00302 #define RBTDB_RDATATYPE_NCACHEANY \
00303                 RBTDB_RDATATYPE_VALUE(0, dns_rdatatype_any)
00304 
00305 /*
00306  * We use rwlock for DB lock only when ISC_RWLOCK_USEATOMIC is non 0.
00307  * Using rwlock is effective with regard to lookup performance only when
00308  * it is implemented in an efficient way.
00309  * Otherwise, it is generally wise to stick to the simple locking since rwlock
00310  * would require more memory or can even make lookups slower due to its own
00311  * overhead (when it internally calls mutex locks).
00312  */
/*
 * DNS_RBTDB_USERWLOCK is 1 when ISC_RWLOCK_USEATOMIC is defined and 0
 * otherwise.  The test is #ifdef, so the rwlock variant is selected
 * whenever ISC_RWLOCK_USEATOMIC is defined at all, including when it is
 * defined as 0.
 */
00313 #ifdef ISC_RWLOCK_USEATOMIC
00314 #define DNS_RBTDB_USERWLOCK 1
00315 #else
00316 #define DNS_RBTDB_USERWLOCK 0
00317 #endif
00318 
/*
 * Wrappers for the database lock.  With DNS_RBTDB_USERWLOCK they map to
 * the isc_rwlock primitives and 't' is the lock type passed to
 * RWLOCK/RWUNLOCK; otherwise they map to the mutex primitives and 't' is
 * ignored.
 */
00319 #if DNS_RBTDB_USERWLOCK
00320 #define RBTDB_INITLOCK(l)       isc_rwlock_init((l), 0, 0)
00321 #define RBTDB_DESTROYLOCK(l)    isc_rwlock_destroy(l)
00322 #define RBTDB_LOCK(l, t)        RWLOCK((l), (t))
00323 #define RBTDB_UNLOCK(l, t)      RWUNLOCK((l), (t))
00324 #else
00325 #define RBTDB_INITLOCK(l)       isc_mutex_init(l)
00326 #define RBTDB_DESTROYLOCK(l)    DESTROYLOCK(l)
00327 #define RBTDB_LOCK(l, t)        LOCK(l)
00328 #define RBTDB_UNLOCK(l, t)      UNLOCK(l)
00329 #endif
00330 
00331 /*
00332  * Since node locking is sensitive to both performance and memory footprint,
00333  * we need some trick here.  If we have both high-performance rwlock and
00334  * high performance and small-memory reference counters, we use rwlock for
00335  * node lock and isc_refcount for node references.  In this case, we don't have
00336  * to protect the access to the counters by locks.
00337  * Otherwise, we simply use ordinary mutex lock for node locking, and use
00338  * simple integers as reference counters which is protected by the lock.
00339  * In most cases, we can simply use wrapper macros such as NODE_LOCK and
00340  * NODE_UNLOCK.  In some other cases, however, we need to protect reference
00341  * counters first and then protect other parts of a node as read-only data.
00342  * Special additional macros, NODE_STRONGLOCK(), NODE_WEAKLOCK(), etc, are also
00343  * provided for these special cases.  When we can use the efficient backend
00344  * routines, we should only protect the "other members" by NODE_WEAKLOCK(read).
00345  * Otherwise, we should use NODE_STRONGLOCK() to protect the entire critical
00346  * section including the access to the reference counter.
00347  * Note that we cannot use NODE_LOCK()/NODE_UNLOCK() wherever the protected
00348  * section is also protected by NODE_STRONGLOCK().
00349  */
/*
 * Node lock type and wrappers.
 *
 * When both ISC_RWLOCK_USEATOMIC and DNS_RBT_USEISCREFCOUNT are defined,
 * nodelock_t is an isc_rwlock_t: the NODE_STRONG* macros expand to nothing
 * and the NODE_WEAK* macros take, release or downgrade the rwlock.
 *
 * Otherwise nodelock_t is an isc_mutex_t and the roles are swapped: the
 * NODE_STRONG* macros lock and unlock the mutex, the NODE_WEAK* macros
 * expand to nothing, the lock type 't' is ignored and NODE_TRYUPGRADE()
 * always evaluates to ISC_R_SUCCESS.
 */
00350 #if defined(ISC_RWLOCK_USEATOMIC) && defined(DNS_RBT_USEISCREFCOUNT)
00351 typedef isc_rwlock_t nodelock_t;
00352 
00353 #define NODE_INITLOCK(l)        isc_rwlock_init((l), 0, 0)
00354 #define NODE_DESTROYLOCK(l)     isc_rwlock_destroy(l)
00355 #define NODE_LOCK(l, t)         RWLOCK((l), (t))
00356 #define NODE_UNLOCK(l, t)       RWUNLOCK((l), (t))
00357 #define NODE_TRYUPGRADE(l)      isc_rwlock_tryupgrade(l)
00358 
00359 #define NODE_STRONGLOCK(l)      ((void)0)
00360 #define NODE_STRONGUNLOCK(l)    ((void)0)
00361 #define NODE_WEAKLOCK(l, t)     NODE_LOCK(l, t)
00362 #define NODE_WEAKUNLOCK(l, t)   NODE_UNLOCK(l, t)
00363 #define NODE_WEAKDOWNGRADE(l)   isc_rwlock_downgrade(l)
00364 #else
00365 typedef isc_mutex_t nodelock_t;
00366 
00367 #define NODE_INITLOCK(l)        isc_mutex_init(l)
00368 #define NODE_DESTROYLOCK(l)     DESTROYLOCK(l)
00369 #define NODE_LOCK(l, t)         LOCK(l)
00370 #define NODE_UNLOCK(l, t)       UNLOCK(l)
00371 #define NODE_TRYUPGRADE(l)      ISC_R_SUCCESS
00372 
00373 #define NODE_STRONGLOCK(l)      LOCK(l)
00374 #define NODE_STRONGUNLOCK(l)    UNLOCK(l)
00375 #define NODE_WEAKLOCK(l, t)     ((void)0)
00376 #define NODE_WEAKUNLOCK(l, t)   ((void)0)
00377 #define NODE_WEAKDOWNGRADE(l)   ((void)0)
00378 #endif
00379 
00380 /*%
00381  * Whether to rate-limit updating the LRU to avoid possible thread contention.
00382  * Our performance measurement has shown the cost is marginal, so it's defined
00383  * to be 0 by default either with or without threads.
00384  */
/* Only defined here when the build has not already supplied a value. */
00385 #ifndef DNS_RBTDB_LIMITLRUUPDATE
00386 #define DNS_RBTDB_LIMITLRUUPDATE 0
00387 #endif
00388 
00389 /*
00390  * Allow clients with a virtual time of up to 5 minutes in the past to see
00391  * records that would otherwise have expired.  The value is in seconds.
00392  */
00393 #define RBTDB_VIRTUAL 300
00394 
/*
 * Record pointed to by the 'noqname' and 'closest' members of
 * rdatasetheader_t.
 *
 * NOTE(review): 'neg' and 'negsig' are untyped here; judging by the
 * rdataset_getnoqname()/rdataset_getclosest() prototypes they presumably
 * hold the data behind the 'neg' and 'negsig' rdatasets those functions
 * fill in, with 'type' being the type of 'neg' - confirm against the code
 * that allocates and frees this structure.
 */
00395 struct noqname {
00396         dns_name_t      name;
00397         void *          neg;
00398         void *          negsig;
00399         dns_rdatatype_t type;
00400 };
00401 
00402 typedef struct acachectl acachectl_t;
00403 
/*
 * Header kept for each rdataset.
 *
 * The top headers of the different types are chained through 'next', and
 * older versions of the same rdataset hang off 'down' (see the member
 * comments below).
 *
 * NOTE(review): the three one-bit flags (is_mmapped, next_is_relative,
 * node_is_relative) presumably describe headers that live in a mapped
 * file image (cf. mmap_location in struct dns_rbtdb) - confirm against
 * the code that sets them.
 */
00404 typedef struct rdatasetheader {
00405         /*%
00406          * Locked by the owning node's lock.
00407          */
00408         rbtdb_serial_t                  serial;
00409         dns_ttl_t                       rdh_ttl;
00410         rbtdb_rdatatype_t               type;
00411         isc_uint16_t                    attributes;
00412         dns_trust_t                     trust;
00413         struct noqname                  *noqname;
00414         struct noqname                  *closest;
00415         unsigned int                    is_mmapped : 1;
00416         unsigned int                    next_is_relative : 1;
00417         unsigned int                    node_is_relative : 1;
00418         /*%
00419          * The 'next' and 'down' links below do not use the LIST macros,
00420          * because the LIST structure has both head and tail pointers, and is doubly linked.
00421          */
00422 
00423         struct rdatasetheader           *next;
00424         /*%<
00425          * If this is the top header for an rdataset, 'next' points
00426          * to the top header for the next rdataset (i.e., the next type).
00427          * Otherwise, it points up to the header whose down pointer points
00428          * at this header.
00429          */
00430 
00431         struct rdatasetheader           *down;
00432         /*%<
00433          * Points to the header for the next older version of
00434          * this rdataset.
00435          */
00436 
00437         isc_uint32_t                    count;
00438         /*%<
00439          * Monotonically increased every time this rdataset is bound so that
00440          * it is used as the base of the starting point in DNS responses
00441          * when the "cyclic" rrset-order is required.  Since the ordering
00442          * should not be so crucial, no lock is set for the counter for
00443          * performance reasons.
00444          */
00445 
00446         acachectl_t                     *additional_auth;
00447         acachectl_t                     *additional_glue;
00448 
00449         dns_rbtnode_t                   *node;
00450         isc_stdtime_t                   last_used;
00451         ISC_LINK(struct rdatasetheader) link;
00452 
00453         unsigned int                    heap_index;
00454         /*%<
00455          * Used for TTL-based cache cleaning.
00456          */
00457         isc_stdtime_t                   resign;
00458         /*%
00459          * Case vector for 'upper' below.  If the bit is set then the
00460          * corresponding character in the owner name is to be rendered in upper case.
00461          * NOTE(review): this used to say "AND'd with 0x20"; in ASCII upper case needs bit 0x20 cleared - confirm.
00462          */
00463         unsigned char                   upper[32];
00464 } rdatasetheader_t;
00465 
/* List heads for chains of rdataset headers and of tree nodes. */
00466 typedef ISC_LIST(rdatasetheader_t)      rdatasetheaderlist_t;
00467 typedef ISC_LIST(dns_rbtnode_t)         rbtnodelist_t;
00468 
/*
 * Bit flags for the 16-bit 'attributes' member of rdatasetheader_t.
 * Each value is a distinct single bit, so flags can be or-ed together.
 */
00469 #define RDATASET_ATTR_NONEXISTENT       0x0001
00470 #define RDATASET_ATTR_STALE             0x0002
00471 #define RDATASET_ATTR_IGNORE            0x0004
00472 #define RDATASET_ATTR_RETAIN            0x0008
00473 #define RDATASET_ATTR_NXDOMAIN          0x0010
00474 #define RDATASET_ATTR_RESIGN            0x0020
00475 #define RDATASET_ATTR_STATCOUNT         0x0040
00476 #define RDATASET_ATTR_OPTOUT            0x0080
00477 #define RDATASET_ATTR_NEGATIVE          0x0100
00478 #define RDATASET_ATTR_PREFETCH          0x0200
00479 #define RDATASET_ATTR_CASESET           0x0400
00480 
/*
 * Argument block referenced from struct acachectl.
 *
 * NOTE(review): presumably handed to the acache callback
 * (acache_callback) so it can locate the database, node and header an
 * entry belongs to - confirm against the code that registers the
 * callback.
 */
00481 typedef struct acache_cbarg {
00482         dns_rdatasetadditional_t        type;
00483         unsigned int                    count;
00484         dns_db_t                        *db;
00485         dns_dbnode_t                    *node;
00486         rdatasetheader_t                *header;
00487 } acache_cbarg_t;
00488 
/*
 * Pairs an acache entry with its callback argument.  rdatasetheader_t
 * holds two pointers to this type, additional_auth and additional_glue.
 */
00489 struct acachectl {
00490         dns_acacheentry_t               *entry;
00491         acache_cbarg_t                  *cbarg;
00492 };
00493 
00494 /*
00495  * XXX
00496  * When the cache will pre-expire data (due to memory low or other
00497  * situations) before the rdataset's TTL has expired, it MUST
00498  * respect the RETAIN bit and not expire the data until its TTL is
00499  * expired.
00500  */
00501 
/*
 * Predicates on the 'attributes' member of a header pointer.  Each one
 * tests a single RDATASET_ATTR_* bit and yields 1 when the bit is set,
 * except EXISTS(), which is the inverse of NONEXISTENT() and yields 1
 * when RDATASET_ATTR_NONEXISTENT is clear.
 */
00502 #undef IGNORE                   /* WIN32 winbase.h defines this. */
00503 
00504 #define EXISTS(header) \
00505         (((header)->attributes & RDATASET_ATTR_NONEXISTENT) == 0)
00506 #define NONEXISTENT(header) \
00507         (((header)->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
00508 #define IGNORE(header) \
00509         (((header)->attributes & RDATASET_ATTR_IGNORE) != 0)
00510 #define RETAIN(header) \
00511         (((header)->attributes & RDATASET_ATTR_RETAIN) != 0)
00512 #define NXDOMAIN(header) \
00513         (((header)->attributes & RDATASET_ATTR_NXDOMAIN) != 0)
00514 #define RESIGN(header) \
00515         (((header)->attributes & RDATASET_ATTR_RESIGN) != 0)
00516 #define OPTOUT(header) \
00517         (((header)->attributes & RDATASET_ATTR_OPTOUT) != 0)
00518 #define NEGATIVE(header) \
00519         (((header)->attributes & RDATASET_ATTR_NEGATIVE) != 0)
00520 #define PREFETCH(header) \
00521         (((header)->attributes & RDATASET_ATTR_PREFETCH) != 0)
00522 #define CASESET(header) \
00523         (((header)->attributes & RDATASET_ATTR_CASESET) != 0)
00524 
/* Default number of node lock buckets (see the cache-specific value below). */
00525 #define DEFAULT_NODE_LOCK_COUNT         7       /*%< Should be prime. */
00526 
00527 /*%
00528  * Number of buckets for cache DB entries (locks, LRU lists, TTL heaps).
00529  * There is a tradeoff issue about configuring this value: if this is too
00530  * small, it may cause heavier contention between threads; if this is too large,
00531  * LRU purge algorithm won't work well (entries tend to be purged prematurely).
00532  * The default value should work well for most environments, but this can
00533  * also be configurable at compilation time via the
00534  * DNS_RBTDB_CACHE_NODE_LOCK_COUNT variable.  This value must be larger than
00535  * 1 due to the assumption of overmem_purge().
00536  */
/*
 * A build-supplied DNS_RBTDB_CACHE_NODE_LOCK_COUNT of 1 or less is
 * rejected at compile time with #error; when the macro is not supplied at
 * all the default is 16.
 */
00537 #ifdef DNS_RBTDB_CACHE_NODE_LOCK_COUNT
00538 #if DNS_RBTDB_CACHE_NODE_LOCK_COUNT <= 1
00539 #error "DNS_RBTDB_CACHE_NODE_LOCK_COUNT must be larger than 1"
00540 #else
00541 #define DEFAULT_CACHE_NODE_LOCK_COUNT DNS_RBTDB_CACHE_NODE_LOCK_COUNT
00542 #endif
00543 #else
00544 #define DEFAULT_CACHE_NODE_LOCK_COUNT   16
00545 #endif  /* DNS_RBTDB_CACHE_NODE_LOCK_COUNT */
00546 
/*
 * One node lock bucket: the lock itself, a reference counter and an
 * 'exiting' flag.  struct dns_rbtdb holds an array of these (node_locks)
 * whose length is node_lock_count.
 */
00547 typedef struct {
00548         nodelock_t                      lock;
00549         /* Protected in the refcount routines. */
00550         isc_refcount_t                  references;
00551         /* Locked by lock. */
00552         isc_boolean_t                   exiting;
00553 } rbtdb_nodelock_t;
00554 
/*
 * Entry of a version's changed_list (see rbtdb_version_t): a node plus a
 * 'dirty' flag, linked through 'link'.
 * NOTE(review): the exact meaning of 'dirty' is set by code outside this
 * section - confirm there.
 */
00555 typedef struct rbtdb_changed {
00556         dns_rbtnode_t *                 node;
00557         isc_boolean_t                   dirty;
00558         ISC_LINK(struct rbtdb_changed)  link;
00559 } rbtdb_changed_t;
00560 
00561 typedef ISC_LIST(rbtdb_changed_t)       rbtdb_changedlist_t;
00562 
/*
 * Security state recorded per version in rbtdb_version_t.secure.
 * The enumerators take the values 0, 1 and 2 in the order listed.
 */
00563 typedef enum {
00564         dns_db_insecure,
00565         dns_db_partial,
00566         dns_db_secure
00567 } dns_db_secure_t;
00568 
/* Forward declaration; rbtdb_version_t points back at its database. */
00569 typedef struct dns_rbtdb dns_rbtdb_t;
00570 
00571 /* Reason for expiring a record from cache */
/* Passed as the 'reason' argument of expire_header(). */
00572 typedef enum {
00573         expire_lru,
00574         expire_ttl,
00575         expire_flush
00576 } expire_t;
00577 
/*
 * State kept for one version of the database.  struct dns_rbtdb keeps an
 * ISC_LIST of these (open_versions) and points at two of them
 * (current_version and future_version).  The comments inside the
 * structure state which lock, if any, guards each group of members.
 */
00578 typedef struct rbtdb_version {
00579         /* Not locked */
00580         rbtdb_serial_t                  serial;
00581         dns_rbtdb_t *                   rbtdb;
00582         /*
00583          * Protected in the refcount routines.
00584          * XXXJT: should we change the lock policy based on the refcount
00585          * performance?
00586          */
00587         isc_refcount_t                  references;
00588         /* Locked by database lock. */
00589         isc_boolean_t                   writer;
00590         isc_boolean_t                   commit_ok;
00591         rbtdb_changedlist_t             changed_list;
00592         rdatasetheaderlist_t            resigned_list;
00593         ISC_LINK(struct rbtdb_version)  link;
00594         dns_db_secure_t                 secure;
00595         isc_boolean_t                   havensec3;
00596         /* NSEC3 parameters */
00597         dns_hash_t                      hash;
00598         isc_uint8_t                     flags;
00599         isc_uint16_t                    iterations;
00600         isc_uint8_t                     salt_length;
00601         unsigned char                   salt[DNS_NSEC3_SALTSIZE];
00602 } rbtdb_version_t;
00603 
00604 typedef ISC_LIST(rbtdb_version_t)       rbtdb_versionlist_t;
00605 
/*
 * The database object.  'common' comes first and carries the impmagic
 * value that VALID_RBTDB() checks.  The section comments inside the
 * structure state which lock guards the members that follow them; the
 * type of 'lock' depends on DNS_RBTDB_USERWLOCK.
 */
00606 struct dns_rbtdb {
00607         /* Unlocked. */
00608         dns_db_t                        common;
00609         /* Locks the data in this struct */
00610 #if DNS_RBTDB_USERWLOCK
00611         isc_rwlock_t                    lock;
00612 #else
00613         isc_mutex_t                     lock;
00614 #endif
00615         /* Locks the tree structure (prevents nodes appearing/disappearing) */
00616         isc_rwlock_t                    tree_lock;
00617         /* Locks for individual tree nodes */
00618         unsigned int                    node_lock_count;
00619         rbtdb_nodelock_t *              node_locks;
00620         dns_rbtnode_t *                 origin_node;
00621         dns_stats_t *                   rrsetstats; /* cache DB only */
00622         isc_stats_t *                   cachestats; /* cache DB only */
00623         /* Locked by lock. */
00624         unsigned int                    active;
00625         isc_refcount_t                  references;
00626         unsigned int                    attributes;
00627         rbtdb_serial_t                  current_serial;
00628         rbtdb_serial_t                  least_serial;
00629         rbtdb_serial_t                  next_serial;
00630         rbtdb_version_t *               current_version;
00631         rbtdb_version_t *               future_version;
00632         rbtdb_versionlist_t             open_versions;
00633         isc_task_t *                    task;
00634         dns_dbnode_t                    *soanode;
00635         dns_dbnode_t                    *nsnode;
00636 
00637         /*
00638          * This is a linked list used to implement the LRU cache.  There will
00639          * be node_lock_count linked lists here.  Nodes in bucket 1 will be
00640          * placed on the linked list rdatasets[1].
00641          */
00642         rdatasetheaderlist_t            *rdatasets;
00643 
00644         /*%
00645          * Temporary storage for stale cache nodes and dynamically deleted
00646          * nodes that await being cleaned up.
00647          */
00648         rbtnodelist_t                   *deadnodes;
00649 
00650         /*
00651          * Heaps.  These are used for TTL based expiry in a cache,
00652          * or for zone resigning in a zone DB.  hmctx is the memory
00653          * context to use for the heap (which differs from the main
00654          * database memory context in the case of a cache).
00655          */
00656         isc_mem_t *                     hmctx;
00657         isc_heap_t                      **heaps;
00658 
00659         /*
00660          * Base values for the mmap() code.
00661          */
00662         void *                          mmap_location;
00663         size_t                          mmap_size;
00664 
00665         /* Locked by tree_lock. */
00666         dns_rbt_t *                     tree;
00667         dns_rbt_t *                     nsec;
00668         dns_rbt_t *                     nsec3;
00669         dns_rpz_zones_t                 *rpzs;
00670         dns_rpz_num_t                   rpz_num;
00671         dns_rpz_zones_t                 *load_rpzs;
00672 
00673         /* Unlocked */
00674         unsigned int                    quantum;
00675 };
00676 
/* Bit flags for the 'attributes' member of struct dns_rbtdb. */
00677 #define RBTDB_ATTR_LOADED               0x01
00678 #define RBTDB_ATTR_LOADING              0x02
00679 
00680 /*%
00681  * Search Context
 *
 * Bundles the database, version and serial being searched with the
 * state gathered along the way.
 * NOTE(review): the zonecut* members presumably record the zone cut seen
 * during the search, and 'need_cleanup' presumably tells the caller that
 * a reference must be released afterwards - confirm against the find
 * functions, which are outside this section.
00682  */
00683 typedef struct {
00684         dns_rbtdb_t *           rbtdb;
00685         rbtdb_version_t *       rbtversion;
00686         rbtdb_serial_t          serial;
00687         unsigned int            options;
00688         dns_rbtnodechain_t      chain;
00689         isc_boolean_t           copy_name;
00690         isc_boolean_t           need_cleanup;
00691         isc_boolean_t           wild;
00692         dns_rbtnode_t *         zonecut;
00693         rdatasetheader_t *      zonecut_rdataset;
00694         rdatasetheader_t *      zonecut_sigrdataset;
00695         dns_fixedname_t         zonecut_name;
00696         isc_stdtime_t           now;
00697 } rbtdb_search_t;
00698 
00699 /*%
00700  * Load Context
 *
 * The database being loaded together with a timestamp.
 * NOTE(review): 'now' is presumably the time captured when loading
 * starts - confirm against beginload().
00701  */
00702 typedef struct {
00703         dns_rbtdb_t *           rbtdb;
00704         isc_stdtime_t           now;
00705 } rbtdb_load_t;
00706 
/*
 * Forward declarations of file-local functions that are referenced
 * before they are defined, for example by the rdataset_methods table
 * that follows.  All of them have internal linkage.
 */
00707 static void delete_callback(void *data, void *arg);
00708 static void rdataset_disassociate(dns_rdataset_t *rdataset);
00709 static isc_result_t rdataset_first(dns_rdataset_t *rdataset);
00710 static isc_result_t rdataset_next(dns_rdataset_t *rdataset);
00711 static void rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata);
00712 static void rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target);
00713 static unsigned int rdataset_count(dns_rdataset_t *rdataset);
00714 static isc_result_t rdataset_getnoqname(dns_rdataset_t *rdataset,
00715                                         dns_name_t *name,
00716                                         dns_rdataset_t *neg,
00717                                         dns_rdataset_t *negsig);
00718 static isc_result_t rdataset_getclosest(dns_rdataset_t *rdataset,
00719                                         dns_name_t *name,
00720                                         dns_rdataset_t *neg,
00721                                         dns_rdataset_t *negsig);
00722 static isc_result_t rdataset_getadditional(dns_rdataset_t *rdataset,
00723                                            dns_rdatasetadditional_t type,
00724                                            dns_rdatatype_t qtype,
00725                                            dns_acache_t *acache,
00726                                            dns_zone_t **zonep,
00727                                            dns_db_t **dbp,
00728                                            dns_dbversion_t **versionp,
00729                                            dns_dbnode_t **nodep,
00730                                            dns_name_t *fname,
00731                                            dns_message_t *msg,
00732                                            isc_stdtime_t now);
00733 static isc_result_t rdataset_setadditional(dns_rdataset_t *rdataset,
00734                                            dns_rdatasetadditional_t type,
00735                                            dns_rdatatype_t qtype,
00736                                            dns_acache_t *acache,
00737                                            dns_zone_t *zone,
00738                                            dns_db_t *db,
00739                                            dns_dbversion_t *version,
00740                                            dns_dbnode_t *node,
00741                                            dns_name_t *fname);
00742 static isc_result_t rdataset_putadditional(dns_acache_t *acache,
00743                                            dns_rdataset_t *rdataset,
00744                                            dns_rdatasetadditional_t type,
00745                                            dns_rdatatype_t qtype);
00746 static inline isc_boolean_t need_headerupdate(rdatasetheader_t *header,
00747                                               isc_stdtime_t now);
00748 static void update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
00749                           isc_stdtime_t now);
00750 static void expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
00751                           isc_boolean_t tree_locked, expire_t reason);
00752 static void overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
00753                           isc_stdtime_t now, isc_boolean_t tree_locked);
00754 static isc_result_t resign_insert(dns_rbtdb_t *rbtdb, int idx,
00755                                   rdatasetheader_t *newheader);
00756 static void resign_delete(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
00757                           rdatasetheader_t *header);
00758 static void prune_tree(isc_task_t *task, isc_event_t *event);
00759 static void rdataset_settrust(dns_rdataset_t *rdataset, dns_trust_t trust);
00760 static void rdataset_expire(dns_rdataset_t *rdataset);
00761 static void rdataset_clearprefetch(dns_rdataset_t *rdataset);
00762 static void rdataset_setownercase(dns_rdataset_t *rdataset,
00763                                   const dns_name_t *name);
00764 static void rdataset_getownercase(const dns_rdataset_t *rdataset,
00765                                   dns_name_t *name);
00766 
00766 
/*
 * Method table for rdatasets produced by this database implementation.
 *
 * The initializer is positional, so the order of the entries must match
 * the member order of dns_rdatasetmethods_t.  Two slots are left NULL,
 * i.e. those methods are not provided here.
 * NOTE(review): judging by the neighbouring getnoqname/getclosest entries
 * the NULL slots are presumably the matching "add" methods -- confirm
 * against the dns_rdatasetmethods_t declaration.
 */
00767 static dns_rdatasetmethods_t rdataset_methods = {
00768         rdataset_disassociate,
00769         rdataset_first,
00770         rdataset_next,
00771         rdataset_current,
00772         rdataset_clone,
00773         rdataset_count,
00774         NULL,
00775         rdataset_getnoqname,
00776         NULL,
00777         rdataset_getclosest,
00778         rdataset_getadditional,
00779         rdataset_setadditional,
00780         rdataset_putadditional,
00781         rdataset_settrust,
00782         rdataset_expire,
00783         rdataset_clearprefetch,
00784         rdataset_setownercase,
00785         rdataset_getownercase
00786 };
00787 
/*
 * Reduced method table: only the six basic methods (disassociate, first,
 * next, current, clone and count) are provided; every remaining slot is
 * NULL.  The table has the same number of entries, in the same positional
 * order, as rdataset_methods.
 */
00788 static dns_rdatasetmethods_t slab_methods = {
00789         rdataset_disassociate,
00790         rdataset_first,
00791         rdataset_next,
00792         rdataset_current,
00793         rdataset_clone,
00794         rdataset_count,
00795         NULL,
00796         NULL,
00797         NULL,
00798         NULL,
00799         NULL,
00800         NULL,
00801         NULL,
00802         NULL,
00803         NULL,
00804         NULL,
00805         NULL,
00806         NULL
00807 };
00808 
00809 static void rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp);
00810 static isc_result_t rdatasetiter_first(dns_rdatasetiter_t *iterator);
00811 static isc_result_t rdatasetiter_next(dns_rdatasetiter_t *iterator);
00812 static void rdatasetiter_current(dns_rdatasetiter_t *iterator,
00813                                  dns_rdataset_t *rdataset);
00814 
/*
 * Method table for the rdataset iterator of this database: destroy,
 * first, next and current, in that (positional) order.
 */
00815 static dns_rdatasetitermethods_t rdatasetiter_methods = {
00816         rdatasetiter_destroy,
00817         rdatasetiter_first,
00818         rdatasetiter_next,
00819         rdatasetiter_current
00820 };
00821 
/*
 * Rdataset iterator state: the generic iterator part comes first,
 * followed by a pointer to an rdataset header.
 * NOTE(review): 'current' is presumably the header the iterator is
 * positioned on -- confirm against the rdatasetiter_* functions.
 */
00822 typedef struct rbtdb_rdatasetiter {
00823         dns_rdatasetiter_t              common;
00824         rdatasetheader_t *              current;
00825 } rbtdb_rdatasetiter_t;
00826 
00827 static void             dbiterator_destroy(dns_dbiterator_t **iteratorp);
00828 static isc_result_t     dbiterator_first(dns_dbiterator_t *iterator);
00829 static isc_result_t     dbiterator_last(dns_dbiterator_t *iterator);
00830 static isc_result_t     dbiterator_seek(dns_dbiterator_t *iterator,
00831                                         dns_name_t *name);
00832 static isc_result_t     dbiterator_prev(dns_dbiterator_t *iterator);
00833 static isc_result_t     dbiterator_next(dns_dbiterator_t *iterator);
00834 static isc_result_t     dbiterator_current(dns_dbiterator_t *iterator,
00835                                            dns_dbnode_t **nodep,
00836                                            dns_name_t *name);
00837 static isc_result_t     dbiterator_pause(dns_dbiterator_t *iterator);
00838 static isc_result_t     dbiterator_origin(dns_dbiterator_t *iterator,
00839                                           dns_name_t *name);
00840 
/*
 * Method table for the database (node) iterator: destroy, first, last,
 * seek, prev, next, current, pause and origin, in that (positional) order.
 */
00841 static dns_dbiteratormethods_t dbiterator_methods = {
00842         dbiterator_destroy,
00843         dbiterator_first,
00844         dbiterator_last,
00845         dbiterator_seek,
00846         dbiterator_prev,
00847         dbiterator_next,
00848         dbiterator_current,
00849         dbiterator_pause,
00850         dbiterator_origin
00851 };
00852 
/*
 * DELETION_BATCH_MAX is the capacity of the 'deletions' array in
 * rbtdb_dbiterator below.
 */
00853 #define DELETION_BATCH_MAX 64
00854 
/*
 * State kept by a database iterator.
 *
 *   common       generic iterator part (first member)
 *   paused       see the note below
 *   tree_locked  a lock type recorded for the tree lock
 *   name, origin fixed-name storage owned by the iterator
 *   chain, nsec3chain, current
 *                two node chains plus a pointer to a chain
 *                (NOTE(review): presumably the chain being walked --
 *                confirm against the dbiterator_* functions)
 *   deletions, delete
 *                array of up to DELETION_BATCH_MAX node pointers and an
 *                int (NOTE(review): presumably a batch of pending node
 *                deletions and its fill count -- confirm)
 *   nsec3only, nonsec3
 *                NOTE(review): presumably restrict the walk to, or
 *                exclude, the NSEC3 tree -- confirm
 */
00855 /*
00856  * If 'paused' is ISC_TRUE, then the tree lock is not being held.
00857  */
00858 typedef struct rbtdb_dbiterator {
00859         dns_dbiterator_t                common;
00860         isc_boolean_t                   paused;
00861         isc_boolean_t                   new_origin;
00862         isc_rwlocktype_t                tree_locked;
00863         isc_result_t                    result;
00864         dns_fixedname_t                 name;
00865         dns_fixedname_t                 origin;
00866         dns_rbtnodechain_t              chain;
00867         dns_rbtnodechain_t              nsec3chain;
00868         dns_rbtnodechain_t              *current;
00869         dns_rbtnode_t                   *node;
00870         dns_rbtnode_t                   *deletions[DELETION_BATCH_MAX];
00871         int                             delete;
00872         isc_boolean_t                   nsec3only;
00873         isc_boolean_t                   nonsec3;
00874 } rbtdb_dbiterator_t;
00875 
00876 
/*
 * Database kind predicates: non-zero when the DNS_DBATTR_STUB or
 * DNS_DBATTR_CACHE bit, respectively, is set in the database's common
 * attributes.
 */
00877 #define IS_STUB(rbtdb)  (((rbtdb)->common.attributes & DNS_DBATTR_STUB)  != 0)
00878 #define IS_CACHE(rbtdb) (((rbtdb)->common.attributes & DNS_DBATTR_CACHE) != 0)
00879 
00880 static void free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log,
00881                        isc_event_t *event);
00882 static void overmem(dns_db_t *db, isc_boolean_t over);
00883 static void setnsec3parameters(dns_db_t *db, rbtdb_version_t *version);
00884 static void setownercase(rdatasetheader_t *header, const dns_name_t *name);
00885 
/*
 * Fixed 32-byte buffer, initialised to an empty string here.
 * NOTE(review): presumably filled in at run time with a version tag used
 * for serialized database files -- confirm against the code that writes it.
 */
00886 /* Pad to 32 bytes */
00887 static char FILE_VERSION[32] = "\0";
00888 
00889 /*%
00890  * 'init_count' is used to initialize 'newheader->count', which in turn
00891  * is used to determine where in the cycle rrset-order cyclic starts.
00892  * We don't lock this as we don't care about simultaneous updates.
00893  *
00894  * Note:
00895  *      Both init_count and header->count can be ISC_UINT32_MAX.
00896  *      The count on the returned rdataset however can't be as
00897  *      that indicates that the database does not implement cyclic
00898  *      processing.
00899  */
00900 static unsigned int init_count;
00901 
00902 /*
00903  * Locking
00904  *
00905  * If a routine is going to lock more than one lock in this module, then
00906  * the locking must be done in the following order:
00907  *
00908  *      Tree Lock
00909  *
00910  *      Node Lock       (Only one from the set may be locked at one time by
00911  *                       any caller)
00912  *
00913  *      Database Lock
00914  *
00915  * Failure to follow this hierarchy can result in deadlock.
00916  */
00917 
00918 /*
00919  * Deleting Nodes
00920  *
00921  * For zone databases the node for the origin of the zone MUST NOT be deleted.
00922  */
00923 
00924 /*
00925  * Debugging routines
00926  */
#ifdef DEBUG
/*
 * Print "desc: " followed by the 'size' bytes at 'data' as hexadecimal
 * text and a newline, all on stderr.  The input is converted in pieces
 * of at most BUFSIZ bytes so a fixed-size text buffer is sufficient.
 */
static void
hexdump(const char *desc, unsigned char *data, size_t size) {
        char text[BUFSIZ * 2 + 1];
        isc_buffer_t textbuf;
        isc_region_t chunk;
        isc_result_t result;
        size_t chunklen;

        fprintf(stderr, "%s: ", desc);
        do {
                /* Take the next piece, capped at BUFSIZ bytes. */
                chunklen = size;
                if (chunklen > BUFSIZ)
                        chunklen = BUFSIZ;
                chunk.base = data;
                chunk.length = chunklen;

                isc_buffer_init(&textbuf, text, sizeof(text));
                result = isc_hex_totext(&chunk, 0, "", &textbuf);
                RUNTIME_CHECK(result == ISC_R_SUCCESS);
                /* NUL-terminate so the text can be printed as a string. */
                isc_buffer_putuint8(&textbuf, 0);
                fprintf(stderr, "%s", text);

                data += chunklen;
                size -= chunklen;
        } while (size > 0);
        fprintf(stderr, "\n");
}
#endif
00951 
00952 
00953 /*
00954  * DB Routines
00955  */
00956 
00957 static void
00958 attach(dns_db_t *source, dns_db_t **targetp) {
00959         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)source;
00960 
00961         REQUIRE(VALID_RBTDB(rbtdb));
00962 
00963         isc_refcount_increment(&rbtdb->references, NULL);
00964 
00965         *targetp = source;
00966 }
00967 
00968 static void
00969 free_rbtdb_callback(isc_task_t *task, isc_event_t *event) {
00970         dns_rbtdb_t *rbtdb = event->ev_arg;
00971 
00972         UNUSED(task);
00973 
00974         free_rbtdb(rbtdb, ISC_TRUE, event);
00975 }
00976 
00977 static void
00978 update_cachestats(dns_rbtdb_t *rbtdb, isc_result_t result) {
00979         INSIST(IS_CACHE(rbtdb));
00980 
00981         if (rbtdb->cachestats == NULL)
00982                 return;
00983 
00984         switch (result) {
00985         case ISC_R_SUCCESS:
00986         case DNS_R_CNAME:
00987         case DNS_R_DNAME:
00988         case DNS_R_DELEGATION:
00989         case DNS_R_NCACHENXDOMAIN:
00990         case DNS_R_NCACHENXRRSET:
00991                 isc_stats_increment(rbtdb->cachestats,
00992                                     dns_cachestatscounter_hits);
00993                 break;
00994         default:
00995                 isc_stats_increment(rbtdb->cachestats,
00996                                     dns_cachestatscounter_misses);
00997         }
00998 }
00999 
01000 static void
01001 update_rrsetstats(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
01002                   isc_boolean_t increment)
01003 {
01004         dns_rdatastatstype_t statattributes = 0;
01005         dns_rdatastatstype_t base = 0;
01006         dns_rdatastatstype_t type;
01007 
01008         /* At the moment we count statistics only for cache DB */
01009         INSIST(IS_CACHE(rbtdb));
01010 
01011         if (NEGATIVE(header)) {
01012                 if (NXDOMAIN(header))
01013                         statattributes = DNS_RDATASTATSTYPE_ATTR_NXDOMAIN;
01014                 else {
01015                         statattributes = DNS_RDATASTATSTYPE_ATTR_NXRRSET;
01016                         base = RBTDB_RDATATYPE_EXT(header->type);
01017                 }
01018         } else
01019                 base = RBTDB_RDATATYPE_BASE(header->type);
01020 
01021         type = DNS_RDATASTATSTYPE_VALUE(base, statattributes);
01022         if (increment)
01023                 dns_rdatasetstats_increment(rbtdb->rrsetstats, type);
01024         else
01025                 dns_rdatasetstats_decrement(rbtdb->rrsetstats, type);
01026 }
01027 
01028 static void
01029 set_ttl(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, dns_ttl_t newttl) {
01030         int idx;
01031         isc_heap_t *heap;
01032         dns_ttl_t oldttl;
01033 
01034 
01035         if (!IS_CACHE(rbtdb)) {
01036                 header->rdh_ttl = newttl;
01037                 return;
01038         }
01039 
01040         oldttl = header->rdh_ttl;
01041         header->rdh_ttl = newttl;
01042 
01043         /*
01044          * It's possible the rbtdb is not a cache.  If this is the case,
01045          * we will not have a heap, and we move on.  If we do, though,
01046          * we might need to adjust things.
01047          */
01048         if (header->heap_index == 0 || newttl == oldttl)
01049                 return;
01050         idx = header->node->locknum;
01051         if (rbtdb->heaps == NULL || rbtdb->heaps[idx] == NULL)
01052             return;
01053         heap = rbtdb->heaps[idx];
01054 
01055         if (newttl < oldttl)
01056                 isc_heap_increased(heap, header->heap_index);
01057         else
01058                 isc_heap_decreased(heap, header->heap_index);
01059 }
01060 
01061 /*%
01062  * These functions allow the heap code to rank the priority of each
01063  * element.  It returns ISC_TRUE if v1 happens "sooner" than v2.
01064  */
01065 static isc_boolean_t
01066 ttl_sooner(void *v1, void *v2) {
01067         rdatasetheader_t *h1 = v1;
01068         rdatasetheader_t *h2 = v2;
01069 
01070         if (h1->rdh_ttl < h2->rdh_ttl)
01071                 return (ISC_TRUE);
01072         return (ISC_FALSE);
01073 }
01074 
01075 static isc_boolean_t
01076 resign_sooner(void *v1, void *v2) {
01077         rdatasetheader_t *h1 = v1;
01078         rdatasetheader_t *h2 = v2;
01079 
01080         if (isc_serial_lt(h1->resign, h2->resign))
01081                 return (ISC_TRUE);
01082         return (ISC_FALSE);
01083 }
01084 
01085 /*%
01086  * This function sets the heap index into the header.
01087  */
01088 static void
01089 set_index(void *what, unsigned int idx) {
01090         rdatasetheader_t *h = what;
01091 
01092         h->heap_index = idx;
01093 }
01094 
01095 /*%
01096  * Work out how many nodes can be deleted in the time between two
01097  * requests to the nameserver.  Smooth the resulting number and use it
01098  * as a estimate for the number of nodes to be deleted in the next
01099  * iteration.
01100  */
01101 static unsigned int
01102 adjust_quantum(unsigned int old, isc_time_t *start) {
01103         unsigned int pps = dns_pps;     /* packets per second */
01104         unsigned int interval;
01105         isc_uint64_t usecs;
01106         isc_time_t end;
01107         unsigned int new;
01108 
01109         if (pps < 100)
01110                 pps = 100;
01111         isc_time_now(&end);
01112 
01113         interval = 1000000 / pps;       /* interval in usec */
01114         if (interval == 0)
01115                 interval = 1;
01116         usecs = isc_time_microdiff(&end, start);
01117         if (usecs == 0) {
01118                 /*
01119                  * We were unable to measure the amount of time taken.
01120                  * Double the nodes deleted next time.
01121                  */
01122                 old *= 2;
01123                 if (old > 1000)
01124                         old = 1000;
01125                 return (old);
01126         }
01127         new = old * interval;
01128         new /= (unsigned int)usecs;
01129         if (new == 0)
01130                 new = 1;
01131         else if (new > 1000)
01132                 new = 1000;
01133 
01134         /* Smooth */
01135         new = (new + old * 3) / 4;
01136 
01137         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, DNS_LOGMODULE_CACHE,
01138                       ISC_LOG_DEBUG(1), "adjust_quantum -> %d", new);
01139 
01140         return (new);
01141 }
01142 
/*
 * Final teardown of 'rbtdb': destroy its trees and release every
 * resource the database structure owns, then the structure itself.
 *
 * 'log'    when true, a debug message "done free_rbtdb(<origin>)" is
 *          written once the trees are gone.
 * 'event'  NULL on a direct call (see maybe_free_rbtdb()); non-NULL
 *          when re-entered through free_rbtdb_callback(), in which case
 *          it is the event that was posted by an earlier invocation.
 *
 * The three trees ('tree', 'nsec', 'nsec3') are destroyed one after the
 * other with dns_rbt_destroy2() and the current quantum.  When that call
 * returns ISC_R_QUOTA the function posts a DNS_EVENT_FREESTORAGE event to
 * the database task and returns; the remaining work is then done by a
 * later call from free_rbtdb_callback().
 * NOTE(review): ISC_R_QUOTA presumably means "quantum used up, tree not
 * yet empty" -- confirm against dns_rbt_destroy2().
 */
01143 static void
01144 free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log, isc_event_t *event) {
01145         unsigned int i;
01146         isc_ondestroy_t ondest;
01147         isc_result_t result;
01148         char buf[DNS_NAME_FORMATSIZE];
01149         dns_rbt_t **treep;
01150         isc_time_t start;
01151 
        /*
         * NOTE(review): the value -1 cast to isc_boolean_t looks like a
         * special sentinel understood by overmem() -- confirm there.
         */
01152         if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
01153                 overmem((dns_db_t *)rbtdb, (isc_boolean_t)-1);
01154 
01155         REQUIRE(rbtdb->current_version != NULL || EMPTY(rbtdb->open_versions));
01156         REQUIRE(rbtdb->future_version == NULL);
01157 
        /*
         * Drop the database's own reference on the current version; it
         * must be the last one.
         */
01158         if (rbtdb->current_version != NULL) {
01159                 unsigned int refs;
01160 
01161                 isc_refcount_decrement(&rbtdb->current_version->references,
01162                                        &refs);
01163                 INSIST(refs == 0);
01164                 UNLINK(rbtdb->open_versions, rbtdb->current_version, link);
01165                 isc_refcount_destroy(&rbtdb->current_version->references);
01166                 isc_mem_put(rbtdb->common.mctx, rbtdb->current_version,
01167                             sizeof(rbtdb_version_t));
01168         }
01169 
01170         /*
01171          * We assume the number of remaining dead nodes is reasonably small;
01172          * the overhead of unlinking all nodes here should be negligible.
01173          */
01174         for (i = 0; i < rbtdb->node_lock_count; i++) {
01175                 dns_rbtnode_t *node;
01176 
01177                 node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
01178                 while (node != NULL) {
01179                         ISC_LIST_UNLINK(rbtdb->deadnodes[i], node, deadlink);
01180                         node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
01181                 }
01182         }
01183 
        /*
         * First (non-callback) invocation: choose the initial quantum.
         * NOTE(review): 0 presumably means "no limit", used when there
         * is no task to continue the work on -- confirm.
         */
01184         if (event == NULL)
01185                 rbtdb->quantum = (rbtdb->task != NULL) ? 100 : 0;
01186 
01187         for (;;) {
01188                 /*
01189                  * pick the next tree to (start to) destroy
01190                  */
01191                 treep = &rbtdb->tree;
01192                 if (*treep == NULL) {
01193                         treep = &rbtdb->nsec;
01194                         if (*treep == NULL) {
01195                                 treep = &rbtdb->nsec3;
01196                                 /*
01197                                  * we're finished after clear cutting
01198                                  */
01199                                 if (*treep == NULL)
01200                                         break;
01201                         }
01202                 }
01203 
01204                 isc_time_now(&start);
01205                 result = dns_rbt_destroy2(treep, rbtdb->quantum);
01206                 if (result == ISC_R_QUOTA) {
01207                         INSIST(rbtdb->task != NULL);
01208                         if (rbtdb->quantum != 0)
01209                                 rbtdb->quantum = adjust_quantum(rbtdb->quantum,
01210                                                                 &start);
01211                         if (event == NULL)
01212                                 event = isc_event_allocate(rbtdb->common.mctx,
01213                                                            NULL,
01214                                                          DNS_EVENT_FREESTORAGE,
01215                                                            free_rbtdb_callback,
01216                                                            rbtdb,
01217                                                            sizeof(isc_event_t));
                        /*
                         * No event could be allocated: keep destroying
                         * from this call instead of deferring to the task.
                         */
01218                         if (event == NULL)
01219                                 continue;
                        /* Hand the rest of the work to the task and stop. */
01220                         isc_task_send(rbtdb->task, &event);
01221                         return;
01222                 }
01223                 INSIST(result == ISC_R_SUCCESS && *treep == NULL);
01224         }
01225 
        /* All trees are gone; a leftover event is no longer needed. */
01226         if (event != NULL)
01227                 isc_event_free(&event);
01228         if (log) {
01229                 if (dns_name_dynamic(&rbtdb->common.origin))
01230                         dns_name_format(&rbtdb->common.origin, buf,
01231                                         sizeof(buf));
01232                 else
01233                         strcpy(buf, "<UNKNOWN>");
01234                 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
01235                               DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
01236                               "done free_rbtdb(%s)", buf);
01237         }
01238         if (dns_name_dynamic(&rbtdb->common.origin))
01239                 dns_name_free(&rbtdb->common.origin, rbtdb->common.mctx);
01240         for (i = 0; i < rbtdb->node_lock_count; i++) {
01241                 isc_refcount_destroy(&rbtdb->node_locks[i].references);
01242                 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
01243         }
01244 
01245         /*
01246          * Clean up LRU / re-signing order lists.
01247          */
01248         if (rbtdb->rdatasets != NULL) {
01249                 for (i = 0; i < rbtdb->node_lock_count; i++)
01250                         INSIST(ISC_LIST_EMPTY(rbtdb->rdatasets[i]));
01251                 isc_mem_put(rbtdb->common.mctx, rbtdb->rdatasets,
01252                             rbtdb->node_lock_count *
01253                             sizeof(rdatasetheaderlist_t));
01254         }
01255         /*
01256          * Clean up dead node buckets.
01257          */
01258         if (rbtdb->deadnodes != NULL) {
01259                 for (i = 0; i < rbtdb->node_lock_count; i++)
01260                         INSIST(ISC_LIST_EMPTY(rbtdb->deadnodes[i]));
01261                 isc_mem_put(rbtdb->common.mctx, rbtdb->deadnodes,
01262                     rbtdb->node_lock_count * sizeof(rbtnodelist_t));
01263         }
01264         /*
01265          * Clean up heap objects.
01266          */
        /* Note: the heap array is returned to 'hmctx', not 'common.mctx'. */
01267         if (rbtdb->heaps != NULL) {
01268                 for (i = 0; i < rbtdb->node_lock_count; i++)
01269                         isc_heap_destroy(&rbtdb->heaps[i]);
01270                 isc_mem_put(rbtdb->hmctx, rbtdb->heaps,
01271                             rbtdb->node_lock_count * sizeof(isc_heap_t *));
01272         }
01273 
01274         if (rbtdb->rrsetstats != NULL)
01275                 dns_stats_detach(&rbtdb->rrsetstats);
01276         if (rbtdb->cachestats != NULL)
01277                 isc_stats_detach(&rbtdb->cachestats);
01278 
01279         if (rbtdb->load_rpzs != NULL) {
01280                 /*
01281                  * We must be cleaning up after a failed zone loading.
01282                  */
01283                 REQUIRE(rbtdb->rpzs != NULL &&
01284                         rbtdb->rpz_num < rbtdb->rpzs->p.num_zones);
01285                 dns_rpz_detach_rpzs(&rbtdb->load_rpzs);
01286         }
01287         if (rbtdb->rpzs != NULL) {
01288                 REQUIRE(rbtdb->rpz_num < rbtdb->rpzs->p.num_zones);
01289                 dns_rpz_detach_rpzs(&rbtdb->rpzs);
01290         }
01291 
01292         isc_mem_put(rbtdb->common.mctx, rbtdb->node_locks,
01293                     rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
01294         isc_rwlock_destroy(&rbtdb->tree_lock);
01295         isc_refcount_destroy(&rbtdb->references);
01296         if (rbtdb->task != NULL)
01297                 isc_task_detach(&rbtdb->task);
01298 
01299         RBTDB_DESTROYLOCK(&rbtdb->lock);
        /* Invalidate the magic numbers so stale pointers fail validation. */
01300         rbtdb->common.magic = 0;
01301         rbtdb->common.impmagic = 0;
        /*
         * Copy the on-destroy list out of the structure: it is notified
         * below, after the structure's memory has been released.
         */
01302         ondest = rbtdb->common.ondest;
01303         isc_mem_detach(&rbtdb->hmctx);
01304 
01305         if (rbtdb->mmap_location != NULL)
01306                 isc_file_munmap(rbtdb->mmap_location,
01307                                 (size_t) rbtdb->mmap_size);
01308 
01309         isc_mem_putanddetach(&rbtdb->common.mctx, rbtdb, sizeof(*rbtdb));
01310         isc_ondestroy_notify(&ondest, rbtdb);
01311 }
01312 
01313 static inline void
01314 maybe_free_rbtdb(dns_rbtdb_t *rbtdb) {
01315         isc_boolean_t want_free = ISC_FALSE;
01316         unsigned int i;
01317         unsigned int inactive = 0;
01318 
01319         /* XXX check for open versions here */
01320 
01321         if (rbtdb->soanode != NULL)
01322                 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->soanode);
01323         if (rbtdb->nsnode != NULL)
01324                 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->nsnode);
01325 
01326         /*
01327          * Even though there are no external direct references, there still
01328          * may be nodes in use.
01329          */
01330         for (i = 0; i < rbtdb->node_lock_count; i++) {
01331                 NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
01332                 rbtdb->node_locks[i].exiting = ISC_TRUE;
01333                 NODE_UNLOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
01334                 if (isc_refcount_current(&rbtdb->node_locks[i].references)
01335                     == 0) {
01336                         inactive++;
01337                 }
01338         }
01339 
01340         if (inactive != 0) {
01341                 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
01342                 rbtdb->active -= inactive;
01343                 if (rbtdb->active == 0)
01344                         want_free = ISC_TRUE;
01345                 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
01346                 if (want_free) {
01347                         char buf[DNS_NAME_FORMATSIZE];
01348                         if (dns_name_dynamic(&rbtdb->common.origin))
01349                                 dns_name_format(&rbtdb->common.origin, buf,
01350                                                 sizeof(buf));
01351                         else
01352                                 strcpy(buf, "<UNKNOWN>");
01353                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
01354                                       DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
01355                                       "calling free_rbtdb(%s)", buf);
01356                         free_rbtdb(rbtdb, ISC_TRUE, NULL);
01357                 }
01358         }
01359 }
01360 
01361 static void
01362 detach(dns_db_t **dbp) {
01363         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(*dbp);
01364         unsigned int refs;
01365 
01366         REQUIRE(VALID_RBTDB(rbtdb));
01367 
01368         isc_refcount_decrement(&rbtdb->references, &refs);
01369 
01370         if (refs == 0)
01371                 maybe_free_rbtdb(rbtdb);
01372 
01373         *dbp = NULL;
01374 }
01375 
01376 static void
01377 currentversion(dns_db_t *db, dns_dbversion_t **versionp) {
01378         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
01379         rbtdb_version_t *version;
01380         unsigned int refs;
01381 
01382         REQUIRE(VALID_RBTDB(rbtdb));
01383 
01384         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
01385         version = rbtdb->current_version;
01386         isc_refcount_increment(&version->references, &refs);
01387         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
01388 
01389         *versionp = (dns_dbversion_t *)version;
01390 }
01391 
01392 static inline rbtdb_version_t *
01393 allocate_version(isc_mem_t *mctx, rbtdb_serial_t serial,
01394                  unsigned int references, isc_boolean_t writer)
01395 {
01396         isc_result_t result;
01397         rbtdb_version_t *version;
01398 
01399         version = isc_mem_get(mctx, sizeof(*version));
01400         if (version == NULL)
01401                 return (NULL);
01402         version->serial = serial;
01403         result = isc_refcount_init(&version->references, references);
01404         if (result != ISC_R_SUCCESS) {
01405                 isc_mem_put(mctx, version, sizeof(*version));
01406                 return (NULL);
01407         }
01408         version->writer = writer;
01409         version->commit_ok = ISC_FALSE;
01410         ISC_LIST_INIT(version->changed_list);
01411         ISC_LIST_INIT(version->resigned_list);
01412         ISC_LINK_INIT(version, link);
01413 
01414         return (version);
01415 }
01416 
01417 static isc_result_t
01418 newversion(dns_db_t *db, dns_dbversion_t **versionp) {
01419         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
01420         rbtdb_version_t *version;
01421 
01422         REQUIRE(VALID_RBTDB(rbtdb));
01423         REQUIRE(versionp != NULL && *versionp == NULL);
01424         REQUIRE(rbtdb->future_version == NULL);
01425 
01426         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
01427         RUNTIME_CHECK(rbtdb->next_serial != 0);         /* XXX Error? */
01428         version = allocate_version(rbtdb->common.mctx, rbtdb->next_serial, 1,
01429                                    ISC_TRUE);
01430         if (version != NULL) {
01431                 version->rbtdb = rbtdb;
01432                 version->commit_ok = ISC_TRUE;
01433                 version->secure = rbtdb->current_version->secure;
01434                 version->havensec3 = rbtdb->current_version->havensec3;
01435                 if (version->havensec3) {
01436                         version->flags = rbtdb->current_version->flags;
01437                         version->iterations =
01438                                 rbtdb->current_version->iterations;
01439                         version->hash = rbtdb->current_version->hash;
01440                         version->salt_length =
01441                                 rbtdb->current_version->salt_length;
01442                         memmove(version->salt, rbtdb->current_version->salt,
01443                                 version->salt_length);
01444                 } else {
01445                         version->flags = 0;
01446                         version->iterations = 0;
01447                         version->hash = 0;
01448                         version->salt_length = 0;
01449                         memset(version->salt, 0, sizeof(version->salt));
01450                 }
01451                 rbtdb->next_serial++;
01452                 rbtdb->future_version = version;
01453         }
01454         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
01455 
01456         if (version == NULL)
01457                 return (ISC_R_NOMEMORY);
01458 
01459         *versionp = version;
01460 
01461         return (ISC_R_SUCCESS);
01462 }
01463 
01464 static void
01465 attachversion(dns_db_t *db, dns_dbversion_t *source,
01466               dns_dbversion_t **targetp)
01467 {
01468         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
01469         rbtdb_version_t *rbtversion = source;
01470         unsigned int refs;
01471 
01472         REQUIRE(VALID_RBTDB(rbtdb));
01473         INSIST(rbtversion != NULL && rbtversion->rbtdb == rbtdb);
01474 
01475         isc_refcount_increment(&rbtversion->references, &refs);
01476         INSIST(refs > 1);
01477 
01478         *targetp = rbtversion;
01479 }
01480 
01481 static rbtdb_changed_t *
01482 add_changed(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
01483             dns_rbtnode_t *node)
01484 {
01485         rbtdb_changed_t *changed;
01486         unsigned int refs;
01487 
01488         /*
01489          * Caller must be holding the node lock if its reference must be
01490          * protected by the lock.
01491          */
01492 
01493         changed = isc_mem_get(rbtdb->common.mctx, sizeof(*changed));
01494 
01495         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
01496 
01497         REQUIRE(version->writer);
01498 
01499         if (changed != NULL) {
01500                 dns_rbtnode_refincrement(node, &refs);
01501                 INSIST(refs != 0);
01502                 changed->node = node;
01503                 changed->dirty = ISC_FALSE;
01504                 ISC_LIST_INITANDAPPEND(version->changed_list, changed, link);
01505         } else
01506                 version->commit_ok = ISC_FALSE;
01507 
01508         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
01509 
01510         return (changed);
01511 }
01512 
01513 static void
01514 free_acachearray(isc_mem_t *mctx, rdatasetheader_t *header,
01515                  acachectl_t *array)
01516 {
01517         unsigned int count;
01518         unsigned int i;
01519         unsigned char *raw;     /* RDATASLAB */
01520 
01521         /*
01522          * The caller must be holding the corresponding node lock.
01523          */
01524 
01525         if (array == NULL)
01526                 return;
01527 
01528         raw = (unsigned char *)header + sizeof(*header);
01529         count = raw[0] * 256 + raw[1];
01530 
01531         /*
01532          * Sanity check: since an additional cache entry has a reference to
01533          * the original DB node (in the callback arg), there should be no
01534          * acache entries when the node can be freed.
01535          */
01536         for (i = 0; i < count; i++)
01537                 INSIST(array[i].entry == NULL && array[i].cbarg == NULL);
01538 
01539         isc_mem_put(mctx, array, count * sizeof(acachectl_t));
01540 }
01541 
01542 static inline void
01543 free_noqname(isc_mem_t *mctx, struct noqname **noqname) {
01544 
01545         if (dns_name_dynamic(&(*noqname)->name))
01546                 dns_name_free(&(*noqname)->name, mctx);
01547         if ((*noqname)->neg != NULL)
01548                 isc_mem_put(mctx, (*noqname)->neg,
01549                             dns_rdataslab_size((*noqname)->neg, 0));
01550         if ((*noqname)->negsig != NULL)
01551                 isc_mem_put(mctx, (*noqname)->negsig,
01552                             dns_rdataslab_size((*noqname)->negsig, 0));
01553         isc_mem_put(mctx, *noqname, sizeof(**noqname));
01554         *noqname = NULL;
01555 }
01556 
01557 static inline void
01558 init_rdataset(dns_rbtdb_t *rbtdb, rdatasetheader_t *h) {
01559         ISC_LINK_INIT(h, link);
01560         h->heap_index = 0;
01561         h->is_mmapped = 0;
01562         h->next_is_relative = 0;
01563         h->node_is_relative = 0;
01564 
01565 #if TRACE_HEADER
01566         if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
01567                 fprintf(stderr, "initialized header: %p\n", h);
01568 #else
01569         UNUSED(rbtdb);
01570 #endif
01571 }
01572 
01573 /*
01574  * Update the copied values of 'next' and 'node' if they are relative.
01575  */
01576 static void
01577 update_newheader(rdatasetheader_t *new, rdatasetheader_t *old) {
01578         char *p;
01579 
01580         if (old->next_is_relative) {
01581                 p = (char *) old;
01582                 p += (uintptr_t)old->next;
01583                 new->next = (rdatasetheader_t *)p;
01584         }
01585         if (old->node_is_relative) {
01586                 p = (char *) old;
01587                 p += (uintptr_t)old->node;
01588                 new->node = (dns_rbtnode_t *)p;
01589         }
01590         if (CASESET(old)) {
01591                 memmove(new->upper, old->upper, sizeof(old->upper));
01592                 new->attributes |= RDATASET_ATTR_CASESET;
01593         }
01594 }
01595 
01596 static inline rdatasetheader_t *
01597 new_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx) {
01598         rdatasetheader_t *h;
01599 
01600         h = isc_mem_get(mctx, sizeof(*h));
01601         if (h == NULL)
01602                 return (NULL);
01603 
01604 #if TRACE_HEADER
01605         if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
01606                 fprintf(stderr, "allocated header: %p\n", h);
01607 #endif
01608         memset(h->upper, 0xeb, sizeof(h->upper));
01609         init_rdataset(rbtdb, h);
01610         h->rdh_ttl = 0;
01611         return (h);
01612 }
01613 
01614 static inline void
01615 free_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *rdataset) {
01616         unsigned int size;
01617         int idx;
01618 
01619         if (EXISTS(rdataset) &&
01620             (rdataset->attributes & RDATASET_ATTR_STATCOUNT) != 0) {
01621                 update_rrsetstats(rbtdb, rdataset, ISC_FALSE);
01622         }
01623 
01624         idx = rdataset->node->locknum;
01625         if (ISC_LINK_LINKED(rdataset, link)) {
01626                 INSIST(IS_CACHE(rbtdb));
01627                 ISC_LIST_UNLINK(rbtdb->rdatasets[idx], rdataset, link);
01628         }
01629 
01630         if (rdataset->heap_index != 0)
01631                 isc_heap_delete(rbtdb->heaps[idx], rdataset->heap_index);
01632         rdataset->heap_index = 0;
01633 
01634         if (rdataset->noqname != NULL)
01635                 free_noqname(mctx, &rdataset->noqname);
01636         if (rdataset->closest != NULL)
01637                 free_noqname(mctx, &rdataset->closest);
01638 
01639         free_acachearray(mctx, rdataset, rdataset->additional_auth);
01640         free_acachearray(mctx, rdataset, rdataset->additional_glue);
01641 
01642         if ((rdataset->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
01643                 size = sizeof(*rdataset);
01644         else
01645                 size = dns_rdataslab_size((unsigned char *)rdataset,
01646                                           sizeof(*rdataset));
01647 
01648         if (rdataset->is_mmapped == 1)
01649                 return;
01650 
01651         isc_mem_put(mctx, rdataset, size);
01652 }
01653 
01654 static inline void
01655 rollback_node(dns_rbtnode_t *node, rbtdb_serial_t serial) {
01656         rdatasetheader_t *header, *dcurrent;
01657         isc_boolean_t make_dirty = ISC_FALSE;
01658 
01659         /*
01660          * Caller must hold the node lock.
01661          */
01662 
01663         /*
01664          * We set the IGNORE attribute on rdatasets with serial number
01665          * 'serial'.  When the reference count goes to zero, these rdatasets
01666          * will be cleaned up; until that time, they will be ignored.
01667          */
01668         for (header = node->data; header != NULL; header = header->next) {
01669                 if (header->serial == serial) {
01670                         header->attributes |= RDATASET_ATTR_IGNORE;
01671                         make_dirty = ISC_TRUE;
01672                 }
01673                 for (dcurrent = header->down;
01674                      dcurrent != NULL;
01675                      dcurrent = dcurrent->down) {
01676                         if (dcurrent->serial == serial) {
01677                                 dcurrent->attributes |= RDATASET_ATTR_IGNORE;
01678                                 make_dirty = ISC_TRUE;
01679                         }
01680                 }
01681         }
01682         if (make_dirty)
01683                 node->dirty = 1;
01684 }
01685 
01686 static inline void
01687 mark_stale_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header) {
01688 
01689         /*
01690          * If we are already stale there is nothing to do.
01691          */
01692         if ((header->attributes & RDATASET_ATTR_STALE) != 0)
01693                 return;
01694 
01695         header->attributes |= RDATASET_ATTR_STALE;
01696         header->node->dirty = 1;
01697 
01698         /*
01699          * If we have not been counted then there is nothing to do.
01700          */
01701         if ((header->attributes & RDATASET_ATTR_STATCOUNT) == 0)
01702                 return;
01703 
01704         if (EXISTS(header))
01705                 update_rrsetstats(rbtdb, header, ISC_TRUE);
01706 }
01707 
01708 static inline void
01709 clean_stale_headers(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *top)
01710 {
01711         rdatasetheader_t *d, *down_next;
01712 
01713         for (d = top->down; d != NULL; d = down_next) {
01714                 down_next = d->down;
01715                 free_rdataset(rbtdb, mctx, d);
01716         }
01717         top->down = NULL;
01718 }
01719 
01720 static inline void
01721 clean_cache_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
01722         rdatasetheader_t *current, *top_prev, *top_next;
01723         isc_mem_t *mctx = rbtdb->common.mctx;
01724 
01725         /*
01726          * Caller must be holding the node lock.
01727          */
01728 
01729         top_prev = NULL;
01730         for (current = node->data; current != NULL; current = top_next) {
01731                 top_next = current->next;
01732                 clean_stale_headers(rbtdb, mctx, current);
01733                 /*
01734                  * If current is nonexistent or stale, we can clean it up.
01735                  */
01736                 if ((current->attributes &
01737                      (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0) {
01738                         if (top_prev != NULL)
01739                                 top_prev->next = current->next;
01740                         else
01741                                 node->data = current->next;
01742                         free_rdataset(rbtdb, mctx, current);
01743                 } else
01744                         top_prev = current;
01745         }
01746         node->dirty = 0;
01747 }
01748 
/*
 * Clean a node of a zone (non-cache) database.
 *
 * The node's rdatasets form a list of top-level headers linked through
 * 'next'; each top-level header may have older versions hanging off it
 * through 'down'.  For every top-level header this function:
 *
 *   1. frees 'down' entries that carry the same serial as the entry above
 *      them, or that have the IGNORE attribute;
 *   2. if the top-level header itself is IGNOREd, removes it, promoting
 *      its first 'down' entry (if any) to the top-level list;
 *   3. frees the first 'down' entry whose serial is below 'least_serial'
 *      together with everything further down;
 *   4. if no 'down' entries remain and the header is NONEXISTENT, removes
 *      the header as well.
 *
 * node->dirty is cleared only when no top-level header is left with a
 * 'down' chain.
 *
 * Requires 'least_serial' to be non-zero.
 */
01749 static inline void
01750 clean_zone_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
01751                 rbtdb_serial_t least_serial)
01752 {
01753         rdatasetheader_t *current, *dcurrent, *down_next, *dparent;
01754         rdatasetheader_t *top_prev, *top_next;
01755         isc_mem_t *mctx = rbtdb->common.mctx;
01756         isc_boolean_t still_dirty = ISC_FALSE;
01757 
01758         /*
01759          * Caller must be holding the node lock.
01760          */
01761         REQUIRE(least_serial != 0);
01762 
              /*
               * top_prev tracks the last top-level header that was kept, so
               * that a removed header can be spliced out of the 'next' list.
               * top_next is read up front because 'current' may be freed.
               */
01763         top_prev = NULL;
01764         for (current = node->data; current != NULL; current = top_next) {
01765                 top_next = current->next;
01766 
01767                 /*
01768                  * First, we clean up any instances of multiple rdatasets
01769                  * with the same serial number, or that have the IGNORE
01770                  * attribute.
01771                  */
01772                 dparent = current;
01773                 for (dcurrent = current->down;
01774                      dcurrent != NULL;
01775                      dcurrent = down_next) {
01776                         down_next = dcurrent->down;
01777                         INSIST(dcurrent->serial <= dparent->serial);
01778                         if (dcurrent->serial == dparent->serial ||
01779                             IGNORE(dcurrent)) {
                                      /*
                                       * Splice dcurrent out of the down
                                       * chain; the entry below it gets its
                                       * 'next' pointed at dparent.
                                       */
01780                                 if (down_next != NULL)
01781                                         down_next->next = dparent;
01782                                 dparent->down = down_next;
01783                                 free_rdataset(rbtdb, mctx, dcurrent);
01784                         } else
01785                                 dparent = dcurrent;
01786                 }
01787 
01788                 /*
01789                  * We've now eliminated all IGNORE datasets with the possible
01790                  * exception of current, which we now check.
01791                  */
01792                 if (IGNORE(current)) {
01793                         down_next = current->down;
01794                         if (down_next == NULL) {
01795                                 if (top_prev != NULL)
01796                                         top_prev->next = current->next;
01797                                 else
01798                                         node->data = current->next;
01799                                 free_rdataset(rbtdb, mctx, current);
01800                                 /*
01801                                  * current no longer exists, so we can
01802                                  * just continue with the loop.
01803                                  */
01804                                 continue;
01805                         } else {
01806                                 /*
01807                                  * Pull up current->down, making it the new
01808                                  * current.
01809                                  */
01810                                 if (top_prev != NULL)
01811                                         top_prev->next = down_next;
01812                                 else
01813                                         node->data = down_next;
01814                                 down_next->next = top_next;
01815                                 free_rdataset(rbtdb, mctx, current);
01816                                 current = down_next;
01817                         }
01818                 }
01819 
01820                 /*
01821                  * We now try to find the first down node less than the
01822                  * least serial.
01823                  */
01824                 dparent = current;
01825                 for (dcurrent = current->down;
01826                      dcurrent != NULL;
01827                      dcurrent = down_next) {
01828                         down_next = dcurrent->down;
01829                         if (dcurrent->serial < least_serial)
01830                                 break;
01831                         dparent = dcurrent;
01832                 }
01833 
01834                 /*
01835                  * If there is a such an rdataset, delete it and any older
01836                  * versions.
01837                  */
01838                 if (dcurrent != NULL) {
01839                         do {
01840                                 down_next = dcurrent->down;
01841                                 INSIST(dcurrent->serial <= least_serial);
01842                                 free_rdataset(rbtdb, mctx, dcurrent);
01843                                 dcurrent = down_next;
01844                         } while (dcurrent != NULL);
                              /* dparent is the last entry that was kept. */
01845                         dparent->down = NULL;
01846                 }
01847 
01848                 /*
01849                  * Note.  The serial number of 'current' might be less than
01850                  * least_serial too, but we cannot delete it because it is
01851                  * the most recent version, unless it is a NONEXISTENT
01852                  * rdataset.
01853                  */
01854                 if (current->down != NULL) {
                              /*
                               * Older versions remain under this header, so
                               * the node must stay dirty.
                               */
01855                         still_dirty = ISC_TRUE;
01856                         top_prev = current;
01857                 } else {
01858                         /*
01859                          * If this is a NONEXISTENT rdataset, we can delete it.
01860                          */
01861                         if (NONEXISTENT(current)) {
01862                                 if (top_prev != NULL)
01863                                         top_prev->next = current->next;
01864                                 else
01865                                         node->data = current->next;
01866                                 free_rdataset(rbtdb, mctx, current);
01867                         } else
01868                                 top_prev = current;
01869                 }
01870         }
01871         if (!still_dirty)
01872                 node->dirty = 0;
01873 }
01874 
01875 static void
01876 delete_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node)
01877 {
01878         dns_rbtnode_t *nsecnode;
01879         dns_fixedname_t fname;
01880         dns_name_t *name;
01881         isc_result_t result = ISC_R_UNEXPECTED;
01882 
01883         INSIST(!ISC_LINK_LINKED(node, deadlink));
01884 
01885         switch (node->nsec) {
01886         case DNS_RBT_NSEC_NORMAL:
01887                 if (rbtdb->rpzs != NULL && node->rpz) {
01888                         dns_fixedname_init(&fname);
01889                         name = dns_fixedname_name(&fname);
01890                         dns_rbt_fullnamefromnode(node, name);
01891                         dns_rpz_delete(rbtdb->rpzs, rbtdb->rpz_num, name);
01892                 }
01893                 result = dns_rbt_deletenode(rbtdb->tree, node, ISC_FALSE);
01894                 break;
01895         case DNS_RBT_NSEC_HAS_NSEC:
01896                 dns_fixedname_init(&fname);
01897                 name = dns_fixedname_name(&fname);
01898                 dns_rbt_fullnamefromnode(node, name);
01899                 /*
01900                  * Delete the corresponding node from the auxiliary NSEC
01901                  * tree before deleting from the main tree.
01902                  */
01903                 nsecnode = NULL;
01904                 result = dns_rbt_findnode(rbtdb->nsec, name, NULL, &nsecnode,
01905                                           NULL, DNS_RBTFIND_EMPTYDATA,
01906                                           NULL, NULL);
01907                 if (result != ISC_R_SUCCESS) {
01908                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
01909                                       DNS_LOGMODULE_CACHE, ISC_LOG_WARNING,
01910                                       "delete_node: "
01911                                       "dns_rbt_findnode(nsec): %s",
01912                                       isc_result_totext(result));
01913                 } else {
01914                         result = dns_rbt_deletenode(rbtdb->nsec, nsecnode,
01915                                                     ISC_FALSE);
01916                         if (result != ISC_R_SUCCESS) {
01917                                 isc_log_write(dns_lctx,
01918                                               DNS_LOGCATEGORY_DATABASE,
01919                                               DNS_LOGMODULE_CACHE,
01920                                               ISC_LOG_WARNING,
01921                                               "delete_node(): "
01922                                               "dns_rbt_deletenode(nsecnode): %s",
01923                                               isc_result_totext(result));
01924                         }
01925                 }
01926                 if (rbtdb->rpzs != NULL && node->rpz)
01927                         dns_rpz_delete(rbtdb->rpzs, rbtdb->rpz_num, name);
01928                 result = dns_rbt_deletenode(rbtdb->tree, node, ISC_FALSE);
01929                 break;
01930         case DNS_RBT_NSEC_NSEC:
01931                 result = dns_rbt_deletenode(rbtdb->nsec, node, ISC_FALSE);
01932                 break;
01933         case DNS_RBT_NSEC_NSEC3:
01934                 result = dns_rbt_deletenode(rbtdb->nsec3, node, ISC_FALSE);
01935                 break;
01936         }
01937         if (result != ISC_R_SUCCESS) {
01938                 isc_log_write(dns_lctx,
01939                               DNS_LOGCATEGORY_DATABASE,
01940                               DNS_LOGMODULE_CACHE,
01941                               ISC_LOG_WARNING,
01942                               "delete_node(): "
01943                               "dns_rbt_deletenode: %s",
01944                               isc_result_totext(result));
01945         }
01946 }
01947 
01948 /*%
01949  * Clean up dead nodes.  These are nodes which have no references, and
01950  * have no data.  They are dead but we could not or chose not to delete
01951  * them when we deleted all the data at that node because we did not want
01952  * to wait for the tree write lock.
01953  *
01954  * The caller must hold a tree write lock and bucketnum'th node (write) lock.
01955  */
01956 static void
01957 cleanup_dead_nodes(dns_rbtdb_t *rbtdb, int bucketnum) {
01958         dns_rbtnode_t *node;
01959         int count = 10;         /* XXXJT: should be adjustable */
01960 
01961         node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
01962         while (node != NULL && count > 0) {
01963                 ISC_LIST_UNLINK(rbtdb->deadnodes[bucketnum], node, deadlink);
01964 
01965                 /*
01966                  * Since we're holding a tree write lock, it should be
01967                  * impossible for this node to be referenced by others.
01968                  */
01969                 INSIST(dns_rbtnode_refcurrent(node) == 0 &&
01970                        node->data == NULL);
01971 
01972                 delete_node(rbtdb, node);
01973 
01974                 node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
01975                 count--;
01976         }
01977 }
01978 
01979 /*
01980  * Caller must be holding the node lock.
01981  */
01982 static inline void
01983 new_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
01984         unsigned int lockrefs, noderefs;
01985         isc_refcount_t *lockref;
01986 
01987         INSIST(!ISC_LINK_LINKED(node, deadlink));
01988         dns_rbtnode_refincrement0(node, &noderefs);
01989         if (noderefs == 1) {    /* this is the first reference to the node */
01990                 lockref = &rbtdb->node_locks[node->locknum].references;
01991                 isc_refcount_increment0(lockref, &lockrefs);
01992                 INSIST(lockrefs != 0);
01993         }
01994         INSIST(noderefs != 0);
01995 }
01996 
01997 /*
01998  * This function is assumed to be called when a node is newly referenced
01999  * and can be in the deadnode list.  In that case the node must be retrieved
02000  * from the list because it is going to be used.  In addition, if the caller
02001  * happens to hold a write lock on the tree, it's a good chance to purge dead
02002  * nodes.
02003  * Note: while a new reference is gained in multiple places, there are only very
02004  * few cases where the node can be in the deadnode list (only empty nodes can
02005  * have been added to the list).
02006  */
02007 static inline void
02008 reactivate_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
02009                 isc_rwlocktype_t treelocktype)
02010 {
02011         isc_rwlocktype_t locktype = isc_rwlocktype_read;
02012         nodelock_t *nodelock = &rbtdb->node_locks[node->locknum].lock;
02013         isc_boolean_t maybe_cleanup = ISC_FALSE;
02014 
02015         POST(locktype);
02016 
02017         NODE_STRONGLOCK(nodelock);
02018         NODE_WEAKLOCK(nodelock, locktype);
02019 
02020         /*
02021          * Check if we can possibly cleanup the dead node.  If so, upgrade
02022          * the node lock below to perform the cleanup.
02023          */
02024         if (!ISC_LIST_EMPTY(rbtdb->deadnodes[node->locknum]) &&
02025             treelocktype == isc_rwlocktype_write) {
02026                 maybe_cleanup = ISC_TRUE;
02027         }
02028 
02029         if (ISC_LINK_LINKED(node, deadlink) || maybe_cleanup) {
02030                 /*
02031                  * Upgrade the lock and test if we still need to unlink.
02032                  */
02033                 NODE_WEAKUNLOCK(nodelock, locktype);
02034                 locktype = isc_rwlocktype_write;
02035                 POST(locktype);
02036                 NODE_WEAKLOCK(nodelock, locktype);
02037                 if (ISC_LINK_LINKED(node, deadlink))
02038                         ISC_LIST_UNLINK(rbtdb->deadnodes[node->locknum],
02039                                         node, deadlink);
02040                 if (maybe_cleanup)
02041                         cleanup_dead_nodes(rbtdb, node->locknum);
02042         }
02043 
02044         new_reference(rbtdb, node);
02045 
02046         NODE_WEAKUNLOCK(nodelock, locktype);
02047         NODE_STRONGUNLOCK(nodelock);
02048 }
02049 
02050 /*
02051  * Caller must be holding the node lock; either the "strong", read or write
02052  * lock.  Note that the lock must be held even when node references are
02053  * atomically modified; in that case the decrement operation itself does not
02054  * have to be protected, but we must avoid a race condition where multiple
02055  * threads are decreasing the reference to zero simultaneously and at least
02056  * one of them is going to free the node.
02057  * This function returns ISC_TRUE if and only if the node reference decreases
02058  * to zero.
02059  */
/*
 * Parameters, as they are used below:
 *
 *   least_serial - only consulted when a dirty node of a non-cache database
 *                  is cleaned.  If it is 0, rbtdb->least_serial is read
 *                  under the database read lock and used instead.
 *   nlock        - the kind of node lock the caller holds.  When it is
 *                  isc_rwlocktype_read and the fast path does not apply,
 *                  the weak node lock is released and re-taken for writing,
 *                  and downgraded again before returning.
 *   tlock        - the kind of tree lock the caller holds (none, read or
 *                  write).  Unless it is already write, a non-blocking
 *                  attempt is made to get the tree write lock; if that
 *                  succeeded, the tree lock is put back to its original
 *                  state before returning.
 *   pruning      - when ISC_TRUE, no prune_tree event is dispatched for the
 *                  node; a deletable node is deleted directly.
 *
 * Note on the return value: when a prune_tree event is dispatched, a new
 * reference is taken on the node for the event and ISC_FALSE is returned,
 * even though the caller's own reference was the last one.
 */
02060 static isc_boolean_t
02061 decrement_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
02062                     rbtdb_serial_t least_serial,
02063                     isc_rwlocktype_t nlock, isc_rwlocktype_t tlock,
02064                     isc_boolean_t pruning)
02065 {
02066         isc_result_t result;
02067         isc_boolean_t write_locked;
02068         rbtdb_nodelock_t *nodelock;
02069         unsigned int refs, nrefs;
02070         int bucket = node->locknum;
02071         isc_boolean_t no_reference = ISC_TRUE;
02072 
02073         nodelock = &rbtdb->node_locks[bucket];
02074 
              /*
               * A node is kept in the tree if it still has data, has a
               * subtree below it, or is the database's origin node.  The
               * macro is #undef'd again further down.
               */
02075 #define KEEP_NODE(n, r) \
02076         ((n)->data != NULL || (n)->down != NULL || (n) == (r)->origin_node)
02077 
02078         /* Handle easy and typical case first. */
              /*
               * Nothing to clean and nothing to delete: just drop the node
               * reference and, if it was the last one, the bucket reference.
               */
02079         if (!node->dirty && KEEP_NODE(node, rbtdb)) {
02080                 dns_rbtnode_refdecrement(node, &nrefs);
02081                 INSIST((int)nrefs >= 0);
02082                 if (nrefs == 0) {
02083                         isc_refcount_decrement(&nodelock->references, &refs);
02084                         INSIST((int)refs >= 0);
02085                 }
02086                 return ((nrefs == 0) ? ISC_TRUE : ISC_FALSE);
02087         }
02088 
02089         /* Upgrade the lock? */
02090         if (nlock == isc_rwlocktype_read) {
02091                 NODE_WEAKUNLOCK(&nodelock->lock, isc_rwlocktype_read);
02092                 NODE_WEAKLOCK(&nodelock->lock, isc_rwlocktype_write);
02093         }
02094 
02095         dns_rbtnode_refdecrement(node, &nrefs);
02096         INSIST((int)nrefs >= 0);
02097         if (nrefs > 0) {
02098                 /* Restore the lock? */
02099                 if (nlock == isc_rwlocktype_read)
02100                         NODE_WEAKDOWNGRADE(&nodelock->lock);
02101                 return (ISC_FALSE);
02102         }
02103 
              /*
               * From here on the node has no references left.  Clean out
               * its rdatasets first if it is dirty.
               */
02104         if (node->dirty) {
02105                 if (IS_CACHE(rbtdb))
02106                         clean_cache_node(rbtdb, node);
02107                 else {
02108                         if (least_serial == 0) {
02109                                 /*
02110                                  * Caller doesn't know the least serial.
02111                                  * Get it.
02112                                  */
02113                                 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
02114                                 least_serial = rbtdb->least_serial;
02115                                 RBTDB_UNLOCK(&rbtdb->lock,
02116                                              isc_rwlocktype_read);
02117                         }
02118                         clean_zone_node(rbtdb, node, least_serial);
02119                 }
02120         }
02121 
02122         /*
02123          * Attempt to switch to a write lock on the tree.  If this fails,
02124          * we will add this node to a linked list of nodes in this locking
02125          * bucket which we will free later.
02126          */
02127         if (tlock != isc_rwlocktype_write) {
02128                 /*
02129                  * Locking hierarchy notwithstanding, we don't need to free
02130                  * the node lock before acquiring the tree write lock because
02131                  * we only do a trylock.
02132                  */
02133                 if (tlock == isc_rwlocktype_read)
02134                         result = isc_rwlock_tryupgrade(&rbtdb->tree_lock);
02135                 else
02136                         result = isc_rwlock_trylock(&rbtdb->tree_lock,
02137                                                     isc_rwlocktype_write);
02138                 RUNTIME_CHECK(result == ISC_R_SUCCESS ||
02139                               result == ISC_R_LOCKBUSY);
02140 
02141                 write_locked = ISC_TF(result == ISC_R_SUCCESS);
02142         } else
02143                 write_locked = ISC_TRUE;
02144 
              /* The node's last reference is gone; drop the bucket's too. */
02145         isc_refcount_decrement(&nodelock->references, &refs);
02146         INSIST((int)refs >= 0);
02147 
              /*
               * Re-test after cleaning: a node that must stay in the tree
               * only needs its locks restored.
               */
02148         if (KEEP_NODE(node, rbtdb))
02149                 goto restore_locks;
02150 
02151 #undef KEEP_NODE
02152 
02153         if (write_locked) {
02154                 /*
02155                  * We can now delete the node.
02156                  */
02157 
02158                 /*
02159                  * If this node is the only one in the level it's in, deleting
02160                  * this node may recursively make its parent the only node in
02161                  * the parent level; if so, and if no one is currently using
02162                  * the parent node, this is almost the only opportunity to
02163                  * clean it up.  But the recursive cleanup is not that trivial
02164                  * since the child and parent may be in different lock buckets,
02165                  * which would cause a lock order reversal problem.  To avoid
02166                  * the trouble, we'll dispatch a separate event for batch
02167                  * cleaning.  We need to check whether we're deleting the node
02168                  * as a result of pruning to avoid infinite dispatching.
02169                  * Note: pruning happens only when a task has been set for the
02170                  * rbtdb.  If the user of the rbtdb chooses not to set a task,
02171                  * it's their responsibility to purge stale leaves (e.g. by
02172                  * periodic walk-through).
02173                  */
02174                 if (!pruning && node->parent != NULL &&
02175                     node->parent->down == node && node->left == NULL &&
02176                     node->right == NULL && rbtdb->task != NULL) {
02177                         isc_event_t *ev;
02178                         dns_db_t *db;
02179 
02180                         ev = isc_event_allocate(rbtdb->common.mctx, NULL,
02181                                                 DNS_EVENT_RBTPRUNE,
02182                                                 prune_tree, node,
02183                                                 sizeof(isc_event_t));
02184                         if (ev != NULL) {
                                      /*
                                       * The event carries the node as its
                                       * argument and an attached database
                                       * handle as its sender.  A fresh
                                       * reference is taken on the node,
                                       * which is why ISC_FALSE is returned.
                                       */
02185                                 new_reference(rbtdb, node);
02186                                 db = NULL;
02187                                 attach((dns_db_t *)rbtdb, &db);
02188                                 ev->ev_sender = db;
02189                                 isc_task_send(rbtdb->task, &ev);
02190                                 no_reference = ISC_FALSE;
02191                         } else {
02192                                 /*
02193                                  * XXX: this is a weird situation.  We could
02194                                  * ignore this error case, but then the stale
02195                                  * node will unlikely be purged except via a
02196                                  * rare condition such as manual cleanup.  So
02197                                  * we queue it in the deadnodes list, hoping
02198                                  * the memory shortage is temporary and the node
02199                                  * will be deleted later.
02200                                  */
02201                                 isc_log_write(dns_lctx,
02202                                               DNS_LOGCATEGORY_DATABASE,
02203                                               DNS_LOGMODULE_CACHE,
02204                                               ISC_LOG_INFO,
02205                                               "decrement_reference: failed to "
02206                                               "allocate pruning event");
02207                                 INSIST(node->data == NULL);
02208                                 INSIST(!ISC_LINK_LINKED(node, deadlink));
02209                                 ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node,
02210                                                 deadlink);
02211                         }
02212                 } else {
02213                         if (isc_log_wouldlog(dns_lctx, ISC_LOG_DEBUG(1))) {
02214                                 char printname[DNS_NAME_FORMATSIZE];
02215 
02216                                 isc_log_write(dns_lctx,
02217                                               DNS_LOGCATEGORY_DATABASE,
02218                                               DNS_LOGMODULE_CACHE,
02219                                               ISC_LOG_DEBUG(1),
02220                                               "decrement_reference: "
02221                                               "delete from rbt: %p %s",
02222                                               node,
02223                                               dns_rbt_formatnodename(node,
02224                                                         printname,
02225                                                         sizeof(printname)));
02226                         }
02227 
02228                         delete_node(rbtdb, node);
02229                 }
02230         } else {
                      /*
                       * No tree write lock: queue the node on this bucket's
                       * deadnodes list instead of deleting it now.
                       */
02231                 INSIST(node->data == NULL);
02232                 INSIST(!ISC_LINK_LINKED(node, deadlink));
02233                 ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node, deadlink);
02234         }
02235 
02236  restore_locks:
02237         /* Restore the lock? */
02238         if (nlock == isc_rwlocktype_read)
02239                 NODE_WEAKDOWNGRADE(&nodelock->lock);
02240 
02241         /*
02242          * Relock a read lock, or unlock the write lock if no lock was held.
02243          */
02244         if (tlock == isc_rwlocktype_none)
02245                 if (write_locked)
02246                         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
02247 
02248         if (tlock == isc_rwlocktype_read)
02249                 if (write_locked)
02250                         isc_rwlock_downgrade(&rbtdb->tree_lock);
02251 
02252         return (no_reference);
02253 }
02254 
02255 /*
02256  * Prune the tree by recursively cleaning-up single leaves.  In the worst
02257  * case, the number of iteration is the number of tree levels, which is at
02258  * most the maximum number of domain name labels, i.e, 127.  In practice, this
02259  * should be much smaller (only a few times), and even the worst case would be
02260  * acceptable for a single event.
02261  */
02262 static void
02263 prune_tree(isc_task_t *task, isc_event_t *event) {
02264         dns_rbtdb_t *rbtdb = event->ev_sender;
02265         dns_rbtnode_t *node = event->ev_arg;
02266         dns_rbtnode_t *parent;
02267         unsigned int locknum;
02268 
02269         UNUSED(task);
02270 
02271         isc_event_free(&event);
02272 
02273         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
02274         locknum = node->locknum;
02275         NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
02276         do {
02277                 parent = node->parent;
02278                 decrement_reference(rbtdb, node, 0, isc_rwlocktype_write,
02279                                     isc_rwlocktype_write, ISC_TRUE);
02280 
02281                 if (parent != NULL && parent->down == NULL) {
02282                         /*
02283                          * node was the only down child of the parent and has
02284                          * just been removed.  We'll then need to examine the
02285                          * parent.  Keep the lock if possible; otherwise,
02286                          * release the old lock and acquire one for the parent.
02287                          */
02288                         if (parent->locknum != locknum) {
02289                                 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
02290                                             isc_rwlocktype_write);
02291                                 locknum = parent->locknum;
02292                                 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
02293                                           isc_rwlocktype_write);
02294                         }
02295 
02296                         /*
02297                          * We need to gain a reference to the node before
02298                          * decrementing it in the next iteration.  In addition,
02299                          * if the node is in the dead-nodes list, extract it
02300                          * from the list beforehand as we do in
02301                          * reactivate_node().
02302                          */
02303                         if (ISC_LINK_LINKED(parent, deadlink))
02304                                 ISC_LIST_UNLINK(rbtdb->deadnodes[locknum],
02305                                                 parent, deadlink);
02306                         new_reference(rbtdb, parent);
02307                 } else
02308                         parent = NULL;
02309 
02310                 node = parent;
02311         } while (node != NULL);
02312         NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
02313         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
02314 
02315         detach((dns_db_t **)&rbtdb);
02316 }
02317 
02318 static inline void
02319 make_least_version(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
02320                    rbtdb_changedlist_t *cleanup_list)
02321 {
02322         /*
02323          * Caller must be holding the database lock.
02324          */
02325 
02326         rbtdb->least_serial = version->serial;
02327         *cleanup_list = version->changed_list;
02328         ISC_LIST_INIT(version->changed_list);
02329 }
02330 
02331 static inline void
02332 cleanup_nondirty(rbtdb_version_t *version, rbtdb_changedlist_t *cleanup_list) {
02333         rbtdb_changed_t *changed, *next_changed;
02334 
02335         /*
02336          * If the changed record is dirty, then
02337          * an update created multiple versions of
02338          * a given rdataset.  We keep this list
02339          * until we're the least open version, at
02340          * which point it's safe to get rid of any
02341          * older versions.
02342          *
02343          * If the changed record isn't dirty, then
02344          * we don't need it anymore since we're
02345          * committing and not rolling back.
02346          *
02347          * The caller must be holding the database lock.
02348          */
02349         for (changed = HEAD(version->changed_list);
02350              changed != NULL;
02351              changed = next_changed) {
02352                 next_changed = NEXT(changed, link);
02353                 if (!changed->dirty) {
02354                         UNLINK(version->changed_list,
02355                                changed, link);
02356                         APPEND(*cleanup_list,
02357                                changed, link);
02358                 }
02359         }
02360 }
02361 
02362 static void
02363 iszonesecure(dns_db_t *db, rbtdb_version_t *version, dns_dbnode_t *origin) {
02364         dns_rdataset_t keyset;
02365         dns_rdataset_t nsecset, signsecset;
02366         isc_boolean_t haszonekey = ISC_FALSE;
02367         isc_boolean_t hasnsec = ISC_FALSE;
02368         isc_result_t result;
02369 
02370         dns_rdataset_init(&keyset);
02371         result = dns_db_findrdataset(db, origin, version, dns_rdatatype_dnskey,
02372                                      0, 0, &keyset, NULL);
02373         if (result == ISC_R_SUCCESS) {
02374                 result = dns_rdataset_first(&keyset);
02375                 while (result == ISC_R_SUCCESS) {
02376                         dns_rdata_t keyrdata = DNS_RDATA_INIT;
02377                         dns_rdataset_current(&keyset, &keyrdata);
02378                         if (dns_zonekey_iszonekey(&keyrdata)) {
02379                                 haszonekey = ISC_TRUE;
02380                                 break;
02381                         }
02382                         result = dns_rdataset_next(&keyset);
02383                 }
02384                 dns_rdataset_disassociate(&keyset);
02385         }
02386         if (!haszonekey) {
02387                 version->secure = dns_db_insecure;
02388                 version->havensec3 = ISC_FALSE;
02389                 return;
02390         }
02391 
02392         dns_rdataset_init(&nsecset);
02393         dns_rdataset_init(&signsecset);
02394         result = dns_db_findrdataset(db, origin, version, dns_rdatatype_nsec,
02395                                      0, 0, &nsecset, &signsecset);
02396         if (result == ISC_R_SUCCESS) {
02397                 if (dns_rdataset_isassociated(&signsecset)) {
02398                         hasnsec = ISC_TRUE;
02399                         dns_rdataset_disassociate(&signsecset);
02400                 }
02401                 dns_rdataset_disassociate(&nsecset);
02402         }
02403 
02404         setnsec3parameters(db, version);
02405 
02406         /*
02407          * Do we have a valid NSEC/NSEC3 chain?
02408          */
02409         if (version->havensec3 || hasnsec)
02410                 version->secure = dns_db_secure;
02411         else
02412                 version->secure = dns_db_insecure;
02413 }
02414 
02415 /*%<
02416  * Walk the origin node looking for NSEC3PARAM records.
02417  * Cache the nsec3 parameters.
02418  */
02419 static void
02420 setnsec3parameters(dns_db_t *db, rbtdb_version_t *version) {
02421         dns_rbtnode_t *node;
02422         dns_rdata_nsec3param_t nsec3param;
02423         dns_rdata_t rdata = DNS_RDATA_INIT;
02424         isc_region_t region;
02425         isc_result_t result;
02426         rdatasetheader_t *header, *header_next;
02427         unsigned char *raw;             /* RDATASLAB */
02428         unsigned int count, length;
02429         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
02430 
02431         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
02432         version->havensec3 = ISC_FALSE;
02433         node = rbtdb->origin_node;
02434         NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
02435                   isc_rwlocktype_read);
02436         for (header = node->data;
02437              header != NULL;
02438              header = header_next) {
02439                 header_next = header->next;
02440                 do {
02441                         if (header->serial <= version->serial &&
02442                             !IGNORE(header)) {
02443                                 if (NONEXISTENT(header))
02444                                         header = NULL;
02445                                 break;
02446                         } else
02447                                 header = header->down;
02448                 } while (header != NULL);
02449 
02450                 if (header != NULL &&
02451                     (header->type == dns_rdatatype_nsec3param)) {
02452                         /*
02453                          * Find A NSEC3PARAM with a supported algorithm.
02454                          */
02455                         raw = (unsigned char *)header + sizeof(*header);
02456                         count = raw[0] * 256 + raw[1]; /* count */
02457 #if DNS_RDATASET_FIXED
02458                         raw += count * 4 + 2;
02459 #else
02460                         raw += 2;
02461 #endif
02462                         while (count-- > 0U) {
02463                                 length = raw[0] * 256 + raw[1];
02464 #if DNS_RDATASET_FIXED
02465                                 raw += 4;
02466 #else
02467                                 raw += 2;
02468 #endif
02469                                 region.base = raw;
02470                                 region.length = length;
02471                                 raw += length;
02472                                 dns_rdata_fromregion(&rdata,
02473                                                      rbtdb->common.rdclass,
02474                                                      dns_rdatatype_nsec3param,
02475                                                      &region);
02476                                 result = dns_rdata_tostruct(&rdata,
02477                                                             &nsec3param,
02478                                                             NULL);
02479                                 INSIST(result == ISC_R_SUCCESS);
02480                                 dns_rdata_reset(&rdata);
02481 
02482                                 if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG &&
02483                                     !dns_nsec3_supportedhash(nsec3param.hash))
02484                                         continue;
02485 
02486                                 if (nsec3param.flags != 0)
02487                                         continue;
02488 
02489                                 memmove(version->salt, nsec3param.salt,
02490                                         nsec3param.salt_length);
02491                                 version->hash = nsec3param.hash;
02492                                 version->salt_length = nsec3param.salt_length;
02493                                 version->iterations = nsec3param.iterations;
02494                                 version->flags = nsec3param.flags;
02495                                 version->havensec3 = ISC_TRUE;
02496                                 /*
02497                                  * Look for a better algorithm than the
02498                                  * unknown test algorithm.
02499                                  */
02500                                 if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG)
02501                                         goto unlock;
02502                         }
02503                 }
02504         }
02505  unlock:
02506         NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
02507                     isc_rwlocktype_read);
02508         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
02509 }
02510 
02511 static void
02512 cleanup_dead_nodes_callback(isc_task_t *task, isc_event_t *event) {
02513         dns_rbtdb_t *rbtdb = event->ev_arg;
02514         isc_boolean_t again = ISC_FALSE;
02515         unsigned int locknum;
02516         unsigned int refs;
02517 
02518         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
02519         for (locknum = 0; locknum < rbtdb->node_lock_count; locknum++) {
02520                 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
02521                           isc_rwlocktype_write);
02522                 cleanup_dead_nodes(rbtdb, locknum);
02523                 if (ISC_LIST_HEAD(rbtdb->deadnodes[locknum]) != NULL)
02524                         again = ISC_TRUE;
02525                 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
02526                             isc_rwlocktype_write);
02527         }
02528         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
02529         if (again)
02530                 isc_task_send(task, &event);
02531         else {
02532                 isc_event_free(&event);
02533                 isc_refcount_decrement(&rbtdb->references, &refs);
02534                 if (refs == 0)
02535                         maybe_free_rbtdb(rbtdb);
02536         }
02537 }
02538 
/*
 * Close the version '*versionp' of 'db': drop one reference to it and set
 * '*versionp' to NULL.
 *
 * If references to the version remain, nothing else is done (apart from
 * asserting that a commit was not requested on a writer version).
 * Otherwise, under the database write lock:
 *
 *  - writer version, 'commit' true: the version replaces
 *    rbtdb->current_version.  The DB's reference to the old current
 *    version is released and, if that was the last one, the old version
 *    is unlinked from the open list and freed.
 *  - writer version, 'commit' false: the transaction is rolled back; the
 *    version's changed list becomes the cleanup list and the version is
 *    freed.
 *  - non-writer version: it is unlinked from the open list and, unless it
 *    is the current version, freed; the least open version is advanced or
 *    its pending cleanups are passed on to the next greater version.
 *
 * After the database lock is released, re-signed headers are committed or
 * rolled back, and every node on the cleanup list is (rolled back and)
 * dereferenced.  If the DB has a task, a dead-node cleanup event is sent
 * to it; otherwise dead nodes are purged inline under the tree write lock.
 */
02539 static void
02540 closeversion(dns_db_t *db, dns_dbversion_t **versionp, isc_boolean_t commit) {
02541         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
02542         rbtdb_version_t *version, *cleanup_version, *least_greater;
02543         isc_boolean_t rollback = ISC_FALSE;
02544         rbtdb_changedlist_t cleanup_list;
02545         rdatasetheaderlist_t resigned_list;
02546         rbtdb_changed_t *changed, *next_changed;
02547         rbtdb_serial_t serial, least_serial;
02548         dns_rbtnode_t *rbtnode;
02549         unsigned int refs;
02550         rdatasetheader_t *header;
02551 
02552         REQUIRE(VALID_RBTDB(rbtdb));
02553         version = (rbtdb_version_t *)*versionp;
02554         INSIST(version->rbtdb == rbtdb);
02555 
02556         cleanup_version = NULL;
02557         ISC_LIST_INIT(cleanup_list);
02558         ISC_LIST_INIT(resigned_list);
02559 
              /* Drop the caller's reference; 'refs' receives the new count. */
02560         isc_refcount_decrement(&version->references, &refs);
02561         if (refs > 0) {         /* typical and easy case first */
02562                 if (commit) {
02563                         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
02564                         INSIST(!version->writer);
02565                         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
02566                 }
02567                 goto end;
02568         }
02569 
02570         /*
02571          * Update the zone's secure status in version before making
02572          * it the current version.
02573          */
02574         if (version->writer && commit && !IS_CACHE(rbtdb))
02575                 iszonesecure(db, version, rbtdb->origin_node);
02576 
02577         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
              /*
               * Save the serial now: on rollback 'version' is freed before
               * rollback_node() needs it below.
               */
02578         serial = version->serial;
02579         if (version->writer) {
02580                 if (commit) {
02581                         unsigned cur_ref;
02582                         rbtdb_version_t *cur_version;
02583 
02584                         INSIST(version->commit_ok);
02585                         INSIST(version == rbtdb->future_version);
02586                         /*
02587                          * The current version is going to be replaced.
02588                          * Release the (likely last) reference to it from the
02589                          * DB itself and unlink it from the open list.
02590                          */
02591                         cur_version = rbtdb->current_version;
02592                         isc_refcount_decrement(&cur_version->references,
02593                                                &cur_ref);
02594                         if (cur_ref == 0) {
02595                                 if (cur_version->serial == rbtdb->least_serial)
02596                                         INSIST(EMPTY(cur_version->changed_list));
02597                                 UNLINK(rbtdb->open_versions,
02598                                        cur_version, link);
02599                         }
02600                         if (EMPTY(rbtdb->open_versions)) {
02601                                 /*
02602                                  * We're going to become the least open
02603                                  * version.
02604                                  */
02605                                 make_least_version(rbtdb, version,
02606                                                    &cleanup_list);
02607                         } else {
02608                                 /*
02609                                  * Some other open version is the
02610                                  * least version.  We can't cleanup
02611                                  * records that were changed in this
02612                                  * version because the older versions
02613                                  * may still be in use by an open
02614                                  * version.
02615                                  *
02616                                  * We can, however, discard the
02617                                  * changed records for things that
02618                                  * we've added that didn't exist in
02619                                  * prior versions.
02620                                  */
02621                                 cleanup_nondirty(version, &cleanup_list);
02622                         }
02623                         /*
02624                          * If the (soon to be former) current version
02625                          * isn't being used by anyone, we can clean
02626                          * it up.
02627                          */
02628                         if (cur_ref == 0) {
02629                                 cleanup_version = cur_version;
02630                                 APPENDLIST(version->changed_list,
02631                                            cleanup_version->changed_list,
02632                                            link);
02633                         }
02634                         /*
02635                          * Become the current version.
02636                          */
02637                         version->writer = ISC_FALSE;
02638                         rbtdb->current_version = version;
02639                         rbtdb->current_serial = version->serial;
02640                         rbtdb->future_version = NULL;
02641 
02642                         /*
02643                          * Keep the current version in the open list, and
02644                          * gain a reference for the DB itself (see the DB
02645                          * creation function below).  This must be the only
02646                          * case where we need to increment the counter from
02647                          * zero and need to use isc_refcount_increment0().
02648                          */
02649                         isc_refcount_increment0(&version->references,
02650                                                 &cur_ref);
02651                         INSIST(cur_ref == 1);
02652                         PREPEND(rbtdb->open_versions,
02653                                 rbtdb->current_version, link);
                              /*
                               * Take over the re-signed headers; they are
                               * processed after the database lock is dropped.
                               */
02654                         resigned_list = version->resigned_list;
02655                         ISC_LIST_INIT(version->resigned_list);
02656                 } else {
02657                         /*
02658                          * We're rolling back this transaction.
02659                          */
02660                         cleanup_list = version->changed_list;
02661                         ISC_LIST_INIT(version->changed_list);
02662                         resigned_list = version->resigned_list;
02663                         ISC_LIST_INIT(version->resigned_list);
02664                         rollback = ISC_TRUE;
02665                         cleanup_version = version;
02666                         rbtdb->future_version = NULL;
02667                 }
02668         } else {
02669                 if (version != rbtdb->current_version) {
02670                         /*
02671                          * There are no external or internal references
02672                          * to this version and it can be cleaned up.
02673                          */
02674                         cleanup_version = version;
02675 
02676                         /*
02677                          * Find the version with the least serial
02678                          * number greater than ours.
02679                          */
02680                         least_greater = PREV(version, link);
02681                         if (least_greater == NULL)
02682                                 least_greater = rbtdb->current_version;
02683 
02684                         INSIST(version->serial < least_greater->serial);
02685                         /*
02686                          * Is this the least open version?
02687                          */
02688                         if (version->serial == rbtdb->least_serial) {
02689                                 /*
02690                                  * Yes.  Install the new least open
02691                                  * version.
02692                                  */
02693                                 make_least_version(rbtdb,
02694                                                    least_greater,
02695                                                    &cleanup_list);
02696                         } else {
02697                                 /*
02698                                  * Add any unexecuted cleanups to
02699                                  * those of the least greater version.
02700                                  */
02701                                 APPENDLIST(least_greater->changed_list,
02702                                            version->changed_list,
02703                                            link);
02704                         }
02705                 } else if (version->serial == rbtdb->least_serial)
02706                         INSIST(EMPTY(version->changed_list));
02707                 UNLINK(rbtdb->open_versions, version, link);
02708         }
              /*
               * Snapshot least_serial while the database lock is still held;
               * it is used below after the lock has been released.
               */
02709         least_serial = rbtdb->least_serial;
02710         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
02711 
              /*
               * Free the retired version, if any.  Its changed list must
               * already have been handed off.
               */
02712         if (cleanup_version != NULL) {
02713                 INSIST(EMPTY(cleanup_version->changed_list));
02714                 isc_mem_put(rbtdb->common.mctx, cleanup_version,
02715                             sizeof(*cleanup_version));
02716         }
02717 
02718         /*
02719          * Commit/rollback re-signed headers.
02720          */
02721         for (header = HEAD(resigned_list);
02722              header != NULL;
02723              header = HEAD(resigned_list)) {
02724                 nodelock_t *lock;
02725 
02726                 ISC_LIST_UNLINK(resigned_list, header, link);
02727 
02728                 lock = &rbtdb->node_locks[header->node->locknum].lock;
02729                 NODE_LOCK(lock, isc_rwlocktype_write);
                      /*
                       * On rollback, put headers that are not marked to be
                       * ignored back onto the re-signing heap.
                       */
02730                 if (rollback && !IGNORE(header)) {
02731                         isc_result_t result;
02732                         result = resign_insert(rbtdb, header->node->locknum,
02733                                                header);
02734                         if (result != ISC_R_SUCCESS)
02735                                 isc_log_write(dns_lctx,
02736                                               DNS_LOGCATEGORY_DATABASE,
02737                                               DNS_LOGMODULE_ZONE, ISC_LOG_ERROR,
02738                                               "Unable to reinsert header to "
02739                                               "re-signing heap: %s\n",
02740                                 dns_result_totext(result));
02741                 }
02742                 decrement_reference(rbtdb, header->node, least_serial,
02743                                     isc_rwlocktype_write, isc_rwlocktype_none,
02744                                     ISC_FALSE);
02745                 NODE_UNLOCK(lock, isc_rwlocktype_write);
02746         }
02747 
02748         if (!EMPTY(cleanup_list)) {
02749                 isc_event_t *event = NULL;
02750                 isc_rwlocktype_t tlock = isc_rwlocktype_none;
02751 
                      /*
                       * With a task available, dead-node cleanup is deferred
                       * to cleanup_dead_nodes_callback().
                       */
02752                 if (rbtdb->task != NULL)
02753                         event = isc_event_allocate(rbtdb->common.mctx, NULL,
02754                                                    DNS_EVENT_RBTDEADNODES,
02755                                                    cleanup_dead_nodes_callback,
02756                                                    rbtdb, sizeof(isc_event_t));
02757                 if (event == NULL) {
02758                         /*
02759                          * We acquire a tree write lock here in order to make
02760                          * sure that stale nodes will be removed in
02761                          * decrement_reference().  If we didn't have the lock,
02762                          * those nodes could miss the chance to be removed
02763                          * until the server stops.  The write lock is
02764                          * expensive, but this event should be rare enough
02765                          * to justify the cost.
02766                          */
02767                         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
02768                         tlock = isc_rwlocktype_write;
02769                 }
02770 
02771                 for (changed = HEAD(cleanup_list);
02772                      changed != NULL;
02773                      changed = next_changed) {
02774                         nodelock_t *lock;
02775 
                              /* 'changed' is freed at the end of this iteration. */
02776                         next_changed = NEXT(changed, link);
02777                         rbtnode = changed->node;
02778                         lock = &rbtdb->node_locks[rbtnode->locknum].lock;
02779 
02780                         NODE_LOCK(lock, isc_rwlocktype_write);
02781                         /*
02782                          * This is a good opportunity to purge any dead nodes,
02783                          * so use it.
02784                          */
02785                         if (event == NULL)
02786                                 cleanup_dead_nodes(rbtdb, rbtnode->locknum);
02787 
02788                         if (rollback)
02789                                 rollback_node(rbtnode, serial);
02790                         decrement_reference(rbtdb, rbtnode, least_serial,
02791                                             isc_rwlocktype_write, tlock,
02792                                             ISC_FALSE);
02793 
02794                         NODE_UNLOCK(lock, isc_rwlocktype_write);
02795 
02796                         isc_mem_put(rbtdb->common.mctx, changed,
02797                                     sizeof(*changed));
02798                 }
02799                 if (event != NULL) {
                              /*
                               * The queued event holds a database reference,
                               * which cleanup_dead_nodes_callback() drops.
                               */
02800                         isc_refcount_increment(&rbtdb->references, NULL);
02801                         isc_task_send(rbtdb->task, &event);
02802                 } else
02803                         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
02804         }
02805 
02806  end:
02807         *versionp = NULL;
02808 }
02809 
02810 /*
02811  * Add the necessary magic for the wildcard name 'name'
02812  * to be found in 'rbtdb'.
02813  *
02814  * In order for wildcard matching to work correctly in
02815  * zone_find(), we must ensure that a node for the wildcarding
02816  * level exists in the database, and has its 'find_callback'
02817  * and 'wild' bits set.
02818  *
02819  * E.g. if the wildcard name is "*.sub.example." then we
02820  * must ensure that "sub.example." exists and is marked as
02821  * a wildcard level.
02822  */
02823 static isc_result_t
02824 add_wildcard_magic(dns_rbtdb_t *rbtdb, dns_name_t *name) {
02825         isc_result_t result;
02826         dns_name_t foundname;
02827         dns_offsets_t offsets;
02828         unsigned int n;
02829         dns_rbtnode_t *node = NULL;
02830 
02831         dns_name_init(&foundname, offsets);
02832         n = dns_name_countlabels(name);
02833         INSIST(n >= 2);
02834         n--;
02835         dns_name_getlabelsequence(name, 1, n, &foundname);
02836         result = dns_rbt_addnode(rbtdb->tree, &foundname, &node);
02837         if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
02838                 return (result);
02839         if (result == ISC_R_SUCCESS)
02840                 node->nsec = DNS_RBT_NSEC_NORMAL;
02841         node->find_callback = 1;
02842         node->wild = 1;
02843         return (ISC_R_SUCCESS);
02844 }
02845 
02846 static isc_result_t
02847 add_empty_wildcards(dns_rbtdb_t *rbtdb, dns_name_t *name) {
02848         isc_result_t result;
02849         dns_name_t foundname;
02850         dns_offsets_t offsets;
02851         unsigned int n, l, i;
02852 
02853         dns_name_init(&foundname, offsets);
02854         n = dns_name_countlabels(name);
02855         l = dns_name_countlabels(&rbtdb->common.origin);
02856         i = l + 1;
02857         while (i < n) {
02858                 dns_rbtnode_t *node = NULL;     /* dummy */
02859                 dns_name_getlabelsequence(name, n - i, i, &foundname);
02860                 if (dns_name_iswildcard(&foundname)) {
02861                         result = add_wildcard_magic(rbtdb, &foundname);
02862                         if (result != ISC_R_SUCCESS)
02863                                 return (result);
02864                         result = dns_rbt_addnode(rbtdb->tree, &foundname,
02865                                                  &node);
02866                         if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
02867                                 return (result);
02868                         if (result == ISC_R_SUCCESS)
02869                                 node->nsec = DNS_RBT_NSEC_NORMAL;
02870                 }
02871                 i++;
02872         }
02873         return (ISC_R_SUCCESS);
02874 }
02875 
02876 static isc_result_t
02877 findnodeintree(dns_rbtdb_t *rbtdb, dns_rbt_t *tree, dns_name_t *name,
02878                isc_boolean_t create, dns_dbnode_t **nodep)
02879 {
02880         dns_rbtnode_t *node = NULL;
02881         dns_name_t nodename;
02882         isc_result_t result;
02883         isc_rwlocktype_t locktype = isc_rwlocktype_read;
02884 
02885         INSIST(tree == rbtdb->tree || tree == rbtdb->nsec3);
02886 
02887         dns_name_init(&nodename, NULL);
02888         RWLOCK(&rbtdb->tree_lock, locktype);
02889         result = dns_rbt_findnode(tree, name, NULL, &node, NULL,
02890                                   DNS_RBTFIND_EMPTYDATA, NULL, NULL);
02891         if (result != ISC_R_SUCCESS) {
02892                 RWUNLOCK(&rbtdb->tree_lock, locktype);
02893                 if (!create) {
02894                         if (result == DNS_R_PARTIALMATCH)
02895                                 result = ISC_R_NOTFOUND;
02896                         return (result);
02897                 }
02898                 /*
02899                  * It would be nice to try to upgrade the lock instead of
02900                  * unlocking then relocking.
02901                  */
02902                 locktype = isc_rwlocktype_write;
02903                 RWLOCK(&rbtdb->tree_lock, locktype);
02904                 node = NULL;
02905                 result = dns_rbt_addnode(tree, name, &node);
02906                 if (result == ISC_R_SUCCESS) {
02907                         dns_rbt_namefromnode(node, &nodename);
02908 #ifdef DNS_RBT_USEHASH
02909                         node->locknum = node->hashval % rbtdb->node_lock_count;
02910 #else
02911                         node->locknum = dns_name_hash(&nodename, ISC_TRUE) %
02912                                 rbtdb->node_lock_count;
02913 #endif
02914                         if (tree == rbtdb->tree) {
02915                                 add_empty_wildcards(rbtdb, name);
02916 
02917                                 if (dns_name_iswildcard(name)) {
02918                                         result = add_wildcard_magic(rbtdb, name);
02919                                         if (result != ISC_R_SUCCESS) {
02920                                                 RWUNLOCK(&rbtdb->tree_lock, locktype);
02921                                                 return (result);
02922                                         }
02923                                 }
02924                         }
02925                         if (tree == rbtdb->nsec3)
02926                                 node->nsec = DNS_RBT_NSEC_NSEC3;
02927                 } else if (result != ISC_R_EXISTS) {
02928                         RWUNLOCK(&rbtdb->tree_lock, locktype);
02929                         return (result);
02930                 }
02931         }
02932 
02933         if (tree == rbtdb->nsec3)
02934                 INSIST(node->nsec == DNS_RBT_NSEC_NSEC3);
02935 
02936         reactivate_node(rbtdb, node, locktype);
02937 
02938         /*
02939          * Always try to add the policy zone data, because this node might
02940          * already have been implicitly created by the previous addition of
02941          * a longer domain.  A common example is adding *.example.com
02942          * (implicitly creating example.com) followed by explicitly adding
02943          * example.com.
02944          */
02945         if (create && rbtdb->rpzs != NULL && tree == rbtdb->tree) {
02946                 dns_fixedname_t fnamef;
02947                 dns_name_t *fname;
02948 
02949                 dns_fixedname_init(&fnamef);
02950                 fname = dns_fixedname_name(&fnamef);
02951                 dns_rbt_fullnamefromnode(node, fname);
02952                 result = dns_rpz_add(rbtdb->rpzs, rbtdb->rpz_num, fname);
02953                 if (result == ISC_R_SUCCESS)
02954                         node->rpz = 1;
02955                 if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS) {
02956                         /*
02957                          * It is too late to give up, so merely complain.
02958                          */
02959                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_RPZ,
02960                                       DNS_LOGMODULE_RBTDB, DNS_RPZ_ERROR_LEVEL,
02961                                       "dns_rpz_add(): %s",
02962                                       isc_result_totext(result));
02963                 }
02964         }
02965 
02966         RWUNLOCK(&rbtdb->tree_lock, locktype);
02967 
02968         *nodep = (dns_dbnode_t *)node;
02969 
02970         return (ISC_R_SUCCESS);
02971 }
02972 
02973 static isc_result_t
02974 findnode(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
02975          dns_dbnode_t **nodep)
02976 {
02977         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
02978 
02979         REQUIRE(VALID_RBTDB(rbtdb));
02980 
02981         return (findnodeintree(rbtdb, rbtdb->tree, name, create, nodep));
02982 }
02983 
02984 static isc_result_t
02985 findnsec3node(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
02986               dns_dbnode_t **nodep)
02987 {
02988         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
02989 
02990         REQUIRE(VALID_RBTDB(rbtdb));
02991 
02992         return (findnodeintree(rbtdb, rbtdb->nsec3, name, create, nodep));
02993 }
02994 
02995 static isc_result_t
02996 zone_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
02997         rbtdb_search_t *search = arg;
02998         rdatasetheader_t *header, *header_next;
02999         rdatasetheader_t *dname_header, *sigdname_header, *ns_header;
03000         rdatasetheader_t *found;
03001         isc_result_t result;
03002         dns_rbtnode_t *onode;
03003 
03004         /*
03005          * We only want to remember the topmost zone cut, since it's the one
03006          * that counts, so we'll just continue if we've already found a
03007          * zonecut.
03008          */
03009         if (search->zonecut != NULL)
03010                 return (DNS_R_CONTINUE);
03011 
03012         found = NULL;
03013         result = DNS_R_CONTINUE;
03014         onode = search->rbtdb->origin_node;
03015 
03016         NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
03017                   isc_rwlocktype_read);
03018 
03019         /*
03020          * Look for an NS or DNAME rdataset active in our version.
03021          */
03022         ns_header = NULL;
03023         dname_header = NULL;
03024         sigdname_header = NULL;
03025         for (header = node->data; header != NULL; header = header_next) {
03026                 header_next = header->next;
03027                 if (header->type == dns_rdatatype_ns ||
03028                     header->type == dns_rdatatype_dname ||
03029                     header->type == RBTDB_RDATATYPE_SIGDNAME) {
03030                         do {
03031                                 if (header->serial <= search->serial &&
03032                                     !IGNORE(header)) {
03033                                         /*
03034                                          * Is this a "this rdataset doesn't
03035                                          * exist" record?
03036                                          */
03037                                         if (NONEXISTENT(header))
03038                                                 header = NULL;
03039                                         break;
03040                                 } else
03041                                         header = header->down;
03042                         } while (header != NULL);
03043                         if (header != NULL) {
03044                                 if (header->type == dns_rdatatype_dname)
03045                                         dname_header = header;
03046                                 else if (header->type ==
03047                                            RBTDB_RDATATYPE_SIGDNAME)
03048                                         sigdname_header = header;
03049                                 else if (node != onode ||
03050                                          IS_STUB(search->rbtdb)) {
03051                                         /*
03052                                          * We've found an NS rdataset that
03053                                          * isn't at the origin node.  We check
03054                                          * that they're not at the origin node,
03055                                          * because otherwise we'd erroneously
03056                                          * treat the zone top as if it were
03057                                          * a delegation.
03058                                          */
03059                                         ns_header = header;
03060                                 }
03061                         }
03062                 }
03063         }
03064 
03065         /*
03066          * Did we find anything?
03067          */
03068         if (!IS_CACHE(search->rbtdb) && !IS_STUB(search->rbtdb) &&
03069             ns_header != NULL) {
03070                 /*
03071                  * Note that NS has precedence over DNAME if both exist
03072                  * in a zone.  Otherwise DNAME take precedence over NS.
03073                  */
03074                 found = ns_header;
03075                 search->zonecut_sigrdataset = NULL;
03076         } else if (dname_header != NULL) {
03077                 found = dname_header;
03078                 search->zonecut_sigrdataset = sigdname_header;
03079         } else if (ns_header != NULL) {
03080                 found = ns_header;
03081                 search->zonecut_sigrdataset = NULL;
03082         }
03083 
03084         if (found != NULL) {
03085                 /*
03086                  * We increment the reference count on node to ensure that
03087                  * search->zonecut_rdataset will still be valid later.
03088                  */
03089                 new_reference(search->rbtdb, node);
03090                 search->zonecut = node;
03091                 search->zonecut_rdataset = found;
03092                 search->need_cleanup = ISC_TRUE;
03093                 /*
03094                  * Since we've found a zonecut, anything beneath it is
03095                  * glue and is not subject to wildcard matching, so we
03096                  * may clear search->wild.
03097                  */
03098                 search->wild = ISC_FALSE;
03099                 if ((search->options & DNS_DBFIND_GLUEOK) == 0) {
03100                         /*
03101                          * If the caller does not want to find glue, then
03102                          * this is the best answer and the search should
03103                          * stop now.
03104                          */
03105                         result = DNS_R_PARTIALMATCH;
03106                 } else {
03107                         dns_name_t *zcname;
03108 
03109                         /*
03110                          * The search will continue beneath the zone cut.
03111                          * This may or may not be the best match.  In case it
03112                          * is, we need to remember the node name.
03113                          */
03114                         zcname = dns_fixedname_name(&search->zonecut_name);
03115                         RUNTIME_CHECK(dns_name_copy(name, zcname, NULL) ==
03116                                       ISC_R_SUCCESS);
03117                         search->copy_name = ISC_TRUE;
03118                 }
03119         } else {
03120                 /*
03121                  * There is no zonecut at this node which is active in this
03122                  * version.
03123                  *
03124                  * If this is a "wild" node and the caller hasn't disabled
03125                  * wildcard matching, remember that we've seen a wild node
03126                  * in case we need to go searching for wildcard matches
03127                  * later on.
03128                  */
03129                 if (node->wild && (search->options & DNS_DBFIND_NOWILD) == 0)
03130                         search->wild = ISC_TRUE;
03131         }
03132 
03133         NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
03134                     isc_rwlocktype_read);
03135 
03136         return (result);
03137 }
03138 
03139 static inline void
03140 bind_rdataset(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
03141               rdatasetheader_t *header, isc_stdtime_t now,
03142               dns_rdataset_t *rdataset)
03143 {
03144         unsigned char *raw;     /* RDATASLAB */
03145 
03146         /*
03147          * Caller must be holding the node reader lock.
03148          * XXXJT: technically, we need a writer lock, since we'll increment
03149          * the header count below.  However, since the actual counter value
03150          * doesn't matter, we prioritize performance here.  (We may want to
03151          * use atomic increment when available).
03152          */
03153 
03154         if (rdataset == NULL)
03155                 return;
03156 
03157         new_reference(rbtdb, node);
03158 
03159         INSIST(rdataset->methods == NULL);      /* We must be disassociated. */
03160 
03161         rdataset->methods = &rdataset_methods;
03162         rdataset->rdclass = rbtdb->common.rdclass;
03163         rdataset->type = RBTDB_RDATATYPE_BASE(header->type);
03164         rdataset->covers = RBTDB_RDATATYPE_EXT(header->type);
03165         rdataset->ttl = header->rdh_ttl - now;
03166         rdataset->trust = header->trust;
03167         if (NEGATIVE(header))
03168                 rdataset->attributes |= DNS_RDATASETATTR_NEGATIVE;
03169         if (NXDOMAIN(header))
03170                 rdataset->attributes |= DNS_RDATASETATTR_NXDOMAIN;
03171         if (OPTOUT(header))
03172                 rdataset->attributes |= DNS_RDATASETATTR_OPTOUT;
03173         if (PREFETCH(header))
03174                 rdataset->attributes |= DNS_RDATASETATTR_PREFETCH;
03175         rdataset->private1 = rbtdb;
03176         rdataset->private2 = node;
03177         raw = (unsigned char *)header + sizeof(*header);
03178         rdataset->private3 = raw;
03179         rdataset->count = header->count++;
03180         if (rdataset->count == ISC_UINT32_MAX)
03181                 rdataset->count = 0;
03182 
03183         /*
03184          * Reset iterator state.
03185          */
03186         rdataset->privateuint4 = 0;
03187         rdataset->private5 = NULL;
03188 
03189         /*
03190          * Add noqname proof.
03191          */
03192         rdataset->private6 = header->noqname;
03193         if (rdataset->private6 != NULL)
03194                 rdataset->attributes |=  DNS_RDATASETATTR_NOQNAME;
03195         rdataset->private7 = header->closest;
03196         if (rdataset->private7 != NULL)
03197                 rdataset->attributes |=  DNS_RDATASETATTR_CLOSEST;
03198 
03199         /*
03200          * Copy out re-signing information.
03201          */
03202         if (RESIGN(header)) {
03203                 rdataset->attributes |=  DNS_RDATASETATTR_RESIGN;
03204                 rdataset->resign = header->resign;
03205         } else
03206                 rdataset->resign = 0;
03207 }
03208 
03209 static inline isc_result_t
03210 setup_delegation(rbtdb_search_t *search, dns_dbnode_t **nodep,
03211                  dns_name_t *foundname, dns_rdataset_t *rdataset,
03212                  dns_rdataset_t *sigrdataset)
03213 {
03214         isc_result_t result;
03215         dns_name_t *zcname;
03216         rbtdb_rdatatype_t type;
03217         dns_rbtnode_t *node;
03218 
03219         /*
03220          * The caller MUST NOT be holding any node locks.
03221          */
03222 
03223         node = search->zonecut;
03224         type = search->zonecut_rdataset->type;
03225 
03226         /*
03227          * If we have to set foundname, we do it before anything else.
03228          * If we were to set foundname after we had set nodep or bound the
03229          * rdataset, then we'd have to undo that work if dns_name_copy()
03230          * failed.  By setting foundname first, there's nothing to undo if
03231          * we have trouble.
03232          */
03233         if (foundname != NULL && search->copy_name) {
03234                 zcname = dns_fixedname_name(&search->zonecut_name);
03235                 result = dns_name_copy(zcname, foundname, NULL);
03236                 if (result != ISC_R_SUCCESS)
03237                         return (result);
03238         }
03239         if (nodep != NULL) {
03240                 /*
03241                  * Note that we don't have to increment the node's reference
03242                  * count here because we're going to use the reference we
03243                  * already have in the search block.
03244                  */
03245                 *nodep = node;
03246                 search->need_cleanup = ISC_FALSE;
03247         }
03248         if (rdataset != NULL) {
03249                 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
03250                           isc_rwlocktype_read);
03251                 bind_rdataset(search->rbtdb, node, search->zonecut_rdataset,
03252                               search->now, rdataset);
03253                 if (sigrdataset != NULL && search->zonecut_sigrdataset != NULL)
03254                         bind_rdataset(search->rbtdb, node,
03255                                       search->zonecut_sigrdataset,
03256                                       search->now, sigrdataset);
03257                 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
03258                             isc_rwlocktype_read);
03259         }
03260 
03261         if (type == dns_rdatatype_dname)
03262                 return (DNS_R_DNAME);
03263         return (DNS_R_DELEGATION);
03264 }
03265 
03266 static inline isc_boolean_t
03267 valid_glue(rbtdb_search_t *search, dns_name_t *name, rbtdb_rdatatype_t type,
03268            dns_rbtnode_t *node)
03269 {
03270         unsigned char *raw;     /* RDATASLAB */
03271         unsigned int count, size;
03272         dns_name_t ns_name;
03273         isc_boolean_t valid = ISC_FALSE;
03274         dns_offsets_t offsets;
03275         isc_region_t region;
03276         rdatasetheader_t *header;
03277 
03278         /*
03279          * No additional locking is required.
03280          */
03281 
03282         /*
03283          * Valid glue types are A, AAAA, A6.  NS is also a valid glue type
03284          * if it occurs at a zone cut, but is not valid below it.
03285          */
03286         if (type == dns_rdatatype_ns) {
03287                 if (node != search->zonecut) {
03288                         return (ISC_FALSE);
03289                 }
03290         } else if (type != dns_rdatatype_a &&
03291                    type != dns_rdatatype_aaaa &&
03292                    type != dns_rdatatype_a6) {
03293                 return (ISC_FALSE);
03294         }
03295 
03296         header = search->zonecut_rdataset;
03297         raw = (unsigned char *)header + sizeof(*header);
03298         count = raw[0] * 256 + raw[1];
03299 #if DNS_RDATASET_FIXED
03300         raw += 2 + (4 * count);
03301 #else
03302         raw += 2;
03303 #endif
03304 
03305         while (count > 0) {
03306                 count--;
03307                 size = raw[0] * 256 + raw[1];
03308 #if DNS_RDATASET_FIXED
03309                 raw += 4;
03310 #else
03311                 raw += 2;
03312 #endif
03313                 region.base = raw;
03314                 region.length = size;
03315                 raw += size;
03316                 /*
03317                  * XXX Until we have rdata structures, we have no choice but
03318                  * to directly access the rdata format.
03319                  */
03320                 dns_name_init(&ns_name, offsets);
03321                 dns_name_fromregion(&ns_name, &region);
03322                 if (dns_name_compare(&ns_name, name) == 0) {
03323                         valid = ISC_TRUE;
03324                         break;
03325                 }
03326         }
03327 
03328         return (valid);
03329 }
03330 
03331 static inline isc_boolean_t
03332 activeempty(rbtdb_search_t *search, dns_rbtnodechain_t *chain,
03333             dns_name_t *name)
03334 {
03335         dns_fixedname_t fnext;
03336         dns_fixedname_t forigin;
03337         dns_name_t *next;
03338         dns_name_t *origin;
03339         dns_name_t prefix;
03340         dns_rbtdb_t *rbtdb;
03341         dns_rbtnode_t *node;
03342         isc_result_t result;
03343         isc_boolean_t answer = ISC_FALSE;
03344         rdatasetheader_t *header;
03345 
03346         rbtdb = search->rbtdb;
03347 
03348         dns_name_init(&prefix, NULL);
03349         dns_fixedname_init(&fnext);
03350         next = dns_fixedname_name(&fnext);
03351         dns_fixedname_init(&forigin);
03352         origin = dns_fixedname_name(&forigin);
03353 
03354         result = dns_rbtnodechain_next(chain, NULL, NULL);
03355         while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
03356                 node = NULL;
03357                 result = dns_rbtnodechain_current(chain, &prefix,
03358                                                   origin, &node);
03359                 if (result != ISC_R_SUCCESS)
03360                         break;
03361                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
03362                           isc_rwlocktype_read);
03363                 for (header = node->data;
03364                      header != NULL;
03365                      header = header->next) {
03366                         if (header->serial <= search->serial &&
03367                             !IGNORE(header) && EXISTS(header))
03368                                 break;
03369                 }
03370                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
03371                             isc_rwlocktype_read);
03372                 if (header != NULL)
03373                         break;
03374                 result = dns_rbtnodechain_next(chain, NULL, NULL);
03375         }
03376         if (result == ISC_R_SUCCESS)
03377                 result = dns_name_concatenate(&prefix, origin, next, NULL);
03378         if (result == ISC_R_SUCCESS && dns_name_issubdomain(next, name))
03379                 answer = ISC_TRUE;
03380         return (answer);
03381 }
03382 
03383 static inline isc_boolean_t
03384 activeemtpynode(rbtdb_search_t *search, dns_name_t *qname, dns_name_t *wname) {
03385         dns_fixedname_t fnext;
03386         dns_fixedname_t forigin;
03387         dns_fixedname_t fprev;
03388         dns_name_t *next;
03389         dns_name_t *origin;
03390         dns_name_t *prev;
03391         dns_name_t name;
03392         dns_name_t rname;
03393         dns_name_t tname;
03394         dns_rbtdb_t *rbtdb;
03395         dns_rbtnode_t *node;
03396         dns_rbtnodechain_t chain;
03397         isc_boolean_t check_next = ISC_TRUE;
03398         isc_boolean_t check_prev = ISC_TRUE;
03399         isc_boolean_t answer = ISC_FALSE;
03400         isc_result_t result;
03401         rdatasetheader_t *header;
03402         unsigned int n;
03403 
03404         rbtdb = search->rbtdb;
03405 
03406         dns_name_init(&name, NULL);
03407         dns_name_init(&tname, NULL);
03408         dns_name_init(&rname, NULL);
03409         dns_fixedname_init(&fnext);
03410         next = dns_fixedname_name(&fnext);
03411         dns_fixedname_init(&fprev);
03412         prev = dns_fixedname_name(&fprev);
03413         dns_fixedname_init(&forigin);
03414         origin = dns_fixedname_name(&forigin);
03415 
03416         /*
03417          * Find if qname is at or below a empty node.
03418          * Use our own copy of the chain.
03419          */
03420 
03421         chain = search->chain;
03422         do {
03423                 node = NULL;
03424                 result = dns_rbtnodechain_current(&chain, &name,
03425                                                   origin, &node);
03426                 if (result != ISC_R_SUCCESS)
03427                         break;
03428                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
03429                           isc_rwlocktype_read);
03430                 for (header = node->data;
03431                      header != NULL;
03432                      header = header->next) {
03433                         if (header->serial <= search->serial &&
03434                             !IGNORE(header) && EXISTS(header))
03435                                 break;
03436                 }
03437                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
03438                             isc_rwlocktype_read);
03439                 if (header != NULL)
03440                         break;
03441                 result = dns_rbtnodechain_prev(&chain, NULL, NULL);
03442         } while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN);
03443         if (result == ISC_R_SUCCESS)
03444                 result = dns_name_concatenate(&name, origin, prev, NULL);
03445         if (result != ISC_R_SUCCESS)
03446                 check_prev = ISC_FALSE;
03447 
03448         result = dns_rbtnodechain_next(&chain, NULL, NULL);
03449         while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
03450                 node = NULL;
03451                 result = dns_rbtnodechain_current(&chain, &name,
03452                                                   origin, &node);
03453                 if (result != ISC_R_SUCCESS)
03454                         break;
03455                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
03456                           isc_rwlocktype_read);
03457                 for (header = node->data;
03458                      header != NULL;
03459                      header = header->next) {
03460                         if (header->serial <= search->serial &&
03461                             !IGNORE(header) && EXISTS(header))
03462                                 break;
03463                 }
03464                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
03465                             isc_rwlocktype_read);
03466                 if (header != NULL)
03467                         break;
03468                 result = dns_rbtnodechain_next(&chain, NULL, NULL);
03469         }
03470         if (result == ISC_R_SUCCESS)
03471                 result = dns_name_concatenate(&name, origin, next, NULL);
03472         if (result != ISC_R_SUCCESS)
03473                 check_next = ISC_FALSE;
03474 
03475         dns_name_clone(qname, &rname);
03476 
03477         /*
03478          * Remove the wildcard label to find the terminal name.
03479          */
03480         n = dns_name_countlabels(wname);
03481         dns_name_getlabelsequence(wname, 1, n - 1, &tname);
03482 
03483         do {
03484                 if ((check_prev && dns_name_issubdomain(prev, &rname)) ||
03485                     (check_next && dns_name_issubdomain(next, &rname))) {
03486                         answer = ISC_TRUE;
03487                         break;
03488                 }
03489                 /*
03490                  * Remove the left hand label.
03491                  */
03492                 n = dns_name_countlabels(&rname);
03493                 dns_name_getlabelsequence(&rname, 1, n - 1, &rname);
03494         } while (!dns_name_equal(&rname, &tname));
03495         return (answer);
03496 }
03497 
03498 static inline isc_result_t
03499 find_wildcard(rbtdb_search_t *search, dns_rbtnode_t **nodep,
03500               dns_name_t *qname)
03501 {
03502         unsigned int i, j;
03503         dns_rbtnode_t *node, *level_node, *wnode;
03504         rdatasetheader_t *header;
03505         isc_result_t result = ISC_R_NOTFOUND;
03506         dns_name_t name;
03507         dns_name_t *wname;
03508         dns_fixedname_t fwname;
03509         dns_rbtdb_t *rbtdb;
03510         isc_boolean_t done, wild, active;
03511         dns_rbtnodechain_t wchain;
03512 
03513         /*
03514          * Caller must be holding the tree lock and MUST NOT be holding
03515          * any node locks.
03516          */
03517 
03518         /*
03519          * Examine each ancestor level.  If the level's wild bit
03520          * is set, then construct the corresponding wildcard name and
03521          * search for it.  If the wildcard node exists, and is active in
03522          * this version, we're done.  If not, then we next check to see
03523          * if the ancestor is active in this version.  If so, then there
03524          * can be no possible wildcard match and again we're done.  If not,
03525          * continue the search.
03526          */
03527 
03528         rbtdb = search->rbtdb;
03529         i = search->chain.level_matches;
03530         done = ISC_FALSE;
03531         node = *nodep;
03532         do {
03533                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
03534                           isc_rwlocktype_read);
03535 
03536                 /*
03537                  * First we try to figure out if this node is active in
03538                  * the search's version.  We do this now, even though we
03539                  * may not need the information, because it simplifies the
03540                  * locking and code flow.
03541                  */
03542                 for (header = node->data;
03543                      header != NULL;
03544                      header = header->next) {
03545                         if (header->serial <= search->serial &&
03546                             !IGNORE(header) && EXISTS(header))
03547                                 break;
03548                 }
03549                 if (header != NULL)
03550                         active = ISC_TRUE;
03551                 else
03552                         active = ISC_FALSE;
03553 
03554                 if (node->wild)
03555                         wild = ISC_TRUE;
03556                 else
03557                         wild = ISC_FALSE;
03558 
03559                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
03560                             isc_rwlocktype_read);
03561 
03562                 if (wild) {
03563                         /*
03564                          * Construct the wildcard name for this level.
03565                          */
03566                         dns_name_init(&name, NULL);
03567                         dns_rbt_namefromnode(node, &name);
03568                         dns_fixedname_init(&fwname);
03569                         wname = dns_fixedname_name(&fwname);
03570                         result = dns_name_concatenate(dns_wildcardname, &name,
03571                                                       wname, NULL);
03572                         j = i;
03573                         while (result == ISC_R_SUCCESS && j != 0) {
03574                                 j--;
03575                                 level_node = search->chain.levels[j];
03576                                 dns_name_init(&name, NULL);
03577                                 dns_rbt_namefromnode(level_node, &name);
03578                                 result = dns_name_concatenate(wname,
03579                                                               &name,
03580                                                               wname,
03581                                                               NULL);
03582                         }
03583                         if (result != ISC_R_SUCCESS)
03584                                 break;
03585 
03586                         wnode = NULL;
03587                         dns_rbtnodechain_init(&wchain, NULL);
03588                         result = dns_rbt_findnode(rbtdb->tree, wname,
03589                                                   NULL, &wnode, &wchain,
03590                                                   DNS_RBTFIND_EMPTYDATA,
03591                                                   NULL, NULL);
03592                         if (result == ISC_R_SUCCESS) {
03593                                 nodelock_t *lock;
03594 
03595                                 /*
03596                                  * We have found the wildcard node.  If it
03597                                  * is active in the search's version, we're
03598                                  * done.
03599                                  */
03600                                 lock = &rbtdb->node_locks[wnode->locknum].lock;
03601                                 NODE_LOCK(lock, isc_rwlocktype_read);
03602                                 for (header = wnode->data;
03603                                      header != NULL;
03604                                      header = header->next) {
03605                                         if (header->serial <= search->serial &&
03606                                             !IGNORE(header) && EXISTS(header))
03607                                                 break;
03608                                 }
03609                                 NODE_UNLOCK(lock, isc_rwlocktype_read);
03610                                 if (header != NULL ||
03611                                     activeempty(search, &wchain, wname)) {
03612                                         if (activeemtpynode(search, qname,
03613                                                             wname)) {
03614                                                 return (ISC_R_NOTFOUND);
03615                                         }
03616                                         /*
03617                                          * The wildcard node is active!
03618                                          *
03619                                          * Note: result is still ISC_R_SUCCESS
03620                                          * so we don't have to set it.
03621                                          */
03622                                         *nodep = wnode;
03623                                         break;
03624                                 }
03625                         } else if (result != ISC_R_NOTFOUND &&
03626                                    result != DNS_R_PARTIALMATCH) {
03627                                 /*
03628                                  * An error has occurred.  Bail out.
03629                                  */
03630                                 break;
03631                         }
03632                 }
03633 
03634                 if (active) {
03635                         /*
03636                          * The level node is active.  Any wildcarding
03637                          * present at higher levels has no
03638                          * effect and we're done.
03639                          */
03640                         result = ISC_R_NOTFOUND;
03641                         break;
03642                 }
03643 
03644                 if (i > 0) {
03645                         i--;
03646                         node = search->chain.levels[i];
03647                 } else
03648                         done = ISC_TRUE;
03649         } while (!done);
03650 
03651         return (result);
03652 }
03653 
03654 static isc_boolean_t
03655 matchparams(rdatasetheader_t *header, rbtdb_search_t *search)
03656 {
03657         dns_rdata_t rdata = DNS_RDATA_INIT;
03658         dns_rdata_nsec3_t nsec3;
03659         unsigned char *raw;                     /* RDATASLAB */
03660         unsigned int rdlen, count;
03661         isc_region_t region;
03662         isc_result_t result;
03663 
03664         REQUIRE(header->type == dns_rdatatype_nsec3);
03665 
03666         raw = (unsigned char *)header + sizeof(*header);
03667         count = raw[0] * 256 + raw[1]; /* count */
03668 #if DNS_RDATASET_FIXED
03669         raw += count * 4 + 2;
03670 #else
03671         raw += 2;
03672 #endif
03673         while (count-- > 0) {
03674                 rdlen = raw[0] * 256 + raw[1];
03675 #if DNS_RDATASET_FIXED
03676                 raw += 4;
03677 #else
03678                 raw += 2;
03679 #endif
03680                 region.base = raw;
03681                 region.length = rdlen;
03682                 dns_rdata_fromregion(&rdata, search->rbtdb->common.rdclass,
03683                                      dns_rdatatype_nsec3, &region);
03684                 raw += rdlen;
03685                 result = dns_rdata_tostruct(&rdata, &nsec3, NULL);
03686                 INSIST(result == ISC_R_SUCCESS);
03687                 if (nsec3.hash == search->rbtversion->hash &&
03688                     nsec3.iterations == search->rbtversion->iterations &&
03689                     nsec3.salt_length == search->rbtversion->salt_length &&
03690                     memcmp(nsec3.salt, search->rbtversion->salt,
03691                            nsec3.salt_length) == 0)
03692                         return (ISC_TRUE);
03693                 dns_rdata_reset(&rdata);
03694         }
03695         return (ISC_FALSE);
03696 }
03697 
03698 /*
03699  * Find node of the NSEC/NSEC3 record that is 'name'.
03700  */
03701 static inline isc_result_t
03702 previous_closest_nsec(dns_rdatatype_t type, rbtdb_search_t *search,
03703                     dns_name_t *name, dns_name_t *origin,
03704                     dns_rbtnode_t **nodep, dns_rbtnodechain_t *nsecchain,
03705                     isc_boolean_t *firstp)
03706 {
03707         dns_fixedname_t ftarget;
03708         dns_name_t *target;
03709         dns_rbtnode_t *nsecnode;
03710         isc_result_t result;
03711 
03712         REQUIRE(nodep != NULL && *nodep == NULL);
03713 
03714         if (type == dns_rdatatype_nsec3) {
03715                 result = dns_rbtnodechain_prev(&search->chain, NULL, NULL);
03716                 if (result != ISC_R_SUCCESS && result != DNS_R_NEWORIGIN)
03717                         return (result);
03718                 result = dns_rbtnodechain_current(&search->chain, name, origin,
03719                                                   nodep);
03720                 return (result);
03721         }
03722 
03723         dns_fixedname_init(&ftarget);
03724         target = dns_fixedname_name(&ftarget);
03725 
03726         for (;;) {
03727                 if (*firstp) {
03728                         /*
03729                          * Construct the name of the second node to check.
03730                          * It is the first node sought in the NSEC tree.
03731                          */
03732                         *firstp = ISC_FALSE;
03733                         dns_rbtnodechain_init(nsecchain, NULL);
03734                         result = dns_name_concatenate(name, origin,
03735                                                       target, NULL);
03736                         if (result != ISC_R_SUCCESS)
03737                                 return (result);
03738                         nsecnode = NULL;
03739                         result = dns_rbt_findnode(search->rbtdb->nsec,
03740                                                   target, NULL,
03741                                                   &nsecnode, nsecchain,
03742                                                   DNS_RBTFIND_NOOPTIONS,
03743                                                   NULL, NULL);
03744                         if (result == ISC_R_SUCCESS) {
03745                                 /*
03746                                  * Since this was the first loop, finding the
03747                                  * name in the NSEC tree implies that the first
03748                                  * node checked in the main tree had an
03749                                  * unacceptable NSEC record.
03750                                  * Try the previous node in the NSEC tree.
03751                                  */
03752                                 result = dns_rbtnodechain_prev(nsecchain,
03753                                                                name, origin);
03754                                 if (result == DNS_R_NEWORIGIN)
03755                                         result = ISC_R_SUCCESS;
03756                         } else if (result == ISC_R_NOTFOUND ||
03757                                    result == DNS_R_PARTIALMATCH) {
03758                                 result = dns_rbtnodechain_current(nsecchain,
03759                                                         name, origin, NULL);
03760                                 if (result == ISC_R_NOTFOUND)
03761                                         result = ISC_R_NOMORE;
03762                         }
03763                 } else {
03764                         /*
03765                          * This is a second or later trip through the auxiliary
03766                          * tree for the name of a third or earlier NSEC node in
03767                          * the main tree.  Previous trips through the NSEC tree
03768                          * must have found nodes in the main tree with NSEC
03769                          * records.  Perhaps they lacked signature records.
03770                          */
03771                         result = dns_rbtnodechain_prev(nsecchain, name, origin);
03772                         if (result == DNS_R_NEWORIGIN)
03773                                 result = ISC_R_SUCCESS;
03774                 }
03775                 if (result != ISC_R_SUCCESS)
03776                         return (result);
03777 
03778                 /*
03779                  * Construct the name to seek in the main tree.
03780                  */
03781                 result = dns_name_concatenate(name, origin, target, NULL);
03782                 if (result != ISC_R_SUCCESS)
03783                         return (result);
03784 
03785                 *nodep = NULL;
03786                 result = dns_rbt_findnode(search->rbtdb->tree, target, NULL,
03787                                           nodep, &search->chain,
03788                                           DNS_RBTFIND_NOOPTIONS, NULL, NULL);
03789                 if (result == ISC_R_SUCCESS)
03790                         return (result);
03791 
03792                 /*
03793                  * There should always be a node in the main tree with the
03794                  * same name as the node in the auxiliary NSEC tree, except for
03795                  * nodes in the auxiliary tree that are awaiting deletion.
03796                  */
03797                 if (result != DNS_R_PARTIALMATCH && result != ISC_R_NOTFOUND) {
03798                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
03799                                       DNS_LOGMODULE_CACHE, ISC_LOG_ERROR,
03800                                       "previous_closest_nsec(): %s",
03801                                       isc_result_totext(result));
03802                         return (DNS_R_BADDB);
03803                 }
03804         }
03805 }
03806 
03807 /*
03808  * Find the NSEC/NSEC3 which is or before the current point on the
03809  * search chain.  For NSEC3 records only NSEC3 records that match the
03810  * current NSEC3PARAM record are considered.
03811  */
03812 static inline isc_result_t
03813 find_closest_nsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
03814                   dns_name_t *foundname, dns_rdataset_t *rdataset,
03815                   dns_rdataset_t *sigrdataset, dns_rbt_t *tree,
03816                   dns_db_secure_t secure)
03817 {
03818         dns_rbtnode_t *node, *prevnode;
03819         rdatasetheader_t *header, *header_next, *found, *foundsig;
03820         dns_rbtnodechain_t nsecchain;
03821         isc_boolean_t empty_node;
03822         isc_result_t result;
03823         dns_fixedname_t fname, forigin;
03824         dns_name_t *name, *origin;
03825         dns_rdatatype_t type;
03826         rbtdb_rdatatype_t sigtype;
03827         isc_boolean_t wraps;
03828         isc_boolean_t first = ISC_TRUE;
03829         isc_boolean_t need_sig = ISC_TF(secure == dns_db_secure);
03830 
03831         if (tree == search->rbtdb->nsec3) {
03832                 type = dns_rdatatype_nsec3;
03833                 sigtype = RBTDB_RDATATYPE_SIGNSEC3;
03834                 wraps = ISC_TRUE;
03835         } else {
03836                 type = dns_rdatatype_nsec;
03837                 sigtype = RBTDB_RDATATYPE_SIGNSEC;
03838                 wraps = ISC_FALSE;
03839         }
03840 
03841         /*
03842          * Use the auxiliary tree only starting with the second node in the
03843          * hope that the original node will be right much of the time.
03844          */
03845         dns_fixedname_init(&fname);
03846         name = dns_fixedname_name(&fname);
03847         dns_fixedname_init(&forigin);
03848         origin = dns_fixedname_name(&forigin);
03849  again:
03850         node = NULL;
03851         prevnode = NULL;
03852         result = dns_rbtnodechain_current(&search->chain, name, origin, &node);
03853         if (result != ISC_R_SUCCESS)
03854                 return (result);
03855         do {
03856                 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
03857                           isc_rwlocktype_read);
03858                 found = NULL;
03859                 foundsig = NULL;
03860                 empty_node = ISC_TRUE;
03861                 for (header = node->data;
03862                      header != NULL;
03863                      header = header_next) {
03864                         header_next = header->next;
03865                         /*
03866                          * Look for an active, extant NSEC or RRSIG NSEC.
03867                          */
03868                         do {
03869                                 if (header->serial <= search->serial &&
03870                                     !IGNORE(header)) {
03871                                         /*
03872                                          * Is this a "this rdataset doesn't
03873                                          * exist" record?
03874                                          */
03875                                         if (NONEXISTENT(header))
03876                                                 header = NULL;
03877                                         break;
03878                                 } else
03879                                         header = header->down;
03880                         } while (header != NULL);
03881                         if (header != NULL) {
03882                                 /*
03883                                  * We now know that there is at least one
03884                                  * active rdataset at this node.
03885                                  */
03886                                 empty_node = ISC_FALSE;
03887                                 if (header->type == type) {
03888                                         found = header;
03889                                         if (foundsig != NULL)
03890                                                 break;
03891                                 } else if (header->type == sigtype) {
03892                                         foundsig = header;
03893                                         if (found != NULL)
03894                                                 break;
03895                                 }
03896                         }
03897                 }
03898                 if (!empty_node) {
03899                         if (found != NULL && search->rbtversion->havensec3 &&
03900                             found->type == dns_rdatatype_nsec3 &&
03901                             !matchparams(found, search)) {
03902                                 empty_node = ISC_TRUE;
03903                                 found = NULL;
03904                                 foundsig = NULL;
03905                                 result = previous_closest_nsec(type, search,
03906                                                                name, origin,
03907                                                                &prevnode, NULL,
03908                                                                NULL);
03909                         } else if (found != NULL &&
03910                                    (foundsig != NULL || !need_sig)) {
03911                                 /*
03912                                  * We've found the right NSEC/NSEC3 record.
03913                                  *
03914                                  * Note: for this to really be the right
03915                                  * NSEC record, it's essential that the NSEC
03916                                  * records of any nodes obscured by a zone
03917                                  * cut have been removed; we assume this is
03918                                  * the case.
03919                                  */
03920                                 result = dns_name_concatenate(name, origin,
03921                                                               foundname, NULL);
03922                                 if (result == ISC_R_SUCCESS) {
03923                                         if (nodep != NULL) {
03924                                                 new_reference(search->rbtdb,
03925                                                               node);
03926                                                 *nodep = node;
03927                                         }
03928                                         bind_rdataset(search->rbtdb, node,
03929                                                       found, search->now,
03930                                                       rdataset);
03931                                         if (foundsig != NULL)
03932                                                 bind_rdataset(search->rbtdb,
03933                                                               node,
03934                                                               foundsig,
03935                                                               search->now,
03936                                                               sigrdataset);
03937                                 }
03938                         } else if (found == NULL && foundsig == NULL) {
03939                                 /*
03940                                  * This node is active, but has no NSEC or
03941                                  * RRSIG NSEC.  That means it's glue or
03942                                  * other obscured zone data that isn't
03943                                  * relevant for our search.  Treat the
03944                                  * node as if it were empty and keep looking.
03945                                  */
03946                                 empty_node = ISC_TRUE;
03947                                 result = previous_closest_nsec(type, search,
03948                                                                name, origin,
03949                                                                &prevnode,
03950                                                                &nsecchain,
03951                                                                &first);
03952                         } else {
03953                                 /*
03954                                  * We found an active node, but either the
03955                                  * NSEC or the RRSIG NSEC is missing.  This
03956                                  * shouldn't happen.
03957                                  */
03958                                 result = DNS_R_BADDB;
03959                         }
03960                 } else {
03961                         /*
03962                          * This node isn't active.  We've got to keep
03963                          * looking.
03964                          */
03965                         result = previous_closest_nsec(type, search,
03966                                                        name, origin, &prevnode,
03967                                                        &nsecchain, &first);
03968                 }
03969                 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
03970                             isc_rwlocktype_read);
03971                 node = prevnode;
03972                 prevnode = NULL;
03973         } while (empty_node && result == ISC_R_SUCCESS);
03974 
03975         if (!first)
03976                 dns_rbtnodechain_invalidate(&nsecchain);
03977 
03978         if (result == ISC_R_NOMORE && wraps) {
03979                 result = dns_rbtnodechain_last(&search->chain, tree,
03980                                                NULL, NULL);
03981                 if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
03982                         wraps = ISC_FALSE;
03983                         goto again;
03984                 }
03985         }
03986 
03987         /*
03988          * If the result is ISC_R_NOMORE, then we got to the beginning of
03989          * the database and didn't find a NSEC record.  This shouldn't
03990          * happen.
03991          */
03992         if (result == ISC_R_NOMORE)
03993                 result = DNS_R_BADDB;
03994 
03995         return (result);
03996 }
03997 
03998 static isc_result_t
03999 zone_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
04000           dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
04001           dns_dbnode_t **nodep, dns_name_t *foundname,
04002           dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
04003 {
04004         dns_rbtnode_t *node = NULL;
04005         isc_result_t result;
04006         rbtdb_search_t search;
04007         isc_boolean_t cname_ok = ISC_TRUE;
04008         isc_boolean_t close_version = ISC_FALSE;
04009         isc_boolean_t maybe_zonecut = ISC_FALSE;
04010         isc_boolean_t at_zonecut = ISC_FALSE;
04011         isc_boolean_t wild;
04012         isc_boolean_t empty_node;
04013         rdatasetheader_t *header, *header_next, *found, *nsecheader;
04014         rdatasetheader_t *foundsig, *cnamesig, *nsecsig;
04015         rbtdb_rdatatype_t sigtype;
04016         isc_boolean_t active;
04017         dns_rbtnodechain_t chain;
04018         nodelock_t *lock;
04019         dns_rbt_t *tree;
04020 
04021         search.rbtdb = (dns_rbtdb_t *)db;
04022 
04023         REQUIRE(VALID_RBTDB(search.rbtdb));
04024         INSIST(version == NULL ||
04025                ((rbtdb_version_t *)version)->rbtdb == (dns_rbtdb_t *)db);
04026 
04027         /*
04028          * We don't care about 'now'.
04029          */
04030         UNUSED(now);
04031 
04032         /*
04033          * If the caller didn't supply a version, attach to the current
04034          * version.
04035          */
04036         if (version == NULL) {
04037                 currentversion(db, &version);
04038                 close_version = ISC_TRUE;
04039         }
04040 
04041         search.rbtversion = version;
04042         search.serial = search.rbtversion->serial;
04043         search.options = options;
04044         search.copy_name = ISC_FALSE;
04045         search.need_cleanup = ISC_FALSE;
04046         search.wild = ISC_FALSE;
04047         search.zonecut = NULL;
04048         dns_fixedname_init(&search.zonecut_name);
04049         dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
04050         search.now = 0;
04051 
04052         /*
04053          * 'wild' will be true iff. we've matched a wildcard.
04054          */
04055         wild = ISC_FALSE;
04056 
04057         RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
04058 
04059         /*
04060          * Search down from the root of the tree.  If, while going down, we
04061          * encounter a callback node, zone_zonecut_callback() will search the
04062          * rdatasets at the zone cut for active DNAME or NS rdatasets.
04063          */
04064         tree =  (options & DNS_DBFIND_FORCENSEC3) != 0 ? search.rbtdb->nsec3 :
04065                                                          search.rbtdb->tree;
04066         result = dns_rbt_findnode(tree, name, foundname, &node,
04067                                   &search.chain, DNS_RBTFIND_EMPTYDATA,
04068                                   zone_zonecut_callback, &search);
04069 
04070         if (result == DNS_R_PARTIALMATCH) {
04071         partial_match:
04072                 if (search.zonecut != NULL) {
04073                     result = setup_delegation(&search, nodep, foundname,
04074                                               rdataset, sigrdataset);
04075                     goto tree_exit;
04076                 }
04077 
04078                 if (search.wild) {
04079                         /*
04080                          * At least one of the levels in the search chain
04081                          * potentially has a wildcard.  For each such level,
04082                          * we must see if there's a matching wildcard active
04083                          * in the current version.
04084                          */
04085                         result = find_wildcard(&search, &node, name);
04086                         if (result == ISC_R_SUCCESS) {
04087                                 result = dns_name_copy(name, foundname, NULL);
04088                                 if (result != ISC_R_SUCCESS)
04089                                         goto tree_exit;
04090                                 wild = ISC_TRUE;
04091                                 goto found;
04092                         }
04093                         else if (result != ISC_R_NOTFOUND)
04094                                 goto tree_exit;
04095                 }
04096 
04097                 chain = search.chain;
04098                 active = activeempty(&search, &chain, name);
04099 
04100                 /*
04101                  * If we're here, then the name does not exist, is not
04102                  * beneath a zonecut, and there's no matching wildcard.
04103                  */
04104                 if ((search.rbtversion->secure == dns_db_secure &&
04105                      !search.rbtversion->havensec3) ||
04106                     (search.options & DNS_DBFIND_FORCENSEC) != 0 ||
04107                     (search.options & DNS_DBFIND_FORCENSEC3) != 0)
04108                 {
04109                         result = find_closest_nsec(&search, nodep, foundname,
04110                                                    rdataset, sigrdataset, tree,
04111                                                    search.rbtversion->secure);
04112                         if (result == ISC_R_SUCCESS)
04113                                 result = active ? DNS_R_EMPTYNAME :
04114                                                   DNS_R_NXDOMAIN;
04115                 } else
04116                         result = active ? DNS_R_EMPTYNAME : DNS_R_NXDOMAIN;
04117                 goto tree_exit;
04118         } else if (result != ISC_R_SUCCESS)
04119                 goto tree_exit;
04120 
04121  found:
04122         /*
04123          * We have found a node whose name is the desired name, or we
04124          * have matched a wildcard.
04125          */
04126 
04127         if (search.zonecut != NULL) {
04128                 /*
04129                  * If we're beneath a zone cut, we don't want to look for
04130                  * CNAMEs because they're not legitimate zone glue.
04131                  */
04132                 cname_ok = ISC_FALSE;
04133         } else {
04134                 /*
04135                  * The node may be a zone cut itself.  If it might be one,
04136                  * make sure we check for it later.
04137                  *
04138                  * DS records live above the zone cut in ordinary zones so
04139                  * we want to ignore any referral.
04140                  *
04141                  * Stub zones don't have anything "above" the delegation so
04142                  * we always return a referral.
04143                  */
04144                 if (node->find_callback &&
04145                     ((node != search.rbtdb->origin_node &&
04146                       !dns_rdatatype_atparent(type)) ||
04147                      IS_STUB(search.rbtdb)))
04148                         maybe_zonecut = ISC_TRUE;
04149         }
04150 
04151         /*
04152          * Certain DNSSEC types are not subject to CNAME matching
04153          * (RFC4035, section 2.5 and RFC3007).
04154          *
04155          * We don't check for RRSIG, because we don't store RRSIG records
04156          * directly.
04157          */
04158         if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
04159                 cname_ok = ISC_FALSE;
04160 
04161         /*
04162          * We now go looking for rdata...
04163          */
04164 
04165         lock = &search.rbtdb->node_locks[node->locknum].lock;
04166         NODE_LOCK(lock, isc_rwlocktype_read);
04167 
04168         found = NULL;
04169         foundsig = NULL;
04170         sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
04171         nsecheader = NULL;
04172         nsecsig = NULL;
04173         cnamesig = NULL;
04174         empty_node = ISC_TRUE;
04175         for (header = node->data; header != NULL; header = header_next) {
04176                 header_next = header->next;
04177                 /*
04178                  * Look for an active, extant rdataset.
04179                  */
04180                 do {
04181                         if (header->serial <= search.serial &&
04182                             !IGNORE(header)) {
04183                                 /*
04184                                  * Is this a "this rdataset doesn't
04185                                  * exist" record?
04186                                  */
04187                                 if (NONEXISTENT(header))
04188                                         header = NULL;
04189                                 break;
04190                         } else
04191                                 header = header->down;
04192                 } while (header != NULL);
04193                 if (header != NULL) {
04194                         /*
04195                          * We now know that there is at least one active
04196                          * rdataset at this node.
04197                          */
04198                         empty_node = ISC_FALSE;
04199 
04200                         /*
04201                          * Do special zone cut handling, if requested.
04202                          */
04203                         if (maybe_zonecut &&
04204                             header->type == dns_rdatatype_ns) {
04205                                 /*
04206                                  * We increment the reference count on node to
04207                                  * ensure that search->zonecut_rdataset will
04208                                  * still be valid later.
04209                                  */
04210                                 new_reference(search.rbtdb, node);
04211                                 search.zonecut = node;
04212                                 search.zonecut_rdataset = header;
04213                                 search.zonecut_sigrdataset = NULL;
04214                                 search.need_cleanup = ISC_TRUE;
04215                                 maybe_zonecut = ISC_FALSE;
04216                                 at_zonecut = ISC_TRUE;
04217                                 /*
04218                                  * It is not clear if KEY should still be
04219                                  * allowed at the parent side of the zone
04220                                  * cut or not.  It is needed for RFC3007
04221                                  * validated updates.
04222                                  */
04223                                 if ((search.options & DNS_DBFIND_GLUEOK) == 0
04224                                     && type != dns_rdatatype_nsec
04225                                     && type != dns_rdatatype_key) {
04226                                         /*
04227                                          * Glue is not OK, but any answer we
04228                                          * could return would be glue.  Return
04229                                          * the delegation.
04230                                          */
04231                                         found = NULL;
04232                                         break;
04233                                 }
04234                                 if (found != NULL && foundsig != NULL)
04235                                         break;
04236                         }
04237 
04238 
04239                         /*
04240                          * If the NSEC3 record doesn't match the chain
04241                          * we are using behave as if it isn't here.
04242                          */
04243                         if (header->type == dns_rdatatype_nsec3 &&
04244                            !matchparams(header, &search)) {
04245                                 NODE_UNLOCK(lock, isc_rwlocktype_read);
04246                                 goto partial_match;
04247                         }
04248                         /*
04249                          * If we found a type we were looking for,
04250                          * remember it.
04251                          */
04252                         if (header->type == type ||
04253                             type == dns_rdatatype_any ||
04254                             (header->type == dns_rdatatype_cname &&
04255                              cname_ok)) {
04256                                 /*
04257                                  * We've found the answer!
04258                                  */
04259                                 found = header;
04260                                 if (header->type == dns_rdatatype_cname &&
04261                                     cname_ok) {
04262                                         /*
04263                                          * We may be finding a CNAME instead
04264                                          * of the desired type.
04265                                          *
04266                                          * If we've already got the CNAME RRSIG,
04267                                          * use it, otherwise change sigtype
04268                                          * so that we find it.
04269                                          */
04270                                         if (cnamesig != NULL)
04271                                                 foundsig = cnamesig;
04272                                         else
04273                                                 sigtype =
04274                                                     RBTDB_RDATATYPE_SIGCNAME;
04275                                 }
04276                                 /*
04277                                  * If we've got all we need, end the search.
04278                                  */
04279                                 if (!maybe_zonecut && foundsig != NULL)
04280                                         break;
04281                         } else if (header->type == sigtype) {
04282                                 /*
04283                                  * We've found the RRSIG rdataset for our
04284                                  * target type.  Remember it.
04285                                  */
04286                                 foundsig = header;
04287                                 /*
04288                                  * If we've got all we need, end the search.
04289                                  */
04290                                 if (!maybe_zonecut && found != NULL)
04291                                         break;
04292                         } else if (header->type == dns_rdatatype_nsec &&
04293                                    !search.rbtversion->havensec3) {
04294                                 /*
04295                                  * Remember a NSEC rdataset even if we're
04296                                  * not specifically looking for it, because
04297                                  * we might need it later.
04298                                  */
04299                                 nsecheader = header;
04300                         } else if (header->type == RBTDB_RDATATYPE_SIGNSEC &&
04301                                    !search.rbtversion->havensec3) {
04302                                 /*
04303                                  * If we need the NSEC rdataset, we'll also
04304                                  * need its signature.
04305                                  */
04306                                 nsecsig = header;
04307                         } else if (cname_ok &&
04308                                    header->type == RBTDB_RDATATYPE_SIGCNAME) {
04309                                 /*
04310                                  * If we get a CNAME match, we'll also need
04311                                  * its signature.
04312                                  */
04313                                 cnamesig = header;
04314                         }
04315                 }
04316         }
04317 
04318         if (empty_node) {
04319                 /*
04320                  * We have an exact match for the name, but there are no
04321                  * active rdatasets in the desired version.  That means that
04322                  * this node doesn't exist in the desired version, and that
04323                  * we really have a partial match.
04324                  */
04325                 if (!wild) {
04326                         NODE_UNLOCK(lock, isc_rwlocktype_read);
04327                         goto partial_match;
04328                 }
04329         }
04330 
04331         /*
04332          * If we didn't find what we were looking for...
04333          */
04334         if (found == NULL) {
04335                 if (search.zonecut != NULL) {
04336                         /*
04337                          * We were trying to find glue at a node beneath a
04338                          * zone cut, but didn't.
04339                          *
04340                          * Return the delegation.
04341                          */
04342                         NODE_UNLOCK(lock, isc_rwlocktype_read);
04343                         result = setup_delegation(&search, nodep, foundname,
04344                                                   rdataset, sigrdataset);
04345                         goto tree_exit;
04346                 }
04347                 /*
04348                  * The desired type doesn't exist.
04349                  */
04350                 result = DNS_R_NXRRSET;
04351                 if (search.rbtversion->secure == dns_db_secure &&
04352                     !search.rbtversion->havensec3 &&
04353                     (nsecheader == NULL || nsecsig == NULL)) {
04354                         /*
04355                          * The zone is secure but there's no NSEC,
04356                          * or the NSEC has no signature!
04357                          */
04358                         if (!wild) {
04359                                 result = DNS_R_BADDB;
04360                                 goto node_exit;
04361                         }
04362 
04363                         NODE_UNLOCK(lock, isc_rwlocktype_read);
04364                         result = find_closest_nsec(&search, nodep, foundname,
04365                                                    rdataset, sigrdataset,
04366                                                    search.rbtdb->tree,
04367                                                    search.rbtversion->secure);
04368                         if (result == ISC_R_SUCCESS)
04369                                 result = DNS_R_EMPTYWILD;
04370                         goto tree_exit;
04371                 }
04372                 if ((search.options & DNS_DBFIND_FORCENSEC) != 0 &&
04373                     nsecheader == NULL)
04374                 {
04375                         /*
04376                          * There's no NSEC record, and we were told
04377                          * to find one.
04378                          */
04379                         result = DNS_R_BADDB;
04380                         goto node_exit;
04381                 }
04382                 if (nodep != NULL) {
04383                         new_reference(search.rbtdb, node);
04384                         *nodep = node;
04385                 }
04386                 if ((search.rbtversion->secure == dns_db_secure &&
04387                      !search.rbtversion->havensec3) ||
04388                     (search.options & DNS_DBFIND_FORCENSEC) != 0)
04389                 {
04390                         bind_rdataset(search.rbtdb, node, nsecheader,
04391                                       0, rdataset);
04392                         if (nsecsig != NULL)
04393                                 bind_rdataset(search.rbtdb, node,
04394                                               nsecsig, 0, sigrdataset);
04395                 }
04396                 if (wild)
04397                         foundname->attributes |= DNS_NAMEATTR_WILDCARD;
04398                 goto node_exit;
04399         }
04400 
04401         /*
04402          * We found what we were looking for, or we found a CNAME.
04403          */
04404 
04405         if (type != found->type &&
04406             type != dns_rdatatype_any &&
04407             found->type == dns_rdatatype_cname) {
04408                 /*
04409                  * We weren't doing an ANY query and we found a CNAME instead
04410                  * of the type we were looking for, so we need to indicate
04411                  * that result to the caller.
04412                  */
04413                 result = DNS_R_CNAME;
04414         } else if (search.zonecut != NULL) {
04415                 /*
04416                  * If we're beneath a zone cut, we must indicate that the
04417                  * result is glue, unless we're actually at the zone cut
04418                  * and the type is NSEC or KEY.
04419                  */
04420                 if (search.zonecut == node) {
04421                         /*
04422                          * It is not clear if KEY should still be
04423                          * allowed at the parent side of the zone
04424                          * cut or not.  It is needed for RFC3007
04425                          * validated updates.
04426                          */
04427                         if (type == dns_rdatatype_nsec ||
04428                             type == dns_rdatatype_nsec3 ||
04429                             type == dns_rdatatype_key)
04430                                 result = ISC_R_SUCCESS;
04431                         else if (type == dns_rdatatype_any)
04432                                 result = DNS_R_ZONECUT;
04433                         else
04434                                 result = DNS_R_GLUE;
04435                 } else
04436                         result = DNS_R_GLUE;
04437                 /*
04438                  * We might have found data that isn't glue, but was occluded
04439                  * by a dynamic update.  If the caller cares about this, they
04440                  * will have told us to validate glue.
04441                  *
04442                  * XXX We should cache the glue validity state!
04443                  */
04444                 if (result == DNS_R_GLUE &&
04445                     (search.options & DNS_DBFIND_VALIDATEGLUE) != 0 &&
04446                     !valid_glue(&search, foundname, type, node)) {
04447                         NODE_UNLOCK(lock, isc_rwlocktype_read);
04448                         result = setup_delegation(&search, nodep, foundname,
04449                                                   rdataset, sigrdataset);
04450                     goto tree_exit;
04451                 }
04452         } else {
04453                 /*
04454                  * An ordinary successful query!
04455                  */
04456                 result = ISC_R_SUCCESS;
04457         }
04458 
04459         if (nodep != NULL) {
04460                 if (!at_zonecut)
04461                         new_reference(search.rbtdb, node);
04462                 else
04463                         search.need_cleanup = ISC_FALSE;
04464                 *nodep = node;
04465         }
04466 
04467         if (type != dns_rdatatype_any) {
04468                 bind_rdataset(search.rbtdb, node, found, 0, rdataset);
04469                 if (foundsig != NULL)
04470                         bind_rdataset(search.rbtdb, node, foundsig, 0,
04471                                       sigrdataset);
04472         }
04473 
04474         if (wild)
04475                 foundname->attributes |= DNS_NAMEATTR_WILDCARD;
04476 
04477  node_exit:
04478         NODE_UNLOCK(lock, isc_rwlocktype_read);
04479 
04480  tree_exit:
04481         RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
04482 
04483         /*
04484          * If we found a zonecut but aren't going to use it, we have to
04485          * let go of it.
04486          */
04487         if (search.need_cleanup) {
04488                 node = search.zonecut;
04489                 INSIST(node != NULL);
04490                 lock = &(search.rbtdb->node_locks[node->locknum].lock);
04491 
04492                 NODE_LOCK(lock, isc_rwlocktype_read);
04493                 decrement_reference(search.rbtdb, node, 0,
04494                                     isc_rwlocktype_read, isc_rwlocktype_none,
04495                                     ISC_FALSE);
04496                 NODE_UNLOCK(lock, isc_rwlocktype_read);
04497         }
04498 
04499         if (close_version)
04500                 closeversion(db, &version, ISC_FALSE);
04501 
04502         dns_rbtnodechain_reset(&search.chain);
04503 
04504         return (result);
04505 }
04506 
04507 static isc_result_t
04508 zone_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
04509                  isc_stdtime_t now, dns_dbnode_t **nodep,
04510                  dns_name_t *foundname,
04511                  dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
04512 {
04513         UNUSED(db);
04514         UNUSED(name);
04515         UNUSED(options);
04516         UNUSED(now);
04517         UNUSED(nodep);
04518         UNUSED(foundname);
04519         UNUSED(rdataset);
04520         UNUSED(sigrdataset);
04521 
04522         FATAL_ERROR(__FILE__, __LINE__, "zone_findzonecut() called!");
04523 
04524         /* NOTREACHED */
04525         return (ISC_R_NOTIMPLEMENTED);
04526 }
04527 
04528 static isc_result_t
04529 cache_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
04530         rbtdb_search_t *search = arg;
04531         rdatasetheader_t *header, *header_prev, *header_next;
04532         rdatasetheader_t *dname_header, *sigdname_header;
04533         isc_result_t result;
04534         nodelock_t *lock;
04535         isc_rwlocktype_t locktype;
04536 
04537         /* XXX comment */
04538 
04539         REQUIRE(search->zonecut == NULL);
04540 
04541         /*
04542          * Keep compiler silent.
04543          */
04544         UNUSED(name);
04545 
04546         lock = &(search->rbtdb->node_locks[node->locknum].lock);
04547         locktype = isc_rwlocktype_read;
04548         NODE_LOCK(lock, locktype);
04549 
04550         /*
04551          * Look for a DNAME or RRSIG DNAME rdataset.
04552          */
04553         dname_header = NULL;
04554         sigdname_header = NULL;
04555         header_prev = NULL;
04556         for (header = node->data; header != NULL; header = header_next) {
04557                 header_next = header->next;
04558                 if (header->rdh_ttl <  search->now) {
04559                         /*
04560                          * This rdataset is stale.  If no one else is
04561                          * using the node, we can clean it up right
04562                          * now, otherwise we mark it as stale, and
04563                          * the node as dirty, so it will get cleaned
04564                          * up later.
04565                          */
04566                         if ((header->rdh_ttl <  search->now - RBTDB_VIRTUAL) &&
04567                             (locktype == isc_rwlocktype_write ||
04568                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
04569                                 /*
04570                                  * We update the node's status only when we
04571                                  * can get write access; otherwise, we leave
04572                                  * others to this work.  Periodical cleaning
04573                                  * will eventually take the job as the last
04574                                  * resort.
04575                                  * We won't downgrade the lock, since other
04576                                  * rdatasets are probably stale, too.
04577                                  */
04578                                 locktype = isc_rwlocktype_write;
04579 
04580                                 if (dns_rbtnode_refcurrent(node) == 0) {
04581                                         isc_mem_t *mctx;
04582 
04583                                         /*
04584                                          * header->down can be non-NULL if the
04585                                          * refcount has just decremented to 0
04586                                          * but decrement_reference() has not
04587                                          * performed clean_cache_node(), in
04588                                          * which case we need to purge the
04589                                          * stale headers first.
04590                                          */
04591                                         mctx = search->rbtdb->common.mctx;
04592                                         clean_stale_headers(search->rbtdb,
04593                                                             mctx,
04594                                                             header);
04595                                         if (header_prev != NULL)
04596                                                 header_prev->next =
04597                                                         header->next;
04598                                         else
04599                                                 node->data = header->next;
04600                                         free_rdataset(search->rbtdb, mctx,
04601                                                       header);
04602                                 } else {
04603                                         mark_stale_header(search->rbtdb,
04604                                                           header);
04605                                         header_prev = header;
04606                                 }
04607                         } else
04608                                 header_prev = header;
04609                 } else if (header->type == dns_rdatatype_dname &&
04610                            EXISTS(header)) {
04611                         dname_header = header;
04612                         header_prev = header;
04613                 } else if (header->type == RBTDB_RDATATYPE_SIGDNAME &&
04614                          EXISTS(header)) {
04615                         sigdname_header = header;
04616                         header_prev = header;
04617                 } else
04618                         header_prev = header;
04619         }
04620 
04621         if (dname_header != NULL &&
04622             (!DNS_TRUST_PENDING(dname_header->trust) ||
04623              (search->options & DNS_DBFIND_PENDINGOK) != 0)) {
04624                 /*
04625                  * We increment the reference count on node to ensure that
04626                  * search->zonecut_rdataset will still be valid later.
04627                  */
04628                 new_reference(search->rbtdb, node);
04629                 INSIST(!ISC_LINK_LINKED(node, deadlink));
04630                 search->zonecut = node;
04631                 search->zonecut_rdataset = dname_header;
04632                 search->zonecut_sigrdataset = sigdname_header;
04633                 search->need_cleanup = ISC_TRUE;
04634                 result = DNS_R_PARTIALMATCH;
04635         } else
04636                 result = DNS_R_CONTINUE;
04637 
04638         NODE_UNLOCK(lock, locktype);
04639 
04640         return (result);
04641 }
04642 
04643 static inline isc_result_t
04644 find_deepest_zonecut(rbtdb_search_t *search, dns_rbtnode_t *node,
04645                      dns_dbnode_t **nodep, dns_name_t *foundname,
04646                      dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
04647 {
04648         unsigned int i;
04649         dns_rbtnode_t *level_node;
04650         rdatasetheader_t *header, *header_prev, *header_next;
04651         rdatasetheader_t *found, *foundsig;
04652         isc_result_t result = ISC_R_NOTFOUND;
04653         dns_name_t name;
04654         dns_rbtdb_t *rbtdb;
04655         isc_boolean_t done;
04656         nodelock_t *lock;
04657         isc_rwlocktype_t locktype;
04658 
04659         /*
04660          * Caller must be holding the tree lock.
04661          */
04662 
04663         rbtdb = search->rbtdb;
04664         i = search->chain.level_matches;
04665         done = ISC_FALSE;
04666         do {
04667                 locktype = isc_rwlocktype_read;
04668                 lock = &rbtdb->node_locks[node->locknum].lock;
04669                 NODE_LOCK(lock, locktype);
04670 
04671                 /*
04672                  * Look for NS and RRSIG NS rdatasets.
04673                  */
04674                 found = NULL;
04675                 foundsig = NULL;
04676                 header_prev = NULL;
04677                 for (header = node->data;
04678                      header != NULL;
04679                      header = header_next) {
04680                         header_next = header->next;
04681                         if (header->rdh_ttl <  search->now) {
04682                                 /*
04683                                  * This rdataset is stale.  If no one else is
04684                                  * using the node, we can clean it up right
04685                                  * now, otherwise we mark it as stale, and
04686                                  * the node as dirty, so it will get cleaned
04687                                  * up later.
04688                                  */
04689                                 if ((header->rdh_ttl <  search->now -
04690                                                     RBTDB_VIRTUAL) &&
04691                                     (locktype == isc_rwlocktype_write ||
04692                                      NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
04693                                         /*
04694                                          * We update the node's status only
04695                                          * when we can get write access.
04696                                          */
04697                                         locktype = isc_rwlocktype_write;
04698 
04699                                         if (dns_rbtnode_refcurrent(node)
04700                                             == 0) {
04701                                                 isc_mem_t *m;
04702 
04703                                                 m = search->rbtdb->common.mctx;
04704                                                 clean_stale_headers(
04705                                                         search->rbtdb,
04706                                                         m, header);
04707                                                 if (header_prev != NULL)
04708                                                         header_prev->next =
04709                                                                 header->next;
04710                                                 else
04711                                                         node->data =
04712                                                                 header->next;
04713                                                 free_rdataset(rbtdb, m,
04714                                                               header);
04715                                         } else {
04716                                                 mark_stale_header(rbtdb,
04717                                                                   header);
04718                                                 header_prev = header;
04719                                         }
04720                                 } else
04721                                         header_prev = header;
04722                         } else if (EXISTS(header)) {
04723                                 /*
04724                                  * We've found an extant rdataset.  See if
04725                                  * we're interested in it.
04726                                  */
04727                                 if (header->type == dns_rdatatype_ns) {
04728                                         found = header;
04729                                         if (foundsig != NULL)
04730                                                 break;
04731                                 } else if (header->type ==
04732                                            RBTDB_RDATATYPE_SIGNS) {
04733                                         foundsig = header;
04734                                         if (found != NULL)
04735                                                 break;
04736                                 }
04737                                 header_prev = header;
04738                         } else
04739                                 header_prev = header;
04740                 }
04741 
04742                 if (found != NULL) {
04743                         /*
04744                          * If we have to set foundname, we do it before
04745                          * anything else.  If we were to set foundname after
04746                          * we had set nodep or bound the rdataset, then we'd
04747                          * have to undo that work if dns_name_concatenate()
04748                          * failed.  By setting foundname first, there's
04749                          * nothing to undo if we have trouble.
04750                          */
04751                         if (foundname != NULL) {
04752                                 dns_name_init(&name, NULL);
04753                                 dns_rbt_namefromnode(node, &name);
04754                                 result = dns_name_copy(&name, foundname, NULL);
04755                                 while (result == ISC_R_SUCCESS && i > 0) {
04756                                         i--;
04757                                         level_node = search->chain.levels[i];
04758                                         dns_name_init(&name, NULL);
04759                                         dns_rbt_namefromnode(level_node,
04760                                                              &name);
04761                                         result =
04762                                                 dns_name_concatenate(foundname,
04763                                                                      &name,
04764                                                                      foundname,
04765                                                                      NULL);
04766                                 }
04767                                 if (result != ISC_R_SUCCESS) {
04768                                         *nodep = NULL;
04769                                         goto node_exit;
04770                                 }
04771                         }
04772                         result = DNS_R_DELEGATION;
04773                         if (nodep != NULL) {
04774                                 new_reference(search->rbtdb, node);
04775                                 *nodep = node;
04776                         }
04777                         bind_rdataset(search->rbtdb, node, found, search->now,
04778                                       rdataset);
04779                         if (foundsig != NULL)
04780                                 bind_rdataset(search->rbtdb, node, foundsig,
04781                                               search->now, sigrdataset);
04782                         if (need_headerupdate(found, search->now) ||
04783                             (foundsig != NULL &&
04784                              need_headerupdate(foundsig, search->now))) {
04785                                 if (locktype != isc_rwlocktype_write) {
04786                                         NODE_UNLOCK(lock, locktype);
04787                                         NODE_LOCK(lock, isc_rwlocktype_write);
04788                                         locktype = isc_rwlocktype_write;
04789                                         POST(locktype);
04790                                 }
04791                                 if (need_headerupdate(found, search->now))
04792                                         update_header(search->rbtdb, found,
04793                                                       search->now);
04794                                 if (foundsig != NULL &&
04795                                     need_headerupdate(foundsig, search->now)) {
04796                                         update_header(search->rbtdb, foundsig,
04797                                                       search->now);
04798                                 }
04799                         }
04800                 }
04801 
04802         node_exit:
04803                 NODE_UNLOCK(lock, locktype);
04804 
04805                 if (found == NULL && i > 0) {
04806                         i--;
04807                         node = search->chain.levels[i];
04808                 } else
04809                         done = ISC_TRUE;
04810 
04811         } while (!done);
04812 
04813         return (result);
04814 }
04815 
04816 static isc_result_t
04817 find_coveringnsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
04818                   isc_stdtime_t now, dns_name_t *foundname,
04819                   dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
04820 {
04821         dns_rbtnode_t *node;
04822         rdatasetheader_t *header, *header_next, *header_prev;
04823         rdatasetheader_t *found, *foundsig;
04824         isc_boolean_t empty_node;
04825         isc_result_t result;
04826         dns_fixedname_t fname, forigin;
04827         dns_name_t *name, *origin;
04828         rbtdb_rdatatype_t matchtype, sigmatchtype;
04829         nodelock_t *lock;
04830         isc_rwlocktype_t locktype;
04831 
04832         matchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_nsec, 0);
04833         sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig,
04834                                              dns_rdatatype_nsec);
04835 
04836         do {
04837                 node = NULL;
04838                 dns_fixedname_init(&fname);
04839                 name = dns_fixedname_name(&fname);
04840                 dns_fixedname_init(&forigin);
04841                 origin = dns_fixedname_name(&forigin);
04842                 result = dns_rbtnodechain_current(&search->chain, name,
04843                                                   origin, &node);
04844                 if (result != ISC_R_SUCCESS)
04845                         return (result);
04846                 locktype = isc_rwlocktype_read;
04847                 lock = &(search->rbtdb->node_locks[node->locknum].lock);
04848                 NODE_LOCK(lock, locktype);
04849                 found = NULL;
04850                 foundsig = NULL;
04851                 empty_node = ISC_TRUE;
04852                 header_prev = NULL;
04853                 for (header = node->data;
04854                      header != NULL;
04855                      header = header_next) {
04856                         header_next = header->next;
04857                         if (header->rdh_ttl <  now) {
04858                                 /*
04859                                  * This rdataset is stale.  If no one else is
04860                                  * using the node, we can clean it up right
04861                                  * now, otherwise we mark it as stale, and the
04862                                  * node as dirty, so it will get cleaned up
04863                                  * later.
04864                                  */
04865                                 if ((header->rdh_ttl <  now - RBTDB_VIRTUAL) &&
04866                                     (locktype == isc_rwlocktype_write ||
04867                                      NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
04868                                         /*
04869                                          * We update the node's status only
04870                                          * when we can get write access.
04871                                          */
04872                                         locktype = isc_rwlocktype_write;
04873 
04874                                         if (dns_rbtnode_refcurrent(node)
04875                                             == 0) {
04876                                                 isc_mem_t *m;
04877 
04878                                                 m = search->rbtdb->common.mctx;
04879                                                 clean_stale_headers(
04880                                                         search->rbtdb,
04881                                                         m, header);
04882                                                 if (header_prev != NULL)
04883                                                         header_prev->next =
04884                                                                 header->next;
04885                                                 else
04886                                                         node->data = header->next;
04887                                                 free_rdataset(search->rbtdb, m,
04888                                                               header);
04889                                         } else {
04890                                                 mark_stale_header(search->rbtdb,
04891                                                                   header);
04892                                                 header_prev = header;
04893                                         }
04894                                 } else
04895                                         header_prev = header;
04896                                 continue;
04897                         }
04898                         if (NONEXISTENT(header) ||
04899                             RBTDB_RDATATYPE_BASE(header->type) == 0) {
04900                                 header_prev = header;
04901                                 continue;
04902                         }
04903                         empty_node = ISC_FALSE;
04904                         if (header->type == matchtype)
04905                                 found = header;
04906                         else if (header->type == sigmatchtype)
04907                                 foundsig = header;
04908                         header_prev = header;
04909                 }
04910                 if (found != NULL) {
04911                         result = dns_name_concatenate(name, origin,
04912                                                       foundname, NULL);
04913                         if (result != ISC_R_SUCCESS)
04914                                 goto unlock_node;
04915                         bind_rdataset(search->rbtdb, node, found,
04916                                       now, rdataset);
04917                         if (foundsig != NULL)
04918                                 bind_rdataset(search->rbtdb, node, foundsig,
04919                                               now, sigrdataset);
04920                         new_reference(search->rbtdb, node);
04921                         *nodep = node;
04922                         result = DNS_R_COVERINGNSEC;
04923                 } else if (!empty_node) {
04924                         result = ISC_R_NOTFOUND;
04925                 } else
04926                         result = dns_rbtnodechain_prev(&search->chain, NULL,
04927                                                        NULL);
04928  unlock_node:
04929                 NODE_UNLOCK(lock, locktype);
04930         } while (empty_node && result == ISC_R_SUCCESS);
04931         return (result);
04932 }
04933 
04934 /*
04935  * Connect this RBTDB to the response policy zone summary data for the view.
04936  */
04937 static void
04938 rpz_attach(dns_db_t *db, dns_rpz_zones_t *rpzs, dns_rpz_num_t rpz_num) {
04939         dns_rbtdb_t * rbtdb;
04940 
04941         rbtdb = (dns_rbtdb_t *)db;
04942         REQUIRE(VALID_RBTDB(rbtdb));
04943 
04944         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
04945         REQUIRE(rbtdb->rpzs == NULL && rbtdb->rpz_num == DNS_RPZ_INVALID_NUM);
04946         dns_rpz_attach_rpzs(rpzs, &rbtdb->rpzs);
04947         rbtdb->rpz_num = rpz_num;
04948         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
04949 }
04950 
04951 /*
04952  * Enable this RBTDB as a response policy zone.
04953  */
04954 static isc_result_t
04955 rpz_ready(dns_db_t *db) {
04956         dns_rbtdb_t * rbtdb;
04957         isc_result_t result;
04958 
04959         rbtdb = (dns_rbtdb_t *)db;
04960         REQUIRE(VALID_RBTDB(rbtdb));
04961 
04962         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
04963         if (rbtdb->rpzs == NULL) {
04964                 INSIST(rbtdb->rpz_num == DNS_RPZ_INVALID_NUM);
04965                 result = ISC_R_SUCCESS;
04966         } else {
04967                 result = dns_rpz_ready(rbtdb->rpzs, &rbtdb->load_rpzs,
04968                                        rbtdb->rpz_num);
04969         }
04970         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
04971         return (result);
04972 }
04973 
/*
 * Look up 'name' / 'type' in the tree of the cache database 'db'.
 *
 * 'version' must be NULL (enforced by REQUIRE).  A 'now' of 0 is replaced
 * by the current time.  The tree lock is held for reading for the whole
 * search.  The lock of the matched node is taken for reading and is only
 * upgraded to a write lock when stale headers are cleaned up or when a
 * returned header needs update_header().
 *
 * Results assigned directly in this function:
 *	ISC_R_SUCCESS		an rdataset matching the query was found
 *	DNS_R_CNAME		a CNAME was found instead of 'type'
 *	DNS_R_NCACHENXDOMAIN,
 *	DNS_R_NCACHENXRRSET	a negative cache entry was found
 *	DNS_R_DELEGATION	no acceptable answer, but the node itself
 *				holds an NS rdataset
 * In all other cases the result comes from dns_rbt_findnode(),
 * find_coveringnsec(), setup_delegation() or find_deepest_zonecut().
 *
 * '*nodep' is only written when 'nodep' is non-NULL.  The final result is
 * passed to update_cachestats() before returning.
 */
04974 static isc_result_t
04975 cache_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
04976            dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
04977            dns_dbnode_t **nodep, dns_name_t *foundname,
04978            dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
04979 {
04980         dns_rbtnode_t *node = NULL;
04981         isc_result_t result;
04982         rbtdb_search_t search;
04983         isc_boolean_t cname_ok = ISC_TRUE;
04984         isc_boolean_t empty_node;
04985         nodelock_t *lock;
04986         isc_rwlocktype_t locktype;
04987         rdatasetheader_t *header, *header_prev, *header_next;
04988         rdatasetheader_t *found, *nsheader;
04989         rdatasetheader_t *foundsig, *nssig, *cnamesig;
04990         rdatasetheader_t *update, *updatesig;
04991         rbtdb_rdatatype_t sigtype, negtype;
04992 
04993         UNUSED(version);
04994 
04995         search.rbtdb = (dns_rbtdb_t *)db;
04996 
04997         REQUIRE(VALID_RBTDB(search.rbtdb));
04998         REQUIRE(version == NULL);
04999 
05000         if (now == 0)
05001                 isc_stdtime_get(&now);
05002 
              /* Set up the search context used by the helpers below. */
05003         search.rbtversion = NULL;
05004         search.serial = 1;
05005         search.options = options;
05006         search.copy_name = ISC_FALSE;
05007         search.need_cleanup = ISC_FALSE;
05008         search.wild = ISC_FALSE;
05009         search.zonecut = NULL;
05010         dns_fixedname_init(&search.zonecut_name);
05011         dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
05012         search.now = now;
05013         update = NULL;
05014         updatesig = NULL;
05015 
05016         RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
05017 
05018         /*
05019          * Search down from the root of the tree.  If, while going down, we
05020          * encounter a callback node, cache_zonecut_callback() will search the
05021          * rdatasets at the zone cut for a DNAME rdataset.
05022          */
05023         result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
05024                                   &search.chain, DNS_RBTFIND_EMPTYDATA,
05025                                   cache_zonecut_callback, &search);
05026 
05027         if (result == DNS_R_PARTIALMATCH) {
05028                 if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0) {
05029                         result = find_coveringnsec(&search, nodep, now,
05030                                                    foundname, rdataset,
05031                                                    sigrdataset);
05032                         if (result == DNS_R_COVERINGNSEC)
05033                                 goto tree_exit;
05034                 }
05035                 if (search.zonecut != NULL) {
05036                     result = setup_delegation(&search, nodep, foundname,
05037                                               rdataset, sigrdataset);
05038                     goto tree_exit;
05039                 } else {
                      /*
                       * find_ns is also the target of two gotos further
                       * down, taken after an exact match turned out to
                       * have no usable data.
                       */
05040                 find_ns:
05041                         result = find_deepest_zonecut(&search, node, nodep,
05042                                                       foundname, rdataset,
05043                                                       sigrdataset);
05044                         goto tree_exit;
05045                 }
05046         } else if (result != ISC_R_SUCCESS)
05047                 goto tree_exit;
05048 
05049         /*
05050          * Certain DNSSEC types are not subject to CNAME matching
05051          * (RFC4035, section 2.5 and RFC3007).
05052          *
05053          * We don't check for RRSIG, because we don't store RRSIG records
05054          * directly.
05055          */
05056         if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
05057                 cname_ok = ISC_FALSE;
05058 
05059         /*
05060          * We now go looking for rdata...
05061          */
05062 
05063         lock = &(search.rbtdb->node_locks[node->locknum].lock);
05064         locktype = isc_rwlocktype_read;
05065         NODE_LOCK(lock, locktype);
05066 
05067         found = NULL;
05068         foundsig = NULL;
              /*
               * sigtype identifies the RRSIG header covering 'type';
               * negtype identifies a negative cache header for 'type'.
               */
05069         sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
05070         negtype = RBTDB_RDATATYPE_VALUE(0, type);
05071         nsheader = NULL;
05072         nssig = NULL;
05073         cnamesig = NULL;
05074         empty_node = ISC_TRUE;
05075         header_prev = NULL;
05076         for (header = node->data; header != NULL; header = header_next) {
                      /* Saved up front because 'header' may be freed below. */
05077                 header_next = header->next;
05078                 if (header->rdh_ttl <  now) {
05079                         /*
05080                          * This rdataset is stale.  If no one else is using the
05081                          * node, we can clean it up right now, otherwise we
05082                          * mark it as stale, and the node as dirty, so it will
05083                          * get cleaned up later.
05084                          */
05085                         if ((header->rdh_ttl <  now - RBTDB_VIRTUAL) &&
05086                             (locktype == isc_rwlocktype_write ||
05087                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
05088                                 /*
05089                                  * We update the node's status only when we
05090                                  * can get write access.
05091                                  */
05092                                 locktype = isc_rwlocktype_write;
05093 
05094                                 if (dns_rbtnode_refcurrent(node) == 0) {
05095                                         isc_mem_t *mctx;
05096 
05097                                         mctx = search.rbtdb->common.mctx;
05098                                         clean_stale_headers(search.rbtdb, mctx,
05099                                                             header);
05100                                         if (header_prev != NULL)
05101                                                 header_prev->next =
05102                                                         header->next;
05103                                         else
05104                                                 node->data = header->next;
05105                                         free_rdataset(search.rbtdb, mctx,
05106                                                       header);
05107                                 } else {
05108                                         mark_stale_header(search.rbtdb, header);
05109                                         header_prev = header;
05110                                 }
05111                         } else
05112                                 header_prev = header;
05113                 } else if (EXISTS(header)) {
05114                         /*
05115                          * We now know that there is at least one active
05116                          * non-stale rdataset at this node.
05117                          */
05118                         empty_node = ISC_FALSE;
05119 
05120                         /*
05121                          * If we found a type we were looking for, remember
05122                          * it.
05123                          */
05124                         if (header->type == type ||
05125                             (type == dns_rdatatype_any &&
05126                              RBTDB_RDATATYPE_BASE(header->type) != 0) ||
05127                             (cname_ok && header->type ==
05128                              dns_rdatatype_cname)) {
05129                                 /*
05130                                  * We've found the answer.
05131                                  */
05132                                 found = header;
05133                                 if (header->type == dns_rdatatype_cname &&
05134                                     cname_ok &&
05135                                     cnamesig != NULL) {
05136                                         /*
05137                                          * If we've already got the
05138                                          * CNAME RRSIG, use it.
05139                                          */
05140                                         foundsig = cnamesig;
05141                                 }
05142                         } else if (header->type == sigtype) {
05143                                 /*
05144                                  * We've found the RRSIG rdataset for our
05145                                  * target type.  Remember it.
05146                                  */
05147                                 foundsig = header;
05148                         } else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
05149                                    header->type == negtype) {
05150                                 /*
05151                                  * We've found a negative cache entry.
05152                                  */
05153                                 found = header;
05154                         } else if (header->type == dns_rdatatype_ns) {
05155                                 /*
05156                                  * Remember a NS rdataset even if we're
05157                                  * not specifically looking for it, because
05158                                  * we might need it later.
05159                                  */
05160                                 nsheader = header;
05161                         } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
05162                                 /*
05163                                  * If we need the NS rdataset, we'll also
05164                                  * need its signature.
05165                                  */
05166                                 nssig = header;
05167                         } else if (cname_ok &&
05168                                    header->type == RBTDB_RDATATYPE_SIGCNAME) {
05169                                 /*
05170                                  * If we get a CNAME match, we'll also need
05171                                  * its signature.
05172                                  */
05173                                 cnamesig = header;
05174                         }
05175                         header_prev = header;
05176                 } else
05177                         header_prev = header;
05178         }
05179 
05180         if (empty_node) {
05181                 /*
05182                  * We have an exact match for the name, but there are no
05183                  * extant rdatasets.  That means that this node doesn't
05184                  * meaningfully exist, and that we really have a partial match.
05185                  */
                      /*
                       * The find_ns path leaves through tree_exit, which
                       * does not release the node lock, so drop it here.
                       */
05186                 NODE_UNLOCK(lock, locktype);
05187                 goto find_ns;
05188         }
05189 
05190         /*
05191          * If we didn't find what we were looking for...
05192          */
              /*
               * An answer whose trust level is additional, glue or pending
               * is treated as "not found" unless the matching
               * DNS_DBFIND_*OK option was given.
               */
05193         if (found == NULL ||
05194             (DNS_TRUST_ADDITIONAL(found->trust) &&
05195              ((options & DNS_DBFIND_ADDITIONALOK) == 0)) ||
05196             (found->trust == dns_trust_glue &&
05197              ((options & DNS_DBFIND_GLUEOK) == 0)) ||
05198             (DNS_TRUST_PENDING(found->trust) &&
05199              ((options & DNS_DBFIND_PENDINGOK) == 0))) {
05200                 /*
05201                  * If there is an NS rdataset at this node, then this is the
05202                  * deepest zone cut.
05203                  */
05204                 if (nsheader != NULL) {
05205                         if (nodep != NULL) {
05206                                 new_reference(search.rbtdb, node);
05207                                 INSIST(!ISC_LINK_LINKED(node, deadlink));
05208                                 *nodep = node;
05209                         }
05210                         bind_rdataset(search.rbtdb, node, nsheader, search.now,
05211                                       rdataset);
05212                         if (need_headerupdate(nsheader, search.now))
05213                                 update = nsheader;
05214                         if (nssig != NULL) {
05215                                 bind_rdataset(search.rbtdb, node, nssig,
05216                                               search.now, sigrdataset);
05217                                 if (need_headerupdate(nssig, search.now))
05218                                         updatesig = nssig;
05219                         }
05220                         result = DNS_R_DELEGATION;
05221                         goto node_exit;
05222                 }
05223 
05224                 /*
05225                  * Go find the deepest zone cut.
05226                  */
05227                 NODE_UNLOCK(lock, locktype);
05228                 goto find_ns;
05229         }
05230 
05231         /*
05232          * We found what we were looking for, or we found a CNAME.
05233          */
05234 
05235         if (nodep != NULL) {
05236                 new_reference(search.rbtdb, node);
05237                 INSIST(!ISC_LINK_LINKED(node, deadlink));
05238                 *nodep = node;
05239         }
05240 
05241         if (NEGATIVE(found)) {
05242                 /*
05243                  * We found a negative cache entry.
05244                  */
05245                 if (NXDOMAIN(found))
05246                         result = DNS_R_NCACHENXDOMAIN;
05247                 else
05248                         result = DNS_R_NCACHENXRRSET;
05249         } else if (type != found->type &&
05250                    type != dns_rdatatype_any &&
05251                    found->type == dns_rdatatype_cname) {
05252                 /*
05253                  * We weren't doing an ANY query and we found a CNAME instead
05254                  * of the type we were looking for, so we need to indicate
05255                  * that result to the caller.
05256                  */
05257                 result = DNS_R_CNAME;
05258         } else {
05259                 /*
05260                  * An ordinary successful query!
05261                  */
05262                 result = ISC_R_SUCCESS;
05263         }
05264 
              /*
               * A successful ANY query binds no rdataset here; every other
               * outcome (including a negative entry hit by an ANY query)
               * binds the header that was found.
               */
05265         if (type != dns_rdatatype_any || result == DNS_R_NCACHENXDOMAIN ||
05266             result == DNS_R_NCACHENXRRSET) {
05267                 bind_rdataset(search.rbtdb, node, found, search.now,
05268                               rdataset);
05269                 if (need_headerupdate(found, search.now))
05270                         update = found;
05271                 if (!NEGATIVE(found) && foundsig != NULL) {
05272                         bind_rdataset(search.rbtdb, node, foundsig, search.now,
05273                                       sigrdataset);
05274                         if (need_headerupdate(foundsig, search.now))
05275                                 updatesig = foundsig;
05276                 }
05277         }
05278 
05279  node_exit:
              /*
               * update_header() is only called with the node write lock
               * held.  If we still hold the read lock it is released and
               * re-taken as a write lock; need_headerupdate() is then
               * evaluated again before each update (presumably because the
               * header may have changed while the lock was dropped).
               */
05280         if ((update != NULL || updatesig != NULL) &&
05281             locktype != isc_rwlocktype_write) {
05282                 NODE_UNLOCK(lock, locktype);
05283                 NODE_LOCK(lock, isc_rwlocktype_write);
05284                 locktype = isc_rwlocktype_write;
05285                 POST(locktype);
05286         }
05287         if (update != NULL && need_headerupdate(update, search.now))
05288                 update_header(search.rbtdb, update, search.now);
05289         if (updatesig != NULL && need_headerupdate(updatesig, search.now))
05290                 update_header(search.rbtdb, updatesig, search.now);
05291 
05292         NODE_UNLOCK(lock, locktype);
05293 
05294  tree_exit:
05295         RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
05296 
05297         /*
05298          * If we found a zonecut but aren't going to use it, we have to
05299          * let go of it.
05300          */
05301         if (search.need_cleanup) {
05302                 node = search.zonecut;
05303                 INSIST(node != NULL);
05304                 lock = &(search.rbtdb->node_locks[node->locknum].lock);
05305 
05306                 NODE_LOCK(lock, isc_rwlocktype_read);
05307                 decrement_reference(search.rbtdb, node, 0,
05308                                     isc_rwlocktype_read, isc_rwlocktype_none,
05309                                     ISC_FALSE);
05310                 NODE_UNLOCK(lock, isc_rwlocktype_read);
05311         }
05312 
05313         dns_rbtnodechain_reset(&search.chain);
05314 
05315         update_cachestats(search.rbtdb, result);
05316         return (result);
05317 }
05318 
05319 static isc_result_t
05320 cache_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
05321                   isc_stdtime_t now, dns_dbnode_t **nodep,
05322                   dns_name_t *foundname,
05323                   dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
05324 {
05325         dns_rbtnode_t *node = NULL;
05326         nodelock_t *lock;
05327         isc_result_t result;
05328         rbtdb_search_t search;
05329         rdatasetheader_t *header, *header_prev, *header_next;
05330         rdatasetheader_t *found, *foundsig;
05331         unsigned int rbtoptions = DNS_RBTFIND_EMPTYDATA;
05332         isc_rwlocktype_t locktype;
05333 
05334         search.rbtdb = (dns_rbtdb_t *)db;
05335 
05336         REQUIRE(VALID_RBTDB(search.rbtdb));
05337 
05338         if (now == 0)
05339                 isc_stdtime_get(&now);
05340 
05341         search.rbtversion = NULL;
05342         search.serial = 1;
05343         search.options = options;
05344         search.copy_name = ISC_FALSE;
05345         search.need_cleanup = ISC_FALSE;
05346         search.wild = ISC_FALSE;
05347         search.zonecut = NULL;
05348         dns_fixedname_init(&search.zonecut_name);
05349         dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
05350         search.now = now;
05351 
05352         if ((options & DNS_DBFIND_NOEXACT) != 0)
05353                 rbtoptions |= DNS_RBTFIND_NOEXACT;
05354 
05355         RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
05356 
05357         /*
05358          * Search down from the root of the tree.
05359          */
05360         result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
05361                                   &search.chain, rbtoptions, NULL, &search);
05362 
05363         if (result == DNS_R_PARTIALMATCH) {
05364         find_ns:
05365                 result = find_deepest_zonecut(&search, node, nodep, foundname,
05366                                               rdataset, sigrdataset);
05367                 goto tree_exit;
05368         } else if (result != ISC_R_SUCCESS)
05369                 goto tree_exit;
05370 
05371         /*
05372          * We now go looking for an NS rdataset at the node.
05373          */
05374 
05375         lock = &(search.rbtdb->node_locks[node->locknum].lock);
05376         locktype = isc_rwlocktype_read;
05377         NODE_LOCK(lock, locktype);
05378 
05379         found = NULL;
05380         foundsig = NULL;
05381         header_prev = NULL;
05382         for (header = node->data; header != NULL; header = header_next) {
05383                 header_next = header->next;
05384                 if (header->rdh_ttl <  now) {
05385                         /*
05386                          * This rdataset is stale.  If no one else is using the
05387                          * node, we can clean it up right now, otherwise we
05388                          * mark it as stale, and the node as dirty, so it will
05389                          * get cleaned up later.
05390                          */
05391                         if ((header->rdh_ttl < now - RBTDB_VIRTUAL) &&
05392                             (locktype == isc_rwlocktype_write ||
05393                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
05394                                 /*
05395                                  * We update the node's status only when we
05396                                  * can get write access.
05397                                  */
05398                                 locktype = isc_rwlocktype_write;
05399 
05400                                 if (dns_rbtnode_refcurrent(node) == 0) {
05401                                         isc_mem_t *mctx;
05402 
05403                                         mctx = search.rbtdb->common.mctx;
05404                                         clean_stale_headers(search.rbtdb, mctx,
05405                                                             header);
05406                                         if (header_prev != NULL)
05407                                                 header_prev->next =
05408                                                         header->next;
05409                                         else
05410                                                 node->data = header->next;
05411                                         free_rdataset(search.rbtdb, mctx,
05412                                                       header);
05413                                 } else {
05414                                         mark_stale_header(search.rbtdb, header);
05415                                         header_prev = header;
05416                                 }
05417                         } else
05418                                 header_prev = header;
05419                 } else if (EXISTS(header)) {
05420                         /*
05421                          * If we found a type we were looking for, remember
05422                          * it.
05423                          */
05424                         if (header->type == dns_rdatatype_ns) {
05425                                 /*
05426                                  * Remember a NS rdataset even if we're
05427                                  * not specifically looking for it, because
05428                                  * we might need it later.
05429                                  */
05430                                 found = header;
05431                         } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
05432                                 /*
05433                                  * If we need the NS rdataset, we'll also
05434                                  * need its signature.
05435                                  */
05436                                 foundsig = header;
05437                         }
05438                         header_prev = header;
05439                 } else
05440                         header_prev = header;
05441         }
05442 
05443         if (found == NULL) {
05444                 /*
05445                  * No NS records here.
05446                  */
05447                 NODE_UNLOCK(lock, locktype);
05448                 goto find_ns;
05449         }
05450 
05451         if (nodep != NULL) {
05452                 new_reference(search.rbtdb, node);
05453                 INSIST(!ISC_LINK_LINKED(node, deadlink));
05454                 *nodep = node;
05455         }
05456 
05457         bind_rdataset(search.rbtdb, node, found, search.now, rdataset);
05458         if (foundsig != NULL)
05459                 bind_rdataset(search.rbtdb, node, foundsig, search.now,
05460                               sigrdataset);
05461 
05462         if (need_headerupdate(found, search.now) ||
05463             (foundsig != NULL &&  need_headerupdate(foundsig, search.now))) {
05464                 if (locktype != isc_rwlocktype_write) {
05465                         NODE_UNLOCK(lock, locktype);
05466                         NODE_LOCK(lock, isc_rwlocktype_write);
05467                         locktype = isc_rwlocktype_write;
05468                         POST(locktype);
05469                 }
05470                 if (need_headerupdate(found, search.now))
05471                         update_header(search.rbtdb, found, search.now);
05472                 if (foundsig != NULL &&
05473                     need_headerupdate(foundsig, search.now)) {
05474                         update_header(search.rbtdb, foundsig, search.now);
05475                 }
05476         }
05477 
05478         NODE_UNLOCK(lock, locktype);
05479 
05480  tree_exit:
05481         RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
05482 
05483         INSIST(!search.need_cleanup);
05484 
05485         dns_rbtnodechain_reset(&search.chain);
05486 
05487         if (result == DNS_R_DELEGATION)
05488                 result = ISC_R_SUCCESS;
05489 
05490         return (result);
05491 }
05492 
05493 static void
05494 attachnode(dns_db_t *db, dns_dbnode_t *source, dns_dbnode_t **targetp) {
05495         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
05496         dns_rbtnode_t *node = (dns_rbtnode_t *)source;
05497         unsigned int refs;
05498 
05499         REQUIRE(VALID_RBTDB(rbtdb));
05500         REQUIRE(targetp != NULL && *targetp == NULL);
05501 
05502         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
05503         dns_rbtnode_refincrement(node, &refs);
05504         INSIST(refs != 0);
05505         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
05506 
05507         *targetp = source;
05508 }
05509 
05510 static void
05511 detachnode(dns_db_t *db, dns_dbnode_t **targetp) {
05512         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
05513         dns_rbtnode_t *node;
05514         isc_boolean_t want_free = ISC_FALSE;
05515         isc_boolean_t inactive = ISC_FALSE;
05516         rbtdb_nodelock_t *nodelock;
05517 
05518         REQUIRE(VALID_RBTDB(rbtdb));
05519         REQUIRE(targetp != NULL && *targetp != NULL);
05520 
05521         node = (dns_rbtnode_t *)(*targetp);
05522         nodelock = &rbtdb->node_locks[node->locknum];
05523 
05524         NODE_LOCK(&nodelock->lock, isc_rwlocktype_read);
05525 
05526         if (decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
05527                                 isc_rwlocktype_none, ISC_FALSE)) {
05528                 if (isc_refcount_current(&nodelock->references) == 0 &&
05529                     nodelock->exiting) {
05530                         inactive = ISC_TRUE;
05531                 }
05532         }
05533 
05534         NODE_UNLOCK(&nodelock->lock, isc_rwlocktype_read);
05535 
05536         *targetp = NULL;
05537 
05538         if (inactive) {
05539                 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
05540                 rbtdb->active--;
05541                 if (rbtdb->active == 0)
05542                         want_free = ISC_TRUE;
05543                 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
05544                 if (want_free) {
05545                         char buf[DNS_NAME_FORMATSIZE];
05546                         if (dns_name_dynamic(&rbtdb->common.origin))
05547                                 dns_name_format(&rbtdb->common.origin, buf,
05548                                                 sizeof(buf));
05549                         else
05550                                 strcpy(buf, "<UNKNOWN>");
05551                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
05552                                       DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
05553                                       "calling free_rbtdb(%s)", buf);
05554                         free_rbtdb(rbtdb, ISC_TRUE, NULL);
05555                 }
05556         }
05557 }
05558 
05559 static isc_result_t
05560 expirenode(dns_db_t *db, dns_dbnode_t *node, isc_stdtime_t now) {
05561         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
05562         dns_rbtnode_t *rbtnode = node;
05563         rdatasetheader_t *header;
05564         isc_boolean_t force_expire = ISC_FALSE;
05565         /*
05566          * These are the category and module used by the cache cleaner.
05567          */
05568         isc_boolean_t log = ISC_FALSE;
05569         isc_logcategory_t *category = DNS_LOGCATEGORY_DATABASE;
05570         isc_logmodule_t *module = DNS_LOGMODULE_CACHE;
05571         int level = ISC_LOG_DEBUG(2);
05572         char printname[DNS_NAME_FORMATSIZE];
05573 
05574         REQUIRE(VALID_RBTDB(rbtdb));
05575 
05576         /*
05577          * Caller must hold a tree lock.
05578          */
05579 
05580         if (now == 0)
05581                 isc_stdtime_get(&now);
05582 
05583         if (isc_mem_isovermem(rbtdb->common.mctx)) {
05584                 isc_uint32_t val;
05585 
05586                 isc_random_get(&val);
05587                 /*
05588                  * XXXDCL Could stand to have a better policy, like LRU.
05589                  */
05590                 force_expire = ISC_TF(rbtnode->down == NULL && val % 4 == 0);
05591 
05592                 /*
05593                  * Note that 'log' can be true IFF overmem is also true.
05594                  * overmem can currently only be true for cache
05595                  * databases -- hence all of the "overmem cache" log strings.
05596                  */
05597                 log = ISC_TF(isc_log_wouldlog(dns_lctx, level));
05598                 if (log)
05599                         isc_log_write(dns_lctx, category, module, level,
05600                                       "overmem cache: %s %s",
05601                                       force_expire ? "FORCE" : "check",
05602                                       dns_rbt_formatnodename(rbtnode,
05603                                                            printname,
05604                                                            sizeof(printname)));
05605         }
05606 
05607         /*
05608          * We may not need write access, but this code path is not performance
05609          * sensitive, so it should be okay to always lock as a writer.
05610          */
05611         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
05612                   isc_rwlocktype_write);
05613 
05614         for (header = rbtnode->data; header != NULL; header = header->next)
05615                 if (header->rdh_ttl <= now - RBTDB_VIRTUAL) {
05616                         /*
05617                          * We don't check if refcurrent(rbtnode) == 0 and try
05618                          * to free like we do in cache_find(), because
05619                          * refcurrent(rbtnode) must be non-zero.  This is so
05620                          * because 'node' is an argument to the function.
05621                          */
05622                         mark_stale_header(rbtdb, header);
05623                         if (log)
05624                                 isc_log_write(dns_lctx, category, module,
05625                                               level, "overmem cache: stale %s",
05626                                               printname);
05627                 } else if (force_expire) {
05628                         if (! RETAIN(header)) {
05629                                 set_ttl(rbtdb, header, 0);
05630                                 mark_stale_header(rbtdb, header);
05631                         } else if (log) {
05632                                 isc_log_write(dns_lctx, category, module,
05633                                               level, "overmem cache: "
05634                                               "reprieve by RETAIN() %s",
05635                                               printname);
05636                         }
05637                 } else if (isc_mem_isovermem(rbtdb->common.mctx) && log)
05638                         isc_log_write(dns_lctx, category, module, level,
05639                                       "overmem cache: saved %s", printname);
05640 
05641         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
05642                     isc_rwlocktype_write);
05643 
05644         return (ISC_R_SUCCESS);
05645 }
05646 
05647 static void
05648 overmem(dns_db_t *db, isc_boolean_t over) {
05649         /* This is an empty callback.  See adb.c:water() */
05650 
05651         UNUSED(db);
05652         UNUSED(over);
05653 
05654         return;
05655 }
05656 
05657 static void
05658 printnode(dns_db_t *db, dns_dbnode_t *node, FILE *out) {
05659         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
05660         dns_rbtnode_t *rbtnode = node;
05661         isc_boolean_t first;
05662 
05663         REQUIRE(VALID_RBTDB(rbtdb));
05664 
05665         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
05666                   isc_rwlocktype_read);
05667 
05668         fprintf(out, "node %p, %u references, locknum = %u\n",
05669                 rbtnode, dns_rbtnode_refcurrent(rbtnode),
05670                 rbtnode->locknum);
05671         if (rbtnode->data != NULL) {
05672                 rdatasetheader_t *current, *top_next;
05673 
05674                 for (current = rbtnode->data; current != NULL;
05675                      current = top_next) {
05676                         top_next = current->next;
05677                         first = ISC_TRUE;
05678                         fprintf(out, "\ttype %u", current->type);
05679                         do {
05680                                 if (!first)
05681                                         fprintf(out, "\t");
05682                                 first = ISC_FALSE;
05683                                 fprintf(out,
05684                                         "\tserial = %lu, ttl = %u, "
05685                                         "trust = %u, attributes = %u, "
05686                                         "resign = %u\n",
05687                                         (unsigned long)current->serial,
05688                                         current->rdh_ttl,
05689                                         current->trust,
05690                                         current->attributes,
05691                                         current->resign);
05692                                 current = current->down;
05693                         } while (current != NULL);
05694                 }
05695         } else
05696                 fprintf(out, "(empty)\n");
05697 
05698         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
05699                     isc_rwlocktype_read);
05700 }
05701 
05702 static isc_result_t
05703 createiterator(dns_db_t *db, unsigned int options, dns_dbiterator_t **iteratorp)
05704 {
05705         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
05706         rbtdb_dbiterator_t *rbtdbiter;
05707 
05708         REQUIRE(VALID_RBTDB(rbtdb));
05709 
05710         rbtdbiter = isc_mem_get(rbtdb->common.mctx, sizeof(*rbtdbiter));
05711         if (rbtdbiter == NULL)
05712                 return (ISC_R_NOMEMORY);
05713 
05714         rbtdbiter->common.methods = &dbiterator_methods;
05715         rbtdbiter->common.db = NULL;
05716         dns_db_attach(db, &rbtdbiter->common.db);
05717         rbtdbiter->common.relative_names =
05718                         ISC_TF((options & DNS_DB_RELATIVENAMES) != 0);
05719         rbtdbiter->common.magic = DNS_DBITERATOR_MAGIC;
05720         rbtdbiter->common.cleaning = ISC_FALSE;
05721         rbtdbiter->paused = ISC_TRUE;
05722         rbtdbiter->tree_locked = isc_rwlocktype_none;
05723         rbtdbiter->result = ISC_R_SUCCESS;
05724         dns_fixedname_init(&rbtdbiter->name);
05725         dns_fixedname_init(&rbtdbiter->origin);
05726         rbtdbiter->node = NULL;
05727         rbtdbiter->delete = 0;
05728         rbtdbiter->nsec3only = ISC_TF((options & DNS_DB_NSEC3ONLY) != 0);
05729         rbtdbiter->nonsec3 = ISC_TF((options & DNS_DB_NONSEC3) != 0);
05730         memset(rbtdbiter->deletions, 0, sizeof(rbtdbiter->deletions));
05731         dns_rbtnodechain_init(&rbtdbiter->chain, db->mctx);
05732         dns_rbtnodechain_init(&rbtdbiter->nsec3chain, db->mctx);
05733         if (rbtdbiter->nsec3only)
05734                 rbtdbiter->current = &rbtdbiter->nsec3chain;
05735         else
05736                 rbtdbiter->current = &rbtdbiter->chain;
05737 
05738         *iteratorp = (dns_dbiterator_t *)rbtdbiter;
05739 
05740         return (ISC_R_SUCCESS);
05741 }
05742 
05743 static isc_result_t
05744 zone_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
05745                   dns_rdatatype_t type, dns_rdatatype_t covers,
05746                   isc_stdtime_t now, dns_rdataset_t *rdataset,
05747                   dns_rdataset_t *sigrdataset)
05748 {
05749         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
05750         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
05751         rdatasetheader_t *header, *header_next, *found, *foundsig;
05752         rbtdb_serial_t serial;
05753         rbtdb_version_t *rbtversion = version;
05754         isc_boolean_t close_version = ISC_FALSE;
05755         rbtdb_rdatatype_t matchtype, sigmatchtype;
05756 
05757         REQUIRE(VALID_RBTDB(rbtdb));
05758         REQUIRE(type != dns_rdatatype_any);
05759         INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb);
05760 
05761         if (rbtversion == NULL) {
05762                 currentversion(db, (dns_dbversion_t **) (void *)(&rbtversion));
05763                 close_version = ISC_TRUE;
05764         }
05765         serial = rbtversion->serial;
05766         now = 0;
05767 
05768         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
05769                   isc_rwlocktype_read);
05770 
05771         found = NULL;
05772         foundsig = NULL;
05773         matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
05774         if (covers == 0)
05775                 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
05776         else
05777                 sigmatchtype = 0;
05778 
05779         for (header = rbtnode->data; header != NULL; header = header_next) {
05780                 header_next = header->next;
05781                 do {
05782                         if (header->serial <= serial &&
05783                             !IGNORE(header)) {
05784                                 /*
05785                                  * Is this a "this rdataset doesn't
05786                                  * exist" record?
05787                                  */
05788                                 if (NONEXISTENT(header))
05789                                         header = NULL;
05790                                 break;
05791                         } else
05792                                 header = header->down;
05793                 } while (header != NULL);
05794                 if (header != NULL) {
05795                         /*
05796                          * We have an active, extant rdataset.  If it's a
05797                          * type we're looking for, remember it.
05798                          */
05799                         if (header->type == matchtype) {
05800                                 found = header;
05801                                 if (foundsig != NULL)
05802                                         break;
05803                         } else if (header->type == sigmatchtype) {
05804                                 foundsig = header;
05805                                 if (found != NULL)
05806                                         break;
05807                         }
05808                 }
05809         }
05810         if (found != NULL) {
05811                 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
05812                 if (foundsig != NULL)
05813                         bind_rdataset(rbtdb, rbtnode, foundsig, now,
05814                                       sigrdataset);
05815         }
05816 
05817         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
05818                     isc_rwlocktype_read);
05819 
05820         if (close_version)
05821                 closeversion(db, (dns_dbversion_t **) (void *)(&rbtversion),
05822                              ISC_FALSE);
05823 
05824         if (found == NULL)
05825                 return (ISC_R_NOTFOUND);
05826 
05827         return (ISC_R_SUCCESS);
05828 }
05829 
05830 static isc_result_t
05831 cache_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
05832                    dns_rdatatype_t type, dns_rdatatype_t covers,
05833                    isc_stdtime_t now, dns_rdataset_t *rdataset,
05834                    dns_rdataset_t *sigrdataset)
05835 {
05836         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
05837         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
05838         rdatasetheader_t *header, *header_next, *found, *foundsig;
05839         rbtdb_rdatatype_t matchtype, sigmatchtype, negtype;
05840         isc_result_t result;
05841         nodelock_t *lock;
05842         isc_rwlocktype_t locktype;
05843 
05844         REQUIRE(VALID_RBTDB(rbtdb));
05845         REQUIRE(type != dns_rdatatype_any);
05846 
05847         UNUSED(version);
05848 
05849         result = ISC_R_SUCCESS;
05850 
05851         if (now == 0)
05852                 isc_stdtime_get(&now);
05853 
05854         lock = &rbtdb->node_locks[rbtnode->locknum].lock;
05855         locktype = isc_rwlocktype_read;
05856         NODE_LOCK(lock, locktype);
05857 
05858         found = NULL;
05859         foundsig = NULL;
05860         matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
05861         negtype = RBTDB_RDATATYPE_VALUE(0, type);
05862         if (covers == 0)
05863                 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
05864         else
05865                 sigmatchtype = 0;
05866 
05867         for (header = rbtnode->data; header != NULL; header = header_next) {
05868                 header_next = header->next;
05869                 if (header->rdh_ttl < now) {
05870                         if ((header->rdh_ttl < now - RBTDB_VIRTUAL) &&
05871                             (locktype == isc_rwlocktype_write ||
05872                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
05873                                 /*
05874                                  * We update the node's status only when we
05875                                  * can get write access.
05876                                  */
05877                                 locktype = isc_rwlocktype_write;
05878 
05879                                 /*
05880                                  * We don't check if refcurrent(rbtnode) == 0
05881                                  * and try to free like we do in cache_find(),
05882                                  * because refcurrent(rbtnode) must be
05883                                  * non-zero.  This is so because 'node' is an
05884                                  * argument to the function.
05885                                  */
05886                                 mark_stale_header(rbtdb, header);
05887                         }
05888                 } else if (EXISTS(header)) {
05889                         if (header->type == matchtype)
05890                                 found = header;
05891                         else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
05892                                  header->type == negtype)
05893                                 found = header;
05894                         else if (header->type == sigmatchtype)
05895                                 foundsig = header;
05896                 }
05897         }
05898         if (found != NULL) {
05899                 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
05900                 if (!NEGATIVE(found) && foundsig != NULL)
05901                         bind_rdataset(rbtdb, rbtnode, foundsig, now,
05902                                       sigrdataset);
05903         }
05904 
05905         NODE_UNLOCK(lock, locktype);
05906 
05907         if (found == NULL)
05908                 return (ISC_R_NOTFOUND);
05909 
05910         if (NEGATIVE(found)) {
05911                 /*
05912                  * We found a negative cache entry.
05913                  */
05914                 if (NXDOMAIN(found))
05915                         result = DNS_R_NCACHENXDOMAIN;
05916                 else
05917                         result = DNS_R_NCACHENXRRSET;
05918         }
05919 
05920         update_cachestats(rbtdb, result);
05921 
05922         return (result);
05923 }
05924 
05925 static isc_result_t
05926 allrdatasets(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
05927              isc_stdtime_t now, dns_rdatasetiter_t **iteratorp)
05928 {
05929         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
05930         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
05931         rbtdb_version_t *rbtversion = version;
05932         rbtdb_rdatasetiter_t *iterator;
05933         unsigned int refs;
05934 
05935         REQUIRE(VALID_RBTDB(rbtdb));
05936 
05937         iterator = isc_mem_get(rbtdb->common.mctx, sizeof(*iterator));
05938         if (iterator == NULL)
05939                 return (ISC_R_NOMEMORY);
05940 
05941         if ((db->attributes & DNS_DBATTR_CACHE) == 0) {
05942                 now = 0;
05943                 if (rbtversion == NULL)
05944                         currentversion(db,
05945                                  (dns_dbversion_t **) (void *)(&rbtversion));
05946                 else {
05947                         INSIST(rbtversion->rbtdb == rbtdb);
05948 
05949                         isc_refcount_increment(&rbtversion->references,
05950                                                &refs);
05951                         INSIST(refs > 1);
05952                 }
05953         } else {
05954                 if (now == 0)
05955                         isc_stdtime_get(&now);
05956                 rbtversion = NULL;
05957         }
05958 
05959         iterator->common.magic = DNS_RDATASETITER_MAGIC;
05960         iterator->common.methods = &rdatasetiter_methods;
05961         iterator->common.db = db;
05962         iterator->common.node = node;
05963         iterator->common.version = (dns_dbversion_t *)rbtversion;
05964         iterator->common.now = now;
05965 
05966         NODE_STRONGLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
05967 
05968         dns_rbtnode_refincrement(rbtnode, &refs);
05969         INSIST(refs != 0);
05970 
05971         iterator->current = NULL;
05972 
05973         NODE_STRONGUNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
05974 
05975         *iteratorp = (dns_rdatasetiter_t *)iterator;
05976 
05977         return (ISC_R_SUCCESS);
05978 }
05979 
05980 static isc_boolean_t
05981 cname_and_other_data(dns_rbtnode_t *node, rbtdb_serial_t serial) {
05982         rdatasetheader_t *header, *header_next;
05983         isc_boolean_t cname, other_data;
05984         dns_rdatatype_t rdtype;
05985 
05986         /*
05987          * The caller must hold the node lock.
05988          */
05989 
05990         /*
05991          * Look for CNAME and "other data" rdatasets active in our version.
05992          */
05993         cname = ISC_FALSE;
05994         other_data = ISC_FALSE;
05995         for (header = node->data; header != NULL; header = header_next) {
05996                 header_next = header->next;
05997                 if (header->type == dns_rdatatype_cname) {
05998                         /*
05999                          * Look for an active extant CNAME.
06000                          */
06001                         do {
06002                                 if (header->serial <= serial &&
06003                                     !IGNORE(header)) {
06004                                         /*
06005                                          * Is this a "this rdataset doesn't
06006                                          * exist" record?
06007                                          */
06008                                         if (NONEXISTENT(header))
06009                                                 header = NULL;
06010                                         break;
06011                                 } else
06012                                         header = header->down;
06013                         } while (header != NULL);
06014                         if (header != NULL)
06015                                 cname = ISC_TRUE;
06016                 } else {
06017                         /*
06018                          * Look for active extant "other data".
06019                          *
06020                          * "Other data" is any rdataset whose type is not
06021                          * KEY, NSEC, SIG or RRSIG.
06022                          */
06023                         rdtype = RBTDB_RDATATYPE_BASE(header->type);
06024                         if (rdtype != dns_rdatatype_key &&
06025                             rdtype != dns_rdatatype_sig &&
06026                             rdtype != dns_rdatatype_nsec &&
06027                             rdtype != dns_rdatatype_rrsig) {
06028                                 /*
06029                                  * Is it active and extant?
06030                                  */
06031                                 do {
06032                                         if (header->serial <= serial &&
06033                                             !IGNORE(header)) {
06034                                                 /*
06035                                                  * Is this a "this rdataset
06036                                                  * doesn't exist" record?
06037                                                  */
06038                                                 if (NONEXISTENT(header))
06039                                                         header = NULL;
06040                                                 break;
06041                                         } else
06042                                                 header = header->down;
06043                                 } while (header != NULL);
06044                                 if (header != NULL)
06045                                         other_data = ISC_TRUE;
06046                         }
06047                 }
06048         }
06049 
06050         if (cname && other_data)
06051                 return (ISC_TRUE);
06052 
06053         return (ISC_FALSE);
06054 }
06055 
06056 static isc_result_t
06057 resign_insert(dns_rbtdb_t *rbtdb, int idx, rdatasetheader_t *newheader) {
06058         isc_result_t result;
06059 
06060         INSIST(!IS_CACHE(rbtdb));
06061         INSIST(newheader->heap_index == 0);
06062         INSIST(!ISC_LINK_LINKED(newheader, link));
06063 
06064         result = isc_heap_insert(rbtdb->heaps[idx], newheader);
06065         return (result);
06066 }
06067 
06068 static void
06069 resign_delete(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
06070               rdatasetheader_t *header)
06071 {
06072         /*
06073          * Remove the old header from the heap
06074          */
06075         if (header != NULL && header->heap_index != 0) {
06076                 isc_heap_delete(rbtdb->heaps[header->node->locknum],
06077                                 header->heap_index);
06078                 header->heap_index = 0;
06079                 if (version != NULL) {
06080                         new_reference(rbtdb, header->node);
06081                         ISC_LIST_APPEND(version->resigned_list, header, link);
06082                 }
06083         }
06084 }
06085 
06086 static isc_result_t
06087 add32(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, rbtdb_version_t *rbtversion,
06088       rdatasetheader_t *newheader, unsigned int options, isc_boolean_t loading,
06089       dns_rdataset_t *addedrdataset, isc_stdtime_t now)
06090 {
06091         rbtdb_changed_t *changed = NULL;
06092         rdatasetheader_t *topheader, *topheader_prev, *header, *sigheader;
06093         unsigned char *merged;
06094         isc_result_t result;
06095         isc_boolean_t header_nx;
06096         isc_boolean_t newheader_nx;
06097         isc_boolean_t merge;
06098         dns_rdatatype_t rdtype, covers;
06099         rbtdb_rdatatype_t negtype, sigtype;
06100         dns_trust_t trust;
06101         int idx;
06102 
06103         /*
06104          * Add an rdatasetheader_t to a node.
06105          */
06106 
06107         /*
06108          * Caller must be holding the node lock.
06109          */
06110 
06111         if ((options & DNS_DBADD_MERGE) != 0) {
06112                 REQUIRE(rbtversion != NULL);
06113                 merge = ISC_TRUE;
06114         } else
06115                 merge = ISC_FALSE;
06116 
06117         if ((options & DNS_DBADD_FORCE) != 0)
06118                 trust = dns_trust_ultimate;
06119         else
06120                 trust = newheader->trust;
06121 
06122         if (rbtversion != NULL && !loading) {
06123                 /*
06124                  * We always add a changed record, even if no changes end up
06125                  * being made to this node, because it's harmless and
06126                  * simplifies the code.
06127                  */
06128                 changed = add_changed(rbtdb, rbtversion, rbtnode);
06129                 if (changed == NULL) {
06130                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
06131                         return (ISC_R_NOMEMORY);
06132                 }
06133         }
06134 
06135         newheader_nx = NONEXISTENT(newheader) ? ISC_TRUE : ISC_FALSE;
06136         topheader_prev = NULL;
06137         sigheader = NULL;
06138         negtype = 0;
06139         if (rbtversion == NULL && !newheader_nx) {
06140                 rdtype = RBTDB_RDATATYPE_BASE(newheader->type);
06141                 covers = RBTDB_RDATATYPE_EXT(newheader->type);
06142                 sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, covers);
06143                 if (NEGATIVE(newheader)) {
06144                         /*
06145                          * We're adding a negative cache entry.
06146                          */
06147                         for (topheader = rbtnode->data;
06148                              topheader != NULL;
06149                              topheader = topheader->next) {
06150                                 /*
06151                                  * If we're adding an negative cache entry
06152                                  * which covers all types (NXDOMAIN,
06153                                  * NODATA(QTYPE=ANY)).
06154                                  *
06155                                  * We make all other data stale so that the
06156                                  * only rdataset that can be found at this
06157                                  * node is the negative cache entry.
06158                                  *
06159                                  * Otherwise look for any RRSIGs of the
06160                                  * given type so they can be marked stale
06161                                  * later.
06162                                  */
06163                                 if (covers == dns_rdatatype_any) {
06164                                         set_ttl(rbtdb, topheader, 0);
06165                                         mark_stale_header(rbtdb, topheader);
06166                                 } else if (topheader->type == sigtype)
06167                                         sigheader = topheader;
06168                         }
06169                         if (covers == dns_rdatatype_any)
06170                                 goto find_header;
06171                         negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
06172                 } else {
06173                         /*
06174                          * We're adding something that isn't a
06175                          * negative cache entry.  Look for an extant
06176                          * non-stale NXDOMAIN/NODATA(QTYPE=ANY) negative
06177                          * cache entry.  If we're adding an RRSIG, also
06178                          * check for an extant non-stale NODATA ncache
06179                          * entry which covers the same type as the RRSIG.
06180                          */
06181                         for (topheader = rbtnode->data;
06182                              topheader != NULL;
06183                              topheader = topheader->next) {
06184                                 if ((topheader->type ==
06185                                         RBTDB_RDATATYPE_NCACHEANY) ||
06186                                         (newheader->type == sigtype &&
06187                                         topheader->type ==
06188                                         RBTDB_RDATATYPE_VALUE(0, covers))) {
06189                                                 break;
06190                                         }
06191                         }
06192                         if (topheader != NULL && EXISTS(topheader) &&
06193                             topheader->rdh_ttl >= now) {
06194                                 /*
06195                                  * Found one.
06196                                  */
06197                                 if (trust < topheader->trust) {
06198                                         /*
06199                                          * The NXDOMAIN/NODATA(QTYPE=ANY)
06200                                          * is more trusted.
06201                                          */
06202                                         free_rdataset(rbtdb,
06203                                                       rbtdb->common.mctx,
06204                                                       newheader);
06205                                         if (addedrdataset != NULL)
06206                                                 bind_rdataset(rbtdb, rbtnode,
06207                                                               topheader, now,
06208                                                               addedrdataset);
06209                                         return (DNS_R_UNCHANGED);
06210                                 }
06211                                 /*
06212                                  * The new rdataset is better.  Expire the
06213                                  * ncache entry.
06214                                  */
06215                                 set_ttl(rbtdb, topheader, 0);
06216                                 mark_stale_header(rbtdb, topheader);
06217                                 topheader = NULL;
06218                                 goto find_header;
06219                         }
06220                         negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
06221                 }
06222         }
06223 
06224         for (topheader = rbtnode->data;
06225              topheader != NULL;
06226              topheader = topheader->next) {
06227                 if (topheader->type == newheader->type ||
06228                     topheader->type == negtype)
06229                         break;
06230                 topheader_prev = topheader;
06231         }
06232 
06233  find_header:
06234         /*
06235          * If header isn't NULL, we've found the right type.  There may be
06236          * IGNORE rdatasets between the top of the chain and the first real
06237          * data.  We skip over them.
06238          */
06239         header = topheader;
06240         while (header != NULL && IGNORE(header))
06241                 header = header->down;
06242         if (header != NULL) {
06243                 header_nx = NONEXISTENT(header) ? ISC_TRUE : ISC_FALSE;
06244 
06245                 /*
06246                  * Deleting an already non-existent rdataset has no effect.
06247                  */
06248                 if (header_nx && newheader_nx) {
06249                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
06250                         return (DNS_R_UNCHANGED);
06251                 }
06252 
06253                 /*
06254                  * Trying to add an rdataset with lower trust to a cache DB
06255                  * has no effect, provided that the cache data isn't stale.
06256                  */
06257                 if (rbtversion == NULL && trust < header->trust &&
06258                     (header->rdh_ttl >= now || header_nx)) {
06259                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
06260                         if (addedrdataset != NULL)
06261                                 bind_rdataset(rbtdb, rbtnode, header, now,
06262                                               addedrdataset);
06263                         return (DNS_R_UNCHANGED);
06264                 }
06265 
06266                 /*
06267                  * Don't merge if a nonexistent rdataset is involved.
06268                  */
06269                 if (merge && (header_nx || newheader_nx))
06270                         merge = ISC_FALSE;
06271 
06272                 /*
06273                  * If 'merge' is ISC_TRUE, we'll try to create a new rdataset
06274                  * that is the union of 'newheader' and 'header'.
06275                  */
06276                 if (merge) {
06277                         unsigned int flags = 0;
06278                         INSIST(rbtversion->serial >= header->serial);
06279                         merged = NULL;
06280                         result = ISC_R_SUCCESS;
06281 
06282                         if ((options & DNS_DBADD_EXACT) != 0)
06283                                 flags |= DNS_RDATASLAB_EXACT;
06284                         if ((options & DNS_DBADD_EXACTTTL) != 0 &&
06285                              newheader->rdh_ttl != header->rdh_ttl)
06286                                         result = DNS_R_NOTEXACT;
06287                         else if (newheader->rdh_ttl != header->rdh_ttl)
06288                                 flags |= DNS_RDATASLAB_FORCE;
06289                         if (result == ISC_R_SUCCESS)
06290                                 result = dns_rdataslab_merge(
06291                                              (unsigned char *)header,
06292                                              (unsigned char *)newheader,
06293                                              (unsigned int)(sizeof(*newheader)),
06294                                              rbtdb->common.mctx,
06295                                              rbtdb->common.rdclass,
06296                                              (dns_rdatatype_t)header->type,
06297                                              flags, &merged);
06298                         if (result == ISC_R_SUCCESS) {
06299                                 /*
06300                                  * If 'header' has the same serial number as
06301                                  * we do, we could clean it up now if we knew
06302                                  * that our caller had no references to it.
06303                                  * We don't know this, however, so we leave it
06304                                  * alone.  It will get cleaned up when
06305                                  * clean_zone_node() runs.
06306                                  */
06307                                 free_rdataset(rbtdb, rbtdb->common.mctx,
06308                                               newheader);
06309                                 newheader = (rdatasetheader_t *)merged;
06310                                 init_rdataset(rbtdb, newheader);
06311                                 update_newheader(newheader, header);
06312                                 if (loading && RESIGN(newheader) &&
06313                                     RESIGN(header) &&
06314                                     header->resign < newheader->resign)
06315                                         newheader->resign = header->resign;
06316                         } else {
06317                                 free_rdataset(rbtdb, rbtdb->common.mctx,
06318                                               newheader);
06319                                 return (result);
06320                         }
06321                 }
06322                 /*
06323                  * Don't replace existing NS, A and AAAA RRsets
06324                  * in the cache if they already exist.  This
06325                  * prevents named being locked to old servers.
06326                  * Don't lower trust of existing record if the
06327                  * update is forced.
06328                  */
06329                 if (IS_CACHE(rbtdb) && header->rdh_ttl >= now &&
06330                     header->type == dns_rdatatype_ns &&
06331                     !header_nx && !newheader_nx &&
06332                     header->trust >= newheader->trust &&
06333                     dns_rdataslab_equalx((unsigned char *)header,
06334                                          (unsigned char *)newheader,
06335                                          (unsigned int)(sizeof(*newheader)),
06336                                          rbtdb->common.rdclass,
06337                                          (dns_rdatatype_t)header->type)) {
06338                         /*
06339                          * Honour the new ttl if it is less than the
06340                          * older one.
06341                          */
06342                         if (header->rdh_ttl > newheader->rdh_ttl)
06343                                 set_ttl(rbtdb, header, newheader->rdh_ttl);
06344                         if (header->noqname == NULL &&
06345                             newheader->noqname != NULL) {
06346                                 header->noqname = newheader->noqname;
06347                                 newheader->noqname = NULL;
06348                         }
06349                         if (header->closest == NULL &&
06350                             newheader->closest != NULL) {
06351                                 header->closest = newheader->closest;
06352                                 newheader->closest = NULL;
06353                         }
06354                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
06355                         if (addedrdataset != NULL)
06356                                 bind_rdataset(rbtdb, rbtnode, header, now,
06357                                               addedrdataset);
06358                         return (ISC_R_SUCCESS);
06359                 }
06360                 /*
06361                  * If we will be replacing an NS RRset, force its TTL
06362                  * to be no more than the current NS RRset's TTL.  This
06363                  * ensures the delegations that are withdrawn are honoured.
06364                  */
06365                 if (IS_CACHE(rbtdb) && header->rdh_ttl >= now &&
06366                     header->type == dns_rdatatype_ns &&
06367                     !header_nx && !newheader_nx &&
06368                     header->trust <= newheader->trust) {
06369                         if (newheader->rdh_ttl > header->rdh_ttl) {
06370                                 newheader->rdh_ttl = header->rdh_ttl;
06371                         }
06372                 }
06373                 if (IS_CACHE(rbtdb) && header->rdh_ttl >= now &&
06374                     (options & DNS_DBADD_PREFETCH) == 0 &&
06375                     (header->type == dns_rdatatype_a ||
06376                      header->type == dns_rdatatype_aaaa ||
06377                      header->type == dns_rdatatype_ds ||
06378                      header->type == RBTDB_RDATATYPE_SIGDDS) &&
06379                     !header_nx && !newheader_nx &&
06380                     header->trust >= newheader->trust &&
06381                     dns_rdataslab_equal((unsigned char *)header,
06382                                         (unsigned char *)newheader,
06383                                         (unsigned int)(sizeof(*newheader)))) {
06384                         /*
06385                          * Honour the new ttl if it is less than the
06386                          * older one.
06387                          */
06388                         if (header->rdh_ttl > newheader->rdh_ttl)
06389                                 set_ttl(rbtdb, header, newheader->rdh_ttl);
06390                         if (header->noqname == NULL &&
06391                             newheader->noqname != NULL) {
06392                                 header->noqname = newheader->noqname;
06393                                 newheader->noqname = NULL;
06394                         }
06395                         if (header->closest == NULL &&
06396                             newheader->closest != NULL) {
06397                                 header->closest = newheader->closest;
06398                                 newheader->closest = NULL;
06399                         }
06400                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
06401                         if (addedrdataset != NULL)
06402                                 bind_rdataset(rbtdb, rbtnode, header, now,
06403                                               addedrdataset);
06404                         return (ISC_R_SUCCESS);
06405                 }
06406                 INSIST(rbtversion == NULL ||
06407                        rbtversion->serial >= topheader->serial);
06408                 if (topheader_prev != NULL)
06409                         topheader_prev->next = newheader;
06410                 else
06411                         rbtnode->data = newheader;
06412                 newheader->next = topheader->next;
06413                 if (loading) {
06414                         /*
06415                          * There are no other references to 'header' when
06416                          * loading, so we MAY clean up 'header' now.
06417                          * Since we don't generate changed records when
06418                          * loading, we MUST clean up 'header' now.
06419                          */
06420                         newheader->down = NULL;
06421                         free_rdataset(rbtdb, rbtdb->common.mctx, header);
06422 
06423                         idx = newheader->node->locknum;
06424                         if (IS_CACHE(rbtdb)) {
06425                                 ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
06426                                                  newheader, link);
06427                                 INSIST(rbtdb->heaps != NULL);
06428                                 (void)isc_heap_insert(rbtdb->heaps[idx],
06429                                                       newheader);
06430                         } else if (RESIGN(newheader)) {
06431                                 result = resign_insert(rbtdb, idx, newheader);
06432                                 if (result != ISC_R_SUCCESS)
06433                                         return (result);
06434                         }
06435                 } else {
06436                         newheader->down = topheader;
06437                         topheader->next = newheader;
06438                         rbtnode->dirty = 1;
06439                         if (changed != NULL)
06440                                 changed->dirty = ISC_TRUE;
06441                         if (rbtversion == NULL) {
06442                                 set_ttl(rbtdb, header, 0);
06443                                 mark_stale_header(rbtdb, header);
06444                                 if (sigheader != NULL) {
06445                                         set_ttl(rbtdb, sigheader, 0);
06446                                         mark_stale_header(rbtdb, sigheader);
06447                                 }
06448                         }
06449                         idx = newheader->node->locknum;
06450                         if (IS_CACHE(rbtdb)) {
06451                                 ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
06452                                                  newheader, link);
06453                                 /*
06454                                  * XXXMLG We don't check the return value
06455                                  * here.  If it fails, we will not do TTL
06456                                  * based expiry on this node.  However, we
06457                                  * will do it on the LRU side, so memory
06458                                  * will not leak... for long.
06459                                  */
06460                                 INSIST(rbtdb->heaps != NULL);
06461                                 (void)isc_heap_insert(rbtdb->heaps[idx],
06462                                                       newheader);
06463                         } else if (RESIGN(newheader)) {
06464                                 resign_delete(rbtdb, rbtversion, header);
06465                                 result = resign_insert(rbtdb, idx, newheader);
06466                                 if (result != ISC_R_SUCCESS)
06467                                         return (result);
06468                         }
06469                 }
06470         } else {
06471                 /*
06472                  * No non-IGNORED rdatasets of the given type exist at
06473                  * this node.
06474                  */
06475 
06476                 /*
06477                  * If we're trying to delete the type, don't bother.
06478                  */
06479                 if (newheader_nx) {
06480                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
06481                         return (DNS_R_UNCHANGED);
06482                 }
06483 
06484                 if (topheader != NULL) {
06485                         /*
06486                          * We have a list of rdatasets of the given type,
06487                          * but they're all marked IGNORE.  We simply insert
06488                          * the new rdataset at the head of the list.
06489                          *
06490                          * Ignored rdatasets cannot occur during loading, so
06491                          * we INSIST on it.
06492                          */
06493                         INSIST(!loading);
06494                         INSIST(rbtversion == NULL ||
06495                                rbtversion->serial >= topheader->serial);
06496                         if (topheader_prev != NULL)
06497                                 topheader_prev->next = newheader;
06498                         else
06499                                 rbtnode->data = newheader;
06500                         newheader->next = topheader->next;
06501                         newheader->down = topheader;
06502                         topheader->next = newheader;
06503                         rbtnode->dirty = 1;
06504                         if (changed != NULL)
06505                                 changed->dirty = ISC_TRUE;
06506                 } else {
06507                         /*
06508                          * No rdatasets of the given type exist at the node.
06509                          */
06510                         newheader->next = rbtnode->data;
06511                         newheader->down = NULL;
06512                         rbtnode->data = newheader;
06513                 }
06514                 idx = newheader->node->locknum;
06515                 if (IS_CACHE(rbtdb)) {
06516                         ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
06517                                          newheader, link);
06518                         isc_heap_insert(rbtdb->heaps[idx], newheader);
06519                 } else if (RESIGN(newheader)) {
06520                         resign_delete(rbtdb, rbtversion, header);
06521                         result = resign_insert(rbtdb, idx, newheader);
06522                         if (result != ISC_R_SUCCESS)
06523                                 return (result);
06524                 }
06525         }
06526 
06527         /*
06528          * Check if the node now contains CNAME and other data.
06529          */
06530         if (rbtversion != NULL &&
06531             cname_and_other_data(rbtnode, rbtversion->serial))
06532                 return (DNS_R_CNAMEANDOTHER);
06533 
06534         if (addedrdataset != NULL)
06535                 bind_rdataset(rbtdb, rbtnode, newheader, now, addedrdataset);
06536 
06537         return (ISC_R_SUCCESS);
06538 }
06539 
06540 static inline isc_boolean_t
06541 delegating_type(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
06542                 rbtdb_rdatatype_t type)
06543 {
06544         if (IS_CACHE(rbtdb)) {
06545                 if (type == dns_rdatatype_dname)
06546                         return (ISC_TRUE);
06547                 else
06548                         return (ISC_FALSE);
06549         } else if (type == dns_rdatatype_dname ||
06550                    (type == dns_rdatatype_ns &&
06551                     (node != rbtdb->origin_node || IS_STUB(rbtdb))))
06552                 return (ISC_TRUE);
06553         return (ISC_FALSE);
06554 }
06555 
06556 static inline isc_result_t
06557 addnoqname(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
06558            dns_rdataset_t *rdataset)
06559 {
06560         struct noqname *noqname;
06561         isc_mem_t *mctx = rbtdb->common.mctx;
06562         dns_name_t name;
06563         dns_rdataset_t neg, negsig;
06564         isc_result_t result;
06565         isc_region_t r;
06566 
06567         dns_name_init(&name, NULL);
06568         dns_rdataset_init(&neg);
06569         dns_rdataset_init(&negsig);
06570 
06571         result = dns_rdataset_getnoqname(rdataset, &name, &neg, &negsig);
06572         RUNTIME_CHECK(result == ISC_R_SUCCESS);
06573 
06574         noqname = isc_mem_get(mctx, sizeof(*noqname));
06575         if (noqname == NULL) {
06576                 result = ISC_R_NOMEMORY;
06577                 goto cleanup;
06578         }
06579         dns_name_init(&noqname->name, NULL);
06580         noqname->neg = NULL;
06581         noqname->negsig = NULL;
06582         noqname->type = neg.type;
06583         result = dns_name_dup(&name, mctx, &noqname->name);
06584         if (result != ISC_R_SUCCESS)
06585                 goto cleanup;
06586         result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0);
06587         if (result != ISC_R_SUCCESS)
06588                 goto cleanup;
06589         noqname->neg = r.base;
06590         result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0);
06591         if (result != ISC_R_SUCCESS)
06592                 goto cleanup;
06593         noqname->negsig = r.base;
06594         dns_rdataset_disassociate(&neg);
06595         dns_rdataset_disassociate(&negsig);
06596         newheader->noqname = noqname;
06597         return (ISC_R_SUCCESS);
06598 
06599 cleanup:
06600         dns_rdataset_disassociate(&neg);
06601         dns_rdataset_disassociate(&negsig);
06602         if (noqname != NULL)
06603                 free_noqname(mctx, &noqname);
06604         return(result);
06605 }
06606 
06607 static inline isc_result_t
06608 addclosest(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
06609            dns_rdataset_t *rdataset)
06610 {
06611         struct noqname *closest;
06612         isc_mem_t *mctx = rbtdb->common.mctx;
06613         dns_name_t name;
06614         dns_rdataset_t neg, negsig;
06615         isc_result_t result;
06616         isc_region_t r;
06617 
06618         dns_name_init(&name, NULL);
06619         dns_rdataset_init(&neg);
06620         dns_rdataset_init(&negsig);
06621 
06622         result = dns_rdataset_getclosest(rdataset, &name, &neg, &negsig);
06623         RUNTIME_CHECK(result == ISC_R_SUCCESS);
06624 
06625         closest = isc_mem_get(mctx, sizeof(*closest));
06626         if (closest == NULL) {
06627                 result = ISC_R_NOMEMORY;
06628                 goto cleanup;
06629         }
06630         dns_name_init(&closest->name, NULL);
06631         closest->neg = NULL;
06632         closest->negsig = NULL;
06633         closest->type = neg.type;
06634         result = dns_name_dup(&name, mctx, &closest->name);
06635         if (result != ISC_R_SUCCESS)
06636                 goto cleanup;
06637         result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0);
06638         if (result != ISC_R_SUCCESS)
06639                 goto cleanup;
06640         closest->neg = r.base;
06641         result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0);
06642         if (result != ISC_R_SUCCESS)
06643                 goto cleanup;
06644         closest->negsig = r.base;
06645         dns_rdataset_disassociate(&neg);
06646         dns_rdataset_disassociate(&negsig);
06647         newheader->closest = closest;
06648         return (ISC_R_SUCCESS);
06649 
06650  cleanup:
06651         dns_rdataset_disassociate(&neg);
06652         dns_rdataset_disassociate(&negsig);
06653         if (closest != NULL)
06654                 free_noqname(mctx, &closest);
06655         return(result);
06656 }
06657 
/*
 * Declared ahead of use so the functions below can test whether a
 * database uses the zone method table
 * ("rbtdb->common.methods == &zone_methods").
 * NOTE(review): the initialized definition is presumably further down
 * in this file -- confirm.
 */
06658 static dns_dbmethods_t zone_methods;
06659 
06660 static isc_result_t
06661 addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
06662             isc_stdtime_t now, dns_rdataset_t *rdataset, unsigned int options,
06663             dns_rdataset_t *addedrdataset)
06664 {
06665         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
06666         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
06667         rbtdb_version_t *rbtversion = version;
06668         isc_region_t region;
06669         rdatasetheader_t *newheader;
06670         rdatasetheader_t *header;
06671         isc_result_t result;
06672         isc_boolean_t delegating;
06673         isc_boolean_t newnsec;
06674         isc_boolean_t tree_locked = ISC_FALSE;
06675         isc_boolean_t cache_is_overmem = ISC_FALSE;
06676         dns_fixedname_t fixed;
06677         dns_name_t *name;
06678 
06679         REQUIRE(VALID_RBTDB(rbtdb));
06680         INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb);
06681 
06682         if (rbtdb->common.methods == &zone_methods)
06683                 REQUIRE(((rbtnode->nsec == DNS_RBT_NSEC_NSEC3 &&
06684                           (rdataset->type == dns_rdatatype_nsec3 ||
06685                            rdataset->covers == dns_rdatatype_nsec3)) ||
06686                          (rbtnode->nsec != DNS_RBT_NSEC_NSEC3 &&
06687                            rdataset->type != dns_rdatatype_nsec3 &&
06688                            rdataset->covers != dns_rdatatype_nsec3)));
06689 
06690         if (rbtversion == NULL) {
06691                 if (now == 0)
06692                         isc_stdtime_get(&now);
06693         } else
06694                 now = 0;
06695 
06696         result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
06697                                             &region, sizeof(rdatasetheader_t));
06698         if (result != ISC_R_SUCCESS)
06699                 return (result);
06700 
06701         dns_fixedname_init(&fixed);
06702         name = dns_fixedname_name(&fixed);
06703         dns_rbt_fullnamefromnode(node, name);
06704         dns_rdataset_getownercase(rdataset, name);
06705 
06706         newheader = (rdatasetheader_t *)region.base;
06707         init_rdataset(rbtdb, newheader);
06708         setownercase(newheader, name);
06709         set_ttl(rbtdb, newheader, rdataset->ttl + now);
06710         newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
06711                                                 rdataset->covers);
06712         newheader->attributes = 0;
06713         newheader->noqname = NULL;
06714         newheader->closest = NULL;
06715         newheader->count = init_count++;
06716         newheader->trust = rdataset->trust;
06717         newheader->additional_auth = NULL;
06718         newheader->additional_glue = NULL;
06719         newheader->last_used = now;
06720         newheader->node = rbtnode;
06721         if (rbtversion != NULL) {
06722                 newheader->serial = rbtversion->serial;
06723                 now = 0;
06724 
06725                 if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
06726                         newheader->attributes |= RDATASET_ATTR_RESIGN;
06727                         newheader->resign = rdataset->resign;
06728                 } else
06729                         newheader->resign = 0;
06730         } else {
06731                 newheader->serial = 1;
06732                 newheader->resign = 0;
06733                 if ((rdataset->attributes & DNS_RDATASETATTR_PREFETCH) != 0)
06734                         newheader->attributes |= RDATASET_ATTR_PREFETCH;
06735                 if ((rdataset->attributes & DNS_RDATASETATTR_NEGATIVE) != 0)
06736                         newheader->attributes |= RDATASET_ATTR_NEGATIVE;
06737                 if ((rdataset->attributes & DNS_RDATASETATTR_NXDOMAIN) != 0)
06738                         newheader->attributes |= RDATASET_ATTR_NXDOMAIN;
06739                 if ((rdataset->attributes & DNS_RDATASETATTR_OPTOUT) != 0)
06740                         newheader->attributes |= RDATASET_ATTR_OPTOUT;
06741                 if ((rdataset->attributes & DNS_RDATASETATTR_NOQNAME) != 0) {
06742                         result = addnoqname(rbtdb, newheader, rdataset);
06743                         if (result != ISC_R_SUCCESS) {
06744                                 free_rdataset(rbtdb, rbtdb->common.mctx,
06745                                               newheader);
06746                                 return (result);
06747                         }
06748                 }
06749                 if ((rdataset->attributes & DNS_RDATASETATTR_CLOSEST) != 0) {
06750                         result = addclosest(rbtdb, newheader, rdataset);
06751                         if (result != ISC_R_SUCCESS) {
06752                                 free_rdataset(rbtdb, rbtdb->common.mctx,
06753                                               newheader);
06754                                 return (result);
06755                         }
06756                 }
06757         }
06758 
06759         /*
06760          * If we're adding a delegation type (e.g. NS or DNAME for a zone,
06761          * just DNAME for the cache), then we need to set the callback bit
06762          * on the node.
06763          */
06764         if (delegating_type(rbtdb, rbtnode, rdataset->type))
06765                 delegating = ISC_TRUE;
06766         else
06767                 delegating = ISC_FALSE;
06768 
06769         /*
06770          * Add to the auxiliary NSEC tree if we're adding an NSEC record.
06771          */
06772         if (rbtnode->nsec != DNS_RBT_NSEC_HAS_NSEC &&
06773             rdataset->type == dns_rdatatype_nsec)
06774                 newnsec = ISC_TRUE;
06775         else
06776                 newnsec = ISC_FALSE;
06777 
06778         /*
06779          * If we're adding a delegation type, adding to the auxiliary NSEC tree,
06780          * or the DB is a cache in an overmem state, hold an exclusive lock on
06781          * the tree.  In the latter case the lock does not necessarily have to
06782          * be acquired but it will help purge stale entries more effectively.
06783          */
06784         if (IS_CACHE(rbtdb) && isc_mem_isovermem(rbtdb->common.mctx))
06785                 cache_is_overmem = ISC_TRUE;
06786         if (delegating || newnsec || cache_is_overmem) {
06787                 tree_locked = ISC_TRUE;
06788                 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
06789         }
06790 
06791         if (cache_is_overmem)
06792                 overmem_purge(rbtdb, rbtnode->locknum, now, tree_locked);
06793 
06794         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
06795                   isc_rwlocktype_write);
06796 
06797         if (rbtdb->rrsetstats != NULL) {
06798                 newheader->attributes |= RDATASET_ATTR_STATCOUNT;
06799                 update_rrsetstats(rbtdb, newheader, ISC_TRUE);
06800         }
06801 
06802         if (IS_CACHE(rbtdb)) {
06803                 if (tree_locked)
06804                         cleanup_dead_nodes(rbtdb, rbtnode->locknum);
06805 
06806                 header = isc_heap_element(rbtdb->heaps[rbtnode->locknum], 1);
06807                 if (header && header->rdh_ttl < now - RBTDB_VIRTUAL)
06808                         expire_header(rbtdb, header, tree_locked,
06809                                       expire_ttl);
06810 
06811                 /*
06812                  * If we've been holding a write lock on the tree just for
06813                  * cleaning, we can release it now.  However, we still need the
06814                  * node lock.
06815                  */
06816                 if (tree_locked && !delegating && !newnsec) {
06817                         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
06818                         tree_locked = ISC_FALSE;
06819                 }
06820         }
06821 
06822         result = ISC_R_SUCCESS;
06823         if (newnsec) {
06824                 dns_rbtnode_t *nsecnode;
06825 
06826                 dns_rbt_fullnamefromnode(rbtnode, name);
06827                 nsecnode = NULL;
06828                 result = dns_rbt_addnode(rbtdb->nsec, name, &nsecnode);
06829                 if (result == ISC_R_SUCCESS) {
06830                         nsecnode->nsec = DNS_RBT_NSEC_NSEC;
06831                         rbtnode->nsec = DNS_RBT_NSEC_HAS_NSEC;
06832                 } else if (result == ISC_R_EXISTS) {
06833                         rbtnode->nsec = DNS_RBT_NSEC_HAS_NSEC;
06834                         result = ISC_R_SUCCESS;
06835                 }
06836         }
06837 
06838         if (result == ISC_R_SUCCESS)
06839                 result = add32(rbtdb, rbtnode, rbtversion, newheader, options,
06840                                ISC_FALSE, addedrdataset, now);
06841         if (result == ISC_R_SUCCESS && delegating)
06842                 rbtnode->find_callback = 1;
06843 
06844         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
06845                     isc_rwlocktype_write);
06846 
06847         if (tree_locked)
06848                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
06849 
06850         /*
06851          * Update the zone's secure status.  If version is non-NULL
06852          * this is deferred until closeversion() is called.
06853          */
06854         if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
06855                 iszonesecure(db, version, rbtdb->origin_node);
06856 
06857         return (result);
06858 }
06859 
06860 static isc_result_t
06861 subtractrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
06862                  dns_rdataset_t *rdataset, unsigned int options,
06863                  dns_rdataset_t *newrdataset)
06864 {
06865         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
06866         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
06867         rbtdb_version_t *rbtversion = version;
06868         rdatasetheader_t *topheader, *topheader_prev, *header, *newheader;
06869         unsigned char *subresult;
06870         isc_region_t region;
06871         isc_result_t result;
06872         rbtdb_changed_t *changed;
06873 
06874         REQUIRE(VALID_RBTDB(rbtdb));
06875         REQUIRE(rbtversion != NULL && rbtversion->rbtdb == rbtdb);
06876 
06877         if (rbtdb->common.methods == &zone_methods)
06878                 REQUIRE(((rbtnode->nsec == DNS_RBT_NSEC_NSEC3 &&
06879                           (rdataset->type == dns_rdatatype_nsec3 ||
06880                            rdataset->covers == dns_rdatatype_nsec3)) ||
06881                          (rbtnode->nsec != DNS_RBT_NSEC_NSEC3 &&
06882                            rdataset->type != dns_rdatatype_nsec3 &&
06883                            rdataset->covers != dns_rdatatype_nsec3)));
06884 
06885         result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
06886                                             &region, sizeof(rdatasetheader_t));
06887         if (result != ISC_R_SUCCESS)
06888                 return (result);
06889         newheader = (rdatasetheader_t *)region.base;
06890         init_rdataset(rbtdb, newheader);
06891         set_ttl(rbtdb, newheader, rdataset->ttl);
06892         newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
06893                                                 rdataset->covers);
06894         newheader->attributes = 0;
06895         newheader->serial = rbtversion->serial;
06896         newheader->trust = 0;
06897         newheader->noqname = NULL;
06898         newheader->closest = NULL;
06899         newheader->count = init_count++;
06900         newheader->additional_auth = NULL;
06901         newheader->additional_glue = NULL;
06902         newheader->last_used = 0;
06903         newheader->node = rbtnode;
06904         if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
06905                 newheader->attributes |= RDATASET_ATTR_RESIGN;
06906                 newheader->resign = rdataset->resign;
06907         } else
06908                 newheader->resign = 0;
06909 
06910         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
06911                   isc_rwlocktype_write);
06912 
06913         changed = add_changed(rbtdb, rbtversion, rbtnode);
06914         if (changed == NULL) {
06915                 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
06916                 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
06917                             isc_rwlocktype_write);
06918                 return (ISC_R_NOMEMORY);
06919         }
06920 
06921         topheader_prev = NULL;
06922         for (topheader = rbtnode->data;
06923              topheader != NULL;
06924              topheader = topheader->next) {
06925                 if (topheader->type == newheader->type)
06926                         break;
06927                 topheader_prev = topheader;
06928         }
06929         /*
06930          * If header isn't NULL, we've found the right type.  There may be
06931          * IGNORE rdatasets between the top of the chain and the first real
06932          * data.  We skip over them.
06933          */
06934         header = topheader;
06935         while (header != NULL && IGNORE(header))
06936                 header = header->down;
06937         if (header != NULL && EXISTS(header)) {
06938                 unsigned int flags = 0;
06939                 subresult = NULL;
06940                 result = ISC_R_SUCCESS;
06941                 if ((options & DNS_DBSUB_EXACT) != 0) {
06942                         flags |= DNS_RDATASLAB_EXACT;
06943                         if (newheader->rdh_ttl != header->rdh_ttl)
06944                                 result = DNS_R_NOTEXACT;
06945                 }
06946                 if (result == ISC_R_SUCCESS)
06947                         result = dns_rdataslab_subtract(
06948                                         (unsigned char *)header,
06949                                         (unsigned char *)newheader,
06950                                         (unsigned int)(sizeof(*newheader)),
06951                                         rbtdb->common.mctx,
06952                                         rbtdb->common.rdclass,
06953                                         (dns_rdatatype_t)header->type,
06954                                         flags, &subresult);
06955                 if (result == ISC_R_SUCCESS) {
06956                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
06957                         newheader = (rdatasetheader_t *)subresult;
06958                         init_rdataset(rbtdb, newheader);
06959                         update_newheader(newheader, header);
06960                         /*
06961                          * We have to set the serial since the rdataslab
06962                          * subtraction routine copies the reserved portion of
06963                          * header, not newheader.
06964                          */
06965                         newheader->serial = rbtversion->serial;
06966                         /*
06967                          * XXXJT: dns_rdataslab_subtract() copied the pointers
06968                          * to additional info.  We need to clear these fields
06969                          * to avoid having duplicated references.
06970                          */
06971                         newheader->additional_auth = NULL;
06972                         newheader->additional_glue = NULL;
06973                 } else if (result == DNS_R_NXRRSET) {
06974                         /*
06975                          * This subtraction would remove all of the rdata;
06976                          * add a nonexistent header instead.
06977                          */
06978                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
06979                         newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
06980                         if (newheader == NULL) {
06981                                 result = ISC_R_NOMEMORY;
06982                                 goto unlock;
06983                         }
06984                         init_rdataset(rbtdb, newheader);
06985                         set_ttl(rbtdb, newheader, 0);
06986                         newheader->type = topheader->type;
06987                         newheader->attributes = RDATASET_ATTR_NONEXISTENT;
06988                         newheader->trust = 0;
06989                         newheader->serial = rbtversion->serial;
06990                         newheader->noqname = NULL;
06991                         newheader->closest = NULL;
06992                         newheader->count = 0;
06993                         newheader->additional_auth = NULL;
06994                         newheader->additional_glue = NULL;
06995                         newheader->node = rbtnode;
06996                         newheader->resign = 0;
06997                         newheader->last_used = 0;
06998                 } else {
06999                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
07000                         goto unlock;
07001                 }
07002 
07003                 /*
07004                  * If we're here, we want to link newheader in front of
07005                  * topheader.
07006                  */
07007                 INSIST(rbtversion->serial >= topheader->serial);
07008                 if (topheader_prev != NULL)
07009                         topheader_prev->next = newheader;
07010                 else
07011                         rbtnode->data = newheader;
07012                 newheader->next = topheader->next;
07013                 newheader->down = topheader;
07014                 topheader->next = newheader;
07015                 rbtnode->dirty = 1;
07016                 changed->dirty = ISC_TRUE;
07017                 resign_delete(rbtdb, rbtversion, header);
07018         } else {
07019                 /*
07020                  * The rdataset doesn't exist, so we don't need to do anything
07021                  * to satisfy the deletion request.
07022                  */
07023                 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
07024                 if ((options & DNS_DBSUB_EXACT) != 0)
07025                         result = DNS_R_NOTEXACT;
07026                 else
07027                         result = DNS_R_UNCHANGED;
07028         }
07029 
07030         if (result == ISC_R_SUCCESS && newrdataset != NULL)
07031                 bind_rdataset(rbtdb, rbtnode, newheader, 0, newrdataset);
07032 
07033         if (result == DNS_R_NXRRSET && newrdataset != NULL &&
07034             (options & DNS_DBSUB_WANTOLD) != 0)
07035                 bind_rdataset(rbtdb, rbtnode, header, 0, newrdataset);
07036 
07037  unlock:
07038         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
07039                     isc_rwlocktype_write);
07040 
07041         /*
07042          * Update the zone's secure status.  If version is non-NULL
07043          * this is deferred until closeversion() is called.
07044          */
07045         if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
07046                 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
07047 
07048         return (result);
07049 }
07050 
07051 static isc_result_t
07052 deleterdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
07053                dns_rdatatype_t type, dns_rdatatype_t covers)
07054 {
07055         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
07056         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
07057         rbtdb_version_t *rbtversion = version;
07058         isc_result_t result;
07059         rdatasetheader_t *newheader;
07060 
07061         REQUIRE(VALID_RBTDB(rbtdb));
07062         INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb);
07063 
07064         if (type == dns_rdatatype_any)
07065                 return (ISC_R_NOTIMPLEMENTED);
07066         if (type == dns_rdatatype_rrsig && covers == 0)
07067                 return (ISC_R_NOTIMPLEMENTED);
07068 
07069         newheader =