blob: a47b98a7c933346e7dd0335537d926608d00f0c3 [file] [log] [blame]
drha059ad02001-04-17 20:09:11 +00001/*
drhb19a2bc2001-09-16 00:13:26 +00002** 2001 September 15
drha059ad02001-04-17 20:09:11 +00003**
drhb19a2bc2001-09-16 00:13:26 +00004** The author disclaims copyright to this source code. In place of
5** a legal notice, here is a blessing:
drha059ad02001-04-17 20:09:11 +00006**
drhb19a2bc2001-09-16 00:13:26 +00007** May you do good and not evil.
8** May you find forgiveness for yourself and forgive others.
9** May you share freely, never taking more than you give.
drha059ad02001-04-17 20:09:11 +000010**
11*************************************************************************
drh73509ee2003-04-06 20:44:45 +000012** $Id: btree.c,v 1.87 2003/04/06 20:44:45 drh Exp $
drh8b2f49b2001-06-08 00:21:52 +000013**
14** This file implements a external (disk-based) database using BTrees.
15** For a detailed discussion of BTrees, refer to
16**
17** Donald E. Knuth, THE ART OF COMPUTER PROGRAMMING, Volume 3:
18** "Sorting And Searching", pages 473-480. Addison-Wesley
19** Publishing Company, Reading, Massachusetts.
20**
21** The basic idea is that each page of the file contains N database
22** entries and N+1 pointers to subpages.
23**
24** ----------------------------------------------------------------
25** | Ptr(0) | Key(0) | Ptr(1) | Key(1) | ... | Key(N) | Ptr(N+1) |
26** ----------------------------------------------------------------
27**
28** All of the keys on the page that Ptr(0) points to have values less
29** than Key(0). All of the keys on page Ptr(1) and its subpages have
30** values greater than Key(0) and less than Key(1). All of the keys
31** on Ptr(N+1) and its subpages have values greater than Key(N). And
32** so forth.
33**
drh5e00f6c2001-09-13 13:46:56 +000034** Finding a particular key requires reading O(log(M)) pages from the
35** disk where M is the number of entries in the tree.
drh8b2f49b2001-06-08 00:21:52 +000036**
37** In this implementation, a single file can hold one or more separate
38** BTrees. Each BTree is identified by the index of its root page. The
39** key and data for any entry are combined to form the "payload". Up to
40** MX_LOCAL_PAYLOAD bytes of payload can be carried directly on the
41** database page. If the payload is larger than MX_LOCAL_PAYLOAD bytes
42** then surplus bytes are stored on overflow pages. The payload for an
43** entry and the preceding pointer are combined to form a "Cell". Each
drhb19a2bc2001-09-16 00:13:26 +000044** page has a small header which contains the Ptr(N+1) pointer.
drh8b2f49b2001-06-08 00:21:52 +000045**
46** The first page of the file contains a magic string used to verify that
47** the file really is a valid BTree database, a pointer to a list of unused
48** pages in the file, and some meta information. The root of the first
49** BTree begins on page 2 of the file. (Pages are numbered beginning with
50** 1, not 0.) Thus a minimum database contains 2 pages.
drha059ad02001-04-17 20:09:11 +000051*/
paulb95a8862003-04-01 21:16:41 +000052/* We don't want the btree function macros */
53#define SQLITE_NO_BTREE_DEFS
54
drha059ad02001-04-17 20:09:11 +000055#include "sqliteInt.h"
56#include "pager.h"
57#include "btree.h"
58#include <assert.h>
59
paulb95a8862003-04-01 21:16:41 +000060/* Forward declarations */
61static BtOps sqliteBtreeOps;
62static BtCursorOps sqliteBtreeCursorOps;
63
drh8c42ca92001-06-22 19:15:00 +000064/*
drh0d316a42002-08-11 20:10:47 +000065** Macros used for byteswapping. B is a pointer to the Btree
66** structure. This is needed to access the Btree.needSwab boolean
67** in order to tell if byte swapping is needed or not.
68** X is an unsigned integer. SWAB16 byte swaps a 16-bit integer.
69** SWAB32 byteswaps a 32-bit integer.
70*/
71#define SWAB16(B,X) ((B)->needSwab? swab16(X) : (X))
72#define SWAB32(B,X) ((B)->needSwab? swab32(X) : (X))
73#define SWAB_ADD(B,X,A) \
74 if((B)->needSwab){ X=swab32(swab32(X)+A); }else{ X += (A); }
75
76/*
77** The following global variable - available only if SQLITE_TEST is
78** defined - is used to determine whether new databases are created in
79** native byte order or in non-native byte order. Non-native byte order
80** databases are created for testing purposes only. Under normal operation,
81** only native byte-order databases should be created, but we should be
82** able to read or write existing databases regardless of the byteorder.
83*/
84#ifdef SQLITE_TEST
85int btree_native_byte_order = 1;
drh74587e52002-08-13 00:01:16 +000086#else
87# define btree_native_byte_order 1
drh0d316a42002-08-11 20:10:47 +000088#endif
89
90/*
drh365d68f2001-05-11 11:02:46 +000091** Forward declarations of structures used only in this file.
92*/
drhbd03cae2001-06-02 02:40:57 +000093typedef struct PageOne PageOne;
drh2af926b2001-05-15 00:39:25 +000094typedef struct MemPage MemPage;
drh365d68f2001-05-11 11:02:46 +000095typedef struct PageHdr PageHdr;
96typedef struct Cell Cell;
drh3b7511c2001-05-26 13:15:44 +000097typedef struct CellHdr CellHdr;
drh365d68f2001-05-11 11:02:46 +000098typedef struct FreeBlk FreeBlk;
drh2af926b2001-05-15 00:39:25 +000099typedef struct OverflowPage OverflowPage;
drh30e58752002-03-02 20:41:57 +0000100typedef struct FreelistInfo FreelistInfo;
drh2af926b2001-05-15 00:39:25 +0000101
102/*
103** All structures on a database page are aligned to 4-byte boundries.
104** This routine rounds up a number of bytes to the next multiple of 4.
drh306dc212001-05-21 13:45:10 +0000105**
106** This might need to change for computer architectures that require
107** and 8-byte alignment boundry for structures.
drh2af926b2001-05-15 00:39:25 +0000108*/
109#define ROUNDUP(X) ((X+3) & ~3)
drha059ad02001-04-17 20:09:11 +0000110
drh08ed44e2001-04-29 23:32:55 +0000111/*
drhbd03cae2001-06-02 02:40:57 +0000112** This is a magic string that appears at the beginning of every
drh8c42ca92001-06-22 19:15:00 +0000113** SQLite database in order to identify the file as a real database.
drh08ed44e2001-04-29 23:32:55 +0000114*/
drhbd03cae2001-06-02 02:40:57 +0000115static const char zMagicHeader[] =
drh80ff32f2001-11-04 18:32:46 +0000116 "** This file contains an SQLite 2.1 database **";
drhbd03cae2001-06-02 02:40:57 +0000117#define MAGIC_SIZE (sizeof(zMagicHeader))
drh08ed44e2001-04-29 23:32:55 +0000118
119/*
drh5e00f6c2001-09-13 13:46:56 +0000120** This is a magic integer also used to test the integrity of the database
drh8c42ca92001-06-22 19:15:00 +0000121** file. This integer is used in addition to the string above so that
122** if the file is written on a little-endian architecture and read
123** on a big-endian architectures (or vice versa) we can detect the
124** problem.
125**
126** The number used was obtained at random and has no special
drhb19a2bc2001-09-16 00:13:26 +0000127** significance other than the fact that it represents a different
128** integer on little-endian and big-endian machines.
drh8c42ca92001-06-22 19:15:00 +0000129*/
130#define MAGIC 0xdae37528
131
132/*
drhbd03cae2001-06-02 02:40:57 +0000133** The first page of the database file contains a magic header string
134** to identify the file as an SQLite database file. It also contains
135** a pointer to the first free page of the file. Page 2 contains the
drh8b2f49b2001-06-08 00:21:52 +0000136** root of the principle BTree. The file might contain other BTrees
137** rooted on pages above 2.
138**
139** The first page also contains SQLITE_N_BTREE_META integers that
140** can be used by higher-level routines.
drh08ed44e2001-04-29 23:32:55 +0000141**
drhbd03cae2001-06-02 02:40:57 +0000142** Remember that pages are numbered beginning with 1. (See pager.c
143** for additional information.) Page 0 does not exist and a page
144** number of 0 is used to mean "no such page".
145*/
146struct PageOne {
147 char zMagic[MAGIC_SIZE]; /* String that identifies the file as a database */
drh8c42ca92001-06-22 19:15:00 +0000148 int iMagic; /* Integer to verify correct byte order */
149 Pgno freeList; /* First free page in a list of all free pages */
drh2aa679f2001-06-25 02:11:07 +0000150 int nFree; /* Number of pages on the free list */
151 int aMeta[SQLITE_N_BTREE_META-1]; /* User defined integers */
drhbd03cae2001-06-02 02:40:57 +0000152};
153
154/*
155** Each database page has a header that is an instance of this
156** structure.
drh08ed44e2001-04-29 23:32:55 +0000157**
drh8b2f49b2001-06-08 00:21:52 +0000158** PageHdr.firstFree is 0 if there is no free space on this page.
drh14acc042001-06-10 19:56:58 +0000159** Otherwise, PageHdr.firstFree is the index in MemPage.u.aDisk[] of a
drh8b2f49b2001-06-08 00:21:52 +0000160** FreeBlk structure that describes the first block of free space.
161** All free space is defined by a linked list of FreeBlk structures.
drh08ed44e2001-04-29 23:32:55 +0000162**
drh8b2f49b2001-06-08 00:21:52 +0000163** Data is stored in a linked list of Cell structures. PageHdr.firstCell
drh14acc042001-06-10 19:56:58 +0000164** is the index into MemPage.u.aDisk[] of the first cell on the page. The
drh306dc212001-05-21 13:45:10 +0000165** Cells are kept in sorted order.
drh8b2f49b2001-06-08 00:21:52 +0000166**
167** A Cell contains all information about a database entry and a pointer
168** to a child page that contains other entries less than itself. In
169** other words, the i-th Cell contains both Ptr(i) and Key(i). The
170** right-most pointer of the page is contained in PageHdr.rightChild.
drh08ed44e2001-04-29 23:32:55 +0000171*/
drh365d68f2001-05-11 11:02:46 +0000172struct PageHdr {
drh5e2f8b92001-05-28 00:41:15 +0000173 Pgno rightChild; /* Child page that comes after all cells on this page */
drh14acc042001-06-10 19:56:58 +0000174 u16 firstCell; /* Index in MemPage.u.aDisk[] of the first cell */
175 u16 firstFree; /* Index in MemPage.u.aDisk[] of the first free block */
drh365d68f2001-05-11 11:02:46 +0000176};
drh306dc212001-05-21 13:45:10 +0000177
drh3b7511c2001-05-26 13:15:44 +0000178/*
179** Entries on a page of the database are called "Cells". Each Cell
180** has a header and data. This structure defines the header. The
drhbd03cae2001-06-02 02:40:57 +0000181** key and data (collectively the "payload") follow this header on
182** the database page.
183**
184** A definition of the complete Cell structure is given below. The
drh8c42ca92001-06-22 19:15:00 +0000185** header for the cell must be defined first in order to do some
drhbd03cae2001-06-02 02:40:57 +0000186** of the sizing #defines that follow.
drh3b7511c2001-05-26 13:15:44 +0000187*/
188struct CellHdr {
drh5e2f8b92001-05-28 00:41:15 +0000189 Pgno leftChild; /* Child page that comes before this cell */
drh3b7511c2001-05-26 13:15:44 +0000190 u16 nKey; /* Number of bytes in the key */
drh14acc042001-06-10 19:56:58 +0000191 u16 iNext; /* Index in MemPage.u.aDisk[] of next cell in sorted order */
drh58a11682001-11-10 13:51:08 +0000192 u8 nKeyHi; /* Upper 8 bits of key size for keys larger than 64K bytes */
193 u8 nDataHi; /* Upper 8 bits of data size when the size is more than 64K */
drh80ff32f2001-11-04 18:32:46 +0000194 u16 nData; /* Number of bytes of data */
drh8c42ca92001-06-22 19:15:00 +0000195};
drh58a11682001-11-10 13:51:08 +0000196
197/*
198** The key and data size are split into a lower 16-bit segment and an
199** upper 8-bit segment in order to pack them together into a smaller
200** space. The following macros reassembly a key or data size back
201** into an integer.
202*/
drh0d316a42002-08-11 20:10:47 +0000203#define NKEY(b,h) (SWAB16(b,h.nKey) + h.nKeyHi*65536)
204#define NDATA(b,h) (SWAB16(b,h.nData) + h.nDataHi*65536)
drh3b7511c2001-05-26 13:15:44 +0000205
206/*
207** The minimum size of a complete Cell. The Cell must contain a header
drhbd03cae2001-06-02 02:40:57 +0000208** and at least 4 bytes of payload.
drh3b7511c2001-05-26 13:15:44 +0000209*/
210#define MIN_CELL_SIZE (sizeof(CellHdr)+4)
211
212/*
213** The maximum number of database entries that can be held in a single
214** page of the database.
215*/
216#define MX_CELL ((SQLITE_PAGE_SIZE-sizeof(PageHdr))/MIN_CELL_SIZE)
217
218/*
drh6019e162001-07-02 17:51:45 +0000219** The amount of usable space on a single page of the BTree. This is the
220** page size minus the overhead of the page header.
221*/
222#define USABLE_SPACE (SQLITE_PAGE_SIZE - sizeof(PageHdr))
223
224/*
drh8c42ca92001-06-22 19:15:00 +0000225** The maximum amount of payload (in bytes) that can be stored locally for
226** a database entry. If the entry contains more data than this, the
drh3b7511c2001-05-26 13:15:44 +0000227** extra goes onto overflow pages.
drhbd03cae2001-06-02 02:40:57 +0000228**
229** This number is chosen so that at least 4 cells will fit on every page.
drh3b7511c2001-05-26 13:15:44 +0000230*/
drh6019e162001-07-02 17:51:45 +0000231#define MX_LOCAL_PAYLOAD ((USABLE_SPACE/4-(sizeof(CellHdr)+sizeof(Pgno)))&~3)
drh3b7511c2001-05-26 13:15:44 +0000232
drh306dc212001-05-21 13:45:10 +0000233/*
234** Data on a database page is stored as a linked list of Cell structures.
drh5e2f8b92001-05-28 00:41:15 +0000235** Both the key and the data are stored in aPayload[]. The key always comes
236** first. The aPayload[] field grows as necessary to hold the key and data,
drh306dc212001-05-21 13:45:10 +0000237** up to a maximum of MX_LOCAL_PAYLOAD bytes. If the size of the key and
drh3b7511c2001-05-26 13:15:44 +0000238** data combined exceeds MX_LOCAL_PAYLOAD bytes, then Cell.ovfl is the
239** page number of the first overflow page.
240**
241** Though this structure is fixed in size, the Cell on the database
drhbd03cae2001-06-02 02:40:57 +0000242** page varies in size. Every cell has a CellHdr and at least 4 bytes
drh3b7511c2001-05-26 13:15:44 +0000243** of payload space. Additional payload bytes (up to the maximum of
244** MX_LOCAL_PAYLOAD) and the Cell.ovfl value are allocated only as
245** needed.
drh306dc212001-05-21 13:45:10 +0000246*/
drh365d68f2001-05-11 11:02:46 +0000247struct Cell {
drh5e2f8b92001-05-28 00:41:15 +0000248 CellHdr h; /* The cell header */
249 char aPayload[MX_LOCAL_PAYLOAD]; /* Key and data */
250 Pgno ovfl; /* The first overflow page */
drh365d68f2001-05-11 11:02:46 +0000251};
drh306dc212001-05-21 13:45:10 +0000252
253/*
254** Free space on a page is remembered using a linked list of the FreeBlk
255** structures. Space on a database page is allocated in increments of
drh72f82862001-05-24 21:06:34 +0000256** at least 4 bytes and is always aligned to a 4-byte boundry. The
drh8b2f49b2001-06-08 00:21:52 +0000257** linked list of FreeBlks is always kept in order by address.
drh306dc212001-05-21 13:45:10 +0000258*/
drh365d68f2001-05-11 11:02:46 +0000259struct FreeBlk {
drh72f82862001-05-24 21:06:34 +0000260 u16 iSize; /* Number of bytes in this block of free space */
drh14acc042001-06-10 19:56:58 +0000261 u16 iNext; /* Index in MemPage.u.aDisk[] of the next free block */
drh365d68f2001-05-11 11:02:46 +0000262};
drh306dc212001-05-21 13:45:10 +0000263
264/*
drh14acc042001-06-10 19:56:58 +0000265** The number of bytes of payload that will fit on a single overflow page.
drh3b7511c2001-05-26 13:15:44 +0000266*/
267#define OVERFLOW_SIZE (SQLITE_PAGE_SIZE-sizeof(Pgno))
268
269/*
drh306dc212001-05-21 13:45:10 +0000270** When the key and data for a single entry in the BTree will not fit in
drh8c42ca92001-06-22 19:15:00 +0000271** the MX_LOCAL_PAYLOAD bytes of space available on the database page,
drh8b2f49b2001-06-08 00:21:52 +0000272** then all extra bytes are written to a linked list of overflow pages.
drh306dc212001-05-21 13:45:10 +0000273** Each overflow page is an instance of the following structure.
274**
275** Unused pages in the database are also represented by instances of
drhbd03cae2001-06-02 02:40:57 +0000276** the OverflowPage structure. The PageOne.freeList field is the
drh306dc212001-05-21 13:45:10 +0000277** page number of the first page in a linked list of unused database
278** pages.
279*/
drh2af926b2001-05-15 00:39:25 +0000280struct OverflowPage {
drh14acc042001-06-10 19:56:58 +0000281 Pgno iNext;
drh5e2f8b92001-05-28 00:41:15 +0000282 char aPayload[OVERFLOW_SIZE];
drh7e3b0a02001-04-28 16:52:40 +0000283};
drh7e3b0a02001-04-28 16:52:40 +0000284
285/*
drh30e58752002-03-02 20:41:57 +0000286** The PageOne.freeList field points to a linked list of overflow pages
287** hold information about free pages. The aPayload section of each
288** overflow page contains an instance of the following structure. The
289** aFree[] array holds the page number of nFree unused pages in the disk
290** file.
291*/
292struct FreelistInfo {
293 int nFree;
294 Pgno aFree[(OVERFLOW_SIZE-sizeof(int))/sizeof(Pgno)];
295};
296
297/*
drh7e3b0a02001-04-28 16:52:40 +0000298** For every page in the database file, an instance of the following structure
drh14acc042001-06-10 19:56:58 +0000299** is stored in memory. The u.aDisk[] array contains the raw bits read from
drh6446c4d2001-12-15 14:22:18 +0000300** the disk. The rest is auxiliary information held in memory only. The
drhbd03cae2001-06-02 02:40:57 +0000301** auxiliary info is only valid for regular database pages - it is not
302** used for overflow pages and pages on the freelist.
drh306dc212001-05-21 13:45:10 +0000303**
drhbd03cae2001-06-02 02:40:57 +0000304** Of particular interest in the auxiliary info is the apCell[] entry. Each
drh14acc042001-06-10 19:56:58 +0000305** apCell[] entry is a pointer to a Cell structure in u.aDisk[]. The cells are
drh306dc212001-05-21 13:45:10 +0000306** put in this array so that they can be accessed in constant time, rather
drhbd03cae2001-06-02 02:40:57 +0000307** than in linear time which would be needed if we had to walk the linked
308** list on every access.
drh72f82862001-05-24 21:06:34 +0000309**
drh14acc042001-06-10 19:56:58 +0000310** Note that apCell[] contains enough space to hold up to two more Cells
311** than can possibly fit on one page. In the steady state, every apCell[]
312** points to memory inside u.aDisk[]. But in the middle of an insert
313** operation, some apCell[] entries may temporarily point to data space
314** outside of u.aDisk[]. This is a transient situation that is quickly
315** resolved. But while it is happening, it is possible for a database
316** page to hold as many as two more cells than it might otherwise hold.
drh18b81e52001-11-01 13:52:52 +0000317** The extra two entries in apCell[] are an allowance for this situation.
drh14acc042001-06-10 19:56:58 +0000318**
drh72f82862001-05-24 21:06:34 +0000319** The pParent field points back to the parent page. This allows us to
320** walk up the BTree from any leaf to the root. Care must be taken to
321** unref() the parent page pointer when this page is no longer referenced.
drhbd03cae2001-06-02 02:40:57 +0000322** The pageDestructor() routine handles that chore.
drh7e3b0a02001-04-28 16:52:40 +0000323*/
324struct MemPage {
drh14acc042001-06-10 19:56:58 +0000325 union {
326 char aDisk[SQLITE_PAGE_SIZE]; /* Page data stored on disk */
327 PageHdr hdr; /* Overlay page header */
328 } u;
drh428ae8c2003-01-04 16:48:09 +0000329 u8 isInit; /* True if auxiliary data is initialized */
330 u8 idxShift; /* True if apCell[] indices have changed */
331 u8 isOverfull; /* Some apCell[] points outside u.aDisk[] */
drh72f82862001-05-24 21:06:34 +0000332 MemPage *pParent; /* The parent of this page. NULL for root */
drh428ae8c2003-01-04 16:48:09 +0000333 int idxParent; /* Index in pParent->apCell[] of this node */
drh14acc042001-06-10 19:56:58 +0000334 int nFree; /* Number of free bytes in u.aDisk[] */
drh306dc212001-05-21 13:45:10 +0000335 int nCell; /* Number of entries on this page */
drh14acc042001-06-10 19:56:58 +0000336 Cell *apCell[MX_CELL+2]; /* All data entires in sorted order */
drh8c42ca92001-06-22 19:15:00 +0000337};
drh7e3b0a02001-04-28 16:52:40 +0000338
339/*
drh3b7511c2001-05-26 13:15:44 +0000340** The in-memory image of a disk page has the auxiliary information appended
341** to the end. EXTRA_SIZE is the number of bytes of space needed to hold
342** that extra information.
343*/
344#define EXTRA_SIZE (sizeof(MemPage)-SQLITE_PAGE_SIZE)
345
346/*
drha059ad02001-04-17 20:09:11 +0000347** Everything we need to know about an open database
348*/
349struct Btree {
paulb95a8862003-04-01 21:16:41 +0000350 BtOps *pOps; /* Function table */
drha059ad02001-04-17 20:09:11 +0000351 Pager *pPager; /* The page cache */
drh306dc212001-05-21 13:45:10 +0000352 BtCursor *pCursor; /* A list of all open cursors */
drhbd03cae2001-06-02 02:40:57 +0000353 PageOne *page1; /* First page of the database */
drh663fc632002-02-02 18:49:19 +0000354 u8 inTrans; /* True if a transaction is in progress */
355 u8 inCkpt; /* True if there is a checkpoint on the transaction */
drh5df72a52002-06-06 23:16:05 +0000356 u8 readOnly; /* True if the underlying file is readonly */
drh0d316a42002-08-11 20:10:47 +0000357 u8 needSwab; /* Need to byte-swapping */
drha059ad02001-04-17 20:09:11 +0000358};
359typedef Btree Bt;
360
drh365d68f2001-05-11 11:02:46 +0000361/*
362** A cursor is a pointer to a particular entry in the BTree.
363** The entry is identified by its MemPage and the index in
drh5e2f8b92001-05-28 00:41:15 +0000364** MemPage.apCell[] of the entry.
drh365d68f2001-05-11 11:02:46 +0000365*/
drh72f82862001-05-24 21:06:34 +0000366struct BtCursor {
paulb95a8862003-04-01 21:16:41 +0000367 BtCursorOps *pOps; /* Function table */
drh5e2f8b92001-05-28 00:41:15 +0000368 Btree *pBt; /* The Btree to which this cursor belongs */
drh14acc042001-06-10 19:56:58 +0000369 BtCursor *pNext, *pPrev; /* Forms a linked list of all cursors */
drhf74b8d92002-09-01 23:20:45 +0000370 BtCursor *pShared; /* Loop of cursors with the same root page */
drh8b2f49b2001-06-08 00:21:52 +0000371 Pgno pgnoRoot; /* The root page of this tree */
drh5e2f8b92001-05-28 00:41:15 +0000372 MemPage *pPage; /* Page that contains the entry */
drh8c42ca92001-06-22 19:15:00 +0000373 int idx; /* Index of the entry in pPage->apCell[] */
drhecdc7532001-09-23 02:35:53 +0000374 u8 wrFlag; /* True if writable */
drh2dcc9aa2002-12-04 13:40:25 +0000375 u8 eSkip; /* Determines if next step operation is a no-op */
drh5e2f8b92001-05-28 00:41:15 +0000376 u8 iMatch; /* compare result from last sqliteBtreeMoveto() */
drh365d68f2001-05-11 11:02:46 +0000377};
drh7e3b0a02001-04-28 16:52:40 +0000378
drha059ad02001-04-17 20:09:11 +0000379/*
drh2dcc9aa2002-12-04 13:40:25 +0000380** Legal values for BtCursor.eSkip.
381*/
382#define SKIP_NONE 0 /* Always step the cursor */
383#define SKIP_NEXT 1 /* The next sqliteBtreeNext() is a no-op */
384#define SKIP_PREV 2 /* The next sqliteBtreePrevious() is a no-op */
385#define SKIP_INVALID 3 /* Calls to Next() and Previous() are invalid */
386
paulb95a8862003-04-01 21:16:41 +0000387/* Forward declarations */
388static int sqliteBtreeCloseCursor(BtCursor *pCur);
389
drh2dcc9aa2002-12-04 13:40:25 +0000390/*
drh0d316a42002-08-11 20:10:47 +0000391** Routines for byte swapping.
392*/
393u16 swab16(u16 x){
394 return ((x & 0xff)<<8) | ((x>>8)&0xff);
395}
396u32 swab32(u32 x){
397 return ((x & 0xff)<<24) | ((x & 0xff00)<<8) |
398 ((x>>8) & 0xff00) | ((x>>24)&0xff);
399}
400
401/*
drh3b7511c2001-05-26 13:15:44 +0000402** Compute the total number of bytes that a Cell needs on the main
drh5e2f8b92001-05-28 00:41:15 +0000403** database page. The number returned includes the Cell header,
404** local payload storage, and the pointer to overflow pages (if
drh8c42ca92001-06-22 19:15:00 +0000405** applicable). Additional space allocated on overflow pages
drhbd03cae2001-06-02 02:40:57 +0000406** is NOT included in the value returned from this routine.
drh3b7511c2001-05-26 13:15:44 +0000407*/
drh0d316a42002-08-11 20:10:47 +0000408static int cellSize(Btree *pBt, Cell *pCell){
409 int n = NKEY(pBt, pCell->h) + NDATA(pBt, pCell->h);
drh3b7511c2001-05-26 13:15:44 +0000410 if( n>MX_LOCAL_PAYLOAD ){
411 n = MX_LOCAL_PAYLOAD + sizeof(Pgno);
412 }else{
413 n = ROUNDUP(n);
414 }
415 n += sizeof(CellHdr);
416 return n;
417}
418
419/*
drh72f82862001-05-24 21:06:34 +0000420** Defragment the page given. All Cells are moved to the
421** beginning of the page and all free space is collected
422** into one big FreeBlk at the end of the page.
drh365d68f2001-05-11 11:02:46 +0000423*/
drh0d316a42002-08-11 20:10:47 +0000424static void defragmentPage(Btree *pBt, MemPage *pPage){
drh14acc042001-06-10 19:56:58 +0000425 int pc, i, n;
drh2af926b2001-05-15 00:39:25 +0000426 FreeBlk *pFBlk;
427 char newPage[SQLITE_PAGE_SIZE];
428
drh6019e162001-07-02 17:51:45 +0000429 assert( sqlitepager_iswriteable(pPage) );
drh7aa128d2002-06-21 13:09:16 +0000430 assert( pPage->isInit );
drhbd03cae2001-06-02 02:40:57 +0000431 pc = sizeof(PageHdr);
drh0d316a42002-08-11 20:10:47 +0000432 pPage->u.hdr.firstCell = SWAB16(pBt, pc);
drh14acc042001-06-10 19:56:58 +0000433 memcpy(newPage, pPage->u.aDisk, pc);
drh2af926b2001-05-15 00:39:25 +0000434 for(i=0; i<pPage->nCell; i++){
drh2aa679f2001-06-25 02:11:07 +0000435 Cell *pCell = pPage->apCell[i];
drh8c42ca92001-06-22 19:15:00 +0000436
437 /* This routine should never be called on an overfull page. The
438 ** following asserts verify that constraint. */
drh7c717f72001-06-24 20:39:41 +0000439 assert( Addr(pCell) > Addr(pPage) );
440 assert( Addr(pCell) < Addr(pPage) + SQLITE_PAGE_SIZE );
drh8c42ca92001-06-22 19:15:00 +0000441
drh0d316a42002-08-11 20:10:47 +0000442 n = cellSize(pBt, pCell);
443 pCell->h.iNext = SWAB16(pBt, pc + n);
drh2af926b2001-05-15 00:39:25 +0000444 memcpy(&newPage[pc], pCell, n);
drh14acc042001-06-10 19:56:58 +0000445 pPage->apCell[i] = (Cell*)&pPage->u.aDisk[pc];
drh2af926b2001-05-15 00:39:25 +0000446 pc += n;
447 }
drh72f82862001-05-24 21:06:34 +0000448 assert( pPage->nFree==SQLITE_PAGE_SIZE-pc );
drh14acc042001-06-10 19:56:58 +0000449 memcpy(pPage->u.aDisk, newPage, pc);
drh2aa679f2001-06-25 02:11:07 +0000450 if( pPage->nCell>0 ){
451 pPage->apCell[pPage->nCell-1]->h.iNext = 0;
452 }
drh8c42ca92001-06-22 19:15:00 +0000453 pFBlk = (FreeBlk*)&pPage->u.aDisk[pc];
drh0d316a42002-08-11 20:10:47 +0000454 pFBlk->iSize = SWAB16(pBt, SQLITE_PAGE_SIZE - pc);
drh2af926b2001-05-15 00:39:25 +0000455 pFBlk->iNext = 0;
drh0d316a42002-08-11 20:10:47 +0000456 pPage->u.hdr.firstFree = SWAB16(pBt, pc);
drh2af926b2001-05-15 00:39:25 +0000457 memset(&pFBlk[1], 0, SQLITE_PAGE_SIZE - pc - sizeof(FreeBlk));
drh365d68f2001-05-11 11:02:46 +0000458}
459
drha059ad02001-04-17 20:09:11 +0000460/*
drh8b2f49b2001-06-08 00:21:52 +0000461** Allocate nByte bytes of space on a page. nByte must be a
462** multiple of 4.
drhbd03cae2001-06-02 02:40:57 +0000463**
drh14acc042001-06-10 19:56:58 +0000464** Return the index into pPage->u.aDisk[] of the first byte of
drhbd03cae2001-06-02 02:40:57 +0000465** the new allocation. Or return 0 if there is not enough free
466** space on the page to satisfy the allocation request.
drh2af926b2001-05-15 00:39:25 +0000467**
drh72f82862001-05-24 21:06:34 +0000468** If the page contains nBytes of free space but does not contain
drh8b2f49b2001-06-08 00:21:52 +0000469** nBytes of contiguous free space, then this routine automatically
470** calls defragementPage() to consolidate all free space before
471** allocating the new chunk.
drh7e3b0a02001-04-28 16:52:40 +0000472*/
drh0d316a42002-08-11 20:10:47 +0000473static int allocateSpace(Btree *pBt, MemPage *pPage, int nByte){
drh2af926b2001-05-15 00:39:25 +0000474 FreeBlk *p;
475 u16 *pIdx;
476 int start;
drh8c42ca92001-06-22 19:15:00 +0000477 int cnt = 0;
drh0d316a42002-08-11 20:10:47 +0000478 int iSize;
drh72f82862001-05-24 21:06:34 +0000479
drh6019e162001-07-02 17:51:45 +0000480 assert( sqlitepager_iswriteable(pPage) );
drh5e2f8b92001-05-28 00:41:15 +0000481 assert( nByte==ROUNDUP(nByte) );
drh7aa128d2002-06-21 13:09:16 +0000482 assert( pPage->isInit );
drh14acc042001-06-10 19:56:58 +0000483 if( pPage->nFree<nByte || pPage->isOverfull ) return 0;
484 pIdx = &pPage->u.hdr.firstFree;
drh0d316a42002-08-11 20:10:47 +0000485 p = (FreeBlk*)&pPage->u.aDisk[SWAB16(pBt, *pIdx)];
486 while( (iSize = SWAB16(pBt, p->iSize))<nByte ){
drh8c42ca92001-06-22 19:15:00 +0000487 assert( cnt++ < SQLITE_PAGE_SIZE/4 );
drh2af926b2001-05-15 00:39:25 +0000488 if( p->iNext==0 ){
drh0d316a42002-08-11 20:10:47 +0000489 defragmentPage(pBt, pPage);
drh14acc042001-06-10 19:56:58 +0000490 pIdx = &pPage->u.hdr.firstFree;
drh2af926b2001-05-15 00:39:25 +0000491 }else{
492 pIdx = &p->iNext;
493 }
drh0d316a42002-08-11 20:10:47 +0000494 p = (FreeBlk*)&pPage->u.aDisk[SWAB16(pBt, *pIdx)];
drh2af926b2001-05-15 00:39:25 +0000495 }
drh0d316a42002-08-11 20:10:47 +0000496 if( iSize==nByte ){
497 start = SWAB16(pBt, *pIdx);
drh2af926b2001-05-15 00:39:25 +0000498 *pIdx = p->iNext;
499 }else{
drh8c42ca92001-06-22 19:15:00 +0000500 FreeBlk *pNew;
drh0d316a42002-08-11 20:10:47 +0000501 start = SWAB16(pBt, *pIdx);
drh8c42ca92001-06-22 19:15:00 +0000502 pNew = (FreeBlk*)&pPage->u.aDisk[start + nByte];
drh72f82862001-05-24 21:06:34 +0000503 pNew->iNext = p->iNext;
drh0d316a42002-08-11 20:10:47 +0000504 pNew->iSize = SWAB16(pBt, iSize - nByte);
505 *pIdx = SWAB16(pBt, start + nByte);
drh2af926b2001-05-15 00:39:25 +0000506 }
507 pPage->nFree -= nByte;
508 return start;
drh7e3b0a02001-04-28 16:52:40 +0000509}
510
511/*
drh14acc042001-06-10 19:56:58 +0000512** Return a section of the MemPage.u.aDisk[] to the freelist.
513** The first byte of the new free block is pPage->u.aDisk[start]
514** and the size of the block is "size" bytes. Size must be
515** a multiple of 4.
drh306dc212001-05-21 13:45:10 +0000516**
517** Most of the effort here is involved in coalesing adjacent
518** free blocks into a single big free block.
drh7e3b0a02001-04-28 16:52:40 +0000519*/
drh0d316a42002-08-11 20:10:47 +0000520static void freeSpace(Btree *pBt, MemPage *pPage, int start, int size){
drh2af926b2001-05-15 00:39:25 +0000521 int end = start + size;
522 u16 *pIdx, idx;
523 FreeBlk *pFBlk;
524 FreeBlk *pNew;
525 FreeBlk *pNext;
drh0d316a42002-08-11 20:10:47 +0000526 int iSize;
drh2af926b2001-05-15 00:39:25 +0000527
drh6019e162001-07-02 17:51:45 +0000528 assert( sqlitepager_iswriteable(pPage) );
drh2af926b2001-05-15 00:39:25 +0000529 assert( size == ROUNDUP(size) );
530 assert( start == ROUNDUP(start) );
drh7aa128d2002-06-21 13:09:16 +0000531 assert( pPage->isInit );
drh14acc042001-06-10 19:56:58 +0000532 pIdx = &pPage->u.hdr.firstFree;
drh0d316a42002-08-11 20:10:47 +0000533 idx = SWAB16(pBt, *pIdx);
drh2af926b2001-05-15 00:39:25 +0000534 while( idx!=0 && idx<start ){
drh14acc042001-06-10 19:56:58 +0000535 pFBlk = (FreeBlk*)&pPage->u.aDisk[idx];
drh0d316a42002-08-11 20:10:47 +0000536 iSize = SWAB16(pBt, pFBlk->iSize);
537 if( idx + iSize == start ){
538 pFBlk->iSize = SWAB16(pBt, iSize + size);
539 if( idx + iSize + size == SWAB16(pBt, pFBlk->iNext) ){
540 pNext = (FreeBlk*)&pPage->u.aDisk[idx + iSize + size];
541 if( pBt->needSwab ){
542 pFBlk->iSize = swab16(swab16(pNext->iSize)+iSize+size);
543 }else{
544 pFBlk->iSize += pNext->iSize;
545 }
drh2af926b2001-05-15 00:39:25 +0000546 pFBlk->iNext = pNext->iNext;
547 }
548 pPage->nFree += size;
549 return;
550 }
551 pIdx = &pFBlk->iNext;
drh0d316a42002-08-11 20:10:47 +0000552 idx = SWAB16(pBt, *pIdx);
drh2af926b2001-05-15 00:39:25 +0000553 }
drh14acc042001-06-10 19:56:58 +0000554 pNew = (FreeBlk*)&pPage->u.aDisk[start];
drh2af926b2001-05-15 00:39:25 +0000555 if( idx != end ){
drh0d316a42002-08-11 20:10:47 +0000556 pNew->iSize = SWAB16(pBt, size);
557 pNew->iNext = SWAB16(pBt, idx);
drh2af926b2001-05-15 00:39:25 +0000558 }else{
drh14acc042001-06-10 19:56:58 +0000559 pNext = (FreeBlk*)&pPage->u.aDisk[idx];
drh0d316a42002-08-11 20:10:47 +0000560 pNew->iSize = SWAB16(pBt, size + SWAB16(pBt, pNext->iSize));
drh2af926b2001-05-15 00:39:25 +0000561 pNew->iNext = pNext->iNext;
562 }
drh0d316a42002-08-11 20:10:47 +0000563 *pIdx = SWAB16(pBt, start);
drh2af926b2001-05-15 00:39:25 +0000564 pPage->nFree += size;
drh7e3b0a02001-04-28 16:52:40 +0000565}
566
567/*
568** Initialize the auxiliary information for a disk block.
drh72f82862001-05-24 21:06:34 +0000569**
drhbd03cae2001-06-02 02:40:57 +0000570** The pParent parameter must be a pointer to the MemPage which
571** is the parent of the page being initialized. The root of the
drh8b2f49b2001-06-08 00:21:52 +0000572** BTree (usually page 2) has no parent and so for that page,
573** pParent==NULL.
drh5e2f8b92001-05-28 00:41:15 +0000574**
drh72f82862001-05-24 21:06:34 +0000575** Return SQLITE_OK on success. If we see that the page does
drhda47d772002-12-02 04:25:19 +0000576** not contain a well-formed database page, then return
drh72f82862001-05-24 21:06:34 +0000577** SQLITE_CORRUPT. Note that a return of SQLITE_OK does not
578** guarantee that the page is well-formed. It only shows that
579** we failed to detect any corruption.
drh7e3b0a02001-04-28 16:52:40 +0000580*/
drh0d316a42002-08-11 20:10:47 +0000581static int initPage(Bt *pBt, MemPage *pPage, Pgno pgnoThis, MemPage *pParent){
drh14acc042001-06-10 19:56:58 +0000582 int idx; /* An index into pPage->u.aDisk[] */
583 Cell *pCell; /* A pointer to a Cell in pPage->u.aDisk[] */
584 FreeBlk *pFBlk; /* A pointer to a free block in pPage->u.aDisk[] */
drh5e2f8b92001-05-28 00:41:15 +0000585 int sz; /* The size of a Cell in bytes */
586 int freeSpace; /* Amount of free space on the page */
drh2af926b2001-05-15 00:39:25 +0000587
drh5e2f8b92001-05-28 00:41:15 +0000588 if( pPage->pParent ){
589 assert( pPage->pParent==pParent );
590 return SQLITE_OK;
591 }
592 if( pParent ){
593 pPage->pParent = pParent;
594 sqlitepager_ref(pParent);
595 }
596 if( pPage->isInit ) return SQLITE_OK;
drh7e3b0a02001-04-28 16:52:40 +0000597 pPage->isInit = 1;
drh7e3b0a02001-04-28 16:52:40 +0000598 pPage->nCell = 0;
drh6019e162001-07-02 17:51:45 +0000599 freeSpace = USABLE_SPACE;
drh0d316a42002-08-11 20:10:47 +0000600 idx = SWAB16(pBt, pPage->u.hdr.firstCell);
drh7e3b0a02001-04-28 16:52:40 +0000601 while( idx!=0 ){
drh8c42ca92001-06-22 19:15:00 +0000602 if( idx>SQLITE_PAGE_SIZE-MIN_CELL_SIZE ) goto page_format_error;
drhbd03cae2001-06-02 02:40:57 +0000603 if( idx<sizeof(PageHdr) ) goto page_format_error;
drh8c42ca92001-06-22 19:15:00 +0000604 if( idx!=ROUNDUP(idx) ) goto page_format_error;
drh14acc042001-06-10 19:56:58 +0000605 pCell = (Cell*)&pPage->u.aDisk[idx];
drh0d316a42002-08-11 20:10:47 +0000606 sz = cellSize(pBt, pCell);
drh5e2f8b92001-05-28 00:41:15 +0000607 if( idx+sz > SQLITE_PAGE_SIZE ) goto page_format_error;
608 freeSpace -= sz;
609 pPage->apCell[pPage->nCell++] = pCell;
drh0d316a42002-08-11 20:10:47 +0000610 idx = SWAB16(pBt, pCell->h.iNext);
drh2af926b2001-05-15 00:39:25 +0000611 }
612 pPage->nFree = 0;
drh0d316a42002-08-11 20:10:47 +0000613 idx = SWAB16(pBt, pPage->u.hdr.firstFree);
drh2af926b2001-05-15 00:39:25 +0000614 while( idx!=0 ){
drh0d316a42002-08-11 20:10:47 +0000615 int iNext;
drh2af926b2001-05-15 00:39:25 +0000616 if( idx>SQLITE_PAGE_SIZE-sizeof(FreeBlk) ) goto page_format_error;
drhbd03cae2001-06-02 02:40:57 +0000617 if( idx<sizeof(PageHdr) ) goto page_format_error;
drh14acc042001-06-10 19:56:58 +0000618 pFBlk = (FreeBlk*)&pPage->u.aDisk[idx];
drh0d316a42002-08-11 20:10:47 +0000619 pPage->nFree += SWAB16(pBt, pFBlk->iSize);
620 iNext = SWAB16(pBt, pFBlk->iNext);
621 if( iNext>0 && iNext <= idx ) goto page_format_error;
622 idx = iNext;
drh7e3b0a02001-04-28 16:52:40 +0000623 }
drh8b2f49b2001-06-08 00:21:52 +0000624 if( pPage->nCell==0 && pPage->nFree==0 ){
625 /* As a special case, an uninitialized root page appears to be
626 ** an empty database */
627 return SQLITE_OK;
628 }
drh5e2f8b92001-05-28 00:41:15 +0000629 if( pPage->nFree!=freeSpace ) goto page_format_error;
drh7e3b0a02001-04-28 16:52:40 +0000630 return SQLITE_OK;
drh2af926b2001-05-15 00:39:25 +0000631
632page_format_error:
633 return SQLITE_CORRUPT;
drh7e3b0a02001-04-28 16:52:40 +0000634}
635
636/*
drh8b2f49b2001-06-08 00:21:52 +0000637** Set up a raw page so that it looks like a database page holding
638** no entries.
drhbd03cae2001-06-02 02:40:57 +0000639*/
drh0d316a42002-08-11 20:10:47 +0000640static void zeroPage(Btree *pBt, MemPage *pPage){
drhbd03cae2001-06-02 02:40:57 +0000641 PageHdr *pHdr;
642 FreeBlk *pFBlk;
drh6019e162001-07-02 17:51:45 +0000643 assert( sqlitepager_iswriteable(pPage) );
drhbd03cae2001-06-02 02:40:57 +0000644 memset(pPage, 0, SQLITE_PAGE_SIZE);
drh14acc042001-06-10 19:56:58 +0000645 pHdr = &pPage->u.hdr;
drhbd03cae2001-06-02 02:40:57 +0000646 pHdr->firstCell = 0;
drh0d316a42002-08-11 20:10:47 +0000647 pHdr->firstFree = SWAB16(pBt, sizeof(*pHdr));
drhbd03cae2001-06-02 02:40:57 +0000648 pFBlk = (FreeBlk*)&pHdr[1];
649 pFBlk->iNext = 0;
drh0d316a42002-08-11 20:10:47 +0000650 pPage->nFree = SQLITE_PAGE_SIZE - sizeof(*pHdr);
651 pFBlk->iSize = SWAB16(pBt, pPage->nFree);
drh8c42ca92001-06-22 19:15:00 +0000652 pPage->nCell = 0;
653 pPage->isOverfull = 0;
drhbd03cae2001-06-02 02:40:57 +0000654}
655
656/*
drh72f82862001-05-24 21:06:34 +0000657** This routine is called when the reference count for a page
658** reaches zero. We need to unref the pParent pointer when that
659** happens.
660*/
661static void pageDestructor(void *pData){
662 MemPage *pPage = (MemPage*)pData;
663 if( pPage->pParent ){
664 MemPage *pParent = pPage->pParent;
665 pPage->pParent = 0;
666 sqlitepager_unref(pParent);
667 }
668}
669
670/*
drh306dc212001-05-21 13:45:10 +0000671** Open a new database.
672**
673** Actually, this routine just sets up the internal data structures
drh72f82862001-05-24 21:06:34 +0000674** for accessing the database. We do not open the database file
675** until the first page is loaded.
drh382c0242001-10-06 16:33:02 +0000676**
677** zFilename is the name of the database file. If zFilename is NULL
drh1bee3d72001-10-15 00:44:35 +0000678** a new database with a random name is created. This randomly named
679** database file will be deleted when sqliteBtreeClose() is called.
drha059ad02001-04-17 20:09:11 +0000680*/
drh6019e162001-07-02 17:51:45 +0000681int sqliteBtreeOpen(
682 const char *zFilename, /* Name of the file containing the BTree database */
drhda47d772002-12-02 04:25:19 +0000683 int omitJournal, /* if TRUE then do not journal this file */
drh6019e162001-07-02 17:51:45 +0000684 int nCache, /* How many pages in the page cache */
685 Btree **ppBtree /* Pointer to new Btree object written here */
686){
drha059ad02001-04-17 20:09:11 +0000687 Btree *pBt;
drh8c42ca92001-06-22 19:15:00 +0000688 int rc;
drha059ad02001-04-17 20:09:11 +0000689
drhd62d3d02003-01-24 12:14:20 +0000690 /*
691 ** The following asserts make sure that structures used by the btree are
692 ** the right size. This is to guard against size changes that result
693 ** when compiling on a different architecture.
694 */
695 assert( sizeof(u32)==4 );
696 assert( sizeof(u16)==2 );
697 assert( sizeof(Pgno)==4 );
698 assert( sizeof(PageHdr)==8 );
699 assert( sizeof(CellHdr)==12 );
700 assert( sizeof(FreeBlk)==4 );
701 assert( sizeof(OverflowPage)==SQLITE_PAGE_SIZE );
702 assert( sizeof(FreelistInfo)==OVERFLOW_SIZE );
703 assert( sizeof(ptr)==sizeof(char*) );
704 assert( sizeof(uptr)==sizeof(ptr) );
705
drha059ad02001-04-17 20:09:11 +0000706 pBt = sqliteMalloc( sizeof(*pBt) );
707 if( pBt==0 ){
drh8c42ca92001-06-22 19:15:00 +0000708 *ppBtree = 0;
drha059ad02001-04-17 20:09:11 +0000709 return SQLITE_NOMEM;
710 }
drh6019e162001-07-02 17:51:45 +0000711 if( nCache<10 ) nCache = 10;
drhda47d772002-12-02 04:25:19 +0000712 rc = sqlitepager_open(&pBt->pPager, zFilename, nCache, EXTRA_SIZE,
713 !omitJournal);
drha059ad02001-04-17 20:09:11 +0000714 if( rc!=SQLITE_OK ){
715 if( pBt->pPager ) sqlitepager_close(pBt->pPager);
716 sqliteFree(pBt);
717 *ppBtree = 0;
718 return rc;
719 }
drh72f82862001-05-24 21:06:34 +0000720 sqlitepager_set_destructor(pBt->pPager, pageDestructor);
drha059ad02001-04-17 20:09:11 +0000721 pBt->pCursor = 0;
722 pBt->page1 = 0;
drh5df72a52002-06-06 23:16:05 +0000723 pBt->readOnly = sqlitepager_isreadonly(pBt->pPager);
paulb95a8862003-04-01 21:16:41 +0000724 pBt->pOps = &sqliteBtreeOps;
drha059ad02001-04-17 20:09:11 +0000725 *ppBtree = pBt;
726 return SQLITE_OK;
727}
728
729/*
730** Close an open database and invalidate all cursors.
731*/
paulb95a8862003-04-01 21:16:41 +0000732static int sqliteBtreeClose(Btree *pBt){
drha059ad02001-04-17 20:09:11 +0000733 while( pBt->pCursor ){
734 sqliteBtreeCloseCursor(pBt->pCursor);
735 }
736 sqlitepager_close(pBt->pPager);
737 sqliteFree(pBt);
738 return SQLITE_OK;
739}
740
741/*
drhda47d772002-12-02 04:25:19 +0000742** Change the limit on the number of pages allowed in the cache.
drhcd61c282002-03-06 22:01:34 +0000743**
744** The maximum number of cache pages is set to the absolute
745** value of mxPage. If mxPage is negative, the pager will
746** operate asynchronously - it will not stop to do fsync()s
747** to insure data is written to the disk surface before
748** continuing. Transactions still work if synchronous is off,
749** and the database cannot be corrupted if this program
750** crashes. But if the operating system crashes or there is
751** an abrupt power failure when synchronous is off, the database
752** could be left in an inconsistent and unrecoverable state.
753** Synchronous is on by default so database corruption is not
754** normally a worry.
drhf57b14a2001-09-14 18:54:08 +0000755*/
paulb95a8862003-04-01 21:16:41 +0000756static int sqliteBtreeSetCacheSize(Btree *pBt, int mxPage){
drhf57b14a2001-09-14 18:54:08 +0000757 sqlitepager_set_cachesize(pBt->pPager, mxPage);
758 return SQLITE_OK;
759}
760
761/*
drh973b6e32003-02-12 14:09:42 +0000762** Change the way data is synced to disk in order to increase or decrease
763** how well the database resists damage due to OS crashes and power
764** failures. Level 1 is the same as asynchronous (no syncs() occur and
765** there is a high probability of damage) Level 2 is the default. There
766** is a very low but non-zero probability of damage. Level 3 reduces the
767** probability of damage to near zero but with a write performance reduction.
768*/
paulb95a8862003-04-01 21:16:41 +0000769static int sqliteBtreeSetSafetyLevel(Btree *pBt, int level){
drh973b6e32003-02-12 14:09:42 +0000770 sqlitepager_set_safety_level(pBt->pPager, level);
771 return SQLITE_OK;
772}
773
774/*
drh306dc212001-05-21 13:45:10 +0000775** Get a reference to page1 of the database file. This will
776** also acquire a readlock on that file.
777**
778** SQLITE_OK is returned on success. If the file is not a
779** well-formed database file, then SQLITE_CORRUPT is returned.
780** SQLITE_BUSY is returned if the database is locked. SQLITE_NOMEM
781** is returned if we run out of memory. SQLITE_PROTOCOL is returned
782** if there is a locking protocol violation.
783*/
784static int lockBtree(Btree *pBt){
785 int rc;
786 if( pBt->page1 ) return SQLITE_OK;
drh8c42ca92001-06-22 19:15:00 +0000787 rc = sqlitepager_get(pBt->pPager, 1, (void**)&pBt->page1);
drh306dc212001-05-21 13:45:10 +0000788 if( rc!=SQLITE_OK ) return rc;
drh306dc212001-05-21 13:45:10 +0000789
790 /* Do some checking to help insure the file we opened really is
791 ** a valid database file.
792 */
793 if( sqlitepager_pagecount(pBt->pPager)>0 ){
drhbd03cae2001-06-02 02:40:57 +0000794 PageOne *pP1 = pBt->page1;
drh0d316a42002-08-11 20:10:47 +0000795 if( strcmp(pP1->zMagic,zMagicHeader)!=0 ||
796 (pP1->iMagic!=MAGIC && swab32(pP1->iMagic)!=MAGIC) ){
drh306dc212001-05-21 13:45:10 +0000797 rc = SQLITE_CORRUPT;
drh72f82862001-05-24 21:06:34 +0000798 goto page1_init_failed;
drh306dc212001-05-21 13:45:10 +0000799 }
drh0d316a42002-08-11 20:10:47 +0000800 pBt->needSwab = pP1->iMagic!=MAGIC;
drh306dc212001-05-21 13:45:10 +0000801 }
802 return rc;
803
drh72f82862001-05-24 21:06:34 +0000804page1_init_failed:
drh306dc212001-05-21 13:45:10 +0000805 sqlitepager_unref(pBt->page1);
806 pBt->page1 = 0;
drh72f82862001-05-24 21:06:34 +0000807 return rc;
drh306dc212001-05-21 13:45:10 +0000808}
809
810/*
drhb8ca3072001-12-05 00:21:20 +0000811** If there are no outstanding cursors and we are not in the middle
812** of a transaction but there is a read lock on the database, then
813** this routine unrefs the first page of the database file which
814** has the effect of releasing the read lock.
815**
816** If there are any outstanding cursors, this routine is a no-op.
817**
818** If there is a transaction in progress, this routine is a no-op.
819*/
820static void unlockBtreeIfUnused(Btree *pBt){
821 if( pBt->inTrans==0 && pBt->pCursor==0 && pBt->page1!=0 ){
822 sqlitepager_unref(pBt->page1);
823 pBt->page1 = 0;
824 pBt->inTrans = 0;
drh663fc632002-02-02 18:49:19 +0000825 pBt->inCkpt = 0;
drhb8ca3072001-12-05 00:21:20 +0000826 }
827}
828
829/*
drh8c42ca92001-06-22 19:15:00 +0000830** Create a new database by initializing the first two pages of the
831** file.
drh8b2f49b2001-06-08 00:21:52 +0000832*/
833static int newDatabase(Btree *pBt){
834 MemPage *pRoot;
835 PageOne *pP1;
drh8c42ca92001-06-22 19:15:00 +0000836 int rc;
drh7c717f72001-06-24 20:39:41 +0000837 if( sqlitepager_pagecount(pBt->pPager)>1 ) return SQLITE_OK;
drh8b2f49b2001-06-08 00:21:52 +0000838 pP1 = pBt->page1;
839 rc = sqlitepager_write(pBt->page1);
840 if( rc ) return rc;
drh8c42ca92001-06-22 19:15:00 +0000841 rc = sqlitepager_get(pBt->pPager, 2, (void**)&pRoot);
drh8b2f49b2001-06-08 00:21:52 +0000842 if( rc ) return rc;
843 rc = sqlitepager_write(pRoot);
844 if( rc ){
845 sqlitepager_unref(pRoot);
846 return rc;
847 }
848 strcpy(pP1->zMagic, zMagicHeader);
drh0d316a42002-08-11 20:10:47 +0000849 if( btree_native_byte_order ){
850 pP1->iMagic = MAGIC;
851 pBt->needSwab = 0;
852 }else{
853 pP1->iMagic = swab32(MAGIC);
854 pBt->needSwab = 1;
855 }
drh0d316a42002-08-11 20:10:47 +0000856 zeroPage(pBt, pRoot);
drh8b2f49b2001-06-08 00:21:52 +0000857 sqlitepager_unref(pRoot);
858 return SQLITE_OK;
859}
860
861/*
drh72f82862001-05-24 21:06:34 +0000862** Attempt to start a new transaction.
drh8b2f49b2001-06-08 00:21:52 +0000863**
864** A transaction must be started before attempting any changes
865** to the database. None of the following routines will work
866** unless a transaction is started first:
867**
868** sqliteBtreeCreateTable()
drhc6b52df2002-01-04 03:09:29 +0000869** sqliteBtreeCreateIndex()
drh8b2f49b2001-06-08 00:21:52 +0000870** sqliteBtreeClearTable()
871** sqliteBtreeDropTable()
872** sqliteBtreeInsert()
873** sqliteBtreeDelete()
874** sqliteBtreeUpdateMeta()
drha059ad02001-04-17 20:09:11 +0000875*/
paulb95a8862003-04-01 21:16:41 +0000876static int sqliteBtreeBeginTrans(Btree *pBt){
drha059ad02001-04-17 20:09:11 +0000877 int rc;
878 if( pBt->inTrans ) return SQLITE_ERROR;
drhf74b8d92002-09-01 23:20:45 +0000879 if( pBt->readOnly ) return SQLITE_READONLY;
drha059ad02001-04-17 20:09:11 +0000880 if( pBt->page1==0 ){
drh7e3b0a02001-04-28 16:52:40 +0000881 rc = lockBtree(pBt);
drh8c42ca92001-06-22 19:15:00 +0000882 if( rc!=SQLITE_OK ){
883 return rc;
884 }
drha059ad02001-04-17 20:09:11 +0000885 }
drhf74b8d92002-09-01 23:20:45 +0000886 rc = sqlitepager_begin(pBt->page1);
887 if( rc==SQLITE_OK ){
888 rc = newDatabase(pBt);
drha059ad02001-04-17 20:09:11 +0000889 }
drhb8ca3072001-12-05 00:21:20 +0000890 if( rc==SQLITE_OK ){
891 pBt->inTrans = 1;
drh663fc632002-02-02 18:49:19 +0000892 pBt->inCkpt = 0;
drhb8ca3072001-12-05 00:21:20 +0000893 }else{
894 unlockBtreeIfUnused(pBt);
drha059ad02001-04-17 20:09:11 +0000895 }
drhb8ca3072001-12-05 00:21:20 +0000896 return rc;
drha059ad02001-04-17 20:09:11 +0000897}
898
899/*
drh2aa679f2001-06-25 02:11:07 +0000900** Commit the transaction currently in progress.
drh5e00f6c2001-09-13 13:46:56 +0000901**
902** This will release the write lock on the database file. If there
903** are no active cursors, it also releases the read lock.
drha059ad02001-04-17 20:09:11 +0000904*/
paulb95a8862003-04-01 21:16:41 +0000905static int sqliteBtreeCommit(Btree *pBt){
drha059ad02001-04-17 20:09:11 +0000906 int rc;
drh5df72a52002-06-06 23:16:05 +0000907 rc = pBt->readOnly ? SQLITE_OK : sqlitepager_commit(pBt->pPager);
drh7c717f72001-06-24 20:39:41 +0000908 pBt->inTrans = 0;
drh663fc632002-02-02 18:49:19 +0000909 pBt->inCkpt = 0;
drh5e00f6c2001-09-13 13:46:56 +0000910 unlockBtreeIfUnused(pBt);
drha059ad02001-04-17 20:09:11 +0000911 return rc;
912}
913
914/*
drhecdc7532001-09-23 02:35:53 +0000915** Rollback the transaction in progress. All cursors will be
916** invalided by this operation. Any attempt to use a cursor
917** that was open at the beginning of this operation will result
918** in an error.
drh5e00f6c2001-09-13 13:46:56 +0000919**
920** This will release the write lock on the database file. If there
921** are no active cursors, it also releases the read lock.
drha059ad02001-04-17 20:09:11 +0000922*/
paulb95a8862003-04-01 21:16:41 +0000923static int sqliteBtreeRollback(Btree *pBt){
drha059ad02001-04-17 20:09:11 +0000924 int rc;
drhecdc7532001-09-23 02:35:53 +0000925 BtCursor *pCur;
drh7c717f72001-06-24 20:39:41 +0000926 if( pBt->inTrans==0 ) return SQLITE_OK;
927 pBt->inTrans = 0;
drh663fc632002-02-02 18:49:19 +0000928 pBt->inCkpt = 0;
drh3a840692003-01-29 22:58:26 +0000929 rc = pBt->readOnly ? SQLITE_OK : sqlitepager_rollback(pBt->pPager);
drhecdc7532001-09-23 02:35:53 +0000930 for(pCur=pBt->pCursor; pCur; pCur=pCur->pNext){
drh3a840692003-01-29 22:58:26 +0000931 if( pCur->pPage && pCur->pPage->isInit==0 ){
drhecdc7532001-09-23 02:35:53 +0000932 sqlitepager_unref(pCur->pPage);
933 pCur->pPage = 0;
934 }
935 }
drh5e00f6c2001-09-13 13:46:56 +0000936 unlockBtreeIfUnused(pBt);
drha059ad02001-04-17 20:09:11 +0000937 return rc;
938}
939
940/*
drh663fc632002-02-02 18:49:19 +0000941** Set the checkpoint for the current transaction. The checkpoint serves
942** as a sub-transaction that can be rolled back independently of the
943** main transaction. You must start a transaction before starting a
944** checkpoint. The checkpoint is ended automatically if the transaction
945** commits or rolls back.
946**
947** Only one checkpoint may be active at a time. It is an error to try
948** to start a new checkpoint if another checkpoint is already active.
949*/
paulb95a8862003-04-01 21:16:41 +0000950static int sqliteBtreeBeginCkpt(Btree *pBt){
drh663fc632002-02-02 18:49:19 +0000951 int rc;
drh0d65dc02002-02-03 00:56:09 +0000952 if( !pBt->inTrans || pBt->inCkpt ){
drhf74b8d92002-09-01 23:20:45 +0000953 return pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR;
drh0d65dc02002-02-03 00:56:09 +0000954 }
drh5df72a52002-06-06 23:16:05 +0000955 rc = pBt->readOnly ? SQLITE_OK : sqlitepager_ckpt_begin(pBt->pPager);
drh663fc632002-02-02 18:49:19 +0000956 pBt->inCkpt = 1;
957 return rc;
958}
959
960
961/*
962** Commit a checkpoint to transaction currently in progress. If no
963** checkpoint is active, this is a no-op.
964*/
paulb95a8862003-04-01 21:16:41 +0000965static int sqliteBtreeCommitCkpt(Btree *pBt){
drh663fc632002-02-02 18:49:19 +0000966 int rc;
drh5df72a52002-06-06 23:16:05 +0000967 if( pBt->inCkpt && !pBt->readOnly ){
drh663fc632002-02-02 18:49:19 +0000968 rc = sqlitepager_ckpt_commit(pBt->pPager);
969 }else{
970 rc = SQLITE_OK;
971 }
drh0d65dc02002-02-03 00:56:09 +0000972 pBt->inCkpt = 0;
drh663fc632002-02-02 18:49:19 +0000973 return rc;
974}
975
976/*
977** Rollback the checkpoint to the current transaction. If there
978** is no active checkpoint or transaction, this routine is a no-op.
979**
980** All cursors will be invalided by this operation. Any attempt
981** to use a cursor that was open at the beginning of this operation
982** will result in an error.
983*/
paulb95a8862003-04-01 21:16:41 +0000984static int sqliteBtreeRollbackCkpt(Btree *pBt){
drh663fc632002-02-02 18:49:19 +0000985 int rc;
986 BtCursor *pCur;
drh5df72a52002-06-06 23:16:05 +0000987 if( pBt->inCkpt==0 || pBt->readOnly ) return SQLITE_OK;
drh3a840692003-01-29 22:58:26 +0000988 rc = sqlitepager_ckpt_rollback(pBt->pPager);
drh663fc632002-02-02 18:49:19 +0000989 for(pCur=pBt->pCursor; pCur; pCur=pCur->pNext){
drh3a840692003-01-29 22:58:26 +0000990 if( pCur->pPage && pCur->pPage->isInit==0 ){
drh663fc632002-02-02 18:49:19 +0000991 sqlitepager_unref(pCur->pPage);
992 pCur->pPage = 0;
993 }
994 }
drh0d65dc02002-02-03 00:56:09 +0000995 pBt->inCkpt = 0;
drh663fc632002-02-02 18:49:19 +0000996 return rc;
997}
998
999/*
drh8b2f49b2001-06-08 00:21:52 +00001000** Create a new cursor for the BTree whose root is on the page
1001** iTable. The act of acquiring a cursor gets a read lock on
1002** the database file.
drh1bee3d72001-10-15 00:44:35 +00001003**
1004** If wrFlag==0, then the cursor can only be used for reading.
drhf74b8d92002-09-01 23:20:45 +00001005** If wrFlag==1, then the cursor can be used for reading or for
1006** writing if other conditions for writing are also met. These
1007** are the conditions that must be met in order for writing to
1008** be allowed:
drh6446c4d2001-12-15 14:22:18 +00001009**
drhf74b8d92002-09-01 23:20:45 +00001010** 1: The cursor must have been opened with wrFlag==1
1011**
1012** 2: No other cursors may be open with wrFlag==0 on the same table
1013**
1014** 3: The database must be writable (not on read-only media)
1015**
1016** 4: There must be an active transaction.
1017**
1018** Condition 2 warrants further discussion. If any cursor is opened
1019** on a table with wrFlag==0, that prevents all other cursors from
1020** writing to that table. This is a kind of "read-lock". When a cursor
1021** is opened with wrFlag==0 it is guaranteed that the table will not
1022** change as long as the cursor is open. This allows the cursor to
1023** do a sequential scan of the table without having to worry about
1024** entries being inserted or deleted during the scan. Cursors should
1025** be opened with wrFlag==0 only if this read-lock property is needed.
1026** That is to say, cursors should be opened with wrFlag==0 only if they
1027** intend to use the sqliteBtreeNext() system call. All other cursors
1028** should be opened with wrFlag==1 even if they never really intend
1029** to write.
1030**
drh6446c4d2001-12-15 14:22:18 +00001031** No checking is done to make sure that page iTable really is the
1032** root page of a b-tree. If it is not, then the cursor acquired
1033** will not work correctly.
drha059ad02001-04-17 20:09:11 +00001034*/
paulb95a8862003-04-01 21:16:41 +00001035static int sqliteBtreeCursor(Btree *pBt, int iTable, int wrFlag, BtCursor **ppCur){
drha059ad02001-04-17 20:09:11 +00001036 int rc;
drhf74b8d92002-09-01 23:20:45 +00001037 BtCursor *pCur, *pRing;
drhecdc7532001-09-23 02:35:53 +00001038
drha059ad02001-04-17 20:09:11 +00001039 if( pBt->page1==0 ){
1040 rc = lockBtree(pBt);
1041 if( rc!=SQLITE_OK ){
1042 *ppCur = 0;
1043 return rc;
1044 }
1045 }
1046 pCur = sqliteMalloc( sizeof(*pCur) );
1047 if( pCur==0 ){
drhbd03cae2001-06-02 02:40:57 +00001048 rc = SQLITE_NOMEM;
1049 goto create_cursor_exception;
1050 }
drh8b2f49b2001-06-08 00:21:52 +00001051 pCur->pgnoRoot = (Pgno)iTable;
drh8c42ca92001-06-22 19:15:00 +00001052 rc = sqlitepager_get(pBt->pPager, pCur->pgnoRoot, (void**)&pCur->pPage);
drhbd03cae2001-06-02 02:40:57 +00001053 if( rc!=SQLITE_OK ){
1054 goto create_cursor_exception;
1055 }
drh0d316a42002-08-11 20:10:47 +00001056 rc = initPage(pBt, pCur->pPage, pCur->pgnoRoot, 0);
drhbd03cae2001-06-02 02:40:57 +00001057 if( rc!=SQLITE_OK ){
1058 goto create_cursor_exception;
drha059ad02001-04-17 20:09:11 +00001059 }
paulb95a8862003-04-01 21:16:41 +00001060 pCur->pOps = &sqliteBtreeCursorOps;
drh14acc042001-06-10 19:56:58 +00001061 pCur->pBt = pBt;
drhecdc7532001-09-23 02:35:53 +00001062 pCur->wrFlag = wrFlag;
drh14acc042001-06-10 19:56:58 +00001063 pCur->idx = 0;
drh2dcc9aa2002-12-04 13:40:25 +00001064 pCur->eSkip = SKIP_INVALID;
drha059ad02001-04-17 20:09:11 +00001065 pCur->pNext = pBt->pCursor;
1066 if( pCur->pNext ){
1067 pCur->pNext->pPrev = pCur;
1068 }
drh14acc042001-06-10 19:56:58 +00001069 pCur->pPrev = 0;
drhf74b8d92002-09-01 23:20:45 +00001070 pRing = pBt->pCursor;
1071 while( pRing && pRing->pgnoRoot!=pCur->pgnoRoot ){ pRing = pRing->pNext; }
1072 if( pRing ){
1073 pCur->pShared = pRing->pShared;
1074 pRing->pShared = pCur;
1075 }else{
1076 pCur->pShared = pCur;
1077 }
drha059ad02001-04-17 20:09:11 +00001078 pBt->pCursor = pCur;
drh2af926b2001-05-15 00:39:25 +00001079 *ppCur = pCur;
1080 return SQLITE_OK;
drhbd03cae2001-06-02 02:40:57 +00001081
1082create_cursor_exception:
1083 *ppCur = 0;
1084 if( pCur ){
1085 if( pCur->pPage ) sqlitepager_unref(pCur->pPage);
1086 sqliteFree(pCur);
1087 }
drh5e00f6c2001-09-13 13:46:56 +00001088 unlockBtreeIfUnused(pBt);
drhbd03cae2001-06-02 02:40:57 +00001089 return rc;
drha059ad02001-04-17 20:09:11 +00001090}
1091
1092/*
drh5e00f6c2001-09-13 13:46:56 +00001093** Close a cursor. The read lock on the database file is released
drhbd03cae2001-06-02 02:40:57 +00001094** when the last cursor is closed.
drha059ad02001-04-17 20:09:11 +00001095*/
paulb95a8862003-04-01 21:16:41 +00001096static int sqliteBtreeCloseCursor(BtCursor *pCur){
drha059ad02001-04-17 20:09:11 +00001097 Btree *pBt = pCur->pBt;
drha059ad02001-04-17 20:09:11 +00001098 if( pCur->pPrev ){
1099 pCur->pPrev->pNext = pCur->pNext;
1100 }else{
1101 pBt->pCursor = pCur->pNext;
1102 }
1103 if( pCur->pNext ){
1104 pCur->pNext->pPrev = pCur->pPrev;
1105 }
drhecdc7532001-09-23 02:35:53 +00001106 if( pCur->pPage ){
1107 sqlitepager_unref(pCur->pPage);
1108 }
drhf74b8d92002-09-01 23:20:45 +00001109 if( pCur->pShared!=pCur ){
1110 BtCursor *pRing = pCur->pShared;
1111 while( pRing->pShared!=pCur ){ pRing = pRing->pShared; }
1112 pRing->pShared = pCur->pShared;
1113 }
drh5e00f6c2001-09-13 13:46:56 +00001114 unlockBtreeIfUnused(pBt);
drha059ad02001-04-17 20:09:11 +00001115 sqliteFree(pCur);
drh8c42ca92001-06-22 19:15:00 +00001116 return SQLITE_OK;
drha059ad02001-04-17 20:09:11 +00001117}
1118
drh7e3b0a02001-04-28 16:52:40 +00001119/*
drh5e2f8b92001-05-28 00:41:15 +00001120** Make a temporary cursor by filling in the fields of pTempCur.
1121** The temporary cursor is not on the cursor list for the Btree.
1122*/
drh14acc042001-06-10 19:56:58 +00001123static void getTempCursor(BtCursor *pCur, BtCursor *pTempCur){
drh5e2f8b92001-05-28 00:41:15 +00001124 memcpy(pTempCur, pCur, sizeof(*pCur));
1125 pTempCur->pNext = 0;
1126 pTempCur->pPrev = 0;
drhecdc7532001-09-23 02:35:53 +00001127 if( pTempCur->pPage ){
1128 sqlitepager_ref(pTempCur->pPage);
1129 }
drh5e2f8b92001-05-28 00:41:15 +00001130}
1131
1132/*
drhbd03cae2001-06-02 02:40:57 +00001133** Delete a temporary cursor such as was made by the CreateTemporaryCursor()
drh5e2f8b92001-05-28 00:41:15 +00001134** function above.
1135*/
drh14acc042001-06-10 19:56:58 +00001136static void releaseTempCursor(BtCursor *pCur){
drhecdc7532001-09-23 02:35:53 +00001137 if( pCur->pPage ){
1138 sqlitepager_unref(pCur->pPage);
1139 }
drh5e2f8b92001-05-28 00:41:15 +00001140}
1141
1142/*
drhbd03cae2001-06-02 02:40:57 +00001143** Set *pSize to the number of bytes of key in the entry the
1144** cursor currently points to. Always return SQLITE_OK.
1145** Failure is not possible. If the cursor is not currently
1146** pointing to an entry (which can happen, for example, if
1147** the database is empty) then *pSize is set to 0.
drh7e3b0a02001-04-28 16:52:40 +00001148*/
paulb95a8862003-04-01 21:16:41 +00001149static int sqliteBtreeKeySize(BtCursor *pCur, int *pSize){
drh2af926b2001-05-15 00:39:25 +00001150 Cell *pCell;
1151 MemPage *pPage;
1152
1153 pPage = pCur->pPage;
drh8c1238a2003-01-02 14:43:55 +00001154 assert( pPage!=0 );
1155 if( pCur->idx >= pPage->nCell ){
drh72f82862001-05-24 21:06:34 +00001156 *pSize = 0;
1157 }else{
drh5e2f8b92001-05-28 00:41:15 +00001158 pCell = pPage->apCell[pCur->idx];
drh0d316a42002-08-11 20:10:47 +00001159 *pSize = NKEY(pCur->pBt, pCell->h);
drh72f82862001-05-24 21:06:34 +00001160 }
1161 return SQLITE_OK;
drha059ad02001-04-17 20:09:11 +00001162}
drh2af926b2001-05-15 00:39:25 +00001163
drh72f82862001-05-24 21:06:34 +00001164/*
1165** Read payload information from the entry that the pCur cursor is
1166** pointing to. Begin reading the payload at "offset" and read
1167** a total of "amt" bytes. Put the result in zBuf.
1168**
1169** This routine does not make a distinction between key and data.
1170** It just reads bytes from the payload area.
1171*/
drh2af926b2001-05-15 00:39:25 +00001172static int getPayload(BtCursor *pCur, int offset, int amt, char *zBuf){
drh5e2f8b92001-05-28 00:41:15 +00001173 char *aPayload;
drh2af926b2001-05-15 00:39:25 +00001174 Pgno nextPage;
drh8c42ca92001-06-22 19:15:00 +00001175 int rc;
drh0d316a42002-08-11 20:10:47 +00001176 Btree *pBt = pCur->pBt;
drh72f82862001-05-24 21:06:34 +00001177 assert( pCur!=0 && pCur->pPage!=0 );
drh8c42ca92001-06-22 19:15:00 +00001178 assert( pCur->idx>=0 && pCur->idx<pCur->pPage->nCell );
1179 aPayload = pCur->pPage->apCell[pCur->idx]->aPayload;
drh2af926b2001-05-15 00:39:25 +00001180 if( offset<MX_LOCAL_PAYLOAD ){
1181 int a = amt;
1182 if( a+offset>MX_LOCAL_PAYLOAD ){
1183 a = MX_LOCAL_PAYLOAD - offset;
1184 }
drh5e2f8b92001-05-28 00:41:15 +00001185 memcpy(zBuf, &aPayload[offset], a);
drh2af926b2001-05-15 00:39:25 +00001186 if( a==amt ){
1187 return SQLITE_OK;
1188 }
drh2aa679f2001-06-25 02:11:07 +00001189 offset = 0;
drh2af926b2001-05-15 00:39:25 +00001190 zBuf += a;
1191 amt -= a;
drhdd793422001-06-28 01:54:48 +00001192 }else{
1193 offset -= MX_LOCAL_PAYLOAD;
drhbd03cae2001-06-02 02:40:57 +00001194 }
1195 if( amt>0 ){
drh0d316a42002-08-11 20:10:47 +00001196 nextPage = SWAB32(pBt, pCur->pPage->apCell[pCur->idx]->ovfl);
drh2af926b2001-05-15 00:39:25 +00001197 }
1198 while( amt>0 && nextPage ){
1199 OverflowPage *pOvfl;
drh0d316a42002-08-11 20:10:47 +00001200 rc = sqlitepager_get(pBt->pPager, nextPage, (void**)&pOvfl);
drh2af926b2001-05-15 00:39:25 +00001201 if( rc!=0 ){
1202 return rc;
1203 }
drh0d316a42002-08-11 20:10:47 +00001204 nextPage = SWAB32(pBt, pOvfl->iNext);
drh2af926b2001-05-15 00:39:25 +00001205 if( offset<OVERFLOW_SIZE ){
1206 int a = amt;
1207 if( a + offset > OVERFLOW_SIZE ){
1208 a = OVERFLOW_SIZE - offset;
1209 }
drh5e2f8b92001-05-28 00:41:15 +00001210 memcpy(zBuf, &pOvfl->aPayload[offset], a);
drh2aa679f2001-06-25 02:11:07 +00001211 offset = 0;
drh2af926b2001-05-15 00:39:25 +00001212 amt -= a;
1213 zBuf += a;
drh2aa679f2001-06-25 02:11:07 +00001214 }else{
1215 offset -= OVERFLOW_SIZE;
drh2af926b2001-05-15 00:39:25 +00001216 }
1217 sqlitepager_unref(pOvfl);
1218 }
drha7fcb052001-12-14 15:09:55 +00001219 if( amt>0 ){
1220 return SQLITE_CORRUPT;
1221 }
1222 return SQLITE_OK;
drh2af926b2001-05-15 00:39:25 +00001223}
1224
drh72f82862001-05-24 21:06:34 +00001225/*
drh5e00f6c2001-09-13 13:46:56 +00001226** Read part of the key associated with cursor pCur. A maximum
drh72f82862001-05-24 21:06:34 +00001227** of "amt" bytes will be transfered into zBuf[]. The transfer
drh5e00f6c2001-09-13 13:46:56 +00001228** begins at "offset". The number of bytes actually read is
drh8c1238a2003-01-02 14:43:55 +00001229** returned.
1230**
1231** Change: It used to be that the amount returned will be smaller
1232** than the amount requested if there are not enough bytes in the key
1233** to satisfy the request. But now, it must be the case that there
1234** is enough data available to satisfy the request. If not, an exception
1235** is raised. The change was made in an effort to boost performance
1236** by eliminating unneeded tests.
drh72f82862001-05-24 21:06:34 +00001237*/
paulb95a8862003-04-01 21:16:41 +00001238static int sqliteBtreeKey(BtCursor *pCur, int offset, int amt, char *zBuf){
drh72f82862001-05-24 21:06:34 +00001239 MemPage *pPage;
drha059ad02001-04-17 20:09:11 +00001240
drh8c1238a2003-01-02 14:43:55 +00001241 assert( amt>=0 );
1242 assert( offset>=0 );
1243 assert( pCur->pPage!=0 );
drh72f82862001-05-24 21:06:34 +00001244 pPage = pCur->pPage;
drh72f82862001-05-24 21:06:34 +00001245 if( pCur->idx >= pPage->nCell ){
drh5e00f6c2001-09-13 13:46:56 +00001246 return 0;
drh72f82862001-05-24 21:06:34 +00001247 }
drh8c1238a2003-01-02 14:43:55 +00001248 assert( amt+offset <= NKEY(pCur->pBt, pPage->apCell[pCur->idx]->h) );
drh5e00f6c2001-09-13 13:46:56 +00001249 getPayload(pCur, offset, amt, zBuf);
1250 return amt;
drh72f82862001-05-24 21:06:34 +00001251}
1252
1253/*
drhbd03cae2001-06-02 02:40:57 +00001254** Set *pSize to the number of bytes of data in the entry the
1255** cursor currently points to. Always return SQLITE_OK.
1256** Failure is not possible. If the cursor is not currently
1257** pointing to an entry (which can happen, for example, if
1258** the database is empty) then *pSize is set to 0.
drh72f82862001-05-24 21:06:34 +00001259*/
paulb95a8862003-04-01 21:16:41 +00001260static int sqliteBtreeDataSize(BtCursor *pCur, int *pSize){
drh72f82862001-05-24 21:06:34 +00001261 Cell *pCell;
1262 MemPage *pPage;
1263
1264 pPage = pCur->pPage;
drh8c1238a2003-01-02 14:43:55 +00001265 assert( pPage!=0 );
1266 if( pCur->idx >= pPage->nCell ){
drh72f82862001-05-24 21:06:34 +00001267 *pSize = 0;
1268 }else{
drh5e2f8b92001-05-28 00:41:15 +00001269 pCell = pPage->apCell[pCur->idx];
drh0d316a42002-08-11 20:10:47 +00001270 *pSize = NDATA(pCur->pBt, pCell->h);
drh72f82862001-05-24 21:06:34 +00001271 }
1272 return SQLITE_OK;
1273}
1274
1275/*
drh5e00f6c2001-09-13 13:46:56 +00001276** Read part of the data associated with cursor pCur. A maximum
drh72f82862001-05-24 21:06:34 +00001277** of "amt" bytes will be transfered into zBuf[]. The transfer
drh5e00f6c2001-09-13 13:46:56 +00001278** begins at "offset". The number of bytes actually read is
1279** returned. The amount returned will be smaller than the
1280** amount requested if there are not enough bytes in the data
1281** to satisfy the request.
drh72f82862001-05-24 21:06:34 +00001282*/
paulb95a8862003-04-01 21:16:41 +00001283static int sqliteBtreeData(BtCursor *pCur, int offset, int amt, char *zBuf){
drh72f82862001-05-24 21:06:34 +00001284 Cell *pCell;
1285 MemPage *pPage;
1286
drh8c1238a2003-01-02 14:43:55 +00001287 assert( amt>=0 );
1288 assert( offset>=0 );
1289 assert( pCur->pPage!=0 );
drh72f82862001-05-24 21:06:34 +00001290 pPage = pCur->pPage;
drh8c1238a2003-01-02 14:43:55 +00001291 if( pCur->idx >= pPage->nCell ){
drh5e00f6c2001-09-13 13:46:56 +00001292 return 0;
drh72f82862001-05-24 21:06:34 +00001293 }
drh5e2f8b92001-05-28 00:41:15 +00001294 pCell = pPage->apCell[pCur->idx];
drh8c1238a2003-01-02 14:43:55 +00001295 assert( amt+offset <= NDATA(pCur->pBt, pCell->h) );
drh0d316a42002-08-11 20:10:47 +00001296 getPayload(pCur, offset + NKEY(pCur->pBt, pCell->h), amt, zBuf);
drh5e00f6c2001-09-13 13:46:56 +00001297 return amt;
drh72f82862001-05-24 21:06:34 +00001298}
drha059ad02001-04-17 20:09:11 +00001299
drh2af926b2001-05-15 00:39:25 +00001300/*
drh8721ce42001-11-07 14:22:00 +00001301** Compare an external key against the key on the entry that pCur points to.
1302**
1303** The external key is pKey and is nKey bytes long. The last nIgnore bytes
1304** of the key associated with pCur are ignored, as if they do not exist.
1305** (The normal case is for nIgnore to be zero in which case the entire
1306** internal key is used in the comparison.)
1307**
1308** The comparison result is written to *pRes as follows:
drh2af926b2001-05-15 00:39:25 +00001309**
drh717e6402001-09-27 03:22:32 +00001310** *pRes<0 This means pCur<pKey
1311**
1312** *pRes==0 This means pCur==pKey for all nKey bytes
1313**
1314** *pRes>0 This means pCur>pKey
1315**
drh8721ce42001-11-07 14:22:00 +00001316** When one key is an exact prefix of the other, the shorter key is
1317** considered less than the longer one. In order to be equal the
1318** keys must be exactly the same length. (The length of the pCur key
1319** is the actual key length minus nIgnore bytes.)
drh2af926b2001-05-15 00:39:25 +00001320*/
paulb95a8862003-04-01 21:16:41 +00001321static int sqliteBtreeKeyCompare(
drh8721ce42001-11-07 14:22:00 +00001322 BtCursor *pCur, /* Pointer to entry to compare against */
1323 const void *pKey, /* Key to compare against entry that pCur points to */
1324 int nKey, /* Number of bytes in pKey */
1325 int nIgnore, /* Ignore this many bytes at the end of pCur */
1326 int *pResult /* Write the result here */
drh5c4d9702001-08-20 00:33:58 +00001327){
drh2af926b2001-05-15 00:39:25 +00001328 Pgno nextPage;
drh8721ce42001-11-07 14:22:00 +00001329 int n, c, rc, nLocal;
drh2af926b2001-05-15 00:39:25 +00001330 Cell *pCell;
drh0d316a42002-08-11 20:10:47 +00001331 Btree *pBt = pCur->pBt;
drh717e6402001-09-27 03:22:32 +00001332 const char *zKey = (const char*)pKey;
drh2af926b2001-05-15 00:39:25 +00001333
1334 assert( pCur->pPage );
1335 assert( pCur->idx>=0 && pCur->idx<pCur->pPage->nCell );
drhbd03cae2001-06-02 02:40:57 +00001336 pCell = pCur->pPage->apCell[pCur->idx];
drh0d316a42002-08-11 20:10:47 +00001337 nLocal = NKEY(pBt, pCell->h) - nIgnore;
drh8721ce42001-11-07 14:22:00 +00001338 if( nLocal<0 ) nLocal = 0;
1339 n = nKey<nLocal ? nKey : nLocal;
drh2af926b2001-05-15 00:39:25 +00001340 if( n>MX_LOCAL_PAYLOAD ){
1341 n = MX_LOCAL_PAYLOAD;
1342 }
drh717e6402001-09-27 03:22:32 +00001343 c = memcmp(pCell->aPayload, zKey, n);
drh2af926b2001-05-15 00:39:25 +00001344 if( c!=0 ){
1345 *pResult = c;
1346 return SQLITE_OK;
1347 }
drh717e6402001-09-27 03:22:32 +00001348 zKey += n;
drh2af926b2001-05-15 00:39:25 +00001349 nKey -= n;
drh8721ce42001-11-07 14:22:00 +00001350 nLocal -= n;
drh0d316a42002-08-11 20:10:47 +00001351 nextPage = SWAB32(pBt, pCell->ovfl);
drh8721ce42001-11-07 14:22:00 +00001352 while( nKey>0 && nLocal>0 ){
drh2af926b2001-05-15 00:39:25 +00001353 OverflowPage *pOvfl;
1354 if( nextPage==0 ){
1355 return SQLITE_CORRUPT;
1356 }
drh0d316a42002-08-11 20:10:47 +00001357 rc = sqlitepager_get(pBt->pPager, nextPage, (void**)&pOvfl);
drh72f82862001-05-24 21:06:34 +00001358 if( rc ){
drh2af926b2001-05-15 00:39:25 +00001359 return rc;
1360 }
drh0d316a42002-08-11 20:10:47 +00001361 nextPage = SWAB32(pBt, pOvfl->iNext);
drh8721ce42001-11-07 14:22:00 +00001362 n = nKey<nLocal ? nKey : nLocal;
drh2af926b2001-05-15 00:39:25 +00001363 if( n>OVERFLOW_SIZE ){
1364 n = OVERFLOW_SIZE;
1365 }
drh717e6402001-09-27 03:22:32 +00001366 c = memcmp(pOvfl->aPayload, zKey, n);
drh2af926b2001-05-15 00:39:25 +00001367 sqlitepager_unref(pOvfl);
1368 if( c!=0 ){
1369 *pResult = c;
1370 return SQLITE_OK;
1371 }
1372 nKey -= n;
drh8721ce42001-11-07 14:22:00 +00001373 nLocal -= n;
drh717e6402001-09-27 03:22:32 +00001374 zKey += n;
drh2af926b2001-05-15 00:39:25 +00001375 }
drh717e6402001-09-27 03:22:32 +00001376 if( c==0 ){
drh8721ce42001-11-07 14:22:00 +00001377 c = nLocal - nKey;
drh717e6402001-09-27 03:22:32 +00001378 }
drh2af926b2001-05-15 00:39:25 +00001379 *pResult = c;
1380 return SQLITE_OK;
1381}
1382
drh72f82862001-05-24 21:06:34 +00001383/*
drh8178a752003-01-05 21:41:40 +00001384** Move the cursor down to a new child page. The newPgno argument is the
1385** page number of the child page in the byte order of the disk image.
drh72f82862001-05-24 21:06:34 +00001386*/
drh5e2f8b92001-05-28 00:41:15 +00001387static int moveToChild(BtCursor *pCur, int newPgno){
drh72f82862001-05-24 21:06:34 +00001388 int rc;
1389 MemPage *pNewPage;
drh0d316a42002-08-11 20:10:47 +00001390 Btree *pBt = pCur->pBt;
drh72f82862001-05-24 21:06:34 +00001391
drh8178a752003-01-05 21:41:40 +00001392 newPgno = SWAB32(pBt, newPgno);
drh0d316a42002-08-11 20:10:47 +00001393 rc = sqlitepager_get(pBt->pPager, newPgno, (void**)&pNewPage);
drh6019e162001-07-02 17:51:45 +00001394 if( rc ) return rc;
drh0d316a42002-08-11 20:10:47 +00001395 rc = initPage(pBt, pNewPage, newPgno, pCur->pPage);
drh6019e162001-07-02 17:51:45 +00001396 if( rc ) return rc;
drh428ae8c2003-01-04 16:48:09 +00001397 assert( pCur->idx>=pCur->pPage->nCell
1398 || pCur->pPage->apCell[pCur->idx]->h.leftChild==SWAB32(pBt,newPgno) );
1399 assert( pCur->idx<pCur->pPage->nCell
1400 || pCur->pPage->u.hdr.rightChild==SWAB32(pBt,newPgno) );
1401 pNewPage->idxParent = pCur->idx;
1402 pCur->pPage->idxShift = 0;
drh72f82862001-05-24 21:06:34 +00001403 sqlitepager_unref(pCur->pPage);
1404 pCur->pPage = pNewPage;
1405 pCur->idx = 0;
drh1dd8c402003-03-30 18:41:22 +00001406 if( pNewPage->nCell<1 ) return SQLITE_CORRUPT;
drh72f82862001-05-24 21:06:34 +00001407 return SQLITE_OK;
1408}
1409
1410/*
drh5e2f8b92001-05-28 00:41:15 +00001411** Move the cursor up to the parent page.
1412**
1413** pCur->idx is set to the cell index that contains the pointer
1414** to the page we are coming from. If we are coming from the
1415** right-most child page then pCur->idx is set to one more than
drhbd03cae2001-06-02 02:40:57 +00001416** the largest cell index.
drh72f82862001-05-24 21:06:34 +00001417*/
drh8178a752003-01-05 21:41:40 +00001418static void moveToParent(BtCursor *pCur){
drh72f82862001-05-24 21:06:34 +00001419 Pgno oldPgno;
1420 MemPage *pParent;
drh8178a752003-01-05 21:41:40 +00001421 MemPage *pPage;
drh428ae8c2003-01-04 16:48:09 +00001422 int idxParent;
drh8178a752003-01-05 21:41:40 +00001423 pPage = pCur->pPage;
1424 assert( pPage!=0 );
1425 pParent = pPage->pParent;
1426 assert( pParent!=0 );
1427 idxParent = pPage->idxParent;
drh72f82862001-05-24 21:06:34 +00001428 sqlitepager_ref(pParent);
drh8178a752003-01-05 21:41:40 +00001429 sqlitepager_unref(pPage);
drh72f82862001-05-24 21:06:34 +00001430 pCur->pPage = pParent;
drh428ae8c2003-01-04 16:48:09 +00001431 assert( pParent->idxShift==0 );
1432 if( pParent->idxShift==0 ){
1433 pCur->idx = idxParent;
1434#ifndef NDEBUG
1435 /* Verify that pCur->idx is the correct index to point back to the child
1436 ** page we just came from
1437 */
drh8178a752003-01-05 21:41:40 +00001438 oldPgno = SWAB32(pCur->pBt, sqlitepager_pagenumber(pPage));
drh428ae8c2003-01-04 16:48:09 +00001439 if( pCur->idx<pParent->nCell ){
1440 assert( pParent->apCell[idxParent]->h.leftChild==oldPgno );
1441 }else{
1442 assert( pParent->u.hdr.rightChild==oldPgno );
1443 }
1444#endif
1445 }else{
1446 /* The MemPage.idxShift flag indicates that cell indices might have
1447 ** changed since idxParent was set and hence idxParent might be out
1448 ** of date. So recompute the parent cell index by scanning all cells
1449 ** and locating the one that points to the child we just came from.
1450 */
1451 int i;
1452 pCur->idx = pParent->nCell;
drh8178a752003-01-05 21:41:40 +00001453 oldPgno = SWAB32(pCur->pBt, sqlitepager_pagenumber(pPage));
drh428ae8c2003-01-04 16:48:09 +00001454 for(i=0; i<pParent->nCell; i++){
1455 if( pParent->apCell[i]->h.leftChild==oldPgno ){
1456 pCur->idx = i;
1457 break;
1458 }
drh72f82862001-05-24 21:06:34 +00001459 }
1460 }
1461}
1462
1463/*
1464** Move the cursor to the root page
1465*/
drh5e2f8b92001-05-28 00:41:15 +00001466static int moveToRoot(BtCursor *pCur){
drh72f82862001-05-24 21:06:34 +00001467 MemPage *pNew;
drhbd03cae2001-06-02 02:40:57 +00001468 int rc;
drh0d316a42002-08-11 20:10:47 +00001469 Btree *pBt = pCur->pBt;
drhbd03cae2001-06-02 02:40:57 +00001470
drh0d316a42002-08-11 20:10:47 +00001471 rc = sqlitepager_get(pBt->pPager, pCur->pgnoRoot, (void**)&pNew);
drhbd03cae2001-06-02 02:40:57 +00001472 if( rc ) return rc;
drh0d316a42002-08-11 20:10:47 +00001473 rc = initPage(pBt, pNew, pCur->pgnoRoot, 0);
drh6019e162001-07-02 17:51:45 +00001474 if( rc ) return rc;
drh72f82862001-05-24 21:06:34 +00001475 sqlitepager_unref(pCur->pPage);
1476 pCur->pPage = pNew;
1477 pCur->idx = 0;
1478 return SQLITE_OK;
1479}
drh2af926b2001-05-15 00:39:25 +00001480
drh5e2f8b92001-05-28 00:41:15 +00001481/*
1482** Move the cursor down to the left-most leaf entry beneath the
1483** entry to which it is currently pointing.
1484*/
1485static int moveToLeftmost(BtCursor *pCur){
1486 Pgno pgno;
1487 int rc;
1488
1489 while( (pgno = pCur->pPage->apCell[pCur->idx]->h.leftChild)!=0 ){
drh8178a752003-01-05 21:41:40 +00001490 rc = moveToChild(pCur, pgno);
drh5e2f8b92001-05-28 00:41:15 +00001491 if( rc ) return rc;
1492 }
1493 return SQLITE_OK;
1494}
1495
drh2dcc9aa2002-12-04 13:40:25 +00001496/*
1497** Move the cursor down to the right-most leaf entry beneath the
1498** page to which it is currently pointing. Notice the difference
1499** between moveToLeftmost() and moveToRightmost(). moveToLeftmost()
1500** finds the left-most entry beneath the *entry* whereas moveToRightmost()
1501** finds the right-most entry beneath the *page*.
1502*/
1503static int moveToRightmost(BtCursor *pCur){
1504 Pgno pgno;
1505 int rc;
1506
1507 while( (pgno = pCur->pPage->u.hdr.rightChild)!=0 ){
drh428ae8c2003-01-04 16:48:09 +00001508 pCur->idx = pCur->pPage->nCell;
drh8178a752003-01-05 21:41:40 +00001509 rc = moveToChild(pCur, pgno);
drh2dcc9aa2002-12-04 13:40:25 +00001510 if( rc ) return rc;
1511 }
1512 pCur->idx = pCur->pPage->nCell - 1;
1513 return SQLITE_OK;
1514}
1515
drh5e00f6c2001-09-13 13:46:56 +00001516/* Move the cursor to the first entry in the table. Return SQLITE_OK
1517** on success. Set *pRes to 0 if the cursor actually points to something
drh77c679c2002-02-19 22:43:58 +00001518** or set *pRes to 1 if the table is empty.
drh5e00f6c2001-09-13 13:46:56 +00001519*/
paulb95a8862003-04-01 21:16:41 +00001520static int sqliteBtreeFirst(BtCursor *pCur, int *pRes){
drh5e00f6c2001-09-13 13:46:56 +00001521 int rc;
drhecdc7532001-09-23 02:35:53 +00001522 if( pCur->pPage==0 ) return SQLITE_ABORT;
drh5e00f6c2001-09-13 13:46:56 +00001523 rc = moveToRoot(pCur);
1524 if( rc ) return rc;
1525 if( pCur->pPage->nCell==0 ){
1526 *pRes = 1;
1527 return SQLITE_OK;
1528 }
1529 *pRes = 0;
1530 rc = moveToLeftmost(pCur);
drh2dcc9aa2002-12-04 13:40:25 +00001531 pCur->eSkip = SKIP_NONE;
drh5e00f6c2001-09-13 13:46:56 +00001532 return rc;
1533}
drh5e2f8b92001-05-28 00:41:15 +00001534
drh9562b552002-02-19 15:00:07 +00001535/* Move the cursor to the last entry in the table. Return SQLITE_OK
1536** on success. Set *pRes to 0 if the cursor actually points to something
drh77c679c2002-02-19 22:43:58 +00001537** or set *pRes to 1 if the table is empty.
drh9562b552002-02-19 15:00:07 +00001538*/
paulb95a8862003-04-01 21:16:41 +00001539static int sqliteBtreeLast(BtCursor *pCur, int *pRes){
drh9562b552002-02-19 15:00:07 +00001540 int rc;
drh9562b552002-02-19 15:00:07 +00001541 if( pCur->pPage==0 ) return SQLITE_ABORT;
1542 rc = moveToRoot(pCur);
1543 if( rc ) return rc;
drh7aa128d2002-06-21 13:09:16 +00001544 assert( pCur->pPage->isInit );
drh9562b552002-02-19 15:00:07 +00001545 if( pCur->pPage->nCell==0 ){
1546 *pRes = 1;
1547 return SQLITE_OK;
1548 }
1549 *pRes = 0;
drh2dcc9aa2002-12-04 13:40:25 +00001550 rc = moveToRightmost(pCur);
1551 pCur->eSkip = SKIP_NONE;
drh9562b552002-02-19 15:00:07 +00001552 return rc;
1553}
1554
drha059ad02001-04-17 20:09:11 +00001555/* Move the cursor so that it points to an entry near pKey.
drh72f82862001-05-24 21:06:34 +00001556** Return a success code.
1557**
drh5e2f8b92001-05-28 00:41:15 +00001558** If an exact match is not found, then the cursor is always
drhbd03cae2001-06-02 02:40:57 +00001559** left pointing at a leaf page which would hold the entry if it
drh5e2f8b92001-05-28 00:41:15 +00001560** were present. The cursor might point to an entry that comes
1561** before or after the key.
1562**
drhbd03cae2001-06-02 02:40:57 +00001563** The result of comparing the key with the entry to which the
1564** cursor is left pointing is stored in pCur->iMatch. The same
1565** value is also written to *pRes if pRes!=NULL. The meaning of
1566** this value is as follows:
1567**
1568** *pRes<0 The cursor is left pointing at an entry that
drh1a844c32002-12-04 22:29:28 +00001569** is smaller than pKey or if the table is empty
1570** and the cursor is therefore left point to nothing.
drhbd03cae2001-06-02 02:40:57 +00001571**
1572** *pRes==0 The cursor is left pointing at an entry that
1573** exactly matches pKey.
1574**
1575** *pRes>0 The cursor is left pointing at an entry that
drh7c717f72001-06-24 20:39:41 +00001576** is larger than pKey.
drha059ad02001-04-17 20:09:11 +00001577*/
paulb95a8862003-04-01 21:16:41 +00001578static int sqliteBtreeMoveto(BtCursor *pCur, const void *pKey, int nKey, int *pRes){
drh72f82862001-05-24 21:06:34 +00001579 int rc;
drhecdc7532001-09-23 02:35:53 +00001580 if( pCur->pPage==0 ) return SQLITE_ABORT;
drh2dcc9aa2002-12-04 13:40:25 +00001581 pCur->eSkip = SKIP_NONE;
drh5e2f8b92001-05-28 00:41:15 +00001582 rc = moveToRoot(pCur);
drh72f82862001-05-24 21:06:34 +00001583 if( rc ) return rc;
1584 for(;;){
1585 int lwr, upr;
1586 Pgno chldPg;
1587 MemPage *pPage = pCur->pPage;
drh1a844c32002-12-04 22:29:28 +00001588 int c = -1; /* pRes return if table is empty must be -1 */
drh72f82862001-05-24 21:06:34 +00001589 lwr = 0;
1590 upr = pPage->nCell-1;
1591 while( lwr<=upr ){
drh72f82862001-05-24 21:06:34 +00001592 pCur->idx = (lwr+upr)/2;
drh8721ce42001-11-07 14:22:00 +00001593 rc = sqliteBtreeKeyCompare(pCur, pKey, nKey, 0, &c);
drh72f82862001-05-24 21:06:34 +00001594 if( rc ) return rc;
1595 if( c==0 ){
drh5e2f8b92001-05-28 00:41:15 +00001596 pCur->iMatch = c;
drh72f82862001-05-24 21:06:34 +00001597 if( pRes ) *pRes = 0;
1598 return SQLITE_OK;
1599 }
1600 if( c<0 ){
1601 lwr = pCur->idx+1;
1602 }else{
1603 upr = pCur->idx-1;
1604 }
1605 }
1606 assert( lwr==upr+1 );
drh7aa128d2002-06-21 13:09:16 +00001607 assert( pPage->isInit );
drh72f82862001-05-24 21:06:34 +00001608 if( lwr>=pPage->nCell ){
drh14acc042001-06-10 19:56:58 +00001609 chldPg = pPage->u.hdr.rightChild;
drh72f82862001-05-24 21:06:34 +00001610 }else{
drh5e2f8b92001-05-28 00:41:15 +00001611 chldPg = pPage->apCell[lwr]->h.leftChild;
drh72f82862001-05-24 21:06:34 +00001612 }
1613 if( chldPg==0 ){
drh5e2f8b92001-05-28 00:41:15 +00001614 pCur->iMatch = c;
drh72f82862001-05-24 21:06:34 +00001615 if( pRes ) *pRes = c;
1616 return SQLITE_OK;
1617 }
drh428ae8c2003-01-04 16:48:09 +00001618 pCur->idx = lwr;
drh8178a752003-01-05 21:41:40 +00001619 rc = moveToChild(pCur, chldPg);
drh72f82862001-05-24 21:06:34 +00001620 if( rc ) return rc;
1621 }
drhbd03cae2001-06-02 02:40:57 +00001622 /* NOT REACHED */
drh72f82862001-05-24 21:06:34 +00001623}
1624
1625/*
drhbd03cae2001-06-02 02:40:57 +00001626** Advance the cursor to the next entry in the database. If
drh8c1238a2003-01-02 14:43:55 +00001627** successful then set *pRes=0. If the cursor
drhbd03cae2001-06-02 02:40:57 +00001628** was already pointing to the last entry in the database before
drh8c1238a2003-01-02 14:43:55 +00001629** this routine was called, then set *pRes=1.
drh72f82862001-05-24 21:06:34 +00001630*/
paulb95a8862003-04-01 21:16:41 +00001631static int sqliteBtreeNext(BtCursor *pCur, int *pRes){
drh72f82862001-05-24 21:06:34 +00001632 int rc;
drh8178a752003-01-05 21:41:40 +00001633 MemPage *pPage = pCur->pPage;
drh8c1238a2003-01-02 14:43:55 +00001634 assert( pRes!=0 );
drh8178a752003-01-05 21:41:40 +00001635 if( pPage==0 ){
drh8c1238a2003-01-02 14:43:55 +00001636 *pRes = 1;
drhecdc7532001-09-23 02:35:53 +00001637 return SQLITE_ABORT;
1638 }
drh8178a752003-01-05 21:41:40 +00001639 assert( pPage->isInit );
drh2dcc9aa2002-12-04 13:40:25 +00001640 assert( pCur->eSkip!=SKIP_INVALID );
drh8178a752003-01-05 21:41:40 +00001641 if( pPage->nCell==0 ){
drh8c1238a2003-01-02 14:43:55 +00001642 *pRes = 1;
drh2dcc9aa2002-12-04 13:40:25 +00001643 return SQLITE_OK;
1644 }
drh8178a752003-01-05 21:41:40 +00001645 assert( pCur->idx<pPage->nCell );
drh2dcc9aa2002-12-04 13:40:25 +00001646 if( pCur->eSkip==SKIP_NEXT ){
1647 pCur->eSkip = SKIP_NONE;
drh8c1238a2003-01-02 14:43:55 +00001648 *pRes = 0;
drh72f82862001-05-24 21:06:34 +00001649 return SQLITE_OK;
1650 }
drh2dcc9aa2002-12-04 13:40:25 +00001651 pCur->eSkip = SKIP_NONE;
drh72f82862001-05-24 21:06:34 +00001652 pCur->idx++;
drh8178a752003-01-05 21:41:40 +00001653 if( pCur->idx>=pPage->nCell ){
1654 if( pPage->u.hdr.rightChild ){
1655 rc = moveToChild(pCur, pPage->u.hdr.rightChild);
drh5e2f8b92001-05-28 00:41:15 +00001656 if( rc ) return rc;
1657 rc = moveToLeftmost(pCur);
drh8c1238a2003-01-02 14:43:55 +00001658 *pRes = 0;
1659 return rc;
drh72f82862001-05-24 21:06:34 +00001660 }
drh5e2f8b92001-05-28 00:41:15 +00001661 do{
drh8178a752003-01-05 21:41:40 +00001662 if( pPage->pParent==0 ){
drh8c1238a2003-01-02 14:43:55 +00001663 *pRes = 1;
drh5e2f8b92001-05-28 00:41:15 +00001664 return SQLITE_OK;
1665 }
drh8178a752003-01-05 21:41:40 +00001666 moveToParent(pCur);
1667 pPage = pCur->pPage;
1668 }while( pCur->idx>=pPage->nCell );
drh8c1238a2003-01-02 14:43:55 +00001669 *pRes = 0;
drh8178a752003-01-05 21:41:40 +00001670 return SQLITE_OK;
1671 }
1672 *pRes = 0;
1673 if( pPage->u.hdr.rightChild==0 ){
1674 return SQLITE_OK;
drh72f82862001-05-24 21:06:34 +00001675 }
drh5e2f8b92001-05-28 00:41:15 +00001676 rc = moveToLeftmost(pCur);
drh8c1238a2003-01-02 14:43:55 +00001677 return rc;
drh72f82862001-05-24 21:06:34 +00001678}
1679
drh3b7511c2001-05-26 13:15:44 +00001680/*
drh2dcc9aa2002-12-04 13:40:25 +00001681** Step the cursor to the back to the previous entry in the database. If
drh8178a752003-01-05 21:41:40 +00001682** successful then set *pRes=0. If the cursor
drh2dcc9aa2002-12-04 13:40:25 +00001683** was already pointing to the first entry in the database before
drh8178a752003-01-05 21:41:40 +00001684** this routine was called, then set *pRes=1.
drh2dcc9aa2002-12-04 13:40:25 +00001685*/
paulb95a8862003-04-01 21:16:41 +00001686static int sqliteBtreePrevious(BtCursor *pCur, int *pRes){
drh2dcc9aa2002-12-04 13:40:25 +00001687 int rc;
1688 Pgno pgno;
drh8178a752003-01-05 21:41:40 +00001689 MemPage *pPage;
1690 pPage = pCur->pPage;
1691 if( pPage==0 ){
1692 *pRes = 1;
drh2dcc9aa2002-12-04 13:40:25 +00001693 return SQLITE_ABORT;
1694 }
drh8178a752003-01-05 21:41:40 +00001695 assert( pPage->isInit );
drh2dcc9aa2002-12-04 13:40:25 +00001696 assert( pCur->eSkip!=SKIP_INVALID );
drh8178a752003-01-05 21:41:40 +00001697 if( pPage->nCell==0 ){
1698 *pRes = 1;
drh2dcc9aa2002-12-04 13:40:25 +00001699 return SQLITE_OK;
1700 }
1701 if( pCur->eSkip==SKIP_PREV ){
1702 pCur->eSkip = SKIP_NONE;
drh8178a752003-01-05 21:41:40 +00001703 *pRes = 0;
drh2dcc9aa2002-12-04 13:40:25 +00001704 return SQLITE_OK;
1705 }
1706 pCur->eSkip = SKIP_NONE;
1707 assert( pCur->idx>=0 );
drh8178a752003-01-05 21:41:40 +00001708 if( (pgno = pPage->apCell[pCur->idx]->h.leftChild)!=0 ){
1709 rc = moveToChild(pCur, pgno);
drh2dcc9aa2002-12-04 13:40:25 +00001710 if( rc ) return rc;
1711 rc = moveToRightmost(pCur);
1712 }else{
1713 while( pCur->idx==0 ){
drh8178a752003-01-05 21:41:40 +00001714 if( pPage->pParent==0 ){
drh2dcc9aa2002-12-04 13:40:25 +00001715 if( pRes ) *pRes = 1;
1716 return SQLITE_OK;
1717 }
drh8178a752003-01-05 21:41:40 +00001718 moveToParent(pCur);
1719 pPage = pCur->pPage;
drh2dcc9aa2002-12-04 13:40:25 +00001720 }
1721 pCur->idx--;
1722 rc = SQLITE_OK;
1723 }
drh8178a752003-01-05 21:41:40 +00001724 *pRes = 0;
drh2dcc9aa2002-12-04 13:40:25 +00001725 return rc;
1726}
1727
1728/*
drh3b7511c2001-05-26 13:15:44 +00001729** Allocate a new page from the database file.
1730**
1731** The new page is marked as dirty. (In other words, sqlitepager_write()
1732** has already been called on the new page.) The new page has also
1733** been referenced and the calling routine is responsible for calling
1734** sqlitepager_unref() on the new page when it is done.
1735**
1736** SQLITE_OK is returned on success. Any other return value indicates
1737** an error. *ppPage and *pPgno are undefined in the event of an error.
1738** Do not invoke sqlitepager_unref() on *ppPage if an error is returned.
drhbea00b92002-07-08 10:59:50 +00001739**
drh199e3cf2002-07-18 11:01:47 +00001740** If the "nearby" parameter is not 0, then a (feeble) effort is made to
1741** locate a page close to the page number "nearby". This can be used in an
drhbea00b92002-07-08 10:59:50 +00001742** attempt to keep related pages close to each other in the database file,
1743** which in turn can make database access faster.
drh3b7511c2001-05-26 13:15:44 +00001744*/
drh199e3cf2002-07-18 11:01:47 +00001745static int allocatePage(Btree *pBt, MemPage **ppPage, Pgno *pPgno, Pgno nearby){
drhbd03cae2001-06-02 02:40:57 +00001746 PageOne *pPage1 = pBt->page1;
drh8c42ca92001-06-22 19:15:00 +00001747 int rc;
drh3b7511c2001-05-26 13:15:44 +00001748 if( pPage1->freeList ){
1749 OverflowPage *pOvfl;
drh30e58752002-03-02 20:41:57 +00001750 FreelistInfo *pInfo;
1751
drh3b7511c2001-05-26 13:15:44 +00001752 rc = sqlitepager_write(pPage1);
1753 if( rc ) return rc;
drh0d316a42002-08-11 20:10:47 +00001754 SWAB_ADD(pBt, pPage1->nFree, -1);
1755 rc = sqlitepager_get(pBt->pPager, SWAB32(pBt, pPage1->freeList),
1756 (void**)&pOvfl);
drh3b7511c2001-05-26 13:15:44 +00001757 if( rc ) return rc;
1758 rc = sqlitepager_write(pOvfl);
1759 if( rc ){
1760 sqlitepager_unref(pOvfl);
1761 return rc;
1762 }
drh30e58752002-03-02 20:41:57 +00001763 pInfo = (FreelistInfo*)pOvfl->aPayload;
1764 if( pInfo->nFree==0 ){
drh0d316a42002-08-11 20:10:47 +00001765 *pPgno = SWAB32(pBt, pPage1->freeList);
drh30e58752002-03-02 20:41:57 +00001766 pPage1->freeList = pOvfl->iNext;
1767 *ppPage = (MemPage*)pOvfl;
1768 }else{
drh0d316a42002-08-11 20:10:47 +00001769 int closest, n;
1770 n = SWAB32(pBt, pInfo->nFree);
1771 if( n>1 && nearby>0 ){
drhbea00b92002-07-08 10:59:50 +00001772 int i, dist;
1773 closest = 0;
drh0d316a42002-08-11 20:10:47 +00001774 dist = SWAB32(pBt, pInfo->aFree[0]) - nearby;
drhbea00b92002-07-08 10:59:50 +00001775 if( dist<0 ) dist = -dist;
drh0d316a42002-08-11 20:10:47 +00001776 for(i=1; i<n; i++){
1777 int d2 = SWAB32(pBt, pInfo->aFree[i]) - nearby;
drhbea00b92002-07-08 10:59:50 +00001778 if( d2<0 ) d2 = -d2;
1779 if( d2<dist ) closest = i;
1780 }
1781 }else{
1782 closest = 0;
1783 }
drh0d316a42002-08-11 20:10:47 +00001784 SWAB_ADD(pBt, pInfo->nFree, -1);
1785 *pPgno = SWAB32(pBt, pInfo->aFree[closest]);
1786 pInfo->aFree[closest] = pInfo->aFree[n-1];
drh30e58752002-03-02 20:41:57 +00001787 rc = sqlitepager_get(pBt->pPager, *pPgno, (void**)ppPage);
1788 sqlitepager_unref(pOvfl);
1789 if( rc==SQLITE_OK ){
1790 sqlitepager_dont_rollback(*ppPage);
1791 rc = sqlitepager_write(*ppPage);
1792 }
1793 }
drh3b7511c2001-05-26 13:15:44 +00001794 }else{
drh2aa679f2001-06-25 02:11:07 +00001795 *pPgno = sqlitepager_pagecount(pBt->pPager) + 1;
drh8c42ca92001-06-22 19:15:00 +00001796 rc = sqlitepager_get(pBt->pPager, *pPgno, (void**)ppPage);
drh3b7511c2001-05-26 13:15:44 +00001797 if( rc ) return rc;
1798 rc = sqlitepager_write(*ppPage);
1799 }
1800 return rc;
1801}
1802
1803/*
1804** Add a page of the database file to the freelist. Either pgno or
1805** pPage but not both may be 0.
drh5e2f8b92001-05-28 00:41:15 +00001806**
drhdd793422001-06-28 01:54:48 +00001807** sqlitepager_unref() is NOT called for pPage.
drh3b7511c2001-05-26 13:15:44 +00001808*/
1809static int freePage(Btree *pBt, void *pPage, Pgno pgno){
drhbd03cae2001-06-02 02:40:57 +00001810 PageOne *pPage1 = pBt->page1;
drh3b7511c2001-05-26 13:15:44 +00001811 OverflowPage *pOvfl = (OverflowPage*)pPage;
1812 int rc;
drhdd793422001-06-28 01:54:48 +00001813 int needUnref = 0;
1814 MemPage *pMemPage;
drh8b2f49b2001-06-08 00:21:52 +00001815
drh3b7511c2001-05-26 13:15:44 +00001816 if( pgno==0 ){
1817 assert( pOvfl!=0 );
1818 pgno = sqlitepager_pagenumber(pOvfl);
1819 }
drh2aa679f2001-06-25 02:11:07 +00001820 assert( pgno>2 );
drhda47d772002-12-02 04:25:19 +00001821 assert( sqlitepager_pagenumber(pOvfl)==pgno );
drh193a6b42002-07-07 16:52:46 +00001822 pMemPage = (MemPage*)pPage;
1823 pMemPage->isInit = 0;
1824 if( pMemPage->pParent ){
1825 sqlitepager_unref(pMemPage->pParent);
1826 pMemPage->pParent = 0;
1827 }
drh3b7511c2001-05-26 13:15:44 +00001828 rc = sqlitepager_write(pPage1);
1829 if( rc ){
1830 return rc;
1831 }
drh0d316a42002-08-11 20:10:47 +00001832 SWAB_ADD(pBt, pPage1->nFree, 1);
1833 if( pPage1->nFree!=0 && pPage1->freeList!=0 ){
drh30e58752002-03-02 20:41:57 +00001834 OverflowPage *pFreeIdx;
drh0d316a42002-08-11 20:10:47 +00001835 rc = sqlitepager_get(pBt->pPager, SWAB32(pBt, pPage1->freeList),
1836 (void**)&pFreeIdx);
drh30e58752002-03-02 20:41:57 +00001837 if( rc==SQLITE_OK ){
1838 FreelistInfo *pInfo = (FreelistInfo*)pFreeIdx->aPayload;
drh0d316a42002-08-11 20:10:47 +00001839 int n = SWAB32(pBt, pInfo->nFree);
1840 if( n<(sizeof(pInfo->aFree)/sizeof(pInfo->aFree[0])) ){
drh30e58752002-03-02 20:41:57 +00001841 rc = sqlitepager_write(pFreeIdx);
1842 if( rc==SQLITE_OK ){
drh0d316a42002-08-11 20:10:47 +00001843 pInfo->aFree[n] = SWAB32(pBt, pgno);
1844 SWAB_ADD(pBt, pInfo->nFree, 1);
drh30e58752002-03-02 20:41:57 +00001845 sqlitepager_unref(pFreeIdx);
1846 sqlitepager_dont_write(pBt->pPager, pgno);
1847 return rc;
1848 }
1849 }
1850 sqlitepager_unref(pFreeIdx);
1851 }
1852 }
drh3b7511c2001-05-26 13:15:44 +00001853 if( pOvfl==0 ){
1854 assert( pgno>0 );
drh8c42ca92001-06-22 19:15:00 +00001855 rc = sqlitepager_get(pBt->pPager, pgno, (void**)&pOvfl);
drh3b7511c2001-05-26 13:15:44 +00001856 if( rc ) return rc;
drhdd793422001-06-28 01:54:48 +00001857 needUnref = 1;
drh3b7511c2001-05-26 13:15:44 +00001858 }
1859 rc = sqlitepager_write(pOvfl);
1860 if( rc ){
drhdd793422001-06-28 01:54:48 +00001861 if( needUnref ) sqlitepager_unref(pOvfl);
drh3b7511c2001-05-26 13:15:44 +00001862 return rc;
1863 }
drh14acc042001-06-10 19:56:58 +00001864 pOvfl->iNext = pPage1->freeList;
drh0d316a42002-08-11 20:10:47 +00001865 pPage1->freeList = SWAB32(pBt, pgno);
drh5e2f8b92001-05-28 00:41:15 +00001866 memset(pOvfl->aPayload, 0, OVERFLOW_SIZE);
drhdd793422001-06-28 01:54:48 +00001867 if( needUnref ) rc = sqlitepager_unref(pOvfl);
drh3b7511c2001-05-26 13:15:44 +00001868 return rc;
1869}
1870
1871/*
1872** Erase all the data out of a cell. This involves returning overflow
1873** pages back the freelist.
1874*/
1875static int clearCell(Btree *pBt, Cell *pCell){
1876 Pager *pPager = pBt->pPager;
1877 OverflowPage *pOvfl;
drh3b7511c2001-05-26 13:15:44 +00001878 Pgno ovfl, nextOvfl;
1879 int rc;
1880
drh0d316a42002-08-11 20:10:47 +00001881 if( NKEY(pBt, pCell->h) + NDATA(pBt, pCell->h) <= MX_LOCAL_PAYLOAD ){
drh5e2f8b92001-05-28 00:41:15 +00001882 return SQLITE_OK;
1883 }
drh0d316a42002-08-11 20:10:47 +00001884 ovfl = SWAB32(pBt, pCell->ovfl);
drh3b7511c2001-05-26 13:15:44 +00001885 pCell->ovfl = 0;
1886 while( ovfl ){
drh8c42ca92001-06-22 19:15:00 +00001887 rc = sqlitepager_get(pPager, ovfl, (void**)&pOvfl);
drh3b7511c2001-05-26 13:15:44 +00001888 if( rc ) return rc;
drh0d316a42002-08-11 20:10:47 +00001889 nextOvfl = SWAB32(pBt, pOvfl->iNext);
drhbd03cae2001-06-02 02:40:57 +00001890 rc = freePage(pBt, pOvfl, ovfl);
1891 if( rc ) return rc;
drhdd793422001-06-28 01:54:48 +00001892 sqlitepager_unref(pOvfl);
drh3b7511c2001-05-26 13:15:44 +00001893 ovfl = nextOvfl;
drh3b7511c2001-05-26 13:15:44 +00001894 }
drh5e2f8b92001-05-28 00:41:15 +00001895 return SQLITE_OK;
drh3b7511c2001-05-26 13:15:44 +00001896}
1897
1898/*
1899** Create a new cell from key and data. Overflow pages are allocated as
1900** necessary and linked to this cell.
1901*/
1902static int fillInCell(
1903 Btree *pBt, /* The whole Btree. Needed to allocate pages */
1904 Cell *pCell, /* Populate this Cell structure */
drh5c4d9702001-08-20 00:33:58 +00001905 const void *pKey, int nKey, /* The key */
1906 const void *pData,int nData /* The data */
drh3b7511c2001-05-26 13:15:44 +00001907){
drhdd793422001-06-28 01:54:48 +00001908 OverflowPage *pOvfl, *pPrior;
drh3b7511c2001-05-26 13:15:44 +00001909 Pgno *pNext;
1910 int spaceLeft;
drh8c42ca92001-06-22 19:15:00 +00001911 int n, rc;
drh3b7511c2001-05-26 13:15:44 +00001912 int nPayload;
drh5c4d9702001-08-20 00:33:58 +00001913 const char *pPayload;
drh3b7511c2001-05-26 13:15:44 +00001914 char *pSpace;
drh199e3cf2002-07-18 11:01:47 +00001915 Pgno nearby = 0;
drh3b7511c2001-05-26 13:15:44 +00001916
drh5e2f8b92001-05-28 00:41:15 +00001917 pCell->h.leftChild = 0;
drh0d316a42002-08-11 20:10:47 +00001918 pCell->h.nKey = SWAB16(pBt, nKey & 0xffff);
drh80ff32f2001-11-04 18:32:46 +00001919 pCell->h.nKeyHi = nKey >> 16;
drh0d316a42002-08-11 20:10:47 +00001920 pCell->h.nData = SWAB16(pBt, nData & 0xffff);
drh80ff32f2001-11-04 18:32:46 +00001921 pCell->h.nDataHi = nData >> 16;
drh3b7511c2001-05-26 13:15:44 +00001922 pCell->h.iNext = 0;
1923
1924 pNext = &pCell->ovfl;
drh5e2f8b92001-05-28 00:41:15 +00001925 pSpace = pCell->aPayload;
drh3b7511c2001-05-26 13:15:44 +00001926 spaceLeft = MX_LOCAL_PAYLOAD;
1927 pPayload = pKey;
1928 pKey = 0;
1929 nPayload = nKey;
drhdd793422001-06-28 01:54:48 +00001930 pPrior = 0;
drh3b7511c2001-05-26 13:15:44 +00001931 while( nPayload>0 ){
1932 if( spaceLeft==0 ){
drh199e3cf2002-07-18 11:01:47 +00001933 rc = allocatePage(pBt, (MemPage**)&pOvfl, pNext, nearby);
drh3b7511c2001-05-26 13:15:44 +00001934 if( rc ){
1935 *pNext = 0;
drhbea00b92002-07-08 10:59:50 +00001936 }else{
drh199e3cf2002-07-18 11:01:47 +00001937 nearby = *pNext;
drhdd793422001-06-28 01:54:48 +00001938 }
1939 if( pPrior ) sqlitepager_unref(pPrior);
1940 if( rc ){
drh5e2f8b92001-05-28 00:41:15 +00001941 clearCell(pBt, pCell);
drh3b7511c2001-05-26 13:15:44 +00001942 return rc;
1943 }
drh0d316a42002-08-11 20:10:47 +00001944 if( pBt->needSwab ) *pNext = swab32(*pNext);
drhdd793422001-06-28 01:54:48 +00001945 pPrior = pOvfl;
drh3b7511c2001-05-26 13:15:44 +00001946 spaceLeft = OVERFLOW_SIZE;
drh5e2f8b92001-05-28 00:41:15 +00001947 pSpace = pOvfl->aPayload;
drh8c42ca92001-06-22 19:15:00 +00001948 pNext = &pOvfl->iNext;
drh3b7511c2001-05-26 13:15:44 +00001949 }
1950 n = nPayload;
1951 if( n>spaceLeft ) n = spaceLeft;
1952 memcpy(pSpace, pPayload, n);
1953 nPayload -= n;
1954 if( nPayload==0 && pData ){
1955 pPayload = pData;
1956 nPayload = nData;
1957 pData = 0;
1958 }else{
1959 pPayload += n;
1960 }
1961 spaceLeft -= n;
1962 pSpace += n;
1963 }
drhdd793422001-06-28 01:54:48 +00001964 *pNext = 0;
1965 if( pPrior ){
1966 sqlitepager_unref(pPrior);
1967 }
drh3b7511c2001-05-26 13:15:44 +00001968 return SQLITE_OK;
1969}
1970
1971/*
drhbd03cae2001-06-02 02:40:57 +00001972** Change the MemPage.pParent pointer on the page whose number is
drh8b2f49b2001-06-08 00:21:52 +00001973** given in the second argument so that MemPage.pParent holds the
drhbd03cae2001-06-02 02:40:57 +00001974** pointer in the third argument.
1975*/
drh428ae8c2003-01-04 16:48:09 +00001976static void reparentPage(Pager *pPager, Pgno pgno, MemPage *pNewParent,int idx){
drhbd03cae2001-06-02 02:40:57 +00001977 MemPage *pThis;
1978
drhdd793422001-06-28 01:54:48 +00001979 if( pgno==0 ) return;
1980 assert( pPager!=0 );
drhbd03cae2001-06-02 02:40:57 +00001981 pThis = sqlitepager_lookup(pPager, pgno);
drh6019e162001-07-02 17:51:45 +00001982 if( pThis && pThis->isInit ){
drhdd793422001-06-28 01:54:48 +00001983 if( pThis->pParent!=pNewParent ){
1984 if( pThis->pParent ) sqlitepager_unref(pThis->pParent);
1985 pThis->pParent = pNewParent;
1986 if( pNewParent ) sqlitepager_ref(pNewParent);
1987 }
drh428ae8c2003-01-04 16:48:09 +00001988 pThis->idxParent = idx;
drhdd793422001-06-28 01:54:48 +00001989 sqlitepager_unref(pThis);
drhbd03cae2001-06-02 02:40:57 +00001990 }
1991}
1992
1993/*
1994** Reparent all children of the given page to be the given page.
1995** In other words, for every child of pPage, invoke reparentPage()
drh5e00f6c2001-09-13 13:46:56 +00001996** to make sure that each child knows that pPage is its parent.
drhbd03cae2001-06-02 02:40:57 +00001997**
1998** This routine gets called after you memcpy() one page into
1999** another.
2000*/
drh0d316a42002-08-11 20:10:47 +00002001static void reparentChildPages(Btree *pBt, MemPage *pPage){
drhbd03cae2001-06-02 02:40:57 +00002002 int i;
drh0d316a42002-08-11 20:10:47 +00002003 Pager *pPager = pBt->pPager;
drhbd03cae2001-06-02 02:40:57 +00002004 for(i=0; i<pPage->nCell; i++){
drh428ae8c2003-01-04 16:48:09 +00002005 reparentPage(pPager, SWAB32(pBt, pPage->apCell[i]->h.leftChild), pPage, i);
drhbd03cae2001-06-02 02:40:57 +00002006 }
drh428ae8c2003-01-04 16:48:09 +00002007 reparentPage(pPager, SWAB32(pBt, pPage->u.hdr.rightChild), pPage, i);
2008 pPage->idxShift = 0;
drh14acc042001-06-10 19:56:58 +00002009}
2010
2011/*
2012** Remove the i-th cell from pPage. This routine effects pPage only.
2013** The cell content is not freed or deallocated. It is assumed that
2014** the cell content has been copied someplace else. This routine just
2015** removes the reference to the cell from pPage.
2016**
2017** "sz" must be the number of bytes in the cell.
2018**
2019** Do not bother maintaining the integrity of the linked list of Cells.
drh8c42ca92001-06-22 19:15:00 +00002020** Only the pPage->apCell[] array is important. The relinkCellList()
2021** routine will be called soon after this routine in order to rebuild
2022** the linked list.
drh14acc042001-06-10 19:56:58 +00002023*/
drh0d316a42002-08-11 20:10:47 +00002024static void dropCell(Btree *pBt, MemPage *pPage, int idx, int sz){
drh14acc042001-06-10 19:56:58 +00002025 int j;
drh8c42ca92001-06-22 19:15:00 +00002026 assert( idx>=0 && idx<pPage->nCell );
drh0d316a42002-08-11 20:10:47 +00002027 assert( sz==cellSize(pBt, pPage->apCell[idx]) );
drh6019e162001-07-02 17:51:45 +00002028 assert( sqlitepager_iswriteable(pPage) );
drh0d316a42002-08-11 20:10:47 +00002029 freeSpace(pBt, pPage, Addr(pPage->apCell[idx]) - Addr(pPage), sz);
drh7c717f72001-06-24 20:39:41 +00002030 for(j=idx; j<pPage->nCell-1; j++){
drh14acc042001-06-10 19:56:58 +00002031 pPage->apCell[j] = pPage->apCell[j+1];
2032 }
2033 pPage->nCell--;
drh428ae8c2003-01-04 16:48:09 +00002034 pPage->idxShift = 1;
drh14acc042001-06-10 19:56:58 +00002035}
2036
2037/*
2038** Insert a new cell on pPage at cell index "i". pCell points to the
2039** content of the cell.
2040**
2041** If the cell content will fit on the page, then put it there. If it
2042** will not fit, then just make pPage->apCell[i] point to the content
2043** and set pPage->isOverfull.
2044**
2045** Do not bother maintaining the integrity of the linked list of Cells.
drh8c42ca92001-06-22 19:15:00 +00002046** Only the pPage->apCell[] array is important. The relinkCellList()
2047** routine will be called soon after this routine in order to rebuild
2048** the linked list.
drh14acc042001-06-10 19:56:58 +00002049*/
drh0d316a42002-08-11 20:10:47 +00002050static void insertCell(Btree *pBt, MemPage *pPage, int i, Cell *pCell, int sz){
drh14acc042001-06-10 19:56:58 +00002051 int idx, j;
2052 assert( i>=0 && i<=pPage->nCell );
drh0d316a42002-08-11 20:10:47 +00002053 assert( sz==cellSize(pBt, pCell) );
drh6019e162001-07-02 17:51:45 +00002054 assert( sqlitepager_iswriteable(pPage) );
drh0d316a42002-08-11 20:10:47 +00002055 idx = allocateSpace(pBt, pPage, sz);
drh14acc042001-06-10 19:56:58 +00002056 for(j=pPage->nCell; j>i; j--){
2057 pPage->apCell[j] = pPage->apCell[j-1];
2058 }
2059 pPage->nCell++;
drh14acc042001-06-10 19:56:58 +00002060 if( idx<=0 ){
2061 pPage->isOverfull = 1;
2062 pPage->apCell[i] = pCell;
2063 }else{
2064 memcpy(&pPage->u.aDisk[idx], pCell, sz);
drh8c42ca92001-06-22 19:15:00 +00002065 pPage->apCell[i] = (Cell*)&pPage->u.aDisk[idx];
drh14acc042001-06-10 19:56:58 +00002066 }
drh428ae8c2003-01-04 16:48:09 +00002067 pPage->idxShift = 1;
drh14acc042001-06-10 19:56:58 +00002068}
2069
2070/*
2071** Rebuild the linked list of cells on a page so that the cells
drh8c42ca92001-06-22 19:15:00 +00002072** occur in the order specified by the pPage->apCell[] array.
2073** Invoke this routine once to repair damage after one or more
2074** invocations of either insertCell() or dropCell().
drh14acc042001-06-10 19:56:58 +00002075*/
drh0d316a42002-08-11 20:10:47 +00002076static void relinkCellList(Btree *pBt, MemPage *pPage){
drh14acc042001-06-10 19:56:58 +00002077 int i;
2078 u16 *pIdx;
drh6019e162001-07-02 17:51:45 +00002079 assert( sqlitepager_iswriteable(pPage) );
drh14acc042001-06-10 19:56:58 +00002080 pIdx = &pPage->u.hdr.firstCell;
2081 for(i=0; i<pPage->nCell; i++){
drh7c717f72001-06-24 20:39:41 +00002082 int idx = Addr(pPage->apCell[i]) - Addr(pPage);
drh8c42ca92001-06-22 19:15:00 +00002083 assert( idx>0 && idx<SQLITE_PAGE_SIZE );
drh0d316a42002-08-11 20:10:47 +00002084 *pIdx = SWAB16(pBt, idx);
drh14acc042001-06-10 19:56:58 +00002085 pIdx = &pPage->apCell[i]->h.iNext;
2086 }
2087 *pIdx = 0;
2088}
2089
2090/*
2091** Make a copy of the contents of pFrom into pTo. The pFrom->apCell[]
drh5e00f6c2001-09-13 13:46:56 +00002092** pointers that point into pFrom->u.aDisk[] must be adjusted to point
drhdd793422001-06-28 01:54:48 +00002093** into pTo->u.aDisk[] instead. But some pFrom->apCell[] entries might
drh14acc042001-06-10 19:56:58 +00002094** not point to pFrom->u.aDisk[]. Those are unchanged.
2095*/
2096static void copyPage(MemPage *pTo, MemPage *pFrom){
2097 uptr from, to;
2098 int i;
2099 memcpy(pTo->u.aDisk, pFrom->u.aDisk, SQLITE_PAGE_SIZE);
drhdd793422001-06-28 01:54:48 +00002100 pTo->pParent = 0;
drh14acc042001-06-10 19:56:58 +00002101 pTo->isInit = 1;
2102 pTo->nCell = pFrom->nCell;
2103 pTo->nFree = pFrom->nFree;
2104 pTo->isOverfull = pFrom->isOverfull;
drh7c717f72001-06-24 20:39:41 +00002105 to = Addr(pTo);
2106 from = Addr(pFrom);
drh14acc042001-06-10 19:56:58 +00002107 for(i=0; i<pTo->nCell; i++){
drh7c717f72001-06-24 20:39:41 +00002108 uptr x = Addr(pFrom->apCell[i]);
drh8c42ca92001-06-22 19:15:00 +00002109 if( x>from && x<from+SQLITE_PAGE_SIZE ){
2110 *((uptr*)&pTo->apCell[i]) = x + to - from;
drhdd793422001-06-28 01:54:48 +00002111 }else{
2112 pTo->apCell[i] = pFrom->apCell[i];
drh14acc042001-06-10 19:56:58 +00002113 }
2114 }
drhbd03cae2001-06-02 02:40:57 +00002115}
2116
2117/*
drhc3b70572003-01-04 19:44:07 +00002118** The following parameters determine how many adjacent pages get involved
2119** in a balancing operation. NN is the number of neighbors on either side
2120** of the page that participate in the balancing operation. NB is the
2121** total number of pages that participate, including the target page and
2122** NN neighbors on either side.
2123**
2124** The minimum value of NN is 1 (of course). Increasing NN above 1
2125** (to 2 or 3) gives a modest improvement in SELECT and DELETE performance
2126** in exchange for a larger degradation in INSERT and UPDATE performance.
2127** The value of NN appears to give the best results overall.
2128*/
2129#define NN 1 /* Number of neighbors on either side of pPage */
2130#define NB (NN*2+1) /* Total pages involved in the balance */
2131
2132/*
drh8b2f49b2001-06-08 00:21:52 +00002133** This routine redistributes Cells on pPage and up to two siblings
2134** of pPage so that all pages have about the same amount of free space.
drh14acc042001-06-10 19:56:58 +00002135** Usually one sibling on either side of pPage is used in the balancing,
drh8b2f49b2001-06-08 00:21:52 +00002136** though both siblings might come from one side if pPage is the first
2137** or last child of its parent. If pPage has fewer than two siblings
2138** (something which can only happen if pPage is the root page or a
drh14acc042001-06-10 19:56:58 +00002139** child of root) then all available siblings participate in the balancing.
drh8b2f49b2001-06-08 00:21:52 +00002140**
2141** The number of siblings of pPage might be increased or decreased by
drh8c42ca92001-06-22 19:15:00 +00002142** one in an effort to keep pages between 66% and 100% full. The root page
2143** is special and is allowed to be less than 66% full. If pPage is
2144** the root page, then the depth of the tree might be increased
drh8b2f49b2001-06-08 00:21:52 +00002145** or decreased by one, as necessary, to keep the root page from being
2146** overfull or empty.
2147**
drh14acc042001-06-10 19:56:58 +00002148** This routine calls relinkCellList() on its input page regardless of
2149** whether or not it does any real balancing. Client routines will typically
2150** invoke insertCell() or dropCell() before calling this routine, so we
2151** need to call relinkCellList() to clean up the mess that those other
2152** routines left behind.
2153**
2154** pCur is left pointing to the same cell as when this routine was called
drh8c42ca92001-06-22 19:15:00 +00002155** even if that cell gets moved to a different page. pCur may be NULL.
2156** Set the pCur parameter to NULL if you do not care about keeping track
2157** of a cell as that will save this routine the work of keeping track of it.
drh14acc042001-06-10 19:56:58 +00002158**
drh8b2f49b2001-06-08 00:21:52 +00002159** Note that when this routine is called, some of the Cells on pPage
drh14acc042001-06-10 19:56:58 +00002160** might not actually be stored in pPage->u.aDisk[]. This can happen
drh8b2f49b2001-06-08 00:21:52 +00002161** if the page is overfull. Part of the job of this routine is to
drh14acc042001-06-10 19:56:58 +00002162** make sure all Cells for pPage once again fit in pPage->u.aDisk[].
2163**
drh8c42ca92001-06-22 19:15:00 +00002164** In the course of balancing the siblings of pPage, the parent of pPage
2165** might become overfull or underfull. If that happens, then this routine
2166** is called recursively on the parent.
2167**
drh5e00f6c2001-09-13 13:46:56 +00002168** If this routine fails for any reason, it might leave the database
2169** in a corrupted state. So if this routine fails, the database should
2170** be rolled back.
drh8b2f49b2001-06-08 00:21:52 +00002171*/
drh14acc042001-06-10 19:56:58 +00002172static int balance(Btree *pBt, MemPage *pPage, BtCursor *pCur){
drh8b2f49b2001-06-08 00:21:52 +00002173 MemPage *pParent; /* The parent of pPage */
drh8b2f49b2001-06-08 00:21:52 +00002174 int nCell; /* Number of cells in apCell[] */
2175 int nOld; /* Number of pages in apOld[] */
2176 int nNew; /* Number of pages in apNew[] */
drh8b2f49b2001-06-08 00:21:52 +00002177 int nDiv; /* Number of cells in apDiv[] */
drh14acc042001-06-10 19:56:58 +00002178 int i, j, k; /* Loop counters */
2179 int idx; /* Index of pPage in pParent->apCell[] */
2180 int nxDiv; /* Next divider slot in pParent->apCell[] */
2181 int rc; /* The return code */
2182 int iCur; /* apCell[iCur] is the cell of the cursor */
drh5edc3122001-09-13 21:53:09 +00002183 MemPage *pOldCurPage; /* The cursor originally points to this page */
drh6019e162001-07-02 17:51:45 +00002184 int subtotal; /* Subtotal of bytes in cells on one page */
drh9ca7d3b2001-06-28 11:50:21 +00002185 MemPage *extraUnref = 0; /* A page that needs to be unref-ed */
drhc3b70572003-01-04 19:44:07 +00002186 MemPage *apOld[NB]; /* pPage and up to two siblings */
2187 Pgno pgnoOld[NB]; /* Page numbers for each page in apOld[] */
2188 MemPage *apNew[NB+1]; /* pPage and up to NB siblings after balancing */
2189 Pgno pgnoNew[NB+1]; /* Page numbers for each page in apNew[] */
2190 int idxDiv[NB]; /* Indices of divider cells in pParent */
2191 Cell *apDiv[NB]; /* Divider cells in pParent */
2192 Cell aTemp[NB]; /* Temporary holding area for apDiv[] */
2193 int cntNew[NB+1]; /* Index in apCell[] of cell after i-th page */
2194 int szNew[NB+1]; /* Combined size of cells place on i-th page */
2195 MemPage aOld[NB]; /* Temporary copies of pPage and its siblings */
2196 Cell *apCell[(MX_CELL+2)*NB]; /* All cells from pages being balanced */
2197 int szCell[(MX_CELL+2)*NB]; /* Local size of all cells */
drh8b2f49b2001-06-08 00:21:52 +00002198
drh14acc042001-06-10 19:56:58 +00002199 /*
2200 ** Return without doing any work if pPage is neither overfull nor
2201 ** underfull.
drh8b2f49b2001-06-08 00:21:52 +00002202 */
drh6019e162001-07-02 17:51:45 +00002203 assert( sqlitepager_iswriteable(pPage) );
drha1b351a2001-09-14 16:42:12 +00002204 if( !pPage->isOverfull && pPage->nFree<SQLITE_PAGE_SIZE/2
2205 && pPage->nCell>=2){
drh0d316a42002-08-11 20:10:47 +00002206 relinkCellList(pBt, pPage);
drh8b2f49b2001-06-08 00:21:52 +00002207 return SQLITE_OK;
2208 }
2209
2210 /*
drh14acc042001-06-10 19:56:58 +00002211 ** Find the parent of the page to be balanceed.
2212 ** If there is no parent, it means this page is the root page and
drh8b2f49b2001-06-08 00:21:52 +00002213 ** special rules apply.
2214 */
drh14acc042001-06-10 19:56:58 +00002215 pParent = pPage->pParent;
drh8b2f49b2001-06-08 00:21:52 +00002216 if( pParent==0 ){
2217 Pgno pgnoChild;
drh8c42ca92001-06-22 19:15:00 +00002218 MemPage *pChild;
drh7aa128d2002-06-21 13:09:16 +00002219 assert( pPage->isInit );
drh8b2f49b2001-06-08 00:21:52 +00002220 if( pPage->nCell==0 ){
drh14acc042001-06-10 19:56:58 +00002221 if( pPage->u.hdr.rightChild ){
2222 /*
2223 ** The root page is empty. Copy the one child page
drh8b2f49b2001-06-08 00:21:52 +00002224 ** into the root page and return. This reduces the depth
2225 ** of the BTree by one.
2226 */
drh0d316a42002-08-11 20:10:47 +00002227 pgnoChild = SWAB32(pBt, pPage->u.hdr.rightChild);
drh8c42ca92001-06-22 19:15:00 +00002228 rc = sqlitepager_get(pBt->pPager, pgnoChild, (void**)&pChild);
drh8b2f49b2001-06-08 00:21:52 +00002229 if( rc ) return rc;
2230 memcpy(pPage, pChild, SQLITE_PAGE_SIZE);
2231 pPage->isInit = 0;
drh0d316a42002-08-11 20:10:47 +00002232 rc = initPage(pBt, pPage, sqlitepager_pagenumber(pPage), 0);
drh6019e162001-07-02 17:51:45 +00002233 assert( rc==SQLITE_OK );
drh0d316a42002-08-11 20:10:47 +00002234 reparentChildPages(pBt, pPage);
drh5edc3122001-09-13 21:53:09 +00002235 if( pCur && pCur->pPage==pChild ){
2236 sqlitepager_unref(pChild);
2237 pCur->pPage = pPage;
2238 sqlitepager_ref(pPage);
2239 }
drh8b2f49b2001-06-08 00:21:52 +00002240 freePage(pBt, pChild, pgnoChild);
2241 sqlitepager_unref(pChild);
drhefc251d2001-07-01 22:12:01 +00002242 }else{
drh0d316a42002-08-11 20:10:47 +00002243 relinkCellList(pBt, pPage);
drh8b2f49b2001-06-08 00:21:52 +00002244 }
2245 return SQLITE_OK;
2246 }
drh14acc042001-06-10 19:56:58 +00002247 if( !pPage->isOverfull ){
drh8b2f49b2001-06-08 00:21:52 +00002248 /* It is OK for the root page to be less than half full.
2249 */
drh0d316a42002-08-11 20:10:47 +00002250 relinkCellList(pBt, pPage);
drh8b2f49b2001-06-08 00:21:52 +00002251 return SQLITE_OK;
2252 }
drh14acc042001-06-10 19:56:58 +00002253 /*
2254 ** If we get to here, it means the root page is overfull.
drh8b2f49b2001-06-08 00:21:52 +00002255 ** When this happens, Create a new child page and copy the
2256 ** contents of the root into the child. Then make the root
drh14acc042001-06-10 19:56:58 +00002257 ** page an empty page with rightChild pointing to the new
drh8b2f49b2001-06-08 00:21:52 +00002258 ** child. Then fall thru to the code below which will cause
2259 ** the overfull child page to be split.
2260 */
drh14acc042001-06-10 19:56:58 +00002261 rc = sqlitepager_write(pPage);
2262 if( rc ) return rc;
drhbea00b92002-07-08 10:59:50 +00002263 rc = allocatePage(pBt, &pChild, &pgnoChild, sqlitepager_pagenumber(pPage));
drh8b2f49b2001-06-08 00:21:52 +00002264 if( rc ) return rc;
drh6019e162001-07-02 17:51:45 +00002265 assert( sqlitepager_iswriteable(pChild) );
drh14acc042001-06-10 19:56:58 +00002266 copyPage(pChild, pPage);
2267 pChild->pParent = pPage;
drhbb49aba2003-01-04 18:53:27 +00002268 pChild->idxParent = 0;
drhdd793422001-06-28 01:54:48 +00002269 sqlitepager_ref(pPage);
drh14acc042001-06-10 19:56:58 +00002270 pChild->isOverfull = 1;
drh5edc3122001-09-13 21:53:09 +00002271 if( pCur && pCur->pPage==pPage ){
2272 sqlitepager_unref(pPage);
drh14acc042001-06-10 19:56:58 +00002273 pCur->pPage = pChild;
drh9ca7d3b2001-06-28 11:50:21 +00002274 }else{
2275 extraUnref = pChild;
drh8b2f49b2001-06-08 00:21:52 +00002276 }
drh0d316a42002-08-11 20:10:47 +00002277 zeroPage(pBt, pPage);
2278 pPage->u.hdr.rightChild = SWAB32(pBt, pgnoChild);
drh8b2f49b2001-06-08 00:21:52 +00002279 pParent = pPage;
2280 pPage = pChild;
drh8b2f49b2001-06-08 00:21:52 +00002281 }
drh6019e162001-07-02 17:51:45 +00002282 rc = sqlitepager_write(pParent);
2283 if( rc ) return rc;
drh7aa128d2002-06-21 13:09:16 +00002284 assert( pParent->isInit );
drh14acc042001-06-10 19:56:58 +00002285
drh8b2f49b2001-06-08 00:21:52 +00002286 /*
drh14acc042001-06-10 19:56:58 +00002287 ** Find the Cell in the parent page whose h.leftChild points back
2288 ** to pPage. The "idx" variable is the index of that cell. If pPage
2289 ** is the rightmost child of pParent then set idx to pParent->nCell
drh8b2f49b2001-06-08 00:21:52 +00002290 */
drhbb49aba2003-01-04 18:53:27 +00002291 if( pParent->idxShift ){
2292 Pgno pgno, swabPgno;
2293 pgno = sqlitepager_pagenumber(pPage);
2294 swabPgno = SWAB32(pBt, pgno);
2295 for(idx=0; idx<pParent->nCell; idx++){
2296 if( pParent->apCell[idx]->h.leftChild==swabPgno ){
2297 break;
2298 }
drh8b2f49b2001-06-08 00:21:52 +00002299 }
drhbb49aba2003-01-04 18:53:27 +00002300 assert( idx<pParent->nCell || pParent->u.hdr.rightChild==swabPgno );
2301 }else{
2302 idx = pPage->idxParent;
drh8b2f49b2001-06-08 00:21:52 +00002303 }
drh8b2f49b2001-06-08 00:21:52 +00002304
2305 /*
drh14acc042001-06-10 19:56:58 +00002306 ** Initialize variables so that it will be safe to jump
drh5edc3122001-09-13 21:53:09 +00002307 ** directly to balance_cleanup at any moment.
drh8b2f49b2001-06-08 00:21:52 +00002308 */
drh14acc042001-06-10 19:56:58 +00002309 nOld = nNew = 0;
2310 sqlitepager_ref(pParent);
2311
2312 /*
2313 ** Find sibling pages to pPage and the Cells in pParent that divide
drhc3b70572003-01-04 19:44:07 +00002314 ** the siblings. An attempt is made to find NN siblings on either
2315 ** side of pPage. More siblings are taken from one side, however, if
2316 ** pPage there are fewer than NN siblings on the other side. If pParent
2317 ** has NB or fewer children then all children of pParent are taken.
drh14acc042001-06-10 19:56:58 +00002318 */
drhc3b70572003-01-04 19:44:07 +00002319 nxDiv = idx - NN;
2320 if( nxDiv + NB > pParent->nCell ){
2321 nxDiv = pParent->nCell - NB + 1;
drh8b2f49b2001-06-08 00:21:52 +00002322 }
drhc3b70572003-01-04 19:44:07 +00002323 if( nxDiv<0 ){
2324 nxDiv = 0;
2325 }
drh8b2f49b2001-06-08 00:21:52 +00002326 nDiv = 0;
drhc3b70572003-01-04 19:44:07 +00002327 for(i=0, k=nxDiv; i<NB; i++, k++){
drh14acc042001-06-10 19:56:58 +00002328 if( k<pParent->nCell ){
2329 idxDiv[i] = k;
2330 apDiv[i] = pParent->apCell[k];
drh8b2f49b2001-06-08 00:21:52 +00002331 nDiv++;
drh0d316a42002-08-11 20:10:47 +00002332 pgnoOld[i] = SWAB32(pBt, apDiv[i]->h.leftChild);
drh14acc042001-06-10 19:56:58 +00002333 }else if( k==pParent->nCell ){
drh0d316a42002-08-11 20:10:47 +00002334 pgnoOld[i] = SWAB32(pBt, pParent->u.hdr.rightChild);
drh14acc042001-06-10 19:56:58 +00002335 }else{
2336 break;
drh8b2f49b2001-06-08 00:21:52 +00002337 }
drh8c42ca92001-06-22 19:15:00 +00002338 rc = sqlitepager_get(pBt->pPager, pgnoOld[i], (void**)&apOld[i]);
drh14acc042001-06-10 19:56:58 +00002339 if( rc ) goto balance_cleanup;
drh0d316a42002-08-11 20:10:47 +00002340 rc = initPage(pBt, apOld[i], pgnoOld[i], pParent);
drh6019e162001-07-02 17:51:45 +00002341 if( rc ) goto balance_cleanup;
drh428ae8c2003-01-04 16:48:09 +00002342 apOld[i]->idxParent = k;
drh14acc042001-06-10 19:56:58 +00002343 nOld++;
drh8b2f49b2001-06-08 00:21:52 +00002344 }
2345
2346 /*
drh14acc042001-06-10 19:56:58 +00002347 ** Set iCur to be the index in apCell[] of the cell that the cursor
2348 ** is pointing to. We will need this later on in order to keep the
drh5edc3122001-09-13 21:53:09 +00002349 ** cursor pointing at the same cell. If pCur points to a page that
2350 ** has no involvement with this rebalancing, then set iCur to a large
2351 ** number so that the iCur==j tests always fail in the main cell
2352 ** distribution loop below.
drh14acc042001-06-10 19:56:58 +00002353 */
2354 if( pCur ){
drh5edc3122001-09-13 21:53:09 +00002355 iCur = 0;
2356 for(i=0; i<nOld; i++){
2357 if( pCur->pPage==apOld[i] ){
2358 iCur += pCur->idx;
2359 break;
2360 }
2361 iCur += apOld[i]->nCell;
2362 if( i<nOld-1 && pCur->pPage==pParent && pCur->idx==idxDiv[i] ){
2363 break;
2364 }
2365 iCur++;
drh14acc042001-06-10 19:56:58 +00002366 }
drh5edc3122001-09-13 21:53:09 +00002367 pOldCurPage = pCur->pPage;
drh14acc042001-06-10 19:56:58 +00002368 }
2369
2370 /*
2371 ** Make copies of the content of pPage and its siblings into aOld[].
2372 ** The rest of this function will use data from the copies rather
2373 ** that the original pages since the original pages will be in the
2374 ** process of being overwritten.
2375 */
2376 for(i=0; i<nOld; i++){
2377 copyPage(&aOld[i], apOld[i]);
drh14acc042001-06-10 19:56:58 +00002378 }
2379
2380 /*
2381 ** Load pointers to all cells on sibling pages and the divider cells
2382 ** into the local apCell[] array. Make copies of the divider cells
2383 ** into aTemp[] and remove the the divider Cells from pParent.
drh8b2f49b2001-06-08 00:21:52 +00002384 */
2385 nCell = 0;
2386 for(i=0; i<nOld; i++){
drh6b308672002-07-08 02:16:37 +00002387 MemPage *pOld = &aOld[i];
drh8b2f49b2001-06-08 00:21:52 +00002388 for(j=0; j<pOld->nCell; j++){
drh14acc042001-06-10 19:56:58 +00002389 apCell[nCell] = pOld->apCell[j];
drh0d316a42002-08-11 20:10:47 +00002390 szCell[nCell] = cellSize(pBt, apCell[nCell]);
drh14acc042001-06-10 19:56:58 +00002391 nCell++;
drh8b2f49b2001-06-08 00:21:52 +00002392 }
2393 if( i<nOld-1 ){
drh0d316a42002-08-11 20:10:47 +00002394 szCell[nCell] = cellSize(pBt, apDiv[i]);
drh8c42ca92001-06-22 19:15:00 +00002395 memcpy(&aTemp[i], apDiv[i], szCell[nCell]);
drh14acc042001-06-10 19:56:58 +00002396 apCell[nCell] = &aTemp[i];
drh0d316a42002-08-11 20:10:47 +00002397 dropCell(pBt, pParent, nxDiv, szCell[nCell]);
2398 assert( SWAB32(pBt, apCell[nCell]->h.leftChild)==pgnoOld[i] );
drh14acc042001-06-10 19:56:58 +00002399 apCell[nCell]->h.leftChild = pOld->u.hdr.rightChild;
2400 nCell++;
drh8b2f49b2001-06-08 00:21:52 +00002401 }
2402 }
2403
2404 /*
drh6019e162001-07-02 17:51:45 +00002405 ** Figure out the number of pages needed to hold all nCell cells.
2406 ** Store this number in "k". Also compute szNew[] which is the total
2407 ** size of all cells on the i-th page and cntNew[] which is the index
2408 ** in apCell[] of the cell that divides path i from path i+1.
2409 ** cntNew[k] should equal nCell.
2410 **
2411 ** This little patch of code is critical for keeping the tree
2412 ** balanced.
drh8b2f49b2001-06-08 00:21:52 +00002413 */
drh6019e162001-07-02 17:51:45 +00002414 for(subtotal=k=i=0; i<nCell; i++){
2415 subtotal += szCell[i];
2416 if( subtotal > USABLE_SPACE ){
2417 szNew[k] = subtotal - szCell[i];
2418 cntNew[k] = i;
2419 subtotal = 0;
2420 k++;
2421 }
2422 }
2423 szNew[k] = subtotal;
2424 cntNew[k] = nCell;
2425 k++;
2426 for(i=k-1; i>0; i--){
2427 while( szNew[i]<USABLE_SPACE/2 ){
2428 cntNew[i-1]--;
2429 assert( cntNew[i-1]>0 );
2430 szNew[i] += szCell[cntNew[i-1]];
2431 szNew[i-1] -= szCell[cntNew[i-1]-1];
2432 }
2433 }
2434 assert( cntNew[0]>0 );
drh8b2f49b2001-06-08 00:21:52 +00002435
2436 /*
drh6b308672002-07-08 02:16:37 +00002437 ** Allocate k new pages. Reuse old pages where possible.
drh8b2f49b2001-06-08 00:21:52 +00002438 */
drh14acc042001-06-10 19:56:58 +00002439 for(i=0; i<k; i++){
drh6b308672002-07-08 02:16:37 +00002440 if( i<nOld ){
2441 apNew[i] = apOld[i];
2442 pgnoNew[i] = pgnoOld[i];
2443 apOld[i] = 0;
2444 sqlitepager_write(apNew[i]);
2445 }else{
drhbea00b92002-07-08 10:59:50 +00002446 rc = allocatePage(pBt, &apNew[i], &pgnoNew[i], pgnoNew[i-1]);
drh6b308672002-07-08 02:16:37 +00002447 if( rc ) goto balance_cleanup;
2448 }
drh14acc042001-06-10 19:56:58 +00002449 nNew++;
drh0d316a42002-08-11 20:10:47 +00002450 zeroPage(pBt, apNew[i]);
drh6019e162001-07-02 17:51:45 +00002451 apNew[i]->isInit = 1;
drh8b2f49b2001-06-08 00:21:52 +00002452 }
2453
drh6b308672002-07-08 02:16:37 +00002454 /* Free any old pages that were not reused as new pages.
2455 */
2456 while( i<nOld ){
2457 rc = freePage(pBt, apOld[i], pgnoOld[i]);
2458 if( rc ) goto balance_cleanup;
2459 sqlitepager_unref(apOld[i]);
2460 apOld[i] = 0;
2461 i++;
2462 }
2463
drh8b2f49b2001-06-08 00:21:52 +00002464 /*
drhf9ffac92002-03-02 19:00:31 +00002465 ** Put the new pages in accending order. This helps to
2466 ** keep entries in the disk file in order so that a scan
2467 ** of the table is a linear scan through the file. That
2468 ** in turn helps the operating system to deliver pages
2469 ** from the disk more rapidly.
2470 **
2471 ** An O(n^2) insertion sort algorithm is used, but since
drhc3b70572003-01-04 19:44:07 +00002472 ** n is never more than NB (a small constant), that should
2473 ** not be a problem.
drhf9ffac92002-03-02 19:00:31 +00002474 **
drhc3b70572003-01-04 19:44:07 +00002475 ** When NB==3, this one optimization makes the database
2476 ** about 25% faster for large insertions and deletions.
drhf9ffac92002-03-02 19:00:31 +00002477 */
2478 for(i=0; i<k-1; i++){
2479 int minV = pgnoNew[i];
2480 int minI = i;
2481 for(j=i+1; j<k; j++){
2482 if( pgnoNew[j]<minV ){
2483 minI = j;
2484 minV = pgnoNew[j];
2485 }
2486 }
2487 if( minI>i ){
2488 int t;
2489 MemPage *pT;
2490 t = pgnoNew[i];
2491 pT = apNew[i];
2492 pgnoNew[i] = pgnoNew[minI];
2493 apNew[i] = apNew[minI];
2494 pgnoNew[minI] = t;
2495 apNew[minI] = pT;
2496 }
2497 }
2498
2499 /*
drh14acc042001-06-10 19:56:58 +00002500 ** Evenly distribute the data in apCell[] across the new pages.
2501 ** Insert divider cells into pParent as necessary.
2502 */
2503 j = 0;
2504 for(i=0; i<nNew; i++){
2505 MemPage *pNew = apNew[i];
drh6019e162001-07-02 17:51:45 +00002506 while( j<cntNew[i] ){
2507 assert( pNew->nFree>=szCell[j] );
drh14acc042001-06-10 19:56:58 +00002508 if( pCur && iCur==j ){ pCur->pPage = pNew; pCur->idx = pNew->nCell; }
drh0d316a42002-08-11 20:10:47 +00002509 insertCell(pBt, pNew, pNew->nCell, apCell[j], szCell[j]);
drh14acc042001-06-10 19:56:58 +00002510 j++;
2511 }
drh6019e162001-07-02 17:51:45 +00002512 assert( pNew->nCell>0 );
drh14acc042001-06-10 19:56:58 +00002513 assert( !pNew->isOverfull );
drh0d316a42002-08-11 20:10:47 +00002514 relinkCellList(pBt, pNew);
drh14acc042001-06-10 19:56:58 +00002515 if( i<nNew-1 && j<nCell ){
2516 pNew->u.hdr.rightChild = apCell[j]->h.leftChild;
drh0d316a42002-08-11 20:10:47 +00002517 apCell[j]->h.leftChild = SWAB32(pBt, pgnoNew[i]);
drh14acc042001-06-10 19:56:58 +00002518 if( pCur && iCur==j ){ pCur->pPage = pParent; pCur->idx = nxDiv; }
drh0d316a42002-08-11 20:10:47 +00002519 insertCell(pBt, pParent, nxDiv, apCell[j], szCell[j]);
drh14acc042001-06-10 19:56:58 +00002520 j++;
2521 nxDiv++;
2522 }
2523 }
drh6019e162001-07-02 17:51:45 +00002524 assert( j==nCell );
drh6b308672002-07-08 02:16:37 +00002525 apNew[nNew-1]->u.hdr.rightChild = aOld[nOld-1].u.hdr.rightChild;
drh14acc042001-06-10 19:56:58 +00002526 if( nxDiv==pParent->nCell ){
drh0d316a42002-08-11 20:10:47 +00002527 pParent->u.hdr.rightChild = SWAB32(pBt, pgnoNew[nNew-1]);
drh14acc042001-06-10 19:56:58 +00002528 }else{
drh0d316a42002-08-11 20:10:47 +00002529 pParent->apCell[nxDiv]->h.leftChild = SWAB32(pBt, pgnoNew[nNew-1]);
drh14acc042001-06-10 19:56:58 +00002530 }
2531 if( pCur ){
drh3fc190c2001-09-14 03:24:23 +00002532 if( j<=iCur && pCur->pPage==pParent && pCur->idx>idxDiv[nOld-1] ){
2533 assert( pCur->pPage==pOldCurPage );
2534 pCur->idx += nNew - nOld;
2535 }else{
2536 assert( pOldCurPage!=0 );
2537 sqlitepager_ref(pCur->pPage);
2538 sqlitepager_unref(pOldCurPage);
2539 }
drh14acc042001-06-10 19:56:58 +00002540 }
2541
2542 /*
2543 ** Reparent children of all cells.
drh8b2f49b2001-06-08 00:21:52 +00002544 */
2545 for(i=0; i<nNew; i++){
drh0d316a42002-08-11 20:10:47 +00002546 reparentChildPages(pBt, apNew[i]);
drh8b2f49b2001-06-08 00:21:52 +00002547 }
drh0d316a42002-08-11 20:10:47 +00002548 reparentChildPages(pBt, pParent);
drh8b2f49b2001-06-08 00:21:52 +00002549
2550 /*
drh14acc042001-06-10 19:56:58 +00002551 ** balance the parent page.
drh8b2f49b2001-06-08 00:21:52 +00002552 */
drh5edc3122001-09-13 21:53:09 +00002553 rc = balance(pBt, pParent, pCur);
drh8b2f49b2001-06-08 00:21:52 +00002554
2555 /*
drh14acc042001-06-10 19:56:58 +00002556 ** Cleanup before returning.
drh8b2f49b2001-06-08 00:21:52 +00002557 */
drh14acc042001-06-10 19:56:58 +00002558balance_cleanup:
drh9ca7d3b2001-06-28 11:50:21 +00002559 if( extraUnref ){
2560 sqlitepager_unref(extraUnref);
2561 }
drh8b2f49b2001-06-08 00:21:52 +00002562 for(i=0; i<nOld; i++){
drh6b308672002-07-08 02:16:37 +00002563 if( apOld[i]!=0 && apOld[i]!=&aOld[i] ) sqlitepager_unref(apOld[i]);
drh8b2f49b2001-06-08 00:21:52 +00002564 }
drh14acc042001-06-10 19:56:58 +00002565 for(i=0; i<nNew; i++){
2566 sqlitepager_unref(apNew[i]);
drh8b2f49b2001-06-08 00:21:52 +00002567 }
drh14acc042001-06-10 19:56:58 +00002568 if( pCur && pCur->pPage==0 ){
2569 pCur->pPage = pParent;
2570 pCur->idx = 0;
2571 }else{
2572 sqlitepager_unref(pParent);
drh8b2f49b2001-06-08 00:21:52 +00002573 }
2574 return rc;
2575}
2576
2577/*
drhf74b8d92002-09-01 23:20:45 +00002578** This routine checks all cursors that point to the same table
2579** as pCur points to. If any of those cursors were opened with
2580** wrFlag==0 then this routine returns SQLITE_LOCKED. If all
2581** cursors point to the same table were opened with wrFlag==1
2582** then this routine returns SQLITE_OK.
2583**
2584** In addition to checking for read-locks (where a read-lock
2585** means a cursor opened with wrFlag==0) this routine also moves
2586** all cursors other than pCur so that they are pointing to the
2587** first Cell on root page. This is necessary because an insert
2588** or delete might change the number of cells on a page or delete
2589** a page entirely and we do not want to leave any cursors
2590** pointing to non-existant pages or cells.
2591*/
2592static int checkReadLocks(BtCursor *pCur){
2593 BtCursor *p;
2594 assert( pCur->wrFlag );
2595 for(p=pCur->pShared; p!=pCur; p=p->pShared){
2596 assert( p );
2597 assert( p->pgnoRoot==pCur->pgnoRoot );
2598 if( p->wrFlag==0 ) return SQLITE_LOCKED;
2599 if( sqlitepager_pagenumber(p->pPage)!=p->pgnoRoot ){
2600 moveToRoot(p);
2601 }
2602 }
2603 return SQLITE_OK;
2604}
2605
2606/*
drh3b7511c2001-05-26 13:15:44 +00002607** Insert a new record into the BTree. The key is given by (pKey,nKey)
2608** and the data is given by (pData,nData). The cursor is used only to
2609** define what database the record should be inserted into. The cursor
drh14acc042001-06-10 19:56:58 +00002610** is left pointing at the new record.
drh3b7511c2001-05-26 13:15:44 +00002611*/
paulb95a8862003-04-01 21:16:41 +00002612static int sqliteBtreeInsert(
drh5c4d9702001-08-20 00:33:58 +00002613 BtCursor *pCur, /* Insert data into the table of this cursor */
drhbe0072d2001-09-13 14:46:09 +00002614 const void *pKey, int nKey, /* The key of the new record */
drh5c4d9702001-08-20 00:33:58 +00002615 const void *pData, int nData /* The data of the new record */
drh3b7511c2001-05-26 13:15:44 +00002616){
2617 Cell newCell;
2618 int rc;
2619 int loc;
drh14acc042001-06-10 19:56:58 +00002620 int szNew;
drh3b7511c2001-05-26 13:15:44 +00002621 MemPage *pPage;
2622 Btree *pBt = pCur->pBt;
2623
drhecdc7532001-09-23 02:35:53 +00002624 if( pCur->pPage==0 ){
2625 return SQLITE_ABORT; /* A rollback destroyed this cursor */
2626 }
drhf74b8d92002-09-01 23:20:45 +00002627 if( !pBt->inTrans || nKey+nData==0 ){
2628 /* Must start a transaction before doing an insert */
2629 return pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR;
drh8b2f49b2001-06-08 00:21:52 +00002630 }
drhf74b8d92002-09-01 23:20:45 +00002631 assert( !pBt->readOnly );
drhecdc7532001-09-23 02:35:53 +00002632 if( !pCur->wrFlag ){
2633 return SQLITE_PERM; /* Cursor not open for writing */
2634 }
drhf74b8d92002-09-01 23:20:45 +00002635 if( checkReadLocks(pCur) ){
2636 return SQLITE_LOCKED; /* The table pCur points to has a read lock */
2637 }
drh14acc042001-06-10 19:56:58 +00002638 rc = sqliteBtreeMoveto(pCur, pKey, nKey, &loc);
drh3b7511c2001-05-26 13:15:44 +00002639 if( rc ) return rc;
drh14acc042001-06-10 19:56:58 +00002640 pPage = pCur->pPage;
drh7aa128d2002-06-21 13:09:16 +00002641 assert( pPage->isInit );
drh14acc042001-06-10 19:56:58 +00002642 rc = sqlitepager_write(pPage);
drhbd03cae2001-06-02 02:40:57 +00002643 if( rc ) return rc;
drh3b7511c2001-05-26 13:15:44 +00002644 rc = fillInCell(pBt, &newCell, pKey, nKey, pData, nData);
2645 if( rc ) return rc;
drh0d316a42002-08-11 20:10:47 +00002646 szNew = cellSize(pBt, &newCell);
drh3b7511c2001-05-26 13:15:44 +00002647 if( loc==0 ){
drh14acc042001-06-10 19:56:58 +00002648 newCell.h.leftChild = pPage->apCell[pCur->idx]->h.leftChild;
2649 rc = clearCell(pBt, pPage->apCell[pCur->idx]);
drh5e2f8b92001-05-28 00:41:15 +00002650 if( rc ) return rc;
drh0d316a42002-08-11 20:10:47 +00002651 dropCell(pBt, pPage, pCur->idx, cellSize(pBt, pPage->apCell[pCur->idx]));
drh7c717f72001-06-24 20:39:41 +00002652 }else if( loc<0 && pPage->nCell>0 ){
drh14acc042001-06-10 19:56:58 +00002653 assert( pPage->u.hdr.rightChild==0 ); /* Must be a leaf page */
2654 pCur->idx++;
2655 }else{
2656 assert( pPage->u.hdr.rightChild==0 ); /* Must be a leaf page */
drh3b7511c2001-05-26 13:15:44 +00002657 }
drh0d316a42002-08-11 20:10:47 +00002658 insertCell(pBt, pPage, pCur->idx, &newCell, szNew);
drh14acc042001-06-10 19:56:58 +00002659 rc = balance(pCur->pBt, pPage, pCur);
drh3fc190c2001-09-14 03:24:23 +00002660 /* sqliteBtreePageDump(pCur->pBt, pCur->pgnoRoot, 1); */
2661 /* fflush(stdout); */
drh2dcc9aa2002-12-04 13:40:25 +00002662 pCur->eSkip = SKIP_INVALID;
drh5e2f8b92001-05-28 00:41:15 +00002663 return rc;
2664}
2665
2666/*
drhbd03cae2001-06-02 02:40:57 +00002667** Delete the entry that the cursor is pointing to.
drh5e2f8b92001-05-28 00:41:15 +00002668**
drhbd03cae2001-06-02 02:40:57 +00002669** The cursor is left pointing at either the next or the previous
2670** entry. If the cursor is left pointing to the next entry, then
drh2dcc9aa2002-12-04 13:40:25 +00002671** the pCur->eSkip flag is set to SKIP_NEXT which forces the next call to
drhbd03cae2001-06-02 02:40:57 +00002672** sqliteBtreeNext() to be a no-op. That way, you can always call
2673** sqliteBtreeNext() after a delete and the cursor will be left
drh2dcc9aa2002-12-04 13:40:25 +00002674** pointing to the first entry after the deleted entry. Similarly,
2675** pCur->eSkip is set to SKIP_PREV is the cursor is left pointing to
2676** the entry prior to the deleted entry so that a subsequent call to
2677** sqliteBtreePrevious() will always leave the cursor pointing at the
2678** entry immediately before the one that was deleted.
drh3b7511c2001-05-26 13:15:44 +00002679*/
paulb95a8862003-04-01 21:16:41 +00002680static int sqliteBtreeDelete(BtCursor *pCur){
drh5e2f8b92001-05-28 00:41:15 +00002681 MemPage *pPage = pCur->pPage;
2682 Cell *pCell;
2683 int rc;
drh8c42ca92001-06-22 19:15:00 +00002684 Pgno pgnoChild;
drh0d316a42002-08-11 20:10:47 +00002685 Btree *pBt = pCur->pBt;
drh8b2f49b2001-06-08 00:21:52 +00002686
drh7aa128d2002-06-21 13:09:16 +00002687 assert( pPage->isInit );
drhecdc7532001-09-23 02:35:53 +00002688 if( pCur->pPage==0 ){
2689 return SQLITE_ABORT; /* A rollback destroyed this cursor */
2690 }
drhf74b8d92002-09-01 23:20:45 +00002691 if( !pBt->inTrans ){
2692 /* Must start a transaction before doing a delete */
2693 return pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR;
drh8b2f49b2001-06-08 00:21:52 +00002694 }
drhf74b8d92002-09-01 23:20:45 +00002695 assert( !pBt->readOnly );
drhbd03cae2001-06-02 02:40:57 +00002696 if( pCur->idx >= pPage->nCell ){
2697 return SQLITE_ERROR; /* The cursor is not pointing to anything */
2698 }
drhecdc7532001-09-23 02:35:53 +00002699 if( !pCur->wrFlag ){
2700 return SQLITE_PERM; /* Did not open this cursor for writing */
2701 }
drhf74b8d92002-09-01 23:20:45 +00002702 if( checkReadLocks(pCur) ){
2703 return SQLITE_LOCKED; /* The table pCur points to has a read lock */
2704 }
drhbd03cae2001-06-02 02:40:57 +00002705 rc = sqlitepager_write(pPage);
2706 if( rc ) return rc;
drh5e2f8b92001-05-28 00:41:15 +00002707 pCell = pPage->apCell[pCur->idx];
drh0d316a42002-08-11 20:10:47 +00002708 pgnoChild = SWAB32(pBt, pCell->h.leftChild);
2709 clearCell(pBt, pCell);
drh14acc042001-06-10 19:56:58 +00002710 if( pgnoChild ){
2711 /*
drh5e00f6c2001-09-13 13:46:56 +00002712 ** The entry we are about to delete is not a leaf so if we do not
drh9ca7d3b2001-06-28 11:50:21 +00002713 ** do something we will leave a hole on an internal page.
2714 ** We have to fill the hole by moving in a cell from a leaf. The
2715 ** next Cell after the one to be deleted is guaranteed to exist and
2716 ** to be a leaf so we can use it.
drh5e2f8b92001-05-28 00:41:15 +00002717 */
drh14acc042001-06-10 19:56:58 +00002718 BtCursor leafCur;
2719 Cell *pNext;
2720 int szNext;
drh8c1238a2003-01-02 14:43:55 +00002721 int notUsed;
drh14acc042001-06-10 19:56:58 +00002722 getTempCursor(pCur, &leafCur);
drh8c1238a2003-01-02 14:43:55 +00002723 rc = sqliteBtreeNext(&leafCur, &notUsed);
drh14acc042001-06-10 19:56:58 +00002724 if( rc!=SQLITE_OK ){
2725 return SQLITE_CORRUPT;
drh5e2f8b92001-05-28 00:41:15 +00002726 }
drh6019e162001-07-02 17:51:45 +00002727 rc = sqlitepager_write(leafCur.pPage);
2728 if( rc ) return rc;
drh0d316a42002-08-11 20:10:47 +00002729 dropCell(pBt, pPage, pCur->idx, cellSize(pBt, pCell));
drh8c42ca92001-06-22 19:15:00 +00002730 pNext = leafCur.pPage->apCell[leafCur.idx];
drh0d316a42002-08-11 20:10:47 +00002731 szNext = cellSize(pBt, pNext);
2732 pNext->h.leftChild = SWAB32(pBt, pgnoChild);
2733 insertCell(pBt, pPage, pCur->idx, pNext, szNext);
2734 rc = balance(pBt, pPage, pCur);
drh5e2f8b92001-05-28 00:41:15 +00002735 if( rc ) return rc;
drh2dcc9aa2002-12-04 13:40:25 +00002736 pCur->eSkip = SKIP_NEXT;
drh0d316a42002-08-11 20:10:47 +00002737 dropCell(pBt, leafCur.pPage, leafCur.idx, szNext);
2738 rc = balance(pBt, leafCur.pPage, pCur);
drh8c42ca92001-06-22 19:15:00 +00002739 releaseTempCursor(&leafCur);
drh5e2f8b92001-05-28 00:41:15 +00002740 }else{
drh0d316a42002-08-11 20:10:47 +00002741 dropCell(pBt, pPage, pCur->idx, cellSize(pBt, pCell));
drh5edc3122001-09-13 21:53:09 +00002742 if( pCur->idx>=pPage->nCell ){
2743 pCur->idx = pPage->nCell-1;
drhf5bf0a72001-11-23 00:24:12 +00002744 if( pCur->idx<0 ){
2745 pCur->idx = 0;
drh2dcc9aa2002-12-04 13:40:25 +00002746 pCur->eSkip = SKIP_NEXT;
drhf5bf0a72001-11-23 00:24:12 +00002747 }else{
drh2dcc9aa2002-12-04 13:40:25 +00002748 pCur->eSkip = SKIP_PREV;
drhf5bf0a72001-11-23 00:24:12 +00002749 }
drh6019e162001-07-02 17:51:45 +00002750 }else{
drh2dcc9aa2002-12-04 13:40:25 +00002751 pCur->eSkip = SKIP_NEXT;
drh6019e162001-07-02 17:51:45 +00002752 }
drh0d316a42002-08-11 20:10:47 +00002753 rc = balance(pBt, pPage, pCur);
drh5e2f8b92001-05-28 00:41:15 +00002754 }
drh5e2f8b92001-05-28 00:41:15 +00002755 return rc;
drh3b7511c2001-05-26 13:15:44 +00002756}
drh8b2f49b2001-06-08 00:21:52 +00002757
2758/*
drhc6b52df2002-01-04 03:09:29 +00002759** Create a new BTree table. Write into *piTable the page
2760** number for the root page of the new table.
2761**
2762** In the current implementation, BTree tables and BTree indices are the
2763** the same. But in the future, we may change this so that BTree tables
2764** are restricted to having a 4-byte integer key and arbitrary data and
2765** BTree indices are restricted to having an arbitrary key and no data.
drh8b2f49b2001-06-08 00:21:52 +00002766*/
paulb95a8862003-04-01 21:16:41 +00002767static int sqliteBtreeCreateTable(Btree *pBt, int *piTable){
drh8b2f49b2001-06-08 00:21:52 +00002768 MemPage *pRoot;
2769 Pgno pgnoRoot;
2770 int rc;
2771 if( !pBt->inTrans ){
drhf74b8d92002-09-01 23:20:45 +00002772 /* Must start a transaction first */
2773 return pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR;
drh8b2f49b2001-06-08 00:21:52 +00002774 }
drh5df72a52002-06-06 23:16:05 +00002775 if( pBt->readOnly ){
2776 return SQLITE_READONLY;
2777 }
drhbea00b92002-07-08 10:59:50 +00002778 rc = allocatePage(pBt, &pRoot, &pgnoRoot, 0);
drh8b2f49b2001-06-08 00:21:52 +00002779 if( rc ) return rc;
drh6019e162001-07-02 17:51:45 +00002780 assert( sqlitepager_iswriteable(pRoot) );
drh0d316a42002-08-11 20:10:47 +00002781 zeroPage(pBt, pRoot);
drh8b2f49b2001-06-08 00:21:52 +00002782 sqlitepager_unref(pRoot);
2783 *piTable = (int)pgnoRoot;
2784 return SQLITE_OK;
2785}
2786
2787/*
drhc6b52df2002-01-04 03:09:29 +00002788** Create a new BTree index. Write into *piTable the page
2789** number for the root page of the new index.
2790**
2791** In the current implementation, BTree tables and BTree indices are the
2792** the same. But in the future, we may change this so that BTree tables
2793** are restricted to having a 4-byte integer key and arbitrary data and
2794** BTree indices are restricted to having an arbitrary key and no data.
2795*/
paulb95a8862003-04-01 21:16:41 +00002796static int sqliteBtreeCreateIndex(Btree *pBt, int *piIndex){
drh5df72a52002-06-06 23:16:05 +00002797 return sqliteBtreeCreateTable(pBt, piIndex);
drhc6b52df2002-01-04 03:09:29 +00002798}
2799
2800/*
drh8b2f49b2001-06-08 00:21:52 +00002801** Erase the given database page and all its children. Return
2802** the page to the freelist.
2803*/
drh2aa679f2001-06-25 02:11:07 +00002804static int clearDatabasePage(Btree *pBt, Pgno pgno, int freePageFlag){
drh8b2f49b2001-06-08 00:21:52 +00002805 MemPage *pPage;
2806 int rc;
drh8b2f49b2001-06-08 00:21:52 +00002807 Cell *pCell;
2808 int idx;
2809
drh8c42ca92001-06-22 19:15:00 +00002810 rc = sqlitepager_get(pBt->pPager, pgno, (void**)&pPage);
drh8b2f49b2001-06-08 00:21:52 +00002811 if( rc ) return rc;
drh6019e162001-07-02 17:51:45 +00002812 rc = sqlitepager_write(pPage);
2813 if( rc ) return rc;
drh0d316a42002-08-11 20:10:47 +00002814 rc = initPage(pBt, pPage, pgno, 0);
drh7aa128d2002-06-21 13:09:16 +00002815 if( rc ) return rc;
drh0d316a42002-08-11 20:10:47 +00002816 idx = SWAB16(pBt, pPage->u.hdr.firstCell);
drh8b2f49b2001-06-08 00:21:52 +00002817 while( idx>0 ){
drh14acc042001-06-10 19:56:58 +00002818 pCell = (Cell*)&pPage->u.aDisk[idx];
drh0d316a42002-08-11 20:10:47 +00002819 idx = SWAB16(pBt, pCell->h.iNext);
drh8b2f49b2001-06-08 00:21:52 +00002820 if( pCell->h.leftChild ){
drh0d316a42002-08-11 20:10:47 +00002821 rc = clearDatabasePage(pBt, SWAB32(pBt, pCell->h.leftChild), 1);
drh8b2f49b2001-06-08 00:21:52 +00002822 if( rc ) return rc;
2823 }
drh8c42ca92001-06-22 19:15:00 +00002824 rc = clearCell(pBt, pCell);
drh8b2f49b2001-06-08 00:21:52 +00002825 if( rc ) return rc;
2826 }
drh2aa679f2001-06-25 02:11:07 +00002827 if( pPage->u.hdr.rightChild ){
drh0d316a42002-08-11 20:10:47 +00002828 rc = clearDatabasePage(pBt, SWAB32(pBt, pPage->u.hdr.rightChild), 1);
drh2aa679f2001-06-25 02:11:07 +00002829 if( rc ) return rc;
2830 }
2831 if( freePageFlag ){
2832 rc = freePage(pBt, pPage, pgno);
2833 }else{
drh0d316a42002-08-11 20:10:47 +00002834 zeroPage(pBt, pPage);
drh2aa679f2001-06-25 02:11:07 +00002835 }
drhdd793422001-06-28 01:54:48 +00002836 sqlitepager_unref(pPage);
drh2aa679f2001-06-25 02:11:07 +00002837 return rc;
drh8b2f49b2001-06-08 00:21:52 +00002838}
2839
2840/*
2841** Delete all information from a single table in the database.
2842*/
paulb95a8862003-04-01 21:16:41 +00002843static int sqliteBtreeClearTable(Btree *pBt, int iTable){
drh8b2f49b2001-06-08 00:21:52 +00002844 int rc;
drhf74b8d92002-09-01 23:20:45 +00002845 BtCursor *pCur;
drh8b2f49b2001-06-08 00:21:52 +00002846 if( !pBt->inTrans ){
drhf74b8d92002-09-01 23:20:45 +00002847 return pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR;
drh8b2f49b2001-06-08 00:21:52 +00002848 }
drhf74b8d92002-09-01 23:20:45 +00002849 for(pCur=pBt->pCursor; pCur; pCur=pCur->pNext){
2850 if( pCur->pgnoRoot==(Pgno)iTable ){
2851 if( pCur->wrFlag==0 ) return SQLITE_LOCKED;
2852 moveToRoot(pCur);
2853 }
drhecdc7532001-09-23 02:35:53 +00002854 }
drh2aa679f2001-06-25 02:11:07 +00002855 rc = clearDatabasePage(pBt, (Pgno)iTable, 0);
drh8b2f49b2001-06-08 00:21:52 +00002856 if( rc ){
2857 sqliteBtreeRollback(pBt);
drh8b2f49b2001-06-08 00:21:52 +00002858 }
drh8c42ca92001-06-22 19:15:00 +00002859 return rc;
drh8b2f49b2001-06-08 00:21:52 +00002860}
2861
2862/*
2863** Erase all information in a table and add the root of the table to
2864** the freelist. Except, the root of the principle table (the one on
2865** page 2) is never added to the freelist.
2866*/
paulb95a8862003-04-01 21:16:41 +00002867static int sqliteBtreeDropTable(Btree *pBt, int iTable){
drh8b2f49b2001-06-08 00:21:52 +00002868 int rc;
2869 MemPage *pPage;
drhf74b8d92002-09-01 23:20:45 +00002870 BtCursor *pCur;
drh8b2f49b2001-06-08 00:21:52 +00002871 if( !pBt->inTrans ){
drhf74b8d92002-09-01 23:20:45 +00002872 return pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR;
drh8b2f49b2001-06-08 00:21:52 +00002873 }
drhf74b8d92002-09-01 23:20:45 +00002874 for(pCur=pBt->pCursor; pCur; pCur=pCur->pNext){
2875 if( pCur->pgnoRoot==(Pgno)iTable ){
2876 return SQLITE_LOCKED; /* Cannot drop a table that has a cursor */
2877 }
drh5df72a52002-06-06 23:16:05 +00002878 }
drh8c42ca92001-06-22 19:15:00 +00002879 rc = sqlitepager_get(pBt->pPager, (Pgno)iTable, (void**)&pPage);
drh2aa679f2001-06-25 02:11:07 +00002880 if( rc ) return rc;
2881 rc = sqliteBtreeClearTable(pBt, iTable);
2882 if( rc ) return rc;
2883 if( iTable>2 ){
2884 rc = freePage(pBt, pPage, iTable);
2885 }else{
drh0d316a42002-08-11 20:10:47 +00002886 zeroPage(pBt, pPage);
drh8b2f49b2001-06-08 00:21:52 +00002887 }
drhdd793422001-06-28 01:54:48 +00002888 sqlitepager_unref(pPage);
drh8b2f49b2001-06-08 00:21:52 +00002889 return rc;
2890}
2891
drh001bbcb2003-03-19 03:14:00 +00002892#if 0 /* UNTESTED */
2893/*
2894** Copy all cell data from one database file into another.
2895** pages back the freelist.
2896*/
2897static int copyCell(Btree *pBtFrom, BTree *pBtTo, Cell *pCell){
2898 Pager *pFromPager = pBtFrom->pPager;
2899 OverflowPage *pOvfl;
2900 Pgno ovfl, nextOvfl;
2901 Pgno *pPrev;
2902 int rc = SQLITE_OK;
2903 MemPage *pNew, *pPrevPg;
2904 Pgno new;
2905
2906 if( NKEY(pBtTo, pCell->h) + NDATA(pBtTo, pCell->h) <= MX_LOCAL_PAYLOAD ){
2907 return SQLITE_OK;
2908 }
2909 pPrev = &pCell->ovfl;
2910 pPrevPg = 0;
2911 ovfl = SWAB32(pBtTo, pCell->ovfl);
2912 while( ovfl && rc==SQLITE_OK ){
2913 rc = sqlitepager_get(pFromPager, ovfl, (void**)&pOvfl);
2914 if( rc ) return rc;
2915 nextOvfl = SWAB32(pBtFrom, pOvfl->iNext);
2916 rc = allocatePage(pBtTo, &pNew, &new, 0);
2917 if( rc==SQLITE_OK ){
2918 rc = sqlitepager_write(pNew);
2919 if( rc==SQLITE_OK ){
2920 memcpy(pNew, pOvfl, SQLITE_PAGE_SIZE);
2921 *pPrev = SWAB32(pBtTo, new);
2922 if( pPrevPg ){
2923 sqlitepager_unref(pPrevPg);
2924 }
2925 pPrev = &pOvfl->iNext;
2926 pPrevPg = pNew;
2927 }
2928 }
2929 sqlitepager_unref(pOvfl);
2930 ovfl = nextOvfl;
2931 }
2932 if( pPrevPg ){
2933 sqlitepager_unref(pPrevPg);
2934 }
2935 return rc;
2936}
2937#endif
2938
2939
2940#if 0 /* UNTESTED */
2941/*
2942** Copy a page of data from one database over to another.
2943*/
2944static int copyDatabasePage(
2945 Btree *pBtFrom,
2946 Pgno pgnoFrom,
2947 Btree *pBtTo,
2948 Pgno *pTo
2949){
2950 MemPage *pPageFrom, *pPage;
2951 Pgno to;
2952 int rc;
2953 Cell *pCell;
2954 int idx;
2955
2956 rc = sqlitepager_get(pBtFrom->pPager, pgno, (void**)&pPageFrom);
2957 if( rc ) return rc;
2958 rc = allocatePage(pBt, &pPage, pTo, 0);
2959 if( rc==SQLITE_OK ){
2960 rc = sqlitepager_write(pPage);
2961 }
2962 if( rc==SQLITE_OK ){
2963 memcpy(pPage, pPageFrom, SQLITE_PAGE_SIZE);
2964 idx = SWAB16(pBt, pPage->u.hdr.firstCell);
2965 while( idx>0 ){
2966 pCell = (Cell*)&pPage->u.aDisk[idx];
2967 idx = SWAB16(pBt, pCell->h.iNext);
2968 if( pCell->h.leftChild ){
2969 Pgno newChld;
2970 rc = copyDatabasePage(pBtFrom, SWAB32(pBtFrom, pCell->h.leftChild),
2971 pBtTo, &newChld);
2972 if( rc ) return rc;
2973 pCell->h.leftChild = SWAB32(pBtFrom, newChld);
2974 }
2975 rc = copyCell(pBtFrom, pBtTo, pCell);
2976 if( rc ) return rc;
2977 }
2978 if( pPage->u.hdr.rightChild ){
2979 Pgno newChld;
2980 rc = copyDatabasePage(pBtFrom, SWAB32(pBtFrom, pPage->u.hdr.rightChild),
2981 pBtTo, &newChld);
2982 if( rc ) return rc;
2983 pPage->u.hdr.rightChild = SWAB32(pBtTo, newChild);
2984 }
2985 }
2986 sqlitepager_unref(pPage);
2987 return rc;
2988}
2989#endif
2990
drh8b2f49b2001-06-08 00:21:52 +00002991/*
2992** Read the meta-information out of a database file.
2993*/
paulb95a8862003-04-01 21:16:41 +00002994static int sqliteBtreeGetMeta(Btree *pBt, int *aMeta){
drh8b2f49b2001-06-08 00:21:52 +00002995 PageOne *pP1;
2996 int rc;
drh0d316a42002-08-11 20:10:47 +00002997 int i;
drh8b2f49b2001-06-08 00:21:52 +00002998
drh8c42ca92001-06-22 19:15:00 +00002999 rc = sqlitepager_get(pBt->pPager, 1, (void**)&pP1);
drh8b2f49b2001-06-08 00:21:52 +00003000 if( rc ) return rc;
drh0d316a42002-08-11 20:10:47 +00003001 aMeta[0] = SWAB32(pBt, pP1->nFree);
3002 for(i=0; i<sizeof(pP1->aMeta)/sizeof(pP1->aMeta[0]); i++){
3003 aMeta[i+1] = SWAB32(pBt, pP1->aMeta[i]);
3004 }
drh8b2f49b2001-06-08 00:21:52 +00003005 sqlitepager_unref(pP1);
3006 return SQLITE_OK;
3007}
3008
3009/*
3010** Write meta-information back into the database.
3011*/
paulb95a8862003-04-01 21:16:41 +00003012static int sqliteBtreeUpdateMeta(Btree *pBt, int *aMeta){
drh8b2f49b2001-06-08 00:21:52 +00003013 PageOne *pP1;
drh0d316a42002-08-11 20:10:47 +00003014 int rc, i;
drh8b2f49b2001-06-08 00:21:52 +00003015 if( !pBt->inTrans ){
drhf74b8d92002-09-01 23:20:45 +00003016 return pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR;
drh5df72a52002-06-06 23:16:05 +00003017 }
drh8b2f49b2001-06-08 00:21:52 +00003018 pP1 = pBt->page1;
3019 rc = sqlitepager_write(pP1);
drh9adf9ac2002-05-15 11:44:13 +00003020 if( rc ) return rc;
drh0d316a42002-08-11 20:10:47 +00003021 for(i=0; i<sizeof(pP1->aMeta)/sizeof(pP1->aMeta[0]); i++){
3022 pP1->aMeta[i] = SWAB32(pBt, aMeta[i+1]);
3023 }
drh8b2f49b2001-06-08 00:21:52 +00003024 return SQLITE_OK;
3025}
drh8c42ca92001-06-22 19:15:00 +00003026
drh5eddca62001-06-30 21:53:53 +00003027/******************************************************************************
3028** The complete implementation of the BTree subsystem is above this line.
3029** All the code the follows is for testing and troubleshooting the BTree
3030** subsystem. None of the code that follows is used during normal operation.
drh5eddca62001-06-30 21:53:53 +00003031******************************************************************************/
drh5eddca62001-06-30 21:53:53 +00003032
drh8c42ca92001-06-22 19:15:00 +00003033/*
3034** Print a disassembly of the given page on standard output. This routine
3035** is used for debugging and testing only.
3036*/
drhaaab5722002-02-19 13:39:21 +00003037#ifdef SQLITE_TEST
paulb95a8862003-04-01 21:16:41 +00003038static int sqliteBtreePageDump(Btree *pBt, int pgno, int recursive){
drh8c42ca92001-06-22 19:15:00 +00003039 int rc;
3040 MemPage *pPage;
3041 int i, j;
3042 int nFree;
3043 u16 idx;
3044 char range[20];
3045 unsigned char payload[20];
3046 rc = sqlitepager_get(pBt->pPager, (Pgno)pgno, (void**)&pPage);
3047 if( rc ){
3048 return rc;
3049 }
drh6019e162001-07-02 17:51:45 +00003050 if( recursive ) printf("PAGE %d:\n", pgno);
drh8c42ca92001-06-22 19:15:00 +00003051 i = 0;
drh0d316a42002-08-11 20:10:47 +00003052 idx = SWAB16(pBt, pPage->u.hdr.firstCell);
drh8c42ca92001-06-22 19:15:00 +00003053 while( idx>0 && idx<=SQLITE_PAGE_SIZE-MIN_CELL_SIZE ){
3054 Cell *pCell = (Cell*)&pPage->u.aDisk[idx];
drh0d316a42002-08-11 20:10:47 +00003055 int sz = cellSize(pBt, pCell);
drh8c42ca92001-06-22 19:15:00 +00003056 sprintf(range,"%d..%d", idx, idx+sz-1);
drh0d316a42002-08-11 20:10:47 +00003057 sz = NKEY(pBt, pCell->h) + NDATA(pBt, pCell->h);
drh8c42ca92001-06-22 19:15:00 +00003058 if( sz>sizeof(payload)-1 ) sz = sizeof(payload)-1;
3059 memcpy(payload, pCell->aPayload, sz);
3060 for(j=0; j<sz; j++){
3061 if( payload[j]<0x20 || payload[j]>0x7f ) payload[j] = '.';
3062 }
3063 payload[sz] = 0;
3064 printf(
drh6019e162001-07-02 17:51:45 +00003065 "cell %2d: i=%-10s chld=%-4d nk=%-4d nd=%-4d payload=%s\n",
drh0d316a42002-08-11 20:10:47 +00003066 i, range, (int)pCell->h.leftChild,
3067 NKEY(pBt, pCell->h), NDATA(pBt, pCell->h),
drh2aa679f2001-06-25 02:11:07 +00003068 payload
drh8c42ca92001-06-22 19:15:00 +00003069 );
drh6019e162001-07-02 17:51:45 +00003070 if( pPage->isInit && pPage->apCell[i]!=pCell ){
drh2aa679f2001-06-25 02:11:07 +00003071 printf("**** apCell[%d] does not match on prior entry ****\n", i);
3072 }
drh7c717f72001-06-24 20:39:41 +00003073 i++;
drh0d316a42002-08-11 20:10:47 +00003074 idx = SWAB16(pBt, pCell->h.iNext);
drh8c42ca92001-06-22 19:15:00 +00003075 }
3076 if( idx!=0 ){
3077 printf("ERROR: next cell index out of range: %d\n", idx);
3078 }
drh0d316a42002-08-11 20:10:47 +00003079 printf("right_child: %d\n", SWAB32(pBt, pPage->u.hdr.rightChild));
drh8c42ca92001-06-22 19:15:00 +00003080 nFree = 0;
3081 i = 0;
drh0d316a42002-08-11 20:10:47 +00003082 idx = SWAB16(pBt, pPage->u.hdr.firstFree);
drh8c42ca92001-06-22 19:15:00 +00003083 while( idx>0 && idx<SQLITE_PAGE_SIZE ){
3084 FreeBlk *p = (FreeBlk*)&pPage->u.aDisk[idx];
3085 sprintf(range,"%d..%d", idx, idx+p->iSize-1);
drh0d316a42002-08-11 20:10:47 +00003086 nFree += SWAB16(pBt, p->iSize);
drh8c42ca92001-06-22 19:15:00 +00003087 printf("freeblock %2d: i=%-10s size=%-4d total=%d\n",
drh0d316a42002-08-11 20:10:47 +00003088 i, range, SWAB16(pBt, p->iSize), nFree);
3089 idx = SWAB16(pBt, p->iNext);
drh2aa679f2001-06-25 02:11:07 +00003090 i++;
drh8c42ca92001-06-22 19:15:00 +00003091 }
3092 if( idx!=0 ){
3093 printf("ERROR: next freeblock index out of range: %d\n", idx);
3094 }
drh6019e162001-07-02 17:51:45 +00003095 if( recursive && pPage->u.hdr.rightChild!=0 ){
drh0d316a42002-08-11 20:10:47 +00003096 idx = SWAB16(pBt, pPage->u.hdr.firstCell);
drh6019e162001-07-02 17:51:45 +00003097 while( idx>0 && idx<SQLITE_PAGE_SIZE-MIN_CELL_SIZE ){
3098 Cell *pCell = (Cell*)&pPage->u.aDisk[idx];
drh0d316a42002-08-11 20:10:47 +00003099 sqliteBtreePageDump(pBt, SWAB32(pBt, pCell->h.leftChild), 1);
3100 idx = SWAB16(pBt, pCell->h.iNext);
drh6019e162001-07-02 17:51:45 +00003101 }
drh0d316a42002-08-11 20:10:47 +00003102 sqliteBtreePageDump(pBt, SWAB32(pBt, pPage->u.hdr.rightChild), 1);
drh6019e162001-07-02 17:51:45 +00003103 }
drh8c42ca92001-06-22 19:15:00 +00003104 sqlitepager_unref(pPage);
3105 return SQLITE_OK;
3106}
drhaaab5722002-02-19 13:39:21 +00003107#endif
drh8c42ca92001-06-22 19:15:00 +00003108
drhaaab5722002-02-19 13:39:21 +00003109#ifdef SQLITE_TEST
drh8c42ca92001-06-22 19:15:00 +00003110/*
drh2aa679f2001-06-25 02:11:07 +00003111** Fill aResult[] with information about the entry and page that the
3112** cursor is pointing to.
3113**
3114** aResult[0] = The page number
3115** aResult[1] = The entry number
3116** aResult[2] = Total number of entries on this page
3117** aResult[3] = Size of this entry
3118** aResult[4] = Number of free bytes on this page
3119** aResult[5] = Number of free blocks on the page
3120** aResult[6] = Page number of the left child of this entry
3121** aResult[7] = Page number of the right child for the whole page
drh5eddca62001-06-30 21:53:53 +00003122**
3123** This routine is used for testing and debugging only.
drh8c42ca92001-06-22 19:15:00 +00003124*/
paulb95a8862003-04-01 21:16:41 +00003125static int sqliteBtreeCursorDump(BtCursor *pCur, int *aResult){
drh2aa679f2001-06-25 02:11:07 +00003126 int cnt, idx;
3127 MemPage *pPage = pCur->pPage;
drh0d316a42002-08-11 20:10:47 +00003128 Btree *pBt = pCur->pBt;
drh2aa679f2001-06-25 02:11:07 +00003129 aResult[0] = sqlitepager_pagenumber(pPage);
drh8c42ca92001-06-22 19:15:00 +00003130 aResult[1] = pCur->idx;
drh2aa679f2001-06-25 02:11:07 +00003131 aResult[2] = pPage->nCell;
3132 if( pCur->idx>=0 && pCur->idx<pPage->nCell ){
drh0d316a42002-08-11 20:10:47 +00003133 aResult[3] = cellSize(pBt, pPage->apCell[pCur->idx]);
3134 aResult[6] = SWAB32(pBt, pPage->apCell[pCur->idx]->h.leftChild);
drh2aa679f2001-06-25 02:11:07 +00003135 }else{
3136 aResult[3] = 0;
3137 aResult[6] = 0;
3138 }
3139 aResult[4] = pPage->nFree;
3140 cnt = 0;
drh0d316a42002-08-11 20:10:47 +00003141 idx = SWAB16(pBt, pPage->u.hdr.firstFree);
drh2aa679f2001-06-25 02:11:07 +00003142 while( idx>0 && idx<SQLITE_PAGE_SIZE ){
3143 cnt++;
drh0d316a42002-08-11 20:10:47 +00003144 idx = SWAB16(pBt, ((FreeBlk*)&pPage->u.aDisk[idx])->iNext);
drh2aa679f2001-06-25 02:11:07 +00003145 }
3146 aResult[5] = cnt;
drh0d316a42002-08-11 20:10:47 +00003147 aResult[7] = SWAB32(pBt, pPage->u.hdr.rightChild);
drh8c42ca92001-06-22 19:15:00 +00003148 return SQLITE_OK;
3149}
drhaaab5722002-02-19 13:39:21 +00003150#endif
drhdd793422001-06-28 01:54:48 +00003151
drhaaab5722002-02-19 13:39:21 +00003152#ifdef SQLITE_TEST
drhdd793422001-06-28 01:54:48 +00003153/*
drh5eddca62001-06-30 21:53:53 +00003154** Return the pager associated with a BTree. This routine is used for
3155** testing and debugging only.
drhdd793422001-06-28 01:54:48 +00003156*/
paulb95a8862003-04-01 21:16:41 +00003157static Pager *sqliteBtreePager(Btree *pBt){
drhdd793422001-06-28 01:54:48 +00003158 return pBt->pPager;
3159}
drhaaab5722002-02-19 13:39:21 +00003160#endif
drh5eddca62001-06-30 21:53:53 +00003161
3162/*
3163** This structure is passed around through all the sanity checking routines
3164** in order to keep track of some global state information.
3165*/
drhaaab5722002-02-19 13:39:21 +00003166typedef struct IntegrityCk IntegrityCk;
3167struct IntegrityCk {
drh100569d2001-10-02 13:01:48 +00003168 Btree *pBt; /* The tree being checked out */
3169 Pager *pPager; /* The associated pager. Also accessible by pBt->pPager */
3170 int nPage; /* Number of pages in the database */
3171 int *anRef; /* Number of times each page is referenced */
3172 int nTreePage; /* Number of BTree pages */
3173 int nByte; /* Number of bytes of data stored on BTree pages */
3174 char *zErrMsg; /* An error message. NULL of no errors seen. */
drh5eddca62001-06-30 21:53:53 +00003175};
3176
3177/*
3178** Append a message to the error message string.
3179*/
drhaaab5722002-02-19 13:39:21 +00003180static void checkAppendMsg(IntegrityCk *pCheck, char *zMsg1, char *zMsg2){
drh5eddca62001-06-30 21:53:53 +00003181 if( pCheck->zErrMsg ){
3182 char *zOld = pCheck->zErrMsg;
3183 pCheck->zErrMsg = 0;
3184 sqliteSetString(&pCheck->zErrMsg, zOld, "\n", zMsg1, zMsg2, 0);
3185 sqliteFree(zOld);
3186 }else{
3187 sqliteSetString(&pCheck->zErrMsg, zMsg1, zMsg2, 0);
3188 }
3189}
3190
3191/*
3192** Add 1 to the reference count for page iPage. If this is the second
3193** reference to the page, add an error message to pCheck->zErrMsg.
3194** Return 1 if there are 2 ore more references to the page and 0 if
3195** if this is the first reference to the page.
3196**
3197** Also check that the page number is in bounds.
3198*/
drhaaab5722002-02-19 13:39:21 +00003199static int checkRef(IntegrityCk *pCheck, int iPage, char *zContext){
drh5eddca62001-06-30 21:53:53 +00003200 if( iPage==0 ) return 1;
drh0de8c112002-07-06 16:32:14 +00003201 if( iPage>pCheck->nPage || iPage<0 ){
drh5eddca62001-06-30 21:53:53 +00003202 char zBuf[100];
3203 sprintf(zBuf, "invalid page number %d", iPage);
3204 checkAppendMsg(pCheck, zContext, zBuf);
3205 return 1;
3206 }
3207 if( pCheck->anRef[iPage]==1 ){
3208 char zBuf[100];
3209 sprintf(zBuf, "2nd reference to page %d", iPage);
3210 checkAppendMsg(pCheck, zContext, zBuf);
3211 return 1;
3212 }
3213 return (pCheck->anRef[iPage]++)>1;
3214}
3215
3216/*
3217** Check the integrity of the freelist or of an overflow page list.
3218** Verify that the number of pages on the list is N.
3219*/
drh30e58752002-03-02 20:41:57 +00003220static void checkList(
3221 IntegrityCk *pCheck, /* Integrity checking context */
3222 int isFreeList, /* True for a freelist. False for overflow page list */
3223 int iPage, /* Page number for first page in the list */
3224 int N, /* Expected number of pages in the list */
3225 char *zContext /* Context for error messages */
3226){
3227 int i;
drh5eddca62001-06-30 21:53:53 +00003228 char zMsg[100];
drh30e58752002-03-02 20:41:57 +00003229 while( N-- > 0 ){
drh5eddca62001-06-30 21:53:53 +00003230 OverflowPage *pOvfl;
3231 if( iPage<1 ){
3232 sprintf(zMsg, "%d pages missing from overflow list", N+1);
3233 checkAppendMsg(pCheck, zContext, zMsg);
3234 break;
3235 }
3236 if( checkRef(pCheck, iPage, zContext) ) break;
3237 if( sqlitepager_get(pCheck->pPager, (Pgno)iPage, (void**)&pOvfl) ){
3238 sprintf(zMsg, "failed to get page %d", iPage);
3239 checkAppendMsg(pCheck, zContext, zMsg);
3240 break;
3241 }
drh30e58752002-03-02 20:41:57 +00003242 if( isFreeList ){
3243 FreelistInfo *pInfo = (FreelistInfo*)pOvfl->aPayload;
drh0d316a42002-08-11 20:10:47 +00003244 int n = SWAB32(pCheck->pBt, pInfo->nFree);
3245 for(i=0; i<n; i++){
3246 checkRef(pCheck, SWAB32(pCheck->pBt, pInfo->aFree[i]), zMsg);
drh30e58752002-03-02 20:41:57 +00003247 }
drh0d316a42002-08-11 20:10:47 +00003248 N -= n;
drh30e58752002-03-02 20:41:57 +00003249 }
drh0d316a42002-08-11 20:10:47 +00003250 iPage = SWAB32(pCheck->pBt, pOvfl->iNext);
drh5eddca62001-06-30 21:53:53 +00003251 sqlitepager_unref(pOvfl);
3252 }
3253}
3254
3255/*
drh1bffb9c2002-02-03 17:37:36 +00003256** Return negative if zKey1<zKey2.
3257** Return zero if zKey1==zKey2.
3258** Return positive if zKey1>zKey2.
3259*/
3260static int keyCompare(
3261 const char *zKey1, int nKey1,
3262 const char *zKey2, int nKey2
3263){
3264 int min = nKey1>nKey2 ? nKey2 : nKey1;
3265 int c = memcmp(zKey1, zKey2, min);
3266 if( c==0 ){
3267 c = nKey1 - nKey2;
3268 }
3269 return c;
3270}
3271
3272/*
drh5eddca62001-06-30 21:53:53 +00003273** Do various sanity checks on a single page of a tree. Return
3274** the tree depth. Root pages return 0. Parents of root pages
3275** return 1, and so forth.
3276**
3277** These checks are done:
3278**
3279** 1. Make sure that cells and freeblocks do not overlap
3280** but combine to completely cover the page.
3281** 2. Make sure cell keys are in order.
3282** 3. Make sure no key is less than or equal to zLowerBound.
3283** 4. Make sure no key is greater than or equal to zUpperBound.
3284** 5. Check the integrity of overflow pages.
3285** 6. Recursively call checkTreePage on all children.
3286** 7. Verify that the depth of all children is the same.
drh6019e162001-07-02 17:51:45 +00003287** 8. Make sure this page is at least 33% full or else it is
drh5eddca62001-06-30 21:53:53 +00003288** the root of the tree.
3289*/
3290static int checkTreePage(
drhaaab5722002-02-19 13:39:21 +00003291 IntegrityCk *pCheck, /* Context for the sanity check */
drh5eddca62001-06-30 21:53:53 +00003292 int iPage, /* Page number of the page to check */
3293 MemPage *pParent, /* Parent page */
3294 char *zParentContext, /* Parent context */
3295 char *zLowerBound, /* All keys should be greater than this, if not NULL */
drh1bffb9c2002-02-03 17:37:36 +00003296 int nLower, /* Number of characters in zLowerBound */
3297 char *zUpperBound, /* All keys should be less than this, if not NULL */
3298 int nUpper /* Number of characters in zUpperBound */
drh5eddca62001-06-30 21:53:53 +00003299){
3300 MemPage *pPage;
3301 int i, rc, depth, d2, pgno;
3302 char *zKey1, *zKey2;
drh1bffb9c2002-02-03 17:37:36 +00003303 int nKey1, nKey2;
drh5eddca62001-06-30 21:53:53 +00003304 BtCursor cur;
drh0d316a42002-08-11 20:10:47 +00003305 Btree *pBt;
drh5eddca62001-06-30 21:53:53 +00003306 char zMsg[100];
3307 char zContext[100];
3308 char hit[SQLITE_PAGE_SIZE];
3309
3310 /* Check that the page exists
3311 */
drh0d316a42002-08-11 20:10:47 +00003312 cur.pBt = pBt = pCheck->pBt;
drh5eddca62001-06-30 21:53:53 +00003313 if( iPage==0 ) return 0;
3314 if( checkRef(pCheck, iPage, zParentContext) ) return 0;
3315 sprintf(zContext, "On tree page %d: ", iPage);
3316 if( (rc = sqlitepager_get(pCheck->pPager, (Pgno)iPage, (void**)&pPage))!=0 ){
3317 sprintf(zMsg, "unable to get the page. error code=%d", rc);
3318 checkAppendMsg(pCheck, zContext, zMsg);
3319 return 0;
3320 }
drh0d316a42002-08-11 20:10:47 +00003321 if( (rc = initPage(pBt, pPage, (Pgno)iPage, pParent))!=0 ){
drh5eddca62001-06-30 21:53:53 +00003322 sprintf(zMsg, "initPage() returns error code %d", rc);
3323 checkAppendMsg(pCheck, zContext, zMsg);
3324 sqlitepager_unref(pPage);
3325 return 0;
3326 }
3327
3328 /* Check out all the cells.
3329 */
3330 depth = 0;
drh1bffb9c2002-02-03 17:37:36 +00003331 if( zLowerBound ){
3332 zKey1 = sqliteMalloc( nLower+1 );
3333 memcpy(zKey1, zLowerBound, nLower);
3334 zKey1[nLower] = 0;
3335 }else{
3336 zKey1 = 0;
3337 }
3338 nKey1 = nLower;
drh5eddca62001-06-30 21:53:53 +00003339 cur.pPage = pPage;
drh5eddca62001-06-30 21:53:53 +00003340 for(i=0; i<pPage->nCell; i++){
3341 Cell *pCell = pPage->apCell[i];
3342 int sz;
3343
3344 /* Check payload overflow pages
3345 */
drh0d316a42002-08-11 20:10:47 +00003346 nKey2 = NKEY(pBt, pCell->h);
3347 sz = nKey2 + NDATA(pBt, pCell->h);
drh5eddca62001-06-30 21:53:53 +00003348 sprintf(zContext, "On page %d cell %d: ", iPage, i);
3349 if( sz>MX_LOCAL_PAYLOAD ){
3350 int nPage = (sz - MX_LOCAL_PAYLOAD + OVERFLOW_SIZE - 1)/OVERFLOW_SIZE;
drh0d316a42002-08-11 20:10:47 +00003351 checkList(pCheck, 0, SWAB32(pBt, pCell->ovfl), nPage, zContext);
drh5eddca62001-06-30 21:53:53 +00003352 }
3353
3354 /* Check that keys are in the right order
3355 */
3356 cur.idx = i;
drh8c1238a2003-01-02 14:43:55 +00003357 zKey2 = sqliteMallocRaw( nKey2+1 );
drh1bffb9c2002-02-03 17:37:36 +00003358 getPayload(&cur, 0, nKey2, zKey2);
3359 if( zKey1 && keyCompare(zKey1, nKey1, zKey2, nKey2)>=0 ){
drh5eddca62001-06-30 21:53:53 +00003360 checkAppendMsg(pCheck, zContext, "Key is out of order");
3361 }
3362
3363 /* Check sanity of left child page.
3364 */
drh0d316a42002-08-11 20:10:47 +00003365 pgno = SWAB32(pBt, pCell->h.leftChild);
drh1bffb9c2002-02-03 17:37:36 +00003366 d2 = checkTreePage(pCheck, pgno, pPage, zContext, zKey1,nKey1,zKey2,nKey2);
drh5eddca62001-06-30 21:53:53 +00003367 if( i>0 && d2!=depth ){
3368 checkAppendMsg(pCheck, zContext, "Child page depth differs");
3369 }
3370 depth = d2;
3371 sqliteFree(zKey1);
3372 zKey1 = zKey2;
drh1bffb9c2002-02-03 17:37:36 +00003373 nKey1 = nKey2;
drh5eddca62001-06-30 21:53:53 +00003374 }
drh0d316a42002-08-11 20:10:47 +00003375 pgno = SWAB32(pBt, pPage->u.hdr.rightChild);
drh5eddca62001-06-30 21:53:53 +00003376 sprintf(zContext, "On page %d at right child: ", iPage);
drh1bffb9c2002-02-03 17:37:36 +00003377 checkTreePage(pCheck, pgno, pPage, zContext, zKey1,nKey1,zUpperBound,nUpper);
drh5eddca62001-06-30 21:53:53 +00003378 sqliteFree(zKey1);
3379
3380 /* Check for complete coverage of the page
3381 */
3382 memset(hit, 0, sizeof(hit));
3383 memset(hit, 1, sizeof(PageHdr));
drh0d316a42002-08-11 20:10:47 +00003384 for(i=SWAB16(pBt, pPage->u.hdr.firstCell); i>0 && i<SQLITE_PAGE_SIZE; ){
drh5eddca62001-06-30 21:53:53 +00003385 Cell *pCell = (Cell*)&pPage->u.aDisk[i];
3386 int j;
drh0d316a42002-08-11 20:10:47 +00003387 for(j=i+cellSize(pBt, pCell)-1; j>=i; j--) hit[j]++;
3388 i = SWAB16(pBt, pCell->h.iNext);
drh5eddca62001-06-30 21:53:53 +00003389 }
drh0d316a42002-08-11 20:10:47 +00003390 for(i=SWAB16(pBt,pPage->u.hdr.firstFree); i>0 && i<SQLITE_PAGE_SIZE; ){
drh5eddca62001-06-30 21:53:53 +00003391 FreeBlk *pFBlk = (FreeBlk*)&pPage->u.aDisk[i];
3392 int j;
drh0d316a42002-08-11 20:10:47 +00003393 for(j=i+SWAB16(pBt,pFBlk->iSize)-1; j>=i; j--) hit[j]++;
3394 i = SWAB16(pBt,pFBlk->iNext);
drh5eddca62001-06-30 21:53:53 +00003395 }
3396 for(i=0; i<SQLITE_PAGE_SIZE; i++){
3397 if( hit[i]==0 ){
3398 sprintf(zMsg, "Unused space at byte %d of page %d", i, iPage);
3399 checkAppendMsg(pCheck, zMsg, 0);
3400 break;
3401 }else if( hit[i]>1 ){
3402 sprintf(zMsg, "Multiple uses for byte %d of page %d", i, iPage);
3403 checkAppendMsg(pCheck, zMsg, 0);
3404 break;
3405 }
3406 }
3407
3408 /* Check that free space is kept to a minimum
3409 */
drh6019e162001-07-02 17:51:45 +00003410#if 0
3411 if( pParent && pParent->nCell>2 && pPage->nFree>3*SQLITE_PAGE_SIZE/4 ){
drh5eddca62001-06-30 21:53:53 +00003412 sprintf(zMsg, "free space (%d) greater than max (%d)", pPage->nFree,
3413 SQLITE_PAGE_SIZE/3);
3414 checkAppendMsg(pCheck, zContext, zMsg);
3415 }
drh6019e162001-07-02 17:51:45 +00003416#endif
3417
3418 /* Update freespace totals.
3419 */
3420 pCheck->nTreePage++;
3421 pCheck->nByte += USABLE_SPACE - pPage->nFree;
drh5eddca62001-06-30 21:53:53 +00003422
3423 sqlitepager_unref(pPage);
3424 return depth;
3425}
3426
3427/*
3428** This routine does a complete check of the given BTree file. aRoot[] is
3429** an array of pages numbers were each page number is the root page of
3430** a table. nRoot is the number of entries in aRoot.
3431**
3432** If everything checks out, this routine returns NULL. If something is
3433** amiss, an error message is written into memory obtained from malloc()
3434** and a pointer to that error message is returned. The calling function
3435** is responsible for freeing the error message when it is done.
3436*/
drhaaab5722002-02-19 13:39:21 +00003437char *sqliteBtreeIntegrityCheck(Btree *pBt, int *aRoot, int nRoot){
drh5eddca62001-06-30 21:53:53 +00003438 int i;
3439 int nRef;
drhaaab5722002-02-19 13:39:21 +00003440 IntegrityCk sCheck;
drh5eddca62001-06-30 21:53:53 +00003441
3442 nRef = *sqlitepager_stats(pBt->pPager);
drhefc251d2001-07-01 22:12:01 +00003443 if( lockBtree(pBt)!=SQLITE_OK ){
3444 return sqliteStrDup("Unable to acquire a read lock on the database");
3445 }
drh5eddca62001-06-30 21:53:53 +00003446 sCheck.pBt = pBt;
3447 sCheck.pPager = pBt->pPager;
3448 sCheck.nPage = sqlitepager_pagecount(sCheck.pPager);
drh0de8c112002-07-06 16:32:14 +00003449 if( sCheck.nPage==0 ){
3450 unlockBtreeIfUnused(pBt);
3451 return 0;
3452 }
drh8c1238a2003-01-02 14:43:55 +00003453 sCheck.anRef = sqliteMallocRaw( (sCheck.nPage+1)*sizeof(sCheck.anRef[0]) );
drh5eddca62001-06-30 21:53:53 +00003454 sCheck.anRef[1] = 1;
3455 for(i=2; i<=sCheck.nPage; i++){ sCheck.anRef[i] = 0; }
3456 sCheck.zErrMsg = 0;
3457
3458 /* Check the integrity of the freelist
3459 */
drh0d316a42002-08-11 20:10:47 +00003460 checkList(&sCheck, 1, SWAB32(pBt, pBt->page1->freeList),
3461 SWAB32(pBt, pBt->page1->nFree), "Main freelist: ");
drh5eddca62001-06-30 21:53:53 +00003462
3463 /* Check all the tables.
3464 */
3465 for(i=0; i<nRoot; i++){
drh4ff6dfa2002-03-03 23:06:00 +00003466 if( aRoot[i]==0 ) continue;
drh1bffb9c2002-02-03 17:37:36 +00003467 checkTreePage(&sCheck, aRoot[i], 0, "List of tree roots: ", 0,0,0,0);
drh5eddca62001-06-30 21:53:53 +00003468 }
3469
3470 /* Make sure every page in the file is referenced
3471 */
3472 for(i=1; i<=sCheck.nPage; i++){
3473 if( sCheck.anRef[i]==0 ){
3474 char zBuf[100];
3475 sprintf(zBuf, "Page %d is never used", i);
3476 checkAppendMsg(&sCheck, zBuf, 0);
3477 }
3478 }
3479
3480 /* Make sure this analysis did not leave any unref() pages
3481 */
drh5e00f6c2001-09-13 13:46:56 +00003482 unlockBtreeIfUnused(pBt);
drh5eddca62001-06-30 21:53:53 +00003483 if( nRef != *sqlitepager_stats(pBt->pPager) ){
3484 char zBuf[100];
3485 sprintf(zBuf,
3486 "Outstanding page count goes from %d to %d during this analysis",
3487 nRef, *sqlitepager_stats(pBt->pPager)
3488 );
3489 checkAppendMsg(&sCheck, zBuf, 0);
3490 }
3491
3492 /* Clean up and report errors.
3493 */
3494 sqliteFree(sCheck.anRef);
3495 return sCheck.zErrMsg;
3496}
paulb95a8862003-04-01 21:16:41 +00003497
drh73509ee2003-04-06 20:44:45 +00003498/*
3499** Return the full pathname of the underlying database file.
3500*/
3501static const char *sqliteBtreeGetFilename(Btree *pBt){
3502 assert( pBt->pPager!=0 );
3503 return sqlitepager_filename(pBt->pPager);
3504}
3505
3506/*
3507** Change the name of the underlying database file.
3508*/
3509static int sqliteBtreeChangeFilename(Btree *pBt, const char *zNew){
3510 return sqlitepager_rename(pBt->pPager, zNew);
3511}
3512
3513/*
3514** The following tables contain pointers to all of the interface
3515** routines for this implementation of the B*Tree backend. To
3516** substitute a different implemention of the backend, one has merely
3517** to provide pointers to alternative functions in similar tables.
3518*/
paulb95a8862003-04-01 21:16:41 +00003519static BtOps sqliteBtreeOps = {
3520 sqliteBtreeClose,
3521 sqliteBtreeSetCacheSize,
3522 sqliteBtreeSetSafetyLevel,
3523 sqliteBtreeBeginTrans,
3524 sqliteBtreeCommit,
3525 sqliteBtreeRollback,
3526 sqliteBtreeBeginCkpt,
3527 sqliteBtreeCommitCkpt,
3528 sqliteBtreeRollbackCkpt,
3529 sqliteBtreeCreateTable,
3530 sqliteBtreeCreateIndex,
3531 sqliteBtreeDropTable,
3532 sqliteBtreeClearTable,
3533 sqliteBtreeCursor,
3534 sqliteBtreeGetMeta,
3535 sqliteBtreeUpdateMeta,
3536 sqliteBtreeIntegrityCheck,
drh73509ee2003-04-06 20:44:45 +00003537 sqliteBtreeGetFilename,
3538 sqliteBtreeChangeFilename,
paulb95a8862003-04-01 21:16:41 +00003539#ifdef SQLITE_TEST
3540 sqliteBtreePageDump,
3541 sqliteBtreePager
3542#endif
3543};
paulb95a8862003-04-01 21:16:41 +00003544static BtCursorOps sqliteBtreeCursorOps = {
3545 sqliteBtreeMoveto,
3546 sqliteBtreeDelete,
3547 sqliteBtreeInsert,
3548 sqliteBtreeFirst,
3549 sqliteBtreeLast,
3550 sqliteBtreeNext,
3551 sqliteBtreePrevious,
3552 sqliteBtreeKeySize,
3553 sqliteBtreeKey,
3554 sqliteBtreeKeyCompare,
3555 sqliteBtreeDataSize,
3556 sqliteBtreeData,
3557 sqliteBtreeCloseCursor,
3558#ifdef SQLITE_TEST
3559 sqliteBtreeCursorDump,
3560#endif
3561};