blob: d1761c5fba1baea4918c2773e66e3904e324ac89 [file] [log] [blame]
drha059ad02001-04-17 20:09:11 +00001/*
2** Copyright (c) 2001 D. Richard Hipp
3**
4** This program is free software; you can redistribute it and/or
5** modify it under the terms of the GNU General Public
6** License as published by the Free Software Foundation; either
7** version 2 of the License, or (at your option) any later version.
8**
9** This program is distributed in the hope that it will be useful,
10** but WITHOUT ANY WARRANTY; without even the implied warranty of
11** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12** General Public License for more details.
13**
14** You should have received a copy of the GNU General Public
15** License along with this library; if not, write to the
16** Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17** Boston, MA 02111-1307, USA.
18**
19** Author contact information:
20** drh@hwaci.com
21** http://www.hwaci.com/drh/
22**
23*************************************************************************
drh2aa679f2001-06-25 02:11:07 +000024** $Id: btree.c,v 1.15 2001/06/25 02:11:07 drh Exp $
drh8b2f49b2001-06-08 00:21:52 +000025**
26** This file implements a external (disk-based) database using BTrees.
27** For a detailed discussion of BTrees, refer to
28**
29** Donald E. Knuth, THE ART OF COMPUTER PROGRAMMING, Volume 3:
30** "Sorting And Searching", pages 473-480. Addison-Wesley
31** Publishing Company, Reading, Massachusetts.
32**
33** The basic idea is that each page of the file contains N database
34** entries and N+1 pointers to subpages.
35**
36** ----------------------------------------------------------------
37** | Ptr(0) | Key(0) | Ptr(1) | Key(1) | ... | Key(N) | Ptr(N+1) |
38** ----------------------------------------------------------------
39**
40** All of the keys on the page that Ptr(0) points to have values less
41** than Key(0). All of the keys on page Ptr(1) and its subpages have
42** values greater than Key(0) and less than Key(1). All of the keys
43** on Ptr(N+1) and its subpages have values greater than Key(N). And
44** so forth.
45**
46** Finding a particular key requires reading O(log(M)) pages from the file
47** where M is the number of entries in the tree.
48**
49** In this implementation, a single file can hold one or more separate
50** BTrees. Each BTree is identified by the index of its root page. The
51** key and data for any entry are combined to form the "payload". Up to
52** MX_LOCAL_PAYLOAD bytes of payload can be carried directly on the
53** database page. If the payload is larger than MX_LOCAL_PAYLOAD bytes
54** then surplus bytes are stored on overflow pages. The payload for an
55** entry and the preceding pointer are combined to form a "Cell". Each
56** page has a smaller header which contains the Ptr(N+1) pointer.
57**
58** The first page of the file contains a magic string used to verify that
59** the file really is a valid BTree database, a pointer to a list of unused
60** pages in the file, and some meta information. The root of the first
61** BTree begins on page 2 of the file. (Pages are numbered beginning with
62** 1, not 0.) Thus a minimum database contains 2 pages.
drha059ad02001-04-17 20:09:11 +000063*/
64#include "sqliteInt.h"
65#include "pager.h"
66#include "btree.h"
67#include <assert.h>
68
drh2af926b2001-05-15 00:39:25 +000069
drh2af926b2001-05-15 00:39:25 +000070/*
71** Primitive data types. u32 must be 4 bytes and u16 must be 2 bytes.
drh14acc042001-06-10 19:56:58 +000072** The uptr type must be big enough to hold a pointer.
drh306dc212001-05-21 13:45:10 +000073** Change these typedefs when porting to new architectures.
drh2af926b2001-05-15 00:39:25 +000074*/
drh14acc042001-06-10 19:56:58 +000075typedef unsigned int uptr;
drh8c42ca92001-06-22 19:15:00 +000076/* typedef unsigned int u32; -- already defined in sqliteInt.h */
drh365d68f2001-05-11 11:02:46 +000077typedef unsigned short int u16;
drh5e2f8b92001-05-28 00:41:15 +000078typedef unsigned char u8;
drh365d68f2001-05-11 11:02:46 +000079
80/*
drh8c42ca92001-06-22 19:15:00 +000081** This macro casts a pointer to an integer. Useful for doing
82** pointer arithmetic.
83*/
drh7c717f72001-06-24 20:39:41 +000084#define Addr(X) ((uptr)X)
drh8c42ca92001-06-22 19:15:00 +000085
86/*
drh365d68f2001-05-11 11:02:46 +000087** Forward declarations of structures used only in this file.
88*/
drhbd03cae2001-06-02 02:40:57 +000089typedef struct PageOne PageOne;
drh2af926b2001-05-15 00:39:25 +000090typedef struct MemPage MemPage;
drh365d68f2001-05-11 11:02:46 +000091typedef struct PageHdr PageHdr;
92typedef struct Cell Cell;
drh3b7511c2001-05-26 13:15:44 +000093typedef struct CellHdr CellHdr;
drh365d68f2001-05-11 11:02:46 +000094typedef struct FreeBlk FreeBlk;
drh2af926b2001-05-15 00:39:25 +000095typedef struct OverflowPage OverflowPage;
96
97/*
98** All structures on a database page are aligned to 4-byte boundries.
99** This routine rounds up a number of bytes to the next multiple of 4.
drh306dc212001-05-21 13:45:10 +0000100**
101** This might need to change for computer architectures that require
102** and 8-byte alignment boundry for structures.
drh2af926b2001-05-15 00:39:25 +0000103*/
104#define ROUNDUP(X) ((X+3) & ~3)
drha059ad02001-04-17 20:09:11 +0000105
drh08ed44e2001-04-29 23:32:55 +0000106/*
drhbd03cae2001-06-02 02:40:57 +0000107** This is a magic string that appears at the beginning of every
drh8c42ca92001-06-22 19:15:00 +0000108** SQLite database in order to identify the file as a real database.
drh08ed44e2001-04-29 23:32:55 +0000109*/
drhbd03cae2001-06-02 02:40:57 +0000110static const char zMagicHeader[] =
drh8c42ca92001-06-22 19:15:00 +0000111 "** This file contains an SQLite 2.0 database **";
drhbd03cae2001-06-02 02:40:57 +0000112#define MAGIC_SIZE (sizeof(zMagicHeader))
drh08ed44e2001-04-29 23:32:55 +0000113
114/*
drh8c42ca92001-06-22 19:15:00 +0000115** This is a magic integer also used to the integrety of the database
116** file. This integer is used in addition to the string above so that
117** if the file is written on a little-endian architecture and read
118** on a big-endian architectures (or vice versa) we can detect the
119** problem.
120**
121** The number used was obtained at random and has no special
122** significance.
123*/
124#define MAGIC 0xdae37528
125
126/*
drhbd03cae2001-06-02 02:40:57 +0000127** The first page of the database file contains a magic header string
128** to identify the file as an SQLite database file. It also contains
129** a pointer to the first free page of the file. Page 2 contains the
drh8b2f49b2001-06-08 00:21:52 +0000130** root of the principle BTree. The file might contain other BTrees
131** rooted on pages above 2.
132**
133** The first page also contains SQLITE_N_BTREE_META integers that
134** can be used by higher-level routines.
drh08ed44e2001-04-29 23:32:55 +0000135**
drhbd03cae2001-06-02 02:40:57 +0000136** Remember that pages are numbered beginning with 1. (See pager.c
137** for additional information.) Page 0 does not exist and a page
138** number of 0 is used to mean "no such page".
139*/
140struct PageOne {
141 char zMagic[MAGIC_SIZE]; /* String that identifies the file as a database */
drh8c42ca92001-06-22 19:15:00 +0000142 int iMagic; /* Integer to verify correct byte order */
143 Pgno freeList; /* First free page in a list of all free pages */
drh2aa679f2001-06-25 02:11:07 +0000144 int nFree; /* Number of pages on the free list */
145 int aMeta[SQLITE_N_BTREE_META-1]; /* User defined integers */
drhbd03cae2001-06-02 02:40:57 +0000146};
147
148/*
149** Each database page has a header that is an instance of this
150** structure.
drh08ed44e2001-04-29 23:32:55 +0000151**
drh8b2f49b2001-06-08 00:21:52 +0000152** PageHdr.firstFree is 0 if there is no free space on this page.
drh14acc042001-06-10 19:56:58 +0000153** Otherwise, PageHdr.firstFree is the index in MemPage.u.aDisk[] of a
drh8b2f49b2001-06-08 00:21:52 +0000154** FreeBlk structure that describes the first block of free space.
155** All free space is defined by a linked list of FreeBlk structures.
drh08ed44e2001-04-29 23:32:55 +0000156**
drh8b2f49b2001-06-08 00:21:52 +0000157** Data is stored in a linked list of Cell structures. PageHdr.firstCell
drh14acc042001-06-10 19:56:58 +0000158** is the index into MemPage.u.aDisk[] of the first cell on the page. The
drh306dc212001-05-21 13:45:10 +0000159** Cells are kept in sorted order.
drh8b2f49b2001-06-08 00:21:52 +0000160**
161** A Cell contains all information about a database entry and a pointer
162** to a child page that contains other entries less than itself. In
163** other words, the i-th Cell contains both Ptr(i) and Key(i). The
164** right-most pointer of the page is contained in PageHdr.rightChild.
drh08ed44e2001-04-29 23:32:55 +0000165*/
drh365d68f2001-05-11 11:02:46 +0000166struct PageHdr {
drh5e2f8b92001-05-28 00:41:15 +0000167 Pgno rightChild; /* Child page that comes after all cells on this page */
drh14acc042001-06-10 19:56:58 +0000168 u16 firstCell; /* Index in MemPage.u.aDisk[] of the first cell */
169 u16 firstFree; /* Index in MemPage.u.aDisk[] of the first free block */
drh365d68f2001-05-11 11:02:46 +0000170};
drh306dc212001-05-21 13:45:10 +0000171
drh3b7511c2001-05-26 13:15:44 +0000172/*
173** Entries on a page of the database are called "Cells". Each Cell
174** has a header and data. This structure defines the header. The
drhbd03cae2001-06-02 02:40:57 +0000175** key and data (collectively the "payload") follow this header on
176** the database page.
177**
178** A definition of the complete Cell structure is given below. The
drh8c42ca92001-06-22 19:15:00 +0000179** header for the cell must be defined first in order to do some
drhbd03cae2001-06-02 02:40:57 +0000180** of the sizing #defines that follow.
drh3b7511c2001-05-26 13:15:44 +0000181*/
182struct CellHdr {
drh5e2f8b92001-05-28 00:41:15 +0000183 Pgno leftChild; /* Child page that comes before this cell */
drh3b7511c2001-05-26 13:15:44 +0000184 u16 nKey; /* Number of bytes in the key */
drh14acc042001-06-10 19:56:58 +0000185 u16 iNext; /* Index in MemPage.u.aDisk[] of next cell in sorted order */
drh3b7511c2001-05-26 13:15:44 +0000186 u32 nData; /* Number of bytes of data */
drh8c42ca92001-06-22 19:15:00 +0000187};
drh3b7511c2001-05-26 13:15:44 +0000188
189/*
190** The minimum size of a complete Cell. The Cell must contain a header
drhbd03cae2001-06-02 02:40:57 +0000191** and at least 4 bytes of payload.
drh3b7511c2001-05-26 13:15:44 +0000192*/
193#define MIN_CELL_SIZE (sizeof(CellHdr)+4)
194
195/*
196** The maximum number of database entries that can be held in a single
197** page of the database.
198*/
199#define MX_CELL ((SQLITE_PAGE_SIZE-sizeof(PageHdr))/MIN_CELL_SIZE)
200
201/*
drh8c42ca92001-06-22 19:15:00 +0000202** The maximum amount of payload (in bytes) that can be stored locally for
203** a database entry. If the entry contains more data than this, the
drh3b7511c2001-05-26 13:15:44 +0000204** extra goes onto overflow pages.
drhbd03cae2001-06-02 02:40:57 +0000205**
206** This number is chosen so that at least 4 cells will fit on every page.
drh3b7511c2001-05-26 13:15:44 +0000207*/
208#define MX_LOCAL_PAYLOAD \
drh2aa679f2001-06-25 02:11:07 +0000209 (((SQLITE_PAGE_SIZE-sizeof(PageHdr))/4-(sizeof(CellHdr)+sizeof(Pgno)))&~3)
drh3b7511c2001-05-26 13:15:44 +0000210
drh306dc212001-05-21 13:45:10 +0000211/*
212** Data on a database page is stored as a linked list of Cell structures.
drh5e2f8b92001-05-28 00:41:15 +0000213** Both the key and the data are stored in aPayload[]. The key always comes
214** first. The aPayload[] field grows as necessary to hold the key and data,
drh306dc212001-05-21 13:45:10 +0000215** up to a maximum of MX_LOCAL_PAYLOAD bytes. If the size of the key and
drh3b7511c2001-05-26 13:15:44 +0000216** data combined exceeds MX_LOCAL_PAYLOAD bytes, then Cell.ovfl is the
217** page number of the first overflow page.
218**
219** Though this structure is fixed in size, the Cell on the database
drhbd03cae2001-06-02 02:40:57 +0000220** page varies in size. Every cell has a CellHdr and at least 4 bytes
drh3b7511c2001-05-26 13:15:44 +0000221** of payload space. Additional payload bytes (up to the maximum of
222** MX_LOCAL_PAYLOAD) and the Cell.ovfl value are allocated only as
223** needed.
drh306dc212001-05-21 13:45:10 +0000224*/
drh365d68f2001-05-11 11:02:46 +0000225struct Cell {
drh5e2f8b92001-05-28 00:41:15 +0000226 CellHdr h; /* The cell header */
227 char aPayload[MX_LOCAL_PAYLOAD]; /* Key and data */
228 Pgno ovfl; /* The first overflow page */
drh365d68f2001-05-11 11:02:46 +0000229};
drh306dc212001-05-21 13:45:10 +0000230
231/*
232** Free space on a page is remembered using a linked list of the FreeBlk
233** structures. Space on a database page is allocated in increments of
drh72f82862001-05-24 21:06:34 +0000234** at least 4 bytes and is always aligned to a 4-byte boundry. The
drh8b2f49b2001-06-08 00:21:52 +0000235** linked list of FreeBlks is always kept in order by address.
drh306dc212001-05-21 13:45:10 +0000236*/
drh365d68f2001-05-11 11:02:46 +0000237struct FreeBlk {
drh72f82862001-05-24 21:06:34 +0000238 u16 iSize; /* Number of bytes in this block of free space */
drh14acc042001-06-10 19:56:58 +0000239 u16 iNext; /* Index in MemPage.u.aDisk[] of the next free block */
drh365d68f2001-05-11 11:02:46 +0000240};
drh306dc212001-05-21 13:45:10 +0000241
242/*
drh14acc042001-06-10 19:56:58 +0000243** The number of bytes of payload that will fit on a single overflow page.
drh3b7511c2001-05-26 13:15:44 +0000244*/
245#define OVERFLOW_SIZE (SQLITE_PAGE_SIZE-sizeof(Pgno))
246
247/*
drh306dc212001-05-21 13:45:10 +0000248** When the key and data for a single entry in the BTree will not fit in
drh8c42ca92001-06-22 19:15:00 +0000249** the MX_LOCAL_PAYLOAD bytes of space available on the database page,
drh8b2f49b2001-06-08 00:21:52 +0000250** then all extra bytes are written to a linked list of overflow pages.
drh306dc212001-05-21 13:45:10 +0000251** Each overflow page is an instance of the following structure.
252**
253** Unused pages in the database are also represented by instances of
drhbd03cae2001-06-02 02:40:57 +0000254** the OverflowPage structure. The PageOne.freeList field is the
drh306dc212001-05-21 13:45:10 +0000255** page number of the first page in a linked list of unused database
256** pages.
257*/
drh2af926b2001-05-15 00:39:25 +0000258struct OverflowPage {
drh14acc042001-06-10 19:56:58 +0000259 Pgno iNext;
drh5e2f8b92001-05-28 00:41:15 +0000260 char aPayload[OVERFLOW_SIZE];
drh7e3b0a02001-04-28 16:52:40 +0000261};
drh7e3b0a02001-04-28 16:52:40 +0000262
263/*
264** For every page in the database file, an instance of the following structure
drh14acc042001-06-10 19:56:58 +0000265** is stored in memory. The u.aDisk[] array contains the raw bits read from
drhbd03cae2001-06-02 02:40:57 +0000266** the disk. The rest is auxiliary information that held in memory only. The
267** auxiliary info is only valid for regular database pages - it is not
268** used for overflow pages and pages on the freelist.
drh306dc212001-05-21 13:45:10 +0000269**
drhbd03cae2001-06-02 02:40:57 +0000270** Of particular interest in the auxiliary info is the apCell[] entry. Each
drh14acc042001-06-10 19:56:58 +0000271** apCell[] entry is a pointer to a Cell structure in u.aDisk[]. The cells are
drh306dc212001-05-21 13:45:10 +0000272** put in this array so that they can be accessed in constant time, rather
drhbd03cae2001-06-02 02:40:57 +0000273** than in linear time which would be needed if we had to walk the linked
274** list on every access.
drh72f82862001-05-24 21:06:34 +0000275**
drh14acc042001-06-10 19:56:58 +0000276** Note that apCell[] contains enough space to hold up to two more Cells
277** than can possibly fit on one page. In the steady state, every apCell[]
278** points to memory inside u.aDisk[]. But in the middle of an insert
279** operation, some apCell[] entries may temporarily point to data space
280** outside of u.aDisk[]. This is a transient situation that is quickly
281** resolved. But while it is happening, it is possible for a database
282** page to hold as many as two more cells than it might otherwise hold.
283** The extra too entries in apCell[] are an allowance for this situation.
284**
drh72f82862001-05-24 21:06:34 +0000285** The pParent field points back to the parent page. This allows us to
286** walk up the BTree from any leaf to the root. Care must be taken to
287** unref() the parent page pointer when this page is no longer referenced.
drhbd03cae2001-06-02 02:40:57 +0000288** The pageDestructor() routine handles that chore.
drh7e3b0a02001-04-28 16:52:40 +0000289*/
290struct MemPage {
drh14acc042001-06-10 19:56:58 +0000291 union {
292 char aDisk[SQLITE_PAGE_SIZE]; /* Page data stored on disk */
293 PageHdr hdr; /* Overlay page header */
294 } u;
drh5e2f8b92001-05-28 00:41:15 +0000295 int isInit; /* True if auxiliary data is initialized */
drh72f82862001-05-24 21:06:34 +0000296 MemPage *pParent; /* The parent of this page. NULL for root */
drh14acc042001-06-10 19:56:58 +0000297 int nFree; /* Number of free bytes in u.aDisk[] */
drh306dc212001-05-21 13:45:10 +0000298 int nCell; /* Number of entries on this page */
drh14acc042001-06-10 19:56:58 +0000299 int isOverfull; /* Some apCell[] points outside u.aDisk[] */
300 Cell *apCell[MX_CELL+2]; /* All data entires in sorted order */
drh8c42ca92001-06-22 19:15:00 +0000301};
drh7e3b0a02001-04-28 16:52:40 +0000302
303/*
drh3b7511c2001-05-26 13:15:44 +0000304** The in-memory image of a disk page has the auxiliary information appended
305** to the end. EXTRA_SIZE is the number of bytes of space needed to hold
306** that extra information.
307*/
308#define EXTRA_SIZE (sizeof(MemPage)-SQLITE_PAGE_SIZE)
309
310/*
drha059ad02001-04-17 20:09:11 +0000311** Everything we need to know about an open database
312*/
313struct Btree {
314 Pager *pPager; /* The page cache */
drh306dc212001-05-21 13:45:10 +0000315 BtCursor *pCursor; /* A list of all open cursors */
drhbd03cae2001-06-02 02:40:57 +0000316 PageOne *page1; /* First page of the database */
drh306dc212001-05-21 13:45:10 +0000317 int inTrans; /* True if a transaction is in progress */
drha059ad02001-04-17 20:09:11 +0000318};
319typedef Btree Bt;
320
drh365d68f2001-05-11 11:02:46 +0000321/*
322** A cursor is a pointer to a particular entry in the BTree.
323** The entry is identified by its MemPage and the index in
drh5e2f8b92001-05-28 00:41:15 +0000324** MemPage.apCell[] of the entry.
drh365d68f2001-05-11 11:02:46 +0000325*/
drh72f82862001-05-24 21:06:34 +0000326struct BtCursor {
drh5e2f8b92001-05-28 00:41:15 +0000327 Btree *pBt; /* The Btree to which this cursor belongs */
drh14acc042001-06-10 19:56:58 +0000328 BtCursor *pNext, *pPrev; /* Forms a linked list of all cursors */
drh8b2f49b2001-06-08 00:21:52 +0000329 Pgno pgnoRoot; /* The root page of this tree */
drh5e2f8b92001-05-28 00:41:15 +0000330 MemPage *pPage; /* Page that contains the entry */
drh8c42ca92001-06-22 19:15:00 +0000331 int idx; /* Index of the entry in pPage->apCell[] */
drh5e2f8b92001-05-28 00:41:15 +0000332 u8 bSkipNext; /* sqliteBtreeNext() is no-op if true */
333 u8 iMatch; /* compare result from last sqliteBtreeMoveto() */
drh365d68f2001-05-11 11:02:46 +0000334};
drh7e3b0a02001-04-28 16:52:40 +0000335
drha059ad02001-04-17 20:09:11 +0000336/*
drh3b7511c2001-05-26 13:15:44 +0000337** Compute the total number of bytes that a Cell needs on the main
drh5e2f8b92001-05-28 00:41:15 +0000338** database page. The number returned includes the Cell header,
339** local payload storage, and the pointer to overflow pages (if
drh8c42ca92001-06-22 19:15:00 +0000340** applicable). Additional space allocated on overflow pages
drhbd03cae2001-06-02 02:40:57 +0000341** is NOT included in the value returned from this routine.
drh3b7511c2001-05-26 13:15:44 +0000342*/
343static int cellSize(Cell *pCell){
344 int n = pCell->h.nKey + pCell->h.nData;
345 if( n>MX_LOCAL_PAYLOAD ){
346 n = MX_LOCAL_PAYLOAD + sizeof(Pgno);
347 }else{
348 n = ROUNDUP(n);
349 }
350 n += sizeof(CellHdr);
351 return n;
352}
353
354/*
drh72f82862001-05-24 21:06:34 +0000355** Defragment the page given. All Cells are moved to the
356** beginning of the page and all free space is collected
357** into one big FreeBlk at the end of the page.
drh365d68f2001-05-11 11:02:46 +0000358*/
359static void defragmentPage(MemPage *pPage){
drh14acc042001-06-10 19:56:58 +0000360 int pc, i, n;
drh2af926b2001-05-15 00:39:25 +0000361 FreeBlk *pFBlk;
362 char newPage[SQLITE_PAGE_SIZE];
363
drhbd03cae2001-06-02 02:40:57 +0000364 pc = sizeof(PageHdr);
drh14acc042001-06-10 19:56:58 +0000365 pPage->u.hdr.firstCell = pc;
366 memcpy(newPage, pPage->u.aDisk, pc);
drh2af926b2001-05-15 00:39:25 +0000367 for(i=0; i<pPage->nCell; i++){
drh2aa679f2001-06-25 02:11:07 +0000368 Cell *pCell = pPage->apCell[i];
drh8c42ca92001-06-22 19:15:00 +0000369
370 /* This routine should never be called on an overfull page. The
371 ** following asserts verify that constraint. */
drh7c717f72001-06-24 20:39:41 +0000372 assert( Addr(pCell) > Addr(pPage) );
373 assert( Addr(pCell) < Addr(pPage) + SQLITE_PAGE_SIZE );
drh8c42ca92001-06-22 19:15:00 +0000374
drh3b7511c2001-05-26 13:15:44 +0000375 n = cellSize(pCell);
drh2aa679f2001-06-25 02:11:07 +0000376 pCell->h.iNext = pc + n;
drh2af926b2001-05-15 00:39:25 +0000377 memcpy(&newPage[pc], pCell, n);
drh14acc042001-06-10 19:56:58 +0000378 pPage->apCell[i] = (Cell*)&pPage->u.aDisk[pc];
drh2af926b2001-05-15 00:39:25 +0000379 pc += n;
380 }
drh72f82862001-05-24 21:06:34 +0000381 assert( pPage->nFree==SQLITE_PAGE_SIZE-pc );
drh14acc042001-06-10 19:56:58 +0000382 memcpy(pPage->u.aDisk, newPage, pc);
drh2aa679f2001-06-25 02:11:07 +0000383 if( pPage->nCell>0 ){
384 pPage->apCell[pPage->nCell-1]->h.iNext = 0;
385 }
drh8c42ca92001-06-22 19:15:00 +0000386 pFBlk = (FreeBlk*)&pPage->u.aDisk[pc];
drh2af926b2001-05-15 00:39:25 +0000387 pFBlk->iSize = SQLITE_PAGE_SIZE - pc;
388 pFBlk->iNext = 0;
drh14acc042001-06-10 19:56:58 +0000389 pPage->u.hdr.firstFree = pc;
drh2af926b2001-05-15 00:39:25 +0000390 memset(&pFBlk[1], 0, SQLITE_PAGE_SIZE - pc - sizeof(FreeBlk));
drh365d68f2001-05-11 11:02:46 +0000391}
392
drha059ad02001-04-17 20:09:11 +0000393/*
drh8b2f49b2001-06-08 00:21:52 +0000394** Allocate nByte bytes of space on a page. nByte must be a
395** multiple of 4.
drhbd03cae2001-06-02 02:40:57 +0000396**
drh14acc042001-06-10 19:56:58 +0000397** Return the index into pPage->u.aDisk[] of the first byte of
drhbd03cae2001-06-02 02:40:57 +0000398** the new allocation. Or return 0 if there is not enough free
399** space on the page to satisfy the allocation request.
drh2af926b2001-05-15 00:39:25 +0000400**
drh72f82862001-05-24 21:06:34 +0000401** If the page contains nBytes of free space but does not contain
drh8b2f49b2001-06-08 00:21:52 +0000402** nBytes of contiguous free space, then this routine automatically
403** calls defragementPage() to consolidate all free space before
404** allocating the new chunk.
drh7e3b0a02001-04-28 16:52:40 +0000405*/
drhbd03cae2001-06-02 02:40:57 +0000406static int allocateSpace(MemPage *pPage, int nByte){
drh2af926b2001-05-15 00:39:25 +0000407 FreeBlk *p;
408 u16 *pIdx;
409 int start;
drh8c42ca92001-06-22 19:15:00 +0000410 int cnt = 0;
drh72f82862001-05-24 21:06:34 +0000411
drh5e2f8b92001-05-28 00:41:15 +0000412 assert( nByte==ROUNDUP(nByte) );
drh14acc042001-06-10 19:56:58 +0000413 if( pPage->nFree<nByte || pPage->isOverfull ) return 0;
414 pIdx = &pPage->u.hdr.firstFree;
415 p = (FreeBlk*)&pPage->u.aDisk[*pIdx];
drh2af926b2001-05-15 00:39:25 +0000416 while( p->iSize<nByte ){
drh8c42ca92001-06-22 19:15:00 +0000417 assert( cnt++ < SQLITE_PAGE_SIZE/4 );
drh2af926b2001-05-15 00:39:25 +0000418 if( p->iNext==0 ){
419 defragmentPage(pPage);
drh14acc042001-06-10 19:56:58 +0000420 pIdx = &pPage->u.hdr.firstFree;
drh2af926b2001-05-15 00:39:25 +0000421 }else{
422 pIdx = &p->iNext;
423 }
drh14acc042001-06-10 19:56:58 +0000424 p = (FreeBlk*)&pPage->u.aDisk[*pIdx];
drh2af926b2001-05-15 00:39:25 +0000425 }
426 if( p->iSize==nByte ){
427 start = *pIdx;
428 *pIdx = p->iNext;
429 }else{
drh8c42ca92001-06-22 19:15:00 +0000430 FreeBlk *pNew;
drh72f82862001-05-24 21:06:34 +0000431 start = *pIdx;
drh8c42ca92001-06-22 19:15:00 +0000432 pNew = (FreeBlk*)&pPage->u.aDisk[start + nByte];
drh72f82862001-05-24 21:06:34 +0000433 pNew->iNext = p->iNext;
434 pNew->iSize = p->iSize - nByte;
435 *pIdx = start + nByte;
drh2af926b2001-05-15 00:39:25 +0000436 }
437 pPage->nFree -= nByte;
438 return start;
drh7e3b0a02001-04-28 16:52:40 +0000439}
440
441/*
drh14acc042001-06-10 19:56:58 +0000442** Return a section of the MemPage.u.aDisk[] to the freelist.
443** The first byte of the new free block is pPage->u.aDisk[start]
444** and the size of the block is "size" bytes. Size must be
445** a multiple of 4.
drh306dc212001-05-21 13:45:10 +0000446**
447** Most of the effort here is involved in coalesing adjacent
448** free blocks into a single big free block.
drh7e3b0a02001-04-28 16:52:40 +0000449*/
450static void freeSpace(MemPage *pPage, int start, int size){
drh2af926b2001-05-15 00:39:25 +0000451 int end = start + size;
452 u16 *pIdx, idx;
453 FreeBlk *pFBlk;
454 FreeBlk *pNew;
455 FreeBlk *pNext;
456
457 assert( size == ROUNDUP(size) );
458 assert( start == ROUNDUP(start) );
drh14acc042001-06-10 19:56:58 +0000459 pIdx = &pPage->u.hdr.firstFree;
drh2af926b2001-05-15 00:39:25 +0000460 idx = *pIdx;
461 while( idx!=0 && idx<start ){
drh14acc042001-06-10 19:56:58 +0000462 pFBlk = (FreeBlk*)&pPage->u.aDisk[idx];
drh2af926b2001-05-15 00:39:25 +0000463 if( idx + pFBlk->iSize == start ){
464 pFBlk->iSize += size;
465 if( idx + pFBlk->iSize == pFBlk->iNext ){
drh8c42ca92001-06-22 19:15:00 +0000466 pNext = (FreeBlk*)&pPage->u.aDisk[pFBlk->iNext];
drh2af926b2001-05-15 00:39:25 +0000467 pFBlk->iSize += pNext->iSize;
468 pFBlk->iNext = pNext->iNext;
469 }
470 pPage->nFree += size;
471 return;
472 }
473 pIdx = &pFBlk->iNext;
474 idx = *pIdx;
475 }
drh14acc042001-06-10 19:56:58 +0000476 pNew = (FreeBlk*)&pPage->u.aDisk[start];
drh2af926b2001-05-15 00:39:25 +0000477 if( idx != end ){
478 pNew->iSize = size;
479 pNew->iNext = idx;
480 }else{
drh14acc042001-06-10 19:56:58 +0000481 pNext = (FreeBlk*)&pPage->u.aDisk[idx];
drh2af926b2001-05-15 00:39:25 +0000482 pNew->iSize = size + pNext->iSize;
483 pNew->iNext = pNext->iNext;
484 }
485 *pIdx = start;
486 pPage->nFree += size;
drh7e3b0a02001-04-28 16:52:40 +0000487}
488
489/*
490** Initialize the auxiliary information for a disk block.
drh72f82862001-05-24 21:06:34 +0000491**
drhbd03cae2001-06-02 02:40:57 +0000492** The pParent parameter must be a pointer to the MemPage which
493** is the parent of the page being initialized. The root of the
drh8b2f49b2001-06-08 00:21:52 +0000494** BTree (usually page 2) has no parent and so for that page,
495** pParent==NULL.
drh5e2f8b92001-05-28 00:41:15 +0000496**
drh72f82862001-05-24 21:06:34 +0000497** Return SQLITE_OK on success. If we see that the page does
498** not contained a well-formed database page, then return
499** SQLITE_CORRUPT. Note that a return of SQLITE_OK does not
500** guarantee that the page is well-formed. It only shows that
501** we failed to detect any corruption.
drh7e3b0a02001-04-28 16:52:40 +0000502*/
drh72f82862001-05-24 21:06:34 +0000503static int initPage(MemPage *pPage, Pgno pgnoThis, MemPage *pParent){
drh14acc042001-06-10 19:56:58 +0000504 int idx; /* An index into pPage->u.aDisk[] */
505 Cell *pCell; /* A pointer to a Cell in pPage->u.aDisk[] */
506 FreeBlk *pFBlk; /* A pointer to a free block in pPage->u.aDisk[] */
drh5e2f8b92001-05-28 00:41:15 +0000507 int sz; /* The size of a Cell in bytes */
508 int freeSpace; /* Amount of free space on the page */
drh2af926b2001-05-15 00:39:25 +0000509
drh5e2f8b92001-05-28 00:41:15 +0000510 if( pPage->pParent ){
511 assert( pPage->pParent==pParent );
512 return SQLITE_OK;
513 }
514 if( pParent ){
515 pPage->pParent = pParent;
516 sqlitepager_ref(pParent);
517 }
518 if( pPage->isInit ) return SQLITE_OK;
drh7e3b0a02001-04-28 16:52:40 +0000519 pPage->isInit = 1;
drh7e3b0a02001-04-28 16:52:40 +0000520 pPage->nCell = 0;
drhbd03cae2001-06-02 02:40:57 +0000521 freeSpace = SQLITE_PAGE_SIZE - sizeof(PageHdr);
drh14acc042001-06-10 19:56:58 +0000522 idx = pPage->u.hdr.firstCell;
drh7e3b0a02001-04-28 16:52:40 +0000523 while( idx!=0 ){
drh8c42ca92001-06-22 19:15:00 +0000524 if( idx>SQLITE_PAGE_SIZE-MIN_CELL_SIZE ) goto page_format_error;
drhbd03cae2001-06-02 02:40:57 +0000525 if( idx<sizeof(PageHdr) ) goto page_format_error;
drh8c42ca92001-06-22 19:15:00 +0000526 if( idx!=ROUNDUP(idx) ) goto page_format_error;
drh14acc042001-06-10 19:56:58 +0000527 pCell = (Cell*)&pPage->u.aDisk[idx];
drh5e2f8b92001-05-28 00:41:15 +0000528 sz = cellSize(pCell);
529 if( idx+sz > SQLITE_PAGE_SIZE ) goto page_format_error;
530 freeSpace -= sz;
531 pPage->apCell[pPage->nCell++] = pCell;
drh3b7511c2001-05-26 13:15:44 +0000532 idx = pCell->h.iNext;
drh2af926b2001-05-15 00:39:25 +0000533 }
534 pPage->nFree = 0;
drh14acc042001-06-10 19:56:58 +0000535 idx = pPage->u.hdr.firstFree;
drh2af926b2001-05-15 00:39:25 +0000536 while( idx!=0 ){
537 if( idx>SQLITE_PAGE_SIZE-sizeof(FreeBlk) ) goto page_format_error;
drhbd03cae2001-06-02 02:40:57 +0000538 if( idx<sizeof(PageHdr) ) goto page_format_error;
drh14acc042001-06-10 19:56:58 +0000539 pFBlk = (FreeBlk*)&pPage->u.aDisk[idx];
drh2af926b2001-05-15 00:39:25 +0000540 pPage->nFree += pFBlk->iSize;
drh7c717f72001-06-24 20:39:41 +0000541 if( pFBlk->iNext>0 && pFBlk->iNext <= idx ) goto page_format_error;
drh2af926b2001-05-15 00:39:25 +0000542 idx = pFBlk->iNext;
drh7e3b0a02001-04-28 16:52:40 +0000543 }
drh8b2f49b2001-06-08 00:21:52 +0000544 if( pPage->nCell==0 && pPage->nFree==0 ){
545 /* As a special case, an uninitialized root page appears to be
546 ** an empty database */
547 return SQLITE_OK;
548 }
drh5e2f8b92001-05-28 00:41:15 +0000549 if( pPage->nFree!=freeSpace ) goto page_format_error;
drh7e3b0a02001-04-28 16:52:40 +0000550 return SQLITE_OK;
drh2af926b2001-05-15 00:39:25 +0000551
552page_format_error:
553 return SQLITE_CORRUPT;
drh7e3b0a02001-04-28 16:52:40 +0000554}
555
556/*
drh8b2f49b2001-06-08 00:21:52 +0000557** Set up a raw page so that it looks like a database page holding
558** no entries.
drhbd03cae2001-06-02 02:40:57 +0000559*/
560static void zeroPage(MemPage *pPage){
561 PageHdr *pHdr;
562 FreeBlk *pFBlk;
563 memset(pPage, 0, SQLITE_PAGE_SIZE);
drh14acc042001-06-10 19:56:58 +0000564 pHdr = &pPage->u.hdr;
drhbd03cae2001-06-02 02:40:57 +0000565 pHdr->firstCell = 0;
566 pHdr->firstFree = sizeof(*pHdr);
567 pFBlk = (FreeBlk*)&pHdr[1];
568 pFBlk->iNext = 0;
569 pFBlk->iSize = SQLITE_PAGE_SIZE - sizeof(*pHdr);
drh8c42ca92001-06-22 19:15:00 +0000570 pPage->nFree = pFBlk->iSize;
571 pPage->nCell = 0;
572 pPage->isOverfull = 0;
drhbd03cae2001-06-02 02:40:57 +0000573}
574
575/*
drh72f82862001-05-24 21:06:34 +0000576** This routine is called when the reference count for a page
577** reaches zero. We need to unref the pParent pointer when that
578** happens.
579*/
580static void pageDestructor(void *pData){
581 MemPage *pPage = (MemPage*)pData;
582 if( pPage->pParent ){
583 MemPage *pParent = pPage->pParent;
584 pPage->pParent = 0;
585 sqlitepager_unref(pParent);
586 }
587}
588
589/*
drh306dc212001-05-21 13:45:10 +0000590** Open a new database.
591**
592** Actually, this routine just sets up the internal data structures
drh72f82862001-05-24 21:06:34 +0000593** for accessing the database. We do not open the database file
594** until the first page is loaded.
drha059ad02001-04-17 20:09:11 +0000595*/
596int sqliteBtreeOpen(const char *zFilename, int mode, Btree **ppBtree){
597 Btree *pBt;
drh8c42ca92001-06-22 19:15:00 +0000598 int rc;
drha059ad02001-04-17 20:09:11 +0000599
600 pBt = sqliteMalloc( sizeof(*pBt) );
601 if( pBt==0 ){
drh8c42ca92001-06-22 19:15:00 +0000602 *ppBtree = 0;
drha059ad02001-04-17 20:09:11 +0000603 return SQLITE_NOMEM;
604 }
drh8c42ca92001-06-22 19:15:00 +0000605 rc = sqlitepager_open(&pBt->pPager, zFilename, 100, EXTRA_SIZE);
drha059ad02001-04-17 20:09:11 +0000606 if( rc!=SQLITE_OK ){
607 if( pBt->pPager ) sqlitepager_close(pBt->pPager);
608 sqliteFree(pBt);
609 *ppBtree = 0;
610 return rc;
611 }
drh72f82862001-05-24 21:06:34 +0000612 sqlitepager_set_destructor(pBt->pPager, pageDestructor);
drha059ad02001-04-17 20:09:11 +0000613 pBt->pCursor = 0;
614 pBt->page1 = 0;
615 *ppBtree = pBt;
616 return SQLITE_OK;
617}
618
619/*
620** Close an open database and invalidate all cursors.
621*/
622int sqliteBtreeClose(Btree *pBt){
623 while( pBt->pCursor ){
624 sqliteBtreeCloseCursor(pBt->pCursor);
625 }
626 sqlitepager_close(pBt->pPager);
627 sqliteFree(pBt);
628 return SQLITE_OK;
629}
630
631/*
drh306dc212001-05-21 13:45:10 +0000632** Get a reference to page1 of the database file. This will
633** also acquire a readlock on that file.
634**
635** SQLITE_OK is returned on success. If the file is not a
636** well-formed database file, then SQLITE_CORRUPT is returned.
637** SQLITE_BUSY is returned if the database is locked. SQLITE_NOMEM
638** is returned if we run out of memory. SQLITE_PROTOCOL is returned
639** if there is a locking protocol violation.
640*/
641static int lockBtree(Btree *pBt){
642 int rc;
643 if( pBt->page1 ) return SQLITE_OK;
drh8c42ca92001-06-22 19:15:00 +0000644 rc = sqlitepager_get(pBt->pPager, 1, (void**)&pBt->page1);
drh306dc212001-05-21 13:45:10 +0000645 if( rc!=SQLITE_OK ) return rc;
drh306dc212001-05-21 13:45:10 +0000646
647 /* Do some checking to help insure the file we opened really is
648 ** a valid database file.
649 */
650 if( sqlitepager_pagecount(pBt->pPager)>0 ){
drhbd03cae2001-06-02 02:40:57 +0000651 PageOne *pP1 = pBt->page1;
drh8c42ca92001-06-22 19:15:00 +0000652 if( strcmp(pP1->zMagic,zMagicHeader)!=0 || pP1->iMagic!=MAGIC ){
drh306dc212001-05-21 13:45:10 +0000653 rc = SQLITE_CORRUPT;
drh72f82862001-05-24 21:06:34 +0000654 goto page1_init_failed;
drh306dc212001-05-21 13:45:10 +0000655 }
656 }
657 return rc;
658
drh72f82862001-05-24 21:06:34 +0000659page1_init_failed:
drh306dc212001-05-21 13:45:10 +0000660 sqlitepager_unref(pBt->page1);
661 pBt->page1 = 0;
drh72f82862001-05-24 21:06:34 +0000662 return rc;
drh306dc212001-05-21 13:45:10 +0000663}
664
665/*
drh8c42ca92001-06-22 19:15:00 +0000666** Create a new database by initializing the first two pages of the
667** file.
drh8b2f49b2001-06-08 00:21:52 +0000668*/
669static int newDatabase(Btree *pBt){
670 MemPage *pRoot;
671 PageOne *pP1;
drh8c42ca92001-06-22 19:15:00 +0000672 int rc;
drh7c717f72001-06-24 20:39:41 +0000673 if( sqlitepager_pagecount(pBt->pPager)>1 ) return SQLITE_OK;
drh8b2f49b2001-06-08 00:21:52 +0000674 pP1 = pBt->page1;
675 rc = sqlitepager_write(pBt->page1);
676 if( rc ) return rc;
drh8c42ca92001-06-22 19:15:00 +0000677 rc = sqlitepager_get(pBt->pPager, 2, (void**)&pRoot);
drh8b2f49b2001-06-08 00:21:52 +0000678 if( rc ) return rc;
679 rc = sqlitepager_write(pRoot);
680 if( rc ){
681 sqlitepager_unref(pRoot);
682 return rc;
683 }
684 strcpy(pP1->zMagic, zMagicHeader);
drh8c42ca92001-06-22 19:15:00 +0000685 pP1->iMagic = MAGIC;
drh8b2f49b2001-06-08 00:21:52 +0000686 zeroPage(pRoot);
687 sqlitepager_unref(pRoot);
688 return SQLITE_OK;
689}
690
691/*
drh72f82862001-05-24 21:06:34 +0000692** Attempt to start a new transaction.
drh8b2f49b2001-06-08 00:21:52 +0000693**
694** A transaction must be started before attempting any changes
695** to the database. None of the following routines will work
696** unless a transaction is started first:
697**
698** sqliteBtreeCreateTable()
699** sqliteBtreeClearTable()
700** sqliteBtreeDropTable()
701** sqliteBtreeInsert()
702** sqliteBtreeDelete()
703** sqliteBtreeUpdateMeta()
drha059ad02001-04-17 20:09:11 +0000704*/
705int sqliteBtreeBeginTrans(Btree *pBt){
706 int rc;
707 if( pBt->inTrans ) return SQLITE_ERROR;
708 if( pBt->page1==0 ){
drh7e3b0a02001-04-28 16:52:40 +0000709 rc = lockBtree(pBt);
drh8c42ca92001-06-22 19:15:00 +0000710 if( rc!=SQLITE_OK ){
711 return rc;
712 }
drha059ad02001-04-17 20:09:11 +0000713 }
714 rc = sqlitepager_write(pBt->page1);
drh8c42ca92001-06-22 19:15:00 +0000715 if( rc!=SQLITE_OK ){
716 return rc;
drha059ad02001-04-17 20:09:11 +0000717 }
drh8c42ca92001-06-22 19:15:00 +0000718 pBt->inTrans = 1;
719 rc = newDatabase(pBt);
720 return rc;
drha059ad02001-04-17 20:09:11 +0000721}
722
723/*
drha059ad02001-04-17 20:09:11 +0000724** Remove the last reference to the database file. This will
725** remove the read lock.
726*/
727static void unlockBtree(Btree *pBt){
drh7c717f72001-06-24 20:39:41 +0000728 if( pBt->inTrans==0 && pBt->pCursor==0 && pBt->page1!=0 ){
drha059ad02001-04-17 20:09:11 +0000729 sqlitepager_unref(pBt->page1);
730 pBt->page1 = 0;
731 pBt->inTrans = 0;
732 }
733}
734
735/*
drh2aa679f2001-06-25 02:11:07 +0000736** Commit the transaction currently in progress.
drha059ad02001-04-17 20:09:11 +0000737*/
738int sqliteBtreeCommit(Btree *pBt){
739 int rc;
drh2aa679f2001-06-25 02:11:07 +0000740 if( pBt->inTrans==0 ) return SQLITE_ERROR;
drha059ad02001-04-17 20:09:11 +0000741 rc = sqlitepager_commit(pBt->pPager);
drh7c717f72001-06-24 20:39:41 +0000742 pBt->inTrans = 0;
drha059ad02001-04-17 20:09:11 +0000743 unlockBtree(pBt);
744 return rc;
745}
746
747/*
748** Rollback the transaction in progress. All cursors must be
749** closed before this routine is called.
750*/
751int sqliteBtreeRollback(Btree *pBt){
752 int rc;
drh72f82862001-05-24 21:06:34 +0000753 if( pBt->pCursor!=0 ) return SQLITE_ERROR;
drh7c717f72001-06-24 20:39:41 +0000754 if( pBt->inTrans==0 ) return SQLITE_OK;
755 pBt->inTrans = 0;
drha059ad02001-04-17 20:09:11 +0000756 rc = sqlitepager_rollback(pBt->pPager);
757 unlockBtree(pBt);
758 return rc;
759}
760
761/*
drh8b2f49b2001-06-08 00:21:52 +0000762** Create a new cursor for the BTree whose root is on the page
763** iTable. The act of acquiring a cursor gets a read lock on
764** the database file.
drha059ad02001-04-17 20:09:11 +0000765*/
drh8b2f49b2001-06-08 00:21:52 +0000766int sqliteBtreeCursor(Btree *pBt, int iTable, BtCursor **ppCur){
drha059ad02001-04-17 20:09:11 +0000767 int rc;
768 BtCursor *pCur;
769 if( pBt->page1==0 ){
770 rc = lockBtree(pBt);
771 if( rc!=SQLITE_OK ){
772 *ppCur = 0;
773 return rc;
774 }
775 }
776 pCur = sqliteMalloc( sizeof(*pCur) );
777 if( pCur==0 ){
drhbd03cae2001-06-02 02:40:57 +0000778 rc = SQLITE_NOMEM;
779 goto create_cursor_exception;
780 }
drh8b2f49b2001-06-08 00:21:52 +0000781 pCur->pgnoRoot = (Pgno)iTable;
drh8c42ca92001-06-22 19:15:00 +0000782 rc = sqlitepager_get(pBt->pPager, pCur->pgnoRoot, (void**)&pCur->pPage);
drhbd03cae2001-06-02 02:40:57 +0000783 if( rc!=SQLITE_OK ){
784 goto create_cursor_exception;
785 }
drh8b2f49b2001-06-08 00:21:52 +0000786 rc = initPage(pCur->pPage, pCur->pgnoRoot, 0);
drhbd03cae2001-06-02 02:40:57 +0000787 if( rc!=SQLITE_OK ){
788 goto create_cursor_exception;
drha059ad02001-04-17 20:09:11 +0000789 }
drh14acc042001-06-10 19:56:58 +0000790 pCur->pBt = pBt;
791 pCur->idx = 0;
drha059ad02001-04-17 20:09:11 +0000792 pCur->pNext = pBt->pCursor;
793 if( pCur->pNext ){
794 pCur->pNext->pPrev = pCur;
795 }
drh14acc042001-06-10 19:56:58 +0000796 pCur->pPrev = 0;
drha059ad02001-04-17 20:09:11 +0000797 pBt->pCursor = pCur;
drh2af926b2001-05-15 00:39:25 +0000798 *ppCur = pCur;
799 return SQLITE_OK;
drhbd03cae2001-06-02 02:40:57 +0000800
801create_cursor_exception:
802 *ppCur = 0;
803 if( pCur ){
804 if( pCur->pPage ) sqlitepager_unref(pCur->pPage);
805 sqliteFree(pCur);
806 }
drh8c42ca92001-06-22 19:15:00 +0000807 unlockBtree(pBt);
drhbd03cae2001-06-02 02:40:57 +0000808 return rc;
drha059ad02001-04-17 20:09:11 +0000809}
810
811/*
drhbd03cae2001-06-02 02:40:57 +0000812** Close a cursor. The lock on the database file is released
813** when the last cursor is closed.
drha059ad02001-04-17 20:09:11 +0000814*/
815int sqliteBtreeCloseCursor(BtCursor *pCur){
816 Btree *pBt = pCur->pBt;
drha059ad02001-04-17 20:09:11 +0000817 if( pCur->pPrev ){
818 pCur->pPrev->pNext = pCur->pNext;
819 }else{
820 pBt->pCursor = pCur->pNext;
821 }
822 if( pCur->pNext ){
823 pCur->pNext->pPrev = pCur->pPrev;
824 }
drh2af926b2001-05-15 00:39:25 +0000825 sqlitepager_unref(pCur->pPage);
drhbd03cae2001-06-02 02:40:57 +0000826 unlockBtree(pBt);
drha059ad02001-04-17 20:09:11 +0000827 sqliteFree(pCur);
drh8c42ca92001-06-22 19:15:00 +0000828 return SQLITE_OK;
drha059ad02001-04-17 20:09:11 +0000829}
830
drh7e3b0a02001-04-28 16:52:40 +0000831/*
drh5e2f8b92001-05-28 00:41:15 +0000832** Make a temporary cursor by filling in the fields of pTempCur.
833** The temporary cursor is not on the cursor list for the Btree.
834*/
drh14acc042001-06-10 19:56:58 +0000835static void getTempCursor(BtCursor *pCur, BtCursor *pTempCur){
drh5e2f8b92001-05-28 00:41:15 +0000836 memcpy(pTempCur, pCur, sizeof(*pCur));
837 pTempCur->pNext = 0;
838 pTempCur->pPrev = 0;
839 sqlitepager_ref(pTempCur->pPage);
840}
841
842/*
drhbd03cae2001-06-02 02:40:57 +0000843** Delete a temporary cursor such as was made by the CreateTemporaryCursor()
drh5e2f8b92001-05-28 00:41:15 +0000844** function above.
845*/
drh14acc042001-06-10 19:56:58 +0000846static void releaseTempCursor(BtCursor *pCur){
drh5e2f8b92001-05-28 00:41:15 +0000847 sqlitepager_unref(pCur->pPage);
848}
849
850/*
drhbd03cae2001-06-02 02:40:57 +0000851** Set *pSize to the number of bytes of key in the entry the
852** cursor currently points to. Always return SQLITE_OK.
853** Failure is not possible. If the cursor is not currently
854** pointing to an entry (which can happen, for example, if
855** the database is empty) then *pSize is set to 0.
drh7e3b0a02001-04-28 16:52:40 +0000856*/
drh72f82862001-05-24 21:06:34 +0000857int sqliteBtreeKeySize(BtCursor *pCur, int *pSize){
drh2af926b2001-05-15 00:39:25 +0000858 Cell *pCell;
859 MemPage *pPage;
860
861 pPage = pCur->pPage;
drh72f82862001-05-24 21:06:34 +0000862 assert( pPage!=0 );
863 if( pCur->idx >= pPage->nCell ){
864 *pSize = 0;
865 }else{
drh5e2f8b92001-05-28 00:41:15 +0000866 pCell = pPage->apCell[pCur->idx];
drh8c42ca92001-06-22 19:15:00 +0000867 *pSize = pCell->h.nKey;
drh72f82862001-05-24 21:06:34 +0000868 }
869 return SQLITE_OK;
drha059ad02001-04-17 20:09:11 +0000870}
drh2af926b2001-05-15 00:39:25 +0000871
drh72f82862001-05-24 21:06:34 +0000872/*
873** Read payload information from the entry that the pCur cursor is
874** pointing to. Begin reading the payload at "offset" and read
875** a total of "amt" bytes. Put the result in zBuf.
876**
877** This routine does not make a distinction between key and data.
878** It just reads bytes from the payload area.
879*/
drh2af926b2001-05-15 00:39:25 +0000880static int getPayload(BtCursor *pCur, int offset, int amt, char *zBuf){
drh5e2f8b92001-05-28 00:41:15 +0000881 char *aPayload;
drh2af926b2001-05-15 00:39:25 +0000882 Pgno nextPage;
drh8c42ca92001-06-22 19:15:00 +0000883 int rc;
drh72f82862001-05-24 21:06:34 +0000884 assert( pCur!=0 && pCur->pPage!=0 );
drh8c42ca92001-06-22 19:15:00 +0000885 assert( pCur->idx>=0 && pCur->idx<pCur->pPage->nCell );
886 aPayload = pCur->pPage->apCell[pCur->idx]->aPayload;
drh2af926b2001-05-15 00:39:25 +0000887 if( offset<MX_LOCAL_PAYLOAD ){
888 int a = amt;
889 if( a+offset>MX_LOCAL_PAYLOAD ){
890 a = MX_LOCAL_PAYLOAD - offset;
891 }
drh5e2f8b92001-05-28 00:41:15 +0000892 memcpy(zBuf, &aPayload[offset], a);
drh2af926b2001-05-15 00:39:25 +0000893 if( a==amt ){
894 return SQLITE_OK;
895 }
drh2aa679f2001-06-25 02:11:07 +0000896 offset = 0;
drh2af926b2001-05-15 00:39:25 +0000897 zBuf += a;
898 amt -= a;
drhbd03cae2001-06-02 02:40:57 +0000899 }
900 if( amt>0 ){
drh8c42ca92001-06-22 19:15:00 +0000901 nextPage = pCur->pPage->apCell[pCur->idx]->ovfl;
drh2af926b2001-05-15 00:39:25 +0000902 }
903 while( amt>0 && nextPage ){
904 OverflowPage *pOvfl;
drh8c42ca92001-06-22 19:15:00 +0000905 rc = sqlitepager_get(pCur->pBt->pPager, nextPage, (void**)&pOvfl);
drh2af926b2001-05-15 00:39:25 +0000906 if( rc!=0 ){
907 return rc;
908 }
drh14acc042001-06-10 19:56:58 +0000909 nextPage = pOvfl->iNext;
drh2af926b2001-05-15 00:39:25 +0000910 if( offset<OVERFLOW_SIZE ){
911 int a = amt;
912 if( a + offset > OVERFLOW_SIZE ){
913 a = OVERFLOW_SIZE - offset;
914 }
drh5e2f8b92001-05-28 00:41:15 +0000915 memcpy(zBuf, &pOvfl->aPayload[offset], a);
drh2aa679f2001-06-25 02:11:07 +0000916 offset = 0;
drh2af926b2001-05-15 00:39:25 +0000917 amt -= a;
918 zBuf += a;
drh2aa679f2001-06-25 02:11:07 +0000919 }else{
920 offset -= OVERFLOW_SIZE;
drh2af926b2001-05-15 00:39:25 +0000921 }
922 sqlitepager_unref(pOvfl);
923 }
924 return amt==0 ? SQLITE_OK : SQLITE_CORRUPT;
925}
926
drh72f82862001-05-24 21:06:34 +0000927/*
928** Read part of the key associated with cursor pCur. A total
929** of "amt" bytes will be transfered into zBuf[]. The transfer
930** begins at "offset". If the key does not contain enough data
931** to satisfy the request, no data is fetched and this routine
932** returns SQLITE_ERROR.
933*/
934int sqliteBtreeKey(BtCursor *pCur, int offset, int amt, char *zBuf){
935 Cell *pCell;
936 MemPage *pPage;
drha059ad02001-04-17 20:09:11 +0000937
drh72f82862001-05-24 21:06:34 +0000938 if( amt<0 ) return SQLITE_ERROR;
939 if( offset<0 ) return SQLITE_ERROR;
940 if( amt==0 ) return SQLITE_OK;
941 pPage = pCur->pPage;
942 assert( pPage!=0 );
943 if( pCur->idx >= pPage->nCell ){
944 return SQLITE_ERROR;
945 }
drh5e2f8b92001-05-28 00:41:15 +0000946 pCell = pPage->apCell[pCur->idx];
drh3b7511c2001-05-26 13:15:44 +0000947 if( amt+offset > pCell->h.nKey ){
drhbd03cae2001-06-02 02:40:57 +0000948 return SQLITE_ERROR;
949 }
drh72f82862001-05-24 21:06:34 +0000950 return getPayload(pCur, offset, amt, zBuf);
951}
952
953/*
drhbd03cae2001-06-02 02:40:57 +0000954** Set *pSize to the number of bytes of data in the entry the
955** cursor currently points to. Always return SQLITE_OK.
956** Failure is not possible. If the cursor is not currently
957** pointing to an entry (which can happen, for example, if
958** the database is empty) then *pSize is set to 0.
drh72f82862001-05-24 21:06:34 +0000959*/
960int sqliteBtreeDataSize(BtCursor *pCur, int *pSize){
961 Cell *pCell;
962 MemPage *pPage;
963
964 pPage = pCur->pPage;
965 assert( pPage!=0 );
966 if( pCur->idx >= pPage->nCell ){
967 *pSize = 0;
968 }else{
drh5e2f8b92001-05-28 00:41:15 +0000969 pCell = pPage->apCell[pCur->idx];
drh3b7511c2001-05-26 13:15:44 +0000970 *pSize = pCell->h.nData;
drh72f82862001-05-24 21:06:34 +0000971 }
972 return SQLITE_OK;
973}
974
975/*
976** Read part of the data associated with cursor pCur. A total
977** of "amt" bytes will be transfered into zBuf[]. The transfer
978** begins at "offset". If the size of the data in the record
979** is insufficent to satisfy this request then no data is read
980** and this routine returns SQLITE_ERROR.
981*/
982int sqliteBtreeData(BtCursor *pCur, int offset, int amt, char *zBuf){
983 Cell *pCell;
984 MemPage *pPage;
985
986 if( amt<0 ) return SQLITE_ERROR;
987 if( offset<0 ) return SQLITE_ERROR;
988 if( amt==0 ) return SQLITE_OK;
989 pPage = pCur->pPage;
990 assert( pPage!=0 );
991 if( pCur->idx >= pPage->nCell ){
992 return SQLITE_ERROR;
993 }
drh5e2f8b92001-05-28 00:41:15 +0000994 pCell = pPage->apCell[pCur->idx];
drhbd03cae2001-06-02 02:40:57 +0000995 if( amt+offset > pCell->h.nData ){
996 return SQLITE_ERROR;
997 }
drh3b7511c2001-05-26 13:15:44 +0000998 return getPayload(pCur, offset + pCell->h.nKey, amt, zBuf);
drh72f82862001-05-24 21:06:34 +0000999}
drha059ad02001-04-17 20:09:11 +00001000
drh2af926b2001-05-15 00:39:25 +00001001/*
1002** Compare the key for the entry that pCur points to against the
1003** given key (pKey,nKeyOrig). Put the comparison result in *pResult.
1004** The result is negative if pCur<pKey, zero if they are equal and
1005** positive if pCur>pKey.
1006**
1007** SQLITE_OK is returned on success. If part of the cursor key
1008** is on overflow pages and we are unable to access those overflow
1009** pages, then some other value might be returned to indicate the
1010** reason for the error.
1011*/
1012static int compareKey(BtCursor *pCur, char *pKey, int nKeyOrig, int *pResult){
1013 Pgno nextPage;
1014 int nKey = nKeyOrig;
drh8c42ca92001-06-22 19:15:00 +00001015 int n, c, rc;
drh2af926b2001-05-15 00:39:25 +00001016 Cell *pCell;
1017
1018 assert( pCur->pPage );
1019 assert( pCur->idx>=0 && pCur->idx<pCur->pPage->nCell );
drhbd03cae2001-06-02 02:40:57 +00001020 pCell = pCur->pPage->apCell[pCur->idx];
drh3b7511c2001-05-26 13:15:44 +00001021 if( nKey > pCell->h.nKey ){
1022 nKey = pCell->h.nKey;
drh2af926b2001-05-15 00:39:25 +00001023 }
1024 n = nKey;
1025 if( n>MX_LOCAL_PAYLOAD ){
1026 n = MX_LOCAL_PAYLOAD;
1027 }
drh5e2f8b92001-05-28 00:41:15 +00001028 c = memcmp(pCell->aPayload, pKey, n);
drh2af926b2001-05-15 00:39:25 +00001029 if( c!=0 ){
1030 *pResult = c;
1031 return SQLITE_OK;
1032 }
1033 pKey += n;
1034 nKey -= n;
drh3b7511c2001-05-26 13:15:44 +00001035 nextPage = pCell->ovfl;
drh2af926b2001-05-15 00:39:25 +00001036 while( nKey>0 ){
1037 OverflowPage *pOvfl;
1038 if( nextPage==0 ){
1039 return SQLITE_CORRUPT;
1040 }
drh8c42ca92001-06-22 19:15:00 +00001041 rc = sqlitepager_get(pCur->pBt->pPager, nextPage, (void**)&pOvfl);
drh72f82862001-05-24 21:06:34 +00001042 if( rc ){
drh2af926b2001-05-15 00:39:25 +00001043 return rc;
1044 }
drh14acc042001-06-10 19:56:58 +00001045 nextPage = pOvfl->iNext;
drh2af926b2001-05-15 00:39:25 +00001046 n = nKey;
1047 if( n>OVERFLOW_SIZE ){
1048 n = OVERFLOW_SIZE;
1049 }
drh5e2f8b92001-05-28 00:41:15 +00001050 c = memcmp(pOvfl->aPayload, pKey, n);
drh2af926b2001-05-15 00:39:25 +00001051 sqlitepager_unref(pOvfl);
1052 if( c!=0 ){
1053 *pResult = c;
1054 return SQLITE_OK;
1055 }
1056 nKey -= n;
1057 pKey += n;
1058 }
drh3b7511c2001-05-26 13:15:44 +00001059 c = pCell->h.nKey - nKeyOrig;
drh2af926b2001-05-15 00:39:25 +00001060 *pResult = c;
1061 return SQLITE_OK;
1062}
1063
drh72f82862001-05-24 21:06:34 +00001064/*
1065** Move the cursor down to a new child page.
1066*/
drh5e2f8b92001-05-28 00:41:15 +00001067static int moveToChild(BtCursor *pCur, int newPgno){
drh72f82862001-05-24 21:06:34 +00001068 int rc;
1069 MemPage *pNewPage;
1070
drh8c42ca92001-06-22 19:15:00 +00001071 rc = sqlitepager_get(pCur->pBt->pPager, newPgno, (void**)&pNewPage);
drh72f82862001-05-24 21:06:34 +00001072 if( rc ){
1073 return rc;
1074 }
drh5e2f8b92001-05-28 00:41:15 +00001075 initPage(pNewPage, newPgno, pCur->pPage);
drh72f82862001-05-24 21:06:34 +00001076 sqlitepager_unref(pCur->pPage);
1077 pCur->pPage = pNewPage;
1078 pCur->idx = 0;
1079 return SQLITE_OK;
1080}
1081
1082/*
drh5e2f8b92001-05-28 00:41:15 +00001083** Move the cursor up to the parent page.
1084**
1085** pCur->idx is set to the cell index that contains the pointer
1086** to the page we are coming from. If we are coming from the
1087** right-most child page then pCur->idx is set to one more than
drhbd03cae2001-06-02 02:40:57 +00001088** the largest cell index.
drh72f82862001-05-24 21:06:34 +00001089*/
drh5e2f8b92001-05-28 00:41:15 +00001090static int moveToParent(BtCursor *pCur){
drh72f82862001-05-24 21:06:34 +00001091 Pgno oldPgno;
1092 MemPage *pParent;
drh8c42ca92001-06-22 19:15:00 +00001093 int i;
drh72f82862001-05-24 21:06:34 +00001094 pParent = pCur->pPage->pParent;
drhbd03cae2001-06-02 02:40:57 +00001095 if( pParent==0 ) return SQLITE_INTERNAL;
drh72f82862001-05-24 21:06:34 +00001096 oldPgno = sqlitepager_pagenumber(pCur->pPage);
drh72f82862001-05-24 21:06:34 +00001097 sqlitepager_ref(pParent);
1098 sqlitepager_unref(pCur->pPage);
1099 pCur->pPage = pParent;
drh8c42ca92001-06-22 19:15:00 +00001100 pCur->idx = pParent->nCell;
1101 for(i=0; i<pParent->nCell; i++){
1102 if( pParent->apCell[i]->h.leftChild==oldPgno ){
drh72f82862001-05-24 21:06:34 +00001103 pCur->idx = i;
1104 break;
1105 }
1106 }
drh5e2f8b92001-05-28 00:41:15 +00001107 return SQLITE_OK;
drh72f82862001-05-24 21:06:34 +00001108}
1109
1110/*
1111** Move the cursor to the root page
1112*/
drh5e2f8b92001-05-28 00:41:15 +00001113static int moveToRoot(BtCursor *pCur){
drh72f82862001-05-24 21:06:34 +00001114 MemPage *pNew;
drhbd03cae2001-06-02 02:40:57 +00001115 int rc;
1116
drh8c42ca92001-06-22 19:15:00 +00001117 rc = sqlitepager_get(pCur->pBt->pPager, pCur->pgnoRoot, (void**)&pNew);
drhbd03cae2001-06-02 02:40:57 +00001118 if( rc ) return rc;
drh72f82862001-05-24 21:06:34 +00001119 sqlitepager_unref(pCur->pPage);
1120 pCur->pPage = pNew;
1121 pCur->idx = 0;
1122 return SQLITE_OK;
1123}
drh2af926b2001-05-15 00:39:25 +00001124
drh5e2f8b92001-05-28 00:41:15 +00001125/*
1126** Move the cursor down to the left-most leaf entry beneath the
1127** entry to which it is currently pointing.
1128*/
1129static int moveToLeftmost(BtCursor *pCur){
1130 Pgno pgno;
1131 int rc;
1132
1133 while( (pgno = pCur->pPage->apCell[pCur->idx]->h.leftChild)!=0 ){
1134 rc = moveToChild(pCur, pgno);
1135 if( rc ) return rc;
1136 }
1137 return SQLITE_OK;
1138}
1139
1140
drha059ad02001-04-17 20:09:11 +00001141/* Move the cursor so that it points to an entry near pKey.
drh72f82862001-05-24 21:06:34 +00001142** Return a success code.
1143**
drh5e2f8b92001-05-28 00:41:15 +00001144** If an exact match is not found, then the cursor is always
drhbd03cae2001-06-02 02:40:57 +00001145** left pointing at a leaf page which would hold the entry if it
drh5e2f8b92001-05-28 00:41:15 +00001146** were present. The cursor might point to an entry that comes
1147** before or after the key.
1148**
drhbd03cae2001-06-02 02:40:57 +00001149** The result of comparing the key with the entry to which the
1150** cursor is left pointing is stored in pCur->iMatch. The same
1151** value is also written to *pRes if pRes!=NULL. The meaning of
1152** this value is as follows:
1153**
1154** *pRes<0 The cursor is left pointing at an entry that
drh7c717f72001-06-24 20:39:41 +00001155** is smaller than pKey.
drhbd03cae2001-06-02 02:40:57 +00001156**
1157** *pRes==0 The cursor is left pointing at an entry that
1158** exactly matches pKey.
1159**
1160** *pRes>0 The cursor is left pointing at an entry that
drh7c717f72001-06-24 20:39:41 +00001161** is larger than pKey.
drha059ad02001-04-17 20:09:11 +00001162*/
drh72f82862001-05-24 21:06:34 +00001163int sqliteBtreeMoveto(BtCursor *pCur, void *pKey, int nKey, int *pRes){
1164 int rc;
drh7c717f72001-06-24 20:39:41 +00001165 pCur->bSkipNext = 0;
drh5e2f8b92001-05-28 00:41:15 +00001166 rc = moveToRoot(pCur);
drh72f82862001-05-24 21:06:34 +00001167 if( rc ) return rc;
1168 for(;;){
1169 int lwr, upr;
1170 Pgno chldPg;
1171 MemPage *pPage = pCur->pPage;
drh8b2f49b2001-06-08 00:21:52 +00001172 int c = -1;
drh72f82862001-05-24 21:06:34 +00001173 lwr = 0;
1174 upr = pPage->nCell-1;
1175 while( lwr<=upr ){
drh72f82862001-05-24 21:06:34 +00001176 pCur->idx = (lwr+upr)/2;
1177 rc = compareKey(pCur, pKey, nKey, &c);
1178 if( rc ) return rc;
1179 if( c==0 ){
drh5e2f8b92001-05-28 00:41:15 +00001180 pCur->iMatch = c;
drh72f82862001-05-24 21:06:34 +00001181 if( pRes ) *pRes = 0;
1182 return SQLITE_OK;
1183 }
1184 if( c<0 ){
1185 lwr = pCur->idx+1;
1186 }else{
1187 upr = pCur->idx-1;
1188 }
1189 }
1190 assert( lwr==upr+1 );
1191 if( lwr>=pPage->nCell ){
drh14acc042001-06-10 19:56:58 +00001192 chldPg = pPage->u.hdr.rightChild;
drh72f82862001-05-24 21:06:34 +00001193 }else{
drh5e2f8b92001-05-28 00:41:15 +00001194 chldPg = pPage->apCell[lwr]->h.leftChild;
drh72f82862001-05-24 21:06:34 +00001195 }
1196 if( chldPg==0 ){
drh5e2f8b92001-05-28 00:41:15 +00001197 pCur->iMatch = c;
drh72f82862001-05-24 21:06:34 +00001198 if( pRes ) *pRes = c;
1199 return SQLITE_OK;
1200 }
drh5e2f8b92001-05-28 00:41:15 +00001201 rc = moveToChild(pCur, chldPg);
drh72f82862001-05-24 21:06:34 +00001202 if( rc ) return rc;
1203 }
drhbd03cae2001-06-02 02:40:57 +00001204 /* NOT REACHED */
drh72f82862001-05-24 21:06:34 +00001205}
1206
1207/*
drhbd03cae2001-06-02 02:40:57 +00001208** Advance the cursor to the next entry in the database. If
1209** successful and pRes!=NULL then set *pRes=0. If the cursor
1210** was already pointing to the last entry in the database before
1211** this routine was called, then set *pRes=1 if pRes!=NULL.
drh72f82862001-05-24 21:06:34 +00001212*/
1213int sqliteBtreeNext(BtCursor *pCur, int *pRes){
drh72f82862001-05-24 21:06:34 +00001214 int rc;
drh5e2f8b92001-05-28 00:41:15 +00001215 if( pCur->bSkipNext ){
1216 pCur->bSkipNext = 0;
drh72f82862001-05-24 21:06:34 +00001217 if( pRes ) *pRes = 0;
1218 return SQLITE_OK;
1219 }
drh72f82862001-05-24 21:06:34 +00001220 pCur->idx++;
drh5e2f8b92001-05-28 00:41:15 +00001221 if( pCur->idx>=pCur->pPage->nCell ){
drh8c42ca92001-06-22 19:15:00 +00001222 if( pCur->pPage->u.hdr.rightChild ){
1223 rc = moveToChild(pCur, pCur->pPage->u.hdr.rightChild);
drh5e2f8b92001-05-28 00:41:15 +00001224 if( rc ) return rc;
1225 rc = moveToLeftmost(pCur);
1226 if( rc ) return rc;
1227 if( pRes ) *pRes = 0;
drh72f82862001-05-24 21:06:34 +00001228 return SQLITE_OK;
1229 }
drh5e2f8b92001-05-28 00:41:15 +00001230 do{
drh8c42ca92001-06-22 19:15:00 +00001231 if( pCur->pPage->pParent==0 ){
drh5e2f8b92001-05-28 00:41:15 +00001232 if( pRes ) *pRes = 1;
1233 return SQLITE_OK;
1234 }
1235 rc = moveToParent(pCur);
1236 if( rc ) return rc;
1237 }while( pCur->idx>=pCur->pPage->nCell );
drh72f82862001-05-24 21:06:34 +00001238 if( pRes ) *pRes = 0;
1239 return SQLITE_OK;
1240 }
drh5e2f8b92001-05-28 00:41:15 +00001241 rc = moveToLeftmost(pCur);
1242 if( rc ) return rc;
drh72f82862001-05-24 21:06:34 +00001243 if( pRes ) *pRes = 0;
1244 return SQLITE_OK;
1245}
1246
drh3b7511c2001-05-26 13:15:44 +00001247/*
1248** Allocate a new page from the database file.
1249**
1250** The new page is marked as dirty. (In other words, sqlitepager_write()
1251** has already been called on the new page.) The new page has also
1252** been referenced and the calling routine is responsible for calling
1253** sqlitepager_unref() on the new page when it is done.
1254**
1255** SQLITE_OK is returned on success. Any other return value indicates
1256** an error. *ppPage and *pPgno are undefined in the event of an error.
1257** Do not invoke sqlitepager_unref() on *ppPage if an error is returned.
1258*/
1259static int allocatePage(Btree *pBt, MemPage **ppPage, Pgno *pPgno){
drhbd03cae2001-06-02 02:40:57 +00001260 PageOne *pPage1 = pBt->page1;
drh8c42ca92001-06-22 19:15:00 +00001261 int rc;
drh3b7511c2001-05-26 13:15:44 +00001262 if( pPage1->freeList ){
1263 OverflowPage *pOvfl;
1264 rc = sqlitepager_write(pPage1);
1265 if( rc ) return rc;
1266 *pPgno = pPage1->freeList;
drh8c42ca92001-06-22 19:15:00 +00001267 rc = sqlitepager_get(pBt->pPager, pPage1->freeList, (void**)&pOvfl);
drh3b7511c2001-05-26 13:15:44 +00001268 if( rc ) return rc;
1269 rc = sqlitepager_write(pOvfl);
1270 if( rc ){
1271 sqlitepager_unref(pOvfl);
1272 return rc;
1273 }
drh14acc042001-06-10 19:56:58 +00001274 pPage1->freeList = pOvfl->iNext;
drh2aa679f2001-06-25 02:11:07 +00001275 pPage1->nFree--;
drh3b7511c2001-05-26 13:15:44 +00001276 *ppPage = (MemPage*)pOvfl;
1277 }else{
drh2aa679f2001-06-25 02:11:07 +00001278 *pPgno = sqlitepager_pagecount(pBt->pPager) + 1;
drh8c42ca92001-06-22 19:15:00 +00001279 rc = sqlitepager_get(pBt->pPager, *pPgno, (void**)ppPage);
drh3b7511c2001-05-26 13:15:44 +00001280 if( rc ) return rc;
1281 rc = sqlitepager_write(*ppPage);
1282 }
1283 return rc;
1284}
1285
1286/*
1287** Add a page of the database file to the freelist. Either pgno or
1288** pPage but not both may be 0.
drh5e2f8b92001-05-28 00:41:15 +00001289**
1290** sqlitepager_unref() is NOT called for pPage. The calling routine
1291** needs to do that.
drh3b7511c2001-05-26 13:15:44 +00001292*/
1293static int freePage(Btree *pBt, void *pPage, Pgno pgno){
drhbd03cae2001-06-02 02:40:57 +00001294 PageOne *pPage1 = pBt->page1;
drh3b7511c2001-05-26 13:15:44 +00001295 OverflowPage *pOvfl = (OverflowPage*)pPage;
1296 int rc;
1297 int needOvflUnref = 0;
drh8b2f49b2001-06-08 00:21:52 +00001298
drh3b7511c2001-05-26 13:15:44 +00001299 if( pgno==0 ){
1300 assert( pOvfl!=0 );
1301 pgno = sqlitepager_pagenumber(pOvfl);
1302 }
drh2aa679f2001-06-25 02:11:07 +00001303 assert( pgno>2 );
drh3b7511c2001-05-26 13:15:44 +00001304 rc = sqlitepager_write(pPage1);
1305 if( rc ){
1306 return rc;
1307 }
1308 if( pOvfl==0 ){
1309 assert( pgno>0 );
drh8c42ca92001-06-22 19:15:00 +00001310 rc = sqlitepager_get(pBt->pPager, pgno, (void**)&pOvfl);
drh3b7511c2001-05-26 13:15:44 +00001311 if( rc ) return rc;
1312 needOvflUnref = 1;
1313 }
1314 rc = sqlitepager_write(pOvfl);
1315 if( rc ){
1316 if( needOvflUnref ) sqlitepager_unref(pOvfl);
1317 return rc;
1318 }
drh14acc042001-06-10 19:56:58 +00001319 pOvfl->iNext = pPage1->freeList;
drh3b7511c2001-05-26 13:15:44 +00001320 pPage1->freeList = pgno;
drh2aa679f2001-06-25 02:11:07 +00001321 pPage1->nFree++;
drh5e2f8b92001-05-28 00:41:15 +00001322 memset(pOvfl->aPayload, 0, OVERFLOW_SIZE);
drh8c42ca92001-06-22 19:15:00 +00001323 ((MemPage*)pPage)->isInit = 0;
1324 assert( ((MemPage*)pPage)->pParent==0 );
drh3b7511c2001-05-26 13:15:44 +00001325 rc = sqlitepager_unref(pOvfl);
1326 return rc;
1327}
1328
1329/*
1330** Erase all the data out of a cell. This involves returning overflow
1331** pages back the freelist.
1332*/
1333static int clearCell(Btree *pBt, Cell *pCell){
1334 Pager *pPager = pBt->pPager;
1335 OverflowPage *pOvfl;
drh3b7511c2001-05-26 13:15:44 +00001336 Pgno ovfl, nextOvfl;
1337 int rc;
1338
drh5e2f8b92001-05-28 00:41:15 +00001339 if( pCell->h.nKey + pCell->h.nData <= MX_LOCAL_PAYLOAD ){
1340 return SQLITE_OK;
1341 }
drh3b7511c2001-05-26 13:15:44 +00001342 ovfl = pCell->ovfl;
1343 pCell->ovfl = 0;
1344 while( ovfl ){
drh8c42ca92001-06-22 19:15:00 +00001345 rc = sqlitepager_get(pPager, ovfl, (void**)&pOvfl);
drh3b7511c2001-05-26 13:15:44 +00001346 if( rc ) return rc;
drh14acc042001-06-10 19:56:58 +00001347 nextOvfl = pOvfl->iNext;
drhbd03cae2001-06-02 02:40:57 +00001348 rc = freePage(pBt, pOvfl, ovfl);
1349 if( rc ) return rc;
drh3b7511c2001-05-26 13:15:44 +00001350 ovfl = nextOvfl;
drh3b7511c2001-05-26 13:15:44 +00001351 }
drh5e2f8b92001-05-28 00:41:15 +00001352 return SQLITE_OK;
drh3b7511c2001-05-26 13:15:44 +00001353}
1354
1355/*
1356** Create a new cell from key and data. Overflow pages are allocated as
1357** necessary and linked to this cell.
1358*/
1359static int fillInCell(
1360 Btree *pBt, /* The whole Btree. Needed to allocate pages */
1361 Cell *pCell, /* Populate this Cell structure */
1362 void *pKey, int nKey, /* The key */
1363 void *pData,int nData /* The data */
1364){
drh8c42ca92001-06-22 19:15:00 +00001365 OverflowPage *pOvfl;
drh3b7511c2001-05-26 13:15:44 +00001366 Pgno *pNext;
1367 int spaceLeft;
drh8c42ca92001-06-22 19:15:00 +00001368 int n, rc;
drh3b7511c2001-05-26 13:15:44 +00001369 int nPayload;
1370 char *pPayload;
1371 char *pSpace;
1372
drh5e2f8b92001-05-28 00:41:15 +00001373 pCell->h.leftChild = 0;
drh3b7511c2001-05-26 13:15:44 +00001374 pCell->h.nKey = nKey;
1375 pCell->h.nData = nData;
1376 pCell->h.iNext = 0;
1377
1378 pNext = &pCell->ovfl;
drh5e2f8b92001-05-28 00:41:15 +00001379 pSpace = pCell->aPayload;
drh3b7511c2001-05-26 13:15:44 +00001380 spaceLeft = MX_LOCAL_PAYLOAD;
1381 pPayload = pKey;
1382 pKey = 0;
1383 nPayload = nKey;
1384 while( nPayload>0 ){
1385 if( spaceLeft==0 ){
drh8c42ca92001-06-22 19:15:00 +00001386 rc = allocatePage(pBt, (MemPage**)&pOvfl, pNext);
drh3b7511c2001-05-26 13:15:44 +00001387 if( rc ){
1388 *pNext = 0;
drh5e2f8b92001-05-28 00:41:15 +00001389 clearCell(pBt, pCell);
drh3b7511c2001-05-26 13:15:44 +00001390 return rc;
1391 }
1392 spaceLeft = OVERFLOW_SIZE;
drh5e2f8b92001-05-28 00:41:15 +00001393 pSpace = pOvfl->aPayload;
drh8c42ca92001-06-22 19:15:00 +00001394 pNext = &pOvfl->iNext;
drh3b7511c2001-05-26 13:15:44 +00001395 }
1396 n = nPayload;
1397 if( n>spaceLeft ) n = spaceLeft;
1398 memcpy(pSpace, pPayload, n);
1399 nPayload -= n;
1400 if( nPayload==0 && pData ){
1401 pPayload = pData;
1402 nPayload = nData;
1403 pData = 0;
1404 }else{
1405 pPayload += n;
1406 }
1407 spaceLeft -= n;
1408 pSpace += n;
1409 }
1410 return SQLITE_OK;
1411}
1412
1413/*
drhbd03cae2001-06-02 02:40:57 +00001414** Change the MemPage.pParent pointer on the page whose number is
drh8b2f49b2001-06-08 00:21:52 +00001415** given in the second argument so that MemPage.pParent holds the
drhbd03cae2001-06-02 02:40:57 +00001416** pointer in the third argument.
1417*/
1418static void reparentPage(Pager *pPager, Pgno pgno, MemPage *pNewParent){
1419 MemPage *pThis;
1420
1421 assert( pPager!=0 && pgno!=0 );
1422 pThis = sqlitepager_lookup(pPager, pgno);
1423 if( pThis && pThis->pParent!=pNewParent ){
1424 if( pThis->pParent ) sqlitepager_unref(pThis->pParent);
1425 pThis->pParent = pNewParent;
1426 if( pNewParent ) sqlitepager_ref(pNewParent);
1427 }
1428}
1429
1430/*
1431** Reparent all children of the given page to be the given page.
1432** In other words, for every child of pPage, invoke reparentPage()
1433** to make sure that child knows that pPage is its parent.
1434**
1435** This routine gets called after you memcpy() one page into
1436** another.
1437*/
drh8c42ca92001-06-22 19:15:00 +00001438static void reparentChildPages(Pager *pPager, MemPage *pPage){
drhbd03cae2001-06-02 02:40:57 +00001439 int i;
1440 for(i=0; i<pPage->nCell; i++){
drh8c42ca92001-06-22 19:15:00 +00001441 reparentPage(pPager, pPage->apCell[i]->h.leftChild, pPage);
drhbd03cae2001-06-02 02:40:57 +00001442 }
drh14acc042001-06-10 19:56:58 +00001443 reparentPage(pPager, pPage->u.hdr.rightChild, pPage);
1444}
1445
1446/*
1447** Remove the i-th cell from pPage. This routine effects pPage only.
1448** The cell content is not freed or deallocated. It is assumed that
1449** the cell content has been copied someplace else. This routine just
1450** removes the reference to the cell from pPage.
1451**
1452** "sz" must be the number of bytes in the cell.
1453**
1454** Do not bother maintaining the integrity of the linked list of Cells.
drh8c42ca92001-06-22 19:15:00 +00001455** Only the pPage->apCell[] array is important. The relinkCellList()
1456** routine will be called soon after this routine in order to rebuild
1457** the linked list.
drh14acc042001-06-10 19:56:58 +00001458*/
drh8c42ca92001-06-22 19:15:00 +00001459static void dropCell(MemPage *pPage, int idx, int sz){
drh14acc042001-06-10 19:56:58 +00001460 int j;
drh8c42ca92001-06-22 19:15:00 +00001461 assert( idx>=0 && idx<pPage->nCell );
1462 assert( sz==cellSize(pPage->apCell[idx]) );
drh7c717f72001-06-24 20:39:41 +00001463 freeSpace(pPage, Addr(pPage->apCell[idx]) - Addr(pPage), sz);
1464 for(j=idx; j<pPage->nCell-1; j++){
drh14acc042001-06-10 19:56:58 +00001465 pPage->apCell[j] = pPage->apCell[j+1];
1466 }
1467 pPage->nCell--;
1468}
1469
1470/*
1471** Insert a new cell on pPage at cell index "i". pCell points to the
1472** content of the cell.
1473**
1474** If the cell content will fit on the page, then put it there. If it
1475** will not fit, then just make pPage->apCell[i] point to the content
1476** and set pPage->isOverfull.
1477**
1478** Do not bother maintaining the integrity of the linked list of Cells.
drh8c42ca92001-06-22 19:15:00 +00001479** Only the pPage->apCell[] array is important. The relinkCellList()
1480** routine will be called soon after this routine in order to rebuild
1481** the linked list.
drh14acc042001-06-10 19:56:58 +00001482*/
1483static void insertCell(MemPage *pPage, int i, Cell *pCell, int sz){
1484 int idx, j;
1485 assert( i>=0 && i<=pPage->nCell );
1486 assert( sz==cellSize(pCell) );
drh2aa679f2001-06-25 02:11:07 +00001487 idx = allocateSpace(pPage, sz);
drh14acc042001-06-10 19:56:58 +00001488 for(j=pPage->nCell; j>i; j--){
1489 pPage->apCell[j] = pPage->apCell[j-1];
1490 }
1491 pPage->nCell++;
drh14acc042001-06-10 19:56:58 +00001492 if( idx<=0 ){
1493 pPage->isOverfull = 1;
1494 pPage->apCell[i] = pCell;
1495 }else{
1496 memcpy(&pPage->u.aDisk[idx], pCell, sz);
drh8c42ca92001-06-22 19:15:00 +00001497 pPage->apCell[i] = (Cell*)&pPage->u.aDisk[idx];
drh14acc042001-06-10 19:56:58 +00001498 }
1499}
1500
1501/*
1502** Rebuild the linked list of cells on a page so that the cells
drh8c42ca92001-06-22 19:15:00 +00001503** occur in the order specified by the pPage->apCell[] array.
1504** Invoke this routine once to repair damage after one or more
1505** invocations of either insertCell() or dropCell().
drh14acc042001-06-10 19:56:58 +00001506*/
1507static void relinkCellList(MemPage *pPage){
1508 int i;
1509 u16 *pIdx;
1510 pIdx = &pPage->u.hdr.firstCell;
1511 for(i=0; i<pPage->nCell; i++){
drh7c717f72001-06-24 20:39:41 +00001512 int idx = Addr(pPage->apCell[i]) - Addr(pPage);
drh8c42ca92001-06-22 19:15:00 +00001513 assert( idx>0 && idx<SQLITE_PAGE_SIZE );
drh14acc042001-06-10 19:56:58 +00001514 *pIdx = idx;
1515 pIdx = &pPage->apCell[i]->h.iNext;
1516 }
1517 *pIdx = 0;
1518}
1519
1520/*
1521** Make a copy of the contents of pFrom into pTo. The pFrom->apCell[]
1522** pointers that point intto pFrom->u.aDisk[] must be adjusted to point
1523** intto pTo->u.aDisk[] instead. But some pFrom->apCell[] entries might
1524** not point to pFrom->u.aDisk[]. Those are unchanged.
1525*/
1526static void copyPage(MemPage *pTo, MemPage *pFrom){
1527 uptr from, to;
1528 int i;
1529 memcpy(pTo->u.aDisk, pFrom->u.aDisk, SQLITE_PAGE_SIZE);
1530 pTo->pParent = pFrom->pParent;
1531 pTo->isInit = 1;
1532 pTo->nCell = pFrom->nCell;
1533 pTo->nFree = pFrom->nFree;
1534 pTo->isOverfull = pFrom->isOverfull;
drh7c717f72001-06-24 20:39:41 +00001535 to = Addr(pTo);
1536 from = Addr(pFrom);
drh14acc042001-06-10 19:56:58 +00001537 for(i=0; i<pTo->nCell; i++){
drh7c717f72001-06-24 20:39:41 +00001538 uptr x = Addr(pFrom->apCell[i]);
drh8c42ca92001-06-22 19:15:00 +00001539 if( x>from && x<from+SQLITE_PAGE_SIZE ){
1540 *((uptr*)&pTo->apCell[i]) = x + to - from;
drh14acc042001-06-10 19:56:58 +00001541 }
1542 }
drhbd03cae2001-06-02 02:40:57 +00001543}
1544
1545/*
drh8b2f49b2001-06-08 00:21:52 +00001546** This routine redistributes Cells on pPage and up to two siblings
1547** of pPage so that all pages have about the same amount of free space.
drh14acc042001-06-10 19:56:58 +00001548** Usually one sibling on either side of pPage is used in the balancing,
drh8b2f49b2001-06-08 00:21:52 +00001549** though both siblings might come from one side if pPage is the first
1550** or last child of its parent. If pPage has fewer than two siblings
1551** (something which can only happen if pPage is the root page or a
drh14acc042001-06-10 19:56:58 +00001552** child of root) then all available siblings participate in the balancing.
drh8b2f49b2001-06-08 00:21:52 +00001553**
1554** The number of siblings of pPage might be increased or decreased by
drh8c42ca92001-06-22 19:15:00 +00001555** one in an effort to keep pages between 66% and 100% full. The root page
1556** is special and is allowed to be less than 66% full. If pPage is
1557** the root page, then the depth of the tree might be increased
drh8b2f49b2001-06-08 00:21:52 +00001558** or decreased by one, as necessary, to keep the root page from being
1559** overfull or empty.
1560**
drh14acc042001-06-10 19:56:58 +00001561** This routine calls relinkCellList() on its input page regardless of
1562** whether or not it does any real balancing. Client routines will typically
1563** invoke insertCell() or dropCell() before calling this routine, so we
1564** need to call relinkCellList() to clean up the mess that those other
1565** routines left behind.
1566**
1567** pCur is left pointing to the same cell as when this routine was called
drh8c42ca92001-06-22 19:15:00 +00001568** even if that cell gets moved to a different page. pCur may be NULL.
1569** Set the pCur parameter to NULL if you do not care about keeping track
1570** of a cell as that will save this routine the work of keeping track of it.
drh14acc042001-06-10 19:56:58 +00001571**
drh8b2f49b2001-06-08 00:21:52 +00001572** Note that when this routine is called, some of the Cells on pPage
drh14acc042001-06-10 19:56:58 +00001573** might not actually be stored in pPage->u.aDisk[]. This can happen
drh8b2f49b2001-06-08 00:21:52 +00001574** if the page is overfull. Part of the job of this routine is to
drh14acc042001-06-10 19:56:58 +00001575** make sure all Cells for pPage once again fit in pPage->u.aDisk[].
1576**
drh8c42ca92001-06-22 19:15:00 +00001577** In the course of balancing the siblings of pPage, the parent of pPage
1578** might become overfull or underfull. If that happens, then this routine
1579** is called recursively on the parent.
1580**
drh14acc042001-06-10 19:56:58 +00001581** If this routine fails for any reason, it means the database may have
1582** been left in a corrupted state and should be rolled back.
drh8b2f49b2001-06-08 00:21:52 +00001583*/
drh14acc042001-06-10 19:56:58 +00001584static int balance(Btree *pBt, MemPage *pPage, BtCursor *pCur){
drh8b2f49b2001-06-08 00:21:52 +00001585 MemPage *pParent; /* The parent of pPage */
drh14acc042001-06-10 19:56:58 +00001586 MemPage *apOld[3]; /* pPage and up to two siblings */
drh8b2f49b2001-06-08 00:21:52 +00001587 Pgno pgnoOld[3]; /* Page numbers for each page in apOld[] */
drh14acc042001-06-10 19:56:58 +00001588 MemPage *apNew[4]; /* pPage and up to 3 siblings after balancing */
1589 Pgno pgnoNew[4]; /* Page numbers for each page in apNew[] */
drh8b2f49b2001-06-08 00:21:52 +00001590 int idxDiv[3]; /* Indices of divider cells in pParent */
1591 Cell *apDiv[3]; /* Divider cells in pParent */
1592 int nCell; /* Number of cells in apCell[] */
1593 int nOld; /* Number of pages in apOld[] */
1594 int nNew; /* Number of pages in apNew[] */
drh8b2f49b2001-06-08 00:21:52 +00001595 int nDiv; /* Number of cells in apDiv[] */
drh14acc042001-06-10 19:56:58 +00001596 int i, j, k; /* Loop counters */
1597 int idx; /* Index of pPage in pParent->apCell[] */
1598 int nxDiv; /* Next divider slot in pParent->apCell[] */
1599 int rc; /* The return code */
1600 int iCur; /* apCell[iCur] is the cell of the cursor */
1601 int usedPerPage; /* Memory needed for each page */
1602 int freePerPage; /* Average free space per page */
drh8c42ca92001-06-22 19:15:00 +00001603 int totalSize; /* Total bytes for all cells */
1604 Pgno pgno; /* Page number */
drh14acc042001-06-10 19:56:58 +00001605 Cell *apCell[MX_CELL*3+5]; /* All cells from pages being balanceed */
1606 int szCell[MX_CELL*3+5]; /* Local size of all cells */
1607 Cell aTemp[2]; /* Temporary holding area for apDiv[] */
1608 MemPage aOld[3]; /* Temporary copies of pPage and its siblings */
drh8b2f49b2001-06-08 00:21:52 +00001609
drh14acc042001-06-10 19:56:58 +00001610 /*
1611 ** Return without doing any work if pPage is neither overfull nor
1612 ** underfull.
drh8b2f49b2001-06-08 00:21:52 +00001613 */
drh14acc042001-06-10 19:56:58 +00001614 if( !pPage->isOverfull && pPage->nFree<SQLITE_PAGE_SIZE/2 ){
1615 relinkCellList(pPage);
drh8b2f49b2001-06-08 00:21:52 +00001616 return SQLITE_OK;
1617 }
1618
1619 /*
drh14acc042001-06-10 19:56:58 +00001620 ** Find the parent of the page to be balanceed.
1621 ** If there is no parent, it means this page is the root page and
drh8b2f49b2001-06-08 00:21:52 +00001622 ** special rules apply.
1623 */
drh14acc042001-06-10 19:56:58 +00001624 pParent = pPage->pParent;
drh8b2f49b2001-06-08 00:21:52 +00001625 if( pParent==0 ){
1626 Pgno pgnoChild;
drh8c42ca92001-06-22 19:15:00 +00001627 MemPage *pChild;
drh8b2f49b2001-06-08 00:21:52 +00001628 if( pPage->nCell==0 ){
drh14acc042001-06-10 19:56:58 +00001629 if( pPage->u.hdr.rightChild ){
1630 /*
1631 ** The root page is empty. Copy the one child page
drh8b2f49b2001-06-08 00:21:52 +00001632 ** into the root page and return. This reduces the depth
1633 ** of the BTree by one.
1634 */
drh14acc042001-06-10 19:56:58 +00001635 rc = sqlitepager_write(pPage);
1636 if( rc ) return rc;
1637 pgnoChild = pPage->u.hdr.rightChild;
drh8c42ca92001-06-22 19:15:00 +00001638 rc = sqlitepager_get(pBt->pPager, pgnoChild, (void**)&pChild);
drh8b2f49b2001-06-08 00:21:52 +00001639 if( rc ) return rc;
1640 memcpy(pPage, pChild, SQLITE_PAGE_SIZE);
1641 pPage->isInit = 0;
1642 initPage(pPage, sqlitepager_pagenumber(pPage), 0);
1643 reparentChildPages(pBt->pPager, pPage);
1644 freePage(pBt, pChild, pgnoChild);
1645 sqlitepager_unref(pChild);
1646 }
1647 return SQLITE_OK;
1648 }
drh14acc042001-06-10 19:56:58 +00001649 if( !pPage->isOverfull ){
drh8b2f49b2001-06-08 00:21:52 +00001650 /* It is OK for the root page to be less than half full.
1651 */
drh14acc042001-06-10 19:56:58 +00001652 relinkCellList(pPage);
drh8b2f49b2001-06-08 00:21:52 +00001653 return SQLITE_OK;
1654 }
drh14acc042001-06-10 19:56:58 +00001655 /*
1656 ** If we get to here, it means the root page is overfull.
drh8b2f49b2001-06-08 00:21:52 +00001657 ** When this happens, Create a new child page and copy the
1658 ** contents of the root into the child. Then make the root
drh14acc042001-06-10 19:56:58 +00001659 ** page an empty page with rightChild pointing to the new
drh8b2f49b2001-06-08 00:21:52 +00001660 ** child. Then fall thru to the code below which will cause
1661 ** the overfull child page to be split.
1662 */
drh14acc042001-06-10 19:56:58 +00001663 rc = sqlitepager_write(pPage);
1664 if( rc ) return rc;
drh8b2f49b2001-06-08 00:21:52 +00001665 rc = allocatePage(pBt, &pChild, &pgnoChild);
1666 if( rc ) return rc;
drh14acc042001-06-10 19:56:58 +00001667 copyPage(pChild, pPage);
1668 pChild->pParent = pPage;
1669 pChild->isOverfull = 1;
1670 if( pCur ){
1671 sqlitepager_ref(pChild);
1672 sqlitepager_unref(pCur->pPage);
1673 pCur->pPage = pChild;
drh8b2f49b2001-06-08 00:21:52 +00001674 }
drh8b2f49b2001-06-08 00:21:52 +00001675 zeroPage(pPage);
drh14acc042001-06-10 19:56:58 +00001676 pPage->u.hdr.rightChild = pgnoChild;
drh8b2f49b2001-06-08 00:21:52 +00001677 pParent = pPage;
1678 pPage = pChild;
drh14acc042001-06-10 19:56:58 +00001679 }else{
1680 rc = sqlitepager_write(pPage);
1681 if( rc ) return rc;
drh8b2f49b2001-06-08 00:21:52 +00001682 }
drh14acc042001-06-10 19:56:58 +00001683
drh8b2f49b2001-06-08 00:21:52 +00001684 /*
drh14acc042001-06-10 19:56:58 +00001685 ** Find the Cell in the parent page whose h.leftChild points back
1686 ** to pPage. The "idx" variable is the index of that cell. If pPage
1687 ** is the rightmost child of pParent then set idx to pParent->nCell
drh8b2f49b2001-06-08 00:21:52 +00001688 */
1689 idx = -1;
1690 pgno = sqlitepager_pagenumber(pPage);
1691 for(i=0; i<pParent->nCell; i++){
1692 if( pParent->apCell[i]->h.leftChild==pgno ){
1693 idx = i;
1694 break;
1695 }
1696 }
drh14acc042001-06-10 19:56:58 +00001697 if( idx<0 && pPage->u.hdr.rightChild==pgno ){
drh8b2f49b2001-06-08 00:21:52 +00001698 idx = pPage->nCell;
1699 }
1700 if( idx<0 ){
drh14acc042001-06-10 19:56:58 +00001701 return SQLITE_CORRUPT;
drh8b2f49b2001-06-08 00:21:52 +00001702 }
1703
1704 /*
drh14acc042001-06-10 19:56:58 +00001705 ** Initialize variables so that it will be safe to jump
1706 ** directory to balance_cleanup at any moment.
drh8b2f49b2001-06-08 00:21:52 +00001707 */
drh14acc042001-06-10 19:56:58 +00001708 nOld = nNew = 0;
1709 sqlitepager_ref(pParent);
1710
1711 /*
1712 ** Find sibling pages to pPage and the Cells in pParent that divide
1713 ** the siblings. An attempt is made to find one sibling on either
1714 ** side of pPage. Both siblings are taken from one side, however, if
1715 ** pPage is either the first or last child of its parent. If pParent
1716 ** has 3 or fewer children then all children of pParent are taken.
1717 */
1718 if( idx==pParent->nCell ){
1719 nxDiv = idx - 2;
drh8b2f49b2001-06-08 00:21:52 +00001720 }else{
drh14acc042001-06-10 19:56:58 +00001721 nxDiv = idx - 1;
drh8b2f49b2001-06-08 00:21:52 +00001722 }
drh14acc042001-06-10 19:56:58 +00001723 if( nxDiv<0 ) nxDiv = 0;
drh8b2f49b2001-06-08 00:21:52 +00001724 nDiv = 0;
drh14acc042001-06-10 19:56:58 +00001725 for(i=0, k=nxDiv; i<3; i++, k++){
1726 if( k<pParent->nCell ){
1727 idxDiv[i] = k;
1728 apDiv[i] = pParent->apCell[k];
drh8b2f49b2001-06-08 00:21:52 +00001729 nDiv++;
1730 pgnoOld[i] = apDiv[i]->h.leftChild;
drh14acc042001-06-10 19:56:58 +00001731 }else if( k==pParent->nCell ){
drh8c42ca92001-06-22 19:15:00 +00001732 pgnoOld[i] = pParent->u.hdr.rightChild;
drh14acc042001-06-10 19:56:58 +00001733 }else{
1734 break;
drh8b2f49b2001-06-08 00:21:52 +00001735 }
drh8c42ca92001-06-22 19:15:00 +00001736 rc = sqlitepager_get(pBt->pPager, pgnoOld[i], (void**)&apOld[i]);
drh14acc042001-06-10 19:56:58 +00001737 if( rc ) goto balance_cleanup;
1738 nOld++;
drh8b2f49b2001-06-08 00:21:52 +00001739 }
1740
1741 /*
drh14acc042001-06-10 19:56:58 +00001742 ** Set iCur to be the index in apCell[] of the cell that the cursor
1743 ** is pointing to. We will need this later on in order to keep the
1744 ** cursor pointing at the same cell.
1745 */
1746 if( pCur ){
1747 iCur = pCur->idx;
1748 for(i=0; idxDiv[i]<idx; i++){
1749 iCur += apOld[i]->nCell + 1;
1750 }
1751 sqlitepager_unref(pCur->pPage);
1752 pCur->pPage = 0;
1753 }
1754
1755 /*
1756 ** Make copies of the content of pPage and its siblings into aOld[].
1757 ** The rest of this function will use data from the copies rather
1758 ** that the original pages since the original pages will be in the
1759 ** process of being overwritten.
1760 */
1761 for(i=0; i<nOld; i++){
1762 copyPage(&aOld[i], apOld[i]);
1763 rc = freePage(pBt, apOld[i], pgnoOld[i]);
1764 if( rc ) goto balance_cleanup;
1765 apOld[i] = &aOld[i];
1766 }
1767
1768 /*
1769 ** Load pointers to all cells on sibling pages and the divider cells
1770 ** into the local apCell[] array. Make copies of the divider cells
1771 ** into aTemp[] and remove the the divider Cells from pParent.
drh8b2f49b2001-06-08 00:21:52 +00001772 */
1773 nCell = 0;
1774 for(i=0; i<nOld; i++){
1775 MemPage *pOld = apOld[i];
1776 for(j=0; j<pOld->nCell; j++){
drh14acc042001-06-10 19:56:58 +00001777 apCell[nCell] = pOld->apCell[j];
1778 szCell[nCell] = cellSize(apCell[nCell]);
1779 nCell++;
drh8b2f49b2001-06-08 00:21:52 +00001780 }
1781 if( i<nOld-1 ){
drh14acc042001-06-10 19:56:58 +00001782 szCell[nCell] = cellSize(apDiv[i]);
drh8c42ca92001-06-22 19:15:00 +00001783 memcpy(&aTemp[i], apDiv[i], szCell[nCell]);
drh14acc042001-06-10 19:56:58 +00001784 apCell[nCell] = &aTemp[i];
1785 dropCell(pParent, nxDiv, szCell[nCell]);
1786 assert( apCell[nCell]->h.leftChild==pgnoOld[i] );
1787 apCell[nCell]->h.leftChild = pOld->u.hdr.rightChild;
1788 nCell++;
drh8b2f49b2001-06-08 00:21:52 +00001789 }
1790 }
1791
1792 /*
drh14acc042001-06-10 19:56:58 +00001793 ** Estimate the number of pages needed. Record this number in "k"
1794 ** for now. It will get transferred to nNew as we allocate the
1795 ** new pages.
drh8b2f49b2001-06-08 00:21:52 +00001796 */
1797 totalSize = 0;
1798 for(i=0; i<nCell; i++){
drh14acc042001-06-10 19:56:58 +00001799 totalSize += szCell[i];
drh8b2f49b2001-06-08 00:21:52 +00001800 }
drh14acc042001-06-10 19:56:58 +00001801 k = (totalSize + (SQLITE_PAGE_SIZE - sizeof(PageHdr) - 1)) /
drh8b2f49b2001-06-08 00:21:52 +00001802 (SQLITE_PAGE_SIZE - sizeof(PageHdr));
drh14acc042001-06-10 19:56:58 +00001803 usedPerPage = (totalSize+k-1)/k;
1804 freePerPage = SQLITE_PAGE_SIZE - usedPerPage;
drh8b2f49b2001-06-08 00:21:52 +00001805
1806
1807 /*
1808 ** Allocate new pages
1809 */
drh14acc042001-06-10 19:56:58 +00001810 for(i=0; i<k; i++){
drh8b2f49b2001-06-08 00:21:52 +00001811 rc = allocatePage(pBt, &apNew[i], &pgnoNew[i]);
drh14acc042001-06-10 19:56:58 +00001812 if( rc ) goto balance_cleanup;
1813 nNew++;
drh8b2f49b2001-06-08 00:21:52 +00001814 zeroPage(apNew[i]);
1815 }
1816
1817 /*
drh14acc042001-06-10 19:56:58 +00001818 ** Evenly distribute the data in apCell[] across the new pages.
1819 ** Insert divider cells into pParent as necessary.
1820 */
1821 j = 0;
1822 for(i=0; i<nNew; i++){
1823 MemPage *pNew = apNew[i];
1824 while( j<nCell && pNew->nFree<freePerPage && szCell[j]<=pNew->nFree ){
1825 if( pCur && iCur==j ){ pCur->pPage = pNew; pCur->idx = pNew->nCell; }
1826 insertCell(pNew, pNew->nCell, apCell[j], szCell[j]);
1827 j++;
1828 }
1829 assert( !pNew->isOverfull );
1830 relinkCellList(pNew);
1831 if( i<nNew-1 && j<nCell ){
1832 pNew->u.hdr.rightChild = apCell[j]->h.leftChild;
1833 apCell[j]->h.leftChild = pgnoNew[i];
1834 if( pCur && iCur==j ){ pCur->pPage = pParent; pCur->idx = nxDiv; }
1835 insertCell(pParent, nxDiv, apCell[j], szCell[j]);
1836 j++;
1837 nxDiv++;
1838 }
1839 }
1840 apNew[nNew-1]->u.hdr.rightChild = apOld[nOld-1]->u.hdr.rightChild;
1841 if( nxDiv==pParent->nCell ){
1842 pParent->u.hdr.rightChild = pgnoNew[nNew-1];
1843 }else{
1844 pParent->apCell[nxDiv]->h.leftChild = pgnoNew[nNew-1];
1845 }
1846 if( pCur ){
1847 assert( pCur->pPage!=0 );
1848 sqlitepager_ref(pCur->pPage);
1849 }
1850
1851 /*
1852 ** Reparent children of all cells.
drh8b2f49b2001-06-08 00:21:52 +00001853 */
1854 for(i=0; i<nNew; i++){
drh14acc042001-06-10 19:56:58 +00001855 reparentChildPages(pBt->pPager, apNew[i]);
drh8b2f49b2001-06-08 00:21:52 +00001856 }
drh14acc042001-06-10 19:56:58 +00001857 reparentChildPages(pBt->pPager, pParent);
drh8b2f49b2001-06-08 00:21:52 +00001858
1859 /*
drh14acc042001-06-10 19:56:58 +00001860 ** balance the parent page.
drh8b2f49b2001-06-08 00:21:52 +00001861 */
drh14acc042001-06-10 19:56:58 +00001862 rc = balance(pBt, pParent, 0);
drh8b2f49b2001-06-08 00:21:52 +00001863
1864 /*
drh14acc042001-06-10 19:56:58 +00001865 ** Cleanup before returning.
drh8b2f49b2001-06-08 00:21:52 +00001866 */
drh14acc042001-06-10 19:56:58 +00001867balance_cleanup:
drh8b2f49b2001-06-08 00:21:52 +00001868 for(i=0; i<nOld; i++){
drh14acc042001-06-10 19:56:58 +00001869 sqlitepager_unref(apOld[i]);
drh8b2f49b2001-06-08 00:21:52 +00001870 }
drh14acc042001-06-10 19:56:58 +00001871 for(i=0; i<nNew; i++){
1872 sqlitepager_unref(apNew[i]);
drh8b2f49b2001-06-08 00:21:52 +00001873 }
drh14acc042001-06-10 19:56:58 +00001874 if( pCur && pCur->pPage==0 ){
1875 pCur->pPage = pParent;
1876 pCur->idx = 0;
1877 }else{
1878 sqlitepager_unref(pParent);
drh8b2f49b2001-06-08 00:21:52 +00001879 }
1880 return rc;
1881}
1882
1883/*
drh3b7511c2001-05-26 13:15:44 +00001884** Insert a new record into the BTree. The key is given by (pKey,nKey)
1885** and the data is given by (pData,nData). The cursor is used only to
1886** define what database the record should be inserted into. The cursor
drh14acc042001-06-10 19:56:58 +00001887** is left pointing at the new record.
drh3b7511c2001-05-26 13:15:44 +00001888*/
1889int sqliteBtreeInsert(
1890 BtCursor *pCur, /* Insert data into the table of this cursor */
1891 void *pKey, int nKey, /* The key of the new record */
1892 void *pData, int nData /* The data of the new record */
1893){
1894 Cell newCell;
1895 int rc;
1896 int loc;
drh14acc042001-06-10 19:56:58 +00001897 int szNew;
drh3b7511c2001-05-26 13:15:44 +00001898 MemPage *pPage;
1899 Btree *pBt = pCur->pBt;
1900
drh8b2f49b2001-06-08 00:21:52 +00001901 if( !pCur->pBt->inTrans ){
1902 return SQLITE_ERROR; /* Must start a transaction first */
1903 }
drh14acc042001-06-10 19:56:58 +00001904 rc = sqliteBtreeMoveto(pCur, pKey, nKey, &loc);
drh3b7511c2001-05-26 13:15:44 +00001905 if( rc ) return rc;
drh14acc042001-06-10 19:56:58 +00001906 pPage = pCur->pPage;
1907 rc = sqlitepager_write(pPage);
drhbd03cae2001-06-02 02:40:57 +00001908 if( rc ) return rc;
drh3b7511c2001-05-26 13:15:44 +00001909 rc = fillInCell(pBt, &newCell, pKey, nKey, pData, nData);
1910 if( rc ) return rc;
drh14acc042001-06-10 19:56:58 +00001911 szNew = cellSize(&newCell);
drh3b7511c2001-05-26 13:15:44 +00001912 if( loc==0 ){
drh14acc042001-06-10 19:56:58 +00001913 newCell.h.leftChild = pPage->apCell[pCur->idx]->h.leftChild;
1914 rc = clearCell(pBt, pPage->apCell[pCur->idx]);
drh5e2f8b92001-05-28 00:41:15 +00001915 if( rc ) return rc;
drh14acc042001-06-10 19:56:58 +00001916 dropCell(pPage, pCur->idx, cellSize(pPage->apCell[pCur->idx]));
drh7c717f72001-06-24 20:39:41 +00001917 }else if( loc<0 && pPage->nCell>0 ){
drh14acc042001-06-10 19:56:58 +00001918 assert( pPage->u.hdr.rightChild==0 ); /* Must be a leaf page */
1919 pCur->idx++;
1920 }else{
1921 assert( pPage->u.hdr.rightChild==0 ); /* Must be a leaf page */
drh3b7511c2001-05-26 13:15:44 +00001922 }
drh7c717f72001-06-24 20:39:41 +00001923 insertCell(pPage, pCur->idx, &newCell, szNew);
drh14acc042001-06-10 19:56:58 +00001924 rc = balance(pCur->pBt, pPage, pCur);
drh5e2f8b92001-05-28 00:41:15 +00001925 return rc;
1926}
1927
1928/*
drhbd03cae2001-06-02 02:40:57 +00001929** Delete the entry that the cursor is pointing to.
drh5e2f8b92001-05-28 00:41:15 +00001930**
drhbd03cae2001-06-02 02:40:57 +00001931** The cursor is left pointing at either the next or the previous
1932** entry. If the cursor is left pointing to the next entry, then
1933** the pCur->bSkipNext flag is set which forces the next call to
1934** sqliteBtreeNext() to be a no-op. That way, you can always call
1935** sqliteBtreeNext() after a delete and the cursor will be left
1936** pointing to the first entry after the deleted entry.
drh3b7511c2001-05-26 13:15:44 +00001937*/
1938int sqliteBtreeDelete(BtCursor *pCur){
drh5e2f8b92001-05-28 00:41:15 +00001939 MemPage *pPage = pCur->pPage;
1940 Cell *pCell;
1941 int rc;
drh8c42ca92001-06-22 19:15:00 +00001942 Pgno pgnoChild;
drh8b2f49b2001-06-08 00:21:52 +00001943
1944 if( !pCur->pBt->inTrans ){
1945 return SQLITE_ERROR; /* Must start a transaction first */
1946 }
drhbd03cae2001-06-02 02:40:57 +00001947 if( pCur->idx >= pPage->nCell ){
1948 return SQLITE_ERROR; /* The cursor is not pointing to anything */
1949 }
1950 rc = sqlitepager_write(pPage);
1951 if( rc ) return rc;
drh5e2f8b92001-05-28 00:41:15 +00001952 pCell = pPage->apCell[pCur->idx];
drh14acc042001-06-10 19:56:58 +00001953 pgnoChild = pCell->h.leftChild;
drh8c42ca92001-06-22 19:15:00 +00001954 clearCell(pCur->pBt, pCell);
drh14acc042001-06-10 19:56:58 +00001955 dropCell(pPage, pCur->idx, cellSize(pCell));
1956 if( pgnoChild ){
1957 /*
1958 ** If the entry we just deleted is not a leaf, then we've left a
drh8c42ca92001-06-22 19:15:00 +00001959 ** hole in an internal page. We have to fill the hole by moving
1960 ** in a cell from a leaf. The next Cell after the one just deleted
drh14acc042001-06-10 19:56:58 +00001961 ** is guaranteed to exist and to be a leaf so we can use it.
drh5e2f8b92001-05-28 00:41:15 +00001962 */
drh14acc042001-06-10 19:56:58 +00001963 BtCursor leafCur;
1964 Cell *pNext;
1965 int szNext;
1966 getTempCursor(pCur, &leafCur);
1967 rc = sqliteBtreeNext(&leafCur, 0);
1968 if( rc!=SQLITE_OK ){
1969 return SQLITE_CORRUPT;
drh5e2f8b92001-05-28 00:41:15 +00001970 }
drh8c42ca92001-06-22 19:15:00 +00001971 pNext = leafCur.pPage->apCell[leafCur.idx];
drh14acc042001-06-10 19:56:58 +00001972 szNext = cellSize(pNext);
drh8c42ca92001-06-22 19:15:00 +00001973 pNext->h.leftChild = pgnoChild;
drh14acc042001-06-10 19:56:58 +00001974 insertCell(pPage, pCur->idx, pNext, szNext);
1975 rc = balance(pCur->pBt, pPage, pCur);
drh5e2f8b92001-05-28 00:41:15 +00001976 if( rc ) return rc;
drh5e2f8b92001-05-28 00:41:15 +00001977 pCur->bSkipNext = 1;
drh14acc042001-06-10 19:56:58 +00001978 dropCell(leafCur.pPage, leafCur.idx, szNext);
1979 rc = balance(pCur->pBt, leafCur.pPage, 0);
drh8c42ca92001-06-22 19:15:00 +00001980 releaseTempCursor(&leafCur);
drh5e2f8b92001-05-28 00:41:15 +00001981 }else{
drh14acc042001-06-10 19:56:58 +00001982 rc = balance(pCur->pBt, pPage, pCur);
1983 pCur->bSkipNext = 1;
drh5e2f8b92001-05-28 00:41:15 +00001984 }
drh5e2f8b92001-05-28 00:41:15 +00001985 return rc;
drh3b7511c2001-05-26 13:15:44 +00001986}
drh8b2f49b2001-06-08 00:21:52 +00001987
1988/*
1989** Create a new BTree in the same file. Write into *piTable the index
1990** of the root page of the new table.
1991*/
1992int sqliteBtreeCreateTable(Btree *pBt, int *piTable){
1993 MemPage *pRoot;
1994 Pgno pgnoRoot;
1995 int rc;
1996 if( !pBt->inTrans ){
1997 return SQLITE_ERROR; /* Must start a transaction first */
1998 }
1999 rc = allocatePage(pBt, &pRoot, &pgnoRoot);
2000 if( rc ) return rc;
2001 sqlitepager_write(pRoot);
2002 zeroPage(pRoot);
2003 sqlitepager_unref(pRoot);
2004 *piTable = (int)pgnoRoot;
2005 return SQLITE_OK;
2006}
2007
2008/*
2009** Erase the given database page and all its children. Return
2010** the page to the freelist.
2011*/
drh2aa679f2001-06-25 02:11:07 +00002012static int clearDatabasePage(Btree *pBt, Pgno pgno, int freePageFlag){
drh8b2f49b2001-06-08 00:21:52 +00002013 MemPage *pPage;
2014 int rc;
drh8b2f49b2001-06-08 00:21:52 +00002015 Cell *pCell;
2016 int idx;
2017
drh8c42ca92001-06-22 19:15:00 +00002018 rc = sqlitepager_get(pBt->pPager, pgno, (void**)&pPage);
drh8b2f49b2001-06-08 00:21:52 +00002019 if( rc ) return rc;
drh14acc042001-06-10 19:56:58 +00002020 idx = pPage->u.hdr.firstCell;
drh8b2f49b2001-06-08 00:21:52 +00002021 while( idx>0 ){
drh14acc042001-06-10 19:56:58 +00002022 pCell = (Cell*)&pPage->u.aDisk[idx];
drh8b2f49b2001-06-08 00:21:52 +00002023 idx = pCell->h.iNext;
2024 if( pCell->h.leftChild ){
drh2aa679f2001-06-25 02:11:07 +00002025 rc = clearDatabasePage(pBt, pCell->h.leftChild, 1);
drh8b2f49b2001-06-08 00:21:52 +00002026 if( rc ) return rc;
2027 }
drh8c42ca92001-06-22 19:15:00 +00002028 rc = clearCell(pBt, pCell);
drh8b2f49b2001-06-08 00:21:52 +00002029 if( rc ) return rc;
2030 }
drh2aa679f2001-06-25 02:11:07 +00002031 if( pPage->u.hdr.rightChild ){
2032 rc = clearDatabasePage(pBt, pPage->u.hdr.rightChild, 1);
2033 if( rc ) return rc;
2034 }
2035 if( freePageFlag ){
2036 rc = freePage(pBt, pPage, pgno);
2037 }else{
2038 zeroPage(pPage);
2039 }
2040 return rc;
drh8b2f49b2001-06-08 00:21:52 +00002041}
2042
2043/*
2044** Delete all information from a single table in the database.
2045*/
2046int sqliteBtreeClearTable(Btree *pBt, int iTable){
2047 int rc;
2048 if( !pBt->inTrans ){
2049 return SQLITE_ERROR; /* Must start a transaction first */
2050 }
drh2aa679f2001-06-25 02:11:07 +00002051 rc = clearDatabasePage(pBt, (Pgno)iTable, 0);
drh8b2f49b2001-06-08 00:21:52 +00002052 if( rc ){
2053 sqliteBtreeRollback(pBt);
drh8b2f49b2001-06-08 00:21:52 +00002054 }
drh8c42ca92001-06-22 19:15:00 +00002055 return rc;
drh8b2f49b2001-06-08 00:21:52 +00002056}
2057
2058/*
2059** Erase all information in a table and add the root of the table to
2060** the freelist. Except, the root of the principle table (the one on
2061** page 2) is never added to the freelist.
2062*/
2063int sqliteBtreeDropTable(Btree *pBt, int iTable){
2064 int rc;
2065 MemPage *pPage;
2066 if( !pBt->inTrans ){
2067 return SQLITE_ERROR; /* Must start a transaction first */
2068 }
drh8c42ca92001-06-22 19:15:00 +00002069 rc = sqlitepager_get(pBt->pPager, (Pgno)iTable, (void**)&pPage);
drh2aa679f2001-06-25 02:11:07 +00002070 if( rc ) return rc;
2071 rc = sqliteBtreeClearTable(pBt, iTable);
2072 if( rc ) return rc;
2073 if( iTable>2 ){
2074 rc = freePage(pBt, pPage, iTable);
2075 }else{
2076 zeroPage(pPage);
2077 sqlitepager_unref(pPage);
drh8b2f49b2001-06-08 00:21:52 +00002078 }
drh8b2f49b2001-06-08 00:21:52 +00002079 return rc;
2080}
2081
2082/*
2083** Read the meta-information out of a database file.
2084*/
2085int sqliteBtreeGetMeta(Btree *pBt, int *aMeta){
2086 PageOne *pP1;
2087 int rc;
2088
drh8c42ca92001-06-22 19:15:00 +00002089 rc = sqlitepager_get(pBt->pPager, 1, (void**)&pP1);
drh8b2f49b2001-06-08 00:21:52 +00002090 if( rc ) return rc;
drh2aa679f2001-06-25 02:11:07 +00002091 aMeta[0] = pP1->nFree;
2092 memcpy(&aMeta[1], pP1->aMeta, sizeof(pP1->aMeta));
drh8b2f49b2001-06-08 00:21:52 +00002093 sqlitepager_unref(pP1);
2094 return SQLITE_OK;
2095}
2096
2097/*
2098** Write meta-information back into the database.
2099*/
2100int sqliteBtreeUpdateMeta(Btree *pBt, int *aMeta){
2101 PageOne *pP1;
2102 int rc;
2103 if( !pBt->inTrans ){
2104 return SQLITE_ERROR; /* Must start a transaction first */
2105 }
2106 pP1 = pBt->page1;
2107 rc = sqlitepager_write(pP1);
drh2aa679f2001-06-25 02:11:07 +00002108 if( rc ) return rc;
2109 memcpy(pP1->aMeta, &aMeta[1], sizeof(pP1->aMeta));
drh8b2f49b2001-06-08 00:21:52 +00002110 return SQLITE_OK;
2111}
drh8c42ca92001-06-22 19:15:00 +00002112
2113#ifdef SQLITE_TEST
2114/*
2115** Print a disassembly of the given page on standard output. This routine
2116** is used for debugging and testing only.
2117*/
2118int sqliteBtreePageDump(Btree *pBt, int pgno){
2119 int rc;
2120 MemPage *pPage;
2121 int i, j;
2122 int nFree;
2123 u16 idx;
2124 char range[20];
2125 unsigned char payload[20];
2126 rc = sqlitepager_get(pBt->pPager, (Pgno)pgno, (void**)&pPage);
2127 if( rc ){
2128 return rc;
2129 }
2130 i = 0;
2131 idx = pPage->u.hdr.firstCell;
2132 while( idx>0 && idx<=SQLITE_PAGE_SIZE-MIN_CELL_SIZE ){
2133 Cell *pCell = (Cell*)&pPage->u.aDisk[idx];
2134 int sz = cellSize(pCell);
2135 sprintf(range,"%d..%d", idx, idx+sz-1);
drh2aa679f2001-06-25 02:11:07 +00002136 sz = pCell->h.nKey + pCell->h.nData;
drh8c42ca92001-06-22 19:15:00 +00002137 if( sz>sizeof(payload)-1 ) sz = sizeof(payload)-1;
2138 memcpy(payload, pCell->aPayload, sz);
2139 for(j=0; j<sz; j++){
2140 if( payload[j]<0x20 || payload[j]>0x7f ) payload[j] = '.';
2141 }
2142 payload[sz] = 0;
2143 printf(
2144 "cell %2d: i=%-10s chld=%-4d nk=%-3d nd=%-3d payload=%s\n",
2145 i, range, (int)pCell->h.leftChild, pCell->h.nKey, pCell->h.nData,
drh2aa679f2001-06-25 02:11:07 +00002146 payload
drh8c42ca92001-06-22 19:15:00 +00002147 );
drh2aa679f2001-06-25 02:11:07 +00002148 if( pPage->apCell[i]!=pCell ){
2149 printf("**** apCell[%d] does not match on prior entry ****\n", i);
2150 }
drh7c717f72001-06-24 20:39:41 +00002151 i++;
drh8c42ca92001-06-22 19:15:00 +00002152 idx = pCell->h.iNext;
2153 }
2154 if( idx!=0 ){
2155 printf("ERROR: next cell index out of range: %d\n", idx);
2156 }
2157 printf("right_child: %d\n", pPage->u.hdr.rightChild);
2158 nFree = 0;
2159 i = 0;
2160 idx = pPage->u.hdr.firstFree;
2161 while( idx>0 && idx<SQLITE_PAGE_SIZE ){
2162 FreeBlk *p = (FreeBlk*)&pPage->u.aDisk[idx];
2163 sprintf(range,"%d..%d", idx, idx+p->iSize-1);
2164 nFree += p->iSize;
2165 printf("freeblock %2d: i=%-10s size=%-4d total=%d\n",
2166 i, range, p->iSize, nFree);
2167 idx = p->iNext;
drh2aa679f2001-06-25 02:11:07 +00002168 i++;
drh8c42ca92001-06-22 19:15:00 +00002169 }
2170 if( idx!=0 ){
2171 printf("ERROR: next freeblock index out of range: %d\n", idx);
2172 }
2173 sqlitepager_unref(pPage);
2174 return SQLITE_OK;
2175}
2176#endif
2177
2178#ifdef SQLITE_TEST
2179/*
drh2aa679f2001-06-25 02:11:07 +00002180** Fill aResult[] with information about the entry and page that the
2181** cursor is pointing to.
2182**
2183** aResult[0] = The page number
2184** aResult[1] = The entry number
2185** aResult[2] = Total number of entries on this page
2186** aResult[3] = Size of this entry
2187** aResult[4] = Number of free bytes on this page
2188** aResult[5] = Number of free blocks on the page
2189** aResult[6] = Page number of the left child of this entry
2190** aResult[7] = Page number of the right child for the whole page
drh8c42ca92001-06-22 19:15:00 +00002191*/
2192int sqliteBtreeCursorDump(BtCursor *pCur, int *aResult){
drh2aa679f2001-06-25 02:11:07 +00002193 int cnt, idx;
2194 MemPage *pPage = pCur->pPage;
2195 aResult[0] = sqlitepager_pagenumber(pPage);
drh8c42ca92001-06-22 19:15:00 +00002196 aResult[1] = pCur->idx;
drh2aa679f2001-06-25 02:11:07 +00002197 aResult[2] = pPage->nCell;
2198 if( pCur->idx>=0 && pCur->idx<pPage->nCell ){
2199 aResult[3] = cellSize(pPage->apCell[pCur->idx]);
2200 aResult[6] = pPage->apCell[pCur->idx]->h.leftChild;
2201 }else{
2202 aResult[3] = 0;
2203 aResult[6] = 0;
2204 }
2205 aResult[4] = pPage->nFree;
2206 cnt = 0;
2207 idx = pPage->u.hdr.firstFree;
2208 while( idx>0 && idx<SQLITE_PAGE_SIZE ){
2209 cnt++;
2210 idx = ((FreeBlk*)&pPage->u.aDisk[idx])->iNext;
2211 }
2212 aResult[5] = cnt;
2213 aResult[7] = pPage->u.hdr.rightChild;
drh8c42ca92001-06-22 19:15:00 +00002214 return SQLITE_OK;
2215}
2216#endif