blob: 7dbac878a6c1f8916856bd6e321d6d7071bd79f4 [file] [log] [blame]
drha059ad02001-04-17 20:09:11 +00001/*
2** Copyright (c) 2001 D. Richard Hipp
3**
4** This program is free software; you can redistribute it and/or
5** modify it under the terms of the GNU General Public
6** License as published by the Free Software Foundation; either
7** version 2 of the License, or (at your option) any later version.
8**
9** This program is distributed in the hope that it will be useful,
10** but WITHOUT ANY WARRANTY; without even the implied warranty of
11** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12** General Public License for more details.
13**
14** You should have received a copy of the GNU General Public
15** License along with this library; if not, write to the
16** Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17** Boston, MA 02111-1307, USA.
18**
19** Author contact information:
20** drh@hwaci.com
21** http://www.hwaci.com/drh/
22**
23*************************************************************************
drhdd793422001-06-28 01:54:48 +000024** $Id: btree.c,v 1.16 2001/06/28 01:54:48 drh Exp $
drh8b2f49b2001-06-08 00:21:52 +000025**
26** This file implements a external (disk-based) database using BTrees.
27** For a detailed discussion of BTrees, refer to
28**
29** Donald E. Knuth, THE ART OF COMPUTER PROGRAMMING, Volume 3:
30** "Sorting And Searching", pages 473-480. Addison-Wesley
31** Publishing Company, Reading, Massachusetts.
32**
33** The basic idea is that each page of the file contains N database
34** entries and N+1 pointers to subpages.
35**
36** ----------------------------------------------------------------
37** | Ptr(0) | Key(0) | Ptr(1) | Key(1) | ... | Key(N) | Ptr(N+1) |
38** ----------------------------------------------------------------
39**
40** All of the keys on the page that Ptr(0) points to have values less
41** than Key(0). All of the keys on page Ptr(1) and its subpages have
42** values greater than Key(0) and less than Key(1). All of the keys
43** on Ptr(N+1) and its subpages have values greater than Key(N). And
44** so forth.
45**
46** Finding a particular key requires reading O(log(M)) pages from the file
47** where M is the number of entries in the tree.
48**
49** In this implementation, a single file can hold one or more separate
50** BTrees. Each BTree is identified by the index of its root page. The
51** key and data for any entry are combined to form the "payload". Up to
52** MX_LOCAL_PAYLOAD bytes of payload can be carried directly on the
53** database page. If the payload is larger than MX_LOCAL_PAYLOAD bytes
54** then surplus bytes are stored on overflow pages. The payload for an
55** entry and the preceding pointer are combined to form a "Cell". Each
56** page has a smaller header which contains the Ptr(N+1) pointer.
57**
58** The first page of the file contains a magic string used to verify that
59** the file really is a valid BTree database, a pointer to a list of unused
60** pages in the file, and some meta information. The root of the first
61** BTree begins on page 2 of the file. (Pages are numbered beginning with
62** 1, not 0.) Thus a minimum database contains 2 pages.
drha059ad02001-04-17 20:09:11 +000063*/
64#include "sqliteInt.h"
65#include "pager.h"
66#include "btree.h"
67#include <assert.h>
68
drh2af926b2001-05-15 00:39:25 +000069
drh2af926b2001-05-15 00:39:25 +000070/*
71** Primitive data types. u32 must be 4 bytes and u16 must be 2 bytes.
drh14acc042001-06-10 19:56:58 +000072** The uptr type must be big enough to hold a pointer.
drh306dc212001-05-21 13:45:10 +000073** Change these typedefs when porting to new architectures.
drh2af926b2001-05-15 00:39:25 +000074*/
drh14acc042001-06-10 19:56:58 +000075typedef unsigned int uptr;
drh8c42ca92001-06-22 19:15:00 +000076/* typedef unsigned int u32; -- already defined in sqliteInt.h */
drh365d68f2001-05-11 11:02:46 +000077typedef unsigned short int u16;
drh5e2f8b92001-05-28 00:41:15 +000078typedef unsigned char u8;
drh365d68f2001-05-11 11:02:46 +000079
80/*
drh8c42ca92001-06-22 19:15:00 +000081** This macro casts a pointer to an integer. Useful for doing
82** pointer arithmetic.
83*/
drh7c717f72001-06-24 20:39:41 +000084#define Addr(X) ((uptr)X)
drh8c42ca92001-06-22 19:15:00 +000085
86/*
drh365d68f2001-05-11 11:02:46 +000087** Forward declarations of structures used only in this file.
88*/
drhbd03cae2001-06-02 02:40:57 +000089typedef struct PageOne PageOne;
drh2af926b2001-05-15 00:39:25 +000090typedef struct MemPage MemPage;
drh365d68f2001-05-11 11:02:46 +000091typedef struct PageHdr PageHdr;
92typedef struct Cell Cell;
drh3b7511c2001-05-26 13:15:44 +000093typedef struct CellHdr CellHdr;
drh365d68f2001-05-11 11:02:46 +000094typedef struct FreeBlk FreeBlk;
drh2af926b2001-05-15 00:39:25 +000095typedef struct OverflowPage OverflowPage;
96
97/*
98** All structures on a database page are aligned to 4-byte boundries.
99** This routine rounds up a number of bytes to the next multiple of 4.
drh306dc212001-05-21 13:45:10 +0000100**
101** This might need to change for computer architectures that require
102** and 8-byte alignment boundry for structures.
drh2af926b2001-05-15 00:39:25 +0000103*/
104#define ROUNDUP(X) ((X+3) & ~3)
drha059ad02001-04-17 20:09:11 +0000105
drh08ed44e2001-04-29 23:32:55 +0000106/*
drhbd03cae2001-06-02 02:40:57 +0000107** This is a magic string that appears at the beginning of every
drh8c42ca92001-06-22 19:15:00 +0000108** SQLite database in order to identify the file as a real database.
drh08ed44e2001-04-29 23:32:55 +0000109*/
drhbd03cae2001-06-02 02:40:57 +0000110static const char zMagicHeader[] =
drh8c42ca92001-06-22 19:15:00 +0000111 "** This file contains an SQLite 2.0 database **";
drhbd03cae2001-06-02 02:40:57 +0000112#define MAGIC_SIZE (sizeof(zMagicHeader))
drh08ed44e2001-04-29 23:32:55 +0000113
114/*
drh8c42ca92001-06-22 19:15:00 +0000115** This is a magic integer also used to the integrety of the database
116** file. This integer is used in addition to the string above so that
117** if the file is written on a little-endian architecture and read
118** on a big-endian architectures (or vice versa) we can detect the
119** problem.
120**
121** The number used was obtained at random and has no special
122** significance.
123*/
124#define MAGIC 0xdae37528
125
126/*
drhbd03cae2001-06-02 02:40:57 +0000127** The first page of the database file contains a magic header string
128** to identify the file as an SQLite database file. It also contains
129** a pointer to the first free page of the file. Page 2 contains the
drh8b2f49b2001-06-08 00:21:52 +0000130** root of the principle BTree. The file might contain other BTrees
131** rooted on pages above 2.
132**
133** The first page also contains SQLITE_N_BTREE_META integers that
134** can be used by higher-level routines.
drh08ed44e2001-04-29 23:32:55 +0000135**
drhbd03cae2001-06-02 02:40:57 +0000136** Remember that pages are numbered beginning with 1. (See pager.c
137** for additional information.) Page 0 does not exist and a page
138** number of 0 is used to mean "no such page".
139*/
140struct PageOne {
141 char zMagic[MAGIC_SIZE]; /* String that identifies the file as a database */
drh8c42ca92001-06-22 19:15:00 +0000142 int iMagic; /* Integer to verify correct byte order */
143 Pgno freeList; /* First free page in a list of all free pages */
drh2aa679f2001-06-25 02:11:07 +0000144 int nFree; /* Number of pages on the free list */
145 int aMeta[SQLITE_N_BTREE_META-1]; /* User defined integers */
drhbd03cae2001-06-02 02:40:57 +0000146};
147
148/*
149** Each database page has a header that is an instance of this
150** structure.
drh08ed44e2001-04-29 23:32:55 +0000151**
drh8b2f49b2001-06-08 00:21:52 +0000152** PageHdr.firstFree is 0 if there is no free space on this page.
drh14acc042001-06-10 19:56:58 +0000153** Otherwise, PageHdr.firstFree is the index in MemPage.u.aDisk[] of a
drh8b2f49b2001-06-08 00:21:52 +0000154** FreeBlk structure that describes the first block of free space.
155** All free space is defined by a linked list of FreeBlk structures.
drh08ed44e2001-04-29 23:32:55 +0000156**
drh8b2f49b2001-06-08 00:21:52 +0000157** Data is stored in a linked list of Cell structures. PageHdr.firstCell
drh14acc042001-06-10 19:56:58 +0000158** is the index into MemPage.u.aDisk[] of the first cell on the page. The
drh306dc212001-05-21 13:45:10 +0000159** Cells are kept in sorted order.
drh8b2f49b2001-06-08 00:21:52 +0000160**
161** A Cell contains all information about a database entry and a pointer
162** to a child page that contains other entries less than itself. In
163** other words, the i-th Cell contains both Ptr(i) and Key(i). The
164** right-most pointer of the page is contained in PageHdr.rightChild.
drh08ed44e2001-04-29 23:32:55 +0000165*/
drh365d68f2001-05-11 11:02:46 +0000166struct PageHdr {
drh5e2f8b92001-05-28 00:41:15 +0000167 Pgno rightChild; /* Child page that comes after all cells on this page */
drh14acc042001-06-10 19:56:58 +0000168 u16 firstCell; /* Index in MemPage.u.aDisk[] of the first cell */
169 u16 firstFree; /* Index in MemPage.u.aDisk[] of the first free block */
drh365d68f2001-05-11 11:02:46 +0000170};
drh306dc212001-05-21 13:45:10 +0000171
drh3b7511c2001-05-26 13:15:44 +0000172/*
173** Entries on a page of the database are called "Cells". Each Cell
174** has a header and data. This structure defines the header. The
drhbd03cae2001-06-02 02:40:57 +0000175** key and data (collectively the "payload") follow this header on
176** the database page.
177**
178** A definition of the complete Cell structure is given below. The
drh8c42ca92001-06-22 19:15:00 +0000179** header for the cell must be defined first in order to do some
drhbd03cae2001-06-02 02:40:57 +0000180** of the sizing #defines that follow.
drh3b7511c2001-05-26 13:15:44 +0000181*/
182struct CellHdr {
drh5e2f8b92001-05-28 00:41:15 +0000183 Pgno leftChild; /* Child page that comes before this cell */
drh3b7511c2001-05-26 13:15:44 +0000184 u16 nKey; /* Number of bytes in the key */
drh14acc042001-06-10 19:56:58 +0000185 u16 iNext; /* Index in MemPage.u.aDisk[] of next cell in sorted order */
drh3b7511c2001-05-26 13:15:44 +0000186 u32 nData; /* Number of bytes of data */
drh8c42ca92001-06-22 19:15:00 +0000187};
drh3b7511c2001-05-26 13:15:44 +0000188
189/*
190** The minimum size of a complete Cell. The Cell must contain a header
drhbd03cae2001-06-02 02:40:57 +0000191** and at least 4 bytes of payload.
drh3b7511c2001-05-26 13:15:44 +0000192*/
193#define MIN_CELL_SIZE (sizeof(CellHdr)+4)
194
195/*
196** The maximum number of database entries that can be held in a single
197** page of the database.
198*/
199#define MX_CELL ((SQLITE_PAGE_SIZE-sizeof(PageHdr))/MIN_CELL_SIZE)
200
201/*
drh8c42ca92001-06-22 19:15:00 +0000202** The maximum amount of payload (in bytes) that can be stored locally for
203** a database entry. If the entry contains more data than this, the
drh3b7511c2001-05-26 13:15:44 +0000204** extra goes onto overflow pages.
drhbd03cae2001-06-02 02:40:57 +0000205**
206** This number is chosen so that at least 4 cells will fit on every page.
drh3b7511c2001-05-26 13:15:44 +0000207*/
208#define MX_LOCAL_PAYLOAD \
drh2aa679f2001-06-25 02:11:07 +0000209 (((SQLITE_PAGE_SIZE-sizeof(PageHdr))/4-(sizeof(CellHdr)+sizeof(Pgno)))&~3)
drh3b7511c2001-05-26 13:15:44 +0000210
drh306dc212001-05-21 13:45:10 +0000211/*
212** Data on a database page is stored as a linked list of Cell structures.
drh5e2f8b92001-05-28 00:41:15 +0000213** Both the key and the data are stored in aPayload[]. The key always comes
214** first. The aPayload[] field grows as necessary to hold the key and data,
drh306dc212001-05-21 13:45:10 +0000215** up to a maximum of MX_LOCAL_PAYLOAD bytes. If the size of the key and
drh3b7511c2001-05-26 13:15:44 +0000216** data combined exceeds MX_LOCAL_PAYLOAD bytes, then Cell.ovfl is the
217** page number of the first overflow page.
218**
219** Though this structure is fixed in size, the Cell on the database
drhbd03cae2001-06-02 02:40:57 +0000220** page varies in size. Every cell has a CellHdr and at least 4 bytes
drh3b7511c2001-05-26 13:15:44 +0000221** of payload space. Additional payload bytes (up to the maximum of
222** MX_LOCAL_PAYLOAD) and the Cell.ovfl value are allocated only as
223** needed.
drh306dc212001-05-21 13:45:10 +0000224*/
drh365d68f2001-05-11 11:02:46 +0000225struct Cell {
drh5e2f8b92001-05-28 00:41:15 +0000226 CellHdr h; /* The cell header */
227 char aPayload[MX_LOCAL_PAYLOAD]; /* Key and data */
228 Pgno ovfl; /* The first overflow page */
drh365d68f2001-05-11 11:02:46 +0000229};
drh306dc212001-05-21 13:45:10 +0000230
231/*
232** Free space on a page is remembered using a linked list of the FreeBlk
233** structures. Space on a database page is allocated in increments of
drh72f82862001-05-24 21:06:34 +0000234** at least 4 bytes and is always aligned to a 4-byte boundry. The
drh8b2f49b2001-06-08 00:21:52 +0000235** linked list of FreeBlks is always kept in order by address.
drh306dc212001-05-21 13:45:10 +0000236*/
drh365d68f2001-05-11 11:02:46 +0000237struct FreeBlk {
drh72f82862001-05-24 21:06:34 +0000238 u16 iSize; /* Number of bytes in this block of free space */
drh14acc042001-06-10 19:56:58 +0000239 u16 iNext; /* Index in MemPage.u.aDisk[] of the next free block */
drh365d68f2001-05-11 11:02:46 +0000240};
drh306dc212001-05-21 13:45:10 +0000241
242/*
drh14acc042001-06-10 19:56:58 +0000243** The number of bytes of payload that will fit on a single overflow page.
drh3b7511c2001-05-26 13:15:44 +0000244*/
245#define OVERFLOW_SIZE (SQLITE_PAGE_SIZE-sizeof(Pgno))
246
247/*
drh306dc212001-05-21 13:45:10 +0000248** When the key and data for a single entry in the BTree will not fit in
drh8c42ca92001-06-22 19:15:00 +0000249** the MX_LOCAL_PAYLOAD bytes of space available on the database page,
drh8b2f49b2001-06-08 00:21:52 +0000250** then all extra bytes are written to a linked list of overflow pages.
drh306dc212001-05-21 13:45:10 +0000251** Each overflow page is an instance of the following structure.
252**
253** Unused pages in the database are also represented by instances of
drhbd03cae2001-06-02 02:40:57 +0000254** the OverflowPage structure. The PageOne.freeList field is the
drh306dc212001-05-21 13:45:10 +0000255** page number of the first page in a linked list of unused database
256** pages.
257*/
drh2af926b2001-05-15 00:39:25 +0000258struct OverflowPage {
drh14acc042001-06-10 19:56:58 +0000259 Pgno iNext;
drh5e2f8b92001-05-28 00:41:15 +0000260 char aPayload[OVERFLOW_SIZE];
drh7e3b0a02001-04-28 16:52:40 +0000261};
drh7e3b0a02001-04-28 16:52:40 +0000262
263/*
264** For every page in the database file, an instance of the following structure
drh14acc042001-06-10 19:56:58 +0000265** is stored in memory. The u.aDisk[] array contains the raw bits read from
drhbd03cae2001-06-02 02:40:57 +0000266** the disk. The rest is auxiliary information that held in memory only. The
267** auxiliary info is only valid for regular database pages - it is not
268** used for overflow pages and pages on the freelist.
drh306dc212001-05-21 13:45:10 +0000269**
drhbd03cae2001-06-02 02:40:57 +0000270** Of particular interest in the auxiliary info is the apCell[] entry. Each
drh14acc042001-06-10 19:56:58 +0000271** apCell[] entry is a pointer to a Cell structure in u.aDisk[]. The cells are
drh306dc212001-05-21 13:45:10 +0000272** put in this array so that they can be accessed in constant time, rather
drhbd03cae2001-06-02 02:40:57 +0000273** than in linear time which would be needed if we had to walk the linked
274** list on every access.
drh72f82862001-05-24 21:06:34 +0000275**
drh14acc042001-06-10 19:56:58 +0000276** Note that apCell[] contains enough space to hold up to two more Cells
277** than can possibly fit on one page. In the steady state, every apCell[]
278** points to memory inside u.aDisk[]. But in the middle of an insert
279** operation, some apCell[] entries may temporarily point to data space
280** outside of u.aDisk[]. This is a transient situation that is quickly
281** resolved. But while it is happening, it is possible for a database
282** page to hold as many as two more cells than it might otherwise hold.
283** The extra too entries in apCell[] are an allowance for this situation.
284**
drh72f82862001-05-24 21:06:34 +0000285** The pParent field points back to the parent page. This allows us to
286** walk up the BTree from any leaf to the root. Care must be taken to
287** unref() the parent page pointer when this page is no longer referenced.
drhbd03cae2001-06-02 02:40:57 +0000288** The pageDestructor() routine handles that chore.
drh7e3b0a02001-04-28 16:52:40 +0000289*/
290struct MemPage {
drh14acc042001-06-10 19:56:58 +0000291 union {
292 char aDisk[SQLITE_PAGE_SIZE]; /* Page data stored on disk */
293 PageHdr hdr; /* Overlay page header */
294 } u;
drh5e2f8b92001-05-28 00:41:15 +0000295 int isInit; /* True if auxiliary data is initialized */
drh72f82862001-05-24 21:06:34 +0000296 MemPage *pParent; /* The parent of this page. NULL for root */
drh14acc042001-06-10 19:56:58 +0000297 int nFree; /* Number of free bytes in u.aDisk[] */
drh306dc212001-05-21 13:45:10 +0000298 int nCell; /* Number of entries on this page */
drh14acc042001-06-10 19:56:58 +0000299 int isOverfull; /* Some apCell[] points outside u.aDisk[] */
300 Cell *apCell[MX_CELL+2]; /* All data entires in sorted order */
drh8c42ca92001-06-22 19:15:00 +0000301};
drh7e3b0a02001-04-28 16:52:40 +0000302
303/*
drh3b7511c2001-05-26 13:15:44 +0000304** The in-memory image of a disk page has the auxiliary information appended
305** to the end. EXTRA_SIZE is the number of bytes of space needed to hold
306** that extra information.
307*/
308#define EXTRA_SIZE (sizeof(MemPage)-SQLITE_PAGE_SIZE)
309
310/*
drha059ad02001-04-17 20:09:11 +0000311** Everything we need to know about an open database
312*/
313struct Btree {
314 Pager *pPager; /* The page cache */
drh306dc212001-05-21 13:45:10 +0000315 BtCursor *pCursor; /* A list of all open cursors */
drhbd03cae2001-06-02 02:40:57 +0000316 PageOne *page1; /* First page of the database */
drh306dc212001-05-21 13:45:10 +0000317 int inTrans; /* True if a transaction is in progress */
drha059ad02001-04-17 20:09:11 +0000318};
319typedef Btree Bt;
320
drh365d68f2001-05-11 11:02:46 +0000321/*
322** A cursor is a pointer to a particular entry in the BTree.
323** The entry is identified by its MemPage and the index in
drh5e2f8b92001-05-28 00:41:15 +0000324** MemPage.apCell[] of the entry.
drh365d68f2001-05-11 11:02:46 +0000325*/
drh72f82862001-05-24 21:06:34 +0000326struct BtCursor {
drh5e2f8b92001-05-28 00:41:15 +0000327 Btree *pBt; /* The Btree to which this cursor belongs */
drh14acc042001-06-10 19:56:58 +0000328 BtCursor *pNext, *pPrev; /* Forms a linked list of all cursors */
drh8b2f49b2001-06-08 00:21:52 +0000329 Pgno pgnoRoot; /* The root page of this tree */
drh5e2f8b92001-05-28 00:41:15 +0000330 MemPage *pPage; /* Page that contains the entry */
drh8c42ca92001-06-22 19:15:00 +0000331 int idx; /* Index of the entry in pPage->apCell[] */
drh5e2f8b92001-05-28 00:41:15 +0000332 u8 bSkipNext; /* sqliteBtreeNext() is no-op if true */
333 u8 iMatch; /* compare result from last sqliteBtreeMoveto() */
drh365d68f2001-05-11 11:02:46 +0000334};
drh7e3b0a02001-04-28 16:52:40 +0000335
drha059ad02001-04-17 20:09:11 +0000336/*
drh3b7511c2001-05-26 13:15:44 +0000337** Compute the total number of bytes that a Cell needs on the main
drh5e2f8b92001-05-28 00:41:15 +0000338** database page. The number returned includes the Cell header,
339** local payload storage, and the pointer to overflow pages (if
drh8c42ca92001-06-22 19:15:00 +0000340** applicable). Additional space allocated on overflow pages
drhbd03cae2001-06-02 02:40:57 +0000341** is NOT included in the value returned from this routine.
drh3b7511c2001-05-26 13:15:44 +0000342*/
343static int cellSize(Cell *pCell){
344 int n = pCell->h.nKey + pCell->h.nData;
345 if( n>MX_LOCAL_PAYLOAD ){
346 n = MX_LOCAL_PAYLOAD + sizeof(Pgno);
347 }else{
348 n = ROUNDUP(n);
349 }
350 n += sizeof(CellHdr);
351 return n;
352}
353
354/*
drh72f82862001-05-24 21:06:34 +0000355** Defragment the page given. All Cells are moved to the
356** beginning of the page and all free space is collected
357** into one big FreeBlk at the end of the page.
drh365d68f2001-05-11 11:02:46 +0000358*/
359static void defragmentPage(MemPage *pPage){
drh14acc042001-06-10 19:56:58 +0000360 int pc, i, n;
drh2af926b2001-05-15 00:39:25 +0000361 FreeBlk *pFBlk;
362 char newPage[SQLITE_PAGE_SIZE];
363
drhbd03cae2001-06-02 02:40:57 +0000364 pc = sizeof(PageHdr);
drh14acc042001-06-10 19:56:58 +0000365 pPage->u.hdr.firstCell = pc;
366 memcpy(newPage, pPage->u.aDisk, pc);
drh2af926b2001-05-15 00:39:25 +0000367 for(i=0; i<pPage->nCell; i++){
drh2aa679f2001-06-25 02:11:07 +0000368 Cell *pCell = pPage->apCell[i];
drh8c42ca92001-06-22 19:15:00 +0000369
370 /* This routine should never be called on an overfull page. The
371 ** following asserts verify that constraint. */
drh7c717f72001-06-24 20:39:41 +0000372 assert( Addr(pCell) > Addr(pPage) );
373 assert( Addr(pCell) < Addr(pPage) + SQLITE_PAGE_SIZE );
drh8c42ca92001-06-22 19:15:00 +0000374
drh3b7511c2001-05-26 13:15:44 +0000375 n = cellSize(pCell);
drh2aa679f2001-06-25 02:11:07 +0000376 pCell->h.iNext = pc + n;
drh2af926b2001-05-15 00:39:25 +0000377 memcpy(&newPage[pc], pCell, n);
drh14acc042001-06-10 19:56:58 +0000378 pPage->apCell[i] = (Cell*)&pPage->u.aDisk[pc];
drh2af926b2001-05-15 00:39:25 +0000379 pc += n;
380 }
drh72f82862001-05-24 21:06:34 +0000381 assert( pPage->nFree==SQLITE_PAGE_SIZE-pc );
drh14acc042001-06-10 19:56:58 +0000382 memcpy(pPage->u.aDisk, newPage, pc);
drh2aa679f2001-06-25 02:11:07 +0000383 if( pPage->nCell>0 ){
384 pPage->apCell[pPage->nCell-1]->h.iNext = 0;
385 }
drh8c42ca92001-06-22 19:15:00 +0000386 pFBlk = (FreeBlk*)&pPage->u.aDisk[pc];
drh2af926b2001-05-15 00:39:25 +0000387 pFBlk->iSize = SQLITE_PAGE_SIZE - pc;
388 pFBlk->iNext = 0;
drh14acc042001-06-10 19:56:58 +0000389 pPage->u.hdr.firstFree = pc;
drh2af926b2001-05-15 00:39:25 +0000390 memset(&pFBlk[1], 0, SQLITE_PAGE_SIZE - pc - sizeof(FreeBlk));
drh365d68f2001-05-11 11:02:46 +0000391}
392
drha059ad02001-04-17 20:09:11 +0000393/*
drh8b2f49b2001-06-08 00:21:52 +0000394** Allocate nByte bytes of space on a page. nByte must be a
395** multiple of 4.
drhbd03cae2001-06-02 02:40:57 +0000396**
drh14acc042001-06-10 19:56:58 +0000397** Return the index into pPage->u.aDisk[] of the first byte of
drhbd03cae2001-06-02 02:40:57 +0000398** the new allocation. Or return 0 if there is not enough free
399** space on the page to satisfy the allocation request.
drh2af926b2001-05-15 00:39:25 +0000400**
drh72f82862001-05-24 21:06:34 +0000401** If the page contains nBytes of free space but does not contain
drh8b2f49b2001-06-08 00:21:52 +0000402** nBytes of contiguous free space, then this routine automatically
403** calls defragementPage() to consolidate all free space before
404** allocating the new chunk.
drh7e3b0a02001-04-28 16:52:40 +0000405*/
drhbd03cae2001-06-02 02:40:57 +0000406static int allocateSpace(MemPage *pPage, int nByte){
drh2af926b2001-05-15 00:39:25 +0000407 FreeBlk *p;
408 u16 *pIdx;
409 int start;
drh8c42ca92001-06-22 19:15:00 +0000410 int cnt = 0;
drh72f82862001-05-24 21:06:34 +0000411
drh5e2f8b92001-05-28 00:41:15 +0000412 assert( nByte==ROUNDUP(nByte) );
drh14acc042001-06-10 19:56:58 +0000413 if( pPage->nFree<nByte || pPage->isOverfull ) return 0;
414 pIdx = &pPage->u.hdr.firstFree;
415 p = (FreeBlk*)&pPage->u.aDisk[*pIdx];
drh2af926b2001-05-15 00:39:25 +0000416 while( p->iSize<nByte ){
drh8c42ca92001-06-22 19:15:00 +0000417 assert( cnt++ < SQLITE_PAGE_SIZE/4 );
drh2af926b2001-05-15 00:39:25 +0000418 if( p->iNext==0 ){
419 defragmentPage(pPage);
drh14acc042001-06-10 19:56:58 +0000420 pIdx = &pPage->u.hdr.firstFree;
drh2af926b2001-05-15 00:39:25 +0000421 }else{
422 pIdx = &p->iNext;
423 }
drh14acc042001-06-10 19:56:58 +0000424 p = (FreeBlk*)&pPage->u.aDisk[*pIdx];
drh2af926b2001-05-15 00:39:25 +0000425 }
426 if( p->iSize==nByte ){
427 start = *pIdx;
428 *pIdx = p->iNext;
429 }else{
drh8c42ca92001-06-22 19:15:00 +0000430 FreeBlk *pNew;
drh72f82862001-05-24 21:06:34 +0000431 start = *pIdx;
drh8c42ca92001-06-22 19:15:00 +0000432 pNew = (FreeBlk*)&pPage->u.aDisk[start + nByte];
drh72f82862001-05-24 21:06:34 +0000433 pNew->iNext = p->iNext;
434 pNew->iSize = p->iSize - nByte;
435 *pIdx = start + nByte;
drh2af926b2001-05-15 00:39:25 +0000436 }
437 pPage->nFree -= nByte;
438 return start;
drh7e3b0a02001-04-28 16:52:40 +0000439}
440
441/*
drh14acc042001-06-10 19:56:58 +0000442** Return a section of the MemPage.u.aDisk[] to the freelist.
443** The first byte of the new free block is pPage->u.aDisk[start]
444** and the size of the block is "size" bytes. Size must be
445** a multiple of 4.
drh306dc212001-05-21 13:45:10 +0000446**
447** Most of the effort here is involved in coalesing adjacent
448** free blocks into a single big free block.
drh7e3b0a02001-04-28 16:52:40 +0000449*/
450static void freeSpace(MemPage *pPage, int start, int size){
drh2af926b2001-05-15 00:39:25 +0000451 int end = start + size;
452 u16 *pIdx, idx;
453 FreeBlk *pFBlk;
454 FreeBlk *pNew;
455 FreeBlk *pNext;
456
457 assert( size == ROUNDUP(size) );
458 assert( start == ROUNDUP(start) );
drh14acc042001-06-10 19:56:58 +0000459 pIdx = &pPage->u.hdr.firstFree;
drh2af926b2001-05-15 00:39:25 +0000460 idx = *pIdx;
461 while( idx!=0 && idx<start ){
drh14acc042001-06-10 19:56:58 +0000462 pFBlk = (FreeBlk*)&pPage->u.aDisk[idx];
drh2af926b2001-05-15 00:39:25 +0000463 if( idx + pFBlk->iSize == start ){
464 pFBlk->iSize += size;
465 if( idx + pFBlk->iSize == pFBlk->iNext ){
drh8c42ca92001-06-22 19:15:00 +0000466 pNext = (FreeBlk*)&pPage->u.aDisk[pFBlk->iNext];
drh2af926b2001-05-15 00:39:25 +0000467 pFBlk->iSize += pNext->iSize;
468 pFBlk->iNext = pNext->iNext;
469 }
470 pPage->nFree += size;
471 return;
472 }
473 pIdx = &pFBlk->iNext;
474 idx = *pIdx;
475 }
drh14acc042001-06-10 19:56:58 +0000476 pNew = (FreeBlk*)&pPage->u.aDisk[start];
drh2af926b2001-05-15 00:39:25 +0000477 if( idx != end ){
478 pNew->iSize = size;
479 pNew->iNext = idx;
480 }else{
drh14acc042001-06-10 19:56:58 +0000481 pNext = (FreeBlk*)&pPage->u.aDisk[idx];
drh2af926b2001-05-15 00:39:25 +0000482 pNew->iSize = size + pNext->iSize;
483 pNew->iNext = pNext->iNext;
484 }
485 *pIdx = start;
486 pPage->nFree += size;
drh7e3b0a02001-04-28 16:52:40 +0000487}
488
489/*
490** Initialize the auxiliary information for a disk block.
drh72f82862001-05-24 21:06:34 +0000491**
drhbd03cae2001-06-02 02:40:57 +0000492** The pParent parameter must be a pointer to the MemPage which
493** is the parent of the page being initialized. The root of the
drh8b2f49b2001-06-08 00:21:52 +0000494** BTree (usually page 2) has no parent and so for that page,
495** pParent==NULL.
drh5e2f8b92001-05-28 00:41:15 +0000496**
drh72f82862001-05-24 21:06:34 +0000497** Return SQLITE_OK on success. If we see that the page does
498** not contained a well-formed database page, then return
499** SQLITE_CORRUPT. Note that a return of SQLITE_OK does not
500** guarantee that the page is well-formed. It only shows that
501** we failed to detect any corruption.
drh7e3b0a02001-04-28 16:52:40 +0000502*/
drh72f82862001-05-24 21:06:34 +0000503static int initPage(MemPage *pPage, Pgno pgnoThis, MemPage *pParent){
drh14acc042001-06-10 19:56:58 +0000504 int idx; /* An index into pPage->u.aDisk[] */
505 Cell *pCell; /* A pointer to a Cell in pPage->u.aDisk[] */
506 FreeBlk *pFBlk; /* A pointer to a free block in pPage->u.aDisk[] */
drh5e2f8b92001-05-28 00:41:15 +0000507 int sz; /* The size of a Cell in bytes */
508 int freeSpace; /* Amount of free space on the page */
drh2af926b2001-05-15 00:39:25 +0000509
drh5e2f8b92001-05-28 00:41:15 +0000510 if( pPage->pParent ){
511 assert( pPage->pParent==pParent );
512 return SQLITE_OK;
513 }
514 if( pParent ){
515 pPage->pParent = pParent;
516 sqlitepager_ref(pParent);
517 }
518 if( pPage->isInit ) return SQLITE_OK;
drh7e3b0a02001-04-28 16:52:40 +0000519 pPage->isInit = 1;
drh7e3b0a02001-04-28 16:52:40 +0000520 pPage->nCell = 0;
drhbd03cae2001-06-02 02:40:57 +0000521 freeSpace = SQLITE_PAGE_SIZE - sizeof(PageHdr);
drh14acc042001-06-10 19:56:58 +0000522 idx = pPage->u.hdr.firstCell;
drh7e3b0a02001-04-28 16:52:40 +0000523 while( idx!=0 ){
drh8c42ca92001-06-22 19:15:00 +0000524 if( idx>SQLITE_PAGE_SIZE-MIN_CELL_SIZE ) goto page_format_error;
drhbd03cae2001-06-02 02:40:57 +0000525 if( idx<sizeof(PageHdr) ) goto page_format_error;
drh8c42ca92001-06-22 19:15:00 +0000526 if( idx!=ROUNDUP(idx) ) goto page_format_error;
drh14acc042001-06-10 19:56:58 +0000527 pCell = (Cell*)&pPage->u.aDisk[idx];
drh5e2f8b92001-05-28 00:41:15 +0000528 sz = cellSize(pCell);
529 if( idx+sz > SQLITE_PAGE_SIZE ) goto page_format_error;
530 freeSpace -= sz;
531 pPage->apCell[pPage->nCell++] = pCell;
drh3b7511c2001-05-26 13:15:44 +0000532 idx = pCell->h.iNext;
drh2af926b2001-05-15 00:39:25 +0000533 }
534 pPage->nFree = 0;
drh14acc042001-06-10 19:56:58 +0000535 idx = pPage->u.hdr.firstFree;
drh2af926b2001-05-15 00:39:25 +0000536 while( idx!=0 ){
537 if( idx>SQLITE_PAGE_SIZE-sizeof(FreeBlk) ) goto page_format_error;
drhbd03cae2001-06-02 02:40:57 +0000538 if( idx<sizeof(PageHdr) ) goto page_format_error;
drh14acc042001-06-10 19:56:58 +0000539 pFBlk = (FreeBlk*)&pPage->u.aDisk[idx];
drh2af926b2001-05-15 00:39:25 +0000540 pPage->nFree += pFBlk->iSize;
drh7c717f72001-06-24 20:39:41 +0000541 if( pFBlk->iNext>0 && pFBlk->iNext <= idx ) goto page_format_error;
drh2af926b2001-05-15 00:39:25 +0000542 idx = pFBlk->iNext;
drh7e3b0a02001-04-28 16:52:40 +0000543 }
drh8b2f49b2001-06-08 00:21:52 +0000544 if( pPage->nCell==0 && pPage->nFree==0 ){
545 /* As a special case, an uninitialized root page appears to be
546 ** an empty database */
547 return SQLITE_OK;
548 }
drh5e2f8b92001-05-28 00:41:15 +0000549 if( pPage->nFree!=freeSpace ) goto page_format_error;
drh7e3b0a02001-04-28 16:52:40 +0000550 return SQLITE_OK;
drh2af926b2001-05-15 00:39:25 +0000551
552page_format_error:
553 return SQLITE_CORRUPT;
drh7e3b0a02001-04-28 16:52:40 +0000554}
555
556/*
drh8b2f49b2001-06-08 00:21:52 +0000557** Set up a raw page so that it looks like a database page holding
558** no entries.
drhbd03cae2001-06-02 02:40:57 +0000559*/
560static void zeroPage(MemPage *pPage){
561 PageHdr *pHdr;
562 FreeBlk *pFBlk;
563 memset(pPage, 0, SQLITE_PAGE_SIZE);
drh14acc042001-06-10 19:56:58 +0000564 pHdr = &pPage->u.hdr;
drhbd03cae2001-06-02 02:40:57 +0000565 pHdr->firstCell = 0;
566 pHdr->firstFree = sizeof(*pHdr);
567 pFBlk = (FreeBlk*)&pHdr[1];
568 pFBlk->iNext = 0;
569 pFBlk->iSize = SQLITE_PAGE_SIZE - sizeof(*pHdr);
drh8c42ca92001-06-22 19:15:00 +0000570 pPage->nFree = pFBlk->iSize;
571 pPage->nCell = 0;
572 pPage->isOverfull = 0;
drhbd03cae2001-06-02 02:40:57 +0000573}
574
575/*
drh72f82862001-05-24 21:06:34 +0000576** This routine is called when the reference count for a page
577** reaches zero. We need to unref the pParent pointer when that
578** happens.
579*/
580static void pageDestructor(void *pData){
581 MemPage *pPage = (MemPage*)pData;
582 if( pPage->pParent ){
583 MemPage *pParent = pPage->pParent;
584 pPage->pParent = 0;
585 sqlitepager_unref(pParent);
586 }
587}
588
589/*
drh306dc212001-05-21 13:45:10 +0000590** Open a new database.
591**
592** Actually, this routine just sets up the internal data structures
drh72f82862001-05-24 21:06:34 +0000593** for accessing the database. We do not open the database file
594** until the first page is loaded.
drha059ad02001-04-17 20:09:11 +0000595*/
596int sqliteBtreeOpen(const char *zFilename, int mode, Btree **ppBtree){
597 Btree *pBt;
drh8c42ca92001-06-22 19:15:00 +0000598 int rc;
drha059ad02001-04-17 20:09:11 +0000599
600 pBt = sqliteMalloc( sizeof(*pBt) );
601 if( pBt==0 ){
drh8c42ca92001-06-22 19:15:00 +0000602 *ppBtree = 0;
drha059ad02001-04-17 20:09:11 +0000603 return SQLITE_NOMEM;
604 }
drh8c42ca92001-06-22 19:15:00 +0000605 rc = sqlitepager_open(&pBt->pPager, zFilename, 100, EXTRA_SIZE);
drha059ad02001-04-17 20:09:11 +0000606 if( rc!=SQLITE_OK ){
607 if( pBt->pPager ) sqlitepager_close(pBt->pPager);
608 sqliteFree(pBt);
609 *ppBtree = 0;
610 return rc;
611 }
drh72f82862001-05-24 21:06:34 +0000612 sqlitepager_set_destructor(pBt->pPager, pageDestructor);
drha059ad02001-04-17 20:09:11 +0000613 pBt->pCursor = 0;
614 pBt->page1 = 0;
615 *ppBtree = pBt;
616 return SQLITE_OK;
617}
618
619/*
620** Close an open database and invalidate all cursors.
621*/
622int sqliteBtreeClose(Btree *pBt){
623 while( pBt->pCursor ){
624 sqliteBtreeCloseCursor(pBt->pCursor);
625 }
626 sqlitepager_close(pBt->pPager);
627 sqliteFree(pBt);
628 return SQLITE_OK;
629}
630
631/*
drh306dc212001-05-21 13:45:10 +0000632** Get a reference to page1 of the database file. This will
633** also acquire a readlock on that file.
634**
635** SQLITE_OK is returned on success. If the file is not a
636** well-formed database file, then SQLITE_CORRUPT is returned.
637** SQLITE_BUSY is returned if the database is locked. SQLITE_NOMEM
638** is returned if we run out of memory. SQLITE_PROTOCOL is returned
639** if there is a locking protocol violation.
640*/
641static int lockBtree(Btree *pBt){
642 int rc;
643 if( pBt->page1 ) return SQLITE_OK;
drh8c42ca92001-06-22 19:15:00 +0000644 rc = sqlitepager_get(pBt->pPager, 1, (void**)&pBt->page1);
drh306dc212001-05-21 13:45:10 +0000645 if( rc!=SQLITE_OK ) return rc;
drh306dc212001-05-21 13:45:10 +0000646
647 /* Do some checking to help insure the file we opened really is
648 ** a valid database file.
649 */
650 if( sqlitepager_pagecount(pBt->pPager)>0 ){
drhbd03cae2001-06-02 02:40:57 +0000651 PageOne *pP1 = pBt->page1;
drh8c42ca92001-06-22 19:15:00 +0000652 if( strcmp(pP1->zMagic,zMagicHeader)!=0 || pP1->iMagic!=MAGIC ){
drh306dc212001-05-21 13:45:10 +0000653 rc = SQLITE_CORRUPT;
drh72f82862001-05-24 21:06:34 +0000654 goto page1_init_failed;
drh306dc212001-05-21 13:45:10 +0000655 }
656 }
657 return rc;
658
drh72f82862001-05-24 21:06:34 +0000659page1_init_failed:
drh306dc212001-05-21 13:45:10 +0000660 sqlitepager_unref(pBt->page1);
661 pBt->page1 = 0;
drh72f82862001-05-24 21:06:34 +0000662 return rc;
drh306dc212001-05-21 13:45:10 +0000663}
664
665/*
drh8c42ca92001-06-22 19:15:00 +0000666** Create a new database by initializing the first two pages of the
667** file.
drh8b2f49b2001-06-08 00:21:52 +0000668*/
669static int newDatabase(Btree *pBt){
670 MemPage *pRoot;
671 PageOne *pP1;
drh8c42ca92001-06-22 19:15:00 +0000672 int rc;
drh7c717f72001-06-24 20:39:41 +0000673 if( sqlitepager_pagecount(pBt->pPager)>1 ) return SQLITE_OK;
drh8b2f49b2001-06-08 00:21:52 +0000674 pP1 = pBt->page1;
675 rc = sqlitepager_write(pBt->page1);
676 if( rc ) return rc;
drh8c42ca92001-06-22 19:15:00 +0000677 rc = sqlitepager_get(pBt->pPager, 2, (void**)&pRoot);
drh8b2f49b2001-06-08 00:21:52 +0000678 if( rc ) return rc;
679 rc = sqlitepager_write(pRoot);
680 if( rc ){
681 sqlitepager_unref(pRoot);
682 return rc;
683 }
684 strcpy(pP1->zMagic, zMagicHeader);
drh8c42ca92001-06-22 19:15:00 +0000685 pP1->iMagic = MAGIC;
drh8b2f49b2001-06-08 00:21:52 +0000686 zeroPage(pRoot);
687 sqlitepager_unref(pRoot);
688 return SQLITE_OK;
689}
690
691/*
drh72f82862001-05-24 21:06:34 +0000692** Attempt to start a new transaction.
drh8b2f49b2001-06-08 00:21:52 +0000693**
694** A transaction must be started before attempting any changes
695** to the database. None of the following routines will work
696** unless a transaction is started first:
697**
698** sqliteBtreeCreateTable()
699** sqliteBtreeClearTable()
700** sqliteBtreeDropTable()
701** sqliteBtreeInsert()
702** sqliteBtreeDelete()
703** sqliteBtreeUpdateMeta()
drha059ad02001-04-17 20:09:11 +0000704*/
705int sqliteBtreeBeginTrans(Btree *pBt){
706 int rc;
707 if( pBt->inTrans ) return SQLITE_ERROR;
708 if( pBt->page1==0 ){
drh7e3b0a02001-04-28 16:52:40 +0000709 rc = lockBtree(pBt);
drh8c42ca92001-06-22 19:15:00 +0000710 if( rc!=SQLITE_OK ){
711 return rc;
712 }
drha059ad02001-04-17 20:09:11 +0000713 }
714 rc = sqlitepager_write(pBt->page1);
drh8c42ca92001-06-22 19:15:00 +0000715 if( rc!=SQLITE_OK ){
716 return rc;
drha059ad02001-04-17 20:09:11 +0000717 }
drh8c42ca92001-06-22 19:15:00 +0000718 pBt->inTrans = 1;
719 rc = newDatabase(pBt);
720 return rc;
drha059ad02001-04-17 20:09:11 +0000721}
722
723/*
drha059ad02001-04-17 20:09:11 +0000724** Remove the last reference to the database file. This will
725** remove the read lock.
726*/
727static void unlockBtree(Btree *pBt){
drh7c717f72001-06-24 20:39:41 +0000728 if( pBt->inTrans==0 && pBt->pCursor==0 && pBt->page1!=0 ){
drha059ad02001-04-17 20:09:11 +0000729 sqlitepager_unref(pBt->page1);
730 pBt->page1 = 0;
731 pBt->inTrans = 0;
732 }
733}
734
735/*
drh2aa679f2001-06-25 02:11:07 +0000736** Commit the transaction currently in progress.
drha059ad02001-04-17 20:09:11 +0000737*/
738int sqliteBtreeCommit(Btree *pBt){
739 int rc;
drh2aa679f2001-06-25 02:11:07 +0000740 if( pBt->inTrans==0 ) return SQLITE_ERROR;
drha059ad02001-04-17 20:09:11 +0000741 rc = sqlitepager_commit(pBt->pPager);
drh7c717f72001-06-24 20:39:41 +0000742 pBt->inTrans = 0;
drha059ad02001-04-17 20:09:11 +0000743 unlockBtree(pBt);
744 return rc;
745}
746
747/*
748** Rollback the transaction in progress. All cursors must be
749** closed before this routine is called.
750*/
751int sqliteBtreeRollback(Btree *pBt){
752 int rc;
drh72f82862001-05-24 21:06:34 +0000753 if( pBt->pCursor!=0 ) return SQLITE_ERROR;
drh7c717f72001-06-24 20:39:41 +0000754 if( pBt->inTrans==0 ) return SQLITE_OK;
755 pBt->inTrans = 0;
drha059ad02001-04-17 20:09:11 +0000756 rc = sqlitepager_rollback(pBt->pPager);
757 unlockBtree(pBt);
758 return rc;
759}
760
761/*
drh8b2f49b2001-06-08 00:21:52 +0000762** Create a new cursor for the BTree whose root is on the page
763** iTable. The act of acquiring a cursor gets a read lock on
764** the database file.
drha059ad02001-04-17 20:09:11 +0000765*/
drh8b2f49b2001-06-08 00:21:52 +0000766int sqliteBtreeCursor(Btree *pBt, int iTable, BtCursor **ppCur){
drha059ad02001-04-17 20:09:11 +0000767 int rc;
768 BtCursor *pCur;
769 if( pBt->page1==0 ){
770 rc = lockBtree(pBt);
771 if( rc!=SQLITE_OK ){
772 *ppCur = 0;
773 return rc;
774 }
775 }
776 pCur = sqliteMalloc( sizeof(*pCur) );
777 if( pCur==0 ){
drhbd03cae2001-06-02 02:40:57 +0000778 rc = SQLITE_NOMEM;
779 goto create_cursor_exception;
780 }
drh8b2f49b2001-06-08 00:21:52 +0000781 pCur->pgnoRoot = (Pgno)iTable;
drh8c42ca92001-06-22 19:15:00 +0000782 rc = sqlitepager_get(pBt->pPager, pCur->pgnoRoot, (void**)&pCur->pPage);
drhbd03cae2001-06-02 02:40:57 +0000783 if( rc!=SQLITE_OK ){
784 goto create_cursor_exception;
785 }
drh8b2f49b2001-06-08 00:21:52 +0000786 rc = initPage(pCur->pPage, pCur->pgnoRoot, 0);
drhbd03cae2001-06-02 02:40:57 +0000787 if( rc!=SQLITE_OK ){
788 goto create_cursor_exception;
drha059ad02001-04-17 20:09:11 +0000789 }
drh14acc042001-06-10 19:56:58 +0000790 pCur->pBt = pBt;
791 pCur->idx = 0;
drha059ad02001-04-17 20:09:11 +0000792 pCur->pNext = pBt->pCursor;
793 if( pCur->pNext ){
794 pCur->pNext->pPrev = pCur;
795 }
drh14acc042001-06-10 19:56:58 +0000796 pCur->pPrev = 0;
drha059ad02001-04-17 20:09:11 +0000797 pBt->pCursor = pCur;
drh2af926b2001-05-15 00:39:25 +0000798 *ppCur = pCur;
799 return SQLITE_OK;
drhbd03cae2001-06-02 02:40:57 +0000800
801create_cursor_exception:
802 *ppCur = 0;
803 if( pCur ){
804 if( pCur->pPage ) sqlitepager_unref(pCur->pPage);
805 sqliteFree(pCur);
806 }
drh8c42ca92001-06-22 19:15:00 +0000807 unlockBtree(pBt);
drhbd03cae2001-06-02 02:40:57 +0000808 return rc;
drha059ad02001-04-17 20:09:11 +0000809}
810
811/*
drhbd03cae2001-06-02 02:40:57 +0000812** Close a cursor. The lock on the database file is released
813** when the last cursor is closed.
drha059ad02001-04-17 20:09:11 +0000814*/
815int sqliteBtreeCloseCursor(BtCursor *pCur){
816 Btree *pBt = pCur->pBt;
drha059ad02001-04-17 20:09:11 +0000817 if( pCur->pPrev ){
818 pCur->pPrev->pNext = pCur->pNext;
819 }else{
820 pBt->pCursor = pCur->pNext;
821 }
822 if( pCur->pNext ){
823 pCur->pNext->pPrev = pCur->pPrev;
824 }
drh2af926b2001-05-15 00:39:25 +0000825 sqlitepager_unref(pCur->pPage);
drhbd03cae2001-06-02 02:40:57 +0000826 unlockBtree(pBt);
drha059ad02001-04-17 20:09:11 +0000827 sqliteFree(pCur);
drh8c42ca92001-06-22 19:15:00 +0000828 return SQLITE_OK;
drha059ad02001-04-17 20:09:11 +0000829}
830
drh7e3b0a02001-04-28 16:52:40 +0000831/*
drh5e2f8b92001-05-28 00:41:15 +0000832** Make a temporary cursor by filling in the fields of pTempCur.
833** The temporary cursor is not on the cursor list for the Btree.
834*/
drh14acc042001-06-10 19:56:58 +0000835static void getTempCursor(BtCursor *pCur, BtCursor *pTempCur){
drh5e2f8b92001-05-28 00:41:15 +0000836 memcpy(pTempCur, pCur, sizeof(*pCur));
837 pTempCur->pNext = 0;
838 pTempCur->pPrev = 0;
839 sqlitepager_ref(pTempCur->pPage);
840}
841
842/*
drhbd03cae2001-06-02 02:40:57 +0000843** Delete a temporary cursor such as was made by the CreateTemporaryCursor()
drh5e2f8b92001-05-28 00:41:15 +0000844** function above.
845*/
drh14acc042001-06-10 19:56:58 +0000846static void releaseTempCursor(BtCursor *pCur){
drh5e2f8b92001-05-28 00:41:15 +0000847 sqlitepager_unref(pCur->pPage);
848}
849
850/*
drhbd03cae2001-06-02 02:40:57 +0000851** Set *pSize to the number of bytes of key in the entry the
852** cursor currently points to. Always return SQLITE_OK.
853** Failure is not possible. If the cursor is not currently
854** pointing to an entry (which can happen, for example, if
855** the database is empty) then *pSize is set to 0.
drh7e3b0a02001-04-28 16:52:40 +0000856*/
drh72f82862001-05-24 21:06:34 +0000857int sqliteBtreeKeySize(BtCursor *pCur, int *pSize){
drh2af926b2001-05-15 00:39:25 +0000858 Cell *pCell;
859 MemPage *pPage;
860
861 pPage = pCur->pPage;
drh72f82862001-05-24 21:06:34 +0000862 assert( pPage!=0 );
863 if( pCur->idx >= pPage->nCell ){
864 *pSize = 0;
865 }else{
drh5e2f8b92001-05-28 00:41:15 +0000866 pCell = pPage->apCell[pCur->idx];
drh8c42ca92001-06-22 19:15:00 +0000867 *pSize = pCell->h.nKey;
drh72f82862001-05-24 21:06:34 +0000868 }
869 return SQLITE_OK;
drha059ad02001-04-17 20:09:11 +0000870}
drh2af926b2001-05-15 00:39:25 +0000871
drh72f82862001-05-24 21:06:34 +0000872/*
873** Read payload information from the entry that the pCur cursor is
874** pointing to. Begin reading the payload at "offset" and read
875** a total of "amt" bytes. Put the result in zBuf.
876**
877** This routine does not make a distinction between key and data.
878** It just reads bytes from the payload area.
879*/
drh2af926b2001-05-15 00:39:25 +0000880static int getPayload(BtCursor *pCur, int offset, int amt, char *zBuf){
drh5e2f8b92001-05-28 00:41:15 +0000881 char *aPayload;
drh2af926b2001-05-15 00:39:25 +0000882 Pgno nextPage;
drh8c42ca92001-06-22 19:15:00 +0000883 int rc;
drh72f82862001-05-24 21:06:34 +0000884 assert( pCur!=0 && pCur->pPage!=0 );
drh8c42ca92001-06-22 19:15:00 +0000885 assert( pCur->idx>=0 && pCur->idx<pCur->pPage->nCell );
886 aPayload = pCur->pPage->apCell[pCur->idx]->aPayload;
drh2af926b2001-05-15 00:39:25 +0000887 if( offset<MX_LOCAL_PAYLOAD ){
888 int a = amt;
889 if( a+offset>MX_LOCAL_PAYLOAD ){
890 a = MX_LOCAL_PAYLOAD - offset;
891 }
drh5e2f8b92001-05-28 00:41:15 +0000892 memcpy(zBuf, &aPayload[offset], a);
drh2af926b2001-05-15 00:39:25 +0000893 if( a==amt ){
894 return SQLITE_OK;
895 }
drh2aa679f2001-06-25 02:11:07 +0000896 offset = 0;
drh2af926b2001-05-15 00:39:25 +0000897 zBuf += a;
898 amt -= a;
drhdd793422001-06-28 01:54:48 +0000899 }else{
900 offset -= MX_LOCAL_PAYLOAD;
drhbd03cae2001-06-02 02:40:57 +0000901 }
902 if( amt>0 ){
drh8c42ca92001-06-22 19:15:00 +0000903 nextPage = pCur->pPage->apCell[pCur->idx]->ovfl;
drh2af926b2001-05-15 00:39:25 +0000904 }
905 while( amt>0 && nextPage ){
906 OverflowPage *pOvfl;
drh8c42ca92001-06-22 19:15:00 +0000907 rc = sqlitepager_get(pCur->pBt->pPager, nextPage, (void**)&pOvfl);
drh2af926b2001-05-15 00:39:25 +0000908 if( rc!=0 ){
909 return rc;
910 }
drh14acc042001-06-10 19:56:58 +0000911 nextPage = pOvfl->iNext;
drh2af926b2001-05-15 00:39:25 +0000912 if( offset<OVERFLOW_SIZE ){
913 int a = amt;
914 if( a + offset > OVERFLOW_SIZE ){
915 a = OVERFLOW_SIZE - offset;
916 }
drh5e2f8b92001-05-28 00:41:15 +0000917 memcpy(zBuf, &pOvfl->aPayload[offset], a);
drh2aa679f2001-06-25 02:11:07 +0000918 offset = 0;
drh2af926b2001-05-15 00:39:25 +0000919 amt -= a;
920 zBuf += a;
drh2aa679f2001-06-25 02:11:07 +0000921 }else{
922 offset -= OVERFLOW_SIZE;
drh2af926b2001-05-15 00:39:25 +0000923 }
924 sqlitepager_unref(pOvfl);
925 }
926 return amt==0 ? SQLITE_OK : SQLITE_CORRUPT;
927}
928
drh72f82862001-05-24 21:06:34 +0000929/*
930** Read part of the key associated with cursor pCur. A total
931** of "amt" bytes will be transfered into zBuf[]. The transfer
932** begins at "offset". If the key does not contain enough data
933** to satisfy the request, no data is fetched and this routine
934** returns SQLITE_ERROR.
935*/
936int sqliteBtreeKey(BtCursor *pCur, int offset, int amt, char *zBuf){
937 Cell *pCell;
938 MemPage *pPage;
drha059ad02001-04-17 20:09:11 +0000939
drh72f82862001-05-24 21:06:34 +0000940 if( amt<0 ) return SQLITE_ERROR;
941 if( offset<0 ) return SQLITE_ERROR;
942 if( amt==0 ) return SQLITE_OK;
943 pPage = pCur->pPage;
944 assert( pPage!=0 );
945 if( pCur->idx >= pPage->nCell ){
946 return SQLITE_ERROR;
947 }
drh5e2f8b92001-05-28 00:41:15 +0000948 pCell = pPage->apCell[pCur->idx];
drh3b7511c2001-05-26 13:15:44 +0000949 if( amt+offset > pCell->h.nKey ){
drhbd03cae2001-06-02 02:40:57 +0000950 return SQLITE_ERROR;
951 }
drh72f82862001-05-24 21:06:34 +0000952 return getPayload(pCur, offset, amt, zBuf);
953}
954
955/*
drhbd03cae2001-06-02 02:40:57 +0000956** Set *pSize to the number of bytes of data in the entry the
957** cursor currently points to. Always return SQLITE_OK.
958** Failure is not possible. If the cursor is not currently
959** pointing to an entry (which can happen, for example, if
960** the database is empty) then *pSize is set to 0.
drh72f82862001-05-24 21:06:34 +0000961*/
962int sqliteBtreeDataSize(BtCursor *pCur, int *pSize){
963 Cell *pCell;
964 MemPage *pPage;
965
966 pPage = pCur->pPage;
967 assert( pPage!=0 );
968 if( pCur->idx >= pPage->nCell ){
969 *pSize = 0;
970 }else{
drh5e2f8b92001-05-28 00:41:15 +0000971 pCell = pPage->apCell[pCur->idx];
drh3b7511c2001-05-26 13:15:44 +0000972 *pSize = pCell->h.nData;
drh72f82862001-05-24 21:06:34 +0000973 }
974 return SQLITE_OK;
975}
976
977/*
978** Read part of the data associated with cursor pCur. A total
979** of "amt" bytes will be transfered into zBuf[]. The transfer
980** begins at "offset". If the size of the data in the record
981** is insufficent to satisfy this request then no data is read
982** and this routine returns SQLITE_ERROR.
983*/
984int sqliteBtreeData(BtCursor *pCur, int offset, int amt, char *zBuf){
985 Cell *pCell;
986 MemPage *pPage;
987
988 if( amt<0 ) return SQLITE_ERROR;
989 if( offset<0 ) return SQLITE_ERROR;
990 if( amt==0 ) return SQLITE_OK;
991 pPage = pCur->pPage;
992 assert( pPage!=0 );
993 if( pCur->idx >= pPage->nCell ){
994 return SQLITE_ERROR;
995 }
drh5e2f8b92001-05-28 00:41:15 +0000996 pCell = pPage->apCell[pCur->idx];
drhbd03cae2001-06-02 02:40:57 +0000997 if( amt+offset > pCell->h.nData ){
998 return SQLITE_ERROR;
999 }
drh3b7511c2001-05-26 13:15:44 +00001000 return getPayload(pCur, offset + pCell->h.nKey, amt, zBuf);
drh72f82862001-05-24 21:06:34 +00001001}
drha059ad02001-04-17 20:09:11 +00001002
drh2af926b2001-05-15 00:39:25 +00001003/*
1004** Compare the key for the entry that pCur points to against the
1005** given key (pKey,nKeyOrig). Put the comparison result in *pResult.
1006** The result is negative if pCur<pKey, zero if they are equal and
1007** positive if pCur>pKey.
1008**
1009** SQLITE_OK is returned on success. If part of the cursor key
1010** is on overflow pages and we are unable to access those overflow
1011** pages, then some other value might be returned to indicate the
1012** reason for the error.
1013*/
1014static int compareKey(BtCursor *pCur, char *pKey, int nKeyOrig, int *pResult){
1015 Pgno nextPage;
1016 int nKey = nKeyOrig;
drh8c42ca92001-06-22 19:15:00 +00001017 int n, c, rc;
drh2af926b2001-05-15 00:39:25 +00001018 Cell *pCell;
1019
1020 assert( pCur->pPage );
1021 assert( pCur->idx>=0 && pCur->idx<pCur->pPage->nCell );
drhbd03cae2001-06-02 02:40:57 +00001022 pCell = pCur->pPage->apCell[pCur->idx];
drh3b7511c2001-05-26 13:15:44 +00001023 if( nKey > pCell->h.nKey ){
1024 nKey = pCell->h.nKey;
drh2af926b2001-05-15 00:39:25 +00001025 }
1026 n = nKey;
1027 if( n>MX_LOCAL_PAYLOAD ){
1028 n = MX_LOCAL_PAYLOAD;
1029 }
drh5e2f8b92001-05-28 00:41:15 +00001030 c = memcmp(pCell->aPayload, pKey, n);
drh2af926b2001-05-15 00:39:25 +00001031 if( c!=0 ){
1032 *pResult = c;
1033 return SQLITE_OK;
1034 }
1035 pKey += n;
1036 nKey -= n;
drh3b7511c2001-05-26 13:15:44 +00001037 nextPage = pCell->ovfl;
drh2af926b2001-05-15 00:39:25 +00001038 while( nKey>0 ){
1039 OverflowPage *pOvfl;
1040 if( nextPage==0 ){
1041 return SQLITE_CORRUPT;
1042 }
drh8c42ca92001-06-22 19:15:00 +00001043 rc = sqlitepager_get(pCur->pBt->pPager, nextPage, (void**)&pOvfl);
drh72f82862001-05-24 21:06:34 +00001044 if( rc ){
drh2af926b2001-05-15 00:39:25 +00001045 return rc;
1046 }
drh14acc042001-06-10 19:56:58 +00001047 nextPage = pOvfl->iNext;
drh2af926b2001-05-15 00:39:25 +00001048 n = nKey;
1049 if( n>OVERFLOW_SIZE ){
1050 n = OVERFLOW_SIZE;
1051 }
drh5e2f8b92001-05-28 00:41:15 +00001052 c = memcmp(pOvfl->aPayload, pKey, n);
drh2af926b2001-05-15 00:39:25 +00001053 sqlitepager_unref(pOvfl);
1054 if( c!=0 ){
1055 *pResult = c;
1056 return SQLITE_OK;
1057 }
1058 nKey -= n;
1059 pKey += n;
1060 }
drh3b7511c2001-05-26 13:15:44 +00001061 c = pCell->h.nKey - nKeyOrig;
drh2af926b2001-05-15 00:39:25 +00001062 *pResult = c;
1063 return SQLITE_OK;
1064}
1065
drh72f82862001-05-24 21:06:34 +00001066/*
1067** Move the cursor down to a new child page.
1068*/
drh5e2f8b92001-05-28 00:41:15 +00001069static int moveToChild(BtCursor *pCur, int newPgno){
drh72f82862001-05-24 21:06:34 +00001070 int rc;
1071 MemPage *pNewPage;
1072
drh8c42ca92001-06-22 19:15:00 +00001073 rc = sqlitepager_get(pCur->pBt->pPager, newPgno, (void**)&pNewPage);
drh72f82862001-05-24 21:06:34 +00001074 if( rc ){
1075 return rc;
1076 }
drh5e2f8b92001-05-28 00:41:15 +00001077 initPage(pNewPage, newPgno, pCur->pPage);
drh72f82862001-05-24 21:06:34 +00001078 sqlitepager_unref(pCur->pPage);
1079 pCur->pPage = pNewPage;
1080 pCur->idx = 0;
1081 return SQLITE_OK;
1082}
1083
1084/*
drh5e2f8b92001-05-28 00:41:15 +00001085** Move the cursor up to the parent page.
1086**
1087** pCur->idx is set to the cell index that contains the pointer
1088** to the page we are coming from. If we are coming from the
1089** right-most child page then pCur->idx is set to one more than
drhbd03cae2001-06-02 02:40:57 +00001090** the largest cell index.
drh72f82862001-05-24 21:06:34 +00001091*/
drh5e2f8b92001-05-28 00:41:15 +00001092static int moveToParent(BtCursor *pCur){
drh72f82862001-05-24 21:06:34 +00001093 Pgno oldPgno;
1094 MemPage *pParent;
drh8c42ca92001-06-22 19:15:00 +00001095 int i;
drh72f82862001-05-24 21:06:34 +00001096 pParent = pCur->pPage->pParent;
drhbd03cae2001-06-02 02:40:57 +00001097 if( pParent==0 ) return SQLITE_INTERNAL;
drh72f82862001-05-24 21:06:34 +00001098 oldPgno = sqlitepager_pagenumber(pCur->pPage);
drh72f82862001-05-24 21:06:34 +00001099 sqlitepager_ref(pParent);
1100 sqlitepager_unref(pCur->pPage);
1101 pCur->pPage = pParent;
drh8c42ca92001-06-22 19:15:00 +00001102 pCur->idx = pParent->nCell;
1103 for(i=0; i<pParent->nCell; i++){
1104 if( pParent->apCell[i]->h.leftChild==oldPgno ){
drh72f82862001-05-24 21:06:34 +00001105 pCur->idx = i;
1106 break;
1107 }
1108 }
drh5e2f8b92001-05-28 00:41:15 +00001109 return SQLITE_OK;
drh72f82862001-05-24 21:06:34 +00001110}
1111
1112/*
1113** Move the cursor to the root page
1114*/
drh5e2f8b92001-05-28 00:41:15 +00001115static int moveToRoot(BtCursor *pCur){
drh72f82862001-05-24 21:06:34 +00001116 MemPage *pNew;
drhbd03cae2001-06-02 02:40:57 +00001117 int rc;
1118
drh8c42ca92001-06-22 19:15:00 +00001119 rc = sqlitepager_get(pCur->pBt->pPager, pCur->pgnoRoot, (void**)&pNew);
drhbd03cae2001-06-02 02:40:57 +00001120 if( rc ) return rc;
drh72f82862001-05-24 21:06:34 +00001121 sqlitepager_unref(pCur->pPage);
1122 pCur->pPage = pNew;
1123 pCur->idx = 0;
1124 return SQLITE_OK;
1125}
drh2af926b2001-05-15 00:39:25 +00001126
drh5e2f8b92001-05-28 00:41:15 +00001127/*
1128** Move the cursor down to the left-most leaf entry beneath the
1129** entry to which it is currently pointing.
1130*/
1131static int moveToLeftmost(BtCursor *pCur){
1132 Pgno pgno;
1133 int rc;
1134
1135 while( (pgno = pCur->pPage->apCell[pCur->idx]->h.leftChild)!=0 ){
1136 rc = moveToChild(pCur, pgno);
1137 if( rc ) return rc;
1138 }
1139 return SQLITE_OK;
1140}
1141
1142
drha059ad02001-04-17 20:09:11 +00001143/* Move the cursor so that it points to an entry near pKey.
drh72f82862001-05-24 21:06:34 +00001144** Return a success code.
1145**
drh5e2f8b92001-05-28 00:41:15 +00001146** If an exact match is not found, then the cursor is always
drhbd03cae2001-06-02 02:40:57 +00001147** left pointing at a leaf page which would hold the entry if it
drh5e2f8b92001-05-28 00:41:15 +00001148** were present. The cursor might point to an entry that comes
1149** before or after the key.
1150**
drhbd03cae2001-06-02 02:40:57 +00001151** The result of comparing the key with the entry to which the
1152** cursor is left pointing is stored in pCur->iMatch. The same
1153** value is also written to *pRes if pRes!=NULL. The meaning of
1154** this value is as follows:
1155**
1156** *pRes<0 The cursor is left pointing at an entry that
drh7c717f72001-06-24 20:39:41 +00001157** is smaller than pKey.
drhbd03cae2001-06-02 02:40:57 +00001158**
1159** *pRes==0 The cursor is left pointing at an entry that
1160** exactly matches pKey.
1161**
1162** *pRes>0 The cursor is left pointing at an entry that
drh7c717f72001-06-24 20:39:41 +00001163** is larger than pKey.
drha059ad02001-04-17 20:09:11 +00001164*/
drh72f82862001-05-24 21:06:34 +00001165int sqliteBtreeMoveto(BtCursor *pCur, void *pKey, int nKey, int *pRes){
1166 int rc;
drh7c717f72001-06-24 20:39:41 +00001167 pCur->bSkipNext = 0;
drh5e2f8b92001-05-28 00:41:15 +00001168 rc = moveToRoot(pCur);
drh72f82862001-05-24 21:06:34 +00001169 if( rc ) return rc;
1170 for(;;){
1171 int lwr, upr;
1172 Pgno chldPg;
1173 MemPage *pPage = pCur->pPage;
drh8b2f49b2001-06-08 00:21:52 +00001174 int c = -1;
drh72f82862001-05-24 21:06:34 +00001175 lwr = 0;
1176 upr = pPage->nCell-1;
1177 while( lwr<=upr ){
drh72f82862001-05-24 21:06:34 +00001178 pCur->idx = (lwr+upr)/2;
1179 rc = compareKey(pCur, pKey, nKey, &c);
1180 if( rc ) return rc;
1181 if( c==0 ){
drh5e2f8b92001-05-28 00:41:15 +00001182 pCur->iMatch = c;
drh72f82862001-05-24 21:06:34 +00001183 if( pRes ) *pRes = 0;
1184 return SQLITE_OK;
1185 }
1186 if( c<0 ){
1187 lwr = pCur->idx+1;
1188 }else{
1189 upr = pCur->idx-1;
1190 }
1191 }
1192 assert( lwr==upr+1 );
1193 if( lwr>=pPage->nCell ){
drh14acc042001-06-10 19:56:58 +00001194 chldPg = pPage->u.hdr.rightChild;
drh72f82862001-05-24 21:06:34 +00001195 }else{
drh5e2f8b92001-05-28 00:41:15 +00001196 chldPg = pPage->apCell[lwr]->h.leftChild;
drh72f82862001-05-24 21:06:34 +00001197 }
1198 if( chldPg==0 ){
drh5e2f8b92001-05-28 00:41:15 +00001199 pCur->iMatch = c;
drh72f82862001-05-24 21:06:34 +00001200 if( pRes ) *pRes = c;
1201 return SQLITE_OK;
1202 }
drh5e2f8b92001-05-28 00:41:15 +00001203 rc = moveToChild(pCur, chldPg);
drh72f82862001-05-24 21:06:34 +00001204 if( rc ) return rc;
1205 }
drhbd03cae2001-06-02 02:40:57 +00001206 /* NOT REACHED */
drh72f82862001-05-24 21:06:34 +00001207}
1208
1209/*
drhbd03cae2001-06-02 02:40:57 +00001210** Advance the cursor to the next entry in the database. If
1211** successful and pRes!=NULL then set *pRes=0. If the cursor
1212** was already pointing to the last entry in the database before
1213** this routine was called, then set *pRes=1 if pRes!=NULL.
drh72f82862001-05-24 21:06:34 +00001214*/
1215int sqliteBtreeNext(BtCursor *pCur, int *pRes){
drh72f82862001-05-24 21:06:34 +00001216 int rc;
drh5e2f8b92001-05-28 00:41:15 +00001217 if( pCur->bSkipNext ){
1218 pCur->bSkipNext = 0;
drh72f82862001-05-24 21:06:34 +00001219 if( pRes ) *pRes = 0;
1220 return SQLITE_OK;
1221 }
drh72f82862001-05-24 21:06:34 +00001222 pCur->idx++;
drh5e2f8b92001-05-28 00:41:15 +00001223 if( pCur->idx>=pCur->pPage->nCell ){
drh8c42ca92001-06-22 19:15:00 +00001224 if( pCur->pPage->u.hdr.rightChild ){
1225 rc = moveToChild(pCur, pCur->pPage->u.hdr.rightChild);
drh5e2f8b92001-05-28 00:41:15 +00001226 if( rc ) return rc;
1227 rc = moveToLeftmost(pCur);
1228 if( rc ) return rc;
1229 if( pRes ) *pRes = 0;
drh72f82862001-05-24 21:06:34 +00001230 return SQLITE_OK;
1231 }
drh5e2f8b92001-05-28 00:41:15 +00001232 do{
drh8c42ca92001-06-22 19:15:00 +00001233 if( pCur->pPage->pParent==0 ){
drh5e2f8b92001-05-28 00:41:15 +00001234 if( pRes ) *pRes = 1;
1235 return SQLITE_OK;
1236 }
1237 rc = moveToParent(pCur);
1238 if( rc ) return rc;
1239 }while( pCur->idx>=pCur->pPage->nCell );
drh72f82862001-05-24 21:06:34 +00001240 if( pRes ) *pRes = 0;
1241 return SQLITE_OK;
1242 }
drh5e2f8b92001-05-28 00:41:15 +00001243 rc = moveToLeftmost(pCur);
1244 if( rc ) return rc;
drh72f82862001-05-24 21:06:34 +00001245 if( pRes ) *pRes = 0;
1246 return SQLITE_OK;
1247}
1248
drh3b7511c2001-05-26 13:15:44 +00001249/*
1250** Allocate a new page from the database file.
1251**
1252** The new page is marked as dirty. (In other words, sqlitepager_write()
1253** has already been called on the new page.) The new page has also
1254** been referenced and the calling routine is responsible for calling
1255** sqlitepager_unref() on the new page when it is done.
1256**
1257** SQLITE_OK is returned on success. Any other return value indicates
1258** an error. *ppPage and *pPgno are undefined in the event of an error.
1259** Do not invoke sqlitepager_unref() on *ppPage if an error is returned.
1260*/
1261static int allocatePage(Btree *pBt, MemPage **ppPage, Pgno *pPgno){
drhbd03cae2001-06-02 02:40:57 +00001262 PageOne *pPage1 = pBt->page1;
drh8c42ca92001-06-22 19:15:00 +00001263 int rc;
drh3b7511c2001-05-26 13:15:44 +00001264 if( pPage1->freeList ){
1265 OverflowPage *pOvfl;
1266 rc = sqlitepager_write(pPage1);
1267 if( rc ) return rc;
1268 *pPgno = pPage1->freeList;
drh8c42ca92001-06-22 19:15:00 +00001269 rc = sqlitepager_get(pBt->pPager, pPage1->freeList, (void**)&pOvfl);
drh3b7511c2001-05-26 13:15:44 +00001270 if( rc ) return rc;
1271 rc = sqlitepager_write(pOvfl);
1272 if( rc ){
1273 sqlitepager_unref(pOvfl);
1274 return rc;
1275 }
drh14acc042001-06-10 19:56:58 +00001276 pPage1->freeList = pOvfl->iNext;
drh2aa679f2001-06-25 02:11:07 +00001277 pPage1->nFree--;
drh3b7511c2001-05-26 13:15:44 +00001278 *ppPage = (MemPage*)pOvfl;
1279 }else{
drh2aa679f2001-06-25 02:11:07 +00001280 *pPgno = sqlitepager_pagecount(pBt->pPager) + 1;
drh8c42ca92001-06-22 19:15:00 +00001281 rc = sqlitepager_get(pBt->pPager, *pPgno, (void**)ppPage);
drh3b7511c2001-05-26 13:15:44 +00001282 if( rc ) return rc;
1283 rc = sqlitepager_write(*ppPage);
1284 }
1285 return rc;
1286}
1287
1288/*
1289** Add a page of the database file to the freelist. Either pgno or
1290** pPage but not both may be 0.
drh5e2f8b92001-05-28 00:41:15 +00001291**
drhdd793422001-06-28 01:54:48 +00001292** sqlitepager_unref() is NOT called for pPage.
drh3b7511c2001-05-26 13:15:44 +00001293*/
1294static int freePage(Btree *pBt, void *pPage, Pgno pgno){
drhbd03cae2001-06-02 02:40:57 +00001295 PageOne *pPage1 = pBt->page1;
drh3b7511c2001-05-26 13:15:44 +00001296 OverflowPage *pOvfl = (OverflowPage*)pPage;
1297 int rc;
drhdd793422001-06-28 01:54:48 +00001298 int needUnref = 0;
1299 MemPage *pMemPage;
drh8b2f49b2001-06-08 00:21:52 +00001300
drh3b7511c2001-05-26 13:15:44 +00001301 if( pgno==0 ){
1302 assert( pOvfl!=0 );
1303 pgno = sqlitepager_pagenumber(pOvfl);
1304 }
drh2aa679f2001-06-25 02:11:07 +00001305 assert( pgno>2 );
drh3b7511c2001-05-26 13:15:44 +00001306 rc = sqlitepager_write(pPage1);
1307 if( rc ){
1308 return rc;
1309 }
1310 if( pOvfl==0 ){
1311 assert( pgno>0 );
drh8c42ca92001-06-22 19:15:00 +00001312 rc = sqlitepager_get(pBt->pPager, pgno, (void**)&pOvfl);
drh3b7511c2001-05-26 13:15:44 +00001313 if( rc ) return rc;
drhdd793422001-06-28 01:54:48 +00001314 needUnref = 1;
drh3b7511c2001-05-26 13:15:44 +00001315 }
1316 rc = sqlitepager_write(pOvfl);
1317 if( rc ){
drhdd793422001-06-28 01:54:48 +00001318 if( needUnref ) sqlitepager_unref(pOvfl);
drh3b7511c2001-05-26 13:15:44 +00001319 return rc;
1320 }
drh14acc042001-06-10 19:56:58 +00001321 pOvfl->iNext = pPage1->freeList;
drh3b7511c2001-05-26 13:15:44 +00001322 pPage1->freeList = pgno;
drh2aa679f2001-06-25 02:11:07 +00001323 pPage1->nFree++;
drh5e2f8b92001-05-28 00:41:15 +00001324 memset(pOvfl->aPayload, 0, OVERFLOW_SIZE);
drhdd793422001-06-28 01:54:48 +00001325 pMemPage = (MemPage*)pPage;
1326 pMemPage->isInit = 0;
1327 if( pMemPage->pParent ){
1328 sqlitepager_unref(pMemPage->pParent);
1329 pMemPage->pParent = 0;
1330 }
1331 if( needUnref ) rc = sqlitepager_unref(pOvfl);
drh3b7511c2001-05-26 13:15:44 +00001332 return rc;
1333}
1334
1335/*
1336** Erase all the data out of a cell. This involves returning overflow
1337** pages back the freelist.
1338*/
1339static int clearCell(Btree *pBt, Cell *pCell){
1340 Pager *pPager = pBt->pPager;
1341 OverflowPage *pOvfl;
drh3b7511c2001-05-26 13:15:44 +00001342 Pgno ovfl, nextOvfl;
1343 int rc;
1344
drh5e2f8b92001-05-28 00:41:15 +00001345 if( pCell->h.nKey + pCell->h.nData <= MX_LOCAL_PAYLOAD ){
1346 return SQLITE_OK;
1347 }
drh3b7511c2001-05-26 13:15:44 +00001348 ovfl = pCell->ovfl;
1349 pCell->ovfl = 0;
1350 while( ovfl ){
drh8c42ca92001-06-22 19:15:00 +00001351 rc = sqlitepager_get(pPager, ovfl, (void**)&pOvfl);
drh3b7511c2001-05-26 13:15:44 +00001352 if( rc ) return rc;
drh14acc042001-06-10 19:56:58 +00001353 nextOvfl = pOvfl->iNext;
drhbd03cae2001-06-02 02:40:57 +00001354 rc = freePage(pBt, pOvfl, ovfl);
1355 if( rc ) return rc;
drhdd793422001-06-28 01:54:48 +00001356 sqlitepager_unref(pOvfl);
drh3b7511c2001-05-26 13:15:44 +00001357 ovfl = nextOvfl;
drh3b7511c2001-05-26 13:15:44 +00001358 }
drh5e2f8b92001-05-28 00:41:15 +00001359 return SQLITE_OK;
drh3b7511c2001-05-26 13:15:44 +00001360}
1361
1362/*
1363** Create a new cell from key and data. Overflow pages are allocated as
1364** necessary and linked to this cell.
1365*/
1366static int fillInCell(
1367 Btree *pBt, /* The whole Btree. Needed to allocate pages */
1368 Cell *pCell, /* Populate this Cell structure */
1369 void *pKey, int nKey, /* The key */
1370 void *pData,int nData /* The data */
1371){
drhdd793422001-06-28 01:54:48 +00001372 OverflowPage *pOvfl, *pPrior;
drh3b7511c2001-05-26 13:15:44 +00001373 Pgno *pNext;
1374 int spaceLeft;
drh8c42ca92001-06-22 19:15:00 +00001375 int n, rc;
drh3b7511c2001-05-26 13:15:44 +00001376 int nPayload;
1377 char *pPayload;
1378 char *pSpace;
1379
drh5e2f8b92001-05-28 00:41:15 +00001380 pCell->h.leftChild = 0;
drh3b7511c2001-05-26 13:15:44 +00001381 pCell->h.nKey = nKey;
1382 pCell->h.nData = nData;
1383 pCell->h.iNext = 0;
1384
1385 pNext = &pCell->ovfl;
drh5e2f8b92001-05-28 00:41:15 +00001386 pSpace = pCell->aPayload;
drh3b7511c2001-05-26 13:15:44 +00001387 spaceLeft = MX_LOCAL_PAYLOAD;
1388 pPayload = pKey;
1389 pKey = 0;
1390 nPayload = nKey;
drhdd793422001-06-28 01:54:48 +00001391 pPrior = 0;
drh3b7511c2001-05-26 13:15:44 +00001392 while( nPayload>0 ){
1393 if( spaceLeft==0 ){
drh8c42ca92001-06-22 19:15:00 +00001394 rc = allocatePage(pBt, (MemPage**)&pOvfl, pNext);
drh3b7511c2001-05-26 13:15:44 +00001395 if( rc ){
1396 *pNext = 0;
drhdd793422001-06-28 01:54:48 +00001397 }
1398 if( pPrior ) sqlitepager_unref(pPrior);
1399 if( rc ){
drh5e2f8b92001-05-28 00:41:15 +00001400 clearCell(pBt, pCell);
drh3b7511c2001-05-26 13:15:44 +00001401 return rc;
1402 }
drhdd793422001-06-28 01:54:48 +00001403 pPrior = pOvfl;
drh3b7511c2001-05-26 13:15:44 +00001404 spaceLeft = OVERFLOW_SIZE;
drh5e2f8b92001-05-28 00:41:15 +00001405 pSpace = pOvfl->aPayload;
drh8c42ca92001-06-22 19:15:00 +00001406 pNext = &pOvfl->iNext;
drh3b7511c2001-05-26 13:15:44 +00001407 }
1408 n = nPayload;
1409 if( n>spaceLeft ) n = spaceLeft;
1410 memcpy(pSpace, pPayload, n);
1411 nPayload -= n;
1412 if( nPayload==0 && pData ){
1413 pPayload = pData;
1414 nPayload = nData;
1415 pData = 0;
1416 }else{
1417 pPayload += n;
1418 }
1419 spaceLeft -= n;
1420 pSpace += n;
1421 }
drhdd793422001-06-28 01:54:48 +00001422 *pNext = 0;
1423 if( pPrior ){
1424 sqlitepager_unref(pPrior);
1425 }
drh3b7511c2001-05-26 13:15:44 +00001426 return SQLITE_OK;
1427}
1428
1429/*
drhbd03cae2001-06-02 02:40:57 +00001430** Change the MemPage.pParent pointer on the page whose number is
drh8b2f49b2001-06-08 00:21:52 +00001431** given in the second argument so that MemPage.pParent holds the
drhbd03cae2001-06-02 02:40:57 +00001432** pointer in the third argument.
1433*/
1434static void reparentPage(Pager *pPager, Pgno pgno, MemPage *pNewParent){
1435 MemPage *pThis;
1436
drhdd793422001-06-28 01:54:48 +00001437 if( pgno==0 ) return;
1438 assert( pPager!=0 );
drhbd03cae2001-06-02 02:40:57 +00001439 pThis = sqlitepager_lookup(pPager, pgno);
drhdd793422001-06-28 01:54:48 +00001440 if( pThis ){
1441 if( pThis->pParent!=pNewParent ){
1442 if( pThis->pParent ) sqlitepager_unref(pThis->pParent);
1443 pThis->pParent = pNewParent;
1444 if( pNewParent ) sqlitepager_ref(pNewParent);
1445 }
1446 sqlitepager_unref(pThis);
drhbd03cae2001-06-02 02:40:57 +00001447 }
1448}
1449
1450/*
1451** Reparent all children of the given page to be the given page.
1452** In other words, for every child of pPage, invoke reparentPage()
1453** to make sure that child knows that pPage is its parent.
1454**
1455** This routine gets called after you memcpy() one page into
1456** another.
1457*/
drh8c42ca92001-06-22 19:15:00 +00001458static void reparentChildPages(Pager *pPager, MemPage *pPage){
drhbd03cae2001-06-02 02:40:57 +00001459 int i;
1460 for(i=0; i<pPage->nCell; i++){
drh8c42ca92001-06-22 19:15:00 +00001461 reparentPage(pPager, pPage->apCell[i]->h.leftChild, pPage);
drhbd03cae2001-06-02 02:40:57 +00001462 }
drh14acc042001-06-10 19:56:58 +00001463 reparentPage(pPager, pPage->u.hdr.rightChild, pPage);
1464}
1465
1466/*
1467** Remove the i-th cell from pPage. This routine effects pPage only.
1468** The cell content is not freed or deallocated. It is assumed that
1469** the cell content has been copied someplace else. This routine just
1470** removes the reference to the cell from pPage.
1471**
1472** "sz" must be the number of bytes in the cell.
1473**
1474** Do not bother maintaining the integrity of the linked list of Cells.
drh8c42ca92001-06-22 19:15:00 +00001475** Only the pPage->apCell[] array is important. The relinkCellList()
1476** routine will be called soon after this routine in order to rebuild
1477** the linked list.
drh14acc042001-06-10 19:56:58 +00001478*/
drh8c42ca92001-06-22 19:15:00 +00001479static void dropCell(MemPage *pPage, int idx, int sz){
drh14acc042001-06-10 19:56:58 +00001480 int j;
drh8c42ca92001-06-22 19:15:00 +00001481 assert( idx>=0 && idx<pPage->nCell );
1482 assert( sz==cellSize(pPage->apCell[idx]) );
drh7c717f72001-06-24 20:39:41 +00001483 freeSpace(pPage, Addr(pPage->apCell[idx]) - Addr(pPage), sz);
1484 for(j=idx; j<pPage->nCell-1; j++){
drh14acc042001-06-10 19:56:58 +00001485 pPage->apCell[j] = pPage->apCell[j+1];
1486 }
1487 pPage->nCell--;
1488}
1489
1490/*
1491** Insert a new cell on pPage at cell index "i". pCell points to the
1492** content of the cell.
1493**
1494** If the cell content will fit on the page, then put it there. If it
1495** will not fit, then just make pPage->apCell[i] point to the content
1496** and set pPage->isOverfull.
1497**
1498** Do not bother maintaining the integrity of the linked list of Cells.
drh8c42ca92001-06-22 19:15:00 +00001499** Only the pPage->apCell[] array is important. The relinkCellList()
1500** routine will be called soon after this routine in order to rebuild
1501** the linked list.
drh14acc042001-06-10 19:56:58 +00001502*/
1503static void insertCell(MemPage *pPage, int i, Cell *pCell, int sz){
1504 int idx, j;
1505 assert( i>=0 && i<=pPage->nCell );
1506 assert( sz==cellSize(pCell) );
drh2aa679f2001-06-25 02:11:07 +00001507 idx = allocateSpace(pPage, sz);
drh14acc042001-06-10 19:56:58 +00001508 for(j=pPage->nCell; j>i; j--){
1509 pPage->apCell[j] = pPage->apCell[j-1];
1510 }
1511 pPage->nCell++;
drh14acc042001-06-10 19:56:58 +00001512 if( idx<=0 ){
1513 pPage->isOverfull = 1;
1514 pPage->apCell[i] = pCell;
1515 }else{
1516 memcpy(&pPage->u.aDisk[idx], pCell, sz);
drh8c42ca92001-06-22 19:15:00 +00001517 pPage->apCell[i] = (Cell*)&pPage->u.aDisk[idx];
drh14acc042001-06-10 19:56:58 +00001518 }
1519}
1520
1521/*
1522** Rebuild the linked list of cells on a page so that the cells
drh8c42ca92001-06-22 19:15:00 +00001523** occur in the order specified by the pPage->apCell[] array.
1524** Invoke this routine once to repair damage after one or more
1525** invocations of either insertCell() or dropCell().
drh14acc042001-06-10 19:56:58 +00001526*/
1527static void relinkCellList(MemPage *pPage){
1528 int i;
1529 u16 *pIdx;
1530 pIdx = &pPage->u.hdr.firstCell;
1531 for(i=0; i<pPage->nCell; i++){
drh7c717f72001-06-24 20:39:41 +00001532 int idx = Addr(pPage->apCell[i]) - Addr(pPage);
drh8c42ca92001-06-22 19:15:00 +00001533 assert( idx>0 && idx<SQLITE_PAGE_SIZE );
drh14acc042001-06-10 19:56:58 +00001534 *pIdx = idx;
1535 pIdx = &pPage->apCell[i]->h.iNext;
1536 }
1537 *pIdx = 0;
1538}
1539
1540/*
1541** Make a copy of the contents of pFrom into pTo. The pFrom->apCell[]
1542** pointers that point intto pFrom->u.aDisk[] must be adjusted to point
drhdd793422001-06-28 01:54:48 +00001543** into pTo->u.aDisk[] instead. But some pFrom->apCell[] entries might
drh14acc042001-06-10 19:56:58 +00001544** not point to pFrom->u.aDisk[]. Those are unchanged.
1545*/
1546static void copyPage(MemPage *pTo, MemPage *pFrom){
1547 uptr from, to;
1548 int i;
1549 memcpy(pTo->u.aDisk, pFrom->u.aDisk, SQLITE_PAGE_SIZE);
drhdd793422001-06-28 01:54:48 +00001550 pTo->pParent = 0;
drh14acc042001-06-10 19:56:58 +00001551 pTo->isInit = 1;
1552 pTo->nCell = pFrom->nCell;
1553 pTo->nFree = pFrom->nFree;
1554 pTo->isOverfull = pFrom->isOverfull;
drh7c717f72001-06-24 20:39:41 +00001555 to = Addr(pTo);
1556 from = Addr(pFrom);
drh14acc042001-06-10 19:56:58 +00001557 for(i=0; i<pTo->nCell; i++){
drh7c717f72001-06-24 20:39:41 +00001558 uptr x = Addr(pFrom->apCell[i]);
drh8c42ca92001-06-22 19:15:00 +00001559 if( x>from && x<from+SQLITE_PAGE_SIZE ){
1560 *((uptr*)&pTo->apCell[i]) = x + to - from;
drhdd793422001-06-28 01:54:48 +00001561 }else{
1562 pTo->apCell[i] = pFrom->apCell[i];
drh14acc042001-06-10 19:56:58 +00001563 }
1564 }
drhbd03cae2001-06-02 02:40:57 +00001565}
1566
1567/*
drh8b2f49b2001-06-08 00:21:52 +00001568** This routine redistributes Cells on pPage and up to two siblings
1569** of pPage so that all pages have about the same amount of free space.
drh14acc042001-06-10 19:56:58 +00001570** Usually one sibling on either side of pPage is used in the balancing,
drh8b2f49b2001-06-08 00:21:52 +00001571** though both siblings might come from one side if pPage is the first
1572** or last child of its parent. If pPage has fewer than two siblings
1573** (something which can only happen if pPage is the root page or a
drh14acc042001-06-10 19:56:58 +00001574** child of root) then all available siblings participate in the balancing.
drh8b2f49b2001-06-08 00:21:52 +00001575**
1576** The number of siblings of pPage might be increased or decreased by
drh8c42ca92001-06-22 19:15:00 +00001577** one in an effort to keep pages between 66% and 100% full. The root page
1578** is special and is allowed to be less than 66% full. If pPage is
1579** the root page, then the depth of the tree might be increased
drh8b2f49b2001-06-08 00:21:52 +00001580** or decreased by one, as necessary, to keep the root page from being
1581** overfull or empty.
1582**
drh14acc042001-06-10 19:56:58 +00001583** This routine calls relinkCellList() on its input page regardless of
1584** whether or not it does any real balancing. Client routines will typically
1585** invoke insertCell() or dropCell() before calling this routine, so we
1586** need to call relinkCellList() to clean up the mess that those other
1587** routines left behind.
1588**
1589** pCur is left pointing to the same cell as when this routine was called
drh8c42ca92001-06-22 19:15:00 +00001590** even if that cell gets moved to a different page. pCur may be NULL.
1591** Set the pCur parameter to NULL if you do not care about keeping track
1592** of a cell as that will save this routine the work of keeping track of it.
drh14acc042001-06-10 19:56:58 +00001593**
drh8b2f49b2001-06-08 00:21:52 +00001594** Note that when this routine is called, some of the Cells on pPage
drh14acc042001-06-10 19:56:58 +00001595** might not actually be stored in pPage->u.aDisk[]. This can happen
drh8b2f49b2001-06-08 00:21:52 +00001596** if the page is overfull. Part of the job of this routine is to
drh14acc042001-06-10 19:56:58 +00001597** make sure all Cells for pPage once again fit in pPage->u.aDisk[].
1598**
drh8c42ca92001-06-22 19:15:00 +00001599** In the course of balancing the siblings of pPage, the parent of pPage
1600** might become overfull or underfull. If that happens, then this routine
1601** is called recursively on the parent.
1602**
drh14acc042001-06-10 19:56:58 +00001603** If this routine fails for any reason, it means the database may have
1604** been left in a corrupted state and should be rolled back.
drh8b2f49b2001-06-08 00:21:52 +00001605*/
drh14acc042001-06-10 19:56:58 +00001606static int balance(Btree *pBt, MemPage *pPage, BtCursor *pCur){
drh8b2f49b2001-06-08 00:21:52 +00001607 MemPage *pParent; /* The parent of pPage */
drh14acc042001-06-10 19:56:58 +00001608 MemPage *apOld[3]; /* pPage and up to two siblings */
drh8b2f49b2001-06-08 00:21:52 +00001609 Pgno pgnoOld[3]; /* Page numbers for each page in apOld[] */
drh14acc042001-06-10 19:56:58 +00001610 MemPage *apNew[4]; /* pPage and up to 3 siblings after balancing */
1611 Pgno pgnoNew[4]; /* Page numbers for each page in apNew[] */
drh8b2f49b2001-06-08 00:21:52 +00001612 int idxDiv[3]; /* Indices of divider cells in pParent */
1613 Cell *apDiv[3]; /* Divider cells in pParent */
1614 int nCell; /* Number of cells in apCell[] */
1615 int nOld; /* Number of pages in apOld[] */
1616 int nNew; /* Number of pages in apNew[] */
drh8b2f49b2001-06-08 00:21:52 +00001617 int nDiv; /* Number of cells in apDiv[] */
drh14acc042001-06-10 19:56:58 +00001618 int i, j, k; /* Loop counters */
1619 int idx; /* Index of pPage in pParent->apCell[] */
1620 int nxDiv; /* Next divider slot in pParent->apCell[] */
1621 int rc; /* The return code */
1622 int iCur; /* apCell[iCur] is the cell of the cursor */
1623 int usedPerPage; /* Memory needed for each page */
1624 int freePerPage; /* Average free space per page */
drh8c42ca92001-06-22 19:15:00 +00001625 int totalSize; /* Total bytes for all cells */
1626 Pgno pgno; /* Page number */
drh14acc042001-06-10 19:56:58 +00001627 Cell *apCell[MX_CELL*3+5]; /* All cells from pages being balanceed */
1628 int szCell[MX_CELL*3+5]; /* Local size of all cells */
1629 Cell aTemp[2]; /* Temporary holding area for apDiv[] */
1630 MemPage aOld[3]; /* Temporary copies of pPage and its siblings */
drh8b2f49b2001-06-08 00:21:52 +00001631
drh14acc042001-06-10 19:56:58 +00001632 /*
1633 ** Return without doing any work if pPage is neither overfull nor
1634 ** underfull.
drh8b2f49b2001-06-08 00:21:52 +00001635 */
drh14acc042001-06-10 19:56:58 +00001636 if( !pPage->isOverfull && pPage->nFree<SQLITE_PAGE_SIZE/2 ){
1637 relinkCellList(pPage);
drh8b2f49b2001-06-08 00:21:52 +00001638 return SQLITE_OK;
1639 }
1640
1641 /*
drh14acc042001-06-10 19:56:58 +00001642 ** Find the parent of the page to be balanceed.
1643 ** If there is no parent, it means this page is the root page and
drh8b2f49b2001-06-08 00:21:52 +00001644 ** special rules apply.
1645 */
drh14acc042001-06-10 19:56:58 +00001646 pParent = pPage->pParent;
drh8b2f49b2001-06-08 00:21:52 +00001647 if( pParent==0 ){
1648 Pgno pgnoChild;
drh8c42ca92001-06-22 19:15:00 +00001649 MemPage *pChild;
drh8b2f49b2001-06-08 00:21:52 +00001650 if( pPage->nCell==0 ){
drh14acc042001-06-10 19:56:58 +00001651 if( pPage->u.hdr.rightChild ){
1652 /*
1653 ** The root page is empty. Copy the one child page
drh8b2f49b2001-06-08 00:21:52 +00001654 ** into the root page and return. This reduces the depth
1655 ** of the BTree by one.
1656 */
drh14acc042001-06-10 19:56:58 +00001657 rc = sqlitepager_write(pPage);
1658 if( rc ) return rc;
1659 pgnoChild = pPage->u.hdr.rightChild;
drh8c42ca92001-06-22 19:15:00 +00001660 rc = sqlitepager_get(pBt->pPager, pgnoChild, (void**)&pChild);
drh8b2f49b2001-06-08 00:21:52 +00001661 if( rc ) return rc;
1662 memcpy(pPage, pChild, SQLITE_PAGE_SIZE);
1663 pPage->isInit = 0;
1664 initPage(pPage, sqlitepager_pagenumber(pPage), 0);
1665 reparentChildPages(pBt->pPager, pPage);
1666 freePage(pBt, pChild, pgnoChild);
1667 sqlitepager_unref(pChild);
1668 }
1669 return SQLITE_OK;
1670 }
drh14acc042001-06-10 19:56:58 +00001671 if( !pPage->isOverfull ){
drh8b2f49b2001-06-08 00:21:52 +00001672 /* It is OK for the root page to be less than half full.
1673 */
drh14acc042001-06-10 19:56:58 +00001674 relinkCellList(pPage);
drh8b2f49b2001-06-08 00:21:52 +00001675 return SQLITE_OK;
1676 }
drh14acc042001-06-10 19:56:58 +00001677 /*
1678 ** If we get to here, it means the root page is overfull.
drh8b2f49b2001-06-08 00:21:52 +00001679 ** When this happens, Create a new child page and copy the
1680 ** contents of the root into the child. Then make the root
drh14acc042001-06-10 19:56:58 +00001681 ** page an empty page with rightChild pointing to the new
drh8b2f49b2001-06-08 00:21:52 +00001682 ** child. Then fall thru to the code below which will cause
1683 ** the overfull child page to be split.
1684 */
drh14acc042001-06-10 19:56:58 +00001685 rc = sqlitepager_write(pPage);
1686 if( rc ) return rc;
drh8b2f49b2001-06-08 00:21:52 +00001687 rc = allocatePage(pBt, &pChild, &pgnoChild);
1688 if( rc ) return rc;
drh14acc042001-06-10 19:56:58 +00001689 copyPage(pChild, pPage);
1690 pChild->pParent = pPage;
drhdd793422001-06-28 01:54:48 +00001691 sqlitepager_ref(pPage);
drh14acc042001-06-10 19:56:58 +00001692 pChild->isOverfull = 1;
1693 if( pCur ){
drh14acc042001-06-10 19:56:58 +00001694 sqlitepager_unref(pCur->pPage);
1695 pCur->pPage = pChild;
drh8b2f49b2001-06-08 00:21:52 +00001696 }
drh8b2f49b2001-06-08 00:21:52 +00001697 zeroPage(pPage);
drh14acc042001-06-10 19:56:58 +00001698 pPage->u.hdr.rightChild = pgnoChild;
drh8b2f49b2001-06-08 00:21:52 +00001699 pParent = pPage;
1700 pPage = pChild;
drh14acc042001-06-10 19:56:58 +00001701 }else{
1702 rc = sqlitepager_write(pPage);
1703 if( rc ) return rc;
drh8b2f49b2001-06-08 00:21:52 +00001704 }
drh14acc042001-06-10 19:56:58 +00001705
drh8b2f49b2001-06-08 00:21:52 +00001706 /*
drh14acc042001-06-10 19:56:58 +00001707 ** Find the Cell in the parent page whose h.leftChild points back
1708 ** to pPage. The "idx" variable is the index of that cell. If pPage
1709 ** is the rightmost child of pParent then set idx to pParent->nCell
drh8b2f49b2001-06-08 00:21:52 +00001710 */
1711 idx = -1;
1712 pgno = sqlitepager_pagenumber(pPage);
1713 for(i=0; i<pParent->nCell; i++){
1714 if( pParent->apCell[i]->h.leftChild==pgno ){
1715 idx = i;
1716 break;
1717 }
1718 }
drhdd793422001-06-28 01:54:48 +00001719 if( idx<0 && pParent->u.hdr.rightChild==pgno ){
1720 idx = pParent->nCell;
drh8b2f49b2001-06-08 00:21:52 +00001721 }
1722 if( idx<0 ){
drh14acc042001-06-10 19:56:58 +00001723 return SQLITE_CORRUPT;
drh8b2f49b2001-06-08 00:21:52 +00001724 }
1725
1726 /*
drh14acc042001-06-10 19:56:58 +00001727 ** Initialize variables so that it will be safe to jump
1728 ** directory to balance_cleanup at any moment.
drh8b2f49b2001-06-08 00:21:52 +00001729 */
drh14acc042001-06-10 19:56:58 +00001730 nOld = nNew = 0;
1731 sqlitepager_ref(pParent);
1732
1733 /*
1734 ** Find sibling pages to pPage and the Cells in pParent that divide
1735 ** the siblings. An attempt is made to find one sibling on either
1736 ** side of pPage. Both siblings are taken from one side, however, if
1737 ** pPage is either the first or last child of its parent. If pParent
1738 ** has 3 or fewer children then all children of pParent are taken.
1739 */
1740 if( idx==pParent->nCell ){
1741 nxDiv = idx - 2;
drh8b2f49b2001-06-08 00:21:52 +00001742 }else{
drh14acc042001-06-10 19:56:58 +00001743 nxDiv = idx - 1;
drh8b2f49b2001-06-08 00:21:52 +00001744 }
drh14acc042001-06-10 19:56:58 +00001745 if( nxDiv<0 ) nxDiv = 0;
drh8b2f49b2001-06-08 00:21:52 +00001746 nDiv = 0;
drh14acc042001-06-10 19:56:58 +00001747 for(i=0, k=nxDiv; i<3; i++, k++){
1748 if( k<pParent->nCell ){
1749 idxDiv[i] = k;
1750 apDiv[i] = pParent->apCell[k];
drh8b2f49b2001-06-08 00:21:52 +00001751 nDiv++;
1752 pgnoOld[i] = apDiv[i]->h.leftChild;
drh14acc042001-06-10 19:56:58 +00001753 }else if( k==pParent->nCell ){
drh8c42ca92001-06-22 19:15:00 +00001754 pgnoOld[i] = pParent->u.hdr.rightChild;
drh14acc042001-06-10 19:56:58 +00001755 }else{
1756 break;
drh8b2f49b2001-06-08 00:21:52 +00001757 }
drh8c42ca92001-06-22 19:15:00 +00001758 rc = sqlitepager_get(pBt->pPager, pgnoOld[i], (void**)&apOld[i]);
drh14acc042001-06-10 19:56:58 +00001759 if( rc ) goto balance_cleanup;
1760 nOld++;
drh8b2f49b2001-06-08 00:21:52 +00001761 }
1762
1763 /*
drh14acc042001-06-10 19:56:58 +00001764 ** Set iCur to be the index in apCell[] of the cell that the cursor
1765 ** is pointing to. We will need this later on in order to keep the
1766 ** cursor pointing at the same cell.
1767 */
1768 if( pCur ){
1769 iCur = pCur->idx;
1770 for(i=0; idxDiv[i]<idx; i++){
1771 iCur += apOld[i]->nCell + 1;
1772 }
1773 sqlitepager_unref(pCur->pPage);
1774 pCur->pPage = 0;
1775 }
1776
1777 /*
1778 ** Make copies of the content of pPage and its siblings into aOld[].
1779 ** The rest of this function will use data from the copies rather
1780 ** that the original pages since the original pages will be in the
1781 ** process of being overwritten.
1782 */
1783 for(i=0; i<nOld; i++){
1784 copyPage(&aOld[i], apOld[i]);
1785 rc = freePage(pBt, apOld[i], pgnoOld[i]);
1786 if( rc ) goto balance_cleanup;
drhdd793422001-06-28 01:54:48 +00001787 sqlitepager_unref(apOld[i]);
drh14acc042001-06-10 19:56:58 +00001788 apOld[i] = &aOld[i];
1789 }
1790
1791 /*
1792 ** Load pointers to all cells on sibling pages and the divider cells
1793 ** into the local apCell[] array. Make copies of the divider cells
1794 ** into aTemp[] and remove the the divider Cells from pParent.
drh8b2f49b2001-06-08 00:21:52 +00001795 */
1796 nCell = 0;
1797 for(i=0; i<nOld; i++){
1798 MemPage *pOld = apOld[i];
1799 for(j=0; j<pOld->nCell; j++){
drh14acc042001-06-10 19:56:58 +00001800 apCell[nCell] = pOld->apCell[j];
1801 szCell[nCell] = cellSize(apCell[nCell]);
1802 nCell++;
drh8b2f49b2001-06-08 00:21:52 +00001803 }
1804 if( i<nOld-1 ){
drh14acc042001-06-10 19:56:58 +00001805 szCell[nCell] = cellSize(apDiv[i]);
drh8c42ca92001-06-22 19:15:00 +00001806 memcpy(&aTemp[i], apDiv[i], szCell[nCell]);
drh14acc042001-06-10 19:56:58 +00001807 apCell[nCell] = &aTemp[i];
1808 dropCell(pParent, nxDiv, szCell[nCell]);
1809 assert( apCell[nCell]->h.leftChild==pgnoOld[i] );
1810 apCell[nCell]->h.leftChild = pOld->u.hdr.rightChild;
1811 nCell++;
drh8b2f49b2001-06-08 00:21:52 +00001812 }
1813 }
1814
1815 /*
drh14acc042001-06-10 19:56:58 +00001816 ** Estimate the number of pages needed. Record this number in "k"
1817 ** for now. It will get transferred to nNew as we allocate the
1818 ** new pages.
drh8b2f49b2001-06-08 00:21:52 +00001819 */
1820 totalSize = 0;
1821 for(i=0; i<nCell; i++){
drh14acc042001-06-10 19:56:58 +00001822 totalSize += szCell[i];
drh8b2f49b2001-06-08 00:21:52 +00001823 }
drh14acc042001-06-10 19:56:58 +00001824 k = (totalSize + (SQLITE_PAGE_SIZE - sizeof(PageHdr) - 1)) /
drh8b2f49b2001-06-08 00:21:52 +00001825 (SQLITE_PAGE_SIZE - sizeof(PageHdr));
drh14acc042001-06-10 19:56:58 +00001826 usedPerPage = (totalSize+k-1)/k;
1827 freePerPage = SQLITE_PAGE_SIZE - usedPerPage;
drh8b2f49b2001-06-08 00:21:52 +00001828
1829
1830 /*
1831 ** Allocate new pages
1832 */
drh14acc042001-06-10 19:56:58 +00001833 for(i=0; i<k; i++){
drh8b2f49b2001-06-08 00:21:52 +00001834 rc = allocatePage(pBt, &apNew[i], &pgnoNew[i]);
drh14acc042001-06-10 19:56:58 +00001835 if( rc ) goto balance_cleanup;
1836 nNew++;
drh8b2f49b2001-06-08 00:21:52 +00001837 zeroPage(apNew[i]);
1838 }
1839
1840 /*
drh14acc042001-06-10 19:56:58 +00001841 ** Evenly distribute the data in apCell[] across the new pages.
1842 ** Insert divider cells into pParent as necessary.
1843 */
1844 j = 0;
1845 for(i=0; i<nNew; i++){
1846 MemPage *pNew = apNew[i];
drhdd793422001-06-28 01:54:48 +00001847 while( j<nCell && pNew->nFree>freePerPage && szCell[j]<=pNew->nFree ){
drh14acc042001-06-10 19:56:58 +00001848 if( pCur && iCur==j ){ pCur->pPage = pNew; pCur->idx = pNew->nCell; }
1849 insertCell(pNew, pNew->nCell, apCell[j], szCell[j]);
1850 j++;
1851 }
1852 assert( !pNew->isOverfull );
1853 relinkCellList(pNew);
1854 if( i<nNew-1 && j<nCell ){
1855 pNew->u.hdr.rightChild = apCell[j]->h.leftChild;
1856 apCell[j]->h.leftChild = pgnoNew[i];
1857 if( pCur && iCur==j ){ pCur->pPage = pParent; pCur->idx = nxDiv; }
1858 insertCell(pParent, nxDiv, apCell[j], szCell[j]);
1859 j++;
1860 nxDiv++;
1861 }
1862 }
1863 apNew[nNew-1]->u.hdr.rightChild = apOld[nOld-1]->u.hdr.rightChild;
1864 if( nxDiv==pParent->nCell ){
1865 pParent->u.hdr.rightChild = pgnoNew[nNew-1];
1866 }else{
1867 pParent->apCell[nxDiv]->h.leftChild = pgnoNew[nNew-1];
1868 }
1869 if( pCur ){
1870 assert( pCur->pPage!=0 );
1871 sqlitepager_ref(pCur->pPage);
1872 }
1873
1874 /*
1875 ** Reparent children of all cells.
drh8b2f49b2001-06-08 00:21:52 +00001876 */
1877 for(i=0; i<nNew; i++){
drh14acc042001-06-10 19:56:58 +00001878 reparentChildPages(pBt->pPager, apNew[i]);
drh8b2f49b2001-06-08 00:21:52 +00001879 }
drh14acc042001-06-10 19:56:58 +00001880 reparentChildPages(pBt->pPager, pParent);
drh8b2f49b2001-06-08 00:21:52 +00001881
1882 /*
drh14acc042001-06-10 19:56:58 +00001883 ** balance the parent page.
drh8b2f49b2001-06-08 00:21:52 +00001884 */
drh14acc042001-06-10 19:56:58 +00001885 rc = balance(pBt, pParent, 0);
drh8b2f49b2001-06-08 00:21:52 +00001886
1887 /*
drh14acc042001-06-10 19:56:58 +00001888 ** Cleanup before returning.
drh8b2f49b2001-06-08 00:21:52 +00001889 */
drh14acc042001-06-10 19:56:58 +00001890balance_cleanup:
drh8b2f49b2001-06-08 00:21:52 +00001891 for(i=0; i<nOld; i++){
drhdd793422001-06-28 01:54:48 +00001892 if( apOld[i]!=&aOld[i] ) sqlitepager_unref(apOld[i]);
drh8b2f49b2001-06-08 00:21:52 +00001893 }
drh14acc042001-06-10 19:56:58 +00001894 for(i=0; i<nNew; i++){
1895 sqlitepager_unref(apNew[i]);
drh8b2f49b2001-06-08 00:21:52 +00001896 }
drh14acc042001-06-10 19:56:58 +00001897 if( pCur && pCur->pPage==0 ){
1898 pCur->pPage = pParent;
1899 pCur->idx = 0;
1900 }else{
1901 sqlitepager_unref(pParent);
drh8b2f49b2001-06-08 00:21:52 +00001902 }
1903 return rc;
1904}
1905
1906/*
drh3b7511c2001-05-26 13:15:44 +00001907** Insert a new record into the BTree. The key is given by (pKey,nKey)
1908** and the data is given by (pData,nData). The cursor is used only to
1909** define what database the record should be inserted into. The cursor
drh14acc042001-06-10 19:56:58 +00001910** is left pointing at the new record.
drh3b7511c2001-05-26 13:15:44 +00001911*/
1912int sqliteBtreeInsert(
1913 BtCursor *pCur, /* Insert data into the table of this cursor */
1914 void *pKey, int nKey, /* The key of the new record */
1915 void *pData, int nData /* The data of the new record */
1916){
1917 Cell newCell;
1918 int rc;
1919 int loc;
drh14acc042001-06-10 19:56:58 +00001920 int szNew;
drh3b7511c2001-05-26 13:15:44 +00001921 MemPage *pPage;
1922 Btree *pBt = pCur->pBt;
1923
drh8b2f49b2001-06-08 00:21:52 +00001924 if( !pCur->pBt->inTrans ){
1925 return SQLITE_ERROR; /* Must start a transaction first */
1926 }
drh14acc042001-06-10 19:56:58 +00001927 rc = sqliteBtreeMoveto(pCur, pKey, nKey, &loc);
drh3b7511c2001-05-26 13:15:44 +00001928 if( rc ) return rc;
drh14acc042001-06-10 19:56:58 +00001929 pPage = pCur->pPage;
1930 rc = sqlitepager_write(pPage);
drhbd03cae2001-06-02 02:40:57 +00001931 if( rc ) return rc;
drh3b7511c2001-05-26 13:15:44 +00001932 rc = fillInCell(pBt, &newCell, pKey, nKey, pData, nData);
1933 if( rc ) return rc;
drh14acc042001-06-10 19:56:58 +00001934 szNew = cellSize(&newCell);
drh3b7511c2001-05-26 13:15:44 +00001935 if( loc==0 ){
drh14acc042001-06-10 19:56:58 +00001936 newCell.h.leftChild = pPage->apCell[pCur->idx]->h.leftChild;
1937 rc = clearCell(pBt, pPage->apCell[pCur->idx]);
drh5e2f8b92001-05-28 00:41:15 +00001938 if( rc ) return rc;
drh14acc042001-06-10 19:56:58 +00001939 dropCell(pPage, pCur->idx, cellSize(pPage->apCell[pCur->idx]));
drh7c717f72001-06-24 20:39:41 +00001940 }else if( loc<0 && pPage->nCell>0 ){
drh14acc042001-06-10 19:56:58 +00001941 assert( pPage->u.hdr.rightChild==0 ); /* Must be a leaf page */
1942 pCur->idx++;
1943 }else{
1944 assert( pPage->u.hdr.rightChild==0 ); /* Must be a leaf page */
drh3b7511c2001-05-26 13:15:44 +00001945 }
drh7c717f72001-06-24 20:39:41 +00001946 insertCell(pPage, pCur->idx, &newCell, szNew);
drh14acc042001-06-10 19:56:58 +00001947 rc = balance(pCur->pBt, pPage, pCur);
drh5e2f8b92001-05-28 00:41:15 +00001948 return rc;
1949}
1950
1951/*
drhbd03cae2001-06-02 02:40:57 +00001952** Delete the entry that the cursor is pointing to.
drh5e2f8b92001-05-28 00:41:15 +00001953**
drhbd03cae2001-06-02 02:40:57 +00001954** The cursor is left pointing at either the next or the previous
1955** entry. If the cursor is left pointing to the next entry, then
1956** the pCur->bSkipNext flag is set which forces the next call to
1957** sqliteBtreeNext() to be a no-op. That way, you can always call
1958** sqliteBtreeNext() after a delete and the cursor will be left
1959** pointing to the first entry after the deleted entry.
drh3b7511c2001-05-26 13:15:44 +00001960*/
1961int sqliteBtreeDelete(BtCursor *pCur){
drh5e2f8b92001-05-28 00:41:15 +00001962 MemPage *pPage = pCur->pPage;
1963 Cell *pCell;
1964 int rc;
drh8c42ca92001-06-22 19:15:00 +00001965 Pgno pgnoChild;
drh8b2f49b2001-06-08 00:21:52 +00001966
1967 if( !pCur->pBt->inTrans ){
1968 return SQLITE_ERROR; /* Must start a transaction first */
1969 }
drhbd03cae2001-06-02 02:40:57 +00001970 if( pCur->idx >= pPage->nCell ){
1971 return SQLITE_ERROR; /* The cursor is not pointing to anything */
1972 }
1973 rc = sqlitepager_write(pPage);
1974 if( rc ) return rc;
drh5e2f8b92001-05-28 00:41:15 +00001975 pCell = pPage->apCell[pCur->idx];
drh14acc042001-06-10 19:56:58 +00001976 pgnoChild = pCell->h.leftChild;
drh8c42ca92001-06-22 19:15:00 +00001977 clearCell(pCur->pBt, pCell);
drh14acc042001-06-10 19:56:58 +00001978 dropCell(pPage, pCur->idx, cellSize(pCell));
1979 if( pgnoChild ){
1980 /*
1981 ** If the entry we just deleted is not a leaf, then we've left a
drh8c42ca92001-06-22 19:15:00 +00001982 ** hole in an internal page. We have to fill the hole by moving
1983 ** in a cell from a leaf. The next Cell after the one just deleted
drh14acc042001-06-10 19:56:58 +00001984 ** is guaranteed to exist and to be a leaf so we can use it.
drh5e2f8b92001-05-28 00:41:15 +00001985 */
drh14acc042001-06-10 19:56:58 +00001986 BtCursor leafCur;
1987 Cell *pNext;
1988 int szNext;
1989 getTempCursor(pCur, &leafCur);
1990 rc = sqliteBtreeNext(&leafCur, 0);
1991 if( rc!=SQLITE_OK ){
1992 return SQLITE_CORRUPT;
drh5e2f8b92001-05-28 00:41:15 +00001993 }
drh8c42ca92001-06-22 19:15:00 +00001994 pNext = leafCur.pPage->apCell[leafCur.idx];
drh14acc042001-06-10 19:56:58 +00001995 szNext = cellSize(pNext);
drh8c42ca92001-06-22 19:15:00 +00001996 pNext->h.leftChild = pgnoChild;
drh14acc042001-06-10 19:56:58 +00001997 insertCell(pPage, pCur->idx, pNext, szNext);
1998 rc = balance(pCur->pBt, pPage, pCur);
drh5e2f8b92001-05-28 00:41:15 +00001999 if( rc ) return rc;
drh5e2f8b92001-05-28 00:41:15 +00002000 pCur->bSkipNext = 1;
drh14acc042001-06-10 19:56:58 +00002001 dropCell(leafCur.pPage, leafCur.idx, szNext);
2002 rc = balance(pCur->pBt, leafCur.pPage, 0);
drh8c42ca92001-06-22 19:15:00 +00002003 releaseTempCursor(&leafCur);
drh5e2f8b92001-05-28 00:41:15 +00002004 }else{
drh14acc042001-06-10 19:56:58 +00002005 rc = balance(pCur->pBt, pPage, pCur);
2006 pCur->bSkipNext = 1;
drh5e2f8b92001-05-28 00:41:15 +00002007 }
drh5e2f8b92001-05-28 00:41:15 +00002008 return rc;
drh3b7511c2001-05-26 13:15:44 +00002009}
drh8b2f49b2001-06-08 00:21:52 +00002010
2011/*
2012** Create a new BTree in the same file. Write into *piTable the index
2013** of the root page of the new table.
2014*/
2015int sqliteBtreeCreateTable(Btree *pBt, int *piTable){
2016 MemPage *pRoot;
2017 Pgno pgnoRoot;
2018 int rc;
2019 if( !pBt->inTrans ){
2020 return SQLITE_ERROR; /* Must start a transaction first */
2021 }
2022 rc = allocatePage(pBt, &pRoot, &pgnoRoot);
2023 if( rc ) return rc;
2024 sqlitepager_write(pRoot);
2025 zeroPage(pRoot);
2026 sqlitepager_unref(pRoot);
2027 *piTable = (int)pgnoRoot;
2028 return SQLITE_OK;
2029}
2030
2031/*
2032** Erase the given database page and all its children. Return
2033** the page to the freelist.
2034*/
drh2aa679f2001-06-25 02:11:07 +00002035static int clearDatabasePage(Btree *pBt, Pgno pgno, int freePageFlag){
drh8b2f49b2001-06-08 00:21:52 +00002036 MemPage *pPage;
2037 int rc;
drh8b2f49b2001-06-08 00:21:52 +00002038 Cell *pCell;
2039 int idx;
2040
drh8c42ca92001-06-22 19:15:00 +00002041 rc = sqlitepager_get(pBt->pPager, pgno, (void**)&pPage);
drh8b2f49b2001-06-08 00:21:52 +00002042 if( rc ) return rc;
drh14acc042001-06-10 19:56:58 +00002043 idx = pPage->u.hdr.firstCell;
drh8b2f49b2001-06-08 00:21:52 +00002044 while( idx>0 ){
drh14acc042001-06-10 19:56:58 +00002045 pCell = (Cell*)&pPage->u.aDisk[idx];
drh8b2f49b2001-06-08 00:21:52 +00002046 idx = pCell->h.iNext;
2047 if( pCell->h.leftChild ){
drh2aa679f2001-06-25 02:11:07 +00002048 rc = clearDatabasePage(pBt, pCell->h.leftChild, 1);
drh8b2f49b2001-06-08 00:21:52 +00002049 if( rc ) return rc;
2050 }
drh8c42ca92001-06-22 19:15:00 +00002051 rc = clearCell(pBt, pCell);
drh8b2f49b2001-06-08 00:21:52 +00002052 if( rc ) return rc;
2053 }
drh2aa679f2001-06-25 02:11:07 +00002054 if( pPage->u.hdr.rightChild ){
2055 rc = clearDatabasePage(pBt, pPage->u.hdr.rightChild, 1);
2056 if( rc ) return rc;
2057 }
2058 if( freePageFlag ){
2059 rc = freePage(pBt, pPage, pgno);
2060 }else{
2061 zeroPage(pPage);
2062 }
drhdd793422001-06-28 01:54:48 +00002063 sqlitepager_unref(pPage);
drh2aa679f2001-06-25 02:11:07 +00002064 return rc;
drh8b2f49b2001-06-08 00:21:52 +00002065}
2066
2067/*
2068** Delete all information from a single table in the database.
2069*/
2070int sqliteBtreeClearTable(Btree *pBt, int iTable){
2071 int rc;
2072 if( !pBt->inTrans ){
2073 return SQLITE_ERROR; /* Must start a transaction first */
2074 }
drh2aa679f2001-06-25 02:11:07 +00002075 rc = clearDatabasePage(pBt, (Pgno)iTable, 0);
drh8b2f49b2001-06-08 00:21:52 +00002076 if( rc ){
2077 sqliteBtreeRollback(pBt);
drh8b2f49b2001-06-08 00:21:52 +00002078 }
drh8c42ca92001-06-22 19:15:00 +00002079 return rc;
drh8b2f49b2001-06-08 00:21:52 +00002080}
2081
2082/*
2083** Erase all information in a table and add the root of the table to
2084** the freelist. Except, the root of the principle table (the one on
2085** page 2) is never added to the freelist.
2086*/
2087int sqliteBtreeDropTable(Btree *pBt, int iTable){
2088 int rc;
2089 MemPage *pPage;
2090 if( !pBt->inTrans ){
2091 return SQLITE_ERROR; /* Must start a transaction first */
2092 }
drh8c42ca92001-06-22 19:15:00 +00002093 rc = sqlitepager_get(pBt->pPager, (Pgno)iTable, (void**)&pPage);
drh2aa679f2001-06-25 02:11:07 +00002094 if( rc ) return rc;
2095 rc = sqliteBtreeClearTable(pBt, iTable);
2096 if( rc ) return rc;
2097 if( iTable>2 ){
2098 rc = freePage(pBt, pPage, iTable);
2099 }else{
2100 zeroPage(pPage);
drh8b2f49b2001-06-08 00:21:52 +00002101 }
drhdd793422001-06-28 01:54:48 +00002102 sqlitepager_unref(pPage);
drh8b2f49b2001-06-08 00:21:52 +00002103 return rc;
2104}
2105
2106/*
2107** Read the meta-information out of a database file.
2108*/
2109int sqliteBtreeGetMeta(Btree *pBt, int *aMeta){
2110 PageOne *pP1;
2111 int rc;
2112
drh8c42ca92001-06-22 19:15:00 +00002113 rc = sqlitepager_get(pBt->pPager, 1, (void**)&pP1);
drh8b2f49b2001-06-08 00:21:52 +00002114 if( rc ) return rc;
drh2aa679f2001-06-25 02:11:07 +00002115 aMeta[0] = pP1->nFree;
2116 memcpy(&aMeta[1], pP1->aMeta, sizeof(pP1->aMeta));
drh8b2f49b2001-06-08 00:21:52 +00002117 sqlitepager_unref(pP1);
2118 return SQLITE_OK;
2119}
2120
2121/*
2122** Write meta-information back into the database.
2123*/
2124int sqliteBtreeUpdateMeta(Btree *pBt, int *aMeta){
2125 PageOne *pP1;
2126 int rc;
2127 if( !pBt->inTrans ){
2128 return SQLITE_ERROR; /* Must start a transaction first */
2129 }
2130 pP1 = pBt->page1;
2131 rc = sqlitepager_write(pP1);
drh2aa679f2001-06-25 02:11:07 +00002132 if( rc ) return rc;
2133 memcpy(pP1->aMeta, &aMeta[1], sizeof(pP1->aMeta));
drh8b2f49b2001-06-08 00:21:52 +00002134 return SQLITE_OK;
2135}
drh8c42ca92001-06-22 19:15:00 +00002136
2137#ifdef SQLITE_TEST
2138/*
2139** Print a disassembly of the given page on standard output. This routine
2140** is used for debugging and testing only.
2141*/
2142int sqliteBtreePageDump(Btree *pBt, int pgno){
2143 int rc;
2144 MemPage *pPage;
2145 int i, j;
2146 int nFree;
2147 u16 idx;
2148 char range[20];
2149 unsigned char payload[20];
2150 rc = sqlitepager_get(pBt->pPager, (Pgno)pgno, (void**)&pPage);
2151 if( rc ){
2152 return rc;
2153 }
2154 i = 0;
2155 idx = pPage->u.hdr.firstCell;
2156 while( idx>0 && idx<=SQLITE_PAGE_SIZE-MIN_CELL_SIZE ){
2157 Cell *pCell = (Cell*)&pPage->u.aDisk[idx];
2158 int sz = cellSize(pCell);
2159 sprintf(range,"%d..%d", idx, idx+sz-1);
drh2aa679f2001-06-25 02:11:07 +00002160 sz = pCell->h.nKey + pCell->h.nData;
drh8c42ca92001-06-22 19:15:00 +00002161 if( sz>sizeof(payload)-1 ) sz = sizeof(payload)-1;
2162 memcpy(payload, pCell->aPayload, sz);
2163 for(j=0; j<sz; j++){
2164 if( payload[j]<0x20 || payload[j]>0x7f ) payload[j] = '.';
2165 }
2166 payload[sz] = 0;
2167 printf(
2168 "cell %2d: i=%-10s chld=%-4d nk=%-3d nd=%-3d payload=%s\n",
2169 i, range, (int)pCell->h.leftChild, pCell->h.nKey, pCell->h.nData,
drh2aa679f2001-06-25 02:11:07 +00002170 payload
drh8c42ca92001-06-22 19:15:00 +00002171 );
drh2aa679f2001-06-25 02:11:07 +00002172 if( pPage->apCell[i]!=pCell ){
2173 printf("**** apCell[%d] does not match on prior entry ****\n", i);
2174 }
drh7c717f72001-06-24 20:39:41 +00002175 i++;
drh8c42ca92001-06-22 19:15:00 +00002176 idx = pCell->h.iNext;
2177 }
2178 if( idx!=0 ){
2179 printf("ERROR: next cell index out of range: %d\n", idx);
2180 }
2181 printf("right_child: %d\n", pPage->u.hdr.rightChild);
2182 nFree = 0;
2183 i = 0;
2184 idx = pPage->u.hdr.firstFree;
2185 while( idx>0 && idx<SQLITE_PAGE_SIZE ){
2186 FreeBlk *p = (FreeBlk*)&pPage->u.aDisk[idx];
2187 sprintf(range,"%d..%d", idx, idx+p->iSize-1);
2188 nFree += p->iSize;
2189 printf("freeblock %2d: i=%-10s size=%-4d total=%d\n",
2190 i, range, p->iSize, nFree);
2191 idx = p->iNext;
drh2aa679f2001-06-25 02:11:07 +00002192 i++;
drh8c42ca92001-06-22 19:15:00 +00002193 }
2194 if( idx!=0 ){
2195 printf("ERROR: next freeblock index out of range: %d\n", idx);
2196 }
2197 sqlitepager_unref(pPage);
2198 return SQLITE_OK;
2199}
2200#endif
2201
2202#ifdef SQLITE_TEST
2203/*
drh2aa679f2001-06-25 02:11:07 +00002204** Fill aResult[] with information about the entry and page that the
2205** cursor is pointing to.
2206**
2207** aResult[0] = The page number
2208** aResult[1] = The entry number
2209** aResult[2] = Total number of entries on this page
2210** aResult[3] = Size of this entry
2211** aResult[4] = Number of free bytes on this page
2212** aResult[5] = Number of free blocks on the page
2213** aResult[6] = Page number of the left child of this entry
2214** aResult[7] = Page number of the right child for the whole page
drh8c42ca92001-06-22 19:15:00 +00002215*/
2216int sqliteBtreeCursorDump(BtCursor *pCur, int *aResult){
drh2aa679f2001-06-25 02:11:07 +00002217 int cnt, idx;
2218 MemPage *pPage = pCur->pPage;
2219 aResult[0] = sqlitepager_pagenumber(pPage);
drh8c42ca92001-06-22 19:15:00 +00002220 aResult[1] = pCur->idx;
drh2aa679f2001-06-25 02:11:07 +00002221 aResult[2] = pPage->nCell;
2222 if( pCur->idx>=0 && pCur->idx<pPage->nCell ){
2223 aResult[3] = cellSize(pPage->apCell[pCur->idx]);
2224 aResult[6] = pPage->apCell[pCur->idx]->h.leftChild;
2225 }else{
2226 aResult[3] = 0;
2227 aResult[6] = 0;
2228 }
2229 aResult[4] = pPage->nFree;
2230 cnt = 0;
2231 idx = pPage->u.hdr.firstFree;
2232 while( idx>0 && idx<SQLITE_PAGE_SIZE ){
2233 cnt++;
2234 idx = ((FreeBlk*)&pPage->u.aDisk[idx])->iNext;
2235 }
2236 aResult[5] = cnt;
2237 aResult[7] = pPage->u.hdr.rightChild;
drh8c42ca92001-06-22 19:15:00 +00002238 return SQLITE_OK;
2239}
2240#endif
drhdd793422001-06-28 01:54:48 +00002241
2242#ifdef SQLITE_TEST
2243/*
2244** Return the pager associated with a BTree
2245*/
2246Pager *sqliteBtreePager(Btree *pBt){
2247 return pBt->pPager;
2248}
2249#endif