blob: af8edac35d39646372c9cc156fc365fee1394c7a [file] [log] [blame]
drha059ad02001-04-17 20:09:11 +00001/*
drh9e572e62004-04-23 23:43:10 +00002** 2004 April 6
drha059ad02001-04-17 20:09:11 +00003**
drhb19a2bc2001-09-16 00:13:26 +00004** The author disclaims copyright to this source code. In place of
5** a legal notice, here is a blessing:
drha059ad02001-04-17 20:09:11 +00006**
drhb19a2bc2001-09-16 00:13:26 +00007** May you do good and not evil.
8** May you find forgiveness for yourself and forgive others.
9** May you share freely, never taking more than you give.
drha059ad02001-04-17 20:09:11 +000010**
11*************************************************************************
drhd0679ed2007-08-28 22:24:34 +000012** $Id: btree.c,v 1.414 2007/08/28 22:24:35 drh Exp $
drh8b2f49b2001-06-08 00:21:52 +000013**
** This file implements an external (disk-based) database using BTrees.
drha3152892007-05-05 11:48:52 +000015** See the header comment on "btreeInt.h" for additional information.
16** Including a description of file format and an overview of operation.
drha059ad02001-04-17 20:09:11 +000017*/
drha3152892007-05-05 11:48:52 +000018#include "btreeInt.h"
paulb95a8862003-04-01 21:16:41 +000019
drh8c42ca92001-06-22 19:15:00 +000020/*
drha3152892007-05-05 11:48:52 +000021** The header string that appears at the beginning of every
22** SQLite database.
drh556b2a22005-06-14 16:04:05 +000023*/
drh556b2a22005-06-14 16:04:05 +000024static const char zMagicHeader[] = SQLITE_FILE_HEADER;
drh08ed44e2001-04-29 23:32:55 +000025
/*
** Tracing switch consulted by the TRACE macro.  Set it to 1 to turn
** tracing on.  The variable only exists when SQLITE_TEST is enabled.
*/
#if SQLITE_TEST
int sqlite3_btree_trace = 0;    /* Non-zero enables tracing */
#endif
drh615ae552005-01-16 23:21:00 +000033
drh86f8c192007-08-22 00:39:19 +000034
35
drhe53831d2007-08-17 01:14:38 +000036#ifndef SQLITE_OMIT_SHARED_CACHE
37/*
38** A flag to indicate whether or not shared cache is enabled. Also,
39** a list of BtShared objects that are eligible for participation
drhd677b3d2007-08-20 22:48:41 +000040** in shared cache. The variables have file scope during normal builds,
drh86f8c192007-08-22 00:39:19 +000041** but the test harness needs to access these variables so we make them
drhd677b3d2007-08-20 22:48:41 +000042** global for test builds.
drhe53831d2007-08-17 01:14:38 +000043*/
44#ifdef SQLITE_TEST
45BtShared *sqlite3SharedCacheList = 0;
46int sqlite3SharedCacheEnabled = 0;
47#else
48static BtShared *sqlite3SharedCacheList = 0;
49static int sqlite3SharedCacheEnabled = 0;
50#endif
drhe53831d2007-08-17 01:14:38 +000051#endif /* SQLITE_OMIT_SHARED_CACHE */
52
53#ifndef SQLITE_OMIT_SHARED_CACHE
54/*
55** Enable or disable the shared pager and schema features.
56**
57** This routine has no effect on existing database connections.
58** The shared cache setting effects only future calls to
59** sqlite3_open(), sqlite3_open16(), or sqlite3_open_v2().
60*/
61int sqlite3_enable_shared_cache(int enable){
62 sqlite3SharedCacheEnabled = enable;
63 return SQLITE_OK;
64}
65#endif
66
drhd677b3d2007-08-20 22:48:41 +000067
drh615ae552005-01-16 23:21:00 +000068/*
drh66cbd152004-09-01 16:12:25 +000069** Forward declaration
70*/
drh980b1a72006-08-16 16:42:48 +000071static int checkReadLocks(Btree*,Pgno,BtCursor*);
drh66cbd152004-09-01 16:12:25 +000072
danielk1977aef0bf62005-12-30 16:28:01 +000073
#ifdef SQLITE_OMIT_SHARED_CACHE
  /*
  ** queryTableLock(), lockTable() and unlockAllTables() maintain the
  ** BtShared.pLock linked list that holds shared-cache table-level
  ** locks.  When the library is built without the shared-cache feature
  ** each BtShared structure has exactly one user, so no table locking
  ** is needed and the three routines collapse to no-ops: the two
  ** lock routines always evaluate to SQLITE_OK and the unlock routine
  ** expands to nothing.
  */
  #define queryTableLock(pBtree,iTab,eLock) SQLITE_OK
  #define lockTable(pBtree,iTab,eLock) SQLITE_OK
  #define unlockAllTables(pBtree)
#endif
danielk1977aef0bf62005-12-30 16:28:01 +000087
drhe53831d2007-08-17 01:14:38 +000088#ifndef SQLITE_OMIT_SHARED_CACHE
danielk1977da184232006-01-05 11:34:32 +000089/*
danielk1977aef0bf62005-12-30 16:28:01 +000090** Query to see if btree handle p may obtain a lock of type eLock
91** (READ_LOCK or WRITE_LOCK) on the table with root-page iTab. Return
92** SQLITE_OK if the lock may be obtained (by calling lockTable()), or
danielk1977c87d34d2006-01-06 13:00:28 +000093** SQLITE_LOCKED if not.
danielk1977aef0bf62005-12-30 16:28:01 +000094*/
95static int queryTableLock(Btree *p, Pgno iTab, u8 eLock){
96 BtShared *pBt = p->pBt;
97 BtLock *pIter;
98
drhd0679ed2007-08-28 22:24:34 +000099 assert( sqlite3BtreeMutexHeld(p->pSqlite->mutex) );
drh27641702007-08-22 02:56:42 +0000100 assert( sqlite3BtreeMutexHeld(pBt->mutex) );
drhd677b3d2007-08-20 22:48:41 +0000101
danielk1977da184232006-01-05 11:34:32 +0000102 /* This is a no-op if the shared-cache is not enabled */
drhe53831d2007-08-17 01:14:38 +0000103 if( !p->sharable ){
danielk1977da184232006-01-05 11:34:32 +0000104 return SQLITE_OK;
105 }
106
107 /* This (along with lockTable()) is where the ReadUncommitted flag is
108 ** dealt with. If the caller is querying for a read-lock and the flag is
109 ** set, it is unconditionally granted - even if there are write-locks
110 ** on the table. If a write-lock is requested, the ReadUncommitted flag
111 ** is not considered.
112 **
113 ** In function lockTable(), if a read-lock is demanded and the
114 ** ReadUncommitted flag is set, no entry is added to the locks list
115 ** (BtShared.pLock).
116 **
117 ** To summarize: If the ReadUncommitted flag is set, then read cursors do
118 ** not create or respect table locks. The locking procedure for a
119 ** write-cursor does not change.
120 */
121 if(
122 !p->pSqlite ||
123 0==(p->pSqlite->flags&SQLITE_ReadUncommitted) ||
124 eLock==WRITE_LOCK ||
drh47ded162006-01-06 01:42:58 +0000125 iTab==MASTER_ROOT
danielk1977da184232006-01-05 11:34:32 +0000126 ){
127 for(pIter=pBt->pLock; pIter; pIter=pIter->pNext){
128 if( pIter->pBtree!=p && pIter->iTable==iTab &&
129 (pIter->eLock!=eLock || eLock!=READ_LOCK) ){
danielk1977c87d34d2006-01-06 13:00:28 +0000130 return SQLITE_LOCKED;
danielk1977da184232006-01-05 11:34:32 +0000131 }
danielk1977aef0bf62005-12-30 16:28:01 +0000132 }
133 }
134 return SQLITE_OK;
135}
drhe53831d2007-08-17 01:14:38 +0000136#endif /* !SQLITE_OMIT_SHARED_CACHE */
danielk1977aef0bf62005-12-30 16:28:01 +0000137
drhe53831d2007-08-17 01:14:38 +0000138#ifndef SQLITE_OMIT_SHARED_CACHE
danielk1977aef0bf62005-12-30 16:28:01 +0000139/*
140** Add a lock on the table with root-page iTable to the shared-btree used
141** by Btree handle p. Parameter eLock must be either READ_LOCK or
142** WRITE_LOCK.
143**
144** SQLITE_OK is returned if the lock is added successfully. SQLITE_BUSY and
145** SQLITE_NOMEM may also be returned.
146*/
147static int lockTable(Btree *p, Pgno iTable, u8 eLock){
148 BtShared *pBt = p->pBt;
149 BtLock *pLock = 0;
150 BtLock *pIter;
151
drhd0679ed2007-08-28 22:24:34 +0000152 assert( sqlite3BtreeMutexHeld(p->pSqlite->mutex) );
drh27641702007-08-22 02:56:42 +0000153 assert( sqlite3BtreeMutexHeld(pBt->mutex) );
drhd677b3d2007-08-20 22:48:41 +0000154
danielk1977da184232006-01-05 11:34:32 +0000155 /* This is a no-op if the shared-cache is not enabled */
drhe53831d2007-08-17 01:14:38 +0000156 if( !p->sharable ){
danielk1977da184232006-01-05 11:34:32 +0000157 return SQLITE_OK;
158 }
159
danielk1977aef0bf62005-12-30 16:28:01 +0000160 assert( SQLITE_OK==queryTableLock(p, iTable, eLock) );
161
danielk1977da184232006-01-05 11:34:32 +0000162 /* If the read-uncommitted flag is set and a read-lock is requested,
163 ** return early without adding an entry to the BtShared.pLock list. See
164 ** comment in function queryTableLock() for more info on handling
165 ** the ReadUncommitted flag.
166 */
167 if(
168 (p->pSqlite) &&
169 (p->pSqlite->flags&SQLITE_ReadUncommitted) &&
170 (eLock==READ_LOCK) &&
drh47ded162006-01-06 01:42:58 +0000171 iTable!=MASTER_ROOT
danielk1977da184232006-01-05 11:34:32 +0000172 ){
173 return SQLITE_OK;
174 }
175
danielk1977aef0bf62005-12-30 16:28:01 +0000176 /* First search the list for an existing lock on this table. */
177 for(pIter=pBt->pLock; pIter; pIter=pIter->pNext){
178 if( pIter->iTable==iTable && pIter->pBtree==p ){
179 pLock = pIter;
180 break;
181 }
182 }
183
184 /* If the above search did not find a BtLock struct associating Btree p
185 ** with table iTable, allocate one and link it into the list.
186 */
187 if( !pLock ){
drh17435752007-08-16 04:30:38 +0000188 pLock = (BtLock *)sqlite3MallocZero(sizeof(BtLock));
danielk1977aef0bf62005-12-30 16:28:01 +0000189 if( !pLock ){
190 return SQLITE_NOMEM;
191 }
192 pLock->iTable = iTable;
193 pLock->pBtree = p;
194 pLock->pNext = pBt->pLock;
195 pBt->pLock = pLock;
196 }
197
198 /* Set the BtLock.eLock variable to the maximum of the current lock
199 ** and the requested lock. This means if a write-lock was already held
200 ** and a read-lock requested, we don't incorrectly downgrade the lock.
201 */
202 assert( WRITE_LOCK>READ_LOCK );
danielk19775118b912005-12-30 16:31:53 +0000203 if( eLock>pLock->eLock ){
204 pLock->eLock = eLock;
205 }
danielk1977aef0bf62005-12-30 16:28:01 +0000206
207 return SQLITE_OK;
208}
drhe53831d2007-08-17 01:14:38 +0000209#endif /* !SQLITE_OMIT_SHARED_CACHE */
danielk1977aef0bf62005-12-30 16:28:01 +0000210
drhe53831d2007-08-17 01:14:38 +0000211#ifndef SQLITE_OMIT_SHARED_CACHE
danielk1977aef0bf62005-12-30 16:28:01 +0000212/*
213** Release all the table locks (locks obtained via calls to the lockTable()
214** procedure) held by Btree handle p.
215*/
216static void unlockAllTables(Btree *p){
217 BtLock **ppIter = &p->pBt->pLock;
danielk1977da184232006-01-05 11:34:32 +0000218
drhd0679ed2007-08-28 22:24:34 +0000219 assert( sqlite3BtreeMutexHeld(p->pSqlite->mutex) );
drh27641702007-08-22 02:56:42 +0000220 assert( sqlite3BtreeMutexHeld(p->pBt->mutex) );
drhe53831d2007-08-17 01:14:38 +0000221 assert( p->sharable || 0==*ppIter );
danielk1977da184232006-01-05 11:34:32 +0000222
danielk1977aef0bf62005-12-30 16:28:01 +0000223 while( *ppIter ){
224 BtLock *pLock = *ppIter;
225 if( pLock->pBtree==p ){
226 *ppIter = pLock->pNext;
drh17435752007-08-16 04:30:38 +0000227 sqlite3_free(pLock);
danielk1977aef0bf62005-12-30 16:28:01 +0000228 }else{
229 ppIter = &pLock->pNext;
230 }
231 }
232}
233#endif /* SQLITE_OMIT_SHARED_CACHE */
234
drh980b1a72006-08-16 16:42:48 +0000235static void releasePage(MemPage *pPage); /* Forward reference */
236
danielk197792d4d7a2007-05-04 12:05:56 +0000237#ifndef SQLITE_OMIT_INCRBLOB
238/*
239** Invalidate the overflow page-list cache for cursor pCur, if any.
240*/
241static void invalidateOverflowCache(BtCursor *pCur){
drhd0679ed2007-08-28 22:24:34 +0000242 assert( sqlite3BtreeMutexHeld(pCur->pBt->mutex) );
drh17435752007-08-16 04:30:38 +0000243 sqlite3_free(pCur->aOverflow);
danielk197792d4d7a2007-05-04 12:05:56 +0000244 pCur->aOverflow = 0;
245}
246
247/*
248** Invalidate the overflow page-list cache for all cursors opened
249** on the shared btree structure pBt.
250*/
251static void invalidateAllOverflowCache(BtShared *pBt){
252 BtCursor *p;
drh27641702007-08-22 02:56:42 +0000253 assert( sqlite3BtreeMutexHeld(pBt->mutex) );
danielk197792d4d7a2007-05-04 12:05:56 +0000254 for(p=pBt->pCursor; p; p=p->pNext){
255 invalidateOverflowCache(p);
256 }
257}
258#else
259 #define invalidateOverflowCache(x)
260 #define invalidateAllOverflowCache(x)
261#endif
262
drh980b1a72006-08-16 16:42:48 +0000263/*
264** Save the current cursor position in the variables BtCursor.nKey
265** and BtCursor.pKey. The cursor's state is set to CURSOR_REQUIRESEEK.
266*/
267static int saveCursorPosition(BtCursor *pCur){
268 int rc;
269
270 assert( CURSOR_VALID==pCur->eState );
271 assert( 0==pCur->pKey );
drhd0679ed2007-08-28 22:24:34 +0000272 assert( sqlite3BtreeMutexHeld(pCur->pBt->mutex) );
drh980b1a72006-08-16 16:42:48 +0000273
274 rc = sqlite3BtreeKeySize(pCur, &pCur->nKey);
275
276 /* If this is an intKey table, then the above call to BtreeKeySize()
277 ** stores the integer key in pCur->nKey. In this case this value is
278 ** all that is required. Otherwise, if pCur is not open on an intKey
279 ** table, then malloc space for and store the pCur->nKey bytes of key
280 ** data.
281 */
282 if( rc==SQLITE_OK && 0==pCur->pPage->intKey){
drh17435752007-08-16 04:30:38 +0000283 void *pKey = sqlite3_malloc(pCur->nKey);
drh980b1a72006-08-16 16:42:48 +0000284 if( pKey ){
285 rc = sqlite3BtreeKey(pCur, 0, pCur->nKey, pKey);
286 if( rc==SQLITE_OK ){
287 pCur->pKey = pKey;
288 }else{
drh17435752007-08-16 04:30:38 +0000289 sqlite3_free(pKey);
drh980b1a72006-08-16 16:42:48 +0000290 }
291 }else{
292 rc = SQLITE_NOMEM;
293 }
294 }
295 assert( !pCur->pPage->intKey || !pCur->pKey );
296
297 if( rc==SQLITE_OK ){
298 releasePage(pCur->pPage);
299 pCur->pPage = 0;
300 pCur->eState = CURSOR_REQUIRESEEK;
301 }
302
danielk197792d4d7a2007-05-04 12:05:56 +0000303 invalidateOverflowCache(pCur);
drh980b1a72006-08-16 16:42:48 +0000304 return rc;
305}
306
307/*
308** Save the positions of all cursors except pExcept open on the table
309** with root-page iRoot. Usually, this is called just before cursor
310** pExcept is used to modify the table (BtreeDelete() or BtreeInsert()).
311*/
312static int saveAllCursors(BtShared *pBt, Pgno iRoot, BtCursor *pExcept){
313 BtCursor *p;
drh27641702007-08-22 02:56:42 +0000314 assert( sqlite3BtreeMutexHeld(pBt->mutex) );
drhd0679ed2007-08-28 22:24:34 +0000315 assert( pExcept==0 || pExcept->pBt==pBt );
drh980b1a72006-08-16 16:42:48 +0000316 for(p=pBt->pCursor; p; p=p->pNext){
317 if( p!=pExcept && (0==iRoot || p->pgnoRoot==iRoot) &&
318 p->eState==CURSOR_VALID ){
319 int rc = saveCursorPosition(p);
320 if( SQLITE_OK!=rc ){
321 return rc;
322 }
323 }
324 }
325 return SQLITE_OK;
326}
327
328/*
drhbf700f32007-03-31 02:36:44 +0000329** Clear the current cursor position.
330*/
331static void clearCursorPosition(BtCursor *pCur){
drhd0679ed2007-08-28 22:24:34 +0000332 assert( sqlite3BtreeMutexHeld(pCur->pBt->mutex) );
drh17435752007-08-16 04:30:38 +0000333 sqlite3_free(pCur->pKey);
drhbf700f32007-03-31 02:36:44 +0000334 pCur->pKey = 0;
335 pCur->eState = CURSOR_INVALID;
336}
337
338/*
drh980b1a72006-08-16 16:42:48 +0000339** Restore the cursor to the position it was in (or as close to as possible)
340** when saveCursorPosition() was called. Note that this call deletes the
341** saved position info stored by saveCursorPosition(), so there can be
342** at most one effective restoreOrClearCursorPosition() call after each
343** saveCursorPosition().
344**
345** If the second argument argument - doSeek - is false, then instead of
346** returning the cursor to it's saved position, any saved position is deleted
347** and the cursor state set to CURSOR_INVALID.
348*/
drh16a9b832007-05-05 18:39:25 +0000349int sqlite3BtreeRestoreOrClearCursorPosition(BtCursor *pCur){
drhbf700f32007-03-31 02:36:44 +0000350 int rc;
drhd0679ed2007-08-28 22:24:34 +0000351 assert( sqlite3BtreeMutexHeld(pCur->pBt->mutex) );
drh980b1a72006-08-16 16:42:48 +0000352 assert( pCur->eState==CURSOR_REQUIRESEEK );
danielk197732a0d8b2007-05-04 19:03:02 +0000353#ifndef SQLITE_OMIT_INCRBLOB
danielk1977dcbb5d32007-05-04 18:36:44 +0000354 if( pCur->isIncrblobHandle ){
355 return SQLITE_ABORT;
356 }
danielk197732a0d8b2007-05-04 19:03:02 +0000357#endif
drh980b1a72006-08-16 16:42:48 +0000358 pCur->eState = CURSOR_INVALID;
drhbf700f32007-03-31 02:36:44 +0000359 rc = sqlite3BtreeMoveto(pCur, pCur->pKey, pCur->nKey, 0, &pCur->skip);
drh980b1a72006-08-16 16:42:48 +0000360 if( rc==SQLITE_OK ){
drh17435752007-08-16 04:30:38 +0000361 sqlite3_free(pCur->pKey);
drh980b1a72006-08-16 16:42:48 +0000362 pCur->pKey = 0;
drhbf700f32007-03-31 02:36:44 +0000363 assert( pCur->eState==CURSOR_VALID || pCur->eState==CURSOR_INVALID );
drh980b1a72006-08-16 16:42:48 +0000364 }
365 return rc;
366}
367
/*
** Restore cursor p to its saved position only if it is in the
** CURSOR_REQUIRESEEK state; otherwise evaluate to SQLITE_OK without
** calling anything.
**
** The argument is parenthesized so that any pointer expression may be
** passed.  It is expanded twice, so it must be free of side effects.
*/
#define restoreOrClearCursorPosition(p) \
  ((p)->eState==CURSOR_REQUIRESEEK ? \
         sqlite3BtreeRestoreOrClearCursorPosition(p) : \
         SQLITE_OK)
drh980b1a72006-08-16 16:42:48 +0000372
danielk1977599fcba2004-11-08 07:13:13 +0000373#ifndef SQLITE_OMIT_AUTOVACUUM
danielk1977afcdd022004-10-31 16:25:42 +0000374/*
drha3152892007-05-05 11:48:52 +0000375** Given a page number of a regular database page, return the page
376** number for the pointer-map page that contains the entry for the
377** input page number.
danielk1977afcdd022004-10-31 16:25:42 +0000378*/
danielk1977266664d2006-02-10 08:24:21 +0000379static Pgno ptrmapPageno(BtShared *pBt, Pgno pgno){
drhd677b3d2007-08-20 22:48:41 +0000380 int nPagesPerMapPage, iPtrMap, ret;
drh27641702007-08-22 02:56:42 +0000381 assert( sqlite3BtreeMutexHeld(pBt->mutex) );
drhd677b3d2007-08-20 22:48:41 +0000382 nPagesPerMapPage = (pBt->usableSize/5)+1;
383 iPtrMap = (pgno-2)/nPagesPerMapPage;
384 ret = (iPtrMap*nPagesPerMapPage) + 2;
danielk1977266664d2006-02-10 08:24:21 +0000385 if( ret==PENDING_BYTE_PAGE(pBt) ){
386 ret++;
387 }
388 return ret;
389}
danielk1977a19df672004-11-03 11:37:07 +0000390
danielk1977afcdd022004-10-31 16:25:42 +0000391/*
danielk1977afcdd022004-10-31 16:25:42 +0000392** Write an entry into the pointer map.
danielk1977687566d2004-11-02 12:56:41 +0000393**
394** This routine updates the pointer map entry for page number 'key'
395** so that it maps to type 'eType' and parent page number 'pgno'.
396** An error code is returned if something goes wrong, otherwise SQLITE_OK.
danielk1977afcdd022004-10-31 16:25:42 +0000397*/
danielk1977aef0bf62005-12-30 16:28:01 +0000398static int ptrmapPut(BtShared *pBt, Pgno key, u8 eType, Pgno parent){
danielk19773b8a05f2007-03-19 17:44:26 +0000399 DbPage *pDbPage; /* The pointer map page */
400 u8 *pPtrmap; /* The pointer map data */
401 Pgno iPtrmap; /* The pointer map page number */
402 int offset; /* Offset in pointer map page */
danielk1977afcdd022004-10-31 16:25:42 +0000403 int rc;
404
drh27641702007-08-22 02:56:42 +0000405 assert( sqlite3BtreeMutexHeld(pBt->mutex) );
danielk1977266664d2006-02-10 08:24:21 +0000406 /* The master-journal page number must never be used as a pointer map page */
407 assert( 0==PTRMAP_ISPAGE(pBt, PENDING_BYTE_PAGE(pBt)) );
408
danielk1977ac11ee62005-01-15 12:45:51 +0000409 assert( pBt->autoVacuum );
danielk1977fdb7cdb2005-01-17 02:12:18 +0000410 if( key==0 ){
drh49285702005-09-17 15:20:26 +0000411 return SQLITE_CORRUPT_BKPT;
danielk1977fdb7cdb2005-01-17 02:12:18 +0000412 }
danielk1977266664d2006-02-10 08:24:21 +0000413 iPtrmap = PTRMAP_PAGENO(pBt, key);
danielk19773b8a05f2007-03-19 17:44:26 +0000414 rc = sqlite3PagerGet(pBt->pPager, iPtrmap, &pDbPage);
danielk1977687566d2004-11-02 12:56:41 +0000415 if( rc!=SQLITE_OK ){
danielk1977afcdd022004-10-31 16:25:42 +0000416 return rc;
417 }
danielk1977266664d2006-02-10 08:24:21 +0000418 offset = PTRMAP_PTROFFSET(pBt, key);
danielk19773b8a05f2007-03-19 17:44:26 +0000419 pPtrmap = (u8 *)sqlite3PagerGetData(pDbPage);
danielk1977afcdd022004-10-31 16:25:42 +0000420
drh615ae552005-01-16 23:21:00 +0000421 if( eType!=pPtrmap[offset] || get4byte(&pPtrmap[offset+1])!=parent ){
422 TRACE(("PTRMAP_UPDATE: %d->(%d,%d)\n", key, eType, parent));
danielk19773b8a05f2007-03-19 17:44:26 +0000423 rc = sqlite3PagerWrite(pDbPage);
danielk19775558a8a2005-01-17 07:53:44 +0000424 if( rc==SQLITE_OK ){
425 pPtrmap[offset] = eType;
426 put4byte(&pPtrmap[offset+1], parent);
danielk1977afcdd022004-10-31 16:25:42 +0000427 }
danielk1977afcdd022004-10-31 16:25:42 +0000428 }
429
danielk19773b8a05f2007-03-19 17:44:26 +0000430 sqlite3PagerUnref(pDbPage);
danielk19775558a8a2005-01-17 07:53:44 +0000431 return rc;
danielk1977afcdd022004-10-31 16:25:42 +0000432}
433
434/*
435** Read an entry from the pointer map.
danielk1977687566d2004-11-02 12:56:41 +0000436**
437** This routine retrieves the pointer map entry for page 'key', writing
438** the type and parent page number to *pEType and *pPgno respectively.
439** An error code is returned if something goes wrong, otherwise SQLITE_OK.
danielk1977afcdd022004-10-31 16:25:42 +0000440*/
danielk1977aef0bf62005-12-30 16:28:01 +0000441static int ptrmapGet(BtShared *pBt, Pgno key, u8 *pEType, Pgno *pPgno){
danielk19773b8a05f2007-03-19 17:44:26 +0000442 DbPage *pDbPage; /* The pointer map page */
danielk1977afcdd022004-10-31 16:25:42 +0000443 int iPtrmap; /* Pointer map page index */
444 u8 *pPtrmap; /* Pointer map page data */
445 int offset; /* Offset of entry in pointer map */
446 int rc;
447
drh27641702007-08-22 02:56:42 +0000448 assert( sqlite3BtreeMutexHeld(pBt->mutex) );
drhd677b3d2007-08-20 22:48:41 +0000449
danielk1977266664d2006-02-10 08:24:21 +0000450 iPtrmap = PTRMAP_PAGENO(pBt, key);
danielk19773b8a05f2007-03-19 17:44:26 +0000451 rc = sqlite3PagerGet(pBt->pPager, iPtrmap, &pDbPage);
danielk1977afcdd022004-10-31 16:25:42 +0000452 if( rc!=0 ){
453 return rc;
454 }
danielk19773b8a05f2007-03-19 17:44:26 +0000455 pPtrmap = (u8 *)sqlite3PagerGetData(pDbPage);
danielk1977afcdd022004-10-31 16:25:42 +0000456
danielk1977266664d2006-02-10 08:24:21 +0000457 offset = PTRMAP_PTROFFSET(pBt, key);
drh43617e92006-03-06 20:55:46 +0000458 assert( pEType!=0 );
459 *pEType = pPtrmap[offset];
danielk1977687566d2004-11-02 12:56:41 +0000460 if( pPgno ) *pPgno = get4byte(&pPtrmap[offset+1]);
danielk1977afcdd022004-10-31 16:25:42 +0000461
danielk19773b8a05f2007-03-19 17:44:26 +0000462 sqlite3PagerUnref(pDbPage);
drh49285702005-09-17 15:20:26 +0000463 if( *pEType<1 || *pEType>5 ) return SQLITE_CORRUPT_BKPT;
danielk1977afcdd022004-10-31 16:25:42 +0000464 return SQLITE_OK;
465}
466
467#endif /* SQLITE_OMIT_AUTOVACUUM */
468
/*
** Return a pointer to the content of cell number iCell on btree page
** pPage.  Cells are numbered from 0.  The cell's 2-byte offset is read
** from the cell pointer array, which starts at pPage->cellOffset.
**
** This only works for pages that do not contain overflow cells.
**
** findCell() is the macro form.  sqlite3BtreeFindCell() is a function
** wrapper, compiled only when SQLITE_TEST is defined, that also asserts
** that iCell is within range.
*/
#define findCell(pPage, iCell) \
  ((pPage)->aData + get2byte(&(pPage)->aData[(pPage)->cellOffset+2*(iCell)]))
#ifdef SQLITE_TEST
u8 *sqlite3BtreeFindCell(MemPage *pPage, int iCell){
  assert( iCell>=0 && iCell<get2byte(&pPage->aData[pPage->hdrOffset+3]) );
  return findCell(pPage, iCell);
}
#endif
drh43605152004-05-29 21:46:49 +0000485
486/*
drh16a9b832007-05-05 18:39:25 +0000487** This a more complex version of sqlite3BtreeFindCell() that works for
drh43605152004-05-29 21:46:49 +0000488** pages that do contain overflow cells. See insert
489*/
490static u8 *findOverflowCell(MemPage *pPage, int iCell){
491 int i;
drh27641702007-08-22 02:56:42 +0000492 assert( sqlite3BtreeMutexHeld(pPage->pBt->mutex) );
drh43605152004-05-29 21:46:49 +0000493 for(i=pPage->nOverflow-1; i>=0; i--){
drh6d08b4d2004-07-20 12:45:22 +0000494 int k;
495 struct _OvflCell *pOvfl;
496 pOvfl = &pPage->aOvfl[i];
497 k = pOvfl->idx;
498 if( k<=iCell ){
499 if( k==iCell ){
500 return pOvfl->pCell;
drh43605152004-05-29 21:46:49 +0000501 }
502 iCell--;
503 }
504 }
danielk19771cc5ed82007-05-16 17:28:43 +0000505 return findCell(pPage, iCell);
drh43605152004-05-29 21:46:49 +0000506}
507
508/*
509** Parse a cell content block and fill in the CellInfo structure. There
drh16a9b832007-05-05 18:39:25 +0000510** are two versions of this function. sqlite3BtreeParseCell() takes a
511** cell index as the second argument and sqlite3BtreeParseCellPtr()
512** takes a pointer to the body of the cell as its second argument.
danielk19771cc5ed82007-05-16 17:28:43 +0000513**
514** Within this file, the parseCell() macro can be called instead of
515** sqlite3BtreeParseCellPtr(). Using some compilers, this will be faster.
drh43605152004-05-29 21:46:49 +0000516*/
drh16a9b832007-05-05 18:39:25 +0000517void sqlite3BtreeParseCellPtr(
drh3aac2dd2004-04-26 14:10:20 +0000518 MemPage *pPage, /* Page containing the cell */
drh43605152004-05-29 21:46:49 +0000519 u8 *pCell, /* Pointer to the cell text. */
drh6f11bef2004-05-13 01:12:56 +0000520 CellInfo *pInfo /* Fill in this structure */
drh3aac2dd2004-04-26 14:10:20 +0000521){
drh271efa52004-05-30 19:19:05 +0000522 int n; /* Number bytes in cell content header */
523 u32 nPayload; /* Number of bytes of cell payload */
drh43605152004-05-29 21:46:49 +0000524
drh27641702007-08-22 02:56:42 +0000525 assert( sqlite3BtreeMutexHeld(pPage->pBt->mutex) );
drhd677b3d2007-08-20 22:48:41 +0000526
drh43605152004-05-29 21:46:49 +0000527 pInfo->pCell = pCell;
drhab01f612004-05-22 02:55:23 +0000528 assert( pPage->leaf==0 || pPage->leaf==1 );
drh271efa52004-05-30 19:19:05 +0000529 n = pPage->childPtrSize;
530 assert( n==4-4*pPage->leaf );
drh8b18dd42004-05-12 19:18:15 +0000531 if( pPage->hasData ){
drh271efa52004-05-30 19:19:05 +0000532 n += getVarint32(&pCell[n], &nPayload);
drh8b18dd42004-05-12 19:18:15 +0000533 }else{
drh271efa52004-05-30 19:19:05 +0000534 nPayload = 0;
drh3aac2dd2004-04-26 14:10:20 +0000535 }
drh271efa52004-05-30 19:19:05 +0000536 pInfo->nData = nPayload;
drh504b6982006-01-22 21:52:56 +0000537 if( pPage->intKey ){
538 n += getVarint(&pCell[n], (u64 *)&pInfo->nKey);
539 }else{
540 u32 x;
541 n += getVarint32(&pCell[n], &x);
542 pInfo->nKey = x;
543 nPayload += x;
drh6f11bef2004-05-13 01:12:56 +0000544 }
drh72365832007-03-06 15:53:44 +0000545 pInfo->nPayload = nPayload;
drh504b6982006-01-22 21:52:56 +0000546 pInfo->nHeader = n;
drh271efa52004-05-30 19:19:05 +0000547 if( nPayload<=pPage->maxLocal ){
548 /* This is the (easy) common case where the entire payload fits
549 ** on the local page. No overflow is required.
550 */
551 int nSize; /* Total size of cell content in bytes */
drh6f11bef2004-05-13 01:12:56 +0000552 pInfo->nLocal = nPayload;
553 pInfo->iOverflow = 0;
drh271efa52004-05-30 19:19:05 +0000554 nSize = nPayload + n;
555 if( nSize<4 ){
556 nSize = 4; /* Minimum cell size is 4 */
drh43605152004-05-29 21:46:49 +0000557 }
drh271efa52004-05-30 19:19:05 +0000558 pInfo->nSize = nSize;
drh6f11bef2004-05-13 01:12:56 +0000559 }else{
drh271efa52004-05-30 19:19:05 +0000560 /* If the payload will not fit completely on the local page, we have
561 ** to decide how much to store locally and how much to spill onto
562 ** overflow pages. The strategy is to minimize the amount of unused
563 ** space on overflow pages while keeping the amount of local storage
564 ** in between minLocal and maxLocal.
565 **
566 ** Warning: changing the way overflow payload is distributed in any
567 ** way will result in an incompatible file format.
568 */
569 int minLocal; /* Minimum amount of payload held locally */
570 int maxLocal; /* Maximum amount of payload held locally */
571 int surplus; /* Overflow payload available for local storage */
572
573 minLocal = pPage->minLocal;
574 maxLocal = pPage->maxLocal;
575 surplus = minLocal + (nPayload - minLocal)%(pPage->pBt->usableSize - 4);
drh6f11bef2004-05-13 01:12:56 +0000576 if( surplus <= maxLocal ){
577 pInfo->nLocal = surplus;
578 }else{
579 pInfo->nLocal = minLocal;
580 }
581 pInfo->iOverflow = pInfo->nLocal + n;
582 pInfo->nSize = pInfo->iOverflow + 4;
583 }
drh3aac2dd2004-04-26 14:10:20 +0000584}
danielk19771cc5ed82007-05-16 17:28:43 +0000585#define parseCell(pPage, iCell, pInfo) \
586 sqlite3BtreeParseCellPtr((pPage), findCell((pPage), (iCell)), (pInfo))
drh16a9b832007-05-05 18:39:25 +0000587void sqlite3BtreeParseCell(
drh43605152004-05-29 21:46:49 +0000588 MemPage *pPage, /* Page containing the cell */
589 int iCell, /* The cell index. First cell is 0 */
590 CellInfo *pInfo /* Fill in this structure */
591){
danielk19771cc5ed82007-05-16 17:28:43 +0000592 parseCell(pPage, iCell, pInfo);
drh43605152004-05-29 21:46:49 +0000593}
drh3aac2dd2004-04-26 14:10:20 +0000594
595/*
drh43605152004-05-29 21:46:49 +0000596** Compute the total number of bytes that a Cell needs in the cell
597** data area of the btree-page. The return number includes the cell
598** data header and the local payload, but not any overflow page or
599** the space used by the cell pointer.
drh3b7511c2001-05-26 13:15:44 +0000600*/
danielk1977bc6ada42004-06-30 08:20:16 +0000601#ifndef NDEBUG
drh43605152004-05-29 21:46:49 +0000602static int cellSize(MemPage *pPage, int iCell){
drh6f11bef2004-05-13 01:12:56 +0000603 CellInfo info;
drh16a9b832007-05-05 18:39:25 +0000604 sqlite3BtreeParseCell(pPage, iCell, &info);
drh43605152004-05-29 21:46:49 +0000605 return info.nSize;
606}
danielk1977bc6ada42004-06-30 08:20:16 +0000607#endif
drh43605152004-05-29 21:46:49 +0000608static int cellSizePtr(MemPage *pPage, u8 *pCell){
609 CellInfo info;
drh16a9b832007-05-05 18:39:25 +0000610 sqlite3BtreeParseCellPtr(pPage, pCell, &info);
drh6f11bef2004-05-13 01:12:56 +0000611 return info.nSize;
drh3b7511c2001-05-26 13:15:44 +0000612}
613
danielk197779a40da2005-01-16 08:00:01 +0000614#ifndef SQLITE_OMIT_AUTOVACUUM
drh3b7511c2001-05-26 13:15:44 +0000615/*
danielk197726836652005-01-17 01:33:13 +0000616** If the cell pCell, part of page pPage contains a pointer
danielk197779a40da2005-01-16 08:00:01 +0000617** to an overflow page, insert an entry into the pointer-map
618** for the overflow page.
danielk1977ac11ee62005-01-15 12:45:51 +0000619*/
danielk197726836652005-01-17 01:33:13 +0000620static int ptrmapPutOvflPtr(MemPage *pPage, u8 *pCell){
danielk197779a40da2005-01-16 08:00:01 +0000621 if( pCell ){
622 CellInfo info;
drh16a9b832007-05-05 18:39:25 +0000623 sqlite3BtreeParseCellPtr(pPage, pCell, &info);
drh72365832007-03-06 15:53:44 +0000624 assert( (info.nData+(pPage->intKey?0:info.nKey))==info.nPayload );
danielk197779a40da2005-01-16 08:00:01 +0000625 if( (info.nData+(pPage->intKey?0:info.nKey))>info.nLocal ){
626 Pgno ovfl = get4byte(&pCell[info.iOverflow]);
627 return ptrmapPut(pPage->pBt, ovfl, PTRMAP_OVERFLOW1, pPage->pgno);
628 }
danielk1977ac11ee62005-01-15 12:45:51 +0000629 }
danielk197779a40da2005-01-16 08:00:01 +0000630 return SQLITE_OK;
danielk1977ac11ee62005-01-15 12:45:51 +0000631}
danielk197726836652005-01-17 01:33:13 +0000632/*
633** If the cell with index iCell on page pPage contains a pointer
634** to an overflow page, insert an entry into the pointer-map
635** for the overflow page.
636*/
637static int ptrmapPutOvfl(MemPage *pPage, int iCell){
638 u8 *pCell;
drh27641702007-08-22 02:56:42 +0000639 assert( sqlite3BtreeMutexHeld(pPage->pBt->mutex) );
danielk197726836652005-01-17 01:33:13 +0000640 pCell = findOverflowCell(pPage, iCell);
641 return ptrmapPutOvflPtr(pPage, pCell);
642}
danielk197779a40da2005-01-16 08:00:01 +0000643#endif
644
danielk1977ac11ee62005-01-15 12:45:51 +0000645
drhda200cc2004-05-09 11:51:38 +0000646/*
drh72f82862001-05-24 21:06:34 +0000647** Defragment the page given. All Cells are moved to the
drh3a4a2d42005-11-24 14:24:28 +0000648** end of the page and all free space is collected into one
649** big FreeBlk that occurs in between the header and cell
drh31beae92005-11-24 14:34:36 +0000650** pointer array and the cell content area.
drh365d68f2001-05-11 11:02:46 +0000651*/
drh2e38c322004-09-03 18:38:44 +0000652static int defragmentPage(MemPage *pPage){
drh43605152004-05-29 21:46:49 +0000653 int i; /* Loop counter */
654 int pc; /* Address of a i-th cell */
655 int addr; /* Offset of first byte after cell pointer array */
656 int hdr; /* Offset to the page header */
657 int size; /* Size of a cell */
658 int usableSize; /* Number of usable bytes on a page */
659 int cellOffset; /* Offset to the cell pointer array */
660 int brk; /* Offset to the cell content area */
661 int nCell; /* Number of cells on the page */
drh2e38c322004-09-03 18:38:44 +0000662 unsigned char *data; /* The page data */
663 unsigned char *temp; /* Temp area for cell content */
drh2af926b2001-05-15 00:39:25 +0000664
danielk19773b8a05f2007-03-19 17:44:26 +0000665 assert( sqlite3PagerIswriteable(pPage->pDbPage) );
drh9e572e62004-04-23 23:43:10 +0000666 assert( pPage->pBt!=0 );
drh90f5ecb2004-07-22 01:19:35 +0000667 assert( pPage->pBt->usableSize <= SQLITE_MAX_PAGE_SIZE );
drh43605152004-05-29 21:46:49 +0000668 assert( pPage->nOverflow==0 );
drh27641702007-08-22 02:56:42 +0000669 assert( sqlite3BtreeMutexHeld(pPage->pBt->mutex) );
drh17435752007-08-16 04:30:38 +0000670 temp = sqlite3_malloc( pPage->pBt->pageSize );
drh2e38c322004-09-03 18:38:44 +0000671 if( temp==0 ) return SQLITE_NOMEM;
drh43605152004-05-29 21:46:49 +0000672 data = pPage->aData;
drh9e572e62004-04-23 23:43:10 +0000673 hdr = pPage->hdrOffset;
drh43605152004-05-29 21:46:49 +0000674 cellOffset = pPage->cellOffset;
675 nCell = pPage->nCell;
676 assert( nCell==get2byte(&data[hdr+3]) );
677 usableSize = pPage->pBt->usableSize;
678 brk = get2byte(&data[hdr+5]);
679 memcpy(&temp[brk], &data[brk], usableSize - brk);
680 brk = usableSize;
681 for(i=0; i<nCell; i++){
682 u8 *pAddr; /* The i-th cell pointer */
683 pAddr = &data[cellOffset + i*2];
684 pc = get2byte(pAddr);
685 assert( pc<pPage->pBt->usableSize );
686 size = cellSizePtr(pPage, &temp[pc]);
687 brk -= size;
688 memcpy(&data[brk], &temp[pc], size);
689 put2byte(pAddr, brk);
drh2af926b2001-05-15 00:39:25 +0000690 }
drh43605152004-05-29 21:46:49 +0000691 assert( brk>=cellOffset+2*nCell );
692 put2byte(&data[hdr+5], brk);
693 data[hdr+1] = 0;
694 data[hdr+2] = 0;
695 data[hdr+7] = 0;
696 addr = cellOffset+2*nCell;
697 memset(&data[addr], 0, brk-addr);
drh17435752007-08-16 04:30:38 +0000698 sqlite3_free(temp);
drh2e38c322004-09-03 18:38:44 +0000699 return SQLITE_OK;
drh365d68f2001-05-11 11:02:46 +0000700}
701
drha059ad02001-04-17 20:09:11 +0000702/*
drh43605152004-05-29 21:46:49 +0000703** Allocate nByte bytes of space on a page.
drhbd03cae2001-06-02 02:40:57 +0000704**
drh9e572e62004-04-23 23:43:10 +0000705** Return the index into pPage->aData[] of the first byte of
drhbd03cae2001-06-02 02:40:57 +0000706** the new allocation. Or return 0 if there is not enough free
707** space on the page to satisfy the allocation request.
drh2af926b2001-05-15 00:39:25 +0000708**
drh72f82862001-05-24 21:06:34 +0000709** If the page contains nBytes of free space but does not contain
drh8b2f49b2001-06-08 00:21:52 +0000710** nBytes of contiguous free space, then this routine automatically
711** calls defragementPage() to consolidate all free space before
712** allocating the new chunk.
drh7e3b0a02001-04-28 16:52:40 +0000713*/
drh9e572e62004-04-23 23:43:10 +0000714static int allocateSpace(MemPage *pPage, int nByte){
drh3aac2dd2004-04-26 14:10:20 +0000715 int addr, pc, hdr;
drh9e572e62004-04-23 23:43:10 +0000716 int size;
drh24cd67e2004-05-10 16:18:47 +0000717 int nFrag;
drh43605152004-05-29 21:46:49 +0000718 int top;
719 int nCell;
720 int cellOffset;
drh9e572e62004-04-23 23:43:10 +0000721 unsigned char *data;
drh43605152004-05-29 21:46:49 +0000722
drh9e572e62004-04-23 23:43:10 +0000723 data = pPage->aData;
danielk19773b8a05f2007-03-19 17:44:26 +0000724 assert( sqlite3PagerIswriteable(pPage->pDbPage) );
drh9e572e62004-04-23 23:43:10 +0000725 assert( pPage->pBt );
drh27641702007-08-22 02:56:42 +0000726 assert( sqlite3BtreeMutexHeld(pPage->pBt->mutex) );
drh9e572e62004-04-23 23:43:10 +0000727 if( nByte<4 ) nByte = 4;
drh43605152004-05-29 21:46:49 +0000728 if( pPage->nFree<nByte || pPage->nOverflow>0 ) return 0;
729 pPage->nFree -= nByte;
drh9e572e62004-04-23 23:43:10 +0000730 hdr = pPage->hdrOffset;
drh43605152004-05-29 21:46:49 +0000731
732 nFrag = data[hdr+7];
733 if( nFrag<60 ){
734 /* Search the freelist looking for a slot big enough to satisfy the
735 ** space request. */
736 addr = hdr+1;
737 while( (pc = get2byte(&data[addr]))>0 ){
738 size = get2byte(&data[pc+2]);
739 if( size>=nByte ){
740 if( size<nByte+4 ){
741 memcpy(&data[addr], &data[pc], 2);
742 data[hdr+7] = nFrag + size - nByte;
743 return pc;
744 }else{
745 put2byte(&data[pc+2], size-nByte);
746 return pc + size - nByte;
747 }
748 }
749 addr = pc;
drh9e572e62004-04-23 23:43:10 +0000750 }
751 }
drh43605152004-05-29 21:46:49 +0000752
753 /* Allocate memory from the gap in between the cell pointer array
754 ** and the cell content area.
755 */
756 top = get2byte(&data[hdr+5]);
757 nCell = get2byte(&data[hdr+3]);
758 cellOffset = pPage->cellOffset;
759 if( nFrag>=60 || cellOffset + 2*nCell > top - nByte ){
drh2e38c322004-09-03 18:38:44 +0000760 if( defragmentPage(pPage) ) return 0;
drh43605152004-05-29 21:46:49 +0000761 top = get2byte(&data[hdr+5]);
drh2af926b2001-05-15 00:39:25 +0000762 }
drh43605152004-05-29 21:46:49 +0000763 top -= nByte;
764 assert( cellOffset + 2*nCell <= top );
765 put2byte(&data[hdr+5], top);
766 return top;
drh7e3b0a02001-04-28 16:52:40 +0000767}
768
769/*
drh9e572e62004-04-23 23:43:10 +0000770** Return a section of the pPage->aData to the freelist.
771** The first byte of the new free block is pPage->aDisk[start]
772** and the size of the block is "size" bytes.
drh306dc212001-05-21 13:45:10 +0000773**
774** Most of the effort here is involved in coalesing adjacent
775** free blocks into a single big free block.
drh7e3b0a02001-04-28 16:52:40 +0000776*/
drh9e572e62004-04-23 23:43:10 +0000777static void freeSpace(MemPage *pPage, int start, int size){
drh43605152004-05-29 21:46:49 +0000778 int addr, pbegin, hdr;
drh9e572e62004-04-23 23:43:10 +0000779 unsigned char *data = pPage->aData;
drh2af926b2001-05-15 00:39:25 +0000780
drh9e572e62004-04-23 23:43:10 +0000781 assert( pPage->pBt!=0 );
danielk19773b8a05f2007-03-19 17:44:26 +0000782 assert( sqlite3PagerIswriteable(pPage->pDbPage) );
drh9e572e62004-04-23 23:43:10 +0000783 assert( start>=pPage->hdrOffset+6+(pPage->leaf?0:4) );
danielk1977bc6ada42004-06-30 08:20:16 +0000784 assert( (start + size)<=pPage->pBt->usableSize );
drh27641702007-08-22 02:56:42 +0000785 assert( sqlite3BtreeMutexHeld(pPage->pBt->mutex) );
drh9e572e62004-04-23 23:43:10 +0000786 if( size<4 ) size = 4;
787
drhfcce93f2006-02-22 03:08:32 +0000788#ifdef SQLITE_SECURE_DELETE
789 /* Overwrite deleted information with zeros when the SECURE_DELETE
790 ** option is enabled at compile-time */
791 memset(&data[start], 0, size);
792#endif
793
drh9e572e62004-04-23 23:43:10 +0000794 /* Add the space back into the linked list of freeblocks */
drh43605152004-05-29 21:46:49 +0000795 hdr = pPage->hdrOffset;
796 addr = hdr + 1;
drh3aac2dd2004-04-26 14:10:20 +0000797 while( (pbegin = get2byte(&data[addr]))<start && pbegin>0 ){
drhb6f41482004-05-14 01:58:11 +0000798 assert( pbegin<=pPage->pBt->usableSize-4 );
drh3aac2dd2004-04-26 14:10:20 +0000799 assert( pbegin>addr );
800 addr = pbegin;
drh2af926b2001-05-15 00:39:25 +0000801 }
drhb6f41482004-05-14 01:58:11 +0000802 assert( pbegin<=pPage->pBt->usableSize-4 );
drh3aac2dd2004-04-26 14:10:20 +0000803 assert( pbegin>addr || pbegin==0 );
drha34b6762004-05-07 13:30:42 +0000804 put2byte(&data[addr], start);
805 put2byte(&data[start], pbegin);
806 put2byte(&data[start+2], size);
drh2af926b2001-05-15 00:39:25 +0000807 pPage->nFree += size;
drh9e572e62004-04-23 23:43:10 +0000808
809 /* Coalesce adjacent free blocks */
drh3aac2dd2004-04-26 14:10:20 +0000810 addr = pPage->hdrOffset + 1;
811 while( (pbegin = get2byte(&data[addr]))>0 ){
drh9e572e62004-04-23 23:43:10 +0000812 int pnext, psize;
drh3aac2dd2004-04-26 14:10:20 +0000813 assert( pbegin>addr );
drh43605152004-05-29 21:46:49 +0000814 assert( pbegin<=pPage->pBt->usableSize-4 );
drh9e572e62004-04-23 23:43:10 +0000815 pnext = get2byte(&data[pbegin]);
816 psize = get2byte(&data[pbegin+2]);
817 if( pbegin + psize + 3 >= pnext && pnext>0 ){
818 int frag = pnext - (pbegin+psize);
drh43605152004-05-29 21:46:49 +0000819 assert( frag<=data[pPage->hdrOffset+7] );
820 data[pPage->hdrOffset+7] -= frag;
drh9e572e62004-04-23 23:43:10 +0000821 put2byte(&data[pbegin], get2byte(&data[pnext]));
822 put2byte(&data[pbegin+2], pnext+get2byte(&data[pnext+2])-pbegin);
823 }else{
drh3aac2dd2004-04-26 14:10:20 +0000824 addr = pbegin;
drh9e572e62004-04-23 23:43:10 +0000825 }
826 }
drh7e3b0a02001-04-28 16:52:40 +0000827
drh43605152004-05-29 21:46:49 +0000828 /* If the cell content area begins with a freeblock, remove it. */
829 if( data[hdr+1]==data[hdr+5] && data[hdr+2]==data[hdr+6] ){
830 int top;
831 pbegin = get2byte(&data[hdr+1]);
832 memcpy(&data[hdr+1], &data[pbegin], 2);
833 top = get2byte(&data[hdr+5]);
834 put2byte(&data[hdr+5], top + get2byte(&data[pbegin+2]));
drh4b70f112004-05-02 21:12:19 +0000835 }
drh4b70f112004-05-02 21:12:19 +0000836}
837
838/*
drh271efa52004-05-30 19:19:05 +0000839** Decode the flags byte (the first byte of the header) for a page
840** and initialize fields of the MemPage structure accordingly.
841*/
842static void decodeFlags(MemPage *pPage, int flagByte){
danielk1977aef0bf62005-12-30 16:28:01 +0000843 BtShared *pBt; /* A copy of pPage->pBt */
drh271efa52004-05-30 19:19:05 +0000844
845 assert( pPage->hdrOffset==(pPage->pgno==1 ? 100 : 0) );
drh27641702007-08-22 02:56:42 +0000846 assert( sqlite3BtreeMutexHeld(pPage->pBt->mutex) );
drh271efa52004-05-30 19:19:05 +0000847 pPage->intKey = (flagByte & (PTF_INTKEY|PTF_LEAFDATA))!=0;
848 pPage->zeroData = (flagByte & PTF_ZERODATA)!=0;
849 pPage->leaf = (flagByte & PTF_LEAF)!=0;
850 pPage->childPtrSize = 4*(pPage->leaf==0);
851 pBt = pPage->pBt;
852 if( flagByte & PTF_LEAFDATA ){
853 pPage->leafData = 1;
854 pPage->maxLocal = pBt->maxLeaf;
855 pPage->minLocal = pBt->minLeaf;
856 }else{
857 pPage->leafData = 0;
858 pPage->maxLocal = pBt->maxLocal;
859 pPage->minLocal = pBt->minLocal;
860 }
861 pPage->hasData = !(pPage->zeroData || (!pPage->leaf && pPage->leafData));
862}
863
864/*
drh7e3b0a02001-04-28 16:52:40 +0000865** Initialize the auxiliary information for a disk block.
drh72f82862001-05-24 21:06:34 +0000866**
drhbd03cae2001-06-02 02:40:57 +0000867** The pParent parameter must be a pointer to the MemPage which
drh9e572e62004-04-23 23:43:10 +0000868** is the parent of the page being initialized. The root of a
869** BTree has no parent and so for that page, pParent==NULL.
drh5e2f8b92001-05-28 00:41:15 +0000870**
drh72f82862001-05-24 21:06:34 +0000871** Return SQLITE_OK on success. If we see that the page does
drhda47d772002-12-02 04:25:19 +0000872** not contain a well-formed database page, then return
drh72f82862001-05-24 21:06:34 +0000873** SQLITE_CORRUPT. Note that a return of SQLITE_OK does not
874** guarantee that the page is well-formed. It only shows that
875** we failed to detect any corruption.
drh7e3b0a02001-04-28 16:52:40 +0000876*/
drh16a9b832007-05-05 18:39:25 +0000877int sqlite3BtreeInitPage(
drh3aac2dd2004-04-26 14:10:20 +0000878 MemPage *pPage, /* The page to be initialized */
drh9e572e62004-04-23 23:43:10 +0000879 MemPage *pParent /* The parent. Might be NULL */
880){
drh271efa52004-05-30 19:19:05 +0000881 int pc; /* Address of a freeblock within pPage->aData[] */
drh271efa52004-05-30 19:19:05 +0000882 int hdr; /* Offset to beginning of page header */
883 u8 *data; /* Equal to pPage->aData */
danielk1977aef0bf62005-12-30 16:28:01 +0000884 BtShared *pBt; /* The main btree structure */
drh271efa52004-05-30 19:19:05 +0000885 int usableSize; /* Amount of usable space on each page */
886 int cellOffset; /* Offset from start of page to first cell pointer */
887 int nFree; /* Number of unused bytes on the page */
888 int top; /* First byte of the cell content area */
drh2af926b2001-05-15 00:39:25 +0000889
drh2e38c322004-09-03 18:38:44 +0000890 pBt = pPage->pBt;
891 assert( pBt!=0 );
892 assert( pParent==0 || pParent->pBt==pBt );
drh27641702007-08-22 02:56:42 +0000893 assert( sqlite3BtreeMutexHeld(pBt->mutex) );
danielk19773b8a05f2007-03-19 17:44:26 +0000894 assert( pPage->pgno==sqlite3PagerPagenumber(pPage->pDbPage) );
drh07d183d2005-05-01 22:52:42 +0000895 assert( pPage->aData == &((unsigned char*)pPage)[-pBt->pageSize] );
drhee696e22004-08-30 16:52:17 +0000896 if( pPage->pParent!=pParent && (pPage->pParent!=0 || pPage->isInit) ){
897 /* The parent page should never change unless the file is corrupt */
drh49285702005-09-17 15:20:26 +0000898 return SQLITE_CORRUPT_BKPT;
drhee696e22004-08-30 16:52:17 +0000899 }
drh10617cd2004-05-14 15:27:27 +0000900 if( pPage->isInit ) return SQLITE_OK;
drhda200cc2004-05-09 11:51:38 +0000901 if( pPage->pParent==0 && pParent!=0 ){
902 pPage->pParent = pParent;
danielk19773b8a05f2007-03-19 17:44:26 +0000903 sqlite3PagerRef(pParent->pDbPage);
drh5e2f8b92001-05-28 00:41:15 +0000904 }
drhde647132004-05-07 17:57:49 +0000905 hdr = pPage->hdrOffset;
drha34b6762004-05-07 13:30:42 +0000906 data = pPage->aData;
drh271efa52004-05-30 19:19:05 +0000907 decodeFlags(pPage, data[hdr]);
drh43605152004-05-29 21:46:49 +0000908 pPage->nOverflow = 0;
drhc8629a12004-05-08 20:07:40 +0000909 pPage->idxShift = 0;
drh2e38c322004-09-03 18:38:44 +0000910 usableSize = pBt->usableSize;
drh43605152004-05-29 21:46:49 +0000911 pPage->cellOffset = cellOffset = hdr + 12 - 4*pPage->leaf;
912 top = get2byte(&data[hdr+5]);
913 pPage->nCell = get2byte(&data[hdr+3]);
drh2e38c322004-09-03 18:38:44 +0000914 if( pPage->nCell>MX_CELL(pBt) ){
drhee696e22004-08-30 16:52:17 +0000915 /* To many cells for a single page. The page must be corrupt */
drh49285702005-09-17 15:20:26 +0000916 return SQLITE_CORRUPT_BKPT;
drhee696e22004-08-30 16:52:17 +0000917 }
918 if( pPage->nCell==0 && pParent!=0 && pParent->pgno!=1 ){
919 /* All pages must have at least one cell, except for root pages */
drh49285702005-09-17 15:20:26 +0000920 return SQLITE_CORRUPT_BKPT;
drhee696e22004-08-30 16:52:17 +0000921 }
drh9e572e62004-04-23 23:43:10 +0000922
923 /* Compute the total free space on the page */
drh9e572e62004-04-23 23:43:10 +0000924 pc = get2byte(&data[hdr+1]);
drh43605152004-05-29 21:46:49 +0000925 nFree = data[hdr+7] + top - (cellOffset + 2*pPage->nCell);
drh9e572e62004-04-23 23:43:10 +0000926 while( pc>0 ){
927 int next, size;
drhee696e22004-08-30 16:52:17 +0000928 if( pc>usableSize-4 ){
929 /* Free block is off the page */
drh49285702005-09-17 15:20:26 +0000930 return SQLITE_CORRUPT_BKPT;
drhee696e22004-08-30 16:52:17 +0000931 }
drh9e572e62004-04-23 23:43:10 +0000932 next = get2byte(&data[pc]);
933 size = get2byte(&data[pc+2]);
drhee696e22004-08-30 16:52:17 +0000934 if( next>0 && next<=pc+size+3 ){
935 /* Free blocks must be in accending order */
drh49285702005-09-17 15:20:26 +0000936 return SQLITE_CORRUPT_BKPT;
drhee696e22004-08-30 16:52:17 +0000937 }
drh3add3672004-05-15 00:29:24 +0000938 nFree += size;
drh9e572e62004-04-23 23:43:10 +0000939 pc = next;
940 }
drh3add3672004-05-15 00:29:24 +0000941 pPage->nFree = nFree;
drhee696e22004-08-30 16:52:17 +0000942 if( nFree>=usableSize ){
943 /* Free space cannot exceed total page size */
drh49285702005-09-17 15:20:26 +0000944 return SQLITE_CORRUPT_BKPT;
drhee696e22004-08-30 16:52:17 +0000945 }
drh9e572e62004-04-23 23:43:10 +0000946
drhde647132004-05-07 17:57:49 +0000947 pPage->isInit = 1;
drh9e572e62004-04-23 23:43:10 +0000948 return SQLITE_OK;
drh7e3b0a02001-04-28 16:52:40 +0000949}
950
951/*
drh8b2f49b2001-06-08 00:21:52 +0000952** Set up a raw page so that it looks like a database page holding
953** no entries.
drhbd03cae2001-06-02 02:40:57 +0000954*/
drh9e572e62004-04-23 23:43:10 +0000955static void zeroPage(MemPage *pPage, int flags){
956 unsigned char *data = pPage->aData;
danielk1977aef0bf62005-12-30 16:28:01 +0000957 BtShared *pBt = pPage->pBt;
drh3aac2dd2004-04-26 14:10:20 +0000958 int hdr = pPage->hdrOffset;
drh9e572e62004-04-23 23:43:10 +0000959 int first;
960
danielk19773b8a05f2007-03-19 17:44:26 +0000961 assert( sqlite3PagerPagenumber(pPage->pDbPage)==pPage->pgno );
drh07d183d2005-05-01 22:52:42 +0000962 assert( &data[pBt->pageSize] == (unsigned char*)pPage );
danielk19773b8a05f2007-03-19 17:44:26 +0000963 assert( sqlite3PagerIswriteable(pPage->pDbPage) );
drh27641702007-08-22 02:56:42 +0000964 assert( sqlite3BtreeMutexHeld(pBt->mutex) );
drhb6f41482004-05-14 01:58:11 +0000965 memset(&data[hdr], 0, pBt->usableSize - hdr);
drh9e572e62004-04-23 23:43:10 +0000966 data[hdr] = flags;
drh43605152004-05-29 21:46:49 +0000967 first = hdr + 8 + 4*((flags&PTF_LEAF)==0);
968 memset(&data[hdr+1], 0, 4);
969 data[hdr+7] = 0;
970 put2byte(&data[hdr+5], pBt->usableSize);
drhb6f41482004-05-14 01:58:11 +0000971 pPage->nFree = pBt->usableSize - first;
drh271efa52004-05-30 19:19:05 +0000972 decodeFlags(pPage, flags);
drh9e572e62004-04-23 23:43:10 +0000973 pPage->hdrOffset = hdr;
drh43605152004-05-29 21:46:49 +0000974 pPage->cellOffset = first;
975 pPage->nOverflow = 0;
drhda200cc2004-05-09 11:51:38 +0000976 pPage->idxShift = 0;
drh43605152004-05-29 21:46:49 +0000977 pPage->nCell = 0;
drhda200cc2004-05-09 11:51:38 +0000978 pPage->isInit = 1;
drhbd03cae2001-06-02 02:40:57 +0000979}
980
981/*
drh3aac2dd2004-04-26 14:10:20 +0000982** Get a page from the pager. Initialize the MemPage.pBt and
983** MemPage.aData elements if needed.
drh538f5702007-04-13 02:14:30 +0000984**
985** If the noContent flag is set, it means that we do not care about
986** the content of the page at this time. So do not go to the disk
987** to fetch the content. Just fill in the content with zeros for now.
988** If in the future we call sqlite3PagerWrite() on this page, that
989** means we have started to be concerned about content and the disk
990** read should occur at that point.
drh3aac2dd2004-04-26 14:10:20 +0000991*/
drh16a9b832007-05-05 18:39:25 +0000992int sqlite3BtreeGetPage(
993 BtShared *pBt, /* The btree */
994 Pgno pgno, /* Number of the page to fetch */
995 MemPage **ppPage, /* Return the page in this parameter */
996 int noContent /* Do not load page content if true */
997){
drh3aac2dd2004-04-26 14:10:20 +0000998 int rc;
drh3aac2dd2004-04-26 14:10:20 +0000999 MemPage *pPage;
danielk19773b8a05f2007-03-19 17:44:26 +00001000 DbPage *pDbPage;
1001
drh27641702007-08-22 02:56:42 +00001002 assert( sqlite3BtreeMutexHeld(pBt->mutex) );
drh538f5702007-04-13 02:14:30 +00001003 rc = sqlite3PagerAcquire(pBt->pPager, pgno, (DbPage**)&pDbPage, noContent);
drh3aac2dd2004-04-26 14:10:20 +00001004 if( rc ) return rc;
danielk19773b8a05f2007-03-19 17:44:26 +00001005 pPage = (MemPage *)sqlite3PagerGetExtra(pDbPage);
1006 pPage->aData = sqlite3PagerGetData(pDbPage);
1007 pPage->pDbPage = pDbPage;
drh3aac2dd2004-04-26 14:10:20 +00001008 pPage->pBt = pBt;
1009 pPage->pgno = pgno;
drhde647132004-05-07 17:57:49 +00001010 pPage->hdrOffset = pPage->pgno==1 ? 100 : 0;
drh3aac2dd2004-04-26 14:10:20 +00001011 *ppPage = pPage;
1012 return SQLITE_OK;
1013}
1014
1015/*
drhde647132004-05-07 17:57:49 +00001016** Get a page from the pager and initialize it. This routine
1017** is just a convenience wrapper around separate calls to
drh16a9b832007-05-05 18:39:25 +00001018** sqlite3BtreeGetPage() and sqlite3BtreeInitPage().
drhde647132004-05-07 17:57:49 +00001019*/
1020static int getAndInitPage(
danielk1977aef0bf62005-12-30 16:28:01 +00001021 BtShared *pBt, /* The database file */
drhde647132004-05-07 17:57:49 +00001022 Pgno pgno, /* Number of the page to get */
1023 MemPage **ppPage, /* Write the page pointer here */
1024 MemPage *pParent /* Parent of the page */
1025){
1026 int rc;
drh27641702007-08-22 02:56:42 +00001027 assert( sqlite3BtreeMutexHeld(pBt->mutex) );
drhee696e22004-08-30 16:52:17 +00001028 if( pgno==0 ){
drh49285702005-09-17 15:20:26 +00001029 return SQLITE_CORRUPT_BKPT;
drhee696e22004-08-30 16:52:17 +00001030 }
drh16a9b832007-05-05 18:39:25 +00001031 rc = sqlite3BtreeGetPage(pBt, pgno, ppPage, 0);
drh10617cd2004-05-14 15:27:27 +00001032 if( rc==SQLITE_OK && (*ppPage)->isInit==0 ){
drh16a9b832007-05-05 18:39:25 +00001033 rc = sqlite3BtreeInitPage(*ppPage, pParent);
drhde647132004-05-07 17:57:49 +00001034 }
1035 return rc;
1036}
1037
1038/*
drh3aac2dd2004-04-26 14:10:20 +00001039** Release a MemPage. This should be called once for each prior
drh16a9b832007-05-05 18:39:25 +00001040** call to sqlite3BtreeGetPage.
drh3aac2dd2004-04-26 14:10:20 +00001041*/
drh4b70f112004-05-02 21:12:19 +00001042static void releasePage(MemPage *pPage){
drh3aac2dd2004-04-26 14:10:20 +00001043 if( pPage ){
1044 assert( pPage->aData );
1045 assert( pPage->pBt );
drh07d183d2005-05-01 22:52:42 +00001046 assert( &pPage->aData[pPage->pBt->pageSize]==(unsigned char*)pPage );
drh27641702007-08-22 02:56:42 +00001047 assert( sqlite3BtreeMutexHeld(pPage->pBt->mutex) );
danielk19773b8a05f2007-03-19 17:44:26 +00001048 sqlite3PagerUnref(pPage->pDbPage);
drh3aac2dd2004-04-26 14:10:20 +00001049 }
1050}
1051
1052/*
drh72f82862001-05-24 21:06:34 +00001053** This routine is called when the reference count for a page
1054** reaches zero. We need to unref the pParent pointer when that
1055** happens.
1056*/
danielk19773b8a05f2007-03-19 17:44:26 +00001057static void pageDestructor(DbPage *pData, int pageSize){
drh07d183d2005-05-01 22:52:42 +00001058 MemPage *pPage;
1059 assert( (pageSize & 7)==0 );
danielk19773b8a05f2007-03-19 17:44:26 +00001060 pPage = (MemPage *)sqlite3PagerGetExtra(pData);
drhd0679ed2007-08-28 22:24:34 +00001061 assert( pPage->isInit==0 || sqlite3BtreeMutexHeld(pPage->pBt->mutex) );
drh72f82862001-05-24 21:06:34 +00001062 if( pPage->pParent ){
1063 MemPage *pParent = pPage->pParent;
drhd0679ed2007-08-28 22:24:34 +00001064 assert( pPage->isInit==1 );
1065 assert( pParent->pBt==pPage->pBt );
drh72f82862001-05-24 21:06:34 +00001066 pPage->pParent = 0;
drha34b6762004-05-07 13:30:42 +00001067 releasePage(pParent);
drh72f82862001-05-24 21:06:34 +00001068 }
drh3aac2dd2004-04-26 14:10:20 +00001069 pPage->isInit = 0;
drh72f82862001-05-24 21:06:34 +00001070}
1071
1072/*
drha6abd042004-06-09 17:37:22 +00001073** During a rollback, when the pager reloads information into the cache
1074** so that the cache is restored to its original state at the start of
1075** the transaction, for each page restored this routine is called.
1076**
1077** This routine needs to reset the extra data section at the end of the
1078** page to agree with the restored data.
1079*/
danielk19773b8a05f2007-03-19 17:44:26 +00001080static void pageReinit(DbPage *pData, int pageSize){
drh07d183d2005-05-01 22:52:42 +00001081 MemPage *pPage;
1082 assert( (pageSize & 7)==0 );
danielk19773b8a05f2007-03-19 17:44:26 +00001083 pPage = (MemPage *)sqlite3PagerGetExtra(pData);
drha6abd042004-06-09 17:37:22 +00001084 if( pPage->isInit ){
drh27641702007-08-22 02:56:42 +00001085 assert( sqlite3BtreeMutexHeld(pPage->pBt->mutex) );
drha6abd042004-06-09 17:37:22 +00001086 pPage->isInit = 0;
drh16a9b832007-05-05 18:39:25 +00001087 sqlite3BtreeInitPage(pPage, pPage->pParent);
drha6abd042004-06-09 17:37:22 +00001088 }
1089}
1090
1091/*
drhad3e0102004-09-03 23:32:18 +00001092** Open a database file.
1093**
drh382c0242001-10-06 16:33:02 +00001094** zFilename is the name of the database file. If zFilename is NULL
drh1bee3d72001-10-15 00:44:35 +00001095** a new database with a random name is created. This randomly named
drh23e11ca2004-05-04 17:27:28 +00001096** database file will be deleted when sqlite3BtreeClose() is called.
drhe53831d2007-08-17 01:14:38 +00001097** If zFilename is ":memory:" then an in-memory database is created
1098** that is automatically destroyed when it is closed.
drha059ad02001-04-17 20:09:11 +00001099*/
drh23e11ca2004-05-04 17:27:28 +00001100int sqlite3BtreeOpen(
drh3aac2dd2004-04-26 14:10:20 +00001101 const char *zFilename, /* Name of the file containing the BTree database */
danielk1977aef0bf62005-12-30 16:28:01 +00001102 sqlite3 *pSqlite, /* Associated database handle */
drh3aac2dd2004-04-26 14:10:20 +00001103 Btree **ppBtree, /* Pointer to new Btree object written here */
drh90f5ecb2004-07-22 01:19:35 +00001104 int flags /* Options */
drh6019e162001-07-02 17:51:45 +00001105){
drhd677b3d2007-08-20 22:48:41 +00001106 sqlite3_vfs *pVfs; /* The VFS to use for this btree */
drhe53831d2007-08-17 01:14:38 +00001107 BtShared *pBt = 0; /* Shared part of btree structure */
danielk1977aef0bf62005-12-30 16:28:01 +00001108 Btree *p; /* Handle to return */
danielk1977dddbcdc2007-04-26 14:42:34 +00001109 int rc = SQLITE_OK;
drh90f5ecb2004-07-22 01:19:35 +00001110 int nReserve;
1111 unsigned char zDbHeader[100];
danielk1977aef0bf62005-12-30 16:28:01 +00001112
1113 /* Set the variable isMemdb to true for an in-memory database, or
1114 ** false for a file-based database. This symbol is only required if
1115 ** either of the shared-data or autovacuum features are compiled
1116 ** into the library.
1117 */
1118#if !defined(SQLITE_OMIT_SHARED_CACHE) || !defined(SQLITE_OMIT_AUTOVACUUM)
1119 #ifdef SQLITE_OMIT_MEMORYDB
drh980b1a72006-08-16 16:42:48 +00001120 const int isMemdb = 0;
danielk1977aef0bf62005-12-30 16:28:01 +00001121 #else
drh980b1a72006-08-16 16:42:48 +00001122 const int isMemdb = zFilename && !strcmp(zFilename, ":memory:");
danielk1977aef0bf62005-12-30 16:28:01 +00001123 #endif
1124#endif
1125
drhd0679ed2007-08-28 22:24:34 +00001126 assert( pSqlite!=0 );
1127 assert( sqlite3_mutex_held(pSqlite->mutex) );
drh153c62c2007-08-24 03:51:33 +00001128
drhd0679ed2007-08-28 22:24:34 +00001129 pVfs = pSqlite->pVfs;
drh17435752007-08-16 04:30:38 +00001130 p = sqlite3MallocZero(sizeof(Btree));
danielk1977aef0bf62005-12-30 16:28:01 +00001131 if( !p ){
1132 return SQLITE_NOMEM;
1133 }
1134 p->inTrans = TRANS_NONE;
1135 p->pSqlite = pSqlite;
1136
drh198bf392006-01-06 21:52:49 +00001137#if !defined(SQLITE_OMIT_SHARED_CACHE) && !defined(SQLITE_OMIT_DISKIO)
drhe53831d2007-08-17 01:14:38 +00001138 /*
1139 ** If this Btree is a candidate for shared cache, try to find an
1140 ** existing BtShared object that we can share with
1141 */
1142 if( (flags & BTREE_PRIVATE)==0
1143 && isMemdb==0
drhd0679ed2007-08-28 22:24:34 +00001144 && (pSqlite->flags & SQLITE_Vtab)==0
drhe53831d2007-08-17 01:14:38 +00001145 && zFilename && zFilename[0]
1146 && sqlite3SharedCacheEnabled
1147 ){
danielk197790949c22007-08-17 16:50:38 +00001148 char *zFullPathname = (char *)sqlite3_malloc(pVfs->mxPathname);
drhe53831d2007-08-17 01:14:38 +00001149 sqlite3_mutex *mutexShared;
1150 p->sharable = 1;
drh4a50aac2007-08-23 02:47:53 +00001151 if( pSqlite ){
1152 pSqlite->flags |= SQLITE_SharedCache;
1153 }
danielk1977aef0bf62005-12-30 16:28:01 +00001154 if( !zFullPathname ){
drh17435752007-08-16 04:30:38 +00001155 sqlite3_free(p);
danielk1977aef0bf62005-12-30 16:28:01 +00001156 return SQLITE_NOMEM;
1157 }
danielk197790949c22007-08-17 16:50:38 +00001158 sqlite3OsFullPathname(pVfs, zFilename, zFullPathname);
drhe53831d2007-08-17 01:14:38 +00001159 mutexShared = sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_MASTER);
1160 sqlite3_mutex_enter(mutexShared);
1161 for(pBt=sqlite3SharedCacheList; pBt; pBt=pBt->pNext){
danielk1977b82e7ed2006-01-11 14:09:31 +00001162 assert( pBt->nRef>0 );
drhd0679ed2007-08-28 22:24:34 +00001163 if( 0==strcmp(zFullPathname, sqlite3PagerFilename(pBt->pPager))
1164 && sqlite3PagerVfs(pBt->pPager)==pVfs ){
danielk1977aef0bf62005-12-30 16:28:01 +00001165 p->pBt = pBt;
danielk1977aef0bf62005-12-30 16:28:01 +00001166 pBt->nRef++;
drhe53831d2007-08-17 01:14:38 +00001167 break;
danielk1977aef0bf62005-12-30 16:28:01 +00001168 }
1169 }
drhe53831d2007-08-17 01:14:38 +00001170 sqlite3_mutex_leave(mutexShared);
drh17435752007-08-16 04:30:38 +00001171 sqlite3_free(zFullPathname);
danielk1977aef0bf62005-12-30 16:28:01 +00001172 }
1173#endif
drha059ad02001-04-17 20:09:11 +00001174 if( pBt==0 ){
drhe53831d2007-08-17 01:14:38 +00001175 /*
1176 ** The following asserts make sure that structures used by the btree are
1177 ** the right size. This is to guard against size changes that result
1178 ** when compiling on a different architecture.
danielk197703aded42004-11-22 05:26:27 +00001179 */
drhe53831d2007-08-17 01:14:38 +00001180 assert( sizeof(i64)==8 || sizeof(i64)==4 );
1181 assert( sizeof(u64)==8 || sizeof(u64)==4 );
1182 assert( sizeof(u32)==4 );
1183 assert( sizeof(u16)==2 );
1184 assert( sizeof(Pgno)==4 );
1185
1186 pBt = sqlite3MallocZero( sizeof(*pBt) );
1187 if( pBt==0 ){
1188 rc = SQLITE_NOMEM;
1189 goto btree_open_out;
1190 }
danielk1977b4b47412007-08-17 15:53:36 +00001191 rc = sqlite3PagerOpen(pVfs, &pBt->pPager, zFilename, EXTRA_SIZE, flags);
drhe53831d2007-08-17 01:14:38 +00001192 if( rc==SQLITE_OK ){
1193 rc = sqlite3PagerReadFileheader(pBt->pPager,sizeof(zDbHeader),zDbHeader);
1194 }
1195 if( rc!=SQLITE_OK ){
1196 goto btree_open_out;
1197 }
1198 p->pBt = pBt;
1199
1200 sqlite3PagerSetDestructor(pBt->pPager, pageDestructor);
1201 sqlite3PagerSetReiniter(pBt->pPager, pageReinit);
1202 pBt->pCursor = 0;
1203 pBt->pPage1 = 0;
1204 pBt->readOnly = sqlite3PagerIsreadonly(pBt->pPager);
1205 pBt->pageSize = get2byte(&zDbHeader[16]);
1206 if( pBt->pageSize<512 || pBt->pageSize>SQLITE_MAX_PAGE_SIZE
1207 || ((pBt->pageSize-1)&pBt->pageSize)!=0 ){
danielk19779663b8f2007-08-24 11:52:28 +00001208 pBt->pageSize = sqlite3PagerSetPagesize(pBt->pPager, 0);
drhe53831d2007-08-17 01:14:38 +00001209 pBt->maxEmbedFrac = 64; /* 25% */
1210 pBt->minEmbedFrac = 32; /* 12.5% */
1211 pBt->minLeafFrac = 32; /* 12.5% */
1212#ifndef SQLITE_OMIT_AUTOVACUUM
1213 /* If the magic name ":memory:" will create an in-memory database, then
1214 ** leave the autoVacuum mode at 0 (do not auto-vacuum), even if
1215 ** SQLITE_DEFAULT_AUTOVACUUM is true. On the other hand, if
1216 ** SQLITE_OMIT_MEMORYDB has been defined, then ":memory:" is just a
1217 ** regular file-name. In this case the auto-vacuum applies as per normal.
1218 */
1219 if( zFilename && !isMemdb ){
1220 pBt->autoVacuum = (SQLITE_DEFAULT_AUTOVACUUM ? 1 : 0);
1221 pBt->incrVacuum = (SQLITE_DEFAULT_AUTOVACUUM==2 ? 1 : 0);
1222 }
1223#endif
1224 nReserve = 0;
1225 }else{
1226 nReserve = zDbHeader[20];
1227 pBt->maxEmbedFrac = zDbHeader[21];
1228 pBt->minEmbedFrac = zDbHeader[22];
1229 pBt->minLeafFrac = zDbHeader[23];
1230 pBt->pageSizeFixed = 1;
1231#ifndef SQLITE_OMIT_AUTOVACUUM
1232 pBt->autoVacuum = (get4byte(&zDbHeader[36 + 4*4])?1:0);
1233 pBt->incrVacuum = (get4byte(&zDbHeader[36 + 7*4])?1:0);
1234#endif
1235 }
1236 pBt->usableSize = pBt->pageSize - nReserve;
1237 assert( (pBt->pageSize & 7)==0 ); /* 8-byte alignment of pageSize */
1238 sqlite3PagerSetPagesize(pBt->pPager, pBt->pageSize);
1239
1240#if !defined(SQLITE_OMIT_SHARED_CACHE) && !defined(SQLITE_OMIT_DISKIO)
1241 /* Add the new BtShared object to the linked list sharable BtShareds.
1242 */
1243 if( p->sharable ){
1244 sqlite3_mutex *mutexShared;
1245 pBt->nRef = 1;
1246 mutexShared = sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_MASTER);
1247 pBt->mutex = sqlite3_mutex_alloc(SQLITE_MUTEX_FAST);
1248 sqlite3_mutex_enter(mutexShared);
1249 pBt->pNext = sqlite3SharedCacheList;
1250 sqlite3SharedCacheList = pBt;
1251 sqlite3_mutex_leave(mutexShared);
danielk1977951af802004-11-05 15:45:09 +00001252 }
drheee46cf2004-11-06 00:02:48 +00001253#endif
drh90f5ecb2004-07-22 01:19:35 +00001254 }
danielk1977aef0bf62005-12-30 16:28:01 +00001255
drhcfed7bc2006-03-13 14:28:05 +00001256#if !defined(SQLITE_OMIT_SHARED_CACHE) && !defined(SQLITE_OMIT_DISKIO)
drhe53831d2007-08-17 01:14:38 +00001257 /* If the new Btree uses a sharable pBtShared, then link the new
1258 ** Btree into the list of all sharable Btrees for the same connection.
drhabddb0c2007-08-20 13:14:28 +00001259 ** The list is kept in ascending order by pBt address.
danielk197754f01982006-01-18 15:25:17 +00001260 */
drhe53831d2007-08-17 01:14:38 +00001261 if( p->sharable ){
1262 int i;
1263 Btree *pSib;
1264 for(i=0; i<pSqlite->nDb; i++){
1265 if( (pSib = pSqlite->aDb[i].pBt)!=0 && pSib->sharable ){
1266 while( pSib->pPrev ){ pSib = pSib->pPrev; }
1267 if( p->pBt<pSib->pBt ){
1268 p->pNext = pSib;
1269 p->pPrev = 0;
1270 pSib->pPrev = p;
1271 }else{
drhabddb0c2007-08-20 13:14:28 +00001272 while( pSib->pNext && pSib->pNext->pBt<p->pBt ){
drhe53831d2007-08-17 01:14:38 +00001273 pSib = pSib->pNext;
1274 }
1275 p->pNext = pSib->pNext;
1276 p->pPrev = pSib;
1277 if( p->pNext ){
1278 p->pNext->pPrev = p;
1279 }
1280 pSib->pNext = p;
1281 }
1282 break;
1283 }
1284 }
danielk1977aef0bf62005-12-30 16:28:01 +00001285 }
danielk1977aef0bf62005-12-30 16:28:01 +00001286#endif
1287 *ppBtree = p;
danielk1977dddbcdc2007-04-26 14:42:34 +00001288
1289btree_open_out:
1290 if( rc!=SQLITE_OK ){
1291 if( pBt && pBt->pPager ){
1292 sqlite3PagerClose(pBt->pPager);
1293 }
drh17435752007-08-16 04:30:38 +00001294 sqlite3_free(pBt);
1295 sqlite3_free(p);
danielk1977dddbcdc2007-04-26 14:42:34 +00001296 *ppBtree = 0;
1297 }
1298 return rc;
drha059ad02001-04-17 20:09:11 +00001299}
1300
1301/*
drhe53831d2007-08-17 01:14:38 +00001302** Decrement the BtShared.nRef counter. When it reaches zero,
1303** remove the BtShared structure from the sharing list. Return
1304** true if the BtShared.nRef counter reaches zero and return
1305** false if it is still positive.
1306*/
1307static int removeFromSharingList(BtShared *pBt){
1308#ifndef SQLITE_OMIT_SHARED_CACHE
1309 sqlite3_mutex *pMaster;
1310 BtShared *pList;
1311 int removed = 0;
1312
drhd677b3d2007-08-20 22:48:41 +00001313 assert( sqlite3_mutex_notheld(pBt->mutex) );
drhe53831d2007-08-17 01:14:38 +00001314 pMaster = sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_MASTER);
1315 sqlite3_mutex_enter(pMaster);
1316 pBt->nRef--;
1317 if( pBt->nRef<=0 ){
1318 if( sqlite3SharedCacheList==pBt ){
1319 sqlite3SharedCacheList = pBt->pNext;
1320 }else{
1321 pList = sqlite3SharedCacheList;
1322 while( pList && pList->pNext!=pBt ){
1323 pList=pList->pNext;
1324 }
1325 if( pList ){
1326 pList->pNext = pBt->pNext;
1327 }
1328 }
1329 sqlite3_mutex_free(pBt->mutex);
1330 removed = 1;
1331 }
1332 sqlite3_mutex_leave(pMaster);
1333 return removed;
1334#else
1335 return 1;
1336#endif
1337}
1338
1339/*
drha059ad02001-04-17 20:09:11 +00001340** Close an open database and invalidate all cursors.
1341*/
danielk1977aef0bf62005-12-30 16:28:01 +00001342int sqlite3BtreeClose(Btree *p){
danielk1977aef0bf62005-12-30 16:28:01 +00001343 BtShared *pBt = p->pBt;
1344 BtCursor *pCur;
1345
danielk1977aef0bf62005-12-30 16:28:01 +00001346 /* Close all cursors opened via this handle. */
drhd0679ed2007-08-28 22:24:34 +00001347 assert( sqlite3_mutex_held(p->pSqlite->mutex) );
drhe53831d2007-08-17 01:14:38 +00001348 sqlite3BtreeEnter(p);
danielk1977aef0bf62005-12-30 16:28:01 +00001349 pCur = pBt->pCursor;
1350 while( pCur ){
1351 BtCursor *pTmp = pCur;
1352 pCur = pCur->pNext;
1353 if( pTmp->pBtree==p ){
1354 sqlite3BtreeCloseCursor(pTmp);
1355 }
drha059ad02001-04-17 20:09:11 +00001356 }
danielk1977aef0bf62005-12-30 16:28:01 +00001357
danielk19778d34dfd2006-01-24 16:37:57 +00001358 /* Rollback any active transaction and free the handle structure.
1359 ** The call to sqlite3BtreeRollback() drops any table-locks held by
1360 ** this handle.
1361 */
danielk1977b597f742006-01-15 11:39:18 +00001362 sqlite3BtreeRollback(p);
drhe53831d2007-08-17 01:14:38 +00001363 sqlite3BtreeLeave(p);
danielk1977aef0bf62005-12-30 16:28:01 +00001364
danielk1977aef0bf62005-12-30 16:28:01 +00001365 /* If there are still other outstanding references to the shared-btree
1366 ** structure, return now. The remainder of this procedure cleans
1367 ** up the shared-btree.
1368 */
drhe53831d2007-08-17 01:14:38 +00001369 assert( p->wantToLock==0 && p->locked==0 );
1370 if( !p->sharable || removeFromSharingList(pBt) ){
1371 /* The pBt is no longer on the sharing list, so we can access
1372 ** it without having to hold the mutex.
1373 **
1374 ** Clean out and delete the BtShared object.
1375 */
1376 assert( !pBt->pCursor );
drhe53831d2007-08-17 01:14:38 +00001377 sqlite3PagerClose(pBt->pPager);
1378 if( pBt->xFreeSchema && pBt->pSchema ){
1379 pBt->xFreeSchema(pBt->pSchema);
1380 }
1381 sqlite3_free(pBt->pSchema);
1382 sqlite3_free(pBt);
danielk1977aef0bf62005-12-30 16:28:01 +00001383 }
1384
drhe53831d2007-08-17 01:14:38 +00001385#ifndef SQLITE_OMIT_SHARED_CACHE
drhcab5ed72007-08-22 11:41:18 +00001386 assert( p->wantToLock==0 );
1387 assert( p->locked==0 );
1388 if( p->pPrev ) p->pPrev->pNext = p->pNext;
1389 if( p->pNext ) p->pNext->pPrev = p->pPrev;
danielk1977aef0bf62005-12-30 16:28:01 +00001390#endif
1391
drhe53831d2007-08-17 01:14:38 +00001392 sqlite3_free(p);
drha059ad02001-04-17 20:09:11 +00001393 return SQLITE_OK;
1394}
1395
#if SQLITE_THREADSAFE && !defined(SQLITE_OMIT_SHARED_CACHE)
/*
** Short-cuts for entering and leaving mutexes on a cursor.
**
** In this configuration cursorEnter() takes no lock itself; it only
** asserts that the mutex of the cursor's BtShared is already held.
** cursorLeave() expands to nothing.  The macro argument is
** parenthesized so that any pointer-valued expression may be passed.
*/
# define cursorEnter(X)  assert( sqlite3_mutex_held((X)->pBt->mutex) )
# define cursorLeave(X)
#else
/* Without thread-safety or shared cache both macros are no-ops. */
# define cursorEnter(X)
# define cursorLeave(X)
#endif /* SQLITE_THREADSAFE && !SQLITE_OMIT_SHARED_CACHE */
1406
drha059ad02001-04-17 20:09:11 +00001407/*
drh90f5ecb2004-07-22 01:19:35 +00001408** Change the busy handler callback function.
1409*/
danielk1977aef0bf62005-12-30 16:28:01 +00001410int sqlite3BtreeSetBusyHandler(Btree *p, BusyHandler *pHandler){
1411 BtShared *pBt = p->pBt;
drhd0679ed2007-08-28 22:24:34 +00001412 assert( sqlite3_mutex_held(p->pSqlite->mutex) );
drhd677b3d2007-08-20 22:48:41 +00001413 sqlite3BtreeEnter(p);
drhb8ef32c2005-03-14 02:01:49 +00001414 pBt->pBusyHandler = pHandler;
danielk19773b8a05f2007-03-19 17:44:26 +00001415 sqlite3PagerSetBusyhandler(pBt->pPager, pHandler);
drhd677b3d2007-08-20 22:48:41 +00001416 sqlite3BtreeLeave(p);
drh90f5ecb2004-07-22 01:19:35 +00001417 return SQLITE_OK;
1418}
1419
1420/*
drhda47d772002-12-02 04:25:19 +00001421** Change the limit on the number of pages allowed in the cache.
drhcd61c282002-03-06 22:01:34 +00001422**
1423** The maximum number of cache pages is set to the absolute
1424** value of mxPage. If mxPage is negative, the pager will
1425** operate asynchronously - it will not stop to do fsync()s
1426** to insure data is written to the disk surface before
1427** continuing. Transactions still work if synchronous is off,
1428** and the database cannot be corrupted if this program
1429** crashes. But if the operating system crashes or there is
1430** an abrupt power failure when synchronous is off, the database
1431** could be left in an inconsistent and unrecoverable state.
1432** Synchronous is on by default so database corruption is not
1433** normally a worry.
drhf57b14a2001-09-14 18:54:08 +00001434*/
danielk1977aef0bf62005-12-30 16:28:01 +00001435int sqlite3BtreeSetCacheSize(Btree *p, int mxPage){
1436 BtShared *pBt = p->pBt;
drhd0679ed2007-08-28 22:24:34 +00001437 assert( sqlite3_mutex_held(p->pSqlite->mutex) );
drhd677b3d2007-08-20 22:48:41 +00001438 sqlite3BtreeEnter(p);
danielk19773b8a05f2007-03-19 17:44:26 +00001439 sqlite3PagerSetCachesize(pBt->pPager, mxPage);
drhd677b3d2007-08-20 22:48:41 +00001440 sqlite3BtreeLeave(p);
drhf57b14a2001-09-14 18:54:08 +00001441 return SQLITE_OK;
1442}
1443
1444/*
drh973b6e32003-02-12 14:09:42 +00001445** Change the way data is synced to disk in order to increase or decrease
1446** how well the database resists damage due to OS crashes and power
1447** failures. Level 1 is the same as asynchronous (no syncs() occur and
1448** there is a high probability of damage) Level 2 is the default. There
1449** is a very low but non-zero probability of damage. Level 3 reduces the
1450** probability of damage to near zero but with a write performance reduction.
1451*/
danielk197793758c82005-01-21 08:13:14 +00001452#ifndef SQLITE_OMIT_PAGER_PRAGMAS
drhac530b12006-02-11 01:25:50 +00001453int sqlite3BtreeSetSafetyLevel(Btree *p, int level, int fullSync){
danielk1977aef0bf62005-12-30 16:28:01 +00001454 BtShared *pBt = p->pBt;
drhd0679ed2007-08-28 22:24:34 +00001455 assert( sqlite3_mutex_held(p->pSqlite->mutex) );
drhd677b3d2007-08-20 22:48:41 +00001456 sqlite3BtreeEnter(p);
danielk19773b8a05f2007-03-19 17:44:26 +00001457 sqlite3PagerSetSafetyLevel(pBt->pPager, level, fullSync);
drhd677b3d2007-08-20 22:48:41 +00001458 sqlite3BtreeLeave(p);
drh973b6e32003-02-12 14:09:42 +00001459 return SQLITE_OK;
1460}
danielk197793758c82005-01-21 08:13:14 +00001461#endif
drh973b6e32003-02-12 14:09:42 +00001462
drh2c8997b2005-08-27 16:36:48 +00001463/*
1464** Return TRUE if the given btree is set to safety level 1. In other
1465** words, return TRUE if no sync() occurs on the disk files.
1466*/
danielk1977aef0bf62005-12-30 16:28:01 +00001467int sqlite3BtreeSyncDisabled(Btree *p){
1468 BtShared *pBt = p->pBt;
drhd677b3d2007-08-20 22:48:41 +00001469 int rc;
drhd0679ed2007-08-28 22:24:34 +00001470 assert( sqlite3_mutex_held(p->pSqlite->mutex) );
drhd677b3d2007-08-20 22:48:41 +00001471 sqlite3BtreeEnter(p);
drhd0679ed2007-08-28 22:24:34 +00001472 assert( pBt && pBt->pPager );
drhd677b3d2007-08-20 22:48:41 +00001473 rc = sqlite3PagerNosync(pBt->pPager);
1474 sqlite3BtreeLeave(p);
1475 return rc;
drh2c8997b2005-08-27 16:36:48 +00001476}
1477
danielk1977576ec6b2005-01-21 11:55:25 +00001478#if !defined(SQLITE_OMIT_PAGER_PRAGMAS) || !defined(SQLITE_OMIT_VACUUM)
drh973b6e32003-02-12 14:09:42 +00001479/*
drh90f5ecb2004-07-22 01:19:35 +00001480** Change the default pages size and the number of reserved bytes per page.
drh06f50212004-11-02 14:24:33 +00001481**
1482** The page size must be a power of 2 between 512 and 65536. If the page
1483** size supplied does not meet this constraint then the page size is not
1484** changed.
1485**
1486** Page sizes are constrained to be a power of two so that the region
1487** of the database file used for locking (beginning at PENDING_BYTE,
1488** the first byte past the 1GB boundary, 0x40000000) needs to occur
1489** at the beginning of a page.
danielk197728129562005-01-11 10:25:06 +00001490**
1491** If parameter nReserve is less than zero, then the number of reserved
1492** bytes per page is left unchanged.
drh90f5ecb2004-07-22 01:19:35 +00001493*/
danielk1977aef0bf62005-12-30 16:28:01 +00001494int sqlite3BtreeSetPageSize(Btree *p, int pageSize, int nReserve){
1495 BtShared *pBt = p->pBt;
drhd677b3d2007-08-20 22:48:41 +00001496 sqlite3BtreeEnter(p);
drh90f5ecb2004-07-22 01:19:35 +00001497 if( pBt->pageSizeFixed ){
drhd677b3d2007-08-20 22:48:41 +00001498 sqlite3BtreeLeave(p);
drh90f5ecb2004-07-22 01:19:35 +00001499 return SQLITE_READONLY;
1500 }
1501 if( nReserve<0 ){
1502 nReserve = pBt->pageSize - pBt->usableSize;
1503 }
drh06f50212004-11-02 14:24:33 +00001504 if( pageSize>=512 && pageSize<=SQLITE_MAX_PAGE_SIZE &&
1505 ((pageSize-1)&pageSize)==0 ){
drh07d183d2005-05-01 22:52:42 +00001506 assert( (pageSize & 7)==0 );
danielk1977aef0bf62005-12-30 16:28:01 +00001507 assert( !pBt->pPage1 && !pBt->pCursor );
danielk19773b8a05f2007-03-19 17:44:26 +00001508 pBt->pageSize = sqlite3PagerSetPagesize(pBt->pPager, pageSize);
drh90f5ecb2004-07-22 01:19:35 +00001509 }
1510 pBt->usableSize = pBt->pageSize - nReserve;
drhd677b3d2007-08-20 22:48:41 +00001511 sqlite3BtreeLeave(p);
drh90f5ecb2004-07-22 01:19:35 +00001512 return SQLITE_OK;
1513}
1514
1515/*
1516** Return the currently defined page size
1517*/
danielk1977aef0bf62005-12-30 16:28:01 +00001518int sqlite3BtreeGetPageSize(Btree *p){
1519 return p->pBt->pageSize;
drh90f5ecb2004-07-22 01:19:35 +00001520}
danielk1977aef0bf62005-12-30 16:28:01 +00001521int sqlite3BtreeGetReserve(Btree *p){
drhd677b3d2007-08-20 22:48:41 +00001522 int n;
1523 sqlite3BtreeEnter(p);
1524 n = p->pBt->pageSize - p->pBt->usableSize;
1525 sqlite3BtreeLeave(p);
1526 return n;
drh2011d5f2004-07-22 02:40:37 +00001527}
drhf8e632b2007-05-08 14:51:36 +00001528
1529/*
1530** Set the maximum page count for a database if mxPage is positive.
1531** No changes are made if mxPage is 0 or negative.
1532** Regardless of the value of mxPage, return the maximum page count.
1533*/
1534int sqlite3BtreeMaxPageCount(Btree *p, int mxPage){
drhd677b3d2007-08-20 22:48:41 +00001535 int n;
1536 sqlite3BtreeEnter(p);
1537 n = sqlite3PagerMaxPageCount(p->pBt->pPager, mxPage);
1538 sqlite3BtreeLeave(p);
1539 return n;
drhf8e632b2007-05-08 14:51:36 +00001540}
danielk1977576ec6b2005-01-21 11:55:25 +00001541#endif /* !defined(SQLITE_OMIT_PAGER_PRAGMAS) || !defined(SQLITE_OMIT_VACUUM) */
drh90f5ecb2004-07-22 01:19:35 +00001542
1543/*
danielk1977951af802004-11-05 15:45:09 +00001544** Change the 'auto-vacuum' property of the database. If the 'autoVacuum'
1545** parameter is non-zero, then auto-vacuum mode is enabled. If zero, it
1546** is disabled. The default value for the auto-vacuum property is
1547** determined by the SQLITE_DEFAULT_AUTOVACUUM macro.
1548*/
danielk1977aef0bf62005-12-30 16:28:01 +00001549int sqlite3BtreeSetAutoVacuum(Btree *p, int autoVacuum){
danielk1977951af802004-11-05 15:45:09 +00001550#ifdef SQLITE_OMIT_AUTOVACUUM
drheee46cf2004-11-06 00:02:48 +00001551 return SQLITE_READONLY;
danielk1977951af802004-11-05 15:45:09 +00001552#else
danielk1977dddbcdc2007-04-26 14:42:34 +00001553 BtShared *pBt = p->pBt;
drhd677b3d2007-08-20 22:48:41 +00001554 int rc = SQLITE_OK;
danielk1977dddbcdc2007-04-26 14:42:34 +00001555 int av = (autoVacuum?1:0);
drhd677b3d2007-08-20 22:48:41 +00001556
1557 sqlite3BtreeEnter(p);
danielk1977dddbcdc2007-04-26 14:42:34 +00001558 if( pBt->pageSizeFixed && av!=pBt->autoVacuum ){
drhd677b3d2007-08-20 22:48:41 +00001559 rc = SQLITE_READONLY;
1560 }else{
1561 pBt->autoVacuum = av;
danielk1977951af802004-11-05 15:45:09 +00001562 }
drhd677b3d2007-08-20 22:48:41 +00001563 sqlite3BtreeLeave(p);
1564 return rc;
danielk1977951af802004-11-05 15:45:09 +00001565#endif
1566}
1567
1568/*
1569** Return the value of the 'auto-vacuum' property. If auto-vacuum is
1570** enabled 1 is returned. Otherwise 0.
1571*/
danielk1977aef0bf62005-12-30 16:28:01 +00001572int sqlite3BtreeGetAutoVacuum(Btree *p){
danielk1977951af802004-11-05 15:45:09 +00001573#ifdef SQLITE_OMIT_AUTOVACUUM
danielk1977dddbcdc2007-04-26 14:42:34 +00001574 return BTREE_AUTOVACUUM_NONE;
danielk1977951af802004-11-05 15:45:09 +00001575#else
drhd677b3d2007-08-20 22:48:41 +00001576 int rc;
1577 sqlite3BtreeEnter(p);
1578 rc = (
danielk1977dddbcdc2007-04-26 14:42:34 +00001579 (!p->pBt->autoVacuum)?BTREE_AUTOVACUUM_NONE:
1580 (!p->pBt->incrVacuum)?BTREE_AUTOVACUUM_FULL:
1581 BTREE_AUTOVACUUM_INCR
1582 );
drhd677b3d2007-08-20 22:48:41 +00001583 sqlite3BtreeLeave(p);
1584 return rc;
danielk1977951af802004-11-05 15:45:09 +00001585#endif
1586}
1587
1588
1589/*
drha34b6762004-05-07 13:30:42 +00001590** Get a reference to pPage1 of the database file. This will
drh306dc212001-05-21 13:45:10 +00001591** also acquire a readlock on that file.
1592**
1593** SQLITE_OK is returned on success. If the file is not a
1594** well-formed database file, then SQLITE_CORRUPT is returned.
1595** SQLITE_BUSY is returned if the database is locked. SQLITE_NOMEM
drh4f0ee682007-03-30 20:43:40 +00001596** is returned if we run out of memory.
drh306dc212001-05-21 13:45:10 +00001597*/
danielk1977aef0bf62005-12-30 16:28:01 +00001598static int lockBtree(BtShared *pBt){
drh07d183d2005-05-01 22:52:42 +00001599 int rc, pageSize;
drh3aac2dd2004-04-26 14:10:20 +00001600 MemPage *pPage1;
drhd677b3d2007-08-20 22:48:41 +00001601
drh27641702007-08-22 02:56:42 +00001602 assert( sqlite3BtreeMutexHeld(pBt->mutex) );
drha34b6762004-05-07 13:30:42 +00001603 if( pBt->pPage1 ) return SQLITE_OK;
drh16a9b832007-05-05 18:39:25 +00001604 rc = sqlite3BtreeGetPage(pBt, 1, &pPage1, 0);
drh306dc212001-05-21 13:45:10 +00001605 if( rc!=SQLITE_OK ) return rc;
drh3aac2dd2004-04-26 14:10:20 +00001606
drh306dc212001-05-21 13:45:10 +00001607
1608 /* Do some checking to help insure the file we opened really is
1609 ** a valid database file.
1610 */
drhb6f41482004-05-14 01:58:11 +00001611 rc = SQLITE_NOTADB;
danielk19773b8a05f2007-03-19 17:44:26 +00001612 if( sqlite3PagerPagecount(pBt->pPager)>0 ){
drhb6f41482004-05-14 01:58:11 +00001613 u8 *page1 = pPage1->aData;
1614 if( memcmp(page1, zMagicHeader, 16)!=0 ){
drh72f82862001-05-24 21:06:34 +00001615 goto page1_init_failed;
drh306dc212001-05-21 13:45:10 +00001616 }
drh309169a2007-04-24 17:27:51 +00001617 if( page1[18]>1 ){
1618 pBt->readOnly = 1;
1619 }
1620 if( page1[19]>1 ){
drhb6f41482004-05-14 01:58:11 +00001621 goto page1_init_failed;
1622 }
drh07d183d2005-05-01 22:52:42 +00001623 pageSize = get2byte(&page1[16]);
drh15926592007-04-06 15:02:13 +00001624 if( ((pageSize-1)&pageSize)!=0 || pageSize<512 ){
drh07d183d2005-05-01 22:52:42 +00001625 goto page1_init_failed;
1626 }
1627 assert( (pageSize & 7)==0 );
1628 pBt->pageSize = pageSize;
1629 pBt->usableSize = pageSize - page1[20];
drhb6f41482004-05-14 01:58:11 +00001630 if( pBt->usableSize<500 ){
1631 goto page1_init_failed;
1632 }
1633 pBt->maxEmbedFrac = page1[21];
1634 pBt->minEmbedFrac = page1[22];
1635 pBt->minLeafFrac = page1[23];
drh057cd3a2005-02-15 16:23:02 +00001636#ifndef SQLITE_OMIT_AUTOVACUUM
1637 pBt->autoVacuum = (get4byte(&page1[36 + 4*4])?1:0);
danielk197727b1f952007-06-25 08:16:58 +00001638 pBt->incrVacuum = (get4byte(&page1[36 + 7*4])?1:0);
drh057cd3a2005-02-15 16:23:02 +00001639#endif
drh306dc212001-05-21 13:45:10 +00001640 }
drhb6f41482004-05-14 01:58:11 +00001641
1642 /* maxLocal is the maximum amount of payload to store locally for
1643 ** a cell. Make sure it is small enough so that at least minFanout
1644 ** cells can will fit on one page. We assume a 10-byte page header.
1645 ** Besides the payload, the cell must store:
drh43605152004-05-29 21:46:49 +00001646 ** 2-byte pointer to the cell
drhb6f41482004-05-14 01:58:11 +00001647 ** 4-byte child pointer
1648 ** 9-byte nKey value
1649 ** 4-byte nData value
1650 ** 4-byte overflow page pointer
drh43605152004-05-29 21:46:49 +00001651 ** So a cell consists of a 2-byte poiner, a header which is as much as
1652 ** 17 bytes long, 0 to N bytes of payload, and an optional 4 byte overflow
1653 ** page pointer.
drhb6f41482004-05-14 01:58:11 +00001654 */
drh43605152004-05-29 21:46:49 +00001655 pBt->maxLocal = (pBt->usableSize-12)*pBt->maxEmbedFrac/255 - 23;
1656 pBt->minLocal = (pBt->usableSize-12)*pBt->minEmbedFrac/255 - 23;
1657 pBt->maxLeaf = pBt->usableSize - 35;
1658 pBt->minLeaf = (pBt->usableSize-12)*pBt->minLeafFrac/255 - 23;
drhb6f41482004-05-14 01:58:11 +00001659 if( pBt->minLocal>pBt->maxLocal || pBt->maxLocal<0 ){
1660 goto page1_init_failed;
1661 }
drh2e38c322004-09-03 18:38:44 +00001662 assert( pBt->maxLeaf + 23 <= MX_CELL_SIZE(pBt) );
drh3aac2dd2004-04-26 14:10:20 +00001663 pBt->pPage1 = pPage1;
drhb6f41482004-05-14 01:58:11 +00001664 return SQLITE_OK;
drh306dc212001-05-21 13:45:10 +00001665
drh72f82862001-05-24 21:06:34 +00001666page1_init_failed:
drh3aac2dd2004-04-26 14:10:20 +00001667 releasePage(pPage1);
1668 pBt->pPage1 = 0;
drh72f82862001-05-24 21:06:34 +00001669 return rc;
drh306dc212001-05-21 13:45:10 +00001670}
1671
1672/*
drhb8ef32c2005-03-14 02:01:49 +00001673** This routine works like lockBtree() except that it also invokes the
1674** busy callback if there is lock contention.
1675*/
danielk1977aef0bf62005-12-30 16:28:01 +00001676static int lockBtreeWithRetry(Btree *pRef){
drhb8ef32c2005-03-14 02:01:49 +00001677 int rc = SQLITE_OK;
drhd677b3d2007-08-20 22:48:41 +00001678
drh27641702007-08-22 02:56:42 +00001679 assert( sqlite3BtreeMutexHeld(pRef->pSqlite->mutex) );
1680 assert( sqlite3BtreeMutexHeld(pRef->pBt->mutex) );
danielk1977aef0bf62005-12-30 16:28:01 +00001681 if( pRef->inTrans==TRANS_NONE ){
1682 u8 inTransaction = pRef->pBt->inTransaction;
1683 btreeIntegrity(pRef);
1684 rc = sqlite3BtreeBeginTrans(pRef, 0);
1685 pRef->pBt->inTransaction = inTransaction;
1686 pRef->inTrans = TRANS_NONE;
1687 if( rc==SQLITE_OK ){
1688 pRef->pBt->nTransaction--;
1689 }
1690 btreeIntegrity(pRef);
drhb8ef32c2005-03-14 02:01:49 +00001691 }
1692 return rc;
1693}
1694
1695
1696/*
drhb8ca3072001-12-05 00:21:20 +00001697** If there are no outstanding cursors and we are not in the middle
1698** of a transaction but there is a read lock on the database, then
1699** this routine unrefs the first page of the database file which
1700** has the effect of releasing the read lock.
1701**
1702** If there are any outstanding cursors, this routine is a no-op.
1703**
1704** If there is a transaction in progress, this routine is a no-op.
1705*/
danielk1977aef0bf62005-12-30 16:28:01 +00001706static void unlockBtreeIfUnused(BtShared *pBt){
drh27641702007-08-22 02:56:42 +00001707 assert( sqlite3BtreeMutexHeld(pBt->mutex) );
danielk1977aef0bf62005-12-30 16:28:01 +00001708 if( pBt->inTransaction==TRANS_NONE && pBt->pCursor==0 && pBt->pPage1!=0 ){
danielk19773b8a05f2007-03-19 17:44:26 +00001709 if( sqlite3PagerRefcount(pBt->pPager)>=1 ){
drh24c9a2e2007-01-05 02:00:47 +00001710 if( pBt->pPage1->aData==0 ){
1711 MemPage *pPage = pBt->pPage1;
1712 pPage->aData = &((u8*)pPage)[-pBt->pageSize];
1713 pPage->pBt = pBt;
1714 pPage->pgno = 1;
1715 }
1716 releasePage(pBt->pPage1);
drh51c6d962004-06-06 00:42:25 +00001717 }
drh3aac2dd2004-04-26 14:10:20 +00001718 pBt->pPage1 = 0;
drh3aac2dd2004-04-26 14:10:20 +00001719 pBt->inStmt = 0;
drhb8ca3072001-12-05 00:21:20 +00001720 }
1721}
1722
1723/*
drh9e572e62004-04-23 23:43:10 +00001724** Create a new database by initializing the first page of the
drh8c42ca92001-06-22 19:15:00 +00001725** file.
drh8b2f49b2001-06-08 00:21:52 +00001726*/
danielk1977aef0bf62005-12-30 16:28:01 +00001727static int newDatabase(BtShared *pBt){
drh9e572e62004-04-23 23:43:10 +00001728 MemPage *pP1;
1729 unsigned char *data;
drh8c42ca92001-06-22 19:15:00 +00001730 int rc;
drhd677b3d2007-08-20 22:48:41 +00001731
drh27641702007-08-22 02:56:42 +00001732 assert( sqlite3BtreeMutexHeld(pBt->mutex) );
danielk19773b8a05f2007-03-19 17:44:26 +00001733 if( sqlite3PagerPagecount(pBt->pPager)>0 ) return SQLITE_OK;
drh3aac2dd2004-04-26 14:10:20 +00001734 pP1 = pBt->pPage1;
drh9e572e62004-04-23 23:43:10 +00001735 assert( pP1!=0 );
1736 data = pP1->aData;
danielk19773b8a05f2007-03-19 17:44:26 +00001737 rc = sqlite3PagerWrite(pP1->pDbPage);
drh8b2f49b2001-06-08 00:21:52 +00001738 if( rc ) return rc;
drh9e572e62004-04-23 23:43:10 +00001739 memcpy(data, zMagicHeader, sizeof(zMagicHeader));
1740 assert( sizeof(zMagicHeader)==16 );
drhb6f41482004-05-14 01:58:11 +00001741 put2byte(&data[16], pBt->pageSize);
drh9e572e62004-04-23 23:43:10 +00001742 data[18] = 1;
1743 data[19] = 1;
drhb6f41482004-05-14 01:58:11 +00001744 data[20] = pBt->pageSize - pBt->usableSize;
1745 data[21] = pBt->maxEmbedFrac;
1746 data[22] = pBt->minEmbedFrac;
1747 data[23] = pBt->minLeafFrac;
1748 memset(&data[24], 0, 100-24);
drhe6c43812004-05-14 12:17:46 +00001749 zeroPage(pP1, PTF_INTKEY|PTF_LEAF|PTF_LEAFDATA );
drhf2a611c2004-09-05 00:33:43 +00001750 pBt->pageSizeFixed = 1;
danielk1977003ba062004-11-04 02:57:33 +00001751#ifndef SQLITE_OMIT_AUTOVACUUM
danielk1977dddbcdc2007-04-26 14:42:34 +00001752 assert( pBt->autoVacuum==1 || pBt->autoVacuum==0 );
danielk1977418899a2007-06-24 10:14:00 +00001753 assert( pBt->incrVacuum==1 || pBt->incrVacuum==0 );
danielk1977dddbcdc2007-04-26 14:42:34 +00001754 put4byte(&data[36 + 4*4], pBt->autoVacuum);
danielk1977418899a2007-06-24 10:14:00 +00001755 put4byte(&data[36 + 7*4], pBt->incrVacuum);
danielk1977003ba062004-11-04 02:57:33 +00001756#endif
drh8b2f49b2001-06-08 00:21:52 +00001757 return SQLITE_OK;
1758}
1759
1760/*
danielk1977ee5741e2004-05-31 10:01:34 +00001761** Attempt to start a new transaction. A write-transaction
drh684917c2004-10-05 02:41:42 +00001762** is started if the second argument is nonzero, otherwise a read-
1763** transaction. If the second argument is 2 or more and exclusive
1764** transaction is started, meaning that no other process is allowed
1765** to access the database. A preexisting transaction may not be
drhb8ef32c2005-03-14 02:01:49 +00001766** upgraded to exclusive by calling this routine a second time - the
drh684917c2004-10-05 02:41:42 +00001767** exclusivity flag only works for a new transaction.
drh8b2f49b2001-06-08 00:21:52 +00001768**
danielk1977ee5741e2004-05-31 10:01:34 +00001769** A write-transaction must be started before attempting any
1770** changes to the database. None of the following routines
1771** will work unless a transaction is started first:
drh8b2f49b2001-06-08 00:21:52 +00001772**
drh23e11ca2004-05-04 17:27:28 +00001773** sqlite3BtreeCreateTable()
1774** sqlite3BtreeCreateIndex()
1775** sqlite3BtreeClearTable()
1776** sqlite3BtreeDropTable()
1777** sqlite3BtreeInsert()
1778** sqlite3BtreeDelete()
1779** sqlite3BtreeUpdateMeta()
danielk197713adf8a2004-06-03 16:08:41 +00001780**
drhb8ef32c2005-03-14 02:01:49 +00001781** If an initial attempt to acquire the lock fails because of lock contention
1782** and the database was previously unlocked, then invoke the busy handler
1783** if there is one. But if there was previously a read-lock, do not
1784** invoke the busy handler - just return SQLITE_BUSY. SQLITE_BUSY is
1785** returned when there is already a read-lock in order to avoid a deadlock.
1786**
1787** Suppose there are two processes A and B. A has a read lock and B has
1788** a reserved lock. B tries to promote to exclusive but is blocked because
1789** of A's read lock. A tries to promote to reserved but is blocked by B.
1790** One or the other of the two processes must give way or there can be
1791** no progress. By returning SQLITE_BUSY and not invoking the busy callback
1792** when A already has a read lock, we encourage A to give up and let B
1793** proceed.
drha059ad02001-04-17 20:09:11 +00001794*/
danielk1977aef0bf62005-12-30 16:28:01 +00001795int sqlite3BtreeBeginTrans(Btree *p, int wrflag){
1796 BtShared *pBt = p->pBt;
danielk1977ee5741e2004-05-31 10:01:34 +00001797 int rc = SQLITE_OK;
1798
drhd677b3d2007-08-20 22:48:41 +00001799 sqlite3BtreeEnter(p);
danielk1977aef0bf62005-12-30 16:28:01 +00001800 btreeIntegrity(p);
1801
danielk1977ee5741e2004-05-31 10:01:34 +00001802 /* If the btree is already in a write-transaction, or it
1803 ** is already in a read-transaction and a read-transaction
1804 ** is requested, this is a no-op.
1805 */
danielk1977aef0bf62005-12-30 16:28:01 +00001806 if( p->inTrans==TRANS_WRITE || (p->inTrans==TRANS_READ && !wrflag) ){
drhd677b3d2007-08-20 22:48:41 +00001807 goto trans_begun;
danielk1977ee5741e2004-05-31 10:01:34 +00001808 }
drhb8ef32c2005-03-14 02:01:49 +00001809
1810 /* Write transactions are not possible on a read-only database */
danielk1977ee5741e2004-05-31 10:01:34 +00001811 if( pBt->readOnly && wrflag ){
drhd677b3d2007-08-20 22:48:41 +00001812 rc = SQLITE_READONLY;
1813 goto trans_begun;
danielk1977ee5741e2004-05-31 10:01:34 +00001814 }
1815
danielk1977aef0bf62005-12-30 16:28:01 +00001816 /* If another database handle has already opened a write transaction
1817 ** on this shared-btree structure and a second write transaction is
1818 ** requested, return SQLITE_BUSY.
1819 */
1820 if( pBt->inTransaction==TRANS_WRITE && wrflag ){
drhd677b3d2007-08-20 22:48:41 +00001821 rc = SQLITE_BUSY;
1822 goto trans_begun;
danielk1977aef0bf62005-12-30 16:28:01 +00001823 }
1824
drhb8ef32c2005-03-14 02:01:49 +00001825 do {
1826 if( pBt->pPage1==0 ){
1827 rc = lockBtree(pBt);
drh8c42ca92001-06-22 19:15:00 +00001828 }
drh309169a2007-04-24 17:27:51 +00001829
drhb8ef32c2005-03-14 02:01:49 +00001830 if( rc==SQLITE_OK && wrflag ){
drh309169a2007-04-24 17:27:51 +00001831 if( pBt->readOnly ){
1832 rc = SQLITE_READONLY;
1833 }else{
1834 rc = sqlite3PagerBegin(pBt->pPage1->pDbPage, wrflag>1);
1835 if( rc==SQLITE_OK ){
1836 rc = newDatabase(pBt);
1837 }
drhb8ef32c2005-03-14 02:01:49 +00001838 }
1839 }
1840
1841 if( rc==SQLITE_OK ){
drhb8ef32c2005-03-14 02:01:49 +00001842 if( wrflag ) pBt->inStmt = 0;
1843 }else{
1844 unlockBtreeIfUnused(pBt);
1845 }
danielk1977aef0bf62005-12-30 16:28:01 +00001846 }while( rc==SQLITE_BUSY && pBt->inTransaction==TRANS_NONE &&
drha4afb652005-07-09 02:16:02 +00001847 sqlite3InvokeBusyHandler(pBt->pBusyHandler) );
danielk1977aef0bf62005-12-30 16:28:01 +00001848
1849 if( rc==SQLITE_OK ){
1850 if( p->inTrans==TRANS_NONE ){
1851 pBt->nTransaction++;
1852 }
1853 p->inTrans = (wrflag?TRANS_WRITE:TRANS_READ);
1854 if( p->inTrans>pBt->inTransaction ){
1855 pBt->inTransaction = p->inTrans;
1856 }
1857 }
1858
drhd677b3d2007-08-20 22:48:41 +00001859
1860trans_begun:
danielk1977aef0bf62005-12-30 16:28:01 +00001861 btreeIntegrity(p);
drhd677b3d2007-08-20 22:48:41 +00001862 sqlite3BtreeLeave(p);
drhb8ca3072001-12-05 00:21:20 +00001863 return rc;
drha059ad02001-04-17 20:09:11 +00001864}
1865
danielk1977687566d2004-11-02 12:56:41 +00001866#ifndef SQLITE_OMIT_AUTOVACUUM
1867
1868/*
1869** Set the pointer-map entries for all children of page pPage. Also, if
1870** pPage contains cells that point to overflow pages, set the pointer
1871** map entries for the overflow pages as well.
1872*/
1873static int setChildPtrmaps(MemPage *pPage){
1874 int i; /* Counter variable */
1875 int nCell; /* Number of cells in page pPage */
danielk19772df71c72007-05-24 07:22:42 +00001876 int rc; /* Return code */
danielk1977aef0bf62005-12-30 16:28:01 +00001877 BtShared *pBt = pPage->pBt;
danielk1977687566d2004-11-02 12:56:41 +00001878 int isInitOrig = pPage->isInit;
1879 Pgno pgno = pPage->pgno;
1880
drh27641702007-08-22 02:56:42 +00001881 assert( sqlite3BtreeMutexHeld(pPage->pBt->mutex) );
danielk19772df71c72007-05-24 07:22:42 +00001882 rc = sqlite3BtreeInitPage(pPage, pPage->pParent);
1883 if( rc!=SQLITE_OK ){
1884 goto set_child_ptrmaps_out;
1885 }
danielk1977687566d2004-11-02 12:56:41 +00001886 nCell = pPage->nCell;
1887
1888 for(i=0; i<nCell; i++){
danielk19771cc5ed82007-05-16 17:28:43 +00001889 u8 *pCell = findCell(pPage, i);
danielk1977687566d2004-11-02 12:56:41 +00001890
danielk197726836652005-01-17 01:33:13 +00001891 rc = ptrmapPutOvflPtr(pPage, pCell);
1892 if( rc!=SQLITE_OK ){
1893 goto set_child_ptrmaps_out;
danielk1977687566d2004-11-02 12:56:41 +00001894 }
danielk197726836652005-01-17 01:33:13 +00001895
danielk1977687566d2004-11-02 12:56:41 +00001896 if( !pPage->leaf ){
1897 Pgno childPgno = get4byte(pCell);
1898 rc = ptrmapPut(pBt, childPgno, PTRMAP_BTREE, pgno);
1899 if( rc!=SQLITE_OK ) goto set_child_ptrmaps_out;
1900 }
1901 }
1902
1903 if( !pPage->leaf ){
1904 Pgno childPgno = get4byte(&pPage->aData[pPage->hdrOffset+8]);
1905 rc = ptrmapPut(pBt, childPgno, PTRMAP_BTREE, pgno);
1906 }
1907
1908set_child_ptrmaps_out:
1909 pPage->isInit = isInitOrig;
1910 return rc;
1911}
1912
1913/*
1914** Somewhere on pPage, which is guarenteed to be a btree page, not an overflow
1915** page, is a pointer to page iFrom. Modify this pointer so that it points to
1916** iTo. Parameter eType describes the type of pointer to be modified, as
1917** follows:
1918**
1919** PTRMAP_BTREE: pPage is a btree-page. The pointer points at a child
1920** page of pPage.
1921**
1922** PTRMAP_OVERFLOW1: pPage is a btree-page. The pointer points at an overflow
1923** page pointed to by one of the cells on pPage.
1924**
1925** PTRMAP_OVERFLOW2: pPage is an overflow-page. The pointer points at the next
1926** overflow page in the list.
1927*/
danielk1977fdb7cdb2005-01-17 02:12:18 +00001928static int modifyPagePointer(MemPage *pPage, Pgno iFrom, Pgno iTo, u8 eType){
drh27641702007-08-22 02:56:42 +00001929 assert( sqlite3BtreeMutexHeld(pPage->pBt->mutex) );
danielk1977687566d2004-11-02 12:56:41 +00001930 if( eType==PTRMAP_OVERFLOW2 ){
danielk1977f78fc082004-11-02 14:40:32 +00001931 /* The pointer is always the first 4 bytes of the page in this case. */
danielk1977fdb7cdb2005-01-17 02:12:18 +00001932 if( get4byte(pPage->aData)!=iFrom ){
drh49285702005-09-17 15:20:26 +00001933 return SQLITE_CORRUPT_BKPT;
danielk1977fdb7cdb2005-01-17 02:12:18 +00001934 }
danielk1977f78fc082004-11-02 14:40:32 +00001935 put4byte(pPage->aData, iTo);
danielk1977687566d2004-11-02 12:56:41 +00001936 }else{
1937 int isInitOrig = pPage->isInit;
1938 int i;
1939 int nCell;
1940
drh16a9b832007-05-05 18:39:25 +00001941 sqlite3BtreeInitPage(pPage, 0);
danielk1977687566d2004-11-02 12:56:41 +00001942 nCell = pPage->nCell;
1943
danielk1977687566d2004-11-02 12:56:41 +00001944 for(i=0; i<nCell; i++){
danielk19771cc5ed82007-05-16 17:28:43 +00001945 u8 *pCell = findCell(pPage, i);
danielk1977687566d2004-11-02 12:56:41 +00001946 if( eType==PTRMAP_OVERFLOW1 ){
1947 CellInfo info;
drh16a9b832007-05-05 18:39:25 +00001948 sqlite3BtreeParseCellPtr(pPage, pCell, &info);
danielk1977687566d2004-11-02 12:56:41 +00001949 if( info.iOverflow ){
1950 if( iFrom==get4byte(&pCell[info.iOverflow]) ){
1951 put4byte(&pCell[info.iOverflow], iTo);
1952 break;
1953 }
1954 }
1955 }else{
1956 if( get4byte(pCell)==iFrom ){
1957 put4byte(pCell, iTo);
1958 break;
1959 }
1960 }
1961 }
1962
1963 if( i==nCell ){
danielk1977fdb7cdb2005-01-17 02:12:18 +00001964 if( eType!=PTRMAP_BTREE ||
1965 get4byte(&pPage->aData[pPage->hdrOffset+8])!=iFrom ){
drh49285702005-09-17 15:20:26 +00001966 return SQLITE_CORRUPT_BKPT;
danielk1977fdb7cdb2005-01-17 02:12:18 +00001967 }
danielk1977687566d2004-11-02 12:56:41 +00001968 put4byte(&pPage->aData[pPage->hdrOffset+8], iTo);
1969 }
1970
1971 pPage->isInit = isInitOrig;
1972 }
danielk1977fdb7cdb2005-01-17 02:12:18 +00001973 return SQLITE_OK;
danielk1977687566d2004-11-02 12:56:41 +00001974}
1975
danielk1977003ba062004-11-04 02:57:33 +00001976
danielk19777701e812005-01-10 12:59:51 +00001977/*
1978** Move the open database page pDbPage to location iFreePage in the
1979** database. The pDbPage reference remains valid.
1980*/
danielk1977003ba062004-11-04 02:57:33 +00001981static int relocatePage(
danielk1977aef0bf62005-12-30 16:28:01 +00001982 BtShared *pBt, /* Btree */
danielk19777701e812005-01-10 12:59:51 +00001983 MemPage *pDbPage, /* Open page to move */
1984 u8 eType, /* Pointer map 'type' entry for pDbPage */
1985 Pgno iPtrPage, /* Pointer map 'page-no' entry for pDbPage */
1986 Pgno iFreePage /* The location to move pDbPage to */
danielk1977003ba062004-11-04 02:57:33 +00001987){
1988 MemPage *pPtrPage; /* The page that contains a pointer to pDbPage */
1989 Pgno iDbPage = pDbPage->pgno;
1990 Pager *pPager = pBt->pPager;
1991 int rc;
1992
danielk1977a0bf2652004-11-04 14:30:04 +00001993 assert( eType==PTRMAP_OVERFLOW2 || eType==PTRMAP_OVERFLOW1 ||
1994 eType==PTRMAP_BTREE || eType==PTRMAP_ROOTPAGE );
drh27641702007-08-22 02:56:42 +00001995 assert( sqlite3BtreeMutexHeld(pBt->mutex) );
drhd0679ed2007-08-28 22:24:34 +00001996 assert( pDbPage->pBt==pBt );
danielk1977003ba062004-11-04 02:57:33 +00001997
1998 /* Move page iDbPage from it's current location to page number iFreePage */
1999 TRACE(("AUTOVACUUM: Moving %d to free page %d (ptr page %d type %d)\n",
2000 iDbPage, iFreePage, iPtrPage, eType));
danielk19773b8a05f2007-03-19 17:44:26 +00002001 rc = sqlite3PagerMovepage(pPager, pDbPage->pDbPage, iFreePage);
danielk1977003ba062004-11-04 02:57:33 +00002002 if( rc!=SQLITE_OK ){
2003 return rc;
2004 }
2005 pDbPage->pgno = iFreePage;
2006
2007 /* If pDbPage was a btree-page, then it may have child pages and/or cells
2008 ** that point to overflow pages. The pointer map entries for all these
2009 ** pages need to be changed.
2010 **
2011 ** If pDbPage is an overflow page, then the first 4 bytes may store a
2012 ** pointer to a subsequent overflow page. If this is the case, then
2013 ** the pointer map needs to be updated for the subsequent overflow page.
2014 */
danielk1977a0bf2652004-11-04 14:30:04 +00002015 if( eType==PTRMAP_BTREE || eType==PTRMAP_ROOTPAGE ){
danielk1977003ba062004-11-04 02:57:33 +00002016 rc = setChildPtrmaps(pDbPage);
2017 if( rc!=SQLITE_OK ){
2018 return rc;
2019 }
2020 }else{
2021 Pgno nextOvfl = get4byte(pDbPage->aData);
2022 if( nextOvfl!=0 ){
danielk1977003ba062004-11-04 02:57:33 +00002023 rc = ptrmapPut(pBt, nextOvfl, PTRMAP_OVERFLOW2, iFreePage);
2024 if( rc!=SQLITE_OK ){
2025 return rc;
2026 }
2027 }
2028 }
2029
2030 /* Fix the database pointer on page iPtrPage that pointed at iDbPage so
2031 ** that it points at iFreePage. Also fix the pointer map entry for
2032 ** iPtrPage.
2033 */
danielk1977a0bf2652004-11-04 14:30:04 +00002034 if( eType!=PTRMAP_ROOTPAGE ){
drh16a9b832007-05-05 18:39:25 +00002035 rc = sqlite3BtreeGetPage(pBt, iPtrPage, &pPtrPage, 0);
danielk1977a0bf2652004-11-04 14:30:04 +00002036 if( rc!=SQLITE_OK ){
2037 return rc;
2038 }
danielk19773b8a05f2007-03-19 17:44:26 +00002039 rc = sqlite3PagerWrite(pPtrPage->pDbPage);
danielk1977a0bf2652004-11-04 14:30:04 +00002040 if( rc!=SQLITE_OK ){
2041 releasePage(pPtrPage);
2042 return rc;
2043 }
danielk1977fdb7cdb2005-01-17 02:12:18 +00002044 rc = modifyPagePointer(pPtrPage, iDbPage, iFreePage, eType);
danielk1977003ba062004-11-04 02:57:33 +00002045 releasePage(pPtrPage);
danielk1977fdb7cdb2005-01-17 02:12:18 +00002046 if( rc==SQLITE_OK ){
2047 rc = ptrmapPut(pBt, iFreePage, eType, iPtrPage);
2048 }
danielk1977003ba062004-11-04 02:57:33 +00002049 }
danielk1977003ba062004-11-04 02:57:33 +00002050 return rc;
2051}
2052
danielk1977dddbcdc2007-04-26 14:42:34 +00002053/* Forward declaration required by incrVacuumStep(). */
drh4f0c5872007-03-26 22:05:01 +00002054static int allocateBtreePage(BtShared *, MemPage **, Pgno *, Pgno, u8);
danielk1977687566d2004-11-02 12:56:41 +00002055
2056/*
danielk1977dddbcdc2007-04-26 14:42:34 +00002057** Perform a single step of an incremental-vacuum. If successful,
2058** return SQLITE_OK. If there is no work to do (and therefore no
2059** point in calling this function again), return SQLITE_DONE.
2060**
2061** More specificly, this function attempts to re-organize the
2062** database so that the last page of the file currently in use
2063** is no longer in use.
2064**
2065** If the nFin parameter is non-zero, the implementation assumes
2066** that the caller will keep calling incrVacuumStep() until
2067** it returns SQLITE_DONE or an error, and that nFin is the
2068** number of pages the database file will contain after this
2069** process is complete.
2070*/
2071static int incrVacuumStep(BtShared *pBt, Pgno nFin){
2072 Pgno iLastPg; /* Last page in the database */
2073 Pgno nFreeList; /* Number of pages still on the free-list */
2074
drh27641702007-08-22 02:56:42 +00002075 assert( sqlite3BtreeMutexHeld(pBt->mutex) );
danielk1977dddbcdc2007-04-26 14:42:34 +00002076 iLastPg = pBt->nTrunc;
2077 if( iLastPg==0 ){
2078 iLastPg = sqlite3PagerPagecount(pBt->pPager);
2079 }
2080
2081 if( !PTRMAP_ISPAGE(pBt, iLastPg) && iLastPg!=PENDING_BYTE_PAGE(pBt) ){
2082 int rc;
2083 u8 eType;
2084 Pgno iPtrPage;
2085
2086 nFreeList = get4byte(&pBt->pPage1->aData[36]);
2087 if( nFreeList==0 || nFin==iLastPg ){
2088 return SQLITE_DONE;
2089 }
2090
2091 rc = ptrmapGet(pBt, iLastPg, &eType, &iPtrPage);
2092 if( rc!=SQLITE_OK ){
2093 return rc;
2094 }
2095 if( eType==PTRMAP_ROOTPAGE ){
2096 return SQLITE_CORRUPT_BKPT;
2097 }
2098
2099 if( eType==PTRMAP_FREEPAGE ){
2100 if( nFin==0 ){
2101 /* Remove the page from the files free-list. This is not required
danielk19774ef24492007-05-23 09:52:41 +00002102 ** if nFin is non-zero. In that case, the free-list will be
danielk1977dddbcdc2007-04-26 14:42:34 +00002103 ** truncated to zero after this function returns, so it doesn't
2104 ** matter if it still contains some garbage entries.
2105 */
2106 Pgno iFreePg;
2107 MemPage *pFreePg;
2108 rc = allocateBtreePage(pBt, &pFreePg, &iFreePg, iLastPg, 1);
2109 if( rc!=SQLITE_OK ){
2110 return rc;
2111 }
2112 assert( iFreePg==iLastPg );
2113 releasePage(pFreePg);
2114 }
2115 } else {
2116 Pgno iFreePg; /* Index of free page to move pLastPg to */
2117 MemPage *pLastPg;
2118
drh16a9b832007-05-05 18:39:25 +00002119 rc = sqlite3BtreeGetPage(pBt, iLastPg, &pLastPg, 0);
danielk1977dddbcdc2007-04-26 14:42:34 +00002120 if( rc!=SQLITE_OK ){
2121 return rc;
2122 }
2123
danielk1977b4626a32007-04-28 15:47:43 +00002124 /* If nFin is zero, this loop runs exactly once and page pLastPg
2125 ** is swapped with the first free page pulled off the free list.
2126 **
2127 ** On the other hand, if nFin is greater than zero, then keep
2128 ** looping until a free-page located within the first nFin pages
2129 ** of the file is found.
2130 */
danielk1977dddbcdc2007-04-26 14:42:34 +00002131 do {
2132 MemPage *pFreePg;
2133 rc = allocateBtreePage(pBt, &pFreePg, &iFreePg, 0, 0);
2134 if( rc!=SQLITE_OK ){
2135 releasePage(pLastPg);
2136 return rc;
2137 }
2138 releasePage(pFreePg);
2139 }while( nFin!=0 && iFreePg>nFin );
2140 assert( iFreePg<iLastPg );
danielk1977b4626a32007-04-28 15:47:43 +00002141
2142 rc = sqlite3PagerWrite(pLastPg->pDbPage);
2143 if( rc!=SQLITE_OK ){
2144 return rc;
2145 }
danielk1977dddbcdc2007-04-26 14:42:34 +00002146 rc = relocatePage(pBt, pLastPg, eType, iPtrPage, iFreePg);
2147 releasePage(pLastPg);
2148 if( rc!=SQLITE_OK ){
2149 return rc;
2150 }
2151 }
2152 }
2153
2154 pBt->nTrunc = iLastPg - 1;
2155 while( pBt->nTrunc==PENDING_BYTE_PAGE(pBt)||PTRMAP_ISPAGE(pBt, pBt->nTrunc) ){
2156 pBt->nTrunc--;
2157 }
2158 return SQLITE_OK;
2159}
2160
2161/*
2162** A write-transaction must be opened before calling this function.
2163** It performs a single unit of work towards an incremental vacuum.
2164**
2165** If the incremental vacuum is finished after this function has run,
2166** SQLITE_DONE is returned. If it is not finished, but no error occured,
2167** SQLITE_OK is returned. Otherwise an SQLite error code.
2168*/
2169int sqlite3BtreeIncrVacuum(Btree *p){
drhd677b3d2007-08-20 22:48:41 +00002170 int rc;
danielk1977dddbcdc2007-04-26 14:42:34 +00002171 BtShared *pBt = p->pBt;
drhd677b3d2007-08-20 22:48:41 +00002172
2173 sqlite3BtreeEnter(p);
danielk1977dddbcdc2007-04-26 14:42:34 +00002174 assert( pBt->inTransaction==TRANS_WRITE && p->inTrans==TRANS_WRITE );
2175 if( !pBt->autoVacuum ){
drhd677b3d2007-08-20 22:48:41 +00002176 rc = SQLITE_DONE;
2177 }else{
2178 invalidateAllOverflowCache(pBt);
2179 rc = incrVacuumStep(pBt, 0);
danielk1977dddbcdc2007-04-26 14:42:34 +00002180 }
drhd677b3d2007-08-20 22:48:41 +00002181 sqlite3BtreeLeave(p);
2182 return rc;
danielk1977dddbcdc2007-04-26 14:42:34 +00002183}
2184
2185/*
danielk19773b8a05f2007-03-19 17:44:26 +00002186** This routine is called prior to sqlite3PagerCommit when a transaction
danielk1977687566d2004-11-02 12:56:41 +00002187** is commited for an auto-vacuum database.
danielk197724168722007-04-02 05:07:47 +00002188**
2189** If SQLITE_OK is returned, then *pnTrunc is set to the number of pages
2190** the database file should be truncated to during the commit process.
2191** i.e. the database has been reorganized so that only the first *pnTrunc
2192** pages are in use.
danielk1977687566d2004-11-02 12:56:41 +00002193*/
danielk197724168722007-04-02 05:07:47 +00002194static int autoVacuumCommit(BtShared *pBt, Pgno *pnTrunc){
danielk1977dddbcdc2007-04-26 14:42:34 +00002195 int rc = SQLITE_OK;
danielk1977687566d2004-11-02 12:56:41 +00002196 Pager *pPager = pBt->pPager;
danielk1977687566d2004-11-02 12:56:41 +00002197#ifndef NDEBUG
danielk19773b8a05f2007-03-19 17:44:26 +00002198 int nRef = sqlite3PagerRefcount(pPager);
danielk1977687566d2004-11-02 12:56:41 +00002199#endif
2200
drh27641702007-08-22 02:56:42 +00002201 assert( sqlite3BtreeMutexHeld(pBt->mutex) );
danielk197792d4d7a2007-05-04 12:05:56 +00002202 invalidateAllOverflowCache(pBt);
danielk1977dddbcdc2007-04-26 14:42:34 +00002203 assert(pBt->autoVacuum);
2204 if( !pBt->incrVacuum ){
2205 Pgno nFin = 0;
danielk1977687566d2004-11-02 12:56:41 +00002206
danielk1977dddbcdc2007-04-26 14:42:34 +00002207 if( pBt->nTrunc==0 ){
2208 Pgno nFree;
2209 Pgno nPtrmap;
2210 const int pgsz = pBt->pageSize;
2211 Pgno nOrig = sqlite3PagerPagecount(pBt->pPager);
danielk1977e5321f02007-04-27 07:05:44 +00002212
2213 if( PTRMAP_ISPAGE(pBt, nOrig) ){
2214 return SQLITE_CORRUPT_BKPT;
2215 }
danielk1977dddbcdc2007-04-26 14:42:34 +00002216 if( nOrig==PENDING_BYTE_PAGE(pBt) ){
2217 nOrig--;
danielk1977687566d2004-11-02 12:56:41 +00002218 }
danielk1977dddbcdc2007-04-26 14:42:34 +00002219 nFree = get4byte(&pBt->pPage1->aData[36]);
2220 nPtrmap = (nFree-nOrig+PTRMAP_PAGENO(pBt, nOrig)+pgsz/5)/(pgsz/5);
2221 nFin = nOrig - nFree - nPtrmap;
2222 if( nOrig>PENDING_BYTE_PAGE(pBt) && nFin<=PENDING_BYTE_PAGE(pBt) ){
2223 nFin--;
danielk1977ac11ee62005-01-15 12:45:51 +00002224 }
danielk1977dddbcdc2007-04-26 14:42:34 +00002225 while( PTRMAP_ISPAGE(pBt, nFin) || nFin==PENDING_BYTE_PAGE(pBt) ){
2226 nFin--;
2227 }
2228 }
danielk1977687566d2004-11-02 12:56:41 +00002229
danielk1977dddbcdc2007-04-26 14:42:34 +00002230 while( rc==SQLITE_OK ){
2231 rc = incrVacuumStep(pBt, nFin);
2232 }
2233 if( rc==SQLITE_DONE ){
2234 assert(nFin==0 || pBt->nTrunc==0 || nFin<=pBt->nTrunc);
2235 rc = SQLITE_OK;
2236 if( pBt->nTrunc ){
drh67f80b62007-07-23 19:26:17 +00002237 rc = sqlite3PagerWrite(pBt->pPage1->pDbPage);
danielk1977dddbcdc2007-04-26 14:42:34 +00002238 put4byte(&pBt->pPage1->aData[32], 0);
2239 put4byte(&pBt->pPage1->aData[36], 0);
2240 pBt->nTrunc = nFin;
2241 }
2242 }
2243 if( rc!=SQLITE_OK ){
2244 sqlite3PagerRollback(pPager);
2245 }
danielk1977687566d2004-11-02 12:56:41 +00002246 }
2247
danielk1977dddbcdc2007-04-26 14:42:34 +00002248 if( rc==SQLITE_OK ){
2249 *pnTrunc = pBt->nTrunc;
2250 pBt->nTrunc = 0;
2251 }
danielk19773b8a05f2007-03-19 17:44:26 +00002252 assert( nRef==sqlite3PagerRefcount(pPager) );
danielk1977687566d2004-11-02 12:56:41 +00002253 return rc;
2254}
danielk1977dddbcdc2007-04-26 14:42:34 +00002255
danielk1977687566d2004-11-02 12:56:41 +00002256#endif
2257
2258/*
drh80e35f42007-03-30 14:06:34 +00002259** This routine does the first phase of a two-phase commit. This routine
2260** causes a rollback journal to be created (if it does not already exist)
2261** and populated with enough information so that if a power loss occurs
2262** the database can be restored to its original state by playing back
2263** the journal. Then the contents of the journal are flushed out to
2264** the disk. After the journal is safely on oxide, the changes to the
2265** database are written into the database file and flushed to oxide.
2266** At the end of this call, the rollback journal still exists on the
2267** disk and we are still holding all locks, so the transaction has not
2268** committed. See sqlite3BtreeCommit() for the second phase of the
2269** commit process.
2270**
2271** This call is a no-op if no write-transaction is currently active on pBt.
2272**
2273** Otherwise, sync the database file for the btree pBt. zMaster points to
2274** the name of a master journal file that should be written into the
2275** individual journal file, or is NULL, indicating no master journal file
2276** (single database transaction).
2277**
2278** When this is called, the master journal should already have been
2279** created, populated with this journal pointer and synced to disk.
2280**
2281** Once this is routine has returned, the only thing required to commit
2282** the write-transaction for this database file is to delete the journal.
2283*/
2284int sqlite3BtreeCommitPhaseOne(Btree *p, const char *zMaster){
2285 int rc = SQLITE_OK;
2286 if( p->inTrans==TRANS_WRITE ){
2287 BtShared *pBt = p->pBt;
2288 Pgno nTrunc = 0;
drhd677b3d2007-08-20 22:48:41 +00002289 sqlite3BtreeEnter(p);
drh80e35f42007-03-30 14:06:34 +00002290#ifndef SQLITE_OMIT_AUTOVACUUM
2291 if( pBt->autoVacuum ){
2292 rc = autoVacuumCommit(pBt, &nTrunc);
2293 if( rc!=SQLITE_OK ){
drhd677b3d2007-08-20 22:48:41 +00002294 sqlite3BtreeLeave(p);
drh80e35f42007-03-30 14:06:34 +00002295 return rc;
2296 }
2297 }
2298#endif
2299 rc = sqlite3PagerCommitPhaseOne(pBt->pPager, zMaster, nTrunc);
drhd677b3d2007-08-20 22:48:41 +00002300 sqlite3BtreeLeave(p);
drh80e35f42007-03-30 14:06:34 +00002301 }
2302 return rc;
2303}
2304
2305/*
drh2aa679f2001-06-25 02:11:07 +00002306** Commit the transaction currently in progress.
drh5e00f6c2001-09-13 13:46:56 +00002307**
drh6e345992007-03-30 11:12:08 +00002308** This routine implements the second phase of a 2-phase commit. The
2309** sqlite3BtreeSync() routine does the first phase and should be invoked
2310** prior to calling this routine. The sqlite3BtreeSync() routine did
2311** all the work of writing information out to disk and flushing the
2312** contents so that they are written onto the disk platter. All this
2313** routine has to do is delete or truncate the rollback journal
2314** (which causes the transaction to commit) and drop locks.
2315**
drh5e00f6c2001-09-13 13:46:56 +00002316** This will release the write lock on the database file. If there
2317** are no active cursors, it also releases the read lock.
drha059ad02001-04-17 20:09:11 +00002318*/
drh80e35f42007-03-30 14:06:34 +00002319int sqlite3BtreeCommitPhaseTwo(Btree *p){
danielk1977aef0bf62005-12-30 16:28:01 +00002320 BtShared *pBt = p->pBt;
2321
drhd677b3d2007-08-20 22:48:41 +00002322 sqlite3BtreeEnter(p);
danielk1977aef0bf62005-12-30 16:28:01 +00002323 btreeIntegrity(p);
danielk1977aef0bf62005-12-30 16:28:01 +00002324
2325 /* If the handle has a write-transaction open, commit the shared-btrees
2326 ** transaction and set the shared state to TRANS_READ.
2327 */
2328 if( p->inTrans==TRANS_WRITE ){
danielk19777f7bc662006-01-23 13:47:47 +00002329 int rc;
danielk1977aef0bf62005-12-30 16:28:01 +00002330 assert( pBt->inTransaction==TRANS_WRITE );
2331 assert( pBt->nTransaction>0 );
drh80e35f42007-03-30 14:06:34 +00002332 rc = sqlite3PagerCommitPhaseTwo(pBt->pPager);
danielk19777f7bc662006-01-23 13:47:47 +00002333 if( rc!=SQLITE_OK ){
drhd677b3d2007-08-20 22:48:41 +00002334 sqlite3BtreeLeave(p);
danielk19777f7bc662006-01-23 13:47:47 +00002335 return rc;
2336 }
danielk1977aef0bf62005-12-30 16:28:01 +00002337 pBt->inTransaction = TRANS_READ;
2338 pBt->inStmt = 0;
danielk1977ee5741e2004-05-31 10:01:34 +00002339 }
danielk19777f7bc662006-01-23 13:47:47 +00002340 unlockAllTables(p);
danielk1977aef0bf62005-12-30 16:28:01 +00002341
2342 /* If the handle has any kind of transaction open, decrement the transaction
2343 ** count of the shared btree. If the transaction count reaches 0, set
2344 ** the shared state to TRANS_NONE. The unlockBtreeIfUnused() call below
2345 ** will unlock the pager.
2346 */
2347 if( p->inTrans!=TRANS_NONE ){
2348 pBt->nTransaction--;
2349 if( 0==pBt->nTransaction ){
2350 pBt->inTransaction = TRANS_NONE;
2351 }
2352 }
2353
2354 /* Set the handles current transaction state to TRANS_NONE and unlock
2355 ** the pager if this call closed the only read or write transaction.
2356 */
2357 p->inTrans = TRANS_NONE;
drh5e00f6c2001-09-13 13:46:56 +00002358 unlockBtreeIfUnused(pBt);
danielk1977aef0bf62005-12-30 16:28:01 +00002359
2360 btreeIntegrity(p);
drhd677b3d2007-08-20 22:48:41 +00002361 sqlite3BtreeLeave(p);
danielk19777f7bc662006-01-23 13:47:47 +00002362 return SQLITE_OK;
drha059ad02001-04-17 20:09:11 +00002363}
2364
drh80e35f42007-03-30 14:06:34 +00002365/*
2366** Do both phases of a commit.
2367*/
2368int sqlite3BtreeCommit(Btree *p){
2369 int rc;
drhd677b3d2007-08-20 22:48:41 +00002370 sqlite3BtreeEnter(p);
drh80e35f42007-03-30 14:06:34 +00002371 rc = sqlite3BtreeCommitPhaseOne(p, 0);
2372 if( rc==SQLITE_OK ){
2373 rc = sqlite3BtreeCommitPhaseTwo(p);
2374 }
drhd677b3d2007-08-20 22:48:41 +00002375 sqlite3BtreeLeave(p);
drh80e35f42007-03-30 14:06:34 +00002376 return rc;
2377}
2378
danielk1977fbcd5852004-06-15 02:44:18 +00002379#ifndef NDEBUG
2380/*
2381** Return the number of write-cursors open on this handle. This is for use
2382** in assert() expressions, so it is only compiled if NDEBUG is not
2383** defined.
2384*/
danielk1977aef0bf62005-12-30 16:28:01 +00002385static int countWriteCursors(BtShared *pBt){
danielk1977fbcd5852004-06-15 02:44:18 +00002386 BtCursor *pCur;
2387 int r = 0;
2388 for(pCur=pBt->pCursor; pCur; pCur=pCur->pNext){
danielk1977aef0bf62005-12-30 16:28:01 +00002389 if( pCur->wrFlag ) r++;
danielk1977fbcd5852004-06-15 02:44:18 +00002390 }
2391 return r;
2392}
2393#endif
2394
drhc39e0002004-05-07 23:50:57 +00002395/*
drhecdc7532001-09-23 02:35:53 +00002396** Rollback the transaction in progress. All cursors will be
2397** invalided by this operation. Any attempt to use a cursor
2398** that was open at the beginning of this operation will result
2399** in an error.
drh5e00f6c2001-09-13 13:46:56 +00002400**
2401** This will release the write lock on the database file. If there
2402** are no active cursors, it also releases the read lock.
drha059ad02001-04-17 20:09:11 +00002403*/
danielk1977aef0bf62005-12-30 16:28:01 +00002404int sqlite3BtreeRollback(Btree *p){
danielk19778d34dfd2006-01-24 16:37:57 +00002405 int rc;
danielk1977aef0bf62005-12-30 16:28:01 +00002406 BtShared *pBt = p->pBt;
drh24cd67e2004-05-10 16:18:47 +00002407 MemPage *pPage1;
danielk1977aef0bf62005-12-30 16:28:01 +00002408
drhd677b3d2007-08-20 22:48:41 +00002409 sqlite3BtreeEnter(p);
danielk19772b8c13e2006-01-24 14:21:24 +00002410 rc = saveAllCursors(pBt, 0, 0);
danielk19778d34dfd2006-01-24 16:37:57 +00002411#ifndef SQLITE_OMIT_SHARED_CACHE
danielk19772b8c13e2006-01-24 14:21:24 +00002412 if( rc!=SQLITE_OK ){
danielk19778d34dfd2006-01-24 16:37:57 +00002413 /* This is a horrible situation. An IO or malloc() error occured whilst
2414 ** trying to save cursor positions. If this is an automatic rollback (as
2415 ** the result of a constraint, malloc() failure or IO error) then
2416 ** the cache may be internally inconsistent (not contain valid trees) so
2417 ** we cannot simply return the error to the caller. Instead, abort
2418 ** all queries that may be using any of the cursors that failed to save.
2419 */
2420 while( pBt->pCursor ){
2421 sqlite3 *db = pBt->pCursor->pBtree->pSqlite;
2422 if( db ){
2423 sqlite3AbortOtherActiveVdbes(db, 0);
2424 }
2425 }
danielk19772b8c13e2006-01-24 14:21:24 +00002426 }
danielk19778d34dfd2006-01-24 16:37:57 +00002427#endif
danielk1977aef0bf62005-12-30 16:28:01 +00002428 btreeIntegrity(p);
2429 unlockAllTables(p);
2430
2431 if( p->inTrans==TRANS_WRITE ){
danielk19778d34dfd2006-01-24 16:37:57 +00002432 int rc2;
danielk1977aef0bf62005-12-30 16:28:01 +00002433
danielk1977dddbcdc2007-04-26 14:42:34 +00002434#ifndef SQLITE_OMIT_AUTOVACUUM
2435 pBt->nTrunc = 0;
2436#endif
2437
danielk19778d34dfd2006-01-24 16:37:57 +00002438 assert( TRANS_WRITE==pBt->inTransaction );
danielk19773b8a05f2007-03-19 17:44:26 +00002439 rc2 = sqlite3PagerRollback(pBt->pPager);
danielk19778d34dfd2006-01-24 16:37:57 +00002440 if( rc2!=SQLITE_OK ){
2441 rc = rc2;
2442 }
2443
drh24cd67e2004-05-10 16:18:47 +00002444 /* The rollback may have destroyed the pPage1->aData value. So
drh16a9b832007-05-05 18:39:25 +00002445 ** call sqlite3BtreeGetPage() on page 1 again to make
2446 ** sure pPage1->aData is set correctly. */
2447 if( sqlite3BtreeGetPage(pBt, 1, &pPage1, 0)==SQLITE_OK ){
drh24cd67e2004-05-10 16:18:47 +00002448 releasePage(pPage1);
2449 }
danielk1977fbcd5852004-06-15 02:44:18 +00002450 assert( countWriteCursors(pBt)==0 );
danielk1977aef0bf62005-12-30 16:28:01 +00002451 pBt->inTransaction = TRANS_READ;
drh24cd67e2004-05-10 16:18:47 +00002452 }
danielk1977aef0bf62005-12-30 16:28:01 +00002453
2454 if( p->inTrans!=TRANS_NONE ){
2455 assert( pBt->nTransaction>0 );
2456 pBt->nTransaction--;
2457 if( 0==pBt->nTransaction ){
2458 pBt->inTransaction = TRANS_NONE;
2459 }
2460 }
2461
2462 p->inTrans = TRANS_NONE;
danielk1977ee5741e2004-05-31 10:01:34 +00002463 pBt->inStmt = 0;
drh5e00f6c2001-09-13 13:46:56 +00002464 unlockBtreeIfUnused(pBt);
danielk1977aef0bf62005-12-30 16:28:01 +00002465
2466 btreeIntegrity(p);
drhd677b3d2007-08-20 22:48:41 +00002467 sqlite3BtreeLeave(p);
drha059ad02001-04-17 20:09:11 +00002468 return rc;
2469}
2470
2471/*
drhab01f612004-05-22 02:55:23 +00002472** Start a statement subtransaction. The subtransaction can
2473** can be rolled back independently of the main transaction.
2474** You must start a transaction before starting a subtransaction.
2475** The subtransaction is ended automatically if the main transaction
drh663fc632002-02-02 18:49:19 +00002476** commits or rolls back.
2477**
drhab01f612004-05-22 02:55:23 +00002478** Only one subtransaction may be active at a time. It is an error to try
2479** to start a new subtransaction if another subtransaction is already active.
2480**
2481** Statement subtransactions are used around individual SQL statements
2482** that are contained within a BEGIN...COMMIT block. If a constraint
2483** error occurs within the statement, the effect of that one statement
2484** can be rolled back without having to rollback the entire transaction.
drh663fc632002-02-02 18:49:19 +00002485*/
danielk1977aef0bf62005-12-30 16:28:01 +00002486int sqlite3BtreeBeginStmt(Btree *p){
drh663fc632002-02-02 18:49:19 +00002487 int rc;
danielk1977aef0bf62005-12-30 16:28:01 +00002488 BtShared *pBt = p->pBt;
drhd677b3d2007-08-20 22:48:41 +00002489 sqlite3BtreeEnter(p);
danielk1977aef0bf62005-12-30 16:28:01 +00002490 if( (p->inTrans!=TRANS_WRITE) || pBt->inStmt ){
drhd677b3d2007-08-20 22:48:41 +00002491 rc = pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR;
2492 }else{
2493 assert( pBt->inTransaction==TRANS_WRITE );
2494 rc = pBt->readOnly ? SQLITE_OK : sqlite3PagerStmtBegin(pBt->pPager);
2495 pBt->inStmt = 1;
drh0d65dc02002-02-03 00:56:09 +00002496 }
drhd677b3d2007-08-20 22:48:41 +00002497 sqlite3BtreeLeave(p);
drh663fc632002-02-02 18:49:19 +00002498 return rc;
2499}
2500
2501
2502/*
drhab01f612004-05-22 02:55:23 +00002503** Commit the statment subtransaction currently in progress. If no
2504** subtransaction is active, this is a no-op.
drh663fc632002-02-02 18:49:19 +00002505*/
danielk1977aef0bf62005-12-30 16:28:01 +00002506int sqlite3BtreeCommitStmt(Btree *p){
drh663fc632002-02-02 18:49:19 +00002507 int rc;
danielk1977aef0bf62005-12-30 16:28:01 +00002508 BtShared *pBt = p->pBt;
drhd677b3d2007-08-20 22:48:41 +00002509 sqlite3BtreeEnter(p);
drh3aac2dd2004-04-26 14:10:20 +00002510 if( pBt->inStmt && !pBt->readOnly ){
danielk19773b8a05f2007-03-19 17:44:26 +00002511 rc = sqlite3PagerStmtCommit(pBt->pPager);
drh663fc632002-02-02 18:49:19 +00002512 }else{
2513 rc = SQLITE_OK;
2514 }
drh3aac2dd2004-04-26 14:10:20 +00002515 pBt->inStmt = 0;
drhd677b3d2007-08-20 22:48:41 +00002516 sqlite3BtreeLeave(p);
drh663fc632002-02-02 18:49:19 +00002517 return rc;
2518}
2519
2520/*
drhab01f612004-05-22 02:55:23 +00002521** Rollback the active statement subtransaction. If no subtransaction
2522** is active this routine is a no-op.
drh663fc632002-02-02 18:49:19 +00002523**
drhab01f612004-05-22 02:55:23 +00002524** All cursors will be invalidated by this operation. Any attempt
drh663fc632002-02-02 18:49:19 +00002525** to use a cursor that was open at the beginning of this operation
2526** will result in an error.
2527*/
danielk1977aef0bf62005-12-30 16:28:01 +00002528int sqlite3BtreeRollbackStmt(Btree *p){
danielk197797a227c2006-01-20 16:32:04 +00002529 int rc = SQLITE_OK;
danielk1977aef0bf62005-12-30 16:28:01 +00002530 BtShared *pBt = p->pBt;
drhd677b3d2007-08-20 22:48:41 +00002531 sqlite3BtreeEnter(p);
danielk197797a227c2006-01-20 16:32:04 +00002532 sqlite3MallocDisallow();
2533 if( pBt->inStmt && !pBt->readOnly ){
danielk19773b8a05f2007-03-19 17:44:26 +00002534 rc = sqlite3PagerStmtRollback(pBt->pPager);
danielk197797a227c2006-01-20 16:32:04 +00002535 assert( countWriteCursors(pBt)==0 );
2536 pBt->inStmt = 0;
2537 }
2538 sqlite3MallocAllow();
drhd677b3d2007-08-20 22:48:41 +00002539 sqlite3BtreeLeave(p);
drh663fc632002-02-02 18:49:19 +00002540 return rc;
2541}
2542
/*
** Default key comparison function to be used if no comparison function
** is specified on the sqlite3BtreeCursor() call.
**
** The two keys are compared with memcmp() over the length of the shorter
** one.  If that common prefix is identical, the result is n1-n2, so the
** shorter key sorts first.  The result is negative, zero or positive as
** the first key is less than, equal to or greater than the second.
*/
static int dfltCompare(
  void *NotUsed,             /* User data is not used */
  int n1, const void *p1,    /* First key to compare */
  int n2, const void *p2     /* Second key to compare */
){
  int nCommon = n2;          /* Length of the shorter of the two keys */
  int res;

  if( n1<n2 ){
    nCommon = n1;
  }
  res = memcmp(p1, p2, nCommon);
  return res!=0 ? res : n1 - n2;
}
2559
2560/*
drh8b2f49b2001-06-08 00:21:52 +00002561** Create a new cursor for the BTree whose root is on the page
2562** iTable. The act of acquiring a cursor gets a read lock on
2563** the database file.
drh1bee3d72001-10-15 00:44:35 +00002564**
2565** If wrFlag==0, then the cursor can only be used for reading.
drhf74b8d92002-09-01 23:20:45 +00002566** If wrFlag==1, then the cursor can be used for reading or for
2567** writing if other conditions for writing are also met. These
2568** are the conditions that must be met in order for writing to
2569** be allowed:
drh6446c4d2001-12-15 14:22:18 +00002570**
drhf74b8d92002-09-01 23:20:45 +00002571** 1: The cursor must have been opened with wrFlag==1
2572**
drhfe5d71d2007-03-19 11:54:10 +00002573** 2: Other database connections that share the same pager cache
2574** but which are not in the READ_UNCOMMITTED state may not have
2575** cursors open with wrFlag==0 on the same table. Otherwise
2576** the changes made by this write cursor would be visible to
2577** the read cursors in the other database connection.
drhf74b8d92002-09-01 23:20:45 +00002578**
2579** 3: The database must be writable (not on read-only media)
2580**
2581** 4: There must be an active transaction.
2582**
drh6446c4d2001-12-15 14:22:18 +00002583** No checking is done to make sure that page iTable really is the
2584** root page of a b-tree. If it is not, then the cursor acquired
2585** will not work correctly.
drh3aac2dd2004-04-26 14:10:20 +00002586**
2587** The comparison function must be logically the same for every cursor
2588** on a particular table. Changing the comparison function will result
2589** in incorrect operations. If the comparison function is NULL, a
2590** default comparison function is used. The comparison function is
2591** always ignored for INTKEY tables.
drha059ad02001-04-17 20:09:11 +00002592*/
drhd677b3d2007-08-20 22:48:41 +00002593static int btreeCursor(
danielk1977aef0bf62005-12-30 16:28:01 +00002594 Btree *p, /* The btree */
drh3aac2dd2004-04-26 14:10:20 +00002595 int iTable, /* Root page of table to open */
2596 int wrFlag, /* 1 to write. 0 read-only */
2597 int (*xCmp)(void*,int,const void*,int,const void*), /* Key Comparison func */
2598 void *pArg, /* First arg to xCompare() */
2599 BtCursor **ppCur /* Write new cursor here */
2600){
drha059ad02001-04-17 20:09:11 +00002601 int rc;
drh8dcd7ca2004-08-08 19:43:29 +00002602 BtCursor *pCur;
danielk1977aef0bf62005-12-30 16:28:01 +00002603 BtShared *pBt = p->pBt;
drhecdc7532001-09-23 02:35:53 +00002604
drh27641702007-08-22 02:56:42 +00002605 assert( sqlite3BtreeMutexHeld(pBt->mutex) );
drhd0679ed2007-08-28 22:24:34 +00002606 assert( sqlite3BtreeMutexHeld(p->pSqlite->mutex) );
drh8dcd7ca2004-08-08 19:43:29 +00002607 *ppCur = 0;
2608 if( wrFlag ){
drh8dcd7ca2004-08-08 19:43:29 +00002609 if( pBt->readOnly ){
2610 return SQLITE_READONLY;
2611 }
drh980b1a72006-08-16 16:42:48 +00002612 if( checkReadLocks(p, iTable, 0) ){
drh8dcd7ca2004-08-08 19:43:29 +00002613 return SQLITE_LOCKED;
2614 }
drha0c9a112004-03-10 13:42:37 +00002615 }
danielk1977aef0bf62005-12-30 16:28:01 +00002616
drh4b70f112004-05-02 21:12:19 +00002617 if( pBt->pPage1==0 ){
danielk1977aef0bf62005-12-30 16:28:01 +00002618 rc = lockBtreeWithRetry(p);
drha059ad02001-04-17 20:09:11 +00002619 if( rc!=SQLITE_OK ){
drha059ad02001-04-17 20:09:11 +00002620 return rc;
2621 }
drh1831f182007-04-24 17:35:59 +00002622 if( pBt->readOnly && wrFlag ){
2623 return SQLITE_READONLY;
2624 }
drha059ad02001-04-17 20:09:11 +00002625 }
drh17435752007-08-16 04:30:38 +00002626 pCur = sqlite3MallocZero( sizeof(*pCur) );
drha059ad02001-04-17 20:09:11 +00002627 if( pCur==0 ){
drhbd03cae2001-06-02 02:40:57 +00002628 rc = SQLITE_NOMEM;
2629 goto create_cursor_exception;
2630 }
drh8b2f49b2001-06-08 00:21:52 +00002631 pCur->pgnoRoot = (Pgno)iTable;
danielk19773b8a05f2007-03-19 17:44:26 +00002632 if( iTable==1 && sqlite3PagerPagecount(pBt->pPager)==0 ){
drh24cd67e2004-05-10 16:18:47 +00002633 rc = SQLITE_EMPTY;
2634 goto create_cursor_exception;
2635 }
drhde647132004-05-07 17:57:49 +00002636 rc = getAndInitPage(pBt, pCur->pgnoRoot, &pCur->pPage, 0);
drhbd03cae2001-06-02 02:40:57 +00002637 if( rc!=SQLITE_OK ){
2638 goto create_cursor_exception;
drha059ad02001-04-17 20:09:11 +00002639 }
danielk1977aef0bf62005-12-30 16:28:01 +00002640
danielk1977aef0bf62005-12-30 16:28:01 +00002641 /* Now that no other errors can occur, finish filling in the BtCursor
2642 ** variables, link the cursor into the BtShared list and set *ppCur (the
2643 ** output argument to this function).
2644 */
drh3aac2dd2004-04-26 14:10:20 +00002645 pCur->xCompare = xCmp ? xCmp : dfltCompare;
2646 pCur->pArg = pArg;
danielk1977aef0bf62005-12-30 16:28:01 +00002647 pCur->pBtree = p;
drhd0679ed2007-08-28 22:24:34 +00002648 pCur->pBt = pBt;
drhecdc7532001-09-23 02:35:53 +00002649 pCur->wrFlag = wrFlag;
drha059ad02001-04-17 20:09:11 +00002650 pCur->pNext = pBt->pCursor;
2651 if( pCur->pNext ){
2652 pCur->pNext->pPrev = pCur;
2653 }
2654 pBt->pCursor = pCur;
danielk1977da184232006-01-05 11:34:32 +00002655 pCur->eState = CURSOR_INVALID;
drh2af926b2001-05-15 00:39:25 +00002656 *ppCur = pCur;
drhbd03cae2001-06-02 02:40:57 +00002657
danielk1977aef0bf62005-12-30 16:28:01 +00002658 return SQLITE_OK;
drhd677b3d2007-08-20 22:48:41 +00002659
drhbd03cae2001-06-02 02:40:57 +00002660create_cursor_exception:
drhbd03cae2001-06-02 02:40:57 +00002661 if( pCur ){
drh3aac2dd2004-04-26 14:10:20 +00002662 releasePage(pCur->pPage);
drh17435752007-08-16 04:30:38 +00002663 sqlite3_free(pCur);
drhbd03cae2001-06-02 02:40:57 +00002664 }
drh5e00f6c2001-09-13 13:46:56 +00002665 unlockBtreeIfUnused(pBt);
drhbd03cae2001-06-02 02:40:57 +00002666 return rc;
drha059ad02001-04-17 20:09:11 +00002667}
drhd677b3d2007-08-20 22:48:41 +00002668int sqlite3BtreeCursor(
2669 Btree *p, /* The btree */
2670 int iTable, /* Root page of table to open */
2671 int wrFlag, /* 1 to write. 0 read-only */
2672 int (*xCmp)(void*,int,const void*,int,const void*), /* Key Comparison func */
2673 void *pArg, /* First arg to xCompare() */
2674 BtCursor **ppCur /* Write new cursor here */
2675){
2676 int rc;
2677 sqlite3BtreeEnter(p);
2678 rc = btreeCursor(p, iTable, wrFlag, xCmp, pArg, ppCur);
2679 sqlite3BtreeLeave(p);
2680 return rc;
2681}
2682
drha059ad02001-04-17 20:09:11 +00002683
2684/*
drh5e00f6c2001-09-13 13:46:56 +00002685** Close a cursor. The read lock on the database file is released
drhbd03cae2001-06-02 02:40:57 +00002686** when the last cursor is closed.
drha059ad02001-04-17 20:09:11 +00002687*/
drh3aac2dd2004-04-26 14:10:20 +00002688int sqlite3BtreeCloseCursor(BtCursor *pCur){
drhd0679ed2007-08-28 22:24:34 +00002689 BtShared *pBt = pCur->pBt;
drhd677b3d2007-08-20 22:48:41 +00002690
drhd0679ed2007-08-28 22:24:34 +00002691 assert( sqlite3_mutex_held(pCur->pBt->mutex) );
2692 assert( sqlite3_mutex_held(pCur->pBtree->pSqlite->mutex) );
drhbf700f32007-03-31 02:36:44 +00002693 clearCursorPosition(pCur);
drha059ad02001-04-17 20:09:11 +00002694 if( pCur->pPrev ){
2695 pCur->pPrev->pNext = pCur->pNext;
2696 }else{
2697 pBt->pCursor = pCur->pNext;
2698 }
2699 if( pCur->pNext ){
2700 pCur->pNext->pPrev = pCur->pPrev;
2701 }
drh3aac2dd2004-04-26 14:10:20 +00002702 releasePage(pCur->pPage);
drh5e00f6c2001-09-13 13:46:56 +00002703 unlockBtreeIfUnused(pBt);
danielk197792d4d7a2007-05-04 12:05:56 +00002704 invalidateOverflowCache(pCur);
drh17435752007-08-16 04:30:38 +00002705 sqlite3_free(pCur);
drh8c42ca92001-06-22 19:15:00 +00002706 return SQLITE_OK;
drha059ad02001-04-17 20:09:11 +00002707}
2708
drh7e3b0a02001-04-28 16:52:40 +00002709/*
drh5e2f8b92001-05-28 00:41:15 +00002710** Make a temporary cursor by filling in the fields of pTempCur.
2711** The temporary cursor is not on the cursor list for the Btree.
2712*/
drh16a9b832007-05-05 18:39:25 +00002713void sqlite3BtreeGetTempCursor(BtCursor *pCur, BtCursor *pTempCur){
drhd0679ed2007-08-28 22:24:34 +00002714 cursorEnter(pCur);
drh5e2f8b92001-05-28 00:41:15 +00002715 memcpy(pTempCur, pCur, sizeof(*pCur));
2716 pTempCur->pNext = 0;
2717 pTempCur->pPrev = 0;
drhecdc7532001-09-23 02:35:53 +00002718 if( pTempCur->pPage ){
danielk19773b8a05f2007-03-19 17:44:26 +00002719 sqlite3PagerRef(pTempCur->pPage->pDbPage);
drhecdc7532001-09-23 02:35:53 +00002720 }
drhd0679ed2007-08-28 22:24:34 +00002721 cursorLeave(pCur);
drh5e2f8b92001-05-28 00:41:15 +00002722}
2723
2724/*
drhbd03cae2001-06-02 02:40:57 +00002725** Delete a temporary cursor such as was made by the CreateTemporaryCursor()
drh5e2f8b92001-05-28 00:41:15 +00002726** function above.
2727*/
drh16a9b832007-05-05 18:39:25 +00002728void sqlite3BtreeReleaseTempCursor(BtCursor *pCur){
drhd0679ed2007-08-28 22:24:34 +00002729 cursorEnter(pCur);
drhecdc7532001-09-23 02:35:53 +00002730 if( pCur->pPage ){
danielk19773b8a05f2007-03-19 17:44:26 +00002731 sqlite3PagerUnref(pCur->pPage->pDbPage);
drhecdc7532001-09-23 02:35:53 +00002732 }
drhd0679ed2007-08-28 22:24:34 +00002733 cursorLeave(pCur);
drh5e2f8b92001-05-28 00:41:15 +00002734}
2735
2736/*
drh86057612007-06-26 01:04:48 +00002737** Make sure the BtCursor* given in the argument has a valid
2738** BtCursor.info structure. If it is not already valid, call
danielk19771cc5ed82007-05-16 17:28:43 +00002739** sqlite3BtreeParseCell() to fill it in.
drhab01f612004-05-22 02:55:23 +00002740**
2741** BtCursor.info is a cache of the information in the current cell.
drh16a9b832007-05-05 18:39:25 +00002742** Using this cache reduces the number of calls to sqlite3BtreeParseCell().
drh86057612007-06-26 01:04:48 +00002743**
2744** 2007-06-25: There is a bug in some versions of MSVC that cause the
2745** compiler to crash when getCellInfo() is implemented as a macro.
2746** But there is a measureable speed advantage to using the macro on gcc
2747** (when less compiler optimizations like -Os or -O0 are used and the
2748** compiler is not doing agressive inlining.) So we use a real function
2749** for MSVC and a macro for everything else. Ticket #2457.
drh9188b382004-05-14 21:12:22 +00002750*/
drh9188b382004-05-14 21:12:22 +00002751#ifndef NDEBUG
danielk19771cc5ed82007-05-16 17:28:43 +00002752 static void assertCellInfo(BtCursor *pCur){
drh9188b382004-05-14 21:12:22 +00002753 CellInfo info;
drh51c6d962004-06-06 00:42:25 +00002754 memset(&info, 0, sizeof(info));
drh16a9b832007-05-05 18:39:25 +00002755 sqlite3BtreeParseCell(pCur->pPage, pCur->idx, &info);
drh9188b382004-05-14 21:12:22 +00002756 assert( memcmp(&info, &pCur->info, sizeof(info))==0 );
drh9188b382004-05-14 21:12:22 +00002757 }
danielk19771cc5ed82007-05-16 17:28:43 +00002758#else
2759 #define assertCellInfo(x)
2760#endif
drh86057612007-06-26 01:04:48 +00002761#ifdef _MSC_VER
2762 /* Use a real function in MSVC to work around bugs in that compiler. */
2763 static void getCellInfo(BtCursor *pCur){
2764 if( pCur->info.nSize==0 ){
2765 sqlite3BtreeParseCell(pCur->pPage, pCur->idx, &pCur->info);
2766 }else{
2767 assertCellInfo(pCur);
2768 }
2769 }
2770#else /* if not _MSC_VER */
2771 /* Use a macro in all other compilers so that the function is inlined */
2772#define getCellInfo(pCur) \
2773 if( pCur->info.nSize==0 ){ \
danielk19771cc5ed82007-05-16 17:28:43 +00002774 sqlite3BtreeParseCell(pCur->pPage, pCur->idx, &pCur->info); \
drh86057612007-06-26 01:04:48 +00002775 }else{ \
2776 assertCellInfo(pCur); \
2777 }
2778#endif /* _MSC_VER */
drh9188b382004-05-14 21:12:22 +00002779
2780/*
drh3aac2dd2004-04-26 14:10:20 +00002781** Set *pSize to the size of the buffer needed to hold the value of
2782** the key for the current entry. If the cursor is not pointing
2783** to a valid entry, *pSize is set to 0.
2784**
drh4b70f112004-05-02 21:12:19 +00002785** For a table with the INTKEY flag set, this routine returns the key
drh3aac2dd2004-04-26 14:10:20 +00002786** itself, not the number of bytes in the key.
drh7e3b0a02001-04-28 16:52:40 +00002787*/
drh4a1c3802004-05-12 15:15:47 +00002788int sqlite3BtreeKeySize(BtCursor *pCur, i64 *pSize){
drhd677b3d2007-08-20 22:48:41 +00002789 int rc;
2790
drhd0679ed2007-08-28 22:24:34 +00002791 assert( sqlite3_mutex_held(pCur->pBt->mutex) );
drhd677b3d2007-08-20 22:48:41 +00002792 rc = restoreOrClearCursorPosition(pCur);
danielk1977da184232006-01-05 11:34:32 +00002793 if( rc==SQLITE_OK ){
2794 assert( pCur->eState==CURSOR_INVALID || pCur->eState==CURSOR_VALID );
2795 if( pCur->eState==CURSOR_INVALID ){
2796 *pSize = 0;
2797 }else{
drh86057612007-06-26 01:04:48 +00002798 getCellInfo(pCur);
danielk1977da184232006-01-05 11:34:32 +00002799 *pSize = pCur->info.nKey;
2800 }
drh72f82862001-05-24 21:06:34 +00002801 }
danielk1977da184232006-01-05 11:34:32 +00002802 return rc;
drha059ad02001-04-17 20:09:11 +00002803}
drh2af926b2001-05-15 00:39:25 +00002804
drh72f82862001-05-24 21:06:34 +00002805/*
drh0e1c19e2004-05-11 00:58:56 +00002806** Set *pSize to the number of bytes of data in the entry the
2807** cursor currently points to. Always return SQLITE_OK.
2808** Failure is not possible. If the cursor is not currently
2809** pointing to an entry (which can happen, for example, if
2810** the database is empty) then *pSize is set to 0.
2811*/
2812int sqlite3BtreeDataSize(BtCursor *pCur, u32 *pSize){
drhd677b3d2007-08-20 22:48:41 +00002813 int rc;
2814
drhd0679ed2007-08-28 22:24:34 +00002815 assert( sqlite3_mutex_held(pCur->pBt->mutex) );
drhd677b3d2007-08-20 22:48:41 +00002816 rc = restoreOrClearCursorPosition(pCur);
danielk1977da184232006-01-05 11:34:32 +00002817 if( rc==SQLITE_OK ){
2818 assert( pCur->eState==CURSOR_INVALID || pCur->eState==CURSOR_VALID );
2819 if( pCur->eState==CURSOR_INVALID ){
2820 /* Not pointing at a valid entry - set *pSize to 0. */
2821 *pSize = 0;
2822 }else{
drh86057612007-06-26 01:04:48 +00002823 getCellInfo(pCur);
danielk1977da184232006-01-05 11:34:32 +00002824 *pSize = pCur->info.nData;
2825 }
drh0e1c19e2004-05-11 00:58:56 +00002826 }
danielk1977da184232006-01-05 11:34:32 +00002827 return rc;
drh0e1c19e2004-05-11 00:58:56 +00002828}
2829
2830/*
danielk1977d04417962007-05-02 13:16:30 +00002831** Given the page number of an overflow page in the database (parameter
2832** ovfl), this function finds the page number of the next page in the
2833** linked list of overflow pages. If possible, it uses the auto-vacuum
2834** pointer-map data instead of reading the content of page ovfl to do so.
2835**
2836** If an error occurs an SQLite error code is returned. Otherwise:
2837**
2838** Unless pPgnoNext is NULL, the page number of the next overflow
2839** page in the linked list is written to *pPgnoNext. If page ovfl
2840** is the last page in it's linked list, *pPgnoNext is set to zero.
2841**
2842** If ppPage is not NULL, *ppPage is set to the MemPage* handle
2843** for page ovfl. The underlying pager page may have been requested
2844** with the noContent flag set, so the page data accessable via
2845** this handle may not be trusted.
2846*/
2847static int getOverflowPage(
2848 BtShared *pBt,
2849 Pgno ovfl, /* Overflow page */
2850 MemPage **ppPage, /* OUT: MemPage handle */
2851 Pgno *pPgnoNext /* OUT: Next overflow page number */
2852){
2853 Pgno next = 0;
2854 int rc;
2855
drh27641702007-08-22 02:56:42 +00002856 assert( sqlite3BtreeMutexHeld(pBt->mutex) );
danielk1977d04417962007-05-02 13:16:30 +00002857 /* One of these must not be NULL. Otherwise, why call this function? */
2858 assert(ppPage || pPgnoNext);
2859
2860 /* If pPgnoNext is NULL, then this function is being called to obtain
2861 ** a MemPage* reference only. No page-data is required in this case.
2862 */
2863 if( !pPgnoNext ){
drh16a9b832007-05-05 18:39:25 +00002864 return sqlite3BtreeGetPage(pBt, ovfl, ppPage, 1);
danielk1977d04417962007-05-02 13:16:30 +00002865 }
2866
2867#ifndef SQLITE_OMIT_AUTOVACUUM
2868 /* Try to find the next page in the overflow list using the
2869 ** autovacuum pointer-map pages. Guess that the next page in
2870 ** the overflow list is page number (ovfl+1). If that guess turns
2871 ** out to be wrong, fall back to loading the data of page
2872 ** number ovfl to determine the next page number.
2873 */
2874 if( pBt->autoVacuum ){
2875 Pgno pgno;
2876 Pgno iGuess = ovfl+1;
2877 u8 eType;
2878
2879 while( PTRMAP_ISPAGE(pBt, iGuess) || iGuess==PENDING_BYTE_PAGE(pBt) ){
2880 iGuess++;
2881 }
2882
danielk197720713f32007-05-03 11:43:33 +00002883 if( iGuess<=sqlite3PagerPagecount(pBt->pPager) ){
danielk1977d04417962007-05-02 13:16:30 +00002884 rc = ptrmapGet(pBt, iGuess, &eType, &pgno);
2885 if( rc!=SQLITE_OK ){
2886 return rc;
2887 }
2888 if( eType==PTRMAP_OVERFLOW2 && pgno==ovfl ){
2889 next = iGuess;
2890 }
2891 }
2892 }
2893#endif
2894
2895 if( next==0 || ppPage ){
2896 MemPage *pPage = 0;
2897
drh16a9b832007-05-05 18:39:25 +00002898 rc = sqlite3BtreeGetPage(pBt, ovfl, &pPage, next!=0);
danielk1977d04417962007-05-02 13:16:30 +00002899 assert(rc==SQLITE_OK || pPage==0);
2900 if( next==0 && rc==SQLITE_OK ){
2901 next = get4byte(pPage->aData);
2902 }
2903
2904 if( ppPage ){
2905 *ppPage = pPage;
2906 }else{
2907 releasePage(pPage);
2908 }
2909 }
2910 *pPgnoNext = next;
2911
2912 return rc;
2913}
2914
danielk1977da107192007-05-04 08:32:13 +00002915/*
2916** Copy data from a buffer to a page, or from a page to a buffer.
2917**
2918** pPayload is a pointer to data stored on database page pDbPage.
2919** If argument eOp is false, then nByte bytes of data are copied
2920** from pPayload to the buffer pointed at by pBuf. If eOp is true,
2921** then sqlite3PagerWrite() is called on pDbPage and nByte bytes
2922** of data are copied from the buffer pBuf to pPayload.
2923**
2924** SQLITE_OK is returned on success, otherwise an error code.
2925*/
2926static int copyPayload(
2927 void *pPayload, /* Pointer to page data */
2928 void *pBuf, /* Pointer to buffer */
2929 int nByte, /* Number of bytes to copy */
2930 int eOp, /* 0 -> copy from page, 1 -> copy to page */
2931 DbPage *pDbPage /* Page containing pPayload */
2932){
2933 if( eOp ){
2934 /* Copy data from buffer to page (a write operation) */
2935 int rc = sqlite3PagerWrite(pDbPage);
2936 if( rc!=SQLITE_OK ){
2937 return rc;
2938 }
2939 memcpy(pPayload, pBuf, nByte);
2940 }else{
2941 /* Copy data from page to buffer (a read operation) */
2942 memcpy(pBuf, pPayload, nByte);
2943 }
2944 return SQLITE_OK;
2945}
danielk1977d04417962007-05-02 13:16:30 +00002946
2947/*
danielk19779f8d6402007-05-02 17:48:45 +00002948** This function is used to read or overwrite payload information
2949** for the entry that the pCur cursor is pointing to. If the eOp
2950** parameter is 0, this is a read operation (data copied into
2951** buffer pBuf). If it is non-zero, a write (data copied from
2952** buffer pBuf).
2953**
2954** A total of "amt" bytes are read or written beginning at "offset".
2955** Data is read to or from the buffer pBuf.
drh72f82862001-05-24 21:06:34 +00002956**
2957** This routine does not make a distinction between key and data.
danielk19779f8d6402007-05-02 17:48:45 +00002958** It just reads or writes bytes from the payload area. Data might
2959** appear on the main page or be scattered out on multiple overflow
2960** pages.
danielk1977da107192007-05-04 08:32:13 +00002961**
danielk1977dcbb5d32007-05-04 18:36:44 +00002962** If the BtCursor.isIncrblobHandle flag is set, and the current
danielk1977da107192007-05-04 08:32:13 +00002963** cursor entry uses one or more overflow pages, this function
2964** allocates space for and lazily popluates the overflow page-list
2965** cache array (BtCursor.aOverflow). Subsequent calls use this
2966** cache to make seeking to the supplied offset more efficient.
2967**
2968** Once an overflow page-list cache has been allocated, it may be
2969** invalidated if some other cursor writes to the same table, or if
2970** the cursor is moved to a different row. Additionally, in auto-vacuum
2971** mode, the following events may invalidate an overflow page-list cache.
2972**
2973** * An incremental vacuum,
2974** * A commit in auto_vacuum="full" mode,
2975** * Creating a table (may require moving an overflow page).
drh72f82862001-05-24 21:06:34 +00002976*/
danielk19779f8d6402007-05-02 17:48:45 +00002977static int accessPayload(
drh3aac2dd2004-04-26 14:10:20 +00002978 BtCursor *pCur, /* Cursor pointing to entry to read from */
2979 int offset, /* Begin reading this far into payload */
2980 int amt, /* Read this many bytes */
2981 unsigned char *pBuf, /* Write the bytes into this buffer */
danielk19779f8d6402007-05-02 17:48:45 +00002982 int skipKey, /* offset begins at data if this is true */
2983 int eOp /* zero to read. non-zero to write. */
drh3aac2dd2004-04-26 14:10:20 +00002984){
2985 unsigned char *aPayload;
danielk1977da107192007-05-04 08:32:13 +00002986 int rc = SQLITE_OK;
drhfa1a98a2004-05-14 19:08:17 +00002987 u32 nKey;
danielk19772dec9702007-05-02 16:48:37 +00002988 int iIdx = 0;
drhd0679ed2007-08-28 22:24:34 +00002989 MemPage *pPage = pCur->pPage; /* Btree page of current cursor entry */
2990 BtShared *pBt = pCur->pBt; /* Btree this cursor belongs to */
drh3aac2dd2004-04-26 14:10:20 +00002991
danielk1977da107192007-05-04 08:32:13 +00002992 assert( pPage );
danielk1977da184232006-01-05 11:34:32 +00002993 assert( pCur->eState==CURSOR_VALID );
drh3aac2dd2004-04-26 14:10:20 +00002994 assert( pCur->idx>=0 && pCur->idx<pPage->nCell );
danielk1977da107192007-05-04 08:32:13 +00002995 assert( offset>=0 );
drhd0679ed2007-08-28 22:24:34 +00002996 assert( sqlite3BtreeMutexHeld(pCur->pBt->mutex) );
danielk1977da107192007-05-04 08:32:13 +00002997
drh86057612007-06-26 01:04:48 +00002998 getCellInfo(pCur);
drh366fda62006-01-13 02:35:09 +00002999 aPayload = pCur->info.pCell + pCur->info.nHeader;
danielk1977da107192007-05-04 08:32:13 +00003000 nKey = (pPage->intKey ? 0 : pCur->info.nKey);
3001
drh3aac2dd2004-04-26 14:10:20 +00003002 if( skipKey ){
drhfa1a98a2004-05-14 19:08:17 +00003003 offset += nKey;
drh3aac2dd2004-04-26 14:10:20 +00003004 }
drhfa1a98a2004-05-14 19:08:17 +00003005 if( offset+amt > nKey+pCur->info.nData ){
danielk1977da107192007-05-04 08:32:13 +00003006 /* Trying to read or write past the end of the data is an error */
drha34b6762004-05-07 13:30:42 +00003007 return SQLITE_ERROR;
drh3aac2dd2004-04-26 14:10:20 +00003008 }
danielk1977da107192007-05-04 08:32:13 +00003009
3010 /* Check if data must be read/written to/from the btree page itself. */
drhfa1a98a2004-05-14 19:08:17 +00003011 if( offset<pCur->info.nLocal ){
drh2af926b2001-05-15 00:39:25 +00003012 int a = amt;
drhfa1a98a2004-05-14 19:08:17 +00003013 if( a+offset>pCur->info.nLocal ){
3014 a = pCur->info.nLocal - offset;
drh2af926b2001-05-15 00:39:25 +00003015 }
danielk1977da107192007-05-04 08:32:13 +00003016 rc = copyPayload(&aPayload[offset], pBuf, a, eOp, pPage->pDbPage);
drh2aa679f2001-06-25 02:11:07 +00003017 offset = 0;
drha34b6762004-05-07 13:30:42 +00003018 pBuf += a;
drh2af926b2001-05-15 00:39:25 +00003019 amt -= a;
drhdd793422001-06-28 01:54:48 +00003020 }else{
drhfa1a98a2004-05-14 19:08:17 +00003021 offset -= pCur->info.nLocal;
drhbd03cae2001-06-02 02:40:57 +00003022 }
danielk1977da107192007-05-04 08:32:13 +00003023
3024 if( rc==SQLITE_OK && amt>0 ){
3025 const int ovflSize = pBt->usableSize - 4; /* Bytes content per ovfl page */
3026 Pgno nextPage;
3027
drhfa1a98a2004-05-14 19:08:17 +00003028 nextPage = get4byte(&aPayload[pCur->info.nLocal]);
danielk1977da107192007-05-04 08:32:13 +00003029
danielk19772dec9702007-05-02 16:48:37 +00003030#ifndef SQLITE_OMIT_INCRBLOB
danielk1977dcbb5d32007-05-04 18:36:44 +00003031 /* If the isIncrblobHandle flag is set and the BtCursor.aOverflow[]
danielk1977da107192007-05-04 08:32:13 +00003032 ** has not been allocated, allocate it now. The array is sized at
3033 ** one entry for each overflow page in the overflow chain. The
3034 ** page number of the first overflow page is stored in aOverflow[0],
3035 ** etc. A value of 0 in the aOverflow[] array means "not yet known"
3036 ** (the cache is lazily populated).
3037 */
danielk1977dcbb5d32007-05-04 18:36:44 +00003038 if( pCur->isIncrblobHandle && !pCur->aOverflow ){
danielk19772dec9702007-05-02 16:48:37 +00003039 int nOvfl = (pCur->info.nPayload-pCur->info.nLocal+ovflSize-1)/ovflSize;
drh17435752007-08-16 04:30:38 +00003040 pCur->aOverflow = (Pgno *)sqlite3MallocZero(sizeof(Pgno)*nOvfl);
danielk19772dec9702007-05-02 16:48:37 +00003041 if( nOvfl && !pCur->aOverflow ){
danielk1977da107192007-05-04 08:32:13 +00003042 rc = SQLITE_NOMEM;
danielk19772dec9702007-05-02 16:48:37 +00003043 }
3044 }
danielk1977da107192007-05-04 08:32:13 +00003045
3046 /* If the overflow page-list cache has been allocated and the
3047 ** entry for the first required overflow page is valid, skip
3048 ** directly to it.
3049 */
danielk19772dec9702007-05-02 16:48:37 +00003050 if( pCur->aOverflow && pCur->aOverflow[offset/ovflSize] ){
3051 iIdx = (offset/ovflSize);
3052 nextPage = pCur->aOverflow[iIdx];
3053 offset = (offset%ovflSize);
3054 }
3055#endif
danielk1977da107192007-05-04 08:32:13 +00003056
3057 for( ; rc==SQLITE_OK && amt>0 && nextPage; iIdx++){
3058
3059#ifndef SQLITE_OMIT_INCRBLOB
3060 /* If required, populate the overflow page-list cache. */
3061 if( pCur->aOverflow ){
3062 assert(!pCur->aOverflow[iIdx] || pCur->aOverflow[iIdx]==nextPage);
3063 pCur->aOverflow[iIdx] = nextPage;
3064 }
3065#endif
3066
danielk1977d04417962007-05-02 13:16:30 +00003067 if( offset>=ovflSize ){
3068 /* The only reason to read this page is to obtain the page
danielk1977da107192007-05-04 08:32:13 +00003069 ** number for the next page in the overflow chain. The page
drhfd131da2007-08-07 17:13:03 +00003070 ** data is not required. So first try to lookup the overflow
3071 ** page-list cache, if any, then fall back to the getOverflowPage()
danielk1977da107192007-05-04 08:32:13 +00003072 ** function.
danielk1977d04417962007-05-02 13:16:30 +00003073 */
danielk19772dec9702007-05-02 16:48:37 +00003074#ifndef SQLITE_OMIT_INCRBLOB
danielk1977da107192007-05-04 08:32:13 +00003075 if( pCur->aOverflow && pCur->aOverflow[iIdx+1] ){
3076 nextPage = pCur->aOverflow[iIdx+1];
3077 } else
danielk19772dec9702007-05-02 16:48:37 +00003078#endif
danielk1977da107192007-05-04 08:32:13 +00003079 rc = getOverflowPage(pBt, nextPage, 0, &nextPage);
danielk1977da107192007-05-04 08:32:13 +00003080 offset -= ovflSize;
danielk1977d04417962007-05-02 13:16:30 +00003081 }else{
danielk19779f8d6402007-05-02 17:48:45 +00003082 /* Need to read this page properly. It contains some of the
3083 ** range of data that is being read (eOp==0) or written (eOp!=0).
danielk1977d04417962007-05-02 13:16:30 +00003084 */
3085 DbPage *pDbPage;
danielk1977cfe9a692004-06-16 12:00:29 +00003086 int a = amt;
danielk1977d04417962007-05-02 13:16:30 +00003087 rc = sqlite3PagerGet(pBt->pPager, nextPage, &pDbPage);
danielk1977da107192007-05-04 08:32:13 +00003088 if( rc==SQLITE_OK ){
3089 aPayload = sqlite3PagerGetData(pDbPage);
3090 nextPage = get4byte(aPayload);
3091 if( a + offset > ovflSize ){
3092 a = ovflSize - offset;
danielk19779f8d6402007-05-02 17:48:45 +00003093 }
danielk1977da107192007-05-04 08:32:13 +00003094 rc = copyPayload(&aPayload[offset+4], pBuf, a, eOp, pDbPage);
3095 sqlite3PagerUnref(pDbPage);
3096 offset = 0;
3097 amt -= a;
3098 pBuf += a;
danielk19779f8d6402007-05-02 17:48:45 +00003099 }
danielk1977cfe9a692004-06-16 12:00:29 +00003100 }
drh2af926b2001-05-15 00:39:25 +00003101 }
drh2af926b2001-05-15 00:39:25 +00003102 }
danielk1977cfe9a692004-06-16 12:00:29 +00003103
danielk1977da107192007-05-04 08:32:13 +00003104 if( rc==SQLITE_OK && amt>0 ){
drh49285702005-09-17 15:20:26 +00003105 return SQLITE_CORRUPT_BKPT;
drha7fcb052001-12-14 15:09:55 +00003106 }
danielk1977da107192007-05-04 08:32:13 +00003107 return rc;
drh2af926b2001-05-15 00:39:25 +00003108}
3109
drh72f82862001-05-24 21:06:34 +00003110/*
drh3aac2dd2004-04-26 14:10:20 +00003111** Read part of the key associated with cursor pCur. Exactly
drha34b6762004-05-07 13:30:42 +00003112** "amt" bytes will be transfered into pBuf[]. The transfer
drh3aac2dd2004-04-26 14:10:20 +00003113** begins at "offset".
drh8c1238a2003-01-02 14:43:55 +00003114**
drh3aac2dd2004-04-26 14:10:20 +00003115** Return SQLITE_OK on success or an error code if anything goes
3116** wrong. An error is returned if "offset+amt" is larger than
3117** the available payload.
drh72f82862001-05-24 21:06:34 +00003118*/
drha34b6762004-05-07 13:30:42 +00003119int sqlite3BtreeKey(BtCursor *pCur, u32 offset, u32 amt, void *pBuf){
drhd677b3d2007-08-20 22:48:41 +00003120 int rc;
3121
drhd0679ed2007-08-28 22:24:34 +00003122 cursorEnter(pCur);
drhd677b3d2007-08-20 22:48:41 +00003123 rc = restoreOrClearCursorPosition(pCur);
danielk1977da184232006-01-05 11:34:32 +00003124 if( rc==SQLITE_OK ){
3125 assert( pCur->eState==CURSOR_VALID );
3126 assert( pCur->pPage!=0 );
3127 if( pCur->pPage->intKey ){
drhd0679ed2007-08-28 22:24:34 +00003128 cursorLeave(pCur);
danielk1977da184232006-01-05 11:34:32 +00003129 return SQLITE_CORRUPT_BKPT;
3130 }
3131 assert( pCur->pPage->intKey==0 );
3132 assert( pCur->idx>=0 && pCur->idx<pCur->pPage->nCell );
drh16a9b832007-05-05 18:39:25 +00003133 rc = accessPayload(pCur, offset, amt, (unsigned char*)pBuf, 0, 0);
drh6575a222005-03-10 17:06:34 +00003134 }
drhd0679ed2007-08-28 22:24:34 +00003135 cursorLeave(pCur);
danielk1977da184232006-01-05 11:34:32 +00003136 return rc;
drh3aac2dd2004-04-26 14:10:20 +00003137}
3138
3139/*
drh3aac2dd2004-04-26 14:10:20 +00003140** Read part of the data associated with cursor pCur. Exactly
drha34b6762004-05-07 13:30:42 +00003141** "amt" bytes will be transfered into pBuf[]. The transfer
drh3aac2dd2004-04-26 14:10:20 +00003142** begins at "offset".
3143**
3144** Return SQLITE_OK on success or an error code if anything goes
3145** wrong. An error is returned if "offset+amt" is larger than
3146** the available payload.
drh72f82862001-05-24 21:06:34 +00003147*/
drh3aac2dd2004-04-26 14:10:20 +00003148int sqlite3BtreeData(BtCursor *pCur, u32 offset, u32 amt, void *pBuf){
drhd677b3d2007-08-20 22:48:41 +00003149 int rc;
3150
drhd0679ed2007-08-28 22:24:34 +00003151 cursorEnter(pCur);
drhd677b3d2007-08-20 22:48:41 +00003152 rc = restoreOrClearCursorPosition(pCur);
danielk1977da184232006-01-05 11:34:32 +00003153 if( rc==SQLITE_OK ){
3154 assert( pCur->eState==CURSOR_VALID );
3155 assert( pCur->pPage!=0 );
3156 assert( pCur->idx>=0 && pCur->idx<pCur->pPage->nCell );
drh16a9b832007-05-05 18:39:25 +00003157 rc = accessPayload(pCur, offset, amt, pBuf, 1, 0);
danielk1977da184232006-01-05 11:34:32 +00003158 }
drhd0679ed2007-08-28 22:24:34 +00003159 cursorLeave(pCur);
danielk1977da184232006-01-05 11:34:32 +00003160 return rc;
drh2af926b2001-05-15 00:39:25 +00003161}
3162
drh72f82862001-05-24 21:06:34 +00003163/*
drh0e1c19e2004-05-11 00:58:56 +00003164** Return a pointer to payload information from the entry that the
3165** pCur cursor is pointing to. The pointer is to the beginning of
3166** the key if skipKey==0 and it points to the beginning of data if
drhe51c44f2004-05-30 20:46:09 +00003167** skipKey==1. The number of bytes of available key/data is written
3168** into *pAmt. If *pAmt==0, then the value returned will not be
3169** a valid pointer.
drh0e1c19e2004-05-11 00:58:56 +00003170**
3171** This routine is an optimization. It is common for the entire key
3172** and data to fit on the local page and for there to be no overflow
3173** pages. When that is so, this routine can be used to access the
3174** key and data without making a copy. If the key and/or data spills
drh16a9b832007-05-05 18:39:25 +00003175** onto overflow pages, then accessPayload() must be used to reassembly
drh0e1c19e2004-05-11 00:58:56 +00003176** the key/data and copy it into a preallocated buffer.
3177**
3178** The pointer returned by this routine looks directly into the cached
3179** page of the database. The data might change or move the next time
3180** any btree routine is called.
3181*/
3182static const unsigned char *fetchPayload(
3183 BtCursor *pCur, /* Cursor pointing to entry to read from */
drhe51c44f2004-05-30 20:46:09 +00003184 int *pAmt, /* Write the number of available bytes here */
drh0e1c19e2004-05-11 00:58:56 +00003185 int skipKey /* read beginning at data if this is true */
3186){
3187 unsigned char *aPayload;
3188 MemPage *pPage;
drhfa1a98a2004-05-14 19:08:17 +00003189 u32 nKey;
3190 int nLocal;
drh0e1c19e2004-05-11 00:58:56 +00003191
3192 assert( pCur!=0 && pCur->pPage!=0 );
danielk1977da184232006-01-05 11:34:32 +00003193 assert( pCur->eState==CURSOR_VALID );
drhd0679ed2007-08-28 22:24:34 +00003194 assert( sqlite3BtreeMutexHeld(pCur->pBt->mutex) );
drh0e1c19e2004-05-11 00:58:56 +00003195 pPage = pCur->pPage;
drh0e1c19e2004-05-11 00:58:56 +00003196 assert( pCur->idx>=0 && pCur->idx<pPage->nCell );
drh86057612007-06-26 01:04:48 +00003197 getCellInfo(pCur);
drh43605152004-05-29 21:46:49 +00003198 aPayload = pCur->info.pCell;
drhfa1a98a2004-05-14 19:08:17 +00003199 aPayload += pCur->info.nHeader;
drh0e1c19e2004-05-11 00:58:56 +00003200 if( pPage->intKey ){
drhfa1a98a2004-05-14 19:08:17 +00003201 nKey = 0;
3202 }else{
3203 nKey = pCur->info.nKey;
drh0e1c19e2004-05-11 00:58:56 +00003204 }
drh0e1c19e2004-05-11 00:58:56 +00003205 if( skipKey ){
drhfa1a98a2004-05-14 19:08:17 +00003206 aPayload += nKey;
3207 nLocal = pCur->info.nLocal - nKey;
drh0e1c19e2004-05-11 00:58:56 +00003208 }else{
drhfa1a98a2004-05-14 19:08:17 +00003209 nLocal = pCur->info.nLocal;
drhe51c44f2004-05-30 20:46:09 +00003210 if( nLocal>nKey ){
3211 nLocal = nKey;
3212 }
drh0e1c19e2004-05-11 00:58:56 +00003213 }
drhe51c44f2004-05-30 20:46:09 +00003214 *pAmt = nLocal;
drh0e1c19e2004-05-11 00:58:56 +00003215 return aPayload;
3216}
3217
3218
3219/*
drhe51c44f2004-05-30 20:46:09 +00003220** For the entry that cursor pCur is point to, return as
3221** many bytes of the key or data as are available on the local
3222** b-tree page. Write the number of available bytes into *pAmt.
drh0e1c19e2004-05-11 00:58:56 +00003223**
3224** The pointer returned is ephemeral. The key/data may move
drhd677b3d2007-08-20 22:48:41 +00003225** or be destroyed on the next call to any Btree routine,
3226** including calls from other threads against the same cache.
3227** Hence, a mutex on the BtShared should be held prior to calling
3228** this routine.
drh0e1c19e2004-05-11 00:58:56 +00003229**
3230** These routines is used to get quick access to key and data
3231** in the common case where no overflow pages are used.
drh0e1c19e2004-05-11 00:58:56 +00003232*/
drhe51c44f2004-05-30 20:46:09 +00003233const void *sqlite3BtreeKeyFetch(BtCursor *pCur, int *pAmt){
drhd0679ed2007-08-28 22:24:34 +00003234 assert( sqlite3BtreeMutexHeld(pCur->pBt->mutex) );
danielk1977da184232006-01-05 11:34:32 +00003235 if( pCur->eState==CURSOR_VALID ){
3236 return (const void*)fetchPayload(pCur, pAmt, 0);
3237 }
3238 return 0;
drh0e1c19e2004-05-11 00:58:56 +00003239}
drhe51c44f2004-05-30 20:46:09 +00003240const void *sqlite3BtreeDataFetch(BtCursor *pCur, int *pAmt){
drhd0679ed2007-08-28 22:24:34 +00003241 assert( sqlite3BtreeMutexHeld(pCur->pBt->mutex) );
danielk1977da184232006-01-05 11:34:32 +00003242 if( pCur->eState==CURSOR_VALID ){
3243 return (const void*)fetchPayload(pCur, pAmt, 1);
3244 }
3245 return 0;
drh0e1c19e2004-05-11 00:58:56 +00003246}
3247
3248
3249/*
drh8178a752003-01-05 21:41:40 +00003250** Move the cursor down to a new child page. The newPgno argument is the
drhab01f612004-05-22 02:55:23 +00003251** page number of the child page to move to.
drh72f82862001-05-24 21:06:34 +00003252*/
drh3aac2dd2004-04-26 14:10:20 +00003253static int moveToChild(BtCursor *pCur, u32 newPgno){
drh72f82862001-05-24 21:06:34 +00003254 int rc;
3255 MemPage *pNewPage;
drh3aac2dd2004-04-26 14:10:20 +00003256 MemPage *pOldPage;
drhd0679ed2007-08-28 22:24:34 +00003257 BtShared *pBt = pCur->pBt;
drh72f82862001-05-24 21:06:34 +00003258
drh27641702007-08-22 02:56:42 +00003259 assert( sqlite3BtreeMutexHeld(pBt->mutex) );
danielk1977da184232006-01-05 11:34:32 +00003260 assert( pCur->eState==CURSOR_VALID );
drhde647132004-05-07 17:57:49 +00003261 rc = getAndInitPage(pBt, newPgno, &pNewPage, pCur->pPage);
drh6019e162001-07-02 17:51:45 +00003262 if( rc ) return rc;
drh428ae8c2003-01-04 16:48:09 +00003263 pNewPage->idxParent = pCur->idx;
drh3aac2dd2004-04-26 14:10:20 +00003264 pOldPage = pCur->pPage;
3265 pOldPage->idxShift = 0;
3266 releasePage(pOldPage);
drh72f82862001-05-24 21:06:34 +00003267 pCur->pPage = pNewPage;
3268 pCur->idx = 0;
drh271efa52004-05-30 19:19:05 +00003269 pCur->info.nSize = 0;
drh4be295b2003-12-16 03:44:47 +00003270 if( pNewPage->nCell<1 ){
drh49285702005-09-17 15:20:26 +00003271 return SQLITE_CORRUPT_BKPT;
drh4be295b2003-12-16 03:44:47 +00003272 }
drh72f82862001-05-24 21:06:34 +00003273 return SQLITE_OK;
3274}
3275
3276/*
drh8856d6a2004-04-29 14:42:46 +00003277** Return true if the page is the virtual root of its table.
3278**
3279** The virtual root page is the root page for most tables. But
3280** for the table rooted on page 1, sometime the real root page
3281** is empty except for the right-pointer. In such cases the
3282** virtual root page is the page that the right-pointer of page
3283** 1 is pointing to.
3284*/
drh16a9b832007-05-05 18:39:25 +00003285int sqlite3BtreeIsRootPage(MemPage *pPage){
drhd677b3d2007-08-20 22:48:41 +00003286 MemPage *pParent;
3287
drh27641702007-08-22 02:56:42 +00003288 assert( sqlite3BtreeMutexHeld(pPage->pBt->mutex) );
drhd677b3d2007-08-20 22:48:41 +00003289 pParent = pPage->pParent;
drhda200cc2004-05-09 11:51:38 +00003290 if( pParent==0 ) return 1;
3291 if( pParent->pgno>1 ) return 0;
3292 if( get2byte(&pParent->aData[pParent->hdrOffset+3])==0 ) return 1;
drh8856d6a2004-04-29 14:42:46 +00003293 return 0;
3294}
3295
3296/*
drh5e2f8b92001-05-28 00:41:15 +00003297** Move the cursor up to the parent page.
3298**
3299** pCur->idx is set to the cell index that contains the pointer
3300** to the page we are coming from. If we are coming from the
3301** right-most child page then pCur->idx is set to one more than
drhbd03cae2001-06-02 02:40:57 +00003302** the largest cell index.
drh72f82862001-05-24 21:06:34 +00003303*/
drh16a9b832007-05-05 18:39:25 +00003304void sqlite3BtreeMoveToParent(BtCursor *pCur){
drh72f82862001-05-24 21:06:34 +00003305 MemPage *pParent;
drh8178a752003-01-05 21:41:40 +00003306 MemPage *pPage;
drh428ae8c2003-01-04 16:48:09 +00003307 int idxParent;
drh3aac2dd2004-04-26 14:10:20 +00003308
drhd0679ed2007-08-28 22:24:34 +00003309 cursorEnter(pCur);
danielk1977da184232006-01-05 11:34:32 +00003310 assert( pCur->eState==CURSOR_VALID );
drh8178a752003-01-05 21:41:40 +00003311 pPage = pCur->pPage;
3312 assert( pPage!=0 );
drh16a9b832007-05-05 18:39:25 +00003313 assert( !sqlite3BtreeIsRootPage(pPage) );
drh8178a752003-01-05 21:41:40 +00003314 pParent = pPage->pParent;
3315 assert( pParent!=0 );
3316 idxParent = pPage->idxParent;
danielk19773b8a05f2007-03-19 17:44:26 +00003317 sqlite3PagerRef(pParent->pDbPage);
drh3aac2dd2004-04-26 14:10:20 +00003318 releasePage(pPage);
drh72f82862001-05-24 21:06:34 +00003319 pCur->pPage = pParent;
drh271efa52004-05-30 19:19:05 +00003320 pCur->info.nSize = 0;
drh428ae8c2003-01-04 16:48:09 +00003321 assert( pParent->idxShift==0 );
drh43605152004-05-29 21:46:49 +00003322 pCur->idx = idxParent;
drhd0679ed2007-08-28 22:24:34 +00003323 cursorLeave(pCur);
drh72f82862001-05-24 21:06:34 +00003324}
3325
3326/*
3327** Move the cursor to the root page
3328*/
drh5e2f8b92001-05-28 00:41:15 +00003329static int moveToRoot(BtCursor *pCur){
drh3aac2dd2004-04-26 14:10:20 +00003330 MemPage *pRoot;
drh777e4c42006-01-13 04:31:58 +00003331 int rc = SQLITE_OK;
drhd677b3d2007-08-20 22:48:41 +00003332 Btree *p = pCur->pBtree;
3333 BtShared *pBt = p->pBt;
drhbd03cae2001-06-02 02:40:57 +00003334
drh27641702007-08-22 02:56:42 +00003335 assert( sqlite3BtreeMutexHeld(pBt->mutex) );
drhbf700f32007-03-31 02:36:44 +00003336 if( pCur->eState==CURSOR_REQUIRESEEK ){
3337 clearCursorPosition(pCur);
3338 }
drh777e4c42006-01-13 04:31:58 +00003339 pRoot = pCur->pPage;
danielk197797a227c2006-01-20 16:32:04 +00003340 if( pRoot && pRoot->pgno==pCur->pgnoRoot ){
drh777e4c42006-01-13 04:31:58 +00003341 assert( pRoot->isInit );
3342 }else{
3343 if(
3344 SQLITE_OK!=(rc = getAndInitPage(pBt, pCur->pgnoRoot, &pRoot, 0))
3345 ){
3346 pCur->eState = CURSOR_INVALID;
3347 return rc;
3348 }
3349 releasePage(pCur->pPage);
drh777e4c42006-01-13 04:31:58 +00003350 pCur->pPage = pRoot;
drhc39e0002004-05-07 23:50:57 +00003351 }
drh72f82862001-05-24 21:06:34 +00003352 pCur->idx = 0;
drh271efa52004-05-30 19:19:05 +00003353 pCur->info.nSize = 0;
drh8856d6a2004-04-29 14:42:46 +00003354 if( pRoot->nCell==0 && !pRoot->leaf ){
3355 Pgno subpage;
3356 assert( pRoot->pgno==1 );
drh43605152004-05-29 21:46:49 +00003357 subpage = get4byte(&pRoot->aData[pRoot->hdrOffset+8]);
drh8856d6a2004-04-29 14:42:46 +00003358 assert( subpage>0 );
danielk1977da184232006-01-05 11:34:32 +00003359 pCur->eState = CURSOR_VALID;
drh4b70f112004-05-02 21:12:19 +00003360 rc = moveToChild(pCur, subpage);
drh8856d6a2004-04-29 14:42:46 +00003361 }
danielk1977da184232006-01-05 11:34:32 +00003362 pCur->eState = ((pCur->pPage->nCell>0)?CURSOR_VALID:CURSOR_INVALID);
drh8856d6a2004-04-29 14:42:46 +00003363 return rc;
drh72f82862001-05-24 21:06:34 +00003364}
drh2af926b2001-05-15 00:39:25 +00003365
drh5e2f8b92001-05-28 00:41:15 +00003366/*
3367** Move the cursor down to the left-most leaf entry beneath the
3368** entry to which it is currently pointing.
drh777e4c42006-01-13 04:31:58 +00003369**
3370** The left-most leaf is the one with the smallest key - the first
3371** in ascending order.
drh5e2f8b92001-05-28 00:41:15 +00003372*/
3373static int moveToLeftmost(BtCursor *pCur){
3374 Pgno pgno;
drhd677b3d2007-08-20 22:48:41 +00003375 int rc = SQLITE_OK;
drh3aac2dd2004-04-26 14:10:20 +00003376 MemPage *pPage;
drh5e2f8b92001-05-28 00:41:15 +00003377
drhd0679ed2007-08-28 22:24:34 +00003378 assert( sqlite3BtreeMutexHeld(pCur->pBt->mutex) );
danielk1977da184232006-01-05 11:34:32 +00003379 assert( pCur->eState==CURSOR_VALID );
drhd677b3d2007-08-20 22:48:41 +00003380 while( rc==SQLITE_OK && !(pPage = pCur->pPage)->leaf ){
drha34b6762004-05-07 13:30:42 +00003381 assert( pCur->idx>=0 && pCur->idx<pPage->nCell );
danielk19771cc5ed82007-05-16 17:28:43 +00003382 pgno = get4byte(findCell(pPage, pCur->idx));
drh8178a752003-01-05 21:41:40 +00003383 rc = moveToChild(pCur, pgno);
drh5e2f8b92001-05-28 00:41:15 +00003384 }
drhd677b3d2007-08-20 22:48:41 +00003385 return rc;
drh5e2f8b92001-05-28 00:41:15 +00003386}
3387
drh2dcc9aa2002-12-04 13:40:25 +00003388/*
3389** Move the cursor down to the right-most leaf entry beneath the
3390** page to which it is currently pointing. Notice the difference
3391** between moveToLeftmost() and moveToRightmost(). moveToLeftmost()
3392** finds the left-most entry beneath the *entry* whereas moveToRightmost()
3393** finds the right-most entry beneath the *page*.
drh777e4c42006-01-13 04:31:58 +00003394**
3395** The right-most entry is the one with the largest key - the last
3396** key in ascending order.
drh2dcc9aa2002-12-04 13:40:25 +00003397*/
3398static int moveToRightmost(BtCursor *pCur){
3399 Pgno pgno;
drhd677b3d2007-08-20 22:48:41 +00003400 int rc = SQLITE_OK;
drh3aac2dd2004-04-26 14:10:20 +00003401 MemPage *pPage;
drh2dcc9aa2002-12-04 13:40:25 +00003402
drhd0679ed2007-08-28 22:24:34 +00003403 assert( sqlite3BtreeMutexHeld(pCur->pBt->mutex) );
danielk1977da184232006-01-05 11:34:32 +00003404 assert( pCur->eState==CURSOR_VALID );
drhd677b3d2007-08-20 22:48:41 +00003405 while( rc==SQLITE_OK && !(pPage = pCur->pPage)->leaf ){
drh43605152004-05-29 21:46:49 +00003406 pgno = get4byte(&pPage->aData[pPage->hdrOffset+8]);
drh3aac2dd2004-04-26 14:10:20 +00003407 pCur->idx = pPage->nCell;
drh8178a752003-01-05 21:41:40 +00003408 rc = moveToChild(pCur, pgno);
drh2dcc9aa2002-12-04 13:40:25 +00003409 }
drhd677b3d2007-08-20 22:48:41 +00003410 if( rc==SQLITE_OK ){
3411 pCur->idx = pPage->nCell - 1;
3412 pCur->info.nSize = 0;
3413 }
drh2dcc9aa2002-12-04 13:40:25 +00003414 return SQLITE_OK;
3415}
3416
drh5e00f6c2001-09-13 13:46:56 +00003417/* Move the cursor to the first entry in the table. Return SQLITE_OK
3418** on success. Set *pRes to 0 if the cursor actually points to something
drh77c679c2002-02-19 22:43:58 +00003419** or set *pRes to 1 if the table is empty.
drh5e00f6c2001-09-13 13:46:56 +00003420*/
drh3aac2dd2004-04-26 14:10:20 +00003421int sqlite3BtreeFirst(BtCursor *pCur, int *pRes){
drh5e00f6c2001-09-13 13:46:56 +00003422 int rc;
drhd677b3d2007-08-20 22:48:41 +00003423
drhd0679ed2007-08-28 22:24:34 +00003424 assert( sqlite3BtreeMutexHeld(pCur->pBt->mutex) );
drh27641702007-08-22 02:56:42 +00003425 assert( sqlite3BtreeMutexHeld(pCur->pBtree->pSqlite->mutex) );
drh5e00f6c2001-09-13 13:46:56 +00003426 rc = moveToRoot(pCur);
drhd677b3d2007-08-20 22:48:41 +00003427 if( rc==SQLITE_OK ){
3428 if( pCur->eState==CURSOR_INVALID ){
3429 assert( pCur->pPage->nCell==0 );
3430 *pRes = 1;
3431 rc = SQLITE_OK;
3432 }else{
3433 assert( pCur->pPage->nCell>0 );
3434 *pRes = 0;
3435 rc = moveToLeftmost(pCur);
3436 }
drh5e00f6c2001-09-13 13:46:56 +00003437 }
drh5e00f6c2001-09-13 13:46:56 +00003438 return rc;
3439}
drh5e2f8b92001-05-28 00:41:15 +00003440
drh9562b552002-02-19 15:00:07 +00003441/* Move the cursor to the last entry in the table. Return SQLITE_OK
3442** on success. Set *pRes to 0 if the cursor actually points to something
drh77c679c2002-02-19 22:43:58 +00003443** or set *pRes to 1 if the table is empty.
drh9562b552002-02-19 15:00:07 +00003444*/
drh3aac2dd2004-04-26 14:10:20 +00003445int sqlite3BtreeLast(BtCursor *pCur, int *pRes){
drh9562b552002-02-19 15:00:07 +00003446 int rc;
drhd677b3d2007-08-20 22:48:41 +00003447
drhd0679ed2007-08-28 22:24:34 +00003448 assert( sqlite3BtreeMutexHeld(pCur->pBt->mutex) );
drh27641702007-08-22 02:56:42 +00003449 assert( sqlite3BtreeMutexHeld(pCur->pBtree->pSqlite->mutex) );
drh9562b552002-02-19 15:00:07 +00003450 rc = moveToRoot(pCur);
drhd677b3d2007-08-20 22:48:41 +00003451 if( rc==SQLITE_OK ){
3452 if( CURSOR_INVALID==pCur->eState ){
3453 assert( pCur->pPage->nCell==0 );
3454 *pRes = 1;
3455 }else{
3456 assert( pCur->eState==CURSOR_VALID );
3457 *pRes = 0;
3458 rc = moveToRightmost(pCur);
3459 }
drh9562b552002-02-19 15:00:07 +00003460 }
drh9562b552002-02-19 15:00:07 +00003461 return rc;
3462}
3463
drh3aac2dd2004-04-26 14:10:20 +00003464/* Move the cursor so that it points to an entry near pKey/nKey.
drh72f82862001-05-24 21:06:34 +00003465** Return a success code.
3466**
drh3aac2dd2004-04-26 14:10:20 +00003467** For INTKEY tables, only the nKey parameter is used. pKey is
3468** ignored. For other tables, nKey is the number of bytes of data
drh0b2f3162005-12-21 18:36:45 +00003469** in pKey. The comparison function specified when the cursor was
drh3aac2dd2004-04-26 14:10:20 +00003470** created is used to compare keys.
3471**
drh5e2f8b92001-05-28 00:41:15 +00003472** If an exact match is not found, then the cursor is always
drhbd03cae2001-06-02 02:40:57 +00003473** left pointing at a leaf page which would hold the entry if it
drh5e2f8b92001-05-28 00:41:15 +00003474** were present. The cursor might point to an entry that comes
3475** before or after the key.
3476**
drhbd03cae2001-06-02 02:40:57 +00003477** The result of comparing the key with the entry to which the
drhab01f612004-05-22 02:55:23 +00003478** cursor is written to *pRes if pRes!=NULL. The meaning of
drhbd03cae2001-06-02 02:40:57 +00003479** this value is as follows:
3480**
3481** *pRes<0 The cursor is left pointing at an entry that
drh1a844c32002-12-04 22:29:28 +00003482** is smaller than pKey or if the table is empty
3483** and the cursor is therefore left point to nothing.
drhbd03cae2001-06-02 02:40:57 +00003484**
3485** *pRes==0 The cursor is left pointing at an entry that
3486** exactly matches pKey.
3487**
3488** *pRes>0 The cursor is left pointing at an entry that
drh7c717f72001-06-24 20:39:41 +00003489** is larger than pKey.
drhd677b3d2007-08-20 22:48:41 +00003490**
drha059ad02001-04-17 20:09:11 +00003491*/
drhe4d90812007-03-29 05:51:49 +00003492int sqlite3BtreeMoveto(
3493 BtCursor *pCur, /* The cursor to be moved */
3494 const void *pKey, /* The key content for indices. Not used by tables */
3495 i64 nKey, /* Size of pKey. Or the key for tables */
3496 int biasRight, /* If true, bias the search to the high end */
3497 int *pRes /* Search result flag */
3498){
drh72f82862001-05-24 21:06:34 +00003499 int rc;
drhd677b3d2007-08-20 22:48:41 +00003500
drhd0679ed2007-08-28 22:24:34 +00003501 assert( sqlite3BtreeMutexHeld(pCur->pBt->mutex) );
drh27641702007-08-22 02:56:42 +00003502 assert( sqlite3BtreeMutexHeld(pCur->pBtree->pSqlite->mutex) );
drh5e2f8b92001-05-28 00:41:15 +00003503 rc = moveToRoot(pCur);
drhd677b3d2007-08-20 22:48:41 +00003504 if( rc ){
3505 return rc;
3506 }
drhc39e0002004-05-07 23:50:57 +00003507 assert( pCur->pPage );
3508 assert( pCur->pPage->isInit );
danielk1977da184232006-01-05 11:34:32 +00003509 if( pCur->eState==CURSOR_INVALID ){
drhf328bc82004-05-10 23:29:49 +00003510 *pRes = -1;
drhc39e0002004-05-07 23:50:57 +00003511 assert( pCur->pPage->nCell==0 );
3512 return SQLITE_OK;
3513 }
drh14684382006-11-30 13:05:29 +00003514 for(;;){
drh72f82862001-05-24 21:06:34 +00003515 int lwr, upr;
3516 Pgno chldPg;
3517 MemPage *pPage = pCur->pPage;
drh1a844c32002-12-04 22:29:28 +00003518 int c = -1; /* pRes return if table is empty must be -1 */
drh72f82862001-05-24 21:06:34 +00003519 lwr = 0;
3520 upr = pPage->nCell-1;
drh4eec4c12005-01-21 00:22:37 +00003521 if( !pPage->intKey && pKey==0 ){
drh49285702005-09-17 15:20:26 +00003522 return SQLITE_CORRUPT_BKPT;
drh4eec4c12005-01-21 00:22:37 +00003523 }
drhe4d90812007-03-29 05:51:49 +00003524 if( biasRight ){
3525 pCur->idx = upr;
3526 }else{
3527 pCur->idx = (upr+lwr)/2;
3528 }
drhf1d68b32007-03-29 04:43:26 +00003529 if( lwr<=upr ) for(;;){
danielk197713adf8a2004-06-03 16:08:41 +00003530 void *pCellKey;
drh4a1c3802004-05-12 15:15:47 +00003531 i64 nCellKey;
drh366fda62006-01-13 02:35:09 +00003532 pCur->info.nSize = 0;
drh3aac2dd2004-04-26 14:10:20 +00003533 if( pPage->intKey ){
drh777e4c42006-01-13 04:31:58 +00003534 u8 *pCell;
danielk19771cc5ed82007-05-16 17:28:43 +00003535 pCell = findCell(pPage, pCur->idx) + pPage->childPtrSize;
drhd172f862006-01-12 15:01:15 +00003536 if( pPage->hasData ){
danielk1977bab45c62006-01-16 15:14:27 +00003537 u32 dummy;
drhd172f862006-01-12 15:01:15 +00003538 pCell += getVarint32(pCell, &dummy);
3539 }
danielk1977bab45c62006-01-16 15:14:27 +00003540 getVarint(pCell, (u64 *)&nCellKey);
drh3aac2dd2004-04-26 14:10:20 +00003541 if( nCellKey<nKey ){
3542 c = -1;
3543 }else if( nCellKey>nKey ){
3544 c = +1;
3545 }else{
3546 c = 0;
3547 }
drh3aac2dd2004-04-26 14:10:20 +00003548 }else{
drhe51c44f2004-05-30 20:46:09 +00003549 int available;
danielk197713adf8a2004-06-03 16:08:41 +00003550 pCellKey = (void *)fetchPayload(pCur, &available, 0);
drh366fda62006-01-13 02:35:09 +00003551 nCellKey = pCur->info.nKey;
drhe51c44f2004-05-30 20:46:09 +00003552 if( available>=nCellKey ){
3553 c = pCur->xCompare(pCur->pArg, nCellKey, pCellKey, nKey, pKey);
3554 }else{
drh17435752007-08-16 04:30:38 +00003555 pCellKey = sqlite3_malloc( nCellKey );
drhe51c44f2004-05-30 20:46:09 +00003556 if( pCellKey==0 ) return SQLITE_NOMEM;
danielk197713adf8a2004-06-03 16:08:41 +00003557 rc = sqlite3BtreeKey(pCur, 0, nCellKey, (void *)pCellKey);
drhe51c44f2004-05-30 20:46:09 +00003558 c = pCur->xCompare(pCur->pArg, nCellKey, pCellKey, nKey, pKey);
drh17435752007-08-16 04:30:38 +00003559 sqlite3_free(pCellKey);
drhd677b3d2007-08-20 22:48:41 +00003560 if( rc ){
3561 return rc;
3562 }
drhe51c44f2004-05-30 20:46:09 +00003563 }
drh3aac2dd2004-04-26 14:10:20 +00003564 }
drh72f82862001-05-24 21:06:34 +00003565 if( c==0 ){
drh8b18dd42004-05-12 19:18:15 +00003566 if( pPage->leafData && !pPage->leaf ){
drhfc70e6f2004-05-12 21:11:27 +00003567 lwr = pCur->idx;
3568 upr = lwr - 1;
drh8b18dd42004-05-12 19:18:15 +00003569 break;
3570 }else{
drh8b18dd42004-05-12 19:18:15 +00003571 if( pRes ) *pRes = 0;
3572 return SQLITE_OK;
3573 }
drh72f82862001-05-24 21:06:34 +00003574 }
3575 if( c<0 ){
3576 lwr = pCur->idx+1;
3577 }else{
3578 upr = pCur->idx-1;
3579 }
drhf1d68b32007-03-29 04:43:26 +00003580 if( lwr>upr ){
3581 break;
3582 }
3583 pCur->idx = (lwr+upr)/2;
drh72f82862001-05-24 21:06:34 +00003584 }
3585 assert( lwr==upr+1 );
drh7aa128d2002-06-21 13:09:16 +00003586 assert( pPage->isInit );
drh3aac2dd2004-04-26 14:10:20 +00003587 if( pPage->leaf ){
drha34b6762004-05-07 13:30:42 +00003588 chldPg = 0;
drh3aac2dd2004-04-26 14:10:20 +00003589 }else if( lwr>=pPage->nCell ){
drh43605152004-05-29 21:46:49 +00003590 chldPg = get4byte(&pPage->aData[pPage->hdrOffset+8]);
drh72f82862001-05-24 21:06:34 +00003591 }else{
danielk19771cc5ed82007-05-16 17:28:43 +00003592 chldPg = get4byte(findCell(pPage, lwr));
drh72f82862001-05-24 21:06:34 +00003593 }
3594 if( chldPg==0 ){
drhc39e0002004-05-07 23:50:57 +00003595 assert( pCur->idx>=0 && pCur->idx<pCur->pPage->nCell );
drh72f82862001-05-24 21:06:34 +00003596 if( pRes ) *pRes = c;
3597 return SQLITE_OK;
3598 }
drh428ae8c2003-01-04 16:48:09 +00003599 pCur->idx = lwr;
drh271efa52004-05-30 19:19:05 +00003600 pCur->info.nSize = 0;
drh8178a752003-01-05 21:41:40 +00003601 rc = moveToChild(pCur, chldPg);
drhc39e0002004-05-07 23:50:57 +00003602 if( rc ){
3603 return rc;
3604 }
drh72f82862001-05-24 21:06:34 +00003605 }
drhbd03cae2001-06-02 02:40:57 +00003606 /* NOT REACHED */
drh72f82862001-05-24 21:06:34 +00003607}
3608
drhd677b3d2007-08-20 22:48:41 +00003609
drh72f82862001-05-24 21:06:34 +00003610/*
drhc39e0002004-05-07 23:50:57 +00003611** Return TRUE if the cursor is not pointing at an entry of the table.
3612**
3613** TRUE will be returned after a call to sqlite3BtreeNext() moves
3614** past the last entry in the table or sqlite3BtreePrev() moves past
3615** the first entry. TRUE is also returned if the table is empty.
3616*/
3617int sqlite3BtreeEof(BtCursor *pCur){
danielk1977da184232006-01-05 11:34:32 +00003618 /* TODO: What if the cursor is in CURSOR_REQUIRESEEK but all table entries
3619 ** have been deleted? This API will need to change to return an error code
3620 ** as well as the boolean result value.
3621 */
3622 return (CURSOR_VALID!=pCur->eState);
drhc39e0002004-05-07 23:50:57 +00003623}
3624
3625/*
drhb21c8cd2007-08-21 19:33:56 +00003626** Return the database connection handle for a cursor.
3627*/
3628sqlite3 *sqlite3BtreeCursorDb(const BtCursor *pCur){
drhd0679ed2007-08-28 22:24:34 +00003629 assert( sqlite3_mutex_held(pCur->pBtree->pSqlite->mutex) );
drhb21c8cd2007-08-21 19:33:56 +00003630 return pCur->pBtree->pSqlite;
3631}
3632
3633/*
drhbd03cae2001-06-02 02:40:57 +00003634** Advance the cursor to the next entry in the database. If
drh8c1238a2003-01-02 14:43:55 +00003635** successful then set *pRes=0. If the cursor
drhbd03cae2001-06-02 02:40:57 +00003636** was already pointing to the last entry in the database before
drh8c1238a2003-01-02 14:43:55 +00003637** this routine was called, then set *pRes=1.
drh72f82862001-05-24 21:06:34 +00003638*/
drhd677b3d2007-08-20 22:48:41 +00003639static int btreeNext(BtCursor *pCur, int *pRes){
drh72f82862001-05-24 21:06:34 +00003640 int rc;
danielk197797a227c2006-01-20 16:32:04 +00003641 MemPage *pPage;
drh8b18dd42004-05-12 19:18:15 +00003642
drhd0679ed2007-08-28 22:24:34 +00003643 assert( sqlite3_mutex_held(pCur->pBt->mutex) );
drhbf700f32007-03-31 02:36:44 +00003644 rc = restoreOrClearCursorPosition(pCur);
danielk1977da184232006-01-05 11:34:32 +00003645 if( rc!=SQLITE_OK ){
3646 return rc;
3647 }
drh8c4d3a62007-04-06 01:03:32 +00003648 assert( pRes!=0 );
3649 pPage = pCur->pPage;
3650 if( CURSOR_INVALID==pCur->eState ){
3651 *pRes = 1;
3652 return SQLITE_OK;
3653 }
danielk1977da184232006-01-05 11:34:32 +00003654 if( pCur->skip>0 ){
3655 pCur->skip = 0;
3656 *pRes = 0;
3657 return SQLITE_OK;
3658 }
3659 pCur->skip = 0;
danielk1977da184232006-01-05 11:34:32 +00003660
drh8178a752003-01-05 21:41:40 +00003661 assert( pPage->isInit );
drh8178a752003-01-05 21:41:40 +00003662 assert( pCur->idx<pPage->nCell );
danielk19776a43f9b2004-11-16 04:57:24 +00003663
drh72f82862001-05-24 21:06:34 +00003664 pCur->idx++;
drh271efa52004-05-30 19:19:05 +00003665 pCur->info.nSize = 0;
drh8178a752003-01-05 21:41:40 +00003666 if( pCur->idx>=pPage->nCell ){
drha34b6762004-05-07 13:30:42 +00003667 if( !pPage->leaf ){
drh43605152004-05-29 21:46:49 +00003668 rc = moveToChild(pCur, get4byte(&pPage->aData[pPage->hdrOffset+8]));
drh5e2f8b92001-05-28 00:41:15 +00003669 if( rc ) return rc;
3670 rc = moveToLeftmost(pCur);
drh8c1238a2003-01-02 14:43:55 +00003671 *pRes = 0;
3672 return rc;
drh72f82862001-05-24 21:06:34 +00003673 }
drh5e2f8b92001-05-28 00:41:15 +00003674 do{
drh16a9b832007-05-05 18:39:25 +00003675 if( sqlite3BtreeIsRootPage(pPage) ){
drh8c1238a2003-01-02 14:43:55 +00003676 *pRes = 1;
danielk1977da184232006-01-05 11:34:32 +00003677 pCur->eState = CURSOR_INVALID;
drh5e2f8b92001-05-28 00:41:15 +00003678 return SQLITE_OK;
3679 }
drh16a9b832007-05-05 18:39:25 +00003680 sqlite3BtreeMoveToParent(pCur);
drh8178a752003-01-05 21:41:40 +00003681 pPage = pCur->pPage;
3682 }while( pCur->idx>=pPage->nCell );
drh8c1238a2003-01-02 14:43:55 +00003683 *pRes = 0;
drh8b18dd42004-05-12 19:18:15 +00003684 if( pPage->leafData ){
3685 rc = sqlite3BtreeNext(pCur, pRes);
3686 }else{
3687 rc = SQLITE_OK;
3688 }
3689 return rc;
drh8178a752003-01-05 21:41:40 +00003690 }
3691 *pRes = 0;
drh3aac2dd2004-04-26 14:10:20 +00003692 if( pPage->leaf ){
drh8178a752003-01-05 21:41:40 +00003693 return SQLITE_OK;
drh72f82862001-05-24 21:06:34 +00003694 }
drh5e2f8b92001-05-28 00:41:15 +00003695 rc = moveToLeftmost(pCur);
drh8c1238a2003-01-02 14:43:55 +00003696 return rc;
drh72f82862001-05-24 21:06:34 +00003697}
drhd677b3d2007-08-20 22:48:41 +00003698int sqlite3BtreeNext(BtCursor *pCur, int *pRes){
3699 int rc;
3700 cursorEnter(pCur);
3701 rc = btreeNext(pCur, pRes);
3702 cursorLeave(pCur);
3703 return rc;
3704}
3705
drh72f82862001-05-24 21:06:34 +00003706
drh3b7511c2001-05-26 13:15:44 +00003707/*
drh2dcc9aa2002-12-04 13:40:25 +00003708** Step the cursor to the back to the previous entry in the database. If
drh8178a752003-01-05 21:41:40 +00003709** successful then set *pRes=0. If the cursor
drh2dcc9aa2002-12-04 13:40:25 +00003710** was already pointing to the first entry in the database before
drh8178a752003-01-05 21:41:40 +00003711** this routine was called, then set *pRes=1.
drh2dcc9aa2002-12-04 13:40:25 +00003712*/
drhd677b3d2007-08-20 22:48:41 +00003713static int btreePrevious(BtCursor *pCur, int *pRes){
drh2dcc9aa2002-12-04 13:40:25 +00003714 int rc;
3715 Pgno pgno;
drh8178a752003-01-05 21:41:40 +00003716 MemPage *pPage;
danielk1977da184232006-01-05 11:34:32 +00003717
drhd0679ed2007-08-28 22:24:34 +00003718 assert( sqlite3_mutex_held(pCur->pBt->mutex) );
drhbf700f32007-03-31 02:36:44 +00003719 rc = restoreOrClearCursorPosition(pCur);
danielk1977da184232006-01-05 11:34:32 +00003720 if( rc!=SQLITE_OK ){
3721 return rc;
3722 }
drh8c4d3a62007-04-06 01:03:32 +00003723 if( CURSOR_INVALID==pCur->eState ){
3724 *pRes = 1;
3725 return SQLITE_OK;
3726 }
danielk1977da184232006-01-05 11:34:32 +00003727 if( pCur->skip<0 ){
3728 pCur->skip = 0;
3729 *pRes = 0;
3730 return SQLITE_OK;
3731 }
3732 pCur->skip = 0;
danielk1977da184232006-01-05 11:34:32 +00003733
drh8178a752003-01-05 21:41:40 +00003734 pPage = pCur->pPage;
drh8178a752003-01-05 21:41:40 +00003735 assert( pPage->isInit );
drh2dcc9aa2002-12-04 13:40:25 +00003736 assert( pCur->idx>=0 );
drha34b6762004-05-07 13:30:42 +00003737 if( !pPage->leaf ){
danielk19771cc5ed82007-05-16 17:28:43 +00003738 pgno = get4byte( findCell(pPage, pCur->idx) );
drh8178a752003-01-05 21:41:40 +00003739 rc = moveToChild(pCur, pgno);
drhd677b3d2007-08-20 22:48:41 +00003740 if( rc ){
3741 return rc;
3742 }
drh2dcc9aa2002-12-04 13:40:25 +00003743 rc = moveToRightmost(pCur);
3744 }else{
3745 while( pCur->idx==0 ){
drh16a9b832007-05-05 18:39:25 +00003746 if( sqlite3BtreeIsRootPage(pPage) ){
danielk1977da184232006-01-05 11:34:32 +00003747 pCur->eState = CURSOR_INVALID;
drhc39e0002004-05-07 23:50:57 +00003748 *pRes = 1;
drh2dcc9aa2002-12-04 13:40:25 +00003749 return SQLITE_OK;
3750 }
drh16a9b832007-05-05 18:39:25 +00003751 sqlite3BtreeMoveToParent(pCur);
drh8178a752003-01-05 21:41:40 +00003752 pPage = pCur->pPage;
drh2dcc9aa2002-12-04 13:40:25 +00003753 }
3754 pCur->idx--;
drh271efa52004-05-30 19:19:05 +00003755 pCur->info.nSize = 0;
drh8237d452004-11-22 19:07:09 +00003756 if( pPage->leafData && !pPage->leaf ){
drh8b18dd42004-05-12 19:18:15 +00003757 rc = sqlite3BtreePrevious(pCur, pRes);
3758 }else{
3759 rc = SQLITE_OK;
3760 }
drh2dcc9aa2002-12-04 13:40:25 +00003761 }
drh8178a752003-01-05 21:41:40 +00003762 *pRes = 0;
drh2dcc9aa2002-12-04 13:40:25 +00003763 return rc;
3764}
drhd677b3d2007-08-20 22:48:41 +00003765int sqlite3BtreePrevious(BtCursor *pCur, int *pRes){
3766 int rc;
3767 cursorEnter(pCur);
3768 rc = btreePrevious(pCur, pRes);
3769 cursorLeave(pCur);
3770 return rc;
3771}
drh2dcc9aa2002-12-04 13:40:25 +00003772
3773/*
drh3b7511c2001-05-26 13:15:44 +00003774** Allocate a new page from the database file.
3775**
danielk19773b8a05f2007-03-19 17:44:26 +00003776** The new page is marked as dirty. (In other words, sqlite3PagerWrite()
drh3b7511c2001-05-26 13:15:44 +00003777** has already been called on the new page.) The new page has also
3778** been referenced and the calling routine is responsible for calling
danielk19773b8a05f2007-03-19 17:44:26 +00003779** sqlite3PagerUnref() on the new page when it is done.
drh3b7511c2001-05-26 13:15:44 +00003780**
3781** SQLITE_OK is returned on success. Any other return value indicates
3782** an error. *ppPage and *pPgno are undefined in the event of an error.
danielk19773b8a05f2007-03-19 17:44:26 +00003783** Do not invoke sqlite3PagerUnref() on *ppPage if an error is returned.
drhbea00b92002-07-08 10:59:50 +00003784**
drh199e3cf2002-07-18 11:01:47 +00003785** If the "nearby" parameter is not 0, then a (feeble) effort is made to
3786** locate a page close to the page number "nearby". This can be used in an
drhbea00b92002-07-08 10:59:50 +00003787** attempt to keep related pages close to each other in the database file,
3788** which in turn can make database access faster.
danielk1977cb1a7eb2004-11-05 12:27:02 +00003789**
3790** If the "exact" parameter is not 0, and the page-number nearby exists
3791** anywhere on the free-list, then it is guarenteed to be returned. This
3792** is only used by auto-vacuum databases when allocating a new table.
drh3b7511c2001-05-26 13:15:44 +00003793*/
drh4f0c5872007-03-26 22:05:01 +00003794static int allocateBtreePage(
danielk1977aef0bf62005-12-30 16:28:01 +00003795 BtShared *pBt,
danielk1977cb1a7eb2004-11-05 12:27:02 +00003796 MemPage **ppPage,
3797 Pgno *pPgno,
3798 Pgno nearby,
3799 u8 exact
3800){
drh3aac2dd2004-04-26 14:10:20 +00003801 MemPage *pPage1;
drh8c42ca92001-06-22 19:15:00 +00003802 int rc;
drh3aac2dd2004-04-26 14:10:20 +00003803 int n; /* Number of pages on the freelist */
3804 int k; /* Number of leaves on the trunk of the freelist */
drhd3627af2006-12-18 18:34:51 +00003805 MemPage *pTrunk = 0;
3806 MemPage *pPrevTrunk = 0;
drh30e58752002-03-02 20:41:57 +00003807
drh27641702007-08-22 02:56:42 +00003808 assert( sqlite3BtreeMutexHeld(pBt->mutex) );
drh3aac2dd2004-04-26 14:10:20 +00003809 pPage1 = pBt->pPage1;
3810 n = get4byte(&pPage1->aData[36]);
3811 if( n>0 ){
drh91025292004-05-03 19:49:32 +00003812 /* There are pages on the freelist. Reuse one of those pages. */
danielk1977cb1a7eb2004-11-05 12:27:02 +00003813 Pgno iTrunk;
danielk1977cb1a7eb2004-11-05 12:27:02 +00003814 u8 searchList = 0; /* If the free-list must be searched for 'nearby' */
3815
3816 /* If the 'exact' parameter was true and a query of the pointer-map
3817 ** shows that the page 'nearby' is somewhere on the free-list, then
3818 ** the entire-list will be searched for that page.
3819 */
3820#ifndef SQLITE_OMIT_AUTOVACUUM
danielk19774ef24492007-05-23 09:52:41 +00003821 if( exact && nearby<=sqlite3PagerPagecount(pBt->pPager) ){
danielk1977cb1a7eb2004-11-05 12:27:02 +00003822 u8 eType;
3823 assert( nearby>0 );
3824 assert( pBt->autoVacuum );
3825 rc = ptrmapGet(pBt, nearby, &eType, 0);
3826 if( rc ) return rc;
3827 if( eType==PTRMAP_FREEPAGE ){
3828 searchList = 1;
3829 }
3830 *pPgno = nearby;
3831 }
3832#endif
3833
3834 /* Decrement the free-list count by 1. Set iTrunk to the index of the
3835 ** first free-list trunk page. iPrevTrunk is initially 1.
3836 */
danielk19773b8a05f2007-03-19 17:44:26 +00003837 rc = sqlite3PagerWrite(pPage1->pDbPage);
drh3b7511c2001-05-26 13:15:44 +00003838 if( rc ) return rc;
drh3aac2dd2004-04-26 14:10:20 +00003839 put4byte(&pPage1->aData[36], n-1);
danielk1977cb1a7eb2004-11-05 12:27:02 +00003840
3841 /* The code within this loop is run only once if the 'searchList' variable
3842 ** is not true. Otherwise, it runs once for each trunk-page on the
3843 ** free-list until the page 'nearby' is located.
3844 */
3845 do {
3846 pPrevTrunk = pTrunk;
3847 if( pPrevTrunk ){
3848 iTrunk = get4byte(&pPrevTrunk->aData[0]);
drhbea00b92002-07-08 10:59:50 +00003849 }else{
danielk1977cb1a7eb2004-11-05 12:27:02 +00003850 iTrunk = get4byte(&pPage1->aData[32]);
drhbea00b92002-07-08 10:59:50 +00003851 }
drh16a9b832007-05-05 18:39:25 +00003852 rc = sqlite3BtreeGetPage(pBt, iTrunk, &pTrunk, 0);
danielk1977cb1a7eb2004-11-05 12:27:02 +00003853 if( rc ){
drhd3627af2006-12-18 18:34:51 +00003854 pTrunk = 0;
3855 goto end_allocate_page;
danielk1977cb1a7eb2004-11-05 12:27:02 +00003856 }
3857
3858 k = get4byte(&pTrunk->aData[4]);
3859 if( k==0 && !searchList ){
3860 /* The trunk has no leaves and the list is not being searched.
3861 ** So extract the trunk page itself and use it as the newly
3862 ** allocated page */
3863 assert( pPrevTrunk==0 );
danielk19773b8a05f2007-03-19 17:44:26 +00003864 rc = sqlite3PagerWrite(pTrunk->pDbPage);
drhd3627af2006-12-18 18:34:51 +00003865 if( rc ){
3866 goto end_allocate_page;
3867 }
danielk1977cb1a7eb2004-11-05 12:27:02 +00003868 *pPgno = iTrunk;
3869 memcpy(&pPage1->aData[32], &pTrunk->aData[0], 4);
3870 *ppPage = pTrunk;
3871 pTrunk = 0;
3872 TRACE(("ALLOCATE: %d trunk - %d free pages left\n", *pPgno, n-1));
3873 }else if( k>pBt->usableSize/4 - 8 ){
3874 /* Value of k is out of range. Database corruption */
drhd3627af2006-12-18 18:34:51 +00003875 rc = SQLITE_CORRUPT_BKPT;
3876 goto end_allocate_page;
danielk1977cb1a7eb2004-11-05 12:27:02 +00003877#ifndef SQLITE_OMIT_AUTOVACUUM
3878 }else if( searchList && nearby==iTrunk ){
3879 /* The list is being searched and this trunk page is the page
3880 ** to allocate, regardless of whether it has leaves.
3881 */
3882 assert( *pPgno==iTrunk );
3883 *ppPage = pTrunk;
3884 searchList = 0;
danielk19773b8a05f2007-03-19 17:44:26 +00003885 rc = sqlite3PagerWrite(pTrunk->pDbPage);
drhd3627af2006-12-18 18:34:51 +00003886 if( rc ){
3887 goto end_allocate_page;
3888 }
danielk1977cb1a7eb2004-11-05 12:27:02 +00003889 if( k==0 ){
3890 if( !pPrevTrunk ){
3891 memcpy(&pPage1->aData[32], &pTrunk->aData[0], 4);
3892 }else{
3893 memcpy(&pPrevTrunk->aData[0], &pTrunk->aData[0], 4);
3894 }
3895 }else{
3896 /* The trunk page is required by the caller but it contains
3897 ** pointers to free-list leaves. The first leaf becomes a trunk
3898 ** page in this case.
3899 */
3900 MemPage *pNewTrunk;
3901 Pgno iNewTrunk = get4byte(&pTrunk->aData[8]);
drh16a9b832007-05-05 18:39:25 +00003902 rc = sqlite3BtreeGetPage(pBt, iNewTrunk, &pNewTrunk, 0);
danielk1977cb1a7eb2004-11-05 12:27:02 +00003903 if( rc!=SQLITE_OK ){
drhd3627af2006-12-18 18:34:51 +00003904 goto end_allocate_page;
danielk1977cb1a7eb2004-11-05 12:27:02 +00003905 }
danielk19773b8a05f2007-03-19 17:44:26 +00003906 rc = sqlite3PagerWrite(pNewTrunk->pDbPage);
danielk1977cb1a7eb2004-11-05 12:27:02 +00003907 if( rc!=SQLITE_OK ){
3908 releasePage(pNewTrunk);
drhd3627af2006-12-18 18:34:51 +00003909 goto end_allocate_page;
danielk1977cb1a7eb2004-11-05 12:27:02 +00003910 }
3911 memcpy(&pNewTrunk->aData[0], &pTrunk->aData[0], 4);
3912 put4byte(&pNewTrunk->aData[4], k-1);
3913 memcpy(&pNewTrunk->aData[8], &pTrunk->aData[12], (k-1)*4);
drhd3627af2006-12-18 18:34:51 +00003914 releasePage(pNewTrunk);
danielk1977cb1a7eb2004-11-05 12:27:02 +00003915 if( !pPrevTrunk ){
3916 put4byte(&pPage1->aData[32], iNewTrunk);
3917 }else{
danielk19773b8a05f2007-03-19 17:44:26 +00003918 rc = sqlite3PagerWrite(pPrevTrunk->pDbPage);
drhd3627af2006-12-18 18:34:51 +00003919 if( rc ){
3920 goto end_allocate_page;
3921 }
danielk1977cb1a7eb2004-11-05 12:27:02 +00003922 put4byte(&pPrevTrunk->aData[0], iNewTrunk);
3923 }
danielk1977cb1a7eb2004-11-05 12:27:02 +00003924 }
3925 pTrunk = 0;
3926 TRACE(("ALLOCATE: %d trunk - %d free pages left\n", *pPgno, n-1));
3927#endif
3928 }else{
3929 /* Extract a leaf from the trunk */
3930 int closest;
3931 Pgno iPage;
3932 unsigned char *aData = pTrunk->aData;
danielk19773b8a05f2007-03-19 17:44:26 +00003933 rc = sqlite3PagerWrite(pTrunk->pDbPage);
drhd3627af2006-12-18 18:34:51 +00003934 if( rc ){
3935 goto end_allocate_page;
3936 }
danielk1977cb1a7eb2004-11-05 12:27:02 +00003937 if( nearby>0 ){
3938 int i, dist;
3939 closest = 0;
3940 dist = get4byte(&aData[8]) - nearby;
3941 if( dist<0 ) dist = -dist;
3942 for(i=1; i<k; i++){
3943 int d2 = get4byte(&aData[8+i*4]) - nearby;
3944 if( d2<0 ) d2 = -d2;
3945 if( d2<dist ){
3946 closest = i;
3947 dist = d2;
3948 }
3949 }
3950 }else{
3951 closest = 0;
3952 }
3953
3954 iPage = get4byte(&aData[8+closest*4]);
3955 if( !searchList || iPage==nearby ){
3956 *pPgno = iPage;
danielk19773b8a05f2007-03-19 17:44:26 +00003957 if( *pPgno>sqlite3PagerPagecount(pBt->pPager) ){
danielk1977cb1a7eb2004-11-05 12:27:02 +00003958 /* Free page off the end of the file */
drh49285702005-09-17 15:20:26 +00003959 return SQLITE_CORRUPT_BKPT;
danielk1977cb1a7eb2004-11-05 12:27:02 +00003960 }
3961 TRACE(("ALLOCATE: %d was leaf %d of %d on trunk %d"
3962 ": %d more free pages\n",
3963 *pPgno, closest+1, k, pTrunk->pgno, n-1));
3964 if( closest<k-1 ){
3965 memcpy(&aData[8+closest*4], &aData[4+k*4], 4);
3966 }
3967 put4byte(&aData[4], k-1);
drh16a9b832007-05-05 18:39:25 +00003968 rc = sqlite3BtreeGetPage(pBt, *pPgno, ppPage, 1);
danielk1977cb1a7eb2004-11-05 12:27:02 +00003969 if( rc==SQLITE_OK ){
drh538f5702007-04-13 02:14:30 +00003970 sqlite3PagerDontRollback((*ppPage)->pDbPage);
danielk19773b8a05f2007-03-19 17:44:26 +00003971 rc = sqlite3PagerWrite((*ppPage)->pDbPage);
danielk1977aac0a382005-01-16 11:07:06 +00003972 if( rc!=SQLITE_OK ){
3973 releasePage(*ppPage);
3974 }
danielk1977cb1a7eb2004-11-05 12:27:02 +00003975 }
3976 searchList = 0;
3977 }
drhee696e22004-08-30 16:52:17 +00003978 }
danielk1977cb1a7eb2004-11-05 12:27:02 +00003979 releasePage(pPrevTrunk);
drhd3627af2006-12-18 18:34:51 +00003980 pPrevTrunk = 0;
danielk1977cb1a7eb2004-11-05 12:27:02 +00003981 }while( searchList );
drh3b7511c2001-05-26 13:15:44 +00003982 }else{
drh3aac2dd2004-04-26 14:10:20 +00003983 /* There are no pages on the freelist, so create a new page at the
3984 ** end of the file */
danielk19773b8a05f2007-03-19 17:44:26 +00003985 *pPgno = sqlite3PagerPagecount(pBt->pPager) + 1;
danielk1977afcdd022004-10-31 16:25:42 +00003986
3987#ifndef SQLITE_OMIT_AUTOVACUUM
danielk1977dddbcdc2007-04-26 14:42:34 +00003988 if( pBt->nTrunc ){
3989 /* An incr-vacuum has already run within this transaction. So the
3990 ** page to allocate is not from the physical end of the file, but
3991 ** at pBt->nTrunc.
3992 */
3993 *pPgno = pBt->nTrunc+1;
3994 if( *pPgno==PENDING_BYTE_PAGE(pBt) ){
3995 (*pPgno)++;
3996 }
3997 }
danielk1977266664d2006-02-10 08:24:21 +00003998 if( pBt->autoVacuum && PTRMAP_ISPAGE(pBt, *pPgno) ){
danielk1977afcdd022004-10-31 16:25:42 +00003999 /* If *pPgno refers to a pointer-map page, allocate two new pages
4000 ** at the end of the file instead of one. The first allocated page
4001 ** becomes a new pointer-map page, the second is used by the caller.
4002 */
4003 TRACE(("ALLOCATE: %d from end of file (pointer-map page)\n", *pPgno));
danielk1977599fcba2004-11-08 07:13:13 +00004004 assert( *pPgno!=PENDING_BYTE_PAGE(pBt) );
danielk1977afcdd022004-10-31 16:25:42 +00004005 (*pPgno)++;
4006 }
danielk1977dddbcdc2007-04-26 14:42:34 +00004007 if( pBt->nTrunc ){
4008 pBt->nTrunc = *pPgno;
4009 }
danielk1977afcdd022004-10-31 16:25:42 +00004010#endif
4011
danielk1977599fcba2004-11-08 07:13:13 +00004012 assert( *pPgno!=PENDING_BYTE_PAGE(pBt) );
drh16a9b832007-05-05 18:39:25 +00004013 rc = sqlite3BtreeGetPage(pBt, *pPgno, ppPage, 0);
drh3b7511c2001-05-26 13:15:44 +00004014 if( rc ) return rc;
danielk19773b8a05f2007-03-19 17:44:26 +00004015 rc = sqlite3PagerWrite((*ppPage)->pDbPage);
danielk1977aac0a382005-01-16 11:07:06 +00004016 if( rc!=SQLITE_OK ){
4017 releasePage(*ppPage);
4018 }
drh3a4c1412004-05-09 20:40:11 +00004019 TRACE(("ALLOCATE: %d from end of file\n", *pPgno));
drh3b7511c2001-05-26 13:15:44 +00004020 }
danielk1977599fcba2004-11-08 07:13:13 +00004021
4022 assert( *pPgno!=PENDING_BYTE_PAGE(pBt) );
drhd3627af2006-12-18 18:34:51 +00004023
4024end_allocate_page:
4025 releasePage(pTrunk);
4026 releasePage(pPrevTrunk);
drh3b7511c2001-05-26 13:15:44 +00004027 return rc;
4028}
4029
4030/*
drh3aac2dd2004-04-26 14:10:20 +00004031** Add a page of the database file to the freelist.
drh5e2f8b92001-05-28 00:41:15 +00004032**
danielk19773b8a05f2007-03-19 17:44:26 +00004033** sqlite3PagerUnref() is NOT called for pPage.
drh3b7511c2001-05-26 13:15:44 +00004034*/
drh3aac2dd2004-04-26 14:10:20 +00004035static int freePage(MemPage *pPage){
danielk1977aef0bf62005-12-30 16:28:01 +00004036 BtShared *pBt = pPage->pBt;
drh3aac2dd2004-04-26 14:10:20 +00004037 MemPage *pPage1 = pBt->pPage1;
4038 int rc, n, k;
drh8b2f49b2001-06-08 00:21:52 +00004039
drh3aac2dd2004-04-26 14:10:20 +00004040 /* Prepare the page for freeing */
drh27641702007-08-22 02:56:42 +00004041 assert( sqlite3BtreeMutexHeld(pPage->pBt->mutex) );
drh3aac2dd2004-04-26 14:10:20 +00004042 assert( pPage->pgno>1 );
4043 pPage->isInit = 0;
4044 releasePage(pPage->pParent);
4045 pPage->pParent = 0;
4046
drha34b6762004-05-07 13:30:42 +00004047 /* Increment the free page count on pPage1 */
danielk19773b8a05f2007-03-19 17:44:26 +00004048 rc = sqlite3PagerWrite(pPage1->pDbPage);
drh3aac2dd2004-04-26 14:10:20 +00004049 if( rc ) return rc;
4050 n = get4byte(&pPage1->aData[36]);
4051 put4byte(&pPage1->aData[36], n+1);
4052
drhfcce93f2006-02-22 03:08:32 +00004053#ifdef SQLITE_SECURE_DELETE
4054 /* If the SQLITE_SECURE_DELETE compile-time option is enabled, then
4055 ** always fully overwrite deleted information with zeros.
4056 */
danielk19773b8a05f2007-03-19 17:44:26 +00004057 rc = sqlite3PagerWrite(pPage->pDbPage);
drhfcce93f2006-02-22 03:08:32 +00004058 if( rc ) return rc;
4059 memset(pPage->aData, 0, pPage->pBt->pageSize);
4060#endif
4061
danielk1977687566d2004-11-02 12:56:41 +00004062#ifndef SQLITE_OMIT_AUTOVACUUM
4063 /* If the database supports auto-vacuum, write an entry in the pointer-map
danielk1977cb1a7eb2004-11-05 12:27:02 +00004064 ** to indicate that the page is free.
danielk1977687566d2004-11-02 12:56:41 +00004065 */
4066 if( pBt->autoVacuum ){
4067 rc = ptrmapPut(pBt, pPage->pgno, PTRMAP_FREEPAGE, 0);
danielk1977a64a0352004-11-05 01:45:13 +00004068 if( rc ) return rc;
danielk1977687566d2004-11-02 12:56:41 +00004069 }
4070#endif
4071
drh3aac2dd2004-04-26 14:10:20 +00004072 if( n==0 ){
4073 /* This is the first free page */
danielk19773b8a05f2007-03-19 17:44:26 +00004074 rc = sqlite3PagerWrite(pPage->pDbPage);
drhda200cc2004-05-09 11:51:38 +00004075 if( rc ) return rc;
drh3aac2dd2004-04-26 14:10:20 +00004076 memset(pPage->aData, 0, 8);
drha34b6762004-05-07 13:30:42 +00004077 put4byte(&pPage1->aData[32], pPage->pgno);
drh3a4c1412004-05-09 20:40:11 +00004078 TRACE(("FREE-PAGE: %d first\n", pPage->pgno));
drh3aac2dd2004-04-26 14:10:20 +00004079 }else{
4080 /* Other free pages already exist. Retrive the first trunk page
4081 ** of the freelist and find out how many leaves it has. */
drha34b6762004-05-07 13:30:42 +00004082 MemPage *pTrunk;
drh16a9b832007-05-05 18:39:25 +00004083 rc = sqlite3BtreeGetPage(pBt, get4byte(&pPage1->aData[32]), &pTrunk, 0);
drh3b7511c2001-05-26 13:15:44 +00004084 if( rc ) return rc;
drh3aac2dd2004-04-26 14:10:20 +00004085 k = get4byte(&pTrunk->aData[4]);
drhee696e22004-08-30 16:52:17 +00004086 if( k>=pBt->usableSize/4 - 8 ){
drh3aac2dd2004-04-26 14:10:20 +00004087 /* The trunk is full. Turn the page being freed into a new
4088 ** trunk page with no leaves. */
danielk19773b8a05f2007-03-19 17:44:26 +00004089 rc = sqlite3PagerWrite(pPage->pDbPage);
drh3aac2dd2004-04-26 14:10:20 +00004090 if( rc ) return rc;
4091 put4byte(pPage->aData, pTrunk->pgno);
4092 put4byte(&pPage->aData[4], 0);
4093 put4byte(&pPage1->aData[32], pPage->pgno);
drh3a4c1412004-05-09 20:40:11 +00004094 TRACE(("FREE-PAGE: %d new trunk page replacing %d\n",
4095 pPage->pgno, pTrunk->pgno));
drh3aac2dd2004-04-26 14:10:20 +00004096 }else{
4097 /* Add the newly freed page as a leaf on the current trunk */
danielk19773b8a05f2007-03-19 17:44:26 +00004098 rc = sqlite3PagerWrite(pTrunk->pDbPage);
drhf5345442007-04-09 12:45:02 +00004099 if( rc==SQLITE_OK ){
4100 put4byte(&pTrunk->aData[4], k+1);
4101 put4byte(&pTrunk->aData[8+k*4], pPage->pgno);
drhfcce93f2006-02-22 03:08:32 +00004102#ifndef SQLITE_SECURE_DELETE
drh538f5702007-04-13 02:14:30 +00004103 sqlite3PagerDontWrite(pPage->pDbPage);
drhfcce93f2006-02-22 03:08:32 +00004104#endif
drhf5345442007-04-09 12:45:02 +00004105 }
drh3a4c1412004-05-09 20:40:11 +00004106 TRACE(("FREE-PAGE: %d leaf on trunk page %d\n",pPage->pgno,pTrunk->pgno));
drh3aac2dd2004-04-26 14:10:20 +00004107 }
4108 releasePage(pTrunk);
drh3b7511c2001-05-26 13:15:44 +00004109 }
drh3b7511c2001-05-26 13:15:44 +00004110 return rc;
4111}
4112
4113/*
drh3aac2dd2004-04-26 14:10:20 +00004114** Free any overflow pages associated with the given Cell.
drh3b7511c2001-05-26 13:15:44 +00004115*/
drh3aac2dd2004-04-26 14:10:20 +00004116static int clearCell(MemPage *pPage, unsigned char *pCell){
danielk1977aef0bf62005-12-30 16:28:01 +00004117 BtShared *pBt = pPage->pBt;
drh6f11bef2004-05-13 01:12:56 +00004118 CellInfo info;
drh3aac2dd2004-04-26 14:10:20 +00004119 Pgno ovflPgno;
drh6f11bef2004-05-13 01:12:56 +00004120 int rc;
drh94440812007-03-06 11:42:19 +00004121 int nOvfl;
4122 int ovflPageSize;
drh3b7511c2001-05-26 13:15:44 +00004123
drh27641702007-08-22 02:56:42 +00004124 assert( sqlite3BtreeMutexHeld(pPage->pBt->mutex) );
drh16a9b832007-05-05 18:39:25 +00004125 sqlite3BtreeParseCellPtr(pPage, pCell, &info);
drh6f11bef2004-05-13 01:12:56 +00004126 if( info.iOverflow==0 ){
drha34b6762004-05-07 13:30:42 +00004127 return SQLITE_OK; /* No overflow pages. Return without doing anything */
drh3aac2dd2004-04-26 14:10:20 +00004128 }
drh6f11bef2004-05-13 01:12:56 +00004129 ovflPgno = get4byte(&pCell[info.iOverflow]);
drh94440812007-03-06 11:42:19 +00004130 ovflPageSize = pBt->usableSize - 4;
drh72365832007-03-06 15:53:44 +00004131 nOvfl = (info.nPayload - info.nLocal + ovflPageSize - 1)/ovflPageSize;
4132 assert( ovflPgno==0 || nOvfl>0 );
4133 while( nOvfl-- ){
drh3aac2dd2004-04-26 14:10:20 +00004134 MemPage *pOvfl;
danielk19773b8a05f2007-03-19 17:44:26 +00004135 if( ovflPgno==0 || ovflPgno>sqlite3PagerPagecount(pBt->pPager) ){
drh49285702005-09-17 15:20:26 +00004136 return SQLITE_CORRUPT_BKPT;
danielk1977a1cb1832005-02-12 08:59:55 +00004137 }
danielk19778c0a9592007-04-30 16:55:00 +00004138
4139 rc = getOverflowPage(pBt, ovflPgno, &pOvfl, (nOvfl==0)?0:&ovflPgno);
drh3b7511c2001-05-26 13:15:44 +00004140 if( rc ) return rc;
drha34b6762004-05-07 13:30:42 +00004141 rc = freePage(pOvfl);
danielk19773b8a05f2007-03-19 17:44:26 +00004142 sqlite3PagerUnref(pOvfl->pDbPage);
danielk19776b456a22005-03-21 04:04:02 +00004143 if( rc ) return rc;
drh3b7511c2001-05-26 13:15:44 +00004144 }
drh5e2f8b92001-05-28 00:41:15 +00004145 return SQLITE_OK;
drh3b7511c2001-05-26 13:15:44 +00004146}
4147
4148/*
drh91025292004-05-03 19:49:32 +00004149** Create the byte sequence used to represent a cell on page pPage
4150** and write that byte sequence into pCell[]. Overflow pages are
4151** allocated and filled in as necessary. The calling procedure
4152** is responsible for making sure sufficient space has been allocated
4153** for pCell[].
4154**
4155** Note that pCell does not necessary need to point to the pPage->aData
4156** area. pCell might point to some temporary storage. The cell will
4157** be constructed in this temporary area then copied into pPage->aData
4158** later.
drh3b7511c2001-05-26 13:15:44 +00004159*/
4160static int fillInCell(
drh3aac2dd2004-04-26 14:10:20 +00004161 MemPage *pPage, /* The page that contains the cell */
drh4b70f112004-05-02 21:12:19 +00004162 unsigned char *pCell, /* Complete text of the cell */
drh4a1c3802004-05-12 15:15:47 +00004163 const void *pKey, i64 nKey, /* The key */
drh4b70f112004-05-02 21:12:19 +00004164 const void *pData,int nData, /* The data */
drhb026e052007-05-02 01:34:31 +00004165 int nZero, /* Extra zero bytes to append to pData */
drh4b70f112004-05-02 21:12:19 +00004166 int *pnSize /* Write cell size here */
drh3b7511c2001-05-26 13:15:44 +00004167){
drh3b7511c2001-05-26 13:15:44 +00004168 int nPayload;
drh8c6fa9b2004-05-26 00:01:53 +00004169 const u8 *pSrc;
drha34b6762004-05-07 13:30:42 +00004170 int nSrc, n, rc;
drh3aac2dd2004-04-26 14:10:20 +00004171 int spaceLeft;
4172 MemPage *pOvfl = 0;
drh9b171272004-05-08 02:03:22 +00004173 MemPage *pToRelease = 0;
drh3aac2dd2004-04-26 14:10:20 +00004174 unsigned char *pPrior;
4175 unsigned char *pPayload;
danielk1977aef0bf62005-12-30 16:28:01 +00004176 BtShared *pBt = pPage->pBt;
drh3aac2dd2004-04-26 14:10:20 +00004177 Pgno pgnoOvfl = 0;
drh4b70f112004-05-02 21:12:19 +00004178 int nHeader;
drh6f11bef2004-05-13 01:12:56 +00004179 CellInfo info;
drh3b7511c2001-05-26 13:15:44 +00004180
drh27641702007-08-22 02:56:42 +00004181 assert( sqlite3BtreeMutexHeld(pPage->pBt->mutex) );
drhd677b3d2007-08-20 22:48:41 +00004182
drh91025292004-05-03 19:49:32 +00004183 /* Fill in the header. */
drh43605152004-05-29 21:46:49 +00004184 nHeader = 0;
drh91025292004-05-03 19:49:32 +00004185 if( !pPage->leaf ){
4186 nHeader += 4;
4187 }
drh8b18dd42004-05-12 19:18:15 +00004188 if( pPage->hasData ){
drhb026e052007-05-02 01:34:31 +00004189 nHeader += putVarint(&pCell[nHeader], nData+nZero);
drh6f11bef2004-05-13 01:12:56 +00004190 }else{
drhb026e052007-05-02 01:34:31 +00004191 nData = nZero = 0;
drh91025292004-05-03 19:49:32 +00004192 }
drh6f11bef2004-05-13 01:12:56 +00004193 nHeader += putVarint(&pCell[nHeader], *(u64*)&nKey);
drh16a9b832007-05-05 18:39:25 +00004194 sqlite3BtreeParseCellPtr(pPage, pCell, &info);
drh6f11bef2004-05-13 01:12:56 +00004195 assert( info.nHeader==nHeader );
4196 assert( info.nKey==nKey );
drhb026e052007-05-02 01:34:31 +00004197 assert( info.nData==nData+nZero );
drh6f11bef2004-05-13 01:12:56 +00004198
4199 /* Fill in the payload */
drhb026e052007-05-02 01:34:31 +00004200 nPayload = nData + nZero;
drh3aac2dd2004-04-26 14:10:20 +00004201 if( pPage->intKey ){
4202 pSrc = pData;
4203 nSrc = nData;
drh91025292004-05-03 19:49:32 +00004204 nData = 0;
drh3aac2dd2004-04-26 14:10:20 +00004205 }else{
4206 nPayload += nKey;
4207 pSrc = pKey;
4208 nSrc = nKey;
4209 }
drh6f11bef2004-05-13 01:12:56 +00004210 *pnSize = info.nSize;
4211 spaceLeft = info.nLocal;
drh3aac2dd2004-04-26 14:10:20 +00004212 pPayload = &pCell[nHeader];
drh6f11bef2004-05-13 01:12:56 +00004213 pPrior = &pCell[info.iOverflow];
drh3b7511c2001-05-26 13:15:44 +00004214
drh3b7511c2001-05-26 13:15:44 +00004215 while( nPayload>0 ){
4216 if( spaceLeft==0 ){
danielk1977b39f70b2007-05-17 18:28:11 +00004217 int isExact = 0;
danielk1977afcdd022004-10-31 16:25:42 +00004218#ifndef SQLITE_OMIT_AUTOVACUUM
4219 Pgno pgnoPtrmap = pgnoOvfl; /* Overflow page pointer-map entry page */
danielk1977b39f70b2007-05-17 18:28:11 +00004220 if( pBt->autoVacuum ){
4221 do{
4222 pgnoOvfl++;
4223 } while(
4224 PTRMAP_ISPAGE(pBt, pgnoOvfl) || pgnoOvfl==PENDING_BYTE_PAGE(pBt)
4225 );
danielk197789a4be82007-05-23 13:34:32 +00004226 if( pgnoOvfl>1 ){
danielk1977b39f70b2007-05-17 18:28:11 +00004227 /* isExact = 1; */
4228 }
4229 }
danielk1977afcdd022004-10-31 16:25:42 +00004230#endif
danielk1977b39f70b2007-05-17 18:28:11 +00004231 rc = allocateBtreePage(pBt, &pOvfl, &pgnoOvfl, pgnoOvfl, isExact);
danielk1977afcdd022004-10-31 16:25:42 +00004232#ifndef SQLITE_OMIT_AUTOVACUUM
danielk1977a19df672004-11-03 11:37:07 +00004233 /* If the database supports auto-vacuum, and the second or subsequent
4234 ** overflow page is being allocated, add an entry to the pointer-map
danielk19774ef24492007-05-23 09:52:41 +00004235 ** for that page now.
4236 **
4237 ** If this is the first overflow page, then write a partial entry
4238 ** to the pointer-map. If we write nothing to this pointer-map slot,
4239 ** then the optimistic overflow chain processing in clearCell()
4240 ** may misinterpret the uninitialised values and delete the
4241 ** wrong pages from the database.
danielk1977afcdd022004-10-31 16:25:42 +00004242 */
danielk19774ef24492007-05-23 09:52:41 +00004243 if( pBt->autoVacuum && rc==SQLITE_OK ){
4244 u8 eType = (pgnoPtrmap?PTRMAP_OVERFLOW2:PTRMAP_OVERFLOW1);
4245 rc = ptrmapPut(pBt, pgnoOvfl, eType, pgnoPtrmap);
danielk197789a4be82007-05-23 13:34:32 +00004246 if( rc ){
4247 releasePage(pOvfl);
4248 }
danielk1977afcdd022004-10-31 16:25:42 +00004249 }
4250#endif
drh3b7511c2001-05-26 13:15:44 +00004251 if( rc ){
drh9b171272004-05-08 02:03:22 +00004252 releasePage(pToRelease);
drh3b7511c2001-05-26 13:15:44 +00004253 return rc;
4254 }
drh3aac2dd2004-04-26 14:10:20 +00004255 put4byte(pPrior, pgnoOvfl);
drh9b171272004-05-08 02:03:22 +00004256 releasePage(pToRelease);
4257 pToRelease = pOvfl;
drh3aac2dd2004-04-26 14:10:20 +00004258 pPrior = pOvfl->aData;
4259 put4byte(pPrior, 0);
4260 pPayload = &pOvfl->aData[4];
drhb6f41482004-05-14 01:58:11 +00004261 spaceLeft = pBt->usableSize - 4;
drh3b7511c2001-05-26 13:15:44 +00004262 }
4263 n = nPayload;
4264 if( n>spaceLeft ) n = spaceLeft;
drhb026e052007-05-02 01:34:31 +00004265 if( nSrc>0 ){
4266 if( n>nSrc ) n = nSrc;
4267 assert( pSrc );
4268 memcpy(pPayload, pSrc, n);
4269 }else{
4270 memset(pPayload, 0, n);
4271 }
drh3b7511c2001-05-26 13:15:44 +00004272 nPayload -= n;
drhde647132004-05-07 17:57:49 +00004273 pPayload += n;
drh9b171272004-05-08 02:03:22 +00004274 pSrc += n;
drh3aac2dd2004-04-26 14:10:20 +00004275 nSrc -= n;
drh3b7511c2001-05-26 13:15:44 +00004276 spaceLeft -= n;
drh3aac2dd2004-04-26 14:10:20 +00004277 if( nSrc==0 ){
4278 nSrc = nData;
4279 pSrc = pData;
4280 }
drhdd793422001-06-28 01:54:48 +00004281 }
drh9b171272004-05-08 02:03:22 +00004282 releasePage(pToRelease);
drh3b7511c2001-05-26 13:15:44 +00004283 return SQLITE_OK;
4284}
4285
4286/*
drhbd03cae2001-06-02 02:40:57 +00004287** Change the MemPage.pParent pointer on the page whose number is
drh8b2f49b2001-06-08 00:21:52 +00004288** given in the second argument so that MemPage.pParent holds the
drhbd03cae2001-06-02 02:40:57 +00004289** pointer in the third argument.
4290*/
danielk1977aef0bf62005-12-30 16:28:01 +00004291static int reparentPage(BtShared *pBt, Pgno pgno, MemPage *pNewParent, int idx){
drhbd03cae2001-06-02 02:40:57 +00004292 MemPage *pThis;
danielk19773b8a05f2007-03-19 17:44:26 +00004293 DbPage *pDbPage;
drhbd03cae2001-06-02 02:40:57 +00004294
drh27641702007-08-22 02:56:42 +00004295 assert( sqlite3BtreeMutexHeld(pBt->mutex) );
drh43617e92006-03-06 20:55:46 +00004296 assert( pNewParent!=0 );
danielk1977afcdd022004-10-31 16:25:42 +00004297 if( pgno==0 ) return SQLITE_OK;
drh4b70f112004-05-02 21:12:19 +00004298 assert( pBt->pPager!=0 );
danielk19773b8a05f2007-03-19 17:44:26 +00004299 pDbPage = sqlite3PagerLookup(pBt->pPager, pgno);
4300 if( pDbPage ){
4301 pThis = (MemPage *)sqlite3PagerGetExtra(pDbPage);
drhda200cc2004-05-09 11:51:38 +00004302 if( pThis->isInit ){
danielk19773b8a05f2007-03-19 17:44:26 +00004303 assert( pThis->aData==(sqlite3PagerGetData(pDbPage)) );
drhda200cc2004-05-09 11:51:38 +00004304 if( pThis->pParent!=pNewParent ){
danielk19773b8a05f2007-03-19 17:44:26 +00004305 if( pThis->pParent ) sqlite3PagerUnref(pThis->pParent->pDbPage);
drhda200cc2004-05-09 11:51:38 +00004306 pThis->pParent = pNewParent;
danielk19773b8a05f2007-03-19 17:44:26 +00004307 sqlite3PagerRef(pNewParent->pDbPage);
drhda200cc2004-05-09 11:51:38 +00004308 }
4309 pThis->idxParent = idx;
drhdd793422001-06-28 01:54:48 +00004310 }
danielk19773b8a05f2007-03-19 17:44:26 +00004311 sqlite3PagerUnref(pDbPage);
drhbd03cae2001-06-02 02:40:57 +00004312 }
danielk1977afcdd022004-10-31 16:25:42 +00004313
4314#ifndef SQLITE_OMIT_AUTOVACUUM
4315 if( pBt->autoVacuum ){
4316 return ptrmapPut(pBt, pgno, PTRMAP_BTREE, pNewParent->pgno);
4317 }
4318#endif
4319 return SQLITE_OK;
drhbd03cae2001-06-02 02:40:57 +00004320}
4321
danielk1977ac11ee62005-01-15 12:45:51 +00004322
4323
drhbd03cae2001-06-02 02:40:57 +00004324/*
drh4b70f112004-05-02 21:12:19 +00004325** Change the pParent pointer of all children of pPage to point back
4326** to pPage.
4327**
drhbd03cae2001-06-02 02:40:57 +00004328** In other words, for every child of pPage, invoke reparentPage()
drh5e00f6c2001-09-13 13:46:56 +00004329** to make sure that each child knows that pPage is its parent.
drhbd03cae2001-06-02 02:40:57 +00004330**
4331** This routine gets called after you memcpy() one page into
4332** another.
4333*/
danielk1977afcdd022004-10-31 16:25:42 +00004334static int reparentChildPages(MemPage *pPage){
drhbd03cae2001-06-02 02:40:57 +00004335 int i;
danielk1977aef0bf62005-12-30 16:28:01 +00004336 BtShared *pBt = pPage->pBt;
danielk1977afcdd022004-10-31 16:25:42 +00004337 int rc = SQLITE_OK;
drh4b70f112004-05-02 21:12:19 +00004338
drh27641702007-08-22 02:56:42 +00004339 assert( sqlite3BtreeMutexHeld(pPage->pBt->mutex) );
danielk1977afcdd022004-10-31 16:25:42 +00004340 if( pPage->leaf ) return SQLITE_OK;
danielk1977afcdd022004-10-31 16:25:42 +00004341
drhbd03cae2001-06-02 02:40:57 +00004342 for(i=0; i<pPage->nCell; i++){
danielk19771cc5ed82007-05-16 17:28:43 +00004343 u8 *pCell = findCell(pPage, i);
danielk1977afcdd022004-10-31 16:25:42 +00004344 if( !pPage->leaf ){
4345 rc = reparentPage(pBt, get4byte(pCell), pPage, i);
4346 if( rc!=SQLITE_OK ) return rc;
4347 }
drhbd03cae2001-06-02 02:40:57 +00004348 }
danielk1977afcdd022004-10-31 16:25:42 +00004349 if( !pPage->leaf ){
4350 rc = reparentPage(pBt, get4byte(&pPage->aData[pPage->hdrOffset+8]),
4351 pPage, i);
4352 pPage->idxShift = 0;
4353 }
4354 return rc;
drh14acc042001-06-10 19:56:58 +00004355}
4356
4357/*
4358** Remove the i-th cell from pPage. This routine effects pPage only.
4359** The cell content is not freed or deallocated. It is assumed that
4360** the cell content has been copied someplace else. This routine just
4361** removes the reference to the cell from pPage.
4362**
4363** "sz" must be the number of bytes in the cell.
drh14acc042001-06-10 19:56:58 +00004364*/
drh4b70f112004-05-02 21:12:19 +00004365static void dropCell(MemPage *pPage, int idx, int sz){
drh43605152004-05-29 21:46:49 +00004366 int i; /* Loop counter */
4367 int pc; /* Offset to cell content of cell being deleted */
4368 u8 *data; /* pPage->aData */
4369 u8 *ptr; /* Used to move bytes around within data[] */
4370
drh8c42ca92001-06-22 19:15:00 +00004371 assert( idx>=0 && idx<pPage->nCell );
drh43605152004-05-29 21:46:49 +00004372 assert( sz==cellSize(pPage, idx) );
danielk19773b8a05f2007-03-19 17:44:26 +00004373 assert( sqlite3PagerIswriteable(pPage->pDbPage) );
drh27641702007-08-22 02:56:42 +00004374 assert( sqlite3BtreeMutexHeld(pPage->pBt->mutex) );
drhda200cc2004-05-09 11:51:38 +00004375 data = pPage->aData;
drh43605152004-05-29 21:46:49 +00004376 ptr = &data[pPage->cellOffset + 2*idx];
4377 pc = get2byte(ptr);
4378 assert( pc>10 && pc+sz<=pPage->pBt->usableSize );
drhde647132004-05-07 17:57:49 +00004379 freeSpace(pPage, pc, sz);
drh43605152004-05-29 21:46:49 +00004380 for(i=idx+1; i<pPage->nCell; i++, ptr+=2){
4381 ptr[0] = ptr[2];
4382 ptr[1] = ptr[3];
drh14acc042001-06-10 19:56:58 +00004383 }
4384 pPage->nCell--;
drh43605152004-05-29 21:46:49 +00004385 put2byte(&data[pPage->hdrOffset+3], pPage->nCell);
4386 pPage->nFree += 2;
drh428ae8c2003-01-04 16:48:09 +00004387 pPage->idxShift = 1;
drh14acc042001-06-10 19:56:58 +00004388}
4389
4390/*
4391** Insert a new cell on pPage at cell index "i". pCell points to the
4392** content of the cell.
4393**
4394** If the cell content will fit on the page, then put it there. If it
drh43605152004-05-29 21:46:49 +00004395** will not fit, then make a copy of the cell content into pTemp if
4396** pTemp is not null. Regardless of pTemp, allocate a new entry
4397** in pPage->aOvfl[] and make it point to the cell content (either
4398** in pTemp or the original pCell) and also record its index.
4399** Allocating a new entry in pPage->aCell[] implies that
4400** pPage->nOverflow is incremented.
danielk1977a3ad5e72005-01-07 08:56:44 +00004401**
4402** If nSkip is non-zero, then do not copy the first nSkip bytes of the
4403** cell. The caller will overwrite them after this function returns. If
drh4b238df2005-01-08 15:43:18 +00004404** nSkip is non-zero, then pCell may not point to an invalid memory location
danielk1977a3ad5e72005-01-07 08:56:44 +00004405** (but pCell+nSkip is always valid).
drh14acc042001-06-10 19:56:58 +00004406*/
danielk1977e80463b2004-11-03 03:01:16 +00004407static int insertCell(
drh24cd67e2004-05-10 16:18:47 +00004408 MemPage *pPage, /* Page into which we are copying */
drh43605152004-05-29 21:46:49 +00004409 int i, /* New cell becomes the i-th cell of the page */
4410 u8 *pCell, /* Content of the new cell */
4411 int sz, /* Bytes of content in pCell */
danielk1977a3ad5e72005-01-07 08:56:44 +00004412 u8 *pTemp, /* Temp storage space for pCell, if needed */
4413 u8 nSkip /* Do not write the first nSkip bytes of the cell */
drh24cd67e2004-05-10 16:18:47 +00004414){
drh43605152004-05-29 21:46:49 +00004415 int idx; /* Where to write new cell content in data[] */
4416 int j; /* Loop counter */
4417 int top; /* First byte of content for any cell in data[] */
4418 int end; /* First byte past the last cell pointer in data[] */
4419 int ins; /* Index in data[] where new cell pointer is inserted */
4420 int hdr; /* Offset into data[] of the page header */
4421 int cellOffset; /* Address of first cell pointer in data[] */
4422 u8 *data; /* The content of the whole page */
4423 u8 *ptr; /* Used for moving information around in data[] */
4424
4425 assert( i>=0 && i<=pPage->nCell+pPage->nOverflow );
4426 assert( sz==cellSizePtr(pPage, pCell) );
drh27641702007-08-22 02:56:42 +00004427 assert( sqlite3BtreeMutexHeld(pPage->pBt->mutex) );
drh43605152004-05-29 21:46:49 +00004428 if( pPage->nOverflow || sz+2>pPage->nFree ){
drh24cd67e2004-05-10 16:18:47 +00004429 if( pTemp ){
danielk1977a3ad5e72005-01-07 08:56:44 +00004430 memcpy(pTemp+nSkip, pCell+nSkip, sz-nSkip);
drh43605152004-05-29 21:46:49 +00004431 pCell = pTemp;
drh24cd67e2004-05-10 16:18:47 +00004432 }
drh43605152004-05-29 21:46:49 +00004433 j = pPage->nOverflow++;
4434 assert( j<sizeof(pPage->aOvfl)/sizeof(pPage->aOvfl[0]) );
4435 pPage->aOvfl[j].pCell = pCell;
4436 pPage->aOvfl[j].idx = i;
4437 pPage->nFree = 0;
drh14acc042001-06-10 19:56:58 +00004438 }else{
danielk19776e465eb2007-08-21 13:11:00 +00004439 int rc = sqlite3PagerWrite(pPage->pDbPage);
4440 if( rc!=SQLITE_OK ){
4441 return rc;
4442 }
4443 assert( sqlite3PagerIswriteable(pPage->pDbPage) );
drh43605152004-05-29 21:46:49 +00004444 data = pPage->aData;
4445 hdr = pPage->hdrOffset;
4446 top = get2byte(&data[hdr+5]);
4447 cellOffset = pPage->cellOffset;
4448 end = cellOffset + 2*pPage->nCell + 2;
4449 ins = cellOffset + 2*i;
4450 if( end > top - sz ){
danielk19776e465eb2007-08-21 13:11:00 +00004451 rc = defragmentPage(pPage);
danielk19776b456a22005-03-21 04:04:02 +00004452 if( rc!=SQLITE_OK ) return rc;
drh43605152004-05-29 21:46:49 +00004453 top = get2byte(&data[hdr+5]);
4454 assert( end + sz <= top );
4455 }
4456 idx = allocateSpace(pPage, sz);
4457 assert( idx>0 );
4458 assert( end <= get2byte(&data[hdr+5]) );
4459 pPage->nCell++;
4460 pPage->nFree -= 2;
danielk1977a3ad5e72005-01-07 08:56:44 +00004461 memcpy(&data[idx+nSkip], pCell+nSkip, sz-nSkip);
drh43605152004-05-29 21:46:49 +00004462 for(j=end-2, ptr=&data[j]; j>ins; j-=2, ptr-=2){
4463 ptr[0] = ptr[-2];
4464 ptr[1] = ptr[-1];
drhda200cc2004-05-09 11:51:38 +00004465 }
drh43605152004-05-29 21:46:49 +00004466 put2byte(&data[ins], idx);
4467 put2byte(&data[hdr+3], pPage->nCell);
4468 pPage->idxShift = 1;
danielk1977a19df672004-11-03 11:37:07 +00004469#ifndef SQLITE_OMIT_AUTOVACUUM
4470 if( pPage->pBt->autoVacuum ){
4471 /* The cell may contain a pointer to an overflow page. If so, write
4472 ** the entry for the overflow page into the pointer map.
4473 */
4474 CellInfo info;
drh16a9b832007-05-05 18:39:25 +00004475 sqlite3BtreeParseCellPtr(pPage, pCell, &info);
drh72365832007-03-06 15:53:44 +00004476 assert( (info.nData+(pPage->intKey?0:info.nKey))==info.nPayload );
danielk1977a19df672004-11-03 11:37:07 +00004477 if( (info.nData+(pPage->intKey?0:info.nKey))>info.nLocal ){
4478 Pgno pgnoOvfl = get4byte(&pCell[info.iOverflow]);
danielk19776e465eb2007-08-21 13:11:00 +00004479 rc = ptrmapPut(pPage->pBt, pgnoOvfl, PTRMAP_OVERFLOW1, pPage->pgno);
danielk1977a19df672004-11-03 11:37:07 +00004480 if( rc!=SQLITE_OK ) return rc;
4481 }
4482 }
4483#endif
drh14acc042001-06-10 19:56:58 +00004484 }
danielk1977e80463b2004-11-03 03:01:16 +00004485
danielk1977e80463b2004-11-03 03:01:16 +00004486 return SQLITE_OK;
drh14acc042001-06-10 19:56:58 +00004487}
4488
4489/*
drhfa1a98a2004-05-14 19:08:17 +00004490** Add a list of cells to a page. The page should be initially empty.
4491** The cells are guaranteed to fit on the page.
4492*/
4493static void assemblePage(
4494 MemPage *pPage, /* The page to be assemblied */
4495 int nCell, /* The number of cells to add to this page */
drh43605152004-05-29 21:46:49 +00004496 u8 **apCell, /* Pointers to cell bodies */
drhfa1a98a2004-05-14 19:08:17 +00004497 int *aSize /* Sizes of the cells */
4498){
4499 int i; /* Loop counter */
4500 int totalSize; /* Total size of all cells */
4501 int hdr; /* Index of page header */
drh43605152004-05-29 21:46:49 +00004502 int cellptr; /* Address of next cell pointer */
4503 int cellbody; /* Address of next cell body */
drhfa1a98a2004-05-14 19:08:17 +00004504 u8 *data; /* Data for the page */
4505
drh43605152004-05-29 21:46:49 +00004506 assert( pPage->nOverflow==0 );
drh27641702007-08-22 02:56:42 +00004507 assert( sqlite3BtreeMutexHeld(pPage->pBt->mutex) );
drhfa1a98a2004-05-14 19:08:17 +00004508 totalSize = 0;
4509 for(i=0; i<nCell; i++){
4510 totalSize += aSize[i];
4511 }
drh43605152004-05-29 21:46:49 +00004512 assert( totalSize+2*nCell<=pPage->nFree );
drhfa1a98a2004-05-14 19:08:17 +00004513 assert( pPage->nCell==0 );
drh43605152004-05-29 21:46:49 +00004514 cellptr = pPage->cellOffset;
drhfa1a98a2004-05-14 19:08:17 +00004515 data = pPage->aData;
4516 hdr = pPage->hdrOffset;
drh43605152004-05-29 21:46:49 +00004517 put2byte(&data[hdr+3], nCell);
drh09d0deb2005-08-02 17:13:09 +00004518 if( nCell ){
4519 cellbody = allocateSpace(pPage, totalSize);
4520 assert( cellbody>0 );
4521 assert( pPage->nFree >= 2*nCell );
4522 pPage->nFree -= 2*nCell;
4523 for(i=0; i<nCell; i++){
4524 put2byte(&data[cellptr], cellbody);
4525 memcpy(&data[cellbody], apCell[i], aSize[i]);
4526 cellptr += 2;
4527 cellbody += aSize[i];
4528 }
4529 assert( cellbody==pPage->pBt->usableSize );
drhfa1a98a2004-05-14 19:08:17 +00004530 }
4531 pPage->nCell = nCell;
drhfa1a98a2004-05-14 19:08:17 +00004532}
4533
drh14acc042001-06-10 19:56:58 +00004534/*
drhc3b70572003-01-04 19:44:07 +00004535** The following parameters determine how many adjacent pages get involved
4536** in a balancing operation. NN is the number of neighbors on either side
4537** of the page that participate in the balancing operation. NB is the
4538** total number of pages that participate, including the target page and
4539** NN neighbors on either side.
4540**
4541** The minimum value of NN is 1 (of course). Increasing NN above 1
4542** (to 2 or 3) gives a modest improvement in SELECT and DELETE performance
4543** in exchange for a larger degradation in INSERT and UPDATE performance.
** A value of 1 for NN appears to give the best results overall.
4545*/
4546#define NN 1 /* Number of neighbors on either side of pPage */
4547#define NB (NN*2+1) /* Total pages involved in the balance */
4548
drh43605152004-05-29 21:46:49 +00004549/* Forward reference */
danielk1977ac245ec2005-01-14 13:50:11 +00004550static int balance(MemPage*, int);
4551
drh615ae552005-01-16 23:21:00 +00004552#ifndef SQLITE_OMIT_QUICKBALANCE
drhf222e712005-01-14 22:55:49 +00004553/*
4554** This version of balance() handles the common special case where
4555** a new entry is being inserted on the extreme right-end of the
4556** tree, in other words, when the new entry will become the largest
4557** entry in the tree.
4558**
4559** Instead of trying balance the 3 right-most leaf pages, just add
4560** a new page to the right-hand side and put the one new entry in
4561** that page. This leaves the right side of the tree somewhat
4562** unbalanced. But odds are that we will be inserting new entries
4563** at the end soon afterwards so the nearly empty page will quickly
4564** fill up. On average.
4565**
4566** pPage is the leaf page which is the right-most page in the tree.
4567** pParent is its parent. pPage must have a single overflow entry
4568** which is also the right-most entry on the page.
4569*/
danielk1977ac245ec2005-01-14 13:50:11 +00004570static int balance_quick(MemPage *pPage, MemPage *pParent){
4571 int rc;
4572 MemPage *pNew;
4573 Pgno pgnoNew;
4574 u8 *pCell;
4575 int szCell;
4576 CellInfo info;
danielk1977aef0bf62005-12-30 16:28:01 +00004577 BtShared *pBt = pPage->pBt;
danielk197779a40da2005-01-16 08:00:01 +00004578 int parentIdx = pParent->nCell; /* pParent new divider cell index */
4579 int parentSize; /* Size of new divider cell */
4580 u8 parentCell[64]; /* Space for the new divider cell */
danielk1977ac245ec2005-01-14 13:50:11 +00004581
drh27641702007-08-22 02:56:42 +00004582 assert( sqlite3BtreeMutexHeld(pPage->pBt->mutex) );
drhd677b3d2007-08-20 22:48:41 +00004583
danielk1977ac245ec2005-01-14 13:50:11 +00004584 /* Allocate a new page. Insert the overflow cell from pPage
4585 ** into it. Then remove the overflow cell from pPage.
4586 */
drh4f0c5872007-03-26 22:05:01 +00004587 rc = allocateBtreePage(pBt, &pNew, &pgnoNew, 0, 0);
danielk1977ac245ec2005-01-14 13:50:11 +00004588 if( rc!=SQLITE_OK ){
4589 return rc;
4590 }
4591 pCell = pPage->aOvfl[0].pCell;
4592 szCell = cellSizePtr(pPage, pCell);
4593 zeroPage(pNew, pPage->aData[0]);
4594 assemblePage(pNew, 1, &pCell, &szCell);
4595 pPage->nOverflow = 0;
4596
danielk197779a40da2005-01-16 08:00:01 +00004597 /* Set the parent of the newly allocated page to pParent. */
4598 pNew->pParent = pParent;
danielk19773b8a05f2007-03-19 17:44:26 +00004599 sqlite3PagerRef(pParent->pDbPage);
danielk197779a40da2005-01-16 08:00:01 +00004600
danielk1977ac245ec2005-01-14 13:50:11 +00004601 /* pPage is currently the right-child of pParent. Change this
4602 ** so that the right-child is the new page allocated above and
danielk197779a40da2005-01-16 08:00:01 +00004603 ** pPage is the next-to-right child.
danielk1977ac245ec2005-01-14 13:50:11 +00004604 */
danielk1977ac11ee62005-01-15 12:45:51 +00004605 assert( pPage->nCell>0 );
danielk19771cc5ed82007-05-16 17:28:43 +00004606 pCell = findCell(pPage, pPage->nCell-1);
drh16a9b832007-05-05 18:39:25 +00004607 sqlite3BtreeParseCellPtr(pPage, pCell, &info);
drhb026e052007-05-02 01:34:31 +00004608 rc = fillInCell(pParent, parentCell, 0, info.nKey, 0, 0, 0, &parentSize);
danielk1977ac245ec2005-01-14 13:50:11 +00004609 if( rc!=SQLITE_OK ){
danielk197779a40da2005-01-16 08:00:01 +00004610 return rc;
danielk1977ac245ec2005-01-14 13:50:11 +00004611 }
4612 assert( parentSize<64 );
4613 rc = insertCell(pParent, parentIdx, parentCell, parentSize, 0, 4);
4614 if( rc!=SQLITE_OK ){
danielk197779a40da2005-01-16 08:00:01 +00004615 return rc;
danielk1977ac245ec2005-01-14 13:50:11 +00004616 }
4617 put4byte(findOverflowCell(pParent,parentIdx), pPage->pgno);
4618 put4byte(&pParent->aData[pParent->hdrOffset+8], pgnoNew);
4619
danielk197779a40da2005-01-16 08:00:01 +00004620#ifndef SQLITE_OMIT_AUTOVACUUM
4621 /* If this is an auto-vacuum database, update the pointer map
4622 ** with entries for the new page, and any pointer from the
4623 ** cell on the page to an overflow page.
4624 */
danielk1977ac11ee62005-01-15 12:45:51 +00004625 if( pBt->autoVacuum ){
4626 rc = ptrmapPut(pBt, pgnoNew, PTRMAP_BTREE, pParent->pgno);
danielk1977deb403e2007-05-24 09:20:16 +00004627 if( rc==SQLITE_OK ){
4628 rc = ptrmapPutOvfl(pNew, 0);
danielk1977ac11ee62005-01-15 12:45:51 +00004629 }
danielk197779a40da2005-01-16 08:00:01 +00004630 if( rc!=SQLITE_OK ){
danielk1977deb403e2007-05-24 09:20:16 +00004631 releasePage(pNew);
danielk197779a40da2005-01-16 08:00:01 +00004632 return rc;
danielk1977ac11ee62005-01-15 12:45:51 +00004633 }
4634 }
danielk197779a40da2005-01-16 08:00:01 +00004635#endif
danielk1977ac11ee62005-01-15 12:45:51 +00004636
danielk197779a40da2005-01-16 08:00:01 +00004637 /* Release the reference to the new page and balance the parent page,
4638 ** in case the divider cell inserted caused it to become overfull.
4639 */
danielk1977ac245ec2005-01-14 13:50:11 +00004640 releasePage(pNew);
4641 return balance(pParent, 0);
4642}
drh615ae552005-01-16 23:21:00 +00004643#endif /* SQLITE_OMIT_QUICKBALANCE */
drh43605152004-05-29 21:46:49 +00004644
drhc3b70572003-01-04 19:44:07 +00004645/*
drhab01f612004-05-22 02:55:23 +00004646** This routine redistributes Cells on pPage and up to NN*2 siblings
drh8b2f49b2001-06-08 00:21:52 +00004647** of pPage so that all pages have about the same amount of free space.
drh0c6cc4e2004-06-15 02:13:26 +00004648** Usually NN siblings on either side of pPage is used in the balancing,
4649** though more siblings might come from one side if pPage is the first
drhab01f612004-05-22 02:55:23 +00004650** or last child of its parent. If pPage has fewer than 2*NN siblings
drh8b2f49b2001-06-08 00:21:52 +00004651** (something which can only happen if pPage is the root page or a
drh14acc042001-06-10 19:56:58 +00004652** child of root) then all available siblings participate in the balancing.
drh8b2f49b2001-06-08 00:21:52 +00004653**
drh0c6cc4e2004-06-15 02:13:26 +00004654** The number of siblings of pPage might be increased or decreased by one or
4655** two in an effort to keep pages nearly full but not over full. The root page
drhab01f612004-05-22 02:55:23 +00004656** is special and is allowed to be nearly empty. If pPage is
drh8c42ca92001-06-22 19:15:00 +00004657** the root page, then the depth of the tree might be increased
drh8b2f49b2001-06-08 00:21:52 +00004658** or decreased by one, as necessary, to keep the root page from being
drhab01f612004-05-22 02:55:23 +00004659** overfull or completely empty.
drh14acc042001-06-10 19:56:58 +00004660**
drh8b2f49b2001-06-08 00:21:52 +00004661** Note that when this routine is called, some of the Cells on pPage
drh4b70f112004-05-02 21:12:19 +00004662** might not actually be stored in pPage->aData[]. This can happen
drh8b2f49b2001-06-08 00:21:52 +00004663** if the page is overfull. Part of the job of this routine is to
drh4b70f112004-05-02 21:12:19 +00004664** make sure all Cells for pPage once again fit in pPage->aData[].
drh14acc042001-06-10 19:56:58 +00004665**
drh8c42ca92001-06-22 19:15:00 +00004666** In the course of balancing the siblings of pPage, the parent of pPage
4667** might become overfull or underfull. If that happens, then this routine
4668** is called recursively on the parent.
4669**
drh5e00f6c2001-09-13 13:46:56 +00004670** If this routine fails for any reason, it might leave the database
4671** in a corrupted state. So if this routine fails, the database should
4672** be rolled back.
drh8b2f49b2001-06-08 00:21:52 +00004673*/
drh43605152004-05-29 21:46:49 +00004674static int balance_nonroot(MemPage *pPage){
drh8b2f49b2001-06-08 00:21:52 +00004675 MemPage *pParent; /* The parent of pPage */
drh16a9b832007-05-05 18:39:25 +00004676 BtShared *pBt; /* The whole database */
danielk1977634f2982005-03-28 08:44:07 +00004677 int nCell = 0; /* Number of cells in apCell[] */
4678 int nMaxCells = 0; /* Allocated size of apCell, szCell, aFrom. */
drh8b2f49b2001-06-08 00:21:52 +00004679 int nOld; /* Number of pages in apOld[] */
4680 int nNew; /* Number of pages in apNew[] */
drh8b2f49b2001-06-08 00:21:52 +00004681 int nDiv; /* Number of cells in apDiv[] */
drh14acc042001-06-10 19:56:58 +00004682 int i, j, k; /* Loop counters */
drha34b6762004-05-07 13:30:42 +00004683 int idx; /* Index of pPage in pParent->aCell[] */
4684 int nxDiv; /* Next divider slot in pParent->aCell[] */
drh14acc042001-06-10 19:56:58 +00004685 int rc; /* The return code */
drh91025292004-05-03 19:49:32 +00004686 int leafCorrection; /* 4 if pPage is a leaf. 0 if not */
drh8b18dd42004-05-12 19:18:15 +00004687 int leafData; /* True if pPage is a leaf of a LEAFDATA tree */
drh91025292004-05-03 19:49:32 +00004688 int usableSpace; /* Bytes in pPage beyond the header */
4689 int pageFlags; /* Value of pPage->aData[0] */
drh6019e162001-07-02 17:51:45 +00004690 int subtotal; /* Subtotal of bytes in cells on one page */
drhb6f41482004-05-14 01:58:11 +00004691 int iSpace = 0; /* First unused byte of aSpace[] */
drhc3b70572003-01-04 19:44:07 +00004692 MemPage *apOld[NB]; /* pPage and up to two siblings */
4693 Pgno pgnoOld[NB]; /* Page numbers for each page in apOld[] */
drh4b70f112004-05-02 21:12:19 +00004694 MemPage *apCopy[NB]; /* Private copies of apOld[] pages */
drha2fce642004-06-05 00:01:44 +00004695 MemPage *apNew[NB+2]; /* pPage and up to NB siblings after balancing */
4696 Pgno pgnoNew[NB+2]; /* Page numbers for each page in apNew[] */
drh4b70f112004-05-02 21:12:19 +00004697 u8 *apDiv[NB]; /* Divider cells in pParent */
drha2fce642004-06-05 00:01:44 +00004698 int cntNew[NB+2]; /* Index in aCell[] of cell after i-th page */
4699 int szNew[NB+2]; /* Combined size of cells place on i-th page */
danielk197750f059b2005-03-29 02:54:03 +00004700 u8 **apCell = 0; /* All cells begin balanced */
drh2e38c322004-09-03 18:38:44 +00004701 int *szCell; /* Local size of all cells in apCell[] */
4702 u8 *aCopy[NB]; /* Space for holding data of apCopy[] */
4703 u8 *aSpace; /* Space to hold copies of dividers cells */
danielk19774e17d142005-01-16 09:06:33 +00004704#ifndef SQLITE_OMIT_AUTOVACUUM
danielk1977ac11ee62005-01-15 12:45:51 +00004705 u8 *aFrom = 0;
4706#endif
drh8b2f49b2001-06-08 00:21:52 +00004707
drh27641702007-08-22 02:56:42 +00004708 assert( sqlite3BtreeMutexHeld(pPage->pBt->mutex) );
drhd677b3d2007-08-20 22:48:41 +00004709
drh14acc042001-06-10 19:56:58 +00004710 /*
drh43605152004-05-29 21:46:49 +00004711 ** Find the parent page.
drh8b2f49b2001-06-08 00:21:52 +00004712 */
drh3a4c1412004-05-09 20:40:11 +00004713 assert( pPage->isInit );
danielk19776e465eb2007-08-21 13:11:00 +00004714 assert( sqlite3PagerIswriteable(pPage->pDbPage) || pPage->nOverflow==1 );
drh4b70f112004-05-02 21:12:19 +00004715 pBt = pPage->pBt;
drh14acc042001-06-10 19:56:58 +00004716 pParent = pPage->pParent;
drh43605152004-05-29 21:46:49 +00004717 assert( pParent );
danielk19773b8a05f2007-03-19 17:44:26 +00004718 if( SQLITE_OK!=(rc = sqlite3PagerWrite(pParent->pDbPage)) ){
danielk197707cb5602006-01-20 10:55:05 +00004719 return rc;
4720 }
drh43605152004-05-29 21:46:49 +00004721 TRACE(("BALANCE: begin page %d child of %d\n", pPage->pgno, pParent->pgno));
drh2e38c322004-09-03 18:38:44 +00004722
drh615ae552005-01-16 23:21:00 +00004723#ifndef SQLITE_OMIT_QUICKBALANCE
drhf222e712005-01-14 22:55:49 +00004724 /*
4725 ** A special case: If a new entry has just been inserted into a
4726 ** table (that is, a btree with integer keys and all data at the leaves)
drh09d0deb2005-08-02 17:13:09 +00004727 ** and the new entry is the right-most entry in the tree (it has the
drhf222e712005-01-14 22:55:49 +00004728 ** largest key) then use the special balance_quick() routine for
4729 ** balancing. balance_quick() is much faster and results in a tighter
4730 ** packing of data in the common case.
4731 */
danielk1977ac245ec2005-01-14 13:50:11 +00004732 if( pPage->leaf &&
4733 pPage->intKey &&
4734 pPage->leafData &&
4735 pPage->nOverflow==1 &&
4736 pPage->aOvfl[0].idx==pPage->nCell &&
danielk1977ac11ee62005-01-15 12:45:51 +00004737 pPage->pParent->pgno!=1 &&
danielk1977ac245ec2005-01-14 13:50:11 +00004738 get4byte(&pParent->aData[pParent->hdrOffset+8])==pPage->pgno
4739 ){
danielk1977ac11ee62005-01-15 12:45:51 +00004740 /*
4741 ** TODO: Check the siblings to the left of pPage. It may be that
4742 ** they are not full and no new page is required.
4743 */
danielk1977ac245ec2005-01-14 13:50:11 +00004744 return balance_quick(pPage, pParent);
4745 }
4746#endif
4747
danielk19776e465eb2007-08-21 13:11:00 +00004748 if( SQLITE_OK!=(rc = sqlite3PagerWrite(pPage->pDbPage)) ){
4749 return rc;
4750 }
4751
drh2e38c322004-09-03 18:38:44 +00004752 /*
drh4b70f112004-05-02 21:12:19 +00004753 ** Find the cell in the parent page whose left child points back
drh14acc042001-06-10 19:56:58 +00004754 ** to pPage. The "idx" variable is the index of that cell. If pPage
4755 ** is the rightmost child of pParent then set idx to pParent->nCell
drh8b2f49b2001-06-08 00:21:52 +00004756 */
drhbb49aba2003-01-04 18:53:27 +00004757 if( pParent->idxShift ){
drha34b6762004-05-07 13:30:42 +00004758 Pgno pgno;
drh4b70f112004-05-02 21:12:19 +00004759 pgno = pPage->pgno;
danielk19773b8a05f2007-03-19 17:44:26 +00004760 assert( pgno==sqlite3PagerPagenumber(pPage->pDbPage) );
drhbb49aba2003-01-04 18:53:27 +00004761 for(idx=0; idx<pParent->nCell; idx++){
danielk19771cc5ed82007-05-16 17:28:43 +00004762 if( get4byte(findCell(pParent, idx))==pgno ){
drhbb49aba2003-01-04 18:53:27 +00004763 break;
4764 }
drh8b2f49b2001-06-08 00:21:52 +00004765 }
drh4b70f112004-05-02 21:12:19 +00004766 assert( idx<pParent->nCell
drh43605152004-05-29 21:46:49 +00004767 || get4byte(&pParent->aData[pParent->hdrOffset+8])==pgno );
drhbb49aba2003-01-04 18:53:27 +00004768 }else{
4769 idx = pPage->idxParent;
drh8b2f49b2001-06-08 00:21:52 +00004770 }
drh8b2f49b2001-06-08 00:21:52 +00004771
4772 /*
drh14acc042001-06-10 19:56:58 +00004773 ** Initialize variables so that it will be safe to jump
drh5edc3122001-09-13 21:53:09 +00004774 ** directly to balance_cleanup at any moment.
drh8b2f49b2001-06-08 00:21:52 +00004775 */
drh14acc042001-06-10 19:56:58 +00004776 nOld = nNew = 0;
danielk19773b8a05f2007-03-19 17:44:26 +00004777 sqlite3PagerRef(pParent->pDbPage);
drh14acc042001-06-10 19:56:58 +00004778
  /*
  ** Find sibling pages to pPage and the cells in pParent that divide
  ** the siblings.  An attempt is made to find NN siblings on either
  ** side of pPage.  More siblings are taken from one side, however, if
  ** there are fewer than NN siblings on the other side.  If pParent
  ** has NB or fewer children then all children of pParent are taken.
  */
drhc3b70572003-01-04 19:44:07 +00004786 nxDiv = idx - NN;
4787 if( nxDiv + NB > pParent->nCell ){
4788 nxDiv = pParent->nCell - NB + 1;
drh8b2f49b2001-06-08 00:21:52 +00004789 }
drhc3b70572003-01-04 19:44:07 +00004790 if( nxDiv<0 ){
4791 nxDiv = 0;
4792 }
drh8b2f49b2001-06-08 00:21:52 +00004793 nDiv = 0;
drhc3b70572003-01-04 19:44:07 +00004794 for(i=0, k=nxDiv; i<NB; i++, k++){
drh14acc042001-06-10 19:56:58 +00004795 if( k<pParent->nCell ){
danielk19771cc5ed82007-05-16 17:28:43 +00004796 apDiv[i] = findCell(pParent, k);
drh8b2f49b2001-06-08 00:21:52 +00004797 nDiv++;
drha34b6762004-05-07 13:30:42 +00004798 assert( !pParent->leaf );
drh43605152004-05-29 21:46:49 +00004799 pgnoOld[i] = get4byte(apDiv[i]);
drh14acc042001-06-10 19:56:58 +00004800 }else if( k==pParent->nCell ){
drh43605152004-05-29 21:46:49 +00004801 pgnoOld[i] = get4byte(&pParent->aData[pParent->hdrOffset+8]);
drh14acc042001-06-10 19:56:58 +00004802 }else{
4803 break;
drh8b2f49b2001-06-08 00:21:52 +00004804 }
drhde647132004-05-07 17:57:49 +00004805 rc = getAndInitPage(pBt, pgnoOld[i], &apOld[i], pParent);
drh6019e162001-07-02 17:51:45 +00004806 if( rc ) goto balance_cleanup;
drh428ae8c2003-01-04 16:48:09 +00004807 apOld[i]->idxParent = k;
drh91025292004-05-03 19:49:32 +00004808 apCopy[i] = 0;
4809 assert( i==nOld );
drh14acc042001-06-10 19:56:58 +00004810 nOld++;
danielk1977634f2982005-03-28 08:44:07 +00004811 nMaxCells += 1+apOld[i]->nCell+apOld[i]->nOverflow;
drh8b2f49b2001-06-08 00:21:52 +00004812 }
4813
drh8d97f1f2005-05-05 18:14:13 +00004814 /* Make nMaxCells a multiple of 2 in order to preserve 8-byte
4815 ** alignment */
4816 nMaxCells = (nMaxCells + 1)&~1;
4817
drh8b2f49b2001-06-08 00:21:52 +00004818 /*
danielk1977634f2982005-03-28 08:44:07 +00004819 ** Allocate space for memory structures
4820 */
drh17435752007-08-16 04:30:38 +00004821 apCell = sqlite3_malloc(
danielk1977634f2982005-03-28 08:44:07 +00004822 nMaxCells*sizeof(u8*) /* apCell */
4823 + nMaxCells*sizeof(int) /* szCell */
drhc96d8532005-05-03 12:30:33 +00004824 + ROUND8(sizeof(MemPage))*NB /* aCopy */
drh07d183d2005-05-01 22:52:42 +00004825 + pBt->pageSize*(5+NB) /* aSpace */
drhc96d8532005-05-03 12:30:33 +00004826 + (ISAUTOVACUUM ? nMaxCells : 0) /* aFrom */
danielk1977634f2982005-03-28 08:44:07 +00004827 );
4828 if( apCell==0 ){
4829 rc = SQLITE_NOMEM;
4830 goto balance_cleanup;
4831 }
4832 szCell = (int*)&apCell[nMaxCells];
4833 aCopy[0] = (u8*)&szCell[nMaxCells];
drhc96d8532005-05-03 12:30:33 +00004834 assert( ((aCopy[0] - (u8*)apCell) & 7)==0 ); /* 8-byte alignment required */
danielk1977634f2982005-03-28 08:44:07 +00004835 for(i=1; i<NB; i++){
drhc96d8532005-05-03 12:30:33 +00004836 aCopy[i] = &aCopy[i-1][pBt->pageSize+ROUND8(sizeof(MemPage))];
4837 assert( ((aCopy[i] - (u8*)apCell) & 7)==0 ); /* 8-byte alignment required */
danielk1977634f2982005-03-28 08:44:07 +00004838 }
drhc96d8532005-05-03 12:30:33 +00004839 aSpace = &aCopy[NB-1][pBt->pageSize+ROUND8(sizeof(MemPage))];
4840 assert( ((aSpace - (u8*)apCell) & 7)==0 ); /* 8-byte alignment required */
danielk1977634f2982005-03-28 08:44:07 +00004841#ifndef SQLITE_OMIT_AUTOVACUUM
4842 if( pBt->autoVacuum ){
drh07d183d2005-05-01 22:52:42 +00004843 aFrom = &aSpace[5*pBt->pageSize];
danielk1977634f2982005-03-28 08:44:07 +00004844 }
4845#endif
4846
  /*
  ** Make copies of the content of pPage and its siblings into apCopy[].
  ** The rest of this function will use data from the copies rather
  ** than the original pages since the original pages will be in the
  ** process of being overwritten.
  */
4853 for(i=0; i<nOld; i++){
drh07d183d2005-05-01 22:52:42 +00004854 MemPage *p = apCopy[i] = (MemPage*)&aCopy[i][pBt->pageSize];
drh07d183d2005-05-01 22:52:42 +00004855 p->aData = &((u8*)p)[-pBt->pageSize];
4856 memcpy(p->aData, apOld[i]->aData, pBt->pageSize + sizeof(MemPage));
4857 /* The memcpy() above changes the value of p->aData so we have to
4858 ** set it again. */
drh07d183d2005-05-01 22:52:42 +00004859 p->aData = &((u8*)p)[-pBt->pageSize];
drh14acc042001-06-10 19:56:58 +00004860 }
4861
  /*
  ** Load pointers to all cells on sibling pages and the divider cells
  ** into the local apCell[] array.  Make copies of the divider cells
  ** into space obtained from aSpace[] and remove the divider cells
  ** from pParent.
  **
  ** If the siblings are on leaf pages, then the child pointers of the
  ** divider cells are stripped from the cells before they are copied
  ** into aSpace[].  In this way, all cells in apCell[] are without
  ** child pointers.  If siblings are not leaves, then all cells in
  ** apCell[] include child pointers.  Either way, all cells in apCell[]
  ** are alike.
  **
  ** leafCorrection:  4 if pPage is a leaf.  0 if pPage is not a leaf.
  ** leafData:        1 if pPage holds key+data and pParent holds only keys.
  */
4878 nCell = 0;
drh4b70f112004-05-02 21:12:19 +00004879 leafCorrection = pPage->leaf*4;
drh8b18dd42004-05-12 19:18:15 +00004880 leafData = pPage->leafData && pPage->leaf;
drh8b2f49b2001-06-08 00:21:52 +00004881 for(i=0; i<nOld; i++){
drh4b70f112004-05-02 21:12:19 +00004882 MemPage *pOld = apCopy[i];
drh43605152004-05-29 21:46:49 +00004883 int limit = pOld->nCell+pOld->nOverflow;
4884 for(j=0; j<limit; j++){
danielk1977634f2982005-03-28 08:44:07 +00004885 assert( nCell<nMaxCells );
drh43605152004-05-29 21:46:49 +00004886 apCell[nCell] = findOverflowCell(pOld, j);
4887 szCell[nCell] = cellSizePtr(pOld, apCell[nCell]);
danielk1977ac11ee62005-01-15 12:45:51 +00004888#ifndef SQLITE_OMIT_AUTOVACUUM
4889 if( pBt->autoVacuum ){
4890 int a;
4891 aFrom[nCell] = i;
4892 for(a=0; a<pOld->nOverflow; a++){
4893 if( pOld->aOvfl[a].pCell==apCell[nCell] ){
4894 aFrom[nCell] = 0xFF;
4895 break;
4896 }
4897 }
4898 }
4899#endif
drh14acc042001-06-10 19:56:58 +00004900 nCell++;
drh8b2f49b2001-06-08 00:21:52 +00004901 }
4902 if( i<nOld-1 ){
drh43605152004-05-29 21:46:49 +00004903 int sz = cellSizePtr(pParent, apDiv[i]);
drh8b18dd42004-05-12 19:18:15 +00004904 if( leafData ){
drh96f5b762004-05-16 16:24:36 +00004905 /* With the LEAFDATA flag, pParent cells hold only INTKEYs that
4906 ** are duplicates of keys on the child pages. We need to remove
4907 ** the divider cells from pParent, but the dividers cells are not
4908 ** added to apCell[] because they are duplicates of child cells.
4909 */
drh8b18dd42004-05-12 19:18:15 +00004910 dropCell(pParent, nxDiv, sz);
drh4b70f112004-05-02 21:12:19 +00004911 }else{
drhb6f41482004-05-14 01:58:11 +00004912 u8 *pTemp;
danielk1977634f2982005-03-28 08:44:07 +00004913 assert( nCell<nMaxCells );
drhb6f41482004-05-14 01:58:11 +00004914 szCell[nCell] = sz;
4915 pTemp = &aSpace[iSpace];
4916 iSpace += sz;
drh07d183d2005-05-01 22:52:42 +00004917 assert( iSpace<=pBt->pageSize*5 );
drhb6f41482004-05-14 01:58:11 +00004918 memcpy(pTemp, apDiv[i], sz);
4919 apCell[nCell] = pTemp+leafCorrection;
danielk1977ac11ee62005-01-15 12:45:51 +00004920#ifndef SQLITE_OMIT_AUTOVACUUM
4921 if( pBt->autoVacuum ){
4922 aFrom[nCell] = 0xFF;
4923 }
4924#endif
drhb6f41482004-05-14 01:58:11 +00004925 dropCell(pParent, nxDiv, sz);
drh8b18dd42004-05-12 19:18:15 +00004926 szCell[nCell] -= leafCorrection;
drh43605152004-05-29 21:46:49 +00004927 assert( get4byte(pTemp)==pgnoOld[i] );
drh8b18dd42004-05-12 19:18:15 +00004928 if( !pOld->leaf ){
4929 assert( leafCorrection==0 );
4930 /* The right pointer of the child page pOld becomes the left
4931 ** pointer of the divider cell */
drh43605152004-05-29 21:46:49 +00004932 memcpy(apCell[nCell], &pOld->aData[pOld->hdrOffset+8], 4);
drh8b18dd42004-05-12 19:18:15 +00004933 }else{
4934 assert( leafCorrection==4 );
danielk197739c96042007-05-12 10:41:47 +00004935 if( szCell[nCell]<4 ){
4936 /* Do not allow any cells smaller than 4 bytes. */
4937 szCell[nCell] = 4;
4938 }
drh8b18dd42004-05-12 19:18:15 +00004939 }
4940 nCell++;
drh4b70f112004-05-02 21:12:19 +00004941 }
drh8b2f49b2001-06-08 00:21:52 +00004942 }
4943 }
4944
4945 /*
drh6019e162001-07-02 17:51:45 +00004946 ** Figure out the number of pages needed to hold all nCell cells.
4947 ** Store this number in "k". Also compute szNew[] which is the total
4948 ** size of all cells on the i-th page and cntNew[] which is the index
drh4b70f112004-05-02 21:12:19 +00004949 ** in apCell[] of the cell that divides page i from page i+1.
drh6019e162001-07-02 17:51:45 +00004950 ** cntNew[k] should equal nCell.
4951 **
drh96f5b762004-05-16 16:24:36 +00004952 ** Values computed by this block:
4953 **
4954 ** k: The total number of sibling pages
4955 ** szNew[i]: Spaced used on the i-th sibling page.
4956 ** cntNew[i]: Index in apCell[] and szCell[] for the first cell to
4957 ** the right of the i-th sibling page.
4958 ** usableSpace: Number of bytes of space available on each sibling.
4959 **
drh8b2f49b2001-06-08 00:21:52 +00004960 */
drh43605152004-05-29 21:46:49 +00004961 usableSpace = pBt->usableSize - 12 + leafCorrection;
drh6019e162001-07-02 17:51:45 +00004962 for(subtotal=k=i=0; i<nCell; i++){
danielk1977634f2982005-03-28 08:44:07 +00004963 assert( i<nMaxCells );
drh43605152004-05-29 21:46:49 +00004964 subtotal += szCell[i] + 2;
drh4b70f112004-05-02 21:12:19 +00004965 if( subtotal > usableSpace ){
drh6019e162001-07-02 17:51:45 +00004966 szNew[k] = subtotal - szCell[i];
4967 cntNew[k] = i;
drh8b18dd42004-05-12 19:18:15 +00004968 if( leafData ){ i--; }
drh6019e162001-07-02 17:51:45 +00004969 subtotal = 0;
4970 k++;
4971 }
4972 }
4973 szNew[k] = subtotal;
4974 cntNew[k] = nCell;
4975 k++;
drh96f5b762004-05-16 16:24:36 +00004976
4977 /*
4978 ** The packing computed by the previous block is biased toward the siblings
4979 ** on the left side. The left siblings are always nearly full, while the
4980 ** right-most sibling might be nearly empty. This block of code attempts
4981 ** to adjust the packing of siblings to get a better balance.
4982 **
4983 ** This adjustment is more than an optimization. The packing above might
4984 ** be so out of balance as to be illegal. For example, the right-most
4985 ** sibling might be completely empty. This adjustment is not optional.
4986 */
drh6019e162001-07-02 17:51:45 +00004987 for(i=k-1; i>0; i--){
drh96f5b762004-05-16 16:24:36 +00004988 int szRight = szNew[i]; /* Size of sibling on the right */
4989 int szLeft = szNew[i-1]; /* Size of sibling on the left */
4990 int r; /* Index of right-most cell in left sibling */
4991 int d; /* Index of first cell to the left of right sibling */
4992
4993 r = cntNew[i-1] - 1;
4994 d = r + 1 - leafData;
danielk1977634f2982005-03-28 08:44:07 +00004995 assert( d<nMaxCells );
4996 assert( r<nMaxCells );
drh43605152004-05-29 21:46:49 +00004997 while( szRight==0 || szRight+szCell[d]+2<=szLeft-(szCell[r]+2) ){
4998 szRight += szCell[d] + 2;
4999 szLeft -= szCell[r] + 2;
drh6019e162001-07-02 17:51:45 +00005000 cntNew[i-1]--;
drh96f5b762004-05-16 16:24:36 +00005001 r = cntNew[i-1] - 1;
5002 d = r + 1 - leafData;
drh6019e162001-07-02 17:51:45 +00005003 }
drh96f5b762004-05-16 16:24:36 +00005004 szNew[i] = szRight;
5005 szNew[i-1] = szLeft;
drh6019e162001-07-02 17:51:45 +00005006 }
drh09d0deb2005-08-02 17:13:09 +00005007
  /* Either we found one or more cells (cntNew[0]>0) or we are
  ** a virtual root page.  A virtual root page is when the real root
  ** page is page 1 and we are the only child of that page.
  */
5012 assert( cntNew[0]>0 || (pParent->pgno==1 && pParent->nCell==0) );
drh8b2f49b2001-06-08 00:21:52 +00005013
5014 /*
drh6b308672002-07-08 02:16:37 +00005015 ** Allocate k new pages. Reuse old pages where possible.
drh8b2f49b2001-06-08 00:21:52 +00005016 */
drh4b70f112004-05-02 21:12:19 +00005017 assert( pPage->pgno>1 );
5018 pageFlags = pPage->aData[0];
drh14acc042001-06-10 19:56:58 +00005019 for(i=0; i<k; i++){
drhda200cc2004-05-09 11:51:38 +00005020 MemPage *pNew;
drh6b308672002-07-08 02:16:37 +00005021 if( i<nOld ){
drhda200cc2004-05-09 11:51:38 +00005022 pNew = apNew[i] = apOld[i];
drh6b308672002-07-08 02:16:37 +00005023 pgnoNew[i] = pgnoOld[i];
5024 apOld[i] = 0;
danielk19773b8a05f2007-03-19 17:44:26 +00005025 rc = sqlite3PagerWrite(pNew->pDbPage);
drhf5345442007-04-09 12:45:02 +00005026 nNew++;
danielk197728129562005-01-11 10:25:06 +00005027 if( rc ) goto balance_cleanup;
drh6b308672002-07-08 02:16:37 +00005028 }else{
drh7aa8f852006-03-28 00:24:44 +00005029 assert( i>0 );
drh4f0c5872007-03-26 22:05:01 +00005030 rc = allocateBtreePage(pBt, &pNew, &pgnoNew[i], pgnoNew[i-1], 0);
drh6b308672002-07-08 02:16:37 +00005031 if( rc ) goto balance_cleanup;
drhda200cc2004-05-09 11:51:38 +00005032 apNew[i] = pNew;
drhf5345442007-04-09 12:45:02 +00005033 nNew++;
drh6b308672002-07-08 02:16:37 +00005034 }
drhda200cc2004-05-09 11:51:38 +00005035 zeroPage(pNew, pageFlags);
drh8b2f49b2001-06-08 00:21:52 +00005036 }
5037
danielk1977299b1872004-11-22 10:02:10 +00005038 /* Free any old pages that were not reused as new pages.
5039 */
5040 while( i<nOld ){
5041 rc = freePage(apOld[i]);
5042 if( rc ) goto balance_cleanup;
5043 releasePage(apOld[i]);
5044 apOld[i] = 0;
5045 i++;
5046 }
5047
  /*
  ** Put the new pages in ascending order.  This helps to
  ** keep entries in the disk file in order so that a scan
  ** of the table is a linear scan through the file.  That
  ** in turn helps the operating system to deliver pages
  ** from the disk more rapidly.
  **
  ** An O(n^2) insertion sort algorithm is used, but since
  ** n is never more than NB (a small constant), that should
  ** not be a problem.
  **
  ** When NB==3, this one optimization makes the database
  ** about 25% faster for large insertions and deletions.
  */
5062 for(i=0; i<k-1; i++){
5063 int minV = pgnoNew[i];
5064 int minI = i;
5065 for(j=i+1; j<k; j++){
drh7d02cb72003-06-04 16:24:39 +00005066 if( pgnoNew[j]<(unsigned)minV ){
drhf9ffac92002-03-02 19:00:31 +00005067 minI = j;
5068 minV = pgnoNew[j];
5069 }
5070 }
5071 if( minI>i ){
5072 int t;
5073 MemPage *pT;
5074 t = pgnoNew[i];
5075 pT = apNew[i];
5076 pgnoNew[i] = pgnoNew[minI];
5077 apNew[i] = apNew[minI];
5078 pgnoNew[minI] = t;
5079 apNew[minI] = pT;
5080 }
5081 }
drha2fce642004-06-05 00:01:44 +00005082 TRACE(("BALANCE: old: %d %d %d new: %d(%d) %d(%d) %d(%d) %d(%d) %d(%d)\n",
drh24cd67e2004-05-10 16:18:47 +00005083 pgnoOld[0],
5084 nOld>=2 ? pgnoOld[1] : 0,
5085 nOld>=3 ? pgnoOld[2] : 0,
drh10c0fa62004-05-18 12:50:17 +00005086 pgnoNew[0], szNew[0],
5087 nNew>=2 ? pgnoNew[1] : 0, nNew>=2 ? szNew[1] : 0,
5088 nNew>=3 ? pgnoNew[2] : 0, nNew>=3 ? szNew[2] : 0,
drha2fce642004-06-05 00:01:44 +00005089 nNew>=4 ? pgnoNew[3] : 0, nNew>=4 ? szNew[3] : 0,
5090 nNew>=5 ? pgnoNew[4] : 0, nNew>=5 ? szNew[4] : 0));
drh24cd67e2004-05-10 16:18:47 +00005091
drhf9ffac92002-03-02 19:00:31 +00005092 /*
drh14acc042001-06-10 19:56:58 +00005093 ** Evenly distribute the data in apCell[] across the new pages.
5094 ** Insert divider cells into pParent as necessary.
5095 */
5096 j = 0;
5097 for(i=0; i<nNew; i++){
danielk1977ac11ee62005-01-15 12:45:51 +00005098 /* Assemble the new sibling page. */
drh14acc042001-06-10 19:56:58 +00005099 MemPage *pNew = apNew[i];
drh19642e52005-03-29 13:17:45 +00005100 assert( j<nMaxCells );
drh4b70f112004-05-02 21:12:19 +00005101 assert( pNew->pgno==pgnoNew[i] );
drhfa1a98a2004-05-14 19:08:17 +00005102 assemblePage(pNew, cntNew[i]-j, &apCell[j], &szCell[j]);
drh09d0deb2005-08-02 17:13:09 +00005103 assert( pNew->nCell>0 || (nNew==1 && cntNew[0]==0) );
drh43605152004-05-29 21:46:49 +00005104 assert( pNew->nOverflow==0 );
danielk1977ac11ee62005-01-15 12:45:51 +00005105
5106#ifndef SQLITE_OMIT_AUTOVACUUM
5107 /* If this is an auto-vacuum database, update the pointer map entries
5108 ** that point to the siblings that were rearranged. These can be: left
5109 ** children of cells, the right-child of the page, or overflow pages
5110 ** pointed to by cells.
5111 */
5112 if( pBt->autoVacuum ){
5113 for(k=j; k<cntNew[i]; k++){
danielk1977634f2982005-03-28 08:44:07 +00005114 assert( k<nMaxCells );
danielk1977ac11ee62005-01-15 12:45:51 +00005115 if( aFrom[k]==0xFF || apCopy[aFrom[k]]->pgno!=pNew->pgno ){
danielk197779a40da2005-01-16 08:00:01 +00005116 rc = ptrmapPutOvfl(pNew, k-j);
5117 if( rc!=SQLITE_OK ){
5118 goto balance_cleanup;
danielk1977ac11ee62005-01-15 12:45:51 +00005119 }
5120 }
5121 }
5122 }
5123#endif
5124
5125 j = cntNew[i];
5126
5127 /* If the sibling page assembled above was not the right-most sibling,
5128 ** insert a divider cell into the parent page.
5129 */
drh14acc042001-06-10 19:56:58 +00005130 if( i<nNew-1 && j<nCell ){
drh8b18dd42004-05-12 19:18:15 +00005131 u8 *pCell;
drh24cd67e2004-05-10 16:18:47 +00005132 u8 *pTemp;
drh8b18dd42004-05-12 19:18:15 +00005133 int sz;
danielk1977634f2982005-03-28 08:44:07 +00005134
5135 assert( j<nMaxCells );
drh8b18dd42004-05-12 19:18:15 +00005136 pCell = apCell[j];
5137 sz = szCell[j] + leafCorrection;
drh4b70f112004-05-02 21:12:19 +00005138 if( !pNew->leaf ){
drh43605152004-05-29 21:46:49 +00005139 memcpy(&pNew->aData[8], pCell, 4);
drh24cd67e2004-05-10 16:18:47 +00005140 pTemp = 0;
drh8b18dd42004-05-12 19:18:15 +00005141 }else if( leafData ){
drhfd131da2007-08-07 17:13:03 +00005142 /* If the tree is a leaf-data tree, and the siblings are leaves,
danielk1977ac11ee62005-01-15 12:45:51 +00005143 ** then there is no divider cell in apCell[]. Instead, the divider
5144 ** cell consists of the integer key for the right-most cell of
5145 ** the sibling-page assembled above only.
5146 */
drh6f11bef2004-05-13 01:12:56 +00005147 CellInfo info;
drh8b18dd42004-05-12 19:18:15 +00005148 j--;
drh16a9b832007-05-05 18:39:25 +00005149 sqlite3BtreeParseCellPtr(pNew, apCell[j], &info);
drhb6f41482004-05-14 01:58:11 +00005150 pCell = &aSpace[iSpace];
drhb026e052007-05-02 01:34:31 +00005151 fillInCell(pParent, pCell, 0, info.nKey, 0, 0, 0, &sz);
drhb6f41482004-05-14 01:58:11 +00005152 iSpace += sz;
drh07d183d2005-05-01 22:52:42 +00005153 assert( iSpace<=pBt->pageSize*5 );
drh8b18dd42004-05-12 19:18:15 +00005154 pTemp = 0;
drh4b70f112004-05-02 21:12:19 +00005155 }else{
5156 pCell -= 4;
drhb6f41482004-05-14 01:58:11 +00005157 pTemp = &aSpace[iSpace];
5158 iSpace += sz;
drh07d183d2005-05-01 22:52:42 +00005159 assert( iSpace<=pBt->pageSize*5 );
danielk19774aeff622007-05-12 09:30:47 +00005160 /* Obscure case for non-leaf-data trees: If the cell at pCell was
5161 ** previously stored on a leaf node, and it's reported size was 4
5162 ** bytes, then it may actually be smaller than this
5163 ** (see sqlite3BtreeParseCellPtr(), 4 bytes is the minimum size of
5164 ** any cell). But it's important to pass the correct size to
5165 ** insertCell(), so reparse the cell now.
5166 **
5167 ** Note that this can never happen in an SQLite data file, as all
5168 ** cells are at least 4 bytes. It only happens in b-trees used
5169 ** to evaluate "IN (SELECT ...)" and similar clauses.
5170 */
5171 if( szCell[j]==4 ){
5172 assert(leafCorrection==4);
5173 sz = cellSizePtr(pParent, pCell);
5174 }
drh4b70f112004-05-02 21:12:19 +00005175 }
danielk1977a3ad5e72005-01-07 08:56:44 +00005176 rc = insertCell(pParent, nxDiv, pCell, sz, pTemp, 4);
danielk1977e80463b2004-11-03 03:01:16 +00005177 if( rc!=SQLITE_OK ) goto balance_cleanup;
drh43605152004-05-29 21:46:49 +00005178 put4byte(findOverflowCell(pParent,nxDiv), pNew->pgno);
danielk1977ac11ee62005-01-15 12:45:51 +00005179#ifndef SQLITE_OMIT_AUTOVACUUM
5180 /* If this is an auto-vacuum database, and not a leaf-data tree,
5181 ** then update the pointer map with an entry for the overflow page
5182 ** that the cell just inserted points to (if any).
5183 */
5184 if( pBt->autoVacuum && !leafData ){
danielk197779a40da2005-01-16 08:00:01 +00005185 rc = ptrmapPutOvfl(pParent, nxDiv);
5186 if( rc!=SQLITE_OK ){
5187 goto balance_cleanup;
danielk1977ac11ee62005-01-15 12:45:51 +00005188 }
5189 }
5190#endif
drh14acc042001-06-10 19:56:58 +00005191 j++;
5192 nxDiv++;
5193 }
5194 }
drh6019e162001-07-02 17:51:45 +00005195 assert( j==nCell );
drh7aa8f852006-03-28 00:24:44 +00005196 assert( nOld>0 );
5197 assert( nNew>0 );
drh4b70f112004-05-02 21:12:19 +00005198 if( (pageFlags & PTF_LEAF)==0 ){
drh43605152004-05-29 21:46:49 +00005199 memcpy(&apNew[nNew-1]->aData[8], &apCopy[nOld-1]->aData[8], 4);
drh14acc042001-06-10 19:56:58 +00005200 }
drh43605152004-05-29 21:46:49 +00005201 if( nxDiv==pParent->nCell+pParent->nOverflow ){
drh4b70f112004-05-02 21:12:19 +00005202 /* Right-most sibling is the right-most child of pParent */
drh43605152004-05-29 21:46:49 +00005203 put4byte(&pParent->aData[pParent->hdrOffset+8], pgnoNew[nNew-1]);
drh4b70f112004-05-02 21:12:19 +00005204 }else{
5205 /* Right-most sibling is the left child of the first entry in pParent
5206 ** past the right-most divider entry */
drh43605152004-05-29 21:46:49 +00005207 put4byte(findOverflowCell(pParent, nxDiv), pgnoNew[nNew-1]);
drh14acc042001-06-10 19:56:58 +00005208 }
5209
5210 /*
5211 ** Reparent children of all cells.
drh8b2f49b2001-06-08 00:21:52 +00005212 */
5213 for(i=0; i<nNew; i++){
danielk1977afcdd022004-10-31 16:25:42 +00005214 rc = reparentChildPages(apNew[i]);
5215 if( rc!=SQLITE_OK ) goto balance_cleanup;
drh8b2f49b2001-06-08 00:21:52 +00005216 }
danielk1977afcdd022004-10-31 16:25:42 +00005217 rc = reparentChildPages(pParent);
5218 if( rc!=SQLITE_OK ) goto balance_cleanup;
drh8b2f49b2001-06-08 00:21:52 +00005219
5220 /*
drh3a4c1412004-05-09 20:40:11 +00005221 ** Balance the parent page. Note that the current page (pPage) might
danielk1977ac11ee62005-01-15 12:45:51 +00005222 ** have been added to the freelist so it might no longer be initialized.
drh3a4c1412004-05-09 20:40:11 +00005223 ** But the parent page will always be initialized.
drh8b2f49b2001-06-08 00:21:52 +00005224 */
drhda200cc2004-05-09 11:51:38 +00005225 assert( pParent->isInit );
danielk1977ac245ec2005-01-14 13:50:11 +00005226 rc = balance(pParent, 0);
drhda200cc2004-05-09 11:51:38 +00005227
drh8b2f49b2001-06-08 00:21:52 +00005228 /*
drh14acc042001-06-10 19:56:58 +00005229 ** Cleanup before returning.
drh8b2f49b2001-06-08 00:21:52 +00005230 */
drh14acc042001-06-10 19:56:58 +00005231balance_cleanup:
drh17435752007-08-16 04:30:38 +00005232 sqlite3_free(apCell);
drh8b2f49b2001-06-08 00:21:52 +00005233 for(i=0; i<nOld; i++){
drh91025292004-05-03 19:49:32 +00005234 releasePage(apOld[i]);
drh8b2f49b2001-06-08 00:21:52 +00005235 }
drh14acc042001-06-10 19:56:58 +00005236 for(i=0; i<nNew; i++){
drh91025292004-05-03 19:49:32 +00005237 releasePage(apNew[i]);
drh8b2f49b2001-06-08 00:21:52 +00005238 }
drh91025292004-05-03 19:49:32 +00005239 releasePage(pParent);
drh3a4c1412004-05-09 20:40:11 +00005240 TRACE(("BALANCE: finished with %d: old=%d new=%d cells=%d\n",
5241 pPage->pgno, nOld, nNew, nCell));
drh8b2f49b2001-06-08 00:21:52 +00005242 return rc;
5243}
5244
5245/*
drh43605152004-05-29 21:46:49 +00005246** This routine is called for the root page of a btree when the root
5247** page contains no cells. This is an opportunity to make the tree
5248** shallower by one level.
5249*/
5250static int balance_shallower(MemPage *pPage){
5251 MemPage *pChild; /* The only child page of pPage */
5252 Pgno pgnoChild; /* Page number for pChild */
drh2e38c322004-09-03 18:38:44 +00005253 int rc = SQLITE_OK; /* Return code from subprocedures */
danielk1977aef0bf62005-12-30 16:28:01 +00005254 BtShared *pBt; /* The main BTree structure */
drh2e38c322004-09-03 18:38:44 +00005255 int mxCellPerPage; /* Maximum number of cells per page */
5256 u8 **apCell; /* All cells from pages being balanced */
5257 int *szCell; /* Local size of all cells */
drh43605152004-05-29 21:46:49 +00005258
5259 assert( pPage->pParent==0 );
5260 assert( pPage->nCell==0 );
drh27641702007-08-22 02:56:42 +00005261 assert( sqlite3BtreeMutexHeld(pPage->pBt->mutex) );
drh2e38c322004-09-03 18:38:44 +00005262 pBt = pPage->pBt;
5263 mxCellPerPage = MX_CELL(pBt);
drh17435752007-08-16 04:30:38 +00005264 apCell = sqlite3_malloc( mxCellPerPage*(sizeof(u8*)+sizeof(int)) );
drh2e38c322004-09-03 18:38:44 +00005265 if( apCell==0 ) return SQLITE_NOMEM;
5266 szCell = (int*)&apCell[mxCellPerPage];
drh43605152004-05-29 21:46:49 +00005267 if( pPage->leaf ){
5268 /* The table is completely empty */
5269 TRACE(("BALANCE: empty table %d\n", pPage->pgno));
5270 }else{
5271 /* The root page is empty but has one child. Transfer the
5272 ** information from that one child into the root page if it
5273 ** will fit. This reduces the depth of the tree by one.
5274 **
5275 ** If the root page is page 1, it has less space available than
5276 ** its child (due to the 100 byte header that occurs at the beginning
5277 ** of the database fle), so it might not be able to hold all of the
5278 ** information currently contained in the child. If this is the
5279 ** case, then do not do the transfer. Leave page 1 empty except
5280 ** for the right-pointer to the child page. The child page becomes
5281 ** the virtual root of the tree.
5282 */
5283 pgnoChild = get4byte(&pPage->aData[pPage->hdrOffset+8]);
5284 assert( pgnoChild>0 );
danielk19773b8a05f2007-03-19 17:44:26 +00005285 assert( pgnoChild<=sqlite3PagerPagecount(pPage->pBt->pPager) );
drh16a9b832007-05-05 18:39:25 +00005286 rc = sqlite3BtreeGetPage(pPage->pBt, pgnoChild, &pChild, 0);
drh2e38c322004-09-03 18:38:44 +00005287 if( rc ) goto end_shallow_balance;
drh43605152004-05-29 21:46:49 +00005288 if( pPage->pgno==1 ){
drh16a9b832007-05-05 18:39:25 +00005289 rc = sqlite3BtreeInitPage(pChild, pPage);
drh2e38c322004-09-03 18:38:44 +00005290 if( rc ) goto end_shallow_balance;
drh43605152004-05-29 21:46:49 +00005291 assert( pChild->nOverflow==0 );
5292 if( pChild->nFree>=100 ){
5293 /* The child information will fit on the root page, so do the
5294 ** copy */
5295 int i;
5296 zeroPage(pPage, pChild->aData[0]);
5297 for(i=0; i<pChild->nCell; i++){
danielk19771cc5ed82007-05-16 17:28:43 +00005298 apCell[i] = findCell(pChild,i);
drh43605152004-05-29 21:46:49 +00005299 szCell[i] = cellSizePtr(pChild, apCell[i]);
5300 }
5301 assemblePage(pPage, pChild->nCell, apCell, szCell);
danielk1977ae825582004-11-23 09:06:55 +00005302 /* Copy the right-pointer of the child to the parent. */
5303 put4byte(&pPage->aData[pPage->hdrOffset+8],
5304 get4byte(&pChild->aData[pChild->hdrOffset+8]));
drh43605152004-05-29 21:46:49 +00005305 freePage(pChild);
5306 TRACE(("BALANCE: child %d transfer to page 1\n", pChild->pgno));
5307 }else{
5308 /* The child has more information that will fit on the root.
5309 ** The tree is already balanced. Do nothing. */
5310 TRACE(("BALANCE: child %d will not fit on page 1\n", pChild->pgno));
5311 }
5312 }else{
5313 memcpy(pPage->aData, pChild->aData, pPage->pBt->usableSize);
5314 pPage->isInit = 0;
5315 pPage->pParent = 0;
drh16a9b832007-05-05 18:39:25 +00005316 rc = sqlite3BtreeInitPage(pPage, 0);
drh43605152004-05-29 21:46:49 +00005317 assert( rc==SQLITE_OK );
5318 freePage(pChild);
5319 TRACE(("BALANCE: transfer child %d into root %d\n",
5320 pChild->pgno, pPage->pgno));
5321 }
danielk1977afcdd022004-10-31 16:25:42 +00005322 rc = reparentChildPages(pPage);
danielk1977ac11ee62005-01-15 12:45:51 +00005323 assert( pPage->nOverflow==0 );
5324#ifndef SQLITE_OMIT_AUTOVACUUM
5325 if( pBt->autoVacuum ){
danielk1977aac0a382005-01-16 11:07:06 +00005326 int i;
danielk1977ac11ee62005-01-15 12:45:51 +00005327 for(i=0; i<pPage->nCell; i++){
danielk197779a40da2005-01-16 08:00:01 +00005328 rc = ptrmapPutOvfl(pPage, i);
5329 if( rc!=SQLITE_OK ){
5330 goto end_shallow_balance;
danielk1977ac11ee62005-01-15 12:45:51 +00005331 }
5332 }
5333 }
5334#endif
drh43605152004-05-29 21:46:49 +00005335 releasePage(pChild);
5336 }
drh2e38c322004-09-03 18:38:44 +00005337end_shallow_balance:
drh17435752007-08-16 04:30:38 +00005338 sqlite3_free(apCell);
drh2e38c322004-09-03 18:38:44 +00005339 return rc;
drh43605152004-05-29 21:46:49 +00005340}
5341
5342
5343/*
5344** The root page is overfull
5345**
5346** When this happens, Create a new child page and copy the
5347** contents of the root into the child. Then make the root
5348** page an empty page with rightChild pointing to the new
5349** child. Finally, call balance_internal() on the new child
5350** to cause it to split.
5351*/
5352static int balance_deeper(MemPage *pPage){
5353 int rc; /* Return value from subprocedures */
5354 MemPage *pChild; /* Pointer to a new child page */
5355 Pgno pgnoChild; /* Page number of the new child page */
danielk1977aef0bf62005-12-30 16:28:01 +00005356 BtShared *pBt; /* The BTree */
drh43605152004-05-29 21:46:49 +00005357 int usableSize; /* Total usable size of a page */
5358 u8 *data; /* Content of the parent page */
5359 u8 *cdata; /* Content of the child page */
5360 int hdr; /* Offset to page header in parent */
5361 int brk; /* Offset to content of first cell in parent */
5362
5363 assert( pPage->pParent==0 );
5364 assert( pPage->nOverflow>0 );
5365 pBt = pPage->pBt;
drh27641702007-08-22 02:56:42 +00005366 assert( sqlite3BtreeMutexHeld(pBt->mutex) );
drh4f0c5872007-03-26 22:05:01 +00005367 rc = allocateBtreePage(pBt, &pChild, &pgnoChild, pPage->pgno, 0);
drh43605152004-05-29 21:46:49 +00005368 if( rc ) return rc;
danielk19773b8a05f2007-03-19 17:44:26 +00005369 assert( sqlite3PagerIswriteable(pChild->pDbPage) );
drh43605152004-05-29 21:46:49 +00005370 usableSize = pBt->usableSize;
5371 data = pPage->aData;
5372 hdr = pPage->hdrOffset;
5373 brk = get2byte(&data[hdr+5]);
5374 cdata = pChild->aData;
5375 memcpy(cdata, &data[hdr], pPage->cellOffset+2*pPage->nCell-hdr);
5376 memcpy(&cdata[brk], &data[brk], usableSize-brk);
danielk1977c7dc7532004-11-17 10:22:03 +00005377 assert( pChild->isInit==0 );
drh16a9b832007-05-05 18:39:25 +00005378 rc = sqlite3BtreeInitPage(pChild, pPage);
danielk19776b456a22005-03-21 04:04:02 +00005379 if( rc ) goto balancedeeper_out;
drh43605152004-05-29 21:46:49 +00005380 memcpy(pChild->aOvfl, pPage->aOvfl, pPage->nOverflow*sizeof(pPage->aOvfl[0]));
5381 pChild->nOverflow = pPage->nOverflow;
5382 if( pChild->nOverflow ){
5383 pChild->nFree = 0;
5384 }
5385 assert( pChild->nCell==pPage->nCell );
5386 zeroPage(pPage, pChild->aData[0] & ~PTF_LEAF);
5387 put4byte(&pPage->aData[pPage->hdrOffset+8], pgnoChild);
5388 TRACE(("BALANCE: copy root %d into %d\n", pPage->pgno, pChild->pgno));
danielk19774e17d142005-01-16 09:06:33 +00005389#ifndef SQLITE_OMIT_AUTOVACUUM
danielk1977ac11ee62005-01-15 12:45:51 +00005390 if( pBt->autoVacuum ){
5391 int i;
5392 rc = ptrmapPut(pBt, pChild->pgno, PTRMAP_BTREE, pPage->pgno);
danielk19776b456a22005-03-21 04:04:02 +00005393 if( rc ) goto balancedeeper_out;
danielk1977ac11ee62005-01-15 12:45:51 +00005394 for(i=0; i<pChild->nCell; i++){
danielk197779a40da2005-01-16 08:00:01 +00005395 rc = ptrmapPutOvfl(pChild, i);
5396 if( rc!=SQLITE_OK ){
5397 return rc;
danielk1977ac11ee62005-01-15 12:45:51 +00005398 }
5399 }
5400 }
danielk19774e17d142005-01-16 09:06:33 +00005401#endif
drh43605152004-05-29 21:46:49 +00005402 rc = balance_nonroot(pChild);
danielk19776b456a22005-03-21 04:04:02 +00005403
5404balancedeeper_out:
drh43605152004-05-29 21:46:49 +00005405 releasePage(pChild);
5406 return rc;
5407}
5408
5409/*
5410** Decide if the page pPage needs to be balanced. If balancing is
5411** required, call the appropriate balancing routine.
5412*/
danielk1977ac245ec2005-01-14 13:50:11 +00005413static int balance(MemPage *pPage, int insert){
drh43605152004-05-29 21:46:49 +00005414 int rc = SQLITE_OK;
drh27641702007-08-22 02:56:42 +00005415 assert( sqlite3BtreeMutexHeld(pPage->pBt->mutex) );
drh43605152004-05-29 21:46:49 +00005416 if( pPage->pParent==0 ){
danielk19776e465eb2007-08-21 13:11:00 +00005417 rc = sqlite3PagerWrite(pPage->pDbPage);
5418 if( rc==SQLITE_OK && pPage->nOverflow>0 ){
drh43605152004-05-29 21:46:49 +00005419 rc = balance_deeper(pPage);
5420 }
danielk1977687566d2004-11-02 12:56:41 +00005421 if( rc==SQLITE_OK && pPage->nCell==0 ){
drh43605152004-05-29 21:46:49 +00005422 rc = balance_shallower(pPage);
5423 }
5424 }else{
danielk1977ac245ec2005-01-14 13:50:11 +00005425 if( pPage->nOverflow>0 ||
5426 (!insert && pPage->nFree>pPage->pBt->usableSize*2/3) ){
drh43605152004-05-29 21:46:49 +00005427 rc = balance_nonroot(pPage);
5428 }
5429 }
5430 return rc;
5431}
5432
5433/*
drh8dcd7ca2004-08-08 19:43:29 +00005434** This routine checks all cursors that point to table pgnoRoot.
drh980b1a72006-08-16 16:42:48 +00005435** If any of those cursors were opened with wrFlag==0 in a different
5436** database connection (a database connection that shares the pager
5437** cache with the current connection) and that other connection
5438** is not in the ReadUncommmitted state, then this routine returns
5439** SQLITE_LOCKED.
danielk1977299b1872004-11-22 10:02:10 +00005440**
5441** In addition to checking for read-locks (where a read-lock
5442** means a cursor opened with wrFlag==0) this routine also moves
drh16a9b832007-05-05 18:39:25 +00005443** all write cursors so that they are pointing to the
drh980b1a72006-08-16 16:42:48 +00005444** first Cell on the root page. This is necessary because an insert
danielk1977299b1872004-11-22 10:02:10 +00005445** or delete might change the number of cells on a page or delete
5446** a page entirely and we do not want to leave any cursors
5447** pointing to non-existant pages or cells.
drhf74b8d92002-09-01 23:20:45 +00005448*/
drh980b1a72006-08-16 16:42:48 +00005449static int checkReadLocks(Btree *pBtree, Pgno pgnoRoot, BtCursor *pExclude){
danielk1977299b1872004-11-22 10:02:10 +00005450 BtCursor *p;
drh980b1a72006-08-16 16:42:48 +00005451 BtShared *pBt = pBtree->pBt;
5452 sqlite3 *db = pBtree->pSqlite;
drh27641702007-08-22 02:56:42 +00005453 assert( sqlite3BtreeMutexHeld(pBt->mutex) );
5454 assert( sqlite3BtreeMutexHeld(db->mutex) );
danielk1977299b1872004-11-22 10:02:10 +00005455 for(p=pBt->pCursor; p; p=p->pNext){
drh980b1a72006-08-16 16:42:48 +00005456 if( p==pExclude ) continue;
5457 if( p->eState!=CURSOR_VALID ) continue;
5458 if( p->pgnoRoot!=pgnoRoot ) continue;
5459 if( p->wrFlag==0 ){
5460 sqlite3 *dbOther = p->pBtree->pSqlite;
5461 if( dbOther==0 ||
5462 (dbOther!=db && (dbOther->flags & SQLITE_ReadUncommitted)==0) ){
5463 return SQLITE_LOCKED;
5464 }
5465 }else if( p->pPage->pgno!=p->pgnoRoot ){
danielk1977299b1872004-11-22 10:02:10 +00005466 moveToRoot(p);
5467 }
5468 }
drhf74b8d92002-09-01 23:20:45 +00005469 return SQLITE_OK;
5470}
5471
5472/*
drh3b7511c2001-05-26 13:15:44 +00005473** Insert a new record into the BTree. The key is given by (pKey,nKey)
5474** and the data is given by (pData,nData). The cursor is used only to
drh91025292004-05-03 19:49:32 +00005475** define what table the record should be inserted into. The cursor
drh4b70f112004-05-02 21:12:19 +00005476** is left pointing at a random location.
5477**
5478** For an INTKEY table, only the nKey value of the key is used. pKey is
5479** ignored. For a ZERODATA table, the pData and nData are both ignored.
drh3b7511c2001-05-26 13:15:44 +00005480*/
drh3aac2dd2004-04-26 14:10:20 +00005481int sqlite3BtreeInsert(
drh5c4d9702001-08-20 00:33:58 +00005482 BtCursor *pCur, /* Insert data into the table of this cursor */
drh4a1c3802004-05-12 15:15:47 +00005483 const void *pKey, i64 nKey, /* The key of the new record */
drhe4d90812007-03-29 05:51:49 +00005484 const void *pData, int nData, /* The data of the new record */
drhb026e052007-05-02 01:34:31 +00005485 int nZero, /* Number of extra 0 bytes to append to data */
drhe4d90812007-03-29 05:51:49 +00005486 int appendBias /* True if this is likely an append */
drh3b7511c2001-05-26 13:15:44 +00005487){
drh3b7511c2001-05-26 13:15:44 +00005488 int rc;
5489 int loc;
drh14acc042001-06-10 19:56:58 +00005490 int szNew;
drh3b7511c2001-05-26 13:15:44 +00005491 MemPage *pPage;
drhd677b3d2007-08-20 22:48:41 +00005492 Btree *p = pCur->pBtree;
5493 BtShared *pBt = p->pBt;
drha34b6762004-05-07 13:30:42 +00005494 unsigned char *oldCell;
drh2e38c322004-09-03 18:38:44 +00005495 unsigned char *newCell = 0;
drh3b7511c2001-05-26 13:15:44 +00005496
drhd0679ed2007-08-28 22:24:34 +00005497 cursorEnter(pCur);
danielk1977aef0bf62005-12-30 16:28:01 +00005498 if( pBt->inTransaction!=TRANS_WRITE ){
drhf74b8d92002-09-01 23:20:45 +00005499 /* Must start a transaction before doing an insert */
drhd677b3d2007-08-20 22:48:41 +00005500 rc = pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR;
drhd0679ed2007-08-28 22:24:34 +00005501 cursorLeave(pCur);
drhd677b3d2007-08-20 22:48:41 +00005502 return rc;
drh8b2f49b2001-06-08 00:21:52 +00005503 }
drhf74b8d92002-09-01 23:20:45 +00005504 assert( !pBt->readOnly );
drhecdc7532001-09-23 02:35:53 +00005505 if( !pCur->wrFlag ){
drhd0679ed2007-08-28 22:24:34 +00005506 cursorLeave(pCur);
drhecdc7532001-09-23 02:35:53 +00005507 return SQLITE_PERM; /* Cursor not open for writing */
5508 }
drh980b1a72006-08-16 16:42:48 +00005509 if( checkReadLocks(pCur->pBtree, pCur->pgnoRoot, pCur) ){
drhd0679ed2007-08-28 22:24:34 +00005510 cursorLeave(pCur);
drhf74b8d92002-09-01 23:20:45 +00005511 return SQLITE_LOCKED; /* The table pCur points to has a read lock */
5512 }
danielk1977da184232006-01-05 11:34:32 +00005513
5514 /* Save the positions of any other cursors open on this table */
drhbf700f32007-03-31 02:36:44 +00005515 clearCursorPosition(pCur);
danielk19772e94d4d2006-01-09 05:36:27 +00005516 if(
danielk19772e94d4d2006-01-09 05:36:27 +00005517 SQLITE_OK!=(rc = saveAllCursors(pBt, pCur->pgnoRoot, pCur)) ||
drhe4d90812007-03-29 05:51:49 +00005518 SQLITE_OK!=(rc = sqlite3BtreeMoveto(pCur, pKey, nKey, appendBias, &loc))
danielk19772e94d4d2006-01-09 05:36:27 +00005519 ){
drhd0679ed2007-08-28 22:24:34 +00005520 cursorLeave(pCur);
danielk1977da184232006-01-05 11:34:32 +00005521 return rc;
5522 }
5523
drh14acc042001-06-10 19:56:58 +00005524 pPage = pCur->pPage;
drh4a1c3802004-05-12 15:15:47 +00005525 assert( pPage->intKey || nKey>=0 );
drh8b18dd42004-05-12 19:18:15 +00005526 assert( pPage->leaf || !pPage->leafData );
drh3a4c1412004-05-09 20:40:11 +00005527 TRACE(("INSERT: table=%d nkey=%lld ndata=%d page=%d %s\n",
5528 pCur->pgnoRoot, nKey, nData, pPage->pgno,
5529 loc==0 ? "overwrite" : "new entry"));
drh7aa128d2002-06-21 13:09:16 +00005530 assert( pPage->isInit );
drh17435752007-08-16 04:30:38 +00005531 newCell = sqlite3_malloc( MX_CELL_SIZE(pBt) );
drh2e38c322004-09-03 18:38:44 +00005532 if( newCell==0 ) return SQLITE_NOMEM;
drhb026e052007-05-02 01:34:31 +00005533 rc = fillInCell(pPage, newCell, pKey, nKey, pData, nData, nZero, &szNew);
drh2e38c322004-09-03 18:38:44 +00005534 if( rc ) goto end_insert;
drh43605152004-05-29 21:46:49 +00005535 assert( szNew==cellSizePtr(pPage, newCell) );
drh2e38c322004-09-03 18:38:44 +00005536 assert( szNew<=MX_CELL_SIZE(pBt) );
danielk1977da184232006-01-05 11:34:32 +00005537 if( loc==0 && CURSOR_VALID==pCur->eState ){
drha34b6762004-05-07 13:30:42 +00005538 int szOld;
5539 assert( pCur->idx>=0 && pCur->idx<pPage->nCell );
danielk19776e465eb2007-08-21 13:11:00 +00005540 rc = sqlite3PagerWrite(pPage->pDbPage);
5541 if( rc ){
5542 goto end_insert;
5543 }
danielk19771cc5ed82007-05-16 17:28:43 +00005544 oldCell = findCell(pPage, pCur->idx);
drh4b70f112004-05-02 21:12:19 +00005545 if( !pPage->leaf ){
drh43605152004-05-29 21:46:49 +00005546 memcpy(newCell, oldCell, 4);
drh4b70f112004-05-02 21:12:19 +00005547 }
drh43605152004-05-29 21:46:49 +00005548 szOld = cellSizePtr(pPage, oldCell);
drh4b70f112004-05-02 21:12:19 +00005549 rc = clearCell(pPage, oldCell);
drh2e38c322004-09-03 18:38:44 +00005550 if( rc ) goto end_insert;
drh4b70f112004-05-02 21:12:19 +00005551 dropCell(pPage, pCur->idx, szOld);
drh7c717f72001-06-24 20:39:41 +00005552 }else if( loc<0 && pPage->nCell>0 ){
drh4b70f112004-05-02 21:12:19 +00005553 assert( pPage->leaf );
drh14acc042001-06-10 19:56:58 +00005554 pCur->idx++;
drh271efa52004-05-30 19:19:05 +00005555 pCur->info.nSize = 0;
drh14acc042001-06-10 19:56:58 +00005556 }else{
drh4b70f112004-05-02 21:12:19 +00005557 assert( pPage->leaf );
drh3b7511c2001-05-26 13:15:44 +00005558 }
danielk1977a3ad5e72005-01-07 08:56:44 +00005559 rc = insertCell(pPage, pCur->idx, newCell, szNew, 0, 0);
danielk1977e80463b2004-11-03 03:01:16 +00005560 if( rc!=SQLITE_OK ) goto end_insert;
danielk1977ac245ec2005-01-14 13:50:11 +00005561 rc = balance(pPage, 1);
drh23e11ca2004-05-04 17:27:28 +00005562 /* sqlite3BtreePageDump(pCur->pBt, pCur->pgnoRoot, 1); */
drh3fc190c2001-09-14 03:24:23 +00005563 /* fflush(stdout); */
danielk1977299b1872004-11-22 10:02:10 +00005564 if( rc==SQLITE_OK ){
5565 moveToRoot(pCur);
5566 }
drh2e38c322004-09-03 18:38:44 +00005567end_insert:
drh17435752007-08-16 04:30:38 +00005568 sqlite3_free(newCell);
drhd0679ed2007-08-28 22:24:34 +00005569 cursorLeave(pCur);
drh5e2f8b92001-05-28 00:41:15 +00005570 return rc;
5571}
5572
5573/*
drh4b70f112004-05-02 21:12:19 +00005574** Delete the entry that the cursor is pointing to. The cursor
5575** is left pointing at a random location.
drh3b7511c2001-05-26 13:15:44 +00005576*/
drh3aac2dd2004-04-26 14:10:20 +00005577int sqlite3BtreeDelete(BtCursor *pCur){
drh5e2f8b92001-05-28 00:41:15 +00005578 MemPage *pPage = pCur->pPage;
drh4b70f112004-05-02 21:12:19 +00005579 unsigned char *pCell;
drh5e2f8b92001-05-28 00:41:15 +00005580 int rc;
danielk1977cfe9a692004-06-16 12:00:29 +00005581 Pgno pgnoChild = 0;
drhd677b3d2007-08-20 22:48:41 +00005582 Btree *p = pCur->pBtree;
5583 BtShared *pBt = p->pBt;
drh8b2f49b2001-06-08 00:21:52 +00005584
drhd0679ed2007-08-28 22:24:34 +00005585 cursorEnter(pCur);
drh7aa128d2002-06-21 13:09:16 +00005586 assert( pPage->isInit );
danielk1977aef0bf62005-12-30 16:28:01 +00005587 if( pBt->inTransaction!=TRANS_WRITE ){
drhf74b8d92002-09-01 23:20:45 +00005588 /* Must start a transaction before doing a delete */
drhd677b3d2007-08-20 22:48:41 +00005589 rc = pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR;
drhd0679ed2007-08-28 22:24:34 +00005590 cursorLeave(pCur);
drhd677b3d2007-08-20 22:48:41 +00005591 return rc;
drh8b2f49b2001-06-08 00:21:52 +00005592 }
drhf74b8d92002-09-01 23:20:45 +00005593 assert( !pBt->readOnly );
drhbd03cae2001-06-02 02:40:57 +00005594 if( pCur->idx >= pPage->nCell ){
drhd0679ed2007-08-28 22:24:34 +00005595 cursorLeave(pCur);
drhbd03cae2001-06-02 02:40:57 +00005596 return SQLITE_ERROR; /* The cursor is not pointing to anything */
5597 }
drhecdc7532001-09-23 02:35:53 +00005598 if( !pCur->wrFlag ){
drhd0679ed2007-08-28 22:24:34 +00005599 cursorLeave(pCur);
drhecdc7532001-09-23 02:35:53 +00005600 return SQLITE_PERM; /* Did not open this cursor for writing */
5601 }
drh980b1a72006-08-16 16:42:48 +00005602 if( checkReadLocks(pCur->pBtree, pCur->pgnoRoot, pCur) ){
drhd0679ed2007-08-28 22:24:34 +00005603 cursorLeave(pCur);
drhf74b8d92002-09-01 23:20:45 +00005604 return SQLITE_LOCKED; /* The table pCur points to has a read lock */
5605 }
danielk1977da184232006-01-05 11:34:32 +00005606
5607 /* Restore the current cursor position (a no-op if the cursor is not in
5608 ** CURSOR_REQUIRESEEK state) and save the positions of any other cursors
danielk19773b8a05f2007-03-19 17:44:26 +00005609 ** open on the same table. Then call sqlite3PagerWrite() on the page
danielk1977da184232006-01-05 11:34:32 +00005610 ** that the entry will be deleted from.
5611 */
5612 if(
drhbf700f32007-03-31 02:36:44 +00005613 (rc = restoreOrClearCursorPosition(pCur))!=0 ||
drhd1167392006-01-23 13:00:35 +00005614 (rc = saveAllCursors(pBt, pCur->pgnoRoot, pCur))!=0 ||
danielk19773b8a05f2007-03-19 17:44:26 +00005615 (rc = sqlite3PagerWrite(pPage->pDbPage))!=0
danielk1977da184232006-01-05 11:34:32 +00005616 ){
drhd0679ed2007-08-28 22:24:34 +00005617 cursorLeave(pCur);
danielk1977da184232006-01-05 11:34:32 +00005618 return rc;
5619 }
danielk1977e6efa742004-11-10 11:55:10 +00005620
5621 /* Locate the cell within it's page and leave pCell pointing to the
5622 ** data. The clearCell() call frees any overflow pages associated with the
5623 ** cell. The cell itself is still intact.
5624 */
danielk19771cc5ed82007-05-16 17:28:43 +00005625 pCell = findCell(pPage, pCur->idx);
drh4b70f112004-05-02 21:12:19 +00005626 if( !pPage->leaf ){
drh43605152004-05-29 21:46:49 +00005627 pgnoChild = get4byte(pCell);
drh4b70f112004-05-02 21:12:19 +00005628 }
danielk197728129562005-01-11 10:25:06 +00005629 rc = clearCell(pPage, pCell);
drhd677b3d2007-08-20 22:48:41 +00005630 if( rc ){
drhd0679ed2007-08-28 22:24:34 +00005631 cursorLeave(pCur);
drhd677b3d2007-08-20 22:48:41 +00005632 return rc;
5633 }
danielk1977e6efa742004-11-10 11:55:10 +00005634
drh4b70f112004-05-02 21:12:19 +00005635 if( !pPage->leaf ){
drh14acc042001-06-10 19:56:58 +00005636 /*
drh5e00f6c2001-09-13 13:46:56 +00005637 ** The entry we are about to delete is not a leaf so if we do not
drh9ca7d3b2001-06-28 11:50:21 +00005638 ** do something we will leave a hole on an internal page.
5639 ** We have to fill the hole by moving in a cell from a leaf. The
5640 ** next Cell after the one to be deleted is guaranteed to exist and
danielk1977299b1872004-11-22 10:02:10 +00005641 ** to be a leaf so we can use it.
drh5e2f8b92001-05-28 00:41:15 +00005642 */
drh14acc042001-06-10 19:56:58 +00005643 BtCursor leafCur;
drh4b70f112004-05-02 21:12:19 +00005644 unsigned char *pNext;
drh02afc862006-01-20 18:10:57 +00005645 int szNext; /* The compiler warning is wrong: szNext is always
5646 ** initialized before use. Adding an extra initialization
5647 ** to silence the compiler slows down the code. */
danielk1977299b1872004-11-22 10:02:10 +00005648 int notUsed;
danielk19776b456a22005-03-21 04:04:02 +00005649 unsigned char *tempCell = 0;
drh8b18dd42004-05-12 19:18:15 +00005650 assert( !pPage->leafData );
drh16a9b832007-05-05 18:39:25 +00005651 sqlite3BtreeGetTempCursor(pCur, &leafCur);
danielk1977299b1872004-11-22 10:02:10 +00005652 rc = sqlite3BtreeNext(&leafCur, &notUsed);
danielk19776b456a22005-03-21 04:04:02 +00005653 if( rc==SQLITE_OK ){
danielk19773b8a05f2007-03-19 17:44:26 +00005654 rc = sqlite3PagerWrite(leafCur.pPage->pDbPage);
danielk19776b456a22005-03-21 04:04:02 +00005655 }
5656 if( rc==SQLITE_OK ){
5657 TRACE(("DELETE: table=%d delete internal from %d replace from leaf %d\n",
5658 pCur->pgnoRoot, pPage->pgno, leafCur.pPage->pgno));
5659 dropCell(pPage, pCur->idx, cellSizePtr(pPage, pCell));
danielk19771cc5ed82007-05-16 17:28:43 +00005660 pNext = findCell(leafCur.pPage, leafCur.idx);
danielk19776b456a22005-03-21 04:04:02 +00005661 szNext = cellSizePtr(leafCur.pPage, pNext);
5662 assert( MX_CELL_SIZE(pBt)>=szNext+4 );
drh17435752007-08-16 04:30:38 +00005663 tempCell = sqlite3_malloc( MX_CELL_SIZE(pBt) );
danielk19776b456a22005-03-21 04:04:02 +00005664 if( tempCell==0 ){
5665 rc = SQLITE_NOMEM;
5666 }
5667 }
5668 if( rc==SQLITE_OK ){
5669 rc = insertCell(pPage, pCur->idx, pNext-4, szNext+4, tempCell, 0);
5670 }
5671 if( rc==SQLITE_OK ){
5672 put4byte(findOverflowCell(pPage, pCur->idx), pgnoChild);
5673 rc = balance(pPage, 0);
5674 }
5675 if( rc==SQLITE_OK ){
5676 dropCell(leafCur.pPage, leafCur.idx, szNext);
5677 rc = balance(leafCur.pPage, 0);
5678 }
drh17435752007-08-16 04:30:38 +00005679 sqlite3_free(tempCell);
drh16a9b832007-05-05 18:39:25 +00005680 sqlite3BtreeReleaseTempCursor(&leafCur);
drh5e2f8b92001-05-28 00:41:15 +00005681 }else{
danielk1977299b1872004-11-22 10:02:10 +00005682 TRACE(("DELETE: table=%d delete from leaf %d\n",
5683 pCur->pgnoRoot, pPage->pgno));
5684 dropCell(pPage, pCur->idx, cellSizePtr(pPage, pCell));
danielk1977ac245ec2005-01-14 13:50:11 +00005685 rc = balance(pPage, 0);
drh5e2f8b92001-05-28 00:41:15 +00005686 }
danielk19776b456a22005-03-21 04:04:02 +00005687 if( rc==SQLITE_OK ){
5688 moveToRoot(pCur);
5689 }
drhd0679ed2007-08-28 22:24:34 +00005690 cursorLeave(pCur);
drh5e2f8b92001-05-28 00:41:15 +00005691 return rc;
drh3b7511c2001-05-26 13:15:44 +00005692}
drh8b2f49b2001-06-08 00:21:52 +00005693
5694/*
drhc6b52df2002-01-04 03:09:29 +00005695** Create a new BTree table. Write into *piTable the page
5696** number for the root page of the new table.
5697**
drhab01f612004-05-22 02:55:23 +00005698** The type of table is determined by the flags parameter. Only the
5699** following values of flags are currently in use. Other values for
5700** flags might not work:
5701**
5702** BTREE_INTKEY|BTREE_LEAFDATA Used for SQL tables with rowid keys
5703** BTREE_ZERODATA Used for SQL indices
drh8b2f49b2001-06-08 00:21:52 +00005704*/
drhd677b3d2007-08-20 22:48:41 +00005705static int btreeCreateTable(Btree *p, int *piTable, int flags){
danielk1977aef0bf62005-12-30 16:28:01 +00005706 BtShared *pBt = p->pBt;
drh8b2f49b2001-06-08 00:21:52 +00005707 MemPage *pRoot;
5708 Pgno pgnoRoot;
5709 int rc;
drhd677b3d2007-08-20 22:48:41 +00005710
drhd0679ed2007-08-28 22:24:34 +00005711 assert( sqlite3_mutex_held(p->pSqlite->mutex) );
5712 assert( sqlite3_mutex_held(pBt->mutex) );
danielk1977aef0bf62005-12-30 16:28:01 +00005713 if( pBt->inTransaction!=TRANS_WRITE ){
drhf74b8d92002-09-01 23:20:45 +00005714 /* Must start a transaction first */
drhd677b3d2007-08-20 22:48:41 +00005715 rc = pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR;
5716 return rc;
drh8b2f49b2001-06-08 00:21:52 +00005717 }
danielk197728129562005-01-11 10:25:06 +00005718 assert( !pBt->readOnly );
danielk1977e6efa742004-11-10 11:55:10 +00005719
danielk1977003ba062004-11-04 02:57:33 +00005720#ifdef SQLITE_OMIT_AUTOVACUUM
drh4f0c5872007-03-26 22:05:01 +00005721 rc = allocateBtreePage(pBt, &pRoot, &pgnoRoot, 1, 0);
drhd677b3d2007-08-20 22:48:41 +00005722 if( rc ){
5723 return rc;
5724 }
danielk1977003ba062004-11-04 02:57:33 +00005725#else
danielk1977687566d2004-11-02 12:56:41 +00005726 if( pBt->autoVacuum ){
danielk1977003ba062004-11-04 02:57:33 +00005727 Pgno pgnoMove; /* Move a page here to make room for the root-page */
5728 MemPage *pPageMove; /* The page to move to. */
5729
danielk197720713f32007-05-03 11:43:33 +00005730 /* Creating a new table may probably require moving an existing database
5731 ** to make room for the new tables root page. In case this page turns
5732 ** out to be an overflow page, delete all overflow page-map caches
5733 ** held by open cursors.
5734 */
danielk197792d4d7a2007-05-04 12:05:56 +00005735 invalidateAllOverflowCache(pBt);
danielk197720713f32007-05-03 11:43:33 +00005736
danielk1977003ba062004-11-04 02:57:33 +00005737 /* Read the value of meta[3] from the database to determine where the
5738 ** root page of the new table should go. meta[3] is the largest root-page
5739 ** created so far, so the new root-page is (meta[3]+1).
5740 */
danielk1977aef0bf62005-12-30 16:28:01 +00005741 rc = sqlite3BtreeGetMeta(p, 4, &pgnoRoot);
drhd677b3d2007-08-20 22:48:41 +00005742 if( rc!=SQLITE_OK ){
5743 return rc;
5744 }
danielk1977003ba062004-11-04 02:57:33 +00005745 pgnoRoot++;
5746
danielk1977599fcba2004-11-08 07:13:13 +00005747 /* The new root-page may not be allocated on a pointer-map page, or the
5748 ** PENDING_BYTE page.
5749 */
danielk1977266664d2006-02-10 08:24:21 +00005750 if( pgnoRoot==PTRMAP_PAGENO(pBt, pgnoRoot) ||
danielk1977599fcba2004-11-08 07:13:13 +00005751 pgnoRoot==PENDING_BYTE_PAGE(pBt) ){
danielk1977003ba062004-11-04 02:57:33 +00005752 pgnoRoot++;
5753 }
5754 assert( pgnoRoot>=3 );
5755
5756 /* Allocate a page. The page that currently resides at pgnoRoot will
5757 ** be moved to the allocated page (unless the allocated page happens
5758 ** to reside at pgnoRoot).
5759 */
drh4f0c5872007-03-26 22:05:01 +00005760 rc = allocateBtreePage(pBt, &pPageMove, &pgnoMove, pgnoRoot, 1);
danielk1977003ba062004-11-04 02:57:33 +00005761 if( rc!=SQLITE_OK ){
danielk1977687566d2004-11-02 12:56:41 +00005762 return rc;
5763 }
danielk1977003ba062004-11-04 02:57:33 +00005764
5765 if( pgnoMove!=pgnoRoot ){
danielk1977f35843b2007-04-07 15:03:17 +00005766 /* pgnoRoot is the page that will be used for the root-page of
5767 ** the new table (assuming an error did not occur). But we were
5768 ** allocated pgnoMove. If required (i.e. if it was not allocated
5769 ** by extending the file), the current page at position pgnoMove
5770 ** is already journaled.
5771 */
danielk1977003ba062004-11-04 02:57:33 +00005772 u8 eType;
5773 Pgno iPtrPage;
5774
5775 releasePage(pPageMove);
danielk1977f35843b2007-04-07 15:03:17 +00005776
5777 /* Move the page currently at pgnoRoot to pgnoMove. */
drh16a9b832007-05-05 18:39:25 +00005778 rc = sqlite3BtreeGetPage(pBt, pgnoRoot, &pRoot, 0);
danielk1977003ba062004-11-04 02:57:33 +00005779 if( rc!=SQLITE_OK ){
5780 return rc;
5781 }
5782 rc = ptrmapGet(pBt, pgnoRoot, &eType, &iPtrPage);
drhccae6022005-02-26 17:31:26 +00005783 if( rc!=SQLITE_OK || eType==PTRMAP_ROOTPAGE || eType==PTRMAP_FREEPAGE ){
danielk1977003ba062004-11-04 02:57:33 +00005784 releasePage(pRoot);
5785 return rc;
5786 }
drhccae6022005-02-26 17:31:26 +00005787 assert( eType!=PTRMAP_ROOTPAGE );
5788 assert( eType!=PTRMAP_FREEPAGE );
danielk19773b8a05f2007-03-19 17:44:26 +00005789 rc = sqlite3PagerWrite(pRoot->pDbPage);
danielk19775fd057a2005-03-09 13:09:43 +00005790 if( rc!=SQLITE_OK ){
5791 releasePage(pRoot);
5792 return rc;
5793 }
danielk1977003ba062004-11-04 02:57:33 +00005794 rc = relocatePage(pBt, pRoot, eType, iPtrPage, pgnoMove);
5795 releasePage(pRoot);
danielk1977f35843b2007-04-07 15:03:17 +00005796
5797 /* Obtain the page at pgnoRoot */
danielk1977003ba062004-11-04 02:57:33 +00005798 if( rc!=SQLITE_OK ){
5799 return rc;
5800 }
drh16a9b832007-05-05 18:39:25 +00005801 rc = sqlite3BtreeGetPage(pBt, pgnoRoot, &pRoot, 0);
danielk1977003ba062004-11-04 02:57:33 +00005802 if( rc!=SQLITE_OK ){
5803 return rc;
5804 }
danielk19773b8a05f2007-03-19 17:44:26 +00005805 rc = sqlite3PagerWrite(pRoot->pDbPage);
danielk1977003ba062004-11-04 02:57:33 +00005806 if( rc!=SQLITE_OK ){
5807 releasePage(pRoot);
5808 return rc;
5809 }
5810 }else{
5811 pRoot = pPageMove;
5812 }
5813
danielk197742741be2005-01-08 12:42:39 +00005814 /* Update the pointer-map and meta-data with the new root-page number. */
danielk1977003ba062004-11-04 02:57:33 +00005815 rc = ptrmapPut(pBt, pgnoRoot, PTRMAP_ROOTPAGE, 0);
5816 if( rc ){
5817 releasePage(pRoot);
5818 return rc;
5819 }
danielk1977aef0bf62005-12-30 16:28:01 +00005820 rc = sqlite3BtreeUpdateMeta(p, 4, pgnoRoot);
danielk1977003ba062004-11-04 02:57:33 +00005821 if( rc ){
5822 releasePage(pRoot);
5823 return rc;
5824 }
danielk197742741be2005-01-08 12:42:39 +00005825
danielk1977003ba062004-11-04 02:57:33 +00005826 }else{
drh4f0c5872007-03-26 22:05:01 +00005827 rc = allocateBtreePage(pBt, &pRoot, &pgnoRoot, 1, 0);
danielk1977003ba062004-11-04 02:57:33 +00005828 if( rc ) return rc;
danielk1977687566d2004-11-02 12:56:41 +00005829 }
5830#endif
danielk19773b8a05f2007-03-19 17:44:26 +00005831 assert( sqlite3PagerIswriteable(pRoot->pDbPage) );
drhde647132004-05-07 17:57:49 +00005832 zeroPage(pRoot, flags | PTF_LEAF);
danielk19773b8a05f2007-03-19 17:44:26 +00005833 sqlite3PagerUnref(pRoot->pDbPage);
drh8b2f49b2001-06-08 00:21:52 +00005834 *piTable = (int)pgnoRoot;
5835 return SQLITE_OK;
5836}
drhd677b3d2007-08-20 22:48:41 +00005837int sqlite3BtreeCreateTable(Btree *p, int *piTable, int flags){
5838 int rc;
5839 sqlite3BtreeEnter(p);
5840 rc = btreeCreateTable(p, piTable, flags);
5841 sqlite3BtreeLeave(p);
5842 return rc;
5843}
drh8b2f49b2001-06-08 00:21:52 +00005844
5845/*
5846** Erase the given database page and all its children. Return
5847** the page to the freelist.
5848*/
drh4b70f112004-05-02 21:12:19 +00005849static int clearDatabasePage(
danielk1977aef0bf62005-12-30 16:28:01 +00005850 BtShared *pBt, /* The BTree that contains the table */
drh4b70f112004-05-02 21:12:19 +00005851 Pgno pgno, /* Page number to clear */
5852 MemPage *pParent, /* Parent page. NULL for the root */
5853 int freePageFlag /* Deallocate page if true */
5854){
danielk19776b456a22005-03-21 04:04:02 +00005855 MemPage *pPage = 0;
drh8b2f49b2001-06-08 00:21:52 +00005856 int rc;
drh4b70f112004-05-02 21:12:19 +00005857 unsigned char *pCell;
5858 int i;
drh8b2f49b2001-06-08 00:21:52 +00005859
drh27641702007-08-22 02:56:42 +00005860 assert( sqlite3BtreeMutexHeld(pBt->mutex) );
danielk19773b8a05f2007-03-19 17:44:26 +00005861 if( pgno>sqlite3PagerPagecount(pBt->pPager) ){
drh49285702005-09-17 15:20:26 +00005862 return SQLITE_CORRUPT_BKPT;
danielk1977a1cb1832005-02-12 08:59:55 +00005863 }
5864
drhde647132004-05-07 17:57:49 +00005865 rc = getAndInitPage(pBt, pgno, &pPage, pParent);
danielk19776b456a22005-03-21 04:04:02 +00005866 if( rc ) goto cleardatabasepage_out;
drh4b70f112004-05-02 21:12:19 +00005867 for(i=0; i<pPage->nCell; i++){
danielk19771cc5ed82007-05-16 17:28:43 +00005868 pCell = findCell(pPage, i);
drh4b70f112004-05-02 21:12:19 +00005869 if( !pPage->leaf ){
drh43605152004-05-29 21:46:49 +00005870 rc = clearDatabasePage(pBt, get4byte(pCell), pPage->pParent, 1);
danielk19776b456a22005-03-21 04:04:02 +00005871 if( rc ) goto cleardatabasepage_out;
drh8b2f49b2001-06-08 00:21:52 +00005872 }
drh4b70f112004-05-02 21:12:19 +00005873 rc = clearCell(pPage, pCell);
danielk19776b456a22005-03-21 04:04:02 +00005874 if( rc ) goto cleardatabasepage_out;
drh8b2f49b2001-06-08 00:21:52 +00005875 }
drha34b6762004-05-07 13:30:42 +00005876 if( !pPage->leaf ){
drh43605152004-05-29 21:46:49 +00005877 rc = clearDatabasePage(pBt, get4byte(&pPage->aData[8]), pPage->pParent, 1);
danielk19776b456a22005-03-21 04:04:02 +00005878 if( rc ) goto cleardatabasepage_out;
drh2aa679f2001-06-25 02:11:07 +00005879 }
5880 if( freePageFlag ){
drh4b70f112004-05-02 21:12:19 +00005881 rc = freePage(pPage);
danielk19773b8a05f2007-03-19 17:44:26 +00005882 }else if( (rc = sqlite3PagerWrite(pPage->pDbPage))==0 ){
drh3a4c1412004-05-09 20:40:11 +00005883 zeroPage(pPage, pPage->aData[0] | PTF_LEAF);
drh2aa679f2001-06-25 02:11:07 +00005884 }
danielk19776b456a22005-03-21 04:04:02 +00005885
5886cleardatabasepage_out:
drh4b70f112004-05-02 21:12:19 +00005887 releasePage(pPage);
drh2aa679f2001-06-25 02:11:07 +00005888 return rc;
drh8b2f49b2001-06-08 00:21:52 +00005889}
5890
5891/*
drhab01f612004-05-22 02:55:23 +00005892** Delete all information from a single table in the database. iTable is
5893** the page number of the root of the table. After this routine returns,
5894** the root page is empty, but still exists.
5895**
5896** This routine will fail with SQLITE_LOCKED if there are any open
5897** read cursors on the table. Open write cursors are moved to the
5898** root of the table.
drh8b2f49b2001-06-08 00:21:52 +00005899*/
danielk1977aef0bf62005-12-30 16:28:01 +00005900int sqlite3BtreeClearTable(Btree *p, int iTable){
drh8b2f49b2001-06-08 00:21:52 +00005901 int rc;
danielk1977aef0bf62005-12-30 16:28:01 +00005902 BtShared *pBt = p->pBt;
drhd677b3d2007-08-20 22:48:41 +00005903 sqlite3BtreeEnter(p);
danielk1977aef0bf62005-12-30 16:28:01 +00005904 if( p->inTrans!=TRANS_WRITE ){
drhd677b3d2007-08-20 22:48:41 +00005905 rc = pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR;
5906 }else if( (rc = checkReadLocks(p, iTable, 0))!=SQLITE_OK ){
5907 /* nothing to do */
5908 }else if( SQLITE_OK!=(rc = saveAllCursors(pBt, iTable, 0)) ){
5909 /* nothing to do */
5910 }else{
5911 rc = clearDatabasePage(pBt, (Pgno)iTable, 0, 0);
drh8b2f49b2001-06-08 00:21:52 +00005912 }
drhd677b3d2007-08-20 22:48:41 +00005913 sqlite3BtreeLeave(p);
5914 return rc;
drh8b2f49b2001-06-08 00:21:52 +00005915}
5916
5917/*
5918** Erase all information in a table and add the root of the table to
5919** the freelist. Except, the root of the principle table (the one on
drhab01f612004-05-22 02:55:23 +00005920** page 1) is never added to the freelist.
5921**
5922** This routine will fail with SQLITE_LOCKED if there are any open
5923** cursors on the table.
drh205f48e2004-11-05 00:43:11 +00005924**
5925** If AUTOVACUUM is enabled and the page at iTable is not the last
5926** root page in the database file, then the last root page
5927** in the database file is moved into the slot formerly occupied by
5928** iTable and that last slot formerly occupied by the last root page
5929** is added to the freelist instead of iTable. In this say, all
5930** root pages are kept at the beginning of the database file, which
5931** is necessary for AUTOVACUUM to work right. *piMoved is set to the
5932** page number that used to be the last root page in the file before
5933** the move. If no page gets moved, *piMoved is set to 0.
5934** The last root page is recorded in meta[3] and the value of
5935** meta[3] is updated by this procedure.
drh8b2f49b2001-06-08 00:21:52 +00005936*/
drhd677b3d2007-08-20 22:48:41 +00005937static int btreeDropTable(Btree *p, int iTable, int *piMoved){
drh8b2f49b2001-06-08 00:21:52 +00005938 int rc;
danielk1977a0bf2652004-11-04 14:30:04 +00005939 MemPage *pPage = 0;
danielk1977aef0bf62005-12-30 16:28:01 +00005940 BtShared *pBt = p->pBt;
danielk1977a0bf2652004-11-04 14:30:04 +00005941
drh27641702007-08-22 02:56:42 +00005942 assert( sqlite3BtreeMutexHeld(pBt->mutex) );
danielk1977aef0bf62005-12-30 16:28:01 +00005943 if( p->inTrans!=TRANS_WRITE ){
drhf74b8d92002-09-01 23:20:45 +00005944 return pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR;
drh8b2f49b2001-06-08 00:21:52 +00005945 }
danielk1977a0bf2652004-11-04 14:30:04 +00005946
danielk1977e6efa742004-11-10 11:55:10 +00005947 /* It is illegal to drop a table if any cursors are open on the
5948 ** database. This is because in auto-vacuum mode the backend may
5949 ** need to move another root-page to fill a gap left by the deleted
5950 ** root page. If an open cursor was using this page a problem would
5951 ** occur.
5952 */
5953 if( pBt->pCursor ){
5954 return SQLITE_LOCKED;
drh5df72a52002-06-06 23:16:05 +00005955 }
danielk1977a0bf2652004-11-04 14:30:04 +00005956
drh16a9b832007-05-05 18:39:25 +00005957 rc = sqlite3BtreeGetPage(pBt, (Pgno)iTable, &pPage, 0);
drh2aa679f2001-06-25 02:11:07 +00005958 if( rc ) return rc;
danielk1977aef0bf62005-12-30 16:28:01 +00005959 rc = sqlite3BtreeClearTable(p, iTable);
danielk19776b456a22005-03-21 04:04:02 +00005960 if( rc ){
5961 releasePage(pPage);
5962 return rc;
5963 }
danielk1977a0bf2652004-11-04 14:30:04 +00005964
drh205f48e2004-11-05 00:43:11 +00005965 *piMoved = 0;
danielk1977a0bf2652004-11-04 14:30:04 +00005966
drh4b70f112004-05-02 21:12:19 +00005967 if( iTable>1 ){
danielk1977a0bf2652004-11-04 14:30:04 +00005968#ifdef SQLITE_OMIT_AUTOVACUUM
drha34b6762004-05-07 13:30:42 +00005969 rc = freePage(pPage);
danielk1977a0bf2652004-11-04 14:30:04 +00005970 releasePage(pPage);
5971#else
5972 if( pBt->autoVacuum ){
5973 Pgno maxRootPgno;
danielk1977aef0bf62005-12-30 16:28:01 +00005974 rc = sqlite3BtreeGetMeta(p, 4, &maxRootPgno);
danielk1977a0bf2652004-11-04 14:30:04 +00005975 if( rc!=SQLITE_OK ){
5976 releasePage(pPage);
5977 return rc;
5978 }
5979
5980 if( iTable==maxRootPgno ){
5981 /* If the table being dropped is the table with the largest root-page
5982 ** number in the database, put the root page on the free list.
5983 */
5984 rc = freePage(pPage);
5985 releasePage(pPage);
5986 if( rc!=SQLITE_OK ){
5987 return rc;
5988 }
5989 }else{
5990 /* The table being dropped does not have the largest root-page
5991 ** number in the database. So move the page that does into the
5992 ** gap left by the deleted root-page.
5993 */
5994 MemPage *pMove;
5995 releasePage(pPage);
drh16a9b832007-05-05 18:39:25 +00005996 rc = sqlite3BtreeGetPage(pBt, maxRootPgno, &pMove, 0);
danielk1977a0bf2652004-11-04 14:30:04 +00005997 if( rc!=SQLITE_OK ){
5998 return rc;
5999 }
6000 rc = relocatePage(pBt, pMove, PTRMAP_ROOTPAGE, 0, iTable);
6001 releasePage(pMove);
6002 if( rc!=SQLITE_OK ){
6003 return rc;
6004 }
drh16a9b832007-05-05 18:39:25 +00006005 rc = sqlite3BtreeGetPage(pBt, maxRootPgno, &pMove, 0);
danielk1977a0bf2652004-11-04 14:30:04 +00006006 if( rc!=SQLITE_OK ){
6007 return rc;
6008 }
6009 rc = freePage(pMove);
6010 releasePage(pMove);
6011 if( rc!=SQLITE_OK ){
6012 return rc;
6013 }
6014 *piMoved = maxRootPgno;
6015 }
6016
danielk1977599fcba2004-11-08 07:13:13 +00006017 /* Set the new 'max-root-page' value in the database header. This
6018 ** is the old value less one, less one more if that happens to
6019 ** be a root-page number, less one again if that is the
6020 ** PENDING_BYTE_PAGE.
6021 */
danielk197787a6e732004-11-05 12:58:25 +00006022 maxRootPgno--;
danielk1977599fcba2004-11-08 07:13:13 +00006023 if( maxRootPgno==PENDING_BYTE_PAGE(pBt) ){
6024 maxRootPgno--;
6025 }
danielk1977266664d2006-02-10 08:24:21 +00006026 if( maxRootPgno==PTRMAP_PAGENO(pBt, maxRootPgno) ){
danielk197787a6e732004-11-05 12:58:25 +00006027 maxRootPgno--;
6028 }
danielk1977599fcba2004-11-08 07:13:13 +00006029 assert( maxRootPgno!=PENDING_BYTE_PAGE(pBt) );
6030
danielk1977aef0bf62005-12-30 16:28:01 +00006031 rc = sqlite3BtreeUpdateMeta(p, 4, maxRootPgno);
danielk1977a0bf2652004-11-04 14:30:04 +00006032 }else{
6033 rc = freePage(pPage);
6034 releasePage(pPage);
6035 }
6036#endif
drh2aa679f2001-06-25 02:11:07 +00006037 }else{
danielk1977a0bf2652004-11-04 14:30:04 +00006038 /* If sqlite3BtreeDropTable was called on page 1. */
drha34b6762004-05-07 13:30:42 +00006039 zeroPage(pPage, PTF_INTKEY|PTF_LEAF );
danielk1977a0bf2652004-11-04 14:30:04 +00006040 releasePage(pPage);
drh8b2f49b2001-06-08 00:21:52 +00006041 }
drh8b2f49b2001-06-08 00:21:52 +00006042 return rc;
6043}
drhd677b3d2007-08-20 22:48:41 +00006044int sqlite3BtreeDropTable(Btree *p, int iTable, int *piMoved){
6045 int rc;
6046 sqlite3BtreeEnter(p);
6047 rc = btreeDropTable(p, iTable, piMoved);
6048 sqlite3BtreeLeave(p);
6049 return rc;
6050}
drh8b2f49b2001-06-08 00:21:52 +00006051
drh001bbcb2003-03-19 03:14:00 +00006052
drh8b2f49b2001-06-08 00:21:52 +00006053/*
drh23e11ca2004-05-04 17:27:28 +00006054** Read the meta-information out of a database file. Meta[0]
6055** is the number of free pages currently in the database. Meta[1]
drha3b321d2004-05-11 09:31:31 +00006056** through meta[15] are available for use by higher layers. Meta[0]
6057** is read-only, the others are read/write.
6058**
6059** The schema layer numbers meta values differently. At the schema
6060** layer (and the SetCookie and ReadCookie opcodes) the number of
6061** free pages is not visible. So Cookie[0] is the same as Meta[1].
drh8b2f49b2001-06-08 00:21:52 +00006062*/
danielk1977aef0bf62005-12-30 16:28:01 +00006063int sqlite3BtreeGetMeta(Btree *p, int idx, u32 *pMeta){
danielk19773b8a05f2007-03-19 17:44:26 +00006064 DbPage *pDbPage;
drh8b2f49b2001-06-08 00:21:52 +00006065 int rc;
drh4b70f112004-05-02 21:12:19 +00006066 unsigned char *pP1;
danielk1977aef0bf62005-12-30 16:28:01 +00006067 BtShared *pBt = p->pBt;
drh8b2f49b2001-06-08 00:21:52 +00006068
drhd677b3d2007-08-20 22:48:41 +00006069 sqlite3BtreeEnter(p);
6070
danielk1977da184232006-01-05 11:34:32 +00006071 /* Reading a meta-data value requires a read-lock on page 1 (and hence
6072 ** the sqlite_master table. We grab this lock regardless of whether or
6073 ** not the SQLITE_ReadUncommitted flag is set (the table rooted at page
6074 ** 1 is treated as a special case by queryTableLock() and lockTable()).
6075 */
6076 rc = queryTableLock(p, 1, READ_LOCK);
6077 if( rc!=SQLITE_OK ){
drhd677b3d2007-08-20 22:48:41 +00006078 sqlite3BtreeLeave(p);
danielk1977da184232006-01-05 11:34:32 +00006079 return rc;
6080 }
6081
drh23e11ca2004-05-04 17:27:28 +00006082 assert( idx>=0 && idx<=15 );
danielk19773b8a05f2007-03-19 17:44:26 +00006083 rc = sqlite3PagerGet(pBt->pPager, 1, &pDbPage);
drhd677b3d2007-08-20 22:48:41 +00006084 if( rc ){
6085 sqlite3BtreeLeave(p);
6086 return rc;
6087 }
danielk19773b8a05f2007-03-19 17:44:26 +00006088 pP1 = (unsigned char *)sqlite3PagerGetData(pDbPage);
drh23e11ca2004-05-04 17:27:28 +00006089 *pMeta = get4byte(&pP1[36 + idx*4]);
danielk19773b8a05f2007-03-19 17:44:26 +00006090 sqlite3PagerUnref(pDbPage);
drhae157872004-08-14 19:20:09 +00006091
danielk1977599fcba2004-11-08 07:13:13 +00006092 /* If autovacuumed is disabled in this build but we are trying to
6093 ** access an autovacuumed database, then make the database readonly.
6094 */
danielk1977003ba062004-11-04 02:57:33 +00006095#ifdef SQLITE_OMIT_AUTOVACUUM
drhae157872004-08-14 19:20:09 +00006096 if( idx==4 && *pMeta>0 ) pBt->readOnly = 1;
danielk1977003ba062004-11-04 02:57:33 +00006097#endif
drhae157872004-08-14 19:20:09 +00006098
danielk1977da184232006-01-05 11:34:32 +00006099 /* Grab the read-lock on page 1. */
6100 rc = lockTable(p, 1, READ_LOCK);
drhd677b3d2007-08-20 22:48:41 +00006101 sqlite3BtreeLeave(p);
danielk1977da184232006-01-05 11:34:32 +00006102 return rc;
drh8b2f49b2001-06-08 00:21:52 +00006103}
6104
6105/*
drh23e11ca2004-05-04 17:27:28 +00006106** Write meta-information back into the database. Meta[0] is
6107** read-only and may not be written.
drh8b2f49b2001-06-08 00:21:52 +00006108*/
danielk1977aef0bf62005-12-30 16:28:01 +00006109int sqlite3BtreeUpdateMeta(Btree *p, int idx, u32 iMeta){
6110 BtShared *pBt = p->pBt;
drh4b70f112004-05-02 21:12:19 +00006111 unsigned char *pP1;
drha34b6762004-05-07 13:30:42 +00006112 int rc;
drh23e11ca2004-05-04 17:27:28 +00006113 assert( idx>=1 && idx<=15 );
drhd677b3d2007-08-20 22:48:41 +00006114 sqlite3BtreeEnter(p);
danielk1977aef0bf62005-12-30 16:28:01 +00006115 if( p->inTrans!=TRANS_WRITE ){
drhd677b3d2007-08-20 22:48:41 +00006116 rc = pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR;
6117 }else{
6118 assert( pBt->pPage1!=0 );
6119 pP1 = pBt->pPage1->aData;
6120 rc = sqlite3PagerWrite(pBt->pPage1->pDbPage);
6121 if( rc==SQLITE_OK ){
6122 put4byte(&pP1[36 + idx*4], iMeta);
6123 if( idx==7 ){
6124 assert( pBt->autoVacuum || iMeta==0 );
6125 assert( iMeta==0 || iMeta==1 );
6126 pBt->incrVacuum = iMeta;
6127 }
6128 }
drh5df72a52002-06-06 23:16:05 +00006129 }
drhd677b3d2007-08-20 22:48:41 +00006130 sqlite3BtreeLeave(p);
6131 return rc;
drh8b2f49b2001-06-08 00:21:52 +00006132}
drh8c42ca92001-06-22 19:15:00 +00006133
drhf328bc82004-05-10 23:29:49 +00006134/*
6135** Return the flag byte at the beginning of the page that the cursor
6136** is currently pointing to.
6137*/
6138int sqlite3BtreeFlags(BtCursor *pCur){
danielk1977da184232006-01-05 11:34:32 +00006139 /* TODO: What about CURSOR_REQUIRESEEK state? Probably need to call
drh777e4c42006-01-13 04:31:58 +00006140 ** restoreOrClearCursorPosition() here.
danielk1977da184232006-01-05 11:34:32 +00006141 */
drhf328bc82004-05-10 23:29:49 +00006142 MemPage *pPage = pCur->pPage;
drh27641702007-08-22 02:56:42 +00006143 assert( sqlite3BtreeMutexHeld(pPage->pBt->mutex) );
drhd0679ed2007-08-28 22:24:34 +00006144 assert( pPage->pBt==pCur->pBt );
drhf328bc82004-05-10 23:29:49 +00006145 return pPage ? pPage->aData[pPage->hdrOffset] : 0;
6146}
6147
drhdd793422001-06-28 01:54:48 +00006148
drhdd793422001-06-28 01:54:48 +00006149/*
drh5eddca62001-06-30 21:53:53 +00006150** Return the pager associated with a BTree. This routine is used for
6151** testing and debugging only.
drhdd793422001-06-28 01:54:48 +00006152*/
danielk1977aef0bf62005-12-30 16:28:01 +00006153Pager *sqlite3BtreePager(Btree *p){
6154 return p->pBt->pPager;
drhdd793422001-06-28 01:54:48 +00006155}
drh5eddca62001-06-30 21:53:53 +00006156
drhb7f91642004-10-31 02:22:47 +00006157#ifndef SQLITE_OMIT_INTEGRITY_CHECK
drh5eddca62001-06-30 21:53:53 +00006158/*
6159** Append a message to the error message string.
6160*/
drh2e38c322004-09-03 18:38:44 +00006161static void checkAppendMsg(
6162 IntegrityCk *pCheck,
6163 char *zMsg1,
6164 const char *zFormat,
6165 ...
6166){
6167 va_list ap;
6168 char *zMsg2;
drh1dcdbc02007-01-27 02:24:54 +00006169 if( !pCheck->mxErr ) return;
6170 pCheck->mxErr--;
6171 pCheck->nErr++;
drh2e38c322004-09-03 18:38:44 +00006172 va_start(ap, zFormat);
danielk19771e536952007-08-16 10:09:01 +00006173 zMsg2 = sqlite3VMPrintf(0, zFormat, ap);
drh2e38c322004-09-03 18:38:44 +00006174 va_end(ap);
6175 if( zMsg1==0 ) zMsg1 = "";
drh5eddca62001-06-30 21:53:53 +00006176 if( pCheck->zErrMsg ){
6177 char *zOld = pCheck->zErrMsg;
6178 pCheck->zErrMsg = 0;
danielk19774adee202004-05-08 08:23:19 +00006179 sqlite3SetString(&pCheck->zErrMsg, zOld, "\n", zMsg1, zMsg2, (char*)0);
drh17435752007-08-16 04:30:38 +00006180 sqlite3_free(zOld);
drh5eddca62001-06-30 21:53:53 +00006181 }else{
danielk19774adee202004-05-08 08:23:19 +00006182 sqlite3SetString(&pCheck->zErrMsg, zMsg1, zMsg2, (char*)0);
drh5eddca62001-06-30 21:53:53 +00006183 }
drh17435752007-08-16 04:30:38 +00006184 sqlite3_free(zMsg2);
drh5eddca62001-06-30 21:53:53 +00006185}
drhb7f91642004-10-31 02:22:47 +00006186#endif /* SQLITE_OMIT_INTEGRITY_CHECK */
drh5eddca62001-06-30 21:53:53 +00006187
drhb7f91642004-10-31 02:22:47 +00006188#ifndef SQLITE_OMIT_INTEGRITY_CHECK
drh5eddca62001-06-30 21:53:53 +00006189/*
6190** Add 1 to the reference count for page iPage. If this is the second
6191** reference to the page, add an error message to pCheck->zErrMsg.
6192** Return 1 if there are 2 ore more references to the page and 0 if
6193** if this is the first reference to the page.
6194**
6195** Also check that the page number is in bounds.
6196*/
drhaaab5722002-02-19 13:39:21 +00006197static int checkRef(IntegrityCk *pCheck, int iPage, char *zContext){
drh5eddca62001-06-30 21:53:53 +00006198 if( iPage==0 ) return 1;
drh0de8c112002-07-06 16:32:14 +00006199 if( iPage>pCheck->nPage || iPage<0 ){
drh2e38c322004-09-03 18:38:44 +00006200 checkAppendMsg(pCheck, zContext, "invalid page number %d", iPage);
drh5eddca62001-06-30 21:53:53 +00006201 return 1;
6202 }
6203 if( pCheck->anRef[iPage]==1 ){
drh2e38c322004-09-03 18:38:44 +00006204 checkAppendMsg(pCheck, zContext, "2nd reference to page %d", iPage);
drh5eddca62001-06-30 21:53:53 +00006205 return 1;
6206 }
6207 return (pCheck->anRef[iPage]++)>1;
6208}
6209
danielk1977afcdd022004-10-31 16:25:42 +00006210#ifndef SQLITE_OMIT_AUTOVACUUM
6211/*
6212** Check that the entry in the pointer-map for page iChild maps to
6213** page iParent, pointer type ptrType. If not, append an error message
6214** to pCheck.
6215*/
6216static void checkPtrmap(
6217 IntegrityCk *pCheck, /* Integrity check context */
6218 Pgno iChild, /* Child page number */
6219 u8 eType, /* Expected pointer map type */
6220 Pgno iParent, /* Expected pointer map parent page number */
6221 char *zContext /* Context description (used for error msg) */
6222){
6223 int rc;
6224 u8 ePtrmapType;
6225 Pgno iPtrmapParent;
6226
6227 rc = ptrmapGet(pCheck->pBt, iChild, &ePtrmapType, &iPtrmapParent);
6228 if( rc!=SQLITE_OK ){
6229 checkAppendMsg(pCheck, zContext, "Failed to read ptrmap key=%d", iChild);
6230 return;
6231 }
6232
6233 if( ePtrmapType!=eType || iPtrmapParent!=iParent ){
6234 checkAppendMsg(pCheck, zContext,
6235 "Bad ptr map entry key=%d expected=(%d,%d) got=(%d,%d)",
6236 iChild, eType, iParent, ePtrmapType, iPtrmapParent);
6237 }
6238}
6239#endif
6240
drh5eddca62001-06-30 21:53:53 +00006241/*
6242** Check the integrity of the freelist or of an overflow page list.
6243** Verify that the number of pages on the list is N.
6244*/
drh30e58752002-03-02 20:41:57 +00006245static void checkList(
6246 IntegrityCk *pCheck, /* Integrity checking context */
6247 int isFreeList, /* True for a freelist. False for overflow page list */
6248 int iPage, /* Page number for first page in the list */
6249 int N, /* Expected number of pages in the list */
6250 char *zContext /* Context for error messages */
6251){
6252 int i;
drh3a4c1412004-05-09 20:40:11 +00006253 int expected = N;
6254 int iFirst = iPage;
drh1dcdbc02007-01-27 02:24:54 +00006255 while( N-- > 0 && pCheck->mxErr ){
danielk19773b8a05f2007-03-19 17:44:26 +00006256 DbPage *pOvflPage;
6257 unsigned char *pOvflData;
drh5eddca62001-06-30 21:53:53 +00006258 if( iPage<1 ){
drh2e38c322004-09-03 18:38:44 +00006259 checkAppendMsg(pCheck, zContext,
6260 "%d of %d pages missing from overflow list starting at %d",
drh3a4c1412004-05-09 20:40:11 +00006261 N+1, expected, iFirst);
drh5eddca62001-06-30 21:53:53 +00006262 break;
6263 }
6264 if( checkRef(pCheck, iPage, zContext) ) break;
danielk19773b8a05f2007-03-19 17:44:26 +00006265 if( sqlite3PagerGet(pCheck->pPager, (Pgno)iPage, &pOvflPage) ){
drh2e38c322004-09-03 18:38:44 +00006266 checkAppendMsg(pCheck, zContext, "failed to get page %d", iPage);
drh5eddca62001-06-30 21:53:53 +00006267 break;
6268 }
danielk19773b8a05f2007-03-19 17:44:26 +00006269 pOvflData = (unsigned char *)sqlite3PagerGetData(pOvflPage);
drh30e58752002-03-02 20:41:57 +00006270 if( isFreeList ){
danielk19773b8a05f2007-03-19 17:44:26 +00006271 int n = get4byte(&pOvflData[4]);
danielk1977687566d2004-11-02 12:56:41 +00006272#ifndef SQLITE_OMIT_AUTOVACUUM
6273 if( pCheck->pBt->autoVacuum ){
6274 checkPtrmap(pCheck, iPage, PTRMAP_FREEPAGE, 0, zContext);
6275 }
6276#endif
drh855eb1c2004-08-31 13:45:11 +00006277 if( n>pCheck->pBt->usableSize/4-8 ){
drh2e38c322004-09-03 18:38:44 +00006278 checkAppendMsg(pCheck, zContext,
6279 "freelist leaf count too big on page %d", iPage);
drhee696e22004-08-30 16:52:17 +00006280 N--;
6281 }else{
6282 for(i=0; i<n; i++){
danielk19773b8a05f2007-03-19 17:44:26 +00006283 Pgno iFreePage = get4byte(&pOvflData[8+i*4]);
danielk1977687566d2004-11-02 12:56:41 +00006284#ifndef SQLITE_OMIT_AUTOVACUUM
6285 if( pCheck->pBt->autoVacuum ){
6286 checkPtrmap(pCheck, iFreePage, PTRMAP_FREEPAGE, 0, zContext);
6287 }
6288#endif
6289 checkRef(pCheck, iFreePage, zContext);
drhee696e22004-08-30 16:52:17 +00006290 }
6291 N -= n;
drh30e58752002-03-02 20:41:57 +00006292 }
drh30e58752002-03-02 20:41:57 +00006293 }
danielk1977afcdd022004-10-31 16:25:42 +00006294#ifndef SQLITE_OMIT_AUTOVACUUM
danielk1977687566d2004-11-02 12:56:41 +00006295 else{
6296 /* If this database supports auto-vacuum and iPage is not the last
6297 ** page in this overflow list, check that the pointer-map entry for
6298 ** the following page matches iPage.
6299 */
6300 if( pCheck->pBt->autoVacuum && N>0 ){
danielk19773b8a05f2007-03-19 17:44:26 +00006301 i = get4byte(pOvflData);
danielk1977687566d2004-11-02 12:56:41 +00006302 checkPtrmap(pCheck, i, PTRMAP_OVERFLOW2, iPage, zContext);
6303 }
danielk1977afcdd022004-10-31 16:25:42 +00006304 }
6305#endif
danielk19773b8a05f2007-03-19 17:44:26 +00006306 iPage = get4byte(pOvflData);
6307 sqlite3PagerUnref(pOvflPage);
drh5eddca62001-06-30 21:53:53 +00006308 }
6309}
drhb7f91642004-10-31 02:22:47 +00006310#endif /* SQLITE_OMIT_INTEGRITY_CHECK */
drh5eddca62001-06-30 21:53:53 +00006311
drhb7f91642004-10-31 02:22:47 +00006312#ifndef SQLITE_OMIT_INTEGRITY_CHECK
drh5eddca62001-06-30 21:53:53 +00006313/*
6314** Do various sanity checks on a single page of a tree. Return
6315** the tree depth. Root pages return 0. Parents of root pages
6316** return 1, and so forth.
6317**
6318** These checks are done:
6319**
6320** 1. Make sure that cells and freeblocks do not overlap
6321** but combine to completely cover the page.
drhda200cc2004-05-09 11:51:38 +00006322** NO 2. Make sure cell keys are in order.
6323** NO 3. Make sure no key is less than or equal to zLowerBound.
6324** NO 4. Make sure no key is greater than or equal to zUpperBound.
drh5eddca62001-06-30 21:53:53 +00006325** 5. Check the integrity of overflow pages.
6326** 6. Recursively call checkTreePage on all children.
6327** 7. Verify that the depth of all children is the same.
drh6019e162001-07-02 17:51:45 +00006328** 8. Make sure this page is at least 33% full or else it is
drh5eddca62001-06-30 21:53:53 +00006329** the root of the tree.
6330*/
6331static int checkTreePage(
drhaaab5722002-02-19 13:39:21 +00006332 IntegrityCk *pCheck, /* Context for the sanity check */
drh5eddca62001-06-30 21:53:53 +00006333 int iPage, /* Page number of the page to check */
6334 MemPage *pParent, /* Parent page */
drh74161702006-02-24 02:53:49 +00006335 char *zParentContext /* Parent context */
drh5eddca62001-06-30 21:53:53 +00006336){
6337 MemPage *pPage;
drhda200cc2004-05-09 11:51:38 +00006338 int i, rc, depth, d2, pgno, cnt;
drh43605152004-05-29 21:46:49 +00006339 int hdr, cellStart;
6340 int nCell;
drhda200cc2004-05-09 11:51:38 +00006341 u8 *data;
danielk1977aef0bf62005-12-30 16:28:01 +00006342 BtShared *pBt;
drh4f26bb62005-09-08 14:17:20 +00006343 int usableSize;
drh5eddca62001-06-30 21:53:53 +00006344 char zContext[100];
drh2e38c322004-09-03 18:38:44 +00006345 char *hit;
drh5eddca62001-06-30 21:53:53 +00006346
drh5bb3eb92007-05-04 13:15:55 +00006347 sqlite3_snprintf(sizeof(zContext), zContext, "Page %d: ", iPage);
danielk1977ef73ee92004-11-06 12:26:07 +00006348
drh5eddca62001-06-30 21:53:53 +00006349 /* Check that the page exists
6350 */
drhd9cb6ac2005-10-20 07:28:17 +00006351 pBt = pCheck->pBt;
drhb6f41482004-05-14 01:58:11 +00006352 usableSize = pBt->usableSize;
drh5eddca62001-06-30 21:53:53 +00006353 if( iPage==0 ) return 0;
6354 if( checkRef(pCheck, iPage, zParentContext) ) return 0;
drh16a9b832007-05-05 18:39:25 +00006355 if( (rc = sqlite3BtreeGetPage(pBt, (Pgno)iPage, &pPage, 0))!=0 ){
drh2e38c322004-09-03 18:38:44 +00006356 checkAppendMsg(pCheck, zContext,
6357 "unable to get the page. error code=%d", rc);
drh5eddca62001-06-30 21:53:53 +00006358 return 0;
6359 }
drh16a9b832007-05-05 18:39:25 +00006360 if( (rc = sqlite3BtreeInitPage(pPage, pParent))!=0 ){
6361 checkAppendMsg(pCheck, zContext,
6362 "sqlite3BtreeInitPage() returns error code %d", rc);
drh91025292004-05-03 19:49:32 +00006363 releasePage(pPage);
drh5eddca62001-06-30 21:53:53 +00006364 return 0;
6365 }
6366
6367 /* Check out all the cells.
6368 */
6369 depth = 0;
drh1dcdbc02007-01-27 02:24:54 +00006370 for(i=0; i<pPage->nCell && pCheck->mxErr; i++){
drh6f11bef2004-05-13 01:12:56 +00006371 u8 *pCell;
6372 int sz;
6373 CellInfo info;
drh5eddca62001-06-30 21:53:53 +00006374
6375 /* Check payload overflow pages
6376 */
drh5bb3eb92007-05-04 13:15:55 +00006377 sqlite3_snprintf(sizeof(zContext), zContext,
6378 "On tree page %d cell %d: ", iPage, i);
danielk19771cc5ed82007-05-16 17:28:43 +00006379 pCell = findCell(pPage,i);
drh16a9b832007-05-05 18:39:25 +00006380 sqlite3BtreeParseCellPtr(pPage, pCell, &info);
drh6f11bef2004-05-13 01:12:56 +00006381 sz = info.nData;
6382 if( !pPage->intKey ) sz += info.nKey;
drh72365832007-03-06 15:53:44 +00006383 assert( sz==info.nPayload );
drh6f11bef2004-05-13 01:12:56 +00006384 if( sz>info.nLocal ){
drhb6f41482004-05-14 01:58:11 +00006385 int nPage = (sz - info.nLocal + usableSize - 5)/(usableSize - 4);
danielk1977afcdd022004-10-31 16:25:42 +00006386 Pgno pgnoOvfl = get4byte(&pCell[info.iOverflow]);
6387#ifndef SQLITE_OMIT_AUTOVACUUM
6388 if( pBt->autoVacuum ){
danielk1977687566d2004-11-02 12:56:41 +00006389 checkPtrmap(pCheck, pgnoOvfl, PTRMAP_OVERFLOW1, iPage, zContext);
danielk1977afcdd022004-10-31 16:25:42 +00006390 }
6391#endif
6392 checkList(pCheck, 0, pgnoOvfl, nPage, zContext);
drh5eddca62001-06-30 21:53:53 +00006393 }
6394
6395 /* Check sanity of left child page.
6396 */
drhda200cc2004-05-09 11:51:38 +00006397 if( !pPage->leaf ){
drh43605152004-05-29 21:46:49 +00006398 pgno = get4byte(pCell);
danielk1977afcdd022004-10-31 16:25:42 +00006399#ifndef SQLITE_OMIT_AUTOVACUUM
6400 if( pBt->autoVacuum ){
6401 checkPtrmap(pCheck, pgno, PTRMAP_BTREE, iPage, zContext);
6402 }
6403#endif
drh74161702006-02-24 02:53:49 +00006404 d2 = checkTreePage(pCheck,pgno,pPage,zContext);
drhda200cc2004-05-09 11:51:38 +00006405 if( i>0 && d2!=depth ){
6406 checkAppendMsg(pCheck, zContext, "Child page depth differs");
6407 }
6408 depth = d2;
drh5eddca62001-06-30 21:53:53 +00006409 }
drh5eddca62001-06-30 21:53:53 +00006410 }
drhda200cc2004-05-09 11:51:38 +00006411 if( !pPage->leaf ){
drh43605152004-05-29 21:46:49 +00006412 pgno = get4byte(&pPage->aData[pPage->hdrOffset+8]);
drh5bb3eb92007-05-04 13:15:55 +00006413 sqlite3_snprintf(sizeof(zContext), zContext,
6414 "On page %d at right child: ", iPage);
danielk1977afcdd022004-10-31 16:25:42 +00006415#ifndef SQLITE_OMIT_AUTOVACUUM
6416 if( pBt->autoVacuum ){
danielk1977687566d2004-11-02 12:56:41 +00006417 checkPtrmap(pCheck, pgno, PTRMAP_BTREE, iPage, 0);
danielk1977afcdd022004-10-31 16:25:42 +00006418 }
6419#endif
drh74161702006-02-24 02:53:49 +00006420 checkTreePage(pCheck, pgno, pPage, zContext);
drhda200cc2004-05-09 11:51:38 +00006421 }
drh5eddca62001-06-30 21:53:53 +00006422
6423 /* Check for complete coverage of the page
6424 */
drhda200cc2004-05-09 11:51:38 +00006425 data = pPage->aData;
6426 hdr = pPage->hdrOffset;
drh17435752007-08-16 04:30:38 +00006427 hit = sqlite3MallocZero( usableSize );
drh2e38c322004-09-03 18:38:44 +00006428 if( hit ){
6429 memset(hit, 1, get2byte(&data[hdr+5]));
6430 nCell = get2byte(&data[hdr+3]);
6431 cellStart = hdr + 12 - 4*pPage->leaf;
6432 for(i=0; i<nCell; i++){
6433 int pc = get2byte(&data[cellStart+i*2]);
6434 int size = cellSizePtr(pPage, &data[pc]);
6435 int j;
danielk19777701e812005-01-10 12:59:51 +00006436 if( (pc+size-1)>=usableSize || pc<0 ){
6437 checkAppendMsg(pCheck, 0,
6438 "Corruption detected in cell %d on page %d",i,iPage,0);
6439 }else{
6440 for(j=pc+size-1; j>=pc; j--) hit[j]++;
6441 }
drh2e38c322004-09-03 18:38:44 +00006442 }
6443 for(cnt=0, i=get2byte(&data[hdr+1]); i>0 && i<usableSize && cnt<10000;
6444 cnt++){
6445 int size = get2byte(&data[i+2]);
6446 int j;
danielk19777701e812005-01-10 12:59:51 +00006447 if( (i+size-1)>=usableSize || i<0 ){
6448 checkAppendMsg(pCheck, 0,
6449 "Corruption detected in cell %d on page %d",i,iPage,0);
6450 }else{
6451 for(j=i+size-1; j>=i; j--) hit[j]++;
6452 }
drh2e38c322004-09-03 18:38:44 +00006453 i = get2byte(&data[i]);
6454 }
6455 for(i=cnt=0; i<usableSize; i++){
6456 if( hit[i]==0 ){
6457 cnt++;
6458 }else if( hit[i]>1 ){
6459 checkAppendMsg(pCheck, 0,
6460 "Multiple uses for byte %d of page %d", i, iPage);
6461 break;
6462 }
6463 }
6464 if( cnt!=data[hdr+7] ){
6465 checkAppendMsg(pCheck, 0,
6466 "Fragmented space is %d byte reported as %d on page %d",
6467 cnt, data[hdr+7], iPage);
drh5eddca62001-06-30 21:53:53 +00006468 }
6469 }
drh17435752007-08-16 04:30:38 +00006470 sqlite3_free(hit);
drh6019e162001-07-02 17:51:45 +00006471
drh4b70f112004-05-02 21:12:19 +00006472 releasePage(pPage);
drhda200cc2004-05-09 11:51:38 +00006473 return depth+1;
drh5eddca62001-06-30 21:53:53 +00006474}
drhb7f91642004-10-31 02:22:47 +00006475#endif /* SQLITE_OMIT_INTEGRITY_CHECK */
drh5eddca62001-06-30 21:53:53 +00006476
drhb7f91642004-10-31 02:22:47 +00006477#ifndef SQLITE_OMIT_INTEGRITY_CHECK
drh5eddca62001-06-30 21:53:53 +00006478/*
6479** This routine does a complete check of the given BTree file. aRoot[] is
6480** an array of pages numbers were each page number is the root page of
6481** a table. nRoot is the number of entries in aRoot.
6482**
6483** If everything checks out, this routine returns NULL. If something is
6484** amiss, an error message is written into memory obtained from malloc()
6485** and a pointer to that error message is returned. The calling function
6486** is responsible for freeing the error message when it is done.
6487*/
drh1dcdbc02007-01-27 02:24:54 +00006488char *sqlite3BtreeIntegrityCheck(
6489 Btree *p, /* The btree to be checked */
6490 int *aRoot, /* An array of root pages numbers for individual trees */
6491 int nRoot, /* Number of entries in aRoot[] */
6492 int mxErr, /* Stop reporting errors after this many */
6493 int *pnErr /* Write number of errors seen to this variable */
6494){
drh5eddca62001-06-30 21:53:53 +00006495 int i;
6496 int nRef;
drhaaab5722002-02-19 13:39:21 +00006497 IntegrityCk sCheck;
danielk1977aef0bf62005-12-30 16:28:01 +00006498 BtShared *pBt = p->pBt;
drh5eddca62001-06-30 21:53:53 +00006499
drhd677b3d2007-08-20 22:48:41 +00006500 sqlite3BtreeEnter(p);
danielk19773b8a05f2007-03-19 17:44:26 +00006501 nRef = sqlite3PagerRefcount(pBt->pPager);
danielk1977aef0bf62005-12-30 16:28:01 +00006502 if( lockBtreeWithRetry(p)!=SQLITE_OK ){
drhd677b3d2007-08-20 22:48:41 +00006503 sqlite3BtreeLeave(p);
drh17435752007-08-16 04:30:38 +00006504 return sqlite3StrDup("Unable to acquire a read lock on the database");
drhefc251d2001-07-01 22:12:01 +00006505 }
drh5eddca62001-06-30 21:53:53 +00006506 sCheck.pBt = pBt;
6507 sCheck.pPager = pBt->pPager;
danielk19773b8a05f2007-03-19 17:44:26 +00006508 sCheck.nPage = sqlite3PagerPagecount(sCheck.pPager);
drh1dcdbc02007-01-27 02:24:54 +00006509 sCheck.mxErr = mxErr;
6510 sCheck.nErr = 0;
6511 *pnErr = 0;
danielk1977e5321f02007-04-27 07:05:44 +00006512#ifndef SQLITE_OMIT_AUTOVACUUM
6513 if( pBt->nTrunc!=0 ){
6514 sCheck.nPage = pBt->nTrunc;
6515 }
6516#endif
drh0de8c112002-07-06 16:32:14 +00006517 if( sCheck.nPage==0 ){
6518 unlockBtreeIfUnused(pBt);
drhd677b3d2007-08-20 22:48:41 +00006519 sqlite3BtreeLeave(p);
drh0de8c112002-07-06 16:32:14 +00006520 return 0;
6521 }
drh17435752007-08-16 04:30:38 +00006522 sCheck.anRef = sqlite3_malloc( (sCheck.nPage+1)*sizeof(sCheck.anRef[0]) );
danielk1977ac245ec2005-01-14 13:50:11 +00006523 if( !sCheck.anRef ){
6524 unlockBtreeIfUnused(pBt);
drh1dcdbc02007-01-27 02:24:54 +00006525 *pnErr = 1;
drhd677b3d2007-08-20 22:48:41 +00006526 sqlite3BtreeLeave(p);
danielk19771e536952007-08-16 10:09:01 +00006527 return sqlite3MPrintf(p->pSqlite, "Unable to malloc %d bytes",
danielk1977ac245ec2005-01-14 13:50:11 +00006528 (sCheck.nPage+1)*sizeof(sCheck.anRef[0]));
6529 }
drhda200cc2004-05-09 11:51:38 +00006530 for(i=0; i<=sCheck.nPage; i++){ sCheck.anRef[i] = 0; }
drh42cac6d2004-11-20 20:31:11 +00006531 i = PENDING_BYTE_PAGE(pBt);
drh1f595712004-06-15 01:40:29 +00006532 if( i<=sCheck.nPage ){
6533 sCheck.anRef[i] = 1;
6534 }
drh5eddca62001-06-30 21:53:53 +00006535 sCheck.zErrMsg = 0;
6536
6537 /* Check the integrity of the freelist
6538 */
drha34b6762004-05-07 13:30:42 +00006539 checkList(&sCheck, 1, get4byte(&pBt->pPage1->aData[32]),
6540 get4byte(&pBt->pPage1->aData[36]), "Main freelist: ");
drh5eddca62001-06-30 21:53:53 +00006541
6542 /* Check all the tables.
6543 */
drh1dcdbc02007-01-27 02:24:54 +00006544 for(i=0; i<nRoot && sCheck.mxErr; i++){
drh4ff6dfa2002-03-03 23:06:00 +00006545 if( aRoot[i]==0 ) continue;
danielk1977687566d2004-11-02 12:56:41 +00006546#ifndef SQLITE_OMIT_AUTOVACUUM
danielk1977687566d2004-11-02 12:56:41 +00006547 if( pBt->autoVacuum && aRoot[i]>1 ){
6548 checkPtrmap(&sCheck, aRoot[i], PTRMAP_ROOTPAGE, 0, 0);
6549 }
6550#endif
drh74161702006-02-24 02:53:49 +00006551 checkTreePage(&sCheck, aRoot[i], 0, "List of tree roots: ");
drh5eddca62001-06-30 21:53:53 +00006552 }
6553
6554 /* Make sure every page in the file is referenced
6555 */
drh1dcdbc02007-01-27 02:24:54 +00006556 for(i=1; i<=sCheck.nPage && sCheck.mxErr; i++){
danielk1977afcdd022004-10-31 16:25:42 +00006557#ifdef SQLITE_OMIT_AUTOVACUUM
drh5eddca62001-06-30 21:53:53 +00006558 if( sCheck.anRef[i]==0 ){
drh2e38c322004-09-03 18:38:44 +00006559 checkAppendMsg(&sCheck, 0, "Page %d is never used", i);
drh5eddca62001-06-30 21:53:53 +00006560 }
danielk1977afcdd022004-10-31 16:25:42 +00006561#else
6562 /* If the database supports auto-vacuum, make sure no tables contain
6563 ** references to pointer-map pages.
6564 */
6565 if( sCheck.anRef[i]==0 &&
danielk1977266664d2006-02-10 08:24:21 +00006566 (PTRMAP_PAGENO(pBt, i)!=i || !pBt->autoVacuum) ){
danielk1977afcdd022004-10-31 16:25:42 +00006567 checkAppendMsg(&sCheck, 0, "Page %d is never used", i);
6568 }
6569 if( sCheck.anRef[i]!=0 &&
danielk1977266664d2006-02-10 08:24:21 +00006570 (PTRMAP_PAGENO(pBt, i)==i && pBt->autoVacuum) ){
danielk1977afcdd022004-10-31 16:25:42 +00006571 checkAppendMsg(&sCheck, 0, "Pointer map page %d is referenced", i);
6572 }
6573#endif
drh5eddca62001-06-30 21:53:53 +00006574 }
6575
6576 /* Make sure this analysis did not leave any unref() pages
6577 */
drh5e00f6c2001-09-13 13:46:56 +00006578 unlockBtreeIfUnused(pBt);
danielk19773b8a05f2007-03-19 17:44:26 +00006579 if( nRef != sqlite3PagerRefcount(pBt->pPager) ){
drh2e38c322004-09-03 18:38:44 +00006580 checkAppendMsg(&sCheck, 0,
drh5eddca62001-06-30 21:53:53 +00006581 "Outstanding page count goes from %d to %d during this analysis",
danielk19773b8a05f2007-03-19 17:44:26 +00006582 nRef, sqlite3PagerRefcount(pBt->pPager)
drh5eddca62001-06-30 21:53:53 +00006583 );
drh5eddca62001-06-30 21:53:53 +00006584 }
6585
6586 /* Clean up and report errors.
6587 */
drhd677b3d2007-08-20 22:48:41 +00006588 sqlite3BtreeLeave(p);
drh17435752007-08-16 04:30:38 +00006589 sqlite3_free(sCheck.anRef);
drh1dcdbc02007-01-27 02:24:54 +00006590 *pnErr = sCheck.nErr;
drh5eddca62001-06-30 21:53:53 +00006591 return sCheck.zErrMsg;
6592}
drhb7f91642004-10-31 02:22:47 +00006593#endif /* SQLITE_OMIT_INTEGRITY_CHECK */
paulb95a8862003-04-01 21:16:41 +00006594
drh73509ee2003-04-06 20:44:45 +00006595/*
6596** Return the full pathname of the underlying database file.
drhd0679ed2007-08-28 22:24:34 +00006597**
6598** The pager filename is invariant as long as the pager is
6599** open so it is safe to access without the BtShared mutex.
drh73509ee2003-04-06 20:44:45 +00006600*/
danielk1977aef0bf62005-12-30 16:28:01 +00006601const char *sqlite3BtreeGetFilename(Btree *p){
6602 assert( p->pBt->pPager!=0 );
drhd0679ed2007-08-28 22:24:34 +00006603 assert( sqlite3_mutex_held(p->pSqlite->mutex) );
danielk19773b8a05f2007-03-19 17:44:26 +00006604 return sqlite3PagerFilename(p->pBt->pPager);
drh73509ee2003-04-06 20:44:45 +00006605}
6606
6607/*
danielk19775865e3d2004-06-14 06:03:57 +00006608** Return the pathname of the directory that contains the database file.
drhd0679ed2007-08-28 22:24:34 +00006609**
6610** The pager directory name is invariant as long as the pager is
6611** open so it is safe to access without the BtShared mutex.
danielk19775865e3d2004-06-14 06:03:57 +00006612*/
danielk1977aef0bf62005-12-30 16:28:01 +00006613const char *sqlite3BtreeGetDirname(Btree *p){
6614 assert( p->pBt->pPager!=0 );
drhd0679ed2007-08-28 22:24:34 +00006615 assert( sqlite3_mutex_held(p->pSqlite->mutex) );
danielk19773b8a05f2007-03-19 17:44:26 +00006616 return sqlite3PagerDirname(p->pBt->pPager);
danielk19775865e3d2004-06-14 06:03:57 +00006617}
6618
6619/*
6620** Return the pathname of the journal file for this database. The return
6621** value of this routine is the same regardless of whether the journal file
6622** has been created or not.
drhd0679ed2007-08-28 22:24:34 +00006623**
6624** The pager journal filename is invariant as long as the pager is
6625** open so it is safe to access without the BtShared mutex.
danielk19775865e3d2004-06-14 06:03:57 +00006626*/
danielk1977aef0bf62005-12-30 16:28:01 +00006627const char *sqlite3BtreeGetJournalname(Btree *p){
6628 assert( p->pBt->pPager!=0 );
drhd0679ed2007-08-28 22:24:34 +00006629 assert( sqlite3_mutex_held(p->pSqlite->mutex) );
danielk19773b8a05f2007-03-19 17:44:26 +00006630 return sqlite3PagerJournalname(p->pBt->pPager);
danielk19775865e3d2004-06-14 06:03:57 +00006631}
6632
drhb7f91642004-10-31 02:22:47 +00006633#ifndef SQLITE_OMIT_VACUUM
danielk19775865e3d2004-06-14 06:03:57 +00006634/*
drhf7c57532003-04-25 13:22:51 +00006635** Copy the complete content of pBtFrom into pBtTo. A transaction
6636** must be active for both files.
6637**
6638** The size of file pBtFrom may be reduced by this operation.
drh43605152004-05-29 21:46:49 +00006639** If anything goes wrong, the transaction on pBtFrom is rolled back.
drh73509ee2003-04-06 20:44:45 +00006640*/
drhd677b3d2007-08-20 22:48:41 +00006641static int btreeCopyFile(Btree *pTo, Btree *pFrom){
drhf7c57532003-04-25 13:22:51 +00006642 int rc = SQLITE_OK;
drh50f2f432005-09-16 11:32:18 +00006643 Pgno i, nPage, nToPage, iSkip;
drhf7c57532003-04-25 13:22:51 +00006644
danielk1977aef0bf62005-12-30 16:28:01 +00006645 BtShared *pBtTo = pTo->pBt;
6646 BtShared *pBtFrom = pFrom->pBt;
6647
6648 if( pTo->inTrans!=TRANS_WRITE || pFrom->inTrans!=TRANS_WRITE ){
danielk1977ee5741e2004-05-31 10:01:34 +00006649 return SQLITE_ERROR;
6650 }
drhf7c57532003-04-25 13:22:51 +00006651 if( pBtTo->pCursor ) return SQLITE_BUSY;
danielk19773b8a05f2007-03-19 17:44:26 +00006652 nToPage = sqlite3PagerPagecount(pBtTo->pPager);
6653 nPage = sqlite3PagerPagecount(pBtFrom->pPager);
drh50f2f432005-09-16 11:32:18 +00006654 iSkip = PENDING_BYTE_PAGE(pBtTo);
danielk1977369f27e2004-06-15 11:40:04 +00006655 for(i=1; rc==SQLITE_OK && i<=nPage; i++){
danielk19773b8a05f2007-03-19 17:44:26 +00006656 DbPage *pDbPage;
drh50f2f432005-09-16 11:32:18 +00006657 if( i==iSkip ) continue;
danielk19773b8a05f2007-03-19 17:44:26 +00006658 rc = sqlite3PagerGet(pBtFrom->pPager, i, &pDbPage);
drhf7c57532003-04-25 13:22:51 +00006659 if( rc ) break;
danielk19773b8a05f2007-03-19 17:44:26 +00006660 rc = sqlite3PagerOverwrite(pBtTo->pPager, i, sqlite3PagerGetData(pDbPage));
6661 sqlite3PagerUnref(pDbPage);
drhf7c57532003-04-25 13:22:51 +00006662 }
drh538f5702007-04-13 02:14:30 +00006663
6664 /* If the file is shrinking, journal the pages that are being truncated
6665 ** so that they can be rolled back if the commit fails.
6666 */
drh2e6d11b2003-04-25 15:37:57 +00006667 for(i=nPage+1; rc==SQLITE_OK && i<=nToPage; i++){
danielk19773b8a05f2007-03-19 17:44:26 +00006668 DbPage *pDbPage;
drh49285702005-09-17 15:20:26 +00006669 if( i==iSkip ) continue;
danielk19773b8a05f2007-03-19 17:44:26 +00006670 rc = sqlite3PagerGet(pBtTo->pPager, i, &pDbPage);
drh2e6d11b2003-04-25 15:37:57 +00006671 if( rc ) break;
danielk19773b8a05f2007-03-19 17:44:26 +00006672 rc = sqlite3PagerWrite(pDbPage);
drh538f5702007-04-13 02:14:30 +00006673 sqlite3PagerDontWrite(pDbPage);
6674 /* Yeah. It seems wierd to call DontWrite() right after Write(). But
6675 ** that is because the names of those procedures do not exactly
6676 ** represent what they do. Write() really means "put this page in the
6677 ** rollback journal and mark it as dirty so that it will be written
6678 ** to the database file later." DontWrite() undoes the second part of
6679 ** that and prevents the page from being written to the database. The
6680 ** page is still on the rollback journal, though. And that is the whole
6681 ** point of this loop: to put pages on the rollback journal. */
danielk19773b8a05f2007-03-19 17:44:26 +00006682 sqlite3PagerUnref(pDbPage);
drh2e6d11b2003-04-25 15:37:57 +00006683 }
6684 if( !rc && nPage<nToPage ){
danielk19773b8a05f2007-03-19 17:44:26 +00006685 rc = sqlite3PagerTruncate(pBtTo->pPager, nPage);
drh2e6d11b2003-04-25 15:37:57 +00006686 }
drh538f5702007-04-13 02:14:30 +00006687
drhf7c57532003-04-25 13:22:51 +00006688 if( rc ){
danielk1977aef0bf62005-12-30 16:28:01 +00006689 sqlite3BtreeRollback(pTo);
drhf7c57532003-04-25 13:22:51 +00006690 }
6691 return rc;
drh73509ee2003-04-06 20:44:45 +00006692}
drhd677b3d2007-08-20 22:48:41 +00006693int sqlite3BtreeCopyFile(Btree *pTo, Btree *pFrom){
6694 int rc;
6695 sqlite3BtreeEnter(pTo);
6696 sqlite3BtreeEnter(pFrom);
6697 rc = btreeCopyFile(pTo, pFrom);
6698 sqlite3BtreeLeave(pFrom);
6699 sqlite3BtreeLeave(pTo);
6700 return rc;
6701}
6702
drhb7f91642004-10-31 02:22:47 +00006703#endif /* SQLITE_OMIT_VACUUM */
danielk19771d850a72004-05-31 08:26:49 +00006704
6705/*
6706** Return non-zero if a transaction is active.
6707*/
danielk1977aef0bf62005-12-30 16:28:01 +00006708int sqlite3BtreeIsInTrans(Btree *p){
drhd0679ed2007-08-28 22:24:34 +00006709 assert( p==0 || sqlite3BtreeMutexHeld(p->pSqlite->mutex) );
danielk1977aef0bf62005-12-30 16:28:01 +00006710 return (p && (p->inTrans==TRANS_WRITE));
danielk19771d850a72004-05-31 08:26:49 +00006711}
6712
6713/*
6714** Return non-zero if a statement transaction is active.
6715*/
danielk1977aef0bf62005-12-30 16:28:01 +00006716int sqlite3BtreeIsInStmt(Btree *p){
drh27641702007-08-22 02:56:42 +00006717 assert( sqlite3BtreeMutexHeld(p->pBt->mutex) );
6718 assert( sqlite3BtreeMutexHeld(p->pSqlite->mutex) );
danielk1977aef0bf62005-12-30 16:28:01 +00006719 return (p->pBt && p->pBt->inStmt);
danielk19771d850a72004-05-31 08:26:49 +00006720}
danielk197713adf8a2004-06-03 16:08:41 +00006721
6722/*
danielk19772372c2b2006-06-27 16:34:56 +00006723** Return non-zero if a read (or write) transaction is active.
6724*/
6725int sqlite3BtreeIsInReadTrans(Btree *p){
drh27641702007-08-22 02:56:42 +00006726 assert( sqlite3BtreeMutexHeld(p->pSqlite->mutex) );
danielk19772372c2b2006-06-27 16:34:56 +00006727 return (p && (p->inTrans!=TRANS_NONE));
6728}
6729
6730/*
danielk1977da184232006-01-05 11:34:32 +00006731** This function returns a pointer to a blob of memory associated with
6732** a single shared-btree. The memory is used by client code for it's own
6733** purposes (for example, to store a high-level schema associated with
6734** the shared-btree). The btree layer manages reference counting issues.
6735**
6736** The first time this is called on a shared-btree, nBytes bytes of memory
6737** are allocated, zeroed, and returned to the caller. For each subsequent
6738** call the nBytes parameter is ignored and a pointer to the same blob
6739** of memory returned.
6740**
6741** Just before the shared-btree is closed, the function passed as the
6742** xFree argument when the memory allocation was made is invoked on the
drh17435752007-08-16 04:30:38 +00006743** blob of allocated memory. This function should not call sqlite3_free()
danielk1977da184232006-01-05 11:34:32 +00006744** on the memory, the btree layer does that.
6745*/
6746void *sqlite3BtreeSchema(Btree *p, int nBytes, void(*xFree)(void *)){
6747 BtShared *pBt = p->pBt;
drh27641702007-08-22 02:56:42 +00006748 sqlite3BtreeEnter(p);
danielk1977da184232006-01-05 11:34:32 +00006749 if( !pBt->pSchema ){
drh17435752007-08-16 04:30:38 +00006750 pBt->pSchema = sqlite3MallocZero(nBytes);
danielk1977da184232006-01-05 11:34:32 +00006751 pBt->xFreeSchema = xFree;
6752 }
drh27641702007-08-22 02:56:42 +00006753 sqlite3BtreeLeave(p);
danielk1977da184232006-01-05 11:34:32 +00006754 return pBt->pSchema;
6755}
6756
danielk1977c87d34d2006-01-06 13:00:28 +00006757/*
6758** Return true if another user of the same shared btree as the argument
6759** handle holds an exclusive lock on the sqlite_master table.
6760*/
6761int sqlite3BtreeSchemaLocked(Btree *p){
drh27641702007-08-22 02:56:42 +00006762 int rc;
6763 assert( sqlite3BtreeMutexHeld(p->pSqlite->mutex) );
6764 sqlite3BtreeEnter(p);
6765 rc = (queryTableLock(p, MASTER_ROOT, READ_LOCK)!=SQLITE_OK);
6766 sqlite3BtreeLeave(p);
6767 return rc;
danielk1977c87d34d2006-01-06 13:00:28 +00006768}
6769
drha154dcd2006-03-22 22:10:07 +00006770
6771#ifndef SQLITE_OMIT_SHARED_CACHE
6772/*
6773** Obtain a lock on the table whose root page is iTab. The
6774** lock is a write lock if isWritelock is true or a read lock
6775** if it is false.
6776*/
danielk1977c00da102006-01-07 13:21:04 +00006777int sqlite3BtreeLockTable(Btree *p, int iTab, u8 isWriteLock){
danielk19772e94d4d2006-01-09 05:36:27 +00006778 int rc = SQLITE_OK;
danielk1977c00da102006-01-07 13:21:04 +00006779 u8 lockType = (isWriteLock?WRITE_LOCK:READ_LOCK);
drhd677b3d2007-08-20 22:48:41 +00006780 sqlite3BtreeEnter(p);
danielk19772e94d4d2006-01-09 05:36:27 +00006781 rc = queryTableLock(p, iTab, lockType);
danielk1977c00da102006-01-07 13:21:04 +00006782 if( rc==SQLITE_OK ){
6783 rc = lockTable(p, iTab, lockType);
6784 }
drhd677b3d2007-08-20 22:48:41 +00006785 sqlite3BtreeLeave(p);
danielk1977c00da102006-01-07 13:21:04 +00006786 return rc;
6787}
drha154dcd2006-03-22 22:10:07 +00006788#endif
danielk1977b82e7ed2006-01-11 14:09:31 +00006789
danielk1977b4e9af92007-05-01 17:49:49 +00006790#ifndef SQLITE_OMIT_INCRBLOB
6791/*
6792** Argument pCsr must be a cursor opened for writing on an
6793** INTKEY table currently pointing at a valid table entry.
6794** This function modifies the data stored as part of that entry.
6795** Only the data content may only be modified, it is not possible
6796** to change the length of the data stored.
6797*/
danielk1977dcbb5d32007-05-04 18:36:44 +00006798int sqlite3BtreePutData(BtCursor *pCsr, u32 offset, u32 amt, void *z){
drhd0679ed2007-08-28 22:24:34 +00006799 assert( sqlite3BtreeMutexHeld(pCsr->pBt->mutex) );
drh27641702007-08-22 02:56:42 +00006800 assert( sqlite3BtreeMutexHeld(pCsr->pBtree->pSqlite->mutex) );
danielk1977dcbb5d32007-05-04 18:36:44 +00006801 assert(pCsr->isIncrblobHandle);
6802 if( pCsr->eState==CURSOR_REQUIRESEEK ){
6803 return SQLITE_ABORT;
6804 }
6805
danielk1977d04417962007-05-02 13:16:30 +00006806 /* Check some preconditions:
danielk1977dcbb5d32007-05-04 18:36:44 +00006807 ** (a) the cursor is open for writing,
6808 ** (b) there is no read-lock on the table being modified and
6809 ** (c) the cursor points at a valid row of an intKey table.
danielk1977d04417962007-05-02 13:16:30 +00006810 */
danielk1977d04417962007-05-02 13:16:30 +00006811 if( !pCsr->wrFlag ){
danielk1977dcbb5d32007-05-04 18:36:44 +00006812 return SQLITE_READONLY;
danielk1977d04417962007-05-02 13:16:30 +00006813 }
drhd0679ed2007-08-28 22:24:34 +00006814 assert( !pCsr->pBt->readOnly
6815 && pCsr->pBt->inTransaction==TRANS_WRITE );
danielk1977d04417962007-05-02 13:16:30 +00006816 if( checkReadLocks(pCsr->pBtree, pCsr->pgnoRoot, pCsr) ){
6817 return SQLITE_LOCKED; /* The table pCur points to has a read lock */
6818 }
6819 if( pCsr->eState==CURSOR_INVALID || !pCsr->pPage->intKey ){
6820 return SQLITE_ERROR;
danielk1977b4e9af92007-05-01 17:49:49 +00006821 }
6822
danielk19779f8d6402007-05-02 17:48:45 +00006823 return accessPayload(pCsr, offset, amt, (unsigned char *)z, 0, 1);
danielk1977b4e9af92007-05-01 17:49:49 +00006824}
danielk19772dec9702007-05-02 16:48:37 +00006825
6826/*
6827** Set a flag on this cursor to cache the locations of pages from the
danielk1977da107192007-05-04 08:32:13 +00006828** overflow list for the current row. This is used by cursors opened
6829** for incremental blob IO only.
6830**
6831** This function sets a flag only. The actual page location cache
6832** (stored in BtCursor.aOverflow[]) is allocated and used by function
6833** accessPayload() (the worker function for sqlite3BtreeData() and
6834** sqlite3BtreePutData()).
danielk19772dec9702007-05-02 16:48:37 +00006835*/
6836void sqlite3BtreeCacheOverflow(BtCursor *pCur){
drhd0679ed2007-08-28 22:24:34 +00006837 assert( sqlite3BtreeMutexHeld(pCur->pBt->mutex) );
drh27641702007-08-22 02:56:42 +00006838 assert( sqlite3BtreeMutexHeld(pCur->pBtree->pSqlite->mutex) );
danielk1977dcbb5d32007-05-04 18:36:44 +00006839 assert(!pCur->isIncrblobHandle);
danielk19772dec9702007-05-02 16:48:37 +00006840 assert(!pCur->aOverflow);
danielk1977dcbb5d32007-05-04 18:36:44 +00006841 pCur->isIncrblobHandle = 1;
danielk19772dec9702007-05-02 16:48:37 +00006842}
danielk1977b4e9af92007-05-01 17:49:49 +00006843#endif