blob: dafdfb6182f8b84caa931a7c955bafa961025df9 [file] [log] [blame]
drhed7c8552001-04-11 14:29:21 +00001/*
drhb19a2bc2001-09-16 00:13:26 +00002** 2001 September 15
drhed7c8552001-04-11 14:29:21 +00003**
drhb19a2bc2001-09-16 00:13:26 +00004** The author disclaims copyright to this source code. In place of
5** a legal notice, here is a blessing:
drhed7c8552001-04-11 14:29:21 +00006**
drhb19a2bc2001-09-16 00:13:26 +00007** May you do good and not evil.
8** May you find forgiveness for yourself and forgive others.
9** May you share freely, never taking more than you give.
drhed7c8552001-04-11 14:29:21 +000010**
11*************************************************************************
drhb19a2bc2001-09-16 00:13:26 +000012** This is the implementation of the page cache subsystem or "pager".
drhed7c8552001-04-11 14:29:21 +000013**
drhb19a2bc2001-09-16 00:13:26 +000014** The pager is used to access a database disk file. It implements
15** atomic commit and rollback through the use of a journal file that
16** is separate from the database file. The pager also implements file
17** locking to prevent two processes from writing the same database
18** file simultaneously, or one process from reading the database while
19** another is writing.
drhed7c8552001-04-11 14:29:21 +000020**
drh94f33312002-08-12 12:29:56 +000021** @(#) $Id: pager.c,v 1.50 2002/08/12 12:29:57 drh Exp $
drhed7c8552001-04-11 14:29:21 +000022*/
drhd9b02572001-04-15 00:37:09 +000023#include "sqliteInt.h"
drhed7c8552001-04-11 14:29:21 +000024#include "pager.h"
drh8cfbf082001-09-19 13:22:39 +000025#include "os.h"
drhed7c8552001-04-11 14:29:21 +000026#include <assert.h>
drhd9b02572001-04-15 00:37:09 +000027#include <string.h>
drhed7c8552001-04-11 14:29:21 +000028
/*
** The page cache as a whole is always in one of the following
** states:
**
**   SQLITE_UNLOCK       The page cache is not currently reading or
**                       writing the database file.  There is no
**                       data held in memory.  This is the initial
**                       state.
**
**   SQLITE_READLOCK     The page cache is reading the database.
**                       Writing is not permitted.  There can be
**                       multiple readers accessing the same database
**                       file at the same time.
**
**   SQLITE_WRITELOCK    The page cache is writing the database.
**                       Access is exclusive.  No other processes or
**                       threads can be reading or writing while one
**                       process is writing.
**
** The page cache comes up in SQLITE_UNLOCK.  The first time a
** sqlite_page_get() occurs, the state transitions to SQLITE_READLOCK.
** After all pages have been released using sqlite_page_unref(),
** the state transitions back to SQLITE_UNLOCK.  The first time
** that sqlite_page_write() is called, the state transitions to
** SQLITE_WRITELOCK.  (Note that sqlite_page_write() can only be
** called on an outstanding page which means that the pager must
** be in SQLITE_READLOCK before it transitions to SQLITE_WRITELOCK.)
** The sqlite_page_rollback() and sqlite_page_commit() functions
** transition the state from SQLITE_WRITELOCK back to SQLITE_READLOCK.
**
** NOTE(review): the routine names in the paragraph above appear to
** predate the "sqlitepager_" prefix used by the code in this file
** (for example sqlitepager_rollback()) -- confirm and rename.
**
** The numeric ordering of these values is significant: code in this
** file compares states with "<" and ">=" (for example
** "state>=SQLITE_WRITELOCK"), so the values must stay in increasing
** order of lock strength.
*/
#define SQLITE_UNLOCK      0
#define SQLITE_READLOCK    1
#define SQLITE_WRITELOCK   2
62
drhd9b02572001-04-15 00:37:09 +000063
/*
** Each in-memory image of a page begins with the following header.
** This header is only visible to this pager module.  The client
** code that calls pager sees only the data that follows the header.
**
** In memory the header is followed first by SQLITE_PAGE_SIZE bytes of
** page content and then by Pager.nExtra bytes of extra data.  Because
** the page content is located by stepping one PgHdr past the header
** pointer, the size of this structure is part of that layout.
*/
typedef struct PgHdr PgHdr;
struct PgHdr {
  Pager *pPager;                 /* The pager to which this page belongs */
  Pgno pgno;                     /* The page number for this page */
  PgHdr *pNextHash, *pPrevHash;  /* Hash collision chain for PgHdr.pgno */
  int nRef;                      /* Number of users of this page */
  PgHdr *pNextFree, *pPrevFree;  /* Freelist of pages where nRef==0 */
  PgHdr *pNextAll, *pPrevAll;    /* A list of all pages */
  u8 inJournal;                  /* TRUE if has been written to journal */
  u8 inCkpt;                     /* TRUE if written to the checkpoint journal */
  u8 dirty;                      /* TRUE if we need to write back changes */
  u8 alwaysRollback;             /* Disable dont_rollback() for this page */
  /* SQLITE_PAGE_SIZE bytes of page data follow this header */
  /* Pager.nExtra bytes of local data follow the page data */
};
84
/*
** Convert a pointer to a PgHdr into a pointer to its data
** and back again.
**
**   PGHDR_TO_DATA(P)    Address of the page content, which begins
**                       immediately after the PgHdr that P points to.
**   DATA_TO_PGHDR(D)    Inverse of the above: step back one PgHdr from
**                       the page content pointer D.
**   PGHDR_TO_EXTRA(P)   Address of the extra data, which begins
**                       SQLITE_PAGE_SIZE bytes after the page content.
*/
#define PGHDR_TO_DATA(P)  ((void*)(&(P)[1]))
#define DATA_TO_PGHDR(D)  (&((PgHdr*)(D))[-1])
#define PGHDR_TO_EXTRA(P) ((void*)&((char*)(&(P)[1]))[SQLITE_PAGE_SIZE])
drhed7c8552001-04-11 14:29:21 +000092
/*
** How big to make the hash table used for locating in-memory pages
** by page number.  Knuth says this should be a prime number.
**
** This is the number of buckets in Pager.aHash[]; a page number is
** mapped to its bucket by taking it modulo this value.
*/
#define N_PG_HASH 2003
drhed7c8552001-04-11 14:29:21 +000098
/*
** An open page cache is an instance of the following structure.
**
** sqlitepager_open() allocates the structure together with the storage
** for the zFilename and zJournal strings, so those two pointers refer
** to memory inside the same allocation and must not be freed on their
** own.
*/
struct Pager {
  char *zFilename;            /* Name of the database file */
  char *zJournal;             /* Name of the journal file */
  OsFile fd, jfd;             /* File descriptors for database and journal */
  OsFile cpfd;                /* File descriptor for the checkpoint journal */
  int dbSize;                 /* Number of pages in the file; -1 if unknown */
  int origDbSize;             /* dbSize before the current change */
  int ckptSize, ckptJSize;    /* Size of database and journal at ckpt_begin() */
  int nExtra;                 /* Add this many bytes to each in-memory page */
  void (*xDestructor)(void*); /* Call this routine when freeing pages */
  int nPage;                  /* Total number of in-memory pages */
  int nRef;                   /* Number of in-memory pages with PgHdr.nRef>0 */
  int mxPage;                 /* Maximum number of pages to hold in cache */
  int nHit, nMiss, nOvfl;     /* Cache hits, missing, and LRU overflows */
  u8 journalOpen;             /* True if the journal file descriptor is valid */
  u8 ckptOpen;                /* True if the checkpoint journal is open */
  u8 ckptInUse;               /* True if we are in a checkpoint */
  u8 noSync;                  /* Do not sync the journal if true */
  u8 state;                   /* SQLITE_UNLOCK, _READLOCK or _WRITELOCK */
  u8 errMask;                 /* Bitmask of PAGER_ERR_* error conditions */
  u8 tempFile;                /* zFilename is a temporary file */
  u8 readOnly;                /* True for a read-only database */
  u8 needSync;                /* True if an fsync() is needed on the journal */
  u8 dirtyFile;               /* True if database file has changed in any way */
  u8 alwaysRollback;          /* Disable dont_rollback() for all pages */
  u8 journalFormat;           /* Version number of the journal file */
  u8 *aInJournal;             /* One bit for each page in the database file */
  u8 *aInCkpt;                /* One bit for each page in the database */
  PgHdr *pFirst, *pLast;      /* List of free pages */
  PgHdr *pAll;                /* List of all pages */
  PgHdr *aHash[N_PG_HASH];    /* Hash table to map page number to PgHdr */
};
134
/*
** These are bits that can be set in Pager.errMask.
**
** More than one bit may be set at the same time.  pager_errcode()
** translates the mask into a single SQLITE_* result code.
*/
#define PAGER_ERR_FULL     0x01  /* a write() failed */
#define PAGER_ERR_MEM      0x02  /* malloc() failed */
#define PAGER_ERR_LOCK     0x04  /* error in the locking protocol */
#define PAGER_ERR_CORRUPT  0x08  /* database or journal corruption */
#define PAGER_ERR_DISK     0x10  /* general disk I/O error - bad hard drive? */
drhd9b02572001-04-15 00:37:09 +0000143
/*
** The journal file contains page records in the following
** format.
**
** The playback routines divide a byte count by sizeof(PageRecord) to
** work out how many records a journal holds, so the size of this
** structure must equal the number of bytes one record occupies on
** disk (a page number followed by SQLITE_PAGE_SIZE bytes of data).
*/
typedef struct PageRecord PageRecord;
struct PageRecord {
  Pgno pgno;                      /* The page number */
  char aData[SQLITE_PAGE_SIZE];   /* Original data for page pgno */
};
153
/*
** Journal files begin with the following magic string.  The data
** was obtained from /dev/random.  It is used only as a sanity check.
**
** There are two journal formats.  The older journal format writes
** 32-bit integers in the byte-order of the host machine.  The new
** format writes integers as big-endian.  All new journals use the
** new format, but we have to be able to read an older journal in order
** to roll it back.
**
** The two magic strings differ only in their final byte (0xd4 for the
** old format, 0xd5 for the new one).  pager_playback() compares the
** start of a journal against both to decide how to decode its integers.
*/
static const unsigned char aOldJournalMagic[] = {
  0xd9, 0xd5, 0x05, 0xf9, 0x20, 0xa1, 0x63, 0xd4,
};
static const unsigned char aJournalMagic[] = {
  0xd9, 0xd5, 0x05, 0xf9, 0x20, 0xa1, 0x63, 0xd5,
};
#define SQLITE_NEW_JOURNAL_FORMAT 1   /* Integers stored big-endian */
#define SQLITE_OLD_JOURNAL_FORMAT 0   /* Integers stored in host byte order */
172
/*
** The following integer, if set, causes journals to be written in the
** old format.  This is used for testing purposes only - to make sure
** the code is able to rollback an old journal.
**
** The variable exists only when SQLITE_TEST is defined.  It is
** consulted by write32bits(), which then writes integers in host
** byte order instead of big-endian.
*/
#ifdef SQLITE_TEST
int pager_old_format = 0;
#endif
drhed7c8552001-04-11 14:29:21 +0000181
/*
** Hash a page number.
**
** Yields the index of the Pager.aHash[] bucket for page number PN:
** the page number reduced modulo the number of buckets.
*/
#define pager_hash(PN)  ((PN)%N_PG_HASH)
drhed7c8552001-04-11 14:29:21 +0000186
/*
** Enable reference count tracking here:
**
** When built with SQLITE_TEST and pager_refinfo_enable is non-zero,
** REFINFO(p) prints one line giving the page number, the address of
** the page data, and the current reference count of page p.  In all
** other builds REFINFO() expands to nothing.
**
** The address is printed with %p.  Casting the pointer to int (as was
** done previously) truncates it on platforms where pointers are wider
** than int.
*/
#if SQLITE_TEST
  int pager_refinfo_enable = 0;
  static void pager_refinfo(PgHdr *p){
    static int cnt = 0;
    if( !pager_refinfo_enable ) return;
    printf(
      "REFCNT: %4d addr=%p nRef=%d\n",
      (int)p->pgno, PGHDR_TO_DATA(p), p->nRef
    );
    cnt++;   /* Something to set a breakpoint on */
  }
# define REFINFO(X)  pager_refinfo(X)
#else
# define REFINFO(X)
#endif
205
206/*
drh94f33312002-08-12 12:29:56 +0000207** Read a 32-bit integer from the given file descriptor
208*/
209static int read32bits(Pager *pPager, OsFile *fd, u32 *pRes){
210 u32 res;
211 int rc;
212 rc = sqliteOsRead(fd, &res, sizeof(res));
213 if( rc==SQLITE_OK && pPager->journalFormat==SQLITE_NEW_JOURNAL_FORMAT ){
214 unsigned char ac[4];
215 memcpy(ac, &res, 4);
216 res = (ac[0]<<24) | (ac[1]<<16) | (ac[2]<<8) | ac[3];
217 }
218 *pRes = res;
219 return rc;
220}
221
222/*
223** Write a 32-bit integer into the given file descriptor. Writing
224** is always done using the new journal format.
225*/
226static int write32bits(OsFile *fd, u32 val){
227 unsigned char ac[4];
228#ifdef SQLITE_TEST
229 if( pager_old_format ){
230 return sqliteOsWrite(fd, &val, 4);
231 }
232#endif
233 ac[0] = (val>>24) & 0xff;
234 ac[1] = (val>>16) & 0xff;
235 ac[2] = (val>>8) & 0xff;
236 ac[3] = val & 0xff;
237 return sqliteOsWrite(fd, ac, 4);
238}
239
240
241/*
drhd9b02572001-04-15 00:37:09 +0000242** Convert the bits in the pPager->errMask into an approprate
243** return code.
244*/
245static int pager_errcode(Pager *pPager){
246 int rc = SQLITE_OK;
247 if( pPager->errMask & PAGER_ERR_LOCK ) rc = SQLITE_PROTOCOL;
drh81a20f22001-10-12 17:30:04 +0000248 if( pPager->errMask & PAGER_ERR_DISK ) rc = SQLITE_IOERR;
drhd9b02572001-04-15 00:37:09 +0000249 if( pPager->errMask & PAGER_ERR_FULL ) rc = SQLITE_FULL;
250 if( pPager->errMask & PAGER_ERR_MEM ) rc = SQLITE_NOMEM;
251 if( pPager->errMask & PAGER_ERR_CORRUPT ) rc = SQLITE_CORRUPT;
252 return rc;
drhed7c8552001-04-11 14:29:21 +0000253}
254
255/*
256** Find a page in the hash table given its page number. Return
257** a pointer to the page or NULL if not found.
258*/
drhd9b02572001-04-15 00:37:09 +0000259static PgHdr *pager_lookup(Pager *pPager, Pgno pgno){
drhed7c8552001-04-11 14:29:21 +0000260 PgHdr *p = pPager->aHash[pgno % N_PG_HASH];
261 while( p && p->pgno!=pgno ){
262 p = p->pNextHash;
263 }
264 return p;
265}
266
267/*
268** Unlock the database and clear the in-memory cache. This routine
269** sets the state of the pager back to what it was when it was first
270** opened. Any outstanding pages are invalidated and subsequent attempts
271** to access those pages will likely result in a coredump.
272*/
drhd9b02572001-04-15 00:37:09 +0000273static void pager_reset(Pager *pPager){
drhed7c8552001-04-11 14:29:21 +0000274 PgHdr *pPg, *pNext;
drhd9b02572001-04-15 00:37:09 +0000275 for(pPg=pPager->pAll; pPg; pPg=pNext){
276 pNext = pPg->pNextAll;
277 sqliteFree(pPg);
drhed7c8552001-04-11 14:29:21 +0000278 }
279 pPager->pFirst = 0;
drhd9b02572001-04-15 00:37:09 +0000280 pPager->pLast = 0;
281 pPager->pAll = 0;
drhed7c8552001-04-11 14:29:21 +0000282 memset(pPager->aHash, 0, sizeof(pPager->aHash));
283 pPager->nPage = 0;
drhfa86c412002-02-02 15:01:15 +0000284 if( pPager->state>=SQLITE_WRITELOCK ){
drhd9b02572001-04-15 00:37:09 +0000285 sqlitepager_rollback(pPager);
drhed7c8552001-04-11 14:29:21 +0000286 }
drha7fcb052001-12-14 15:09:55 +0000287 sqliteOsUnlock(&pPager->fd);
drhed7c8552001-04-11 14:29:21 +0000288 pPager->state = SQLITE_UNLOCK;
drhd9b02572001-04-15 00:37:09 +0000289 pPager->dbSize = -1;
drhed7c8552001-04-11 14:29:21 +0000290 pPager->nRef = 0;
drh8cfbf082001-09-19 13:22:39 +0000291 assert( pPager->journalOpen==0 );
drhed7c8552001-04-11 14:29:21 +0000292}
293
294/*
295** When this routine is called, the pager has the journal file open and
296** a write lock on the database. This routine releases the database
297** write lock and acquires a read lock in its place. The journal file
298** is deleted and closed.
drhed7c8552001-04-11 14:29:21 +0000299*/
drhd9b02572001-04-15 00:37:09 +0000300static int pager_unwritelock(Pager *pPager){
drhed7c8552001-04-11 14:29:21 +0000301 int rc;
drhd9b02572001-04-15 00:37:09 +0000302 PgHdr *pPg;
drhfa86c412002-02-02 15:01:15 +0000303 if( pPager->state<SQLITE_WRITELOCK ) return SQLITE_OK;
drh663fc632002-02-02 18:49:19 +0000304 sqlitepager_ckpt_commit(pPager);
drh0f892532002-05-30 12:27:03 +0000305 if( pPager->ckptOpen ){
306 sqliteOsClose(&pPager->cpfd);
307 pPager->ckptOpen = 0;
308 }
drha7fcb052001-12-14 15:09:55 +0000309 sqliteOsClose(&pPager->jfd);
drh8cfbf082001-09-19 13:22:39 +0000310 pPager->journalOpen = 0;
311 sqliteOsDelete(pPager->zJournal);
drha7fcb052001-12-14 15:09:55 +0000312 rc = sqliteOsReadLock(&pPager->fd);
drh6019e162001-07-02 17:51:45 +0000313 sqliteFree( pPager->aInJournal );
314 pPager->aInJournal = 0;
drhd9b02572001-04-15 00:37:09 +0000315 for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
316 pPg->inJournal = 0;
317 pPg->dirty = 0;
318 }
drh8e298f92002-07-06 16:28:47 +0000319 if( rc==SQLITE_OK ){
320 pPager->state = SQLITE_READLOCK;
321 }else{
322 /* This can only happen if a process does a BEGIN, then forks and the
323 ** child process does the COMMIT. Because of the semantics of unix
324 ** file locking, the unlock will fail.
325 */
326 pPager->state = SQLITE_UNLOCK;
327 }
drhed7c8552001-04-11 14:29:21 +0000328 return rc;
329}
330
drhed7c8552001-04-11 14:29:21 +0000331/*
drhfa86c412002-02-02 15:01:15 +0000332** Read a single page from the journal file opened on file descriptor
333** jfd. Playback this one page.
334*/
335static int pager_playback_one_page(Pager *pPager, OsFile *jfd){
336 int rc;
337 PgHdr *pPg; /* An existing page in the cache */
338 PageRecord pgRec;
339
drh94f33312002-08-12 12:29:56 +0000340 rc = read32bits(pPager, jfd, &pgRec.pgno);
341 if( rc!=SQLITE_OK ) return rc;
342 rc = sqliteOsRead(jfd, &pgRec.aData, sizeof(pgRec.aData));
drhfa86c412002-02-02 15:01:15 +0000343 if( rc!=SQLITE_OK ) return rc;
344
345 /* Sanity checking on the page */
346 if( pgRec.pgno>pPager->dbSize || pgRec.pgno==0 ) return SQLITE_CORRUPT;
347
348 /* Playback the page. Update the in-memory copy of the page
349 ** at the same time, if there is one.
350 */
351 pPg = pager_lookup(pPager, pgRec.pgno);
352 if( pPg ){
353 memcpy(PGHDR_TO_DATA(pPg), pgRec.aData, SQLITE_PAGE_SIZE);
354 memset(PGHDR_TO_EXTRA(pPg), 0, pPager->nExtra);
355 }
356 rc = sqliteOsSeek(&pPager->fd, (pgRec.pgno-1)*SQLITE_PAGE_SIZE);
357 if( rc==SQLITE_OK ){
358 rc = sqliteOsWrite(&pPager->fd, pgRec.aData, SQLITE_PAGE_SIZE);
359 }
360 return rc;
361}
362
363/*
drhed7c8552001-04-11 14:29:21 +0000364** Playback the journal and thus restore the database file to
365** the state it was in before we started making changes.
366**
drhd9b02572001-04-15 00:37:09 +0000367** The journal file format is as follows: There is an initial
368** file-type string for sanity checking. Then there is a single
369** Pgno number which is the number of pages in the database before
370** changes were made. The database is truncated to this size.
drh306dc212001-05-21 13:45:10 +0000371** Next come zero or more page records where each page record
372** consists of a Pgno and SQLITE_PAGE_SIZE bytes of data. See
373** the PageRecord structure for details.
drhed7c8552001-04-11 14:29:21 +0000374**
drhd9b02572001-04-15 00:37:09 +0000375** If the file opened as the journal file is not a well-formed
376** journal file (as determined by looking at the magic number
377** at the beginning) then this routine returns SQLITE_PROTOCOL.
378** If any other errors occur during playback, the database will
379** likely be corrupted, so the PAGER_ERR_CORRUPT bit is set in
380** pPager->errMask and SQLITE_CORRUPT is returned. If it all
381** works, then this routine returns SQLITE_OK.
drhed7c8552001-04-11 14:29:21 +0000382*/
drhd9b02572001-04-15 00:37:09 +0000383static int pager_playback(Pager *pPager){
384 int nRec; /* Number of Records */
385 int i; /* Loop counter */
386 Pgno mxPg = 0; /* Size of the original file in pages */
drhd9b02572001-04-15 00:37:09 +0000387 unsigned char aMagic[sizeof(aJournalMagic)];
drhed7c8552001-04-11 14:29:21 +0000388 int rc;
389
drhc3a64ba2001-11-22 00:01:27 +0000390 /* Figure out how many records are in the journal. Abort early if
391 ** the journal is empty.
drhed7c8552001-04-11 14:29:21 +0000392 */
drh8cfbf082001-09-19 13:22:39 +0000393 assert( pPager->journalOpen );
drha7fcb052001-12-14 15:09:55 +0000394 sqliteOsSeek(&pPager->jfd, 0);
395 rc = sqliteOsFileSize(&pPager->jfd, &nRec);
drhc3a64ba2001-11-22 00:01:27 +0000396 if( rc!=SQLITE_OK ){
397 goto end_playback;
398 }
399 nRec = (nRec - (sizeof(aMagic)+sizeof(Pgno))) / sizeof(PageRecord);
400 if( nRec<=0 ){
401 goto end_playback;
402 }
403
404 /* Read the beginning of the journal and truncate the
405 ** database file back to its original size.
406 */
drha7fcb052001-12-14 15:09:55 +0000407 rc = sqliteOsRead(&pPager->jfd, aMagic, sizeof(aMagic));
drh94f33312002-08-12 12:29:56 +0000408 if( rc!=SQLITE_OK ){
drh81a20f22001-10-12 17:30:04 +0000409 rc = SQLITE_PROTOCOL;
410 goto end_playback;
drhd9b02572001-04-15 00:37:09 +0000411 }
drh94f33312002-08-12 12:29:56 +0000412 if( memcmp(aMagic, aOldJournalMagic, sizeof(aMagic))==0 ){
413 pPager->journalFormat = SQLITE_OLD_JOURNAL_FORMAT;
414 }else if( memcmp(aMagic, aJournalMagic, sizeof(aMagic))==0 ){
415 pPager->journalFormat = SQLITE_NEW_JOURNAL_FORMAT;
416 }else{
417 rc = SQLITE_PROTOCOL;
418 goto end_playback;
419 }
420 rc = read32bits(pPager, &pPager->jfd, &mxPg);
drhd9b02572001-04-15 00:37:09 +0000421 if( rc!=SQLITE_OK ){
drh81a20f22001-10-12 17:30:04 +0000422 goto end_playback;
drhd9b02572001-04-15 00:37:09 +0000423 }
drha7fcb052001-12-14 15:09:55 +0000424 rc = sqliteOsTruncate(&pPager->fd, mxPg*SQLITE_PAGE_SIZE);
drh81a20f22001-10-12 17:30:04 +0000425 if( rc!=SQLITE_OK ){
426 goto end_playback;
427 }
drhd9b02572001-04-15 00:37:09 +0000428 pPager->dbSize = mxPg;
429
drhfa86c412002-02-02 15:01:15 +0000430 /* Copy original pages out of the journal and back into the database file.
drhed7c8552001-04-11 14:29:21 +0000431 */
drhd9b02572001-04-15 00:37:09 +0000432 for(i=nRec-1; i>=0; i--){
drhfa86c412002-02-02 15:01:15 +0000433 rc = pager_playback_one_page(pPager, &pPager->jfd);
drhd9b02572001-04-15 00:37:09 +0000434 if( rc!=SQLITE_OK ) break;
drhed7c8552001-04-11 14:29:21 +0000435 }
drh81a20f22001-10-12 17:30:04 +0000436
437end_playback:
drhd9b02572001-04-15 00:37:09 +0000438 if( rc!=SQLITE_OK ){
439 pager_unwritelock(pPager);
440 pPager->errMask |= PAGER_ERR_CORRUPT;
441 rc = SQLITE_CORRUPT;
442 }else{
443 rc = pager_unwritelock(pPager);
drhed7c8552001-04-11 14:29:21 +0000444 }
drhd9b02572001-04-15 00:37:09 +0000445 return rc;
drhed7c8552001-04-11 14:29:21 +0000446}
447
448/*
drhfa86c412002-02-02 15:01:15 +0000449** Playback the checkpoint journal.
450**
451** This is similar to playing back the transaction journal but with
452** a few extra twists.
453**
drh663fc632002-02-02 18:49:19 +0000454** (1) The number of pages in the database file at the start of
455** the checkpoint is stored in pPager->ckptSize, not in the
456** journal file itself.
drhfa86c412002-02-02 15:01:15 +0000457**
458** (2) In addition to playing back the checkpoint journal, also
459** playback all pages of the transaction journal beginning
460** at offset pPager->ckptJSize.
461*/
462static int pager_ckpt_playback(Pager *pPager){
463 int nRec; /* Number of Records */
464 int i; /* Loop counter */
465 int rc;
466
467 /* Truncate the database back to its original size.
468 */
drh663fc632002-02-02 18:49:19 +0000469 rc = sqliteOsTruncate(&pPager->fd, pPager->ckptSize*SQLITE_PAGE_SIZE);
drhfa86c412002-02-02 15:01:15 +0000470 pPager->dbSize = pPager->ckptSize;
471
472 /* Figure out how many records are in the checkpoint journal.
473 */
drh0f892532002-05-30 12:27:03 +0000474 assert( pPager->ckptInUse && pPager->journalOpen );
drhfa86c412002-02-02 15:01:15 +0000475 sqliteOsSeek(&pPager->cpfd, 0);
476 rc = sqliteOsFileSize(&pPager->cpfd, &nRec);
477 if( rc!=SQLITE_OK ){
478 goto end_ckpt_playback;
479 }
480 nRec /= sizeof(PageRecord);
481
482 /* Copy original pages out of the checkpoint journal and back into the
483 ** database file.
484 */
drh94f33312002-08-12 12:29:56 +0000485 pPager->journalFormat = SQLITE_NEW_JOURNAL_FORMAT;
drhfa86c412002-02-02 15:01:15 +0000486 for(i=nRec-1; i>=0; i--){
487 rc = pager_playback_one_page(pPager, &pPager->cpfd);
488 if( rc!=SQLITE_OK ) goto end_ckpt_playback;
489 }
490
491 /* Figure out how many pages need to be copied out of the transaction
492 ** journal.
493 */
494 rc = sqliteOsSeek(&pPager->jfd, pPager->ckptJSize);
495 if( rc!=SQLITE_OK ){
496 goto end_ckpt_playback;
497 }
498 rc = sqliteOsFileSize(&pPager->jfd, &nRec);
499 if( rc!=SQLITE_OK ){
500 goto end_ckpt_playback;
501 }
502 nRec = (nRec - pPager->ckptJSize)/sizeof(PageRecord);
503 for(i=nRec-1; i>=0; i--){
504 rc = pager_playback_one_page(pPager, &pPager->jfd);
505 if( rc!=SQLITE_OK ) goto end_ckpt_playback;
506 }
507
508
509end_ckpt_playback:
drhfa86c412002-02-02 15:01:15 +0000510 if( rc!=SQLITE_OK ){
drhfa86c412002-02-02 15:01:15 +0000511 pPager->errMask |= PAGER_ERR_CORRUPT;
512 rc = SQLITE_CORRUPT;
drhfa86c412002-02-02 15:01:15 +0000513 }
514 return rc;
515}
516
517/*
drhf57b14a2001-09-14 18:54:08 +0000518** Change the maximum number of in-memory pages that are allowed.
drhcd61c282002-03-06 22:01:34 +0000519**
520** The maximum number is the absolute value of the mxPage parameter.
521** If mxPage is negative, the noSync flag is also set. noSync bypasses
522** calls to sqliteOsSync(). The pager runs much faster with noSync on,
523** but if the operating system crashes or there is an abrupt power
524** failure, the database file might be left in an inconsistent and
525** unrepairable state.
drhf57b14a2001-09-14 18:54:08 +0000526*/
527void sqlitepager_set_cachesize(Pager *pPager, int mxPage){
drh603240c2002-03-05 01:11:12 +0000528 if( mxPage>=0 ){
drha1680452002-04-18 01:56:57 +0000529 pPager->noSync = pPager->tempFile;
drh603240c2002-03-05 01:11:12 +0000530 }else{
531 pPager->noSync = 1;
532 mxPage = -mxPage;
533 }
drhf57b14a2001-09-14 18:54:08 +0000534 if( mxPage>10 ){
535 pPager->mxPage = mxPage;
536 }
537}
538
539/*
drhfa86c412002-02-02 15:01:15 +0000540** Open a temporary file. Write the name of the file into zName
541** (zName must be at least SQLITE_TEMPNAME_SIZE bytes long.) Write
542** the file descriptor into *fd. Return SQLITE_OK on success or some
543** other error code if we fail.
544**
545** The OS will automatically delete the temporary file when it is
546** closed.
547*/
548static int sqlitepager_opentemp(char *zFile, OsFile *fd){
549 int cnt = 8;
550 int rc;
551 do{
552 cnt--;
553 sqliteOsTempFileName(zFile);
554 rc = sqliteOsOpenExclusive(zFile, fd, 1);
555 }while( cnt>0 && rc!=SQLITE_OK );
556 return rc;
557}
558
559/*
drhed7c8552001-04-11 14:29:21 +0000560** Create a new page cache and put a pointer to the page cache in *ppPager.
drh5e00f6c2001-09-13 13:46:56 +0000561** The file to be cached need not exist. The file is not locked until
drhd9b02572001-04-15 00:37:09 +0000562** the first call to sqlitepager_get() and is only held open until the
563** last page is released using sqlitepager_unref().
drh382c0242001-10-06 16:33:02 +0000564**
drh6446c4d2001-12-15 14:22:18 +0000565** If zFilename is NULL then a randomly-named temporary file is created
566** and used as the file to be cached. The file will be deleted
567** automatically when it is closed.
drhed7c8552001-04-11 14:29:21 +0000568*/
drh7e3b0a02001-04-28 16:52:40 +0000569int sqlitepager_open(
570 Pager **ppPager, /* Return the Pager structure here */
571 const char *zFilename, /* Name of the database file to open */
572 int mxPage, /* Max number of in-memory cache pages */
573 int nExtra /* Extra bytes append to each in-memory page */
574){
drhed7c8552001-04-11 14:29:21 +0000575 Pager *pPager;
576 int nameLen;
drh8cfbf082001-09-19 13:22:39 +0000577 OsFile fd;
578 int rc;
drh5e00f6c2001-09-13 13:46:56 +0000579 int tempFile;
580 int readOnly = 0;
drh8cfbf082001-09-19 13:22:39 +0000581 char zTemp[SQLITE_TEMPNAME_SIZE];
drhed7c8552001-04-11 14:29:21 +0000582
drhd9b02572001-04-15 00:37:09 +0000583 *ppPager = 0;
584 if( sqlite_malloc_failed ){
585 return SQLITE_NOMEM;
586 }
drh5e00f6c2001-09-13 13:46:56 +0000587 if( zFilename ){
drh8cfbf082001-09-19 13:22:39 +0000588 rc = sqliteOsOpenReadWrite(zFilename, &fd, &readOnly);
drh5e00f6c2001-09-13 13:46:56 +0000589 tempFile = 0;
590 }else{
drhfa86c412002-02-02 15:01:15 +0000591 rc = sqlitepager_opentemp(zTemp, &fd);
drh5e00f6c2001-09-13 13:46:56 +0000592 zFilename = zTemp;
593 tempFile = 1;
594 }
drh8cfbf082001-09-19 13:22:39 +0000595 if( rc!=SQLITE_OK ){
drhed7c8552001-04-11 14:29:21 +0000596 return SQLITE_CANTOPEN;
597 }
598 nameLen = strlen(zFilename);
599 pPager = sqliteMalloc( sizeof(*pPager) + nameLen*2 + 30 );
drhd9b02572001-04-15 00:37:09 +0000600 if( pPager==0 ){
drha7fcb052001-12-14 15:09:55 +0000601 sqliteOsClose(&fd);
drhd9b02572001-04-15 00:37:09 +0000602 return SQLITE_NOMEM;
603 }
drhed7c8552001-04-11 14:29:21 +0000604 pPager->zFilename = (char*)&pPager[1];
605 pPager->zJournal = &pPager->zFilename[nameLen+1];
606 strcpy(pPager->zFilename, zFilename);
607 strcpy(pPager->zJournal, zFilename);
608 strcpy(&pPager->zJournal[nameLen], "-journal");
609 pPager->fd = fd;
drh8cfbf082001-09-19 13:22:39 +0000610 pPager->journalOpen = 0;
drhfa86c412002-02-02 15:01:15 +0000611 pPager->ckptOpen = 0;
drh0f892532002-05-30 12:27:03 +0000612 pPager->ckptInUse = 0;
drhed7c8552001-04-11 14:29:21 +0000613 pPager->nRef = 0;
614 pPager->dbSize = -1;
drhfa86c412002-02-02 15:01:15 +0000615 pPager->ckptSize = 0;
616 pPager->ckptJSize = 0;
drhed7c8552001-04-11 14:29:21 +0000617 pPager->nPage = 0;
drhd79caeb2001-04-15 02:27:24 +0000618 pPager->mxPage = mxPage>5 ? mxPage : 10;
drhed7c8552001-04-11 14:29:21 +0000619 pPager->state = SQLITE_UNLOCK;
drhd9b02572001-04-15 00:37:09 +0000620 pPager->errMask = 0;
drh5e00f6c2001-09-13 13:46:56 +0000621 pPager->tempFile = tempFile;
622 pPager->readOnly = readOnly;
drhf57b14a2001-09-14 18:54:08 +0000623 pPager->needSync = 0;
drha1680452002-04-18 01:56:57 +0000624 pPager->noSync = pPager->tempFile;
drhed7c8552001-04-11 14:29:21 +0000625 pPager->pFirst = 0;
626 pPager->pLast = 0;
drh7c717f72001-06-24 20:39:41 +0000627 pPager->nExtra = nExtra;
drhed7c8552001-04-11 14:29:21 +0000628 memset(pPager->aHash, 0, sizeof(pPager->aHash));
629 *ppPager = pPager;
630 return SQLITE_OK;
631}
632
633/*
drh72f82862001-05-24 21:06:34 +0000634** Set the destructor for this pager. If not NULL, the destructor is called
drh5e00f6c2001-09-13 13:46:56 +0000635** when the reference count on each page reaches zero. The destructor can
636** be used to clean up information in the extra segment appended to each page.
drh72f82862001-05-24 21:06:34 +0000637**
638** The destructor is not called as a result sqlitepager_close().
639** Destructors are only called by sqlitepager_unref().
640*/
641void sqlitepager_set_destructor(Pager *pPager, void (*xDesc)(void*)){
642 pPager->xDestructor = xDesc;
643}
644
645/*
drh5e00f6c2001-09-13 13:46:56 +0000646** Return the total number of pages in the disk file associated with
647** pPager.
drhed7c8552001-04-11 14:29:21 +0000648*/
drhd9b02572001-04-15 00:37:09 +0000649int sqlitepager_pagecount(Pager *pPager){
drhed7c8552001-04-11 14:29:21 +0000650 int n;
drhd9b02572001-04-15 00:37:09 +0000651 assert( pPager!=0 );
drhed7c8552001-04-11 14:29:21 +0000652 if( pPager->dbSize>=0 ){
653 return pPager->dbSize;
654 }
drha7fcb052001-12-14 15:09:55 +0000655 if( sqliteOsFileSize(&pPager->fd, &n)!=SQLITE_OK ){
drh81a20f22001-10-12 17:30:04 +0000656 pPager->errMask |= PAGER_ERR_DISK;
drh8cfbf082001-09-19 13:22:39 +0000657 return 0;
drhed7c8552001-04-11 14:29:21 +0000658 }
drh8cfbf082001-09-19 13:22:39 +0000659 n /= SQLITE_PAGE_SIZE;
drhd9b02572001-04-15 00:37:09 +0000660 if( pPager->state!=SQLITE_UNLOCK ){
drhed7c8552001-04-11 14:29:21 +0000661 pPager->dbSize = n;
662 }
663 return n;
664}
665
666/*
667** Shutdown the page cache. Free all memory and close all files.
668**
669** If a transaction was in progress when this routine is called, that
670** transaction is rolled back. All outstanding pages are invalidated
671** and their memory is freed. Any attempt to use a page associated
672** with this page cache after this function returns will likely
673** result in a coredump.
674*/
drhd9b02572001-04-15 00:37:09 +0000675int sqlitepager_close(Pager *pPager){
676 PgHdr *pPg, *pNext;
drhed7c8552001-04-11 14:29:21 +0000677 switch( pPager->state ){
678 case SQLITE_WRITELOCK: {
drhd9b02572001-04-15 00:37:09 +0000679 sqlitepager_rollback(pPager);
drha7fcb052001-12-14 15:09:55 +0000680 sqliteOsUnlock(&pPager->fd);
drh8cfbf082001-09-19 13:22:39 +0000681 assert( pPager->journalOpen==0 );
drhed7c8552001-04-11 14:29:21 +0000682 break;
683 }
684 case SQLITE_READLOCK: {
drha7fcb052001-12-14 15:09:55 +0000685 sqliteOsUnlock(&pPager->fd);
drhed7c8552001-04-11 14:29:21 +0000686 break;
687 }
688 default: {
689 /* Do nothing */
690 break;
691 }
692 }
drhd9b02572001-04-15 00:37:09 +0000693 for(pPg=pPager->pAll; pPg; pPg=pNext){
694 pNext = pPg->pNextAll;
695 sqliteFree(pPg);
drhed7c8552001-04-11 14:29:21 +0000696 }
drha7fcb052001-12-14 15:09:55 +0000697 sqliteOsClose(&pPager->fd);
drh8cfbf082001-09-19 13:22:39 +0000698 assert( pPager->journalOpen==0 );
drh0f892532002-05-30 12:27:03 +0000699 /* Temp files are automatically deleted by the OS
700 ** if( pPager->tempFile ){
701 ** sqliteOsDelete(pPager->zFilename);
702 ** }
703 */
drhed7c8552001-04-11 14:29:21 +0000704 sqliteFree(pPager);
705 return SQLITE_OK;
706}
707
708/*
drh5e00f6c2001-09-13 13:46:56 +0000709** Return the page number for the given page data.
drhed7c8552001-04-11 14:29:21 +0000710*/
drhd9b02572001-04-15 00:37:09 +0000711Pgno sqlitepager_pagenumber(void *pData){
drhed7c8552001-04-11 14:29:21 +0000712 PgHdr *p = DATA_TO_PGHDR(pData);
713 return p->pgno;
714}
715
716/*
drh7e3b0a02001-04-28 16:52:40 +0000717** Increment the reference count for a page. If the page is
718** currently on the freelist (the reference count is zero) then
719** remove it from the freelist.
720*/
drhdf0b3b02001-06-23 11:36:20 +0000721static void page_ref(PgHdr *pPg){
drh7e3b0a02001-04-28 16:52:40 +0000722 if( pPg->nRef==0 ){
723 /* The page is currently on the freelist. Remove it. */
724 if( pPg->pPrevFree ){
725 pPg->pPrevFree->pNextFree = pPg->pNextFree;
726 }else{
727 pPg->pPager->pFirst = pPg->pNextFree;
728 }
729 if( pPg->pNextFree ){
730 pPg->pNextFree->pPrevFree = pPg->pPrevFree;
731 }else{
732 pPg->pPager->pLast = pPg->pPrevFree;
733 }
734 pPg->pPager->nRef++;
735 }
736 pPg->nRef++;
drhdd793422001-06-28 01:54:48 +0000737 REFINFO(pPg);
drhdf0b3b02001-06-23 11:36:20 +0000738}
739
740/*
741** Increment the reference count for a page. The input pointer is
742** a reference to the page data.
743*/
744int sqlitepager_ref(void *pData){
745 PgHdr *pPg = DATA_TO_PGHDR(pData);
746 page_ref(pPg);
drh8c42ca92001-06-22 19:15:00 +0000747 return SQLITE_OK;
drh7e3b0a02001-04-28 16:52:40 +0000748}
749
750/*
drhb19a2bc2001-09-16 00:13:26 +0000751** Sync the journal and then write all free dirty pages to the database
752** file.
753**
754** Writing all free dirty pages to the database after the sync is a
755** non-obvious optimization. fsync() is an expensive operation so we
drhaaab5722002-02-19 13:39:21 +0000756** want to minimize the number ot times it is called. After an fsync() call,
drh6446c4d2001-12-15 14:22:18 +0000757** we are free to write dirty pages back to the database. It is best
758** to go ahead and write as many dirty pages as possible to minimize
759** the risk of having to do another fsync() later on. Writing dirty
760** free pages in this way was observed to make database operations go
761** up to 10 times faster.
drhfa86c412002-02-02 15:01:15 +0000762**
763** If we are writing to temporary database, there is no need to preserve
764** the integrity of the journal file, so we can save time and skip the
765** fsync().
drh50e5dad2001-09-15 00:57:28 +0000766*/
767static int syncAllPages(Pager *pPager){
768 PgHdr *pPg;
769 int rc = SQLITE_OK;
770 if( pPager->needSync ){
drhfa86c412002-02-02 15:01:15 +0000771 if( !pPager->tempFile ){
772 rc = sqliteOsSync(&pPager->jfd);
773 if( rc!=0 ) return rc;
774 }
drh50e5dad2001-09-15 00:57:28 +0000775 pPager->needSync = 0;
776 }
777 for(pPg=pPager->pFirst; pPg; pPg=pPg->pNextFree){
778 if( pPg->dirty ){
drha7fcb052001-12-14 15:09:55 +0000779 sqliteOsSeek(&pPager->fd, (pPg->pgno-1)*SQLITE_PAGE_SIZE);
780 rc = sqliteOsWrite(&pPager->fd, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE);
drh50e5dad2001-09-15 00:57:28 +0000781 if( rc!=SQLITE_OK ) break;
782 pPg->dirty = 0;
783 }
784 }
drh81a20f22001-10-12 17:30:04 +0000785 return rc;
drh50e5dad2001-09-15 00:57:28 +0000786}
787
788/*
drhd9b02572001-04-15 00:37:09 +0000789** Acquire a page.
790**
drh58a11682001-11-10 13:51:08 +0000791** A read lock on the disk file is obtained when the first page is acquired.
drh5e00f6c2001-09-13 13:46:56 +0000792** This read lock is dropped when the last page is released.
drhd9b02572001-04-15 00:37:09 +0000793**
drh306dc212001-05-21 13:45:10 +0000794** A _get works for any page number greater than 0. If the database
795** file is smaller than the requested page, then no actual disk
796** read occurs and the memory image of the page is initialized to
797** all zeros. The extra data appended to a page is always initialized
798** to zeros the first time a page is loaded into memory.
799**
drhd9b02572001-04-15 00:37:09 +0000800** The acquisition might fail for several reasons. In all cases,
801** an appropriate error code is returned and *ppPage is set to NULL.
drh7e3b0a02001-04-28 16:52:40 +0000802**
803** See also sqlitepager_lookup(). Both this routine and _lookup() attempt
804** to find a page in the in-memory cache first. If the page is not already
drh5e00f6c2001-09-13 13:46:56 +0000805** in memory, this routine goes to disk to read it in whereas _lookup()
drh7e3b0a02001-04-28 16:52:40 +0000806** just returns 0. This routine acquires a read-lock the first time it
807** has to go to disk, and could also playback an old journal if necessary.
808** Since _lookup() never goes to disk, it never has to deal with locks
809** or journal files.
drhed7c8552001-04-11 14:29:21 +0000810*/
drhd9b02572001-04-15 00:37:09 +0000811int sqlitepager_get(Pager *pPager, Pgno pgno, void **ppPage){
drhed7c8552001-04-11 14:29:21 +0000812 PgHdr *pPg;
813
drhd9b02572001-04-15 00:37:09 +0000814 /* Make sure we have not hit any critical errors.
815 */
816 if( pPager==0 || pgno==0 ){
817 return SQLITE_ERROR;
818 }
819 if( pPager->errMask & ~(PAGER_ERR_FULL) ){
820 return pager_errcode(pPager);
821 }
822
drhed7c8552001-04-11 14:29:21 +0000823 /* If this is the first page accessed, then get a read lock
824 ** on the database file.
825 */
826 if( pPager->nRef==0 ){
drha7fcb052001-12-14 15:09:55 +0000827 if( sqliteOsReadLock(&pPager->fd)!=SQLITE_OK ){
drhed7c8552001-04-11 14:29:21 +0000828 *ppPage = 0;
829 return SQLITE_BUSY;
830 }
drhd9b02572001-04-15 00:37:09 +0000831 pPager->state = SQLITE_READLOCK;
drhed7c8552001-04-11 14:29:21 +0000832
833 /* If a journal file exists, try to play it back.
834 */
drh8cfbf082001-09-19 13:22:39 +0000835 if( sqliteOsFileExists(pPager->zJournal) ){
drhf57b3392001-10-08 13:22:32 +0000836 int rc, dummy;
drhed7c8552001-04-11 14:29:21 +0000837
drha7fcb052001-12-14 15:09:55 +0000838 /* Get a write lock on the database
839 */
840 rc = sqliteOsWriteLock(&pPager->fd);
841 if( rc!=SQLITE_OK ){
drh6446c4d2001-12-15 14:22:18 +0000842 rc = sqliteOsUnlock(&pPager->fd);
drha7fcb052001-12-14 15:09:55 +0000843 assert( rc==SQLITE_OK );
844 *ppPage = 0;
845 return SQLITE_BUSY;
846 }
847 pPager->state = SQLITE_WRITELOCK;
848
drhed7c8552001-04-11 14:29:21 +0000849 /* Open the journal for exclusive access. Return SQLITE_BUSY if
drhf57b3392001-10-08 13:22:32 +0000850 ** we cannot get exclusive access to the journal file.
851 **
852 ** Even though we will only be reading from the journal, not writing,
853 ** we have to open the journal for writing in order to obtain an
854 ** exclusive access lock.
drhed7c8552001-04-11 14:29:21 +0000855 */
drhf57b3392001-10-08 13:22:32 +0000856 rc = sqliteOsOpenReadWrite(pPager->zJournal, &pPager->jfd, &dummy);
drha7fcb052001-12-14 15:09:55 +0000857 if( rc!=SQLITE_OK ){
858 rc = sqliteOsUnlock(&pPager->fd);
859 assert( rc==SQLITE_OK );
drhed7c8552001-04-11 14:29:21 +0000860 *ppPage = 0;
861 return SQLITE_BUSY;
862 }
drha7fcb052001-12-14 15:09:55 +0000863 pPager->journalOpen = 1;
drhed7c8552001-04-11 14:29:21 +0000864
865 /* Playback and delete the journal. Drop the database write
866 ** lock and reacquire the read lock.
867 */
drhd9b02572001-04-15 00:37:09 +0000868 rc = pager_playback(pPager);
869 if( rc!=SQLITE_OK ){
870 return rc;
871 }
drhed7c8552001-04-11 14:29:21 +0000872 }
873 pPg = 0;
874 }else{
875 /* Search for page in cache */
drhd9b02572001-04-15 00:37:09 +0000876 pPg = pager_lookup(pPager, pgno);
drhed7c8552001-04-11 14:29:21 +0000877 }
878 if( pPg==0 ){
drhd9b02572001-04-15 00:37:09 +0000879 /* The requested page is not in the page cache. */
drhed7c8552001-04-11 14:29:21 +0000880 int h;
drh7e3b0a02001-04-28 16:52:40 +0000881 pPager->nMiss++;
drhed7c8552001-04-11 14:29:21 +0000882 if( pPager->nPage<pPager->mxPage || pPager->pFirst==0 ){
883 /* Create a new page */
drh7e3b0a02001-04-28 16:52:40 +0000884 pPg = sqliteMalloc( sizeof(*pPg) + SQLITE_PAGE_SIZE + pPager->nExtra );
drhd9b02572001-04-15 00:37:09 +0000885 if( pPg==0 ){
886 *ppPage = 0;
887 pager_unwritelock(pPager);
888 pPager->errMask |= PAGER_ERR_MEM;
889 return SQLITE_NOMEM;
890 }
drhed7c8552001-04-11 14:29:21 +0000891 pPg->pPager = pPager;
drhd9b02572001-04-15 00:37:09 +0000892 pPg->pNextAll = pPager->pAll;
893 if( pPager->pAll ){
894 pPager->pAll->pPrevAll = pPg;
895 }
896 pPg->pPrevAll = 0;
drhd79caeb2001-04-15 02:27:24 +0000897 pPager->pAll = pPg;
drhd9b02572001-04-15 00:37:09 +0000898 pPager->nPage++;
drhed7c8552001-04-11 14:29:21 +0000899 }else{
drhd9b02572001-04-15 00:37:09 +0000900 /* Recycle an older page. First locate the page to be recycled.
901 ** Try to find one that is not dirty and is near the head of
902 ** of the free list */
drhed7c8552001-04-11 14:29:21 +0000903 pPg = pPager->pFirst;
drh603240c2002-03-05 01:11:12 +0000904 while( pPg && pPg->dirty ){
drhd9b02572001-04-15 00:37:09 +0000905 pPg = pPg->pNextFree;
906 }
drhb19a2bc2001-09-16 00:13:26 +0000907
908 /* If we could not find a page that has not been used recently
909 ** and which is not dirty, then sync the journal and write all
910 ** dirty free pages into the database file, thus making them
911 ** clean pages and available for recycling.
912 **
913 ** We have to sync the journal before writing a page to the main
914 ** database. But syncing is a very slow operation. So after a
915 ** sync, it is best to write everything we can back to the main
916 ** database to minimize the risk of having to sync again in the
drh94f33312002-08-12 12:29:56 +0000917 ** near future. That is why we write all dirty pages after a
drhb19a2bc2001-09-16 00:13:26 +0000918 ** sync.
919 */
drh603240c2002-03-05 01:11:12 +0000920 if( pPg==0 ){
drh50e5dad2001-09-15 00:57:28 +0000921 int rc = syncAllPages(pPager);
922 if( rc!=0 ){
923 sqlitepager_rollback(pPager);
924 *ppPage = 0;
925 return SQLITE_IOERR;
926 }
927 pPg = pPager->pFirst;
928 }
drhd9b02572001-04-15 00:37:09 +0000929 assert( pPg->nRef==0 );
drh50e5dad2001-09-15 00:57:28 +0000930 assert( pPg->dirty==0 );
drhd9b02572001-04-15 00:37:09 +0000931
drh193a6b42002-07-07 16:52:46 +0000932 /* If the page we are recyclying is marked as alwaysRollback, then
933 ** set the global alwaysRollback flag, thus disabling the
934 ** sqlite_dont_rollback() optimization for the rest of this transaction.
935 ** It is necessary to do this because the page marked alwaysRollback
936 ** might be reloaded at a later time but at that point we won't remember
937 ** that is was marked alwaysRollback. This means that all pages must
938 ** be marked as alwaysRollback from here on out.
939 */
940 if( pPg->alwaysRollback ){
941 pPager->alwaysRollback = 1;
942 }
943
drhd9b02572001-04-15 00:37:09 +0000944 /* Unlink the old page from the free list and the hash table
945 */
drh6019e162001-07-02 17:51:45 +0000946 if( pPg->pPrevFree ){
947 pPg->pPrevFree->pNextFree = pPg->pNextFree;
drhed7c8552001-04-11 14:29:21 +0000948 }else{
drh6019e162001-07-02 17:51:45 +0000949 assert( pPager->pFirst==pPg );
950 pPager->pFirst = pPg->pNextFree;
drhed7c8552001-04-11 14:29:21 +0000951 }
drh6019e162001-07-02 17:51:45 +0000952 if( pPg->pNextFree ){
953 pPg->pNextFree->pPrevFree = pPg->pPrevFree;
954 }else{
955 assert( pPager->pLast==pPg );
956 pPager->pLast = pPg->pPrevFree;
957 }
958 pPg->pNextFree = pPg->pPrevFree = 0;
drhed7c8552001-04-11 14:29:21 +0000959 if( pPg->pNextHash ){
960 pPg->pNextHash->pPrevHash = pPg->pPrevHash;
961 }
962 if( pPg->pPrevHash ){
963 pPg->pPrevHash->pNextHash = pPg->pNextHash;
964 }else{
drhd9b02572001-04-15 00:37:09 +0000965 h = pager_hash(pPg->pgno);
drhed7c8552001-04-11 14:29:21 +0000966 assert( pPager->aHash[h]==pPg );
967 pPager->aHash[h] = pPg->pNextHash;
968 }
drh6019e162001-07-02 17:51:45 +0000969 pPg->pNextHash = pPg->pPrevHash = 0;
drhd9b02572001-04-15 00:37:09 +0000970 pPager->nOvfl++;
drhed7c8552001-04-11 14:29:21 +0000971 }
972 pPg->pgno = pgno;
drh1ab43002002-01-14 09:28:19 +0000973 if( pPager->aInJournal && (int)pgno<=pPager->origDbSize ){
drh6019e162001-07-02 17:51:45 +0000974 pPg->inJournal = (pPager->aInJournal[pgno/8] & (1<<(pgno&7)))!=0;
975 }else{
976 pPg->inJournal = 0;
977 }
drh663fc632002-02-02 18:49:19 +0000978 if( pPager->aInCkpt && (int)pgno<=pPager->ckptSize ){
drhfa86c412002-02-02 15:01:15 +0000979 pPg->inCkpt = (pPager->aInCkpt[pgno/8] & (1<<(pgno&7)))!=0;
980 }else{
981 pPg->inCkpt = 0;
982 }
drhed7c8552001-04-11 14:29:21 +0000983 pPg->dirty = 0;
984 pPg->nRef = 1;
drhdd793422001-06-28 01:54:48 +0000985 REFINFO(pPg);
drhd9b02572001-04-15 00:37:09 +0000986 pPager->nRef++;
987 h = pager_hash(pgno);
drhed7c8552001-04-11 14:29:21 +0000988 pPg->pNextHash = pPager->aHash[h];
989 pPager->aHash[h] = pPg;
990 if( pPg->pNextHash ){
991 assert( pPg->pNextHash->pPrevHash==0 );
992 pPg->pNextHash->pPrevHash = pPg;
993 }
drh306dc212001-05-21 13:45:10 +0000994 if( pPager->dbSize<0 ) sqlitepager_pagecount(pPager);
drh1ab43002002-01-14 09:28:19 +0000995 if( pPager->dbSize<(int)pgno ){
drh306dc212001-05-21 13:45:10 +0000996 memset(PGHDR_TO_DATA(pPg), 0, SQLITE_PAGE_SIZE);
997 }else{
drh81a20f22001-10-12 17:30:04 +0000998 int rc;
drha7fcb052001-12-14 15:09:55 +0000999 sqliteOsSeek(&pPager->fd, (pgno-1)*SQLITE_PAGE_SIZE);
1000 rc = sqliteOsRead(&pPager->fd, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE);
drh81a20f22001-10-12 17:30:04 +00001001 if( rc!=SQLITE_OK ){
1002 return rc;
1003 }
drh306dc212001-05-21 13:45:10 +00001004 }
drh7e3b0a02001-04-28 16:52:40 +00001005 if( pPager->nExtra>0 ){
1006 memset(PGHDR_TO_EXTRA(pPg), 0, pPager->nExtra);
1007 }
drhed7c8552001-04-11 14:29:21 +00001008 }else{
drhd9b02572001-04-15 00:37:09 +00001009 /* The requested page is in the page cache. */
drh7e3b0a02001-04-28 16:52:40 +00001010 pPager->nHit++;
drhdf0b3b02001-06-23 11:36:20 +00001011 page_ref(pPg);
drhed7c8552001-04-11 14:29:21 +00001012 }
1013 *ppPage = PGHDR_TO_DATA(pPg);
1014 return SQLITE_OK;
1015}
1016
1017/*
drh7e3b0a02001-04-28 16:52:40 +00001018** Acquire a page if it is already in the in-memory cache. Do
1019** not read the page from disk. Return a pointer to the page,
1020** or 0 if the page is not in cache.
1021**
1022** See also sqlitepager_get(). The difference between this routine
1023** and sqlitepager_get() is that _get() will go to the disk and read
1024** in the page if the page is not already in cache. This routine
drh5e00f6c2001-09-13 13:46:56 +00001025** returns NULL if the page is not in cache or if a disk I/O error
1026** has ever happened.
drh7e3b0a02001-04-28 16:52:40 +00001027*/
1028void *sqlitepager_lookup(Pager *pPager, Pgno pgno){
1029 PgHdr *pPg;
1030
1031 /* Make sure we have not hit any critical errors.
1032 */
1033 if( pPager==0 || pgno==0 ){
1034 return 0;
1035 }
1036 if( pPager->errMask & ~(PAGER_ERR_FULL) ){
1037 return 0;
1038 }
1039 if( pPager->nRef==0 ){
1040 return 0;
1041 }
1042 pPg = pager_lookup(pPager, pgno);
1043 if( pPg==0 ) return 0;
drhdf0b3b02001-06-23 11:36:20 +00001044 page_ref(pPg);
drh7e3b0a02001-04-28 16:52:40 +00001045 return PGHDR_TO_DATA(pPg);
1046}
1047
1048/*
drhed7c8552001-04-11 14:29:21 +00001049** Release a page.
1050**
1051** If the number of references to the page drop to zero, then the
1052** page is added to the LRU list. When all references to all pages
drhd9b02572001-04-15 00:37:09 +00001053** are released, a rollback occurs and the lock on the database is
drhed7c8552001-04-11 14:29:21 +00001054** removed.
1055*/
drhd9b02572001-04-15 00:37:09 +00001056int sqlitepager_unref(void *pData){
drhed7c8552001-04-11 14:29:21 +00001057 PgHdr *pPg;
drhd9b02572001-04-15 00:37:09 +00001058
1059 /* Decrement the reference count for this page
1060 */
drhed7c8552001-04-11 14:29:21 +00001061 pPg = DATA_TO_PGHDR(pData);
1062 assert( pPg->nRef>0 );
drhed7c8552001-04-11 14:29:21 +00001063 pPg->nRef--;
drhdd793422001-06-28 01:54:48 +00001064 REFINFO(pPg);
drhd9b02572001-04-15 00:37:09 +00001065
drh72f82862001-05-24 21:06:34 +00001066 /* When the number of references to a page reach 0, call the
1067 ** destructor and add the page to the freelist.
drhd9b02572001-04-15 00:37:09 +00001068 */
drhed7c8552001-04-11 14:29:21 +00001069 if( pPg->nRef==0 ){
drh1eaa2692001-09-18 02:02:23 +00001070 Pager *pPager;
1071 pPager = pPg->pPager;
drhd9b02572001-04-15 00:37:09 +00001072 pPg->pNextFree = 0;
1073 pPg->pPrevFree = pPager->pLast;
drhed7c8552001-04-11 14:29:21 +00001074 pPager->pLast = pPg;
drhd9b02572001-04-15 00:37:09 +00001075 if( pPg->pPrevFree ){
1076 pPg->pPrevFree->pNextFree = pPg;
drhed7c8552001-04-11 14:29:21 +00001077 }else{
1078 pPager->pFirst = pPg;
1079 }
drh72f82862001-05-24 21:06:34 +00001080 if( pPager->xDestructor ){
1081 pPager->xDestructor(pData);
1082 }
drhd9b02572001-04-15 00:37:09 +00001083
1084 /* When all pages reach the freelist, drop the read lock from
1085 ** the database file.
1086 */
1087 pPager->nRef--;
1088 assert( pPager->nRef>=0 );
1089 if( pPager->nRef==0 ){
1090 pager_reset(pPager);
1091 }
drhed7c8552001-04-11 14:29:21 +00001092 }
drhd9b02572001-04-15 00:37:09 +00001093 return SQLITE_OK;
drhed7c8552001-04-11 14:29:21 +00001094}
1095
1096/*
drh4b845d72002-03-05 12:41:19 +00001097** Acquire a write-lock on the database. The lock is removed when
1098** the any of the following happen:
1099**
1100** * sqlitepager_commit() is called.
1101** * sqlitepager_rollback() is called.
1102** * sqlitepager_close() is called.
1103** * sqlitepager_unref() is called to on every outstanding page.
1104**
1105** The parameter to this routine is a pointer to any open page of the
1106** database file. Nothing changes about the page - it is used merely
1107** to acquire a pointer to the Pager structure and as proof that there
1108** is already a read-lock on the database.
1109**
1110** If the database is already write-locked, this routine is a no-op.
1111*/
1112int sqlitepager_begin(void *pData){
1113 PgHdr *pPg = DATA_TO_PGHDR(pData);
1114 Pager *pPager = pPg->pPager;
1115 int rc = SQLITE_OK;
1116 assert( pPg->nRef>0 );
1117 assert( pPager->state!=SQLITE_UNLOCK );
1118 if( pPager->state==SQLITE_READLOCK ){
1119 assert( pPager->aInJournal==0 );
1120 rc = sqliteOsWriteLock(&pPager->fd);
1121 if( rc!=SQLITE_OK ){
1122 return rc;
1123 }
1124 pPager->aInJournal = sqliteMalloc( pPager->dbSize/8 + 1 );
1125 if( pPager->aInJournal==0 ){
1126 sqliteOsReadLock(&pPager->fd);
1127 return SQLITE_NOMEM;
1128 }
drh8e298f92002-07-06 16:28:47 +00001129 rc = sqliteOsOpenExclusive(pPager->zJournal, &pPager->jfd,pPager->tempFile);
drh4b845d72002-03-05 12:41:19 +00001130 if( rc!=SQLITE_OK ){
1131 sqliteFree(pPager->aInJournal);
1132 pPager->aInJournal = 0;
1133 sqliteOsReadLock(&pPager->fd);
1134 return SQLITE_CANTOPEN;
1135 }
1136 pPager->journalOpen = 1;
drha1680452002-04-18 01:56:57 +00001137 pPager->needSync = 0;
1138 pPager->dirtyFile = 0;
drh193a6b42002-07-07 16:52:46 +00001139 pPager->alwaysRollback = 0;
drh4b845d72002-03-05 12:41:19 +00001140 pPager->state = SQLITE_WRITELOCK;
1141 sqlitepager_pagecount(pPager);
1142 pPager->origDbSize = pPager->dbSize;
drh94f33312002-08-12 12:29:56 +00001143#ifdef SQLITE_TEST
1144 if( pager_old_format ){
1145 rc = sqliteOsWrite(&pPager->jfd, aOldJournalMagic,
1146 sizeof(aOldJournalMagic));
1147 }else{
1148 rc = sqliteOsWrite(&pPager->jfd, aJournalMagic, sizeof(aJournalMagic));
1149 }
1150#else
drh4b845d72002-03-05 12:41:19 +00001151 rc = sqliteOsWrite(&pPager->jfd, aJournalMagic, sizeof(aJournalMagic));
drh94f33312002-08-12 12:29:56 +00001152#endif
drh4b845d72002-03-05 12:41:19 +00001153 if( rc==SQLITE_OK ){
drh94f33312002-08-12 12:29:56 +00001154 rc = write32bits(&pPager->jfd, pPager->dbSize);
drh4b845d72002-03-05 12:41:19 +00001155 }
1156 if( rc!=SQLITE_OK ){
1157 rc = pager_unwritelock(pPager);
1158 if( rc==SQLITE_OK ) rc = SQLITE_FULL;
1159 }
1160 }
1161 return rc;
1162}
1163
1164/*
drhed7c8552001-04-11 14:29:21 +00001165** Mark a data page as writeable. The page is written into the journal
1166** if it is not there already. This routine must be called before making
1167** changes to a page.
1168**
1169** The first time this routine is called, the pager creates a new
1170** journal and acquires a write lock on the database. If the write
1171** lock could not be acquired, this routine returns SQLITE_BUSY. The
drh306dc212001-05-21 13:45:10 +00001172** calling routine must check for that return value and be careful not to
drhed7c8552001-04-11 14:29:21 +00001173** change any page data until this routine returns SQLITE_OK.
drhd9b02572001-04-15 00:37:09 +00001174**
1175** If the journal file could not be written because the disk is full,
1176** then this routine returns SQLITE_FULL and does an immediate rollback.
1177** All subsequent write attempts also return SQLITE_FULL until there
1178** is a call to sqlitepager_commit() or sqlitepager_rollback() to
1179** reset.
drhed7c8552001-04-11 14:29:21 +00001180*/
drhd9b02572001-04-15 00:37:09 +00001181int sqlitepager_write(void *pData){
drh69688d52001-04-14 16:38:23 +00001182 PgHdr *pPg = DATA_TO_PGHDR(pData);
1183 Pager *pPager = pPg->pPager;
drhd79caeb2001-04-15 02:27:24 +00001184 int rc = SQLITE_OK;
drh69688d52001-04-14 16:38:23 +00001185
drh6446c4d2001-12-15 14:22:18 +00001186 /* Check for errors
1187 */
drhd9b02572001-04-15 00:37:09 +00001188 if( pPager->errMask ){
1189 return pager_errcode(pPager);
1190 }
drh5e00f6c2001-09-13 13:46:56 +00001191 if( pPager->readOnly ){
1192 return SQLITE_PERM;
1193 }
drh6446c4d2001-12-15 14:22:18 +00001194
1195 /* Mark the page as dirty. If the page has already been written
1196 ** to the journal then we can return right away.
1197 */
drhd9b02572001-04-15 00:37:09 +00001198 pPg->dirty = 1;
drh0f892532002-05-30 12:27:03 +00001199 if( pPg->inJournal && (pPg->inCkpt || pPager->ckptInUse==0) ){
drha1680452002-04-18 01:56:57 +00001200 pPager->dirtyFile = 1;
drhfa86c412002-02-02 15:01:15 +00001201 return SQLITE_OK;
1202 }
drh6446c4d2001-12-15 14:22:18 +00001203
1204 /* If we get this far, it means that the page needs to be
drhfa86c412002-02-02 15:01:15 +00001205 ** written to the transaction journal or the ckeckpoint journal
1206 ** or both.
1207 **
1208 ** First check to see that the transaction journal exists and
1209 ** create it if it does not.
drh6446c4d2001-12-15 14:22:18 +00001210 */
drhd9b02572001-04-15 00:37:09 +00001211 assert( pPager->state!=SQLITE_UNLOCK );
drh4b845d72002-03-05 12:41:19 +00001212 rc = sqlitepager_begin(pData);
drha1680452002-04-18 01:56:57 +00001213 pPager->dirtyFile = 1;
drh4b845d72002-03-05 12:41:19 +00001214 if( rc!=SQLITE_OK ) return rc;
drhd9b02572001-04-15 00:37:09 +00001215 assert( pPager->state==SQLITE_WRITELOCK );
drh8cfbf082001-09-19 13:22:39 +00001216 assert( pPager->journalOpen );
drh6446c4d2001-12-15 14:22:18 +00001217
drhfa86c412002-02-02 15:01:15 +00001218 /* The transaction journal now exists and we have a write lock on the
1219 ** main database file. Write the current page to the transaction
1220 ** journal if it is not there already.
drh6446c4d2001-12-15 14:22:18 +00001221 */
drhfa86c412002-02-02 15:01:15 +00001222 if( !pPg->inJournal && (int)pPg->pgno <= pPager->origDbSize ){
drh94f33312002-08-12 12:29:56 +00001223 rc = write32bits(&pPager->jfd, pPg->pgno);
drhd9b02572001-04-15 00:37:09 +00001224 if( rc==SQLITE_OK ){
drha7fcb052001-12-14 15:09:55 +00001225 rc = sqliteOsWrite(&pPager->jfd, pData, SQLITE_PAGE_SIZE);
drhd9b02572001-04-15 00:37:09 +00001226 }
1227 if( rc!=SQLITE_OK ){
1228 sqlitepager_rollback(pPager);
1229 pPager->errMask |= PAGER_ERR_FULL;
1230 return rc;
1231 }
drh6019e162001-07-02 17:51:45 +00001232 assert( pPager->aInJournal!=0 );
1233 pPager->aInJournal[pPg->pgno/8] |= 1<<(pPg->pgno&7);
drh603240c2002-03-05 01:11:12 +00001234 pPager->needSync = !pPager->noSync;
drhfa86c412002-02-02 15:01:15 +00001235 pPg->inJournal = 1;
drh0f892532002-05-30 12:27:03 +00001236 if( pPager->ckptInUse ){
drhfa86c412002-02-02 15:01:15 +00001237 pPager->aInCkpt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
1238 pPg->inCkpt = 1;
1239 }
drh69688d52001-04-14 16:38:23 +00001240 }
drh6446c4d2001-12-15 14:22:18 +00001241
drhfa86c412002-02-02 15:01:15 +00001242 /* If the checkpoint journal is open and the page is not in it,
1243 ** then write the current page to the checkpoint journal.
drh6446c4d2001-12-15 14:22:18 +00001244 */
drh0f892532002-05-30 12:27:03 +00001245 if( pPager->ckptInUse && !pPg->inCkpt && (int)pPg->pgno<=pPager->ckptSize ){
drh1e336b42002-02-14 12:50:33 +00001246 assert( pPg->inJournal || (int)pPg->pgno>pPager->origDbSize );
drh94f33312002-08-12 12:29:56 +00001247 rc = write32bits(&pPager->cpfd, pPg->pgno);
drhfa86c412002-02-02 15:01:15 +00001248 if( rc==SQLITE_OK ){
1249 rc = sqliteOsWrite(&pPager->cpfd, pData, SQLITE_PAGE_SIZE);
1250 }
1251 if( rc!=SQLITE_OK ){
1252 sqlitepager_rollback(pPager);
1253 pPager->errMask |= PAGER_ERR_FULL;
1254 return rc;
1255 }
1256 assert( pPager->aInCkpt!=0 );
1257 pPager->aInCkpt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
1258 pPg->inCkpt = 1;
1259 }
1260
1261 /* Update the database size and return.
1262 */
drh1ab43002002-01-14 09:28:19 +00001263 if( pPager->dbSize<(int)pPg->pgno ){
drh306dc212001-05-21 13:45:10 +00001264 pPager->dbSize = pPg->pgno;
1265 }
drh69688d52001-04-14 16:38:23 +00001266 return rc;
drhed7c8552001-04-11 14:29:21 +00001267}
1268
1269/*
drhaacc5432002-01-06 17:07:40 +00001270** Return TRUE if the page given in the argument was previously passed
drh6019e162001-07-02 17:51:45 +00001271** to sqlitepager_write(). In other words, return TRUE if it is ok
1272** to change the content of the page.
1273*/
1274int sqlitepager_iswriteable(void *pData){
1275 PgHdr *pPg = DATA_TO_PGHDR(pData);
1276 return pPg->dirty;
1277}
1278
1279/*
drh30e58752002-03-02 20:41:57 +00001280** A call to this routine tells the pager that it is not necessary to
1281** write the information on page "pgno" back to the disk, even though
1282** that page might be marked as dirty.
1283**
1284** The overlying software layer calls this routine when all of the data
1285** on the given page is unused. The pager marks the page as clean so
1286** that it does not get written to disk.
1287**
1288** Tests show that this optimization, together with the
1289** sqlitepager_dont_rollback() below, more than double the speed
1290** of large INSERT operations and quadruple the speed of large DELETEs.
drh8e298f92002-07-06 16:28:47 +00001291**
1292** When this routine is called, set the alwaysRollback flag to true.
1293** Subsequent calls to sqlitepager_dont_rollback() for the same page
1294** will thereafter be ignored. This is necessary to avoid a problem
1295** where a page with data is added to the freelist during one part of
1296** a transaction then removed from the freelist during a later part
1297** of the same transaction and reused for some other purpose. When it
1298** is first added to the freelist, this routine is called. When reused,
1299** the dont_rollback() routine is called. But because the page contains
1300** critical data, we still need to be sure it gets rolled back in spite
1301** of the dont_rollback() call.
drh30e58752002-03-02 20:41:57 +00001302*/
1303void sqlitepager_dont_write(Pager *pPager, Pgno pgno){
1304 PgHdr *pPg;
drh8e298f92002-07-06 16:28:47 +00001305
drh30e58752002-03-02 20:41:57 +00001306 pPg = pager_lookup(pPager, pgno);
drh8e298f92002-07-06 16:28:47 +00001307 pPg->alwaysRollback = 1;
drh30e58752002-03-02 20:41:57 +00001308 if( pPg && pPg->dirty ){
drh8124a302002-06-25 14:43:57 +00001309 if( pPager->dbSize==(int)pPg->pgno && pPager->origDbSize<pPager->dbSize ){
1310 /* If this pages is the last page in the file and the file has grown
1311 ** during the current transaction, then do NOT mark the page as clean.
1312 ** When the database file grows, we must make sure that the last page
1313 ** gets written at least once so that the disk file will be the correct
1314 ** size. If you do not write this page and the size of the file
1315 ** on the disk ends up being too small, that can lead to database
1316 ** corruption during the next transaction.
1317 */
1318 }else{
1319 pPg->dirty = 0;
1320 }
drh30e58752002-03-02 20:41:57 +00001321 }
1322}
1323
1324/*
1325** A call to this routine tells the pager that if a rollback occurs,
1326** it is not necessary to restore the data on the given page. This
1327** means that the pager does not have to record the given page in the
1328** rollback journal.
1329*/
1330void sqlitepager_dont_rollback(void *pData){
1331 PgHdr *pPg = DATA_TO_PGHDR(pData);
1332 Pager *pPager = pPg->pPager;
1333
1334 if( pPager->state!=SQLITE_WRITELOCK || pPager->journalOpen==0 ) return;
drh193a6b42002-07-07 16:52:46 +00001335 if( pPg->alwaysRollback || pPager->alwaysRollback ) return;
drh30e58752002-03-02 20:41:57 +00001336 if( !pPg->inJournal && (int)pPg->pgno <= pPager->origDbSize ){
1337 assert( pPager->aInJournal!=0 );
1338 pPager->aInJournal[pPg->pgno/8] |= 1<<(pPg->pgno&7);
1339 pPg->inJournal = 1;
drh0f892532002-05-30 12:27:03 +00001340 if( pPager->ckptInUse ){
drh30e58752002-03-02 20:41:57 +00001341 pPager->aInCkpt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
1342 pPg->inCkpt = 1;
1343 }
1344 }
drh0f892532002-05-30 12:27:03 +00001345 if( pPager->ckptInUse && !pPg->inCkpt && (int)pPg->pgno<=pPager->ckptSize ){
drh30e58752002-03-02 20:41:57 +00001346 assert( pPg->inJournal || (int)pPg->pgno>pPager->origDbSize );
1347 assert( pPager->aInCkpt!=0 );
1348 pPager->aInCkpt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
1349 pPg->inCkpt = 1;
1350 }
1351}
1352
1353/*
drhed7c8552001-04-11 14:29:21 +00001354** Commit all changes to the database and release the write lock.
drhd9b02572001-04-15 00:37:09 +00001355**
1356** If the commit fails for any reason, a rollback attempt is made
1357** and an error code is returned. If the commit worked, SQLITE_OK
1358** is returned.
drhed7c8552001-04-11 14:29:21 +00001359*/
drhd9b02572001-04-15 00:37:09 +00001360int sqlitepager_commit(Pager *pPager){
drha1b351a2001-09-14 16:42:12 +00001361 int rc;
drhed7c8552001-04-11 14:29:21 +00001362 PgHdr *pPg;
drhd9b02572001-04-15 00:37:09 +00001363
1364 if( pPager->errMask==PAGER_ERR_FULL ){
1365 rc = sqlitepager_rollback(pPager);
1366 if( rc==SQLITE_OK ) rc = SQLITE_FULL;
1367 return rc;
1368 }
1369 if( pPager->errMask!=0 ){
1370 rc = pager_errcode(pPager);
1371 return rc;
1372 }
1373 if( pPager->state!=SQLITE_WRITELOCK ){
1374 return SQLITE_ERROR;
1375 }
drh8cfbf082001-09-19 13:22:39 +00001376 assert( pPager->journalOpen );
drha1680452002-04-18 01:56:57 +00001377 if( pPager->dirtyFile==0 ){
1378 /* Exit early (without doing the time-consuming sqliteOsSync() calls)
1379 ** if there have been no changes to the database file. */
1380 rc = pager_unwritelock(pPager);
1381 pPager->dbSize = -1;
1382 return rc;
1383 }
drha7fcb052001-12-14 15:09:55 +00001384 if( pPager->needSync && sqliteOsSync(&pPager->jfd)!=SQLITE_OK ){
drhd9b02572001-04-15 00:37:09 +00001385 goto commit_abort;
drhed7c8552001-04-11 14:29:21 +00001386 }
drha1b351a2001-09-14 16:42:12 +00001387 for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
1388 if( pPg->dirty==0 ) continue;
drha7fcb052001-12-14 15:09:55 +00001389 rc = sqliteOsSeek(&pPager->fd, (pPg->pgno-1)*SQLITE_PAGE_SIZE);
drha1b351a2001-09-14 16:42:12 +00001390 if( rc!=SQLITE_OK ) goto commit_abort;
drha7fcb052001-12-14 15:09:55 +00001391 rc = sqliteOsWrite(&pPager->fd, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE);
drha1b351a2001-09-14 16:42:12 +00001392 if( rc!=SQLITE_OK ) goto commit_abort;
drhed7c8552001-04-11 14:29:21 +00001393 }
drh603240c2002-03-05 01:11:12 +00001394 if( !pPager->noSync && sqliteOsSync(&pPager->fd)!=SQLITE_OK ){
1395 goto commit_abort;
1396 }
drhd9b02572001-04-15 00:37:09 +00001397 rc = pager_unwritelock(pPager);
1398 pPager->dbSize = -1;
1399 return rc;
1400
1401 /* Jump here if anything goes wrong during the commit process.
1402 */
1403commit_abort:
1404 rc = sqlitepager_rollback(pPager);
1405 if( rc==SQLITE_OK ){
1406 rc = SQLITE_FULL;
drhed7c8552001-04-11 14:29:21 +00001407 }
drhed7c8552001-04-11 14:29:21 +00001408 return rc;
1409}
1410
1411/*
1412** Rollback all changes. The database falls back to read-only mode.
1413** All in-memory cache pages revert to their original data contents.
1414** The journal is deleted.
drhd9b02572001-04-15 00:37:09 +00001415**
1416** This routine cannot fail unless some other process is not following
1417** the correct locking protocol (SQLITE_PROTOCOL) or unless some other
1418** process is writing trash into the journal file (SQLITE_CORRUPT) or
1419** unless a prior malloc() failed (SQLITE_NOMEM). Appropriate error
1420** codes are returned for all these occasions. Otherwise,
1421** SQLITE_OK is returned.
drhed7c8552001-04-11 14:29:21 +00001422*/
drhd9b02572001-04-15 00:37:09 +00001423int sqlitepager_rollback(Pager *pPager){
drhed7c8552001-04-11 14:29:21 +00001424 int rc;
drhd9b02572001-04-15 00:37:09 +00001425 if( pPager->errMask!=0 && pPager->errMask!=PAGER_ERR_FULL ){
drh4b845d72002-03-05 12:41:19 +00001426 if( pPager->state>=SQLITE_WRITELOCK ){
1427 pager_playback(pPager);
1428 }
drhd9b02572001-04-15 00:37:09 +00001429 return pager_errcode(pPager);
drhed7c8552001-04-11 14:29:21 +00001430 }
drhd9b02572001-04-15 00:37:09 +00001431 if( pPager->state!=SQLITE_WRITELOCK ){
1432 return SQLITE_OK;
1433 }
1434 rc = pager_playback(pPager);
1435 if( rc!=SQLITE_OK ){
1436 rc = SQLITE_CORRUPT;
1437 pPager->errMask |= PAGER_ERR_CORRUPT;
1438 }
1439 pPager->dbSize = -1;
drhed7c8552001-04-11 14:29:21 +00001440 return rc;
drh98808ba2001-10-18 12:34:46 +00001441}
drhd9b02572001-04-15 00:37:09 +00001442
1443/*
drh5e00f6c2001-09-13 13:46:56 +00001444** Return TRUE if the database file is opened read-only. Return FALSE
1445** if the database is (in theory) writable.
1446*/
1447int sqlitepager_isreadonly(Pager *pPager){
drhbe0072d2001-09-13 14:46:09 +00001448 return pPager->readOnly;
drh5e00f6c2001-09-13 13:46:56 +00001449}
1450
1451/*
drhd9b02572001-04-15 00:37:09 +00001452** This routine is used for testing and analysis only.
1453*/
1454int *sqlitepager_stats(Pager *pPager){
1455 static int a[9];
1456 a[0] = pPager->nRef;
1457 a[1] = pPager->nPage;
1458 a[2] = pPager->mxPage;
1459 a[3] = pPager->dbSize;
1460 a[4] = pPager->state;
1461 a[5] = pPager->errMask;
1462 a[6] = pPager->nHit;
1463 a[7] = pPager->nMiss;
1464 a[8] = pPager->nOvfl;
1465 return a;
1466}
drhdd793422001-06-28 01:54:48 +00001467
drhfa86c412002-02-02 15:01:15 +00001468/*
1469** Set the checkpoint.
1470**
1471** This routine should be called with the transaction journal already
1472** open. A new checkpoint journal is created that can be used to rollback
drhaaab5722002-02-19 13:39:21 +00001473** changes of a single SQL command within a larger transaction.
drhfa86c412002-02-02 15:01:15 +00001474*/
1475int sqlitepager_ckpt_begin(Pager *pPager){
1476 int rc;
1477 char zTemp[SQLITE_TEMPNAME_SIZE];
1478 assert( pPager->journalOpen );
drh0f892532002-05-30 12:27:03 +00001479 assert( !pPager->ckptInUse );
drhfa86c412002-02-02 15:01:15 +00001480 pPager->aInCkpt = sqliteMalloc( pPager->dbSize/8 + 1 );
1481 if( pPager->aInCkpt==0 ){
1482 sqliteOsReadLock(&pPager->fd);
1483 return SQLITE_NOMEM;
1484 }
1485 rc = sqliteOsFileSize(&pPager->jfd, &pPager->ckptJSize);
1486 if( rc ) goto ckpt_begin_failed;
drh663fc632002-02-02 18:49:19 +00001487 pPager->ckptSize = pPager->dbSize;
drh0f892532002-05-30 12:27:03 +00001488 if( !pPager->ckptOpen ){
1489 rc = sqlitepager_opentemp(zTemp, &pPager->cpfd);
1490 if( rc ) goto ckpt_begin_failed;
1491 pPager->ckptOpen = 1;
1492 }
1493 pPager->ckptInUse = 1;
drhfa86c412002-02-02 15:01:15 +00001494 return SQLITE_OK;
1495
1496ckpt_begin_failed:
1497 if( pPager->aInCkpt ){
1498 sqliteFree(pPager->aInCkpt);
1499 pPager->aInCkpt = 0;
1500 }
1501 return rc;
1502}
1503
1504/*
1505** Commit a checkpoint.
1506*/
1507int sqlitepager_ckpt_commit(Pager *pPager){
drh0f892532002-05-30 12:27:03 +00001508 if( pPager->ckptInUse ){
drh663fc632002-02-02 18:49:19 +00001509 PgHdr *pPg;
drh0f892532002-05-30 12:27:03 +00001510 sqliteOsTruncate(&pPager->cpfd, 0);
1511 pPager->ckptInUse = 0;
drh663fc632002-02-02 18:49:19 +00001512 sqliteFree( pPager->aInCkpt );
1513 pPager->aInCkpt = 0;
1514 for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
1515 pPg->inCkpt = 0;
1516 }
1517 }
drhfa86c412002-02-02 15:01:15 +00001518 return SQLITE_OK;
1519}
1520
1521/*
1522** Rollback a checkpoint.
1523*/
1524int sqlitepager_ckpt_rollback(Pager *pPager){
1525 int rc;
drh0f892532002-05-30 12:27:03 +00001526 if( pPager->ckptInUse ){
drh663fc632002-02-02 18:49:19 +00001527 rc = pager_ckpt_playback(pPager);
1528 sqlitepager_ckpt_commit(pPager);
1529 }else{
1530 rc = SQLITE_OK;
1531 }
drhfa86c412002-02-02 15:01:15 +00001532 return rc;
1533}
1534
#if SQLITE_TEST
/*
** Print a listing of all referenced pages and their ref count.
**
** One line is written to standard output for every page whose nRef is
** greater than zero, giving the page number, the address of the page's
** data area, and the reference count.
*/
void sqlitepager_refdump(Pager *pPager){
  PgHdr *pPg;
  for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
    if( pPg->nRef<=0 ) continue;
    /* The address is printed with %p.  Casting the pointer to int and
    ** printing it with %x loses the upper bits wherever pointers are wider
    ** than int. */
    printf("PAGE %3d addr=%p nRef=%d\n",
       (int)pPg->pgno, (void*)PGHDR_TO_DATA(pPg), pPg->nRef);
  }
}
#endif