blob: 24649e4c9aae407c5dd9d4d37196bda43b80a378 [file] [log] [blame]
/*
** 2001 September 15
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This is the implementation of the page cache subsystem or "pager".
**
** The pager is used to access a database disk file.  It implements
** atomic commit and rollback through the use of a journal file that
** is separate from the database file.  The pager also implements file
** locking to prevent two processes from writing the same database
** file simultaneously, or one process from reading the database while
** another is writing.
**
** @(#) $Id: pager.c,v 1.52 2002/09/05 16:08:27 drh Exp $
*/
drhd9b02572001-04-15 00:37:09 +000023#include "sqliteInt.h"
drhed7c8552001-04-11 14:29:21 +000024#include "pager.h"
drh8cfbf082001-09-19 13:22:39 +000025#include "os.h"
drhed7c8552001-04-11 14:29:21 +000026#include <assert.h>
drhd9b02572001-04-15 00:37:09 +000027#include <string.h>
drhed7c8552001-04-11 14:29:21 +000028
/*
** Lock states of the page cache.  The cache as a whole is always in
** exactly one of these states (stored in Pager.state):
**
**   SQLITE_UNLOCK      The cache is neither reading nor writing the
**                      database file and holds no data in memory.
**                      This is the initial state.
**
**   SQLITE_READLOCK    The cache is reading the database.  Writing is
**                      not permitted.  Several readers may access the
**                      same database file at the same time.
**
**   SQLITE_WRITELOCK   The cache is writing the database.  Access is
**                      exclusive: no other process or thread may read
**                      or write while one process is writing.
**
** State transitions:
**
**   UNLOCK -> READLOCK      the first time a page is fetched
**   READLOCK -> UNLOCK      after every page has been released
**   READLOCK -> WRITELOCK   the first time a page is made writable
**                           (only an outstanding page can be made
**                           writable, so the pager is always in
**                           READLOCK before it reaches WRITELOCK)
**   WRITELOCK -> READLOCK   on rollback or commit
**
** The numeric ordering matters: code in this file tests for a write
** lock with comparisons such as "state>=SQLITE_WRITELOCK".
*/
#define SQLITE_UNLOCK      0
#define SQLITE_READLOCK    1
#define SQLITE_WRITELOCK   2
62
drhd9b02572001-04-15 00:37:09 +000063
drhed7c8552001-04-11 14:29:21 +000064/*
65** Each in-memory image of a page begins with the following header.
drhbd03cae2001-06-02 02:40:57 +000066** This header is only visible to this pager module. The client
67** code that calls pager sees only the data that follows the header.
drhed7c8552001-04-11 14:29:21 +000068*/
drhd9b02572001-04-15 00:37:09 +000069typedef struct PgHdr PgHdr;
drhed7c8552001-04-11 14:29:21 +000070struct PgHdr {
71 Pager *pPager; /* The pager to which this page belongs */
72 Pgno pgno; /* The page number for this page */
drh69688d52001-04-14 16:38:23 +000073 PgHdr *pNextHash, *pPrevHash; /* Hash collision chain for PgHdr.pgno */
drhed7c8552001-04-11 14:29:21 +000074 int nRef; /* Number of users of this page */
drhd9b02572001-04-15 00:37:09 +000075 PgHdr *pNextFree, *pPrevFree; /* Freelist of pages where nRef==0 */
76 PgHdr *pNextAll, *pPrevAll; /* A list of all pages */
drh193a6b42002-07-07 16:52:46 +000077 u8 inJournal; /* TRUE if has been written to journal */
78 u8 inCkpt; /* TRUE if written to the checkpoint journal */
79 u8 dirty; /* TRUE if we need to write back changes */
80 u8 alwaysRollback; /* Disable dont_rollback() for this page */
drh69688d52001-04-14 16:38:23 +000081 /* SQLITE_PAGE_SIZE bytes of page data follow this header */
drh7e3b0a02001-04-28 16:52:40 +000082 /* Pager.nExtra bytes of local data follow the page data */
drhed7c8552001-04-11 14:29:21 +000083};
84
/*
** Conversions between a page header and the memory that follows it.
**
**   PGHDR_TO_DATA(P)    header pointer -> start of the page data
**   DATA_TO_PGHDR(D)    page data pointer -> its header
**   PGHDR_TO_EXTRA(P)   header pointer -> start of the extra area that
**                       follows the SQLITE_PAGE_SIZE bytes of page data
**
** For PGHDR_TO_DATA and PGHDR_TO_EXTRA the argument must already have
** type PgHdr*, because the step past the header relies on pointer
** arithmetic on that type.
*/
#define PGHDR_TO_DATA(P)    ((void*)((P)+1))
#define DATA_TO_PGHDR(D)    (((PgHdr*)(D))-1)
#define PGHDR_TO_EXTRA(P)   ((void*)(((char*)((P)+1))+SQLITE_PAGE_SIZE))
drhed7c8552001-04-11 14:29:21 +000092
/*
** Number of buckets in the hash table that locates in-memory pages
** by page number.  Knuth says this should be a prime number.
*/
#define N_PG_HASH 2003
drhed7c8552001-04-11 14:29:21 +000098
99/*
100** A open page cache is an instance of the following structure.
101*/
102struct Pager {
103 char *zFilename; /* Name of the database file */
104 char *zJournal; /* Name of the journal file */
drh8cfbf082001-09-19 13:22:39 +0000105 OsFile fd, jfd; /* File descriptors for database and journal */
drhfa86c412002-02-02 15:01:15 +0000106 OsFile cpfd; /* File descriptor for the checkpoint journal */
drhed7c8552001-04-11 14:29:21 +0000107 int dbSize; /* Number of pages in the file */
drh69688d52001-04-14 16:38:23 +0000108 int origDbSize; /* dbSize before the current change */
drhfa86c412002-02-02 15:01:15 +0000109 int ckptSize, ckptJSize; /* Size of database and journal at ckpt_begin() */
drh7e3b0a02001-04-28 16:52:40 +0000110 int nExtra; /* Add this many bytes to each in-memory page */
drh72f82862001-05-24 21:06:34 +0000111 void (*xDestructor)(void*); /* Call this routine when freeing pages */
drhed7c8552001-04-11 14:29:21 +0000112 int nPage; /* Total number of in-memory pages */
drhd9b02572001-04-15 00:37:09 +0000113 int nRef; /* Number of in-memory pages with PgHdr.nRef>0 */
drhed7c8552001-04-11 14:29:21 +0000114 int mxPage; /* Maximum number of pages to hold in cache */
drhd9b02572001-04-15 00:37:09 +0000115 int nHit, nMiss, nOvfl; /* Cache hits, missing, and LRU overflows */
drh603240c2002-03-05 01:11:12 +0000116 u8 journalOpen; /* True if journal file descriptors is valid */
117 u8 ckptOpen; /* True if the checkpoint journal is open */
drh0f892532002-05-30 12:27:03 +0000118 u8 ckptInUse; /* True we are in a checkpoint */
drh603240c2002-03-05 01:11:12 +0000119 u8 noSync; /* Do not sync the journal if true */
120 u8 state; /* SQLITE_UNLOCK, _READLOCK or _WRITELOCK */
121 u8 errMask; /* One of several kinds of errors */
122 u8 tempFile; /* zFilename is a temporary file */
123 u8 readOnly; /* True for a read-only database */
124 u8 needSync; /* True if an fsync() is needed on the journal */
drha1680452002-04-18 01:56:57 +0000125 u8 dirtyFile; /* True if database file has changed in any way */
drh193a6b42002-07-07 16:52:46 +0000126 u8 alwaysRollback; /* Disable dont_rollback() for all pages */
drh94f33312002-08-12 12:29:56 +0000127 u8 journalFormat; /* Version number of the journal file */
drh603240c2002-03-05 01:11:12 +0000128 u8 *aInJournal; /* One bit for each page in the database file */
129 u8 *aInCkpt; /* One bit for each page in the database */
drhed7c8552001-04-11 14:29:21 +0000130 PgHdr *pFirst, *pLast; /* List of free pages */
drhd9b02572001-04-15 00:37:09 +0000131 PgHdr *pAll; /* List of all pages */
drhed7c8552001-04-11 14:29:21 +0000132 PgHdr *aHash[N_PG_HASH]; /* Hash table to map page number of PgHdr */
drhd9b02572001-04-15 00:37:09 +0000133};
134
/*
** Error bits that may be set in Pager.errMask.  Several bits can be set
** at once; pager_errcode() maps the mask to a single result code.
*/
#define PAGER_ERR_FULL     0x01  /* a write() failed */
#define PAGER_ERR_MEM      0x02  /* malloc() failed */
#define PAGER_ERR_LOCK     0x04  /* error in the locking protocol */
#define PAGER_ERR_CORRUPT  0x08  /* database or journal corruption */
#define PAGER_ERR_DISK     0x10  /* general disk I/O error - bad hard drive? */
drhd9b02572001-04-15 00:37:09 +0000143
144/*
145** The journal file contains page records in the following
146** format.
147*/
148typedef struct PageRecord PageRecord;
149struct PageRecord {
150 Pgno pgno; /* The page number */
151 char aData[SQLITE_PAGE_SIZE]; /* Original data for page pgno */
152};
153
/*
** Magic strings found at the start of a journal file.  The bytes were
** obtained from /dev/random and serve only as a sanity check.
**
** Two journal formats exist.  The older format stores 32-bit integers
** in the byte order of the host machine; the newer format stores them
** big-endian.  All new journals are written in the new format, but an
** old journal must still be readable so that it can be rolled back.
** The two magic strings differ only in their final byte.
*/
static const unsigned char aOldJournalMagic[] = {
  0xd9, 0xd5, 0x05, 0xf9,
  0x20, 0xa1, 0x63, 0xd4,
};
static const unsigned char aJournalMagic[] = {
  0xd9, 0xd5, 0x05, 0xf9,
  0x20, 0xa1, 0x63, 0xd5,
};

/* Values stored in Pager.journalFormat */
#define SQLITE_NEW_JOURNAL_FORMAT 1
#define SQLITE_OLD_JOURNAL_FORMAT 0
172
/*
** When non-zero, journals are written in the old format.  This exists
** for testing only, to make sure the code can roll back an old journal.
** Outside of SQLITE_TEST builds it is the compile-time constant 0.
*/
#ifndef SQLITE_TEST
# define pager_old_format 0
#else
int pager_old_format = 0;
#endif
drhed7c8552001-04-11 14:29:21 +0000183
/*
** Map a page number onto its bucket index in Pager.aHash[].
*/
#define pager_hash(PN)  ((PN)%N_PG_HASH)
drhed7c8552001-04-11 14:29:21 +0000188
/*
** Reference count tracing, compiled in only for SQLITE_TEST builds.
**
** When pager_refinfo_enable is non-zero, REFINFO(pPg) prints the page
** number, the address of the page data and the current reference count
** of the page.  In non-test builds REFINFO() expands to nothing and its
** argument is not evaluated.
**
** The data address is printed with %p.  Casting the pointer to int and
** printing it with %x would truncate the address wherever pointers are
** wider than int.  The page number is unsigned, so it is printed with %u.
*/
#ifdef SQLITE_TEST
  int pager_refinfo_enable = 0;
  static void pager_refinfo(PgHdr *p){
    static int cnt = 0;
    if( !pager_refinfo_enable ) return;
    printf(
       "REFCNT: %4u addr=%p nRef=%d\n",
       (unsigned)p->pgno, PGHDR_TO_DATA(p), p->nRef
    );
    cnt++;   /* Something to set a breakpoint on */
  }
# define REFINFO(X)  pager_refinfo(X)
#else
# define REFINFO(X)
#endif
207
208/*
drh94f33312002-08-12 12:29:56 +0000209** Read a 32-bit integer from the given file descriptor
210*/
211static int read32bits(Pager *pPager, OsFile *fd, u32 *pRes){
212 u32 res;
213 int rc;
214 rc = sqliteOsRead(fd, &res, sizeof(res));
215 if( rc==SQLITE_OK && pPager->journalFormat==SQLITE_NEW_JOURNAL_FORMAT ){
216 unsigned char ac[4];
217 memcpy(ac, &res, 4);
218 res = (ac[0]<<24) | (ac[1]<<16) | (ac[2]<<8) | ac[3];
219 }
220 *pRes = res;
221 return rc;
222}
223
224/*
225** Write a 32-bit integer into the given file descriptor. Writing
226** is always done using the new journal format.
227*/
228static int write32bits(OsFile *fd, u32 val){
229 unsigned char ac[4];
drh94f33312002-08-12 12:29:56 +0000230 if( pager_old_format ){
231 return sqliteOsWrite(fd, &val, 4);
232 }
drh94f33312002-08-12 12:29:56 +0000233 ac[0] = (val>>24) & 0xff;
234 ac[1] = (val>>16) & 0xff;
235 ac[2] = (val>>8) & 0xff;
236 ac[3] = val & 0xff;
237 return sqliteOsWrite(fd, ac, 4);
238}
239
240
241/*
drhd9b02572001-04-15 00:37:09 +0000242** Convert the bits in the pPager->errMask into an approprate
243** return code.
244*/
245static int pager_errcode(Pager *pPager){
246 int rc = SQLITE_OK;
247 if( pPager->errMask & PAGER_ERR_LOCK ) rc = SQLITE_PROTOCOL;
drh81a20f22001-10-12 17:30:04 +0000248 if( pPager->errMask & PAGER_ERR_DISK ) rc = SQLITE_IOERR;
drhd9b02572001-04-15 00:37:09 +0000249 if( pPager->errMask & PAGER_ERR_FULL ) rc = SQLITE_FULL;
250 if( pPager->errMask & PAGER_ERR_MEM ) rc = SQLITE_NOMEM;
251 if( pPager->errMask & PAGER_ERR_CORRUPT ) rc = SQLITE_CORRUPT;
252 return rc;
drhed7c8552001-04-11 14:29:21 +0000253}
254
255/*
256** Find a page in the hash table given its page number. Return
257** a pointer to the page or NULL if not found.
258*/
drhd9b02572001-04-15 00:37:09 +0000259static PgHdr *pager_lookup(Pager *pPager, Pgno pgno){
drhed7c8552001-04-11 14:29:21 +0000260 PgHdr *p = pPager->aHash[pgno % N_PG_HASH];
261 while( p && p->pgno!=pgno ){
262 p = p->pNextHash;
263 }
264 return p;
265}
266
267/*
268** Unlock the database and clear the in-memory cache. This routine
269** sets the state of the pager back to what it was when it was first
270** opened. Any outstanding pages are invalidated and subsequent attempts
271** to access those pages will likely result in a coredump.
272*/
drhd9b02572001-04-15 00:37:09 +0000273static void pager_reset(Pager *pPager){
drhed7c8552001-04-11 14:29:21 +0000274 PgHdr *pPg, *pNext;
drhd9b02572001-04-15 00:37:09 +0000275 for(pPg=pPager->pAll; pPg; pPg=pNext){
276 pNext = pPg->pNextAll;
277 sqliteFree(pPg);
drhed7c8552001-04-11 14:29:21 +0000278 }
279 pPager->pFirst = 0;
drhd9b02572001-04-15 00:37:09 +0000280 pPager->pLast = 0;
281 pPager->pAll = 0;
drhed7c8552001-04-11 14:29:21 +0000282 memset(pPager->aHash, 0, sizeof(pPager->aHash));
283 pPager->nPage = 0;
drhfa86c412002-02-02 15:01:15 +0000284 if( pPager->state>=SQLITE_WRITELOCK ){
drhd9b02572001-04-15 00:37:09 +0000285 sqlitepager_rollback(pPager);
drhed7c8552001-04-11 14:29:21 +0000286 }
drha7fcb052001-12-14 15:09:55 +0000287 sqliteOsUnlock(&pPager->fd);
drhed7c8552001-04-11 14:29:21 +0000288 pPager->state = SQLITE_UNLOCK;
drhd9b02572001-04-15 00:37:09 +0000289 pPager->dbSize = -1;
drhed7c8552001-04-11 14:29:21 +0000290 pPager->nRef = 0;
drh8cfbf082001-09-19 13:22:39 +0000291 assert( pPager->journalOpen==0 );
drhed7c8552001-04-11 14:29:21 +0000292}
293
294/*
295** When this routine is called, the pager has the journal file open and
296** a write lock on the database. This routine releases the database
297** write lock and acquires a read lock in its place. The journal file
298** is deleted and closed.
drhed7c8552001-04-11 14:29:21 +0000299*/
drhd9b02572001-04-15 00:37:09 +0000300static int pager_unwritelock(Pager *pPager){
drhed7c8552001-04-11 14:29:21 +0000301 int rc;
drhd9b02572001-04-15 00:37:09 +0000302 PgHdr *pPg;
drhfa86c412002-02-02 15:01:15 +0000303 if( pPager->state<SQLITE_WRITELOCK ) return SQLITE_OK;
drh663fc632002-02-02 18:49:19 +0000304 sqlitepager_ckpt_commit(pPager);
drh0f892532002-05-30 12:27:03 +0000305 if( pPager->ckptOpen ){
306 sqliteOsClose(&pPager->cpfd);
307 pPager->ckptOpen = 0;
308 }
drha7fcb052001-12-14 15:09:55 +0000309 sqliteOsClose(&pPager->jfd);
drh8cfbf082001-09-19 13:22:39 +0000310 pPager->journalOpen = 0;
311 sqliteOsDelete(pPager->zJournal);
drha7fcb052001-12-14 15:09:55 +0000312 rc = sqliteOsReadLock(&pPager->fd);
drh6019e162001-07-02 17:51:45 +0000313 sqliteFree( pPager->aInJournal );
314 pPager->aInJournal = 0;
drhd9b02572001-04-15 00:37:09 +0000315 for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
316 pPg->inJournal = 0;
317 pPg->dirty = 0;
318 }
drh8e298f92002-07-06 16:28:47 +0000319 if( rc==SQLITE_OK ){
320 pPager->state = SQLITE_READLOCK;
321 }else{
322 /* This can only happen if a process does a BEGIN, then forks and the
323 ** child process does the COMMIT. Because of the semantics of unix
324 ** file locking, the unlock will fail.
325 */
326 pPager->state = SQLITE_UNLOCK;
327 }
drhed7c8552001-04-11 14:29:21 +0000328 return rc;
329}
330
drhed7c8552001-04-11 14:29:21 +0000331/*
drhfa86c412002-02-02 15:01:15 +0000332** Read a single page from the journal file opened on file descriptor
333** jfd. Playback this one page.
334*/
335static int pager_playback_one_page(Pager *pPager, OsFile *jfd){
336 int rc;
337 PgHdr *pPg; /* An existing page in the cache */
338 PageRecord pgRec;
339
drh94f33312002-08-12 12:29:56 +0000340 rc = read32bits(pPager, jfd, &pgRec.pgno);
341 if( rc!=SQLITE_OK ) return rc;
342 rc = sqliteOsRead(jfd, &pgRec.aData, sizeof(pgRec.aData));
drhfa86c412002-02-02 15:01:15 +0000343 if( rc!=SQLITE_OK ) return rc;
344
345 /* Sanity checking on the page */
346 if( pgRec.pgno>pPager->dbSize || pgRec.pgno==0 ) return SQLITE_CORRUPT;
347
348 /* Playback the page. Update the in-memory copy of the page
349 ** at the same time, if there is one.
350 */
351 pPg = pager_lookup(pPager, pgRec.pgno);
352 if( pPg ){
353 memcpy(PGHDR_TO_DATA(pPg), pgRec.aData, SQLITE_PAGE_SIZE);
354 memset(PGHDR_TO_EXTRA(pPg), 0, pPager->nExtra);
355 }
356 rc = sqliteOsSeek(&pPager->fd, (pgRec.pgno-1)*SQLITE_PAGE_SIZE);
357 if( rc==SQLITE_OK ){
358 rc = sqliteOsWrite(&pPager->fd, pgRec.aData, SQLITE_PAGE_SIZE);
359 }
360 return rc;
361}
362
363/*
drhed7c8552001-04-11 14:29:21 +0000364** Playback the journal and thus restore the database file to
365** the state it was in before we started making changes.
366**
drhd9b02572001-04-15 00:37:09 +0000367** The journal file format is as follows: There is an initial
368** file-type string for sanity checking. Then there is a single
369** Pgno number which is the number of pages in the database before
370** changes were made. The database is truncated to this size.
drh306dc212001-05-21 13:45:10 +0000371** Next come zero or more page records where each page record
372** consists of a Pgno and SQLITE_PAGE_SIZE bytes of data. See
373** the PageRecord structure for details.
drhed7c8552001-04-11 14:29:21 +0000374**
drhd9b02572001-04-15 00:37:09 +0000375** If the file opened as the journal file is not a well-formed
376** journal file (as determined by looking at the magic number
377** at the beginning) then this routine returns SQLITE_PROTOCOL.
378** If any other errors occur during playback, the database will
379** likely be corrupted, so the PAGER_ERR_CORRUPT bit is set in
380** pPager->errMask and SQLITE_CORRUPT is returned. If it all
381** works, then this routine returns SQLITE_OK.
drhed7c8552001-04-11 14:29:21 +0000382*/
drhd9b02572001-04-15 00:37:09 +0000383static int pager_playback(Pager *pPager){
384 int nRec; /* Number of Records */
385 int i; /* Loop counter */
386 Pgno mxPg = 0; /* Size of the original file in pages */
drhd9b02572001-04-15 00:37:09 +0000387 unsigned char aMagic[sizeof(aJournalMagic)];
drhed7c8552001-04-11 14:29:21 +0000388 int rc;
389
drhc3a64ba2001-11-22 00:01:27 +0000390 /* Figure out how many records are in the journal. Abort early if
391 ** the journal is empty.
drhed7c8552001-04-11 14:29:21 +0000392 */
drh8cfbf082001-09-19 13:22:39 +0000393 assert( pPager->journalOpen );
drha7fcb052001-12-14 15:09:55 +0000394 sqliteOsSeek(&pPager->jfd, 0);
395 rc = sqliteOsFileSize(&pPager->jfd, &nRec);
drhc3a64ba2001-11-22 00:01:27 +0000396 if( rc!=SQLITE_OK ){
397 goto end_playback;
398 }
399 nRec = (nRec - (sizeof(aMagic)+sizeof(Pgno))) / sizeof(PageRecord);
400 if( nRec<=0 ){
401 goto end_playback;
402 }
403
404 /* Read the beginning of the journal and truncate the
405 ** database file back to its original size.
406 */
drha7fcb052001-12-14 15:09:55 +0000407 rc = sqliteOsRead(&pPager->jfd, aMagic, sizeof(aMagic));
drh94f33312002-08-12 12:29:56 +0000408 if( rc!=SQLITE_OK ){
drh81a20f22001-10-12 17:30:04 +0000409 rc = SQLITE_PROTOCOL;
410 goto end_playback;
drhd9b02572001-04-15 00:37:09 +0000411 }
drh94f33312002-08-12 12:29:56 +0000412 if( memcmp(aMagic, aOldJournalMagic, sizeof(aMagic))==0 ){
413 pPager->journalFormat = SQLITE_OLD_JOURNAL_FORMAT;
414 }else if( memcmp(aMagic, aJournalMagic, sizeof(aMagic))==0 ){
415 pPager->journalFormat = SQLITE_NEW_JOURNAL_FORMAT;
416 }else{
417 rc = SQLITE_PROTOCOL;
418 goto end_playback;
419 }
420 rc = read32bits(pPager, &pPager->jfd, &mxPg);
drhd9b02572001-04-15 00:37:09 +0000421 if( rc!=SQLITE_OK ){
drh81a20f22001-10-12 17:30:04 +0000422 goto end_playback;
drhd9b02572001-04-15 00:37:09 +0000423 }
drha7fcb052001-12-14 15:09:55 +0000424 rc = sqliteOsTruncate(&pPager->fd, mxPg*SQLITE_PAGE_SIZE);
drh81a20f22001-10-12 17:30:04 +0000425 if( rc!=SQLITE_OK ){
426 goto end_playback;
427 }
drhd9b02572001-04-15 00:37:09 +0000428 pPager->dbSize = mxPg;
429
drhfa86c412002-02-02 15:01:15 +0000430 /* Copy original pages out of the journal and back into the database file.
drhed7c8552001-04-11 14:29:21 +0000431 */
drhd9b02572001-04-15 00:37:09 +0000432 for(i=nRec-1; i>=0; i--){
drhfa86c412002-02-02 15:01:15 +0000433 rc = pager_playback_one_page(pPager, &pPager->jfd);
drhd9b02572001-04-15 00:37:09 +0000434 if( rc!=SQLITE_OK ) break;
drhed7c8552001-04-11 14:29:21 +0000435 }
drh81a20f22001-10-12 17:30:04 +0000436
437end_playback:
drhd9b02572001-04-15 00:37:09 +0000438 if( rc!=SQLITE_OK ){
439 pager_unwritelock(pPager);
440 pPager->errMask |= PAGER_ERR_CORRUPT;
441 rc = SQLITE_CORRUPT;
442 }else{
443 rc = pager_unwritelock(pPager);
drhed7c8552001-04-11 14:29:21 +0000444 }
drhd9b02572001-04-15 00:37:09 +0000445 return rc;
drhed7c8552001-04-11 14:29:21 +0000446}
447
448/*
drhfa86c412002-02-02 15:01:15 +0000449** Playback the checkpoint journal.
450**
451** This is similar to playing back the transaction journal but with
452** a few extra twists.
453**
drh663fc632002-02-02 18:49:19 +0000454** (1) The number of pages in the database file at the start of
455** the checkpoint is stored in pPager->ckptSize, not in the
456** journal file itself.
drhfa86c412002-02-02 15:01:15 +0000457**
458** (2) In addition to playing back the checkpoint journal, also
459** playback all pages of the transaction journal beginning
460** at offset pPager->ckptJSize.
461*/
462static int pager_ckpt_playback(Pager *pPager){
463 int nRec; /* Number of Records */
464 int i; /* Loop counter */
465 int rc;
466
467 /* Truncate the database back to its original size.
468 */
drh663fc632002-02-02 18:49:19 +0000469 rc = sqliteOsTruncate(&pPager->fd, pPager->ckptSize*SQLITE_PAGE_SIZE);
drhfa86c412002-02-02 15:01:15 +0000470 pPager->dbSize = pPager->ckptSize;
471
472 /* Figure out how many records are in the checkpoint journal.
473 */
drh0f892532002-05-30 12:27:03 +0000474 assert( pPager->ckptInUse && pPager->journalOpen );
drhfa86c412002-02-02 15:01:15 +0000475 sqliteOsSeek(&pPager->cpfd, 0);
476 rc = sqliteOsFileSize(&pPager->cpfd, &nRec);
477 if( rc!=SQLITE_OK ){
478 goto end_ckpt_playback;
479 }
480 nRec /= sizeof(PageRecord);
481
482 /* Copy original pages out of the checkpoint journal and back into the
483 ** database file.
484 */
drh74587e52002-08-13 00:01:16 +0000485 if( pager_old_format ){
486 pPager->journalFormat = SQLITE_OLD_JOURNAL_FORMAT;
487 }else{
488 pPager->journalFormat = SQLITE_NEW_JOURNAL_FORMAT;
489 }
drhfa86c412002-02-02 15:01:15 +0000490 for(i=nRec-1; i>=0; i--){
491 rc = pager_playback_one_page(pPager, &pPager->cpfd);
492 if( rc!=SQLITE_OK ) goto end_ckpt_playback;
493 }
494
495 /* Figure out how many pages need to be copied out of the transaction
496 ** journal.
497 */
498 rc = sqliteOsSeek(&pPager->jfd, pPager->ckptJSize);
499 if( rc!=SQLITE_OK ){
500 goto end_ckpt_playback;
501 }
502 rc = sqliteOsFileSize(&pPager->jfd, &nRec);
503 if( rc!=SQLITE_OK ){
504 goto end_ckpt_playback;
505 }
506 nRec = (nRec - pPager->ckptJSize)/sizeof(PageRecord);
507 for(i=nRec-1; i>=0; i--){
508 rc = pager_playback_one_page(pPager, &pPager->jfd);
509 if( rc!=SQLITE_OK ) goto end_ckpt_playback;
510 }
511
512
513end_ckpt_playback:
drhfa86c412002-02-02 15:01:15 +0000514 if( rc!=SQLITE_OK ){
drhfa86c412002-02-02 15:01:15 +0000515 pPager->errMask |= PAGER_ERR_CORRUPT;
516 rc = SQLITE_CORRUPT;
drhfa86c412002-02-02 15:01:15 +0000517 }
518 return rc;
519}
520
521/*
drhf57b14a2001-09-14 18:54:08 +0000522** Change the maximum number of in-memory pages that are allowed.
drhcd61c282002-03-06 22:01:34 +0000523**
524** The maximum number is the absolute value of the mxPage parameter.
525** If mxPage is negative, the noSync flag is also set. noSync bypasses
526** calls to sqliteOsSync(). The pager runs much faster with noSync on,
527** but if the operating system crashes or there is an abrupt power
528** failure, the database file might be left in an inconsistent and
529** unrepairable state.
drhf57b14a2001-09-14 18:54:08 +0000530*/
531void sqlitepager_set_cachesize(Pager *pPager, int mxPage){
drh603240c2002-03-05 01:11:12 +0000532 if( mxPage>=0 ){
drha1680452002-04-18 01:56:57 +0000533 pPager->noSync = pPager->tempFile;
drh603240c2002-03-05 01:11:12 +0000534 }else{
535 pPager->noSync = 1;
536 mxPage = -mxPage;
537 }
drhf57b14a2001-09-14 18:54:08 +0000538 if( mxPage>10 ){
539 pPager->mxPage = mxPage;
540 }
541}
542
543/*
drhfa86c412002-02-02 15:01:15 +0000544** Open a temporary file. Write the name of the file into zName
545** (zName must be at least SQLITE_TEMPNAME_SIZE bytes long.) Write
546** the file descriptor into *fd. Return SQLITE_OK on success or some
547** other error code if we fail.
548**
549** The OS will automatically delete the temporary file when it is
550** closed.
551*/
552static int sqlitepager_opentemp(char *zFile, OsFile *fd){
553 int cnt = 8;
554 int rc;
555 do{
556 cnt--;
557 sqliteOsTempFileName(zFile);
558 rc = sqliteOsOpenExclusive(zFile, fd, 1);
559 }while( cnt>0 && rc!=SQLITE_OK );
560 return rc;
561}
562
563/*
drhed7c8552001-04-11 14:29:21 +0000564** Create a new page cache and put a pointer to the page cache in *ppPager.
drh5e00f6c2001-09-13 13:46:56 +0000565** The file to be cached need not exist. The file is not locked until
drhd9b02572001-04-15 00:37:09 +0000566** the first call to sqlitepager_get() and is only held open until the
567** last page is released using sqlitepager_unref().
drh382c0242001-10-06 16:33:02 +0000568**
drh6446c4d2001-12-15 14:22:18 +0000569** If zFilename is NULL then a randomly-named temporary file is created
570** and used as the file to be cached. The file will be deleted
571** automatically when it is closed.
drhed7c8552001-04-11 14:29:21 +0000572*/
drh7e3b0a02001-04-28 16:52:40 +0000573int sqlitepager_open(
574 Pager **ppPager, /* Return the Pager structure here */
575 const char *zFilename, /* Name of the database file to open */
576 int mxPage, /* Max number of in-memory cache pages */
577 int nExtra /* Extra bytes append to each in-memory page */
578){
drhed7c8552001-04-11 14:29:21 +0000579 Pager *pPager;
580 int nameLen;
drh8cfbf082001-09-19 13:22:39 +0000581 OsFile fd;
582 int rc;
drh5e00f6c2001-09-13 13:46:56 +0000583 int tempFile;
584 int readOnly = 0;
drh8cfbf082001-09-19 13:22:39 +0000585 char zTemp[SQLITE_TEMPNAME_SIZE];
drhed7c8552001-04-11 14:29:21 +0000586
drhd9b02572001-04-15 00:37:09 +0000587 *ppPager = 0;
588 if( sqlite_malloc_failed ){
589 return SQLITE_NOMEM;
590 }
drh5e00f6c2001-09-13 13:46:56 +0000591 if( zFilename ){
drh8cfbf082001-09-19 13:22:39 +0000592 rc = sqliteOsOpenReadWrite(zFilename, &fd, &readOnly);
drh5e00f6c2001-09-13 13:46:56 +0000593 tempFile = 0;
594 }else{
drhfa86c412002-02-02 15:01:15 +0000595 rc = sqlitepager_opentemp(zTemp, &fd);
drh5e00f6c2001-09-13 13:46:56 +0000596 zFilename = zTemp;
597 tempFile = 1;
598 }
drh8cfbf082001-09-19 13:22:39 +0000599 if( rc!=SQLITE_OK ){
drhed7c8552001-04-11 14:29:21 +0000600 return SQLITE_CANTOPEN;
601 }
602 nameLen = strlen(zFilename);
603 pPager = sqliteMalloc( sizeof(*pPager) + nameLen*2 + 30 );
drhd9b02572001-04-15 00:37:09 +0000604 if( pPager==0 ){
drha7fcb052001-12-14 15:09:55 +0000605 sqliteOsClose(&fd);
drhd9b02572001-04-15 00:37:09 +0000606 return SQLITE_NOMEM;
607 }
drhed7c8552001-04-11 14:29:21 +0000608 pPager->zFilename = (char*)&pPager[1];
609 pPager->zJournal = &pPager->zFilename[nameLen+1];
610 strcpy(pPager->zFilename, zFilename);
611 strcpy(pPager->zJournal, zFilename);
612 strcpy(&pPager->zJournal[nameLen], "-journal");
613 pPager->fd = fd;
drh8cfbf082001-09-19 13:22:39 +0000614 pPager->journalOpen = 0;
drhfa86c412002-02-02 15:01:15 +0000615 pPager->ckptOpen = 0;
drh0f892532002-05-30 12:27:03 +0000616 pPager->ckptInUse = 0;
drhed7c8552001-04-11 14:29:21 +0000617 pPager->nRef = 0;
618 pPager->dbSize = -1;
drhfa86c412002-02-02 15:01:15 +0000619 pPager->ckptSize = 0;
620 pPager->ckptJSize = 0;
drhed7c8552001-04-11 14:29:21 +0000621 pPager->nPage = 0;
drhd79caeb2001-04-15 02:27:24 +0000622 pPager->mxPage = mxPage>5 ? mxPage : 10;
drhed7c8552001-04-11 14:29:21 +0000623 pPager->state = SQLITE_UNLOCK;
drhd9b02572001-04-15 00:37:09 +0000624 pPager->errMask = 0;
drh5e00f6c2001-09-13 13:46:56 +0000625 pPager->tempFile = tempFile;
626 pPager->readOnly = readOnly;
drhf57b14a2001-09-14 18:54:08 +0000627 pPager->needSync = 0;
drha1680452002-04-18 01:56:57 +0000628 pPager->noSync = pPager->tempFile;
drhed7c8552001-04-11 14:29:21 +0000629 pPager->pFirst = 0;
630 pPager->pLast = 0;
drh7c717f72001-06-24 20:39:41 +0000631 pPager->nExtra = nExtra;
drhed7c8552001-04-11 14:29:21 +0000632 memset(pPager->aHash, 0, sizeof(pPager->aHash));
633 *ppPager = pPager;
634 return SQLITE_OK;
635}
636
637/*
drh72f82862001-05-24 21:06:34 +0000638** Set the destructor for this pager. If not NULL, the destructor is called
drh5e00f6c2001-09-13 13:46:56 +0000639** when the reference count on each page reaches zero. The destructor can
640** be used to clean up information in the extra segment appended to each page.
drh72f82862001-05-24 21:06:34 +0000641**
642** The destructor is not called as a result sqlitepager_close().
643** Destructors are only called by sqlitepager_unref().
644*/
645void sqlitepager_set_destructor(Pager *pPager, void (*xDesc)(void*)){
646 pPager->xDestructor = xDesc;
647}
648
649/*
drh5e00f6c2001-09-13 13:46:56 +0000650** Return the total number of pages in the disk file associated with
651** pPager.
drhed7c8552001-04-11 14:29:21 +0000652*/
drhd9b02572001-04-15 00:37:09 +0000653int sqlitepager_pagecount(Pager *pPager){
drhed7c8552001-04-11 14:29:21 +0000654 int n;
drhd9b02572001-04-15 00:37:09 +0000655 assert( pPager!=0 );
drhed7c8552001-04-11 14:29:21 +0000656 if( pPager->dbSize>=0 ){
657 return pPager->dbSize;
658 }
drha7fcb052001-12-14 15:09:55 +0000659 if( sqliteOsFileSize(&pPager->fd, &n)!=SQLITE_OK ){
drh81a20f22001-10-12 17:30:04 +0000660 pPager->errMask |= PAGER_ERR_DISK;
drh8cfbf082001-09-19 13:22:39 +0000661 return 0;
drhed7c8552001-04-11 14:29:21 +0000662 }
drh8cfbf082001-09-19 13:22:39 +0000663 n /= SQLITE_PAGE_SIZE;
drhd9b02572001-04-15 00:37:09 +0000664 if( pPager->state!=SQLITE_UNLOCK ){
drhed7c8552001-04-11 14:29:21 +0000665 pPager->dbSize = n;
666 }
667 return n;
668}
669
670/*
671** Shutdown the page cache. Free all memory and close all files.
672**
673** If a transaction was in progress when this routine is called, that
674** transaction is rolled back. All outstanding pages are invalidated
675** and their memory is freed. Any attempt to use a page associated
676** with this page cache after this function returns will likely
677** result in a coredump.
678*/
drhd9b02572001-04-15 00:37:09 +0000679int sqlitepager_close(Pager *pPager){
680 PgHdr *pPg, *pNext;
drhed7c8552001-04-11 14:29:21 +0000681 switch( pPager->state ){
682 case SQLITE_WRITELOCK: {
drhd9b02572001-04-15 00:37:09 +0000683 sqlitepager_rollback(pPager);
drha7fcb052001-12-14 15:09:55 +0000684 sqliteOsUnlock(&pPager->fd);
drh8cfbf082001-09-19 13:22:39 +0000685 assert( pPager->journalOpen==0 );
drhed7c8552001-04-11 14:29:21 +0000686 break;
687 }
688 case SQLITE_READLOCK: {
drha7fcb052001-12-14 15:09:55 +0000689 sqliteOsUnlock(&pPager->fd);
drhed7c8552001-04-11 14:29:21 +0000690 break;
691 }
692 default: {
693 /* Do nothing */
694 break;
695 }
696 }
drhd9b02572001-04-15 00:37:09 +0000697 for(pPg=pPager->pAll; pPg; pPg=pNext){
698 pNext = pPg->pNextAll;
699 sqliteFree(pPg);
drhed7c8552001-04-11 14:29:21 +0000700 }
drha7fcb052001-12-14 15:09:55 +0000701 sqliteOsClose(&pPager->fd);
drh8cfbf082001-09-19 13:22:39 +0000702 assert( pPager->journalOpen==0 );
drh0f892532002-05-30 12:27:03 +0000703 /* Temp files are automatically deleted by the OS
704 ** if( pPager->tempFile ){
705 ** sqliteOsDelete(pPager->zFilename);
706 ** }
707 */
drhed7c8552001-04-11 14:29:21 +0000708 sqliteFree(pPager);
709 return SQLITE_OK;
710}
711
712/*
drh5e00f6c2001-09-13 13:46:56 +0000713** Return the page number for the given page data.
drhed7c8552001-04-11 14:29:21 +0000714*/
drhd9b02572001-04-15 00:37:09 +0000715Pgno sqlitepager_pagenumber(void *pData){
drhed7c8552001-04-11 14:29:21 +0000716 PgHdr *p = DATA_TO_PGHDR(pData);
717 return p->pgno;
718}
719
720/*
drh7e3b0a02001-04-28 16:52:40 +0000721** Increment the reference count for a page. If the page is
722** currently on the freelist (the reference count is zero) then
723** remove it from the freelist.
724*/
drhdf0b3b02001-06-23 11:36:20 +0000725static void page_ref(PgHdr *pPg){
drh7e3b0a02001-04-28 16:52:40 +0000726 if( pPg->nRef==0 ){
727 /* The page is currently on the freelist. Remove it. */
728 if( pPg->pPrevFree ){
729 pPg->pPrevFree->pNextFree = pPg->pNextFree;
730 }else{
731 pPg->pPager->pFirst = pPg->pNextFree;
732 }
733 if( pPg->pNextFree ){
734 pPg->pNextFree->pPrevFree = pPg->pPrevFree;
735 }else{
736 pPg->pPager->pLast = pPg->pPrevFree;
737 }
738 pPg->pPager->nRef++;
739 }
740 pPg->nRef++;
drhdd793422001-06-28 01:54:48 +0000741 REFINFO(pPg);
drhdf0b3b02001-06-23 11:36:20 +0000742}
743
744/*
745** Increment the reference count for a page. The input pointer is
746** a reference to the page data.
747*/
748int sqlitepager_ref(void *pData){
749 PgHdr *pPg = DATA_TO_PGHDR(pData);
750 page_ref(pPg);
drh8c42ca92001-06-22 19:15:00 +0000751 return SQLITE_OK;
drh7e3b0a02001-04-28 16:52:40 +0000752}
753
754/*
drhb19a2bc2001-09-16 00:13:26 +0000755** Sync the journal and then write all free dirty pages to the database
756** file.
757**
758** Writing all free dirty pages to the database after the sync is a
759** non-obvious optimization. fsync() is an expensive operation so we
drhaaab5722002-02-19 13:39:21 +0000760** want to minimize the number ot times it is called. After an fsync() call,
drh6446c4d2001-12-15 14:22:18 +0000761** we are free to write dirty pages back to the database. It is best
762** to go ahead and write as many dirty pages as possible to minimize
763** the risk of having to do another fsync() later on. Writing dirty
764** free pages in this way was observed to make database operations go
765** up to 10 times faster.
drhfa86c412002-02-02 15:01:15 +0000766**
767** If we are writing to temporary database, there is no need to preserve
768** the integrity of the journal file, so we can save time and skip the
769** fsync().
drh50e5dad2001-09-15 00:57:28 +0000770*/
771static int syncAllPages(Pager *pPager){
772 PgHdr *pPg;
773 int rc = SQLITE_OK;
774 if( pPager->needSync ){
drhfa86c412002-02-02 15:01:15 +0000775 if( !pPager->tempFile ){
776 rc = sqliteOsSync(&pPager->jfd);
777 if( rc!=0 ) return rc;
778 }
drh50e5dad2001-09-15 00:57:28 +0000779 pPager->needSync = 0;
780 }
781 for(pPg=pPager->pFirst; pPg; pPg=pPg->pNextFree){
782 if( pPg->dirty ){
drha7fcb052001-12-14 15:09:55 +0000783 sqliteOsSeek(&pPager->fd, (pPg->pgno-1)*SQLITE_PAGE_SIZE);
784 rc = sqliteOsWrite(&pPager->fd, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE);
drh50e5dad2001-09-15 00:57:28 +0000785 if( rc!=SQLITE_OK ) break;
786 pPg->dirty = 0;
787 }
788 }
drh81a20f22001-10-12 17:30:04 +0000789 return rc;
drh50e5dad2001-09-15 00:57:28 +0000790}
791
792/*
drhd9b02572001-04-15 00:37:09 +0000793** Acquire a page.
794**
drh58a11682001-11-10 13:51:08 +0000795** A read lock on the disk file is obtained when the first page is acquired.
drh5e00f6c2001-09-13 13:46:56 +0000796** This read lock is dropped when the last page is released.
drhd9b02572001-04-15 00:37:09 +0000797**
drh306dc212001-05-21 13:45:10 +0000798** A _get works for any page number greater than 0. If the database
799** file is smaller than the requested page, then no actual disk
800** read occurs and the memory image of the page is initialized to
801** all zeros. The extra data appended to a page is always initialized
802** to zeros the first time a page is loaded into memory.
803**
drhd9b02572001-04-15 00:37:09 +0000804** The acquisition might fail for several reasons. In all cases,
805** an appropriate error code is returned and *ppPage is set to NULL.
drh7e3b0a02001-04-28 16:52:40 +0000806**
807** See also sqlitepager_lookup(). Both this routine and _lookup() attempt
808** to find a page in the in-memory cache first. If the page is not already
drh5e00f6c2001-09-13 13:46:56 +0000809** in memory, this routine goes to disk to read it in whereas _lookup()
drh7e3b0a02001-04-28 16:52:40 +0000810** just returns 0. This routine acquires a read-lock the first time it
811** has to go to disk, and could also playback an old journal if necessary.
812** Since _lookup() never goes to disk, it never has to deal with locks
813** or journal files.
drhed7c8552001-04-11 14:29:21 +0000814*/
drhd9b02572001-04-15 00:37:09 +0000815int sqlitepager_get(Pager *pPager, Pgno pgno, void **ppPage){
drhed7c8552001-04-11 14:29:21 +0000816 PgHdr *pPg;
817
drhd9b02572001-04-15 00:37:09 +0000818 /* Make sure we have not hit any critical errors.
819 */
820 if( pPager==0 || pgno==0 ){
821 return SQLITE_ERROR;
822 }
823 if( pPager->errMask & ~(PAGER_ERR_FULL) ){
824 return pager_errcode(pPager);
825 }
826
drhed7c8552001-04-11 14:29:21 +0000827 /* If this is the first page accessed, then get a read lock
828 ** on the database file.
829 */
830 if( pPager->nRef==0 ){
drha7fcb052001-12-14 15:09:55 +0000831 if( sqliteOsReadLock(&pPager->fd)!=SQLITE_OK ){
drhed7c8552001-04-11 14:29:21 +0000832 *ppPage = 0;
833 return SQLITE_BUSY;
834 }
drhd9b02572001-04-15 00:37:09 +0000835 pPager->state = SQLITE_READLOCK;
drhed7c8552001-04-11 14:29:21 +0000836
837 /* If a journal file exists, try to play it back.
838 */
drh8cfbf082001-09-19 13:22:39 +0000839 if( sqliteOsFileExists(pPager->zJournal) ){
drhf57b3392001-10-08 13:22:32 +0000840 int rc, dummy;
drhed7c8552001-04-11 14:29:21 +0000841
drha7fcb052001-12-14 15:09:55 +0000842 /* Get a write lock on the database
843 */
844 rc = sqliteOsWriteLock(&pPager->fd);
845 if( rc!=SQLITE_OK ){
drh6446c4d2001-12-15 14:22:18 +0000846 rc = sqliteOsUnlock(&pPager->fd);
drha7fcb052001-12-14 15:09:55 +0000847 assert( rc==SQLITE_OK );
848 *ppPage = 0;
849 return SQLITE_BUSY;
850 }
851 pPager->state = SQLITE_WRITELOCK;
852
drhed7c8552001-04-11 14:29:21 +0000853 /* Open the journal for exclusive access. Return SQLITE_BUSY if
drhf57b3392001-10-08 13:22:32 +0000854 ** we cannot get exclusive access to the journal file.
855 **
856 ** Even though we will only be reading from the journal, not writing,
857 ** we have to open the journal for writing in order to obtain an
858 ** exclusive access lock.
drhed7c8552001-04-11 14:29:21 +0000859 */
drhf57b3392001-10-08 13:22:32 +0000860 rc = sqliteOsOpenReadWrite(pPager->zJournal, &pPager->jfd, &dummy);
drha7fcb052001-12-14 15:09:55 +0000861 if( rc!=SQLITE_OK ){
862 rc = sqliteOsUnlock(&pPager->fd);
863 assert( rc==SQLITE_OK );
drhed7c8552001-04-11 14:29:21 +0000864 *ppPage = 0;
865 return SQLITE_BUSY;
866 }
drha7fcb052001-12-14 15:09:55 +0000867 pPager->journalOpen = 1;
drhed7c8552001-04-11 14:29:21 +0000868
869 /* Playback and delete the journal. Drop the database write
870 ** lock and reacquire the read lock.
871 */
drhd9b02572001-04-15 00:37:09 +0000872 rc = pager_playback(pPager);
873 if( rc!=SQLITE_OK ){
874 return rc;
875 }
drhed7c8552001-04-11 14:29:21 +0000876 }
877 pPg = 0;
878 }else{
879 /* Search for page in cache */
drhd9b02572001-04-15 00:37:09 +0000880 pPg = pager_lookup(pPager, pgno);
drhed7c8552001-04-11 14:29:21 +0000881 }
882 if( pPg==0 ){
drhd9b02572001-04-15 00:37:09 +0000883 /* The requested page is not in the page cache. */
drhed7c8552001-04-11 14:29:21 +0000884 int h;
drh7e3b0a02001-04-28 16:52:40 +0000885 pPager->nMiss++;
drhed7c8552001-04-11 14:29:21 +0000886 if( pPager->nPage<pPager->mxPage || pPager->pFirst==0 ){
887 /* Create a new page */
drh7e3b0a02001-04-28 16:52:40 +0000888 pPg = sqliteMalloc( sizeof(*pPg) + SQLITE_PAGE_SIZE + pPager->nExtra );
drhd9b02572001-04-15 00:37:09 +0000889 if( pPg==0 ){
890 *ppPage = 0;
891 pager_unwritelock(pPager);
892 pPager->errMask |= PAGER_ERR_MEM;
893 return SQLITE_NOMEM;
894 }
drhed7c8552001-04-11 14:29:21 +0000895 pPg->pPager = pPager;
drhd9b02572001-04-15 00:37:09 +0000896 pPg->pNextAll = pPager->pAll;
897 if( pPager->pAll ){
898 pPager->pAll->pPrevAll = pPg;
899 }
900 pPg->pPrevAll = 0;
drhd79caeb2001-04-15 02:27:24 +0000901 pPager->pAll = pPg;
drhd9b02572001-04-15 00:37:09 +0000902 pPager->nPage++;
drhed7c8552001-04-11 14:29:21 +0000903 }else{
drhd9b02572001-04-15 00:37:09 +0000904 /* Recycle an older page. First locate the page to be recycled.
905 ** Try to find one that is not dirty and is near the head of
906 ** of the free list */
drhed7c8552001-04-11 14:29:21 +0000907 pPg = pPager->pFirst;
drh603240c2002-03-05 01:11:12 +0000908 while( pPg && pPg->dirty ){
drhd9b02572001-04-15 00:37:09 +0000909 pPg = pPg->pNextFree;
910 }
drhb19a2bc2001-09-16 00:13:26 +0000911
912 /* If we could not find a page that has not been used recently
913 ** and which is not dirty, then sync the journal and write all
914 ** dirty free pages into the database file, thus making them
915 ** clean pages and available for recycling.
916 **
917 ** We have to sync the journal before writing a page to the main
918 ** database. But syncing is a very slow operation. So after a
919 ** sync, it is best to write everything we can back to the main
920 ** database to minimize the risk of having to sync again in the
drh94f33312002-08-12 12:29:56 +0000921 ** near future. That is why we write all dirty pages after a
drhb19a2bc2001-09-16 00:13:26 +0000922 ** sync.
923 */
drh603240c2002-03-05 01:11:12 +0000924 if( pPg==0 ){
drh50e5dad2001-09-15 00:57:28 +0000925 int rc = syncAllPages(pPager);
926 if( rc!=0 ){
927 sqlitepager_rollback(pPager);
928 *ppPage = 0;
929 return SQLITE_IOERR;
930 }
931 pPg = pPager->pFirst;
932 }
drhd9b02572001-04-15 00:37:09 +0000933 assert( pPg->nRef==0 );
drh50e5dad2001-09-15 00:57:28 +0000934 assert( pPg->dirty==0 );
drhd9b02572001-04-15 00:37:09 +0000935
drh193a6b42002-07-07 16:52:46 +0000936 /* If the page we are recyclying is marked as alwaysRollback, then
937 ** set the global alwaysRollback flag, thus disabling the
938 ** sqlite_dont_rollback() optimization for the rest of this transaction.
939 ** It is necessary to do this because the page marked alwaysRollback
940 ** might be reloaded at a later time but at that point we won't remember
941 ** that is was marked alwaysRollback. This means that all pages must
942 ** be marked as alwaysRollback from here on out.
943 */
944 if( pPg->alwaysRollback ){
945 pPager->alwaysRollback = 1;
946 }
947
drhd9b02572001-04-15 00:37:09 +0000948 /* Unlink the old page from the free list and the hash table
949 */
drh6019e162001-07-02 17:51:45 +0000950 if( pPg->pPrevFree ){
951 pPg->pPrevFree->pNextFree = pPg->pNextFree;
drhed7c8552001-04-11 14:29:21 +0000952 }else{
drh6019e162001-07-02 17:51:45 +0000953 assert( pPager->pFirst==pPg );
954 pPager->pFirst = pPg->pNextFree;
drhed7c8552001-04-11 14:29:21 +0000955 }
drh6019e162001-07-02 17:51:45 +0000956 if( pPg->pNextFree ){
957 pPg->pNextFree->pPrevFree = pPg->pPrevFree;
958 }else{
959 assert( pPager->pLast==pPg );
960 pPager->pLast = pPg->pPrevFree;
961 }
962 pPg->pNextFree = pPg->pPrevFree = 0;
drhed7c8552001-04-11 14:29:21 +0000963 if( pPg->pNextHash ){
964 pPg->pNextHash->pPrevHash = pPg->pPrevHash;
965 }
966 if( pPg->pPrevHash ){
967 pPg->pPrevHash->pNextHash = pPg->pNextHash;
968 }else{
drhd9b02572001-04-15 00:37:09 +0000969 h = pager_hash(pPg->pgno);
drhed7c8552001-04-11 14:29:21 +0000970 assert( pPager->aHash[h]==pPg );
971 pPager->aHash[h] = pPg->pNextHash;
972 }
drh6019e162001-07-02 17:51:45 +0000973 pPg->pNextHash = pPg->pPrevHash = 0;
drhd9b02572001-04-15 00:37:09 +0000974 pPager->nOvfl++;
drhed7c8552001-04-11 14:29:21 +0000975 }
976 pPg->pgno = pgno;
drh1ab43002002-01-14 09:28:19 +0000977 if( pPager->aInJournal && (int)pgno<=pPager->origDbSize ){
drh6019e162001-07-02 17:51:45 +0000978 pPg->inJournal = (pPager->aInJournal[pgno/8] & (1<<(pgno&7)))!=0;
979 }else{
980 pPg->inJournal = 0;
981 }
drh663fc632002-02-02 18:49:19 +0000982 if( pPager->aInCkpt && (int)pgno<=pPager->ckptSize ){
drhfa86c412002-02-02 15:01:15 +0000983 pPg->inCkpt = (pPager->aInCkpt[pgno/8] & (1<<(pgno&7)))!=0;
984 }else{
985 pPg->inCkpt = 0;
986 }
drhed7c8552001-04-11 14:29:21 +0000987 pPg->dirty = 0;
988 pPg->nRef = 1;
drhdd793422001-06-28 01:54:48 +0000989 REFINFO(pPg);
drhd9b02572001-04-15 00:37:09 +0000990 pPager->nRef++;
991 h = pager_hash(pgno);
drhed7c8552001-04-11 14:29:21 +0000992 pPg->pNextHash = pPager->aHash[h];
993 pPager->aHash[h] = pPg;
994 if( pPg->pNextHash ){
995 assert( pPg->pNextHash->pPrevHash==0 );
996 pPg->pNextHash->pPrevHash = pPg;
997 }
drh306dc212001-05-21 13:45:10 +0000998 if( pPager->dbSize<0 ) sqlitepager_pagecount(pPager);
drh1ab43002002-01-14 09:28:19 +0000999 if( pPager->dbSize<(int)pgno ){
drh306dc212001-05-21 13:45:10 +00001000 memset(PGHDR_TO_DATA(pPg), 0, SQLITE_PAGE_SIZE);
1001 }else{
drh81a20f22001-10-12 17:30:04 +00001002 int rc;
drha7fcb052001-12-14 15:09:55 +00001003 sqliteOsSeek(&pPager->fd, (pgno-1)*SQLITE_PAGE_SIZE);
1004 rc = sqliteOsRead(&pPager->fd, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE);
drh81a20f22001-10-12 17:30:04 +00001005 if( rc!=SQLITE_OK ){
drh4e371ee2002-09-05 16:08:27 +00001006 int fileSize;
1007 if( sqliteOsFileSize(&pPager->fd,&fileSize)!=SQLITE_OK
1008 || fileSize>=pgno*SQLITE_PAGE_SIZE ){
1009 return rc;
1010 }else{
1011 memset(PGHDR_TO_DATA(pPg), 0, SQLITE_PAGE_SIZE);
1012 }
drh81a20f22001-10-12 17:30:04 +00001013 }
drh306dc212001-05-21 13:45:10 +00001014 }
drh7e3b0a02001-04-28 16:52:40 +00001015 if( pPager->nExtra>0 ){
1016 memset(PGHDR_TO_EXTRA(pPg), 0, pPager->nExtra);
1017 }
drhed7c8552001-04-11 14:29:21 +00001018 }else{
drhd9b02572001-04-15 00:37:09 +00001019 /* The requested page is in the page cache. */
drh7e3b0a02001-04-28 16:52:40 +00001020 pPager->nHit++;
drhdf0b3b02001-06-23 11:36:20 +00001021 page_ref(pPg);
drhed7c8552001-04-11 14:29:21 +00001022 }
1023 *ppPage = PGHDR_TO_DATA(pPg);
1024 return SQLITE_OK;
1025}
1026
1027/*
drh7e3b0a02001-04-28 16:52:40 +00001028** Acquire a page if it is already in the in-memory cache. Do
1029** not read the page from disk. Return a pointer to the page,
1030** or 0 if the page is not in cache.
1031**
1032** See also sqlitepager_get(). The difference between this routine
1033** and sqlitepager_get() is that _get() will go to the disk and read
1034** in the page if the page is not already in cache. This routine
drh5e00f6c2001-09-13 13:46:56 +00001035** returns NULL if the page is not in cache or if a disk I/O error
1036** has ever happened.
drh7e3b0a02001-04-28 16:52:40 +00001037*/
1038void *sqlitepager_lookup(Pager *pPager, Pgno pgno){
1039 PgHdr *pPg;
1040
1041 /* Make sure we have not hit any critical errors.
1042 */
1043 if( pPager==0 || pgno==0 ){
1044 return 0;
1045 }
1046 if( pPager->errMask & ~(PAGER_ERR_FULL) ){
1047 return 0;
1048 }
1049 if( pPager->nRef==0 ){
1050 return 0;
1051 }
1052 pPg = pager_lookup(pPager, pgno);
1053 if( pPg==0 ) return 0;
drhdf0b3b02001-06-23 11:36:20 +00001054 page_ref(pPg);
drh7e3b0a02001-04-28 16:52:40 +00001055 return PGHDR_TO_DATA(pPg);
1056}
1057
1058/*
drhed7c8552001-04-11 14:29:21 +00001059** Release a page.
1060**
1061** If the number of references to the page drop to zero, then the
1062** page is added to the LRU list. When all references to all pages
drhd9b02572001-04-15 00:37:09 +00001063** are released, a rollback occurs and the lock on the database is
drhed7c8552001-04-11 14:29:21 +00001064** removed.
1065*/
drhd9b02572001-04-15 00:37:09 +00001066int sqlitepager_unref(void *pData){
drhed7c8552001-04-11 14:29:21 +00001067 PgHdr *pPg;
drhd9b02572001-04-15 00:37:09 +00001068
1069 /* Decrement the reference count for this page
1070 */
drhed7c8552001-04-11 14:29:21 +00001071 pPg = DATA_TO_PGHDR(pData);
1072 assert( pPg->nRef>0 );
drhed7c8552001-04-11 14:29:21 +00001073 pPg->nRef--;
drhdd793422001-06-28 01:54:48 +00001074 REFINFO(pPg);
drhd9b02572001-04-15 00:37:09 +00001075
drh72f82862001-05-24 21:06:34 +00001076 /* When the number of references to a page reach 0, call the
1077 ** destructor and add the page to the freelist.
drhd9b02572001-04-15 00:37:09 +00001078 */
drhed7c8552001-04-11 14:29:21 +00001079 if( pPg->nRef==0 ){
drh1eaa2692001-09-18 02:02:23 +00001080 Pager *pPager;
1081 pPager = pPg->pPager;
drhd9b02572001-04-15 00:37:09 +00001082 pPg->pNextFree = 0;
1083 pPg->pPrevFree = pPager->pLast;
drhed7c8552001-04-11 14:29:21 +00001084 pPager->pLast = pPg;
drhd9b02572001-04-15 00:37:09 +00001085 if( pPg->pPrevFree ){
1086 pPg->pPrevFree->pNextFree = pPg;
drhed7c8552001-04-11 14:29:21 +00001087 }else{
1088 pPager->pFirst = pPg;
1089 }
drh72f82862001-05-24 21:06:34 +00001090 if( pPager->xDestructor ){
1091 pPager->xDestructor(pData);
1092 }
drhd9b02572001-04-15 00:37:09 +00001093
1094 /* When all pages reach the freelist, drop the read lock from
1095 ** the database file.
1096 */
1097 pPager->nRef--;
1098 assert( pPager->nRef>=0 );
1099 if( pPager->nRef==0 ){
1100 pager_reset(pPager);
1101 }
drhed7c8552001-04-11 14:29:21 +00001102 }
drhd9b02572001-04-15 00:37:09 +00001103 return SQLITE_OK;
drhed7c8552001-04-11 14:29:21 +00001104}
1105
1106/*
drh4b845d72002-03-05 12:41:19 +00001107** Acquire a write-lock on the database. The lock is removed when
1108** the any of the following happen:
1109**
1110** * sqlitepager_commit() is called.
1111** * sqlitepager_rollback() is called.
1112** * sqlitepager_close() is called.
1113** * sqlitepager_unref() is called to on every outstanding page.
1114**
1115** The parameter to this routine is a pointer to any open page of the
1116** database file. Nothing changes about the page - it is used merely
1117** to acquire a pointer to the Pager structure and as proof that there
1118** is already a read-lock on the database.
1119**
1120** If the database is already write-locked, this routine is a no-op.
1121*/
1122int sqlitepager_begin(void *pData){
1123 PgHdr *pPg = DATA_TO_PGHDR(pData);
1124 Pager *pPager = pPg->pPager;
1125 int rc = SQLITE_OK;
1126 assert( pPg->nRef>0 );
1127 assert( pPager->state!=SQLITE_UNLOCK );
1128 if( pPager->state==SQLITE_READLOCK ){
1129 assert( pPager->aInJournal==0 );
1130 rc = sqliteOsWriteLock(&pPager->fd);
1131 if( rc!=SQLITE_OK ){
1132 return rc;
1133 }
1134 pPager->aInJournal = sqliteMalloc( pPager->dbSize/8 + 1 );
1135 if( pPager->aInJournal==0 ){
1136 sqliteOsReadLock(&pPager->fd);
1137 return SQLITE_NOMEM;
1138 }
drh8e298f92002-07-06 16:28:47 +00001139 rc = sqliteOsOpenExclusive(pPager->zJournal, &pPager->jfd,pPager->tempFile);
drh4b845d72002-03-05 12:41:19 +00001140 if( rc!=SQLITE_OK ){
1141 sqliteFree(pPager->aInJournal);
1142 pPager->aInJournal = 0;
1143 sqliteOsReadLock(&pPager->fd);
1144 return SQLITE_CANTOPEN;
1145 }
1146 pPager->journalOpen = 1;
drha1680452002-04-18 01:56:57 +00001147 pPager->needSync = 0;
1148 pPager->dirtyFile = 0;
drh193a6b42002-07-07 16:52:46 +00001149 pPager->alwaysRollback = 0;
drh4b845d72002-03-05 12:41:19 +00001150 pPager->state = SQLITE_WRITELOCK;
1151 sqlitepager_pagecount(pPager);
1152 pPager->origDbSize = pPager->dbSize;
drh94f33312002-08-12 12:29:56 +00001153 if( pager_old_format ){
1154 rc = sqliteOsWrite(&pPager->jfd, aOldJournalMagic,
1155 sizeof(aOldJournalMagic));
1156 }else{
1157 rc = sqliteOsWrite(&pPager->jfd, aJournalMagic, sizeof(aJournalMagic));
1158 }
drh4b845d72002-03-05 12:41:19 +00001159 if( rc==SQLITE_OK ){
drh94f33312002-08-12 12:29:56 +00001160 rc = write32bits(&pPager->jfd, pPager->dbSize);
drh4b845d72002-03-05 12:41:19 +00001161 }
1162 if( rc!=SQLITE_OK ){
1163 rc = pager_unwritelock(pPager);
drh4e371ee2002-09-05 16:08:27 +00001164 if( rc==SQLITE_OK ){
1165 rc = SQLITE_FULL;
1166 }
drh4b845d72002-03-05 12:41:19 +00001167 }
1168 }
1169 return rc;
1170}
1171
1172/*
drhed7c8552001-04-11 14:29:21 +00001173** Mark a data page as writeable. The page is written into the journal
1174** if it is not there already. This routine must be called before making
1175** changes to a page.
1176**
1177** The first time this routine is called, the pager creates a new
1178** journal and acquires a write lock on the database. If the write
1179** lock could not be acquired, this routine returns SQLITE_BUSY. The
drh306dc212001-05-21 13:45:10 +00001180** calling routine must check for that return value and be careful not to
drhed7c8552001-04-11 14:29:21 +00001181** change any page data until this routine returns SQLITE_OK.
drhd9b02572001-04-15 00:37:09 +00001182**
1183** If the journal file could not be written because the disk is full,
1184** then this routine returns SQLITE_FULL and does an immediate rollback.
1185** All subsequent write attempts also return SQLITE_FULL until there
1186** is a call to sqlitepager_commit() or sqlitepager_rollback() to
1187** reset.
drhed7c8552001-04-11 14:29:21 +00001188*/
drhd9b02572001-04-15 00:37:09 +00001189int sqlitepager_write(void *pData){
drh69688d52001-04-14 16:38:23 +00001190 PgHdr *pPg = DATA_TO_PGHDR(pData);
1191 Pager *pPager = pPg->pPager;
drhd79caeb2001-04-15 02:27:24 +00001192 int rc = SQLITE_OK;
drh69688d52001-04-14 16:38:23 +00001193
drh6446c4d2001-12-15 14:22:18 +00001194 /* Check for errors
1195 */
drhd9b02572001-04-15 00:37:09 +00001196 if( pPager->errMask ){
1197 return pager_errcode(pPager);
1198 }
drh5e00f6c2001-09-13 13:46:56 +00001199 if( pPager->readOnly ){
1200 return SQLITE_PERM;
1201 }
drh6446c4d2001-12-15 14:22:18 +00001202
1203 /* Mark the page as dirty. If the page has already been written
1204 ** to the journal then we can return right away.
1205 */
drhd9b02572001-04-15 00:37:09 +00001206 pPg->dirty = 1;
drh0f892532002-05-30 12:27:03 +00001207 if( pPg->inJournal && (pPg->inCkpt || pPager->ckptInUse==0) ){
drha1680452002-04-18 01:56:57 +00001208 pPager->dirtyFile = 1;
drhfa86c412002-02-02 15:01:15 +00001209 return SQLITE_OK;
1210 }
drh6446c4d2001-12-15 14:22:18 +00001211
1212 /* If we get this far, it means that the page needs to be
drhfa86c412002-02-02 15:01:15 +00001213 ** written to the transaction journal or the ckeckpoint journal
1214 ** or both.
1215 **
1216 ** First check to see that the transaction journal exists and
1217 ** create it if it does not.
drh6446c4d2001-12-15 14:22:18 +00001218 */
drhd9b02572001-04-15 00:37:09 +00001219 assert( pPager->state!=SQLITE_UNLOCK );
drh4b845d72002-03-05 12:41:19 +00001220 rc = sqlitepager_begin(pData);
drha1680452002-04-18 01:56:57 +00001221 pPager->dirtyFile = 1;
drh4b845d72002-03-05 12:41:19 +00001222 if( rc!=SQLITE_OK ) return rc;
drhd9b02572001-04-15 00:37:09 +00001223 assert( pPager->state==SQLITE_WRITELOCK );
drh8cfbf082001-09-19 13:22:39 +00001224 assert( pPager->journalOpen );
drh6446c4d2001-12-15 14:22:18 +00001225
drhfa86c412002-02-02 15:01:15 +00001226 /* The transaction journal now exists and we have a write lock on the
1227 ** main database file. Write the current page to the transaction
1228 ** journal if it is not there already.
drh6446c4d2001-12-15 14:22:18 +00001229 */
drhfa86c412002-02-02 15:01:15 +00001230 if( !pPg->inJournal && (int)pPg->pgno <= pPager->origDbSize ){
drh94f33312002-08-12 12:29:56 +00001231 rc = write32bits(&pPager->jfd, pPg->pgno);
drhd9b02572001-04-15 00:37:09 +00001232 if( rc==SQLITE_OK ){
drha7fcb052001-12-14 15:09:55 +00001233 rc = sqliteOsWrite(&pPager->jfd, pData, SQLITE_PAGE_SIZE);
drhd9b02572001-04-15 00:37:09 +00001234 }
1235 if( rc!=SQLITE_OK ){
1236 sqlitepager_rollback(pPager);
1237 pPager->errMask |= PAGER_ERR_FULL;
1238 return rc;
1239 }
drh6019e162001-07-02 17:51:45 +00001240 assert( pPager->aInJournal!=0 );
1241 pPager->aInJournal[pPg->pgno/8] |= 1<<(pPg->pgno&7);
drh603240c2002-03-05 01:11:12 +00001242 pPager->needSync = !pPager->noSync;
drhfa86c412002-02-02 15:01:15 +00001243 pPg->inJournal = 1;
drh0f892532002-05-30 12:27:03 +00001244 if( pPager->ckptInUse ){
drhfa86c412002-02-02 15:01:15 +00001245 pPager->aInCkpt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
1246 pPg->inCkpt = 1;
1247 }
drh69688d52001-04-14 16:38:23 +00001248 }
drh6446c4d2001-12-15 14:22:18 +00001249
drhfa86c412002-02-02 15:01:15 +00001250 /* If the checkpoint journal is open and the page is not in it,
1251 ** then write the current page to the checkpoint journal.
drh6446c4d2001-12-15 14:22:18 +00001252 */
drh0f892532002-05-30 12:27:03 +00001253 if( pPager->ckptInUse && !pPg->inCkpt && (int)pPg->pgno<=pPager->ckptSize ){
drh1e336b42002-02-14 12:50:33 +00001254 assert( pPg->inJournal || (int)pPg->pgno>pPager->origDbSize );
drh94f33312002-08-12 12:29:56 +00001255 rc = write32bits(&pPager->cpfd, pPg->pgno);
drhfa86c412002-02-02 15:01:15 +00001256 if( rc==SQLITE_OK ){
1257 rc = sqliteOsWrite(&pPager->cpfd, pData, SQLITE_PAGE_SIZE);
1258 }
1259 if( rc!=SQLITE_OK ){
1260 sqlitepager_rollback(pPager);
1261 pPager->errMask |= PAGER_ERR_FULL;
1262 return rc;
1263 }
1264 assert( pPager->aInCkpt!=0 );
1265 pPager->aInCkpt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
1266 pPg->inCkpt = 1;
1267 }
1268
1269 /* Update the database size and return.
1270 */
drh1ab43002002-01-14 09:28:19 +00001271 if( pPager->dbSize<(int)pPg->pgno ){
drh306dc212001-05-21 13:45:10 +00001272 pPager->dbSize = pPg->pgno;
1273 }
drh69688d52001-04-14 16:38:23 +00001274 return rc;
drhed7c8552001-04-11 14:29:21 +00001275}
1276
1277/*
drhaacc5432002-01-06 17:07:40 +00001278** Return TRUE if the page given in the argument was previously passed
drh6019e162001-07-02 17:51:45 +00001279** to sqlitepager_write(). In other words, return TRUE if it is ok
1280** to change the content of the page.
1281*/
1282int sqlitepager_iswriteable(void *pData){
1283 PgHdr *pPg = DATA_TO_PGHDR(pData);
1284 return pPg->dirty;
1285}
1286
1287/*
drh30e58752002-03-02 20:41:57 +00001288** A call to this routine tells the pager that it is not necessary to
1289** write the information on page "pgno" back to the disk, even though
1290** that page might be marked as dirty.
1291**
1292** The overlying software layer calls this routine when all of the data
1293** on the given page is unused. The pager marks the page as clean so
1294** that it does not get written to disk.
1295**
1296** Tests show that this optimization, together with the
1297** sqlitepager_dont_rollback() below, more than double the speed
1298** of large INSERT operations and quadruple the speed of large DELETEs.
drh8e298f92002-07-06 16:28:47 +00001299**
1300** When this routine is called, set the alwaysRollback flag to true.
1301** Subsequent calls to sqlitepager_dont_rollback() for the same page
1302** will thereafter be ignored. This is necessary to avoid a problem
1303** where a page with data is added to the freelist during one part of
1304** a transaction then removed from the freelist during a later part
1305** of the same transaction and reused for some other purpose. When it
1306** is first added to the freelist, this routine is called. When reused,
1307** the dont_rollback() routine is called. But because the page contains
1308** critical data, we still need to be sure it gets rolled back in spite
1309** of the dont_rollback() call.
drh30e58752002-03-02 20:41:57 +00001310*/
1311void sqlitepager_dont_write(Pager *pPager, Pgno pgno){
1312 PgHdr *pPg;
drh8e298f92002-07-06 16:28:47 +00001313
drh30e58752002-03-02 20:41:57 +00001314 pPg = pager_lookup(pPager, pgno);
drh8e298f92002-07-06 16:28:47 +00001315 pPg->alwaysRollback = 1;
drh30e58752002-03-02 20:41:57 +00001316 if( pPg && pPg->dirty ){
drh8124a302002-06-25 14:43:57 +00001317 if( pPager->dbSize==(int)pPg->pgno && pPager->origDbSize<pPager->dbSize ){
1318 /* If this pages is the last page in the file and the file has grown
1319 ** during the current transaction, then do NOT mark the page as clean.
1320 ** When the database file grows, we must make sure that the last page
1321 ** gets written at least once so that the disk file will be the correct
1322 ** size. If you do not write this page and the size of the file
1323 ** on the disk ends up being too small, that can lead to database
1324 ** corruption during the next transaction.
1325 */
1326 }else{
1327 pPg->dirty = 0;
1328 }
drh30e58752002-03-02 20:41:57 +00001329 }
1330}
1331
1332/*
1333** A call to this routine tells the pager that if a rollback occurs,
1334** it is not necessary to restore the data on the given page. This
1335** means that the pager does not have to record the given page in the
1336** rollback journal.
1337*/
1338void sqlitepager_dont_rollback(void *pData){
1339 PgHdr *pPg = DATA_TO_PGHDR(pData);
1340 Pager *pPager = pPg->pPager;
1341
1342 if( pPager->state!=SQLITE_WRITELOCK || pPager->journalOpen==0 ) return;
drh193a6b42002-07-07 16:52:46 +00001343 if( pPg->alwaysRollback || pPager->alwaysRollback ) return;
drh30e58752002-03-02 20:41:57 +00001344 if( !pPg->inJournal && (int)pPg->pgno <= pPager->origDbSize ){
1345 assert( pPager->aInJournal!=0 );
1346 pPager->aInJournal[pPg->pgno/8] |= 1<<(pPg->pgno&7);
1347 pPg->inJournal = 1;
drh0f892532002-05-30 12:27:03 +00001348 if( pPager->ckptInUse ){
drh30e58752002-03-02 20:41:57 +00001349 pPager->aInCkpt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
1350 pPg->inCkpt = 1;
1351 }
1352 }
drh0f892532002-05-30 12:27:03 +00001353 if( pPager->ckptInUse && !pPg->inCkpt && (int)pPg->pgno<=pPager->ckptSize ){
drh30e58752002-03-02 20:41:57 +00001354 assert( pPg->inJournal || (int)pPg->pgno>pPager->origDbSize );
1355 assert( pPager->aInCkpt!=0 );
1356 pPager->aInCkpt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
1357 pPg->inCkpt = 1;
1358 }
1359}
1360
1361/*
drhed7c8552001-04-11 14:29:21 +00001362** Commit all changes to the database and release the write lock.
drhd9b02572001-04-15 00:37:09 +00001363**
1364** If the commit fails for any reason, a rollback attempt is made
1365** and an error code is returned. If the commit worked, SQLITE_OK
1366** is returned.
drhed7c8552001-04-11 14:29:21 +00001367*/
drhd9b02572001-04-15 00:37:09 +00001368int sqlitepager_commit(Pager *pPager){
drha1b351a2001-09-14 16:42:12 +00001369 int rc;
drhed7c8552001-04-11 14:29:21 +00001370 PgHdr *pPg;
drhd9b02572001-04-15 00:37:09 +00001371
1372 if( pPager->errMask==PAGER_ERR_FULL ){
1373 rc = sqlitepager_rollback(pPager);
drh4e371ee2002-09-05 16:08:27 +00001374 if( rc==SQLITE_OK ){
1375 rc = SQLITE_FULL;
1376 }
drhd9b02572001-04-15 00:37:09 +00001377 return rc;
1378 }
1379 if( pPager->errMask!=0 ){
1380 rc = pager_errcode(pPager);
1381 return rc;
1382 }
1383 if( pPager->state!=SQLITE_WRITELOCK ){
1384 return SQLITE_ERROR;
1385 }
drh8cfbf082001-09-19 13:22:39 +00001386 assert( pPager->journalOpen );
drha1680452002-04-18 01:56:57 +00001387 if( pPager->dirtyFile==0 ){
1388 /* Exit early (without doing the time-consuming sqliteOsSync() calls)
1389 ** if there have been no changes to the database file. */
1390 rc = pager_unwritelock(pPager);
1391 pPager->dbSize = -1;
1392 return rc;
1393 }
drha7fcb052001-12-14 15:09:55 +00001394 if( pPager->needSync && sqliteOsSync(&pPager->jfd)!=SQLITE_OK ){
drhd9b02572001-04-15 00:37:09 +00001395 goto commit_abort;
drhed7c8552001-04-11 14:29:21 +00001396 }
drha1b351a2001-09-14 16:42:12 +00001397 for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
1398 if( pPg->dirty==0 ) continue;
drha7fcb052001-12-14 15:09:55 +00001399 rc = sqliteOsSeek(&pPager->fd, (pPg->pgno-1)*SQLITE_PAGE_SIZE);
drha1b351a2001-09-14 16:42:12 +00001400 if( rc!=SQLITE_OK ) goto commit_abort;
drha7fcb052001-12-14 15:09:55 +00001401 rc = sqliteOsWrite(&pPager->fd, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE);
drha1b351a2001-09-14 16:42:12 +00001402 if( rc!=SQLITE_OK ) goto commit_abort;
drhed7c8552001-04-11 14:29:21 +00001403 }
drh603240c2002-03-05 01:11:12 +00001404 if( !pPager->noSync && sqliteOsSync(&pPager->fd)!=SQLITE_OK ){
1405 goto commit_abort;
1406 }
drhd9b02572001-04-15 00:37:09 +00001407 rc = pager_unwritelock(pPager);
1408 pPager->dbSize = -1;
1409 return rc;
1410
1411 /* Jump here if anything goes wrong during the commit process.
1412 */
1413commit_abort:
1414 rc = sqlitepager_rollback(pPager);
1415 if( rc==SQLITE_OK ){
1416 rc = SQLITE_FULL;
drhed7c8552001-04-11 14:29:21 +00001417 }
drhed7c8552001-04-11 14:29:21 +00001418 return rc;
1419}
1420
1421/*
1422** Rollback all changes. The database falls back to read-only mode.
1423** All in-memory cache pages revert to their original data contents.
1424** The journal is deleted.
drhd9b02572001-04-15 00:37:09 +00001425**
1426** This routine cannot fail unless some other process is not following
1427** the correct locking protocol (SQLITE_PROTOCOL) or unless some other
1428** process is writing trash into the journal file (SQLITE_CORRUPT) or
1429** unless a prior malloc() failed (SQLITE_NOMEM). Appropriate error
1430** codes are returned for all these occasions. Otherwise,
1431** SQLITE_OK is returned.
drhed7c8552001-04-11 14:29:21 +00001432*/
drhd9b02572001-04-15 00:37:09 +00001433int sqlitepager_rollback(Pager *pPager){
drhed7c8552001-04-11 14:29:21 +00001434 int rc;
drhd9b02572001-04-15 00:37:09 +00001435 if( pPager->errMask!=0 && pPager->errMask!=PAGER_ERR_FULL ){
drh4b845d72002-03-05 12:41:19 +00001436 if( pPager->state>=SQLITE_WRITELOCK ){
1437 pager_playback(pPager);
1438 }
drhd9b02572001-04-15 00:37:09 +00001439 return pager_errcode(pPager);
drhed7c8552001-04-11 14:29:21 +00001440 }
drhd9b02572001-04-15 00:37:09 +00001441 if( pPager->state!=SQLITE_WRITELOCK ){
1442 return SQLITE_OK;
1443 }
1444 rc = pager_playback(pPager);
1445 if( rc!=SQLITE_OK ){
1446 rc = SQLITE_CORRUPT;
1447 pPager->errMask |= PAGER_ERR_CORRUPT;
1448 }
1449 pPager->dbSize = -1;
drhed7c8552001-04-11 14:29:21 +00001450 return rc;
drh98808ba2001-10-18 12:34:46 +00001451}
drhd9b02572001-04-15 00:37:09 +00001452
1453/*
drh5e00f6c2001-09-13 13:46:56 +00001454** Return TRUE if the database file is opened read-only. Return FALSE
1455** if the database is (in theory) writable.
1456*/
1457int sqlitepager_isreadonly(Pager *pPager){
drhbe0072d2001-09-13 14:46:09 +00001458 return pPager->readOnly;
drh5e00f6c2001-09-13 13:46:56 +00001459}
1460
1461/*
drhd9b02572001-04-15 00:37:09 +00001462** This routine is used for testing and analysis only.
1463*/
1464int *sqlitepager_stats(Pager *pPager){
1465 static int a[9];
1466 a[0] = pPager->nRef;
1467 a[1] = pPager->nPage;
1468 a[2] = pPager->mxPage;
1469 a[3] = pPager->dbSize;
1470 a[4] = pPager->state;
1471 a[5] = pPager->errMask;
1472 a[6] = pPager->nHit;
1473 a[7] = pPager->nMiss;
1474 a[8] = pPager->nOvfl;
1475 return a;
1476}
drhdd793422001-06-28 01:54:48 +00001477
drhfa86c412002-02-02 15:01:15 +00001478/*
1479** Set the checkpoint.
1480**
1481** This routine should be called with the transaction journal already
1482** open. A new checkpoint journal is created that can be used to rollback
drhaaab5722002-02-19 13:39:21 +00001483** changes of a single SQL command within a larger transaction.
drhfa86c412002-02-02 15:01:15 +00001484*/
1485int sqlitepager_ckpt_begin(Pager *pPager){
1486 int rc;
1487 char zTemp[SQLITE_TEMPNAME_SIZE];
1488 assert( pPager->journalOpen );
drh0f892532002-05-30 12:27:03 +00001489 assert( !pPager->ckptInUse );
drhfa86c412002-02-02 15:01:15 +00001490 pPager->aInCkpt = sqliteMalloc( pPager->dbSize/8 + 1 );
1491 if( pPager->aInCkpt==0 ){
1492 sqliteOsReadLock(&pPager->fd);
1493 return SQLITE_NOMEM;
1494 }
1495 rc = sqliteOsFileSize(&pPager->jfd, &pPager->ckptJSize);
1496 if( rc ) goto ckpt_begin_failed;
drh663fc632002-02-02 18:49:19 +00001497 pPager->ckptSize = pPager->dbSize;
drh0f892532002-05-30 12:27:03 +00001498 if( !pPager->ckptOpen ){
1499 rc = sqlitepager_opentemp(zTemp, &pPager->cpfd);
1500 if( rc ) goto ckpt_begin_failed;
1501 pPager->ckptOpen = 1;
1502 }
1503 pPager->ckptInUse = 1;
drhfa86c412002-02-02 15:01:15 +00001504 return SQLITE_OK;
1505
1506ckpt_begin_failed:
1507 if( pPager->aInCkpt ){
1508 sqliteFree(pPager->aInCkpt);
1509 pPager->aInCkpt = 0;
1510 }
1511 return rc;
1512}
1513
1514/*
1515** Commit a checkpoint.
1516*/
1517int sqlitepager_ckpt_commit(Pager *pPager){
drh0f892532002-05-30 12:27:03 +00001518 if( pPager->ckptInUse ){
drh663fc632002-02-02 18:49:19 +00001519 PgHdr *pPg;
drh0f892532002-05-30 12:27:03 +00001520 sqliteOsTruncate(&pPager->cpfd, 0);
1521 pPager->ckptInUse = 0;
drh663fc632002-02-02 18:49:19 +00001522 sqliteFree( pPager->aInCkpt );
1523 pPager->aInCkpt = 0;
1524 for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
1525 pPg->inCkpt = 0;
1526 }
1527 }
drhfa86c412002-02-02 15:01:15 +00001528 return SQLITE_OK;
1529}
1530
1531/*
1532** Rollback a checkpoint.
1533*/
1534int sqlitepager_ckpt_rollback(Pager *pPager){
1535 int rc;
drh0f892532002-05-30 12:27:03 +00001536 if( pPager->ckptInUse ){
drh663fc632002-02-02 18:49:19 +00001537 rc = pager_ckpt_playback(pPager);
1538 sqlitepager_ckpt_commit(pPager);
1539 }else{
1540 rc = SQLITE_OK;
1541 }
drhfa86c412002-02-02 15:01:15 +00001542 return rc;
1543}
1544
#ifdef SQLITE_TEST
/*
** Print a listing of all referenced pages and their ref count.
**
** One line is written to standard output for every page in the cache
** whose reference count is greater than zero, giving the page number,
** the address of the page data and the reference count.
**
** The address is printed with %p.  Casting the pointer to int and
** printing it with %x would truncate the address on platforms where
** pointers are wider than int.
*/
void sqlitepager_refdump(Pager *pPager){
  PgHdr *pPg;
  for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
    if( pPg->nRef<=0 ) continue;
    printf("PAGE %3d addr=%p nRef=%d\n",
       pPg->pgno, (void*)PGHDR_TO_DATA(pPg), pPg->nRef);
  }
}
#endif