blob: fb28913f1f2615772e59f8c2acf040c00a232fc0 [file] [log] [blame]
/*
** 2001 September 15
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This is the implementation of the page cache subsystem or "pager".
**
** The pager is used to access a database disk file.  It implements
** atomic commit and rollback through the use of a journal file that
** is separate from the database file.  The pager also implements file
** locking to prevent two processes from writing the same database
** file simultaneously, or one process from reading the database while
** another is writing.
**
** @(#) $Id: pager.c,v 1.66 2003/01/11 13:30:58 drh Exp $
*/
drh829e8022002-11-06 14:08:11 +000023#include "os.h" /* Must be first to enable large file support */
drhd9b02572001-04-15 00:37:09 +000024#include "sqliteInt.h"
drhed7c8552001-04-11 14:29:21 +000025#include "pager.h"
drhed7c8552001-04-11 14:29:21 +000026#include <assert.h>
drhd9b02572001-04-15 00:37:09 +000027#include <string.h>
drhed7c8552001-04-11 14:29:21 +000028
/*
** The page cache as a whole is always in one of the following
** states:
**
**   SQLITE_UNLOCK       The page cache is not currently reading or
**                       writing the database file.  There is no
**                       data held in memory.  This is the initial
**                       state.
**
**   SQLITE_READLOCK     The page cache is reading the database.
**                       Writing is not permitted.  There can be
**                       multiple readers accessing the same database
**                       file at the same time.
**
**   SQLITE_WRITELOCK    The page cache is writing the database.
**                       Access is exclusive.  No other processes or
**                       threads can be reading or writing while one
**                       process is writing.
**
** The page cache comes up in SQLITE_UNLOCK.  The first time a
** sqlite_page_get() occurs, the state transitions to SQLITE_READLOCK.
** After all pages have been released using sqlite_page_unref(),
** the state transitions back to SQLITE_UNLOCK.  The first time
** that sqlite_page_write() is called, the state transitions to
** SQLITE_WRITELOCK.  (Note that sqlite_page_write() can only be
** called on an outstanding page which means that the pager must
** be in SQLITE_READLOCK before it transitions to SQLITE_WRITELOCK.)
** The sqlite_page_rollback() and sqlite_page_commit() functions
** transition the state from SQLITE_WRITELOCK back to SQLITE_READLOCK.
**
** The numeric values are ordered (UNLOCK < READLOCK < WRITELOCK); code
** in this file compares Pager.state against them with < and >=.
*/
#define SQLITE_UNLOCK      0
#define SQLITE_READLOCK    1
#define SQLITE_WRITELOCK   2
62
drhd9b02572001-04-15 00:37:09 +000063
drhed7c8552001-04-11 14:29:21 +000064/*
65** Each in-memory image of a page begins with the following header.
drhbd03cae2001-06-02 02:40:57 +000066** This header is only visible to this pager module. The client
67** code that calls pager sees only the data that follows the header.
drhed7c8552001-04-11 14:29:21 +000068*/
drhd9b02572001-04-15 00:37:09 +000069typedef struct PgHdr PgHdr;
drhed7c8552001-04-11 14:29:21 +000070struct PgHdr {
71 Pager *pPager; /* The pager to which this page belongs */
72 Pgno pgno; /* The page number for this page */
drh69688d52001-04-14 16:38:23 +000073 PgHdr *pNextHash, *pPrevHash; /* Hash collision chain for PgHdr.pgno */
drhed7c8552001-04-11 14:29:21 +000074 int nRef; /* Number of users of this page */
drhd9b02572001-04-15 00:37:09 +000075 PgHdr *pNextFree, *pPrevFree; /* Freelist of pages where nRef==0 */
76 PgHdr *pNextAll, *pPrevAll; /* A list of all pages */
drh03eb96a2002-11-10 23:32:56 +000077 PgHdr *pNextCkpt, *pPrevCkpt; /* List of pages in the checkpoint journal */
drh193a6b42002-07-07 16:52:46 +000078 u8 inJournal; /* TRUE if has been written to journal */
79 u8 inCkpt; /* TRUE if written to the checkpoint journal */
80 u8 dirty; /* TRUE if we need to write back changes */
81 u8 alwaysRollback; /* Disable dont_rollback() for this page */
drh69688d52001-04-14 16:38:23 +000082 /* SQLITE_PAGE_SIZE bytes of page data follow this header */
drh7e3b0a02001-04-28 16:52:40 +000083 /* Pager.nExtra bytes of local data follow the page data */
drhed7c8552001-04-11 14:29:21 +000084};
85
/*
** Convert a pointer to a PgHdr into a pointer to its data
** and back again.
**
**   PGHDR_TO_DATA(P)   Address of the page data, which starts immediately
**                      after the PgHdr structure P points to.
**   DATA_TO_PGHDR(D)   Inverse of PGHDR_TO_DATA(): given a page-data
**                      pointer, step back one PgHdr to reach the header.
**   PGHDR_TO_EXTRA(P)  Address of the extra area, which starts
**                      SQLITE_PAGE_SIZE bytes after the page data.
*/
#define PGHDR_TO_DATA(P)  ((void*)(&(P)[1]))
#define DATA_TO_PGHDR(D)  (&((PgHdr*)(D))[-1])
#define PGHDR_TO_EXTRA(P) ((void*)&((char*)(&(P)[1]))[SQLITE_PAGE_SIZE])
drhed7c8552001-04-11 14:29:21 +000093
/*
** How big to make the hash table used for locating in-memory pages
** by page number.
**
** pager_hash() below reduces a page number with a bit mask, so this
** value MUST be a power of two.  The check that follows turns a bad
** value into a compile-time error instead of a silently broken hash.
*/
#define N_PG_HASH 2048
#if (N_PG_HASH & (N_PG_HASH-1))!=0
# error "N_PG_HASH must be a power of two"
#endif

/*
** Hash a page number.  The result is an index into Pager.aHash[] in the
** range 0..N_PG_HASH-1.
*/
#define pager_hash(PN)  ((PN)&(N_PG_HASH-1))
drhed7c8552001-04-11 14:29:21 +0000104
105/*
106** A open page cache is an instance of the following structure.
107*/
108struct Pager {
109 char *zFilename; /* Name of the database file */
110 char *zJournal; /* Name of the journal file */
drh8cfbf082001-09-19 13:22:39 +0000111 OsFile fd, jfd; /* File descriptors for database and journal */
drhfa86c412002-02-02 15:01:15 +0000112 OsFile cpfd; /* File descriptor for the checkpoint journal */
drhed7c8552001-04-11 14:29:21 +0000113 int dbSize; /* Number of pages in the file */
drh69688d52001-04-14 16:38:23 +0000114 int origDbSize; /* dbSize before the current change */
drh28be87c2002-11-05 23:03:02 +0000115 int ckptSize; /* Size of database (in pages) at ckpt_begin() */
116 off_t ckptJSize; /* Size of journal at ckpt_begin() */
drh9bd47a92003-01-07 14:46:08 +0000117 int ckptNRec; /* Number of records in the checkpoint journal */
drh7e3b0a02001-04-28 16:52:40 +0000118 int nExtra; /* Add this many bytes to each in-memory page */
drh72f82862001-05-24 21:06:34 +0000119 void (*xDestructor)(void*); /* Call this routine when freeing pages */
drhed7c8552001-04-11 14:29:21 +0000120 int nPage; /* Total number of in-memory pages */
drhd9b02572001-04-15 00:37:09 +0000121 int nRef; /* Number of in-memory pages with PgHdr.nRef>0 */
drhed7c8552001-04-11 14:29:21 +0000122 int mxPage; /* Maximum number of pages to hold in cache */
drhd9b02572001-04-15 00:37:09 +0000123 int nHit, nMiss, nOvfl; /* Cache hits, missing, and LRU overflows */
drh603240c2002-03-05 01:11:12 +0000124 u8 journalOpen; /* True if journal file descriptors is valid */
drhda47d772002-12-02 04:25:19 +0000125 u8 useJournal; /* Do not use a rollback journal on this file */
drh603240c2002-03-05 01:11:12 +0000126 u8 ckptOpen; /* True if the checkpoint journal is open */
drh0f892532002-05-30 12:27:03 +0000127 u8 ckptInUse; /* True we are in a checkpoint */
drhda47d772002-12-02 04:25:19 +0000128 u8 ckptAutoopen; /* Open ckpt journal when main journal is opened*/
drh603240c2002-03-05 01:11:12 +0000129 u8 noSync; /* Do not sync the journal if true */
130 u8 state; /* SQLITE_UNLOCK, _READLOCK or _WRITELOCK */
131 u8 errMask; /* One of several kinds of errors */
132 u8 tempFile; /* zFilename is a temporary file */
133 u8 readOnly; /* True for a read-only database */
134 u8 needSync; /* True if an fsync() is needed on the journal */
drha1680452002-04-18 01:56:57 +0000135 u8 dirtyFile; /* True if database file has changed in any way */
drh193a6b42002-07-07 16:52:46 +0000136 u8 alwaysRollback; /* Disable dont_rollback() for all pages */
drh94f33312002-08-12 12:29:56 +0000137 u8 journalFormat; /* Version number of the journal file */
drh603240c2002-03-05 01:11:12 +0000138 u8 *aInJournal; /* One bit for each page in the database file */
139 u8 *aInCkpt; /* One bit for each page in the database */
drhed7c8552001-04-11 14:29:21 +0000140 PgHdr *pFirst, *pLast; /* List of free pages */
drhd9b02572001-04-15 00:37:09 +0000141 PgHdr *pAll; /* List of all pages */
drh03eb96a2002-11-10 23:32:56 +0000142 PgHdr *pCkpt; /* List of pages in the checkpoint journal */
drhed7c8552001-04-11 14:29:21 +0000143 PgHdr *aHash[N_PG_HASH]; /* Hash table to map page number of PgHdr */
drhd9b02572001-04-15 00:37:09 +0000144};
145
/*
** These are bits that can be set in Pager.errMask.  Each value is a
** distinct single bit so that several error conditions can be recorded
** at once; pager_errcode() converts the mask into a result code.
*/
#define PAGER_ERR_FULL     0x01  /* a write() failed */
#define PAGER_ERR_MEM      0x02  /* malloc() failed */
#define PAGER_ERR_LOCK     0x04  /* error in the locking protocol */
#define PAGER_ERR_CORRUPT  0x08  /* database or journal corruption */
#define PAGER_ERR_DISK     0x10  /* general disk I/O error - bad hard drive? */
drhd9b02572001-04-15 00:37:09 +0000154
155/*
156** The journal file contains page records in the following
157** format.
158*/
159typedef struct PageRecord PageRecord;
160struct PageRecord {
161 Pgno pgno; /* The page number */
162 char aData[SQLITE_PAGE_SIZE]; /* Original data for page pgno */
163};
164
/*
** Journal files begin with the following magic string.  The data
** was obtained from /dev/random.  It is used only as a sanity check.
**
** There are two journal formats.  The older journal format writes
** 32-bit integers in the byte-order of the host machine.  The new
** format writes integers as big-endian.  All new journals use the
** new format, but we have to be able to read an older journal in order
** to roll it back.
**
** The two magic strings differ only in their final byte.
*/
static const unsigned char aOldJournalMagic[] = {
  0xd9, 0xd5, 0x05, 0xf9, 0x20, 0xa1, 0x63, 0xd4,
};
static const unsigned char aJournalMagic[] = {
  0xd9, 0xd5, 0x05, 0xf9, 0x20, 0xa1, 0x63, 0xd5,
};
#define SQLITE_NEW_JOURNAL_FORMAT 1
#define SQLITE_OLD_JOURNAL_FORMAT 0
183
/*
** The following integer, if set, causes journals to be written in the
** old format.  This is used for testing purposes only - to make sure
** the code is able to rollback an old journal.
**
** Outside of SQLITE_TEST builds the name is a macro that expands to the
** constant 0, so tests of it are compile-time constants.
*/
#ifdef SQLITE_TEST
int pager_old_format = 0;
#else
# define pager_old_format 0
#endif
drhed7c8552001-04-11 14:29:21 +0000194
/*
** Enable reference count tracking here:
**
** In SQLITE_TEST builds, REFINFO(X) prints the page number, the address
** of the page data and the reference count of page X whenever the global
** pager_refinfo_enable is non-zero.  In all other builds REFINFO(X)
** expands to nothing.
**
** The data address is printed with %p.  Casting the pointer to int (as
** earlier revisions did) truncates it on platforms where pointers are
** wider than int.
*/
#ifdef SQLITE_TEST
  int pager_refinfo_enable = 0;
  static void pager_refinfo(PgHdr *p){
    static int cnt = 0;
    if( !pager_refinfo_enable ) return;
    printf(
       "REFCNT: %4d addr=%p nRef=%d\n",
       (int)p->pgno, PGHDR_TO_DATA(p), p->nRef
    );
    cnt++;   /* Something to set a breakpoint on */
  }
# define REFINFO(X)  pager_refinfo(X)
#else
# define REFINFO(X)
#endif
213
214/*
drh94f33312002-08-12 12:29:56 +0000215** Read a 32-bit integer from the given file descriptor
216*/
217static int read32bits(Pager *pPager, OsFile *fd, u32 *pRes){
218 u32 res;
219 int rc;
220 rc = sqliteOsRead(fd, &res, sizeof(res));
221 if( rc==SQLITE_OK && pPager->journalFormat==SQLITE_NEW_JOURNAL_FORMAT ){
222 unsigned char ac[4];
223 memcpy(ac, &res, 4);
224 res = (ac[0]<<24) | (ac[1]<<16) | (ac[2]<<8) | ac[3];
225 }
226 *pRes = res;
227 return rc;
228}
229
230/*
231** Write a 32-bit integer into the given file descriptor. Writing
232** is always done using the new journal format.
233*/
234static int write32bits(OsFile *fd, u32 val){
235 unsigned char ac[4];
drh94f33312002-08-12 12:29:56 +0000236 if( pager_old_format ){
237 return sqliteOsWrite(fd, &val, 4);
238 }
drh94f33312002-08-12 12:29:56 +0000239 ac[0] = (val>>24) & 0xff;
240 ac[1] = (val>>16) & 0xff;
241 ac[2] = (val>>8) & 0xff;
242 ac[3] = val & 0xff;
243 return sqliteOsWrite(fd, ac, 4);
244}
245
246
247/*
drhd9b02572001-04-15 00:37:09 +0000248** Convert the bits in the pPager->errMask into an approprate
249** return code.
250*/
251static int pager_errcode(Pager *pPager){
252 int rc = SQLITE_OK;
253 if( pPager->errMask & PAGER_ERR_LOCK ) rc = SQLITE_PROTOCOL;
drh81a20f22001-10-12 17:30:04 +0000254 if( pPager->errMask & PAGER_ERR_DISK ) rc = SQLITE_IOERR;
drhd9b02572001-04-15 00:37:09 +0000255 if( pPager->errMask & PAGER_ERR_FULL ) rc = SQLITE_FULL;
256 if( pPager->errMask & PAGER_ERR_MEM ) rc = SQLITE_NOMEM;
257 if( pPager->errMask & PAGER_ERR_CORRUPT ) rc = SQLITE_CORRUPT;
258 return rc;
drhed7c8552001-04-11 14:29:21 +0000259}
260
261/*
drh03eb96a2002-11-10 23:32:56 +0000262** Add or remove a page from the list of all pages that are in the
263** checkpoint journal.
264**
265** The Pager keeps a separate list of pages that are currently in
266** the checkpoint journal. This helps the sqlitepager_ckpt_commit()
267** routine run MUCH faster for the common case where there are many
268** pages in memory but only a few are in the checkpoint journal.
269*/
270static void page_add_to_ckpt_list(PgHdr *pPg){
271 Pager *pPager = pPg->pPager;
272 if( pPg->inCkpt ) return;
273 assert( pPg->pPrevCkpt==0 && pPg->pNextCkpt==0 );
274 pPg->pPrevCkpt = 0;
275 if( pPager->pCkpt ){
276 pPager->pCkpt->pPrevCkpt = pPg;
277 }
278 pPg->pNextCkpt = pPager->pCkpt;
279 pPager->pCkpt = pPg;
280 pPg->inCkpt = 1;
281}
282static void page_remove_from_ckpt_list(PgHdr *pPg){
283 if( !pPg->inCkpt ) return;
284 if( pPg->pPrevCkpt ){
285 assert( pPg->pPrevCkpt->pNextCkpt==pPg );
286 pPg->pPrevCkpt->pNextCkpt = pPg->pNextCkpt;
287 }else{
288 assert( pPg->pPager->pCkpt==pPg );
289 pPg->pPager->pCkpt = pPg->pNextCkpt;
290 }
291 if( pPg->pNextCkpt ){
292 assert( pPg->pNextCkpt->pPrevCkpt==pPg );
293 pPg->pNextCkpt->pPrevCkpt = pPg->pPrevCkpt;
294 }
295 pPg->pNextCkpt = 0;
296 pPg->pPrevCkpt = 0;
297 pPg->inCkpt = 0;
298}
299
300/*
drhed7c8552001-04-11 14:29:21 +0000301** Find a page in the hash table given its page number. Return
302** a pointer to the page or NULL if not found.
303*/
drhd9b02572001-04-15 00:37:09 +0000304static PgHdr *pager_lookup(Pager *pPager, Pgno pgno){
drh836faa42003-01-11 13:30:57 +0000305 PgHdr *p = pPager->aHash[pager_hash(pgno)];
drhed7c8552001-04-11 14:29:21 +0000306 while( p && p->pgno!=pgno ){
307 p = p->pNextHash;
308 }
309 return p;
310}
311
312/*
313** Unlock the database and clear the in-memory cache. This routine
314** sets the state of the pager back to what it was when it was first
315** opened. Any outstanding pages are invalidated and subsequent attempts
316** to access those pages will likely result in a coredump.
317*/
drhd9b02572001-04-15 00:37:09 +0000318static void pager_reset(Pager *pPager){
drhed7c8552001-04-11 14:29:21 +0000319 PgHdr *pPg, *pNext;
drhd9b02572001-04-15 00:37:09 +0000320 for(pPg=pPager->pAll; pPg; pPg=pNext){
321 pNext = pPg->pNextAll;
322 sqliteFree(pPg);
drhed7c8552001-04-11 14:29:21 +0000323 }
324 pPager->pFirst = 0;
drhd9b02572001-04-15 00:37:09 +0000325 pPager->pLast = 0;
326 pPager->pAll = 0;
drhed7c8552001-04-11 14:29:21 +0000327 memset(pPager->aHash, 0, sizeof(pPager->aHash));
328 pPager->nPage = 0;
drhfa86c412002-02-02 15:01:15 +0000329 if( pPager->state>=SQLITE_WRITELOCK ){
drhd9b02572001-04-15 00:37:09 +0000330 sqlitepager_rollback(pPager);
drhed7c8552001-04-11 14:29:21 +0000331 }
drha7fcb052001-12-14 15:09:55 +0000332 sqliteOsUnlock(&pPager->fd);
drhed7c8552001-04-11 14:29:21 +0000333 pPager->state = SQLITE_UNLOCK;
drhd9b02572001-04-15 00:37:09 +0000334 pPager->dbSize = -1;
drhed7c8552001-04-11 14:29:21 +0000335 pPager->nRef = 0;
drh8cfbf082001-09-19 13:22:39 +0000336 assert( pPager->journalOpen==0 );
drhed7c8552001-04-11 14:29:21 +0000337}
338
339/*
340** When this routine is called, the pager has the journal file open and
341** a write lock on the database. This routine releases the database
342** write lock and acquires a read lock in its place. The journal file
343** is deleted and closed.
drhed7c8552001-04-11 14:29:21 +0000344*/
drhd9b02572001-04-15 00:37:09 +0000345static int pager_unwritelock(Pager *pPager){
drhed7c8552001-04-11 14:29:21 +0000346 int rc;
drhd9b02572001-04-15 00:37:09 +0000347 PgHdr *pPg;
drhfa86c412002-02-02 15:01:15 +0000348 if( pPager->state<SQLITE_WRITELOCK ) return SQLITE_OK;
drh663fc632002-02-02 18:49:19 +0000349 sqlitepager_ckpt_commit(pPager);
drh0f892532002-05-30 12:27:03 +0000350 if( pPager->ckptOpen ){
351 sqliteOsClose(&pPager->cpfd);
352 pPager->ckptOpen = 0;
353 }
drhda47d772002-12-02 04:25:19 +0000354 if( pPager->journalOpen ){
355 sqliteOsClose(&pPager->jfd);
356 pPager->journalOpen = 0;
357 sqliteOsDelete(pPager->zJournal);
358 sqliteFree( pPager->aInJournal );
359 pPager->aInJournal = 0;
360 for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
361 pPg->inJournal = 0;
362 pPg->dirty = 0;
363 }
364 }else{
365 assert( pPager->dirtyFile==0 || pPager->useJournal==0 );
drhd9b02572001-04-15 00:37:09 +0000366 }
drhda47d772002-12-02 04:25:19 +0000367 rc = sqliteOsReadLock(&pPager->fd);
drh8e298f92002-07-06 16:28:47 +0000368 if( rc==SQLITE_OK ){
369 pPager->state = SQLITE_READLOCK;
370 }else{
371 /* This can only happen if a process does a BEGIN, then forks and the
372 ** child process does the COMMIT. Because of the semantics of unix
373 ** file locking, the unlock will fail.
374 */
375 pPager->state = SQLITE_UNLOCK;
376 }
drhed7c8552001-04-11 14:29:21 +0000377 return rc;
378}
379
drhed7c8552001-04-11 14:29:21 +0000380/*
drhfa86c412002-02-02 15:01:15 +0000381** Read a single page from the journal file opened on file descriptor
382** jfd. Playback this one page.
383*/
384static int pager_playback_one_page(Pager *pPager, OsFile *jfd){
385 int rc;
386 PgHdr *pPg; /* An existing page in the cache */
387 PageRecord pgRec;
388
drh94f33312002-08-12 12:29:56 +0000389 rc = read32bits(pPager, jfd, &pgRec.pgno);
390 if( rc!=SQLITE_OK ) return rc;
391 rc = sqliteOsRead(jfd, &pgRec.aData, sizeof(pgRec.aData));
drhfa86c412002-02-02 15:01:15 +0000392 if( rc!=SQLITE_OK ) return rc;
393
394 /* Sanity checking on the page */
395 if( pgRec.pgno>pPager->dbSize || pgRec.pgno==0 ) return SQLITE_CORRUPT;
396
397 /* Playback the page. Update the in-memory copy of the page
398 ** at the same time, if there is one.
399 */
400 pPg = pager_lookup(pPager, pgRec.pgno);
401 if( pPg ){
402 memcpy(PGHDR_TO_DATA(pPg), pgRec.aData, SQLITE_PAGE_SIZE);
403 memset(PGHDR_TO_EXTRA(pPg), 0, pPager->nExtra);
404 }
drhd0d006e2002-12-01 02:00:57 +0000405 rc = sqliteOsSeek(&pPager->fd, (pgRec.pgno-1)*(off_t)SQLITE_PAGE_SIZE);
drhfa86c412002-02-02 15:01:15 +0000406 if( rc==SQLITE_OK ){
407 rc = sqliteOsWrite(&pPager->fd, pgRec.aData, SQLITE_PAGE_SIZE);
408 }
409 return rc;
410}
411
412/*
drhed7c8552001-04-11 14:29:21 +0000413** Playback the journal and thus restore the database file to
414** the state it was in before we started making changes.
415**
drhd9b02572001-04-15 00:37:09 +0000416** The journal file format is as follows: There is an initial
417** file-type string for sanity checking. Then there is a single
418** Pgno number which is the number of pages in the database before
419** changes were made. The database is truncated to this size.
drh306dc212001-05-21 13:45:10 +0000420** Next come zero or more page records where each page record
421** consists of a Pgno and SQLITE_PAGE_SIZE bytes of data. See
422** the PageRecord structure for details.
drhed7c8552001-04-11 14:29:21 +0000423**
drhd9b02572001-04-15 00:37:09 +0000424** If the file opened as the journal file is not a well-formed
425** journal file (as determined by looking at the magic number
426** at the beginning) then this routine returns SQLITE_PROTOCOL.
427** If any other errors occur during playback, the database will
428** likely be corrupted, so the PAGER_ERR_CORRUPT bit is set in
429** pPager->errMask and SQLITE_CORRUPT is returned. If it all
430** works, then this routine returns SQLITE_OK.
drhed7c8552001-04-11 14:29:21 +0000431*/
drhd9b02572001-04-15 00:37:09 +0000432static int pager_playback(Pager *pPager){
drh28be87c2002-11-05 23:03:02 +0000433 off_t nRec; /* Number of Records */
drhd9b02572001-04-15 00:37:09 +0000434 int i; /* Loop counter */
435 Pgno mxPg = 0; /* Size of the original file in pages */
drhd9b02572001-04-15 00:37:09 +0000436 unsigned char aMagic[sizeof(aJournalMagic)];
drhed7c8552001-04-11 14:29:21 +0000437 int rc;
438
drhc3a64ba2001-11-22 00:01:27 +0000439 /* Figure out how many records are in the journal. Abort early if
440 ** the journal is empty.
drhed7c8552001-04-11 14:29:21 +0000441 */
drh8cfbf082001-09-19 13:22:39 +0000442 assert( pPager->journalOpen );
drha7fcb052001-12-14 15:09:55 +0000443 sqliteOsSeek(&pPager->jfd, 0);
444 rc = sqliteOsFileSize(&pPager->jfd, &nRec);
drhc3a64ba2001-11-22 00:01:27 +0000445 if( rc!=SQLITE_OK ){
446 goto end_playback;
447 }
drh2c799952003-01-03 02:04:27 +0000448 if( nRec < sizeof(aMagic)+sizeof(Pgno) ){
drhc3a64ba2001-11-22 00:01:27 +0000449 goto end_playback;
450 }
drh28be87c2002-11-05 23:03:02 +0000451 nRec = (nRec - (sizeof(aMagic)+sizeof(Pgno))) / sizeof(PageRecord);
drhc3a64ba2001-11-22 00:01:27 +0000452
453 /* Read the beginning of the journal and truncate the
454 ** database file back to its original size.
455 */
drha7fcb052001-12-14 15:09:55 +0000456 rc = sqliteOsRead(&pPager->jfd, aMagic, sizeof(aMagic));
drh94f33312002-08-12 12:29:56 +0000457 if( rc!=SQLITE_OK ){
drh81a20f22001-10-12 17:30:04 +0000458 rc = SQLITE_PROTOCOL;
459 goto end_playback;
drhd9b02572001-04-15 00:37:09 +0000460 }
drh94f33312002-08-12 12:29:56 +0000461 if( memcmp(aMagic, aOldJournalMagic, sizeof(aMagic))==0 ){
462 pPager->journalFormat = SQLITE_OLD_JOURNAL_FORMAT;
463 }else if( memcmp(aMagic, aJournalMagic, sizeof(aMagic))==0 ){
464 pPager->journalFormat = SQLITE_NEW_JOURNAL_FORMAT;
465 }else{
466 rc = SQLITE_PROTOCOL;
467 goto end_playback;
468 }
469 rc = read32bits(pPager, &pPager->jfd, &mxPg);
drhd9b02572001-04-15 00:37:09 +0000470 if( rc!=SQLITE_OK ){
drh81a20f22001-10-12 17:30:04 +0000471 goto end_playback;
drhd9b02572001-04-15 00:37:09 +0000472 }
drh28be87c2002-11-05 23:03:02 +0000473 rc = sqliteOsTruncate(&pPager->fd, SQLITE_PAGE_SIZE*(off_t)mxPg);
drh81a20f22001-10-12 17:30:04 +0000474 if( rc!=SQLITE_OK ){
475 goto end_playback;
476 }
drhd9b02572001-04-15 00:37:09 +0000477 pPager->dbSize = mxPg;
478
drhfa86c412002-02-02 15:01:15 +0000479 /* Copy original pages out of the journal and back into the database file.
drhed7c8552001-04-11 14:29:21 +0000480 */
drhd9b02572001-04-15 00:37:09 +0000481 for(i=nRec-1; i>=0; i--){
drhfa86c412002-02-02 15:01:15 +0000482 rc = pager_playback_one_page(pPager, &pPager->jfd);
drhd9b02572001-04-15 00:37:09 +0000483 if( rc!=SQLITE_OK ) break;
drhed7c8552001-04-11 14:29:21 +0000484 }
drh81a20f22001-10-12 17:30:04 +0000485
486end_playback:
drhd9b02572001-04-15 00:37:09 +0000487 if( rc!=SQLITE_OK ){
488 pager_unwritelock(pPager);
489 pPager->errMask |= PAGER_ERR_CORRUPT;
490 rc = SQLITE_CORRUPT;
491 }else{
492 rc = pager_unwritelock(pPager);
drhed7c8552001-04-11 14:29:21 +0000493 }
drhd9b02572001-04-15 00:37:09 +0000494 return rc;
drhed7c8552001-04-11 14:29:21 +0000495}
496
497/*
drhfa86c412002-02-02 15:01:15 +0000498** Playback the checkpoint journal.
499**
500** This is similar to playing back the transaction journal but with
501** a few extra twists.
502**
drh663fc632002-02-02 18:49:19 +0000503** (1) The number of pages in the database file at the start of
504** the checkpoint is stored in pPager->ckptSize, not in the
505** journal file itself.
drhfa86c412002-02-02 15:01:15 +0000506**
507** (2) In addition to playing back the checkpoint journal, also
508** playback all pages of the transaction journal beginning
509** at offset pPager->ckptJSize.
510*/
511static int pager_ckpt_playback(Pager *pPager){
drh28be87c2002-11-05 23:03:02 +0000512 off_t nRec; /* Number of Records */
drhfa86c412002-02-02 15:01:15 +0000513 int i; /* Loop counter */
514 int rc;
515
516 /* Truncate the database back to its original size.
517 */
drh28be87c2002-11-05 23:03:02 +0000518 rc = sqliteOsTruncate(&pPager->fd, SQLITE_PAGE_SIZE*(off_t)pPager->ckptSize);
drhfa86c412002-02-02 15:01:15 +0000519 pPager->dbSize = pPager->ckptSize;
520
521 /* Figure out how many records are in the checkpoint journal.
522 */
drh0f892532002-05-30 12:27:03 +0000523 assert( pPager->ckptInUse && pPager->journalOpen );
drhfa86c412002-02-02 15:01:15 +0000524 sqliteOsSeek(&pPager->cpfd, 0);
drh9bd47a92003-01-07 14:46:08 +0000525 nRec = pPager->ckptNRec;
drhfa86c412002-02-02 15:01:15 +0000526
527 /* Copy original pages out of the checkpoint journal and back into the
528 ** database file.
529 */
drh74587e52002-08-13 00:01:16 +0000530 if( pager_old_format ){
531 pPager->journalFormat = SQLITE_OLD_JOURNAL_FORMAT;
532 }else{
533 pPager->journalFormat = SQLITE_NEW_JOURNAL_FORMAT;
534 }
drhfa86c412002-02-02 15:01:15 +0000535 for(i=nRec-1; i>=0; i--){
536 rc = pager_playback_one_page(pPager, &pPager->cpfd);
537 if( rc!=SQLITE_OK ) goto end_ckpt_playback;
538 }
539
540 /* Figure out how many pages need to be copied out of the transaction
541 ** journal.
542 */
543 rc = sqliteOsSeek(&pPager->jfd, pPager->ckptJSize);
544 if( rc!=SQLITE_OK ){
545 goto end_ckpt_playback;
546 }
547 rc = sqliteOsFileSize(&pPager->jfd, &nRec);
548 if( rc!=SQLITE_OK ){
549 goto end_ckpt_playback;
550 }
551 nRec = (nRec - pPager->ckptJSize)/sizeof(PageRecord);
552 for(i=nRec-1; i>=0; i--){
553 rc = pager_playback_one_page(pPager, &pPager->jfd);
554 if( rc!=SQLITE_OK ) goto end_ckpt_playback;
555 }
556
557
558end_ckpt_playback:
drhfa86c412002-02-02 15:01:15 +0000559 if( rc!=SQLITE_OK ){
drhfa86c412002-02-02 15:01:15 +0000560 pPager->errMask |= PAGER_ERR_CORRUPT;
561 rc = SQLITE_CORRUPT;
drhfa86c412002-02-02 15:01:15 +0000562 }
563 return rc;
564}
565
566/*
drhf57b14a2001-09-14 18:54:08 +0000567** Change the maximum number of in-memory pages that are allowed.
drhcd61c282002-03-06 22:01:34 +0000568**
569** The maximum number is the absolute value of the mxPage parameter.
570** If mxPage is negative, the noSync flag is also set. noSync bypasses
571** calls to sqliteOsSync(). The pager runs much faster with noSync on,
572** but if the operating system crashes or there is an abrupt power
573** failure, the database file might be left in an inconsistent and
574** unrepairable state.
drhf57b14a2001-09-14 18:54:08 +0000575*/
576void sqlitepager_set_cachesize(Pager *pPager, int mxPage){
drh603240c2002-03-05 01:11:12 +0000577 if( mxPage>=0 ){
drha1680452002-04-18 01:56:57 +0000578 pPager->noSync = pPager->tempFile;
drh603240c2002-03-05 01:11:12 +0000579 }else{
580 pPager->noSync = 1;
581 mxPage = -mxPage;
582 }
drhf57b14a2001-09-14 18:54:08 +0000583 if( mxPage>10 ){
584 pPager->mxPage = mxPage;
585 }
586}
587
588/*
drhfa86c412002-02-02 15:01:15 +0000589** Open a temporary file. Write the name of the file into zName
590** (zName must be at least SQLITE_TEMPNAME_SIZE bytes long.) Write
591** the file descriptor into *fd. Return SQLITE_OK on success or some
592** other error code if we fail.
593**
594** The OS will automatically delete the temporary file when it is
595** closed.
596*/
597static int sqlitepager_opentemp(char *zFile, OsFile *fd){
598 int cnt = 8;
599 int rc;
600 do{
601 cnt--;
602 sqliteOsTempFileName(zFile);
603 rc = sqliteOsOpenExclusive(zFile, fd, 1);
604 }while( cnt>0 && rc!=SQLITE_OK );
605 return rc;
606}
607
608/*
drhed7c8552001-04-11 14:29:21 +0000609** Create a new page cache and put a pointer to the page cache in *ppPager.
drh5e00f6c2001-09-13 13:46:56 +0000610** The file to be cached need not exist. The file is not locked until
drhd9b02572001-04-15 00:37:09 +0000611** the first call to sqlitepager_get() and is only held open until the
612** last page is released using sqlitepager_unref().
drh382c0242001-10-06 16:33:02 +0000613**
drh6446c4d2001-12-15 14:22:18 +0000614** If zFilename is NULL then a randomly-named temporary file is created
615** and used as the file to be cached. The file will be deleted
616** automatically when it is closed.
drhed7c8552001-04-11 14:29:21 +0000617*/
drh7e3b0a02001-04-28 16:52:40 +0000618int sqlitepager_open(
619 Pager **ppPager, /* Return the Pager structure here */
620 const char *zFilename, /* Name of the database file to open */
621 int mxPage, /* Max number of in-memory cache pages */
drhda47d772002-12-02 04:25:19 +0000622 int nExtra, /* Extra bytes append to each in-memory page */
623 int useJournal /* TRUE to use a rollback journal on this file */
drh7e3b0a02001-04-28 16:52:40 +0000624){
drhed7c8552001-04-11 14:29:21 +0000625 Pager *pPager;
drh3e7a6092002-12-07 21:45:14 +0000626 char *zFullPathname;
drhed7c8552001-04-11 14:29:21 +0000627 int nameLen;
drh8cfbf082001-09-19 13:22:39 +0000628 OsFile fd;
629 int rc;
drh5e00f6c2001-09-13 13:46:56 +0000630 int tempFile;
631 int readOnly = 0;
drh8cfbf082001-09-19 13:22:39 +0000632 char zTemp[SQLITE_TEMPNAME_SIZE];
drhed7c8552001-04-11 14:29:21 +0000633
drhd9b02572001-04-15 00:37:09 +0000634 *ppPager = 0;
635 if( sqlite_malloc_failed ){
636 return SQLITE_NOMEM;
637 }
drh5e00f6c2001-09-13 13:46:56 +0000638 if( zFilename ){
drh3e7a6092002-12-07 21:45:14 +0000639 zFullPathname = sqliteOsFullPathname(zFilename);
640 rc = sqliteOsOpenReadWrite(zFullPathname, &fd, &readOnly);
drh5e00f6c2001-09-13 13:46:56 +0000641 tempFile = 0;
642 }else{
drhfa86c412002-02-02 15:01:15 +0000643 rc = sqlitepager_opentemp(zTemp, &fd);
drh5e00f6c2001-09-13 13:46:56 +0000644 zFilename = zTemp;
drh3e7a6092002-12-07 21:45:14 +0000645 zFullPathname = sqliteOsFullPathname(zFilename);
drh5e00f6c2001-09-13 13:46:56 +0000646 tempFile = 1;
647 }
drh3e7a6092002-12-07 21:45:14 +0000648 if( sqlite_malloc_failed ){
649 return SQLITE_NOMEM;
650 }
drh8cfbf082001-09-19 13:22:39 +0000651 if( rc!=SQLITE_OK ){
drh3e7a6092002-12-07 21:45:14 +0000652 sqliteFree(zFullPathname);
drhed7c8552001-04-11 14:29:21 +0000653 return SQLITE_CANTOPEN;
654 }
drh3e7a6092002-12-07 21:45:14 +0000655 nameLen = strlen(zFullPathname);
drhed7c8552001-04-11 14:29:21 +0000656 pPager = sqliteMalloc( sizeof(*pPager) + nameLen*2 + 30 );
drhd9b02572001-04-15 00:37:09 +0000657 if( pPager==0 ){
drha7fcb052001-12-14 15:09:55 +0000658 sqliteOsClose(&fd);
drh3e7a6092002-12-07 21:45:14 +0000659 sqliteFree(zFullPathname);
drhd9b02572001-04-15 00:37:09 +0000660 return SQLITE_NOMEM;
661 }
drhed7c8552001-04-11 14:29:21 +0000662 pPager->zFilename = (char*)&pPager[1];
663 pPager->zJournal = &pPager->zFilename[nameLen+1];
drh3e7a6092002-12-07 21:45:14 +0000664 strcpy(pPager->zFilename, zFullPathname);
665 strcpy(pPager->zJournal, zFullPathname);
666 sqliteFree(zFullPathname);
drhed7c8552001-04-11 14:29:21 +0000667 strcpy(&pPager->zJournal[nameLen], "-journal");
668 pPager->fd = fd;
drh8cfbf082001-09-19 13:22:39 +0000669 pPager->journalOpen = 0;
drhda47d772002-12-02 04:25:19 +0000670 pPager->useJournal = useJournal;
drhfa86c412002-02-02 15:01:15 +0000671 pPager->ckptOpen = 0;
drh0f892532002-05-30 12:27:03 +0000672 pPager->ckptInUse = 0;
drhed7c8552001-04-11 14:29:21 +0000673 pPager->nRef = 0;
674 pPager->dbSize = -1;
drhfa86c412002-02-02 15:01:15 +0000675 pPager->ckptSize = 0;
676 pPager->ckptJSize = 0;
drhed7c8552001-04-11 14:29:21 +0000677 pPager->nPage = 0;
drhd79caeb2001-04-15 02:27:24 +0000678 pPager->mxPage = mxPage>5 ? mxPage : 10;
drhed7c8552001-04-11 14:29:21 +0000679 pPager->state = SQLITE_UNLOCK;
drhd9b02572001-04-15 00:37:09 +0000680 pPager->errMask = 0;
drh5e00f6c2001-09-13 13:46:56 +0000681 pPager->tempFile = tempFile;
682 pPager->readOnly = readOnly;
drhf57b14a2001-09-14 18:54:08 +0000683 pPager->needSync = 0;
drhda47d772002-12-02 04:25:19 +0000684 pPager->noSync = pPager->tempFile || !useJournal;
drhed7c8552001-04-11 14:29:21 +0000685 pPager->pFirst = 0;
686 pPager->pLast = 0;
drh7c717f72001-06-24 20:39:41 +0000687 pPager->nExtra = nExtra;
drhed7c8552001-04-11 14:29:21 +0000688 memset(pPager->aHash, 0, sizeof(pPager->aHash));
689 *ppPager = pPager;
690 return SQLITE_OK;
691}
692
693/*
drh72f82862001-05-24 21:06:34 +0000694** Set the destructor for this pager. If not NULL, the destructor is called
drh5e00f6c2001-09-13 13:46:56 +0000695** when the reference count on each page reaches zero. The destructor can
696** be used to clean up information in the extra segment appended to each page.
drh72f82862001-05-24 21:06:34 +0000697**
698** The destructor is not called as a result sqlitepager_close().
699** Destructors are only called by sqlitepager_unref().
700*/
701void sqlitepager_set_destructor(Pager *pPager, void (*xDesc)(void*)){
702 pPager->xDestructor = xDesc;
703}
704
705/*
drh5e00f6c2001-09-13 13:46:56 +0000706** Return the total number of pages in the disk file associated with
707** pPager.
drhed7c8552001-04-11 14:29:21 +0000708*/
drhd9b02572001-04-15 00:37:09 +0000709int sqlitepager_pagecount(Pager *pPager){
drh28be87c2002-11-05 23:03:02 +0000710 off_t n;
drhd9b02572001-04-15 00:37:09 +0000711 assert( pPager!=0 );
drhed7c8552001-04-11 14:29:21 +0000712 if( pPager->dbSize>=0 ){
713 return pPager->dbSize;
714 }
drha7fcb052001-12-14 15:09:55 +0000715 if( sqliteOsFileSize(&pPager->fd, &n)!=SQLITE_OK ){
drh81a20f22001-10-12 17:30:04 +0000716 pPager->errMask |= PAGER_ERR_DISK;
drh8cfbf082001-09-19 13:22:39 +0000717 return 0;
drhed7c8552001-04-11 14:29:21 +0000718 }
drh8cfbf082001-09-19 13:22:39 +0000719 n /= SQLITE_PAGE_SIZE;
drhd9b02572001-04-15 00:37:09 +0000720 if( pPager->state!=SQLITE_UNLOCK ){
drhed7c8552001-04-11 14:29:21 +0000721 pPager->dbSize = n;
722 }
723 return n;
724}
725
726/*
727** Shutdown the page cache. Free all memory and close all files.
728**
729** If a transaction was in progress when this routine is called, that
730** transaction is rolled back. All outstanding pages are invalidated
731** and their memory is freed. Any attempt to use a page associated
732** with this page cache after this function returns will likely
733** result in a coredump.
734*/
drhd9b02572001-04-15 00:37:09 +0000735int sqlitepager_close(Pager *pPager){
736 PgHdr *pPg, *pNext;
drhed7c8552001-04-11 14:29:21 +0000737 switch( pPager->state ){
738 case SQLITE_WRITELOCK: {
drhd9b02572001-04-15 00:37:09 +0000739 sqlitepager_rollback(pPager);
drha7fcb052001-12-14 15:09:55 +0000740 sqliteOsUnlock(&pPager->fd);
drh8cfbf082001-09-19 13:22:39 +0000741 assert( pPager->journalOpen==0 );
drhed7c8552001-04-11 14:29:21 +0000742 break;
743 }
744 case SQLITE_READLOCK: {
drha7fcb052001-12-14 15:09:55 +0000745 sqliteOsUnlock(&pPager->fd);
drhed7c8552001-04-11 14:29:21 +0000746 break;
747 }
748 default: {
749 /* Do nothing */
750 break;
751 }
752 }
drhd9b02572001-04-15 00:37:09 +0000753 for(pPg=pPager->pAll; pPg; pPg=pNext){
754 pNext = pPg->pNextAll;
755 sqliteFree(pPg);
drhed7c8552001-04-11 14:29:21 +0000756 }
drha7fcb052001-12-14 15:09:55 +0000757 sqliteOsClose(&pPager->fd);
drh8cfbf082001-09-19 13:22:39 +0000758 assert( pPager->journalOpen==0 );
drh0f892532002-05-30 12:27:03 +0000759 /* Temp files are automatically deleted by the OS
760 ** if( pPager->tempFile ){
761 ** sqliteOsDelete(pPager->zFilename);
762 ** }
763 */
drhed7c8552001-04-11 14:29:21 +0000764 sqliteFree(pPager);
765 return SQLITE_OK;
766}
767
768/*
drh5e00f6c2001-09-13 13:46:56 +0000769** Return the page number for the given page data.
drhed7c8552001-04-11 14:29:21 +0000770*/
drhd9b02572001-04-15 00:37:09 +0000771Pgno sqlitepager_pagenumber(void *pData){
drhed7c8552001-04-11 14:29:21 +0000772 PgHdr *p = DATA_TO_PGHDR(pData);
773 return p->pgno;
774}
775
776/*
drh7e3b0a02001-04-28 16:52:40 +0000777** Increment the reference count for a page. If the page is
778** currently on the freelist (the reference count is zero) then
779** remove it from the freelist.
780*/
drh836faa42003-01-11 13:30:57 +0000781#define page_ref(P) ((P)->nRef==0?_page_ref(P):(void)(P)->nRef++)
782static void _page_ref(PgHdr *pPg){
drh7e3b0a02001-04-28 16:52:40 +0000783 if( pPg->nRef==0 ){
784 /* The page is currently on the freelist. Remove it. */
785 if( pPg->pPrevFree ){
786 pPg->pPrevFree->pNextFree = pPg->pNextFree;
787 }else{
788 pPg->pPager->pFirst = pPg->pNextFree;
789 }
790 if( pPg->pNextFree ){
791 pPg->pNextFree->pPrevFree = pPg->pPrevFree;
792 }else{
793 pPg->pPager->pLast = pPg->pPrevFree;
794 }
795 pPg->pPager->nRef++;
796 }
797 pPg->nRef++;
drhdd793422001-06-28 01:54:48 +0000798 REFINFO(pPg);
drhdf0b3b02001-06-23 11:36:20 +0000799}
800
801/*
802** Increment the reference count for a page. The input pointer is
803** a reference to the page data.
804*/
805int sqlitepager_ref(void *pData){
806 PgHdr *pPg = DATA_TO_PGHDR(pData);
807 page_ref(pPg);
drh8c42ca92001-06-22 19:15:00 +0000808 return SQLITE_OK;
drh7e3b0a02001-04-28 16:52:40 +0000809}
810
811/*
drhb19a2bc2001-09-16 00:13:26 +0000812** Sync the journal and then write all free dirty pages to the database
813** file.
814**
815** Writing all free dirty pages to the database after the sync is a
816** non-obvious optimization. fsync() is an expensive operation so we
drhaaab5722002-02-19 13:39:21 +0000817** want to minimize the number ot times it is called. After an fsync() call,
drh6446c4d2001-12-15 14:22:18 +0000818** we are free to write dirty pages back to the database. It is best
819** to go ahead and write as many dirty pages as possible to minimize
820** the risk of having to do another fsync() later on. Writing dirty
821** free pages in this way was observed to make database operations go
822** up to 10 times faster.
drhfa86c412002-02-02 15:01:15 +0000823**
824** If we are writing to temporary database, there is no need to preserve
825** the integrity of the journal file, so we can save time and skip the
826** fsync().
drh50e5dad2001-09-15 00:57:28 +0000827*/
828static int syncAllPages(Pager *pPager){
829 PgHdr *pPg;
drh80eb7902002-12-28 01:06:30 +0000830 Pgno lastPgno = 0;
drh50e5dad2001-09-15 00:57:28 +0000831 int rc = SQLITE_OK;
drh03eb96a2002-11-10 23:32:56 +0000832
833 /* Sync the journal before modifying the main database
834 ** (assuming there is a journal and it needs to be synced.)
835 */
drh50e5dad2001-09-15 00:57:28 +0000836 if( pPager->needSync ){
drhfa86c412002-02-02 15:01:15 +0000837 if( !pPager->tempFile ){
838 rc = sqliteOsSync(&pPager->jfd);
839 if( rc!=0 ) return rc;
840 }
drh50e5dad2001-09-15 00:57:28 +0000841 pPager->needSync = 0;
842 }
drh03eb96a2002-11-10 23:32:56 +0000843
drh4d9ef4c2002-11-11 01:04:47 +0000844 /* Write all dirty free pages to the disk in the order that they
845 ** appear on the disk. We have experimented with sorting the pages
846 ** by page numbers so that they are written in order, but that does
847 ** not appear to improve performance.
drh03eb96a2002-11-10 23:32:56 +0000848 */
drh50e5dad2001-09-15 00:57:28 +0000849 for(pPg=pPager->pFirst; pPg; pPg=pPg->pNextFree){
850 if( pPg->dirty ){
drh4d9ef4c2002-11-11 01:04:47 +0000851 if( lastPgno==0 || pPg->pgno!=lastPgno+1 ){
drhd0d006e2002-12-01 02:00:57 +0000852 sqliteOsSeek(&pPager->fd, (pPg->pgno-1)*(off_t)SQLITE_PAGE_SIZE);
drh03eb96a2002-11-10 23:32:56 +0000853 }
drh4d9ef4c2002-11-11 01:04:47 +0000854 rc = sqliteOsWrite(&pPager->fd, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE);
855 if( rc!=SQLITE_OK ) break;
856 pPg->dirty = 0;
857 lastPgno = pPg->pgno;
drh03eb96a2002-11-10 23:32:56 +0000858 }
drh03eb96a2002-11-10 23:32:56 +0000859 }
drh81a20f22001-10-12 17:30:04 +0000860 return rc;
drh50e5dad2001-09-15 00:57:28 +0000861}
862
863/*
drhd9b02572001-04-15 00:37:09 +0000864** Acquire a page.
865**
drh58a11682001-11-10 13:51:08 +0000866** A read lock on the disk file is obtained when the first page is acquired.
drh5e00f6c2001-09-13 13:46:56 +0000867** This read lock is dropped when the last page is released.
drhd9b02572001-04-15 00:37:09 +0000868**
drh306dc212001-05-21 13:45:10 +0000869** A _get works for any page number greater than 0. If the database
870** file is smaller than the requested page, then no actual disk
871** read occurs and the memory image of the page is initialized to
872** all zeros. The extra data appended to a page is always initialized
873** to zeros the first time a page is loaded into memory.
874**
drhd9b02572001-04-15 00:37:09 +0000875** The acquisition might fail for several reasons. In all cases,
876** an appropriate error code is returned and *ppPage is set to NULL.
drh7e3b0a02001-04-28 16:52:40 +0000877**
878** See also sqlitepager_lookup(). Both this routine and _lookup() attempt
879** to find a page in the in-memory cache first. If the page is not already
drh5e00f6c2001-09-13 13:46:56 +0000880** in memory, this routine goes to disk to read it in whereas _lookup()
drh7e3b0a02001-04-28 16:52:40 +0000881** just returns 0. This routine acquires a read-lock the first time it
882** has to go to disk, and could also playback an old journal if necessary.
883** Since _lookup() never goes to disk, it never has to deal with locks
884** or journal files.
drhed7c8552001-04-11 14:29:21 +0000885*/
drhd9b02572001-04-15 00:37:09 +0000886int sqlitepager_get(Pager *pPager, Pgno pgno, void **ppPage){
drhed7c8552001-04-11 14:29:21 +0000887 PgHdr *pPg;
drh8766c342002-11-09 00:33:15 +0000888 int rc;
drhed7c8552001-04-11 14:29:21 +0000889
drhd9b02572001-04-15 00:37:09 +0000890 /* Make sure we have not hit any critical errors.
891 */
drh836faa42003-01-11 13:30:57 +0000892 assert( pPager!=0 );
893 assert( pgno!=0 );
drhd9b02572001-04-15 00:37:09 +0000894 if( pPager->errMask & ~(PAGER_ERR_FULL) ){
895 return pager_errcode(pPager);
896 }
897
drhed7c8552001-04-11 14:29:21 +0000898 /* If this is the first page accessed, then get a read lock
899 ** on the database file.
900 */
901 if( pPager->nRef==0 ){
drh8766c342002-11-09 00:33:15 +0000902 rc = sqliteOsReadLock(&pPager->fd);
903 if( rc!=SQLITE_OK ){
drhed7c8552001-04-11 14:29:21 +0000904 *ppPage = 0;
drh8766c342002-11-09 00:33:15 +0000905 return rc;
drhed7c8552001-04-11 14:29:21 +0000906 }
drhd9b02572001-04-15 00:37:09 +0000907 pPager->state = SQLITE_READLOCK;
drhed7c8552001-04-11 14:29:21 +0000908
909 /* If a journal file exists, try to play it back.
910 */
drhda47d772002-12-02 04:25:19 +0000911 if( pPager->useJournal && sqliteOsFileExists(pPager->zJournal) ){
drhf57b3392001-10-08 13:22:32 +0000912 int rc, dummy;
drhed7c8552001-04-11 14:29:21 +0000913
drha7fcb052001-12-14 15:09:55 +0000914 /* Get a write lock on the database
915 */
916 rc = sqliteOsWriteLock(&pPager->fd);
917 if( rc!=SQLITE_OK ){
drh8766c342002-11-09 00:33:15 +0000918 if( sqliteOsUnlock(&pPager->fd)!=SQLITE_OK ){
919 /* This should never happen! */
920 rc = SQLITE_INTERNAL;
921 }
drha7fcb052001-12-14 15:09:55 +0000922 *ppPage = 0;
drh8766c342002-11-09 00:33:15 +0000923 return rc;
drha7fcb052001-12-14 15:09:55 +0000924 }
925 pPager->state = SQLITE_WRITELOCK;
926
drhed7c8552001-04-11 14:29:21 +0000927 /* Open the journal for exclusive access. Return SQLITE_BUSY if
drhf57b3392001-10-08 13:22:32 +0000928 ** we cannot get exclusive access to the journal file.
929 **
930 ** Even though we will only be reading from the journal, not writing,
931 ** we have to open the journal for writing in order to obtain an
932 ** exclusive access lock.
drhed7c8552001-04-11 14:29:21 +0000933 */
drhf57b3392001-10-08 13:22:32 +0000934 rc = sqliteOsOpenReadWrite(pPager->zJournal, &pPager->jfd, &dummy);
drha7fcb052001-12-14 15:09:55 +0000935 if( rc!=SQLITE_OK ){
936 rc = sqliteOsUnlock(&pPager->fd);
937 assert( rc==SQLITE_OK );
drhed7c8552001-04-11 14:29:21 +0000938 *ppPage = 0;
939 return SQLITE_BUSY;
940 }
drha7fcb052001-12-14 15:09:55 +0000941 pPager->journalOpen = 1;
drhed7c8552001-04-11 14:29:21 +0000942
943 /* Playback and delete the journal. Drop the database write
944 ** lock and reacquire the read lock.
945 */
drhd9b02572001-04-15 00:37:09 +0000946 rc = pager_playback(pPager);
947 if( rc!=SQLITE_OK ){
948 return rc;
949 }
drhed7c8552001-04-11 14:29:21 +0000950 }
951 pPg = 0;
952 }else{
953 /* Search for page in cache */
drhd9b02572001-04-15 00:37:09 +0000954 pPg = pager_lookup(pPager, pgno);
drhed7c8552001-04-11 14:29:21 +0000955 }
956 if( pPg==0 ){
drhd9b02572001-04-15 00:37:09 +0000957 /* The requested page is not in the page cache. */
drhed7c8552001-04-11 14:29:21 +0000958 int h;
drh7e3b0a02001-04-28 16:52:40 +0000959 pPager->nMiss++;
drhed7c8552001-04-11 14:29:21 +0000960 if( pPager->nPage<pPager->mxPage || pPager->pFirst==0 ){
961 /* Create a new page */
drh8c1238a2003-01-02 14:43:55 +0000962 pPg = sqliteMallocRaw( sizeof(*pPg) + SQLITE_PAGE_SIZE + pPager->nExtra );
drhd9b02572001-04-15 00:37:09 +0000963 if( pPg==0 ){
964 *ppPage = 0;
965 pager_unwritelock(pPager);
966 pPager->errMask |= PAGER_ERR_MEM;
967 return SQLITE_NOMEM;
968 }
drh8c1238a2003-01-02 14:43:55 +0000969 memset(pPg, 0, sizeof(*pPg));
drhed7c8552001-04-11 14:29:21 +0000970 pPg->pPager = pPager;
drhd9b02572001-04-15 00:37:09 +0000971 pPg->pNextAll = pPager->pAll;
972 if( pPager->pAll ){
973 pPager->pAll->pPrevAll = pPg;
974 }
975 pPg->pPrevAll = 0;
drhd79caeb2001-04-15 02:27:24 +0000976 pPager->pAll = pPg;
drhd9b02572001-04-15 00:37:09 +0000977 pPager->nPage++;
drhed7c8552001-04-11 14:29:21 +0000978 }else{
drhd9b02572001-04-15 00:37:09 +0000979 /* Recycle an older page. First locate the page to be recycled.
980 ** Try to find one that is not dirty and is near the head of
981 ** of the free list */
drhed7c8552001-04-11 14:29:21 +0000982 pPg = pPager->pFirst;
drh603240c2002-03-05 01:11:12 +0000983 while( pPg && pPg->dirty ){
drhd9b02572001-04-15 00:37:09 +0000984 pPg = pPg->pNextFree;
985 }
drhb19a2bc2001-09-16 00:13:26 +0000986
987 /* If we could not find a page that has not been used recently
988 ** and which is not dirty, then sync the journal and write all
989 ** dirty free pages into the database file, thus making them
990 ** clean pages and available for recycling.
991 **
992 ** We have to sync the journal before writing a page to the main
993 ** database. But syncing is a very slow operation. So after a
994 ** sync, it is best to write everything we can back to the main
995 ** database to minimize the risk of having to sync again in the
drh94f33312002-08-12 12:29:56 +0000996 ** near future. That is why we write all dirty pages after a
drhb19a2bc2001-09-16 00:13:26 +0000997 ** sync.
998 */
drh603240c2002-03-05 01:11:12 +0000999 if( pPg==0 ){
drh50e5dad2001-09-15 00:57:28 +00001000 int rc = syncAllPages(pPager);
1001 if( rc!=0 ){
1002 sqlitepager_rollback(pPager);
1003 *ppPage = 0;
1004 return SQLITE_IOERR;
1005 }
1006 pPg = pPager->pFirst;
1007 }
drhd9b02572001-04-15 00:37:09 +00001008 assert( pPg->nRef==0 );
drh50e5dad2001-09-15 00:57:28 +00001009 assert( pPg->dirty==0 );
drhd9b02572001-04-15 00:37:09 +00001010
drh193a6b42002-07-07 16:52:46 +00001011 /* If the page we are recyclying is marked as alwaysRollback, then
1012 ** set the global alwaysRollback flag, thus disabling the
1013 ** sqlite_dont_rollback() optimization for the rest of this transaction.
1014 ** It is necessary to do this because the page marked alwaysRollback
1015 ** might be reloaded at a later time but at that point we won't remember
1016 ** that is was marked alwaysRollback. This means that all pages must
1017 ** be marked as alwaysRollback from here on out.
1018 */
1019 if( pPg->alwaysRollback ){
1020 pPager->alwaysRollback = 1;
1021 }
1022
drhd9b02572001-04-15 00:37:09 +00001023 /* Unlink the old page from the free list and the hash table
1024 */
drh6019e162001-07-02 17:51:45 +00001025 if( pPg->pPrevFree ){
1026 pPg->pPrevFree->pNextFree = pPg->pNextFree;
drhed7c8552001-04-11 14:29:21 +00001027 }else{
drh6019e162001-07-02 17:51:45 +00001028 assert( pPager->pFirst==pPg );
1029 pPager->pFirst = pPg->pNextFree;
drhed7c8552001-04-11 14:29:21 +00001030 }
drh6019e162001-07-02 17:51:45 +00001031 if( pPg->pNextFree ){
1032 pPg->pNextFree->pPrevFree = pPg->pPrevFree;
1033 }else{
1034 assert( pPager->pLast==pPg );
1035 pPager->pLast = pPg->pPrevFree;
1036 }
1037 pPg->pNextFree = pPg->pPrevFree = 0;
drhed7c8552001-04-11 14:29:21 +00001038 if( pPg->pNextHash ){
1039 pPg->pNextHash->pPrevHash = pPg->pPrevHash;
1040 }
1041 if( pPg->pPrevHash ){
1042 pPg->pPrevHash->pNextHash = pPg->pNextHash;
1043 }else{
drhd9b02572001-04-15 00:37:09 +00001044 h = pager_hash(pPg->pgno);
drhed7c8552001-04-11 14:29:21 +00001045 assert( pPager->aHash[h]==pPg );
1046 pPager->aHash[h] = pPg->pNextHash;
1047 }
drh6019e162001-07-02 17:51:45 +00001048 pPg->pNextHash = pPg->pPrevHash = 0;
drhd9b02572001-04-15 00:37:09 +00001049 pPager->nOvfl++;
drhed7c8552001-04-11 14:29:21 +00001050 }
1051 pPg->pgno = pgno;
drh1ab43002002-01-14 09:28:19 +00001052 if( pPager->aInJournal && (int)pgno<=pPager->origDbSize ){
drh6019e162001-07-02 17:51:45 +00001053 pPg->inJournal = (pPager->aInJournal[pgno/8] & (1<<(pgno&7)))!=0;
1054 }else{
1055 pPg->inJournal = 0;
1056 }
drh03eb96a2002-11-10 23:32:56 +00001057 if( pPager->aInCkpt && (int)pgno<=pPager->ckptSize
1058 && (pPager->aInCkpt[pgno/8] & (1<<(pgno&7)))!=0 ){
1059 page_add_to_ckpt_list(pPg);
drhfa86c412002-02-02 15:01:15 +00001060 }else{
drh03eb96a2002-11-10 23:32:56 +00001061 page_remove_from_ckpt_list(pPg);
drhfa86c412002-02-02 15:01:15 +00001062 }
drhed7c8552001-04-11 14:29:21 +00001063 pPg->dirty = 0;
1064 pPg->nRef = 1;
drhdd793422001-06-28 01:54:48 +00001065 REFINFO(pPg);
drhd9b02572001-04-15 00:37:09 +00001066 pPager->nRef++;
1067 h = pager_hash(pgno);
drhed7c8552001-04-11 14:29:21 +00001068 pPg->pNextHash = pPager->aHash[h];
1069 pPager->aHash[h] = pPg;
1070 if( pPg->pNextHash ){
1071 assert( pPg->pNextHash->pPrevHash==0 );
1072 pPg->pNextHash->pPrevHash = pPg;
1073 }
drh306dc212001-05-21 13:45:10 +00001074 if( pPager->dbSize<0 ) sqlitepager_pagecount(pPager);
drh1ab43002002-01-14 09:28:19 +00001075 if( pPager->dbSize<(int)pgno ){
drh306dc212001-05-21 13:45:10 +00001076 memset(PGHDR_TO_DATA(pPg), 0, SQLITE_PAGE_SIZE);
1077 }else{
drh81a20f22001-10-12 17:30:04 +00001078 int rc;
drhd0d006e2002-12-01 02:00:57 +00001079 sqliteOsSeek(&pPager->fd, (pgno-1)*(off_t)SQLITE_PAGE_SIZE);
drha7fcb052001-12-14 15:09:55 +00001080 rc = sqliteOsRead(&pPager->fd, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE);
drh81a20f22001-10-12 17:30:04 +00001081 if( rc!=SQLITE_OK ){
drh28be87c2002-11-05 23:03:02 +00001082 off_t fileSize;
drh4e371ee2002-09-05 16:08:27 +00001083 if( sqliteOsFileSize(&pPager->fd,&fileSize)!=SQLITE_OK
1084 || fileSize>=pgno*SQLITE_PAGE_SIZE ){
1085 return rc;
1086 }else{
1087 memset(PGHDR_TO_DATA(pPg), 0, SQLITE_PAGE_SIZE);
1088 }
drh81a20f22001-10-12 17:30:04 +00001089 }
drh306dc212001-05-21 13:45:10 +00001090 }
drh7e3b0a02001-04-28 16:52:40 +00001091 if( pPager->nExtra>0 ){
1092 memset(PGHDR_TO_EXTRA(pPg), 0, pPager->nExtra);
1093 }
drhed7c8552001-04-11 14:29:21 +00001094 }else{
drhd9b02572001-04-15 00:37:09 +00001095 /* The requested page is in the page cache. */
drh7e3b0a02001-04-28 16:52:40 +00001096 pPager->nHit++;
drhdf0b3b02001-06-23 11:36:20 +00001097 page_ref(pPg);
drhed7c8552001-04-11 14:29:21 +00001098 }
1099 *ppPage = PGHDR_TO_DATA(pPg);
1100 return SQLITE_OK;
1101}
1102
1103/*
drh7e3b0a02001-04-28 16:52:40 +00001104** Acquire a page if it is already in the in-memory cache. Do
1105** not read the page from disk. Return a pointer to the page,
1106** or 0 if the page is not in cache.
1107**
1108** See also sqlitepager_get(). The difference between this routine
1109** and sqlitepager_get() is that _get() will go to the disk and read
1110** in the page if the page is not already in cache. This routine
drh5e00f6c2001-09-13 13:46:56 +00001111** returns NULL if the page is not in cache or if a disk I/O error
1112** has ever happened.
drh7e3b0a02001-04-28 16:52:40 +00001113*/
1114void *sqlitepager_lookup(Pager *pPager, Pgno pgno){
1115 PgHdr *pPg;
1116
drh836faa42003-01-11 13:30:57 +00001117 assert( pPager!=0 );
1118 assert( pgno!=0 );
drh7e3b0a02001-04-28 16:52:40 +00001119 if( pPager->errMask & ~(PAGER_ERR_FULL) ){
1120 return 0;
1121 }
drh836faa42003-01-11 13:30:57 +00001122 /* if( pPager->nRef==0 ){
1123 ** return 0;
1124 ** }
1125 */
drh7e3b0a02001-04-28 16:52:40 +00001126 pPg = pager_lookup(pPager, pgno);
1127 if( pPg==0 ) return 0;
drhdf0b3b02001-06-23 11:36:20 +00001128 page_ref(pPg);
drh7e3b0a02001-04-28 16:52:40 +00001129 return PGHDR_TO_DATA(pPg);
1130}
1131
1132/*
drhed7c8552001-04-11 14:29:21 +00001133** Release a page.
1134**
1135** If the number of references to the page drop to zero, then the
1136** page is added to the LRU list. When all references to all pages
drhd9b02572001-04-15 00:37:09 +00001137** are released, a rollback occurs and the lock on the database is
drhed7c8552001-04-11 14:29:21 +00001138** removed.
1139*/
drhd9b02572001-04-15 00:37:09 +00001140int sqlitepager_unref(void *pData){
drhed7c8552001-04-11 14:29:21 +00001141 PgHdr *pPg;
drhd9b02572001-04-15 00:37:09 +00001142
1143 /* Decrement the reference count for this page
1144 */
drhed7c8552001-04-11 14:29:21 +00001145 pPg = DATA_TO_PGHDR(pData);
1146 assert( pPg->nRef>0 );
drhed7c8552001-04-11 14:29:21 +00001147 pPg->nRef--;
drhdd793422001-06-28 01:54:48 +00001148 REFINFO(pPg);
drhd9b02572001-04-15 00:37:09 +00001149
drh72f82862001-05-24 21:06:34 +00001150 /* When the number of references to a page reach 0, call the
1151 ** destructor and add the page to the freelist.
drhd9b02572001-04-15 00:37:09 +00001152 */
drhed7c8552001-04-11 14:29:21 +00001153 if( pPg->nRef==0 ){
drh1eaa2692001-09-18 02:02:23 +00001154 Pager *pPager;
1155 pPager = pPg->pPager;
drhd9b02572001-04-15 00:37:09 +00001156 pPg->pNextFree = 0;
1157 pPg->pPrevFree = pPager->pLast;
drhed7c8552001-04-11 14:29:21 +00001158 pPager->pLast = pPg;
drhd9b02572001-04-15 00:37:09 +00001159 if( pPg->pPrevFree ){
1160 pPg->pPrevFree->pNextFree = pPg;
drhed7c8552001-04-11 14:29:21 +00001161 }else{
1162 pPager->pFirst = pPg;
1163 }
drh72f82862001-05-24 21:06:34 +00001164 if( pPager->xDestructor ){
1165 pPager->xDestructor(pData);
1166 }
drhd9b02572001-04-15 00:37:09 +00001167
1168 /* When all pages reach the freelist, drop the read lock from
1169 ** the database file.
1170 */
1171 pPager->nRef--;
1172 assert( pPager->nRef>=0 );
1173 if( pPager->nRef==0 ){
1174 pager_reset(pPager);
1175 }
drhed7c8552001-04-11 14:29:21 +00001176 }
drhd9b02572001-04-15 00:37:09 +00001177 return SQLITE_OK;
drhed7c8552001-04-11 14:29:21 +00001178}
1179
1180/*
drhda47d772002-12-02 04:25:19 +00001181** Create a journal file for pPager. There should already be a write
1182** lock on the database file when this routine is called.
1183**
1184** Return SQLITE_OK if everything. Return an error code and release the
1185** write lock if anything goes wrong.
1186*/
1187static int pager_open_journal(Pager *pPager){
1188 int rc;
1189 assert( pPager->state==SQLITE_WRITELOCK );
1190 assert( pPager->journalOpen==0 );
1191 assert( pPager->useJournal );
1192 pPager->aInJournal = sqliteMalloc( pPager->dbSize/8 + 1 );
1193 if( pPager->aInJournal==0 ){
1194 sqliteOsReadLock(&pPager->fd);
1195 pPager->state = SQLITE_READLOCK;
1196 return SQLITE_NOMEM;
1197 }
1198 rc = sqliteOsOpenExclusive(pPager->zJournal, &pPager->jfd,pPager->tempFile);
1199 if( rc!=SQLITE_OK ){
1200 sqliteFree(pPager->aInJournal);
1201 pPager->aInJournal = 0;
1202 sqliteOsReadLock(&pPager->fd);
1203 pPager->state = SQLITE_READLOCK;
1204 return SQLITE_CANTOPEN;
1205 }
1206 pPager->journalOpen = 1;
1207 pPager->needSync = 0;
1208 pPager->alwaysRollback = 0;
1209 sqlitepager_pagecount(pPager);
1210 pPager->origDbSize = pPager->dbSize;
1211 if( pager_old_format ){
1212 rc = sqliteOsWrite(&pPager->jfd, aOldJournalMagic,
1213 sizeof(aOldJournalMagic));
1214 }else{
1215 rc = sqliteOsWrite(&pPager->jfd, aJournalMagic, sizeof(aJournalMagic));
1216 }
1217 if( rc==SQLITE_OK ){
1218 rc = write32bits(&pPager->jfd, pPager->dbSize);
1219 }
1220 if( pPager->ckptAutoopen && rc==SQLITE_OK ){
1221 rc = sqlitepager_ckpt_begin(pPager);
1222 }
1223 if( rc!=SQLITE_OK ){
1224 rc = pager_unwritelock(pPager);
1225 if( rc==SQLITE_OK ){
1226 rc = SQLITE_FULL;
1227 }
1228 }
1229 return rc;
1230}
1231
1232/*
drh4b845d72002-03-05 12:41:19 +00001233** Acquire a write-lock on the database. The lock is removed when
1234** the any of the following happen:
1235**
1236** * sqlitepager_commit() is called.
1237** * sqlitepager_rollback() is called.
1238** * sqlitepager_close() is called.
1239** * sqlitepager_unref() is called to on every outstanding page.
1240**
1241** The parameter to this routine is a pointer to any open page of the
1242** database file. Nothing changes about the page - it is used merely
1243** to acquire a pointer to the Pager structure and as proof that there
1244** is already a read-lock on the database.
1245**
drhda47d772002-12-02 04:25:19 +00001246** A journal file is opened if this is not a temporary file. For
1247** temporary files, the opening of the journal file is deferred until
1248** there is an actual need to write to the journal.
1249**
drh4b845d72002-03-05 12:41:19 +00001250** If the database is already write-locked, this routine is a no-op.
1251*/
1252int sqlitepager_begin(void *pData){
1253 PgHdr *pPg = DATA_TO_PGHDR(pData);
1254 Pager *pPager = pPg->pPager;
1255 int rc = SQLITE_OK;
1256 assert( pPg->nRef>0 );
1257 assert( pPager->state!=SQLITE_UNLOCK );
1258 if( pPager->state==SQLITE_READLOCK ){
1259 assert( pPager->aInJournal==0 );
1260 rc = sqliteOsWriteLock(&pPager->fd);
1261 if( rc!=SQLITE_OK ){
1262 return rc;
1263 }
drh4b845d72002-03-05 12:41:19 +00001264 pPager->state = SQLITE_WRITELOCK;
drhda47d772002-12-02 04:25:19 +00001265 pPager->dirtyFile = 0;
1266 if( pPager->useJournal && !pPager->tempFile ){
1267 rc = pager_open_journal(pPager);
drh4b845d72002-03-05 12:41:19 +00001268 }
1269 }
1270 return rc;
1271}
1272
1273/*
drhed7c8552001-04-11 14:29:21 +00001274** Mark a data page as writeable. The page is written into the journal
1275** if it is not there already. This routine must be called before making
1276** changes to a page.
1277**
1278** The first time this routine is called, the pager creates a new
1279** journal and acquires a write lock on the database. If the write
1280** lock could not be acquired, this routine returns SQLITE_BUSY. The
drh306dc212001-05-21 13:45:10 +00001281** calling routine must check for that return value and be careful not to
drhed7c8552001-04-11 14:29:21 +00001282** change any page data until this routine returns SQLITE_OK.
drhd9b02572001-04-15 00:37:09 +00001283**
1284** If the journal file could not be written because the disk is full,
1285** then this routine returns SQLITE_FULL and does an immediate rollback.
1286** All subsequent write attempts also return SQLITE_FULL until there
1287** is a call to sqlitepager_commit() or sqlitepager_rollback() to
1288** reset.
drhed7c8552001-04-11 14:29:21 +00001289*/
drhd9b02572001-04-15 00:37:09 +00001290int sqlitepager_write(void *pData){
drh69688d52001-04-14 16:38:23 +00001291 PgHdr *pPg = DATA_TO_PGHDR(pData);
1292 Pager *pPager = pPg->pPager;
drhd79caeb2001-04-15 02:27:24 +00001293 int rc = SQLITE_OK;
drh69688d52001-04-14 16:38:23 +00001294
drh6446c4d2001-12-15 14:22:18 +00001295 /* Check for errors
1296 */
drhd9b02572001-04-15 00:37:09 +00001297 if( pPager->errMask ){
1298 return pager_errcode(pPager);
1299 }
drh5e00f6c2001-09-13 13:46:56 +00001300 if( pPager->readOnly ){
1301 return SQLITE_PERM;
1302 }
drh6446c4d2001-12-15 14:22:18 +00001303
1304 /* Mark the page as dirty. If the page has already been written
1305 ** to the journal then we can return right away.
1306 */
drhd9b02572001-04-15 00:37:09 +00001307 pPg->dirty = 1;
drh0f892532002-05-30 12:27:03 +00001308 if( pPg->inJournal && (pPg->inCkpt || pPager->ckptInUse==0) ){
drha1680452002-04-18 01:56:57 +00001309 pPager->dirtyFile = 1;
drhfa86c412002-02-02 15:01:15 +00001310 return SQLITE_OK;
1311 }
drh6446c4d2001-12-15 14:22:18 +00001312
1313 /* If we get this far, it means that the page needs to be
drhfa86c412002-02-02 15:01:15 +00001314 ** written to the transaction journal or the ckeckpoint journal
1315 ** or both.
1316 **
1317 ** First check to see that the transaction journal exists and
1318 ** create it if it does not.
drh6446c4d2001-12-15 14:22:18 +00001319 */
drhd9b02572001-04-15 00:37:09 +00001320 assert( pPager->state!=SQLITE_UNLOCK );
drh4b845d72002-03-05 12:41:19 +00001321 rc = sqlitepager_begin(pData);
drhda47d772002-12-02 04:25:19 +00001322 if( rc!=SQLITE_OK ){
1323 return rc;
1324 }
drhd9b02572001-04-15 00:37:09 +00001325 assert( pPager->state==SQLITE_WRITELOCK );
drhda47d772002-12-02 04:25:19 +00001326 if( !pPager->journalOpen && pPager->useJournal ){
1327 rc = pager_open_journal(pPager);
1328 if( rc!=SQLITE_OK ) return rc;
1329 }
1330 assert( pPager->journalOpen || !pPager->useJournal );
1331 pPager->dirtyFile = 1;
drh6446c4d2001-12-15 14:22:18 +00001332
drhfa86c412002-02-02 15:01:15 +00001333 /* The transaction journal now exists and we have a write lock on the
1334 ** main database file. Write the current page to the transaction
1335 ** journal if it is not there already.
drh6446c4d2001-12-15 14:22:18 +00001336 */
drhda47d772002-12-02 04:25:19 +00001337 if( !pPg->inJournal && pPager->useJournal
1338 && (int)pPg->pgno <= pPager->origDbSize ){
drh94f33312002-08-12 12:29:56 +00001339 rc = write32bits(&pPager->jfd, pPg->pgno);
drhd9b02572001-04-15 00:37:09 +00001340 if( rc==SQLITE_OK ){
drha7fcb052001-12-14 15:09:55 +00001341 rc = sqliteOsWrite(&pPager->jfd, pData, SQLITE_PAGE_SIZE);
drhd9b02572001-04-15 00:37:09 +00001342 }
1343 if( rc!=SQLITE_OK ){
1344 sqlitepager_rollback(pPager);
1345 pPager->errMask |= PAGER_ERR_FULL;
1346 return rc;
1347 }
drh6019e162001-07-02 17:51:45 +00001348 assert( pPager->aInJournal!=0 );
1349 pPager->aInJournal[pPg->pgno/8] |= 1<<(pPg->pgno&7);
drh603240c2002-03-05 01:11:12 +00001350 pPager->needSync = !pPager->noSync;
drhfa86c412002-02-02 15:01:15 +00001351 pPg->inJournal = 1;
drh0f892532002-05-30 12:27:03 +00001352 if( pPager->ckptInUse ){
drhfa86c412002-02-02 15:01:15 +00001353 pPager->aInCkpt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
drh03eb96a2002-11-10 23:32:56 +00001354 page_add_to_ckpt_list(pPg);
drhfa86c412002-02-02 15:01:15 +00001355 }
drh69688d52001-04-14 16:38:23 +00001356 }
drh6446c4d2001-12-15 14:22:18 +00001357
drhfa86c412002-02-02 15:01:15 +00001358 /* If the checkpoint journal is open and the page is not in it,
1359 ** then write the current page to the checkpoint journal.
drh6446c4d2001-12-15 14:22:18 +00001360 */
drh0f892532002-05-30 12:27:03 +00001361 if( pPager->ckptInUse && !pPg->inCkpt && (int)pPg->pgno<=pPager->ckptSize ){
drh1e336b42002-02-14 12:50:33 +00001362 assert( pPg->inJournal || (int)pPg->pgno>pPager->origDbSize );
drh94f33312002-08-12 12:29:56 +00001363 rc = write32bits(&pPager->cpfd, pPg->pgno);
drhfa86c412002-02-02 15:01:15 +00001364 if( rc==SQLITE_OK ){
1365 rc = sqliteOsWrite(&pPager->cpfd, pData, SQLITE_PAGE_SIZE);
1366 }
1367 if( rc!=SQLITE_OK ){
1368 sqlitepager_rollback(pPager);
1369 pPager->errMask |= PAGER_ERR_FULL;
1370 return rc;
1371 }
drh9bd47a92003-01-07 14:46:08 +00001372 pPager->ckptNRec++;
drhfa86c412002-02-02 15:01:15 +00001373 assert( pPager->aInCkpt!=0 );
1374 pPager->aInCkpt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
drh03eb96a2002-11-10 23:32:56 +00001375 page_add_to_ckpt_list(pPg);
drhfa86c412002-02-02 15:01:15 +00001376 }
1377
1378 /* Update the database size and return.
1379 */
drh1ab43002002-01-14 09:28:19 +00001380 if( pPager->dbSize<(int)pPg->pgno ){
drh306dc212001-05-21 13:45:10 +00001381 pPager->dbSize = pPg->pgno;
1382 }
drh69688d52001-04-14 16:38:23 +00001383 return rc;
drhed7c8552001-04-11 14:29:21 +00001384}
1385
1386/*
drhaacc5432002-01-06 17:07:40 +00001387** Return TRUE if the page given in the argument was previously passed
drh6019e162001-07-02 17:51:45 +00001388** to sqlitepager_write(). In other words, return TRUE if it is ok
1389** to change the content of the page.
1390*/
1391int sqlitepager_iswriteable(void *pData){
1392 PgHdr *pPg = DATA_TO_PGHDR(pData);
1393 return pPg->dirty;
1394}
1395
1396/*
drh30e58752002-03-02 20:41:57 +00001397** A call to this routine tells the pager that it is not necessary to
1398** write the information on page "pgno" back to the disk, even though
1399** that page might be marked as dirty.
1400**
1401** The overlying software layer calls this routine when all of the data
1402** on the given page is unused. The pager marks the page as clean so
1403** that it does not get written to disk.
1404**
1405** Tests show that this optimization, together with the
1406** sqlitepager_dont_rollback() below, more than double the speed
1407** of large INSERT operations and quadruple the speed of large DELETEs.
drh8e298f92002-07-06 16:28:47 +00001408**
1409** When this routine is called, set the alwaysRollback flag to true.
1410** Subsequent calls to sqlitepager_dont_rollback() for the same page
1411** will thereafter be ignored. This is necessary to avoid a problem
1412** where a page with data is added to the freelist during one part of
1413** a transaction then removed from the freelist during a later part
1414** of the same transaction and reused for some other purpose. When it
1415** is first added to the freelist, this routine is called. When reused,
1416** the dont_rollback() routine is called. But because the page contains
1417** critical data, we still need to be sure it gets rolled back in spite
1418** of the dont_rollback() call.
drh30e58752002-03-02 20:41:57 +00001419*/
1420void sqlitepager_dont_write(Pager *pPager, Pgno pgno){
1421 PgHdr *pPg;
drh8e298f92002-07-06 16:28:47 +00001422
drh30e58752002-03-02 20:41:57 +00001423 pPg = pager_lookup(pPager, pgno);
drh8e298f92002-07-06 16:28:47 +00001424 pPg->alwaysRollback = 1;
drh30e58752002-03-02 20:41:57 +00001425 if( pPg && pPg->dirty ){
drh8124a302002-06-25 14:43:57 +00001426 if( pPager->dbSize==(int)pPg->pgno && pPager->origDbSize<pPager->dbSize ){
1427 /* If this pages is the last page in the file and the file has grown
1428 ** during the current transaction, then do NOT mark the page as clean.
1429 ** When the database file grows, we must make sure that the last page
1430 ** gets written at least once so that the disk file will be the correct
1431 ** size. If you do not write this page and the size of the file
1432 ** on the disk ends up being too small, that can lead to database
1433 ** corruption during the next transaction.
1434 */
1435 }else{
1436 pPg->dirty = 0;
1437 }
drh30e58752002-03-02 20:41:57 +00001438 }
1439}
1440
1441/*
1442** A call to this routine tells the pager that if a rollback occurs,
1443** it is not necessary to restore the data on the given page. This
1444** means that the pager does not have to record the given page in the
1445** rollback journal.
1446*/
1447void sqlitepager_dont_rollback(void *pData){
1448 PgHdr *pPg = DATA_TO_PGHDR(pData);
1449 Pager *pPager = pPg->pPager;
1450
1451 if( pPager->state!=SQLITE_WRITELOCK || pPager->journalOpen==0 ) return;
drh193a6b42002-07-07 16:52:46 +00001452 if( pPg->alwaysRollback || pPager->alwaysRollback ) return;
drh30e58752002-03-02 20:41:57 +00001453 if( !pPg->inJournal && (int)pPg->pgno <= pPager->origDbSize ){
1454 assert( pPager->aInJournal!=0 );
1455 pPager->aInJournal[pPg->pgno/8] |= 1<<(pPg->pgno&7);
1456 pPg->inJournal = 1;
drh0f892532002-05-30 12:27:03 +00001457 if( pPager->ckptInUse ){
drh30e58752002-03-02 20:41:57 +00001458 pPager->aInCkpt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
drh03eb96a2002-11-10 23:32:56 +00001459 page_add_to_ckpt_list(pPg);
drh30e58752002-03-02 20:41:57 +00001460 }
1461 }
drh0f892532002-05-30 12:27:03 +00001462 if( pPager->ckptInUse && !pPg->inCkpt && (int)pPg->pgno<=pPager->ckptSize ){
drh30e58752002-03-02 20:41:57 +00001463 assert( pPg->inJournal || (int)pPg->pgno>pPager->origDbSize );
1464 assert( pPager->aInCkpt!=0 );
1465 pPager->aInCkpt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
drh03eb96a2002-11-10 23:32:56 +00001466 page_add_to_ckpt_list(pPg);
drh30e58752002-03-02 20:41:57 +00001467 }
1468}
1469
1470/*
drhed7c8552001-04-11 14:29:21 +00001471** Commit all changes to the database and release the write lock.
drhd9b02572001-04-15 00:37:09 +00001472**
1473** If the commit fails for any reason, a rollback attempt is made
1474** and an error code is returned. If the commit worked, SQLITE_OK
1475** is returned.
drhed7c8552001-04-11 14:29:21 +00001476*/
drhd9b02572001-04-15 00:37:09 +00001477int sqlitepager_commit(Pager *pPager){
drha1b351a2001-09-14 16:42:12 +00001478 int rc;
drhed7c8552001-04-11 14:29:21 +00001479 PgHdr *pPg;
drhd9b02572001-04-15 00:37:09 +00001480
1481 if( pPager->errMask==PAGER_ERR_FULL ){
1482 rc = sqlitepager_rollback(pPager);
drh4e371ee2002-09-05 16:08:27 +00001483 if( rc==SQLITE_OK ){
1484 rc = SQLITE_FULL;
1485 }
drhd9b02572001-04-15 00:37:09 +00001486 return rc;
1487 }
1488 if( pPager->errMask!=0 ){
1489 rc = pager_errcode(pPager);
1490 return rc;
1491 }
1492 if( pPager->state!=SQLITE_WRITELOCK ){
1493 return SQLITE_ERROR;
1494 }
drha1680452002-04-18 01:56:57 +00001495 if( pPager->dirtyFile==0 ){
1496 /* Exit early (without doing the time-consuming sqliteOsSync() calls)
1497 ** if there have been no changes to the database file. */
1498 rc = pager_unwritelock(pPager);
1499 pPager->dbSize = -1;
1500 return rc;
1501 }
drhda47d772002-12-02 04:25:19 +00001502 assert( pPager->journalOpen );
drha7fcb052001-12-14 15:09:55 +00001503 if( pPager->needSync && sqliteOsSync(&pPager->jfd)!=SQLITE_OK ){
drhd9b02572001-04-15 00:37:09 +00001504 goto commit_abort;
drhed7c8552001-04-11 14:29:21 +00001505 }
drha1b351a2001-09-14 16:42:12 +00001506 for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
1507 if( pPg->dirty==0 ) continue;
drhd0d006e2002-12-01 02:00:57 +00001508 rc = sqliteOsSeek(&pPager->fd, (pPg->pgno-1)*(off_t)SQLITE_PAGE_SIZE);
drha1b351a2001-09-14 16:42:12 +00001509 if( rc!=SQLITE_OK ) goto commit_abort;
drha7fcb052001-12-14 15:09:55 +00001510 rc = sqliteOsWrite(&pPager->fd, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE);
drha1b351a2001-09-14 16:42:12 +00001511 if( rc!=SQLITE_OK ) goto commit_abort;
drhed7c8552001-04-11 14:29:21 +00001512 }
drh603240c2002-03-05 01:11:12 +00001513 if( !pPager->noSync && sqliteOsSync(&pPager->fd)!=SQLITE_OK ){
1514 goto commit_abort;
1515 }
drhd9b02572001-04-15 00:37:09 +00001516 rc = pager_unwritelock(pPager);
1517 pPager->dbSize = -1;
1518 return rc;
1519
1520 /* Jump here if anything goes wrong during the commit process.
1521 */
1522commit_abort:
1523 rc = sqlitepager_rollback(pPager);
1524 if( rc==SQLITE_OK ){
1525 rc = SQLITE_FULL;
drhed7c8552001-04-11 14:29:21 +00001526 }
drhed7c8552001-04-11 14:29:21 +00001527 return rc;
1528}
1529
1530/*
1531** Rollback all changes. The database falls back to read-only mode.
1532** All in-memory cache pages revert to their original data contents.
1533** The journal is deleted.
drhd9b02572001-04-15 00:37:09 +00001534**
1535** This routine cannot fail unless some other process is not following
1536** the correct locking protocol (SQLITE_PROTOCOL) or unless some other
1537** process is writing trash into the journal file (SQLITE_CORRUPT) or
1538** unless a prior malloc() failed (SQLITE_NOMEM). Appropriate error
1539** codes are returned for all these occasions. Otherwise,
1540** SQLITE_OK is returned.
drhed7c8552001-04-11 14:29:21 +00001541*/
drhd9b02572001-04-15 00:37:09 +00001542int sqlitepager_rollback(Pager *pPager){
drhed7c8552001-04-11 14:29:21 +00001543 int rc;
drhda47d772002-12-02 04:25:19 +00001544 if( !pPager->dirtyFile || !pPager->journalOpen ){
1545 rc = pager_unwritelock(pPager);
1546 pPager->dbSize = -1;
1547 return rc;
1548 }
drhd9b02572001-04-15 00:37:09 +00001549 if( pPager->errMask!=0 && pPager->errMask!=PAGER_ERR_FULL ){
drh4b845d72002-03-05 12:41:19 +00001550 if( pPager->state>=SQLITE_WRITELOCK ){
1551 pager_playback(pPager);
1552 }
drhd9b02572001-04-15 00:37:09 +00001553 return pager_errcode(pPager);
drhed7c8552001-04-11 14:29:21 +00001554 }
drhd9b02572001-04-15 00:37:09 +00001555 if( pPager->state!=SQLITE_WRITELOCK ){
1556 return SQLITE_OK;
1557 }
1558 rc = pager_playback(pPager);
1559 if( rc!=SQLITE_OK ){
1560 rc = SQLITE_CORRUPT;
1561 pPager->errMask |= PAGER_ERR_CORRUPT;
1562 }
1563 pPager->dbSize = -1;
drhed7c8552001-04-11 14:29:21 +00001564 return rc;
drh98808ba2001-10-18 12:34:46 +00001565}
drhd9b02572001-04-15 00:37:09 +00001566
1567/*
drh5e00f6c2001-09-13 13:46:56 +00001568** Return TRUE if the database file is opened read-only. Return FALSE
1569** if the database is (in theory) writable.
1570*/
1571int sqlitepager_isreadonly(Pager *pPager){
drhbe0072d2001-09-13 14:46:09 +00001572 return pPager->readOnly;
drh5e00f6c2001-09-13 13:46:56 +00001573}
1574
1575/*
drhd9b02572001-04-15 00:37:09 +00001576** This routine is used for testing and analysis only.
1577*/
1578int *sqlitepager_stats(Pager *pPager){
1579 static int a[9];
1580 a[0] = pPager->nRef;
1581 a[1] = pPager->nPage;
1582 a[2] = pPager->mxPage;
1583 a[3] = pPager->dbSize;
1584 a[4] = pPager->state;
1585 a[5] = pPager->errMask;
1586 a[6] = pPager->nHit;
1587 a[7] = pPager->nMiss;
1588 a[8] = pPager->nOvfl;
1589 return a;
1590}
drhdd793422001-06-28 01:54:48 +00001591
drhfa86c412002-02-02 15:01:15 +00001592/*
1593** Set the checkpoint.
1594**
1595** This routine should be called with the transaction journal already
1596** open. A new checkpoint journal is created that can be used to rollback
drhaaab5722002-02-19 13:39:21 +00001597** changes of a single SQL command within a larger transaction.
drhfa86c412002-02-02 15:01:15 +00001598*/
1599int sqlitepager_ckpt_begin(Pager *pPager){
1600 int rc;
1601 char zTemp[SQLITE_TEMPNAME_SIZE];
drhda47d772002-12-02 04:25:19 +00001602 if( !pPager->journalOpen ){
1603 pPager->ckptAutoopen = 1;
1604 return SQLITE_OK;
1605 }
drhfa86c412002-02-02 15:01:15 +00001606 assert( pPager->journalOpen );
drh0f892532002-05-30 12:27:03 +00001607 assert( !pPager->ckptInUse );
drhfa86c412002-02-02 15:01:15 +00001608 pPager->aInCkpt = sqliteMalloc( pPager->dbSize/8 + 1 );
1609 if( pPager->aInCkpt==0 ){
1610 sqliteOsReadLock(&pPager->fd);
1611 return SQLITE_NOMEM;
1612 }
1613 rc = sqliteOsFileSize(&pPager->jfd, &pPager->ckptJSize);
1614 if( rc ) goto ckpt_begin_failed;
drh663fc632002-02-02 18:49:19 +00001615 pPager->ckptSize = pPager->dbSize;
drh0f892532002-05-30 12:27:03 +00001616 if( !pPager->ckptOpen ){
1617 rc = sqlitepager_opentemp(zTemp, &pPager->cpfd);
1618 if( rc ) goto ckpt_begin_failed;
1619 pPager->ckptOpen = 1;
drh9bd47a92003-01-07 14:46:08 +00001620 pPager->ckptNRec = 0;
drh0f892532002-05-30 12:27:03 +00001621 }
1622 pPager->ckptInUse = 1;
drhfa86c412002-02-02 15:01:15 +00001623 return SQLITE_OK;
1624
1625ckpt_begin_failed:
1626 if( pPager->aInCkpt ){
1627 sqliteFree(pPager->aInCkpt);
1628 pPager->aInCkpt = 0;
1629 }
1630 return rc;
1631}
1632
1633/*
1634** Commit a checkpoint.
1635*/
1636int sqlitepager_ckpt_commit(Pager *pPager){
drh0f892532002-05-30 12:27:03 +00001637 if( pPager->ckptInUse ){
drh03eb96a2002-11-10 23:32:56 +00001638 PgHdr *pPg, *pNext;
drh96ddd6d2002-09-05 19:10:33 +00001639 sqliteOsSeek(&pPager->cpfd, 0);
drh9bd47a92003-01-07 14:46:08 +00001640 /* sqliteOsTruncate(&pPager->cpfd, 0); */
1641 pPager->ckptNRec = 0;
drh0f892532002-05-30 12:27:03 +00001642 pPager->ckptInUse = 0;
drh663fc632002-02-02 18:49:19 +00001643 sqliteFree( pPager->aInCkpt );
1644 pPager->aInCkpt = 0;
drh03eb96a2002-11-10 23:32:56 +00001645 for(pPg=pPager->pCkpt; pPg; pPg=pNext){
1646 pNext = pPg->pNextCkpt;
1647 assert( pPg->inCkpt );
drh663fc632002-02-02 18:49:19 +00001648 pPg->inCkpt = 0;
drh03eb96a2002-11-10 23:32:56 +00001649 pPg->pPrevCkpt = pPg->pNextCkpt = 0;
drh663fc632002-02-02 18:49:19 +00001650 }
drh03eb96a2002-11-10 23:32:56 +00001651 pPager->pCkpt = 0;
drh663fc632002-02-02 18:49:19 +00001652 }
drhda47d772002-12-02 04:25:19 +00001653 pPager->ckptAutoopen = 0;
drhfa86c412002-02-02 15:01:15 +00001654 return SQLITE_OK;
1655}
1656
1657/*
1658** Rollback a checkpoint.
1659*/
1660int sqlitepager_ckpt_rollback(Pager *pPager){
1661 int rc;
drh0f892532002-05-30 12:27:03 +00001662 if( pPager->ckptInUse ){
drh663fc632002-02-02 18:49:19 +00001663 rc = pager_ckpt_playback(pPager);
1664 sqlitepager_ckpt_commit(pPager);
1665 }else{
1666 rc = SQLITE_OK;
1667 }
drhda47d772002-12-02 04:25:19 +00001668 pPager->ckptAutoopen = 0;
drhfa86c412002-02-02 15:01:15 +00001669 return rc;
1670}
1671
#ifdef SQLITE_TEST
/*
** Print a listing of all referenced pages and their ref count.
**
** One line is written to standard output for every page on the pager's
** pAll list whose reference count is greater than zero, giving the page
** number, the address of the page data and the reference count.
**
** The address is printed with %p.  Casting the pointer to int would
** truncate it on platforms where pointers are wider than int.
*/
void sqlitepager_refdump(Pager *pPager){
  PgHdr *pPg;
  for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
    if( pPg->nRef<=0 ) continue;
    printf("PAGE %3d addr=%p nRef=%d\n",
       (int)pPg->pgno, (void*)PGHDR_TO_DATA(pPg), pPg->nRef);
  }
}
#endif