blob: 03be78c5657c94cf938d0b1dc916813cd7493997 [file] [log] [blame]
drhed7c8552001-04-11 14:29:21 +00001/*
drhb19a2bc2001-09-16 00:13:26 +00002** 2001 September 15
drhed7c8552001-04-11 14:29:21 +00003**
drhb19a2bc2001-09-16 00:13:26 +00004** The author disclaims copyright to this source code. In place of
5** a legal notice, here is a blessing:
drhed7c8552001-04-11 14:29:21 +00006**
drhb19a2bc2001-09-16 00:13:26 +00007** May you do good and not evil.
8** May you find forgiveness for yourself and forgive others.
9** May you share freely, never taking more than you give.
drhed7c8552001-04-11 14:29:21 +000010**
11*************************************************************************
drhb19a2bc2001-09-16 00:13:26 +000012** This is the implementation of the page cache subsystem or "pager".
drhed7c8552001-04-11 14:29:21 +000013**
drhb19a2bc2001-09-16 00:13:26 +000014** The pager is used to access a database disk file. It implements
15** atomic commit and rollback through the use of a journal file that
16** is separate from the database file. The pager also implements file
17** locking to prevent two processes from writing the same database
18** file simultaneously, or one process from reading the database while
19** another is writing.
drhed7c8552001-04-11 14:29:21 +000020**
drh2554f8b2003-01-22 01:26:44 +000021** @(#) $Id: pager.c,v 1.70 2003/01/22 01:26:44 drh Exp $
drhed7c8552001-04-11 14:29:21 +000022*/
drh829e8022002-11-06 14:08:11 +000023#include "os.h" /* Must be first to enable large file support */
drhd9b02572001-04-15 00:37:09 +000024#include "sqliteInt.h"
drhed7c8552001-04-11 14:29:21 +000025#include "pager.h"
drhed7c8552001-04-11 14:29:21 +000026#include <assert.h>
drhd9b02572001-04-15 00:37:09 +000027#include <string.h>
drhed7c8552001-04-11 14:29:21 +000028
/*
** Macros for troubleshooting.  Normally turned off.
**
** When enabled, tracing is limited to the first pager registered through
** SET_PAGER().  The TRACEn() macros expect a variable named "pPager" to
** be in scope at the point of use.
**
** Each enabled macro is wrapped in do{...}while(0) so that it acts as a
** single statement and cannot capture the "else" of an enclosing
** unbraced if/else.  When disabled, every macro expands to nothing.
*/
#if 0
#include <stdio.h>    /* fprintf() and stderr, only needed when tracing */
static Pager *mainPager = 0;
#define SET_PAGER(X)  do{ if( mainPager==0 ) mainPager = (X); }while(0)
#define CLR_PAGER(X)  do{ if( mainPager==(X) ) mainPager = 0; }while(0)
#define TRACE1(X)     do{ if( pPager==mainPager ) fprintf(stderr,X); }while(0)
#define TRACE2(X,Y)   do{ if( pPager==mainPager ) fprintf(stderr,X,Y); }while(0)
#define TRACE3(X,Y,Z) \
  do{ if( pPager==mainPager ) fprintf(stderr,X,Y,Z); }while(0)
#else
#define SET_PAGER(X)
#define CLR_PAGER(X)
#define TRACE1(X)
#define TRACE2(X,Y)
#define TRACE3(X,Y,Z)
#endif
46
47
/*
** Lock states of the page cache.  The cache as a whole is always in
** exactly one of these states:
**
**   SQLITE_UNLOCK      Nothing is being read or written and no page
**                      data is held in memory.  This is the state the
**                      cache starts out in.
**
**   SQLITE_READLOCK    The database is being read.  Writes are not
**                      allowed.  Any number of readers may share the
**                      same database file concurrently.
**
**   SQLITE_WRITELOCK   The database is being written.  Access is
**                      exclusive: no other process or thread may read
**                      or write while the writer is active.
**
** Transitions:  fetching the first page moves UNLOCK -> READLOCK.
** Releasing the last outstanding page moves READLOCK -> UNLOCK.  The
** first write request moves READLOCK -> WRITELOCK (a page can only be
** written while it is outstanding, so the cache is necessarily in
** READLOCK at that point).  Commit and rollback move WRITELOCK back to
** READLOCK.
**
** The numeric values are ordered; code in this file compares states
** with < and >=.
*/
#define SQLITE_UNLOCK      0
#define SQLITE_READLOCK    1
#define SQLITE_WRITELOCK   2
81
drhd9b02572001-04-15 00:37:09 +000082
drhed7c8552001-04-11 14:29:21 +000083/*
84** Each in-memory image of a page begins with the following header.
drhbd03cae2001-06-02 02:40:57 +000085** This header is only visible to this pager module. The client
86** code that calls pager sees only the data that follows the header.
drhed7c8552001-04-11 14:29:21 +000087*/
drhd9b02572001-04-15 00:37:09 +000088typedef struct PgHdr PgHdr;
drhed7c8552001-04-11 14:29:21 +000089struct PgHdr {
90 Pager *pPager; /* The pager to which this page belongs */
91 Pgno pgno; /* The page number for this page */
drh69688d52001-04-14 16:38:23 +000092 PgHdr *pNextHash, *pPrevHash; /* Hash collision chain for PgHdr.pgno */
drhed7c8552001-04-11 14:29:21 +000093 int nRef; /* Number of users of this page */
drhd9b02572001-04-15 00:37:09 +000094 PgHdr *pNextFree, *pPrevFree; /* Freelist of pages where nRef==0 */
95 PgHdr *pNextAll, *pPrevAll; /* A list of all pages */
drh03eb96a2002-11-10 23:32:56 +000096 PgHdr *pNextCkpt, *pPrevCkpt; /* List of pages in the checkpoint journal */
drh193a6b42002-07-07 16:52:46 +000097 u8 inJournal; /* TRUE if has been written to journal */
98 u8 inCkpt; /* TRUE if written to the checkpoint journal */
99 u8 dirty; /* TRUE if we need to write back changes */
drhdb48ee02003-01-16 13:42:43 +0000100 u8 needSync; /* Sync journal before writing this page */
drh193a6b42002-07-07 16:52:46 +0000101 u8 alwaysRollback; /* Disable dont_rollback() for this page */
drh2554f8b2003-01-22 01:26:44 +0000102 PgHdr *pDirty; /* Dirty pages sorted by PgHdr.pgno */
drh69688d52001-04-14 16:38:23 +0000103 /* SQLITE_PAGE_SIZE bytes of page data follow this header */
drh7e3b0a02001-04-28 16:52:40 +0000104 /* Pager.nExtra bytes of local data follow the page data */
drhed7c8552001-04-11 14:29:21 +0000105};
106
/*
** Pointer conversions between a page header and what follows it:
**
**   PGHDR_TO_DATA(P)    header pointer -> start of the page data
**   DATA_TO_PGHDR(D)    page data pointer -> its header
**   PGHDR_TO_EXTRA(P)   header pointer -> the extra bytes that sit
**                       SQLITE_PAGE_SIZE bytes past the page data
*/
#define PGHDR_TO_DATA(P)   ((void*)((P)+1))
#define DATA_TO_PGHDR(D)   (((PgHdr*)(D))-1)
#define PGHDR_TO_EXTRA(P)  ((void*)(((char*)((P)+1))+SQLITE_PAGE_SIZE))
drhed7c8552001-04-11 14:29:21 +0000114
/*
** Number of buckets in the hash table that maps a page number to its
** in-memory page.  pager_hash() masks with N_PG_HASH-1, so this value
** has to be a power of two.
*/
#define N_PG_HASH 2048

/*
** Map a page number onto a hash bucket index in the range
** 0..N_PG_HASH-1.
*/
#define pager_hash(PN)  ((N_PG_HASH-1)&(PN))
drhed7c8552001-04-11 14:29:21 +0000125
126/*
127** A open page cache is an instance of the following structure.
128*/
129struct Pager {
130 char *zFilename; /* Name of the database file */
131 char *zJournal; /* Name of the journal file */
drh8cfbf082001-09-19 13:22:39 +0000132 OsFile fd, jfd; /* File descriptors for database and journal */
drhfa86c412002-02-02 15:01:15 +0000133 OsFile cpfd; /* File descriptor for the checkpoint journal */
drhed7c8552001-04-11 14:29:21 +0000134 int dbSize; /* Number of pages in the file */
drh69688d52001-04-14 16:38:23 +0000135 int origDbSize; /* dbSize before the current change */
drh28be87c2002-11-05 23:03:02 +0000136 int ckptSize; /* Size of database (in pages) at ckpt_begin() */
137 off_t ckptJSize; /* Size of journal at ckpt_begin() */
drhdb48ee02003-01-16 13:42:43 +0000138#ifndef NDEBUG
139 off_t syncJSize; /* Size of journal at last fsync() call */
140#endif
drh9bd47a92003-01-07 14:46:08 +0000141 int ckptNRec; /* Number of records in the checkpoint journal */
drh7e3b0a02001-04-28 16:52:40 +0000142 int nExtra; /* Add this many bytes to each in-memory page */
drh72f82862001-05-24 21:06:34 +0000143 void (*xDestructor)(void*); /* Call this routine when freeing pages */
drhed7c8552001-04-11 14:29:21 +0000144 int nPage; /* Total number of in-memory pages */
drhd9b02572001-04-15 00:37:09 +0000145 int nRef; /* Number of in-memory pages with PgHdr.nRef>0 */
drhed7c8552001-04-11 14:29:21 +0000146 int mxPage; /* Maximum number of pages to hold in cache */
drhd9b02572001-04-15 00:37:09 +0000147 int nHit, nMiss, nOvfl; /* Cache hits, missing, and LRU overflows */
drh603240c2002-03-05 01:11:12 +0000148 u8 journalOpen; /* True if journal file descriptors is valid */
drhdb48ee02003-01-16 13:42:43 +0000149 u8 journalStarted; /* True if initial magic of journal is synced */
drhda47d772002-12-02 04:25:19 +0000150 u8 useJournal; /* Do not use a rollback journal on this file */
drh603240c2002-03-05 01:11:12 +0000151 u8 ckptOpen; /* True if the checkpoint journal is open */
drh0f892532002-05-30 12:27:03 +0000152 u8 ckptInUse; /* True we are in a checkpoint */
drhda47d772002-12-02 04:25:19 +0000153 u8 ckptAutoopen; /* Open ckpt journal when main journal is opened*/
drh603240c2002-03-05 01:11:12 +0000154 u8 noSync; /* Do not sync the journal if true */
155 u8 state; /* SQLITE_UNLOCK, _READLOCK or _WRITELOCK */
156 u8 errMask; /* One of several kinds of errors */
157 u8 tempFile; /* zFilename is a temporary file */
158 u8 readOnly; /* True for a read-only database */
159 u8 needSync; /* True if an fsync() is needed on the journal */
drha1680452002-04-18 01:56:57 +0000160 u8 dirtyFile; /* True if database file has changed in any way */
drh193a6b42002-07-07 16:52:46 +0000161 u8 alwaysRollback; /* Disable dont_rollback() for all pages */
drh94f33312002-08-12 12:29:56 +0000162 u8 journalFormat; /* Version number of the journal file */
drh603240c2002-03-05 01:11:12 +0000163 u8 *aInJournal; /* One bit for each page in the database file */
164 u8 *aInCkpt; /* One bit for each page in the database */
drhed7c8552001-04-11 14:29:21 +0000165 PgHdr *pFirst, *pLast; /* List of free pages */
drh341eae82003-01-21 02:39:36 +0000166 PgHdr *pFirstSynced; /* First free page with PgHdr.needSync==0 */
drhd9b02572001-04-15 00:37:09 +0000167 PgHdr *pAll; /* List of all pages */
drh03eb96a2002-11-10 23:32:56 +0000168 PgHdr *pCkpt; /* List of pages in the checkpoint journal */
drhed7c8552001-04-11 14:29:21 +0000169 PgHdr *aHash[N_PG_HASH]; /* Hash table to map page number of PgHdr */
drhd9b02572001-04-15 00:37:09 +0000170};
171
/*
** Error bits that may be OR-ed together in Pager.errMask.
** pager_errcode() turns the mask into an SQLite result code.
*/
#define PAGER_ERR_FULL     0x01  /* A write() failed */
#define PAGER_ERR_MEM      0x02  /* A malloc() failed */
#define PAGER_ERR_LOCK     0x04  /* Error in the locking protocol */
#define PAGER_ERR_CORRUPT  0x08  /* Database or journal corruption */
#define PAGER_ERR_DISK     0x10  /* General disk I/O error - bad hard drive? */
drhd9b02572001-04-15 00:37:09 +0000180
181/*
182** The journal file contains page records in the following
183** format.
184*/
185typedef struct PageRecord PageRecord;
186struct PageRecord {
187 Pgno pgno; /* The page number */
188 char aData[SQLITE_PAGE_SIZE]; /* Original data for page pgno */
189};
190
/*
** Magic strings found at the start of a journal file.  The bytes came
** from /dev/random and serve only as a sanity check.
**
** Two journal formats exist.  The old format stores 32-bit integers in
** the byte order of the host; the new format stores them big-endian.
** New journals are written in the new format, but old journals must
** still be readable so that they can be rolled back.  The two magic
** strings differ only in their final byte.
*/
static const unsigned char aOldJournalMagic[] = {
  0xd9, 0xd5, 0x05, 0xf9,
  0x20, 0xa1, 0x63, 0xd4,
};
static const unsigned char aJournalMagic[] = {
  0xd9, 0xd5, 0x05, 0xf9,
  0x20, 0xa1, 0x63, 0xd5,
};
#define SQLITE_NEW_JOURNAL_FORMAT  1
#define SQLITE_OLD_JOURNAL_FORMAT  0
209
/*
** When non-zero, journals are written in the old format.  This exists
** for testing only, to verify that an old-format journal can still be
** rolled back.  Outside of test builds the name is a constant 0.
*/
#ifndef SQLITE_TEST
# define pager_old_format 0
#else
int pager_old_format = 0;
#endif
drhed7c8552001-04-11 14:29:21 +0000220
/*
** Reference count tracking (test builds only).
**
** When pager_refinfo_enable is non-zero, REFINFO(p) prints the page
** number, the address of the page data and the current reference count
** of page p.  In non-test builds REFINFO() expands to nothing.
**
** The data address is narrowed through size_t to unsigned int before
** printing: converting a pointer directly to int is undefined when the
** value does not fit (e.g. on 64-bit hosts).  Only the low-order bits of
** the address are shown there; the output format is unchanged.
*/
#ifdef SQLITE_TEST
  int pager_refinfo_enable = 0;
  static void pager_refinfo(PgHdr *p){
    static int cnt = 0;
    if( !pager_refinfo_enable ) return;
    printf(
       "REFCNT: %4d addr=0x%08x nRef=%d\n",
       (int)p->pgno, (unsigned int)(size_t)PGHDR_TO_DATA(p), p->nRef
    );
    cnt++;   /* Something to set a breakpoint on */
  }
# define REFINFO(X)  pager_refinfo(X)
#else
# define REFINFO(X)
#endif
239
240/*
drh94f33312002-08-12 12:29:56 +0000241** Read a 32-bit integer from the given file descriptor
242*/
243static int read32bits(Pager *pPager, OsFile *fd, u32 *pRes){
244 u32 res;
245 int rc;
246 rc = sqliteOsRead(fd, &res, sizeof(res));
247 if( rc==SQLITE_OK && pPager->journalFormat==SQLITE_NEW_JOURNAL_FORMAT ){
248 unsigned char ac[4];
249 memcpy(ac, &res, 4);
250 res = (ac[0]<<24) | (ac[1]<<16) | (ac[2]<<8) | ac[3];
251 }
252 *pRes = res;
253 return rc;
254}
255
256/*
257** Write a 32-bit integer into the given file descriptor. Writing
258** is always done using the new journal format.
259*/
260static int write32bits(OsFile *fd, u32 val){
261 unsigned char ac[4];
drh94f33312002-08-12 12:29:56 +0000262 if( pager_old_format ){
263 return sqliteOsWrite(fd, &val, 4);
264 }
drh94f33312002-08-12 12:29:56 +0000265 ac[0] = (val>>24) & 0xff;
266 ac[1] = (val>>16) & 0xff;
267 ac[2] = (val>>8) & 0xff;
268 ac[3] = val & 0xff;
269 return sqliteOsWrite(fd, ac, 4);
270}
271
drh2554f8b2003-01-22 01:26:44 +0000272/*
273** Write a 32-bit integer into a page header right before the
274** page data. This will overwrite the PgHdr.pDirty pointer.
275*/
276static void storePageNumber(PgHdr *p){
277 u32 val = p->pgno;
278 unsigned char *ac;
279 ac = &((char*)PGHDR_TO_DATA(p))[-4];
280 if( pager_old_format ){
281 memcpy(ac, &val, 4);
282 }else{
283 ac[0] = (val>>24) & 0xff;
284 ac[1] = (val>>16) & 0xff;
285 ac[2] = (val>>8) & 0xff;
286 ac[3] = val & 0xff;
287 }
288}
289
drh94f33312002-08-12 12:29:56 +0000290
291/*
drhd9b02572001-04-15 00:37:09 +0000292** Convert the bits in the pPager->errMask into an approprate
293** return code.
294*/
295static int pager_errcode(Pager *pPager){
296 int rc = SQLITE_OK;
297 if( pPager->errMask & PAGER_ERR_LOCK ) rc = SQLITE_PROTOCOL;
drh81a20f22001-10-12 17:30:04 +0000298 if( pPager->errMask & PAGER_ERR_DISK ) rc = SQLITE_IOERR;
drhd9b02572001-04-15 00:37:09 +0000299 if( pPager->errMask & PAGER_ERR_FULL ) rc = SQLITE_FULL;
300 if( pPager->errMask & PAGER_ERR_MEM ) rc = SQLITE_NOMEM;
301 if( pPager->errMask & PAGER_ERR_CORRUPT ) rc = SQLITE_CORRUPT;
302 return rc;
drhed7c8552001-04-11 14:29:21 +0000303}
304
305/*
drh03eb96a2002-11-10 23:32:56 +0000306** Add or remove a page from the list of all pages that are in the
307** checkpoint journal.
308**
309** The Pager keeps a separate list of pages that are currently in
310** the checkpoint journal. This helps the sqlitepager_ckpt_commit()
311** routine run MUCH faster for the common case where there are many
312** pages in memory but only a few are in the checkpoint journal.
313*/
314static void page_add_to_ckpt_list(PgHdr *pPg){
315 Pager *pPager = pPg->pPager;
316 if( pPg->inCkpt ) return;
317 assert( pPg->pPrevCkpt==0 && pPg->pNextCkpt==0 );
318 pPg->pPrevCkpt = 0;
319 if( pPager->pCkpt ){
320 pPager->pCkpt->pPrevCkpt = pPg;
321 }
322 pPg->pNextCkpt = pPager->pCkpt;
323 pPager->pCkpt = pPg;
324 pPg->inCkpt = 1;
325}
326static void page_remove_from_ckpt_list(PgHdr *pPg){
327 if( !pPg->inCkpt ) return;
328 if( pPg->pPrevCkpt ){
329 assert( pPg->pPrevCkpt->pNextCkpt==pPg );
330 pPg->pPrevCkpt->pNextCkpt = pPg->pNextCkpt;
331 }else{
332 assert( pPg->pPager->pCkpt==pPg );
333 pPg->pPager->pCkpt = pPg->pNextCkpt;
334 }
335 if( pPg->pNextCkpt ){
336 assert( pPg->pNextCkpt->pPrevCkpt==pPg );
337 pPg->pNextCkpt->pPrevCkpt = pPg->pPrevCkpt;
338 }
339 pPg->pNextCkpt = 0;
340 pPg->pPrevCkpt = 0;
341 pPg->inCkpt = 0;
342}
343
344/*
drhed7c8552001-04-11 14:29:21 +0000345** Find a page in the hash table given its page number. Return
346** a pointer to the page or NULL if not found.
347*/
drhd9b02572001-04-15 00:37:09 +0000348static PgHdr *pager_lookup(Pager *pPager, Pgno pgno){
drh836faa42003-01-11 13:30:57 +0000349 PgHdr *p = pPager->aHash[pager_hash(pgno)];
drhed7c8552001-04-11 14:29:21 +0000350 while( p && p->pgno!=pgno ){
351 p = p->pNextHash;
352 }
353 return p;
354}
355
356/*
357** Unlock the database and clear the in-memory cache. This routine
358** sets the state of the pager back to what it was when it was first
359** opened. Any outstanding pages are invalidated and subsequent attempts
360** to access those pages will likely result in a coredump.
361*/
drhd9b02572001-04-15 00:37:09 +0000362static void pager_reset(Pager *pPager){
drhed7c8552001-04-11 14:29:21 +0000363 PgHdr *pPg, *pNext;
drhd9b02572001-04-15 00:37:09 +0000364 for(pPg=pPager->pAll; pPg; pPg=pNext){
365 pNext = pPg->pNextAll;
366 sqliteFree(pPg);
drhed7c8552001-04-11 14:29:21 +0000367 }
368 pPager->pFirst = 0;
drh341eae82003-01-21 02:39:36 +0000369 pPager->pFirstSynced = 0;
drhd9b02572001-04-15 00:37:09 +0000370 pPager->pLast = 0;
371 pPager->pAll = 0;
drhed7c8552001-04-11 14:29:21 +0000372 memset(pPager->aHash, 0, sizeof(pPager->aHash));
373 pPager->nPage = 0;
drhfa86c412002-02-02 15:01:15 +0000374 if( pPager->state>=SQLITE_WRITELOCK ){
drhd9b02572001-04-15 00:37:09 +0000375 sqlitepager_rollback(pPager);
drhed7c8552001-04-11 14:29:21 +0000376 }
drha7fcb052001-12-14 15:09:55 +0000377 sqliteOsUnlock(&pPager->fd);
drhed7c8552001-04-11 14:29:21 +0000378 pPager->state = SQLITE_UNLOCK;
drhd9b02572001-04-15 00:37:09 +0000379 pPager->dbSize = -1;
drhed7c8552001-04-11 14:29:21 +0000380 pPager->nRef = 0;
drh8cfbf082001-09-19 13:22:39 +0000381 assert( pPager->journalOpen==0 );
drhed7c8552001-04-11 14:29:21 +0000382}
383
384/*
385** When this routine is called, the pager has the journal file open and
386** a write lock on the database. This routine releases the database
387** write lock and acquires a read lock in its place. The journal file
388** is deleted and closed.
drhed7c8552001-04-11 14:29:21 +0000389*/
drhd9b02572001-04-15 00:37:09 +0000390static int pager_unwritelock(Pager *pPager){
drhed7c8552001-04-11 14:29:21 +0000391 int rc;
drhd9b02572001-04-15 00:37:09 +0000392 PgHdr *pPg;
drhfa86c412002-02-02 15:01:15 +0000393 if( pPager->state<SQLITE_WRITELOCK ) return SQLITE_OK;
drh663fc632002-02-02 18:49:19 +0000394 sqlitepager_ckpt_commit(pPager);
drh0f892532002-05-30 12:27:03 +0000395 if( pPager->ckptOpen ){
396 sqliteOsClose(&pPager->cpfd);
397 pPager->ckptOpen = 0;
398 }
drhda47d772002-12-02 04:25:19 +0000399 if( pPager->journalOpen ){
400 sqliteOsClose(&pPager->jfd);
401 pPager->journalOpen = 0;
402 sqliteOsDelete(pPager->zJournal);
403 sqliteFree( pPager->aInJournal );
404 pPager->aInJournal = 0;
405 for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
406 pPg->inJournal = 0;
407 pPg->dirty = 0;
drhdb48ee02003-01-16 13:42:43 +0000408 pPg->needSync = 0;
drhda47d772002-12-02 04:25:19 +0000409 }
410 }else{
411 assert( pPager->dirtyFile==0 || pPager->useJournal==0 );
drhd9b02572001-04-15 00:37:09 +0000412 }
drhda47d772002-12-02 04:25:19 +0000413 rc = sqliteOsReadLock(&pPager->fd);
drh8e298f92002-07-06 16:28:47 +0000414 if( rc==SQLITE_OK ){
415 pPager->state = SQLITE_READLOCK;
416 }else{
417 /* This can only happen if a process does a BEGIN, then forks and the
418 ** child process does the COMMIT. Because of the semantics of unix
419 ** file locking, the unlock will fail.
420 */
421 pPager->state = SQLITE_UNLOCK;
422 }
drhed7c8552001-04-11 14:29:21 +0000423 return rc;
424}
425
drhed7c8552001-04-11 14:29:21 +0000426/*
drhfa86c412002-02-02 15:01:15 +0000427** Read a single page from the journal file opened on file descriptor
428** jfd. Playback this one page.
429*/
430static int pager_playback_one_page(Pager *pPager, OsFile *jfd){
431 int rc;
432 PgHdr *pPg; /* An existing page in the cache */
433 PageRecord pgRec;
434
drh94f33312002-08-12 12:29:56 +0000435 rc = read32bits(pPager, jfd, &pgRec.pgno);
436 if( rc!=SQLITE_OK ) return rc;
437 rc = sqliteOsRead(jfd, &pgRec.aData, sizeof(pgRec.aData));
drhfa86c412002-02-02 15:01:15 +0000438 if( rc!=SQLITE_OK ) return rc;
439
440 /* Sanity checking on the page */
441 if( pgRec.pgno>pPager->dbSize || pgRec.pgno==0 ) return SQLITE_CORRUPT;
442
443 /* Playback the page. Update the in-memory copy of the page
444 ** at the same time, if there is one.
445 */
446 pPg = pager_lookup(pPager, pgRec.pgno);
drhdb48ee02003-01-16 13:42:43 +0000447 if( pPg==0 || pPg->needSync==0 ){
448 TRACE2("PLAYBACK %d\n", pgRec.pgno);
449 sqliteOsSeek(&pPager->fd, (pgRec.pgno-1)*(off_t)SQLITE_PAGE_SIZE);
450 rc = sqliteOsWrite(&pPager->fd, pgRec.aData, SQLITE_PAGE_SIZE);
451 }
drhfa86c412002-02-02 15:01:15 +0000452 if( pPg ){
453 memcpy(PGHDR_TO_DATA(pPg), pgRec.aData, SQLITE_PAGE_SIZE);
454 memset(PGHDR_TO_EXTRA(pPg), 0, pPager->nExtra);
drhdb48ee02003-01-16 13:42:43 +0000455 pPg->dirty = 0;
456 pPg->needSync = 0;
drhfa86c412002-02-02 15:01:15 +0000457 }
458 return rc;
459}
460
461/*
drhed7c8552001-04-11 14:29:21 +0000462** Playback the journal and thus restore the database file to
463** the state it was in before we started making changes.
464**
drhd9b02572001-04-15 00:37:09 +0000465** The journal file format is as follows: There is an initial
466** file-type string for sanity checking. Then there is a single
467** Pgno number which is the number of pages in the database before
468** changes were made. The database is truncated to this size.
drh306dc212001-05-21 13:45:10 +0000469** Next come zero or more page records where each page record
470** consists of a Pgno and SQLITE_PAGE_SIZE bytes of data. See
471** the PageRecord structure for details.
drhed7c8552001-04-11 14:29:21 +0000472**
drhd9b02572001-04-15 00:37:09 +0000473** If the file opened as the journal file is not a well-formed
474** journal file (as determined by looking at the magic number
475** at the beginning) then this routine returns SQLITE_PROTOCOL.
476** If any other errors occur during playback, the database will
477** likely be corrupted, so the PAGER_ERR_CORRUPT bit is set in
478** pPager->errMask and SQLITE_CORRUPT is returned. If it all
479** works, then this routine returns SQLITE_OK.
drhed7c8552001-04-11 14:29:21 +0000480*/
drhd9b02572001-04-15 00:37:09 +0000481static int pager_playback(Pager *pPager){
drh28be87c2002-11-05 23:03:02 +0000482 off_t nRec; /* Number of Records */
drhd9b02572001-04-15 00:37:09 +0000483 int i; /* Loop counter */
484 Pgno mxPg = 0; /* Size of the original file in pages */
drhd9b02572001-04-15 00:37:09 +0000485 unsigned char aMagic[sizeof(aJournalMagic)];
drhed7c8552001-04-11 14:29:21 +0000486 int rc;
487
drhc3a64ba2001-11-22 00:01:27 +0000488 /* Figure out how many records are in the journal. Abort early if
489 ** the journal is empty.
drhed7c8552001-04-11 14:29:21 +0000490 */
drh8cfbf082001-09-19 13:22:39 +0000491 assert( pPager->journalOpen );
drha7fcb052001-12-14 15:09:55 +0000492 sqliteOsSeek(&pPager->jfd, 0);
493 rc = sqliteOsFileSize(&pPager->jfd, &nRec);
drhc3a64ba2001-11-22 00:01:27 +0000494 if( rc!=SQLITE_OK ){
495 goto end_playback;
496 }
drh2c799952003-01-03 02:04:27 +0000497 if( nRec < sizeof(aMagic)+sizeof(Pgno) ){
drhc3a64ba2001-11-22 00:01:27 +0000498 goto end_playback;
499 }
drh28be87c2002-11-05 23:03:02 +0000500 nRec = (nRec - (sizeof(aMagic)+sizeof(Pgno))) / sizeof(PageRecord);
drhc3a64ba2001-11-22 00:01:27 +0000501
502 /* Read the beginning of the journal and truncate the
503 ** database file back to its original size.
504 */
drha7fcb052001-12-14 15:09:55 +0000505 rc = sqliteOsRead(&pPager->jfd, aMagic, sizeof(aMagic));
drh94f33312002-08-12 12:29:56 +0000506 if( rc!=SQLITE_OK ){
drh81a20f22001-10-12 17:30:04 +0000507 rc = SQLITE_PROTOCOL;
508 goto end_playback;
drhd9b02572001-04-15 00:37:09 +0000509 }
drh94f33312002-08-12 12:29:56 +0000510 if( memcmp(aMagic, aOldJournalMagic, sizeof(aMagic))==0 ){
511 pPager->journalFormat = SQLITE_OLD_JOURNAL_FORMAT;
512 }else if( memcmp(aMagic, aJournalMagic, sizeof(aMagic))==0 ){
513 pPager->journalFormat = SQLITE_NEW_JOURNAL_FORMAT;
514 }else{
515 rc = SQLITE_PROTOCOL;
516 goto end_playback;
517 }
518 rc = read32bits(pPager, &pPager->jfd, &mxPg);
drhd9b02572001-04-15 00:37:09 +0000519 if( rc!=SQLITE_OK ){
drh81a20f22001-10-12 17:30:04 +0000520 goto end_playback;
drhd9b02572001-04-15 00:37:09 +0000521 }
drh28be87c2002-11-05 23:03:02 +0000522 rc = sqliteOsTruncate(&pPager->fd, SQLITE_PAGE_SIZE*(off_t)mxPg);
drh81a20f22001-10-12 17:30:04 +0000523 if( rc!=SQLITE_OK ){
524 goto end_playback;
525 }
drhd9b02572001-04-15 00:37:09 +0000526 pPager->dbSize = mxPg;
527
drhfa86c412002-02-02 15:01:15 +0000528 /* Copy original pages out of the journal and back into the database file.
drhed7c8552001-04-11 14:29:21 +0000529 */
drhd9b02572001-04-15 00:37:09 +0000530 for(i=nRec-1; i>=0; i--){
drhfa86c412002-02-02 15:01:15 +0000531 rc = pager_playback_one_page(pPager, &pPager->jfd);
drhd9b02572001-04-15 00:37:09 +0000532 if( rc!=SQLITE_OK ) break;
drhed7c8552001-04-11 14:29:21 +0000533 }
drh81a20f22001-10-12 17:30:04 +0000534
drhdb48ee02003-01-16 13:42:43 +0000535
drh81a20f22001-10-12 17:30:04 +0000536end_playback:
drhdb48ee02003-01-16 13:42:43 +0000537#if !defined(NDEBUG) && defined(SQLITE_TEST)
538 /* For pages that were never written into the journal, restore the
539 ** memory copy from the original database file.
540 **
541 ** This is code is used during testing only. It is necessary to
542 ** compensate for the sqliteOsTruncate() call inside
543 ** sqlitepager_rollback().
544 */
545 if( rc==SQLITE_OK ){
546 PgHdr *pPg;
547 for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
548 if( (int)pPg->pgno <= pPager->origDbSize ){
549 sqliteOsSeek(&pPager->fd, SQLITE_PAGE_SIZE*(off_t)(pPg->pgno-1));
550 rc = sqliteOsRead(&pPager->fd, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE);
551 if( rc ) break;
552 }else{
553 memset(PGHDR_TO_DATA(pPg), 0, SQLITE_PAGE_SIZE);
554 }
555 memset(PGHDR_TO_EXTRA(pPg), 0, pPager->nExtra);
556 pPg->needSync = 0;
557 pPg->dirty = 0;
558 }
559 }
560#endif
drhd9b02572001-04-15 00:37:09 +0000561 if( rc!=SQLITE_OK ){
562 pager_unwritelock(pPager);
563 pPager->errMask |= PAGER_ERR_CORRUPT;
564 rc = SQLITE_CORRUPT;
565 }else{
566 rc = pager_unwritelock(pPager);
drhed7c8552001-04-11 14:29:21 +0000567 }
drhd9b02572001-04-15 00:37:09 +0000568 return rc;
drhed7c8552001-04-11 14:29:21 +0000569}
570
571/*
drhfa86c412002-02-02 15:01:15 +0000572** Playback the checkpoint journal.
573**
574** This is similar to playing back the transaction journal but with
575** a few extra twists.
576**
drh663fc632002-02-02 18:49:19 +0000577** (1) The number of pages in the database file at the start of
578** the checkpoint is stored in pPager->ckptSize, not in the
579** journal file itself.
drhfa86c412002-02-02 15:01:15 +0000580**
581** (2) In addition to playing back the checkpoint journal, also
582** playback all pages of the transaction journal beginning
583** at offset pPager->ckptJSize.
584*/
585static int pager_ckpt_playback(Pager *pPager){
drh28be87c2002-11-05 23:03:02 +0000586 off_t nRec; /* Number of Records */
drhfa86c412002-02-02 15:01:15 +0000587 int i; /* Loop counter */
588 int rc;
589
590 /* Truncate the database back to its original size.
591 */
drh28be87c2002-11-05 23:03:02 +0000592 rc = sqliteOsTruncate(&pPager->fd, SQLITE_PAGE_SIZE*(off_t)pPager->ckptSize);
drhfa86c412002-02-02 15:01:15 +0000593 pPager->dbSize = pPager->ckptSize;
594
595 /* Figure out how many records are in the checkpoint journal.
596 */
drh0f892532002-05-30 12:27:03 +0000597 assert( pPager->ckptInUse && pPager->journalOpen );
drhfa86c412002-02-02 15:01:15 +0000598 sqliteOsSeek(&pPager->cpfd, 0);
drh9bd47a92003-01-07 14:46:08 +0000599 nRec = pPager->ckptNRec;
drhfa86c412002-02-02 15:01:15 +0000600
601 /* Copy original pages out of the checkpoint journal and back into the
602 ** database file.
603 */
drh74587e52002-08-13 00:01:16 +0000604 if( pager_old_format ){
605 pPager->journalFormat = SQLITE_OLD_JOURNAL_FORMAT;
606 }else{
607 pPager->journalFormat = SQLITE_NEW_JOURNAL_FORMAT;
608 }
drhfa86c412002-02-02 15:01:15 +0000609 for(i=nRec-1; i>=0; i--){
610 rc = pager_playback_one_page(pPager, &pPager->cpfd);
611 if( rc!=SQLITE_OK ) goto end_ckpt_playback;
612 }
613
614 /* Figure out how many pages need to be copied out of the transaction
615 ** journal.
616 */
617 rc = sqliteOsSeek(&pPager->jfd, pPager->ckptJSize);
618 if( rc!=SQLITE_OK ){
619 goto end_ckpt_playback;
620 }
621 rc = sqliteOsFileSize(&pPager->jfd, &nRec);
622 if( rc!=SQLITE_OK ){
623 goto end_ckpt_playback;
624 }
625 nRec = (nRec - pPager->ckptJSize)/sizeof(PageRecord);
626 for(i=nRec-1; i>=0; i--){
627 rc = pager_playback_one_page(pPager, &pPager->jfd);
628 if( rc!=SQLITE_OK ) goto end_ckpt_playback;
629 }
630
631
632end_ckpt_playback:
drhfa86c412002-02-02 15:01:15 +0000633 if( rc!=SQLITE_OK ){
drhfa86c412002-02-02 15:01:15 +0000634 pPager->errMask |= PAGER_ERR_CORRUPT;
635 rc = SQLITE_CORRUPT;
drhfa86c412002-02-02 15:01:15 +0000636 }
637 return rc;
638}
639
640/*
drhf57b14a2001-09-14 18:54:08 +0000641** Change the maximum number of in-memory pages that are allowed.
drhcd61c282002-03-06 22:01:34 +0000642**
643** The maximum number is the absolute value of the mxPage parameter.
644** If mxPage is negative, the noSync flag is also set. noSync bypasses
645** calls to sqliteOsSync(). The pager runs much faster with noSync on,
646** but if the operating system crashes or there is an abrupt power
647** failure, the database file might be left in an inconsistent and
648** unrepairable state.
drhf57b14a2001-09-14 18:54:08 +0000649*/
650void sqlitepager_set_cachesize(Pager *pPager, int mxPage){
drh603240c2002-03-05 01:11:12 +0000651 if( mxPage>=0 ){
drha1680452002-04-18 01:56:57 +0000652 pPager->noSync = pPager->tempFile;
drh603240c2002-03-05 01:11:12 +0000653 }else{
654 pPager->noSync = 1;
655 mxPage = -mxPage;
656 }
drhf57b14a2001-09-14 18:54:08 +0000657 if( mxPage>10 ){
658 pPager->mxPage = mxPage;
659 }
660}
661
662/*
drhfa86c412002-02-02 15:01:15 +0000663** Open a temporary file. Write the name of the file into zName
664** (zName must be at least SQLITE_TEMPNAME_SIZE bytes long.) Write
665** the file descriptor into *fd. Return SQLITE_OK on success or some
666** other error code if we fail.
667**
668** The OS will automatically delete the temporary file when it is
669** closed.
670*/
671static int sqlitepager_opentemp(char *zFile, OsFile *fd){
672 int cnt = 8;
673 int rc;
674 do{
675 cnt--;
676 sqliteOsTempFileName(zFile);
677 rc = sqliteOsOpenExclusive(zFile, fd, 1);
678 }while( cnt>0 && rc!=SQLITE_OK );
679 return rc;
680}
681
682/*
drhed7c8552001-04-11 14:29:21 +0000683** Create a new page cache and put a pointer to the page cache in *ppPager.
drh5e00f6c2001-09-13 13:46:56 +0000684** The file to be cached need not exist. The file is not locked until
drhd9b02572001-04-15 00:37:09 +0000685** the first call to sqlitepager_get() and is only held open until the
686** last page is released using sqlitepager_unref().
drh382c0242001-10-06 16:33:02 +0000687**
drh6446c4d2001-12-15 14:22:18 +0000688** If zFilename is NULL then a randomly-named temporary file is created
689** and used as the file to be cached. The file will be deleted
690** automatically when it is closed.
drhed7c8552001-04-11 14:29:21 +0000691*/
drh7e3b0a02001-04-28 16:52:40 +0000692int sqlitepager_open(
693 Pager **ppPager, /* Return the Pager structure here */
694 const char *zFilename, /* Name of the database file to open */
695 int mxPage, /* Max number of in-memory cache pages */
drhda47d772002-12-02 04:25:19 +0000696 int nExtra, /* Extra bytes append to each in-memory page */
697 int useJournal /* TRUE to use a rollback journal on this file */
drh7e3b0a02001-04-28 16:52:40 +0000698){
drhed7c8552001-04-11 14:29:21 +0000699 Pager *pPager;
drh3e7a6092002-12-07 21:45:14 +0000700 char *zFullPathname;
drhed7c8552001-04-11 14:29:21 +0000701 int nameLen;
drh8cfbf082001-09-19 13:22:39 +0000702 OsFile fd;
703 int rc;
drh5e00f6c2001-09-13 13:46:56 +0000704 int tempFile;
705 int readOnly = 0;
drh8cfbf082001-09-19 13:22:39 +0000706 char zTemp[SQLITE_TEMPNAME_SIZE];
drhed7c8552001-04-11 14:29:21 +0000707
drhd9b02572001-04-15 00:37:09 +0000708 *ppPager = 0;
709 if( sqlite_malloc_failed ){
710 return SQLITE_NOMEM;
711 }
drh5e00f6c2001-09-13 13:46:56 +0000712 if( zFilename ){
drh3e7a6092002-12-07 21:45:14 +0000713 zFullPathname = sqliteOsFullPathname(zFilename);
714 rc = sqliteOsOpenReadWrite(zFullPathname, &fd, &readOnly);
drh5e00f6c2001-09-13 13:46:56 +0000715 tempFile = 0;
716 }else{
drhfa86c412002-02-02 15:01:15 +0000717 rc = sqlitepager_opentemp(zTemp, &fd);
drh5e00f6c2001-09-13 13:46:56 +0000718 zFilename = zTemp;
drh3e7a6092002-12-07 21:45:14 +0000719 zFullPathname = sqliteOsFullPathname(zFilename);
drh5e00f6c2001-09-13 13:46:56 +0000720 tempFile = 1;
721 }
drh3e7a6092002-12-07 21:45:14 +0000722 if( sqlite_malloc_failed ){
723 return SQLITE_NOMEM;
724 }
drh8cfbf082001-09-19 13:22:39 +0000725 if( rc!=SQLITE_OK ){
drh3e7a6092002-12-07 21:45:14 +0000726 sqliteFree(zFullPathname);
drhed7c8552001-04-11 14:29:21 +0000727 return SQLITE_CANTOPEN;
728 }
drh3e7a6092002-12-07 21:45:14 +0000729 nameLen = strlen(zFullPathname);
drhed7c8552001-04-11 14:29:21 +0000730 pPager = sqliteMalloc( sizeof(*pPager) + nameLen*2 + 30 );
drhd9b02572001-04-15 00:37:09 +0000731 if( pPager==0 ){
drha7fcb052001-12-14 15:09:55 +0000732 sqliteOsClose(&fd);
drh3e7a6092002-12-07 21:45:14 +0000733 sqliteFree(zFullPathname);
drhd9b02572001-04-15 00:37:09 +0000734 return SQLITE_NOMEM;
735 }
drhdb48ee02003-01-16 13:42:43 +0000736 SET_PAGER(pPager);
drhed7c8552001-04-11 14:29:21 +0000737 pPager->zFilename = (char*)&pPager[1];
738 pPager->zJournal = &pPager->zFilename[nameLen+1];
drh3e7a6092002-12-07 21:45:14 +0000739 strcpy(pPager->zFilename, zFullPathname);
740 strcpy(pPager->zJournal, zFullPathname);
741 sqliteFree(zFullPathname);
drhed7c8552001-04-11 14:29:21 +0000742 strcpy(&pPager->zJournal[nameLen], "-journal");
743 pPager->fd = fd;
drh8cfbf082001-09-19 13:22:39 +0000744 pPager->journalOpen = 0;
drhda47d772002-12-02 04:25:19 +0000745 pPager->useJournal = useJournal;
drhfa86c412002-02-02 15:01:15 +0000746 pPager->ckptOpen = 0;
drh0f892532002-05-30 12:27:03 +0000747 pPager->ckptInUse = 0;
drhed7c8552001-04-11 14:29:21 +0000748 pPager->nRef = 0;
749 pPager->dbSize = -1;
drhfa86c412002-02-02 15:01:15 +0000750 pPager->ckptSize = 0;
751 pPager->ckptJSize = 0;
drhed7c8552001-04-11 14:29:21 +0000752 pPager->nPage = 0;
drhd79caeb2001-04-15 02:27:24 +0000753 pPager->mxPage = mxPage>5 ? mxPage : 10;
drhed7c8552001-04-11 14:29:21 +0000754 pPager->state = SQLITE_UNLOCK;
drhd9b02572001-04-15 00:37:09 +0000755 pPager->errMask = 0;
drh5e00f6c2001-09-13 13:46:56 +0000756 pPager->tempFile = tempFile;
757 pPager->readOnly = readOnly;
drhf57b14a2001-09-14 18:54:08 +0000758 pPager->needSync = 0;
drhda47d772002-12-02 04:25:19 +0000759 pPager->noSync = pPager->tempFile || !useJournal;
drhed7c8552001-04-11 14:29:21 +0000760 pPager->pFirst = 0;
drh341eae82003-01-21 02:39:36 +0000761 pPager->pFirstSynced = 0;
drhed7c8552001-04-11 14:29:21 +0000762 pPager->pLast = 0;
drh7c717f72001-06-24 20:39:41 +0000763 pPager->nExtra = nExtra;
drhed7c8552001-04-11 14:29:21 +0000764 memset(pPager->aHash, 0, sizeof(pPager->aHash));
765 *ppPager = pPager;
766 return SQLITE_OK;
767}
768
769/*
drh72f82862001-05-24 21:06:34 +0000770** Set the destructor for this pager. If not NULL, the destructor is called
drh5e00f6c2001-09-13 13:46:56 +0000771** when the reference count on each page reaches zero. The destructor can
772** be used to clean up information in the extra segment appended to each page.
drh72f82862001-05-24 21:06:34 +0000773**
774** The destructor is not called as a result sqlitepager_close().
775** Destructors are only called by sqlitepager_unref().
776*/
777void sqlitepager_set_destructor(Pager *pPager, void (*xDesc)(void*)){
778 pPager->xDestructor = xDesc;
779}
780
781/*
drh5e00f6c2001-09-13 13:46:56 +0000782** Return the total number of pages in the disk file associated with
783** pPager.
drhed7c8552001-04-11 14:29:21 +0000784*/
drhd9b02572001-04-15 00:37:09 +0000785int sqlitepager_pagecount(Pager *pPager){
drh28be87c2002-11-05 23:03:02 +0000786 off_t n;
drhd9b02572001-04-15 00:37:09 +0000787 assert( pPager!=0 );
drhed7c8552001-04-11 14:29:21 +0000788 if( pPager->dbSize>=0 ){
789 return pPager->dbSize;
790 }
drha7fcb052001-12-14 15:09:55 +0000791 if( sqliteOsFileSize(&pPager->fd, &n)!=SQLITE_OK ){
drh81a20f22001-10-12 17:30:04 +0000792 pPager->errMask |= PAGER_ERR_DISK;
drh8cfbf082001-09-19 13:22:39 +0000793 return 0;
drhed7c8552001-04-11 14:29:21 +0000794 }
drh8cfbf082001-09-19 13:22:39 +0000795 n /= SQLITE_PAGE_SIZE;
drhd9b02572001-04-15 00:37:09 +0000796 if( pPager->state!=SQLITE_UNLOCK ){
drhed7c8552001-04-11 14:29:21 +0000797 pPager->dbSize = n;
798 }
799 return n;
800}
801
802/*
803** Shutdown the page cache. Free all memory and close all files.
804**
805** If a transaction was in progress when this routine is called, that
806** transaction is rolled back. All outstanding pages are invalidated
807** and their memory is freed. Any attempt to use a page associated
808** with this page cache after this function returns will likely
809** result in a coredump.
810*/
drhd9b02572001-04-15 00:37:09 +0000811int sqlitepager_close(Pager *pPager){
812 PgHdr *pPg, *pNext;
drhed7c8552001-04-11 14:29:21 +0000813 switch( pPager->state ){
814 case SQLITE_WRITELOCK: {
drhd9b02572001-04-15 00:37:09 +0000815 sqlitepager_rollback(pPager);
drha7fcb052001-12-14 15:09:55 +0000816 sqliteOsUnlock(&pPager->fd);
drh8cfbf082001-09-19 13:22:39 +0000817 assert( pPager->journalOpen==0 );
drhed7c8552001-04-11 14:29:21 +0000818 break;
819 }
820 case SQLITE_READLOCK: {
drha7fcb052001-12-14 15:09:55 +0000821 sqliteOsUnlock(&pPager->fd);
drhed7c8552001-04-11 14:29:21 +0000822 break;
823 }
824 default: {
825 /* Do nothing */
826 break;
827 }
828 }
drhd9b02572001-04-15 00:37:09 +0000829 for(pPg=pPager->pAll; pPg; pPg=pNext){
830 pNext = pPg->pNextAll;
831 sqliteFree(pPg);
drhed7c8552001-04-11 14:29:21 +0000832 }
drha7fcb052001-12-14 15:09:55 +0000833 sqliteOsClose(&pPager->fd);
drh8cfbf082001-09-19 13:22:39 +0000834 assert( pPager->journalOpen==0 );
drh0f892532002-05-30 12:27:03 +0000835 /* Temp files are automatically deleted by the OS
836 ** if( pPager->tempFile ){
837 ** sqliteOsDelete(pPager->zFilename);
838 ** }
839 */
drhdb48ee02003-01-16 13:42:43 +0000840 CLR_PAGER(pPager);
drhed7c8552001-04-11 14:29:21 +0000841 sqliteFree(pPager);
842 return SQLITE_OK;
843}
844
845/*
drh5e00f6c2001-09-13 13:46:56 +0000846** Return the page number for the given page data.
drhed7c8552001-04-11 14:29:21 +0000847*/
drhd9b02572001-04-15 00:37:09 +0000848Pgno sqlitepager_pagenumber(void *pData){
drhed7c8552001-04-11 14:29:21 +0000849 PgHdr *p = DATA_TO_PGHDR(pData);
850 return p->pgno;
851}
852
853/*
drh7e3b0a02001-04-28 16:52:40 +0000854** Increment the reference count for a page. If the page is
855** currently on the freelist (the reference count is zero) then
856** remove it from the freelist.
857*/
drh836faa42003-01-11 13:30:57 +0000858#define page_ref(P) ((P)->nRef==0?_page_ref(P):(void)(P)->nRef++)
859static void _page_ref(PgHdr *pPg){
drh7e3b0a02001-04-28 16:52:40 +0000860 if( pPg->nRef==0 ){
861 /* The page is currently on the freelist. Remove it. */
drh341eae82003-01-21 02:39:36 +0000862 if( pPg==pPg->pPager->pFirstSynced ){
863 PgHdr *p = pPg->pNextFree;
864 while( p && p->needSync ){ p = p->pNextFree; }
865 pPg->pPager->pFirstSynced = p;
866 }
drh7e3b0a02001-04-28 16:52:40 +0000867 if( pPg->pPrevFree ){
868 pPg->pPrevFree->pNextFree = pPg->pNextFree;
869 }else{
870 pPg->pPager->pFirst = pPg->pNextFree;
871 }
872 if( pPg->pNextFree ){
873 pPg->pNextFree->pPrevFree = pPg->pPrevFree;
874 }else{
875 pPg->pPager->pLast = pPg->pPrevFree;
876 }
877 pPg->pPager->nRef++;
878 }
879 pPg->nRef++;
drhdd793422001-06-28 01:54:48 +0000880 REFINFO(pPg);
drhdf0b3b02001-06-23 11:36:20 +0000881}
882
883/*
884** Increment the reference count for a page. The input pointer is
885** a reference to the page data.
886*/
887int sqlitepager_ref(void *pData){
888 PgHdr *pPg = DATA_TO_PGHDR(pData);
889 page_ref(pPg);
drh8c42ca92001-06-22 19:15:00 +0000890 return SQLITE_OK;
drh7e3b0a02001-04-28 16:52:40 +0000891}
892
893/*
drhb19a2bc2001-09-16 00:13:26 +0000894** Sync the journal and then write all free dirty pages to the database
895** file.
896**
897** Writing all free dirty pages to the database after the sync is a
898** non-obvious optimization. fsync() is an expensive operation so we
drhaaab5722002-02-19 13:39:21 +0000899** want to minimize the number ot times it is called. After an fsync() call,
drh6446c4d2001-12-15 14:22:18 +0000900** we are free to write dirty pages back to the database. It is best
901** to go ahead and write as many dirty pages as possible to minimize
902** the risk of having to do another fsync() later on. Writing dirty
903** free pages in this way was observed to make database operations go
904** up to 10 times faster.
drhfa86c412002-02-02 15:01:15 +0000905**
906** If we are writing to temporary database, there is no need to preserve
907** the integrity of the journal file, so we can save time and skip the
908** fsync().
drh50e5dad2001-09-15 00:57:28 +0000909*/
910static int syncAllPages(Pager *pPager){
911 PgHdr *pPg;
912 int rc = SQLITE_OK;
drh03eb96a2002-11-10 23:32:56 +0000913
914 /* Sync the journal before modifying the main database
915 ** (assuming there is a journal and it needs to be synced.)
916 */
drh50e5dad2001-09-15 00:57:28 +0000917 if( pPager->needSync ){
drhfa86c412002-02-02 15:01:15 +0000918 if( !pPager->tempFile ){
drhdb48ee02003-01-16 13:42:43 +0000919 assert( pPager->journalOpen );
920 assert( !pPager->noSync );
921 TRACE1("SYNC\n");
drhfa86c412002-02-02 15:01:15 +0000922 rc = sqliteOsSync(&pPager->jfd);
923 if( rc!=0 ) return rc;
drhdb48ee02003-01-16 13:42:43 +0000924#ifndef NDEBUG
925 rc = sqliteOsFileSize(&pPager->jfd, &pPager->syncJSize);
926 if( rc!=0 ) return rc;
927#endif
928 pPager->journalStarted = 1;
drhfa86c412002-02-02 15:01:15 +0000929 }
drh50e5dad2001-09-15 00:57:28 +0000930 pPager->needSync = 0;
drh341eae82003-01-21 02:39:36 +0000931
932 /* Erase the needSync flag from every page.
933 */
934 for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
935 pPg->needSync = 0;
936 }
937 pPager->pFirstSynced = pPager->pFirst;
drh50e5dad2001-09-15 00:57:28 +0000938 }
drh03eb96a2002-11-10 23:32:56 +0000939
drh341eae82003-01-21 02:39:36 +0000940#ifndef NDEBUG
941 /* If the Pager.needSync flag is clear then the PgHdr.needSync
942 ** flag must also be clear for all pages. Verify that this
943 ** invariant is true.
drh03eb96a2002-11-10 23:32:56 +0000944 */
drh341eae82003-01-21 02:39:36 +0000945 else{
946 for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
947 assert( pPg->needSync==0 );
948 }
949 assert( pPager->pFirstSynced==pPager->pFirst );
drh03eb96a2002-11-10 23:32:56 +0000950 }
drh341eae82003-01-21 02:39:36 +0000951#endif
drhdb48ee02003-01-16 13:42:43 +0000952
drh81a20f22001-10-12 17:30:04 +0000953 return rc;
drh50e5dad2001-09-15 00:57:28 +0000954}
955
956/*
drh2554f8b2003-01-22 01:26:44 +0000957** Given a list of pages (connected by the PgHdr.pDirty pointer) write
958** every one of those pages out to the database file and mark them all
959** as clean.
960*/
961static int pager_write_pagelist(PgHdr *pList){
962 Pager *pPager;
963 int rc;
964
965 if( pList==0 ) return SQLITE_OK;
966 pPager = pList->pPager;
967 while( pList ){
968 assert( pList->dirty );
969 sqliteOsSeek(&pPager->fd, (pList->pgno-1)*(off_t)SQLITE_PAGE_SIZE);
970 rc = sqliteOsWrite(&pPager->fd, PGHDR_TO_DATA(pList), SQLITE_PAGE_SIZE);
971 if( rc ) return rc;
972 pList->dirty = 0;
973 pList = pList->pDirty;
974 }
975 return SQLITE_OK;
976}
977
978/*
979** Collect every dirty page into a dirty list and
980** return a pointer to the head of that list. All pages are
981** collected even if they are still in use.
982*/
983static PgHdr *pager_get_all_dirty_pages(Pager *pPager){
984 PgHdr *p, *pList;
985 pList = 0;
986 for(p=pPager->pAll; p; p=p->pNextAll){
987 if( p->dirty ){
988 p->pDirty = pList;
989 pList = p;
990 }
991 }
992 return pList;
993}
994
995/*
drhd9b02572001-04-15 00:37:09 +0000996** Acquire a page.
997**
drh58a11682001-11-10 13:51:08 +0000998** A read lock on the disk file is obtained when the first page is acquired.
drh5e00f6c2001-09-13 13:46:56 +0000999** This read lock is dropped when the last page is released.
drhd9b02572001-04-15 00:37:09 +00001000**
drh306dc212001-05-21 13:45:10 +00001001** A _get works for any page number greater than 0. If the database
1002** file is smaller than the requested page, then no actual disk
1003** read occurs and the memory image of the page is initialized to
1004** all zeros. The extra data appended to a page is always initialized
1005** to zeros the first time a page is loaded into memory.
1006**
drhd9b02572001-04-15 00:37:09 +00001007** The acquisition might fail for several reasons. In all cases,
1008** an appropriate error code is returned and *ppPage is set to NULL.
drh7e3b0a02001-04-28 16:52:40 +00001009**
1010** See also sqlitepager_lookup(). Both this routine and _lookup() attempt
1011** to find a page in the in-memory cache first. If the page is not already
drh5e00f6c2001-09-13 13:46:56 +00001012** in memory, this routine goes to disk to read it in whereas _lookup()
drh7e3b0a02001-04-28 16:52:40 +00001013** just returns 0. This routine acquires a read-lock the first time it
1014** has to go to disk, and could also playback an old journal if necessary.
1015** Since _lookup() never goes to disk, it never has to deal with locks
1016** or journal files.
drhed7c8552001-04-11 14:29:21 +00001017*/
drhd9b02572001-04-15 00:37:09 +00001018int sqlitepager_get(Pager *pPager, Pgno pgno, void **ppPage){
drhed7c8552001-04-11 14:29:21 +00001019 PgHdr *pPg;
drh8766c342002-11-09 00:33:15 +00001020 int rc;
drhed7c8552001-04-11 14:29:21 +00001021
drhd9b02572001-04-15 00:37:09 +00001022 /* Make sure we have not hit any critical errors.
1023 */
drh836faa42003-01-11 13:30:57 +00001024 assert( pPager!=0 );
1025 assert( pgno!=0 );
drhd9b02572001-04-15 00:37:09 +00001026 if( pPager->errMask & ~(PAGER_ERR_FULL) ){
1027 return pager_errcode(pPager);
1028 }
1029
drhed7c8552001-04-11 14:29:21 +00001030 /* If this is the first page accessed, then get a read lock
1031 ** on the database file.
1032 */
1033 if( pPager->nRef==0 ){
drh8766c342002-11-09 00:33:15 +00001034 rc = sqliteOsReadLock(&pPager->fd);
1035 if( rc!=SQLITE_OK ){
drhed7c8552001-04-11 14:29:21 +00001036 *ppPage = 0;
drh8766c342002-11-09 00:33:15 +00001037 return rc;
drhed7c8552001-04-11 14:29:21 +00001038 }
drhd9b02572001-04-15 00:37:09 +00001039 pPager->state = SQLITE_READLOCK;
drhed7c8552001-04-11 14:29:21 +00001040
1041 /* If a journal file exists, try to play it back.
1042 */
drhda47d772002-12-02 04:25:19 +00001043 if( pPager->useJournal && sqliteOsFileExists(pPager->zJournal) ){
drhf57b3392001-10-08 13:22:32 +00001044 int rc, dummy;
drhed7c8552001-04-11 14:29:21 +00001045
drha7fcb052001-12-14 15:09:55 +00001046 /* Get a write lock on the database
1047 */
1048 rc = sqliteOsWriteLock(&pPager->fd);
1049 if( rc!=SQLITE_OK ){
drh8766c342002-11-09 00:33:15 +00001050 if( sqliteOsUnlock(&pPager->fd)!=SQLITE_OK ){
1051 /* This should never happen! */
1052 rc = SQLITE_INTERNAL;
1053 }
drha7fcb052001-12-14 15:09:55 +00001054 *ppPage = 0;
drh8766c342002-11-09 00:33:15 +00001055 return rc;
drha7fcb052001-12-14 15:09:55 +00001056 }
1057 pPager->state = SQLITE_WRITELOCK;
1058
drhed7c8552001-04-11 14:29:21 +00001059 /* Open the journal for exclusive access. Return SQLITE_BUSY if
drhf57b3392001-10-08 13:22:32 +00001060 ** we cannot get exclusive access to the journal file.
1061 **
1062 ** Even though we will only be reading from the journal, not writing,
1063 ** we have to open the journal for writing in order to obtain an
1064 ** exclusive access lock.
drhed7c8552001-04-11 14:29:21 +00001065 */
drhf57b3392001-10-08 13:22:32 +00001066 rc = sqliteOsOpenReadWrite(pPager->zJournal, &pPager->jfd, &dummy);
drha7fcb052001-12-14 15:09:55 +00001067 if( rc!=SQLITE_OK ){
1068 rc = sqliteOsUnlock(&pPager->fd);
1069 assert( rc==SQLITE_OK );
drhed7c8552001-04-11 14:29:21 +00001070 *ppPage = 0;
1071 return SQLITE_BUSY;
1072 }
drha7fcb052001-12-14 15:09:55 +00001073 pPager->journalOpen = 1;
drhdb48ee02003-01-16 13:42:43 +00001074 pPager->journalStarted = 0;
drhed7c8552001-04-11 14:29:21 +00001075
1076 /* Playback and delete the journal. Drop the database write
1077 ** lock and reacquire the read lock.
1078 */
drhd9b02572001-04-15 00:37:09 +00001079 rc = pager_playback(pPager);
1080 if( rc!=SQLITE_OK ){
1081 return rc;
1082 }
drhed7c8552001-04-11 14:29:21 +00001083 }
1084 pPg = 0;
1085 }else{
1086 /* Search for page in cache */
drhd9b02572001-04-15 00:37:09 +00001087 pPg = pager_lookup(pPager, pgno);
drhed7c8552001-04-11 14:29:21 +00001088 }
1089 if( pPg==0 ){
drhd9b02572001-04-15 00:37:09 +00001090 /* The requested page is not in the page cache. */
drhed7c8552001-04-11 14:29:21 +00001091 int h;
drh7e3b0a02001-04-28 16:52:40 +00001092 pPager->nMiss++;
drhed7c8552001-04-11 14:29:21 +00001093 if( pPager->nPage<pPager->mxPage || pPager->pFirst==0 ){
1094 /* Create a new page */
drh8c1238a2003-01-02 14:43:55 +00001095 pPg = sqliteMallocRaw( sizeof(*pPg) + SQLITE_PAGE_SIZE + pPager->nExtra );
drhd9b02572001-04-15 00:37:09 +00001096 if( pPg==0 ){
1097 *ppPage = 0;
1098 pager_unwritelock(pPager);
1099 pPager->errMask |= PAGER_ERR_MEM;
1100 return SQLITE_NOMEM;
1101 }
drh8c1238a2003-01-02 14:43:55 +00001102 memset(pPg, 0, sizeof(*pPg));
drhed7c8552001-04-11 14:29:21 +00001103 pPg->pPager = pPager;
drhd9b02572001-04-15 00:37:09 +00001104 pPg->pNextAll = pPager->pAll;
1105 if( pPager->pAll ){
1106 pPager->pAll->pPrevAll = pPg;
1107 }
1108 pPg->pPrevAll = 0;
drhd79caeb2001-04-15 02:27:24 +00001109 pPager->pAll = pPg;
drhd9b02572001-04-15 00:37:09 +00001110 pPager->nPage++;
drhed7c8552001-04-11 14:29:21 +00001111 }else{
drhdb48ee02003-01-16 13:42:43 +00001112 /* Find a page to recycle. Try to locate a page that does not
1113 ** require us to do an fsync() on the journal.
1114 */
drh341eae82003-01-21 02:39:36 +00001115 pPg = pPager->pFirstSynced;
drhb19a2bc2001-09-16 00:13:26 +00001116
drhdb48ee02003-01-16 13:42:43 +00001117 /* If we could not find a page that does not require an fsync()
1118 ** on the journal file then fsync the journal file. This is a
1119 ** very slow operation, so we work hard to avoid it. But sometimes
1120 ** it can't be helped.
drhb19a2bc2001-09-16 00:13:26 +00001121 */
drh603240c2002-03-05 01:11:12 +00001122 if( pPg==0 ){
drh50e5dad2001-09-15 00:57:28 +00001123 int rc = syncAllPages(pPager);
1124 if( rc!=0 ){
1125 sqlitepager_rollback(pPager);
1126 *ppPage = 0;
1127 return SQLITE_IOERR;
1128 }
1129 pPg = pPager->pFirst;
1130 }
drhd9b02572001-04-15 00:37:09 +00001131 assert( pPg->nRef==0 );
drhdb48ee02003-01-16 13:42:43 +00001132
1133 /* Write the page to the database file if it is dirty.
1134 */
1135 if( pPg->dirty ){
1136 assert( pPg->needSync==0 );
drh2554f8b2003-01-22 01:26:44 +00001137 pPg->pDirty = 0;
1138 rc = pager_write_pagelist( pPg );
drhdb48ee02003-01-16 13:42:43 +00001139 if( rc!=SQLITE_OK ){
1140 sqlitepager_rollback(pPager);
1141 *ppPage = 0;
1142 return SQLITE_IOERR;
1143 }
drhdb48ee02003-01-16 13:42:43 +00001144 }
drh50e5dad2001-09-15 00:57:28 +00001145 assert( pPg->dirty==0 );
drhd9b02572001-04-15 00:37:09 +00001146
drhdb48ee02003-01-16 13:42:43 +00001147 /* If the page we are recycling is marked as alwaysRollback, then
drh193a6b42002-07-07 16:52:46 +00001148 ** set the global alwaysRollback flag, thus disabling the
1149 ** sqlite_dont_rollback() optimization for the rest of this transaction.
1150 ** It is necessary to do this because the page marked alwaysRollback
1151 ** might be reloaded at a later time but at that point we won't remember
1152 ** that is was marked alwaysRollback. This means that all pages must
1153 ** be marked as alwaysRollback from here on out.
1154 */
1155 if( pPg->alwaysRollback ){
1156 pPager->alwaysRollback = 1;
1157 }
1158
drhd9b02572001-04-15 00:37:09 +00001159 /* Unlink the old page from the free list and the hash table
1160 */
drh341eae82003-01-21 02:39:36 +00001161 if( pPg==pPager->pFirstSynced ){
1162 PgHdr *p = pPg->pNextFree;
1163 while( p && p->needSync ){ p = p->pNextFree; }
1164 pPager->pFirstSynced = p;
1165 }
drh6019e162001-07-02 17:51:45 +00001166 if( pPg->pPrevFree ){
1167 pPg->pPrevFree->pNextFree = pPg->pNextFree;
drhed7c8552001-04-11 14:29:21 +00001168 }else{
drh6019e162001-07-02 17:51:45 +00001169 assert( pPager->pFirst==pPg );
1170 pPager->pFirst = pPg->pNextFree;
drhed7c8552001-04-11 14:29:21 +00001171 }
drh6019e162001-07-02 17:51:45 +00001172 if( pPg->pNextFree ){
1173 pPg->pNextFree->pPrevFree = pPg->pPrevFree;
1174 }else{
1175 assert( pPager->pLast==pPg );
1176 pPager->pLast = pPg->pPrevFree;
1177 }
1178 pPg->pNextFree = pPg->pPrevFree = 0;
drhed7c8552001-04-11 14:29:21 +00001179 if( pPg->pNextHash ){
1180 pPg->pNextHash->pPrevHash = pPg->pPrevHash;
1181 }
1182 if( pPg->pPrevHash ){
1183 pPg->pPrevHash->pNextHash = pPg->pNextHash;
1184 }else{
drhd9b02572001-04-15 00:37:09 +00001185 h = pager_hash(pPg->pgno);
drhed7c8552001-04-11 14:29:21 +00001186 assert( pPager->aHash[h]==pPg );
1187 pPager->aHash[h] = pPg->pNextHash;
1188 }
drh6019e162001-07-02 17:51:45 +00001189 pPg->pNextHash = pPg->pPrevHash = 0;
drhd9b02572001-04-15 00:37:09 +00001190 pPager->nOvfl++;
drhed7c8552001-04-11 14:29:21 +00001191 }
1192 pPg->pgno = pgno;
drh1ab43002002-01-14 09:28:19 +00001193 if( pPager->aInJournal && (int)pgno<=pPager->origDbSize ){
drhed6c8672003-01-12 18:02:16 +00001194 sqliteCheckMemory(pPager->aInJournal, pgno/8);
drhdb48ee02003-01-16 13:42:43 +00001195 assert( pPager->journalOpen );
drh6019e162001-07-02 17:51:45 +00001196 pPg->inJournal = (pPager->aInJournal[pgno/8] & (1<<(pgno&7)))!=0;
drhdb48ee02003-01-16 13:42:43 +00001197 pPg->needSync = 0;
drh6019e162001-07-02 17:51:45 +00001198 }else{
1199 pPg->inJournal = 0;
drhdb48ee02003-01-16 13:42:43 +00001200 pPg->needSync = 0;
drh6019e162001-07-02 17:51:45 +00001201 }
drh03eb96a2002-11-10 23:32:56 +00001202 if( pPager->aInCkpt && (int)pgno<=pPager->ckptSize
1203 && (pPager->aInCkpt[pgno/8] & (1<<(pgno&7)))!=0 ){
1204 page_add_to_ckpt_list(pPg);
drhfa86c412002-02-02 15:01:15 +00001205 }else{
drh03eb96a2002-11-10 23:32:56 +00001206 page_remove_from_ckpt_list(pPg);
drhfa86c412002-02-02 15:01:15 +00001207 }
drhed7c8552001-04-11 14:29:21 +00001208 pPg->dirty = 0;
1209 pPg->nRef = 1;
drhdd793422001-06-28 01:54:48 +00001210 REFINFO(pPg);
drhd9b02572001-04-15 00:37:09 +00001211 pPager->nRef++;
1212 h = pager_hash(pgno);
drhed7c8552001-04-11 14:29:21 +00001213 pPg->pNextHash = pPager->aHash[h];
1214 pPager->aHash[h] = pPg;
1215 if( pPg->pNextHash ){
1216 assert( pPg->pNextHash->pPrevHash==0 );
1217 pPg->pNextHash->pPrevHash = pPg;
1218 }
drh306dc212001-05-21 13:45:10 +00001219 if( pPager->dbSize<0 ) sqlitepager_pagecount(pPager);
drh1ab43002002-01-14 09:28:19 +00001220 if( pPager->dbSize<(int)pgno ){
drh306dc212001-05-21 13:45:10 +00001221 memset(PGHDR_TO_DATA(pPg), 0, SQLITE_PAGE_SIZE);
1222 }else{
drh81a20f22001-10-12 17:30:04 +00001223 int rc;
drhd0d006e2002-12-01 02:00:57 +00001224 sqliteOsSeek(&pPager->fd, (pgno-1)*(off_t)SQLITE_PAGE_SIZE);
drha7fcb052001-12-14 15:09:55 +00001225 rc = sqliteOsRead(&pPager->fd, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE);
drh81a20f22001-10-12 17:30:04 +00001226 if( rc!=SQLITE_OK ){
drh28be87c2002-11-05 23:03:02 +00001227 off_t fileSize;
drh4e371ee2002-09-05 16:08:27 +00001228 if( sqliteOsFileSize(&pPager->fd,&fileSize)!=SQLITE_OK
1229 || fileSize>=pgno*SQLITE_PAGE_SIZE ){
1230 return rc;
1231 }else{
1232 memset(PGHDR_TO_DATA(pPg), 0, SQLITE_PAGE_SIZE);
1233 }
drh81a20f22001-10-12 17:30:04 +00001234 }
drh306dc212001-05-21 13:45:10 +00001235 }
drh7e3b0a02001-04-28 16:52:40 +00001236 if( pPager->nExtra>0 ){
1237 memset(PGHDR_TO_EXTRA(pPg), 0, pPager->nExtra);
1238 }
drhed7c8552001-04-11 14:29:21 +00001239 }else{
drhd9b02572001-04-15 00:37:09 +00001240 /* The requested page is in the page cache. */
drh7e3b0a02001-04-28 16:52:40 +00001241 pPager->nHit++;
drhdf0b3b02001-06-23 11:36:20 +00001242 page_ref(pPg);
drhed7c8552001-04-11 14:29:21 +00001243 }
1244 *ppPage = PGHDR_TO_DATA(pPg);
1245 return SQLITE_OK;
1246}
1247
1248/*
drh7e3b0a02001-04-28 16:52:40 +00001249** Acquire a page if it is already in the in-memory cache. Do
1250** not read the page from disk. Return a pointer to the page,
1251** or 0 if the page is not in cache.
1252**
1253** See also sqlitepager_get(). The difference between this routine
1254** and sqlitepager_get() is that _get() will go to the disk and read
1255** in the page if the page is not already in cache. This routine
drh5e00f6c2001-09-13 13:46:56 +00001256** returns NULL if the page is not in cache or if a disk I/O error
1257** has ever happened.
drh7e3b0a02001-04-28 16:52:40 +00001258*/
1259void *sqlitepager_lookup(Pager *pPager, Pgno pgno){
1260 PgHdr *pPg;
1261
drh836faa42003-01-11 13:30:57 +00001262 assert( pPager!=0 );
1263 assert( pgno!=0 );
drh7e3b0a02001-04-28 16:52:40 +00001264 if( pPager->errMask & ~(PAGER_ERR_FULL) ){
1265 return 0;
1266 }
drh836faa42003-01-11 13:30:57 +00001267 /* if( pPager->nRef==0 ){
1268 ** return 0;
1269 ** }
1270 */
drh7e3b0a02001-04-28 16:52:40 +00001271 pPg = pager_lookup(pPager, pgno);
1272 if( pPg==0 ) return 0;
drhdf0b3b02001-06-23 11:36:20 +00001273 page_ref(pPg);
drh7e3b0a02001-04-28 16:52:40 +00001274 return PGHDR_TO_DATA(pPg);
1275}
1276
1277/*
drhed7c8552001-04-11 14:29:21 +00001278** Release a page.
1279**
1280** If the number of references to the page drop to zero, then the
1281** page is added to the LRU list. When all references to all pages
drhd9b02572001-04-15 00:37:09 +00001282** are released, a rollback occurs and the lock on the database is
drhed7c8552001-04-11 14:29:21 +00001283** removed.
1284*/
drhd9b02572001-04-15 00:37:09 +00001285int sqlitepager_unref(void *pData){
drhed7c8552001-04-11 14:29:21 +00001286 PgHdr *pPg;
drhd9b02572001-04-15 00:37:09 +00001287
1288 /* Decrement the reference count for this page
1289 */
drhed7c8552001-04-11 14:29:21 +00001290 pPg = DATA_TO_PGHDR(pData);
1291 assert( pPg->nRef>0 );
drhed7c8552001-04-11 14:29:21 +00001292 pPg->nRef--;
drhdd793422001-06-28 01:54:48 +00001293 REFINFO(pPg);
drhd9b02572001-04-15 00:37:09 +00001294
drh72f82862001-05-24 21:06:34 +00001295 /* When the number of references to a page reach 0, call the
1296 ** destructor and add the page to the freelist.
drhd9b02572001-04-15 00:37:09 +00001297 */
drhed7c8552001-04-11 14:29:21 +00001298 if( pPg->nRef==0 ){
drh1eaa2692001-09-18 02:02:23 +00001299 Pager *pPager;
1300 pPager = pPg->pPager;
drhd9b02572001-04-15 00:37:09 +00001301 pPg->pNextFree = 0;
1302 pPg->pPrevFree = pPager->pLast;
drhed7c8552001-04-11 14:29:21 +00001303 pPager->pLast = pPg;
drhd9b02572001-04-15 00:37:09 +00001304 if( pPg->pPrevFree ){
1305 pPg->pPrevFree->pNextFree = pPg;
drhed7c8552001-04-11 14:29:21 +00001306 }else{
1307 pPager->pFirst = pPg;
1308 }
drh341eae82003-01-21 02:39:36 +00001309 if( pPg->needSync==0 && pPager->pFirstSynced==0 ){
1310 pPager->pFirstSynced = pPg;
1311 }
drh72f82862001-05-24 21:06:34 +00001312 if( pPager->xDestructor ){
1313 pPager->xDestructor(pData);
1314 }
drhd9b02572001-04-15 00:37:09 +00001315
1316 /* When all pages reach the freelist, drop the read lock from
1317 ** the database file.
1318 */
1319 pPager->nRef--;
1320 assert( pPager->nRef>=0 );
1321 if( pPager->nRef==0 ){
1322 pager_reset(pPager);
1323 }
drhed7c8552001-04-11 14:29:21 +00001324 }
drhd9b02572001-04-15 00:37:09 +00001325 return SQLITE_OK;
drhed7c8552001-04-11 14:29:21 +00001326}
1327
1328/*
drhda47d772002-12-02 04:25:19 +00001329** Create a journal file for pPager. There should already be a write
1330** lock on the database file when this routine is called.
1331**
1332** Return SQLITE_OK if everything. Return an error code and release the
1333** write lock if anything goes wrong.
1334*/
1335static int pager_open_journal(Pager *pPager){
1336 int rc;
1337 assert( pPager->state==SQLITE_WRITELOCK );
1338 assert( pPager->journalOpen==0 );
1339 assert( pPager->useJournal );
1340 pPager->aInJournal = sqliteMalloc( pPager->dbSize/8 + 1 );
1341 if( pPager->aInJournal==0 ){
1342 sqliteOsReadLock(&pPager->fd);
1343 pPager->state = SQLITE_READLOCK;
1344 return SQLITE_NOMEM;
1345 }
1346 rc = sqliteOsOpenExclusive(pPager->zJournal, &pPager->jfd,pPager->tempFile);
1347 if( rc!=SQLITE_OK ){
1348 sqliteFree(pPager->aInJournal);
1349 pPager->aInJournal = 0;
1350 sqliteOsReadLock(&pPager->fd);
1351 pPager->state = SQLITE_READLOCK;
1352 return SQLITE_CANTOPEN;
1353 }
1354 pPager->journalOpen = 1;
drhdb48ee02003-01-16 13:42:43 +00001355 pPager->journalStarted = 0;
drhda47d772002-12-02 04:25:19 +00001356 pPager->needSync = 0;
1357 pPager->alwaysRollback = 0;
1358 sqlitepager_pagecount(pPager);
1359 pPager->origDbSize = pPager->dbSize;
1360 if( pager_old_format ){
1361 rc = sqliteOsWrite(&pPager->jfd, aOldJournalMagic,
1362 sizeof(aOldJournalMagic));
1363 }else{
1364 rc = sqliteOsWrite(&pPager->jfd, aJournalMagic, sizeof(aJournalMagic));
1365 }
1366 if( rc==SQLITE_OK ){
1367 rc = write32bits(&pPager->jfd, pPager->dbSize);
1368 }
1369 if( pPager->ckptAutoopen && rc==SQLITE_OK ){
1370 rc = sqlitepager_ckpt_begin(pPager);
1371 }
1372 if( rc!=SQLITE_OK ){
1373 rc = pager_unwritelock(pPager);
1374 if( rc==SQLITE_OK ){
1375 rc = SQLITE_FULL;
1376 }
1377 }
drhdb48ee02003-01-16 13:42:43 +00001378#ifndef NDEBUG
1379 pPager->syncJSize = 0;
1380#endif
drhda47d772002-12-02 04:25:19 +00001381 return rc;
1382}
1383
1384/*
drh4b845d72002-03-05 12:41:19 +00001385** Acquire a write-lock on the database. The lock is removed when
1386** the any of the following happen:
1387**
1388** * sqlitepager_commit() is called.
1389** * sqlitepager_rollback() is called.
1390** * sqlitepager_close() is called.
1391** * sqlitepager_unref() is called to on every outstanding page.
1392**
1393** The parameter to this routine is a pointer to any open page of the
1394** database file. Nothing changes about the page - it is used merely
1395** to acquire a pointer to the Pager structure and as proof that there
1396** is already a read-lock on the database.
1397**
drhda47d772002-12-02 04:25:19 +00001398** A journal file is opened if this is not a temporary file. For
1399** temporary files, the opening of the journal file is deferred until
1400** there is an actual need to write to the journal.
1401**
drh4b845d72002-03-05 12:41:19 +00001402** If the database is already write-locked, this routine is a no-op.
1403*/
1404int sqlitepager_begin(void *pData){
1405 PgHdr *pPg = DATA_TO_PGHDR(pData);
1406 Pager *pPager = pPg->pPager;
1407 int rc = SQLITE_OK;
1408 assert( pPg->nRef>0 );
1409 assert( pPager->state!=SQLITE_UNLOCK );
1410 if( pPager->state==SQLITE_READLOCK ){
1411 assert( pPager->aInJournal==0 );
1412 rc = sqliteOsWriteLock(&pPager->fd);
1413 if( rc!=SQLITE_OK ){
1414 return rc;
1415 }
drh4b845d72002-03-05 12:41:19 +00001416 pPager->state = SQLITE_WRITELOCK;
drhda47d772002-12-02 04:25:19 +00001417 pPager->dirtyFile = 0;
drhdb48ee02003-01-16 13:42:43 +00001418 TRACE1("TRANSACTION\n");
drhda47d772002-12-02 04:25:19 +00001419 if( pPager->useJournal && !pPager->tempFile ){
1420 rc = pager_open_journal(pPager);
drh4b845d72002-03-05 12:41:19 +00001421 }
1422 }
1423 return rc;
1424}
1425
1426/*
drhed7c8552001-04-11 14:29:21 +00001427** Mark a data page as writeable. The page is written into the journal
1428** if it is not there already. This routine must be called before making
1429** changes to a page.
1430**
1431** The first time this routine is called, the pager creates a new
1432** journal and acquires a write lock on the database. If the write
1433** lock could not be acquired, this routine returns SQLITE_BUSY. The
drh306dc212001-05-21 13:45:10 +00001434** calling routine must check for that return value and be careful not to
drhed7c8552001-04-11 14:29:21 +00001435** change any page data until this routine returns SQLITE_OK.
drhd9b02572001-04-15 00:37:09 +00001436**
1437** If the journal file could not be written because the disk is full,
1438** then this routine returns SQLITE_FULL and does an immediate rollback.
1439** All subsequent write attempts also return SQLITE_FULL until there
1440** is a call to sqlitepager_commit() or sqlitepager_rollback() to
1441** reset.
drhed7c8552001-04-11 14:29:21 +00001442*/
drhd9b02572001-04-15 00:37:09 +00001443int sqlitepager_write(void *pData){
drh69688d52001-04-14 16:38:23 +00001444 PgHdr *pPg = DATA_TO_PGHDR(pData);
1445 Pager *pPager = pPg->pPager;
drhd79caeb2001-04-15 02:27:24 +00001446 int rc = SQLITE_OK;
drh69688d52001-04-14 16:38:23 +00001447
drh6446c4d2001-12-15 14:22:18 +00001448 /* Check for errors
1449 */
drhd9b02572001-04-15 00:37:09 +00001450 if( pPager->errMask ){
1451 return pager_errcode(pPager);
1452 }
drh5e00f6c2001-09-13 13:46:56 +00001453 if( pPager->readOnly ){
1454 return SQLITE_PERM;
1455 }
drh6446c4d2001-12-15 14:22:18 +00001456
1457 /* Mark the page as dirty. If the page has already been written
1458 ** to the journal then we can return right away.
1459 */
drhd9b02572001-04-15 00:37:09 +00001460 pPg->dirty = 1;
drh0f892532002-05-30 12:27:03 +00001461 if( pPg->inJournal && (pPg->inCkpt || pPager->ckptInUse==0) ){
drha1680452002-04-18 01:56:57 +00001462 pPager->dirtyFile = 1;
drhfa86c412002-02-02 15:01:15 +00001463 return SQLITE_OK;
1464 }
drh6446c4d2001-12-15 14:22:18 +00001465
1466 /* If we get this far, it means that the page needs to be
drhfa86c412002-02-02 15:01:15 +00001467 ** written to the transaction journal or the ckeckpoint journal
1468 ** or both.
1469 **
1470 ** First check to see that the transaction journal exists and
1471 ** create it if it does not.
drh6446c4d2001-12-15 14:22:18 +00001472 */
drhd9b02572001-04-15 00:37:09 +00001473 assert( pPager->state!=SQLITE_UNLOCK );
drh4b845d72002-03-05 12:41:19 +00001474 rc = sqlitepager_begin(pData);
drhda47d772002-12-02 04:25:19 +00001475 if( rc!=SQLITE_OK ){
1476 return rc;
1477 }
drhd9b02572001-04-15 00:37:09 +00001478 assert( pPager->state==SQLITE_WRITELOCK );
drhda47d772002-12-02 04:25:19 +00001479 if( !pPager->journalOpen && pPager->useJournal ){
1480 rc = pager_open_journal(pPager);
1481 if( rc!=SQLITE_OK ) return rc;
1482 }
1483 assert( pPager->journalOpen || !pPager->useJournal );
1484 pPager->dirtyFile = 1;
drh6446c4d2001-12-15 14:22:18 +00001485
drhfa86c412002-02-02 15:01:15 +00001486 /* The transaction journal now exists and we have a write lock on the
1487 ** main database file. Write the current page to the transaction
1488 ** journal if it is not there already.
drh6446c4d2001-12-15 14:22:18 +00001489 */
drhdb48ee02003-01-16 13:42:43 +00001490 if( !pPg->inJournal && pPager->useJournal ){
1491 if( (int)pPg->pgno <= pPager->origDbSize ){
drh2554f8b2003-01-22 01:26:44 +00001492 storePageNumber(pPg);
1493 rc = sqliteOsWrite(&pPager->jfd, &((char*)pData)[-4], SQLITE_PAGE_SIZE+4);
drhdb48ee02003-01-16 13:42:43 +00001494 if( rc!=SQLITE_OK ){
1495 sqlitepager_rollback(pPager);
1496 pPager->errMask |= PAGER_ERR_FULL;
1497 return rc;
1498 }
1499 assert( pPager->aInJournal!=0 );
1500 pPager->aInJournal[pPg->pgno/8] |= 1<<(pPg->pgno&7);
1501 pPg->needSync = !pPager->noSync;
1502 pPg->inJournal = 1;
1503 if( pPager->ckptInUse ){
1504 pPager->aInCkpt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
1505 page_add_to_ckpt_list(pPg);
1506 }
1507 TRACE3("JOURNAL %d %d\n", pPg->pgno, pPg->needSync);
1508 }else{
1509 pPg->needSync = !pPager->journalStarted && !pPager->noSync;
1510 TRACE3("APPEND %d %d\n", pPg->pgno, pPg->needSync);
drhd9b02572001-04-15 00:37:09 +00001511 }
drhdb48ee02003-01-16 13:42:43 +00001512 if( pPg->needSync ){
1513 pPager->needSync = 1;
drhfa86c412002-02-02 15:01:15 +00001514 }
drh69688d52001-04-14 16:38:23 +00001515 }
drh6446c4d2001-12-15 14:22:18 +00001516
drhfa86c412002-02-02 15:01:15 +00001517 /* If the checkpoint journal is open and the page is not in it,
1518 ** then write the current page to the checkpoint journal.
drh6446c4d2001-12-15 14:22:18 +00001519 */
drh0f892532002-05-30 12:27:03 +00001520 if( pPager->ckptInUse && !pPg->inCkpt && (int)pPg->pgno<=pPager->ckptSize ){
drh1e336b42002-02-14 12:50:33 +00001521 assert( pPg->inJournal || (int)pPg->pgno>pPager->origDbSize );
drh2554f8b2003-01-22 01:26:44 +00001522 storePageNumber(pPg);
1523 rc = sqliteOsWrite(&pPager->cpfd, &((char*)pData)[-4], SQLITE_PAGE_SIZE+4);
drhfa86c412002-02-02 15:01:15 +00001524 if( rc!=SQLITE_OK ){
1525 sqlitepager_rollback(pPager);
1526 pPager->errMask |= PAGER_ERR_FULL;
1527 return rc;
1528 }
drh9bd47a92003-01-07 14:46:08 +00001529 pPager->ckptNRec++;
drhfa86c412002-02-02 15:01:15 +00001530 assert( pPager->aInCkpt!=0 );
1531 pPager->aInCkpt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
drh03eb96a2002-11-10 23:32:56 +00001532 page_add_to_ckpt_list(pPg);
drhfa86c412002-02-02 15:01:15 +00001533 }
1534
1535 /* Update the database size and return.
1536 */
drh1ab43002002-01-14 09:28:19 +00001537 if( pPager->dbSize<(int)pPg->pgno ){
drh306dc212001-05-21 13:45:10 +00001538 pPager->dbSize = pPg->pgno;
1539 }
drh69688d52001-04-14 16:38:23 +00001540 return rc;
drhed7c8552001-04-11 14:29:21 +00001541}
1542
1543/*
drhaacc5432002-01-06 17:07:40 +00001544** Return TRUE if the page given in the argument was previously passed
drh6019e162001-07-02 17:51:45 +00001545** to sqlitepager_write(). In other words, return TRUE if it is ok
1546** to change the content of the page.
1547*/
1548int sqlitepager_iswriteable(void *pData){
1549 PgHdr *pPg = DATA_TO_PGHDR(pData);
1550 return pPg->dirty;
1551}
1552
1553/*
drh30e58752002-03-02 20:41:57 +00001554** A call to this routine tells the pager that it is not necessary to
1555** write the information on page "pgno" back to the disk, even though
1556** that page might be marked as dirty.
1557**
1558** The overlying software layer calls this routine when all of the data
1559** on the given page is unused. The pager marks the page as clean so
1560** that it does not get written to disk.
1561**
1562** Tests show that this optimization, together with the
1563** sqlitepager_dont_rollback() below, more than double the speed
1564** of large INSERT operations and quadruple the speed of large DELETEs.
drh8e298f92002-07-06 16:28:47 +00001565**
1566** When this routine is called, set the alwaysRollback flag to true.
1567** Subsequent calls to sqlitepager_dont_rollback() for the same page
1568** will thereafter be ignored. This is necessary to avoid a problem
1569** where a page with data is added to the freelist during one part of
1570** a transaction then removed from the freelist during a later part
1571** of the same transaction and reused for some other purpose. When it
1572** is first added to the freelist, this routine is called. When reused,
1573** the dont_rollback() routine is called. But because the page contains
1574** critical data, we still need to be sure it gets rolled back in spite
1575** of the dont_rollback() call.
drh30e58752002-03-02 20:41:57 +00001576*/
1577void sqlitepager_dont_write(Pager *pPager, Pgno pgno){
1578 PgHdr *pPg;
drh8e298f92002-07-06 16:28:47 +00001579
drh30e58752002-03-02 20:41:57 +00001580 pPg = pager_lookup(pPager, pgno);
drh8e298f92002-07-06 16:28:47 +00001581 pPg->alwaysRollback = 1;
drh30e58752002-03-02 20:41:57 +00001582 if( pPg && pPg->dirty ){
drh8124a302002-06-25 14:43:57 +00001583 if( pPager->dbSize==(int)pPg->pgno && pPager->origDbSize<pPager->dbSize ){
1584 /* If this pages is the last page in the file and the file has grown
1585 ** during the current transaction, then do NOT mark the page as clean.
1586 ** When the database file grows, we must make sure that the last page
1587 ** gets written at least once so that the disk file will be the correct
1588 ** size. If you do not write this page and the size of the file
1589 ** on the disk ends up being too small, that can lead to database
1590 ** corruption during the next transaction.
1591 */
1592 }else{
drhdb48ee02003-01-16 13:42:43 +00001593 TRACE2("DONT_WRITE %d\n", pgno);
drh8124a302002-06-25 14:43:57 +00001594 pPg->dirty = 0;
1595 }
drh30e58752002-03-02 20:41:57 +00001596 }
1597}
1598
1599/*
1600** A call to this routine tells the pager that if a rollback occurs,
1601** it is not necessary to restore the data on the given page. This
1602** means that the pager does not have to record the given page in the
1603** rollback journal.
1604*/
1605void sqlitepager_dont_rollback(void *pData){
1606 PgHdr *pPg = DATA_TO_PGHDR(pData);
1607 Pager *pPager = pPg->pPager;
1608
1609 if( pPager->state!=SQLITE_WRITELOCK || pPager->journalOpen==0 ) return;
drh193a6b42002-07-07 16:52:46 +00001610 if( pPg->alwaysRollback || pPager->alwaysRollback ) return;
drh30e58752002-03-02 20:41:57 +00001611 if( !pPg->inJournal && (int)pPg->pgno <= pPager->origDbSize ){
1612 assert( pPager->aInJournal!=0 );
1613 pPager->aInJournal[pPg->pgno/8] |= 1<<(pPg->pgno&7);
1614 pPg->inJournal = 1;
drh0f892532002-05-30 12:27:03 +00001615 if( pPager->ckptInUse ){
drh30e58752002-03-02 20:41:57 +00001616 pPager->aInCkpt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
drh03eb96a2002-11-10 23:32:56 +00001617 page_add_to_ckpt_list(pPg);
drh30e58752002-03-02 20:41:57 +00001618 }
drhdb48ee02003-01-16 13:42:43 +00001619 TRACE2("DONT_ROLLBACK %d\n", pPg->pgno);
drh30e58752002-03-02 20:41:57 +00001620 }
drh0f892532002-05-30 12:27:03 +00001621 if( pPager->ckptInUse && !pPg->inCkpt && (int)pPg->pgno<=pPager->ckptSize ){
drh30e58752002-03-02 20:41:57 +00001622 assert( pPg->inJournal || (int)pPg->pgno>pPager->origDbSize );
1623 assert( pPager->aInCkpt!=0 );
1624 pPager->aInCkpt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
drh03eb96a2002-11-10 23:32:56 +00001625 page_add_to_ckpt_list(pPg);
drh30e58752002-03-02 20:41:57 +00001626 }
1627}
1628
1629/*
drhed7c8552001-04-11 14:29:21 +00001630** Commit all changes to the database and release the write lock.
drhd9b02572001-04-15 00:37:09 +00001631**
1632** If the commit fails for any reason, a rollback attempt is made
1633** and an error code is returned. If the commit worked, SQLITE_OK
1634** is returned.
drhed7c8552001-04-11 14:29:21 +00001635*/
drhd9b02572001-04-15 00:37:09 +00001636int sqlitepager_commit(Pager *pPager){
drha1b351a2001-09-14 16:42:12 +00001637 int rc;
drhed7c8552001-04-11 14:29:21 +00001638 PgHdr *pPg;
drhd9b02572001-04-15 00:37:09 +00001639
1640 if( pPager->errMask==PAGER_ERR_FULL ){
1641 rc = sqlitepager_rollback(pPager);
drh4e371ee2002-09-05 16:08:27 +00001642 if( rc==SQLITE_OK ){
1643 rc = SQLITE_FULL;
1644 }
drhd9b02572001-04-15 00:37:09 +00001645 return rc;
1646 }
1647 if( pPager->errMask!=0 ){
1648 rc = pager_errcode(pPager);
1649 return rc;
1650 }
1651 if( pPager->state!=SQLITE_WRITELOCK ){
1652 return SQLITE_ERROR;
1653 }
drhdb48ee02003-01-16 13:42:43 +00001654 TRACE1("COMMIT\n");
drha1680452002-04-18 01:56:57 +00001655 if( pPager->dirtyFile==0 ){
1656 /* Exit early (without doing the time-consuming sqliteOsSync() calls)
1657 ** if there have been no changes to the database file. */
drh341eae82003-01-21 02:39:36 +00001658 assert( pPager->needSync==0 );
drha1680452002-04-18 01:56:57 +00001659 rc = pager_unwritelock(pPager);
1660 pPager->dbSize = -1;
1661 return rc;
1662 }
drhda47d772002-12-02 04:25:19 +00001663 assert( pPager->journalOpen );
drha7fcb052001-12-14 15:09:55 +00001664 if( pPager->needSync && sqliteOsSync(&pPager->jfd)!=SQLITE_OK ){
drhd9b02572001-04-15 00:37:09 +00001665 goto commit_abort;
drhed7c8552001-04-11 14:29:21 +00001666 }
drh2554f8b2003-01-22 01:26:44 +00001667 pPg = pager_get_all_dirty_pages(pPager);
1668 if( pPg ){
1669 rc = pager_write_pagelist(pPg);
1670 if( rc || (!pPager->noSync && sqliteOsSync(&pPager->fd)!=SQLITE_OK) ){
1671 goto commit_abort;
1672 }
drh603240c2002-03-05 01:11:12 +00001673 }
drhd9b02572001-04-15 00:37:09 +00001674 rc = pager_unwritelock(pPager);
1675 pPager->dbSize = -1;
1676 return rc;
1677
1678 /* Jump here if anything goes wrong during the commit process.
1679 */
1680commit_abort:
1681 rc = sqlitepager_rollback(pPager);
1682 if( rc==SQLITE_OK ){
1683 rc = SQLITE_FULL;
drhed7c8552001-04-11 14:29:21 +00001684 }
drhed7c8552001-04-11 14:29:21 +00001685 return rc;
1686}
1687
1688/*
1689** Rollback all changes. The database falls back to read-only mode.
1690** All in-memory cache pages revert to their original data contents.
1691** The journal is deleted.
drhd9b02572001-04-15 00:37:09 +00001692**
1693** This routine cannot fail unless some other process is not following
1694** the correct locking protocol (SQLITE_PROTOCOL) or unless some other
1695** process is writing trash into the journal file (SQLITE_CORRUPT) or
1696** unless a prior malloc() failed (SQLITE_NOMEM). Appropriate error
1697** codes are returned for all these occasions. Otherwise,
1698** SQLITE_OK is returned.
drhed7c8552001-04-11 14:29:21 +00001699*/
drhd9b02572001-04-15 00:37:09 +00001700int sqlitepager_rollback(Pager *pPager){
drhed7c8552001-04-11 14:29:21 +00001701 int rc;
drhdb48ee02003-01-16 13:42:43 +00001702 TRACE1("ROLLBACK\n");
drhda47d772002-12-02 04:25:19 +00001703 if( !pPager->dirtyFile || !pPager->journalOpen ){
1704 rc = pager_unwritelock(pPager);
1705 pPager->dbSize = -1;
1706 return rc;
1707 }
drhdb48ee02003-01-16 13:42:43 +00001708
1709#if defined(SQLITE_TEST) && !defined(NDEBUG)
1710 /* Truncate the journal to the size it was at the conclusion of the
1711 ** last sqliteOsSync() call. This is really an error check. If the
1712 ** rollback still works, it means that the rollback would have also
1713 ** worked if it had occurred after an OS crash or unexpected power
1714 ** loss.
1715 */
1716 if( pPager->syncJSize<sizeof(aJournalMagic)+sizeof(Pgno) ){
1717 pPager->syncJSize = sizeof(aJournalMagic)+sizeof(Pgno);
1718 }
1719 TRACE2("TRUNCATE JOURNAL %lld\n", pPager->syncJSize);
1720 rc = sqliteOsTruncate(&pPager->jfd, pPager->syncJSize);
1721 if( rc ) return rc;
1722#endif
1723
drhd9b02572001-04-15 00:37:09 +00001724 if( pPager->errMask!=0 && pPager->errMask!=PAGER_ERR_FULL ){
drh4b845d72002-03-05 12:41:19 +00001725 if( pPager->state>=SQLITE_WRITELOCK ){
1726 pager_playback(pPager);
1727 }
drhd9b02572001-04-15 00:37:09 +00001728 return pager_errcode(pPager);
drhed7c8552001-04-11 14:29:21 +00001729 }
drhd9b02572001-04-15 00:37:09 +00001730 if( pPager->state!=SQLITE_WRITELOCK ){
1731 return SQLITE_OK;
1732 }
1733 rc = pager_playback(pPager);
1734 if( rc!=SQLITE_OK ){
1735 rc = SQLITE_CORRUPT;
1736 pPager->errMask |= PAGER_ERR_CORRUPT;
1737 }
1738 pPager->dbSize = -1;
drhed7c8552001-04-11 14:29:21 +00001739 return rc;
drh98808ba2001-10-18 12:34:46 +00001740}
drhd9b02572001-04-15 00:37:09 +00001741
1742/*
drh5e00f6c2001-09-13 13:46:56 +00001743** Return TRUE if the database file is opened read-only. Return FALSE
1744** if the database is (in theory) writable.
1745*/
1746int sqlitepager_isreadonly(Pager *pPager){
drhbe0072d2001-09-13 14:46:09 +00001747 return pPager->readOnly;
drh5e00f6c2001-09-13 13:46:56 +00001748}
1749
1750/*
drhd9b02572001-04-15 00:37:09 +00001751** This routine is used for testing and analysis only.
1752*/
1753int *sqlitepager_stats(Pager *pPager){
1754 static int a[9];
1755 a[0] = pPager->nRef;
1756 a[1] = pPager->nPage;
1757 a[2] = pPager->mxPage;
1758 a[3] = pPager->dbSize;
1759 a[4] = pPager->state;
1760 a[5] = pPager->errMask;
1761 a[6] = pPager->nHit;
1762 a[7] = pPager->nMiss;
1763 a[8] = pPager->nOvfl;
1764 return a;
1765}
drhdd793422001-06-28 01:54:48 +00001766
drhfa86c412002-02-02 15:01:15 +00001767/*
1768** Set the checkpoint.
1769**
1770** This routine should be called with the transaction journal already
1771** open. A new checkpoint journal is created that can be used to rollback
drhaaab5722002-02-19 13:39:21 +00001772** changes of a single SQL command within a larger transaction.
drhfa86c412002-02-02 15:01:15 +00001773*/
1774int sqlitepager_ckpt_begin(Pager *pPager){
1775 int rc;
1776 char zTemp[SQLITE_TEMPNAME_SIZE];
drhda47d772002-12-02 04:25:19 +00001777 if( !pPager->journalOpen ){
1778 pPager->ckptAutoopen = 1;
1779 return SQLITE_OK;
1780 }
drhfa86c412002-02-02 15:01:15 +00001781 assert( pPager->journalOpen );
drh0f892532002-05-30 12:27:03 +00001782 assert( !pPager->ckptInUse );
drhfa86c412002-02-02 15:01:15 +00001783 pPager->aInCkpt = sqliteMalloc( pPager->dbSize/8 + 1 );
1784 if( pPager->aInCkpt==0 ){
1785 sqliteOsReadLock(&pPager->fd);
1786 return SQLITE_NOMEM;
1787 }
1788 rc = sqliteOsFileSize(&pPager->jfd, &pPager->ckptJSize);
1789 if( rc ) goto ckpt_begin_failed;
drh663fc632002-02-02 18:49:19 +00001790 pPager->ckptSize = pPager->dbSize;
drh0f892532002-05-30 12:27:03 +00001791 if( !pPager->ckptOpen ){
1792 rc = sqlitepager_opentemp(zTemp, &pPager->cpfd);
1793 if( rc ) goto ckpt_begin_failed;
1794 pPager->ckptOpen = 1;
drh9bd47a92003-01-07 14:46:08 +00001795 pPager->ckptNRec = 0;
drh0f892532002-05-30 12:27:03 +00001796 }
1797 pPager->ckptInUse = 1;
drhfa86c412002-02-02 15:01:15 +00001798 return SQLITE_OK;
1799
1800ckpt_begin_failed:
1801 if( pPager->aInCkpt ){
1802 sqliteFree(pPager->aInCkpt);
1803 pPager->aInCkpt = 0;
1804 }
1805 return rc;
1806}
1807
1808/*
1809** Commit a checkpoint.
1810*/
1811int sqlitepager_ckpt_commit(Pager *pPager){
drh0f892532002-05-30 12:27:03 +00001812 if( pPager->ckptInUse ){
drh03eb96a2002-11-10 23:32:56 +00001813 PgHdr *pPg, *pNext;
drh96ddd6d2002-09-05 19:10:33 +00001814 sqliteOsSeek(&pPager->cpfd, 0);
drh9bd47a92003-01-07 14:46:08 +00001815 /* sqliteOsTruncate(&pPager->cpfd, 0); */
1816 pPager->ckptNRec = 0;
drh0f892532002-05-30 12:27:03 +00001817 pPager->ckptInUse = 0;
drh663fc632002-02-02 18:49:19 +00001818 sqliteFree( pPager->aInCkpt );
1819 pPager->aInCkpt = 0;
drh03eb96a2002-11-10 23:32:56 +00001820 for(pPg=pPager->pCkpt; pPg; pPg=pNext){
1821 pNext = pPg->pNextCkpt;
1822 assert( pPg->inCkpt );
drh663fc632002-02-02 18:49:19 +00001823 pPg->inCkpt = 0;
drh03eb96a2002-11-10 23:32:56 +00001824 pPg->pPrevCkpt = pPg->pNextCkpt = 0;
drh663fc632002-02-02 18:49:19 +00001825 }
drh03eb96a2002-11-10 23:32:56 +00001826 pPager->pCkpt = 0;
drh663fc632002-02-02 18:49:19 +00001827 }
drhda47d772002-12-02 04:25:19 +00001828 pPager->ckptAutoopen = 0;
drhfa86c412002-02-02 15:01:15 +00001829 return SQLITE_OK;
1830}
1831
1832/*
1833** Rollback a checkpoint.
1834*/
1835int sqlitepager_ckpt_rollback(Pager *pPager){
1836 int rc;
drh0f892532002-05-30 12:27:03 +00001837 if( pPager->ckptInUse ){
drh663fc632002-02-02 18:49:19 +00001838 rc = pager_ckpt_playback(pPager);
1839 sqlitepager_ckpt_commit(pPager);
1840 }else{
1841 rc = SQLITE_OK;
1842 }
drhda47d772002-12-02 04:25:19 +00001843 pPager->ckptAutoopen = 0;
drhfa86c412002-02-02 15:01:15 +00001844 return rc;
1845}
1846
#ifdef SQLITE_TEST
/*
** Print a listing of all referenced pages and their ref count.
**
** Walks the pager's list of all pages and prints one line to standard
** output for every page whose reference count is positive.  Only
** compiled when SQLITE_TEST is defined.
**
** The data address is printed with %p.  Casting the pointer to int
** would drop the upper bits on platforms where pointers are wider
** than int.
*/
void sqlitepager_refdump(Pager *pPager){
  PgHdr *pPg;
  for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
    if( pPg->nRef<=0 ) continue;
    printf("PAGE %3d addr=%p nRef=%d\n",
       (int)pPg->pgno, (void*)PGHDR_TO_DATA(pPg), pPg->nRef);
  }
}
#endif