blob: 130043d3883387b17a25432bab8ad4535a13a21c [file] [log] [blame]
drhed7c8552001-04-11 14:29:21 +00001/*
drhb19a2bc2001-09-16 00:13:26 +00002** 2001 September 15
drhed7c8552001-04-11 14:29:21 +00003**
drhb19a2bc2001-09-16 00:13:26 +00004** The author disclaims copyright to this source code. In place of
5** a legal notice, here is a blessing:
drhed7c8552001-04-11 14:29:21 +00006**
drhb19a2bc2001-09-16 00:13:26 +00007** May you do good and not evil.
8** May you find forgiveness for yourself and forgive others.
9** May you share freely, never taking more than you give.
drhed7c8552001-04-11 14:29:21 +000010**
11*************************************************************************
drhb19a2bc2001-09-16 00:13:26 +000012** This is the implementation of the page cache subsystem or "pager".
drhed7c8552001-04-11 14:29:21 +000013**
drhb19a2bc2001-09-16 00:13:26 +000014** The pager is used to access a database disk file. It implements
15** atomic commit and rollback through the use of a journal file that
16** is separate from the database file. The pager also implements file
17** locking to prevent two processes from writing the same database
18** file simultaneously, or one process from reading the database while
19** another is writing.
drhed7c8552001-04-11 14:29:21 +000020**
drh2e6d11b2003-04-25 15:37:57 +000021** @(#) $Id: pager.c,v 1.83 2003/04/25 15:37:58 drh Exp $
drhed7c8552001-04-11 14:29:21 +000022*/
drh829e8022002-11-06 14:08:11 +000023#include "os.h" /* Must be first to enable large file support */
drhd9b02572001-04-15 00:37:09 +000024#include "sqliteInt.h"
drhed7c8552001-04-11 14:29:21 +000025#include "pager.h"
drhed7c8552001-04-11 14:29:21 +000026#include <assert.h>
drhd9b02572001-04-15 00:37:09 +000027#include <string.h>
drhed7c8552001-04-11 14:29:21 +000028
/*
** Macros for troubleshooting.  Normally turned off.
**
** To enable them, change the "#if 0" below to "#if 1".  SET_PAGER()
** remembers the first pager it is handed, CLR_PAGER() forgets that pager,
** and the TRACEn() macros print to stderr only when the variable named
** "pPager" in the calling scope is the remembered pager.
**
** The enabled variants are wrapped in do{ }while(0) so that each one is
** a single statement.  A bare "if( ... ) stmt" macro would capture the
** "else" of an enclosing if/else written by the caller.
*/
#if 0
static Pager *mainPager = 0;
#define SET_PAGER(X)  do{ if( mainPager==0 ) mainPager = (X); }while(0)
#define CLR_PAGER(X)  do{ if( mainPager==(X) ) mainPager = 0; }while(0)
#define TRACE1(X)     do{ if( pPager==mainPager ) fprintf(stderr,X); }while(0)
#define TRACE2(X,Y)   do{ if( pPager==mainPager ) fprintf(stderr,X,Y); }while(0)
#define TRACE3(X,Y,Z) \
   do{ if( pPager==mainPager ) fprintf(stderr,X,Y,Z); }while(0)
#else
#define SET_PAGER(X)
#define CLR_PAGER(X)
#define TRACE1(X)
#define TRACE2(X,Y)
#define TRACE3(X,Y,Z)
#endif
46
47
/*
** Lock states of the page cache.  The cache as a whole is always in
** exactly one of these states:
**
**   SQLITE_UNLOCK       The page cache is not currently reading or
**                       writing the database file.  There is no
**                       data held in memory.  This is the initial
**                       state.
**
**   SQLITE_READLOCK     The page cache is reading the database.
**                       Writing is not permitted.  There can be
**                       multiple readers accessing the same database
**                       file at the same time.
**
**   SQLITE_WRITELOCK    The page cache is writing the database.
**                       Access is exclusive.  No other processes or
**                       threads can be reading or writing while one
**                       process is writing.
**
** Transitions:
**
**   *  The cache starts in SQLITE_UNLOCK.  The first request for a page
**      moves it to SQLITE_READLOCK.
**
**   *  Once every outstanding page has been released the cache drops
**      back to SQLITE_UNLOCK.
**
**   *  The first request to write a page moves the cache to
**      SQLITE_WRITELOCK.  A write can only be requested on an
**      outstanding page, so the cache is always in SQLITE_READLOCK
**      before it enters SQLITE_WRITELOCK.
**
**   *  Commit and rollback move the cache from SQLITE_WRITELOCK back to
**      SQLITE_READLOCK.
**
** The numeric order matters: code in this file compares states with
** "<" and ">=" (for example "state>=SQLITE_WRITELOCK").
*/
#define SQLITE_UNLOCK      0
#define SQLITE_READLOCK    1
#define SQLITE_WRITELOCK   2
81
drhd9b02572001-04-15 00:37:09 +000082
drhed7c8552001-04-11 14:29:21 +000083/*
84** Each in-memory image of a page begins with the following header.
drhbd03cae2001-06-02 02:40:57 +000085** This header is only visible to this pager module. The client
86** code that calls pager sees only the data that follows the header.
drhed7c8552001-04-11 14:29:21 +000087*/
drhd9b02572001-04-15 00:37:09 +000088typedef struct PgHdr PgHdr;
drhed7c8552001-04-11 14:29:21 +000089struct PgHdr {
90 Pager *pPager; /* The pager to which this page belongs */
91 Pgno pgno; /* The page number for this page */
drh69688d52001-04-14 16:38:23 +000092 PgHdr *pNextHash, *pPrevHash; /* Hash collision chain for PgHdr.pgno */
drhed7c8552001-04-11 14:29:21 +000093 int nRef; /* Number of users of this page */
drhd9b02572001-04-15 00:37:09 +000094 PgHdr *pNextFree, *pPrevFree; /* Freelist of pages where nRef==0 */
95 PgHdr *pNextAll, *pPrevAll; /* A list of all pages */
drh03eb96a2002-11-10 23:32:56 +000096 PgHdr *pNextCkpt, *pPrevCkpt; /* List of pages in the checkpoint journal */
drh193a6b42002-07-07 16:52:46 +000097 u8 inJournal; /* TRUE if has been written to journal */
98 u8 inCkpt; /* TRUE if written to the checkpoint journal */
99 u8 dirty; /* TRUE if we need to write back changes */
drhdb48ee02003-01-16 13:42:43 +0000100 u8 needSync; /* Sync journal before writing this page */
drh193a6b42002-07-07 16:52:46 +0000101 u8 alwaysRollback; /* Disable dont_rollback() for this page */
drh2554f8b2003-01-22 01:26:44 +0000102 PgHdr *pDirty; /* Dirty pages sorted by PgHdr.pgno */
drh69688d52001-04-14 16:38:23 +0000103 /* SQLITE_PAGE_SIZE bytes of page data follow this header */
drh973b6e32003-02-12 14:09:42 +0000104 /* Pager.nExtra bytes of local data follow the page data */
drhed7c8552001-04-11 14:29:21 +0000105};
106
/*
** Pointer conversions between a page header and the memory that
** follows it:
**
**   PGHDR_TO_DATA(P)    PgHdr pointer  ->  start of the page data, which
**                       begins immediately after the header.
**   DATA_TO_PGHDR(D)    page data pointer  ->  the PgHdr just before it.
**   PGHDR_TO_EXTRA(P)   PgHdr pointer  ->  the bytes that begin
**                       SQLITE_PAGE_SIZE bytes after the page data.
*/
#define PGHDR_TO_DATA(P)   ((void*)((P)+1))
#define DATA_TO_PGHDR(D)   (((PgHdr*)(D))-1)
#define PGHDR_TO_EXTRA(P)  ((void*)(((char*)((P)+1))+SQLITE_PAGE_SIZE))
drhed7c8552001-04-11 14:29:21 +0000114
/*
** Number of buckets in the hash table used to locate in-memory pages
** by page number.  pager_hash() reduces a page number with a bit mask
** of N_PG_HASH-1, so this value must be a power of two.
*/
#define N_PG_HASH 2048

/*
** Map a page number onto a bucket index in the range 0..N_PG_HASH-1.
*/
#define pager_hash(PN)  ((PN) & (N_PG_HASH-1))
drhed7c8552001-04-11 14:29:21 +0000125
126/*
127** A open page cache is an instance of the following structure.
128*/
129struct Pager {
130 char *zFilename; /* Name of the database file */
131 char *zJournal; /* Name of the journal file */
drh8cfbf082001-09-19 13:22:39 +0000132 OsFile fd, jfd; /* File descriptors for database and journal */
drhfa86c412002-02-02 15:01:15 +0000133 OsFile cpfd; /* File descriptor for the checkpoint journal */
drhed7c8552001-04-11 14:29:21 +0000134 int dbSize; /* Number of pages in the file */
drh69688d52001-04-14 16:38:23 +0000135 int origDbSize; /* dbSize before the current change */
drh28be87c2002-11-05 23:03:02 +0000136 int ckptSize; /* Size of database (in pages) at ckpt_begin() */
137 off_t ckptJSize; /* Size of journal at ckpt_begin() */
drh968af522003-02-11 14:55:40 +0000138 int nRec; /* Number of pages written to the journal */
139 u32 cksumInit; /* Quasi-random value added to every checksum */
drh9bd47a92003-01-07 14:46:08 +0000140 int ckptNRec; /* Number of records in the checkpoint journal */
drh7e3b0a02001-04-28 16:52:40 +0000141 int nExtra; /* Add this many bytes to each in-memory page */
drh72f82862001-05-24 21:06:34 +0000142 void (*xDestructor)(void*); /* Call this routine when freeing pages */
drhed7c8552001-04-11 14:29:21 +0000143 int nPage; /* Total number of in-memory pages */
drhd9b02572001-04-15 00:37:09 +0000144 int nRef; /* Number of in-memory pages with PgHdr.nRef>0 */
drhed7c8552001-04-11 14:29:21 +0000145 int mxPage; /* Maximum number of pages to hold in cache */
drhd9b02572001-04-15 00:37:09 +0000146 int nHit, nMiss, nOvfl; /* Cache hits, missing, and LRU overflows */
drh603240c2002-03-05 01:11:12 +0000147 u8 journalOpen; /* True if journal file descriptors is valid */
drhdb48ee02003-01-16 13:42:43 +0000148 u8 journalStarted; /* True if initial magic of journal is synced */
drhda47d772002-12-02 04:25:19 +0000149 u8 useJournal; /* Do not use a rollback journal on this file */
drh603240c2002-03-05 01:11:12 +0000150 u8 ckptOpen; /* True if the checkpoint journal is open */
drh0f892532002-05-30 12:27:03 +0000151 u8 ckptInUse; /* True we are in a checkpoint */
drhda47d772002-12-02 04:25:19 +0000152 u8 ckptAutoopen; /* Open ckpt journal when main journal is opened*/
drh603240c2002-03-05 01:11:12 +0000153 u8 noSync; /* Do not sync the journal if true */
drh968af522003-02-11 14:55:40 +0000154 u8 fullSync; /* Do extra syncs of the journal for robustness */
drh603240c2002-03-05 01:11:12 +0000155 u8 state; /* SQLITE_UNLOCK, _READLOCK or _WRITELOCK */
156 u8 errMask; /* One of several kinds of errors */
157 u8 tempFile; /* zFilename is a temporary file */
158 u8 readOnly; /* True for a read-only database */
159 u8 needSync; /* True if an fsync() is needed on the journal */
drha1680452002-04-18 01:56:57 +0000160 u8 dirtyFile; /* True if database file has changed in any way */
drh193a6b42002-07-07 16:52:46 +0000161 u8 alwaysRollback; /* Disable dont_rollback() for all pages */
drh603240c2002-03-05 01:11:12 +0000162 u8 *aInJournal; /* One bit for each page in the database file */
163 u8 *aInCkpt; /* One bit for each page in the database */
drhed7c8552001-04-11 14:29:21 +0000164 PgHdr *pFirst, *pLast; /* List of free pages */
drh341eae82003-01-21 02:39:36 +0000165 PgHdr *pFirstSynced; /* First free page with PgHdr.needSync==0 */
drhd9b02572001-04-15 00:37:09 +0000166 PgHdr *pAll; /* List of all pages */
drh03eb96a2002-11-10 23:32:56 +0000167 PgHdr *pCkpt; /* List of pages in the checkpoint journal */
drhed7c8552001-04-11 14:29:21 +0000168 PgHdr *aHash[N_PG_HASH]; /* Hash table to map page number of PgHdr */
drhd9b02572001-04-15 00:37:09 +0000169};
170
/*
** Error flags that can be OR-ed into Pager.errMask.  Each flag occupies
** its own bit so several errors can be recorded at once;
** pager_errcode() turns the mask into a single result code.
*/
#define PAGER_ERR_FULL     (1<<0)  /* a write() failed */
#define PAGER_ERR_MEM      (1<<1)  /* malloc() failed */
#define PAGER_ERR_LOCK     (1<<2)  /* error in the locking protocol */
#define PAGER_ERR_CORRUPT  (1<<3)  /* database or journal corruption */
#define PAGER_ERR_DISK     (1<<4)  /* general disk I/O error - bad hard drive? */
drhd9b02572001-04-15 00:37:09 +0000179
180/*
181** The journal file contains page records in the following
182** format.
drh968af522003-02-11 14:55:40 +0000183**
184** Actually, this structure is the complete page record for pager
185** formats less than 3. Beginning with format 3, this record is surrounded
186** by two checksums.
drhd9b02572001-04-15 00:37:09 +0000187*/
188typedef struct PageRecord PageRecord;
189struct PageRecord {
190 Pgno pgno; /* The page number */
191 char aData[SQLITE_PAGE_SIZE]; /* Original data for page pgno */
192};
193
/*
** Journal files begin with one of the following magic strings.  The data
** was obtained from /dev/random.  It is used only as a sanity check.
** The three strings differ only in their final byte, which identifies
** the journal format.
**
** There are three journal formats (so far).  The 1st journal format
** writes 32-bit integers in the byte-order of the host machine.  Newer
** formats write integers as big-endian.  All new journals use the
** new format, but we have to be able to read an older journal in order
** to rollback journals created by older versions of the library.
**
** The 3rd journal format (added for 2.8.0) adds additional sanity
** checking information to the journal.  If the power fails while the
** journal is being written, semi-random garbage data might appear in
** the journal file after power is restored.  If an attempt is then made
** to roll the journal back, the database could be corrupted.  The
** additional sanity checking data is an attempt to discover the garbage
** in the journal and ignore it.
**
** The sanity checking information for the 3rd journal format consists
** of a 32-bit checksum on each page of data, seeded with a 32-bit random
** value that appears in the journal file right after the header.  The
** random initializer is important, because garbage data that appears at
** the end of a journal is likely data that was once in other files that
** have now been deleted.  If the garbage data came from an obsolete
** journal file, the checksums might be correct.  But by initializing the
** checksum to a random value which is different for every journal, we
** minimize that risk.
**
** NOTE(review): earlier text here said the checksum covers both the page
** number and the SQLITE_PAGE_SIZE bytes of page data, but pager_cksum()
** in this file combines only the initializer and the page number.
*/
static const unsigned char aJournalMagic1[] =
  { 0xd9, 0xd5, 0x05, 0xf9, 0x20, 0xa1, 0x63, 0xd4 };
static const unsigned char aJournalMagic2[] =
  { 0xd9, 0xd5, 0x05, 0xf9, 0x20, 0xa1, 0x63, 0xd5 };
static const unsigned char aJournalMagic3[] =
  { 0xd9, 0xd5, 0x05, 0xf9, 0x20, 0xa1, 0x63, 0xd6 };

/* Format numbers corresponding to the magic strings above */
#define JOURNAL_FORMAT_1  1
#define JOURNAL_FORMAT_2  2
#define JOURNAL_FORMAT_3  3
drh94f33312002-08-12 12:29:56 +0000235
/*
** journal_format selects the format used when creating new primary
** journal files.  By default we always use format 3.  In test builds
** (SQLITE_TEST defined) it is a real variable so that it can be set to
** an older journal format, in order to make sure that newer versions of
** the library are able to rollback older journal files.  In all other
** builds it is the compile-time constant 3.
**
** Note that checkpoint journals always use format 2 and omit the header.
*/
#if defined(SQLITE_TEST)
int journal_format = 3;
#else
# define journal_format 3
#endif
drhed7c8552001-04-11 14:29:21 +0000250
/*
** The size of the header and of each page record in the journal depends
** on the journal format.  These macros compute the sizes in bytes from a
** format number X:
**
**   JOURNAL_HDR_SZ(X)   magic string plus one Pgno, plus two extra 32-bit
**                       words when X is 3 or greater.
**   JOURNAL_PG_SZ(X)    one page of data plus its Pgno, plus one extra
**                       32-bit word when X is 3 or greater.
*/
#define JOURNAL_HDR_SZ(X) \
   (sizeof(aJournalMagic1) + sizeof(Pgno) + ((X)>=3 ? 2*sizeof(u32) : 0))
#define JOURNAL_PG_SZ(X) \
   (SQLITE_PAGE_SIZE + sizeof(Pgno) + ((X)>=3 ? sizeof(u32) : 0))
260
/*
** Reference count tracking, compiled in only when SQLITE_TEST is defined.
**
** When pager_refinfo_enable is non-zero, REFINFO(p) prints the page
** number, the address of the page data and the reference count of page
** p to standard output.  In non-test builds REFINFO() expands to nothing.
**
** The address is printed with %p.  Casting the pointer to int, as was
** done previously, discards the upper half of the address on platforms
** where pointers are wider than int, and %d/%x did not match the types
** of the arguments actually passed.
*/
#ifdef SQLITE_TEST
  int pager_refinfo_enable = 0;
  static void pager_refinfo(PgHdr *p){
    static int cnt = 0;
    if( !pager_refinfo_enable ) return;
    printf(
       "REFCNT: %4u addr=%p nRef=%d\n",
       (unsigned)p->pgno, PGHDR_TO_DATA(p), p->nRef
    );
    cnt++;   /* Something to set a breakpoint on */
  }
# define REFINFO(X)  pager_refinfo(X)
#else
# define REFINFO(X)
#endif
279
280/*
drh94f33312002-08-12 12:29:56 +0000281** Read a 32-bit integer from the given file descriptor
282*/
drh968af522003-02-11 14:55:40 +0000283static int read32bits(int format, OsFile *fd, u32 *pRes){
drh94f33312002-08-12 12:29:56 +0000284 u32 res;
285 int rc;
286 rc = sqliteOsRead(fd, &res, sizeof(res));
drh968af522003-02-11 14:55:40 +0000287 if( rc==SQLITE_OK && format>JOURNAL_FORMAT_1 ){
drh94f33312002-08-12 12:29:56 +0000288 unsigned char ac[4];
289 memcpy(ac, &res, 4);
290 res = (ac[0]<<24) | (ac[1]<<16) | (ac[2]<<8) | ac[3];
291 }
292 *pRes = res;
293 return rc;
294}
295
296/*
297** Write a 32-bit integer into the given file descriptor. Writing
298** is always done using the new journal format.
299*/
300static int write32bits(OsFile *fd, u32 val){
301 unsigned char ac[4];
drh968af522003-02-11 14:55:40 +0000302 if( journal_format<=1 ){
drh94f33312002-08-12 12:29:56 +0000303 return sqliteOsWrite(fd, &val, 4);
304 }
drh94f33312002-08-12 12:29:56 +0000305 ac[0] = (val>>24) & 0xff;
306 ac[1] = (val>>16) & 0xff;
307 ac[2] = (val>>8) & 0xff;
308 ac[3] = val & 0xff;
309 return sqliteOsWrite(fd, ac, 4);
310}
311
drh2554f8b2003-01-22 01:26:44 +0000312/*
313** Write a 32-bit integer into a page header right before the
314** page data. This will overwrite the PgHdr.pDirty pointer.
315*/
drh968af522003-02-11 14:55:40 +0000316static void store32bits(u32 val, PgHdr *p, int offset){
drh2554f8b2003-01-22 01:26:44 +0000317 unsigned char *ac;
drh968af522003-02-11 14:55:40 +0000318 ac = &((char*)PGHDR_TO_DATA(p))[offset];
319 if( journal_format<=1 ){
drh2554f8b2003-01-22 01:26:44 +0000320 memcpy(ac, &val, 4);
321 }else{
322 ac[0] = (val>>24) & 0xff;
323 ac[1] = (val>>16) & 0xff;
324 ac[2] = (val>>8) & 0xff;
325 ac[3] = val & 0xff;
326 }
327}
328
drh94f33312002-08-12 12:29:56 +0000329
330/*
drhd9b02572001-04-15 00:37:09 +0000331** Convert the bits in the pPager->errMask into an approprate
332** return code.
333*/
334static int pager_errcode(Pager *pPager){
335 int rc = SQLITE_OK;
336 if( pPager->errMask & PAGER_ERR_LOCK ) rc = SQLITE_PROTOCOL;
drh81a20f22001-10-12 17:30:04 +0000337 if( pPager->errMask & PAGER_ERR_DISK ) rc = SQLITE_IOERR;
drhd9b02572001-04-15 00:37:09 +0000338 if( pPager->errMask & PAGER_ERR_FULL ) rc = SQLITE_FULL;
339 if( pPager->errMask & PAGER_ERR_MEM ) rc = SQLITE_NOMEM;
340 if( pPager->errMask & PAGER_ERR_CORRUPT ) rc = SQLITE_CORRUPT;
341 return rc;
drhed7c8552001-04-11 14:29:21 +0000342}
343
344/*
drh03eb96a2002-11-10 23:32:56 +0000345** Add or remove a page from the list of all pages that are in the
346** checkpoint journal.
347**
348** The Pager keeps a separate list of pages that are currently in
349** the checkpoint journal. This helps the sqlitepager_ckpt_commit()
350** routine run MUCH faster for the common case where there are many
351** pages in memory but only a few are in the checkpoint journal.
352*/
353static void page_add_to_ckpt_list(PgHdr *pPg){
354 Pager *pPager = pPg->pPager;
355 if( pPg->inCkpt ) return;
356 assert( pPg->pPrevCkpt==0 && pPg->pNextCkpt==0 );
357 pPg->pPrevCkpt = 0;
358 if( pPager->pCkpt ){
359 pPager->pCkpt->pPrevCkpt = pPg;
360 }
361 pPg->pNextCkpt = pPager->pCkpt;
362 pPager->pCkpt = pPg;
363 pPg->inCkpt = 1;
364}
365static void page_remove_from_ckpt_list(PgHdr *pPg){
366 if( !pPg->inCkpt ) return;
367 if( pPg->pPrevCkpt ){
368 assert( pPg->pPrevCkpt->pNextCkpt==pPg );
369 pPg->pPrevCkpt->pNextCkpt = pPg->pNextCkpt;
370 }else{
371 assert( pPg->pPager->pCkpt==pPg );
372 pPg->pPager->pCkpt = pPg->pNextCkpt;
373 }
374 if( pPg->pNextCkpt ){
375 assert( pPg->pNextCkpt->pPrevCkpt==pPg );
376 pPg->pNextCkpt->pPrevCkpt = pPg->pPrevCkpt;
377 }
378 pPg->pNextCkpt = 0;
379 pPg->pPrevCkpt = 0;
380 pPg->inCkpt = 0;
381}
382
383/*
drhed7c8552001-04-11 14:29:21 +0000384** Find a page in the hash table given its page number. Return
385** a pointer to the page or NULL if not found.
386*/
drhd9b02572001-04-15 00:37:09 +0000387static PgHdr *pager_lookup(Pager *pPager, Pgno pgno){
drh836faa42003-01-11 13:30:57 +0000388 PgHdr *p = pPager->aHash[pager_hash(pgno)];
drhed7c8552001-04-11 14:29:21 +0000389 while( p && p->pgno!=pgno ){
390 p = p->pNextHash;
391 }
392 return p;
393}
394
395/*
396** Unlock the database and clear the in-memory cache. This routine
397** sets the state of the pager back to what it was when it was first
398** opened. Any outstanding pages are invalidated and subsequent attempts
399** to access those pages will likely result in a coredump.
400*/
drhd9b02572001-04-15 00:37:09 +0000401static void pager_reset(Pager *pPager){
drhed7c8552001-04-11 14:29:21 +0000402 PgHdr *pPg, *pNext;
drhd9b02572001-04-15 00:37:09 +0000403 for(pPg=pPager->pAll; pPg; pPg=pNext){
404 pNext = pPg->pNextAll;
405 sqliteFree(pPg);
drhed7c8552001-04-11 14:29:21 +0000406 }
407 pPager->pFirst = 0;
drh341eae82003-01-21 02:39:36 +0000408 pPager->pFirstSynced = 0;
drhd9b02572001-04-15 00:37:09 +0000409 pPager->pLast = 0;
410 pPager->pAll = 0;
drhed7c8552001-04-11 14:29:21 +0000411 memset(pPager->aHash, 0, sizeof(pPager->aHash));
412 pPager->nPage = 0;
drhfa86c412002-02-02 15:01:15 +0000413 if( pPager->state>=SQLITE_WRITELOCK ){
drhd9b02572001-04-15 00:37:09 +0000414 sqlitepager_rollback(pPager);
drhed7c8552001-04-11 14:29:21 +0000415 }
drha7fcb052001-12-14 15:09:55 +0000416 sqliteOsUnlock(&pPager->fd);
drhed7c8552001-04-11 14:29:21 +0000417 pPager->state = SQLITE_UNLOCK;
drhd9b02572001-04-15 00:37:09 +0000418 pPager->dbSize = -1;
drhed7c8552001-04-11 14:29:21 +0000419 pPager->nRef = 0;
drh8cfbf082001-09-19 13:22:39 +0000420 assert( pPager->journalOpen==0 );
drhed7c8552001-04-11 14:29:21 +0000421}
422
423/*
424** When this routine is called, the pager has the journal file open and
425** a write lock on the database. This routine releases the database
426** write lock and acquires a read lock in its place. The journal file
427** is deleted and closed.
drhed7c8552001-04-11 14:29:21 +0000428*/
drhd9b02572001-04-15 00:37:09 +0000429static int pager_unwritelock(Pager *pPager){
drhed7c8552001-04-11 14:29:21 +0000430 int rc;
drhd9b02572001-04-15 00:37:09 +0000431 PgHdr *pPg;
drhfa86c412002-02-02 15:01:15 +0000432 if( pPager->state<SQLITE_WRITELOCK ) return SQLITE_OK;
drh663fc632002-02-02 18:49:19 +0000433 sqlitepager_ckpt_commit(pPager);
drh0f892532002-05-30 12:27:03 +0000434 if( pPager->ckptOpen ){
435 sqliteOsClose(&pPager->cpfd);
436 pPager->ckptOpen = 0;
437 }
drhda47d772002-12-02 04:25:19 +0000438 if( pPager->journalOpen ){
439 sqliteOsClose(&pPager->jfd);
440 pPager->journalOpen = 0;
441 sqliteOsDelete(pPager->zJournal);
442 sqliteFree( pPager->aInJournal );
443 pPager->aInJournal = 0;
444 for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
445 pPg->inJournal = 0;
446 pPg->dirty = 0;
drhdb48ee02003-01-16 13:42:43 +0000447 pPg->needSync = 0;
drhda47d772002-12-02 04:25:19 +0000448 }
449 }else{
450 assert( pPager->dirtyFile==0 || pPager->useJournal==0 );
drhd9b02572001-04-15 00:37:09 +0000451 }
drhda47d772002-12-02 04:25:19 +0000452 rc = sqliteOsReadLock(&pPager->fd);
drh8e298f92002-07-06 16:28:47 +0000453 if( rc==SQLITE_OK ){
454 pPager->state = SQLITE_READLOCK;
455 }else{
456 /* This can only happen if a process does a BEGIN, then forks and the
457 ** child process does the COMMIT. Because of the semantics of unix
458 ** file locking, the unlock will fail.
459 */
460 pPager->state = SQLITE_UNLOCK;
461 }
drhed7c8552001-04-11 14:29:21 +0000462 return rc;
463}
464
drhed7c8552001-04-11 14:29:21 +0000465/*
drh968af522003-02-11 14:55:40 +0000466** Compute and return a checksum for the page of data.
467*/
468static u32 pager_cksum(Pager *pPager, Pgno pgno, const char *aData){
469 u32 cksum = pPager->cksumInit + pgno;
drh968af522003-02-11 14:55:40 +0000470 return cksum;
471}
472
473/*
drhfa86c412002-02-02 15:01:15 +0000474** Read a single page from the journal file opened on file descriptor
475** jfd. Playback this one page.
drh968af522003-02-11 14:55:40 +0000476**
477** There are three different journal formats. The format parameter determines
478** which format is used by the journal that is played back.
drhfa86c412002-02-02 15:01:15 +0000479*/
drh968af522003-02-11 14:55:40 +0000480static int pager_playback_one_page(Pager *pPager, OsFile *jfd, int format){
drhfa86c412002-02-02 15:01:15 +0000481 int rc;
482 PgHdr *pPg; /* An existing page in the cache */
483 PageRecord pgRec;
drh968af522003-02-11 14:55:40 +0000484 u32 cksum;
drhfa86c412002-02-02 15:01:15 +0000485
drh968af522003-02-11 14:55:40 +0000486 rc = read32bits(format, jfd, &pgRec.pgno);
drh99ee3602003-02-16 19:13:36 +0000487 if( rc!=SQLITE_OK ) return rc;
drh94f33312002-08-12 12:29:56 +0000488 rc = sqliteOsRead(jfd, &pgRec.aData, sizeof(pgRec.aData));
drh99ee3602003-02-16 19:13:36 +0000489 if( rc!=SQLITE_OK ) return rc;
drhfa86c412002-02-02 15:01:15 +0000490
drh968af522003-02-11 14:55:40 +0000491 /* Sanity checking on the page. This is more important that I originally
492 ** thought. If a power failure occurs while the journal is being written,
493 ** it could cause invalid data to be written into the journal. We need to
494 ** detect this invalid data (with high probability) and ignore it.
495 */
496 if( pgRec.pgno==0 ){
497 return SQLITE_DONE;
498 }
499 if( pgRec.pgno>pPager->dbSize ){
500 return SQLITE_OK;
501 }
502 if( format>=JOURNAL_FORMAT_3 ){
503 rc = read32bits(format, jfd, &cksum);
drh99ee3602003-02-16 19:13:36 +0000504 if( rc ) return rc;
drh968af522003-02-11 14:55:40 +0000505 if( pager_cksum(pPager, pgRec.pgno, pgRec.aData)!=cksum ){
506 return SQLITE_DONE;
507 }
508 }
drhfa86c412002-02-02 15:01:15 +0000509
510 /* Playback the page. Update the in-memory copy of the page
511 ** at the same time, if there is one.
512 */
513 pPg = pager_lookup(pPager, pgRec.pgno);
drh99ee3602003-02-16 19:13:36 +0000514 TRACE2("PLAYBACK %d\n", pgRec.pgno);
515 sqliteOsSeek(&pPager->fd, (pgRec.pgno-1)*(off_t)SQLITE_PAGE_SIZE);
516 rc = sqliteOsWrite(&pPager->fd, pgRec.aData, SQLITE_PAGE_SIZE);
drhfa86c412002-02-02 15:01:15 +0000517 if( pPg ){
drh3a840692003-01-29 22:58:26 +0000518 if( pPg->nRef==0 ||
519 memcmp(PGHDR_TO_DATA(pPg), pgRec.aData, SQLITE_PAGE_SIZE)==0
520 ){
521 /* Do not update the data on this page if the page is in use
522 ** and the page has never been modified. This avoids resetting
523 ** the "extra" data. That in turn avoids invalidating BTree cursors
524 ** in trees that have never been modified. The end result is that
525 ** you can have a SELECT going on in one table and ROLLBACK changes
526 ** to a different table and the SELECT is unaffected by the ROLLBACK.
527 */
528 memcpy(PGHDR_TO_DATA(pPg), pgRec.aData, SQLITE_PAGE_SIZE);
529 memset(PGHDR_TO_EXTRA(pPg), 0, pPager->nExtra);
530 }
drhdb48ee02003-01-16 13:42:43 +0000531 pPg->dirty = 0;
532 pPg->needSync = 0;
drhfa86c412002-02-02 15:01:15 +0000533 }
534 return rc;
535}
536
537/*
drhed7c8552001-04-11 14:29:21 +0000538** Playback the journal and thus restore the database file to
539** the state it was in before we started making changes.
540**
drhd9b02572001-04-15 00:37:09 +0000541** The journal file format is as follows: There is an initial
542** file-type string for sanity checking. Then there is a single
543** Pgno number which is the number of pages in the database before
544** changes were made. The database is truncated to this size.
drh306dc212001-05-21 13:45:10 +0000545** Next come zero or more page records where each page record
546** consists of a Pgno and SQLITE_PAGE_SIZE bytes of data. See
547** the PageRecord structure for details.
drhed7c8552001-04-11 14:29:21 +0000548**
drhd9b02572001-04-15 00:37:09 +0000549** If the file opened as the journal file is not a well-formed
550** journal file (as determined by looking at the magic number
551** at the beginning) then this routine returns SQLITE_PROTOCOL.
552** If any other errors occur during playback, the database will
553** likely be corrupted, so the PAGER_ERR_CORRUPT bit is set in
554** pPager->errMask and SQLITE_CORRUPT is returned. If it all
555** works, then this routine returns SQLITE_OK.
drhed7c8552001-04-11 14:29:21 +0000556*/
drh99ee3602003-02-16 19:13:36 +0000557static int pager_playback(Pager *pPager, int useJournalSize){
drh968af522003-02-11 14:55:40 +0000558 off_t szJ; /* Size of the journal file in bytes */
559 int nRec; /* Number of Records in the journal */
drhd9b02572001-04-15 00:37:09 +0000560 int i; /* Loop counter */
561 Pgno mxPg = 0; /* Size of the original file in pages */
drh968af522003-02-11 14:55:40 +0000562 int format; /* Format of the journal file. */
563 unsigned char aMagic[sizeof(aJournalMagic1)];
drhed7c8552001-04-11 14:29:21 +0000564 int rc;
565
drhc3a64ba2001-11-22 00:01:27 +0000566 /* Figure out how many records are in the journal. Abort early if
567 ** the journal is empty.
drhed7c8552001-04-11 14:29:21 +0000568 */
drh8cfbf082001-09-19 13:22:39 +0000569 assert( pPager->journalOpen );
drha7fcb052001-12-14 15:09:55 +0000570 sqliteOsSeek(&pPager->jfd, 0);
drh968af522003-02-11 14:55:40 +0000571 rc = sqliteOsFileSize(&pPager->jfd, &szJ);
drhc3a64ba2001-11-22 00:01:27 +0000572 if( rc!=SQLITE_OK ){
573 goto end_playback;
574 }
drh968af522003-02-11 14:55:40 +0000575 if( szJ < sizeof(aMagic)+sizeof(Pgno) ){
drhc3a64ba2001-11-22 00:01:27 +0000576 goto end_playback;
577 }
578
579 /* Read the beginning of the journal and truncate the
580 ** database file back to its original size.
581 */
drha7fcb052001-12-14 15:09:55 +0000582 rc = sqliteOsRead(&pPager->jfd, aMagic, sizeof(aMagic));
drh94f33312002-08-12 12:29:56 +0000583 if( rc!=SQLITE_OK ){
drh81a20f22001-10-12 17:30:04 +0000584 rc = SQLITE_PROTOCOL;
585 goto end_playback;
drhd9b02572001-04-15 00:37:09 +0000586 }
drh968af522003-02-11 14:55:40 +0000587 if( memcmp(aMagic, aJournalMagic3, sizeof(aMagic))==0 ){
588 format = JOURNAL_FORMAT_3;
589 }else if( memcmp(aMagic, aJournalMagic2, sizeof(aMagic))==0 ){
590 format = JOURNAL_FORMAT_2;
591 }else if( memcmp(aMagic, aJournalMagic1, sizeof(aMagic))==0 ){
592 format = JOURNAL_FORMAT_1;
drh94f33312002-08-12 12:29:56 +0000593 }else{
594 rc = SQLITE_PROTOCOL;
595 goto end_playback;
596 }
drh968af522003-02-11 14:55:40 +0000597 if( format>=JOURNAL_FORMAT_3 ){
598 rc = read32bits(format, &pPager->jfd, &nRec);
599 if( rc ) goto end_playback;
600 rc = read32bits(format, &pPager->jfd, &pPager->cksumInit);
601 if( rc ) goto end_playback;
drh99ee3602003-02-16 19:13:36 +0000602 if( nRec==0xffffffff || useJournalSize ){
drh968af522003-02-11 14:55:40 +0000603 nRec = (szJ - JOURNAL_HDR_SZ(3))/JOURNAL_PG_SZ(3);
604 }
605 }else{
drhd8d66e82003-02-12 02:10:15 +0000606 nRec = (szJ - JOURNAL_HDR_SZ(2))/JOURNAL_PG_SZ(2);
607 assert( nRec*JOURNAL_PG_SZ(2)+JOURNAL_HDR_SZ(2)==szJ );
drh968af522003-02-11 14:55:40 +0000608 }
609 rc = read32bits(format, &pPager->jfd, &mxPg);
drhd9b02572001-04-15 00:37:09 +0000610 if( rc!=SQLITE_OK ){
drh81a20f22001-10-12 17:30:04 +0000611 goto end_playback;
drhd9b02572001-04-15 00:37:09 +0000612 }
drhd8d66e82003-02-12 02:10:15 +0000613 assert( pPager->origDbSize==0 || pPager->origDbSize==mxPg );
drh28be87c2002-11-05 23:03:02 +0000614 rc = sqliteOsTruncate(&pPager->fd, SQLITE_PAGE_SIZE*(off_t)mxPg);
drh81a20f22001-10-12 17:30:04 +0000615 if( rc!=SQLITE_OK ){
616 goto end_playback;
617 }
drhd9b02572001-04-15 00:37:09 +0000618 pPager->dbSize = mxPg;
619
drhfa86c412002-02-02 15:01:15 +0000620 /* Copy original pages out of the journal and back into the database file.
drhed7c8552001-04-11 14:29:21 +0000621 */
drh968af522003-02-11 14:55:40 +0000622 for(i=0; i<nRec; i++){
623 rc = pager_playback_one_page(pPager, &pPager->jfd, format);
624 if( rc!=SQLITE_OK ){
625 if( rc==SQLITE_DONE ){
drh968af522003-02-11 14:55:40 +0000626 rc = SQLITE_OK;
627 }
628 break;
629 }
drhed7c8552001-04-11 14:29:21 +0000630 }
drh81a20f22001-10-12 17:30:04 +0000631
drh4a0681e2003-02-13 01:58:20 +0000632 /* Pages that have been written to the journal but never synced
633 ** where not restored by the loop above. We have to restore those
634 ** pages by reading the back from the original database.
drhdb48ee02003-01-16 13:42:43 +0000635 */
636 if( rc==SQLITE_OK ){
637 PgHdr *pPg;
638 for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
drh3a840692003-01-29 22:58:26 +0000639 char zBuf[SQLITE_PAGE_SIZE];
drh4a0681e2003-02-13 01:58:20 +0000640 if( !pPg->dirty ) continue;
drhdb48ee02003-01-16 13:42:43 +0000641 if( (int)pPg->pgno <= pPager->origDbSize ){
642 sqliteOsSeek(&pPager->fd, SQLITE_PAGE_SIZE*(off_t)(pPg->pgno-1));
drh3a840692003-01-29 22:58:26 +0000643 rc = sqliteOsRead(&pPager->fd, zBuf, SQLITE_PAGE_SIZE);
drhdb48ee02003-01-16 13:42:43 +0000644 if( rc ) break;
645 }else{
drh3a840692003-01-29 22:58:26 +0000646 memset(zBuf, 0, SQLITE_PAGE_SIZE);
drhdb48ee02003-01-16 13:42:43 +0000647 }
drh3a840692003-01-29 22:58:26 +0000648 if( pPg->nRef==0 || memcmp(zBuf, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE) ){
649 memcpy(PGHDR_TO_DATA(pPg), zBuf, SQLITE_PAGE_SIZE);
650 memset(PGHDR_TO_EXTRA(pPg), 0, pPager->nExtra);
651 }
drhdb48ee02003-01-16 13:42:43 +0000652 pPg->needSync = 0;
653 pPg->dirty = 0;
654 }
655 }
drh4a0681e2003-02-13 01:58:20 +0000656
657end_playback:
drhd9b02572001-04-15 00:37:09 +0000658 if( rc!=SQLITE_OK ){
659 pager_unwritelock(pPager);
660 pPager->errMask |= PAGER_ERR_CORRUPT;
661 rc = SQLITE_CORRUPT;
662 }else{
663 rc = pager_unwritelock(pPager);
drhed7c8552001-04-11 14:29:21 +0000664 }
drhd9b02572001-04-15 00:37:09 +0000665 return rc;
drhed7c8552001-04-11 14:29:21 +0000666}
667
668/*
drhfa86c412002-02-02 15:01:15 +0000669** Playback the checkpoint journal.
670**
671** This is similar to playing back the transaction journal but with
672** a few extra twists.
673**
drh663fc632002-02-02 18:49:19 +0000674** (1) The number of pages in the database file at the start of
675** the checkpoint is stored in pPager->ckptSize, not in the
676** journal file itself.
drhfa86c412002-02-02 15:01:15 +0000677**
678** (2) In addition to playing back the checkpoint journal, also
679** playback all pages of the transaction journal beginning
680** at offset pPager->ckptJSize.
681*/
682static int pager_ckpt_playback(Pager *pPager){
drh968af522003-02-11 14:55:40 +0000683 off_t szJ; /* Size of the full journal */
684 int nRec; /* Number of Records */
drhfa86c412002-02-02 15:01:15 +0000685 int i; /* Loop counter */
686 int rc;
687
688 /* Truncate the database back to its original size.
689 */
drh28be87c2002-11-05 23:03:02 +0000690 rc = sqliteOsTruncate(&pPager->fd, SQLITE_PAGE_SIZE*(off_t)pPager->ckptSize);
drhfa86c412002-02-02 15:01:15 +0000691 pPager->dbSize = pPager->ckptSize;
692
693 /* Figure out how many records are in the checkpoint journal.
694 */
drh0f892532002-05-30 12:27:03 +0000695 assert( pPager->ckptInUse && pPager->journalOpen );
drhfa86c412002-02-02 15:01:15 +0000696 sqliteOsSeek(&pPager->cpfd, 0);
drh9bd47a92003-01-07 14:46:08 +0000697 nRec = pPager->ckptNRec;
drhfa86c412002-02-02 15:01:15 +0000698
699 /* Copy original pages out of the checkpoint journal and back into the
drh968af522003-02-11 14:55:40 +0000700 ** database file. Note that the checkpoint journal always uses format
701 ** 2 instead of format 3 since it does not need to be concerned with
702 ** power failures corrupting the journal and can thus omit the checksums.
drhfa86c412002-02-02 15:01:15 +0000703 */
704 for(i=nRec-1; i>=0; i--){
drh968af522003-02-11 14:55:40 +0000705 rc = pager_playback_one_page(pPager, &pPager->cpfd, 2);
706 assert( rc!=SQLITE_DONE );
drhfa86c412002-02-02 15:01:15 +0000707 if( rc!=SQLITE_OK ) goto end_ckpt_playback;
708 }
709
710 /* Figure out how many pages need to be copied out of the transaction
711 ** journal.
712 */
713 rc = sqliteOsSeek(&pPager->jfd, pPager->ckptJSize);
714 if( rc!=SQLITE_OK ){
715 goto end_ckpt_playback;
716 }
drh968af522003-02-11 14:55:40 +0000717 rc = sqliteOsFileSize(&pPager->jfd, &szJ);
drhfa86c412002-02-02 15:01:15 +0000718 if( rc!=SQLITE_OK ){
719 goto end_ckpt_playback;
720 }
drh968af522003-02-11 14:55:40 +0000721 nRec = (szJ - pPager->ckptJSize)/JOURNAL_PG_SZ(journal_format);
drhfa86c412002-02-02 15:01:15 +0000722 for(i=nRec-1; i>=0; i--){
drh968af522003-02-11 14:55:40 +0000723 rc = pager_playback_one_page(pPager, &pPager->jfd, journal_format);
724 if( rc!=SQLITE_OK ){
725 assert( rc!=SQLITE_DONE );
726 goto end_ckpt_playback;
727 }
drhfa86c412002-02-02 15:01:15 +0000728 }
729
drhfa86c412002-02-02 15:01:15 +0000730end_ckpt_playback:
drhfa86c412002-02-02 15:01:15 +0000731 if( rc!=SQLITE_OK ){
drhfa86c412002-02-02 15:01:15 +0000732 pPager->errMask |= PAGER_ERR_CORRUPT;
733 rc = SQLITE_CORRUPT;
drhfa86c412002-02-02 15:01:15 +0000734 }
735 return rc;
736}
737
738/*
drhf57b14a2001-09-14 18:54:08 +0000739** Change the maximum number of in-memory pages that are allowed.
drhcd61c282002-03-06 22:01:34 +0000740**
741** The maximum number is the absolute value of the mxPage parameter.
742** If mxPage is negative, the noSync flag is also set. noSync bypasses
743** calls to sqliteOsSync(). The pager runs much faster with noSync on,
744** but if the operating system crashes or there is an abrupt power
745** failure, the database file might be left in an inconsistent and
746** unrepairable state.
drhf57b14a2001-09-14 18:54:08 +0000747*/
748void sqlitepager_set_cachesize(Pager *pPager, int mxPage){
drh603240c2002-03-05 01:11:12 +0000749 if( mxPage>=0 ){
drha1680452002-04-18 01:56:57 +0000750 pPager->noSync = pPager->tempFile;
drh603240c2002-03-05 01:11:12 +0000751 }else{
752 pPager->noSync = 1;
753 mxPage = -mxPage;
754 }
drhf57b14a2001-09-14 18:54:08 +0000755 if( mxPage>10 ){
756 pPager->mxPage = mxPage;
757 }
758}
759
760/*
drh973b6e32003-02-12 14:09:42 +0000761** Adjust the robustness of the database to damage due to OS crashes
762** or power failures by changing the number of syncs()s when writing
763** the rollback journal. There are three levels:
764**
765** OFF sqliteOsSync() is never called. This is the default
766** for temporary and transient files.
767**
768** NORMAL The journal is synced once before writes begin on the
769** database. This is normally adequate protection, but
770** it is theoretically possible, though very unlikely,
771** that an inopertune power failure could leave the journal
772** in a state which would cause damage to the database
773** when it is rolled back.
774**
775** FULL The journal is synced twice before writes begin on the
776** database (with some additional information being written
777** in between the two syncs. If we assume that writing a
778** single disk sector is atomic, then this mode provides
779** assurance that the journal will not be corrupted to the
780** point of causing damage to the database during rollback.
781**
782** Numeric values associated with these states are OFF==1, NORMAL=2,
783** and FULL=3.
784*/
785void sqlitepager_set_safety_level(Pager *pPager, int level){
786 pPager->noSync = level==1 || pPager->tempFile;
787 pPager->fullSync = level==3 && !pPager->tempFile;
788}
789
790/*
drhfa86c412002-02-02 15:01:15 +0000791** Open a temporary file. Write the name of the file into zName
792** (zName must be at least SQLITE_TEMPNAME_SIZE bytes long.) Write
793** the file descriptor into *fd. Return SQLITE_OK on success or some
794** other error code if we fail.
795**
796** The OS will automatically delete the temporary file when it is
797** closed.
798*/
799static int sqlitepager_opentemp(char *zFile, OsFile *fd){
800 int cnt = 8;
801 int rc;
802 do{
803 cnt--;
804 sqliteOsTempFileName(zFile);
805 rc = sqliteOsOpenExclusive(zFile, fd, 1);
806 }while( cnt>0 && rc!=SQLITE_OK );
807 return rc;
808}
809
810/*
drhed7c8552001-04-11 14:29:21 +0000811** Create a new page cache and put a pointer to the page cache in *ppPager.
drh5e00f6c2001-09-13 13:46:56 +0000812** The file to be cached need not exist. The file is not locked until
drhd9b02572001-04-15 00:37:09 +0000813** the first call to sqlitepager_get() and is only held open until the
814** last page is released using sqlitepager_unref().
drh382c0242001-10-06 16:33:02 +0000815**
drh6446c4d2001-12-15 14:22:18 +0000816** If zFilename is NULL then a randomly-named temporary file is created
817** and used as the file to be cached. The file will be deleted
818** automatically when it is closed.
drhed7c8552001-04-11 14:29:21 +0000819*/
drh7e3b0a02001-04-28 16:52:40 +0000820int sqlitepager_open(
821 Pager **ppPager, /* Return the Pager structure here */
822 const char *zFilename, /* Name of the database file to open */
823 int mxPage, /* Max number of in-memory cache pages */
drhda47d772002-12-02 04:25:19 +0000824 int nExtra, /* Extra bytes append to each in-memory page */
825 int useJournal /* TRUE to use a rollback journal on this file */
drh7e3b0a02001-04-28 16:52:40 +0000826){
drhed7c8552001-04-11 14:29:21 +0000827 Pager *pPager;
drh3e7a6092002-12-07 21:45:14 +0000828 char *zFullPathname;
drhed7c8552001-04-11 14:29:21 +0000829 int nameLen;
drh8cfbf082001-09-19 13:22:39 +0000830 OsFile fd;
831 int rc;
drh5e00f6c2001-09-13 13:46:56 +0000832 int tempFile;
833 int readOnly = 0;
drh8cfbf082001-09-19 13:22:39 +0000834 char zTemp[SQLITE_TEMPNAME_SIZE];
drhed7c8552001-04-11 14:29:21 +0000835
drhd9b02572001-04-15 00:37:09 +0000836 *ppPager = 0;
837 if( sqlite_malloc_failed ){
838 return SQLITE_NOMEM;
839 }
drh5e00f6c2001-09-13 13:46:56 +0000840 if( zFilename ){
drh3e7a6092002-12-07 21:45:14 +0000841 zFullPathname = sqliteOsFullPathname(zFilename);
842 rc = sqliteOsOpenReadWrite(zFullPathname, &fd, &readOnly);
drh5e00f6c2001-09-13 13:46:56 +0000843 tempFile = 0;
844 }else{
drhfa86c412002-02-02 15:01:15 +0000845 rc = sqlitepager_opentemp(zTemp, &fd);
drh5e00f6c2001-09-13 13:46:56 +0000846 zFilename = zTemp;
drh3e7a6092002-12-07 21:45:14 +0000847 zFullPathname = sqliteOsFullPathname(zFilename);
drh5e00f6c2001-09-13 13:46:56 +0000848 tempFile = 1;
849 }
drh3e7a6092002-12-07 21:45:14 +0000850 if( sqlite_malloc_failed ){
851 return SQLITE_NOMEM;
852 }
drh8cfbf082001-09-19 13:22:39 +0000853 if( rc!=SQLITE_OK ){
drh3e7a6092002-12-07 21:45:14 +0000854 sqliteFree(zFullPathname);
drhed7c8552001-04-11 14:29:21 +0000855 return SQLITE_CANTOPEN;
856 }
drh3e7a6092002-12-07 21:45:14 +0000857 nameLen = strlen(zFullPathname);
drhed7c8552001-04-11 14:29:21 +0000858 pPager = sqliteMalloc( sizeof(*pPager) + nameLen*2 + 30 );
drhd9b02572001-04-15 00:37:09 +0000859 if( pPager==0 ){
drha7fcb052001-12-14 15:09:55 +0000860 sqliteOsClose(&fd);
drh3e7a6092002-12-07 21:45:14 +0000861 sqliteFree(zFullPathname);
drhd9b02572001-04-15 00:37:09 +0000862 return SQLITE_NOMEM;
863 }
drhdb48ee02003-01-16 13:42:43 +0000864 SET_PAGER(pPager);
drhed7c8552001-04-11 14:29:21 +0000865 pPager->zFilename = (char*)&pPager[1];
866 pPager->zJournal = &pPager->zFilename[nameLen+1];
drh3e7a6092002-12-07 21:45:14 +0000867 strcpy(pPager->zFilename, zFullPathname);
868 strcpy(pPager->zJournal, zFullPathname);
869 sqliteFree(zFullPathname);
drhed7c8552001-04-11 14:29:21 +0000870 strcpy(&pPager->zJournal[nameLen], "-journal");
871 pPager->fd = fd;
drh8cfbf082001-09-19 13:22:39 +0000872 pPager->journalOpen = 0;
drhda47d772002-12-02 04:25:19 +0000873 pPager->useJournal = useJournal;
drhfa86c412002-02-02 15:01:15 +0000874 pPager->ckptOpen = 0;
drh0f892532002-05-30 12:27:03 +0000875 pPager->ckptInUse = 0;
drhed7c8552001-04-11 14:29:21 +0000876 pPager->nRef = 0;
877 pPager->dbSize = -1;
drhfa86c412002-02-02 15:01:15 +0000878 pPager->ckptSize = 0;
879 pPager->ckptJSize = 0;
drhed7c8552001-04-11 14:29:21 +0000880 pPager->nPage = 0;
drhd79caeb2001-04-15 02:27:24 +0000881 pPager->mxPage = mxPage>5 ? mxPage : 10;
drhed7c8552001-04-11 14:29:21 +0000882 pPager->state = SQLITE_UNLOCK;
drhd9b02572001-04-15 00:37:09 +0000883 pPager->errMask = 0;
drh5e00f6c2001-09-13 13:46:56 +0000884 pPager->tempFile = tempFile;
885 pPager->readOnly = readOnly;
drhf57b14a2001-09-14 18:54:08 +0000886 pPager->needSync = 0;
drhda47d772002-12-02 04:25:19 +0000887 pPager->noSync = pPager->tempFile || !useJournal;
drhed7c8552001-04-11 14:29:21 +0000888 pPager->pFirst = 0;
drh341eae82003-01-21 02:39:36 +0000889 pPager->pFirstSynced = 0;
drhed7c8552001-04-11 14:29:21 +0000890 pPager->pLast = 0;
drh7c717f72001-06-24 20:39:41 +0000891 pPager->nExtra = nExtra;
drhed7c8552001-04-11 14:29:21 +0000892 memset(pPager->aHash, 0, sizeof(pPager->aHash));
893 *ppPager = pPager;
894 return SQLITE_OK;
895}
896
897/*
drh72f82862001-05-24 21:06:34 +0000898** Set the destructor for this pager. If not NULL, the destructor is called
drh5e00f6c2001-09-13 13:46:56 +0000899** when the reference count on each page reaches zero. The destructor can
900** be used to clean up information in the extra segment appended to each page.
drh72f82862001-05-24 21:06:34 +0000901**
902** The destructor is not called as a result sqlitepager_close().
903** Destructors are only called by sqlitepager_unref().
904*/
905void sqlitepager_set_destructor(Pager *pPager, void (*xDesc)(void*)){
906 pPager->xDestructor = xDesc;
907}
908
909/*
drh5e00f6c2001-09-13 13:46:56 +0000910** Return the total number of pages in the disk file associated with
911** pPager.
drhed7c8552001-04-11 14:29:21 +0000912*/
drhd9b02572001-04-15 00:37:09 +0000913int sqlitepager_pagecount(Pager *pPager){
drh28be87c2002-11-05 23:03:02 +0000914 off_t n;
drhd9b02572001-04-15 00:37:09 +0000915 assert( pPager!=0 );
drhed7c8552001-04-11 14:29:21 +0000916 if( pPager->dbSize>=0 ){
917 return pPager->dbSize;
918 }
drha7fcb052001-12-14 15:09:55 +0000919 if( sqliteOsFileSize(&pPager->fd, &n)!=SQLITE_OK ){
drh81a20f22001-10-12 17:30:04 +0000920 pPager->errMask |= PAGER_ERR_DISK;
drh8cfbf082001-09-19 13:22:39 +0000921 return 0;
drhed7c8552001-04-11 14:29:21 +0000922 }
drh8cfbf082001-09-19 13:22:39 +0000923 n /= SQLITE_PAGE_SIZE;
drhd9b02572001-04-15 00:37:09 +0000924 if( pPager->state!=SQLITE_UNLOCK ){
drhed7c8552001-04-11 14:29:21 +0000925 pPager->dbSize = n;
926 }
927 return n;
928}
929
930/*
drhf7c57532003-04-25 13:22:51 +0000931** Forward declaration
932*/
933static int syncAllPages(Pager*);
934
935/*
936** Truncate the file to the number of pages specified.
937*/
938int sqlitepager_truncate(Pager *pPager, Pgno nPage){
939 int rc;
drh2e6d11b2003-04-25 15:37:57 +0000940 if( pPager->dbSize<0 ){
941 sqlitepager_pagecount(pPager);
942 }
943 if( pPager->errMask!=0 ){
944 rc = pager_errcode(pPager);
945 return rc;
946 }
drhf7c57532003-04-25 13:22:51 +0000947 if( nPage>=pPager->dbSize ){
948 return SQLITE_OK;
949 }
950 syncAllPages(pPager);
951 rc = sqliteOsTruncate(&pPager->fd, SQLITE_PAGE_SIZE*(off_t)nPage);
952 if( rc==SQLITE_OK ){
953 pPager->dbSize = nPage;
954 }
955 return rc;
956}
957
958/*
drhed7c8552001-04-11 14:29:21 +0000959** Shutdown the page cache. Free all memory and close all files.
960**
961** If a transaction was in progress when this routine is called, that
962** transaction is rolled back. All outstanding pages are invalidated
963** and their memory is freed. Any attempt to use a page associated
964** with this page cache after this function returns will likely
965** result in a coredump.
966*/
drhd9b02572001-04-15 00:37:09 +0000967int sqlitepager_close(Pager *pPager){
968 PgHdr *pPg, *pNext;
drhed7c8552001-04-11 14:29:21 +0000969 switch( pPager->state ){
970 case SQLITE_WRITELOCK: {
drhd9b02572001-04-15 00:37:09 +0000971 sqlitepager_rollback(pPager);
drha7fcb052001-12-14 15:09:55 +0000972 sqliteOsUnlock(&pPager->fd);
drh8cfbf082001-09-19 13:22:39 +0000973 assert( pPager->journalOpen==0 );
drhed7c8552001-04-11 14:29:21 +0000974 break;
975 }
976 case SQLITE_READLOCK: {
drha7fcb052001-12-14 15:09:55 +0000977 sqliteOsUnlock(&pPager->fd);
drhed7c8552001-04-11 14:29:21 +0000978 break;
979 }
980 default: {
981 /* Do nothing */
982 break;
983 }
984 }
drhd9b02572001-04-15 00:37:09 +0000985 for(pPg=pPager->pAll; pPg; pPg=pNext){
986 pNext = pPg->pNextAll;
987 sqliteFree(pPg);
drhed7c8552001-04-11 14:29:21 +0000988 }
drha7fcb052001-12-14 15:09:55 +0000989 sqliteOsClose(&pPager->fd);
drh8cfbf082001-09-19 13:22:39 +0000990 assert( pPager->journalOpen==0 );
drh0f892532002-05-30 12:27:03 +0000991 /* Temp files are automatically deleted by the OS
992 ** if( pPager->tempFile ){
993 ** sqliteOsDelete(pPager->zFilename);
994 ** }
995 */
drhdb48ee02003-01-16 13:42:43 +0000996 CLR_PAGER(pPager);
drh73509ee2003-04-06 20:44:45 +0000997 if( pPager->zFilename!=(char*)&pPager[1] ){
998 sqliteFree(pPager->zFilename);
999 sqliteFree(pPager->zJournal);
1000 }
drhed7c8552001-04-11 14:29:21 +00001001 sqliteFree(pPager);
1002 return SQLITE_OK;
1003}
1004
1005/*
drh5e00f6c2001-09-13 13:46:56 +00001006** Return the page number for the given page data.
drhed7c8552001-04-11 14:29:21 +00001007*/
drhd9b02572001-04-15 00:37:09 +00001008Pgno sqlitepager_pagenumber(void *pData){
drhed7c8552001-04-11 14:29:21 +00001009 PgHdr *p = DATA_TO_PGHDR(pData);
1010 return p->pgno;
1011}
1012
1013/*
drh7e3b0a02001-04-28 16:52:40 +00001014** Increment the reference count for a page. If the page is
1015** currently on the freelist (the reference count is zero) then
1016** remove it from the freelist.
1017*/
drh836faa42003-01-11 13:30:57 +00001018#define page_ref(P) ((P)->nRef==0?_page_ref(P):(void)(P)->nRef++)
1019static void _page_ref(PgHdr *pPg){
drh7e3b0a02001-04-28 16:52:40 +00001020 if( pPg->nRef==0 ){
1021 /* The page is currently on the freelist. Remove it. */
drh341eae82003-01-21 02:39:36 +00001022 if( pPg==pPg->pPager->pFirstSynced ){
1023 PgHdr *p = pPg->pNextFree;
1024 while( p && p->needSync ){ p = p->pNextFree; }
1025 pPg->pPager->pFirstSynced = p;
1026 }
drh7e3b0a02001-04-28 16:52:40 +00001027 if( pPg->pPrevFree ){
1028 pPg->pPrevFree->pNextFree = pPg->pNextFree;
1029 }else{
1030 pPg->pPager->pFirst = pPg->pNextFree;
1031 }
1032 if( pPg->pNextFree ){
1033 pPg->pNextFree->pPrevFree = pPg->pPrevFree;
1034 }else{
1035 pPg->pPager->pLast = pPg->pPrevFree;
1036 }
1037 pPg->pPager->nRef++;
1038 }
1039 pPg->nRef++;
drhdd793422001-06-28 01:54:48 +00001040 REFINFO(pPg);
drhdf0b3b02001-06-23 11:36:20 +00001041}
1042
1043/*
1044** Increment the reference count for a page. The input pointer is
1045** a reference to the page data.
1046*/
1047int sqlitepager_ref(void *pData){
1048 PgHdr *pPg = DATA_TO_PGHDR(pData);
1049 page_ref(pPg);
drh8c42ca92001-06-22 19:15:00 +00001050 return SQLITE_OK;
drh7e3b0a02001-04-28 16:52:40 +00001051}
1052
1053/*
drhb19a2bc2001-09-16 00:13:26 +00001054** Sync the journal and then write all free dirty pages to the database
1055** file.
1056**
1057** Writing all free dirty pages to the database after the sync is a
1058** non-obvious optimization. fsync() is an expensive operation so we
drhaaab5722002-02-19 13:39:21 +00001059** want to minimize the number ot times it is called. After an fsync() call,
drh6446c4d2001-12-15 14:22:18 +00001060** we are free to write dirty pages back to the database. It is best
1061** to go ahead and write as many dirty pages as possible to minimize
1062** the risk of having to do another fsync() later on. Writing dirty
1063** free pages in this way was observed to make database operations go
1064** up to 10 times faster.
drhfa86c412002-02-02 15:01:15 +00001065**
1066** If we are writing to temporary database, there is no need to preserve
1067** the integrity of the journal file, so we can save time and skip the
1068** fsync().
drh50e5dad2001-09-15 00:57:28 +00001069*/
1070static int syncAllPages(Pager *pPager){
1071 PgHdr *pPg;
1072 int rc = SQLITE_OK;
drh03eb96a2002-11-10 23:32:56 +00001073
1074 /* Sync the journal before modifying the main database
1075 ** (assuming there is a journal and it needs to be synced.)
1076 */
drh50e5dad2001-09-15 00:57:28 +00001077 if( pPager->needSync ){
drhfa86c412002-02-02 15:01:15 +00001078 if( !pPager->tempFile ){
drhdb48ee02003-01-16 13:42:43 +00001079 assert( pPager->journalOpen );
1080 assert( !pPager->noSync );
drh968af522003-02-11 14:55:40 +00001081#ifndef NDEBUG
1082 {
drh4a0681e2003-02-13 01:58:20 +00001083 off_t hdrSz, pgSz, jSz;
drh968af522003-02-11 14:55:40 +00001084 hdrSz = JOURNAL_HDR_SZ(journal_format);
1085 pgSz = JOURNAL_PG_SZ(journal_format);
drh4a0681e2003-02-13 01:58:20 +00001086 rc = sqliteOsFileSize(&pPager->jfd, &jSz);
drh968af522003-02-11 14:55:40 +00001087 if( rc!=0 ) return rc;
drh4a0681e2003-02-13 01:58:20 +00001088 assert( pPager->nRec*pgSz+hdrSz==jSz );
drh968af522003-02-11 14:55:40 +00001089 }
1090#endif
drhd8d66e82003-02-12 02:10:15 +00001091 if( journal_format>=3 ){
1092 off_t szJ;
1093 if( pPager->fullSync ){
1094 TRACE1("SYNC\n");
1095 rc = sqliteOsSync(&pPager->jfd);
1096 if( rc!=0 ) return rc;
1097 }
1098 sqliteOsSeek(&pPager->jfd, sizeof(aJournalMagic1));
drh99ee3602003-02-16 19:13:36 +00001099 rc = write32bits(&pPager->jfd, pPager->nRec);
1100 if( rc ) return rc;
drhd8d66e82003-02-12 02:10:15 +00001101 szJ = JOURNAL_HDR_SZ(journal_format) +
1102 pPager->nRec*JOURNAL_PG_SZ(journal_format);
1103 sqliteOsSeek(&pPager->jfd, szJ);
drh968af522003-02-11 14:55:40 +00001104 }
drhdb48ee02003-01-16 13:42:43 +00001105 TRACE1("SYNC\n");
drhfa86c412002-02-02 15:01:15 +00001106 rc = sqliteOsSync(&pPager->jfd);
1107 if( rc!=0 ) return rc;
drhdb48ee02003-01-16 13:42:43 +00001108 pPager->journalStarted = 1;
drhfa86c412002-02-02 15:01:15 +00001109 }
drh50e5dad2001-09-15 00:57:28 +00001110 pPager->needSync = 0;
drh341eae82003-01-21 02:39:36 +00001111
1112 /* Erase the needSync flag from every page.
1113 */
1114 for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
1115 pPg->needSync = 0;
1116 }
1117 pPager->pFirstSynced = pPager->pFirst;
drh50e5dad2001-09-15 00:57:28 +00001118 }
drh03eb96a2002-11-10 23:32:56 +00001119
drh341eae82003-01-21 02:39:36 +00001120#ifndef NDEBUG
1121 /* If the Pager.needSync flag is clear then the PgHdr.needSync
1122 ** flag must also be clear for all pages. Verify that this
1123 ** invariant is true.
drh03eb96a2002-11-10 23:32:56 +00001124 */
drh341eae82003-01-21 02:39:36 +00001125 else{
1126 for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
1127 assert( pPg->needSync==0 );
1128 }
1129 assert( pPager->pFirstSynced==pPager->pFirst );
drh03eb96a2002-11-10 23:32:56 +00001130 }
drh341eae82003-01-21 02:39:36 +00001131#endif
drhdb48ee02003-01-16 13:42:43 +00001132
drh81a20f22001-10-12 17:30:04 +00001133 return rc;
drh50e5dad2001-09-15 00:57:28 +00001134}
1135
1136/*
drh2554f8b2003-01-22 01:26:44 +00001137** Given a list of pages (connected by the PgHdr.pDirty pointer) write
1138** every one of those pages out to the database file and mark them all
1139** as clean.
1140*/
1141static int pager_write_pagelist(PgHdr *pList){
1142 Pager *pPager;
1143 int rc;
1144
1145 if( pList==0 ) return SQLITE_OK;
1146 pPager = pList->pPager;
1147 while( pList ){
1148 assert( pList->dirty );
1149 sqliteOsSeek(&pPager->fd, (pList->pgno-1)*(off_t)SQLITE_PAGE_SIZE);
1150 rc = sqliteOsWrite(&pPager->fd, PGHDR_TO_DATA(pList), SQLITE_PAGE_SIZE);
1151 if( rc ) return rc;
1152 pList->dirty = 0;
1153 pList = pList->pDirty;
1154 }
1155 return SQLITE_OK;
1156}
1157
1158/*
1159** Collect every dirty page into a dirty list and
1160** return a pointer to the head of that list. All pages are
1161** collected even if they are still in use.
1162*/
1163static PgHdr *pager_get_all_dirty_pages(Pager *pPager){
1164 PgHdr *p, *pList;
1165 pList = 0;
1166 for(p=pPager->pAll; p; p=p->pNextAll){
1167 if( p->dirty ){
1168 p->pDirty = pList;
1169 pList = p;
1170 }
1171 }
1172 return pList;
1173}
1174
1175/*
drhd9b02572001-04-15 00:37:09 +00001176** Acquire a page.
1177**
drh58a11682001-11-10 13:51:08 +00001178** A read lock on the disk file is obtained when the first page is acquired.
drh5e00f6c2001-09-13 13:46:56 +00001179** This read lock is dropped when the last page is released.
drhd9b02572001-04-15 00:37:09 +00001180**
drh306dc212001-05-21 13:45:10 +00001181** A _get works for any page number greater than 0. If the database
1182** file is smaller than the requested page, then no actual disk
1183** read occurs and the memory image of the page is initialized to
1184** all zeros. The extra data appended to a page is always initialized
1185** to zeros the first time a page is loaded into memory.
1186**
drhd9b02572001-04-15 00:37:09 +00001187** The acquisition might fail for several reasons. In all cases,
1188** an appropriate error code is returned and *ppPage is set to NULL.
drh7e3b0a02001-04-28 16:52:40 +00001189**
1190** See also sqlitepager_lookup(). Both this routine and _lookup() attempt
1191** to find a page in the in-memory cache first. If the page is not already
drh5e00f6c2001-09-13 13:46:56 +00001192** in memory, this routine goes to disk to read it in whereas _lookup()
drh7e3b0a02001-04-28 16:52:40 +00001193** just returns 0. This routine acquires a read-lock the first time it
1194** has to go to disk, and could also playback an old journal if necessary.
1195** Since _lookup() never goes to disk, it never has to deal with locks
1196** or journal files.
drhed7c8552001-04-11 14:29:21 +00001197*/
drhd9b02572001-04-15 00:37:09 +00001198int sqlitepager_get(Pager *pPager, Pgno pgno, void **ppPage){
drhed7c8552001-04-11 14:29:21 +00001199 PgHdr *pPg;
drh8766c342002-11-09 00:33:15 +00001200 int rc;
drhed7c8552001-04-11 14:29:21 +00001201
drhd9b02572001-04-15 00:37:09 +00001202 /* Make sure we have not hit any critical errors.
1203 */
drh836faa42003-01-11 13:30:57 +00001204 assert( pPager!=0 );
1205 assert( pgno!=0 );
drh2e6d11b2003-04-25 15:37:57 +00001206 *ppPage = 0;
drhd9b02572001-04-15 00:37:09 +00001207 if( pPager->errMask & ~(PAGER_ERR_FULL) ){
1208 return pager_errcode(pPager);
1209 }
1210
drhed7c8552001-04-11 14:29:21 +00001211 /* If this is the first page accessed, then get a read lock
1212 ** on the database file.
1213 */
1214 if( pPager->nRef==0 ){
drh8766c342002-11-09 00:33:15 +00001215 rc = sqliteOsReadLock(&pPager->fd);
1216 if( rc!=SQLITE_OK ){
drh8766c342002-11-09 00:33:15 +00001217 return rc;
drhed7c8552001-04-11 14:29:21 +00001218 }
drhd9b02572001-04-15 00:37:09 +00001219 pPager->state = SQLITE_READLOCK;
drhed7c8552001-04-11 14:29:21 +00001220
1221 /* If a journal file exists, try to play it back.
1222 */
drhda47d772002-12-02 04:25:19 +00001223 if( pPager->useJournal && sqliteOsFileExists(pPager->zJournal) ){
drhf57b3392001-10-08 13:22:32 +00001224 int rc, dummy;
drhed7c8552001-04-11 14:29:21 +00001225
drha7fcb052001-12-14 15:09:55 +00001226 /* Get a write lock on the database
1227 */
1228 rc = sqliteOsWriteLock(&pPager->fd);
1229 if( rc!=SQLITE_OK ){
drh8766c342002-11-09 00:33:15 +00001230 if( sqliteOsUnlock(&pPager->fd)!=SQLITE_OK ){
1231 /* This should never happen! */
1232 rc = SQLITE_INTERNAL;
1233 }
drh8766c342002-11-09 00:33:15 +00001234 return rc;
drha7fcb052001-12-14 15:09:55 +00001235 }
1236 pPager->state = SQLITE_WRITELOCK;
1237
drhed7c8552001-04-11 14:29:21 +00001238 /* Open the journal for exclusive access. Return SQLITE_BUSY if
drhf57b3392001-10-08 13:22:32 +00001239 ** we cannot get exclusive access to the journal file.
1240 **
1241 ** Even though we will only be reading from the journal, not writing,
1242 ** we have to open the journal for writing in order to obtain an
1243 ** exclusive access lock.
drhed7c8552001-04-11 14:29:21 +00001244 */
drhf57b3392001-10-08 13:22:32 +00001245 rc = sqliteOsOpenReadWrite(pPager->zJournal, &pPager->jfd, &dummy);
drha7fcb052001-12-14 15:09:55 +00001246 if( rc!=SQLITE_OK ){
1247 rc = sqliteOsUnlock(&pPager->fd);
1248 assert( rc==SQLITE_OK );
drhed7c8552001-04-11 14:29:21 +00001249 return SQLITE_BUSY;
1250 }
drha7fcb052001-12-14 15:09:55 +00001251 pPager->journalOpen = 1;
drhdb48ee02003-01-16 13:42:43 +00001252 pPager->journalStarted = 0;
drhed7c8552001-04-11 14:29:21 +00001253
1254 /* Playback and delete the journal. Drop the database write
1255 ** lock and reacquire the read lock.
1256 */
drh99ee3602003-02-16 19:13:36 +00001257 rc = pager_playback(pPager, 0);
drhd9b02572001-04-15 00:37:09 +00001258 if( rc!=SQLITE_OK ){
1259 return rc;
1260 }
drhed7c8552001-04-11 14:29:21 +00001261 }
1262 pPg = 0;
1263 }else{
1264 /* Search for page in cache */
drhd9b02572001-04-15 00:37:09 +00001265 pPg = pager_lookup(pPager, pgno);
drhed7c8552001-04-11 14:29:21 +00001266 }
1267 if( pPg==0 ){
drhd9b02572001-04-15 00:37:09 +00001268 /* The requested page is not in the page cache. */
drhed7c8552001-04-11 14:29:21 +00001269 int h;
drh7e3b0a02001-04-28 16:52:40 +00001270 pPager->nMiss++;
drhed7c8552001-04-11 14:29:21 +00001271 if( pPager->nPage<pPager->mxPage || pPager->pFirst==0 ){
1272 /* Create a new page */
drh968af522003-02-11 14:55:40 +00001273 pPg = sqliteMallocRaw( sizeof(*pPg) + SQLITE_PAGE_SIZE
1274 + sizeof(u32) + pPager->nExtra );
drhd9b02572001-04-15 00:37:09 +00001275 if( pPg==0 ){
drhd9b02572001-04-15 00:37:09 +00001276 pager_unwritelock(pPager);
1277 pPager->errMask |= PAGER_ERR_MEM;
1278 return SQLITE_NOMEM;
1279 }
drh8c1238a2003-01-02 14:43:55 +00001280 memset(pPg, 0, sizeof(*pPg));
drhed7c8552001-04-11 14:29:21 +00001281 pPg->pPager = pPager;
drhd9b02572001-04-15 00:37:09 +00001282 pPg->pNextAll = pPager->pAll;
1283 if( pPager->pAll ){
1284 pPager->pAll->pPrevAll = pPg;
1285 }
1286 pPg->pPrevAll = 0;
drhd79caeb2001-04-15 02:27:24 +00001287 pPager->pAll = pPg;
drhd9b02572001-04-15 00:37:09 +00001288 pPager->nPage++;
drhed7c8552001-04-11 14:29:21 +00001289 }else{
drhdb48ee02003-01-16 13:42:43 +00001290 /* Find a page to recycle. Try to locate a page that does not
1291 ** require us to do an fsync() on the journal.
1292 */
drh341eae82003-01-21 02:39:36 +00001293 pPg = pPager->pFirstSynced;
drhb19a2bc2001-09-16 00:13:26 +00001294
drhdb48ee02003-01-16 13:42:43 +00001295 /* If we could not find a page that does not require an fsync()
1296 ** on the journal file then fsync the journal file. This is a
1297 ** very slow operation, so we work hard to avoid it. But sometimes
1298 ** it can't be helped.
drhb19a2bc2001-09-16 00:13:26 +00001299 */
drh603240c2002-03-05 01:11:12 +00001300 if( pPg==0 ){
drh50e5dad2001-09-15 00:57:28 +00001301 int rc = syncAllPages(pPager);
1302 if( rc!=0 ){
1303 sqlitepager_rollback(pPager);
drh50e5dad2001-09-15 00:57:28 +00001304 return SQLITE_IOERR;
1305 }
1306 pPg = pPager->pFirst;
1307 }
drhd9b02572001-04-15 00:37:09 +00001308 assert( pPg->nRef==0 );
drhdb48ee02003-01-16 13:42:43 +00001309
1310 /* Write the page to the database file if it is dirty.
1311 */
1312 if( pPg->dirty ){
1313 assert( pPg->needSync==0 );
drh2554f8b2003-01-22 01:26:44 +00001314 pPg->pDirty = 0;
1315 rc = pager_write_pagelist( pPg );
drhdb48ee02003-01-16 13:42:43 +00001316 if( rc!=SQLITE_OK ){
1317 sqlitepager_rollback(pPager);
drhdb48ee02003-01-16 13:42:43 +00001318 return SQLITE_IOERR;
1319 }
drhdb48ee02003-01-16 13:42:43 +00001320 }
drh50e5dad2001-09-15 00:57:28 +00001321 assert( pPg->dirty==0 );
drhd9b02572001-04-15 00:37:09 +00001322
drhdb48ee02003-01-16 13:42:43 +00001323 /* If the page we are recycling is marked as alwaysRollback, then
drh193a6b42002-07-07 16:52:46 +00001324 ** set the global alwaysRollback flag, thus disabling the
1325 ** sqlite_dont_rollback() optimization for the rest of this transaction.
1326 ** It is necessary to do this because the page marked alwaysRollback
1327 ** might be reloaded at a later time but at that point we won't remember
1328 ** that is was marked alwaysRollback. This means that all pages must
1329 ** be marked as alwaysRollback from here on out.
1330 */
1331 if( pPg->alwaysRollback ){
1332 pPager->alwaysRollback = 1;
1333 }
1334
drhd9b02572001-04-15 00:37:09 +00001335 /* Unlink the old page from the free list and the hash table
1336 */
drh341eae82003-01-21 02:39:36 +00001337 if( pPg==pPager->pFirstSynced ){
1338 PgHdr *p = pPg->pNextFree;
1339 while( p && p->needSync ){ p = p->pNextFree; }
1340 pPager->pFirstSynced = p;
1341 }
drh6019e162001-07-02 17:51:45 +00001342 if( pPg->pPrevFree ){
1343 pPg->pPrevFree->pNextFree = pPg->pNextFree;
drhed7c8552001-04-11 14:29:21 +00001344 }else{
drh6019e162001-07-02 17:51:45 +00001345 assert( pPager->pFirst==pPg );
1346 pPager->pFirst = pPg->pNextFree;
drhed7c8552001-04-11 14:29:21 +00001347 }
drh6019e162001-07-02 17:51:45 +00001348 if( pPg->pNextFree ){
1349 pPg->pNextFree->pPrevFree = pPg->pPrevFree;
1350 }else{
1351 assert( pPager->pLast==pPg );
1352 pPager->pLast = pPg->pPrevFree;
1353 }
1354 pPg->pNextFree = pPg->pPrevFree = 0;
drhed7c8552001-04-11 14:29:21 +00001355 if( pPg->pNextHash ){
1356 pPg->pNextHash->pPrevHash = pPg->pPrevHash;
1357 }
1358 if( pPg->pPrevHash ){
1359 pPg->pPrevHash->pNextHash = pPg->pNextHash;
1360 }else{
drhd9b02572001-04-15 00:37:09 +00001361 h = pager_hash(pPg->pgno);
drhed7c8552001-04-11 14:29:21 +00001362 assert( pPager->aHash[h]==pPg );
1363 pPager->aHash[h] = pPg->pNextHash;
1364 }
drh6019e162001-07-02 17:51:45 +00001365 pPg->pNextHash = pPg->pPrevHash = 0;
drhd9b02572001-04-15 00:37:09 +00001366 pPager->nOvfl++;
drhed7c8552001-04-11 14:29:21 +00001367 }
1368 pPg->pgno = pgno;
drh1ab43002002-01-14 09:28:19 +00001369 if( pPager->aInJournal && (int)pgno<=pPager->origDbSize ){
drhed6c8672003-01-12 18:02:16 +00001370 sqliteCheckMemory(pPager->aInJournal, pgno/8);
drhdb48ee02003-01-16 13:42:43 +00001371 assert( pPager->journalOpen );
drh6019e162001-07-02 17:51:45 +00001372 pPg->inJournal = (pPager->aInJournal[pgno/8] & (1<<(pgno&7)))!=0;
drhdb48ee02003-01-16 13:42:43 +00001373 pPg->needSync = 0;
drh6019e162001-07-02 17:51:45 +00001374 }else{
1375 pPg->inJournal = 0;
drhdb48ee02003-01-16 13:42:43 +00001376 pPg->needSync = 0;
drh6019e162001-07-02 17:51:45 +00001377 }
drh03eb96a2002-11-10 23:32:56 +00001378 if( pPager->aInCkpt && (int)pgno<=pPager->ckptSize
1379 && (pPager->aInCkpt[pgno/8] & (1<<(pgno&7)))!=0 ){
1380 page_add_to_ckpt_list(pPg);
drhfa86c412002-02-02 15:01:15 +00001381 }else{
drh03eb96a2002-11-10 23:32:56 +00001382 page_remove_from_ckpt_list(pPg);
drhfa86c412002-02-02 15:01:15 +00001383 }
drhed7c8552001-04-11 14:29:21 +00001384 pPg->dirty = 0;
1385 pPg->nRef = 1;
drhdd793422001-06-28 01:54:48 +00001386 REFINFO(pPg);
drhd9b02572001-04-15 00:37:09 +00001387 pPager->nRef++;
1388 h = pager_hash(pgno);
drhed7c8552001-04-11 14:29:21 +00001389 pPg->pNextHash = pPager->aHash[h];
1390 pPager->aHash[h] = pPg;
1391 if( pPg->pNextHash ){
1392 assert( pPg->pNextHash->pPrevHash==0 );
1393 pPg->pNextHash->pPrevHash = pPg;
1394 }
drh2e6d11b2003-04-25 15:37:57 +00001395 if( pPager->nExtra>0 ){
1396 memset(PGHDR_TO_EXTRA(pPg), 0, pPager->nExtra);
1397 }
drh306dc212001-05-21 13:45:10 +00001398 if( pPager->dbSize<0 ) sqlitepager_pagecount(pPager);
drh2e6d11b2003-04-25 15:37:57 +00001399 if( pPager->errMask!=0 ){
1400 sqlitepager_unref(PGHDR_TO_DATA(pPg));
1401 rc = pager_errcode(pPager);
1402 return rc;
1403 }
drh1ab43002002-01-14 09:28:19 +00001404 if( pPager->dbSize<(int)pgno ){
drh306dc212001-05-21 13:45:10 +00001405 memset(PGHDR_TO_DATA(pPg), 0, SQLITE_PAGE_SIZE);
1406 }else{
drh81a20f22001-10-12 17:30:04 +00001407 int rc;
drhd0d006e2002-12-01 02:00:57 +00001408 sqliteOsSeek(&pPager->fd, (pgno-1)*(off_t)SQLITE_PAGE_SIZE);
drha7fcb052001-12-14 15:09:55 +00001409 rc = sqliteOsRead(&pPager->fd, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE);
drh81a20f22001-10-12 17:30:04 +00001410 if( rc!=SQLITE_OK ){
drh28be87c2002-11-05 23:03:02 +00001411 off_t fileSize;
drh4e371ee2002-09-05 16:08:27 +00001412 if( sqliteOsFileSize(&pPager->fd,&fileSize)!=SQLITE_OK
1413 || fileSize>=pgno*SQLITE_PAGE_SIZE ){
drh2e6d11b2003-04-25 15:37:57 +00001414 sqlitepager_unref(PGHDR_TO_DATA(pPg));
drh4e371ee2002-09-05 16:08:27 +00001415 return rc;
1416 }else{
1417 memset(PGHDR_TO_DATA(pPg), 0, SQLITE_PAGE_SIZE);
1418 }
drh81a20f22001-10-12 17:30:04 +00001419 }
drh306dc212001-05-21 13:45:10 +00001420 }
drhed7c8552001-04-11 14:29:21 +00001421 }else{
drhd9b02572001-04-15 00:37:09 +00001422 /* The requested page is in the page cache. */
drh7e3b0a02001-04-28 16:52:40 +00001423 pPager->nHit++;
drhdf0b3b02001-06-23 11:36:20 +00001424 page_ref(pPg);
drhed7c8552001-04-11 14:29:21 +00001425 }
1426 *ppPage = PGHDR_TO_DATA(pPg);
1427 return SQLITE_OK;
1428}
1429
1430/*
drh7e3b0a02001-04-28 16:52:40 +00001431** Acquire a page if it is already in the in-memory cache. Do
1432** not read the page from disk. Return a pointer to the page,
1433** or 0 if the page is not in cache.
1434**
1435** See also sqlitepager_get(). The difference between this routine
1436** and sqlitepager_get() is that _get() will go to the disk and read
1437** in the page if the page is not already in cache. This routine
drh5e00f6c2001-09-13 13:46:56 +00001438** returns NULL if the page is not in cache or if a disk I/O error
1439** has ever happened.
drh7e3b0a02001-04-28 16:52:40 +00001440*/
1441void *sqlitepager_lookup(Pager *pPager, Pgno pgno){
1442 PgHdr *pPg;
1443
drh836faa42003-01-11 13:30:57 +00001444 assert( pPager!=0 );
1445 assert( pgno!=0 );
drh7e3b0a02001-04-28 16:52:40 +00001446 if( pPager->errMask & ~(PAGER_ERR_FULL) ){
1447 return 0;
1448 }
drh836faa42003-01-11 13:30:57 +00001449 /* if( pPager->nRef==0 ){
1450 ** return 0;
1451 ** }
1452 */
drh7e3b0a02001-04-28 16:52:40 +00001453 pPg = pager_lookup(pPager, pgno);
1454 if( pPg==0 ) return 0;
drhdf0b3b02001-06-23 11:36:20 +00001455 page_ref(pPg);
drh7e3b0a02001-04-28 16:52:40 +00001456 return PGHDR_TO_DATA(pPg);
1457}
1458
1459/*
drhed7c8552001-04-11 14:29:21 +00001460** Release a page.
1461**
1462** If the number of references to the page drop to zero, then the
1463** page is added to the LRU list. When all references to all pages
drhd9b02572001-04-15 00:37:09 +00001464** are released, a rollback occurs and the lock on the database is
drhed7c8552001-04-11 14:29:21 +00001465** removed.
1466*/
drhd9b02572001-04-15 00:37:09 +00001467int sqlitepager_unref(void *pData){
drhed7c8552001-04-11 14:29:21 +00001468 PgHdr *pPg;
drhd9b02572001-04-15 00:37:09 +00001469
1470 /* Decrement the reference count for this page
1471 */
drhed7c8552001-04-11 14:29:21 +00001472 pPg = DATA_TO_PGHDR(pData);
1473 assert( pPg->nRef>0 );
drhed7c8552001-04-11 14:29:21 +00001474 pPg->nRef--;
drhdd793422001-06-28 01:54:48 +00001475 REFINFO(pPg);
drhd9b02572001-04-15 00:37:09 +00001476
drh72f82862001-05-24 21:06:34 +00001477 /* When the number of references to a page reach 0, call the
1478 ** destructor and add the page to the freelist.
drhd9b02572001-04-15 00:37:09 +00001479 */
drhed7c8552001-04-11 14:29:21 +00001480 if( pPg->nRef==0 ){
drh1eaa2692001-09-18 02:02:23 +00001481 Pager *pPager;
1482 pPager = pPg->pPager;
drhd9b02572001-04-15 00:37:09 +00001483 pPg->pNextFree = 0;
1484 pPg->pPrevFree = pPager->pLast;
drhed7c8552001-04-11 14:29:21 +00001485 pPager->pLast = pPg;
drhd9b02572001-04-15 00:37:09 +00001486 if( pPg->pPrevFree ){
1487 pPg->pPrevFree->pNextFree = pPg;
drhed7c8552001-04-11 14:29:21 +00001488 }else{
1489 pPager->pFirst = pPg;
1490 }
drh341eae82003-01-21 02:39:36 +00001491 if( pPg->needSync==0 && pPager->pFirstSynced==0 ){
1492 pPager->pFirstSynced = pPg;
1493 }
drh72f82862001-05-24 21:06:34 +00001494 if( pPager->xDestructor ){
1495 pPager->xDestructor(pData);
1496 }
drhd9b02572001-04-15 00:37:09 +00001497
1498 /* When all pages reach the freelist, drop the read lock from
1499 ** the database file.
1500 */
1501 pPager->nRef--;
1502 assert( pPager->nRef>=0 );
1503 if( pPager->nRef==0 ){
1504 pager_reset(pPager);
1505 }
drhed7c8552001-04-11 14:29:21 +00001506 }
drhd9b02572001-04-15 00:37:09 +00001507 return SQLITE_OK;
drhed7c8552001-04-11 14:29:21 +00001508}
1509
1510/*
drhda47d772002-12-02 04:25:19 +00001511** Create a journal file for pPager. There should already be a write
1512** lock on the database file when this routine is called.
1513**
1514** Return SQLITE_OK if everything. Return an error code and release the
1515** write lock if anything goes wrong.
1516*/
1517static int pager_open_journal(Pager *pPager){
1518 int rc;
1519 assert( pPager->state==SQLITE_WRITELOCK );
1520 assert( pPager->journalOpen==0 );
1521 assert( pPager->useJournal );
1522 pPager->aInJournal = sqliteMalloc( pPager->dbSize/8 + 1 );
1523 if( pPager->aInJournal==0 ){
1524 sqliteOsReadLock(&pPager->fd);
1525 pPager->state = SQLITE_READLOCK;
1526 return SQLITE_NOMEM;
1527 }
1528 rc = sqliteOsOpenExclusive(pPager->zJournal, &pPager->jfd,pPager->tempFile);
1529 if( rc!=SQLITE_OK ){
1530 sqliteFree(pPager->aInJournal);
1531 pPager->aInJournal = 0;
1532 sqliteOsReadLock(&pPager->fd);
1533 pPager->state = SQLITE_READLOCK;
1534 return SQLITE_CANTOPEN;
1535 }
1536 pPager->journalOpen = 1;
drhdb48ee02003-01-16 13:42:43 +00001537 pPager->journalStarted = 0;
drhda47d772002-12-02 04:25:19 +00001538 pPager->needSync = 0;
1539 pPager->alwaysRollback = 0;
drh968af522003-02-11 14:55:40 +00001540 pPager->nRec = 0;
drhda47d772002-12-02 04:25:19 +00001541 sqlitepager_pagecount(pPager);
drh2e6d11b2003-04-25 15:37:57 +00001542 if( pPager->errMask!=0 ){
1543 rc = pager_errcode(pPager);
1544 return rc;
1545 }
drhda47d772002-12-02 04:25:19 +00001546 pPager->origDbSize = pPager->dbSize;
drh968af522003-02-11 14:55:40 +00001547 if( journal_format==JOURNAL_FORMAT_3 ){
1548 rc = sqliteOsWrite(&pPager->jfd, aJournalMagic3, sizeof(aJournalMagic3));
1549 if( rc==SQLITE_OK ){
drh4303fee2003-02-15 23:09:17 +00001550 rc = write32bits(&pPager->jfd, pPager->noSync ? 0xffffffff : 0);
drh968af522003-02-11 14:55:40 +00001551 }
1552 if( rc==SQLITE_OK ){
1553 pPager->cksumInit = (u32)sqliteRandomInteger();
1554 rc = write32bits(&pPager->jfd, pPager->cksumInit);
1555 }
1556 }else if( journal_format==JOURNAL_FORMAT_2 ){
1557 rc = sqliteOsWrite(&pPager->jfd, aJournalMagic2, sizeof(aJournalMagic2));
drhda47d772002-12-02 04:25:19 +00001558 }else{
drh968af522003-02-11 14:55:40 +00001559 assert( journal_format==JOURNAL_FORMAT_1 );
1560 rc = sqliteOsWrite(&pPager->jfd, aJournalMagic1, sizeof(aJournalMagic1));
drhda47d772002-12-02 04:25:19 +00001561 }
1562 if( rc==SQLITE_OK ){
1563 rc = write32bits(&pPager->jfd, pPager->dbSize);
1564 }
1565 if( pPager->ckptAutoopen && rc==SQLITE_OK ){
1566 rc = sqlitepager_ckpt_begin(pPager);
1567 }
1568 if( rc!=SQLITE_OK ){
1569 rc = pager_unwritelock(pPager);
1570 if( rc==SQLITE_OK ){
1571 rc = SQLITE_FULL;
1572 }
1573 }
1574 return rc;
1575}
1576
1577/*
drh4b845d72002-03-05 12:41:19 +00001578** Acquire a write-lock on the database. The lock is removed when
1579** the any of the following happen:
1580**
1581** * sqlitepager_commit() is called.
1582** * sqlitepager_rollback() is called.
1583** * sqlitepager_close() is called.
1584** * sqlitepager_unref() is called to on every outstanding page.
1585**
1586** The parameter to this routine is a pointer to any open page of the
1587** database file. Nothing changes about the page - it is used merely
1588** to acquire a pointer to the Pager structure and as proof that there
1589** is already a read-lock on the database.
1590**
drhda47d772002-12-02 04:25:19 +00001591** A journal file is opened if this is not a temporary file. For
1592** temporary files, the opening of the journal file is deferred until
1593** there is an actual need to write to the journal.
1594**
drh4b845d72002-03-05 12:41:19 +00001595** If the database is already write-locked, this routine is a no-op.
1596*/
1597int sqlitepager_begin(void *pData){
1598 PgHdr *pPg = DATA_TO_PGHDR(pData);
1599 Pager *pPager = pPg->pPager;
1600 int rc = SQLITE_OK;
1601 assert( pPg->nRef>0 );
1602 assert( pPager->state!=SQLITE_UNLOCK );
1603 if( pPager->state==SQLITE_READLOCK ){
1604 assert( pPager->aInJournal==0 );
1605 rc = sqliteOsWriteLock(&pPager->fd);
1606 if( rc!=SQLITE_OK ){
1607 return rc;
1608 }
drh4b845d72002-03-05 12:41:19 +00001609 pPager->state = SQLITE_WRITELOCK;
drhda47d772002-12-02 04:25:19 +00001610 pPager->dirtyFile = 0;
drhdb48ee02003-01-16 13:42:43 +00001611 TRACE1("TRANSACTION\n");
drhda47d772002-12-02 04:25:19 +00001612 if( pPager->useJournal && !pPager->tempFile ){
1613 rc = pager_open_journal(pPager);
drh4b845d72002-03-05 12:41:19 +00001614 }
1615 }
1616 return rc;
1617}
1618
1619/*
drhed7c8552001-04-11 14:29:21 +00001620** Mark a data page as writeable. The page is written into the journal
1621** if it is not there already. This routine must be called before making
1622** changes to a page.
1623**
1624** The first time this routine is called, the pager creates a new
1625** journal and acquires a write lock on the database. If the write
1626** lock could not be acquired, this routine returns SQLITE_BUSY. The
drh306dc212001-05-21 13:45:10 +00001627** calling routine must check for that return value and be careful not to
drhed7c8552001-04-11 14:29:21 +00001628** change any page data until this routine returns SQLITE_OK.
drhd9b02572001-04-15 00:37:09 +00001629**
1630** If the journal file could not be written because the disk is full,
1631** then this routine returns SQLITE_FULL and does an immediate rollback.
1632** All subsequent write attempts also return SQLITE_FULL until there
1633** is a call to sqlitepager_commit() or sqlitepager_rollback() to
1634** reset.
drhed7c8552001-04-11 14:29:21 +00001635*/
drhd9b02572001-04-15 00:37:09 +00001636int sqlitepager_write(void *pData){
drh69688d52001-04-14 16:38:23 +00001637 PgHdr *pPg = DATA_TO_PGHDR(pData);
1638 Pager *pPager = pPg->pPager;
drhd79caeb2001-04-15 02:27:24 +00001639 int rc = SQLITE_OK;
drh69688d52001-04-14 16:38:23 +00001640
drh6446c4d2001-12-15 14:22:18 +00001641 /* Check for errors
1642 */
drhd9b02572001-04-15 00:37:09 +00001643 if( pPager->errMask ){
1644 return pager_errcode(pPager);
1645 }
drh5e00f6c2001-09-13 13:46:56 +00001646 if( pPager->readOnly ){
1647 return SQLITE_PERM;
1648 }
drh6446c4d2001-12-15 14:22:18 +00001649
1650 /* Mark the page as dirty. If the page has already been written
1651 ** to the journal then we can return right away.
1652 */
drhd9b02572001-04-15 00:37:09 +00001653 pPg->dirty = 1;
drh0f892532002-05-30 12:27:03 +00001654 if( pPg->inJournal && (pPg->inCkpt || pPager->ckptInUse==0) ){
drha1680452002-04-18 01:56:57 +00001655 pPager->dirtyFile = 1;
drhfa86c412002-02-02 15:01:15 +00001656 return SQLITE_OK;
1657 }
drh6446c4d2001-12-15 14:22:18 +00001658
1659 /* If we get this far, it means that the page needs to be
drhfa86c412002-02-02 15:01:15 +00001660 ** written to the transaction journal or the ckeckpoint journal
1661 ** or both.
1662 **
1663 ** First check to see that the transaction journal exists and
1664 ** create it if it does not.
drh6446c4d2001-12-15 14:22:18 +00001665 */
drhd9b02572001-04-15 00:37:09 +00001666 assert( pPager->state!=SQLITE_UNLOCK );
drh4b845d72002-03-05 12:41:19 +00001667 rc = sqlitepager_begin(pData);
drhda47d772002-12-02 04:25:19 +00001668 if( rc!=SQLITE_OK ){
1669 return rc;
1670 }
drhd9b02572001-04-15 00:37:09 +00001671 assert( pPager->state==SQLITE_WRITELOCK );
drhda47d772002-12-02 04:25:19 +00001672 if( !pPager->journalOpen && pPager->useJournal ){
1673 rc = pager_open_journal(pPager);
1674 if( rc!=SQLITE_OK ) return rc;
1675 }
1676 assert( pPager->journalOpen || !pPager->useJournal );
1677 pPager->dirtyFile = 1;
drh6446c4d2001-12-15 14:22:18 +00001678
drhfa86c412002-02-02 15:01:15 +00001679 /* The transaction journal now exists and we have a write lock on the
1680 ** main database file. Write the current page to the transaction
1681 ** journal if it is not there already.
drh6446c4d2001-12-15 14:22:18 +00001682 */
drhdb48ee02003-01-16 13:42:43 +00001683 if( !pPg->inJournal && pPager->useJournal ){
1684 if( (int)pPg->pgno <= pPager->origDbSize ){
drh968af522003-02-11 14:55:40 +00001685 int szPg;
1686 u32 saved;
1687 if( journal_format>=JOURNAL_FORMAT_3 ){
1688 u32 cksum = pager_cksum(pPager, pPg->pgno, pData);
1689 saved = *(u32*)PGHDR_TO_EXTRA(pPg);
1690 store32bits(cksum, pPg, SQLITE_PAGE_SIZE);
1691 szPg = SQLITE_PAGE_SIZE+8;
1692 }else{
1693 szPg = SQLITE_PAGE_SIZE+4;
1694 }
1695 store32bits(pPg->pgno, pPg, -4);
1696 rc = sqliteOsWrite(&pPager->jfd, &((char*)pData)[-4], szPg);
1697 if( journal_format>=JOURNAL_FORMAT_3 ){
1698 *(u32*)PGHDR_TO_EXTRA(pPg) = saved;
1699 }
drhdb48ee02003-01-16 13:42:43 +00001700 if( rc!=SQLITE_OK ){
1701 sqlitepager_rollback(pPager);
1702 pPager->errMask |= PAGER_ERR_FULL;
1703 return rc;
1704 }
drh99ee3602003-02-16 19:13:36 +00001705 pPager->nRec++;
drhdb48ee02003-01-16 13:42:43 +00001706 assert( pPager->aInJournal!=0 );
1707 pPager->aInJournal[pPg->pgno/8] |= 1<<(pPg->pgno&7);
1708 pPg->needSync = !pPager->noSync;
1709 pPg->inJournal = 1;
1710 if( pPager->ckptInUse ){
1711 pPager->aInCkpt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
1712 page_add_to_ckpt_list(pPg);
1713 }
1714 TRACE3("JOURNAL %d %d\n", pPg->pgno, pPg->needSync);
1715 }else{
1716 pPg->needSync = !pPager->journalStarted && !pPager->noSync;
1717 TRACE3("APPEND %d %d\n", pPg->pgno, pPg->needSync);
drhd9b02572001-04-15 00:37:09 +00001718 }
drhdb48ee02003-01-16 13:42:43 +00001719 if( pPg->needSync ){
1720 pPager->needSync = 1;
drhfa86c412002-02-02 15:01:15 +00001721 }
drh69688d52001-04-14 16:38:23 +00001722 }
drh6446c4d2001-12-15 14:22:18 +00001723
drhfa86c412002-02-02 15:01:15 +00001724 /* If the checkpoint journal is open and the page is not in it,
drh968af522003-02-11 14:55:40 +00001725 ** then write the current page to the checkpoint journal. Note that
1726 ** the checkpoint journal always uses the simplier format 2 that lacks
1727 ** checksums. The header is also omitted from the checkpoint journal.
drh6446c4d2001-12-15 14:22:18 +00001728 */
drh0f892532002-05-30 12:27:03 +00001729 if( pPager->ckptInUse && !pPg->inCkpt && (int)pPg->pgno<=pPager->ckptSize ){
drh1e336b42002-02-14 12:50:33 +00001730 assert( pPg->inJournal || (int)pPg->pgno>pPager->origDbSize );
drh968af522003-02-11 14:55:40 +00001731 store32bits(pPg->pgno, pPg, -4);
drh2554f8b2003-01-22 01:26:44 +00001732 rc = sqliteOsWrite(&pPager->cpfd, &((char*)pData)[-4], SQLITE_PAGE_SIZE+4);
drhfa86c412002-02-02 15:01:15 +00001733 if( rc!=SQLITE_OK ){
1734 sqlitepager_rollback(pPager);
1735 pPager->errMask |= PAGER_ERR_FULL;
1736 return rc;
1737 }
drh9bd47a92003-01-07 14:46:08 +00001738 pPager->ckptNRec++;
drhfa86c412002-02-02 15:01:15 +00001739 assert( pPager->aInCkpt!=0 );
1740 pPager->aInCkpt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
drh03eb96a2002-11-10 23:32:56 +00001741 page_add_to_ckpt_list(pPg);
drhfa86c412002-02-02 15:01:15 +00001742 }
1743
1744 /* Update the database size and return.
1745 */
drh1ab43002002-01-14 09:28:19 +00001746 if( pPager->dbSize<(int)pPg->pgno ){
drh306dc212001-05-21 13:45:10 +00001747 pPager->dbSize = pPg->pgno;
1748 }
drh69688d52001-04-14 16:38:23 +00001749 return rc;
drhed7c8552001-04-11 14:29:21 +00001750}
1751
1752/*
drhaacc5432002-01-06 17:07:40 +00001753** Return TRUE if the page given in the argument was previously passed
drh6019e162001-07-02 17:51:45 +00001754** to sqlitepager_write(). In other words, return TRUE if it is ok
1755** to change the content of the page.
1756*/
1757int sqlitepager_iswriteable(void *pData){
1758 PgHdr *pPg = DATA_TO_PGHDR(pData);
1759 return pPg->dirty;
1760}
1761
1762/*
drh001bbcb2003-03-19 03:14:00 +00001763** Replace the content of a single page with the information in the third
1764** argument.
1765*/
1766int sqlitepager_overwrite(Pager *pPager, Pgno pgno, void *pData){
1767 void *pPage;
1768 int rc;
1769
1770 rc = sqlitepager_get(pPager, pgno, &pPage);
1771 if( rc==SQLITE_OK ){
1772 rc = sqlitepager_write(pPage);
1773 if( rc==SQLITE_OK ){
1774 memcpy(pPage, pData, SQLITE_PAGE_SIZE);
1775 }
1776 sqlitepager_unref(pPage);
1777 }
1778 return rc;
1779}
1780
1781/*
drh30e58752002-03-02 20:41:57 +00001782** A call to this routine tells the pager that it is not necessary to
1783** write the information on page "pgno" back to the disk, even though
1784** that page might be marked as dirty.
1785**
1786** The overlying software layer calls this routine when all of the data
1787** on the given page is unused. The pager marks the page as clean so
1788** that it does not get written to disk.
1789**
1790** Tests show that this optimization, together with the
1791** sqlitepager_dont_rollback() below, more than double the speed
1792** of large INSERT operations and quadruple the speed of large DELETEs.
drh8e298f92002-07-06 16:28:47 +00001793**
1794** When this routine is called, set the alwaysRollback flag to true.
1795** Subsequent calls to sqlitepager_dont_rollback() for the same page
1796** will thereafter be ignored. This is necessary to avoid a problem
1797** where a page with data is added to the freelist during one part of
1798** a transaction then removed from the freelist during a later part
1799** of the same transaction and reused for some other purpose. When it
1800** is first added to the freelist, this routine is called. When reused,
1801** the dont_rollback() routine is called. But because the page contains
1802** critical data, we still need to be sure it gets rolled back in spite
1803** of the dont_rollback() call.
drh30e58752002-03-02 20:41:57 +00001804*/
1805void sqlitepager_dont_write(Pager *pPager, Pgno pgno){
1806 PgHdr *pPg;
drh8e298f92002-07-06 16:28:47 +00001807
drh30e58752002-03-02 20:41:57 +00001808 pPg = pager_lookup(pPager, pgno);
drh8e298f92002-07-06 16:28:47 +00001809 pPg->alwaysRollback = 1;
drh30e58752002-03-02 20:41:57 +00001810 if( pPg && pPg->dirty ){
drh8124a302002-06-25 14:43:57 +00001811 if( pPager->dbSize==(int)pPg->pgno && pPager->origDbSize<pPager->dbSize ){
1812 /* If this pages is the last page in the file and the file has grown
1813 ** during the current transaction, then do NOT mark the page as clean.
1814 ** When the database file grows, we must make sure that the last page
1815 ** gets written at least once so that the disk file will be the correct
1816 ** size. If you do not write this page and the size of the file
1817 ** on the disk ends up being too small, that can lead to database
1818 ** corruption during the next transaction.
1819 */
1820 }else{
drhdb48ee02003-01-16 13:42:43 +00001821 TRACE2("DONT_WRITE %d\n", pgno);
drh8124a302002-06-25 14:43:57 +00001822 pPg->dirty = 0;
1823 }
drh30e58752002-03-02 20:41:57 +00001824 }
1825}
1826
1827/*
1828** A call to this routine tells the pager that if a rollback occurs,
1829** it is not necessary to restore the data on the given page. This
1830** means that the pager does not have to record the given page in the
1831** rollback journal.
1832*/
1833void sqlitepager_dont_rollback(void *pData){
1834 PgHdr *pPg = DATA_TO_PGHDR(pData);
1835 Pager *pPager = pPg->pPager;
1836
1837 if( pPager->state!=SQLITE_WRITELOCK || pPager->journalOpen==0 ) return;
drh193a6b42002-07-07 16:52:46 +00001838 if( pPg->alwaysRollback || pPager->alwaysRollback ) return;
drh30e58752002-03-02 20:41:57 +00001839 if( !pPg->inJournal && (int)pPg->pgno <= pPager->origDbSize ){
1840 assert( pPager->aInJournal!=0 );
1841 pPager->aInJournal[pPg->pgno/8] |= 1<<(pPg->pgno&7);
1842 pPg->inJournal = 1;
drh0f892532002-05-30 12:27:03 +00001843 if( pPager->ckptInUse ){
drh30e58752002-03-02 20:41:57 +00001844 pPager->aInCkpt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
drh03eb96a2002-11-10 23:32:56 +00001845 page_add_to_ckpt_list(pPg);
drh30e58752002-03-02 20:41:57 +00001846 }
drhdb48ee02003-01-16 13:42:43 +00001847 TRACE2("DONT_ROLLBACK %d\n", pPg->pgno);
drh30e58752002-03-02 20:41:57 +00001848 }
drh0f892532002-05-30 12:27:03 +00001849 if( pPager->ckptInUse && !pPg->inCkpt && (int)pPg->pgno<=pPager->ckptSize ){
drh30e58752002-03-02 20:41:57 +00001850 assert( pPg->inJournal || (int)pPg->pgno>pPager->origDbSize );
1851 assert( pPager->aInCkpt!=0 );
1852 pPager->aInCkpt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
drh03eb96a2002-11-10 23:32:56 +00001853 page_add_to_ckpt_list(pPg);
drh30e58752002-03-02 20:41:57 +00001854 }
1855}
1856
1857/*
drhed7c8552001-04-11 14:29:21 +00001858** Commit all changes to the database and release the write lock.
drhd9b02572001-04-15 00:37:09 +00001859**
1860** If the commit fails for any reason, a rollback attempt is made
1861** and an error code is returned. If the commit worked, SQLITE_OK
1862** is returned.
drhed7c8552001-04-11 14:29:21 +00001863*/
drhd9b02572001-04-15 00:37:09 +00001864int sqlitepager_commit(Pager *pPager){
drha1b351a2001-09-14 16:42:12 +00001865 int rc;
drhed7c8552001-04-11 14:29:21 +00001866 PgHdr *pPg;
drhd9b02572001-04-15 00:37:09 +00001867
1868 if( pPager->errMask==PAGER_ERR_FULL ){
1869 rc = sqlitepager_rollback(pPager);
drh4e371ee2002-09-05 16:08:27 +00001870 if( rc==SQLITE_OK ){
1871 rc = SQLITE_FULL;
1872 }
drhd9b02572001-04-15 00:37:09 +00001873 return rc;
1874 }
1875 if( pPager->errMask!=0 ){
1876 rc = pager_errcode(pPager);
1877 return rc;
1878 }
1879 if( pPager->state!=SQLITE_WRITELOCK ){
1880 return SQLITE_ERROR;
1881 }
drhdb48ee02003-01-16 13:42:43 +00001882 TRACE1("COMMIT\n");
drha1680452002-04-18 01:56:57 +00001883 if( pPager->dirtyFile==0 ){
1884 /* Exit early (without doing the time-consuming sqliteOsSync() calls)
1885 ** if there have been no changes to the database file. */
drh341eae82003-01-21 02:39:36 +00001886 assert( pPager->needSync==0 );
drha1680452002-04-18 01:56:57 +00001887 rc = pager_unwritelock(pPager);
1888 pPager->dbSize = -1;
1889 return rc;
1890 }
drhda47d772002-12-02 04:25:19 +00001891 assert( pPager->journalOpen );
drha7fcb052001-12-14 15:09:55 +00001892 if( pPager->needSync && sqliteOsSync(&pPager->jfd)!=SQLITE_OK ){
drhd9b02572001-04-15 00:37:09 +00001893 goto commit_abort;
drhed7c8552001-04-11 14:29:21 +00001894 }
drh2554f8b2003-01-22 01:26:44 +00001895 pPg = pager_get_all_dirty_pages(pPager);
1896 if( pPg ){
1897 rc = pager_write_pagelist(pPg);
1898 if( rc || (!pPager->noSync && sqliteOsSync(&pPager->fd)!=SQLITE_OK) ){
1899 goto commit_abort;
1900 }
drh603240c2002-03-05 01:11:12 +00001901 }
drhd9b02572001-04-15 00:37:09 +00001902 rc = pager_unwritelock(pPager);
1903 pPager->dbSize = -1;
1904 return rc;
1905
1906 /* Jump here if anything goes wrong during the commit process.
1907 */
1908commit_abort:
1909 rc = sqlitepager_rollback(pPager);
1910 if( rc==SQLITE_OK ){
1911 rc = SQLITE_FULL;
drhed7c8552001-04-11 14:29:21 +00001912 }
drhed7c8552001-04-11 14:29:21 +00001913 return rc;
1914}
1915
1916/*
1917** Rollback all changes. The database falls back to read-only mode.
1918** All in-memory cache pages revert to their original data contents.
1919** The journal is deleted.
drhd9b02572001-04-15 00:37:09 +00001920**
1921** This routine cannot fail unless some other process is not following
1922** the correct locking protocol (SQLITE_PROTOCOL) or unless some other
1923** process is writing trash into the journal file (SQLITE_CORRUPT) or
1924** unless a prior malloc() failed (SQLITE_NOMEM). Appropriate error
1925** codes are returned for all these occasions. Otherwise,
1926** SQLITE_OK is returned.
drhed7c8552001-04-11 14:29:21 +00001927*/
drhd9b02572001-04-15 00:37:09 +00001928int sqlitepager_rollback(Pager *pPager){
drhed7c8552001-04-11 14:29:21 +00001929 int rc;
drhdb48ee02003-01-16 13:42:43 +00001930 TRACE1("ROLLBACK\n");
drhda47d772002-12-02 04:25:19 +00001931 if( !pPager->dirtyFile || !pPager->journalOpen ){
1932 rc = pager_unwritelock(pPager);
1933 pPager->dbSize = -1;
1934 return rc;
1935 }
drhdb48ee02003-01-16 13:42:43 +00001936
drhd9b02572001-04-15 00:37:09 +00001937 if( pPager->errMask!=0 && pPager->errMask!=PAGER_ERR_FULL ){
drh4b845d72002-03-05 12:41:19 +00001938 if( pPager->state>=SQLITE_WRITELOCK ){
drh99ee3602003-02-16 19:13:36 +00001939 pager_playback(pPager, 1);
drh4b845d72002-03-05 12:41:19 +00001940 }
drhd9b02572001-04-15 00:37:09 +00001941 return pager_errcode(pPager);
drhed7c8552001-04-11 14:29:21 +00001942 }
drhd9b02572001-04-15 00:37:09 +00001943 if( pPager->state!=SQLITE_WRITELOCK ){
1944 return SQLITE_OK;
1945 }
drh99ee3602003-02-16 19:13:36 +00001946 rc = pager_playback(pPager, 1);
drhd9b02572001-04-15 00:37:09 +00001947 if( rc!=SQLITE_OK ){
1948 rc = SQLITE_CORRUPT;
1949 pPager->errMask |= PAGER_ERR_CORRUPT;
1950 }
1951 pPager->dbSize = -1;
drhed7c8552001-04-11 14:29:21 +00001952 return rc;
drh98808ba2001-10-18 12:34:46 +00001953}
drhd9b02572001-04-15 00:37:09 +00001954
1955/*
drh5e00f6c2001-09-13 13:46:56 +00001956** Return TRUE if the database file is opened read-only. Return FALSE
1957** if the database is (in theory) writable.
1958*/
1959int sqlitepager_isreadonly(Pager *pPager){
drhbe0072d2001-09-13 14:46:09 +00001960 return pPager->readOnly;
drh5e00f6c2001-09-13 13:46:56 +00001961}
1962
1963/*
drhd9b02572001-04-15 00:37:09 +00001964** This routine is used for testing and analysis only.
1965*/
1966int *sqlitepager_stats(Pager *pPager){
1967 static int a[9];
1968 a[0] = pPager->nRef;
1969 a[1] = pPager->nPage;
1970 a[2] = pPager->mxPage;
1971 a[3] = pPager->dbSize;
1972 a[4] = pPager->state;
1973 a[5] = pPager->errMask;
1974 a[6] = pPager->nHit;
1975 a[7] = pPager->nMiss;
1976 a[8] = pPager->nOvfl;
1977 return a;
1978}
drhdd793422001-06-28 01:54:48 +00001979
drhfa86c412002-02-02 15:01:15 +00001980/*
1981** Set the checkpoint.
1982**
1983** This routine should be called with the transaction journal already
1984** open. A new checkpoint journal is created that can be used to rollback
drhaaab5722002-02-19 13:39:21 +00001985** changes of a single SQL command within a larger transaction.
drhfa86c412002-02-02 15:01:15 +00001986*/
1987int sqlitepager_ckpt_begin(Pager *pPager){
1988 int rc;
1989 char zTemp[SQLITE_TEMPNAME_SIZE];
drhda47d772002-12-02 04:25:19 +00001990 if( !pPager->journalOpen ){
1991 pPager->ckptAutoopen = 1;
1992 return SQLITE_OK;
1993 }
drhfa86c412002-02-02 15:01:15 +00001994 assert( pPager->journalOpen );
drh0f892532002-05-30 12:27:03 +00001995 assert( !pPager->ckptInUse );
drhfa86c412002-02-02 15:01:15 +00001996 pPager->aInCkpt = sqliteMalloc( pPager->dbSize/8 + 1 );
1997 if( pPager->aInCkpt==0 ){
1998 sqliteOsReadLock(&pPager->fd);
1999 return SQLITE_NOMEM;
2000 }
drh968af522003-02-11 14:55:40 +00002001#ifndef NDEBUG
drhfa86c412002-02-02 15:01:15 +00002002 rc = sqliteOsFileSize(&pPager->jfd, &pPager->ckptJSize);
2003 if( rc ) goto ckpt_begin_failed;
drh968af522003-02-11 14:55:40 +00002004 assert( pPager->ckptJSize ==
2005 pPager->nRec*JOURNAL_PG_SZ(journal_format)+JOURNAL_HDR_SZ(journal_format) );
2006#endif
2007 pPager->ckptJSize = pPager->nRec*JOURNAL_PG_SZ(journal_format)
2008 + JOURNAL_HDR_SZ(journal_format);
drh663fc632002-02-02 18:49:19 +00002009 pPager->ckptSize = pPager->dbSize;
drh0f892532002-05-30 12:27:03 +00002010 if( !pPager->ckptOpen ){
2011 rc = sqlitepager_opentemp(zTemp, &pPager->cpfd);
2012 if( rc ) goto ckpt_begin_failed;
2013 pPager->ckptOpen = 1;
drh9bd47a92003-01-07 14:46:08 +00002014 pPager->ckptNRec = 0;
drh0f892532002-05-30 12:27:03 +00002015 }
2016 pPager->ckptInUse = 1;
drhfa86c412002-02-02 15:01:15 +00002017 return SQLITE_OK;
2018
2019ckpt_begin_failed:
2020 if( pPager->aInCkpt ){
2021 sqliteFree(pPager->aInCkpt);
2022 pPager->aInCkpt = 0;
2023 }
2024 return rc;
2025}
2026
2027/*
2028** Commit a checkpoint.
2029*/
2030int sqlitepager_ckpt_commit(Pager *pPager){
drh0f892532002-05-30 12:27:03 +00002031 if( pPager->ckptInUse ){
drh03eb96a2002-11-10 23:32:56 +00002032 PgHdr *pPg, *pNext;
drh96ddd6d2002-09-05 19:10:33 +00002033 sqliteOsSeek(&pPager->cpfd, 0);
drh9bd47a92003-01-07 14:46:08 +00002034 /* sqliteOsTruncate(&pPager->cpfd, 0); */
2035 pPager->ckptNRec = 0;
drh0f892532002-05-30 12:27:03 +00002036 pPager->ckptInUse = 0;
drh663fc632002-02-02 18:49:19 +00002037 sqliteFree( pPager->aInCkpt );
2038 pPager->aInCkpt = 0;
drh03eb96a2002-11-10 23:32:56 +00002039 for(pPg=pPager->pCkpt; pPg; pPg=pNext){
2040 pNext = pPg->pNextCkpt;
2041 assert( pPg->inCkpt );
drh663fc632002-02-02 18:49:19 +00002042 pPg->inCkpt = 0;
drh03eb96a2002-11-10 23:32:56 +00002043 pPg->pPrevCkpt = pPg->pNextCkpt = 0;
drh663fc632002-02-02 18:49:19 +00002044 }
drh03eb96a2002-11-10 23:32:56 +00002045 pPager->pCkpt = 0;
drh663fc632002-02-02 18:49:19 +00002046 }
drhda47d772002-12-02 04:25:19 +00002047 pPager->ckptAutoopen = 0;
drhfa86c412002-02-02 15:01:15 +00002048 return SQLITE_OK;
2049}
2050
2051/*
2052** Rollback a checkpoint.
2053*/
2054int sqlitepager_ckpt_rollback(Pager *pPager){
2055 int rc;
drh0f892532002-05-30 12:27:03 +00002056 if( pPager->ckptInUse ){
drh663fc632002-02-02 18:49:19 +00002057 rc = pager_ckpt_playback(pPager);
2058 sqlitepager_ckpt_commit(pPager);
2059 }else{
2060 rc = SQLITE_OK;
2061 }
drhda47d772002-12-02 04:25:19 +00002062 pPager->ckptAutoopen = 0;
drhfa86c412002-02-02 15:01:15 +00002063 return rc;
2064}
2065
drh73509ee2003-04-06 20:44:45 +00002066/*
2067** Return the full pathname of the database file.
2068*/
2069const char *sqlitepager_filename(Pager *pPager){
2070 return pPager->zFilename;
2071}
2072
#ifdef SQLITE_TEST
/*
** Print a listing of all referenced pages and their ref count.
**
** Every page on the pager's pAll list with a positive reference count
** is written to standard output, one line per page, showing the page
** number, the address of the page data and the reference count.
**
** The address is printed with %p.  Casting the pointer to int would
** truncate it wherever pointers are wider than int.
*/
void sqlitepager_refdump(Pager *pPager){
  PgHdr *pPg;
  for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
    if( pPg->nRef<=0 ) continue;
    printf("PAGE %3d addr=%p nRef=%d\n",
       (int)pPg->pgno, (void*)PGHDR_TO_DATA(pPg), pPg->nRef);
  }
}
#endif