blob: d1776303d15126e728bc9f2dc32b0bba5f5d4963 [file] [log] [blame]
/*
** 2001 September 15
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This is the implementation of the page cache subsystem or "pager".
**
** The pager is used to access a database disk file.  It implements
** atomic commit and rollback through the use of a journal file that
** is separate from the database file.  The pager also implements file
** locking to prevent two processes from writing the same database
** file simultaneously, or one process from reading the database while
** another is writing.
**
** @(#) $Id: pager.c,v 1.96 2004/02/09 01:20:37 drh Exp $
*/
drh829e8022002-11-06 14:08:11 +000023#include "os.h" /* Must be first to enable large file support */
drhd9b02572001-04-15 00:37:09 +000024#include "sqliteInt.h"
drhed7c8552001-04-11 14:29:21 +000025#include "pager.h"
drhed7c8552001-04-11 14:29:21 +000026#include <assert.h>
drhd9b02572001-04-15 00:37:09 +000027#include <string.h>
drhed7c8552001-04-11 14:29:21 +000028
/*
** Macros for troubleshooting.  Normally turned off.
**
** When enabled, SET_PAGER() remembers the first pager it is handed in
** mainPager, CLR_PAGER() forgets it again, and the TRACEn() macros print
** to stderr only while the local variable pPager equals mainPager.
**
** Each enabled macro is wrapped in do{...}while(0) so that it expands to
** exactly one statement.  A bare "if(...) stmt" expansion would capture
** the "else" of an enclosing if-statement at the call site.
**
** When disabled, every macro expands to nothing.
*/
#if 0
static Pager *mainPager = 0;
#define SET_PAGER(X)  do{ if( mainPager==0 ) mainPager = (X); }while(0)
#define CLR_PAGER(X)  do{ if( mainPager==(X) ) mainPager = 0; }while(0)
#define TRACE1(X)     do{ if( pPager==mainPager ) fprintf(stderr,X); }while(0)
#define TRACE2(X,Y)   do{ if( pPager==mainPager ) fprintf(stderr,X,Y); }while(0)
#define TRACE3(X,Y,Z) \
   do{ if( pPager==mainPager ) fprintf(stderr,X,Y,Z); }while(0)
#else
#define SET_PAGER(X)
#define CLR_PAGER(X)
#define TRACE1(X)
#define TRACE2(X,Y)
#define TRACE3(X,Y,Z)
#endif
46
/*
** SQLITE_PAGE_RESERVE is the count of extra bytes kept at the tail of
** every page.  These bytes are stored on disk along with the page but
** are not used by the higher level btree layer.  The value can be
** supplied at compile time; when it is not, it defaults to zero.
*/
#if !defined(SQLITE_PAGE_RESERVE)
# define SQLITE_PAGE_RESERVE 0
#endif

/*
** On-disk footprint of a single page, in bytes: the usable page plus
** the reserved tail.
*/
#define SQLITE_BLOCK_SIZE (SQLITE_PAGE_SIZE + SQLITE_PAGE_RESERVE)
59
drhdb48ee02003-01-16 13:42:43 +000060
/*
** Lock states of the page cache.  At any moment the cache as a whole is
** in exactly one of these states:
**
**   SQLITE_UNLOCK      Nothing is being read from or written to the
**                      database file and no data is held in memory.
**                      This is the initial state.
**
**   SQLITE_READLOCK    The database is being read.  Writing is not
**                      permitted.  Multiple readers may access the same
**                      database file at the same time.
**
**   SQLITE_WRITELOCK   The database is being written.  Access is
**                      exclusive: no other process or thread can read
**                      or write while one process is writing.
**
** Transitions:
**
**   UNLOCK    -> READLOCK   on the first request to fetch a page.
**   READLOCK  -> UNLOCK     once every outstanding page has been released.
**   READLOCK  -> WRITELOCK  on the first request to write a page.  A page
**                           must already be outstanding to be written,
**                           so the pager is always in READLOCK first.
**   WRITELOCK -> READLOCK   on rollback or commit.
**
** The numeric order matters: code in this file compares states with
** relational operators.
*/
#define SQLITE_UNLOCK     0
#define SQLITE_READLOCK   1
#define SQLITE_WRITELOCK  2
94
drhd9b02572001-04-15 00:37:09 +000095
drhed7c8552001-04-11 14:29:21 +000096/*
97** Each in-memory image of a page begins with the following header.
drhbd03cae2001-06-02 02:40:57 +000098** This header is only visible to this pager module. The client
99** code that calls pager sees only the data that follows the header.
drhf6038712004-02-08 18:07:34 +0000100**
101** Client code should call sqlitepager_write() on a page prior to making
102** any modifications to that page. The first time sqlitepager_write()
103** is called, the original page contents are written into the rollback
104** journal and PgHdr.inJournal and PgHdr.needSync are set. Later, once
105** the journal page has made it onto the disk surface, PgHdr.needSync
106** is cleared. The modified page cannot be written back into the original
107** database file until the journal pages has been synced to disk and the
108** PgHdr.needSync has been cleared.
109**
110** The PgHdr.dirty flag is set when sqlitepager_write() is called and
111** is cleared again when the page content is written back to the original
112** database file.
drhed7c8552001-04-11 14:29:21 +0000113*/
drhd9b02572001-04-15 00:37:09 +0000114typedef struct PgHdr PgHdr;
drhed7c8552001-04-11 14:29:21 +0000115struct PgHdr {
116 Pager *pPager; /* The pager to which this page belongs */
117 Pgno pgno; /* The page number for this page */
drh69688d52001-04-14 16:38:23 +0000118 PgHdr *pNextHash, *pPrevHash; /* Hash collision chain for PgHdr.pgno */
drhed7c8552001-04-11 14:29:21 +0000119 int nRef; /* Number of users of this page */
drhd9b02572001-04-15 00:37:09 +0000120 PgHdr *pNextFree, *pPrevFree; /* Freelist of pages where nRef==0 */
121 PgHdr *pNextAll, *pPrevAll; /* A list of all pages */
drh03eb96a2002-11-10 23:32:56 +0000122 PgHdr *pNextCkpt, *pPrevCkpt; /* List of pages in the checkpoint journal */
drh193a6b42002-07-07 16:52:46 +0000123 u8 inJournal; /* TRUE if has been written to journal */
124 u8 inCkpt; /* TRUE if written to the checkpoint journal */
125 u8 dirty; /* TRUE if we need to write back changes */
drhdb48ee02003-01-16 13:42:43 +0000126 u8 needSync; /* Sync journal before writing this page */
drh193a6b42002-07-07 16:52:46 +0000127 u8 alwaysRollback; /* Disable dont_rollback() for this page */
drh2554f8b2003-01-22 01:26:44 +0000128 PgHdr *pDirty; /* Dirty pages sorted by PgHdr.pgno */
drhb20ea9d2004-02-09 01:20:36 +0000129 /* SQLITE_BLOCK_SIZE bytes of page data follow this header */
drh973b6e32003-02-12 14:09:42 +0000130 /* Pager.nExtra bytes of local data follow the page data */
drhed7c8552001-04-11 14:29:21 +0000131};
132
/*
** Pointer conversions between a page header and the memory around it.
**
**   PGHDR_TO_DATA(P)    Header pointer -> start of the page data, which
**                       begins immediately after the header.
**   DATA_TO_PGHDR(D)    Page data pointer -> the header that precedes it.
**   PGHDR_TO_EXTRA(P)   Header pointer -> the extra area located
**                       SQLITE_BLOCK_SIZE bytes past the start of the data.
*/
#define PGHDR_TO_DATA(P)   ((void*)((P) + 1))
#define DATA_TO_PGHDR(D)   (((PgHdr*)(D)) - 1)
#define PGHDR_TO_EXTRA(P)  ((void*)(((char*)((P) + 1)) + SQLITE_BLOCK_SIZE))
drhed7c8552001-04-11 14:29:21 +0000140
/*
** Number of buckets in the hash table that locates in-memory pages by
** page number.  pager_hash() reduces a page number with a bit mask of
** N_PG_HASH-1, so this value must be a power of two.
*/
#define N_PG_HASH 2048

/*
** Map a page number onto a bucket index in the range 0..N_PG_HASH-1.
*/
#define pager_hash(PN)  ((PN) & (N_PG_HASH - 1))
drhed7c8552001-04-11 14:29:21 +0000151
152/*
153** A open page cache is an instance of the following structure.
154*/
155struct Pager {
156 char *zFilename; /* Name of the database file */
157 char *zJournal; /* Name of the journal file */
drha76c82e2003-07-27 18:59:42 +0000158 char *zDirectory; /* Directory hold database and journal files */
drh8cfbf082001-09-19 13:22:39 +0000159 OsFile fd, jfd; /* File descriptors for database and journal */
drhfa86c412002-02-02 15:01:15 +0000160 OsFile cpfd; /* File descriptor for the checkpoint journal */
drhed7c8552001-04-11 14:29:21 +0000161 int dbSize; /* Number of pages in the file */
drh69688d52001-04-14 16:38:23 +0000162 int origDbSize; /* dbSize before the current change */
drh28be87c2002-11-05 23:03:02 +0000163 int ckptSize; /* Size of database (in pages) at ckpt_begin() */
164 off_t ckptJSize; /* Size of journal at ckpt_begin() */
drh968af522003-02-11 14:55:40 +0000165 int nRec; /* Number of pages written to the journal */
166 u32 cksumInit; /* Quasi-random value added to every checksum */
drh9bd47a92003-01-07 14:46:08 +0000167 int ckptNRec; /* Number of records in the checkpoint journal */
drh7e3b0a02001-04-28 16:52:40 +0000168 int nExtra; /* Add this many bytes to each in-memory page */
drh72f82862001-05-24 21:06:34 +0000169 void (*xDestructor)(void*); /* Call this routine when freeing pages */
drhed7c8552001-04-11 14:29:21 +0000170 int nPage; /* Total number of in-memory pages */
drhd9b02572001-04-15 00:37:09 +0000171 int nRef; /* Number of in-memory pages with PgHdr.nRef>0 */
drhed7c8552001-04-11 14:29:21 +0000172 int mxPage; /* Maximum number of pages to hold in cache */
drhd9b02572001-04-15 00:37:09 +0000173 int nHit, nMiss, nOvfl; /* Cache hits, missing, and LRU overflows */
drhb20ea9d2004-02-09 01:20:36 +0000174 void (*xCodec)(void*,void*,int); /* Routine for en/decoding on-disk data */
175 void *pCodecArg; /* First argument to xCodec() */
drh603240c2002-03-05 01:11:12 +0000176 u8 journalOpen; /* True if journal file descriptors is valid */
drh34e79ce2004-02-08 06:05:46 +0000177 u8 journalStarted; /* True if header of journal is synced */
178 u8 useJournal; /* Use a rollback journal on this file */
drh603240c2002-03-05 01:11:12 +0000179 u8 ckptOpen; /* True if the checkpoint journal is open */
drh0f892532002-05-30 12:27:03 +0000180 u8 ckptInUse; /* True we are in a checkpoint */
drhda47d772002-12-02 04:25:19 +0000181 u8 ckptAutoopen; /* Open ckpt journal when main journal is opened*/
drh603240c2002-03-05 01:11:12 +0000182 u8 noSync; /* Do not sync the journal if true */
drh968af522003-02-11 14:55:40 +0000183 u8 fullSync; /* Do extra syncs of the journal for robustness */
drh603240c2002-03-05 01:11:12 +0000184 u8 state; /* SQLITE_UNLOCK, _READLOCK or _WRITELOCK */
185 u8 errMask; /* One of several kinds of errors */
186 u8 tempFile; /* zFilename is a temporary file */
187 u8 readOnly; /* True for a read-only database */
188 u8 needSync; /* True if an fsync() is needed on the journal */
drha1680452002-04-18 01:56:57 +0000189 u8 dirtyFile; /* True if database file has changed in any way */
drh193a6b42002-07-07 16:52:46 +0000190 u8 alwaysRollback; /* Disable dont_rollback() for all pages */
drh603240c2002-03-05 01:11:12 +0000191 u8 *aInJournal; /* One bit for each page in the database file */
192 u8 *aInCkpt; /* One bit for each page in the database */
drhed7c8552001-04-11 14:29:21 +0000193 PgHdr *pFirst, *pLast; /* List of free pages */
drh341eae82003-01-21 02:39:36 +0000194 PgHdr *pFirstSynced; /* First free page with PgHdr.needSync==0 */
drhd9b02572001-04-15 00:37:09 +0000195 PgHdr *pAll; /* List of all pages */
drh03eb96a2002-11-10 23:32:56 +0000196 PgHdr *pCkpt; /* List of pages in the checkpoint journal */
drhed7c8552001-04-11 14:29:21 +0000197 PgHdr *aHash[N_PG_HASH]; /* Hash table to map page number of PgHdr */
drhd9b02572001-04-15 00:37:09 +0000198};
199
/*
** Bit flags that may be set in Pager.errMask.  Each flag occupies its
** own bit so that several error conditions can be recorded at once.
*/
#define PAGER_ERR_FULL     (1<<0)  /* a write() failed */
#define PAGER_ERR_MEM      (1<<1)  /* malloc() failed */
#define PAGER_ERR_LOCK     (1<<2)  /* error in the locking protocol */
#define PAGER_ERR_CORRUPT  (1<<3)  /* database or journal corruption */
#define PAGER_ERR_DISK     (1<<4)  /* general disk I/O error - bad hard drive? */
drhd9b02572001-04-15 00:37:09 +0000208
209/*
210** The journal file contains page records in the following
211** format.
drh968af522003-02-11 14:55:40 +0000212**
213** Actually, this structure is the complete page record for pager
214** formats less than 3. Beginning with format 3, this record is surrounded
215** by two checksums.
drhd9b02572001-04-15 00:37:09 +0000216*/
217typedef struct PageRecord PageRecord;
218struct PageRecord {
drhb20ea9d2004-02-09 01:20:36 +0000219 Pgno pgno; /* The page number */
220 char aData[SQLITE_BLOCK_SIZE]; /* Original data for page pgno */
drhd9b02572001-04-15 00:37:09 +0000221};
222
/*
** Every journal file starts with one of the magic strings below.  The
** bytes were obtained from /dev/random and serve only as a sanity check.
** The final byte identifies the journal format.
**
** Three journal formats exist so far:
**
**   Format 1  32-bit integers are written in the byte order of the host
**             machine.
**   Format 2  Integers are written big-endian.
**   Format 3  (added for 2.8.0)  Like format 2, plus extra sanity
**             checking information.
**
** New journals are always written in a new format, but older journals
** must remain readable so that journals created by older versions of
** the library can still be rolled back.
**
** Why format 3 exists: if power fails while the journal is being
** written, semi-random garbage might appear in the journal file once
** power is restored.  Rolling such a journal back could corrupt the
** database.  The extra sanity data is an attempt to discover the
** garbage and ignore it.
**
** The sanity data is a 32-bit checksum on each page of data, covering
** both the page number and the SQLITE_BLOCK_SIZE bytes of page data.
** The checksum is seeded with a 32-bit random value stored in the
** journal right after the header.  The random seed is important:
** garbage at the end of a journal is likely to be data from other,
** now deleted, files.  If that garbage came from an obsolete journal
** its checksums might be correct.  Seeding the checksum with a value
** that differs for every journal minimizes that risk.
*/
static const unsigned char aJournalMagic1[] = {
  0xd9, 0xd5, 0x05, 0xf9,  0x20, 0xa1, 0x63, 0xd4
};
static const unsigned char aJournalMagic2[] = {
  0xd9, 0xd5, 0x05, 0xf9,  0x20, 0xa1, 0x63, 0xd5
};
static const unsigned char aJournalMagic3[] = {
  0xd9, 0xd5, 0x05, 0xf9,  0x20, 0xa1, 0x63, 0xd6
};
#define JOURNAL_FORMAT_1  1
#define JOURNAL_FORMAT_2  2
#define JOURNAL_FORMAT_3  3
drh94f33312002-08-12 12:29:56 +0000264
/*
** journal_format selects the format used when creating new primary
** journal files.  The default is always format 3.
**
** In test builds (SQLITE_TEST defined) it is a real variable, so that it
** can be set to an older format in order to verify that newer versions
** of the library are able to roll back older journal files.  Otherwise
** it is a compile-time constant.
**
** Note that checkpoint journals always use format 2 and omit the header.
*/
#if defined(SQLITE_TEST)
int journal_format = 3;
#else
# define journal_format 3
#endif
drhed7c8552001-04-11 14:29:21 +0000279
/*
** Both the journal header and each journal page record change size with
** the journal format.  Given a format number X:
**
**   JOURNAL_HDR_SZ(X)  Header size: the magic string plus a page number,
**                      plus two 32-bit integers for format 3 and later.
**   JOURNAL_PG_SZ(X)   Page record size: the page data plus a page
**                      number, plus one 32-bit integer for format 3 and
**                      later.
*/
#define JOURNAL_HDR_SZ(X) \
   (sizeof(aJournalMagic1) + sizeof(Pgno) + ((X)>=3 ? 2*sizeof(u32) : 0))
#define JOURNAL_PG_SZ(X) \
   (SQLITE_BLOCK_SIZE + sizeof(Pgno) + ((X)>=3 ? sizeof(u32) : 0))
drh968af522003-02-11 14:55:40 +0000289
/*
** Reference count tracking, available in test builds only.
**
** When SQLITE_TEST is defined and pager_refinfo_enable is non-zero,
** REFINFO(p) prints the page number, the address of the page data and
** the reference count of page p to standard output.  In all other
** cases REFINFO() does nothing.
**
** The data address is printed with %p.  Casting the pointer to int, as
** an earlier version of this code did, truncates it wherever pointers
** are wider than int.  The page number is printed as unsigned to match
** how page numbers are compared elsewhere in this file.
*/
#ifdef SQLITE_TEST
  int pager_refinfo_enable = 0;
  static void pager_refinfo(PgHdr *p){
    static int cnt = 0;
    if( !pager_refinfo_enable ) return;
    printf(
       "REFCNT: %4u addr=%p nRef=%d\n",
       (unsigned)p->pgno, PGHDR_TO_DATA(p), p->nRef
    );
    cnt++;   /* Something to set a breakpoint on */
  }
# define REFINFO(X)  pager_refinfo(X)
#else
# define REFINFO(X)
#endif
308
309/*
drh34e79ce2004-02-08 06:05:46 +0000310** Read a 32-bit integer from the given file descriptor. Store the integer
311** that is read in *pRes. Return SQLITE_OK if everything worked, or an
312** error code is something goes wrong.
313**
314** If the journal format is 2 or 3, read a big-endian integer. If the
315** journal format is 1, read an integer in the native byte-order of the
316** host machine.
drh94f33312002-08-12 12:29:56 +0000317*/
drh968af522003-02-11 14:55:40 +0000318static int read32bits(int format, OsFile *fd, u32 *pRes){
drh94f33312002-08-12 12:29:56 +0000319 u32 res;
320 int rc;
321 rc = sqliteOsRead(fd, &res, sizeof(res));
drh968af522003-02-11 14:55:40 +0000322 if( rc==SQLITE_OK && format>JOURNAL_FORMAT_1 ){
drh94f33312002-08-12 12:29:56 +0000323 unsigned char ac[4];
324 memcpy(ac, &res, 4);
325 res = (ac[0]<<24) | (ac[1]<<16) | (ac[2]<<8) | ac[3];
326 }
327 *pRes = res;
328 return rc;
329}
330
331/*
drh34e79ce2004-02-08 06:05:46 +0000332** Write a 32-bit integer into the given file descriptor. Return SQLITE_OK
333** on success or an error code is something goes wrong.
334**
335** If the journal format is 2 or 3, write the integer as 4 big-endian
336** bytes. If the journal format is 1, write the integer in the native
337** byte order. In normal operation, only formats 2 and 3 are used.
338** Journal format 1 is only used for testing.
drh94f33312002-08-12 12:29:56 +0000339*/
340static int write32bits(OsFile *fd, u32 val){
341 unsigned char ac[4];
drh968af522003-02-11 14:55:40 +0000342 if( journal_format<=1 ){
drh94f33312002-08-12 12:29:56 +0000343 return sqliteOsWrite(fd, &val, 4);
344 }
drh94f33312002-08-12 12:29:56 +0000345 ac[0] = (val>>24) & 0xff;
346 ac[1] = (val>>16) & 0xff;
347 ac[2] = (val>>8) & 0xff;
348 ac[3] = val & 0xff;
349 return sqliteOsWrite(fd, ac, 4);
350}
351
drh2554f8b2003-01-22 01:26:44 +0000352/*
353** Write a 32-bit integer into a page header right before the
354** page data. This will overwrite the PgHdr.pDirty pointer.
drh34e79ce2004-02-08 06:05:46 +0000355**
356** The integer is big-endian for formats 2 and 3 and native byte order
357** for journal format 1.
drh2554f8b2003-01-22 01:26:44 +0000358*/
drh968af522003-02-11 14:55:40 +0000359static void store32bits(u32 val, PgHdr *p, int offset){
drh2554f8b2003-01-22 01:26:44 +0000360 unsigned char *ac;
drhec1bd0b2003-08-26 11:41:27 +0000361 ac = &((unsigned char*)PGHDR_TO_DATA(p))[offset];
drh968af522003-02-11 14:55:40 +0000362 if( journal_format<=1 ){
drh2554f8b2003-01-22 01:26:44 +0000363 memcpy(ac, &val, 4);
364 }else{
365 ac[0] = (val>>24) & 0xff;
366 ac[1] = (val>>16) & 0xff;
367 ac[2] = (val>>8) & 0xff;
368 ac[3] = val & 0xff;
369 }
370}
371
drh94f33312002-08-12 12:29:56 +0000372
373/*
drhd9b02572001-04-15 00:37:09 +0000374** Convert the bits in the pPager->errMask into an approprate
375** return code.
376*/
377static int pager_errcode(Pager *pPager){
378 int rc = SQLITE_OK;
379 if( pPager->errMask & PAGER_ERR_LOCK ) rc = SQLITE_PROTOCOL;
drh81a20f22001-10-12 17:30:04 +0000380 if( pPager->errMask & PAGER_ERR_DISK ) rc = SQLITE_IOERR;
drhd9b02572001-04-15 00:37:09 +0000381 if( pPager->errMask & PAGER_ERR_FULL ) rc = SQLITE_FULL;
382 if( pPager->errMask & PAGER_ERR_MEM ) rc = SQLITE_NOMEM;
383 if( pPager->errMask & PAGER_ERR_CORRUPT ) rc = SQLITE_CORRUPT;
384 return rc;
drhed7c8552001-04-11 14:29:21 +0000385}
386
387/*
drh03eb96a2002-11-10 23:32:56 +0000388** Add or remove a page from the list of all pages that are in the
389** checkpoint journal.
390**
391** The Pager keeps a separate list of pages that are currently in
392** the checkpoint journal. This helps the sqlitepager_ckpt_commit()
393** routine run MUCH faster for the common case where there are many
394** pages in memory but only a few are in the checkpoint journal.
395*/
396static void page_add_to_ckpt_list(PgHdr *pPg){
397 Pager *pPager = pPg->pPager;
398 if( pPg->inCkpt ) return;
399 assert( pPg->pPrevCkpt==0 && pPg->pNextCkpt==0 );
400 pPg->pPrevCkpt = 0;
401 if( pPager->pCkpt ){
402 pPager->pCkpt->pPrevCkpt = pPg;
403 }
404 pPg->pNextCkpt = pPager->pCkpt;
405 pPager->pCkpt = pPg;
406 pPg->inCkpt = 1;
407}
408static void page_remove_from_ckpt_list(PgHdr *pPg){
409 if( !pPg->inCkpt ) return;
410 if( pPg->pPrevCkpt ){
411 assert( pPg->pPrevCkpt->pNextCkpt==pPg );
412 pPg->pPrevCkpt->pNextCkpt = pPg->pNextCkpt;
413 }else{
414 assert( pPg->pPager->pCkpt==pPg );
415 pPg->pPager->pCkpt = pPg->pNextCkpt;
416 }
417 if( pPg->pNextCkpt ){
418 assert( pPg->pNextCkpt->pPrevCkpt==pPg );
419 pPg->pNextCkpt->pPrevCkpt = pPg->pPrevCkpt;
420 }
421 pPg->pNextCkpt = 0;
422 pPg->pPrevCkpt = 0;
423 pPg->inCkpt = 0;
424}
425
426/*
drhed7c8552001-04-11 14:29:21 +0000427** Find a page in the hash table given its page number. Return
428** a pointer to the page or NULL if not found.
429*/
drhd9b02572001-04-15 00:37:09 +0000430static PgHdr *pager_lookup(Pager *pPager, Pgno pgno){
drh836faa42003-01-11 13:30:57 +0000431 PgHdr *p = pPager->aHash[pager_hash(pgno)];
drhed7c8552001-04-11 14:29:21 +0000432 while( p && p->pgno!=pgno ){
433 p = p->pNextHash;
434 }
435 return p;
436}
437
438/*
439** Unlock the database and clear the in-memory cache. This routine
440** sets the state of the pager back to what it was when it was first
441** opened. Any outstanding pages are invalidated and subsequent attempts
442** to access those pages will likely result in a coredump.
443*/
drhd9b02572001-04-15 00:37:09 +0000444static void pager_reset(Pager *pPager){
drhed7c8552001-04-11 14:29:21 +0000445 PgHdr *pPg, *pNext;
drhd9b02572001-04-15 00:37:09 +0000446 for(pPg=pPager->pAll; pPg; pPg=pNext){
447 pNext = pPg->pNextAll;
448 sqliteFree(pPg);
drhed7c8552001-04-11 14:29:21 +0000449 }
450 pPager->pFirst = 0;
drh341eae82003-01-21 02:39:36 +0000451 pPager->pFirstSynced = 0;
drhd9b02572001-04-15 00:37:09 +0000452 pPager->pLast = 0;
453 pPager->pAll = 0;
drhed7c8552001-04-11 14:29:21 +0000454 memset(pPager->aHash, 0, sizeof(pPager->aHash));
455 pPager->nPage = 0;
drhfa86c412002-02-02 15:01:15 +0000456 if( pPager->state>=SQLITE_WRITELOCK ){
drhd9b02572001-04-15 00:37:09 +0000457 sqlitepager_rollback(pPager);
drhed7c8552001-04-11 14:29:21 +0000458 }
drha7fcb052001-12-14 15:09:55 +0000459 sqliteOsUnlock(&pPager->fd);
drhed7c8552001-04-11 14:29:21 +0000460 pPager->state = SQLITE_UNLOCK;
drhd9b02572001-04-15 00:37:09 +0000461 pPager->dbSize = -1;
drhed7c8552001-04-11 14:29:21 +0000462 pPager->nRef = 0;
drh8cfbf082001-09-19 13:22:39 +0000463 assert( pPager->journalOpen==0 );
drhed7c8552001-04-11 14:29:21 +0000464}
465
466/*
467** When this routine is called, the pager has the journal file open and
468** a write lock on the database. This routine releases the database
469** write lock and acquires a read lock in its place. The journal file
470** is deleted and closed.
drh50457892003-09-06 01:10:47 +0000471**
472** TODO: Consider keeping the journal file open for temporary databases.
473** This might give a performance improvement on windows where opening
474** a file is an expensive operation.
drhed7c8552001-04-11 14:29:21 +0000475*/
drhd9b02572001-04-15 00:37:09 +0000476static int pager_unwritelock(Pager *pPager){
drhed7c8552001-04-11 14:29:21 +0000477 int rc;
drhd9b02572001-04-15 00:37:09 +0000478 PgHdr *pPg;
drhfa86c412002-02-02 15:01:15 +0000479 if( pPager->state<SQLITE_WRITELOCK ) return SQLITE_OK;
drh663fc632002-02-02 18:49:19 +0000480 sqlitepager_ckpt_commit(pPager);
drh0f892532002-05-30 12:27:03 +0000481 if( pPager->ckptOpen ){
482 sqliteOsClose(&pPager->cpfd);
483 pPager->ckptOpen = 0;
484 }
drhda47d772002-12-02 04:25:19 +0000485 if( pPager->journalOpen ){
486 sqliteOsClose(&pPager->jfd);
487 pPager->journalOpen = 0;
488 sqliteOsDelete(pPager->zJournal);
489 sqliteFree( pPager->aInJournal );
490 pPager->aInJournal = 0;
491 for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
492 pPg->inJournal = 0;
493 pPg->dirty = 0;
drhdb48ee02003-01-16 13:42:43 +0000494 pPg->needSync = 0;
drhda47d772002-12-02 04:25:19 +0000495 }
496 }else{
497 assert( pPager->dirtyFile==0 || pPager->useJournal==0 );
drhd9b02572001-04-15 00:37:09 +0000498 }
drhda47d772002-12-02 04:25:19 +0000499 rc = sqliteOsReadLock(&pPager->fd);
drh8e298f92002-07-06 16:28:47 +0000500 if( rc==SQLITE_OK ){
501 pPager->state = SQLITE_READLOCK;
502 }else{
503 /* This can only happen if a process does a BEGIN, then forks and the
504 ** child process does the COMMIT. Because of the semantics of unix
505 ** file locking, the unlock will fail.
506 */
507 pPager->state = SQLITE_UNLOCK;
508 }
drhed7c8552001-04-11 14:29:21 +0000509 return rc;
510}
511
drhed7c8552001-04-11 14:29:21 +0000512/*
drh968af522003-02-11 14:55:40 +0000513** Compute and return a checksum for the page of data.
drh34e79ce2004-02-08 06:05:46 +0000514**
515** This is not a real checksum. It is really just the sum of the
516** random initial value and the page number. We considered do a checksum
517** of the database, but that was found to be too slow.
drh968af522003-02-11 14:55:40 +0000518*/
519static u32 pager_cksum(Pager *pPager, Pgno pgno, const char *aData){
520 u32 cksum = pPager->cksumInit + pgno;
drh968af522003-02-11 14:55:40 +0000521 return cksum;
522}
523
524/*
drhfa86c412002-02-02 15:01:15 +0000525** Read a single page from the journal file opened on file descriptor
526** jfd. Playback this one page.
drh968af522003-02-11 14:55:40 +0000527**
528** There are three different journal formats. The format parameter determines
529** which format is used by the journal that is played back.
drhfa86c412002-02-02 15:01:15 +0000530*/
drh968af522003-02-11 14:55:40 +0000531static int pager_playback_one_page(Pager *pPager, OsFile *jfd, int format){
drhfa86c412002-02-02 15:01:15 +0000532 int rc;
533 PgHdr *pPg; /* An existing page in the cache */
534 PageRecord pgRec;
drh968af522003-02-11 14:55:40 +0000535 u32 cksum;
drhfa86c412002-02-02 15:01:15 +0000536
drh968af522003-02-11 14:55:40 +0000537 rc = read32bits(format, jfd, &pgRec.pgno);
drh99ee3602003-02-16 19:13:36 +0000538 if( rc!=SQLITE_OK ) return rc;
drh94f33312002-08-12 12:29:56 +0000539 rc = sqliteOsRead(jfd, &pgRec.aData, sizeof(pgRec.aData));
drh99ee3602003-02-16 19:13:36 +0000540 if( rc!=SQLITE_OK ) return rc;
drhfa86c412002-02-02 15:01:15 +0000541
drh968af522003-02-11 14:55:40 +0000542 /* Sanity checking on the page. This is more important that I originally
543 ** thought. If a power failure occurs while the journal is being written,
544 ** it could cause invalid data to be written into the journal. We need to
545 ** detect this invalid data (with high probability) and ignore it.
546 */
547 if( pgRec.pgno==0 ){
548 return SQLITE_DONE;
549 }
drh7d02cb72003-06-04 16:24:39 +0000550 if( pgRec.pgno>(unsigned)pPager->dbSize ){
drh968af522003-02-11 14:55:40 +0000551 return SQLITE_OK;
552 }
553 if( format>=JOURNAL_FORMAT_3 ){
554 rc = read32bits(format, jfd, &cksum);
drh99ee3602003-02-16 19:13:36 +0000555 if( rc ) return rc;
drh968af522003-02-11 14:55:40 +0000556 if( pager_cksum(pPager, pgRec.pgno, pgRec.aData)!=cksum ){
557 return SQLITE_DONE;
558 }
559 }
drhfa86c412002-02-02 15:01:15 +0000560
561 /* Playback the page. Update the in-memory copy of the page
562 ** at the same time, if there is one.
563 */
564 pPg = pager_lookup(pPager, pgRec.pgno);
drh99ee3602003-02-16 19:13:36 +0000565 TRACE2("PLAYBACK %d\n", pgRec.pgno);
drhb20ea9d2004-02-09 01:20:36 +0000566 sqliteOsSeek(&pPager->fd, (pgRec.pgno-1)*(off_t)SQLITE_BLOCK_SIZE);
567 rc = sqliteOsWrite(&pPager->fd, pgRec.aData, SQLITE_BLOCK_SIZE);
drhfa86c412002-02-02 15:01:15 +0000568 if( pPg ){
drhacf4ac92003-12-17 23:57:34 +0000569 /* No page should ever be rolled back that is in use, except for page
570 ** 1 which is held in use in order to keep the lock on the database
571 ** active.
572 */
573 assert( pPg->nRef==0 || pPg->pgno==1 );
drhb20ea9d2004-02-09 01:20:36 +0000574 memcpy(PGHDR_TO_DATA(pPg), pgRec.aData, SQLITE_BLOCK_SIZE);
drhacf4ac92003-12-17 23:57:34 +0000575 memset(PGHDR_TO_EXTRA(pPg), 0, pPager->nExtra);
drhdb48ee02003-01-16 13:42:43 +0000576 pPg->dirty = 0;
577 pPg->needSync = 0;
drhb20ea9d2004-02-09 01:20:36 +0000578 if( pPager->xCodec ){
579 pPager->xCodec(pPager->pCodecArg, PGHDR_TO_DATA(pPg), 0);
580 }
drhfa86c412002-02-02 15:01:15 +0000581 }
582 return rc;
583}
584
585/*
drhed7c8552001-04-11 14:29:21 +0000586** Playback the journal and thus restore the database file to
587** the state it was in before we started making changes.
588**
drh34e79ce2004-02-08 06:05:46 +0000589** The journal file format is as follows:
590**
591** * 8 byte prefix. One of the aJournalMagic123 vectors defined
592** above. The format of the journal file is determined by which
593** of the three prefix vectors is seen.
594** * 4 byte big-endian integer which is the number of valid page records
595** in the journal. If this value is 0xffffffff, then compute the
596** number of page records from the journal size. This field appears
597** in format 3 only.
598** * 4 byte big-endian integer which is the initial value for the
599** sanity checksum. This field appears in format 3 only.
600** * 4 byte integer which is the number of pages to truncate the
601** database to during a rollback.
602** * Zero or more pages instances, each as follows:
603** + 4 byte page number.
drhb20ea9d2004-02-09 01:20:36 +0000604** + SQLITE_BLOCK_SIZE bytes of data.
drh34e79ce2004-02-08 06:05:46 +0000605** + 4 byte checksum (format 3 only)
606**
607** When we speak of the journal header, we mean the first 4 bullets above.
608** Each entry in the journal is an instance of the 5th bullet. Note that
609** bullets 2 and 3 only appear in format-3 journals.
610**
611** Call the value from the second bullet "nRec". nRec is the number of
612** valid page entries in the journal. In most cases, you can compute the
613** value of nRec from the size of the journal file. But if a power
614** failure occurred while the journal was being written, it could be the
615** case that the size of the journal file had already been increased but
616** the extra entries had not yet made it safely to disk. In such a case,
617** the value of nRec computed from the file size would be too large. For
618** that reason, we always use the nRec value in the header.
619**
620** If the nRec value is 0xffffffff it means that nRec should be computed
621** from the file size. This value is used when the user selects the
622** no-sync option for the journal. A power failure could lead to corruption
623** in this case. But for things like temporary table (which will be
624** deleted when the power is restored) we don't care.
625**
626** Journal formats 1 and 2 do not have an nRec value in the header so we
627** have to compute nRec from the file size. This has risks (as described
628** above) which is why all persistent tables have been changed to use
629** format 3.
drhed7c8552001-04-11 14:29:21 +0000630**
drhd9b02572001-04-15 00:37:09 +0000631** If the file opened as the journal file is not a well-formed
drh34e79ce2004-02-08 06:05:46 +0000632** journal file then the database will likely already be
633** corrupted, so the PAGER_ERR_CORRUPT bit is set in pPager->errMask
634** and SQLITE_CORRUPT is returned. If it all works, then this routine
635** returns SQLITE_OK.
drhed7c8552001-04-11 14:29:21 +0000636*/
drh99ee3602003-02-16 19:13:36 +0000637static int pager_playback(Pager *pPager, int useJournalSize){
drh968af522003-02-11 14:55:40 +0000638 off_t szJ; /* Size of the journal file in bytes */
639 int nRec; /* Number of Records in the journal */
drhd9b02572001-04-15 00:37:09 +0000640 int i; /* Loop counter */
641 Pgno mxPg = 0; /* Size of the original file in pages */
drh968af522003-02-11 14:55:40 +0000642 int format; /* Format of the journal file. */
643 unsigned char aMagic[sizeof(aJournalMagic1)];
drhed7c8552001-04-11 14:29:21 +0000644 int rc;
645
drhc3a64ba2001-11-22 00:01:27 +0000646 /* Figure out how many records are in the journal. Abort early if
647 ** the journal is empty.
drhed7c8552001-04-11 14:29:21 +0000648 */
drh8cfbf082001-09-19 13:22:39 +0000649 assert( pPager->journalOpen );
drha7fcb052001-12-14 15:09:55 +0000650 sqliteOsSeek(&pPager->jfd, 0);
drh968af522003-02-11 14:55:40 +0000651 rc = sqliteOsFileSize(&pPager->jfd, &szJ);
drhc3a64ba2001-11-22 00:01:27 +0000652 if( rc!=SQLITE_OK ){
653 goto end_playback;
654 }
drh240c5792004-02-08 00:40:52 +0000655
656 /* If the journal file is too small to contain a complete header,
drh34e79ce2004-02-08 06:05:46 +0000657 ** it must mean that the process that created the journal was just
658 ** beginning to write the journal file when it died. In that case,
659 ** the database file should have still been completely unchanged.
660 ** Nothing needs to be rolled back. We can safely ignore this journal.
drh240c5792004-02-08 00:40:52 +0000661 */
drh968af522003-02-11 14:55:40 +0000662 if( szJ < sizeof(aMagic)+sizeof(Pgno) ){
drhc3a64ba2001-11-22 00:01:27 +0000663 goto end_playback;
664 }
665
666 /* Read the beginning of the journal and truncate the
667 ** database file back to its original size.
668 */
drha7fcb052001-12-14 15:09:55 +0000669 rc = sqliteOsRead(&pPager->jfd, aMagic, sizeof(aMagic));
drh94f33312002-08-12 12:29:56 +0000670 if( rc!=SQLITE_OK ){
drh81a20f22001-10-12 17:30:04 +0000671 rc = SQLITE_PROTOCOL;
672 goto end_playback;
drhd9b02572001-04-15 00:37:09 +0000673 }
drh968af522003-02-11 14:55:40 +0000674 if( memcmp(aMagic, aJournalMagic3, sizeof(aMagic))==0 ){
675 format = JOURNAL_FORMAT_3;
676 }else if( memcmp(aMagic, aJournalMagic2, sizeof(aMagic))==0 ){
677 format = JOURNAL_FORMAT_2;
678 }else if( memcmp(aMagic, aJournalMagic1, sizeof(aMagic))==0 ){
679 format = JOURNAL_FORMAT_1;
drh94f33312002-08-12 12:29:56 +0000680 }else{
681 rc = SQLITE_PROTOCOL;
682 goto end_playback;
683 }
drh968af522003-02-11 14:55:40 +0000684 if( format>=JOURNAL_FORMAT_3 ){
drh240c5792004-02-08 00:40:52 +0000685 if( szJ < sizeof(aMagic) + 3*sizeof(u32) ){
686 /* Ignore the journal if it is too small to contain a complete
687 ** header. We already did this test once above, but at the prior
688 ** test, we did not know the journal format and so we had to assume
689 ** the smallest possible header. Now we know the header is bigger
drh34e79ce2004-02-08 06:05:46 +0000690 ** than the minimum so we test again.
drh240c5792004-02-08 00:40:52 +0000691 */
692 goto end_playback;
693 }
drh133cdf62004-01-07 02:52:07 +0000694 rc = read32bits(format, &pPager->jfd, (u32*)&nRec);
drh968af522003-02-11 14:55:40 +0000695 if( rc ) goto end_playback;
696 rc = read32bits(format, &pPager->jfd, &pPager->cksumInit);
697 if( rc ) goto end_playback;
drh99ee3602003-02-16 19:13:36 +0000698 if( nRec==0xffffffff || useJournalSize ){
drh968af522003-02-11 14:55:40 +0000699 nRec = (szJ - JOURNAL_HDR_SZ(3))/JOURNAL_PG_SZ(3);
700 }
701 }else{
drhd8d66e82003-02-12 02:10:15 +0000702 nRec = (szJ - JOURNAL_HDR_SZ(2))/JOURNAL_PG_SZ(2);
703 assert( nRec*JOURNAL_PG_SZ(2)+JOURNAL_HDR_SZ(2)==szJ );
drh968af522003-02-11 14:55:40 +0000704 }
705 rc = read32bits(format, &pPager->jfd, &mxPg);
drhd9b02572001-04-15 00:37:09 +0000706 if( rc!=SQLITE_OK ){
drh81a20f22001-10-12 17:30:04 +0000707 goto end_playback;
drhd9b02572001-04-15 00:37:09 +0000708 }
drhd8d66e82003-02-12 02:10:15 +0000709 assert( pPager->origDbSize==0 || pPager->origDbSize==mxPg );
drhb20ea9d2004-02-09 01:20:36 +0000710 rc = sqliteOsTruncate(&pPager->fd, SQLITE_BLOCK_SIZE*(off_t)mxPg);
drh81a20f22001-10-12 17:30:04 +0000711 if( rc!=SQLITE_OK ){
712 goto end_playback;
713 }
drhd9b02572001-04-15 00:37:09 +0000714 pPager->dbSize = mxPg;
715
drhfa86c412002-02-02 15:01:15 +0000716 /* Copy original pages out of the journal and back into the database file.
drhed7c8552001-04-11 14:29:21 +0000717 */
drh968af522003-02-11 14:55:40 +0000718 for(i=0; i<nRec; i++){
719 rc = pager_playback_one_page(pPager, &pPager->jfd, format);
720 if( rc!=SQLITE_OK ){
721 if( rc==SQLITE_DONE ){
drh968af522003-02-11 14:55:40 +0000722 rc = SQLITE_OK;
723 }
724 break;
725 }
drhed7c8552001-04-11 14:29:21 +0000726 }
drh81a20f22001-10-12 17:30:04 +0000727
drh4a0681e2003-02-13 01:58:20 +0000728 /* Pages that have been written to the journal but never synced
729 ** where not restored by the loop above. We have to restore those
drh240c5792004-02-08 00:40:52 +0000730 ** pages by reading them back from the original database.
drhdb48ee02003-01-16 13:42:43 +0000731 */
732 if( rc==SQLITE_OK ){
733 PgHdr *pPg;
734 for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
drhb20ea9d2004-02-09 01:20:36 +0000735 char zBuf[SQLITE_BLOCK_SIZE];
drh4a0681e2003-02-13 01:58:20 +0000736 if( !pPg->dirty ) continue;
drhdb48ee02003-01-16 13:42:43 +0000737 if( (int)pPg->pgno <= pPager->origDbSize ){
drhb20ea9d2004-02-09 01:20:36 +0000738 sqliteOsSeek(&pPager->fd, SQLITE_BLOCK_SIZE*(off_t)(pPg->pgno-1));
739 rc = sqliteOsRead(&pPager->fd, zBuf, SQLITE_BLOCK_SIZE);
drhdb48ee02003-01-16 13:42:43 +0000740 if( rc ) break;
drhb20ea9d2004-02-09 01:20:36 +0000741 if( pPager->xCodec ){
742 pPager->xCodec(pPager->pCodecArg, zBuf, 0);
743 }
drhdb48ee02003-01-16 13:42:43 +0000744 }else{
drhb20ea9d2004-02-09 01:20:36 +0000745 memset(zBuf, 0, SQLITE_BLOCK_SIZE);
drhdb48ee02003-01-16 13:42:43 +0000746 }
drhb20ea9d2004-02-09 01:20:36 +0000747 if( pPg->nRef==0 || memcmp(zBuf, PGHDR_TO_DATA(pPg), SQLITE_BLOCK_SIZE) ){
748 memcpy(PGHDR_TO_DATA(pPg), zBuf, SQLITE_BLOCK_SIZE);
drh3a840692003-01-29 22:58:26 +0000749 memset(PGHDR_TO_EXTRA(pPg), 0, pPager->nExtra);
750 }
drhdb48ee02003-01-16 13:42:43 +0000751 pPg->needSync = 0;
752 pPg->dirty = 0;
753 }
754 }
drh4a0681e2003-02-13 01:58:20 +0000755
756end_playback:
drhd9b02572001-04-15 00:37:09 +0000757 if( rc!=SQLITE_OK ){
758 pager_unwritelock(pPager);
759 pPager->errMask |= PAGER_ERR_CORRUPT;
760 rc = SQLITE_CORRUPT;
761 }else{
762 rc = pager_unwritelock(pPager);
drhed7c8552001-04-11 14:29:21 +0000763 }
drhd9b02572001-04-15 00:37:09 +0000764 return rc;
drhed7c8552001-04-11 14:29:21 +0000765}
766
767/*
drhfa86c412002-02-02 15:01:15 +0000768** Playback the checkpoint journal.
769**
770** This is similar to playing back the transaction journal but with
771** a few extra twists.
772**
drh663fc632002-02-02 18:49:19 +0000773** (1) The number of pages in the database file at the start of
774** the checkpoint is stored in pPager->ckptSize, not in the
775** journal file itself.
drhfa86c412002-02-02 15:01:15 +0000776**
777** (2) In addition to playing back the checkpoint journal, also
778** playback all pages of the transaction journal beginning
779** at offset pPager->ckptJSize.
780*/
781static int pager_ckpt_playback(Pager *pPager){
drh968af522003-02-11 14:55:40 +0000782 off_t szJ; /* Size of the full journal */
783 int nRec; /* Number of Records */
drhfa86c412002-02-02 15:01:15 +0000784 int i; /* Loop counter */
785 int rc;
786
787 /* Truncate the database back to its original size.
788 */
drhb20ea9d2004-02-09 01:20:36 +0000789 rc = sqliteOsTruncate(&pPager->fd, SQLITE_BLOCK_SIZE*(off_t)pPager->ckptSize);
drhfa86c412002-02-02 15:01:15 +0000790 pPager->dbSize = pPager->ckptSize;
791
792 /* Figure out how many records are in the checkpoint journal.
793 */
drh0f892532002-05-30 12:27:03 +0000794 assert( pPager->ckptInUse && pPager->journalOpen );
drhfa86c412002-02-02 15:01:15 +0000795 sqliteOsSeek(&pPager->cpfd, 0);
drh9bd47a92003-01-07 14:46:08 +0000796 nRec = pPager->ckptNRec;
drhfa86c412002-02-02 15:01:15 +0000797
798 /* Copy original pages out of the checkpoint journal and back into the
drh968af522003-02-11 14:55:40 +0000799 ** database file. Note that the checkpoint journal always uses format
800 ** 2 instead of format 3 since it does not need to be concerned with
801 ** power failures corrupting the journal and can thus omit the checksums.
drhfa86c412002-02-02 15:01:15 +0000802 */
803 for(i=nRec-1; i>=0; i--){
drh968af522003-02-11 14:55:40 +0000804 rc = pager_playback_one_page(pPager, &pPager->cpfd, 2);
805 assert( rc!=SQLITE_DONE );
drhfa86c412002-02-02 15:01:15 +0000806 if( rc!=SQLITE_OK ) goto end_ckpt_playback;
807 }
808
809 /* Figure out how many pages need to be copied out of the transaction
810 ** journal.
811 */
812 rc = sqliteOsSeek(&pPager->jfd, pPager->ckptJSize);
813 if( rc!=SQLITE_OK ){
814 goto end_ckpt_playback;
815 }
drh968af522003-02-11 14:55:40 +0000816 rc = sqliteOsFileSize(&pPager->jfd, &szJ);
drhfa86c412002-02-02 15:01:15 +0000817 if( rc!=SQLITE_OK ){
818 goto end_ckpt_playback;
819 }
drh968af522003-02-11 14:55:40 +0000820 nRec = (szJ - pPager->ckptJSize)/JOURNAL_PG_SZ(journal_format);
drhfa86c412002-02-02 15:01:15 +0000821 for(i=nRec-1; i>=0; i--){
drh968af522003-02-11 14:55:40 +0000822 rc = pager_playback_one_page(pPager, &pPager->jfd, journal_format);
823 if( rc!=SQLITE_OK ){
824 assert( rc!=SQLITE_DONE );
825 goto end_ckpt_playback;
826 }
drhfa86c412002-02-02 15:01:15 +0000827 }
828
drhfa86c412002-02-02 15:01:15 +0000829end_ckpt_playback:
drhfa86c412002-02-02 15:01:15 +0000830 if( rc!=SQLITE_OK ){
drhfa86c412002-02-02 15:01:15 +0000831 pPager->errMask |= PAGER_ERR_CORRUPT;
832 rc = SQLITE_CORRUPT;
drhfa86c412002-02-02 15:01:15 +0000833 }
834 return rc;
835}
836
837/*
drhf57b14a2001-09-14 18:54:08 +0000838** Change the maximum number of in-memory pages that are allowed.
drhcd61c282002-03-06 22:01:34 +0000839**
840** The maximum number is the absolute value of the mxPage parameter.
841** If mxPage is negative, the noSync flag is also set. noSync bypasses
842** calls to sqliteOsSync(). The pager runs much faster with noSync on,
843** but if the operating system crashes or there is an abrupt power
844** failure, the database file might be left in an inconsistent and
845** unrepairable state.
drhf57b14a2001-09-14 18:54:08 +0000846*/
847void sqlitepager_set_cachesize(Pager *pPager, int mxPage){
drh603240c2002-03-05 01:11:12 +0000848 if( mxPage>=0 ){
drha1680452002-04-18 01:56:57 +0000849 pPager->noSync = pPager->tempFile;
drh603240c2002-03-05 01:11:12 +0000850 }else{
851 pPager->noSync = 1;
852 mxPage = -mxPage;
853 }
drhf57b14a2001-09-14 18:54:08 +0000854 if( mxPage>10 ){
855 pPager->mxPage = mxPage;
856 }
857}
858
859/*
drh973b6e32003-02-12 14:09:42 +0000860** Adjust the robustness of the database to damage due to OS crashes
861** or power failures by changing the number of syncs()s when writing
862** the rollback journal. There are three levels:
863**
864** OFF sqliteOsSync() is never called. This is the default
865** for temporary and transient files.
866**
867** NORMAL The journal is synced once before writes begin on the
868** database. This is normally adequate protection, but
869** it is theoretically possible, though very unlikely,
870** that an inopertune power failure could leave the journal
871** in a state which would cause damage to the database
872** when it is rolled back.
873**
874** FULL The journal is synced twice before writes begin on the
drh34e79ce2004-02-08 06:05:46 +0000875** database (with some additional information - the nRec field
876** of the journal header - being written in between the two
877** syncs). If we assume that writing a
drh973b6e32003-02-12 14:09:42 +0000878** single disk sector is atomic, then this mode provides
879** assurance that the journal will not be corrupted to the
880** point of causing damage to the database during rollback.
881**
882** Numeric values associated with these states are OFF==1, NORMAL=2,
883** and FULL=3.
884*/
885void sqlitepager_set_safety_level(Pager *pPager, int level){
886 pPager->noSync = level==1 || pPager->tempFile;
887 pPager->fullSync = level==3 && !pPager->tempFile;
888}
889
890/*
drhfa86c412002-02-02 15:01:15 +0000891** Open a temporary file. Write the name of the file into zName
892** (zName must be at least SQLITE_TEMPNAME_SIZE bytes long.) Write
893** the file descriptor into *fd. Return SQLITE_OK on success or some
894** other error code if we fail.
895**
896** The OS will automatically delete the temporary file when it is
897** closed.
898*/
899static int sqlitepager_opentemp(char *zFile, OsFile *fd){
900 int cnt = 8;
901 int rc;
902 do{
903 cnt--;
904 sqliteOsTempFileName(zFile);
905 rc = sqliteOsOpenExclusive(zFile, fd, 1);
906 }while( cnt>0 && rc!=SQLITE_OK );
907 return rc;
908}
909
910/*
drhed7c8552001-04-11 14:29:21 +0000911** Create a new page cache and put a pointer to the page cache in *ppPager.
drh5e00f6c2001-09-13 13:46:56 +0000912** The file to be cached need not exist. The file is not locked until
drhd9b02572001-04-15 00:37:09 +0000913** the first call to sqlitepager_get() and is only held open until the
914** last page is released using sqlitepager_unref().
drh382c0242001-10-06 16:33:02 +0000915**
drh6446c4d2001-12-15 14:22:18 +0000916** If zFilename is NULL then a randomly-named temporary file is created
917** and used as the file to be cached. The file will be deleted
918** automatically when it is closed.
drhed7c8552001-04-11 14:29:21 +0000919*/
drh7e3b0a02001-04-28 16:52:40 +0000920int sqlitepager_open(
921 Pager **ppPager, /* Return the Pager structure here */
922 const char *zFilename, /* Name of the database file to open */
923 int mxPage, /* Max number of in-memory cache pages */
drhda47d772002-12-02 04:25:19 +0000924 int nExtra, /* Extra bytes append to each in-memory page */
925 int useJournal /* TRUE to use a rollback journal on this file */
drh7e3b0a02001-04-28 16:52:40 +0000926){
drhed7c8552001-04-11 14:29:21 +0000927 Pager *pPager;
drh3e7a6092002-12-07 21:45:14 +0000928 char *zFullPathname;
drhed7c8552001-04-11 14:29:21 +0000929 int nameLen;
drh8cfbf082001-09-19 13:22:39 +0000930 OsFile fd;
drha76c82e2003-07-27 18:59:42 +0000931 int rc, i;
drh5e00f6c2001-09-13 13:46:56 +0000932 int tempFile;
933 int readOnly = 0;
drh8cfbf082001-09-19 13:22:39 +0000934 char zTemp[SQLITE_TEMPNAME_SIZE];
drhed7c8552001-04-11 14:29:21 +0000935
drhd9b02572001-04-15 00:37:09 +0000936 *ppPager = 0;
937 if( sqlite_malloc_failed ){
938 return SQLITE_NOMEM;
939 }
drh901afd42003-08-26 11:25:58 +0000940 if( zFilename && zFilename[0] ){
drh3e7a6092002-12-07 21:45:14 +0000941 zFullPathname = sqliteOsFullPathname(zFilename);
942 rc = sqliteOsOpenReadWrite(zFullPathname, &fd, &readOnly);
drh5e00f6c2001-09-13 13:46:56 +0000943 tempFile = 0;
944 }else{
drhfa86c412002-02-02 15:01:15 +0000945 rc = sqlitepager_opentemp(zTemp, &fd);
drh5e00f6c2001-09-13 13:46:56 +0000946 zFilename = zTemp;
drh3e7a6092002-12-07 21:45:14 +0000947 zFullPathname = sqliteOsFullPathname(zFilename);
drh5e00f6c2001-09-13 13:46:56 +0000948 tempFile = 1;
949 }
drh3e7a6092002-12-07 21:45:14 +0000950 if( sqlite_malloc_failed ){
951 return SQLITE_NOMEM;
952 }
drh8cfbf082001-09-19 13:22:39 +0000953 if( rc!=SQLITE_OK ){
drh3e7a6092002-12-07 21:45:14 +0000954 sqliteFree(zFullPathname);
drhed7c8552001-04-11 14:29:21 +0000955 return SQLITE_CANTOPEN;
956 }
drh3e7a6092002-12-07 21:45:14 +0000957 nameLen = strlen(zFullPathname);
drha76c82e2003-07-27 18:59:42 +0000958 pPager = sqliteMalloc( sizeof(*pPager) + nameLen*3 + 30 );
drhd9b02572001-04-15 00:37:09 +0000959 if( pPager==0 ){
drha7fcb052001-12-14 15:09:55 +0000960 sqliteOsClose(&fd);
drh3e7a6092002-12-07 21:45:14 +0000961 sqliteFree(zFullPathname);
drhd9b02572001-04-15 00:37:09 +0000962 return SQLITE_NOMEM;
963 }
drhdb48ee02003-01-16 13:42:43 +0000964 SET_PAGER(pPager);
drhed7c8552001-04-11 14:29:21 +0000965 pPager->zFilename = (char*)&pPager[1];
drha76c82e2003-07-27 18:59:42 +0000966 pPager->zDirectory = &pPager->zFilename[nameLen+1];
967 pPager->zJournal = &pPager->zDirectory[nameLen+1];
drh3e7a6092002-12-07 21:45:14 +0000968 strcpy(pPager->zFilename, zFullPathname);
drha76c82e2003-07-27 18:59:42 +0000969 strcpy(pPager->zDirectory, zFullPathname);
970 for(i=nameLen; i>0 && pPager->zDirectory[i-1]!='/'; i--){}
971 if( i>0 ) pPager->zDirectory[i-1] = 0;
drh3e7a6092002-12-07 21:45:14 +0000972 strcpy(pPager->zJournal, zFullPathname);
973 sqliteFree(zFullPathname);
drhed7c8552001-04-11 14:29:21 +0000974 strcpy(&pPager->zJournal[nameLen], "-journal");
975 pPager->fd = fd;
drh8cfbf082001-09-19 13:22:39 +0000976 pPager->journalOpen = 0;
drhda47d772002-12-02 04:25:19 +0000977 pPager->useJournal = useJournal;
drhfa86c412002-02-02 15:01:15 +0000978 pPager->ckptOpen = 0;
drh0f892532002-05-30 12:27:03 +0000979 pPager->ckptInUse = 0;
drhed7c8552001-04-11 14:29:21 +0000980 pPager->nRef = 0;
981 pPager->dbSize = -1;
drhfa86c412002-02-02 15:01:15 +0000982 pPager->ckptSize = 0;
983 pPager->ckptJSize = 0;
drhed7c8552001-04-11 14:29:21 +0000984 pPager->nPage = 0;
drhd79caeb2001-04-15 02:27:24 +0000985 pPager->mxPage = mxPage>5 ? mxPage : 10;
drhed7c8552001-04-11 14:29:21 +0000986 pPager->state = SQLITE_UNLOCK;
drhd9b02572001-04-15 00:37:09 +0000987 pPager->errMask = 0;
drh5e00f6c2001-09-13 13:46:56 +0000988 pPager->tempFile = tempFile;
989 pPager->readOnly = readOnly;
drhf57b14a2001-09-14 18:54:08 +0000990 pPager->needSync = 0;
drhda47d772002-12-02 04:25:19 +0000991 pPager->noSync = pPager->tempFile || !useJournal;
drhed7c8552001-04-11 14:29:21 +0000992 pPager->pFirst = 0;
drh341eae82003-01-21 02:39:36 +0000993 pPager->pFirstSynced = 0;
drhed7c8552001-04-11 14:29:21 +0000994 pPager->pLast = 0;
drh7c717f72001-06-24 20:39:41 +0000995 pPager->nExtra = nExtra;
drhed7c8552001-04-11 14:29:21 +0000996 memset(pPager->aHash, 0, sizeof(pPager->aHash));
997 *ppPager = pPager;
998 return SQLITE_OK;
999}
1000
1001/*
drh72f82862001-05-24 21:06:34 +00001002** Set the destructor for this pager. If not NULL, the destructor is called
drh5e00f6c2001-09-13 13:46:56 +00001003** when the reference count on each page reaches zero. The destructor can
1004** be used to clean up information in the extra segment appended to each page.
drh72f82862001-05-24 21:06:34 +00001005**
1006** The destructor is not called as a result sqlitepager_close().
1007** Destructors are only called by sqlitepager_unref().
1008*/
1009void sqlitepager_set_destructor(Pager *pPager, void (*xDesc)(void*)){
1010 pPager->xDestructor = xDesc;
1011}
1012
1013/*
drh5e00f6c2001-09-13 13:46:56 +00001014** Return the total number of pages in the disk file associated with
1015** pPager.
drhed7c8552001-04-11 14:29:21 +00001016*/
drhd9b02572001-04-15 00:37:09 +00001017int sqlitepager_pagecount(Pager *pPager){
drh28be87c2002-11-05 23:03:02 +00001018 off_t n;
drhd9b02572001-04-15 00:37:09 +00001019 assert( pPager!=0 );
drhed7c8552001-04-11 14:29:21 +00001020 if( pPager->dbSize>=0 ){
1021 return pPager->dbSize;
1022 }
drha7fcb052001-12-14 15:09:55 +00001023 if( sqliteOsFileSize(&pPager->fd, &n)!=SQLITE_OK ){
drh81a20f22001-10-12 17:30:04 +00001024 pPager->errMask |= PAGER_ERR_DISK;
drh8cfbf082001-09-19 13:22:39 +00001025 return 0;
drhed7c8552001-04-11 14:29:21 +00001026 }
drhb20ea9d2004-02-09 01:20:36 +00001027 n /= SQLITE_BLOCK_SIZE;
drhd9b02572001-04-15 00:37:09 +00001028 if( pPager->state!=SQLITE_UNLOCK ){
drhed7c8552001-04-11 14:29:21 +00001029 pPager->dbSize = n;
1030 }
1031 return n;
1032}
1033
/*
** Forward declaration.  syncJournal() is defined later in this file
** but is called by sqlitepager_truncate() just below.
*/
static int syncJournal(Pager*);
drhf7c57532003-04-25 13:22:51 +00001038
1039/*
1040** Truncate the file to the number of pages specified.
1041*/
1042int sqlitepager_truncate(Pager *pPager, Pgno nPage){
1043 int rc;
drh2e6d11b2003-04-25 15:37:57 +00001044 if( pPager->dbSize<0 ){
1045 sqlitepager_pagecount(pPager);
1046 }
1047 if( pPager->errMask!=0 ){
1048 rc = pager_errcode(pPager);
1049 return rc;
1050 }
drh7d02cb72003-06-04 16:24:39 +00001051 if( nPage>=(unsigned)pPager->dbSize ){
drhf7c57532003-04-25 13:22:51 +00001052 return SQLITE_OK;
1053 }
drh34e79ce2004-02-08 06:05:46 +00001054 syncJournal(pPager);
drhb20ea9d2004-02-09 01:20:36 +00001055 rc = sqliteOsTruncate(&pPager->fd, SQLITE_BLOCK_SIZE*(off_t)nPage);
drhf7c57532003-04-25 13:22:51 +00001056 if( rc==SQLITE_OK ){
1057 pPager->dbSize = nPage;
1058 }
1059 return rc;
1060}
1061
1062/*
drhed7c8552001-04-11 14:29:21 +00001063** Shutdown the page cache. Free all memory and close all files.
1064**
1065** If a transaction was in progress when this routine is called, that
1066** transaction is rolled back. All outstanding pages are invalidated
1067** and their memory is freed. Any attempt to use a page associated
1068** with this page cache after this function returns will likely
1069** result in a coredump.
1070*/
drhd9b02572001-04-15 00:37:09 +00001071int sqlitepager_close(Pager *pPager){
1072 PgHdr *pPg, *pNext;
drhed7c8552001-04-11 14:29:21 +00001073 switch( pPager->state ){
1074 case SQLITE_WRITELOCK: {
drhd9b02572001-04-15 00:37:09 +00001075 sqlitepager_rollback(pPager);
drha7fcb052001-12-14 15:09:55 +00001076 sqliteOsUnlock(&pPager->fd);
drh8cfbf082001-09-19 13:22:39 +00001077 assert( pPager->journalOpen==0 );
drhed7c8552001-04-11 14:29:21 +00001078 break;
1079 }
1080 case SQLITE_READLOCK: {
drha7fcb052001-12-14 15:09:55 +00001081 sqliteOsUnlock(&pPager->fd);
drhed7c8552001-04-11 14:29:21 +00001082 break;
1083 }
1084 default: {
1085 /* Do nothing */
1086 break;
1087 }
1088 }
drhd9b02572001-04-15 00:37:09 +00001089 for(pPg=pPager->pAll; pPg; pPg=pNext){
1090 pNext = pPg->pNextAll;
1091 sqliteFree(pPg);
drhed7c8552001-04-11 14:29:21 +00001092 }
drha7fcb052001-12-14 15:09:55 +00001093 sqliteOsClose(&pPager->fd);
drh8cfbf082001-09-19 13:22:39 +00001094 assert( pPager->journalOpen==0 );
drh0f892532002-05-30 12:27:03 +00001095 /* Temp files are automatically deleted by the OS
1096 ** if( pPager->tempFile ){
1097 ** sqliteOsDelete(pPager->zFilename);
1098 ** }
1099 */
drhdb48ee02003-01-16 13:42:43 +00001100 CLR_PAGER(pPager);
drh73509ee2003-04-06 20:44:45 +00001101 if( pPager->zFilename!=(char*)&pPager[1] ){
drha76c82e2003-07-27 18:59:42 +00001102 assert( 0 ); /* Cannot happen */
drh73509ee2003-04-06 20:44:45 +00001103 sqliteFree(pPager->zFilename);
1104 sqliteFree(pPager->zJournal);
drha76c82e2003-07-27 18:59:42 +00001105 sqliteFree(pPager->zDirectory);
drh73509ee2003-04-06 20:44:45 +00001106 }
drhed7c8552001-04-11 14:29:21 +00001107 sqliteFree(pPager);
1108 return SQLITE_OK;
1109}
1110
1111/*
drh5e00f6c2001-09-13 13:46:56 +00001112** Return the page number for the given page data.
drhed7c8552001-04-11 14:29:21 +00001113*/
drhd9b02572001-04-15 00:37:09 +00001114Pgno sqlitepager_pagenumber(void *pData){
drhed7c8552001-04-11 14:29:21 +00001115 PgHdr *p = DATA_TO_PGHDR(pData);
1116 return p->pgno;
1117}
1118
1119/*
drh7e3b0a02001-04-28 16:52:40 +00001120** Increment the reference count for a page. If the page is
1121** currently on the freelist (the reference count is zero) then
1122** remove it from the freelist.
1123*/
drh836faa42003-01-11 13:30:57 +00001124#define page_ref(P) ((P)->nRef==0?_page_ref(P):(void)(P)->nRef++)
1125static void _page_ref(PgHdr *pPg){
drh7e3b0a02001-04-28 16:52:40 +00001126 if( pPg->nRef==0 ){
1127 /* The page is currently on the freelist. Remove it. */
drh341eae82003-01-21 02:39:36 +00001128 if( pPg==pPg->pPager->pFirstSynced ){
1129 PgHdr *p = pPg->pNextFree;
1130 while( p && p->needSync ){ p = p->pNextFree; }
1131 pPg->pPager->pFirstSynced = p;
1132 }
drh7e3b0a02001-04-28 16:52:40 +00001133 if( pPg->pPrevFree ){
1134 pPg->pPrevFree->pNextFree = pPg->pNextFree;
1135 }else{
1136 pPg->pPager->pFirst = pPg->pNextFree;
1137 }
1138 if( pPg->pNextFree ){
1139 pPg->pNextFree->pPrevFree = pPg->pPrevFree;
1140 }else{
1141 pPg->pPager->pLast = pPg->pPrevFree;
1142 }
1143 pPg->pPager->nRef++;
1144 }
1145 pPg->nRef++;
drhdd793422001-06-28 01:54:48 +00001146 REFINFO(pPg);
drhdf0b3b02001-06-23 11:36:20 +00001147}
1148
1149/*
1150** Increment the reference count for a page. The input pointer is
1151** a reference to the page data.
1152*/
1153int sqlitepager_ref(void *pData){
1154 PgHdr *pPg = DATA_TO_PGHDR(pData);
1155 page_ref(pPg);
drh8c42ca92001-06-22 19:15:00 +00001156 return SQLITE_OK;
drh7e3b0a02001-04-28 16:52:40 +00001157}
1158
1159/*
drh34e79ce2004-02-08 06:05:46 +00001160** Sync the journal. In other words, make sure all the pages that have
1161** been written to the journal have actually reached the surface of the
1162** disk. It is not safe to modify the original database file until after
1163** the journal has been synced. If the original database is modified before
1164** the journal is synced and a power failure occurs, the unsynced journal
1165** data would be lost and we would be unable to completely rollback the
1166** database changes. Database corruption would occur.
1167**
1168** This routine also updates the nRec field in the header of the journal.
1169** (See comments on the pager_playback() routine for additional information.)
1170** If the sync mode is FULL, two syncs will occur. First the whole journal
1171** is synced, then the nRec field is updated, then a second sync occurs.
drhb19a2bc2001-09-16 00:13:26 +00001172**
drh34e79ce2004-02-08 06:05:46 +00001173** For temporary databases, we do not care if we are able to rollback
1174** after a power failure, so sync occurs.
drhfa86c412002-02-02 15:01:15 +00001175**
drh34e79ce2004-02-08 06:05:46 +00001176** This routine clears the needSync field of every page current held in
1177** memory.
drh50e5dad2001-09-15 00:57:28 +00001178*/
drh34e79ce2004-02-08 06:05:46 +00001179static int syncJournal(Pager *pPager){
drh50e5dad2001-09-15 00:57:28 +00001180 PgHdr *pPg;
1181 int rc = SQLITE_OK;
drh03eb96a2002-11-10 23:32:56 +00001182
1183 /* Sync the journal before modifying the main database
1184 ** (assuming there is a journal and it needs to be synced.)
1185 */
drh50e5dad2001-09-15 00:57:28 +00001186 if( pPager->needSync ){
drhfa86c412002-02-02 15:01:15 +00001187 if( !pPager->tempFile ){
drhdb48ee02003-01-16 13:42:43 +00001188 assert( pPager->journalOpen );
1189 assert( !pPager->noSync );
drh968af522003-02-11 14:55:40 +00001190#ifndef NDEBUG
1191 {
drh34e79ce2004-02-08 06:05:46 +00001192 /* Make sure the pPager->nRec counter we are keeping agrees
1193 ** with the nRec computed from the size of the journal file.
1194 */
drh4a0681e2003-02-13 01:58:20 +00001195 off_t hdrSz, pgSz, jSz;
drh968af522003-02-11 14:55:40 +00001196 hdrSz = JOURNAL_HDR_SZ(journal_format);
1197 pgSz = JOURNAL_PG_SZ(journal_format);
drh4a0681e2003-02-13 01:58:20 +00001198 rc = sqliteOsFileSize(&pPager->jfd, &jSz);
drh968af522003-02-11 14:55:40 +00001199 if( rc!=0 ) return rc;
drh4a0681e2003-02-13 01:58:20 +00001200 assert( pPager->nRec*pgSz+hdrSz==jSz );
drh968af522003-02-11 14:55:40 +00001201 }
1202#endif
drhd8d66e82003-02-12 02:10:15 +00001203 if( journal_format>=3 ){
drh34e79ce2004-02-08 06:05:46 +00001204 /* Write the nRec value into the journal file header */
drhd8d66e82003-02-12 02:10:15 +00001205 off_t szJ;
1206 if( pPager->fullSync ){
1207 TRACE1("SYNC\n");
1208 rc = sqliteOsSync(&pPager->jfd);
1209 if( rc!=0 ) return rc;
1210 }
1211 sqliteOsSeek(&pPager->jfd, sizeof(aJournalMagic1));
drh99ee3602003-02-16 19:13:36 +00001212 rc = write32bits(&pPager->jfd, pPager->nRec);
1213 if( rc ) return rc;
drhd8d66e82003-02-12 02:10:15 +00001214 szJ = JOURNAL_HDR_SZ(journal_format) +
1215 pPager->nRec*JOURNAL_PG_SZ(journal_format);
1216 sqliteOsSeek(&pPager->jfd, szJ);
drh968af522003-02-11 14:55:40 +00001217 }
drhdb48ee02003-01-16 13:42:43 +00001218 TRACE1("SYNC\n");
drhfa86c412002-02-02 15:01:15 +00001219 rc = sqliteOsSync(&pPager->jfd);
1220 if( rc!=0 ) return rc;
drhdb48ee02003-01-16 13:42:43 +00001221 pPager->journalStarted = 1;
drhfa86c412002-02-02 15:01:15 +00001222 }
drh50e5dad2001-09-15 00:57:28 +00001223 pPager->needSync = 0;
drh341eae82003-01-21 02:39:36 +00001224
1225 /* Erase the needSync flag from every page.
1226 */
1227 for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
1228 pPg->needSync = 0;
1229 }
1230 pPager->pFirstSynced = pPager->pFirst;
drh50e5dad2001-09-15 00:57:28 +00001231 }
drh03eb96a2002-11-10 23:32:56 +00001232
drh341eae82003-01-21 02:39:36 +00001233#ifndef NDEBUG
1234 /* If the Pager.needSync flag is clear then the PgHdr.needSync
1235 ** flag must also be clear for all pages. Verify that this
1236 ** invariant is true.
drh03eb96a2002-11-10 23:32:56 +00001237 */
drh341eae82003-01-21 02:39:36 +00001238 else{
1239 for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
1240 assert( pPg->needSync==0 );
1241 }
1242 assert( pPager->pFirstSynced==pPager->pFirst );
drh03eb96a2002-11-10 23:32:56 +00001243 }
drh341eae82003-01-21 02:39:36 +00001244#endif
drhdb48ee02003-01-16 13:42:43 +00001245
drh81a20f22001-10-12 17:30:04 +00001246 return rc;
drh50e5dad2001-09-15 00:57:28 +00001247}
1248
1249/*
drh2554f8b2003-01-22 01:26:44 +00001250** Given a list of pages (connected by the PgHdr.pDirty pointer) write
1251** every one of those pages out to the database file and mark them all
1252** as clean.
1253*/
1254static int pager_write_pagelist(PgHdr *pList){
1255 Pager *pPager;
1256 int rc;
1257
1258 if( pList==0 ) return SQLITE_OK;
1259 pPager = pList->pPager;
1260 while( pList ){
1261 assert( pList->dirty );
drhb20ea9d2004-02-09 01:20:36 +00001262 sqliteOsSeek(&pPager->fd, (pList->pgno-1)*(off_t)SQLITE_BLOCK_SIZE);
1263 if( pPager->xCodec ){
1264 pPager->xCodec(pPager->pCodecArg, PGHDR_TO_DATA(pList), 1);
1265 }
1266 rc = sqliteOsWrite(&pPager->fd, PGHDR_TO_DATA(pList), SQLITE_BLOCK_SIZE);
1267 if( pPager->xCodec ){
1268 pPager->xCodec(pPager->pCodecArg, PGHDR_TO_DATA(pList), 0);
1269 }
drh2554f8b2003-01-22 01:26:44 +00001270 if( rc ) return rc;
1271 pList->dirty = 0;
1272 pList = pList->pDirty;
1273 }
1274 return SQLITE_OK;
1275}
1276
1277/*
1278** Collect every dirty page into a dirty list and
1279** return a pointer to the head of that list. All pages are
1280** collected even if they are still in use.
1281*/
1282static PgHdr *pager_get_all_dirty_pages(Pager *pPager){
1283 PgHdr *p, *pList;
1284 pList = 0;
1285 for(p=pPager->pAll; p; p=p->pNextAll){
1286 if( p->dirty ){
1287 p->pDirty = pList;
1288 pList = p;
1289 }
1290 }
1291 return pList;
1292}
1293
1294/*
drhd9b02572001-04-15 00:37:09 +00001295** Acquire a page.
1296**
drh58a11682001-11-10 13:51:08 +00001297** A read lock on the disk file is obtained when the first page is acquired.
drh5e00f6c2001-09-13 13:46:56 +00001298** This read lock is dropped when the last page is released.
drhd9b02572001-04-15 00:37:09 +00001299**
drh306dc212001-05-21 13:45:10 +00001300** A _get works for any page number greater than 0. If the database
1301** file is smaller than the requested page, then no actual disk
1302** read occurs and the memory image of the page is initialized to
1303** all zeros. The extra data appended to a page is always initialized
1304** to zeros the first time a page is loaded into memory.
1305**
drhd9b02572001-04-15 00:37:09 +00001306** The acquisition might fail for several reasons. In all cases,
1307** an appropriate error code is returned and *ppPage is set to NULL.
drh7e3b0a02001-04-28 16:52:40 +00001308**
1309** See also sqlitepager_lookup(). Both this routine and _lookup() attempt
1310** to find a page in the in-memory cache first. If the page is not already
drh5e00f6c2001-09-13 13:46:56 +00001311** in memory, this routine goes to disk to read it in whereas _lookup()
drh7e3b0a02001-04-28 16:52:40 +00001312** just returns 0. This routine acquires a read-lock the first time it
1313** has to go to disk, and could also playback an old journal if necessary.
1314** Since _lookup() never goes to disk, it never has to deal with locks
1315** or journal files.
drhed7c8552001-04-11 14:29:21 +00001316*/
drhd9b02572001-04-15 00:37:09 +00001317int sqlitepager_get(Pager *pPager, Pgno pgno, void **ppPage){
drhed7c8552001-04-11 14:29:21 +00001318 PgHdr *pPg;
drh8766c342002-11-09 00:33:15 +00001319 int rc;
drhed7c8552001-04-11 14:29:21 +00001320
drhd9b02572001-04-15 00:37:09 +00001321 /* Make sure we have not hit any critical errors.
1322 */
drh836faa42003-01-11 13:30:57 +00001323 assert( pPager!=0 );
1324 assert( pgno!=0 );
drh2e6d11b2003-04-25 15:37:57 +00001325 *ppPage = 0;
drhd9b02572001-04-15 00:37:09 +00001326 if( pPager->errMask & ~(PAGER_ERR_FULL) ){
1327 return pager_errcode(pPager);
1328 }
1329
drhed7c8552001-04-11 14:29:21 +00001330 /* If this is the first page accessed, then get a read lock
1331 ** on the database file.
1332 */
1333 if( pPager->nRef==0 ){
drh8766c342002-11-09 00:33:15 +00001334 rc = sqliteOsReadLock(&pPager->fd);
1335 if( rc!=SQLITE_OK ){
drh8766c342002-11-09 00:33:15 +00001336 return rc;
drhed7c8552001-04-11 14:29:21 +00001337 }
drhd9b02572001-04-15 00:37:09 +00001338 pPager->state = SQLITE_READLOCK;
drhed7c8552001-04-11 14:29:21 +00001339
1340 /* If a journal file exists, try to play it back.
1341 */
drhda47d772002-12-02 04:25:19 +00001342 if( pPager->useJournal && sqliteOsFileExists(pPager->zJournal) ){
drhe2227f02003-06-14 11:42:57 +00001343 int rc;
drhed7c8552001-04-11 14:29:21 +00001344
drha7fcb052001-12-14 15:09:55 +00001345 /* Get a write lock on the database
1346 */
1347 rc = sqliteOsWriteLock(&pPager->fd);
1348 if( rc!=SQLITE_OK ){
drh8766c342002-11-09 00:33:15 +00001349 if( sqliteOsUnlock(&pPager->fd)!=SQLITE_OK ){
1350 /* This should never happen! */
1351 rc = SQLITE_INTERNAL;
1352 }
drh8766c342002-11-09 00:33:15 +00001353 return rc;
drha7fcb052001-12-14 15:09:55 +00001354 }
1355 pPager->state = SQLITE_WRITELOCK;
1356
drhe2227f02003-06-14 11:42:57 +00001357 /* Open the journal for reading only. Return SQLITE_BUSY if
1358 ** we are unable to open the journal file.
drhf57b3392001-10-08 13:22:32 +00001359 **
drhe2227f02003-06-14 11:42:57 +00001360 ** The journal file does not need to be locked itself. The
1361 ** journal file is never open unless the main database file holds
1362 ** a write lock, so there is never any chance of two or more
1363 ** processes opening the journal at the same time.
drhed7c8552001-04-11 14:29:21 +00001364 */
drhe2227f02003-06-14 11:42:57 +00001365 rc = sqliteOsOpenReadOnly(pPager->zJournal, &pPager->jfd);
drha7fcb052001-12-14 15:09:55 +00001366 if( rc!=SQLITE_OK ){
1367 rc = sqliteOsUnlock(&pPager->fd);
1368 assert( rc==SQLITE_OK );
drhed7c8552001-04-11 14:29:21 +00001369 return SQLITE_BUSY;
1370 }
drha7fcb052001-12-14 15:09:55 +00001371 pPager->journalOpen = 1;
drhdb48ee02003-01-16 13:42:43 +00001372 pPager->journalStarted = 0;
drhed7c8552001-04-11 14:29:21 +00001373
1374 /* Playback and delete the journal. Drop the database write
1375 ** lock and reacquire the read lock.
1376 */
drh99ee3602003-02-16 19:13:36 +00001377 rc = pager_playback(pPager, 0);
drhd9b02572001-04-15 00:37:09 +00001378 if( rc!=SQLITE_OK ){
1379 return rc;
1380 }
drhed7c8552001-04-11 14:29:21 +00001381 }
1382 pPg = 0;
1383 }else{
1384 /* Search for page in cache */
drhd9b02572001-04-15 00:37:09 +00001385 pPg = pager_lookup(pPager, pgno);
drhed7c8552001-04-11 14:29:21 +00001386 }
1387 if( pPg==0 ){
drhd9b02572001-04-15 00:37:09 +00001388 /* The requested page is not in the page cache. */
drhed7c8552001-04-11 14:29:21 +00001389 int h;
drh7e3b0a02001-04-28 16:52:40 +00001390 pPager->nMiss++;
drhed7c8552001-04-11 14:29:21 +00001391 if( pPager->nPage<pPager->mxPage || pPager->pFirst==0 ){
1392 /* Create a new page */
drhb20ea9d2004-02-09 01:20:36 +00001393 pPg = sqliteMallocRaw( sizeof(*pPg) + SQLITE_BLOCK_SIZE
drh968af522003-02-11 14:55:40 +00001394 + sizeof(u32) + pPager->nExtra );
drhd9b02572001-04-15 00:37:09 +00001395 if( pPg==0 ){
drhd9b02572001-04-15 00:37:09 +00001396 pager_unwritelock(pPager);
1397 pPager->errMask |= PAGER_ERR_MEM;
1398 return SQLITE_NOMEM;
1399 }
drh8c1238a2003-01-02 14:43:55 +00001400 memset(pPg, 0, sizeof(*pPg));
drhed7c8552001-04-11 14:29:21 +00001401 pPg->pPager = pPager;
drhd9b02572001-04-15 00:37:09 +00001402 pPg->pNextAll = pPager->pAll;
1403 if( pPager->pAll ){
1404 pPager->pAll->pPrevAll = pPg;
1405 }
1406 pPg->pPrevAll = 0;
drhd79caeb2001-04-15 02:27:24 +00001407 pPager->pAll = pPg;
drhd9b02572001-04-15 00:37:09 +00001408 pPager->nPage++;
drhed7c8552001-04-11 14:29:21 +00001409 }else{
drhdb48ee02003-01-16 13:42:43 +00001410 /* Find a page to recycle. Try to locate a page that does not
1411 ** require us to do an fsync() on the journal.
1412 */
drh341eae82003-01-21 02:39:36 +00001413 pPg = pPager->pFirstSynced;
drhb19a2bc2001-09-16 00:13:26 +00001414
drhdb48ee02003-01-16 13:42:43 +00001415 /* If we could not find a page that does not require an fsync()
1416 ** on the journal file then fsync the journal file. This is a
1417 ** very slow operation, so we work hard to avoid it. But sometimes
1418 ** it can't be helped.
drhb19a2bc2001-09-16 00:13:26 +00001419 */
drh603240c2002-03-05 01:11:12 +00001420 if( pPg==0 ){
drh34e79ce2004-02-08 06:05:46 +00001421 int rc = syncJournal(pPager);
drh50e5dad2001-09-15 00:57:28 +00001422 if( rc!=0 ){
1423 sqlitepager_rollback(pPager);
drh50e5dad2001-09-15 00:57:28 +00001424 return SQLITE_IOERR;
1425 }
1426 pPg = pPager->pFirst;
1427 }
drhd9b02572001-04-15 00:37:09 +00001428 assert( pPg->nRef==0 );
drhdb48ee02003-01-16 13:42:43 +00001429
1430 /* Write the page to the database file if it is dirty.
1431 */
1432 if( pPg->dirty ){
1433 assert( pPg->needSync==0 );
drh2554f8b2003-01-22 01:26:44 +00001434 pPg->pDirty = 0;
1435 rc = pager_write_pagelist( pPg );
drhdb48ee02003-01-16 13:42:43 +00001436 if( rc!=SQLITE_OK ){
1437 sqlitepager_rollback(pPager);
drhdb48ee02003-01-16 13:42:43 +00001438 return SQLITE_IOERR;
1439 }
drhdb48ee02003-01-16 13:42:43 +00001440 }
drh50e5dad2001-09-15 00:57:28 +00001441 assert( pPg->dirty==0 );
drhd9b02572001-04-15 00:37:09 +00001442
drhdb48ee02003-01-16 13:42:43 +00001443 /* If the page we are recycling is marked as alwaysRollback, then
drh193a6b42002-07-07 16:52:46 +00001444 ** set the global alwaysRollback flag, thus disabling the
1445 ** sqlite_dont_rollback() optimization for the rest of this transaction.
1446 ** It is necessary to do this because the page marked alwaysRollback
1447 ** might be reloaded at a later time but at that point we won't remember
1448 ** that is was marked alwaysRollback. This means that all pages must
1449 ** be marked as alwaysRollback from here on out.
1450 */
1451 if( pPg->alwaysRollback ){
1452 pPager->alwaysRollback = 1;
1453 }
1454
drhd9b02572001-04-15 00:37:09 +00001455 /* Unlink the old page from the free list and the hash table
1456 */
drh341eae82003-01-21 02:39:36 +00001457 if( pPg==pPager->pFirstSynced ){
1458 PgHdr *p = pPg->pNextFree;
1459 while( p && p->needSync ){ p = p->pNextFree; }
1460 pPager->pFirstSynced = p;
1461 }
drh6019e162001-07-02 17:51:45 +00001462 if( pPg->pPrevFree ){
1463 pPg->pPrevFree->pNextFree = pPg->pNextFree;
drhed7c8552001-04-11 14:29:21 +00001464 }else{
drh6019e162001-07-02 17:51:45 +00001465 assert( pPager->pFirst==pPg );
1466 pPager->pFirst = pPg->pNextFree;
drhed7c8552001-04-11 14:29:21 +00001467 }
drh6019e162001-07-02 17:51:45 +00001468 if( pPg->pNextFree ){
1469 pPg->pNextFree->pPrevFree = pPg->pPrevFree;
1470 }else{
1471 assert( pPager->pLast==pPg );
1472 pPager->pLast = pPg->pPrevFree;
1473 }
1474 pPg->pNextFree = pPg->pPrevFree = 0;
drhed7c8552001-04-11 14:29:21 +00001475 if( pPg->pNextHash ){
1476 pPg->pNextHash->pPrevHash = pPg->pPrevHash;
1477 }
1478 if( pPg->pPrevHash ){
1479 pPg->pPrevHash->pNextHash = pPg->pNextHash;
1480 }else{
drhd9b02572001-04-15 00:37:09 +00001481 h = pager_hash(pPg->pgno);
drhed7c8552001-04-11 14:29:21 +00001482 assert( pPager->aHash[h]==pPg );
1483 pPager->aHash[h] = pPg->pNextHash;
1484 }
drh6019e162001-07-02 17:51:45 +00001485 pPg->pNextHash = pPg->pPrevHash = 0;
drhd9b02572001-04-15 00:37:09 +00001486 pPager->nOvfl++;
drhed7c8552001-04-11 14:29:21 +00001487 }
1488 pPg->pgno = pgno;
drh1ab43002002-01-14 09:28:19 +00001489 if( pPager->aInJournal && (int)pgno<=pPager->origDbSize ){
drhed6c8672003-01-12 18:02:16 +00001490 sqliteCheckMemory(pPager->aInJournal, pgno/8);
drhdb48ee02003-01-16 13:42:43 +00001491 assert( pPager->journalOpen );
drh6019e162001-07-02 17:51:45 +00001492 pPg->inJournal = (pPager->aInJournal[pgno/8] & (1<<(pgno&7)))!=0;
drhdb48ee02003-01-16 13:42:43 +00001493 pPg->needSync = 0;
drh6019e162001-07-02 17:51:45 +00001494 }else{
1495 pPg->inJournal = 0;
drhdb48ee02003-01-16 13:42:43 +00001496 pPg->needSync = 0;
drh6019e162001-07-02 17:51:45 +00001497 }
drh03eb96a2002-11-10 23:32:56 +00001498 if( pPager->aInCkpt && (int)pgno<=pPager->ckptSize
1499 && (pPager->aInCkpt[pgno/8] & (1<<(pgno&7)))!=0 ){
1500 page_add_to_ckpt_list(pPg);
drhfa86c412002-02-02 15:01:15 +00001501 }else{
drh03eb96a2002-11-10 23:32:56 +00001502 page_remove_from_ckpt_list(pPg);
drhfa86c412002-02-02 15:01:15 +00001503 }
drhed7c8552001-04-11 14:29:21 +00001504 pPg->dirty = 0;
1505 pPg->nRef = 1;
drhdd793422001-06-28 01:54:48 +00001506 REFINFO(pPg);
drhd9b02572001-04-15 00:37:09 +00001507 pPager->nRef++;
1508 h = pager_hash(pgno);
drhed7c8552001-04-11 14:29:21 +00001509 pPg->pNextHash = pPager->aHash[h];
1510 pPager->aHash[h] = pPg;
1511 if( pPg->pNextHash ){
1512 assert( pPg->pNextHash->pPrevHash==0 );
1513 pPg->pNextHash->pPrevHash = pPg;
1514 }
drh2e6d11b2003-04-25 15:37:57 +00001515 if( pPager->nExtra>0 ){
1516 memset(PGHDR_TO_EXTRA(pPg), 0, pPager->nExtra);
1517 }
drh306dc212001-05-21 13:45:10 +00001518 if( pPager->dbSize<0 ) sqlitepager_pagecount(pPager);
drh2e6d11b2003-04-25 15:37:57 +00001519 if( pPager->errMask!=0 ){
1520 sqlitepager_unref(PGHDR_TO_DATA(pPg));
1521 rc = pager_errcode(pPager);
1522 return rc;
1523 }
drh1ab43002002-01-14 09:28:19 +00001524 if( pPager->dbSize<(int)pgno ){
drhb20ea9d2004-02-09 01:20:36 +00001525 memset(PGHDR_TO_DATA(pPg), 0, SQLITE_BLOCK_SIZE);
drh306dc212001-05-21 13:45:10 +00001526 }else{
drh81a20f22001-10-12 17:30:04 +00001527 int rc;
drhb20ea9d2004-02-09 01:20:36 +00001528 sqliteOsSeek(&pPager->fd, (pgno-1)*(off_t)SQLITE_BLOCK_SIZE);
1529 rc = sqliteOsRead(&pPager->fd, PGHDR_TO_DATA(pPg), SQLITE_BLOCK_SIZE);
drh81a20f22001-10-12 17:30:04 +00001530 if( rc!=SQLITE_OK ){
drh28be87c2002-11-05 23:03:02 +00001531 off_t fileSize;
drh4e371ee2002-09-05 16:08:27 +00001532 if( sqliteOsFileSize(&pPager->fd,&fileSize)!=SQLITE_OK
drhb20ea9d2004-02-09 01:20:36 +00001533 || fileSize>=pgno*SQLITE_BLOCK_SIZE ){
drh2e6d11b2003-04-25 15:37:57 +00001534 sqlitepager_unref(PGHDR_TO_DATA(pPg));
drh4e371ee2002-09-05 16:08:27 +00001535 return rc;
1536 }else{
drhb20ea9d2004-02-09 01:20:36 +00001537 memset(PGHDR_TO_DATA(pPg), 0, SQLITE_BLOCK_SIZE);
drh4e371ee2002-09-05 16:08:27 +00001538 }
drhb20ea9d2004-02-09 01:20:36 +00001539 }else if( pPager->xCodec ){
1540 pPager->xCodec(pPager->pCodecArg, PGHDR_TO_DATA(pPg), 0);
drh81a20f22001-10-12 17:30:04 +00001541 }
drh306dc212001-05-21 13:45:10 +00001542 }
drhed7c8552001-04-11 14:29:21 +00001543 }else{
drhd9b02572001-04-15 00:37:09 +00001544 /* The requested page is in the page cache. */
drh7e3b0a02001-04-28 16:52:40 +00001545 pPager->nHit++;
drhdf0b3b02001-06-23 11:36:20 +00001546 page_ref(pPg);
drhed7c8552001-04-11 14:29:21 +00001547 }
1548 *ppPage = PGHDR_TO_DATA(pPg);
1549 return SQLITE_OK;
1550}
1551
1552/*
drh7e3b0a02001-04-28 16:52:40 +00001553** Acquire a page if it is already in the in-memory cache. Do
1554** not read the page from disk. Return a pointer to the page,
1555** or 0 if the page is not in cache.
1556**
1557** See also sqlitepager_get(). The difference between this routine
1558** and sqlitepager_get() is that _get() will go to the disk and read
1559** in the page if the page is not already in cache. This routine
drh5e00f6c2001-09-13 13:46:56 +00001560** returns NULL if the page is not in cache or if a disk I/O error
1561** has ever happened.
drh7e3b0a02001-04-28 16:52:40 +00001562*/
1563void *sqlitepager_lookup(Pager *pPager, Pgno pgno){
1564 PgHdr *pPg;
1565
drh836faa42003-01-11 13:30:57 +00001566 assert( pPager!=0 );
1567 assert( pgno!=0 );
drh7e3b0a02001-04-28 16:52:40 +00001568 if( pPager->errMask & ~(PAGER_ERR_FULL) ){
1569 return 0;
1570 }
drh836faa42003-01-11 13:30:57 +00001571 /* if( pPager->nRef==0 ){
1572 ** return 0;
1573 ** }
1574 */
drh7e3b0a02001-04-28 16:52:40 +00001575 pPg = pager_lookup(pPager, pgno);
1576 if( pPg==0 ) return 0;
drhdf0b3b02001-06-23 11:36:20 +00001577 page_ref(pPg);
drh7e3b0a02001-04-28 16:52:40 +00001578 return PGHDR_TO_DATA(pPg);
1579}
1580
1581/*
drhed7c8552001-04-11 14:29:21 +00001582** Release a page.
1583**
1584** If the number of references to the page drop to zero, then the
1585** page is added to the LRU list. When all references to all pages
drhd9b02572001-04-15 00:37:09 +00001586** are released, a rollback occurs and the lock on the database is
drhed7c8552001-04-11 14:29:21 +00001587** removed.
1588*/
drhd9b02572001-04-15 00:37:09 +00001589int sqlitepager_unref(void *pData){
drhed7c8552001-04-11 14:29:21 +00001590 PgHdr *pPg;
drhd9b02572001-04-15 00:37:09 +00001591
1592 /* Decrement the reference count for this page
1593 */
drhed7c8552001-04-11 14:29:21 +00001594 pPg = DATA_TO_PGHDR(pData);
1595 assert( pPg->nRef>0 );
drhed7c8552001-04-11 14:29:21 +00001596 pPg->nRef--;
drhdd793422001-06-28 01:54:48 +00001597 REFINFO(pPg);
drhd9b02572001-04-15 00:37:09 +00001598
drh72f82862001-05-24 21:06:34 +00001599 /* When the number of references to a page reach 0, call the
1600 ** destructor and add the page to the freelist.
drhd9b02572001-04-15 00:37:09 +00001601 */
drhed7c8552001-04-11 14:29:21 +00001602 if( pPg->nRef==0 ){
drh1eaa2692001-09-18 02:02:23 +00001603 Pager *pPager;
1604 pPager = pPg->pPager;
drhd9b02572001-04-15 00:37:09 +00001605 pPg->pNextFree = 0;
1606 pPg->pPrevFree = pPager->pLast;
drhed7c8552001-04-11 14:29:21 +00001607 pPager->pLast = pPg;
drhd9b02572001-04-15 00:37:09 +00001608 if( pPg->pPrevFree ){
1609 pPg->pPrevFree->pNextFree = pPg;
drhed7c8552001-04-11 14:29:21 +00001610 }else{
1611 pPager->pFirst = pPg;
1612 }
drh341eae82003-01-21 02:39:36 +00001613 if( pPg->needSync==0 && pPager->pFirstSynced==0 ){
1614 pPager->pFirstSynced = pPg;
1615 }
drh72f82862001-05-24 21:06:34 +00001616 if( pPager->xDestructor ){
1617 pPager->xDestructor(pData);
1618 }
drhd9b02572001-04-15 00:37:09 +00001619
1620 /* When all pages reach the freelist, drop the read lock from
1621 ** the database file.
1622 */
1623 pPager->nRef--;
1624 assert( pPager->nRef>=0 );
1625 if( pPager->nRef==0 ){
1626 pager_reset(pPager);
1627 }
drhed7c8552001-04-11 14:29:21 +00001628 }
drhd9b02572001-04-15 00:37:09 +00001629 return SQLITE_OK;
drhed7c8552001-04-11 14:29:21 +00001630}
1631
1632/*
drhda47d772002-12-02 04:25:19 +00001633** Create a journal file for pPager. There should already be a write
1634** lock on the database file when this routine is called.
1635**
1636** Return SQLITE_OK if everything. Return an error code and release the
1637** write lock if anything goes wrong.
1638*/
1639static int pager_open_journal(Pager *pPager){
1640 int rc;
1641 assert( pPager->state==SQLITE_WRITELOCK );
1642 assert( pPager->journalOpen==0 );
1643 assert( pPager->useJournal );
drh3e4c8522003-07-07 10:47:10 +00001644 sqlitepager_pagecount(pPager);
drhda47d772002-12-02 04:25:19 +00001645 pPager->aInJournal = sqliteMalloc( pPager->dbSize/8 + 1 );
1646 if( pPager->aInJournal==0 ){
1647 sqliteOsReadLock(&pPager->fd);
1648 pPager->state = SQLITE_READLOCK;
1649 return SQLITE_NOMEM;
1650 }
1651 rc = sqliteOsOpenExclusive(pPager->zJournal, &pPager->jfd,pPager->tempFile);
1652 if( rc!=SQLITE_OK ){
1653 sqliteFree(pPager->aInJournal);
1654 pPager->aInJournal = 0;
1655 sqliteOsReadLock(&pPager->fd);
1656 pPager->state = SQLITE_READLOCK;
1657 return SQLITE_CANTOPEN;
1658 }
drha76c82e2003-07-27 18:59:42 +00001659 sqliteOsOpenDirectory(pPager->zDirectory, &pPager->jfd);
drhda47d772002-12-02 04:25:19 +00001660 pPager->journalOpen = 1;
drhdb48ee02003-01-16 13:42:43 +00001661 pPager->journalStarted = 0;
drhda47d772002-12-02 04:25:19 +00001662 pPager->needSync = 0;
1663 pPager->alwaysRollback = 0;
drh968af522003-02-11 14:55:40 +00001664 pPager->nRec = 0;
drh2e6d11b2003-04-25 15:37:57 +00001665 if( pPager->errMask!=0 ){
1666 rc = pager_errcode(pPager);
1667 return rc;
1668 }
drhda47d772002-12-02 04:25:19 +00001669 pPager->origDbSize = pPager->dbSize;
drh968af522003-02-11 14:55:40 +00001670 if( journal_format==JOURNAL_FORMAT_3 ){
1671 rc = sqliteOsWrite(&pPager->jfd, aJournalMagic3, sizeof(aJournalMagic3));
1672 if( rc==SQLITE_OK ){
drh4303fee2003-02-15 23:09:17 +00001673 rc = write32bits(&pPager->jfd, pPager->noSync ? 0xffffffff : 0);
drh968af522003-02-11 14:55:40 +00001674 }
1675 if( rc==SQLITE_OK ){
1676 pPager->cksumInit = (u32)sqliteRandomInteger();
1677 rc = write32bits(&pPager->jfd, pPager->cksumInit);
1678 }
1679 }else if( journal_format==JOURNAL_FORMAT_2 ){
1680 rc = sqliteOsWrite(&pPager->jfd, aJournalMagic2, sizeof(aJournalMagic2));
drhda47d772002-12-02 04:25:19 +00001681 }else{
drh968af522003-02-11 14:55:40 +00001682 assert( journal_format==JOURNAL_FORMAT_1 );
1683 rc = sqliteOsWrite(&pPager->jfd, aJournalMagic1, sizeof(aJournalMagic1));
drhda47d772002-12-02 04:25:19 +00001684 }
1685 if( rc==SQLITE_OK ){
1686 rc = write32bits(&pPager->jfd, pPager->dbSize);
1687 }
1688 if( pPager->ckptAutoopen && rc==SQLITE_OK ){
1689 rc = sqlitepager_ckpt_begin(pPager);
1690 }
1691 if( rc!=SQLITE_OK ){
1692 rc = pager_unwritelock(pPager);
1693 if( rc==SQLITE_OK ){
1694 rc = SQLITE_FULL;
1695 }
1696 }
1697 return rc;
1698}
1699
1700/*
drh4b845d72002-03-05 12:41:19 +00001701** Acquire a write-lock on the database. The lock is removed when
1702** the any of the following happen:
1703**
1704** * sqlitepager_commit() is called.
1705** * sqlitepager_rollback() is called.
1706** * sqlitepager_close() is called.
1707** * sqlitepager_unref() is called to on every outstanding page.
1708**
1709** The parameter to this routine is a pointer to any open page of the
1710** database file. Nothing changes about the page - it is used merely
1711** to acquire a pointer to the Pager structure and as proof that there
1712** is already a read-lock on the database.
1713**
drhda47d772002-12-02 04:25:19 +00001714** A journal file is opened if this is not a temporary file. For
1715** temporary files, the opening of the journal file is deferred until
1716** there is an actual need to write to the journal.
1717**
drh4b845d72002-03-05 12:41:19 +00001718** If the database is already write-locked, this routine is a no-op.
1719*/
1720int sqlitepager_begin(void *pData){
1721 PgHdr *pPg = DATA_TO_PGHDR(pData);
1722 Pager *pPager = pPg->pPager;
1723 int rc = SQLITE_OK;
1724 assert( pPg->nRef>0 );
1725 assert( pPager->state!=SQLITE_UNLOCK );
1726 if( pPager->state==SQLITE_READLOCK ){
1727 assert( pPager->aInJournal==0 );
1728 rc = sqliteOsWriteLock(&pPager->fd);
1729 if( rc!=SQLITE_OK ){
1730 return rc;
1731 }
drh4b845d72002-03-05 12:41:19 +00001732 pPager->state = SQLITE_WRITELOCK;
drhda47d772002-12-02 04:25:19 +00001733 pPager->dirtyFile = 0;
drhdb48ee02003-01-16 13:42:43 +00001734 TRACE1("TRANSACTION\n");
drhda47d772002-12-02 04:25:19 +00001735 if( pPager->useJournal && !pPager->tempFile ){
1736 rc = pager_open_journal(pPager);
drh4b845d72002-03-05 12:41:19 +00001737 }
1738 }
1739 return rc;
1740}
1741
1742/*
drhed7c8552001-04-11 14:29:21 +00001743** Mark a data page as writeable. The page is written into the journal
1744** if it is not there already. This routine must be called before making
1745** changes to a page.
1746**
1747** The first time this routine is called, the pager creates a new
1748** journal and acquires a write lock on the database. If the write
1749** lock could not be acquired, this routine returns SQLITE_BUSY. The
drh306dc212001-05-21 13:45:10 +00001750** calling routine must check for that return value and be careful not to
drhed7c8552001-04-11 14:29:21 +00001751** change any page data until this routine returns SQLITE_OK.
drhd9b02572001-04-15 00:37:09 +00001752**
1753** If the journal file could not be written because the disk is full,
1754** then this routine returns SQLITE_FULL and does an immediate rollback.
1755** All subsequent write attempts also return SQLITE_FULL until there
1756** is a call to sqlitepager_commit() or sqlitepager_rollback() to
1757** reset.
drhed7c8552001-04-11 14:29:21 +00001758*/
drhd9b02572001-04-15 00:37:09 +00001759int sqlitepager_write(void *pData){
drh69688d52001-04-14 16:38:23 +00001760 PgHdr *pPg = DATA_TO_PGHDR(pData);
1761 Pager *pPager = pPg->pPager;
drhd79caeb2001-04-15 02:27:24 +00001762 int rc = SQLITE_OK;
drh69688d52001-04-14 16:38:23 +00001763
drh6446c4d2001-12-15 14:22:18 +00001764 /* Check for errors
1765 */
drhd9b02572001-04-15 00:37:09 +00001766 if( pPager->errMask ){
1767 return pager_errcode(pPager);
1768 }
drh5e00f6c2001-09-13 13:46:56 +00001769 if( pPager->readOnly ){
1770 return SQLITE_PERM;
1771 }
drh6446c4d2001-12-15 14:22:18 +00001772
1773 /* Mark the page as dirty. If the page has already been written
1774 ** to the journal then we can return right away.
1775 */
drhd9b02572001-04-15 00:37:09 +00001776 pPg->dirty = 1;
drh0f892532002-05-30 12:27:03 +00001777 if( pPg->inJournal && (pPg->inCkpt || pPager->ckptInUse==0) ){
drha1680452002-04-18 01:56:57 +00001778 pPager->dirtyFile = 1;
drhfa86c412002-02-02 15:01:15 +00001779 return SQLITE_OK;
1780 }
drh6446c4d2001-12-15 14:22:18 +00001781
1782 /* If we get this far, it means that the page needs to be
drhfa86c412002-02-02 15:01:15 +00001783 ** written to the transaction journal or the ckeckpoint journal
1784 ** or both.
1785 **
1786 ** First check to see that the transaction journal exists and
1787 ** create it if it does not.
drh6446c4d2001-12-15 14:22:18 +00001788 */
drhd9b02572001-04-15 00:37:09 +00001789 assert( pPager->state!=SQLITE_UNLOCK );
drh4b845d72002-03-05 12:41:19 +00001790 rc = sqlitepager_begin(pData);
drhda47d772002-12-02 04:25:19 +00001791 if( rc!=SQLITE_OK ){
1792 return rc;
1793 }
drhd9b02572001-04-15 00:37:09 +00001794 assert( pPager->state==SQLITE_WRITELOCK );
drhda47d772002-12-02 04:25:19 +00001795 if( !pPager->journalOpen && pPager->useJournal ){
1796 rc = pager_open_journal(pPager);
1797 if( rc!=SQLITE_OK ) return rc;
1798 }
1799 assert( pPager->journalOpen || !pPager->useJournal );
1800 pPager->dirtyFile = 1;
drh6446c4d2001-12-15 14:22:18 +00001801
drhfa86c412002-02-02 15:01:15 +00001802 /* The transaction journal now exists and we have a write lock on the
1803 ** main database file. Write the current page to the transaction
1804 ** journal if it is not there already.
drh6446c4d2001-12-15 14:22:18 +00001805 */
drhdb48ee02003-01-16 13:42:43 +00001806 if( !pPg->inJournal && pPager->useJournal ){
1807 if( (int)pPg->pgno <= pPager->origDbSize ){
drh968af522003-02-11 14:55:40 +00001808 int szPg;
1809 u32 saved;
1810 if( journal_format>=JOURNAL_FORMAT_3 ){
1811 u32 cksum = pager_cksum(pPager, pPg->pgno, pData);
1812 saved = *(u32*)PGHDR_TO_EXTRA(pPg);
1813 store32bits(cksum, pPg, SQLITE_PAGE_SIZE);
1814 szPg = SQLITE_PAGE_SIZE+8;
1815 }else{
1816 szPg = SQLITE_PAGE_SIZE+4;
1817 }
1818 store32bits(pPg->pgno, pPg, -4);
drhb20ea9d2004-02-09 01:20:36 +00001819 if( pPager->xCodec ){
1820 pPager->xCodec(pPager->pCodecArg, pData, 1);
1821 }
drh968af522003-02-11 14:55:40 +00001822 rc = sqliteOsWrite(&pPager->jfd, &((char*)pData)[-4], szPg);
drhb20ea9d2004-02-09 01:20:36 +00001823 if( pPager->xCodec ){
1824 pPager->xCodec(pPager->pCodecArg, pData, 0);
1825 }
drh968af522003-02-11 14:55:40 +00001826 if( journal_format>=JOURNAL_FORMAT_3 ){
1827 *(u32*)PGHDR_TO_EXTRA(pPg) = saved;
1828 }
drhdb48ee02003-01-16 13:42:43 +00001829 if( rc!=SQLITE_OK ){
1830 sqlitepager_rollback(pPager);
1831 pPager->errMask |= PAGER_ERR_FULL;
1832 return rc;
1833 }
drh99ee3602003-02-16 19:13:36 +00001834 pPager->nRec++;
drhdb48ee02003-01-16 13:42:43 +00001835 assert( pPager->aInJournal!=0 );
1836 pPager->aInJournal[pPg->pgno/8] |= 1<<(pPg->pgno&7);
1837 pPg->needSync = !pPager->noSync;
1838 pPg->inJournal = 1;
1839 if( pPager->ckptInUse ){
1840 pPager->aInCkpt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
1841 page_add_to_ckpt_list(pPg);
1842 }
1843 TRACE3("JOURNAL %d %d\n", pPg->pgno, pPg->needSync);
1844 }else{
1845 pPg->needSync = !pPager->journalStarted && !pPager->noSync;
1846 TRACE3("APPEND %d %d\n", pPg->pgno, pPg->needSync);
drhd9b02572001-04-15 00:37:09 +00001847 }
drhdb48ee02003-01-16 13:42:43 +00001848 if( pPg->needSync ){
1849 pPager->needSync = 1;
drhfa86c412002-02-02 15:01:15 +00001850 }
drh69688d52001-04-14 16:38:23 +00001851 }
drh6446c4d2001-12-15 14:22:18 +00001852
drhfa86c412002-02-02 15:01:15 +00001853 /* If the checkpoint journal is open and the page is not in it,
drh968af522003-02-11 14:55:40 +00001854 ** then write the current page to the checkpoint journal. Note that
1855 ** the checkpoint journal always uses the simplier format 2 that lacks
1856 ** checksums. The header is also omitted from the checkpoint journal.
drh6446c4d2001-12-15 14:22:18 +00001857 */
drh0f892532002-05-30 12:27:03 +00001858 if( pPager->ckptInUse && !pPg->inCkpt && (int)pPg->pgno<=pPager->ckptSize ){
drh1e336b42002-02-14 12:50:33 +00001859 assert( pPg->inJournal || (int)pPg->pgno>pPager->origDbSize );
drh968af522003-02-11 14:55:40 +00001860 store32bits(pPg->pgno, pPg, -4);
drhb20ea9d2004-02-09 01:20:36 +00001861 rc = sqliteOsWrite(&pPager->cpfd, &((char*)pData)[-4], SQLITE_BLOCK_SIZE+4);
drhfa86c412002-02-02 15:01:15 +00001862 if( rc!=SQLITE_OK ){
1863 sqlitepager_rollback(pPager);
1864 pPager->errMask |= PAGER_ERR_FULL;
1865 return rc;
1866 }
drh9bd47a92003-01-07 14:46:08 +00001867 pPager->ckptNRec++;
drhfa86c412002-02-02 15:01:15 +00001868 assert( pPager->aInCkpt!=0 );
1869 pPager->aInCkpt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
drh03eb96a2002-11-10 23:32:56 +00001870 page_add_to_ckpt_list(pPg);
drhfa86c412002-02-02 15:01:15 +00001871 }
1872
1873 /* Update the database size and return.
1874 */
drh1ab43002002-01-14 09:28:19 +00001875 if( pPager->dbSize<(int)pPg->pgno ){
drh306dc212001-05-21 13:45:10 +00001876 pPager->dbSize = pPg->pgno;
1877 }
drh69688d52001-04-14 16:38:23 +00001878 return rc;
drhed7c8552001-04-11 14:29:21 +00001879}
1880
1881/*
drhaacc5432002-01-06 17:07:40 +00001882** Return TRUE if the page given in the argument was previously passed
drh6019e162001-07-02 17:51:45 +00001883** to sqlitepager_write(). In other words, return TRUE if it is ok
1884** to change the content of the page.
1885*/
1886int sqlitepager_iswriteable(void *pData){
1887 PgHdr *pPg = DATA_TO_PGHDR(pData);
1888 return pPg->dirty;
1889}
1890
1891/*
drh001bbcb2003-03-19 03:14:00 +00001892** Replace the content of a single page with the information in the third
1893** argument.
1894*/
1895int sqlitepager_overwrite(Pager *pPager, Pgno pgno, void *pData){
1896 void *pPage;
1897 int rc;
1898
1899 rc = sqlitepager_get(pPager, pgno, &pPage);
1900 if( rc==SQLITE_OK ){
1901 rc = sqlitepager_write(pPage);
1902 if( rc==SQLITE_OK ){
drhb20ea9d2004-02-09 01:20:36 +00001903 memcpy(pPage, pData, SQLITE_BLOCK_SIZE);
drh001bbcb2003-03-19 03:14:00 +00001904 }
1905 sqlitepager_unref(pPage);
1906 }
1907 return rc;
1908}
1909
1910/*
drh30e58752002-03-02 20:41:57 +00001911** A call to this routine tells the pager that it is not necessary to
1912** write the information on page "pgno" back to the disk, even though
1913** that page might be marked as dirty.
1914**
1915** The overlying software layer calls this routine when all of the data
1916** on the given page is unused. The pager marks the page as clean so
1917** that it does not get written to disk.
1918**
1919** Tests show that this optimization, together with the
1920** sqlitepager_dont_rollback() below, more than double the speed
1921** of large INSERT operations and quadruple the speed of large DELETEs.
drh8e298f92002-07-06 16:28:47 +00001922**
1923** When this routine is called, set the alwaysRollback flag to true.
1924** Subsequent calls to sqlitepager_dont_rollback() for the same page
1925** will thereafter be ignored. This is necessary to avoid a problem
1926** where a page with data is added to the freelist during one part of
1927** a transaction then removed from the freelist during a later part
1928** of the same transaction and reused for some other purpose. When it
1929** is first added to the freelist, this routine is called. When reused,
1930** the dont_rollback() routine is called. But because the page contains
1931** critical data, we still need to be sure it gets rolled back in spite
1932** of the dont_rollback() call.
drh30e58752002-03-02 20:41:57 +00001933*/
1934void sqlitepager_dont_write(Pager *pPager, Pgno pgno){
1935 PgHdr *pPg;
drh8e298f92002-07-06 16:28:47 +00001936
drh30e58752002-03-02 20:41:57 +00001937 pPg = pager_lookup(pPager, pgno);
drh8e298f92002-07-06 16:28:47 +00001938 pPg->alwaysRollback = 1;
drh30e58752002-03-02 20:41:57 +00001939 if( pPg && pPg->dirty ){
drh8124a302002-06-25 14:43:57 +00001940 if( pPager->dbSize==(int)pPg->pgno && pPager->origDbSize<pPager->dbSize ){
1941 /* If this pages is the last page in the file and the file has grown
1942 ** during the current transaction, then do NOT mark the page as clean.
1943 ** When the database file grows, we must make sure that the last page
1944 ** gets written at least once so that the disk file will be the correct
1945 ** size. If you do not write this page and the size of the file
1946 ** on the disk ends up being too small, that can lead to database
1947 ** corruption during the next transaction.
1948 */
1949 }else{
drhdb48ee02003-01-16 13:42:43 +00001950 TRACE2("DONT_WRITE %d\n", pgno);
drh8124a302002-06-25 14:43:57 +00001951 pPg->dirty = 0;
1952 }
drh30e58752002-03-02 20:41:57 +00001953 }
1954}
1955
1956/*
1957** A call to this routine tells the pager that if a rollback occurs,
1958** it is not necessary to restore the data on the given page. This
1959** means that the pager does not have to record the given page in the
1960** rollback journal.
1961*/
1962void sqlitepager_dont_rollback(void *pData){
1963 PgHdr *pPg = DATA_TO_PGHDR(pData);
1964 Pager *pPager = pPg->pPager;
1965
1966 if( pPager->state!=SQLITE_WRITELOCK || pPager->journalOpen==0 ) return;
drh193a6b42002-07-07 16:52:46 +00001967 if( pPg->alwaysRollback || pPager->alwaysRollback ) return;
drh30e58752002-03-02 20:41:57 +00001968 if( !pPg->inJournal && (int)pPg->pgno <= pPager->origDbSize ){
1969 assert( pPager->aInJournal!=0 );
1970 pPager->aInJournal[pPg->pgno/8] |= 1<<(pPg->pgno&7);
1971 pPg->inJournal = 1;
drh0f892532002-05-30 12:27:03 +00001972 if( pPager->ckptInUse ){
drh30e58752002-03-02 20:41:57 +00001973 pPager->aInCkpt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
drh03eb96a2002-11-10 23:32:56 +00001974 page_add_to_ckpt_list(pPg);
drh30e58752002-03-02 20:41:57 +00001975 }
drhdb48ee02003-01-16 13:42:43 +00001976 TRACE2("DONT_ROLLBACK %d\n", pPg->pgno);
drh30e58752002-03-02 20:41:57 +00001977 }
drh0f892532002-05-30 12:27:03 +00001978 if( pPager->ckptInUse && !pPg->inCkpt && (int)pPg->pgno<=pPager->ckptSize ){
drh30e58752002-03-02 20:41:57 +00001979 assert( pPg->inJournal || (int)pPg->pgno>pPager->origDbSize );
1980 assert( pPager->aInCkpt!=0 );
1981 pPager->aInCkpt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
drh03eb96a2002-11-10 23:32:56 +00001982 page_add_to_ckpt_list(pPg);
drh30e58752002-03-02 20:41:57 +00001983 }
1984}
1985
1986/*
drhed7c8552001-04-11 14:29:21 +00001987** Commit all changes to the database and release the write lock.
drhd9b02572001-04-15 00:37:09 +00001988**
1989** If the commit fails for any reason, a rollback attempt is made
1990** and an error code is returned. If the commit worked, SQLITE_OK
1991** is returned.
drhed7c8552001-04-11 14:29:21 +00001992*/
drhd9b02572001-04-15 00:37:09 +00001993int sqlitepager_commit(Pager *pPager){
drha1b351a2001-09-14 16:42:12 +00001994 int rc;
drhed7c8552001-04-11 14:29:21 +00001995 PgHdr *pPg;
drhd9b02572001-04-15 00:37:09 +00001996
1997 if( pPager->errMask==PAGER_ERR_FULL ){
1998 rc = sqlitepager_rollback(pPager);
drh4e371ee2002-09-05 16:08:27 +00001999 if( rc==SQLITE_OK ){
2000 rc = SQLITE_FULL;
2001 }
drhd9b02572001-04-15 00:37:09 +00002002 return rc;
2003 }
2004 if( pPager->errMask!=0 ){
2005 rc = pager_errcode(pPager);
2006 return rc;
2007 }
2008 if( pPager->state!=SQLITE_WRITELOCK ){
2009 return SQLITE_ERROR;
2010 }
drhdb48ee02003-01-16 13:42:43 +00002011 TRACE1("COMMIT\n");
drha1680452002-04-18 01:56:57 +00002012 if( pPager->dirtyFile==0 ){
2013 /* Exit early (without doing the time-consuming sqliteOsSync() calls)
2014 ** if there have been no changes to the database file. */
drh341eae82003-01-21 02:39:36 +00002015 assert( pPager->needSync==0 );
drha1680452002-04-18 01:56:57 +00002016 rc = pager_unwritelock(pPager);
2017 pPager->dbSize = -1;
2018 return rc;
2019 }
drhda47d772002-12-02 04:25:19 +00002020 assert( pPager->journalOpen );
drh34e79ce2004-02-08 06:05:46 +00002021 rc = syncJournal(pPager);
drh240c5792004-02-08 00:40:52 +00002022 if( rc!=SQLITE_OK ){
drhd9b02572001-04-15 00:37:09 +00002023 goto commit_abort;
drhed7c8552001-04-11 14:29:21 +00002024 }
drh2554f8b2003-01-22 01:26:44 +00002025 pPg = pager_get_all_dirty_pages(pPager);
2026 if( pPg ){
2027 rc = pager_write_pagelist(pPg);
2028 if( rc || (!pPager->noSync && sqliteOsSync(&pPager->fd)!=SQLITE_OK) ){
2029 goto commit_abort;
2030 }
drh603240c2002-03-05 01:11:12 +00002031 }
drhd9b02572001-04-15 00:37:09 +00002032 rc = pager_unwritelock(pPager);
2033 pPager->dbSize = -1;
2034 return rc;
2035
2036 /* Jump here if anything goes wrong during the commit process.
2037 */
2038commit_abort:
2039 rc = sqlitepager_rollback(pPager);
2040 if( rc==SQLITE_OK ){
2041 rc = SQLITE_FULL;
drhed7c8552001-04-11 14:29:21 +00002042 }
drhed7c8552001-04-11 14:29:21 +00002043 return rc;
2044}
2045
2046/*
2047** Rollback all changes. The database falls back to read-only mode.
2048** All in-memory cache pages revert to their original data contents.
2049** The journal is deleted.
drhd9b02572001-04-15 00:37:09 +00002050**
2051** This routine cannot fail unless some other process is not following
2052** the correct locking protocol (SQLITE_PROTOCOL) or unless some other
2053** process is writing trash into the journal file (SQLITE_CORRUPT) or
2054** unless a prior malloc() failed (SQLITE_NOMEM). Appropriate error
2055** codes are returned for all these occasions. Otherwise,
2056** SQLITE_OK is returned.
drhed7c8552001-04-11 14:29:21 +00002057*/
drhd9b02572001-04-15 00:37:09 +00002058int sqlitepager_rollback(Pager *pPager){
drhed7c8552001-04-11 14:29:21 +00002059 int rc;
drhdb48ee02003-01-16 13:42:43 +00002060 TRACE1("ROLLBACK\n");
drhda47d772002-12-02 04:25:19 +00002061 if( !pPager->dirtyFile || !pPager->journalOpen ){
2062 rc = pager_unwritelock(pPager);
2063 pPager->dbSize = -1;
2064 return rc;
2065 }
drhdb48ee02003-01-16 13:42:43 +00002066
drhd9b02572001-04-15 00:37:09 +00002067 if( pPager->errMask!=0 && pPager->errMask!=PAGER_ERR_FULL ){
drh4b845d72002-03-05 12:41:19 +00002068 if( pPager->state>=SQLITE_WRITELOCK ){
drh99ee3602003-02-16 19:13:36 +00002069 pager_playback(pPager, 1);
drh4b845d72002-03-05 12:41:19 +00002070 }
drhd9b02572001-04-15 00:37:09 +00002071 return pager_errcode(pPager);
drhed7c8552001-04-11 14:29:21 +00002072 }
drhd9b02572001-04-15 00:37:09 +00002073 if( pPager->state!=SQLITE_WRITELOCK ){
2074 return SQLITE_OK;
2075 }
drh99ee3602003-02-16 19:13:36 +00002076 rc = pager_playback(pPager, 1);
drhd9b02572001-04-15 00:37:09 +00002077 if( rc!=SQLITE_OK ){
2078 rc = SQLITE_CORRUPT;
2079 pPager->errMask |= PAGER_ERR_CORRUPT;
2080 }
2081 pPager->dbSize = -1;
drhed7c8552001-04-11 14:29:21 +00002082 return rc;
drh98808ba2001-10-18 12:34:46 +00002083}
drhd9b02572001-04-15 00:37:09 +00002084
2085/*
drh5e00f6c2001-09-13 13:46:56 +00002086** Return TRUE if the database file is opened read-only. Return FALSE
2087** if the database is (in theory) writable.
2088*/
2089int sqlitepager_isreadonly(Pager *pPager){
drhbe0072d2001-09-13 14:46:09 +00002090 return pPager->readOnly;
drh5e00f6c2001-09-13 13:46:56 +00002091}
2092
2093/*
drhd9b02572001-04-15 00:37:09 +00002094** This routine is used for testing and analysis only.
2095*/
2096int *sqlitepager_stats(Pager *pPager){
2097 static int a[9];
2098 a[0] = pPager->nRef;
2099 a[1] = pPager->nPage;
2100 a[2] = pPager->mxPage;
2101 a[3] = pPager->dbSize;
2102 a[4] = pPager->state;
2103 a[5] = pPager->errMask;
2104 a[6] = pPager->nHit;
2105 a[7] = pPager->nMiss;
2106 a[8] = pPager->nOvfl;
2107 return a;
2108}
drhdd793422001-06-28 01:54:48 +00002109
drhfa86c412002-02-02 15:01:15 +00002110/*
2111** Set the checkpoint.
2112**
2113** This routine should be called with the transaction journal already
2114** open. A new checkpoint journal is created that can be used to rollback
drhaaab5722002-02-19 13:39:21 +00002115** changes of a single SQL command within a larger transaction.
drhfa86c412002-02-02 15:01:15 +00002116*/
2117int sqlitepager_ckpt_begin(Pager *pPager){
2118 int rc;
2119 char zTemp[SQLITE_TEMPNAME_SIZE];
drhda47d772002-12-02 04:25:19 +00002120 if( !pPager->journalOpen ){
2121 pPager->ckptAutoopen = 1;
2122 return SQLITE_OK;
2123 }
drhfa86c412002-02-02 15:01:15 +00002124 assert( pPager->journalOpen );
drh0f892532002-05-30 12:27:03 +00002125 assert( !pPager->ckptInUse );
drhfa86c412002-02-02 15:01:15 +00002126 pPager->aInCkpt = sqliteMalloc( pPager->dbSize/8 + 1 );
2127 if( pPager->aInCkpt==0 ){
2128 sqliteOsReadLock(&pPager->fd);
2129 return SQLITE_NOMEM;
2130 }
drh968af522003-02-11 14:55:40 +00002131#ifndef NDEBUG
drhfa86c412002-02-02 15:01:15 +00002132 rc = sqliteOsFileSize(&pPager->jfd, &pPager->ckptJSize);
2133 if( rc ) goto ckpt_begin_failed;
drh968af522003-02-11 14:55:40 +00002134 assert( pPager->ckptJSize ==
2135 pPager->nRec*JOURNAL_PG_SZ(journal_format)+JOURNAL_HDR_SZ(journal_format) );
2136#endif
2137 pPager->ckptJSize = pPager->nRec*JOURNAL_PG_SZ(journal_format)
2138 + JOURNAL_HDR_SZ(journal_format);
drh663fc632002-02-02 18:49:19 +00002139 pPager->ckptSize = pPager->dbSize;
drh0f892532002-05-30 12:27:03 +00002140 if( !pPager->ckptOpen ){
2141 rc = sqlitepager_opentemp(zTemp, &pPager->cpfd);
2142 if( rc ) goto ckpt_begin_failed;
2143 pPager->ckptOpen = 1;
drh9bd47a92003-01-07 14:46:08 +00002144 pPager->ckptNRec = 0;
drh0f892532002-05-30 12:27:03 +00002145 }
2146 pPager->ckptInUse = 1;
drhfa86c412002-02-02 15:01:15 +00002147 return SQLITE_OK;
2148
2149ckpt_begin_failed:
2150 if( pPager->aInCkpt ){
2151 sqliteFree(pPager->aInCkpt);
2152 pPager->aInCkpt = 0;
2153 }
2154 return rc;
2155}
2156
2157/*
2158** Commit a checkpoint.
2159*/
2160int sqlitepager_ckpt_commit(Pager *pPager){
drh0f892532002-05-30 12:27:03 +00002161 if( pPager->ckptInUse ){
drh03eb96a2002-11-10 23:32:56 +00002162 PgHdr *pPg, *pNext;
drh96ddd6d2002-09-05 19:10:33 +00002163 sqliteOsSeek(&pPager->cpfd, 0);
drh9bd47a92003-01-07 14:46:08 +00002164 /* sqliteOsTruncate(&pPager->cpfd, 0); */
2165 pPager->ckptNRec = 0;
drh0f892532002-05-30 12:27:03 +00002166 pPager->ckptInUse = 0;
drh663fc632002-02-02 18:49:19 +00002167 sqliteFree( pPager->aInCkpt );
2168 pPager->aInCkpt = 0;
drh03eb96a2002-11-10 23:32:56 +00002169 for(pPg=pPager->pCkpt; pPg; pPg=pNext){
2170 pNext = pPg->pNextCkpt;
2171 assert( pPg->inCkpt );
drh663fc632002-02-02 18:49:19 +00002172 pPg->inCkpt = 0;
drh03eb96a2002-11-10 23:32:56 +00002173 pPg->pPrevCkpt = pPg->pNextCkpt = 0;
drh663fc632002-02-02 18:49:19 +00002174 }
drh03eb96a2002-11-10 23:32:56 +00002175 pPager->pCkpt = 0;
drh663fc632002-02-02 18:49:19 +00002176 }
drhda47d772002-12-02 04:25:19 +00002177 pPager->ckptAutoopen = 0;
drhfa86c412002-02-02 15:01:15 +00002178 return SQLITE_OK;
2179}
2180
2181/*
2182** Rollback a checkpoint.
2183*/
2184int sqlitepager_ckpt_rollback(Pager *pPager){
2185 int rc;
drh0f892532002-05-30 12:27:03 +00002186 if( pPager->ckptInUse ){
drh663fc632002-02-02 18:49:19 +00002187 rc = pager_ckpt_playback(pPager);
2188 sqlitepager_ckpt_commit(pPager);
2189 }else{
2190 rc = SQLITE_OK;
2191 }
drhda47d772002-12-02 04:25:19 +00002192 pPager->ckptAutoopen = 0;
drhfa86c412002-02-02 15:01:15 +00002193 return rc;
2194}
2195
drh73509ee2003-04-06 20:44:45 +00002196/*
2197** Return the full pathname of the database file.
2198*/
2199const char *sqlitepager_filename(Pager *pPager){
2200 return pPager->zFilename;
2201}
2202
drhb20ea9d2004-02-09 01:20:36 +00002203/*
2204** Set the codec for this pager
2205*/
2206void sqlitepager_set_codec(
2207 Pager *pPager,
2208 void (*xCodec)(void*,void*,int),
2209 void *pCodecArg
2210){
2211 pPager->xCodec = xCodec;
2212 pPager->pCodecArg = pCodecArg;
2213}
2214
#ifdef SQLITE_TEST
/*
** Print a listing of all referenced pages and their reference counts.
**
** Walks the pager's list of all pages and writes one line to standard
** output for every page whose nRef is positive, showing the page
** number, the address of the page data, and the reference count.
**
** The data address is printed with %p.  Casting the pointer to int
** would truncate the address on platforms where pointers are wider
** than int.
*/
void sqlitepager_refdump(Pager *pPager){
  PgHdr *pPg;
  for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
    if( pPg->nRef<=0 ) continue;
    printf("PAGE %3d addr=%p nRef=%d\n",
       pPg->pgno, (void*)PGHDR_TO_DATA(pPg), pPg->nRef);
  }
}
#endif