blob: 2663cb148aa12a42a65569f6fc2f574f7ccb0ee5 [file] [log] [blame]
drhed7c8552001-04-11 14:29:21 +00001/*
drhb19a2bc2001-09-16 00:13:26 +00002** 2001 September 15
drhed7c8552001-04-11 14:29:21 +00003**
drhb19a2bc2001-09-16 00:13:26 +00004** The author disclaims copyright to this source code. In place of
5** a legal notice, here is a blessing:
drhed7c8552001-04-11 14:29:21 +00006**
drhb19a2bc2001-09-16 00:13:26 +00007** May you do good and not evil.
8** May you find forgiveness for yourself and forgive others.
9** May you share freely, never taking more than you give.
drhed7c8552001-04-11 14:29:21 +000010**
11*************************************************************************
drhb19a2bc2001-09-16 00:13:26 +000012** This is the implementation of the page cache subsystem or "pager".
drhed7c8552001-04-11 14:29:21 +000013**
drhb19a2bc2001-09-16 00:13:26 +000014** The pager is used to access a database disk file. It implements
15** atomic commit and rollback through the use of a journal file that
16** is separate from the database file. The pager also implements file
17** locking to prevent two processes from writing the same database
18** file simultaneously, or one process from reading the database while
19** another is writing.
drhed7c8552001-04-11 14:29:21 +000020**
drhd0ba1932004-02-10 01:54:28 +000021** @(#) $Id: pager.c,v 1.97 2004/02/10 01:54:28 drh Exp $
drhed7c8552001-04-11 14:29:21 +000022*/
drh829e8022002-11-06 14:08:11 +000023#include "os.h" /* Must be first to enable large file support */
drhd9b02572001-04-15 00:37:09 +000024#include "sqliteInt.h"
drhed7c8552001-04-11 14:29:21 +000025#include "pager.h"
drhed7c8552001-04-11 14:29:21 +000026#include <assert.h>
drhd9b02572001-04-15 00:37:09 +000027#include <string.h>
drhed7c8552001-04-11 14:29:21 +000028
/*
** Macros for troubleshooting.  Normally turned off.
**
** When enabled (change "#if 0" to "#if 1"), SET_PAGER() remembers the
** first pager it is given and the TRACEn() macros print to stderr only
** when the local variable "pPager" refers to that pager.  The TRACEn()
** macros therefore require a variable named pPager to be in scope.
**
** The enabled forms are wrapped in do{...}while(0) so that each macro
** expands to exactly one statement.  Without the wrapper, code such as
**
**      if( x ) TRACE1("...");  else  y();
**
** would attach the "else" to the "if" hidden inside the macro.
*/
#if 0
static Pager *mainPager = 0;
#define SET_PAGER(X)  do{ if( mainPager==0 ) mainPager = (X); }while(0)
#define CLR_PAGER(X)  do{ if( mainPager==(X) ) mainPager = 0; }while(0)
#define TRACE1(X)     do{ if( pPager==mainPager ) fprintf(stderr,X); }while(0)
#define TRACE2(X,Y)   do{ if( pPager==mainPager ) fprintf(stderr,X,Y); }while(0)
#define TRACE3(X,Y,Z) \
    do{ if( pPager==mainPager ) fprintf(stderr,X,Y,Z); }while(0)
#else
#define SET_PAGER(X)
#define CLR_PAGER(X)
#define TRACE1(X)
#define TRACE2(X,Y)
#define TRACE3(X,Y,Z)
#endif
46
47
/*
** The page cache as a whole is always in exactly one of these states:
**
**   SQLITE_UNLOCK     The page cache is neither reading nor writing the
**                     database file and holds no data in memory.  This
**                     is the initial state.
**
**   SQLITE_READLOCK   The page cache is reading the database.  Writing
**                     is not permitted.  Multiple readers may access the
**                     same database file at the same time.
**
**   SQLITE_WRITELOCK  The page cache is writing the database.  Access is
**                     exclusive: no other process or thread may read or
**                     write while one process is writing.
**
** The page cache starts out in SQLITE_UNLOCK.  Fetching the first page
** moves it to SQLITE_READLOCK, and releasing the last outstanding page
** moves it back to SQLITE_UNLOCK.  The first request to write a page
** moves it to SQLITE_WRITELOCK.  Because only an outstanding page can be
** written, the pager is always in SQLITE_READLOCK before it enters
** SQLITE_WRITELOCK.  A commit or a rollback returns the state from
** SQLITE_WRITELOCK to SQLITE_READLOCK.
**
** The numeric values are ordered (UNLOCK < READLOCK < WRITELOCK) and
** code in this file compares Pager.state against them with "<" and ">=".
*/
#define SQLITE_UNLOCK      0
#define SQLITE_READLOCK    1
#define SQLITE_WRITELOCK   2
81
drhd9b02572001-04-15 00:37:09 +000082
drhed7c8552001-04-11 14:29:21 +000083/*
84** Each in-memory image of a page begins with the following header.
drhbd03cae2001-06-02 02:40:57 +000085** This header is only visible to this pager module. The client
86** code that calls pager sees only the data that follows the header.
drhf6038712004-02-08 18:07:34 +000087**
88** Client code should call sqlitepager_write() on a page prior to making
89** any modifications to that page. The first time sqlitepager_write()
90** is called, the original page contents are written into the rollback
91** journal and PgHdr.inJournal and PgHdr.needSync are set. Later, once
92** the journal page has made it onto the disk surface, PgHdr.needSync
93** is cleared. The modified page cannot be written back into the original
94** database file until the journal pages has been synced to disk and the
95** PgHdr.needSync has been cleared.
96**
97** The PgHdr.dirty flag is set when sqlitepager_write() is called and
98** is cleared again when the page content is written back to the original
99** database file.
drhed7c8552001-04-11 14:29:21 +0000100*/
drhd9b02572001-04-15 00:37:09 +0000101typedef struct PgHdr PgHdr;
drhed7c8552001-04-11 14:29:21 +0000102struct PgHdr {
103 Pager *pPager; /* The pager to which this page belongs */
104 Pgno pgno; /* The page number for this page */
drh69688d52001-04-14 16:38:23 +0000105 PgHdr *pNextHash, *pPrevHash; /* Hash collision chain for PgHdr.pgno */
drhed7c8552001-04-11 14:29:21 +0000106 int nRef; /* Number of users of this page */
drhd9b02572001-04-15 00:37:09 +0000107 PgHdr *pNextFree, *pPrevFree; /* Freelist of pages where nRef==0 */
108 PgHdr *pNextAll, *pPrevAll; /* A list of all pages */
drh03eb96a2002-11-10 23:32:56 +0000109 PgHdr *pNextCkpt, *pPrevCkpt; /* List of pages in the checkpoint journal */
drh193a6b42002-07-07 16:52:46 +0000110 u8 inJournal; /* TRUE if has been written to journal */
111 u8 inCkpt; /* TRUE if written to the checkpoint journal */
112 u8 dirty; /* TRUE if we need to write back changes */
drhdb48ee02003-01-16 13:42:43 +0000113 u8 needSync; /* Sync journal before writing this page */
drh193a6b42002-07-07 16:52:46 +0000114 u8 alwaysRollback; /* Disable dont_rollback() for this page */
drh2554f8b2003-01-22 01:26:44 +0000115 PgHdr *pDirty; /* Dirty pages sorted by PgHdr.pgno */
drhd0ba1932004-02-10 01:54:28 +0000116 /* SQLITE_PAGE_SIZE bytes of page data follow this header */
drh973b6e32003-02-12 14:09:42 +0000117 /* Pager.nExtra bytes of local data follow the page data */
drhed7c8552001-04-11 14:29:21 +0000118};
119
/*
** Pointer conversions between a page header and the memory that follows
** it:
**
**   PGHDR_TO_DATA(P)    Header pointer -> start of the page data, which
**                       sits immediately after the header.
**   DATA_TO_PGHDR(D)    Page data pointer -> the header that precedes it.
**   PGHDR_TO_EXTRA(P)   Header pointer -> the extra space located
**                       SQLITE_PAGE_SIZE bytes past the start of the data.
*/
#define PGHDR_TO_DATA(P)   ((void*)((P)+1))
#define DATA_TO_PGHDR(D)   (((PgHdr*)(D))-1)
#define PGHDR_TO_EXTRA(P)  ((void*)(((char*)((P)+1))+SQLITE_PAGE_SIZE))
drhed7c8552001-04-11 14:29:21 +0000127
/*
** Number of buckets in the hash table used to locate in-memory pages by
** page number.  pager_hash() masks with N_PG_HASH-1, so this value needs
** to be a power of two for every bucket to be reachable.
*/
#define N_PG_HASH 2048

/*
** Map a page number onto a bucket index in the range 0..N_PG_HASH-1.
*/
#define pager_hash(PN)  ((PN) & (N_PG_HASH-1))
drhed7c8552001-04-11 14:29:21 +0000138
139/*
140** A open page cache is an instance of the following structure.
141*/
142struct Pager {
143 char *zFilename; /* Name of the database file */
144 char *zJournal; /* Name of the journal file */
drha76c82e2003-07-27 18:59:42 +0000145 char *zDirectory; /* Directory hold database and journal files */
drh8cfbf082001-09-19 13:22:39 +0000146 OsFile fd, jfd; /* File descriptors for database and journal */
drhfa86c412002-02-02 15:01:15 +0000147 OsFile cpfd; /* File descriptor for the checkpoint journal */
drhed7c8552001-04-11 14:29:21 +0000148 int dbSize; /* Number of pages in the file */
drh69688d52001-04-14 16:38:23 +0000149 int origDbSize; /* dbSize before the current change */
drh28be87c2002-11-05 23:03:02 +0000150 int ckptSize; /* Size of database (in pages) at ckpt_begin() */
151 off_t ckptJSize; /* Size of journal at ckpt_begin() */
drh968af522003-02-11 14:55:40 +0000152 int nRec; /* Number of pages written to the journal */
153 u32 cksumInit; /* Quasi-random value added to every checksum */
drh9bd47a92003-01-07 14:46:08 +0000154 int ckptNRec; /* Number of records in the checkpoint journal */
drh7e3b0a02001-04-28 16:52:40 +0000155 int nExtra; /* Add this many bytes to each in-memory page */
drh72f82862001-05-24 21:06:34 +0000156 void (*xDestructor)(void*); /* Call this routine when freeing pages */
drhed7c8552001-04-11 14:29:21 +0000157 int nPage; /* Total number of in-memory pages */
drhd9b02572001-04-15 00:37:09 +0000158 int nRef; /* Number of in-memory pages with PgHdr.nRef>0 */
drhed7c8552001-04-11 14:29:21 +0000159 int mxPage; /* Maximum number of pages to hold in cache */
drhd9b02572001-04-15 00:37:09 +0000160 int nHit, nMiss, nOvfl; /* Cache hits, missing, and LRU overflows */
drhb20ea9d2004-02-09 01:20:36 +0000161 void (*xCodec)(void*,void*,int); /* Routine for en/decoding on-disk data */
162 void *pCodecArg; /* First argument to xCodec() */
drh603240c2002-03-05 01:11:12 +0000163 u8 journalOpen; /* True if journal file descriptors is valid */
drh34e79ce2004-02-08 06:05:46 +0000164 u8 journalStarted; /* True if header of journal is synced */
165 u8 useJournal; /* Use a rollback journal on this file */
drh603240c2002-03-05 01:11:12 +0000166 u8 ckptOpen; /* True if the checkpoint journal is open */
drh0f892532002-05-30 12:27:03 +0000167 u8 ckptInUse; /* True we are in a checkpoint */
drhda47d772002-12-02 04:25:19 +0000168 u8 ckptAutoopen; /* Open ckpt journal when main journal is opened*/
drh603240c2002-03-05 01:11:12 +0000169 u8 noSync; /* Do not sync the journal if true */
drh968af522003-02-11 14:55:40 +0000170 u8 fullSync; /* Do extra syncs of the journal for robustness */
drh603240c2002-03-05 01:11:12 +0000171 u8 state; /* SQLITE_UNLOCK, _READLOCK or _WRITELOCK */
172 u8 errMask; /* One of several kinds of errors */
173 u8 tempFile; /* zFilename is a temporary file */
174 u8 readOnly; /* True for a read-only database */
175 u8 needSync; /* True if an fsync() is needed on the journal */
drha1680452002-04-18 01:56:57 +0000176 u8 dirtyFile; /* True if database file has changed in any way */
drh193a6b42002-07-07 16:52:46 +0000177 u8 alwaysRollback; /* Disable dont_rollback() for all pages */
drh603240c2002-03-05 01:11:12 +0000178 u8 *aInJournal; /* One bit for each page in the database file */
179 u8 *aInCkpt; /* One bit for each page in the database */
drhed7c8552001-04-11 14:29:21 +0000180 PgHdr *pFirst, *pLast; /* List of free pages */
drh341eae82003-01-21 02:39:36 +0000181 PgHdr *pFirstSynced; /* First free page with PgHdr.needSync==0 */
drhd9b02572001-04-15 00:37:09 +0000182 PgHdr *pAll; /* List of all pages */
drh03eb96a2002-11-10 23:32:56 +0000183 PgHdr *pCkpt; /* List of pages in the checkpoint journal */
drhed7c8552001-04-11 14:29:21 +0000184 PgHdr *aHash[N_PG_HASH]; /* Hash table to map page number of PgHdr */
drhd9b02572001-04-15 00:37:09 +0000185};
186
/*
** Bit flags that may be set in Pager.errMask.  Each flag occupies its own
** bit, so several error conditions can be recorded at the same time.
*/
#define PAGER_ERR_FULL     0x01  /* a write() failed */
#define PAGER_ERR_MEM      0x02  /* malloc() failed */
#define PAGER_ERR_LOCK     0x04  /* error in the locking protocol */
#define PAGER_ERR_CORRUPT  0x08  /* database or journal corruption */
#define PAGER_ERR_DISK     0x10  /* general disk I/O error - bad hard drive? */
drhd9b02572001-04-15 00:37:09 +0000195
196/*
197** The journal file contains page records in the following
198** format.
drh968af522003-02-11 14:55:40 +0000199**
200** Actually, this structure is the complete page record for pager
201** formats less than 3. Beginning with format 3, this record is surrounded
202** by two checksums.
drhd9b02572001-04-15 00:37:09 +0000203*/
204typedef struct PageRecord PageRecord;
205struct PageRecord {
drhb20ea9d2004-02-09 01:20:36 +0000206 Pgno pgno; /* The page number */
drhd0ba1932004-02-10 01:54:28 +0000207 char aData[SQLITE_PAGE_SIZE]; /* Original data for page pgno */
drhd9b02572001-04-15 00:37:09 +0000208};
209
/*
** Every journal file begins with one of the magic strings below.  The
** bytes were obtained from /dev/random and serve only as a sanity check.
** The three strings differ only in their final byte, which identifies
** the journal format.
**
** There are three journal formats (so far).  Format 1 writes 32-bit
** integers in the byte order of the host machine.  The newer formats
** write integers as big-endian.  All new journals use a newer format,
** but older journals must still be readable so that journals created by
** older versions of the library can be rolled back.
**
** Format 3 (added for 2.8.0) adds extra sanity-checking information.  If
** power fails while the journal is being written, semi-random garbage
** might appear in the journal file once power is restored.  Rolling such
** a journal back could corrupt the database.  The extra information is
** an attempt to detect the garbage and ignore it.
**
** The format-3 sanity check is a 32-bit checksum on each page of data,
** covering both the page number and the SQLITE_PAGE_SIZE bytes of page
** data.  The checksum is seeded with a 32-bit random value stored in the
** journal right after the header.  The random seed matters because
** garbage at the end of a journal is likely to be data from other, now
** deleted, files.  If that garbage came from an obsolete journal its
** checksums might otherwise be correct; seeding each journal with a
** different random value minimizes that risk.
*/
static const unsigned char aJournalMagic1[] = {
  0xd9, 0xd5, 0x05, 0xf9,  0x20, 0xa1, 0x63, 0xd4
};
static const unsigned char aJournalMagic2[] = {
  0xd9, 0xd5, 0x05, 0xf9,  0x20, 0xa1, 0x63, 0xd5
};
static const unsigned char aJournalMagic3[] = {
  0xd9, 0xd5, 0x05, 0xf9,  0x20, 0xa1, 0x63, 0xd6
};
#define JOURNAL_FORMAT_1   1
#define JOURNAL_FORMAT_2   2
#define JOURNAL_FORMAT_3   3
drh94f33312002-08-12 12:29:56 +0000251
/*
** journal_format selects the format used when creating new primary
** journal files.  The default is always format 3.  In test builds
** (SQLITE_TEST defined) it is a real variable, so that tests can select
** an older format and verify that newer versions of the library can
** still roll back older journals.  In all other builds it is a
** compile-time constant.
**
** Note that checkpoint journals always use format 2 and omit the header.
*/
#ifndef SQLITE_TEST
# define journal_format 3
#else
int journal_format = 3;
#endif
drhed7c8552001-04-11 14:29:21 +0000266
/*
** The size of the journal header and of each page record depends on the
** journal format.  These macros compute both sizes, in bytes, for format
** number X:
**
**   JOURNAL_HDR_SZ(X)   magic string + one Pgno, plus two extra 32-bit
**                       fields when X is 3 or greater.
**   JOURNAL_PG_SZ(X)    page data + one Pgno, plus one 32-bit checksum
**                       when X is 3 or greater.
*/
#define JOURNAL_HDR_SZ(X) \
   (sizeof(aJournalMagic1) + sizeof(Pgno) + ((X)>=3 ? 2*sizeof(u32) : 0))
#define JOURNAL_PG_SZ(X) \
   (SQLITE_PAGE_SIZE + sizeof(Pgno) + ((X)>=3 ? sizeof(u32) : 0))
drh968af522003-02-11 14:55:40 +0000276
/*
** Reference count tracking, available in test builds only.
**
** When SQLITE_TEST is defined and pager_refinfo_enable is non-zero,
** REFINFO(X) prints the page number, the address of the page data and
** the reference count of page X to standard output.  In all other
** builds REFINFO(X) expands to nothing and X is not evaluated.
**
** The data address is printed with %p.  Casting the pointer to int and
** printing it with %x discards the upper half of the address on
** platforms where pointers are wider than int.
*/
#ifdef SQLITE_TEST
  int pager_refinfo_enable = 0;
  static void pager_refinfo(PgHdr *p){
    static int cnt = 0;
    if( !pager_refinfo_enable ) return;
    printf(
       "REFCNT: %4d addr=%p nRef=%d\n",
       (int)p->pgno, PGHDR_TO_DATA(p), p->nRef
    );
    cnt++;   /* Something to set a breakpoint on */
  }
# define REFINFO(X)  pager_refinfo(X)
#else
# define REFINFO(X)
#endif
295
296/*
drh34e79ce2004-02-08 06:05:46 +0000297** Read a 32-bit integer from the given file descriptor. Store the integer
298** that is read in *pRes. Return SQLITE_OK if everything worked, or an
299** error code is something goes wrong.
300**
301** If the journal format is 2 or 3, read a big-endian integer. If the
302** journal format is 1, read an integer in the native byte-order of the
303** host machine.
drh94f33312002-08-12 12:29:56 +0000304*/
drh968af522003-02-11 14:55:40 +0000305static int read32bits(int format, OsFile *fd, u32 *pRes){
drh94f33312002-08-12 12:29:56 +0000306 u32 res;
307 int rc;
308 rc = sqliteOsRead(fd, &res, sizeof(res));
drh968af522003-02-11 14:55:40 +0000309 if( rc==SQLITE_OK && format>JOURNAL_FORMAT_1 ){
drh94f33312002-08-12 12:29:56 +0000310 unsigned char ac[4];
311 memcpy(ac, &res, 4);
312 res = (ac[0]<<24) | (ac[1]<<16) | (ac[2]<<8) | ac[3];
313 }
314 *pRes = res;
315 return rc;
316}
317
318/*
drh34e79ce2004-02-08 06:05:46 +0000319** Write a 32-bit integer into the given file descriptor. Return SQLITE_OK
320** on success or an error code is something goes wrong.
321**
322** If the journal format is 2 or 3, write the integer as 4 big-endian
323** bytes. If the journal format is 1, write the integer in the native
324** byte order. In normal operation, only formats 2 and 3 are used.
325** Journal format 1 is only used for testing.
drh94f33312002-08-12 12:29:56 +0000326*/
327static int write32bits(OsFile *fd, u32 val){
328 unsigned char ac[4];
drh968af522003-02-11 14:55:40 +0000329 if( journal_format<=1 ){
drh94f33312002-08-12 12:29:56 +0000330 return sqliteOsWrite(fd, &val, 4);
331 }
drh94f33312002-08-12 12:29:56 +0000332 ac[0] = (val>>24) & 0xff;
333 ac[1] = (val>>16) & 0xff;
334 ac[2] = (val>>8) & 0xff;
335 ac[3] = val & 0xff;
336 return sqliteOsWrite(fd, ac, 4);
337}
338
drh2554f8b2003-01-22 01:26:44 +0000339/*
340** Write a 32-bit integer into a page header right before the
341** page data. This will overwrite the PgHdr.pDirty pointer.
drh34e79ce2004-02-08 06:05:46 +0000342**
343** The integer is big-endian for formats 2 and 3 and native byte order
344** for journal format 1.
drh2554f8b2003-01-22 01:26:44 +0000345*/
drh968af522003-02-11 14:55:40 +0000346static void store32bits(u32 val, PgHdr *p, int offset){
drh2554f8b2003-01-22 01:26:44 +0000347 unsigned char *ac;
drhec1bd0b2003-08-26 11:41:27 +0000348 ac = &((unsigned char*)PGHDR_TO_DATA(p))[offset];
drh968af522003-02-11 14:55:40 +0000349 if( journal_format<=1 ){
drh2554f8b2003-01-22 01:26:44 +0000350 memcpy(ac, &val, 4);
351 }else{
352 ac[0] = (val>>24) & 0xff;
353 ac[1] = (val>>16) & 0xff;
354 ac[2] = (val>>8) & 0xff;
355 ac[3] = val & 0xff;
356 }
357}
358
drh94f33312002-08-12 12:29:56 +0000359
360/*
drhd9b02572001-04-15 00:37:09 +0000361** Convert the bits in the pPager->errMask into an approprate
362** return code.
363*/
364static int pager_errcode(Pager *pPager){
365 int rc = SQLITE_OK;
366 if( pPager->errMask & PAGER_ERR_LOCK ) rc = SQLITE_PROTOCOL;
drh81a20f22001-10-12 17:30:04 +0000367 if( pPager->errMask & PAGER_ERR_DISK ) rc = SQLITE_IOERR;
drhd9b02572001-04-15 00:37:09 +0000368 if( pPager->errMask & PAGER_ERR_FULL ) rc = SQLITE_FULL;
369 if( pPager->errMask & PAGER_ERR_MEM ) rc = SQLITE_NOMEM;
370 if( pPager->errMask & PAGER_ERR_CORRUPT ) rc = SQLITE_CORRUPT;
371 return rc;
drhed7c8552001-04-11 14:29:21 +0000372}
373
374/*
drh03eb96a2002-11-10 23:32:56 +0000375** Add or remove a page from the list of all pages that are in the
376** checkpoint journal.
377**
378** The Pager keeps a separate list of pages that are currently in
379** the checkpoint journal. This helps the sqlitepager_ckpt_commit()
380** routine run MUCH faster for the common case where there are many
381** pages in memory but only a few are in the checkpoint journal.
382*/
383static void page_add_to_ckpt_list(PgHdr *pPg){
384 Pager *pPager = pPg->pPager;
385 if( pPg->inCkpt ) return;
386 assert( pPg->pPrevCkpt==0 && pPg->pNextCkpt==0 );
387 pPg->pPrevCkpt = 0;
388 if( pPager->pCkpt ){
389 pPager->pCkpt->pPrevCkpt = pPg;
390 }
391 pPg->pNextCkpt = pPager->pCkpt;
392 pPager->pCkpt = pPg;
393 pPg->inCkpt = 1;
394}
395static void page_remove_from_ckpt_list(PgHdr *pPg){
396 if( !pPg->inCkpt ) return;
397 if( pPg->pPrevCkpt ){
398 assert( pPg->pPrevCkpt->pNextCkpt==pPg );
399 pPg->pPrevCkpt->pNextCkpt = pPg->pNextCkpt;
400 }else{
401 assert( pPg->pPager->pCkpt==pPg );
402 pPg->pPager->pCkpt = pPg->pNextCkpt;
403 }
404 if( pPg->pNextCkpt ){
405 assert( pPg->pNextCkpt->pPrevCkpt==pPg );
406 pPg->pNextCkpt->pPrevCkpt = pPg->pPrevCkpt;
407 }
408 pPg->pNextCkpt = 0;
409 pPg->pPrevCkpt = 0;
410 pPg->inCkpt = 0;
411}
412
413/*
drhed7c8552001-04-11 14:29:21 +0000414** Find a page in the hash table given its page number. Return
415** a pointer to the page or NULL if not found.
416*/
drhd9b02572001-04-15 00:37:09 +0000417static PgHdr *pager_lookup(Pager *pPager, Pgno pgno){
drh836faa42003-01-11 13:30:57 +0000418 PgHdr *p = pPager->aHash[pager_hash(pgno)];
drhed7c8552001-04-11 14:29:21 +0000419 while( p && p->pgno!=pgno ){
420 p = p->pNextHash;
421 }
422 return p;
423}
424
425/*
426** Unlock the database and clear the in-memory cache. This routine
427** sets the state of the pager back to what it was when it was first
428** opened. Any outstanding pages are invalidated and subsequent attempts
429** to access those pages will likely result in a coredump.
430*/
drhd9b02572001-04-15 00:37:09 +0000431static void pager_reset(Pager *pPager){
drhed7c8552001-04-11 14:29:21 +0000432 PgHdr *pPg, *pNext;
drhd9b02572001-04-15 00:37:09 +0000433 for(pPg=pPager->pAll; pPg; pPg=pNext){
434 pNext = pPg->pNextAll;
435 sqliteFree(pPg);
drhed7c8552001-04-11 14:29:21 +0000436 }
437 pPager->pFirst = 0;
drh341eae82003-01-21 02:39:36 +0000438 pPager->pFirstSynced = 0;
drhd9b02572001-04-15 00:37:09 +0000439 pPager->pLast = 0;
440 pPager->pAll = 0;
drhed7c8552001-04-11 14:29:21 +0000441 memset(pPager->aHash, 0, sizeof(pPager->aHash));
442 pPager->nPage = 0;
drhfa86c412002-02-02 15:01:15 +0000443 if( pPager->state>=SQLITE_WRITELOCK ){
drhd9b02572001-04-15 00:37:09 +0000444 sqlitepager_rollback(pPager);
drhed7c8552001-04-11 14:29:21 +0000445 }
drha7fcb052001-12-14 15:09:55 +0000446 sqliteOsUnlock(&pPager->fd);
drhed7c8552001-04-11 14:29:21 +0000447 pPager->state = SQLITE_UNLOCK;
drhd9b02572001-04-15 00:37:09 +0000448 pPager->dbSize = -1;
drhed7c8552001-04-11 14:29:21 +0000449 pPager->nRef = 0;
drh8cfbf082001-09-19 13:22:39 +0000450 assert( pPager->journalOpen==0 );
drhed7c8552001-04-11 14:29:21 +0000451}
452
453/*
454** When this routine is called, the pager has the journal file open and
455** a write lock on the database. This routine releases the database
456** write lock and acquires a read lock in its place. The journal file
457** is deleted and closed.
drh50457892003-09-06 01:10:47 +0000458**
459** TODO: Consider keeping the journal file open for temporary databases.
460** This might give a performance improvement on windows where opening
461** a file is an expensive operation.
drhed7c8552001-04-11 14:29:21 +0000462*/
drhd9b02572001-04-15 00:37:09 +0000463static int pager_unwritelock(Pager *pPager){
drhed7c8552001-04-11 14:29:21 +0000464 int rc;
drhd9b02572001-04-15 00:37:09 +0000465 PgHdr *pPg;
drhfa86c412002-02-02 15:01:15 +0000466 if( pPager->state<SQLITE_WRITELOCK ) return SQLITE_OK;
drh663fc632002-02-02 18:49:19 +0000467 sqlitepager_ckpt_commit(pPager);
drh0f892532002-05-30 12:27:03 +0000468 if( pPager->ckptOpen ){
469 sqliteOsClose(&pPager->cpfd);
470 pPager->ckptOpen = 0;
471 }
drhda47d772002-12-02 04:25:19 +0000472 if( pPager->journalOpen ){
473 sqliteOsClose(&pPager->jfd);
474 pPager->journalOpen = 0;
475 sqliteOsDelete(pPager->zJournal);
476 sqliteFree( pPager->aInJournal );
477 pPager->aInJournal = 0;
478 for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
479 pPg->inJournal = 0;
480 pPg->dirty = 0;
drhdb48ee02003-01-16 13:42:43 +0000481 pPg->needSync = 0;
drhda47d772002-12-02 04:25:19 +0000482 }
483 }else{
484 assert( pPager->dirtyFile==0 || pPager->useJournal==0 );
drhd9b02572001-04-15 00:37:09 +0000485 }
drhda47d772002-12-02 04:25:19 +0000486 rc = sqliteOsReadLock(&pPager->fd);
drh8e298f92002-07-06 16:28:47 +0000487 if( rc==SQLITE_OK ){
488 pPager->state = SQLITE_READLOCK;
489 }else{
490 /* This can only happen if a process does a BEGIN, then forks and the
491 ** child process does the COMMIT. Because of the semantics of unix
492 ** file locking, the unlock will fail.
493 */
494 pPager->state = SQLITE_UNLOCK;
495 }
drhed7c8552001-04-11 14:29:21 +0000496 return rc;
497}
498
drhed7c8552001-04-11 14:29:21 +0000499/*
drh968af522003-02-11 14:55:40 +0000500** Compute and return a checksum for the page of data.
drh34e79ce2004-02-08 06:05:46 +0000501**
502** This is not a real checksum. It is really just the sum of the
503** random initial value and the page number. We considered do a checksum
504** of the database, but that was found to be too slow.
drh968af522003-02-11 14:55:40 +0000505*/
506static u32 pager_cksum(Pager *pPager, Pgno pgno, const char *aData){
507 u32 cksum = pPager->cksumInit + pgno;
drh968af522003-02-11 14:55:40 +0000508 return cksum;
509}
510
511/*
drhfa86c412002-02-02 15:01:15 +0000512** Read a single page from the journal file opened on file descriptor
513** jfd. Playback this one page.
drh968af522003-02-11 14:55:40 +0000514**
515** There are three different journal formats. The format parameter determines
516** which format is used by the journal that is played back.
drhfa86c412002-02-02 15:01:15 +0000517*/
drh968af522003-02-11 14:55:40 +0000518static int pager_playback_one_page(Pager *pPager, OsFile *jfd, int format){
drhfa86c412002-02-02 15:01:15 +0000519 int rc;
520 PgHdr *pPg; /* An existing page in the cache */
521 PageRecord pgRec;
drh968af522003-02-11 14:55:40 +0000522 u32 cksum;
drhfa86c412002-02-02 15:01:15 +0000523
drh968af522003-02-11 14:55:40 +0000524 rc = read32bits(format, jfd, &pgRec.pgno);
drh99ee3602003-02-16 19:13:36 +0000525 if( rc!=SQLITE_OK ) return rc;
drh94f33312002-08-12 12:29:56 +0000526 rc = sqliteOsRead(jfd, &pgRec.aData, sizeof(pgRec.aData));
drh99ee3602003-02-16 19:13:36 +0000527 if( rc!=SQLITE_OK ) return rc;
drhfa86c412002-02-02 15:01:15 +0000528
drh968af522003-02-11 14:55:40 +0000529 /* Sanity checking on the page. This is more important that I originally
530 ** thought. If a power failure occurs while the journal is being written,
531 ** it could cause invalid data to be written into the journal. We need to
532 ** detect this invalid data (with high probability) and ignore it.
533 */
534 if( pgRec.pgno==0 ){
535 return SQLITE_DONE;
536 }
drh7d02cb72003-06-04 16:24:39 +0000537 if( pgRec.pgno>(unsigned)pPager->dbSize ){
drh968af522003-02-11 14:55:40 +0000538 return SQLITE_OK;
539 }
540 if( format>=JOURNAL_FORMAT_3 ){
541 rc = read32bits(format, jfd, &cksum);
drh99ee3602003-02-16 19:13:36 +0000542 if( rc ) return rc;
drh968af522003-02-11 14:55:40 +0000543 if( pager_cksum(pPager, pgRec.pgno, pgRec.aData)!=cksum ){
544 return SQLITE_DONE;
545 }
546 }
drhfa86c412002-02-02 15:01:15 +0000547
548 /* Playback the page. Update the in-memory copy of the page
549 ** at the same time, if there is one.
550 */
551 pPg = pager_lookup(pPager, pgRec.pgno);
drh99ee3602003-02-16 19:13:36 +0000552 TRACE2("PLAYBACK %d\n", pgRec.pgno);
drhd0ba1932004-02-10 01:54:28 +0000553 sqliteOsSeek(&pPager->fd, (pgRec.pgno-1)*(off_t)SQLITE_PAGE_SIZE);
554 rc = sqliteOsWrite(&pPager->fd, pgRec.aData, SQLITE_PAGE_SIZE);
drhfa86c412002-02-02 15:01:15 +0000555 if( pPg ){
drhacf4ac92003-12-17 23:57:34 +0000556 /* No page should ever be rolled back that is in use, except for page
557 ** 1 which is held in use in order to keep the lock on the database
558 ** active.
559 */
560 assert( pPg->nRef==0 || pPg->pgno==1 );
drhd0ba1932004-02-10 01:54:28 +0000561 memcpy(PGHDR_TO_DATA(pPg), pgRec.aData, SQLITE_PAGE_SIZE);
drhacf4ac92003-12-17 23:57:34 +0000562 memset(PGHDR_TO_EXTRA(pPg), 0, pPager->nExtra);
drhdb48ee02003-01-16 13:42:43 +0000563 pPg->dirty = 0;
564 pPg->needSync = 0;
drhb20ea9d2004-02-09 01:20:36 +0000565 if( pPager->xCodec ){
drhd0ba1932004-02-10 01:54:28 +0000566 pPager->xCodec(pPager->pCodecArg, PGHDR_TO_DATA(pPg), 2);
drhb20ea9d2004-02-09 01:20:36 +0000567 }
drhfa86c412002-02-02 15:01:15 +0000568 }
569 return rc;
570}
571
572/*
drhed7c8552001-04-11 14:29:21 +0000573** Playback the journal and thus restore the database file to
574** the state it was in before we started making changes.
575**
drh34e79ce2004-02-08 06:05:46 +0000576** The journal file format is as follows:
577**
578** * 8 byte prefix. One of the aJournalMagic123 vectors defined
579** above. The format of the journal file is determined by which
580** of the three prefix vectors is seen.
581** * 4 byte big-endian integer which is the number of valid page records
582** in the journal. If this value is 0xffffffff, then compute the
583** number of page records from the journal size. This field appears
584** in format 3 only.
585** * 4 byte big-endian integer which is the initial value for the
586** sanity checksum. This field appears in format 3 only.
587** * 4 byte integer which is the number of pages to truncate the
588** database to during a rollback.
589** * Zero or more pages instances, each as follows:
590** + 4 byte page number.
drhd0ba1932004-02-10 01:54:28 +0000591** + SQLITE_PAGE_SIZE bytes of data.
drh34e79ce2004-02-08 06:05:46 +0000592** + 4 byte checksum (format 3 only)
593**
594** When we speak of the journal header, we mean the first 4 bullets above.
595** Each entry in the journal is an instance of the 5th bullet. Note that
596** bullets 2 and 3 only appear in format-3 journals.
597**
598** Call the value from the second bullet "nRec". nRec is the number of
599** valid page entries in the journal. In most cases, you can compute the
600** value of nRec from the size of the journal file. But if a power
601** failure occurred while the journal was being written, it could be the
602** case that the size of the journal file had already been increased but
603** the extra entries had not yet made it safely to disk. In such a case,
604** the value of nRec computed from the file size would be too large. For
605** that reason, we always use the nRec value in the header.
606**
607** If the nRec value is 0xffffffff it means that nRec should be computed
608** from the file size. This value is used when the user selects the
609** no-sync option for the journal. A power failure could lead to corruption
610** in this case. But for things like temporary table (which will be
611** deleted when the power is restored) we don't care.
612**
613** Journal formats 1 and 2 do not have an nRec value in the header so we
614** have to compute nRec from the file size. This has risks (as described
615** above) which is why all persistent tables have been changed to use
616** format 3.
drhed7c8552001-04-11 14:29:21 +0000617**
drhd9b02572001-04-15 00:37:09 +0000618** If the file opened as the journal file is not a well-formed
drh34e79ce2004-02-08 06:05:46 +0000619** journal file then the database will likely already be
620** corrupted, so the PAGER_ERR_CORRUPT bit is set in pPager->errMask
621** and SQLITE_CORRUPT is returned. If it all works, then this routine
622** returns SQLITE_OK.
drhed7c8552001-04-11 14:29:21 +0000623*/
drh99ee3602003-02-16 19:13:36 +0000624static int pager_playback(Pager *pPager, int useJournalSize){
drh968af522003-02-11 14:55:40 +0000625 off_t szJ; /* Size of the journal file in bytes */
626 int nRec; /* Number of Records in the journal */
drhd9b02572001-04-15 00:37:09 +0000627 int i; /* Loop counter */
628 Pgno mxPg = 0; /* Size of the original file in pages */
drh968af522003-02-11 14:55:40 +0000629 int format; /* Format of the journal file. */
630 unsigned char aMagic[sizeof(aJournalMagic1)];
drhed7c8552001-04-11 14:29:21 +0000631 int rc;
632
drhc3a64ba2001-11-22 00:01:27 +0000633 /* Figure out how many records are in the journal. Abort early if
634 ** the journal is empty.
drhed7c8552001-04-11 14:29:21 +0000635 */
drh8cfbf082001-09-19 13:22:39 +0000636 assert( pPager->journalOpen );
drha7fcb052001-12-14 15:09:55 +0000637 sqliteOsSeek(&pPager->jfd, 0);
drh968af522003-02-11 14:55:40 +0000638 rc = sqliteOsFileSize(&pPager->jfd, &szJ);
drhc3a64ba2001-11-22 00:01:27 +0000639 if( rc!=SQLITE_OK ){
640 goto end_playback;
641 }
drh240c5792004-02-08 00:40:52 +0000642
643 /* If the journal file is too small to contain a complete header,
drh34e79ce2004-02-08 06:05:46 +0000644 ** it must mean that the process that created the journal was just
645 ** beginning to write the journal file when it died. In that case,
646 ** the database file should have still been completely unchanged.
647 ** Nothing needs to be rolled back. We can safely ignore this journal.
drh240c5792004-02-08 00:40:52 +0000648 */
drh968af522003-02-11 14:55:40 +0000649 if( szJ < sizeof(aMagic)+sizeof(Pgno) ){
drhc3a64ba2001-11-22 00:01:27 +0000650 goto end_playback;
651 }
652
653 /* Read the beginning of the journal and truncate the
654 ** database file back to its original size.
655 */
drha7fcb052001-12-14 15:09:55 +0000656 rc = sqliteOsRead(&pPager->jfd, aMagic, sizeof(aMagic));
drh94f33312002-08-12 12:29:56 +0000657 if( rc!=SQLITE_OK ){
drh81a20f22001-10-12 17:30:04 +0000658 rc = SQLITE_PROTOCOL;
659 goto end_playback;
drhd9b02572001-04-15 00:37:09 +0000660 }
drh968af522003-02-11 14:55:40 +0000661 if( memcmp(aMagic, aJournalMagic3, sizeof(aMagic))==0 ){
662 format = JOURNAL_FORMAT_3;
663 }else if( memcmp(aMagic, aJournalMagic2, sizeof(aMagic))==0 ){
664 format = JOURNAL_FORMAT_2;
665 }else if( memcmp(aMagic, aJournalMagic1, sizeof(aMagic))==0 ){
666 format = JOURNAL_FORMAT_1;
drh94f33312002-08-12 12:29:56 +0000667 }else{
668 rc = SQLITE_PROTOCOL;
669 goto end_playback;
670 }
drh968af522003-02-11 14:55:40 +0000671 if( format>=JOURNAL_FORMAT_3 ){
drh240c5792004-02-08 00:40:52 +0000672 if( szJ < sizeof(aMagic) + 3*sizeof(u32) ){
673 /* Ignore the journal if it is too small to contain a complete
674 ** header. We already did this test once above, but at the prior
675 ** test, we did not know the journal format and so we had to assume
676 ** the smallest possible header. Now we know the header is bigger
drh34e79ce2004-02-08 06:05:46 +0000677 ** than the minimum so we test again.
drh240c5792004-02-08 00:40:52 +0000678 */
679 goto end_playback;
680 }
drh133cdf62004-01-07 02:52:07 +0000681 rc = read32bits(format, &pPager->jfd, (u32*)&nRec);
drh968af522003-02-11 14:55:40 +0000682 if( rc ) goto end_playback;
683 rc = read32bits(format, &pPager->jfd, &pPager->cksumInit);
684 if( rc ) goto end_playback;
drh99ee3602003-02-16 19:13:36 +0000685 if( nRec==0xffffffff || useJournalSize ){
drh968af522003-02-11 14:55:40 +0000686 nRec = (szJ - JOURNAL_HDR_SZ(3))/JOURNAL_PG_SZ(3);
687 }
688 }else{
drhd8d66e82003-02-12 02:10:15 +0000689 nRec = (szJ - JOURNAL_HDR_SZ(2))/JOURNAL_PG_SZ(2);
690 assert( nRec*JOURNAL_PG_SZ(2)+JOURNAL_HDR_SZ(2)==szJ );
drh968af522003-02-11 14:55:40 +0000691 }
692 rc = read32bits(format, &pPager->jfd, &mxPg);
drhd9b02572001-04-15 00:37:09 +0000693 if( rc!=SQLITE_OK ){
drh81a20f22001-10-12 17:30:04 +0000694 goto end_playback;
drhd9b02572001-04-15 00:37:09 +0000695 }
drhd8d66e82003-02-12 02:10:15 +0000696 assert( pPager->origDbSize==0 || pPager->origDbSize==mxPg );
drhd0ba1932004-02-10 01:54:28 +0000697 rc = sqliteOsTruncate(&pPager->fd, SQLITE_PAGE_SIZE*(off_t)mxPg);
drh81a20f22001-10-12 17:30:04 +0000698 if( rc!=SQLITE_OK ){
699 goto end_playback;
700 }
drhd9b02572001-04-15 00:37:09 +0000701 pPager->dbSize = mxPg;
702
drhfa86c412002-02-02 15:01:15 +0000703 /* Copy original pages out of the journal and back into the database file.
drhed7c8552001-04-11 14:29:21 +0000704 */
drh968af522003-02-11 14:55:40 +0000705 for(i=0; i<nRec; i++){
706 rc = pager_playback_one_page(pPager, &pPager->jfd, format);
707 if( rc!=SQLITE_OK ){
708 if( rc==SQLITE_DONE ){
drh968af522003-02-11 14:55:40 +0000709 rc = SQLITE_OK;
710 }
711 break;
712 }
drhed7c8552001-04-11 14:29:21 +0000713 }
drh81a20f22001-10-12 17:30:04 +0000714
drh4a0681e2003-02-13 01:58:20 +0000715 /* Pages that have been written to the journal but never synced
716 ** where not restored by the loop above. We have to restore those
drh240c5792004-02-08 00:40:52 +0000717 ** pages by reading them back from the original database.
drhdb48ee02003-01-16 13:42:43 +0000718 */
719 if( rc==SQLITE_OK ){
720 PgHdr *pPg;
721 for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
drhd0ba1932004-02-10 01:54:28 +0000722 char zBuf[SQLITE_PAGE_SIZE];
drh4a0681e2003-02-13 01:58:20 +0000723 if( !pPg->dirty ) continue;
drhdb48ee02003-01-16 13:42:43 +0000724 if( (int)pPg->pgno <= pPager->origDbSize ){
drhd0ba1932004-02-10 01:54:28 +0000725 sqliteOsSeek(&pPager->fd, SQLITE_PAGE_SIZE*(off_t)(pPg->pgno-1));
726 rc = sqliteOsRead(&pPager->fd, zBuf, SQLITE_PAGE_SIZE);
drhdb48ee02003-01-16 13:42:43 +0000727 if( rc ) break;
drhb20ea9d2004-02-09 01:20:36 +0000728 if( pPager->xCodec ){
729 pPager->xCodec(pPager->pCodecArg, zBuf, 0);
730 }
drhdb48ee02003-01-16 13:42:43 +0000731 }else{
drhd0ba1932004-02-10 01:54:28 +0000732 memset(zBuf, 0, SQLITE_PAGE_SIZE);
drhdb48ee02003-01-16 13:42:43 +0000733 }
drhd0ba1932004-02-10 01:54:28 +0000734 if( pPg->nRef==0 || memcmp(zBuf, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE) ){
735 memcpy(PGHDR_TO_DATA(pPg), zBuf, SQLITE_PAGE_SIZE);
drh3a840692003-01-29 22:58:26 +0000736 memset(PGHDR_TO_EXTRA(pPg), 0, pPager->nExtra);
737 }
drhdb48ee02003-01-16 13:42:43 +0000738 pPg->needSync = 0;
739 pPg->dirty = 0;
740 }
741 }
drh4a0681e2003-02-13 01:58:20 +0000742
743end_playback:
drhd9b02572001-04-15 00:37:09 +0000744 if( rc!=SQLITE_OK ){
745 pager_unwritelock(pPager);
746 pPager->errMask |= PAGER_ERR_CORRUPT;
747 rc = SQLITE_CORRUPT;
748 }else{
749 rc = pager_unwritelock(pPager);
drhed7c8552001-04-11 14:29:21 +0000750 }
drhd9b02572001-04-15 00:37:09 +0000751 return rc;
drhed7c8552001-04-11 14:29:21 +0000752}
753
754/*
drhfa86c412002-02-02 15:01:15 +0000755** Playback the checkpoint journal.
756**
757** This is similar to playing back the transaction journal but with
758** a few extra twists.
759**
drh663fc632002-02-02 18:49:19 +0000760** (1) The number of pages in the database file at the start of
761** the checkpoint is stored in pPager->ckptSize, not in the
762** journal file itself.
drhfa86c412002-02-02 15:01:15 +0000763**
764** (2) In addition to playing back the checkpoint journal, also
765** playback all pages of the transaction journal beginning
766** at offset pPager->ckptJSize.
767*/
768static int pager_ckpt_playback(Pager *pPager){
drh968af522003-02-11 14:55:40 +0000769 off_t szJ; /* Size of the full journal */
770 int nRec; /* Number of Records */
drhfa86c412002-02-02 15:01:15 +0000771 int i; /* Loop counter */
772 int rc;
773
774 /* Truncate the database back to its original size.
775 */
drhd0ba1932004-02-10 01:54:28 +0000776 rc = sqliteOsTruncate(&pPager->fd, SQLITE_PAGE_SIZE*(off_t)pPager->ckptSize);
drhfa86c412002-02-02 15:01:15 +0000777 pPager->dbSize = pPager->ckptSize;
778
779 /* Figure out how many records are in the checkpoint journal.
780 */
drh0f892532002-05-30 12:27:03 +0000781 assert( pPager->ckptInUse && pPager->journalOpen );
drhfa86c412002-02-02 15:01:15 +0000782 sqliteOsSeek(&pPager->cpfd, 0);
drh9bd47a92003-01-07 14:46:08 +0000783 nRec = pPager->ckptNRec;
drhfa86c412002-02-02 15:01:15 +0000784
785 /* Copy original pages out of the checkpoint journal and back into the
drh968af522003-02-11 14:55:40 +0000786 ** database file. Note that the checkpoint journal always uses format
787 ** 2 instead of format 3 since it does not need to be concerned with
788 ** power failures corrupting the journal and can thus omit the checksums.
drhfa86c412002-02-02 15:01:15 +0000789 */
790 for(i=nRec-1; i>=0; i--){
drh968af522003-02-11 14:55:40 +0000791 rc = pager_playback_one_page(pPager, &pPager->cpfd, 2);
792 assert( rc!=SQLITE_DONE );
drhfa86c412002-02-02 15:01:15 +0000793 if( rc!=SQLITE_OK ) goto end_ckpt_playback;
794 }
795
796 /* Figure out how many pages need to be copied out of the transaction
797 ** journal.
798 */
799 rc = sqliteOsSeek(&pPager->jfd, pPager->ckptJSize);
800 if( rc!=SQLITE_OK ){
801 goto end_ckpt_playback;
802 }
drh968af522003-02-11 14:55:40 +0000803 rc = sqliteOsFileSize(&pPager->jfd, &szJ);
drhfa86c412002-02-02 15:01:15 +0000804 if( rc!=SQLITE_OK ){
805 goto end_ckpt_playback;
806 }
drh968af522003-02-11 14:55:40 +0000807 nRec = (szJ - pPager->ckptJSize)/JOURNAL_PG_SZ(journal_format);
drhfa86c412002-02-02 15:01:15 +0000808 for(i=nRec-1; i>=0; i--){
drh968af522003-02-11 14:55:40 +0000809 rc = pager_playback_one_page(pPager, &pPager->jfd, journal_format);
810 if( rc!=SQLITE_OK ){
811 assert( rc!=SQLITE_DONE );
812 goto end_ckpt_playback;
813 }
drhfa86c412002-02-02 15:01:15 +0000814 }
815
drhfa86c412002-02-02 15:01:15 +0000816end_ckpt_playback:
drhfa86c412002-02-02 15:01:15 +0000817 if( rc!=SQLITE_OK ){
drhfa86c412002-02-02 15:01:15 +0000818 pPager->errMask |= PAGER_ERR_CORRUPT;
819 rc = SQLITE_CORRUPT;
drhfa86c412002-02-02 15:01:15 +0000820 }
821 return rc;
822}
823
824/*
drhf57b14a2001-09-14 18:54:08 +0000825** Change the maximum number of in-memory pages that are allowed.
drhcd61c282002-03-06 22:01:34 +0000826**
827** The maximum number is the absolute value of the mxPage parameter.
828** If mxPage is negative, the noSync flag is also set. noSync bypasses
829** calls to sqliteOsSync(). The pager runs much faster with noSync on,
830** but if the operating system crashes or there is an abrupt power
831** failure, the database file might be left in an inconsistent and
832** unrepairable state.
drhf57b14a2001-09-14 18:54:08 +0000833*/
834void sqlitepager_set_cachesize(Pager *pPager, int mxPage){
drh603240c2002-03-05 01:11:12 +0000835 if( mxPage>=0 ){
drha1680452002-04-18 01:56:57 +0000836 pPager->noSync = pPager->tempFile;
drh603240c2002-03-05 01:11:12 +0000837 }else{
838 pPager->noSync = 1;
839 mxPage = -mxPage;
840 }
drhf57b14a2001-09-14 18:54:08 +0000841 if( mxPage>10 ){
842 pPager->mxPage = mxPage;
843 }
844}
845
846/*
drh973b6e32003-02-12 14:09:42 +0000847** Adjust the robustness of the database to damage due to OS crashes
848** or power failures by changing the number of syncs()s when writing
849** the rollback journal. There are three levels:
850**
851** OFF sqliteOsSync() is never called. This is the default
852** for temporary and transient files.
853**
854** NORMAL The journal is synced once before writes begin on the
855** database. This is normally adequate protection, but
856** it is theoretically possible, though very unlikely,
857** that an inopertune power failure could leave the journal
858** in a state which would cause damage to the database
859** when it is rolled back.
860**
861** FULL The journal is synced twice before writes begin on the
drh34e79ce2004-02-08 06:05:46 +0000862** database (with some additional information - the nRec field
863** of the journal header - being written in between the two
864** syncs). If we assume that writing a
drh973b6e32003-02-12 14:09:42 +0000865** single disk sector is atomic, then this mode provides
866** assurance that the journal will not be corrupted to the
867** point of causing damage to the database during rollback.
868**
869** Numeric values associated with these states are OFF==1, NORMAL=2,
870** and FULL=3.
871*/
872void sqlitepager_set_safety_level(Pager *pPager, int level){
873 pPager->noSync = level==1 || pPager->tempFile;
874 pPager->fullSync = level==3 && !pPager->tempFile;
875}
876
877/*
drhfa86c412002-02-02 15:01:15 +0000878** Open a temporary file. Write the name of the file into zName
879** (zName must be at least SQLITE_TEMPNAME_SIZE bytes long.) Write
880** the file descriptor into *fd. Return SQLITE_OK on success or some
881** other error code if we fail.
882**
883** The OS will automatically delete the temporary file when it is
884** closed.
885*/
886static int sqlitepager_opentemp(char *zFile, OsFile *fd){
887 int cnt = 8;
888 int rc;
889 do{
890 cnt--;
891 sqliteOsTempFileName(zFile);
892 rc = sqliteOsOpenExclusive(zFile, fd, 1);
893 }while( cnt>0 && rc!=SQLITE_OK );
894 return rc;
895}
896
897/*
drhed7c8552001-04-11 14:29:21 +0000898** Create a new page cache and put a pointer to the page cache in *ppPager.
drh5e00f6c2001-09-13 13:46:56 +0000899** The file to be cached need not exist. The file is not locked until
drhd9b02572001-04-15 00:37:09 +0000900** the first call to sqlitepager_get() and is only held open until the
901** last page is released using sqlitepager_unref().
drh382c0242001-10-06 16:33:02 +0000902**
drh6446c4d2001-12-15 14:22:18 +0000903** If zFilename is NULL then a randomly-named temporary file is created
904** and used as the file to be cached. The file will be deleted
905** automatically when it is closed.
drhed7c8552001-04-11 14:29:21 +0000906*/
drh7e3b0a02001-04-28 16:52:40 +0000907int sqlitepager_open(
908 Pager **ppPager, /* Return the Pager structure here */
909 const char *zFilename, /* Name of the database file to open */
910 int mxPage, /* Max number of in-memory cache pages */
drhda47d772002-12-02 04:25:19 +0000911 int nExtra, /* Extra bytes append to each in-memory page */
912 int useJournal /* TRUE to use a rollback journal on this file */
drh7e3b0a02001-04-28 16:52:40 +0000913){
drhed7c8552001-04-11 14:29:21 +0000914 Pager *pPager;
drh3e7a6092002-12-07 21:45:14 +0000915 char *zFullPathname;
drhed7c8552001-04-11 14:29:21 +0000916 int nameLen;
drh8cfbf082001-09-19 13:22:39 +0000917 OsFile fd;
drha76c82e2003-07-27 18:59:42 +0000918 int rc, i;
drh5e00f6c2001-09-13 13:46:56 +0000919 int tempFile;
920 int readOnly = 0;
drh8cfbf082001-09-19 13:22:39 +0000921 char zTemp[SQLITE_TEMPNAME_SIZE];
drhed7c8552001-04-11 14:29:21 +0000922
drhd9b02572001-04-15 00:37:09 +0000923 *ppPager = 0;
924 if( sqlite_malloc_failed ){
925 return SQLITE_NOMEM;
926 }
drh901afd42003-08-26 11:25:58 +0000927 if( zFilename && zFilename[0] ){
drh3e7a6092002-12-07 21:45:14 +0000928 zFullPathname = sqliteOsFullPathname(zFilename);
929 rc = sqliteOsOpenReadWrite(zFullPathname, &fd, &readOnly);
drh5e00f6c2001-09-13 13:46:56 +0000930 tempFile = 0;
931 }else{
drhfa86c412002-02-02 15:01:15 +0000932 rc = sqlitepager_opentemp(zTemp, &fd);
drh5e00f6c2001-09-13 13:46:56 +0000933 zFilename = zTemp;
drh3e7a6092002-12-07 21:45:14 +0000934 zFullPathname = sqliteOsFullPathname(zFilename);
drh5e00f6c2001-09-13 13:46:56 +0000935 tempFile = 1;
936 }
drh3e7a6092002-12-07 21:45:14 +0000937 if( sqlite_malloc_failed ){
938 return SQLITE_NOMEM;
939 }
drh8cfbf082001-09-19 13:22:39 +0000940 if( rc!=SQLITE_OK ){
drh3e7a6092002-12-07 21:45:14 +0000941 sqliteFree(zFullPathname);
drhed7c8552001-04-11 14:29:21 +0000942 return SQLITE_CANTOPEN;
943 }
drh3e7a6092002-12-07 21:45:14 +0000944 nameLen = strlen(zFullPathname);
drha76c82e2003-07-27 18:59:42 +0000945 pPager = sqliteMalloc( sizeof(*pPager) + nameLen*3 + 30 );
drhd9b02572001-04-15 00:37:09 +0000946 if( pPager==0 ){
drha7fcb052001-12-14 15:09:55 +0000947 sqliteOsClose(&fd);
drh3e7a6092002-12-07 21:45:14 +0000948 sqliteFree(zFullPathname);
drhd9b02572001-04-15 00:37:09 +0000949 return SQLITE_NOMEM;
950 }
drhdb48ee02003-01-16 13:42:43 +0000951 SET_PAGER(pPager);
drhed7c8552001-04-11 14:29:21 +0000952 pPager->zFilename = (char*)&pPager[1];
drha76c82e2003-07-27 18:59:42 +0000953 pPager->zDirectory = &pPager->zFilename[nameLen+1];
954 pPager->zJournal = &pPager->zDirectory[nameLen+1];
drh3e7a6092002-12-07 21:45:14 +0000955 strcpy(pPager->zFilename, zFullPathname);
drha76c82e2003-07-27 18:59:42 +0000956 strcpy(pPager->zDirectory, zFullPathname);
957 for(i=nameLen; i>0 && pPager->zDirectory[i-1]!='/'; i--){}
958 if( i>0 ) pPager->zDirectory[i-1] = 0;
drh3e7a6092002-12-07 21:45:14 +0000959 strcpy(pPager->zJournal, zFullPathname);
960 sqliteFree(zFullPathname);
drhed7c8552001-04-11 14:29:21 +0000961 strcpy(&pPager->zJournal[nameLen], "-journal");
962 pPager->fd = fd;
drh8cfbf082001-09-19 13:22:39 +0000963 pPager->journalOpen = 0;
drhda47d772002-12-02 04:25:19 +0000964 pPager->useJournal = useJournal;
drhfa86c412002-02-02 15:01:15 +0000965 pPager->ckptOpen = 0;
drh0f892532002-05-30 12:27:03 +0000966 pPager->ckptInUse = 0;
drhed7c8552001-04-11 14:29:21 +0000967 pPager->nRef = 0;
968 pPager->dbSize = -1;
drhfa86c412002-02-02 15:01:15 +0000969 pPager->ckptSize = 0;
970 pPager->ckptJSize = 0;
drhed7c8552001-04-11 14:29:21 +0000971 pPager->nPage = 0;
drhd79caeb2001-04-15 02:27:24 +0000972 pPager->mxPage = mxPage>5 ? mxPage : 10;
drhed7c8552001-04-11 14:29:21 +0000973 pPager->state = SQLITE_UNLOCK;
drhd9b02572001-04-15 00:37:09 +0000974 pPager->errMask = 0;
drh5e00f6c2001-09-13 13:46:56 +0000975 pPager->tempFile = tempFile;
976 pPager->readOnly = readOnly;
drhf57b14a2001-09-14 18:54:08 +0000977 pPager->needSync = 0;
drhda47d772002-12-02 04:25:19 +0000978 pPager->noSync = pPager->tempFile || !useJournal;
drhed7c8552001-04-11 14:29:21 +0000979 pPager->pFirst = 0;
drh341eae82003-01-21 02:39:36 +0000980 pPager->pFirstSynced = 0;
drhed7c8552001-04-11 14:29:21 +0000981 pPager->pLast = 0;
drh7c717f72001-06-24 20:39:41 +0000982 pPager->nExtra = nExtra;
drhed7c8552001-04-11 14:29:21 +0000983 memset(pPager->aHash, 0, sizeof(pPager->aHash));
984 *ppPager = pPager;
985 return SQLITE_OK;
986}
987
988/*
drh72f82862001-05-24 21:06:34 +0000989** Set the destructor for this pager. If not NULL, the destructor is called
drh5e00f6c2001-09-13 13:46:56 +0000990** when the reference count on each page reaches zero. The destructor can
991** be used to clean up information in the extra segment appended to each page.
drh72f82862001-05-24 21:06:34 +0000992**
993** The destructor is not called as a result sqlitepager_close().
994** Destructors are only called by sqlitepager_unref().
995*/
996void sqlitepager_set_destructor(Pager *pPager, void (*xDesc)(void*)){
997 pPager->xDestructor = xDesc;
998}
999
1000/*
drh5e00f6c2001-09-13 13:46:56 +00001001** Return the total number of pages in the disk file associated with
1002** pPager.
drhed7c8552001-04-11 14:29:21 +00001003*/
drhd9b02572001-04-15 00:37:09 +00001004int sqlitepager_pagecount(Pager *pPager){
drh28be87c2002-11-05 23:03:02 +00001005 off_t n;
drhd9b02572001-04-15 00:37:09 +00001006 assert( pPager!=0 );
drhed7c8552001-04-11 14:29:21 +00001007 if( pPager->dbSize>=0 ){
1008 return pPager->dbSize;
1009 }
drha7fcb052001-12-14 15:09:55 +00001010 if( sqliteOsFileSize(&pPager->fd, &n)!=SQLITE_OK ){
drh81a20f22001-10-12 17:30:04 +00001011 pPager->errMask |= PAGER_ERR_DISK;
drh8cfbf082001-09-19 13:22:39 +00001012 return 0;
drhed7c8552001-04-11 14:29:21 +00001013 }
drhd0ba1932004-02-10 01:54:28 +00001014 n /= SQLITE_PAGE_SIZE;
drhd9b02572001-04-15 00:37:09 +00001015 if( pPager->state!=SQLITE_UNLOCK ){
drhed7c8552001-04-11 14:29:21 +00001016 pPager->dbSize = n;
1017 }
1018 return n;
1019}
1020
1021/*
drhf7c57532003-04-25 13:22:51 +00001022** Forward declaration
1023*/
drh34e79ce2004-02-08 06:05:46 +00001024static int syncJournal(Pager*);
drhf7c57532003-04-25 13:22:51 +00001025
1026/*
1027** Truncate the file to the number of pages specified.
1028*/
1029int sqlitepager_truncate(Pager *pPager, Pgno nPage){
1030 int rc;
drh2e6d11b2003-04-25 15:37:57 +00001031 if( pPager->dbSize<0 ){
1032 sqlitepager_pagecount(pPager);
1033 }
1034 if( pPager->errMask!=0 ){
1035 rc = pager_errcode(pPager);
1036 return rc;
1037 }
drh7d02cb72003-06-04 16:24:39 +00001038 if( nPage>=(unsigned)pPager->dbSize ){
drhf7c57532003-04-25 13:22:51 +00001039 return SQLITE_OK;
1040 }
drh34e79ce2004-02-08 06:05:46 +00001041 syncJournal(pPager);
drhd0ba1932004-02-10 01:54:28 +00001042 rc = sqliteOsTruncate(&pPager->fd, SQLITE_PAGE_SIZE*(off_t)nPage);
drhf7c57532003-04-25 13:22:51 +00001043 if( rc==SQLITE_OK ){
1044 pPager->dbSize = nPage;
1045 }
1046 return rc;
1047}
1048
1049/*
drhed7c8552001-04-11 14:29:21 +00001050** Shutdown the page cache. Free all memory and close all files.
1051**
1052** If a transaction was in progress when this routine is called, that
1053** transaction is rolled back. All outstanding pages are invalidated
1054** and their memory is freed. Any attempt to use a page associated
1055** with this page cache after this function returns will likely
1056** result in a coredump.
1057*/
drhd9b02572001-04-15 00:37:09 +00001058int sqlitepager_close(Pager *pPager){
1059 PgHdr *pPg, *pNext;
drhed7c8552001-04-11 14:29:21 +00001060 switch( pPager->state ){
1061 case SQLITE_WRITELOCK: {
drhd9b02572001-04-15 00:37:09 +00001062 sqlitepager_rollback(pPager);
drha7fcb052001-12-14 15:09:55 +00001063 sqliteOsUnlock(&pPager->fd);
drh8cfbf082001-09-19 13:22:39 +00001064 assert( pPager->journalOpen==0 );
drhed7c8552001-04-11 14:29:21 +00001065 break;
1066 }
1067 case SQLITE_READLOCK: {
drha7fcb052001-12-14 15:09:55 +00001068 sqliteOsUnlock(&pPager->fd);
drhed7c8552001-04-11 14:29:21 +00001069 break;
1070 }
1071 default: {
1072 /* Do nothing */
1073 break;
1074 }
1075 }
drhd9b02572001-04-15 00:37:09 +00001076 for(pPg=pPager->pAll; pPg; pPg=pNext){
1077 pNext = pPg->pNextAll;
1078 sqliteFree(pPg);
drhed7c8552001-04-11 14:29:21 +00001079 }
drha7fcb052001-12-14 15:09:55 +00001080 sqliteOsClose(&pPager->fd);
drh8cfbf082001-09-19 13:22:39 +00001081 assert( pPager->journalOpen==0 );
drh0f892532002-05-30 12:27:03 +00001082 /* Temp files are automatically deleted by the OS
1083 ** if( pPager->tempFile ){
1084 ** sqliteOsDelete(pPager->zFilename);
1085 ** }
1086 */
drhdb48ee02003-01-16 13:42:43 +00001087 CLR_PAGER(pPager);
drh73509ee2003-04-06 20:44:45 +00001088 if( pPager->zFilename!=(char*)&pPager[1] ){
drha76c82e2003-07-27 18:59:42 +00001089 assert( 0 ); /* Cannot happen */
drh73509ee2003-04-06 20:44:45 +00001090 sqliteFree(pPager->zFilename);
1091 sqliteFree(pPager->zJournal);
drha76c82e2003-07-27 18:59:42 +00001092 sqliteFree(pPager->zDirectory);
drh73509ee2003-04-06 20:44:45 +00001093 }
drhed7c8552001-04-11 14:29:21 +00001094 sqliteFree(pPager);
1095 return SQLITE_OK;
1096}
1097
1098/*
drh5e00f6c2001-09-13 13:46:56 +00001099** Return the page number for the given page data.
drhed7c8552001-04-11 14:29:21 +00001100*/
drhd9b02572001-04-15 00:37:09 +00001101Pgno sqlitepager_pagenumber(void *pData){
drhed7c8552001-04-11 14:29:21 +00001102 PgHdr *p = DATA_TO_PGHDR(pData);
1103 return p->pgno;
1104}
1105
1106/*
drh7e3b0a02001-04-28 16:52:40 +00001107** Increment the reference count for a page. If the page is
1108** currently on the freelist (the reference count is zero) then
1109** remove it from the freelist.
1110*/
drh836faa42003-01-11 13:30:57 +00001111#define page_ref(P) ((P)->nRef==0?_page_ref(P):(void)(P)->nRef++)
1112static void _page_ref(PgHdr *pPg){
drh7e3b0a02001-04-28 16:52:40 +00001113 if( pPg->nRef==0 ){
1114 /* The page is currently on the freelist. Remove it. */
drh341eae82003-01-21 02:39:36 +00001115 if( pPg==pPg->pPager->pFirstSynced ){
1116 PgHdr *p = pPg->pNextFree;
1117 while( p && p->needSync ){ p = p->pNextFree; }
1118 pPg->pPager->pFirstSynced = p;
1119 }
drh7e3b0a02001-04-28 16:52:40 +00001120 if( pPg->pPrevFree ){
1121 pPg->pPrevFree->pNextFree = pPg->pNextFree;
1122 }else{
1123 pPg->pPager->pFirst = pPg->pNextFree;
1124 }
1125 if( pPg->pNextFree ){
1126 pPg->pNextFree->pPrevFree = pPg->pPrevFree;
1127 }else{
1128 pPg->pPager->pLast = pPg->pPrevFree;
1129 }
1130 pPg->pPager->nRef++;
1131 }
1132 pPg->nRef++;
drhdd793422001-06-28 01:54:48 +00001133 REFINFO(pPg);
drhdf0b3b02001-06-23 11:36:20 +00001134}
1135
1136/*
1137** Increment the reference count for a page. The input pointer is
1138** a reference to the page data.
1139*/
1140int sqlitepager_ref(void *pData){
1141 PgHdr *pPg = DATA_TO_PGHDR(pData);
1142 page_ref(pPg);
drh8c42ca92001-06-22 19:15:00 +00001143 return SQLITE_OK;
drh7e3b0a02001-04-28 16:52:40 +00001144}
1145
1146/*
drh34e79ce2004-02-08 06:05:46 +00001147** Sync the journal. In other words, make sure all the pages that have
1148** been written to the journal have actually reached the surface of the
1149** disk. It is not safe to modify the original database file until after
1150** the journal has been synced. If the original database is modified before
1151** the journal is synced and a power failure occurs, the unsynced journal
1152** data would be lost and we would be unable to completely rollback the
1153** database changes. Database corruption would occur.
1154**
1155** This routine also updates the nRec field in the header of the journal.
1156** (See comments on the pager_playback() routine for additional information.)
1157** If the sync mode is FULL, two syncs will occur. First the whole journal
1158** is synced, then the nRec field is updated, then a second sync occurs.
drhb19a2bc2001-09-16 00:13:26 +00001159**
drh34e79ce2004-02-08 06:05:46 +00001160** For temporary databases, we do not care if we are able to rollback
1161** after a power failure, so sync occurs.
drhfa86c412002-02-02 15:01:15 +00001162**
drh34e79ce2004-02-08 06:05:46 +00001163** This routine clears the needSync field of every page current held in
1164** memory.
drh50e5dad2001-09-15 00:57:28 +00001165*/
drh34e79ce2004-02-08 06:05:46 +00001166static int syncJournal(Pager *pPager){
drh50e5dad2001-09-15 00:57:28 +00001167 PgHdr *pPg;
1168 int rc = SQLITE_OK;
drh03eb96a2002-11-10 23:32:56 +00001169
1170 /* Sync the journal before modifying the main database
1171 ** (assuming there is a journal and it needs to be synced.)
1172 */
drh50e5dad2001-09-15 00:57:28 +00001173 if( pPager->needSync ){
drhfa86c412002-02-02 15:01:15 +00001174 if( !pPager->tempFile ){
drhdb48ee02003-01-16 13:42:43 +00001175 assert( pPager->journalOpen );
1176 assert( !pPager->noSync );
drh968af522003-02-11 14:55:40 +00001177#ifndef NDEBUG
1178 {
drh34e79ce2004-02-08 06:05:46 +00001179 /* Make sure the pPager->nRec counter we are keeping agrees
1180 ** with the nRec computed from the size of the journal file.
1181 */
drh4a0681e2003-02-13 01:58:20 +00001182 off_t hdrSz, pgSz, jSz;
drh968af522003-02-11 14:55:40 +00001183 hdrSz = JOURNAL_HDR_SZ(journal_format);
1184 pgSz = JOURNAL_PG_SZ(journal_format);
drh4a0681e2003-02-13 01:58:20 +00001185 rc = sqliteOsFileSize(&pPager->jfd, &jSz);
drh968af522003-02-11 14:55:40 +00001186 if( rc!=0 ) return rc;
drh4a0681e2003-02-13 01:58:20 +00001187 assert( pPager->nRec*pgSz+hdrSz==jSz );
drh968af522003-02-11 14:55:40 +00001188 }
1189#endif
drhd8d66e82003-02-12 02:10:15 +00001190 if( journal_format>=3 ){
drh34e79ce2004-02-08 06:05:46 +00001191 /* Write the nRec value into the journal file header */
drhd8d66e82003-02-12 02:10:15 +00001192 off_t szJ;
1193 if( pPager->fullSync ){
1194 TRACE1("SYNC\n");
1195 rc = sqliteOsSync(&pPager->jfd);
1196 if( rc!=0 ) return rc;
1197 }
1198 sqliteOsSeek(&pPager->jfd, sizeof(aJournalMagic1));
drh99ee3602003-02-16 19:13:36 +00001199 rc = write32bits(&pPager->jfd, pPager->nRec);
1200 if( rc ) return rc;
drhd8d66e82003-02-12 02:10:15 +00001201 szJ = JOURNAL_HDR_SZ(journal_format) +
1202 pPager->nRec*JOURNAL_PG_SZ(journal_format);
1203 sqliteOsSeek(&pPager->jfd, szJ);
drh968af522003-02-11 14:55:40 +00001204 }
drhdb48ee02003-01-16 13:42:43 +00001205 TRACE1("SYNC\n");
drhfa86c412002-02-02 15:01:15 +00001206 rc = sqliteOsSync(&pPager->jfd);
1207 if( rc!=0 ) return rc;
drhdb48ee02003-01-16 13:42:43 +00001208 pPager->journalStarted = 1;
drhfa86c412002-02-02 15:01:15 +00001209 }
drh50e5dad2001-09-15 00:57:28 +00001210 pPager->needSync = 0;
drh341eae82003-01-21 02:39:36 +00001211
1212 /* Erase the needSync flag from every page.
1213 */
1214 for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
1215 pPg->needSync = 0;
1216 }
1217 pPager->pFirstSynced = pPager->pFirst;
drh50e5dad2001-09-15 00:57:28 +00001218 }
drh03eb96a2002-11-10 23:32:56 +00001219
drh341eae82003-01-21 02:39:36 +00001220#ifndef NDEBUG
1221 /* If the Pager.needSync flag is clear then the PgHdr.needSync
1222 ** flag must also be clear for all pages. Verify that this
1223 ** invariant is true.
drh03eb96a2002-11-10 23:32:56 +00001224 */
drh341eae82003-01-21 02:39:36 +00001225 else{
1226 for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
1227 assert( pPg->needSync==0 );
1228 }
1229 assert( pPager->pFirstSynced==pPager->pFirst );
drh03eb96a2002-11-10 23:32:56 +00001230 }
drh341eae82003-01-21 02:39:36 +00001231#endif
drhdb48ee02003-01-16 13:42:43 +00001232
drh81a20f22001-10-12 17:30:04 +00001233 return rc;
drh50e5dad2001-09-15 00:57:28 +00001234}
1235
1236/*
drh2554f8b2003-01-22 01:26:44 +00001237** Given a list of pages (connected by the PgHdr.pDirty pointer) write
1238** every one of those pages out to the database file and mark them all
1239** as clean.
1240*/
1241static int pager_write_pagelist(PgHdr *pList){
1242 Pager *pPager;
1243 int rc;
1244
1245 if( pList==0 ) return SQLITE_OK;
1246 pPager = pList->pPager;
1247 while( pList ){
1248 assert( pList->dirty );
drhd0ba1932004-02-10 01:54:28 +00001249 sqliteOsSeek(&pPager->fd, (pList->pgno-1)*(off_t)SQLITE_PAGE_SIZE);
drhb20ea9d2004-02-09 01:20:36 +00001250 if( pPager->xCodec ){
1251 pPager->xCodec(pPager->pCodecArg, PGHDR_TO_DATA(pList), 1);
1252 }
drhd0ba1932004-02-10 01:54:28 +00001253 rc = sqliteOsWrite(&pPager->fd, PGHDR_TO_DATA(pList), SQLITE_PAGE_SIZE);
drhb20ea9d2004-02-09 01:20:36 +00001254 if( pPager->xCodec ){
1255 pPager->xCodec(pPager->pCodecArg, PGHDR_TO_DATA(pList), 0);
1256 }
drh2554f8b2003-01-22 01:26:44 +00001257 if( rc ) return rc;
1258 pList->dirty = 0;
1259 pList = pList->pDirty;
1260 }
1261 return SQLITE_OK;
1262}
1263
1264/*
1265** Collect every dirty page into a dirty list and
1266** return a pointer to the head of that list. All pages are
1267** collected even if they are still in use.
1268*/
1269static PgHdr *pager_get_all_dirty_pages(Pager *pPager){
1270 PgHdr *p, *pList;
1271 pList = 0;
1272 for(p=pPager->pAll; p; p=p->pNextAll){
1273 if( p->dirty ){
1274 p->pDirty = pList;
1275 pList = p;
1276 }
1277 }
1278 return pList;
1279}
1280
1281/*
drhd9b02572001-04-15 00:37:09 +00001282** Acquire a page.
1283**
drh58a11682001-11-10 13:51:08 +00001284** A read lock on the disk file is obtained when the first page is acquired.
drh5e00f6c2001-09-13 13:46:56 +00001285** This read lock is dropped when the last page is released.
drhd9b02572001-04-15 00:37:09 +00001286**
drh306dc212001-05-21 13:45:10 +00001287** A _get works for any page number greater than 0. If the database
1288** file is smaller than the requested page, then no actual disk
1289** read occurs and the memory image of the page is initialized to
1290** all zeros. The extra data appended to a page is always initialized
1291** to zeros the first time a page is loaded into memory.
1292**
drhd9b02572001-04-15 00:37:09 +00001293** The acquisition might fail for several reasons. In all cases,
1294** an appropriate error code is returned and *ppPage is set to NULL.
drh7e3b0a02001-04-28 16:52:40 +00001295**
1296** See also sqlitepager_lookup(). Both this routine and _lookup() attempt
1297** to find a page in the in-memory cache first. If the page is not already
drh5e00f6c2001-09-13 13:46:56 +00001298** in memory, this routine goes to disk to read it in whereas _lookup()
drh7e3b0a02001-04-28 16:52:40 +00001299** just returns 0. This routine acquires a read-lock the first time it
1300** has to go to disk, and could also playback an old journal if necessary.
1301** Since _lookup() never goes to disk, it never has to deal with locks
1302** or journal files.
drhed7c8552001-04-11 14:29:21 +00001303*/
drhd9b02572001-04-15 00:37:09 +00001304int sqlitepager_get(Pager *pPager, Pgno pgno, void **ppPage){
drhed7c8552001-04-11 14:29:21 +00001305 PgHdr *pPg;
drh8766c342002-11-09 00:33:15 +00001306 int rc;
drhed7c8552001-04-11 14:29:21 +00001307
drhd9b02572001-04-15 00:37:09 +00001308 /* Make sure we have not hit any critical errors.
1309 */
drh836faa42003-01-11 13:30:57 +00001310 assert( pPager!=0 );
1311 assert( pgno!=0 );
drh2e6d11b2003-04-25 15:37:57 +00001312 *ppPage = 0;
drhd9b02572001-04-15 00:37:09 +00001313 if( pPager->errMask & ~(PAGER_ERR_FULL) ){
1314 return pager_errcode(pPager);
1315 }
1316
drhed7c8552001-04-11 14:29:21 +00001317 /* If this is the first page accessed, then get a read lock
1318 ** on the database file.
1319 */
1320 if( pPager->nRef==0 ){
drh8766c342002-11-09 00:33:15 +00001321 rc = sqliteOsReadLock(&pPager->fd);
1322 if( rc!=SQLITE_OK ){
drh8766c342002-11-09 00:33:15 +00001323 return rc;
drhed7c8552001-04-11 14:29:21 +00001324 }
drhd9b02572001-04-15 00:37:09 +00001325 pPager->state = SQLITE_READLOCK;
drhed7c8552001-04-11 14:29:21 +00001326
1327 /* If a journal file exists, try to play it back.
1328 */
drhda47d772002-12-02 04:25:19 +00001329 if( pPager->useJournal && sqliteOsFileExists(pPager->zJournal) ){
drhe2227f02003-06-14 11:42:57 +00001330 int rc;
drhed7c8552001-04-11 14:29:21 +00001331
drha7fcb052001-12-14 15:09:55 +00001332 /* Get a write lock on the database
1333 */
1334 rc = sqliteOsWriteLock(&pPager->fd);
1335 if( rc!=SQLITE_OK ){
drh8766c342002-11-09 00:33:15 +00001336 if( sqliteOsUnlock(&pPager->fd)!=SQLITE_OK ){
1337 /* This should never happen! */
1338 rc = SQLITE_INTERNAL;
1339 }
drh8766c342002-11-09 00:33:15 +00001340 return rc;
drha7fcb052001-12-14 15:09:55 +00001341 }
1342 pPager->state = SQLITE_WRITELOCK;
1343
drhe2227f02003-06-14 11:42:57 +00001344 /* Open the journal for reading only. Return SQLITE_BUSY if
1345 ** we are unable to open the journal file.
drhf57b3392001-10-08 13:22:32 +00001346 **
drhe2227f02003-06-14 11:42:57 +00001347 ** The journal file does not need to be locked itself. The
1348 ** journal file is never open unless the main database file holds
1349 ** a write lock, so there is never any chance of two or more
1350 ** processes opening the journal at the same time.
drhed7c8552001-04-11 14:29:21 +00001351 */
drhe2227f02003-06-14 11:42:57 +00001352 rc = sqliteOsOpenReadOnly(pPager->zJournal, &pPager->jfd);
drha7fcb052001-12-14 15:09:55 +00001353 if( rc!=SQLITE_OK ){
1354 rc = sqliteOsUnlock(&pPager->fd);
1355 assert( rc==SQLITE_OK );
drhed7c8552001-04-11 14:29:21 +00001356 return SQLITE_BUSY;
1357 }
drha7fcb052001-12-14 15:09:55 +00001358 pPager->journalOpen = 1;
drhdb48ee02003-01-16 13:42:43 +00001359 pPager->journalStarted = 0;
drhed7c8552001-04-11 14:29:21 +00001360
1361 /* Playback and delete the journal. Drop the database write
1362 ** lock and reacquire the read lock.
1363 */
drh99ee3602003-02-16 19:13:36 +00001364 rc = pager_playback(pPager, 0);
drhd9b02572001-04-15 00:37:09 +00001365 if( rc!=SQLITE_OK ){
1366 return rc;
1367 }
drhed7c8552001-04-11 14:29:21 +00001368 }
1369 pPg = 0;
1370 }else{
1371 /* Search for page in cache */
drhd9b02572001-04-15 00:37:09 +00001372 pPg = pager_lookup(pPager, pgno);
drhed7c8552001-04-11 14:29:21 +00001373 }
1374 if( pPg==0 ){
drhd9b02572001-04-15 00:37:09 +00001375 /* The requested page is not in the page cache. */
drhed7c8552001-04-11 14:29:21 +00001376 int h;
drh7e3b0a02001-04-28 16:52:40 +00001377 pPager->nMiss++;
drhed7c8552001-04-11 14:29:21 +00001378 if( pPager->nPage<pPager->mxPage || pPager->pFirst==0 ){
1379 /* Create a new page */
drhd0ba1932004-02-10 01:54:28 +00001380 pPg = sqliteMallocRaw( sizeof(*pPg) + SQLITE_PAGE_SIZE
drh968af522003-02-11 14:55:40 +00001381 + sizeof(u32) + pPager->nExtra );
drhd9b02572001-04-15 00:37:09 +00001382 if( pPg==0 ){
drhd9b02572001-04-15 00:37:09 +00001383 pager_unwritelock(pPager);
1384 pPager->errMask |= PAGER_ERR_MEM;
1385 return SQLITE_NOMEM;
1386 }
drh8c1238a2003-01-02 14:43:55 +00001387 memset(pPg, 0, sizeof(*pPg));
drhed7c8552001-04-11 14:29:21 +00001388 pPg->pPager = pPager;
drhd9b02572001-04-15 00:37:09 +00001389 pPg->pNextAll = pPager->pAll;
1390 if( pPager->pAll ){
1391 pPager->pAll->pPrevAll = pPg;
1392 }
1393 pPg->pPrevAll = 0;
drhd79caeb2001-04-15 02:27:24 +00001394 pPager->pAll = pPg;
drhd9b02572001-04-15 00:37:09 +00001395 pPager->nPage++;
drhed7c8552001-04-11 14:29:21 +00001396 }else{
drhdb48ee02003-01-16 13:42:43 +00001397 /* Find a page to recycle. Try to locate a page that does not
1398 ** require us to do an fsync() on the journal.
1399 */
drh341eae82003-01-21 02:39:36 +00001400 pPg = pPager->pFirstSynced;
drhb19a2bc2001-09-16 00:13:26 +00001401
drhdb48ee02003-01-16 13:42:43 +00001402 /* If we could not find a page that does not require an fsync()
1403 ** on the journal file then fsync the journal file. This is a
1404 ** very slow operation, so we work hard to avoid it. But sometimes
1405 ** it can't be helped.
drhb19a2bc2001-09-16 00:13:26 +00001406 */
drh603240c2002-03-05 01:11:12 +00001407 if( pPg==0 ){
drh34e79ce2004-02-08 06:05:46 +00001408 int rc = syncJournal(pPager);
drh50e5dad2001-09-15 00:57:28 +00001409 if( rc!=0 ){
1410 sqlitepager_rollback(pPager);
drh50e5dad2001-09-15 00:57:28 +00001411 return SQLITE_IOERR;
1412 }
1413 pPg = pPager->pFirst;
1414 }
drhd9b02572001-04-15 00:37:09 +00001415 assert( pPg->nRef==0 );
drhdb48ee02003-01-16 13:42:43 +00001416
1417 /* Write the page to the database file if it is dirty.
1418 */
1419 if( pPg->dirty ){
1420 assert( pPg->needSync==0 );
drh2554f8b2003-01-22 01:26:44 +00001421 pPg->pDirty = 0;
1422 rc = pager_write_pagelist( pPg );
drhdb48ee02003-01-16 13:42:43 +00001423 if( rc!=SQLITE_OK ){
1424 sqlitepager_rollback(pPager);
drhdb48ee02003-01-16 13:42:43 +00001425 return SQLITE_IOERR;
1426 }
drhdb48ee02003-01-16 13:42:43 +00001427 }
drh50e5dad2001-09-15 00:57:28 +00001428 assert( pPg->dirty==0 );
drhd9b02572001-04-15 00:37:09 +00001429
drhdb48ee02003-01-16 13:42:43 +00001430 /* If the page we are recycling is marked as alwaysRollback, then
drh193a6b42002-07-07 16:52:46 +00001431 ** set the global alwaysRollback flag, thus disabling the
1432 ** sqlite_dont_rollback() optimization for the rest of this transaction.
1433 ** It is necessary to do this because the page marked alwaysRollback
1434 ** might be reloaded at a later time but at that point we won't remember
1435 ** that is was marked alwaysRollback. This means that all pages must
1436 ** be marked as alwaysRollback from here on out.
1437 */
1438 if( pPg->alwaysRollback ){
1439 pPager->alwaysRollback = 1;
1440 }
1441
drhd9b02572001-04-15 00:37:09 +00001442 /* Unlink the old page from the free list and the hash table
1443 */
drh341eae82003-01-21 02:39:36 +00001444 if( pPg==pPager->pFirstSynced ){
1445 PgHdr *p = pPg->pNextFree;
1446 while( p && p->needSync ){ p = p->pNextFree; }
1447 pPager->pFirstSynced = p;
1448 }
drh6019e162001-07-02 17:51:45 +00001449 if( pPg->pPrevFree ){
1450 pPg->pPrevFree->pNextFree = pPg->pNextFree;
drhed7c8552001-04-11 14:29:21 +00001451 }else{
drh6019e162001-07-02 17:51:45 +00001452 assert( pPager->pFirst==pPg );
1453 pPager->pFirst = pPg->pNextFree;
drhed7c8552001-04-11 14:29:21 +00001454 }
drh6019e162001-07-02 17:51:45 +00001455 if( pPg->pNextFree ){
1456 pPg->pNextFree->pPrevFree = pPg->pPrevFree;
1457 }else{
1458 assert( pPager->pLast==pPg );
1459 pPager->pLast = pPg->pPrevFree;
1460 }
1461 pPg->pNextFree = pPg->pPrevFree = 0;
drhed7c8552001-04-11 14:29:21 +00001462 if( pPg->pNextHash ){
1463 pPg->pNextHash->pPrevHash = pPg->pPrevHash;
1464 }
1465 if( pPg->pPrevHash ){
1466 pPg->pPrevHash->pNextHash = pPg->pNextHash;
1467 }else{
drhd9b02572001-04-15 00:37:09 +00001468 h = pager_hash(pPg->pgno);
drhed7c8552001-04-11 14:29:21 +00001469 assert( pPager->aHash[h]==pPg );
1470 pPager->aHash[h] = pPg->pNextHash;
1471 }
drh6019e162001-07-02 17:51:45 +00001472 pPg->pNextHash = pPg->pPrevHash = 0;
drhd9b02572001-04-15 00:37:09 +00001473 pPager->nOvfl++;
drhed7c8552001-04-11 14:29:21 +00001474 }
1475 pPg->pgno = pgno;
drh1ab43002002-01-14 09:28:19 +00001476 if( pPager->aInJournal && (int)pgno<=pPager->origDbSize ){
drhed6c8672003-01-12 18:02:16 +00001477 sqliteCheckMemory(pPager->aInJournal, pgno/8);
drhdb48ee02003-01-16 13:42:43 +00001478 assert( pPager->journalOpen );
drh6019e162001-07-02 17:51:45 +00001479 pPg->inJournal = (pPager->aInJournal[pgno/8] & (1<<(pgno&7)))!=0;
drhdb48ee02003-01-16 13:42:43 +00001480 pPg->needSync = 0;
drh6019e162001-07-02 17:51:45 +00001481 }else{
1482 pPg->inJournal = 0;
drhdb48ee02003-01-16 13:42:43 +00001483 pPg->needSync = 0;
drh6019e162001-07-02 17:51:45 +00001484 }
drh03eb96a2002-11-10 23:32:56 +00001485 if( pPager->aInCkpt && (int)pgno<=pPager->ckptSize
1486 && (pPager->aInCkpt[pgno/8] & (1<<(pgno&7)))!=0 ){
1487 page_add_to_ckpt_list(pPg);
drhfa86c412002-02-02 15:01:15 +00001488 }else{
drh03eb96a2002-11-10 23:32:56 +00001489 page_remove_from_ckpt_list(pPg);
drhfa86c412002-02-02 15:01:15 +00001490 }
drhed7c8552001-04-11 14:29:21 +00001491 pPg->dirty = 0;
1492 pPg->nRef = 1;
drhdd793422001-06-28 01:54:48 +00001493 REFINFO(pPg);
drhd9b02572001-04-15 00:37:09 +00001494 pPager->nRef++;
1495 h = pager_hash(pgno);
drhed7c8552001-04-11 14:29:21 +00001496 pPg->pNextHash = pPager->aHash[h];
1497 pPager->aHash[h] = pPg;
1498 if( pPg->pNextHash ){
1499 assert( pPg->pNextHash->pPrevHash==0 );
1500 pPg->pNextHash->pPrevHash = pPg;
1501 }
drh2e6d11b2003-04-25 15:37:57 +00001502 if( pPager->nExtra>0 ){
1503 memset(PGHDR_TO_EXTRA(pPg), 0, pPager->nExtra);
1504 }
drh306dc212001-05-21 13:45:10 +00001505 if( pPager->dbSize<0 ) sqlitepager_pagecount(pPager);
drh2e6d11b2003-04-25 15:37:57 +00001506 if( pPager->errMask!=0 ){
1507 sqlitepager_unref(PGHDR_TO_DATA(pPg));
1508 rc = pager_errcode(pPager);
1509 return rc;
1510 }
drh1ab43002002-01-14 09:28:19 +00001511 if( pPager->dbSize<(int)pgno ){
drhd0ba1932004-02-10 01:54:28 +00001512 memset(PGHDR_TO_DATA(pPg), 0, SQLITE_PAGE_SIZE);
drh306dc212001-05-21 13:45:10 +00001513 }else{
drh81a20f22001-10-12 17:30:04 +00001514 int rc;
drhd0ba1932004-02-10 01:54:28 +00001515 sqliteOsSeek(&pPager->fd, (pgno-1)*(off_t)SQLITE_PAGE_SIZE);
1516 rc = sqliteOsRead(&pPager->fd, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE);
drh81a20f22001-10-12 17:30:04 +00001517 if( rc!=SQLITE_OK ){
drh28be87c2002-11-05 23:03:02 +00001518 off_t fileSize;
drh4e371ee2002-09-05 16:08:27 +00001519 if( sqliteOsFileSize(&pPager->fd,&fileSize)!=SQLITE_OK
drhd0ba1932004-02-10 01:54:28 +00001520 || fileSize>=pgno*SQLITE_PAGE_SIZE ){
drh2e6d11b2003-04-25 15:37:57 +00001521 sqlitepager_unref(PGHDR_TO_DATA(pPg));
drh4e371ee2002-09-05 16:08:27 +00001522 return rc;
1523 }else{
drhd0ba1932004-02-10 01:54:28 +00001524 memset(PGHDR_TO_DATA(pPg), 0, SQLITE_PAGE_SIZE);
drh4e371ee2002-09-05 16:08:27 +00001525 }
drhb20ea9d2004-02-09 01:20:36 +00001526 }else if( pPager->xCodec ){
1527 pPager->xCodec(pPager->pCodecArg, PGHDR_TO_DATA(pPg), 0);
drh81a20f22001-10-12 17:30:04 +00001528 }
drh306dc212001-05-21 13:45:10 +00001529 }
drhed7c8552001-04-11 14:29:21 +00001530 }else{
drhd9b02572001-04-15 00:37:09 +00001531 /* The requested page is in the page cache. */
drh7e3b0a02001-04-28 16:52:40 +00001532 pPager->nHit++;
drhdf0b3b02001-06-23 11:36:20 +00001533 page_ref(pPg);
drhed7c8552001-04-11 14:29:21 +00001534 }
1535 *ppPage = PGHDR_TO_DATA(pPg);
1536 return SQLITE_OK;
1537}
1538
1539/*
drh7e3b0a02001-04-28 16:52:40 +00001540** Acquire a page if it is already in the in-memory cache. Do
1541** not read the page from disk. Return a pointer to the page,
1542** or 0 if the page is not in cache.
1543**
1544** See also sqlitepager_get(). The difference between this routine
1545** and sqlitepager_get() is that _get() will go to the disk and read
1546** in the page if the page is not already in cache. This routine
drh5e00f6c2001-09-13 13:46:56 +00001547** returns NULL if the page is not in cache or if a disk I/O error
1548** has ever happened.
drh7e3b0a02001-04-28 16:52:40 +00001549*/
1550void *sqlitepager_lookup(Pager *pPager, Pgno pgno){
1551 PgHdr *pPg;
1552
drh836faa42003-01-11 13:30:57 +00001553 assert( pPager!=0 );
1554 assert( pgno!=0 );
drh7e3b0a02001-04-28 16:52:40 +00001555 if( pPager->errMask & ~(PAGER_ERR_FULL) ){
1556 return 0;
1557 }
drh836faa42003-01-11 13:30:57 +00001558 /* if( pPager->nRef==0 ){
1559 ** return 0;
1560 ** }
1561 */
drh7e3b0a02001-04-28 16:52:40 +00001562 pPg = pager_lookup(pPager, pgno);
1563 if( pPg==0 ) return 0;
drhdf0b3b02001-06-23 11:36:20 +00001564 page_ref(pPg);
drh7e3b0a02001-04-28 16:52:40 +00001565 return PGHDR_TO_DATA(pPg);
1566}
1567
1568/*
drhed7c8552001-04-11 14:29:21 +00001569** Release a page.
1570**
1571** If the number of references to the page drop to zero, then the
1572** page is added to the LRU list. When all references to all pages
drhd9b02572001-04-15 00:37:09 +00001573** are released, a rollback occurs and the lock on the database is
drhed7c8552001-04-11 14:29:21 +00001574** removed.
1575*/
drhd9b02572001-04-15 00:37:09 +00001576int sqlitepager_unref(void *pData){
drhed7c8552001-04-11 14:29:21 +00001577 PgHdr *pPg;
drhd9b02572001-04-15 00:37:09 +00001578
1579 /* Decrement the reference count for this page
1580 */
drhed7c8552001-04-11 14:29:21 +00001581 pPg = DATA_TO_PGHDR(pData);
1582 assert( pPg->nRef>0 );
drhed7c8552001-04-11 14:29:21 +00001583 pPg->nRef--;
drhdd793422001-06-28 01:54:48 +00001584 REFINFO(pPg);
drhd9b02572001-04-15 00:37:09 +00001585
drh72f82862001-05-24 21:06:34 +00001586 /* When the number of references to a page reach 0, call the
1587 ** destructor and add the page to the freelist.
drhd9b02572001-04-15 00:37:09 +00001588 */
drhed7c8552001-04-11 14:29:21 +00001589 if( pPg->nRef==0 ){
drh1eaa2692001-09-18 02:02:23 +00001590 Pager *pPager;
1591 pPager = pPg->pPager;
drhd9b02572001-04-15 00:37:09 +00001592 pPg->pNextFree = 0;
1593 pPg->pPrevFree = pPager->pLast;
drhed7c8552001-04-11 14:29:21 +00001594 pPager->pLast = pPg;
drhd9b02572001-04-15 00:37:09 +00001595 if( pPg->pPrevFree ){
1596 pPg->pPrevFree->pNextFree = pPg;
drhed7c8552001-04-11 14:29:21 +00001597 }else{
1598 pPager->pFirst = pPg;
1599 }
drh341eae82003-01-21 02:39:36 +00001600 if( pPg->needSync==0 && pPager->pFirstSynced==0 ){
1601 pPager->pFirstSynced = pPg;
1602 }
drh72f82862001-05-24 21:06:34 +00001603 if( pPager->xDestructor ){
1604 pPager->xDestructor(pData);
1605 }
drhd9b02572001-04-15 00:37:09 +00001606
1607 /* When all pages reach the freelist, drop the read lock from
1608 ** the database file.
1609 */
1610 pPager->nRef--;
1611 assert( pPager->nRef>=0 );
1612 if( pPager->nRef==0 ){
1613 pager_reset(pPager);
1614 }
drhed7c8552001-04-11 14:29:21 +00001615 }
drhd9b02572001-04-15 00:37:09 +00001616 return SQLITE_OK;
drhed7c8552001-04-11 14:29:21 +00001617}
1618
1619/*
drhda47d772002-12-02 04:25:19 +00001620** Create a journal file for pPager. There should already be a write
1621** lock on the database file when this routine is called.
1622**
1623** Return SQLITE_OK if everything. Return an error code and release the
1624** write lock if anything goes wrong.
1625*/
1626static int pager_open_journal(Pager *pPager){
1627 int rc;
1628 assert( pPager->state==SQLITE_WRITELOCK );
1629 assert( pPager->journalOpen==0 );
1630 assert( pPager->useJournal );
drh3e4c8522003-07-07 10:47:10 +00001631 sqlitepager_pagecount(pPager);
drhda47d772002-12-02 04:25:19 +00001632 pPager->aInJournal = sqliteMalloc( pPager->dbSize/8 + 1 );
1633 if( pPager->aInJournal==0 ){
1634 sqliteOsReadLock(&pPager->fd);
1635 pPager->state = SQLITE_READLOCK;
1636 return SQLITE_NOMEM;
1637 }
1638 rc = sqliteOsOpenExclusive(pPager->zJournal, &pPager->jfd,pPager->tempFile);
1639 if( rc!=SQLITE_OK ){
1640 sqliteFree(pPager->aInJournal);
1641 pPager->aInJournal = 0;
1642 sqliteOsReadLock(&pPager->fd);
1643 pPager->state = SQLITE_READLOCK;
1644 return SQLITE_CANTOPEN;
1645 }
drha76c82e2003-07-27 18:59:42 +00001646 sqliteOsOpenDirectory(pPager->zDirectory, &pPager->jfd);
drhda47d772002-12-02 04:25:19 +00001647 pPager->journalOpen = 1;
drhdb48ee02003-01-16 13:42:43 +00001648 pPager->journalStarted = 0;
drhda47d772002-12-02 04:25:19 +00001649 pPager->needSync = 0;
1650 pPager->alwaysRollback = 0;
drh968af522003-02-11 14:55:40 +00001651 pPager->nRec = 0;
drh2e6d11b2003-04-25 15:37:57 +00001652 if( pPager->errMask!=0 ){
1653 rc = pager_errcode(pPager);
1654 return rc;
1655 }
drhda47d772002-12-02 04:25:19 +00001656 pPager->origDbSize = pPager->dbSize;
drh968af522003-02-11 14:55:40 +00001657 if( journal_format==JOURNAL_FORMAT_3 ){
1658 rc = sqliteOsWrite(&pPager->jfd, aJournalMagic3, sizeof(aJournalMagic3));
1659 if( rc==SQLITE_OK ){
drh4303fee2003-02-15 23:09:17 +00001660 rc = write32bits(&pPager->jfd, pPager->noSync ? 0xffffffff : 0);
drh968af522003-02-11 14:55:40 +00001661 }
1662 if( rc==SQLITE_OK ){
1663 pPager->cksumInit = (u32)sqliteRandomInteger();
1664 rc = write32bits(&pPager->jfd, pPager->cksumInit);
1665 }
1666 }else if( journal_format==JOURNAL_FORMAT_2 ){
1667 rc = sqliteOsWrite(&pPager->jfd, aJournalMagic2, sizeof(aJournalMagic2));
drhda47d772002-12-02 04:25:19 +00001668 }else{
drh968af522003-02-11 14:55:40 +00001669 assert( journal_format==JOURNAL_FORMAT_1 );
1670 rc = sqliteOsWrite(&pPager->jfd, aJournalMagic1, sizeof(aJournalMagic1));
drhda47d772002-12-02 04:25:19 +00001671 }
1672 if( rc==SQLITE_OK ){
1673 rc = write32bits(&pPager->jfd, pPager->dbSize);
1674 }
1675 if( pPager->ckptAutoopen && rc==SQLITE_OK ){
1676 rc = sqlitepager_ckpt_begin(pPager);
1677 }
1678 if( rc!=SQLITE_OK ){
1679 rc = pager_unwritelock(pPager);
1680 if( rc==SQLITE_OK ){
1681 rc = SQLITE_FULL;
1682 }
1683 }
1684 return rc;
1685}
1686
1687/*
drh4b845d72002-03-05 12:41:19 +00001688** Acquire a write-lock on the database. The lock is removed when
1689** the any of the following happen:
1690**
1691** * sqlitepager_commit() is called.
1692** * sqlitepager_rollback() is called.
1693** * sqlitepager_close() is called.
1694** * sqlitepager_unref() is called to on every outstanding page.
1695**
1696** The parameter to this routine is a pointer to any open page of the
1697** database file. Nothing changes about the page - it is used merely
1698** to acquire a pointer to the Pager structure and as proof that there
1699** is already a read-lock on the database.
1700**
drhda47d772002-12-02 04:25:19 +00001701** A journal file is opened if this is not a temporary file. For
1702** temporary files, the opening of the journal file is deferred until
1703** there is an actual need to write to the journal.
1704**
drh4b845d72002-03-05 12:41:19 +00001705** If the database is already write-locked, this routine is a no-op.
1706*/
1707int sqlitepager_begin(void *pData){
1708 PgHdr *pPg = DATA_TO_PGHDR(pData);
1709 Pager *pPager = pPg->pPager;
1710 int rc = SQLITE_OK;
1711 assert( pPg->nRef>0 );
1712 assert( pPager->state!=SQLITE_UNLOCK );
1713 if( pPager->state==SQLITE_READLOCK ){
1714 assert( pPager->aInJournal==0 );
1715 rc = sqliteOsWriteLock(&pPager->fd);
1716 if( rc!=SQLITE_OK ){
1717 return rc;
1718 }
drh4b845d72002-03-05 12:41:19 +00001719 pPager->state = SQLITE_WRITELOCK;
drhda47d772002-12-02 04:25:19 +00001720 pPager->dirtyFile = 0;
drhdb48ee02003-01-16 13:42:43 +00001721 TRACE1("TRANSACTION\n");
drhda47d772002-12-02 04:25:19 +00001722 if( pPager->useJournal && !pPager->tempFile ){
1723 rc = pager_open_journal(pPager);
drh4b845d72002-03-05 12:41:19 +00001724 }
1725 }
1726 return rc;
1727}
1728
1729/*
drhed7c8552001-04-11 14:29:21 +00001730** Mark a data page as writeable. The page is written into the journal
1731** if it is not there already. This routine must be called before making
1732** changes to a page.
1733**
1734** The first time this routine is called, the pager creates a new
1735** journal and acquires a write lock on the database. If the write
1736** lock could not be acquired, this routine returns SQLITE_BUSY. The
drh306dc212001-05-21 13:45:10 +00001737** calling routine must check for that return value and be careful not to
drhed7c8552001-04-11 14:29:21 +00001738** change any page data until this routine returns SQLITE_OK.
drhd9b02572001-04-15 00:37:09 +00001739**
1740** If the journal file could not be written because the disk is full,
1741** then this routine returns SQLITE_FULL and does an immediate rollback.
1742** All subsequent write attempts also return SQLITE_FULL until there
1743** is a call to sqlitepager_commit() or sqlitepager_rollback() to
1744** reset.
drhed7c8552001-04-11 14:29:21 +00001745*/
drhd9b02572001-04-15 00:37:09 +00001746int sqlitepager_write(void *pData){
drh69688d52001-04-14 16:38:23 +00001747 PgHdr *pPg = DATA_TO_PGHDR(pData);
1748 Pager *pPager = pPg->pPager;
drhd79caeb2001-04-15 02:27:24 +00001749 int rc = SQLITE_OK;
drh69688d52001-04-14 16:38:23 +00001750
drh6446c4d2001-12-15 14:22:18 +00001751 /* Check for errors
1752 */
drhd9b02572001-04-15 00:37:09 +00001753 if( pPager->errMask ){
1754 return pager_errcode(pPager);
1755 }
drh5e00f6c2001-09-13 13:46:56 +00001756 if( pPager->readOnly ){
1757 return SQLITE_PERM;
1758 }
drh6446c4d2001-12-15 14:22:18 +00001759
1760 /* Mark the page as dirty. If the page has already been written
1761 ** to the journal then we can return right away.
1762 */
drhd9b02572001-04-15 00:37:09 +00001763 pPg->dirty = 1;
drh0f892532002-05-30 12:27:03 +00001764 if( pPg->inJournal && (pPg->inCkpt || pPager->ckptInUse==0) ){
drha1680452002-04-18 01:56:57 +00001765 pPager->dirtyFile = 1;
drhfa86c412002-02-02 15:01:15 +00001766 return SQLITE_OK;
1767 }
drh6446c4d2001-12-15 14:22:18 +00001768
1769 /* If we get this far, it means that the page needs to be
drhfa86c412002-02-02 15:01:15 +00001770 ** written to the transaction journal or the ckeckpoint journal
1771 ** or both.
1772 **
1773 ** First check to see that the transaction journal exists and
1774 ** create it if it does not.
drh6446c4d2001-12-15 14:22:18 +00001775 */
drhd9b02572001-04-15 00:37:09 +00001776 assert( pPager->state!=SQLITE_UNLOCK );
drh4b845d72002-03-05 12:41:19 +00001777 rc = sqlitepager_begin(pData);
drhda47d772002-12-02 04:25:19 +00001778 if( rc!=SQLITE_OK ){
1779 return rc;
1780 }
drhd9b02572001-04-15 00:37:09 +00001781 assert( pPager->state==SQLITE_WRITELOCK );
drhda47d772002-12-02 04:25:19 +00001782 if( !pPager->journalOpen && pPager->useJournal ){
1783 rc = pager_open_journal(pPager);
1784 if( rc!=SQLITE_OK ) return rc;
1785 }
1786 assert( pPager->journalOpen || !pPager->useJournal );
1787 pPager->dirtyFile = 1;
drh6446c4d2001-12-15 14:22:18 +00001788
drhfa86c412002-02-02 15:01:15 +00001789 /* The transaction journal now exists and we have a write lock on the
1790 ** main database file. Write the current page to the transaction
1791 ** journal if it is not there already.
drh6446c4d2001-12-15 14:22:18 +00001792 */
drhdb48ee02003-01-16 13:42:43 +00001793 if( !pPg->inJournal && pPager->useJournal ){
1794 if( (int)pPg->pgno <= pPager->origDbSize ){
drh968af522003-02-11 14:55:40 +00001795 int szPg;
1796 u32 saved;
1797 if( journal_format>=JOURNAL_FORMAT_3 ){
1798 u32 cksum = pager_cksum(pPager, pPg->pgno, pData);
1799 saved = *(u32*)PGHDR_TO_EXTRA(pPg);
1800 store32bits(cksum, pPg, SQLITE_PAGE_SIZE);
1801 szPg = SQLITE_PAGE_SIZE+8;
1802 }else{
1803 szPg = SQLITE_PAGE_SIZE+4;
1804 }
1805 store32bits(pPg->pgno, pPg, -4);
drhb20ea9d2004-02-09 01:20:36 +00001806 if( pPager->xCodec ){
drhd0ba1932004-02-10 01:54:28 +00001807 pPager->xCodec(pPager->pCodecArg, pData, 3);
drhb20ea9d2004-02-09 01:20:36 +00001808 }
drh968af522003-02-11 14:55:40 +00001809 rc = sqliteOsWrite(&pPager->jfd, &((char*)pData)[-4], szPg);
drhb20ea9d2004-02-09 01:20:36 +00001810 if( pPager->xCodec ){
drhd0ba1932004-02-10 01:54:28 +00001811 pPager->xCodec(pPager->pCodecArg, pData, 2);
drhb20ea9d2004-02-09 01:20:36 +00001812 }
drh968af522003-02-11 14:55:40 +00001813 if( journal_format>=JOURNAL_FORMAT_3 ){
1814 *(u32*)PGHDR_TO_EXTRA(pPg) = saved;
1815 }
drhdb48ee02003-01-16 13:42:43 +00001816 if( rc!=SQLITE_OK ){
1817 sqlitepager_rollback(pPager);
1818 pPager->errMask |= PAGER_ERR_FULL;
1819 return rc;
1820 }
drh99ee3602003-02-16 19:13:36 +00001821 pPager->nRec++;
drhdb48ee02003-01-16 13:42:43 +00001822 assert( pPager->aInJournal!=0 );
1823 pPager->aInJournal[pPg->pgno/8] |= 1<<(pPg->pgno&7);
1824 pPg->needSync = !pPager->noSync;
1825 pPg->inJournal = 1;
1826 if( pPager->ckptInUse ){
1827 pPager->aInCkpt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
1828 page_add_to_ckpt_list(pPg);
1829 }
1830 TRACE3("JOURNAL %d %d\n", pPg->pgno, pPg->needSync);
1831 }else{
1832 pPg->needSync = !pPager->journalStarted && !pPager->noSync;
1833 TRACE3("APPEND %d %d\n", pPg->pgno, pPg->needSync);
drhd9b02572001-04-15 00:37:09 +00001834 }
drhdb48ee02003-01-16 13:42:43 +00001835 if( pPg->needSync ){
1836 pPager->needSync = 1;
drhfa86c412002-02-02 15:01:15 +00001837 }
drh69688d52001-04-14 16:38:23 +00001838 }
drh6446c4d2001-12-15 14:22:18 +00001839
drhfa86c412002-02-02 15:01:15 +00001840 /* If the checkpoint journal is open and the page is not in it,
drh968af522003-02-11 14:55:40 +00001841 ** then write the current page to the checkpoint journal. Note that
1842 ** the checkpoint journal always uses the simplier format 2 that lacks
1843 ** checksums. The header is also omitted from the checkpoint journal.
drh6446c4d2001-12-15 14:22:18 +00001844 */
drh0f892532002-05-30 12:27:03 +00001845 if( pPager->ckptInUse && !pPg->inCkpt && (int)pPg->pgno<=pPager->ckptSize ){
drh1e336b42002-02-14 12:50:33 +00001846 assert( pPg->inJournal || (int)pPg->pgno>pPager->origDbSize );
drh968af522003-02-11 14:55:40 +00001847 store32bits(pPg->pgno, pPg, -4);
drhd0ba1932004-02-10 01:54:28 +00001848 rc = sqliteOsWrite(&pPager->cpfd, &((char*)pData)[-4], SQLITE_PAGE_SIZE+4);
drhfa86c412002-02-02 15:01:15 +00001849 if( rc!=SQLITE_OK ){
1850 sqlitepager_rollback(pPager);
1851 pPager->errMask |= PAGER_ERR_FULL;
1852 return rc;
1853 }
drh9bd47a92003-01-07 14:46:08 +00001854 pPager->ckptNRec++;
drhfa86c412002-02-02 15:01:15 +00001855 assert( pPager->aInCkpt!=0 );
1856 pPager->aInCkpt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
drh03eb96a2002-11-10 23:32:56 +00001857 page_add_to_ckpt_list(pPg);
drhfa86c412002-02-02 15:01:15 +00001858 }
1859
1860 /* Update the database size and return.
1861 */
drh1ab43002002-01-14 09:28:19 +00001862 if( pPager->dbSize<(int)pPg->pgno ){
drh306dc212001-05-21 13:45:10 +00001863 pPager->dbSize = pPg->pgno;
1864 }
drh69688d52001-04-14 16:38:23 +00001865 return rc;
drhed7c8552001-04-11 14:29:21 +00001866}
1867
1868/*
drhaacc5432002-01-06 17:07:40 +00001869** Return TRUE if the page given in the argument was previously passed
drh6019e162001-07-02 17:51:45 +00001870** to sqlitepager_write(). In other words, return TRUE if it is ok
1871** to change the content of the page.
1872*/
1873int sqlitepager_iswriteable(void *pData){
1874 PgHdr *pPg = DATA_TO_PGHDR(pData);
1875 return pPg->dirty;
1876}
1877
1878/*
drh001bbcb2003-03-19 03:14:00 +00001879** Replace the content of a single page with the information in the third
1880** argument.
1881*/
1882int sqlitepager_overwrite(Pager *pPager, Pgno pgno, void *pData){
1883 void *pPage;
1884 int rc;
1885
1886 rc = sqlitepager_get(pPager, pgno, &pPage);
1887 if( rc==SQLITE_OK ){
1888 rc = sqlitepager_write(pPage);
1889 if( rc==SQLITE_OK ){
drhd0ba1932004-02-10 01:54:28 +00001890 memcpy(pPage, pData, SQLITE_PAGE_SIZE);
drh001bbcb2003-03-19 03:14:00 +00001891 }
1892 sqlitepager_unref(pPage);
1893 }
1894 return rc;
1895}
1896
1897/*
drh30e58752002-03-02 20:41:57 +00001898** A call to this routine tells the pager that it is not necessary to
1899** write the information on page "pgno" back to the disk, even though
1900** that page might be marked as dirty.
1901**
1902** The overlying software layer calls this routine when all of the data
1903** on the given page is unused. The pager marks the page as clean so
1904** that it does not get written to disk.
1905**
1906** Tests show that this optimization, together with the
1907** sqlitepager_dont_rollback() below, more than double the speed
1908** of large INSERT operations and quadruple the speed of large DELETEs.
drh8e298f92002-07-06 16:28:47 +00001909**
1910** When this routine is called, set the alwaysRollback flag to true.
1911** Subsequent calls to sqlitepager_dont_rollback() for the same page
1912** will thereafter be ignored. This is necessary to avoid a problem
1913** where a page with data is added to the freelist during one part of
1914** a transaction then removed from the freelist during a later part
1915** of the same transaction and reused for some other purpose. When it
1916** is first added to the freelist, this routine is called. When reused,
1917** the dont_rollback() routine is called. But because the page contains
1918** critical data, we still need to be sure it gets rolled back in spite
1919** of the dont_rollback() call.
drh30e58752002-03-02 20:41:57 +00001920*/
1921void sqlitepager_dont_write(Pager *pPager, Pgno pgno){
1922 PgHdr *pPg;
drh8e298f92002-07-06 16:28:47 +00001923
drh30e58752002-03-02 20:41:57 +00001924 pPg = pager_lookup(pPager, pgno);
drh8e298f92002-07-06 16:28:47 +00001925 pPg->alwaysRollback = 1;
drh30e58752002-03-02 20:41:57 +00001926 if( pPg && pPg->dirty ){
drh8124a302002-06-25 14:43:57 +00001927 if( pPager->dbSize==(int)pPg->pgno && pPager->origDbSize<pPager->dbSize ){
1928 /* If this pages is the last page in the file and the file has grown
1929 ** during the current transaction, then do NOT mark the page as clean.
1930 ** When the database file grows, we must make sure that the last page
1931 ** gets written at least once so that the disk file will be the correct
1932 ** size. If you do not write this page and the size of the file
1933 ** on the disk ends up being too small, that can lead to database
1934 ** corruption during the next transaction.
1935 */
1936 }else{
drhdb48ee02003-01-16 13:42:43 +00001937 TRACE2("DONT_WRITE %d\n", pgno);
drh8124a302002-06-25 14:43:57 +00001938 pPg->dirty = 0;
1939 }
drh30e58752002-03-02 20:41:57 +00001940 }
1941}
1942
1943/*
1944** A call to this routine tells the pager that if a rollback occurs,
1945** it is not necessary to restore the data on the given page. This
1946** means that the pager does not have to record the given page in the
1947** rollback journal.
1948*/
1949void sqlitepager_dont_rollback(void *pData){
1950 PgHdr *pPg = DATA_TO_PGHDR(pData);
1951 Pager *pPager = pPg->pPager;
1952
1953 if( pPager->state!=SQLITE_WRITELOCK || pPager->journalOpen==0 ) return;
drh193a6b42002-07-07 16:52:46 +00001954 if( pPg->alwaysRollback || pPager->alwaysRollback ) return;
drh30e58752002-03-02 20:41:57 +00001955 if( !pPg->inJournal && (int)pPg->pgno <= pPager->origDbSize ){
1956 assert( pPager->aInJournal!=0 );
1957 pPager->aInJournal[pPg->pgno/8] |= 1<<(pPg->pgno&7);
1958 pPg->inJournal = 1;
drh0f892532002-05-30 12:27:03 +00001959 if( pPager->ckptInUse ){
drh30e58752002-03-02 20:41:57 +00001960 pPager->aInCkpt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
drh03eb96a2002-11-10 23:32:56 +00001961 page_add_to_ckpt_list(pPg);
drh30e58752002-03-02 20:41:57 +00001962 }
drhdb48ee02003-01-16 13:42:43 +00001963 TRACE2("DONT_ROLLBACK %d\n", pPg->pgno);
drh30e58752002-03-02 20:41:57 +00001964 }
drh0f892532002-05-30 12:27:03 +00001965 if( pPager->ckptInUse && !pPg->inCkpt && (int)pPg->pgno<=pPager->ckptSize ){
drh30e58752002-03-02 20:41:57 +00001966 assert( pPg->inJournal || (int)pPg->pgno>pPager->origDbSize );
1967 assert( pPager->aInCkpt!=0 );
1968 pPager->aInCkpt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
drh03eb96a2002-11-10 23:32:56 +00001969 page_add_to_ckpt_list(pPg);
drh30e58752002-03-02 20:41:57 +00001970 }
1971}
1972
1973/*
drhed7c8552001-04-11 14:29:21 +00001974** Commit all changes to the database and release the write lock.
drhd9b02572001-04-15 00:37:09 +00001975**
1976** If the commit fails for any reason, a rollback attempt is made
1977** and an error code is returned. If the commit worked, SQLITE_OK
1978** is returned.
drhed7c8552001-04-11 14:29:21 +00001979*/
drhd9b02572001-04-15 00:37:09 +00001980int sqlitepager_commit(Pager *pPager){
drha1b351a2001-09-14 16:42:12 +00001981 int rc;
drhed7c8552001-04-11 14:29:21 +00001982 PgHdr *pPg;
drhd9b02572001-04-15 00:37:09 +00001983
1984 if( pPager->errMask==PAGER_ERR_FULL ){
1985 rc = sqlitepager_rollback(pPager);
drh4e371ee2002-09-05 16:08:27 +00001986 if( rc==SQLITE_OK ){
1987 rc = SQLITE_FULL;
1988 }
drhd9b02572001-04-15 00:37:09 +00001989 return rc;
1990 }
1991 if( pPager->errMask!=0 ){
1992 rc = pager_errcode(pPager);
1993 return rc;
1994 }
1995 if( pPager->state!=SQLITE_WRITELOCK ){
1996 return SQLITE_ERROR;
1997 }
drhdb48ee02003-01-16 13:42:43 +00001998 TRACE1("COMMIT\n");
drha1680452002-04-18 01:56:57 +00001999 if( pPager->dirtyFile==0 ){
2000 /* Exit early (without doing the time-consuming sqliteOsSync() calls)
2001 ** if there have been no changes to the database file. */
drh341eae82003-01-21 02:39:36 +00002002 assert( pPager->needSync==0 );
drha1680452002-04-18 01:56:57 +00002003 rc = pager_unwritelock(pPager);
2004 pPager->dbSize = -1;
2005 return rc;
2006 }
drhda47d772002-12-02 04:25:19 +00002007 assert( pPager->journalOpen );
drh34e79ce2004-02-08 06:05:46 +00002008 rc = syncJournal(pPager);
drh240c5792004-02-08 00:40:52 +00002009 if( rc!=SQLITE_OK ){
drhd9b02572001-04-15 00:37:09 +00002010 goto commit_abort;
drhed7c8552001-04-11 14:29:21 +00002011 }
drh2554f8b2003-01-22 01:26:44 +00002012 pPg = pager_get_all_dirty_pages(pPager);
2013 if( pPg ){
2014 rc = pager_write_pagelist(pPg);
2015 if( rc || (!pPager->noSync && sqliteOsSync(&pPager->fd)!=SQLITE_OK) ){
2016 goto commit_abort;
2017 }
drh603240c2002-03-05 01:11:12 +00002018 }
drhd9b02572001-04-15 00:37:09 +00002019 rc = pager_unwritelock(pPager);
2020 pPager->dbSize = -1;
2021 return rc;
2022
2023 /* Jump here if anything goes wrong during the commit process.
2024 */
2025commit_abort:
2026 rc = sqlitepager_rollback(pPager);
2027 if( rc==SQLITE_OK ){
2028 rc = SQLITE_FULL;
drhed7c8552001-04-11 14:29:21 +00002029 }
drhed7c8552001-04-11 14:29:21 +00002030 return rc;
2031}
2032
2033/*
2034** Rollback all changes. The database falls back to read-only mode.
2035** All in-memory cache pages revert to their original data contents.
2036** The journal is deleted.
drhd9b02572001-04-15 00:37:09 +00002037**
2038** This routine cannot fail unless some other process is not following
2039** the correct locking protocol (SQLITE_PROTOCOL) or unless some other
2040** process is writing trash into the journal file (SQLITE_CORRUPT) or
2041** unless a prior malloc() failed (SQLITE_NOMEM). Appropriate error
2042** codes are returned for all these occasions. Otherwise,
2043** SQLITE_OK is returned.
drhed7c8552001-04-11 14:29:21 +00002044*/
drhd9b02572001-04-15 00:37:09 +00002045int sqlitepager_rollback(Pager *pPager){
drhed7c8552001-04-11 14:29:21 +00002046 int rc;
drhdb48ee02003-01-16 13:42:43 +00002047 TRACE1("ROLLBACK\n");
drhda47d772002-12-02 04:25:19 +00002048 if( !pPager->dirtyFile || !pPager->journalOpen ){
2049 rc = pager_unwritelock(pPager);
2050 pPager->dbSize = -1;
2051 return rc;
2052 }
drhdb48ee02003-01-16 13:42:43 +00002053
drhd9b02572001-04-15 00:37:09 +00002054 if( pPager->errMask!=0 && pPager->errMask!=PAGER_ERR_FULL ){
drh4b845d72002-03-05 12:41:19 +00002055 if( pPager->state>=SQLITE_WRITELOCK ){
drh99ee3602003-02-16 19:13:36 +00002056 pager_playback(pPager, 1);
drh4b845d72002-03-05 12:41:19 +00002057 }
drhd9b02572001-04-15 00:37:09 +00002058 return pager_errcode(pPager);
drhed7c8552001-04-11 14:29:21 +00002059 }
drhd9b02572001-04-15 00:37:09 +00002060 if( pPager->state!=SQLITE_WRITELOCK ){
2061 return SQLITE_OK;
2062 }
drh99ee3602003-02-16 19:13:36 +00002063 rc = pager_playback(pPager, 1);
drhd9b02572001-04-15 00:37:09 +00002064 if( rc!=SQLITE_OK ){
2065 rc = SQLITE_CORRUPT;
2066 pPager->errMask |= PAGER_ERR_CORRUPT;
2067 }
2068 pPager->dbSize = -1;
drhed7c8552001-04-11 14:29:21 +00002069 return rc;
drh98808ba2001-10-18 12:34:46 +00002070}
drhd9b02572001-04-15 00:37:09 +00002071
2072/*
drh5e00f6c2001-09-13 13:46:56 +00002073** Return TRUE if the database file is opened read-only. Return FALSE
2074** if the database is (in theory) writable.
2075*/
2076int sqlitepager_isreadonly(Pager *pPager){
drhbe0072d2001-09-13 14:46:09 +00002077 return pPager->readOnly;
drh5e00f6c2001-09-13 13:46:56 +00002078}
2079
2080/*
drhd9b02572001-04-15 00:37:09 +00002081** This routine is used for testing and analysis only.
2082*/
2083int *sqlitepager_stats(Pager *pPager){
2084 static int a[9];
2085 a[0] = pPager->nRef;
2086 a[1] = pPager->nPage;
2087 a[2] = pPager->mxPage;
2088 a[3] = pPager->dbSize;
2089 a[4] = pPager->state;
2090 a[5] = pPager->errMask;
2091 a[6] = pPager->nHit;
2092 a[7] = pPager->nMiss;
2093 a[8] = pPager->nOvfl;
2094 return a;
2095}
drhdd793422001-06-28 01:54:48 +00002096
drhfa86c412002-02-02 15:01:15 +00002097/*
2098** Set the checkpoint.
2099**
2100** This routine should be called with the transaction journal already
2101** open. A new checkpoint journal is created that can be used to rollback
drhaaab5722002-02-19 13:39:21 +00002102** changes of a single SQL command within a larger transaction.
drhfa86c412002-02-02 15:01:15 +00002103*/
2104int sqlitepager_ckpt_begin(Pager *pPager){
2105 int rc;
2106 char zTemp[SQLITE_TEMPNAME_SIZE];
drhda47d772002-12-02 04:25:19 +00002107 if( !pPager->journalOpen ){
2108 pPager->ckptAutoopen = 1;
2109 return SQLITE_OK;
2110 }
drhfa86c412002-02-02 15:01:15 +00002111 assert( pPager->journalOpen );
drh0f892532002-05-30 12:27:03 +00002112 assert( !pPager->ckptInUse );
drhfa86c412002-02-02 15:01:15 +00002113 pPager->aInCkpt = sqliteMalloc( pPager->dbSize/8 + 1 );
2114 if( pPager->aInCkpt==0 ){
2115 sqliteOsReadLock(&pPager->fd);
2116 return SQLITE_NOMEM;
2117 }
drh968af522003-02-11 14:55:40 +00002118#ifndef NDEBUG
drhfa86c412002-02-02 15:01:15 +00002119 rc = sqliteOsFileSize(&pPager->jfd, &pPager->ckptJSize);
2120 if( rc ) goto ckpt_begin_failed;
drh968af522003-02-11 14:55:40 +00002121 assert( pPager->ckptJSize ==
2122 pPager->nRec*JOURNAL_PG_SZ(journal_format)+JOURNAL_HDR_SZ(journal_format) );
2123#endif
2124 pPager->ckptJSize = pPager->nRec*JOURNAL_PG_SZ(journal_format)
2125 + JOURNAL_HDR_SZ(journal_format);
drh663fc632002-02-02 18:49:19 +00002126 pPager->ckptSize = pPager->dbSize;
drh0f892532002-05-30 12:27:03 +00002127 if( !pPager->ckptOpen ){
2128 rc = sqlitepager_opentemp(zTemp, &pPager->cpfd);
2129 if( rc ) goto ckpt_begin_failed;
2130 pPager->ckptOpen = 1;
drh9bd47a92003-01-07 14:46:08 +00002131 pPager->ckptNRec = 0;
drh0f892532002-05-30 12:27:03 +00002132 }
2133 pPager->ckptInUse = 1;
drhfa86c412002-02-02 15:01:15 +00002134 return SQLITE_OK;
2135
2136ckpt_begin_failed:
2137 if( pPager->aInCkpt ){
2138 sqliteFree(pPager->aInCkpt);
2139 pPager->aInCkpt = 0;
2140 }
2141 return rc;
2142}
2143
2144/*
2145** Commit a checkpoint.
2146*/
2147int sqlitepager_ckpt_commit(Pager *pPager){
drh0f892532002-05-30 12:27:03 +00002148 if( pPager->ckptInUse ){
drh03eb96a2002-11-10 23:32:56 +00002149 PgHdr *pPg, *pNext;
drh96ddd6d2002-09-05 19:10:33 +00002150 sqliteOsSeek(&pPager->cpfd, 0);
drh9bd47a92003-01-07 14:46:08 +00002151 /* sqliteOsTruncate(&pPager->cpfd, 0); */
2152 pPager->ckptNRec = 0;
drh0f892532002-05-30 12:27:03 +00002153 pPager->ckptInUse = 0;
drh663fc632002-02-02 18:49:19 +00002154 sqliteFree( pPager->aInCkpt );
2155 pPager->aInCkpt = 0;
drh03eb96a2002-11-10 23:32:56 +00002156 for(pPg=pPager->pCkpt; pPg; pPg=pNext){
2157 pNext = pPg->pNextCkpt;
2158 assert( pPg->inCkpt );
drh663fc632002-02-02 18:49:19 +00002159 pPg->inCkpt = 0;
drh03eb96a2002-11-10 23:32:56 +00002160 pPg->pPrevCkpt = pPg->pNextCkpt = 0;
drh663fc632002-02-02 18:49:19 +00002161 }
drh03eb96a2002-11-10 23:32:56 +00002162 pPager->pCkpt = 0;
drh663fc632002-02-02 18:49:19 +00002163 }
drhda47d772002-12-02 04:25:19 +00002164 pPager->ckptAutoopen = 0;
drhfa86c412002-02-02 15:01:15 +00002165 return SQLITE_OK;
2166}
2167
2168/*
2169** Rollback a checkpoint.
2170*/
2171int sqlitepager_ckpt_rollback(Pager *pPager){
2172 int rc;
drh0f892532002-05-30 12:27:03 +00002173 if( pPager->ckptInUse ){
drh663fc632002-02-02 18:49:19 +00002174 rc = pager_ckpt_playback(pPager);
2175 sqlitepager_ckpt_commit(pPager);
2176 }else{
2177 rc = SQLITE_OK;
2178 }
drhda47d772002-12-02 04:25:19 +00002179 pPager->ckptAutoopen = 0;
drhfa86c412002-02-02 15:01:15 +00002180 return rc;
2181}
2182
drh73509ee2003-04-06 20:44:45 +00002183/*
2184** Return the full pathname of the database file.
2185*/
2186const char *sqlitepager_filename(Pager *pPager){
2187 return pPager->zFilename;
2188}
2189
drhb20ea9d2004-02-09 01:20:36 +00002190/*
2191** Set the codec for this pager
2192*/
2193void sqlitepager_set_codec(
2194 Pager *pPager,
2195 void (*xCodec)(void*,void*,int),
2196 void *pCodecArg
2197){
2198 pPager->xCodec = xCodec;
2199 pPager->pCodecArg = pCodecArg;
2200}
2201
#ifdef SQLITE_TEST
/*
** Print a listing of all referenced pages and their ref count.
**
** Every page on the pager's pAll list with a positive reference count
** is written to standard output together with the address of its data
** area.  The address is printed with %p so that it is not truncated on
** platforms where a pointer is wider than an int.
*/
void sqlitepager_refdump(Pager *pPager){
  PgHdr *pPg;
  for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
    if( pPg->nRef<=0 ) continue;
    printf("PAGE %3d addr=%p nRef=%d\n",
       (int)pPg->pgno, (void*)PGHDR_TO_DATA(pPg), pPg->nRef);
  }
}
#endif