blob: 41db493e779031ec1ed4ad4919c1c8581cc99ad0 [file] [log] [blame]
drhed7c8552001-04-11 14:29:21 +00001/*
drhb19a2bc2001-09-16 00:13:26 +00002** 2001 September 15
drhed7c8552001-04-11 14:29:21 +00003**
drhb19a2bc2001-09-16 00:13:26 +00004** The author disclaims copyright to this source code. In place of
5** a legal notice, here is a blessing:
drhed7c8552001-04-11 14:29:21 +00006**
drhb19a2bc2001-09-16 00:13:26 +00007** May you do good and not evil.
8** May you find forgiveness for yourself and forgive others.
9** May you share freely, never taking more than you give.
drhed7c8552001-04-11 14:29:21 +000010**
11*************************************************************************
drhb19a2bc2001-09-16 00:13:26 +000012** This is the implementation of the page cache subsystem or "pager".
drhed7c8552001-04-11 14:29:21 +000013**
drhb19a2bc2001-09-16 00:13:26 +000014** The pager is used to access a database disk file. It implements
15** atomic commit and rollback through the use of a journal file that
16** is separate from the database file. The pager also implements file
17** locking to prevent two processes from writing the same database
18** file simultaneously, or one process from reading the database while
19** another is writing.
drhed7c8552001-04-11 14:29:21 +000020**
drh3aac2dd2004-04-26 14:10:20 +000021** @(#) $Id: pager.c,v 1.102 2004/04/26 14:10:21 drh Exp $
drhed7c8552001-04-11 14:29:21 +000022*/
drh829e8022002-11-06 14:08:11 +000023#include "os.h" /* Must be first to enable large file support */
drhd9b02572001-04-15 00:37:09 +000024#include "sqliteInt.h"
drhed7c8552001-04-11 14:29:21 +000025#include "pager.h"
drhed7c8552001-04-11 14:29:21 +000026#include <assert.h>
drhd9b02572001-04-15 00:37:09 +000027#include <string.h>
drhed7c8552001-04-11 14:29:21 +000028
/*
** Macros for troubleshooting.  Normally turned off.
**
** When enabled, SET_PAGER() latches the first pager it is given into
** mainPager, CLR_PAGER() forgets it again, and the TRACEn() macros print
** to stderr only when the local variable pPager is that pager.
**
** Each enabled macro is wrapped in do{...}while(0) so that "MACRO(...);"
** is exactly one statement.  A bare "if(...) stmt" expansion would
** silently capture an "else" that follows the macro call.
*/
#if 0
static Pager *mainPager = 0;
#define SET_PAGER(X)  do{ if( mainPager==0 ) mainPager = (X); }while(0)
#define CLR_PAGER(X)  do{ if( mainPager==(X) ) mainPager = 0; }while(0)
#define TRACE1(X)     do{ if( pPager==mainPager ) fprintf(stderr,X); }while(0)
#define TRACE2(X,Y)   do{ if( pPager==mainPager ) fprintf(stderr,X,Y); }while(0)
#define TRACE3(X,Y,Z) \
    do{ if( pPager==mainPager ) fprintf(stderr,X,Y,Z); }while(0)
#else
#define SET_PAGER(X)
#define CLR_PAGER(X)
#define TRACE1(X)
#define TRACE2(X,Y)
#define TRACE3(X,Y,Z)
#endif
46
47
48/*
drhed7c8552001-04-11 14:29:21 +000049** The page cache as a whole is always in one of the following
50** states:
51**
52** SQLITE_UNLOCK The page cache is not currently reading or
53** writing the database file. There is no
54** data held in memory. This is the initial
55** state.
56**
57** SQLITE_READLOCK The page cache is reading the database.
58** Writing is not permitted. There can be
59** multiple readers accessing the same database
drh69688d52001-04-14 16:38:23 +000060** file at the same time.
drhed7c8552001-04-11 14:29:21 +000061**
62** SQLITE_WRITELOCK The page cache is writing the database.
63** Access is exclusive. No other processes or
64** threads can be reading or writing while one
65** process is writing.
66**
drh306dc212001-05-21 13:45:10 +000067** The page cache comes up in SQLITE_UNLOCK. The first time a
68** sqlite_page_get() occurs, the state transitions to SQLITE_READLOCK.
drhed7c8552001-04-11 14:29:21 +000069** After all pages have been released using sqlite_page_unref(),
drh306dc212001-05-21 13:45:10 +000070** the state transitions back to SQLITE_UNLOCK. The first time
drhed7c8552001-04-11 14:29:21 +000071** that sqlite_page_write() is called, the state transitions to
drh306dc212001-05-21 13:45:10 +000072** SQLITE_WRITELOCK. (Note that sqlite_page_write() can only be
73** called on an outstanding page which means that the pager must
74** be in SQLITE_READLOCK before it transitions to SQLITE_WRITELOCK.)
75** The sqlite_page_rollback() and sqlite_page_commit() functions
76** transition the state from SQLITE_WRITELOCK back to SQLITE_READLOCK.
drhed7c8552001-04-11 14:29:21 +000077*/
#define SQLITE_UNLOCK      0  /* Not reading or writing; no data in memory */
#define SQLITE_READLOCK    1  /* Reading the database; writing not permitted */
#define SQLITE_WRITELOCK   2  /* Writing the database; access is exclusive */
81
drhd9b02572001-04-15 00:37:09 +000082
drhed7c8552001-04-11 14:29:21 +000083/*
84** Each in-memory image of a page begins with the following header.
drhbd03cae2001-06-02 02:40:57 +000085** This header is only visible to this pager module. The client
86** code that calls pager sees only the data that follows the header.
drhf6038712004-02-08 18:07:34 +000087**
drh3aac2dd2004-04-26 14:10:20 +000088** Client code should call sqlite3pager_write() on a page prior to making
89** any modifications to that page. The first time sqlite3pager_write()
drhf6038712004-02-08 18:07:34 +000090** is called, the original page contents are written into the rollback
91** journal and PgHdr.inJournal and PgHdr.needSync are set. Later, once
92** the journal page has made it onto the disk surface, PgHdr.needSync
93** is cleared. The modified page cannot be written back into the original
94** database file until the journal pages has been synced to disk and the
95** PgHdr.needSync has been cleared.
96**
drh3aac2dd2004-04-26 14:10:20 +000097** The PgHdr.dirty flag is set when sqlite3pager_write() is called and
drhf6038712004-02-08 18:07:34 +000098** is cleared again when the page content is written back to the original
99** database file.
drhed7c8552001-04-11 14:29:21 +0000100*/
drhd9b02572001-04-15 00:37:09 +0000101typedef struct PgHdr PgHdr;
drhed7c8552001-04-11 14:29:21 +0000102struct PgHdr {
103 Pager *pPager; /* The pager to which this page belongs */
104 Pgno pgno; /* The page number for this page */
drh69688d52001-04-14 16:38:23 +0000105 PgHdr *pNextHash, *pPrevHash; /* Hash collision chain for PgHdr.pgno */
drhed7c8552001-04-11 14:29:21 +0000106 int nRef; /* Number of users of this page */
drhd9b02572001-04-15 00:37:09 +0000107 PgHdr *pNextFree, *pPrevFree; /* Freelist of pages where nRef==0 */
108 PgHdr *pNextAll, *pPrevAll; /* A list of all pages */
drh03eb96a2002-11-10 23:32:56 +0000109 PgHdr *pNextCkpt, *pPrevCkpt; /* List of pages in the checkpoint journal */
drh193a6b42002-07-07 16:52:46 +0000110 u8 inJournal; /* TRUE if has been written to journal */
111 u8 inCkpt; /* TRUE if written to the checkpoint journal */
112 u8 dirty; /* TRUE if we need to write back changes */
drhdb48ee02003-01-16 13:42:43 +0000113 u8 needSync; /* Sync journal before writing this page */
drh193a6b42002-07-07 16:52:46 +0000114 u8 alwaysRollback; /* Disable dont_rollback() for this page */
drh2554f8b2003-01-22 01:26:44 +0000115 PgHdr *pDirty; /* Dirty pages sorted by PgHdr.pgno */
drhd0ba1932004-02-10 01:54:28 +0000116 /* SQLITE_PAGE_SIZE bytes of page data follow this header */
drh973b6e32003-02-12 14:09:42 +0000117 /* Pager.nExtra bytes of local data follow the page data */
drhed7c8552001-04-11 14:29:21 +0000118};
119
drh9eb9e262004-02-11 02:18:05 +0000120
/*
** A macro used for invoking the codec if there is one.
**
** P is the Pager; D, N and X are passed through unchanged as the second,
** third and fourth arguments of P->xCodec.  Without SQLITE_HAS_CODEC the
** macro expands to nothing.
**
** The expansion is wrapped in do{...}while(0) and every argument is
** parenthesized so that "CODEC(...);" is a single statement, safe inside
** an unbraced if/else, for any argument expression.  P is evaluated more
** than once, so it must not have side effects.
*/
#ifdef SQLITE_HAS_CODEC
# define CODEC(P,D,N,X) \
    do{ if( (P)->xCodec ){ (P)->xCodec((P)->pCodecArg,(D),(N),(X)); } }while(0)
#else
# define CODEC(P,D,N,X)
#endif
129
/*
** Pointer conversions between a page header and the memory that follows it.
**
**   PGHDR_TO_DATA(P)   Address immediately after the PgHdr at P.
**   DATA_TO_PGHDR(D)   The PgHdr that ends where D begins (the inverse).
**   PGHDR_TO_EXTRA(P)  Address SQLITE_PAGE_SIZE bytes past PGHDR_TO_DATA(P).
*/
#define PGHDR_TO_DATA(P)   ((void*)((P)+1))
#define DATA_TO_PGHDR(D)   (((PgHdr*)(D))-1)
#define PGHDR_TO_EXTRA(P)  ((void*)(((char*)((P)+1))+SQLITE_PAGE_SIZE))
drhed7c8552001-04-11 14:29:21 +0000137
/*
** Number of buckets in the hash table used for locating in-memory pages
** by page number.  pager_hash() masks with N_PG_HASH-1, so this value
** must be a power of two.
*/
#define N_PG_HASH  2048

/*
** Map page number PN onto a hash bucket index in the range
** 0..N_PG_HASH-1.
*/
#define pager_hash(PN)  ((PN)&(N_PG_HASH-1))
drhed7c8552001-04-11 14:29:21 +0000148
149/*
150** A open page cache is an instance of the following structure.
151*/
152struct Pager {
153 char *zFilename; /* Name of the database file */
154 char *zJournal; /* Name of the journal file */
drha76c82e2003-07-27 18:59:42 +0000155 char *zDirectory; /* Directory hold database and journal files */
drh8cfbf082001-09-19 13:22:39 +0000156 OsFile fd, jfd; /* File descriptors for database and journal */
drhfa86c412002-02-02 15:01:15 +0000157 OsFile cpfd; /* File descriptor for the checkpoint journal */
drhed7c8552001-04-11 14:29:21 +0000158 int dbSize; /* Number of pages in the file */
drh69688d52001-04-14 16:38:23 +0000159 int origDbSize; /* dbSize before the current change */
drh28be87c2002-11-05 23:03:02 +0000160 int ckptSize; /* Size of database (in pages) at ckpt_begin() */
161 off_t ckptJSize; /* Size of journal at ckpt_begin() */
drh968af522003-02-11 14:55:40 +0000162 int nRec; /* Number of pages written to the journal */
163 u32 cksumInit; /* Quasi-random value added to every checksum */
drh9bd47a92003-01-07 14:46:08 +0000164 int ckptNRec; /* Number of records in the checkpoint journal */
drh7e3b0a02001-04-28 16:52:40 +0000165 int nExtra; /* Add this many bytes to each in-memory page */
drh72f82862001-05-24 21:06:34 +0000166 void (*xDestructor)(void*); /* Call this routine when freeing pages */
drhed7c8552001-04-11 14:29:21 +0000167 int nPage; /* Total number of in-memory pages */
drhd9b02572001-04-15 00:37:09 +0000168 int nRef; /* Number of in-memory pages with PgHdr.nRef>0 */
drhed7c8552001-04-11 14:29:21 +0000169 int mxPage; /* Maximum number of pages to hold in cache */
drhd9b02572001-04-15 00:37:09 +0000170 int nHit, nMiss, nOvfl; /* Cache hits, missing, and LRU overflows */
drh9eb9e262004-02-11 02:18:05 +0000171 void (*xCodec)(void*,void*,Pgno,int); /* Routine for en/decoding data */
drhb20ea9d2004-02-09 01:20:36 +0000172 void *pCodecArg; /* First argument to xCodec() */
drh603240c2002-03-05 01:11:12 +0000173 u8 journalOpen; /* True if journal file descriptors is valid */
drh34e79ce2004-02-08 06:05:46 +0000174 u8 journalStarted; /* True if header of journal is synced */
175 u8 useJournal; /* Use a rollback journal on this file */
drh603240c2002-03-05 01:11:12 +0000176 u8 ckptOpen; /* True if the checkpoint journal is open */
drh0f892532002-05-30 12:27:03 +0000177 u8 ckptInUse; /* True we are in a checkpoint */
drhda47d772002-12-02 04:25:19 +0000178 u8 ckptAutoopen; /* Open ckpt journal when main journal is opened*/
drh603240c2002-03-05 01:11:12 +0000179 u8 noSync; /* Do not sync the journal if true */
drh968af522003-02-11 14:55:40 +0000180 u8 fullSync; /* Do extra syncs of the journal for robustness */
drh603240c2002-03-05 01:11:12 +0000181 u8 state; /* SQLITE_UNLOCK, _READLOCK or _WRITELOCK */
182 u8 errMask; /* One of several kinds of errors */
183 u8 tempFile; /* zFilename is a temporary file */
184 u8 readOnly; /* True for a read-only database */
185 u8 needSync; /* True if an fsync() is needed on the journal */
drha1680452002-04-18 01:56:57 +0000186 u8 dirtyFile; /* True if database file has changed in any way */
drh193a6b42002-07-07 16:52:46 +0000187 u8 alwaysRollback; /* Disable dont_rollback() for all pages */
drh603240c2002-03-05 01:11:12 +0000188 u8 *aInJournal; /* One bit for each page in the database file */
189 u8 *aInCkpt; /* One bit for each page in the database */
drhed7c8552001-04-11 14:29:21 +0000190 PgHdr *pFirst, *pLast; /* List of free pages */
drh341eae82003-01-21 02:39:36 +0000191 PgHdr *pFirstSynced; /* First free page with PgHdr.needSync==0 */
drhd9b02572001-04-15 00:37:09 +0000192 PgHdr *pAll; /* List of all pages */
drh03eb96a2002-11-10 23:32:56 +0000193 PgHdr *pCkpt; /* List of pages in the checkpoint journal */
drhed7c8552001-04-11 14:29:21 +0000194 PgHdr *aHash[N_PG_HASH]; /* Hash table to map page number of PgHdr */
drhd9b02572001-04-15 00:37:09 +0000195};
196
/*
** Bit flags that may be OR-ed together in Pager.errMask.
*/
#define PAGER_ERR_FULL     0x01  /* a write() failed */
#define PAGER_ERR_MEM      0x02  /* malloc() failed */
#define PAGER_ERR_LOCK     0x04  /* error in the locking protocol */
#define PAGER_ERR_CORRUPT  0x08  /* database or journal corruption */
#define PAGER_ERR_DISK     0x10  /* general disk I/O error - bad hard drive? */
drhd9b02572001-04-15 00:37:09 +0000205
206/*
207** The journal file contains page records in the following
208** format.
drh968af522003-02-11 14:55:40 +0000209**
210** Actually, this structure is the complete page record for pager
211** formats less than 3. Beginning with format 3, this record is surrounded
212** by two checksums.
drhd9b02572001-04-15 00:37:09 +0000213*/
214typedef struct PageRecord PageRecord;
215struct PageRecord {
drhb20ea9d2004-02-09 01:20:36 +0000216 Pgno pgno; /* The page number */
drhd0ba1932004-02-10 01:54:28 +0000217 char aData[SQLITE_PAGE_SIZE]; /* Original data for page pgno */
drhd9b02572001-04-15 00:37:09 +0000218};
219
/*
** Journal files begin with one of the following magic strings.  The data
** was obtained from /dev/random.  It is used only as a sanity check.
**
** There are three journal formats (so far).  The 1st journal format writes
** 32-bit integers in the byte-order of the host machine.  Newer
** formats write integers as big-endian.  All new journals use the
** new format, but we have to be able to read an older journal in order
** to rollback journals created by older versions of the library.
**
** The 3rd journal format (added for 2.8.0) adds additional sanity
** checking information to the journal.  If the power fails while the
** journal is being written, semi-random garbage data might appear in
** the journal file after power is restored.  If an attempt is then made
** to roll the journal back, the database could be corrupted.  The additional
** sanity checking data is an attempt to discover the garbage in the
** journal and ignore it.
**
** The sanity checking information for the 3rd journal format consists
** of a 32-bit checksum on each page of data.  The checksum covers both
** the page number and the SQLITE_PAGE_SIZE bytes of data for the page.
** This cksum is initialized to a 32-bit random value that appears in the
** journal file right after the header.  The random initializer is important,
** because garbage data that appears at the end of a journal is likely
** data that was once in other files that have now been deleted.  If the
** garbage data came from an obsolete journal file, the checksums might
** be correct.  But by initializing the checksum to a random value which
** is different for every journal, we minimize that risk.
**
** The three magic strings differ only in their final byte.
*/
static const unsigned char aJournalMagic1[] = {
  0xd9, 0xd5, 0x05, 0xf9, 0x20, 0xa1, 0x63, 0xd4,
};
static const unsigned char aJournalMagic2[] = {
  0xd9, 0xd5, 0x05, 0xf9, 0x20, 0xa1, 0x63, 0xd5,
};
static const unsigned char aJournalMagic3[] = {
  0xd9, 0xd5, 0x05, 0xf9, 0x20, 0xa1, 0x63, 0xd6,
};
#define JOURNAL_FORMAT_1  1   /* Journal begins with aJournalMagic1 */
#define JOURNAL_FORMAT_2  2   /* Journal begins with aJournalMagic2 */
#define JOURNAL_FORMAT_3  3   /* Journal begins with aJournalMagic3 */
drh94f33312002-08-12 12:29:56 +0000261
/*
** journal_format selects the format used when creating new primary
** journal files.  By default we always use format 3.  In SQLITE_TEST
** builds it is a real variable so that tests can select an older journal
** format and make sure that newer versions of the library are able to
** rollback older journal files.  Otherwise it is the constant 3.
**
** Note that checkpoint journals always use format 2 and omit the header.
*/
#ifndef SQLITE_TEST
# define journal_format 3
#else
int journal_format = 3;
#endif
drhed7c8552001-04-11 14:29:21 +0000276
/*
** The size of the header and of each page in the journal varies according
** to which journal format is being used.  X is the journal format number.
**
**   JOURNAL_HDR_SZ(X)  Magic string plus one Pgno, plus two u32 values
**                      when X is 3 or greater.
**   JOURNAL_PG_SZ(X)   One page of data plus one Pgno, plus one u32
**                      when X is 3 or greater.
*/
#define JOURNAL_HDR_SZ(X) \
   (sizeof(aJournalMagic1) + sizeof(Pgno) + ((X)>=3 ? 2*sizeof(u32) : 0))
#define JOURNAL_PG_SZ(X) \
   (SQLITE_PAGE_SIZE + sizeof(Pgno) + ((X)>=3 ? sizeof(u32) : 0))
drh968af522003-02-11 14:55:40 +0000286
/*
** Enable reference count tracking here:
**
** In SQLITE_TEST builds, REFINFO(X) calls pager_refinfo(X), which prints
** the page number, data address and reference count of page X to stdout
** whenever pager3_refinfo_enable is non-zero.  In other builds REFINFO()
** expands to nothing.
**
** The data address is printed with %p.  Casting the pointer to int, as
** was done previously, discards the upper bits on platforms where a
** pointer is wider than an int.
*/
#ifdef SQLITE_TEST
  int pager3_refinfo_enable = 0;
  static void pager_refinfo(PgHdr *p){
    static int cnt = 0;
    if( !pager3_refinfo_enable ) return;
    printf(
      "REFCNT: %4d addr=%p nRef=%d\n",
      p->pgno, PGHDR_TO_DATA(p), p->nRef
    );
    cnt++;   /* Something to set a breakpoint on */
  }
# define REFINFO(X)  pager_refinfo(X)
#else
# define REFINFO(X)
#endif
305
306/*
drh34e79ce2004-02-08 06:05:46 +0000307** Read a 32-bit integer from the given file descriptor. Store the integer
308** that is read in *pRes. Return SQLITE_OK if everything worked, or an
309** error code is something goes wrong.
310**
311** If the journal format is 2 or 3, read a big-endian integer. If the
312** journal format is 1, read an integer in the native byte-order of the
313** host machine.
drh94f33312002-08-12 12:29:56 +0000314*/
drh968af522003-02-11 14:55:40 +0000315static int read32bits(int format, OsFile *fd, u32 *pRes){
drh94f33312002-08-12 12:29:56 +0000316 u32 res;
317 int rc;
318 rc = sqliteOsRead(fd, &res, sizeof(res));
drh968af522003-02-11 14:55:40 +0000319 if( rc==SQLITE_OK && format>JOURNAL_FORMAT_1 ){
drh94f33312002-08-12 12:29:56 +0000320 unsigned char ac[4];
321 memcpy(ac, &res, 4);
322 res = (ac[0]<<24) | (ac[1]<<16) | (ac[2]<<8) | ac[3];
323 }
324 *pRes = res;
325 return rc;
326}
327
328/*
drh34e79ce2004-02-08 06:05:46 +0000329** Write a 32-bit integer into the given file descriptor. Return SQLITE_OK
330** on success or an error code is something goes wrong.
331**
332** If the journal format is 2 or 3, write the integer as 4 big-endian
333** bytes. If the journal format is 1, write the integer in the native
334** byte order. In normal operation, only formats 2 and 3 are used.
335** Journal format 1 is only used for testing.
drh94f33312002-08-12 12:29:56 +0000336*/
337static int write32bits(OsFile *fd, u32 val){
338 unsigned char ac[4];
drh968af522003-02-11 14:55:40 +0000339 if( journal_format<=1 ){
drh94f33312002-08-12 12:29:56 +0000340 return sqliteOsWrite(fd, &val, 4);
341 }
drh94f33312002-08-12 12:29:56 +0000342 ac[0] = (val>>24) & 0xff;
343 ac[1] = (val>>16) & 0xff;
344 ac[2] = (val>>8) & 0xff;
345 ac[3] = val & 0xff;
346 return sqliteOsWrite(fd, ac, 4);
347}
348
drh2554f8b2003-01-22 01:26:44 +0000349/*
350** Write a 32-bit integer into a page header right before the
351** page data. This will overwrite the PgHdr.pDirty pointer.
drh34e79ce2004-02-08 06:05:46 +0000352**
353** The integer is big-endian for formats 2 and 3 and native byte order
354** for journal format 1.
drh2554f8b2003-01-22 01:26:44 +0000355*/
drh968af522003-02-11 14:55:40 +0000356static void store32bits(u32 val, PgHdr *p, int offset){
drh2554f8b2003-01-22 01:26:44 +0000357 unsigned char *ac;
drhec1bd0b2003-08-26 11:41:27 +0000358 ac = &((unsigned char*)PGHDR_TO_DATA(p))[offset];
drh968af522003-02-11 14:55:40 +0000359 if( journal_format<=1 ){
drh2554f8b2003-01-22 01:26:44 +0000360 memcpy(ac, &val, 4);
361 }else{
362 ac[0] = (val>>24) & 0xff;
363 ac[1] = (val>>16) & 0xff;
364 ac[2] = (val>>8) & 0xff;
365 ac[3] = val & 0xff;
366 }
367}
368
drh94f33312002-08-12 12:29:56 +0000369
370/*
drhd9b02572001-04-15 00:37:09 +0000371** Convert the bits in the pPager->errMask into an approprate
372** return code.
373*/
374static int pager_errcode(Pager *pPager){
375 int rc = SQLITE_OK;
376 if( pPager->errMask & PAGER_ERR_LOCK ) rc = SQLITE_PROTOCOL;
drh81a20f22001-10-12 17:30:04 +0000377 if( pPager->errMask & PAGER_ERR_DISK ) rc = SQLITE_IOERR;
drhd9b02572001-04-15 00:37:09 +0000378 if( pPager->errMask & PAGER_ERR_FULL ) rc = SQLITE_FULL;
379 if( pPager->errMask & PAGER_ERR_MEM ) rc = SQLITE_NOMEM;
380 if( pPager->errMask & PAGER_ERR_CORRUPT ) rc = SQLITE_CORRUPT;
381 return rc;
drhed7c8552001-04-11 14:29:21 +0000382}
383
384/*
drh03eb96a2002-11-10 23:32:56 +0000385** Add or remove a page from the list of all pages that are in the
386** checkpoint journal.
387**
388** The Pager keeps a separate list of pages that are currently in
drh3aac2dd2004-04-26 14:10:20 +0000389** the checkpoint journal. This helps the sqlite3pager_stmt_commit()
drh03eb96a2002-11-10 23:32:56 +0000390** routine run MUCH faster for the common case where there are many
391** pages in memory but only a few are in the checkpoint journal.
392*/
drh3aac2dd2004-04-26 14:10:20 +0000393static void page_add_to_stmt_list(PgHdr *pPg){
drh03eb96a2002-11-10 23:32:56 +0000394 Pager *pPager = pPg->pPager;
395 if( pPg->inCkpt ) return;
396 assert( pPg->pPrevCkpt==0 && pPg->pNextCkpt==0 );
397 pPg->pPrevCkpt = 0;
398 if( pPager->pCkpt ){
399 pPager->pCkpt->pPrevCkpt = pPg;
400 }
401 pPg->pNextCkpt = pPager->pCkpt;
402 pPager->pCkpt = pPg;
403 pPg->inCkpt = 1;
404}
drh3aac2dd2004-04-26 14:10:20 +0000405static void page_remove_from_stmt_list(PgHdr *pPg){
drh03eb96a2002-11-10 23:32:56 +0000406 if( !pPg->inCkpt ) return;
407 if( pPg->pPrevCkpt ){
408 assert( pPg->pPrevCkpt->pNextCkpt==pPg );
409 pPg->pPrevCkpt->pNextCkpt = pPg->pNextCkpt;
410 }else{
411 assert( pPg->pPager->pCkpt==pPg );
412 pPg->pPager->pCkpt = pPg->pNextCkpt;
413 }
414 if( pPg->pNextCkpt ){
415 assert( pPg->pNextCkpt->pPrevCkpt==pPg );
416 pPg->pNextCkpt->pPrevCkpt = pPg->pPrevCkpt;
417 }
418 pPg->pNextCkpt = 0;
419 pPg->pPrevCkpt = 0;
420 pPg->inCkpt = 0;
421}
422
423/*
drhed7c8552001-04-11 14:29:21 +0000424** Find a page in the hash table given its page number. Return
425** a pointer to the page or NULL if not found.
426*/
drhd9b02572001-04-15 00:37:09 +0000427static PgHdr *pager_lookup(Pager *pPager, Pgno pgno){
drh836faa42003-01-11 13:30:57 +0000428 PgHdr *p = pPager->aHash[pager_hash(pgno)];
drhed7c8552001-04-11 14:29:21 +0000429 while( p && p->pgno!=pgno ){
430 p = p->pNextHash;
431 }
432 return p;
433}
434
435/*
436** Unlock the database and clear the in-memory cache. This routine
437** sets the state of the pager back to what it was when it was first
438** opened. Any outstanding pages are invalidated and subsequent attempts
439** to access those pages will likely result in a coredump.
440*/
drhd9b02572001-04-15 00:37:09 +0000441static void pager_reset(Pager *pPager){
drhed7c8552001-04-11 14:29:21 +0000442 PgHdr *pPg, *pNext;
drhd9b02572001-04-15 00:37:09 +0000443 for(pPg=pPager->pAll; pPg; pPg=pNext){
444 pNext = pPg->pNextAll;
445 sqliteFree(pPg);
drhed7c8552001-04-11 14:29:21 +0000446 }
447 pPager->pFirst = 0;
drh341eae82003-01-21 02:39:36 +0000448 pPager->pFirstSynced = 0;
drhd9b02572001-04-15 00:37:09 +0000449 pPager->pLast = 0;
450 pPager->pAll = 0;
drhed7c8552001-04-11 14:29:21 +0000451 memset(pPager->aHash, 0, sizeof(pPager->aHash));
452 pPager->nPage = 0;
drhfa86c412002-02-02 15:01:15 +0000453 if( pPager->state>=SQLITE_WRITELOCK ){
drh3aac2dd2004-04-26 14:10:20 +0000454 sqlite3pager_rollback(pPager);
drhed7c8552001-04-11 14:29:21 +0000455 }
drha7fcb052001-12-14 15:09:55 +0000456 sqliteOsUnlock(&pPager->fd);
drhed7c8552001-04-11 14:29:21 +0000457 pPager->state = SQLITE_UNLOCK;
drhd9b02572001-04-15 00:37:09 +0000458 pPager->dbSize = -1;
drhed7c8552001-04-11 14:29:21 +0000459 pPager->nRef = 0;
drh8cfbf082001-09-19 13:22:39 +0000460 assert( pPager->journalOpen==0 );
drhed7c8552001-04-11 14:29:21 +0000461}
462
463/*
464** When this routine is called, the pager has the journal file open and
465** a write lock on the database. This routine releases the database
466** write lock and acquires a read lock in its place. The journal file
467** is deleted and closed.
drh50457892003-09-06 01:10:47 +0000468**
469** TODO: Consider keeping the journal file open for temporary databases.
470** This might give a performance improvement on windows where opening
471** a file is an expensive operation.
drhed7c8552001-04-11 14:29:21 +0000472*/
drhd9b02572001-04-15 00:37:09 +0000473static int pager_unwritelock(Pager *pPager){
drhed7c8552001-04-11 14:29:21 +0000474 int rc;
drhd9b02572001-04-15 00:37:09 +0000475 PgHdr *pPg;
drhfa86c412002-02-02 15:01:15 +0000476 if( pPager->state<SQLITE_WRITELOCK ) return SQLITE_OK;
drh3aac2dd2004-04-26 14:10:20 +0000477 sqlite3pager_stmt_commit(pPager);
drh0f892532002-05-30 12:27:03 +0000478 if( pPager->ckptOpen ){
479 sqliteOsClose(&pPager->cpfd);
480 pPager->ckptOpen = 0;
481 }
drhda47d772002-12-02 04:25:19 +0000482 if( pPager->journalOpen ){
483 sqliteOsClose(&pPager->jfd);
484 pPager->journalOpen = 0;
485 sqliteOsDelete(pPager->zJournal);
486 sqliteFree( pPager->aInJournal );
487 pPager->aInJournal = 0;
488 for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
489 pPg->inJournal = 0;
490 pPg->dirty = 0;
drhdb48ee02003-01-16 13:42:43 +0000491 pPg->needSync = 0;
drhda47d772002-12-02 04:25:19 +0000492 }
493 }else{
494 assert( pPager->dirtyFile==0 || pPager->useJournal==0 );
drhd9b02572001-04-15 00:37:09 +0000495 }
drhda47d772002-12-02 04:25:19 +0000496 rc = sqliteOsReadLock(&pPager->fd);
drh8e298f92002-07-06 16:28:47 +0000497 if( rc==SQLITE_OK ){
498 pPager->state = SQLITE_READLOCK;
499 }else{
500 /* This can only happen if a process does a BEGIN, then forks and the
501 ** child process does the COMMIT. Because of the semantics of unix
502 ** file locking, the unlock will fail.
503 */
504 pPager->state = SQLITE_UNLOCK;
505 }
drhed7c8552001-04-11 14:29:21 +0000506 return rc;
507}
508
drhed7c8552001-04-11 14:29:21 +0000509/*
drh968af522003-02-11 14:55:40 +0000510** Compute and return a checksum for the page of data.
drh34e79ce2004-02-08 06:05:46 +0000511**
512** This is not a real checksum. It is really just the sum of the
513** random initial value and the page number. We considered do a checksum
514** of the database, but that was found to be too slow.
drh968af522003-02-11 14:55:40 +0000515*/
516static u32 pager_cksum(Pager *pPager, Pgno pgno, const char *aData){
517 u32 cksum = pPager->cksumInit + pgno;
drh968af522003-02-11 14:55:40 +0000518 return cksum;
519}
520
521/*
drhfa86c412002-02-02 15:01:15 +0000522** Read a single page from the journal file opened on file descriptor
523** jfd. Playback this one page.
drh968af522003-02-11 14:55:40 +0000524**
525** There are three different journal formats. The format parameter determines
526** which format is used by the journal that is played back.
drhfa86c412002-02-02 15:01:15 +0000527*/
drh968af522003-02-11 14:55:40 +0000528static int pager_playback_one_page(Pager *pPager, OsFile *jfd, int format){
drhfa86c412002-02-02 15:01:15 +0000529 int rc;
530 PgHdr *pPg; /* An existing page in the cache */
531 PageRecord pgRec;
drh968af522003-02-11 14:55:40 +0000532 u32 cksum;
drhfa86c412002-02-02 15:01:15 +0000533
drh968af522003-02-11 14:55:40 +0000534 rc = read32bits(format, jfd, &pgRec.pgno);
drh99ee3602003-02-16 19:13:36 +0000535 if( rc!=SQLITE_OK ) return rc;
drh94f33312002-08-12 12:29:56 +0000536 rc = sqliteOsRead(jfd, &pgRec.aData, sizeof(pgRec.aData));
drh99ee3602003-02-16 19:13:36 +0000537 if( rc!=SQLITE_OK ) return rc;
drhfa86c412002-02-02 15:01:15 +0000538
drh968af522003-02-11 14:55:40 +0000539 /* Sanity checking on the page. This is more important that I originally
540 ** thought. If a power failure occurs while the journal is being written,
541 ** it could cause invalid data to be written into the journal. We need to
542 ** detect this invalid data (with high probability) and ignore it.
543 */
544 if( pgRec.pgno==0 ){
545 return SQLITE_DONE;
546 }
drh7d02cb72003-06-04 16:24:39 +0000547 if( pgRec.pgno>(unsigned)pPager->dbSize ){
drh968af522003-02-11 14:55:40 +0000548 return SQLITE_OK;
549 }
550 if( format>=JOURNAL_FORMAT_3 ){
551 rc = read32bits(format, jfd, &cksum);
drh99ee3602003-02-16 19:13:36 +0000552 if( rc ) return rc;
drh968af522003-02-11 14:55:40 +0000553 if( pager_cksum(pPager, pgRec.pgno, pgRec.aData)!=cksum ){
554 return SQLITE_DONE;
555 }
556 }
drhfa86c412002-02-02 15:01:15 +0000557
558 /* Playback the page. Update the in-memory copy of the page
559 ** at the same time, if there is one.
560 */
561 pPg = pager_lookup(pPager, pgRec.pgno);
drh99ee3602003-02-16 19:13:36 +0000562 TRACE2("PLAYBACK %d\n", pgRec.pgno);
drhd0ba1932004-02-10 01:54:28 +0000563 sqliteOsSeek(&pPager->fd, (pgRec.pgno-1)*(off_t)SQLITE_PAGE_SIZE);
564 rc = sqliteOsWrite(&pPager->fd, pgRec.aData, SQLITE_PAGE_SIZE);
drhfa86c412002-02-02 15:01:15 +0000565 if( pPg ){
drhacf4ac92003-12-17 23:57:34 +0000566 /* No page should ever be rolled back that is in use, except for page
567 ** 1 which is held in use in order to keep the lock on the database
568 ** active.
569 */
570 assert( pPg->nRef==0 || pPg->pgno==1 );
drhd0ba1932004-02-10 01:54:28 +0000571 memcpy(PGHDR_TO_DATA(pPg), pgRec.aData, SQLITE_PAGE_SIZE);
drhacf4ac92003-12-17 23:57:34 +0000572 memset(PGHDR_TO_EXTRA(pPg), 0, pPager->nExtra);
drhdb48ee02003-01-16 13:42:43 +0000573 pPg->dirty = 0;
574 pPg->needSync = 0;
drh9eb9e262004-02-11 02:18:05 +0000575 CODEC(pPager, PGHDR_TO_DATA(pPg), pPg->pgno, 3);
drhfa86c412002-02-02 15:01:15 +0000576 }
577 return rc;
578}
579
580/*
drhed7c8552001-04-11 14:29:21 +0000581** Playback the journal and thus restore the database file to
582** the state it was in before we started making changes.
583**
drh34e79ce2004-02-08 06:05:46 +0000584** The journal file format is as follows:
585**
586** * 8 byte prefix. One of the aJournalMagic123 vectors defined
587** above. The format of the journal file is determined by which
588** of the three prefix vectors is seen.
589** * 4 byte big-endian integer which is the number of valid page records
590** in the journal. If this value is 0xffffffff, then compute the
591** number of page records from the journal size. This field appears
592** in format 3 only.
593** * 4 byte big-endian integer which is the initial value for the
594** sanity checksum. This field appears in format 3 only.
595** * 4 byte integer which is the number of pages to truncate the
596** database to during a rollback.
**   * Zero or more page instances, each as follows:
598** + 4 byte page number.
drhd0ba1932004-02-10 01:54:28 +0000599** + SQLITE_PAGE_SIZE bytes of data.
drh34e79ce2004-02-08 06:05:46 +0000600** + 4 byte checksum (format 3 only)
601**
602** When we speak of the journal header, we mean the first 4 bullets above.
603** Each entry in the journal is an instance of the 5th bullet. Note that
604** bullets 2 and 3 only appear in format-3 journals.
605**
606** Call the value from the second bullet "nRec". nRec is the number of
607** valid page entries in the journal. In most cases, you can compute the
608** value of nRec from the size of the journal file. But if a power
609** failure occurred while the journal was being written, it could be the
610** case that the size of the journal file had already been increased but
611** the extra entries had not yet made it safely to disk. In such a case,
612** the value of nRec computed from the file size would be too large. For
613** that reason, we always use the nRec value in the header.
614**
615** If the nRec value is 0xffffffff it means that nRec should be computed
616** from the file size. This value is used when the user selects the
617** no-sync option for the journal. A power failure could lead to corruption
618** in this case. But for things like temporary table (which will be
619** deleted when the power is restored) we don't care.
620**
621** Journal formats 1 and 2 do not have an nRec value in the header so we
622** have to compute nRec from the file size. This has risks (as described
623** above) which is why all persistent tables have been changed to use
624** format 3.
drhed7c8552001-04-11 14:29:21 +0000625**
drhd9b02572001-04-15 00:37:09 +0000626** If the file opened as the journal file is not a well-formed
drh34e79ce2004-02-08 06:05:46 +0000627** journal file then the database will likely already be
628** corrupted, so the PAGER_ERR_CORRUPT bit is set in pPager->errMask
629** and SQLITE_CORRUPT is returned. If it all works, then this routine
630** returns SQLITE_OK.
drhed7c8552001-04-11 14:29:21 +0000631*/
drh99ee3602003-02-16 19:13:36 +0000632static int pager_playback(Pager *pPager, int useJournalSize){
drh968af522003-02-11 14:55:40 +0000633 off_t szJ; /* Size of the journal file in bytes */
634 int nRec; /* Number of Records in the journal */
drhd9b02572001-04-15 00:37:09 +0000635 int i; /* Loop counter */
636 Pgno mxPg = 0; /* Size of the original file in pages */
drh968af522003-02-11 14:55:40 +0000637 int format; /* Format of the journal file. */
638 unsigned char aMagic[sizeof(aJournalMagic1)];
drhed7c8552001-04-11 14:29:21 +0000639 int rc;
640
drhc3a64ba2001-11-22 00:01:27 +0000641 /* Figure out how many records are in the journal. Abort early if
642 ** the journal is empty.
drhed7c8552001-04-11 14:29:21 +0000643 */
drh8cfbf082001-09-19 13:22:39 +0000644 assert( pPager->journalOpen );
drha7fcb052001-12-14 15:09:55 +0000645 sqliteOsSeek(&pPager->jfd, 0);
drh968af522003-02-11 14:55:40 +0000646 rc = sqliteOsFileSize(&pPager->jfd, &szJ);
drhc3a64ba2001-11-22 00:01:27 +0000647 if( rc!=SQLITE_OK ){
648 goto end_playback;
649 }
drh240c5792004-02-08 00:40:52 +0000650
651 /* If the journal file is too small to contain a complete header,
drh34e79ce2004-02-08 06:05:46 +0000652 ** it must mean that the process that created the journal was just
653 ** beginning to write the journal file when it died. In that case,
654 ** the database file should have still been completely unchanged.
655 ** Nothing needs to be rolled back. We can safely ignore this journal.
drh240c5792004-02-08 00:40:52 +0000656 */
drh968af522003-02-11 14:55:40 +0000657 if( szJ < sizeof(aMagic)+sizeof(Pgno) ){
drhc3a64ba2001-11-22 00:01:27 +0000658 goto end_playback;
659 }
660
661 /* Read the beginning of the journal and truncate the
662 ** database file back to its original size.
663 */
drha7fcb052001-12-14 15:09:55 +0000664 rc = sqliteOsRead(&pPager->jfd, aMagic, sizeof(aMagic));
drh94f33312002-08-12 12:29:56 +0000665 if( rc!=SQLITE_OK ){
drh81a20f22001-10-12 17:30:04 +0000666 rc = SQLITE_PROTOCOL;
667 goto end_playback;
drhd9b02572001-04-15 00:37:09 +0000668 }
drh968af522003-02-11 14:55:40 +0000669 if( memcmp(aMagic, aJournalMagic3, sizeof(aMagic))==0 ){
670 format = JOURNAL_FORMAT_3;
671 }else if( memcmp(aMagic, aJournalMagic2, sizeof(aMagic))==0 ){
672 format = JOURNAL_FORMAT_2;
673 }else if( memcmp(aMagic, aJournalMagic1, sizeof(aMagic))==0 ){
674 format = JOURNAL_FORMAT_1;
drh94f33312002-08-12 12:29:56 +0000675 }else{
676 rc = SQLITE_PROTOCOL;
677 goto end_playback;
678 }
drh968af522003-02-11 14:55:40 +0000679 if( format>=JOURNAL_FORMAT_3 ){
drh240c5792004-02-08 00:40:52 +0000680 if( szJ < sizeof(aMagic) + 3*sizeof(u32) ){
681 /* Ignore the journal if it is too small to contain a complete
682 ** header. We already did this test once above, but at the prior
683 ** test, we did not know the journal format and so we had to assume
684 ** the smallest possible header. Now we know the header is bigger
drh34e79ce2004-02-08 06:05:46 +0000685 ** than the minimum so we test again.
drh240c5792004-02-08 00:40:52 +0000686 */
687 goto end_playback;
688 }
drh133cdf62004-01-07 02:52:07 +0000689 rc = read32bits(format, &pPager->jfd, (u32*)&nRec);
drh968af522003-02-11 14:55:40 +0000690 if( rc ) goto end_playback;
691 rc = read32bits(format, &pPager->jfd, &pPager->cksumInit);
692 if( rc ) goto end_playback;
drh99ee3602003-02-16 19:13:36 +0000693 if( nRec==0xffffffff || useJournalSize ){
drh968af522003-02-11 14:55:40 +0000694 nRec = (szJ - JOURNAL_HDR_SZ(3))/JOURNAL_PG_SZ(3);
695 }
696 }else{
drhd8d66e82003-02-12 02:10:15 +0000697 nRec = (szJ - JOURNAL_HDR_SZ(2))/JOURNAL_PG_SZ(2);
698 assert( nRec*JOURNAL_PG_SZ(2)+JOURNAL_HDR_SZ(2)==szJ );
drh968af522003-02-11 14:55:40 +0000699 }
700 rc = read32bits(format, &pPager->jfd, &mxPg);
drhd9b02572001-04-15 00:37:09 +0000701 if( rc!=SQLITE_OK ){
drh81a20f22001-10-12 17:30:04 +0000702 goto end_playback;
drhd9b02572001-04-15 00:37:09 +0000703 }
drhd8d66e82003-02-12 02:10:15 +0000704 assert( pPager->origDbSize==0 || pPager->origDbSize==mxPg );
drhd0ba1932004-02-10 01:54:28 +0000705 rc = sqliteOsTruncate(&pPager->fd, SQLITE_PAGE_SIZE*(off_t)mxPg);
drh81a20f22001-10-12 17:30:04 +0000706 if( rc!=SQLITE_OK ){
707 goto end_playback;
708 }
drhd9b02572001-04-15 00:37:09 +0000709 pPager->dbSize = mxPg;
710
drhfa86c412002-02-02 15:01:15 +0000711 /* Copy original pages out of the journal and back into the database file.
drhed7c8552001-04-11 14:29:21 +0000712 */
drh968af522003-02-11 14:55:40 +0000713 for(i=0; i<nRec; i++){
714 rc = pager_playback_one_page(pPager, &pPager->jfd, format);
715 if( rc!=SQLITE_OK ){
716 if( rc==SQLITE_DONE ){
drh968af522003-02-11 14:55:40 +0000717 rc = SQLITE_OK;
718 }
719 break;
720 }
drhed7c8552001-04-11 14:29:21 +0000721 }
drh81a20f22001-10-12 17:30:04 +0000722
drh4a0681e2003-02-13 01:58:20 +0000723 /* Pages that have been written to the journal but never synced
724 ** where not restored by the loop above. We have to restore those
drh240c5792004-02-08 00:40:52 +0000725 ** pages by reading them back from the original database.
drhdb48ee02003-01-16 13:42:43 +0000726 */
727 if( rc==SQLITE_OK ){
728 PgHdr *pPg;
729 for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
drhd0ba1932004-02-10 01:54:28 +0000730 char zBuf[SQLITE_PAGE_SIZE];
drh4a0681e2003-02-13 01:58:20 +0000731 if( !pPg->dirty ) continue;
drhdb48ee02003-01-16 13:42:43 +0000732 if( (int)pPg->pgno <= pPager->origDbSize ){
drhd0ba1932004-02-10 01:54:28 +0000733 sqliteOsSeek(&pPager->fd, SQLITE_PAGE_SIZE*(off_t)(pPg->pgno-1));
734 rc = sqliteOsRead(&pPager->fd, zBuf, SQLITE_PAGE_SIZE);
drh9eb9e262004-02-11 02:18:05 +0000735 TRACE2("REFETCH %d\n", pPg->pgno);
736 CODEC(pPager, zBuf, pPg->pgno, 2);
drhdb48ee02003-01-16 13:42:43 +0000737 if( rc ) break;
738 }else{
drhd0ba1932004-02-10 01:54:28 +0000739 memset(zBuf, 0, SQLITE_PAGE_SIZE);
drhdb48ee02003-01-16 13:42:43 +0000740 }
drhd0ba1932004-02-10 01:54:28 +0000741 if( pPg->nRef==0 || memcmp(zBuf, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE) ){
742 memcpy(PGHDR_TO_DATA(pPg), zBuf, SQLITE_PAGE_SIZE);
drh3a840692003-01-29 22:58:26 +0000743 memset(PGHDR_TO_EXTRA(pPg), 0, pPager->nExtra);
744 }
drhdb48ee02003-01-16 13:42:43 +0000745 pPg->needSync = 0;
746 pPg->dirty = 0;
747 }
748 }
drh4a0681e2003-02-13 01:58:20 +0000749
750end_playback:
drhd9b02572001-04-15 00:37:09 +0000751 if( rc!=SQLITE_OK ){
752 pager_unwritelock(pPager);
753 pPager->errMask |= PAGER_ERR_CORRUPT;
754 rc = SQLITE_CORRUPT;
755 }else{
756 rc = pager_unwritelock(pPager);
drhed7c8552001-04-11 14:29:21 +0000757 }
drhd9b02572001-04-15 00:37:09 +0000758 return rc;
drhed7c8552001-04-11 14:29:21 +0000759}
760
761/*
drhfa86c412002-02-02 15:01:15 +0000762** Playback the checkpoint journal.
763**
764** This is similar to playing back the transaction journal but with
765** a few extra twists.
766**
drh663fc632002-02-02 18:49:19 +0000767** (1) The number of pages in the database file at the start of
768** the checkpoint is stored in pPager->ckptSize, not in the
769** journal file itself.
drhfa86c412002-02-02 15:01:15 +0000770**
771** (2) In addition to playing back the checkpoint journal, also
772** playback all pages of the transaction journal beginning
773** at offset pPager->ckptJSize.
774*/
drh3aac2dd2004-04-26 14:10:20 +0000775static int pager_stmt_playback(Pager *pPager){
drh968af522003-02-11 14:55:40 +0000776 off_t szJ; /* Size of the full journal */
777 int nRec; /* Number of Records */
drhfa86c412002-02-02 15:01:15 +0000778 int i; /* Loop counter */
779 int rc;
780
781 /* Truncate the database back to its original size.
782 */
drhd0ba1932004-02-10 01:54:28 +0000783 rc = sqliteOsTruncate(&pPager->fd, SQLITE_PAGE_SIZE*(off_t)pPager->ckptSize);
drhfa86c412002-02-02 15:01:15 +0000784 pPager->dbSize = pPager->ckptSize;
785
786 /* Figure out how many records are in the checkpoint journal.
787 */
drh0f892532002-05-30 12:27:03 +0000788 assert( pPager->ckptInUse && pPager->journalOpen );
drhfa86c412002-02-02 15:01:15 +0000789 sqliteOsSeek(&pPager->cpfd, 0);
drh9bd47a92003-01-07 14:46:08 +0000790 nRec = pPager->ckptNRec;
drhfa86c412002-02-02 15:01:15 +0000791
792 /* Copy original pages out of the checkpoint journal and back into the
drh968af522003-02-11 14:55:40 +0000793 ** database file. Note that the checkpoint journal always uses format
794 ** 2 instead of format 3 since it does not need to be concerned with
795 ** power failures corrupting the journal and can thus omit the checksums.
drhfa86c412002-02-02 15:01:15 +0000796 */
797 for(i=nRec-1; i>=0; i--){
drh968af522003-02-11 14:55:40 +0000798 rc = pager_playback_one_page(pPager, &pPager->cpfd, 2);
799 assert( rc!=SQLITE_DONE );
drh3aac2dd2004-04-26 14:10:20 +0000800 if( rc!=SQLITE_OK ) goto end_stmt_playback;
drhfa86c412002-02-02 15:01:15 +0000801 }
802
803 /* Figure out how many pages need to be copied out of the transaction
804 ** journal.
805 */
806 rc = sqliteOsSeek(&pPager->jfd, pPager->ckptJSize);
807 if( rc!=SQLITE_OK ){
drh3aac2dd2004-04-26 14:10:20 +0000808 goto end_stmt_playback;
drhfa86c412002-02-02 15:01:15 +0000809 }
drh968af522003-02-11 14:55:40 +0000810 rc = sqliteOsFileSize(&pPager->jfd, &szJ);
drhfa86c412002-02-02 15:01:15 +0000811 if( rc!=SQLITE_OK ){
drh3aac2dd2004-04-26 14:10:20 +0000812 goto end_stmt_playback;
drhfa86c412002-02-02 15:01:15 +0000813 }
drh968af522003-02-11 14:55:40 +0000814 nRec = (szJ - pPager->ckptJSize)/JOURNAL_PG_SZ(journal_format);
drhfa86c412002-02-02 15:01:15 +0000815 for(i=nRec-1; i>=0; i--){
drh968af522003-02-11 14:55:40 +0000816 rc = pager_playback_one_page(pPager, &pPager->jfd, journal_format);
817 if( rc!=SQLITE_OK ){
818 assert( rc!=SQLITE_DONE );
drh3aac2dd2004-04-26 14:10:20 +0000819 goto end_stmt_playback;
drh968af522003-02-11 14:55:40 +0000820 }
drhfa86c412002-02-02 15:01:15 +0000821 }
822
drh3aac2dd2004-04-26 14:10:20 +0000823end_stmt_playback:
drhfa86c412002-02-02 15:01:15 +0000824 if( rc!=SQLITE_OK ){
drhfa86c412002-02-02 15:01:15 +0000825 pPager->errMask |= PAGER_ERR_CORRUPT;
826 rc = SQLITE_CORRUPT;
drhfa86c412002-02-02 15:01:15 +0000827 }
828 return rc;
829}
830
831/*
drhf57b14a2001-09-14 18:54:08 +0000832** Change the maximum number of in-memory pages that are allowed.
drhcd61c282002-03-06 22:01:34 +0000833**
834** The maximum number is the absolute value of the mxPage parameter.
835** If mxPage is negative, the noSync flag is also set. noSync bypasses
836** calls to sqliteOsSync(). The pager runs much faster with noSync on,
837** but if the operating system crashes or there is an abrupt power
838** failure, the database file might be left in an inconsistent and
839** unrepairable state.
drhf57b14a2001-09-14 18:54:08 +0000840*/
drh3aac2dd2004-04-26 14:10:20 +0000841void sqlite3pager_set_cachesize(Pager *pPager, int mxPage){
drh603240c2002-03-05 01:11:12 +0000842 if( mxPage>=0 ){
drha1680452002-04-18 01:56:57 +0000843 pPager->noSync = pPager->tempFile;
drh946966f2004-02-25 02:20:41 +0000844 if( pPager->noSync==0 ) pPager->needSync = 0;
drh603240c2002-03-05 01:11:12 +0000845 }else{
846 pPager->noSync = 1;
847 mxPage = -mxPage;
848 }
drhf57b14a2001-09-14 18:54:08 +0000849 if( mxPage>10 ){
850 pPager->mxPage = mxPage;
851 }
852}
853
854/*
drh973b6e32003-02-12 14:09:42 +0000855** Adjust the robustness of the database to damage due to OS crashes
856** or power failures by changing the number of syncs()s when writing
857** the rollback journal. There are three levels:
858**
859** OFF sqliteOsSync() is never called. This is the default
860** for temporary and transient files.
861**
862** NORMAL The journal is synced once before writes begin on the
863** database. This is normally adequate protection, but
864** it is theoretically possible, though very unlikely,
865** that an inopertune power failure could leave the journal
866** in a state which would cause damage to the database
867** when it is rolled back.
868**
869** FULL The journal is synced twice before writes begin on the
drh34e79ce2004-02-08 06:05:46 +0000870** database (with some additional information - the nRec field
871** of the journal header - being written in between the two
872** syncs). If we assume that writing a
drh973b6e32003-02-12 14:09:42 +0000873** single disk sector is atomic, then this mode provides
874** assurance that the journal will not be corrupted to the
875** point of causing damage to the database during rollback.
876**
877** Numeric values associated with these states are OFF==1, NORMAL=2,
878** and FULL=3.
879*/
drh3aac2dd2004-04-26 14:10:20 +0000880void sqlite3pager_set_safety_level(Pager *pPager, int level){
drh973b6e32003-02-12 14:09:42 +0000881 pPager->noSync = level==1 || pPager->tempFile;
882 pPager->fullSync = level==3 && !pPager->tempFile;
drh946966f2004-02-25 02:20:41 +0000883 if( pPager->noSync==0 ) pPager->needSync = 0;
drh973b6e32003-02-12 14:09:42 +0000884}
885
886/*
drhfa86c412002-02-02 15:01:15 +0000887** Open a temporary file. Write the name of the file into zName
888** (zName must be at least SQLITE_TEMPNAME_SIZE bytes long.) Write
889** the file descriptor into *fd. Return SQLITE_OK on success or some
890** other error code if we fail.
891**
892** The OS will automatically delete the temporary file when it is
893** closed.
894*/
drh3aac2dd2004-04-26 14:10:20 +0000895static int sqlite3pager_opentemp(char *zFile, OsFile *fd){
drhfa86c412002-02-02 15:01:15 +0000896 int cnt = 8;
897 int rc;
898 do{
899 cnt--;
900 sqliteOsTempFileName(zFile);
901 rc = sqliteOsOpenExclusive(zFile, fd, 1);
902 }while( cnt>0 && rc!=SQLITE_OK );
903 return rc;
904}
905
906/*
drhed7c8552001-04-11 14:29:21 +0000907** Create a new page cache and put a pointer to the page cache in *ppPager.
drh5e00f6c2001-09-13 13:46:56 +0000908** The file to be cached need not exist. The file is not locked until
drh3aac2dd2004-04-26 14:10:20 +0000909** the first call to sqlite3pager_get() and is only held open until the
910** last page is released using sqlite3pager_unref().
drh382c0242001-10-06 16:33:02 +0000911**
drh6446c4d2001-12-15 14:22:18 +0000912** If zFilename is NULL then a randomly-named temporary file is created
913** and used as the file to be cached. The file will be deleted
914** automatically when it is closed.
drhed7c8552001-04-11 14:29:21 +0000915*/
drh3aac2dd2004-04-26 14:10:20 +0000916int sqlite3pager_open(
drh7e3b0a02001-04-28 16:52:40 +0000917 Pager **ppPager, /* Return the Pager structure here */
918 const char *zFilename, /* Name of the database file to open */
919 int mxPage, /* Max number of in-memory cache pages */
drhda47d772002-12-02 04:25:19 +0000920 int nExtra, /* Extra bytes append to each in-memory page */
921 int useJournal /* TRUE to use a rollback journal on this file */
drh7e3b0a02001-04-28 16:52:40 +0000922){
drhed7c8552001-04-11 14:29:21 +0000923 Pager *pPager;
drh3e7a6092002-12-07 21:45:14 +0000924 char *zFullPathname;
drhed7c8552001-04-11 14:29:21 +0000925 int nameLen;
drh8cfbf082001-09-19 13:22:39 +0000926 OsFile fd;
drha76c82e2003-07-27 18:59:42 +0000927 int rc, i;
drh5e00f6c2001-09-13 13:46:56 +0000928 int tempFile;
929 int readOnly = 0;
drh8cfbf082001-09-19 13:22:39 +0000930 char zTemp[SQLITE_TEMPNAME_SIZE];
drhed7c8552001-04-11 14:29:21 +0000931
drhd9b02572001-04-15 00:37:09 +0000932 *ppPager = 0;
933 if( sqlite_malloc_failed ){
934 return SQLITE_NOMEM;
935 }
drh901afd42003-08-26 11:25:58 +0000936 if( zFilename && zFilename[0] ){
drh3e7a6092002-12-07 21:45:14 +0000937 zFullPathname = sqliteOsFullPathname(zFilename);
938 rc = sqliteOsOpenReadWrite(zFullPathname, &fd, &readOnly);
drh5e00f6c2001-09-13 13:46:56 +0000939 tempFile = 0;
940 }else{
drh3aac2dd2004-04-26 14:10:20 +0000941 rc = sqlite3pager_opentemp(zTemp, &fd);
drh5e00f6c2001-09-13 13:46:56 +0000942 zFilename = zTemp;
drh3e7a6092002-12-07 21:45:14 +0000943 zFullPathname = sqliteOsFullPathname(zFilename);
drh5e00f6c2001-09-13 13:46:56 +0000944 tempFile = 1;
945 }
drh3e7a6092002-12-07 21:45:14 +0000946 if( sqlite_malloc_failed ){
947 return SQLITE_NOMEM;
948 }
drh8cfbf082001-09-19 13:22:39 +0000949 if( rc!=SQLITE_OK ){
drh3e7a6092002-12-07 21:45:14 +0000950 sqliteFree(zFullPathname);
drhed7c8552001-04-11 14:29:21 +0000951 return SQLITE_CANTOPEN;
952 }
drh3e7a6092002-12-07 21:45:14 +0000953 nameLen = strlen(zFullPathname);
drha76c82e2003-07-27 18:59:42 +0000954 pPager = sqliteMalloc( sizeof(*pPager) + nameLen*3 + 30 );
drhd9b02572001-04-15 00:37:09 +0000955 if( pPager==0 ){
drha7fcb052001-12-14 15:09:55 +0000956 sqliteOsClose(&fd);
drh3e7a6092002-12-07 21:45:14 +0000957 sqliteFree(zFullPathname);
drhd9b02572001-04-15 00:37:09 +0000958 return SQLITE_NOMEM;
959 }
drhdb48ee02003-01-16 13:42:43 +0000960 SET_PAGER(pPager);
drhed7c8552001-04-11 14:29:21 +0000961 pPager->zFilename = (char*)&pPager[1];
drha76c82e2003-07-27 18:59:42 +0000962 pPager->zDirectory = &pPager->zFilename[nameLen+1];
963 pPager->zJournal = &pPager->zDirectory[nameLen+1];
drh3e7a6092002-12-07 21:45:14 +0000964 strcpy(pPager->zFilename, zFullPathname);
drha76c82e2003-07-27 18:59:42 +0000965 strcpy(pPager->zDirectory, zFullPathname);
966 for(i=nameLen; i>0 && pPager->zDirectory[i-1]!='/'; i--){}
967 if( i>0 ) pPager->zDirectory[i-1] = 0;
drh3e7a6092002-12-07 21:45:14 +0000968 strcpy(pPager->zJournal, zFullPathname);
969 sqliteFree(zFullPathname);
drhed7c8552001-04-11 14:29:21 +0000970 strcpy(&pPager->zJournal[nameLen], "-journal");
971 pPager->fd = fd;
drh8cfbf082001-09-19 13:22:39 +0000972 pPager->journalOpen = 0;
drhda47d772002-12-02 04:25:19 +0000973 pPager->useJournal = useJournal;
drhfa86c412002-02-02 15:01:15 +0000974 pPager->ckptOpen = 0;
drh0f892532002-05-30 12:27:03 +0000975 pPager->ckptInUse = 0;
drhed7c8552001-04-11 14:29:21 +0000976 pPager->nRef = 0;
977 pPager->dbSize = -1;
drhfa86c412002-02-02 15:01:15 +0000978 pPager->ckptSize = 0;
979 pPager->ckptJSize = 0;
drhed7c8552001-04-11 14:29:21 +0000980 pPager->nPage = 0;
drhd79caeb2001-04-15 02:27:24 +0000981 pPager->mxPage = mxPage>5 ? mxPage : 10;
drhed7c8552001-04-11 14:29:21 +0000982 pPager->state = SQLITE_UNLOCK;
drhd9b02572001-04-15 00:37:09 +0000983 pPager->errMask = 0;
drh5e00f6c2001-09-13 13:46:56 +0000984 pPager->tempFile = tempFile;
985 pPager->readOnly = readOnly;
drhf57b14a2001-09-14 18:54:08 +0000986 pPager->needSync = 0;
drhda47d772002-12-02 04:25:19 +0000987 pPager->noSync = pPager->tempFile || !useJournal;
drhed7c8552001-04-11 14:29:21 +0000988 pPager->pFirst = 0;
drh341eae82003-01-21 02:39:36 +0000989 pPager->pFirstSynced = 0;
drhed7c8552001-04-11 14:29:21 +0000990 pPager->pLast = 0;
drh7c717f72001-06-24 20:39:41 +0000991 pPager->nExtra = nExtra;
drhed7c8552001-04-11 14:29:21 +0000992 memset(pPager->aHash, 0, sizeof(pPager->aHash));
993 *ppPager = pPager;
994 return SQLITE_OK;
995}
996
997/*
drh72f82862001-05-24 21:06:34 +0000998** Set the destructor for this pager. If not NULL, the destructor is called
drh5e00f6c2001-09-13 13:46:56 +0000999** when the reference count on each page reaches zero. The destructor can
1000** be used to clean up information in the extra segment appended to each page.
drh72f82862001-05-24 21:06:34 +00001001**
drh3aac2dd2004-04-26 14:10:20 +00001002** The destructor is not called as a result sqlite3pager_close().
1003** Destructors are only called by sqlite3pager_unref().
drh72f82862001-05-24 21:06:34 +00001004*/
drh3aac2dd2004-04-26 14:10:20 +00001005void sqlite3pager_set_destructor(Pager *pPager, void (*xDesc)(void*)){
drh72f82862001-05-24 21:06:34 +00001006 pPager->xDestructor = xDesc;
1007}
1008
1009/*
drh5e00f6c2001-09-13 13:46:56 +00001010** Return the total number of pages in the disk file associated with
1011** pPager.
drhed7c8552001-04-11 14:29:21 +00001012*/
drh3aac2dd2004-04-26 14:10:20 +00001013int sqlite3pager_pagecount(Pager *pPager){
drh28be87c2002-11-05 23:03:02 +00001014 off_t n;
drhd9b02572001-04-15 00:37:09 +00001015 assert( pPager!=0 );
drhed7c8552001-04-11 14:29:21 +00001016 if( pPager->dbSize>=0 ){
1017 return pPager->dbSize;
1018 }
drha7fcb052001-12-14 15:09:55 +00001019 if( sqliteOsFileSize(&pPager->fd, &n)!=SQLITE_OK ){
drh81a20f22001-10-12 17:30:04 +00001020 pPager->errMask |= PAGER_ERR_DISK;
drh8cfbf082001-09-19 13:22:39 +00001021 return 0;
drhed7c8552001-04-11 14:29:21 +00001022 }
drhd0ba1932004-02-10 01:54:28 +00001023 n /= SQLITE_PAGE_SIZE;
drhd9b02572001-04-15 00:37:09 +00001024 if( pPager->state!=SQLITE_UNLOCK ){
drhed7c8552001-04-11 14:29:21 +00001025 pPager->dbSize = n;
1026 }
1027 return n;
1028}
1029
1030/*
drhf7c57532003-04-25 13:22:51 +00001031** Forward declaration
1032*/
drh34e79ce2004-02-08 06:05:46 +00001033static int syncJournal(Pager*);
drhf7c57532003-04-25 13:22:51 +00001034
1035/*
1036** Truncate the file to the number of pages specified.
1037*/
drh3aac2dd2004-04-26 14:10:20 +00001038int sqlite3pager_truncate(Pager *pPager, Pgno nPage){
drhf7c57532003-04-25 13:22:51 +00001039 int rc;
drh2e6d11b2003-04-25 15:37:57 +00001040 if( pPager->dbSize<0 ){
drh3aac2dd2004-04-26 14:10:20 +00001041 sqlite3pager_pagecount(pPager);
drh2e6d11b2003-04-25 15:37:57 +00001042 }
1043 if( pPager->errMask!=0 ){
1044 rc = pager_errcode(pPager);
1045 return rc;
1046 }
drh7d02cb72003-06-04 16:24:39 +00001047 if( nPage>=(unsigned)pPager->dbSize ){
drhf7c57532003-04-25 13:22:51 +00001048 return SQLITE_OK;
1049 }
drh34e79ce2004-02-08 06:05:46 +00001050 syncJournal(pPager);
drhd0ba1932004-02-10 01:54:28 +00001051 rc = sqliteOsTruncate(&pPager->fd, SQLITE_PAGE_SIZE*(off_t)nPage);
drhf7c57532003-04-25 13:22:51 +00001052 if( rc==SQLITE_OK ){
1053 pPager->dbSize = nPage;
1054 }
1055 return rc;
1056}
1057
1058/*
drhed7c8552001-04-11 14:29:21 +00001059** Shutdown the page cache. Free all memory and close all files.
1060**
1061** If a transaction was in progress when this routine is called, that
1062** transaction is rolled back. All outstanding pages are invalidated
1063** and their memory is freed. Any attempt to use a page associated
1064** with this page cache after this function returns will likely
1065** result in a coredump.
1066*/
drh3aac2dd2004-04-26 14:10:20 +00001067int sqlite3pager_close(Pager *pPager){
drhd9b02572001-04-15 00:37:09 +00001068 PgHdr *pPg, *pNext;
drhed7c8552001-04-11 14:29:21 +00001069 switch( pPager->state ){
1070 case SQLITE_WRITELOCK: {
drh3aac2dd2004-04-26 14:10:20 +00001071 sqlite3pager_rollback(pPager);
drha7fcb052001-12-14 15:09:55 +00001072 sqliteOsUnlock(&pPager->fd);
drh8cfbf082001-09-19 13:22:39 +00001073 assert( pPager->journalOpen==0 );
drhed7c8552001-04-11 14:29:21 +00001074 break;
1075 }
1076 case SQLITE_READLOCK: {
drha7fcb052001-12-14 15:09:55 +00001077 sqliteOsUnlock(&pPager->fd);
drhed7c8552001-04-11 14:29:21 +00001078 break;
1079 }
1080 default: {
1081 /* Do nothing */
1082 break;
1083 }
1084 }
drhd9b02572001-04-15 00:37:09 +00001085 for(pPg=pPager->pAll; pPg; pPg=pNext){
1086 pNext = pPg->pNextAll;
1087 sqliteFree(pPg);
drhed7c8552001-04-11 14:29:21 +00001088 }
drha7fcb052001-12-14 15:09:55 +00001089 sqliteOsClose(&pPager->fd);
drh8cfbf082001-09-19 13:22:39 +00001090 assert( pPager->journalOpen==0 );
drh0f892532002-05-30 12:27:03 +00001091 /* Temp files are automatically deleted by the OS
1092 ** if( pPager->tempFile ){
1093 ** sqliteOsDelete(pPager->zFilename);
1094 ** }
1095 */
drhdb48ee02003-01-16 13:42:43 +00001096 CLR_PAGER(pPager);
drh73509ee2003-04-06 20:44:45 +00001097 if( pPager->zFilename!=(char*)&pPager[1] ){
drha76c82e2003-07-27 18:59:42 +00001098 assert( 0 ); /* Cannot happen */
drh73509ee2003-04-06 20:44:45 +00001099 sqliteFree(pPager->zFilename);
1100 sqliteFree(pPager->zJournal);
drha76c82e2003-07-27 18:59:42 +00001101 sqliteFree(pPager->zDirectory);
drh73509ee2003-04-06 20:44:45 +00001102 }
drhed7c8552001-04-11 14:29:21 +00001103 sqliteFree(pPager);
1104 return SQLITE_OK;
1105}
1106
1107/*
drh5e00f6c2001-09-13 13:46:56 +00001108** Return the page number for the given page data.
drhed7c8552001-04-11 14:29:21 +00001109*/
drh3aac2dd2004-04-26 14:10:20 +00001110Pgno sqlite3pager_pagenumber(void *pData){
drhed7c8552001-04-11 14:29:21 +00001111 PgHdr *p = DATA_TO_PGHDR(pData);
1112 return p->pgno;
1113}
1114
1115/*
drh7e3b0a02001-04-28 16:52:40 +00001116** Increment the reference count for a page. If the page is
1117** currently on the freelist (the reference count is zero) then
1118** remove it from the freelist.
1119*/
drh836faa42003-01-11 13:30:57 +00001120#define page_ref(P) ((P)->nRef==0?_page_ref(P):(void)(P)->nRef++)
1121static void _page_ref(PgHdr *pPg){
drh7e3b0a02001-04-28 16:52:40 +00001122 if( pPg->nRef==0 ){
1123 /* The page is currently on the freelist. Remove it. */
drh341eae82003-01-21 02:39:36 +00001124 if( pPg==pPg->pPager->pFirstSynced ){
1125 PgHdr *p = pPg->pNextFree;
1126 while( p && p->needSync ){ p = p->pNextFree; }
1127 pPg->pPager->pFirstSynced = p;
1128 }
drh7e3b0a02001-04-28 16:52:40 +00001129 if( pPg->pPrevFree ){
1130 pPg->pPrevFree->pNextFree = pPg->pNextFree;
1131 }else{
1132 pPg->pPager->pFirst = pPg->pNextFree;
1133 }
1134 if( pPg->pNextFree ){
1135 pPg->pNextFree->pPrevFree = pPg->pPrevFree;
1136 }else{
1137 pPg->pPager->pLast = pPg->pPrevFree;
1138 }
1139 pPg->pPager->nRef++;
1140 }
1141 pPg->nRef++;
drhdd793422001-06-28 01:54:48 +00001142 REFINFO(pPg);
drhdf0b3b02001-06-23 11:36:20 +00001143}
1144
1145/*
1146** Increment the reference count for a page. The input pointer is
1147** a reference to the page data.
1148*/
drh3aac2dd2004-04-26 14:10:20 +00001149int sqlite3pager_ref(void *pData){
drhdf0b3b02001-06-23 11:36:20 +00001150 PgHdr *pPg = DATA_TO_PGHDR(pData);
1151 page_ref(pPg);
drh8c42ca92001-06-22 19:15:00 +00001152 return SQLITE_OK;
drh7e3b0a02001-04-28 16:52:40 +00001153}
1154
1155/*
drh34e79ce2004-02-08 06:05:46 +00001156** Sync the journal. In other words, make sure all the pages that have
1157** been written to the journal have actually reached the surface of the
1158** disk. It is not safe to modify the original database file until after
1159** the journal has been synced. If the original database is modified before
1160** the journal is synced and a power failure occurs, the unsynced journal
1161** data would be lost and we would be unable to completely rollback the
1162** database changes. Database corruption would occur.
1163**
1164** This routine also updates the nRec field in the header of the journal.
1165** (See comments on the pager_playback() routine for additional information.)
1166** If the sync mode is FULL, two syncs will occur. First the whole journal
1167** is synced, then the nRec field is updated, then a second sync occurs.
drhb19a2bc2001-09-16 00:13:26 +00001168**
drh34e79ce2004-02-08 06:05:46 +00001169** For temporary databases, we do not care if we are able to rollback
1170** after a power failure, so sync occurs.
drhfa86c412002-02-02 15:01:15 +00001171**
drh34e79ce2004-02-08 06:05:46 +00001172** This routine clears the needSync field of every page current held in
1173** memory.
drh50e5dad2001-09-15 00:57:28 +00001174*/
drh34e79ce2004-02-08 06:05:46 +00001175static int syncJournal(Pager *pPager){
drh50e5dad2001-09-15 00:57:28 +00001176 PgHdr *pPg;
1177 int rc = SQLITE_OK;
drh03eb96a2002-11-10 23:32:56 +00001178
1179 /* Sync the journal before modifying the main database
1180 ** (assuming there is a journal and it needs to be synced.)
1181 */
drh50e5dad2001-09-15 00:57:28 +00001182 if( pPager->needSync ){
drhfa86c412002-02-02 15:01:15 +00001183 if( !pPager->tempFile ){
drhdb48ee02003-01-16 13:42:43 +00001184 assert( pPager->journalOpen );
drh946966f2004-02-25 02:20:41 +00001185 /* assert( !pPager->noSync ); // noSync might be set if synchronous
1186 ** was turned off after the transaction was started. Ticket #615 */
drh968af522003-02-11 14:55:40 +00001187#ifndef NDEBUG
1188 {
drh34e79ce2004-02-08 06:05:46 +00001189 /* Make sure the pPager->nRec counter we are keeping agrees
1190 ** with the nRec computed from the size of the journal file.
1191 */
drh4a0681e2003-02-13 01:58:20 +00001192 off_t hdrSz, pgSz, jSz;
drh968af522003-02-11 14:55:40 +00001193 hdrSz = JOURNAL_HDR_SZ(journal_format);
1194 pgSz = JOURNAL_PG_SZ(journal_format);
drh4a0681e2003-02-13 01:58:20 +00001195 rc = sqliteOsFileSize(&pPager->jfd, &jSz);
drh968af522003-02-11 14:55:40 +00001196 if( rc!=0 ) return rc;
drh4a0681e2003-02-13 01:58:20 +00001197 assert( pPager->nRec*pgSz+hdrSz==jSz );
drh968af522003-02-11 14:55:40 +00001198 }
1199#endif
drhd8d66e82003-02-12 02:10:15 +00001200 if( journal_format>=3 ){
drh34e79ce2004-02-08 06:05:46 +00001201 /* Write the nRec value into the journal file header */
drhd8d66e82003-02-12 02:10:15 +00001202 off_t szJ;
1203 if( pPager->fullSync ){
1204 TRACE1("SYNC\n");
1205 rc = sqliteOsSync(&pPager->jfd);
1206 if( rc!=0 ) return rc;
1207 }
1208 sqliteOsSeek(&pPager->jfd, sizeof(aJournalMagic1));
drh99ee3602003-02-16 19:13:36 +00001209 rc = write32bits(&pPager->jfd, pPager->nRec);
1210 if( rc ) return rc;
drhd8d66e82003-02-12 02:10:15 +00001211 szJ = JOURNAL_HDR_SZ(journal_format) +
1212 pPager->nRec*JOURNAL_PG_SZ(journal_format);
1213 sqliteOsSeek(&pPager->jfd, szJ);
drh968af522003-02-11 14:55:40 +00001214 }
drhdb48ee02003-01-16 13:42:43 +00001215 TRACE1("SYNC\n");
drhfa86c412002-02-02 15:01:15 +00001216 rc = sqliteOsSync(&pPager->jfd);
1217 if( rc!=0 ) return rc;
drhdb48ee02003-01-16 13:42:43 +00001218 pPager->journalStarted = 1;
drhfa86c412002-02-02 15:01:15 +00001219 }
drh50e5dad2001-09-15 00:57:28 +00001220 pPager->needSync = 0;
drh341eae82003-01-21 02:39:36 +00001221
1222 /* Erase the needSync flag from every page.
1223 */
1224 for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
1225 pPg->needSync = 0;
1226 }
1227 pPager->pFirstSynced = pPager->pFirst;
drh50e5dad2001-09-15 00:57:28 +00001228 }
drh03eb96a2002-11-10 23:32:56 +00001229
drh341eae82003-01-21 02:39:36 +00001230#ifndef NDEBUG
1231 /* If the Pager.needSync flag is clear then the PgHdr.needSync
1232 ** flag must also be clear for all pages. Verify that this
1233 ** invariant is true.
drh03eb96a2002-11-10 23:32:56 +00001234 */
drh341eae82003-01-21 02:39:36 +00001235 else{
1236 for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
1237 assert( pPg->needSync==0 );
1238 }
1239 assert( pPager->pFirstSynced==pPager->pFirst );
drh03eb96a2002-11-10 23:32:56 +00001240 }
drh341eae82003-01-21 02:39:36 +00001241#endif
drhdb48ee02003-01-16 13:42:43 +00001242
drh81a20f22001-10-12 17:30:04 +00001243 return rc;
drh50e5dad2001-09-15 00:57:28 +00001244}
1245
1246/*
drh2554f8b2003-01-22 01:26:44 +00001247** Given a list of pages (connected by the PgHdr.pDirty pointer) write
1248** every one of those pages out to the database file and mark them all
1249** as clean.
1250*/
1251static int pager_write_pagelist(PgHdr *pList){
1252 Pager *pPager;
1253 int rc;
1254
1255 if( pList==0 ) return SQLITE_OK;
1256 pPager = pList->pPager;
1257 while( pList ){
1258 assert( pList->dirty );
drhd0ba1932004-02-10 01:54:28 +00001259 sqliteOsSeek(&pPager->fd, (pList->pgno-1)*(off_t)SQLITE_PAGE_SIZE);
drh9eb9e262004-02-11 02:18:05 +00001260 CODEC(pPager, PGHDR_TO_DATA(pList), pList->pgno, 6);
1261 TRACE2("STORE %d\n", pList->pgno);
drhd0ba1932004-02-10 01:54:28 +00001262 rc = sqliteOsWrite(&pPager->fd, PGHDR_TO_DATA(pList), SQLITE_PAGE_SIZE);
drh9eb9e262004-02-11 02:18:05 +00001263 CODEC(pPager, PGHDR_TO_DATA(pList), pList->pgno, 0);
drh2554f8b2003-01-22 01:26:44 +00001264 if( rc ) return rc;
1265 pList->dirty = 0;
1266 pList = pList->pDirty;
1267 }
1268 return SQLITE_OK;
1269}
1270
1271/*
1272** Collect every dirty page into a dirty list and
1273** return a pointer to the head of that list. All pages are
1274** collected even if they are still in use.
1275*/
1276static PgHdr *pager_get_all_dirty_pages(Pager *pPager){
1277 PgHdr *p, *pList;
1278 pList = 0;
1279 for(p=pPager->pAll; p; p=p->pNextAll){
1280 if( p->dirty ){
1281 p->pDirty = pList;
1282 pList = p;
1283 }
1284 }
1285 return pList;
1286}
1287
1288/*
drhd9b02572001-04-15 00:37:09 +00001289** Acquire a page.
1290**
drh58a11682001-11-10 13:51:08 +00001291** A read lock on the disk file is obtained when the first page is acquired.
drh5e00f6c2001-09-13 13:46:56 +00001292** This read lock is dropped when the last page is released.
drhd9b02572001-04-15 00:37:09 +00001293**
drh306dc212001-05-21 13:45:10 +00001294** A _get works for any page number greater than 0. If the database
1295** file is smaller than the requested page, then no actual disk
1296** read occurs and the memory image of the page is initialized to
1297** all zeros. The extra data appended to a page is always initialized
1298** to zeros the first time a page is loaded into memory.
1299**
drhd9b02572001-04-15 00:37:09 +00001300** The acquisition might fail for several reasons. In all cases,
1301** an appropriate error code is returned and *ppPage is set to NULL.
drh7e3b0a02001-04-28 16:52:40 +00001302**
drh3aac2dd2004-04-26 14:10:20 +00001303** See also sqlite3pager_lookup(). Both this routine and _lookup() attempt
drh7e3b0a02001-04-28 16:52:40 +00001304** to find a page in the in-memory cache first. If the page is not already
drh5e00f6c2001-09-13 13:46:56 +00001305** in memory, this routine goes to disk to read it in whereas _lookup()
drh7e3b0a02001-04-28 16:52:40 +00001306** just returns 0. This routine acquires a read-lock the first time it
1307** has to go to disk, and could also playback an old journal if necessary.
1308** Since _lookup() never goes to disk, it never has to deal with locks
1309** or journal files.
drhed7c8552001-04-11 14:29:21 +00001310*/
drh3aac2dd2004-04-26 14:10:20 +00001311int sqlite3pager_get(Pager *pPager, Pgno pgno, void **ppPage){
drhed7c8552001-04-11 14:29:21 +00001312 PgHdr *pPg;
drh8766c342002-11-09 00:33:15 +00001313 int rc;
drhed7c8552001-04-11 14:29:21 +00001314
drhd9b02572001-04-15 00:37:09 +00001315 /* Make sure we have not hit any critical errors.
1316 */
drh836faa42003-01-11 13:30:57 +00001317 assert( pPager!=0 );
1318 assert( pgno!=0 );
drh2e6d11b2003-04-25 15:37:57 +00001319 *ppPage = 0;
drhd9b02572001-04-15 00:37:09 +00001320 if( pPager->errMask & ~(PAGER_ERR_FULL) ){
1321 return pager_errcode(pPager);
1322 }
1323
drhed7c8552001-04-11 14:29:21 +00001324 /* If this is the first page accessed, then get a read lock
1325 ** on the database file.
1326 */
1327 if( pPager->nRef==0 ){
drh8766c342002-11-09 00:33:15 +00001328 rc = sqliteOsReadLock(&pPager->fd);
1329 if( rc!=SQLITE_OK ){
drh8766c342002-11-09 00:33:15 +00001330 return rc;
drhed7c8552001-04-11 14:29:21 +00001331 }
drhd9b02572001-04-15 00:37:09 +00001332 pPager->state = SQLITE_READLOCK;
drhed7c8552001-04-11 14:29:21 +00001333
1334 /* If a journal file exists, try to play it back.
1335 */
drhda47d772002-12-02 04:25:19 +00001336 if( pPager->useJournal && sqliteOsFileExists(pPager->zJournal) ){
drhe2227f02003-06-14 11:42:57 +00001337 int rc;
drhed7c8552001-04-11 14:29:21 +00001338
drha7fcb052001-12-14 15:09:55 +00001339 /* Get a write lock on the database
1340 */
1341 rc = sqliteOsWriteLock(&pPager->fd);
1342 if( rc!=SQLITE_OK ){
drh8766c342002-11-09 00:33:15 +00001343 if( sqliteOsUnlock(&pPager->fd)!=SQLITE_OK ){
1344 /* This should never happen! */
1345 rc = SQLITE_INTERNAL;
1346 }
drh8766c342002-11-09 00:33:15 +00001347 return rc;
drha7fcb052001-12-14 15:09:55 +00001348 }
1349 pPager->state = SQLITE_WRITELOCK;
1350
drhe2227f02003-06-14 11:42:57 +00001351 /* Open the journal for reading only. Return SQLITE_BUSY if
1352 ** we are unable to open the journal file.
drhf57b3392001-10-08 13:22:32 +00001353 **
drhe2227f02003-06-14 11:42:57 +00001354 ** The journal file does not need to be locked itself. The
1355 ** journal file is never open unless the main database file holds
1356 ** a write lock, so there is never any chance of two or more
1357 ** processes opening the journal at the same time.
drhed7c8552001-04-11 14:29:21 +00001358 */
drhe2227f02003-06-14 11:42:57 +00001359 rc = sqliteOsOpenReadOnly(pPager->zJournal, &pPager->jfd);
drha7fcb052001-12-14 15:09:55 +00001360 if( rc!=SQLITE_OK ){
1361 rc = sqliteOsUnlock(&pPager->fd);
1362 assert( rc==SQLITE_OK );
drhed7c8552001-04-11 14:29:21 +00001363 return SQLITE_BUSY;
1364 }
drha7fcb052001-12-14 15:09:55 +00001365 pPager->journalOpen = 1;
drhdb48ee02003-01-16 13:42:43 +00001366 pPager->journalStarted = 0;
drhed7c8552001-04-11 14:29:21 +00001367
1368 /* Playback and delete the journal. Drop the database write
1369 ** lock and reacquire the read lock.
1370 */
drh99ee3602003-02-16 19:13:36 +00001371 rc = pager_playback(pPager, 0);
drhd9b02572001-04-15 00:37:09 +00001372 if( rc!=SQLITE_OK ){
1373 return rc;
1374 }
drhed7c8552001-04-11 14:29:21 +00001375 }
1376 pPg = 0;
1377 }else{
1378 /* Search for page in cache */
drhd9b02572001-04-15 00:37:09 +00001379 pPg = pager_lookup(pPager, pgno);
drhed7c8552001-04-11 14:29:21 +00001380 }
1381 if( pPg==0 ){
drhd9b02572001-04-15 00:37:09 +00001382 /* The requested page is not in the page cache. */
drhed7c8552001-04-11 14:29:21 +00001383 int h;
drh7e3b0a02001-04-28 16:52:40 +00001384 pPager->nMiss++;
drhed7c8552001-04-11 14:29:21 +00001385 if( pPager->nPage<pPager->mxPage || pPager->pFirst==0 ){
1386 /* Create a new page */
drhd0ba1932004-02-10 01:54:28 +00001387 pPg = sqliteMallocRaw( sizeof(*pPg) + SQLITE_PAGE_SIZE
drh968af522003-02-11 14:55:40 +00001388 + sizeof(u32) + pPager->nExtra );
drhd9b02572001-04-15 00:37:09 +00001389 if( pPg==0 ){
drhd9b02572001-04-15 00:37:09 +00001390 pager_unwritelock(pPager);
1391 pPager->errMask |= PAGER_ERR_MEM;
1392 return SQLITE_NOMEM;
1393 }
drh8c1238a2003-01-02 14:43:55 +00001394 memset(pPg, 0, sizeof(*pPg));
drhed7c8552001-04-11 14:29:21 +00001395 pPg->pPager = pPager;
drhd9b02572001-04-15 00:37:09 +00001396 pPg->pNextAll = pPager->pAll;
1397 if( pPager->pAll ){
1398 pPager->pAll->pPrevAll = pPg;
1399 }
1400 pPg->pPrevAll = 0;
drhd79caeb2001-04-15 02:27:24 +00001401 pPager->pAll = pPg;
drhd9b02572001-04-15 00:37:09 +00001402 pPager->nPage++;
drhed7c8552001-04-11 14:29:21 +00001403 }else{
drhdb48ee02003-01-16 13:42:43 +00001404 /* Find a page to recycle. Try to locate a page that does not
1405 ** require us to do an fsync() on the journal.
1406 */
drh341eae82003-01-21 02:39:36 +00001407 pPg = pPager->pFirstSynced;
drhb19a2bc2001-09-16 00:13:26 +00001408
drhdb48ee02003-01-16 13:42:43 +00001409 /* If we could not find a page that does not require an fsync()
1410 ** on the journal file then fsync the journal file. This is a
1411 ** very slow operation, so we work hard to avoid it. But sometimes
1412 ** it can't be helped.
drhb19a2bc2001-09-16 00:13:26 +00001413 */
drh603240c2002-03-05 01:11:12 +00001414 if( pPg==0 ){
drh34e79ce2004-02-08 06:05:46 +00001415 int rc = syncJournal(pPager);
drh50e5dad2001-09-15 00:57:28 +00001416 if( rc!=0 ){
drh3aac2dd2004-04-26 14:10:20 +00001417 sqlite3pager_rollback(pPager);
drh50e5dad2001-09-15 00:57:28 +00001418 return SQLITE_IOERR;
1419 }
1420 pPg = pPager->pFirst;
1421 }
drhd9b02572001-04-15 00:37:09 +00001422 assert( pPg->nRef==0 );
drhdb48ee02003-01-16 13:42:43 +00001423
1424 /* Write the page to the database file if it is dirty.
1425 */
1426 if( pPg->dirty ){
1427 assert( pPg->needSync==0 );
drh2554f8b2003-01-22 01:26:44 +00001428 pPg->pDirty = 0;
1429 rc = pager_write_pagelist( pPg );
drhdb48ee02003-01-16 13:42:43 +00001430 if( rc!=SQLITE_OK ){
drh3aac2dd2004-04-26 14:10:20 +00001431 sqlite3pager_rollback(pPager);
drhdb48ee02003-01-16 13:42:43 +00001432 return SQLITE_IOERR;
1433 }
drhdb48ee02003-01-16 13:42:43 +00001434 }
drh50e5dad2001-09-15 00:57:28 +00001435 assert( pPg->dirty==0 );
drhd9b02572001-04-15 00:37:09 +00001436
drhdb48ee02003-01-16 13:42:43 +00001437 /* If the page we are recycling is marked as alwaysRollback, then
drh193a6b42002-07-07 16:52:46 +00001438 ** set the global alwaysRollback flag, thus disabling the
1439 ** sqlite_dont_rollback() optimization for the rest of this transaction.
1440 ** It is necessary to do this because the page marked alwaysRollback
1441 ** might be reloaded at a later time but at that point we won't remember
1442 ** that is was marked alwaysRollback. This means that all pages must
1443 ** be marked as alwaysRollback from here on out.
1444 */
1445 if( pPg->alwaysRollback ){
1446 pPager->alwaysRollback = 1;
1447 }
1448
drhd9b02572001-04-15 00:37:09 +00001449 /* Unlink the old page from the free list and the hash table
1450 */
drh341eae82003-01-21 02:39:36 +00001451 if( pPg==pPager->pFirstSynced ){
1452 PgHdr *p = pPg->pNextFree;
1453 while( p && p->needSync ){ p = p->pNextFree; }
1454 pPager->pFirstSynced = p;
1455 }
drh6019e162001-07-02 17:51:45 +00001456 if( pPg->pPrevFree ){
1457 pPg->pPrevFree->pNextFree = pPg->pNextFree;
drhed7c8552001-04-11 14:29:21 +00001458 }else{
drh6019e162001-07-02 17:51:45 +00001459 assert( pPager->pFirst==pPg );
1460 pPager->pFirst = pPg->pNextFree;
drhed7c8552001-04-11 14:29:21 +00001461 }
drh6019e162001-07-02 17:51:45 +00001462 if( pPg->pNextFree ){
1463 pPg->pNextFree->pPrevFree = pPg->pPrevFree;
1464 }else{
1465 assert( pPager->pLast==pPg );
1466 pPager->pLast = pPg->pPrevFree;
1467 }
1468 pPg->pNextFree = pPg->pPrevFree = 0;
drhed7c8552001-04-11 14:29:21 +00001469 if( pPg->pNextHash ){
1470 pPg->pNextHash->pPrevHash = pPg->pPrevHash;
1471 }
1472 if( pPg->pPrevHash ){
1473 pPg->pPrevHash->pNextHash = pPg->pNextHash;
1474 }else{
drhd9b02572001-04-15 00:37:09 +00001475 h = pager_hash(pPg->pgno);
drhed7c8552001-04-11 14:29:21 +00001476 assert( pPager->aHash[h]==pPg );
1477 pPager->aHash[h] = pPg->pNextHash;
1478 }
drh6019e162001-07-02 17:51:45 +00001479 pPg->pNextHash = pPg->pPrevHash = 0;
drhd9b02572001-04-15 00:37:09 +00001480 pPager->nOvfl++;
drhed7c8552001-04-11 14:29:21 +00001481 }
1482 pPg->pgno = pgno;
drh1ab43002002-01-14 09:28:19 +00001483 if( pPager->aInJournal && (int)pgno<=pPager->origDbSize ){
drhed6c8672003-01-12 18:02:16 +00001484 sqliteCheckMemory(pPager->aInJournal, pgno/8);
drhdb48ee02003-01-16 13:42:43 +00001485 assert( pPager->journalOpen );
drh6019e162001-07-02 17:51:45 +00001486 pPg->inJournal = (pPager->aInJournal[pgno/8] & (1<<(pgno&7)))!=0;
drhdb48ee02003-01-16 13:42:43 +00001487 pPg->needSync = 0;
drh6019e162001-07-02 17:51:45 +00001488 }else{
1489 pPg->inJournal = 0;
drhdb48ee02003-01-16 13:42:43 +00001490 pPg->needSync = 0;
drh6019e162001-07-02 17:51:45 +00001491 }
drh03eb96a2002-11-10 23:32:56 +00001492 if( pPager->aInCkpt && (int)pgno<=pPager->ckptSize
1493 && (pPager->aInCkpt[pgno/8] & (1<<(pgno&7)))!=0 ){
drh3aac2dd2004-04-26 14:10:20 +00001494 page_add_to_stmt_list(pPg);
drhfa86c412002-02-02 15:01:15 +00001495 }else{
drh3aac2dd2004-04-26 14:10:20 +00001496 page_remove_from_stmt_list(pPg);
drhfa86c412002-02-02 15:01:15 +00001497 }
drhed7c8552001-04-11 14:29:21 +00001498 pPg->dirty = 0;
1499 pPg->nRef = 1;
drhdd793422001-06-28 01:54:48 +00001500 REFINFO(pPg);
drhd9b02572001-04-15 00:37:09 +00001501 pPager->nRef++;
1502 h = pager_hash(pgno);
drhed7c8552001-04-11 14:29:21 +00001503 pPg->pNextHash = pPager->aHash[h];
1504 pPager->aHash[h] = pPg;
1505 if( pPg->pNextHash ){
1506 assert( pPg->pNextHash->pPrevHash==0 );
1507 pPg->pNextHash->pPrevHash = pPg;
1508 }
drh2e6d11b2003-04-25 15:37:57 +00001509 if( pPager->nExtra>0 ){
1510 memset(PGHDR_TO_EXTRA(pPg), 0, pPager->nExtra);
1511 }
drh3aac2dd2004-04-26 14:10:20 +00001512 if( pPager->dbSize<0 ) sqlite3pager_pagecount(pPager);
drh2e6d11b2003-04-25 15:37:57 +00001513 if( pPager->errMask!=0 ){
drh3aac2dd2004-04-26 14:10:20 +00001514 sqlite3pager_unref(PGHDR_TO_DATA(pPg));
drh2e6d11b2003-04-25 15:37:57 +00001515 rc = pager_errcode(pPager);
1516 return rc;
1517 }
drh1ab43002002-01-14 09:28:19 +00001518 if( pPager->dbSize<(int)pgno ){
drhd0ba1932004-02-10 01:54:28 +00001519 memset(PGHDR_TO_DATA(pPg), 0, SQLITE_PAGE_SIZE);
drh306dc212001-05-21 13:45:10 +00001520 }else{
drh81a20f22001-10-12 17:30:04 +00001521 int rc;
drhd0ba1932004-02-10 01:54:28 +00001522 sqliteOsSeek(&pPager->fd, (pgno-1)*(off_t)SQLITE_PAGE_SIZE);
1523 rc = sqliteOsRead(&pPager->fd, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE);
drh9eb9e262004-02-11 02:18:05 +00001524 TRACE2("FETCH %d\n", pPg->pgno);
1525 CODEC(pPager, PGHDR_TO_DATA(pPg), pPg->pgno, 3);
drh81a20f22001-10-12 17:30:04 +00001526 if( rc!=SQLITE_OK ){
drh28be87c2002-11-05 23:03:02 +00001527 off_t fileSize;
drh4e371ee2002-09-05 16:08:27 +00001528 if( sqliteOsFileSize(&pPager->fd,&fileSize)!=SQLITE_OK
drhd0ba1932004-02-10 01:54:28 +00001529 || fileSize>=pgno*SQLITE_PAGE_SIZE ){
drh3aac2dd2004-04-26 14:10:20 +00001530 sqlite3pager_unref(PGHDR_TO_DATA(pPg));
drh4e371ee2002-09-05 16:08:27 +00001531 return rc;
1532 }else{
drhd0ba1932004-02-10 01:54:28 +00001533 memset(PGHDR_TO_DATA(pPg), 0, SQLITE_PAGE_SIZE);
drh4e371ee2002-09-05 16:08:27 +00001534 }
drh81a20f22001-10-12 17:30:04 +00001535 }
drh306dc212001-05-21 13:45:10 +00001536 }
drhed7c8552001-04-11 14:29:21 +00001537 }else{
drhd9b02572001-04-15 00:37:09 +00001538 /* The requested page is in the page cache. */
drh7e3b0a02001-04-28 16:52:40 +00001539 pPager->nHit++;
drhdf0b3b02001-06-23 11:36:20 +00001540 page_ref(pPg);
drhed7c8552001-04-11 14:29:21 +00001541 }
1542 *ppPage = PGHDR_TO_DATA(pPg);
1543 return SQLITE_OK;
1544}
1545
1546/*
drh7e3b0a02001-04-28 16:52:40 +00001547** Acquire a page if it is already in the in-memory cache. Do
1548** not read the page from disk. Return a pointer to the page,
1549** or 0 if the page is not in cache.
1550**
drh3aac2dd2004-04-26 14:10:20 +00001551** See also sqlite3pager_get(). The difference between this routine
1552** and sqlite3pager_get() is that _get() will go to the disk and read
drh7e3b0a02001-04-28 16:52:40 +00001553** in the page if the page is not already in cache. This routine
drh5e00f6c2001-09-13 13:46:56 +00001554** returns NULL if the page is not in cache or if a disk I/O error
1555** has ever happened.
drh7e3b0a02001-04-28 16:52:40 +00001556*/
drh3aac2dd2004-04-26 14:10:20 +00001557void *sqlite3pager_lookup(Pager *pPager, Pgno pgno){
drh7e3b0a02001-04-28 16:52:40 +00001558 PgHdr *pPg;
1559
drh836faa42003-01-11 13:30:57 +00001560 assert( pPager!=0 );
1561 assert( pgno!=0 );
drh7e3b0a02001-04-28 16:52:40 +00001562 if( pPager->errMask & ~(PAGER_ERR_FULL) ){
1563 return 0;
1564 }
drh836faa42003-01-11 13:30:57 +00001565 /* if( pPager->nRef==0 ){
1566 ** return 0;
1567 ** }
1568 */
drh7e3b0a02001-04-28 16:52:40 +00001569 pPg = pager_lookup(pPager, pgno);
1570 if( pPg==0 ) return 0;
drhdf0b3b02001-06-23 11:36:20 +00001571 page_ref(pPg);
drh7e3b0a02001-04-28 16:52:40 +00001572 return PGHDR_TO_DATA(pPg);
1573}
1574
1575/*
drhed7c8552001-04-11 14:29:21 +00001576** Release a page.
1577**
1578** If the number of references to the page drop to zero, then the
1579** page is added to the LRU list. When all references to all pages
drhd9b02572001-04-15 00:37:09 +00001580** are released, a rollback occurs and the lock on the database is
drhed7c8552001-04-11 14:29:21 +00001581** removed.
1582*/
drh3aac2dd2004-04-26 14:10:20 +00001583int sqlite3pager_unref(void *pData){
drhed7c8552001-04-11 14:29:21 +00001584 PgHdr *pPg;
drhd9b02572001-04-15 00:37:09 +00001585
1586 /* Decrement the reference count for this page
1587 */
drhed7c8552001-04-11 14:29:21 +00001588 pPg = DATA_TO_PGHDR(pData);
1589 assert( pPg->nRef>0 );
drhed7c8552001-04-11 14:29:21 +00001590 pPg->nRef--;
drhdd793422001-06-28 01:54:48 +00001591 REFINFO(pPg);
drhd9b02572001-04-15 00:37:09 +00001592
drh72f82862001-05-24 21:06:34 +00001593 /* When the number of references to a page reach 0, call the
1594 ** destructor and add the page to the freelist.
drhd9b02572001-04-15 00:37:09 +00001595 */
drhed7c8552001-04-11 14:29:21 +00001596 if( pPg->nRef==0 ){
drh1eaa2692001-09-18 02:02:23 +00001597 Pager *pPager;
1598 pPager = pPg->pPager;
drhd9b02572001-04-15 00:37:09 +00001599 pPg->pNextFree = 0;
1600 pPg->pPrevFree = pPager->pLast;
drhed7c8552001-04-11 14:29:21 +00001601 pPager->pLast = pPg;
drhd9b02572001-04-15 00:37:09 +00001602 if( pPg->pPrevFree ){
1603 pPg->pPrevFree->pNextFree = pPg;
drhed7c8552001-04-11 14:29:21 +00001604 }else{
1605 pPager->pFirst = pPg;
1606 }
drh341eae82003-01-21 02:39:36 +00001607 if( pPg->needSync==0 && pPager->pFirstSynced==0 ){
1608 pPager->pFirstSynced = pPg;
1609 }
drh72f82862001-05-24 21:06:34 +00001610 if( pPager->xDestructor ){
1611 pPager->xDestructor(pData);
1612 }
drhd9b02572001-04-15 00:37:09 +00001613
1614 /* When all pages reach the freelist, drop the read lock from
1615 ** the database file.
1616 */
1617 pPager->nRef--;
1618 assert( pPager->nRef>=0 );
1619 if( pPager->nRef==0 ){
1620 pager_reset(pPager);
1621 }
drhed7c8552001-04-11 14:29:21 +00001622 }
drhd9b02572001-04-15 00:37:09 +00001623 return SQLITE_OK;
drhed7c8552001-04-11 14:29:21 +00001624}
1625
1626/*
drhda47d772002-12-02 04:25:19 +00001627** Create a journal file for pPager. There should already be a write
1628** lock on the database file when this routine is called.
1629**
1630** Return SQLITE_OK if everything. Return an error code and release the
1631** write lock if anything goes wrong.
1632*/
1633static int pager_open_journal(Pager *pPager){
1634 int rc;
1635 assert( pPager->state==SQLITE_WRITELOCK );
1636 assert( pPager->journalOpen==0 );
1637 assert( pPager->useJournal );
drh3aac2dd2004-04-26 14:10:20 +00001638 sqlite3pager_pagecount(pPager);
drhda47d772002-12-02 04:25:19 +00001639 pPager->aInJournal = sqliteMalloc( pPager->dbSize/8 + 1 );
1640 if( pPager->aInJournal==0 ){
1641 sqliteOsReadLock(&pPager->fd);
1642 pPager->state = SQLITE_READLOCK;
1643 return SQLITE_NOMEM;
1644 }
1645 rc = sqliteOsOpenExclusive(pPager->zJournal, &pPager->jfd,pPager->tempFile);
1646 if( rc!=SQLITE_OK ){
1647 sqliteFree(pPager->aInJournal);
1648 pPager->aInJournal = 0;
1649 sqliteOsReadLock(&pPager->fd);
1650 pPager->state = SQLITE_READLOCK;
1651 return SQLITE_CANTOPEN;
1652 }
drha76c82e2003-07-27 18:59:42 +00001653 sqliteOsOpenDirectory(pPager->zDirectory, &pPager->jfd);
drhda47d772002-12-02 04:25:19 +00001654 pPager->journalOpen = 1;
drhdb48ee02003-01-16 13:42:43 +00001655 pPager->journalStarted = 0;
drhda47d772002-12-02 04:25:19 +00001656 pPager->needSync = 0;
1657 pPager->alwaysRollback = 0;
drh968af522003-02-11 14:55:40 +00001658 pPager->nRec = 0;
drh2e6d11b2003-04-25 15:37:57 +00001659 if( pPager->errMask!=0 ){
1660 rc = pager_errcode(pPager);
1661 return rc;
1662 }
drhda47d772002-12-02 04:25:19 +00001663 pPager->origDbSize = pPager->dbSize;
drh968af522003-02-11 14:55:40 +00001664 if( journal_format==JOURNAL_FORMAT_3 ){
1665 rc = sqliteOsWrite(&pPager->jfd, aJournalMagic3, sizeof(aJournalMagic3));
1666 if( rc==SQLITE_OK ){
drh4303fee2003-02-15 23:09:17 +00001667 rc = write32bits(&pPager->jfd, pPager->noSync ? 0xffffffff : 0);
drh968af522003-02-11 14:55:40 +00001668 }
1669 if( rc==SQLITE_OK ){
drhbbd82df2004-02-11 09:46:30 +00001670 sqliteRandomness(sizeof(pPager->cksumInit), &pPager->cksumInit);
drh968af522003-02-11 14:55:40 +00001671 rc = write32bits(&pPager->jfd, pPager->cksumInit);
1672 }
1673 }else if( journal_format==JOURNAL_FORMAT_2 ){
1674 rc = sqliteOsWrite(&pPager->jfd, aJournalMagic2, sizeof(aJournalMagic2));
drhda47d772002-12-02 04:25:19 +00001675 }else{
drh968af522003-02-11 14:55:40 +00001676 assert( journal_format==JOURNAL_FORMAT_1 );
1677 rc = sqliteOsWrite(&pPager->jfd, aJournalMagic1, sizeof(aJournalMagic1));
drhda47d772002-12-02 04:25:19 +00001678 }
1679 if( rc==SQLITE_OK ){
1680 rc = write32bits(&pPager->jfd, pPager->dbSize);
1681 }
1682 if( pPager->ckptAutoopen && rc==SQLITE_OK ){
drh3aac2dd2004-04-26 14:10:20 +00001683 rc = sqlite3pager_stmt_begin(pPager);
drhda47d772002-12-02 04:25:19 +00001684 }
1685 if( rc!=SQLITE_OK ){
1686 rc = pager_unwritelock(pPager);
1687 if( rc==SQLITE_OK ){
1688 rc = SQLITE_FULL;
1689 }
1690 }
1691 return rc;
1692}
1693
1694/*
drh4b845d72002-03-05 12:41:19 +00001695** Acquire a write-lock on the database. The lock is removed when
1696** the any of the following happen:
1697**
drh3aac2dd2004-04-26 14:10:20 +00001698** * sqlite3pager_commit() is called.
1699** * sqlite3pager_rollback() is called.
1700** * sqlite3pager_close() is called.
1701** * sqlite3pager_unref() is called to on every outstanding page.
drh4b845d72002-03-05 12:41:19 +00001702**
1703** The parameter to this routine is a pointer to any open page of the
1704** database file. Nothing changes about the page - it is used merely
1705** to acquire a pointer to the Pager structure and as proof that there
1706** is already a read-lock on the database.
1707**
drhda47d772002-12-02 04:25:19 +00001708** A journal file is opened if this is not a temporary file. For
1709** temporary files, the opening of the journal file is deferred until
1710** there is an actual need to write to the journal.
1711**
drh4b845d72002-03-05 12:41:19 +00001712** If the database is already write-locked, this routine is a no-op.
1713*/
drh3aac2dd2004-04-26 14:10:20 +00001714int sqlite3pager_begin(void *pData){
drh4b845d72002-03-05 12:41:19 +00001715 PgHdr *pPg = DATA_TO_PGHDR(pData);
1716 Pager *pPager = pPg->pPager;
1717 int rc = SQLITE_OK;
1718 assert( pPg->nRef>0 );
1719 assert( pPager->state!=SQLITE_UNLOCK );
1720 if( pPager->state==SQLITE_READLOCK ){
1721 assert( pPager->aInJournal==0 );
1722 rc = sqliteOsWriteLock(&pPager->fd);
1723 if( rc!=SQLITE_OK ){
1724 return rc;
1725 }
drh4b845d72002-03-05 12:41:19 +00001726 pPager->state = SQLITE_WRITELOCK;
drhda47d772002-12-02 04:25:19 +00001727 pPager->dirtyFile = 0;
drhdb48ee02003-01-16 13:42:43 +00001728 TRACE1("TRANSACTION\n");
drhda47d772002-12-02 04:25:19 +00001729 if( pPager->useJournal && !pPager->tempFile ){
1730 rc = pager_open_journal(pPager);
drh4b845d72002-03-05 12:41:19 +00001731 }
1732 }
1733 return rc;
1734}
1735
1736/*
drhed7c8552001-04-11 14:29:21 +00001737** Mark a data page as writeable. The page is written into the journal
1738** if it is not there already. This routine must be called before making
1739** changes to a page.
1740**
1741** The first time this routine is called, the pager creates a new
1742** journal and acquires a write lock on the database. If the write
1743** lock could not be acquired, this routine returns SQLITE_BUSY. The
drh306dc212001-05-21 13:45:10 +00001744** calling routine must check for that return value and be careful not to
drhed7c8552001-04-11 14:29:21 +00001745** change any page data until this routine returns SQLITE_OK.
drhd9b02572001-04-15 00:37:09 +00001746**
1747** If the journal file could not be written because the disk is full,
1748** then this routine returns SQLITE_FULL and does an immediate rollback.
1749** All subsequent write attempts also return SQLITE_FULL until there
drh3aac2dd2004-04-26 14:10:20 +00001750** is a call to sqlite3pager_commit() or sqlite3pager_rollback() to
drhd9b02572001-04-15 00:37:09 +00001751** reset.
drhed7c8552001-04-11 14:29:21 +00001752*/
drh3aac2dd2004-04-26 14:10:20 +00001753int sqlite3pager_write(void *pData){
drh69688d52001-04-14 16:38:23 +00001754 PgHdr *pPg = DATA_TO_PGHDR(pData);
1755 Pager *pPager = pPg->pPager;
drhd79caeb2001-04-15 02:27:24 +00001756 int rc = SQLITE_OK;
drh69688d52001-04-14 16:38:23 +00001757
drh6446c4d2001-12-15 14:22:18 +00001758 /* Check for errors
1759 */
drhd9b02572001-04-15 00:37:09 +00001760 if( pPager->errMask ){
1761 return pager_errcode(pPager);
1762 }
drh5e00f6c2001-09-13 13:46:56 +00001763 if( pPager->readOnly ){
1764 return SQLITE_PERM;
1765 }
drh6446c4d2001-12-15 14:22:18 +00001766
1767 /* Mark the page as dirty. If the page has already been written
1768 ** to the journal then we can return right away.
1769 */
drhd9b02572001-04-15 00:37:09 +00001770 pPg->dirty = 1;
drh0f892532002-05-30 12:27:03 +00001771 if( pPg->inJournal && (pPg->inCkpt || pPager->ckptInUse==0) ){
drha1680452002-04-18 01:56:57 +00001772 pPager->dirtyFile = 1;
drhfa86c412002-02-02 15:01:15 +00001773 return SQLITE_OK;
1774 }
drh6446c4d2001-12-15 14:22:18 +00001775
1776 /* If we get this far, it means that the page needs to be
drhfa86c412002-02-02 15:01:15 +00001777 ** written to the transaction journal or the ckeckpoint journal
1778 ** or both.
1779 **
1780 ** First check to see that the transaction journal exists and
1781 ** create it if it does not.
drh6446c4d2001-12-15 14:22:18 +00001782 */
drhd9b02572001-04-15 00:37:09 +00001783 assert( pPager->state!=SQLITE_UNLOCK );
drh3aac2dd2004-04-26 14:10:20 +00001784 rc = sqlite3pager_begin(pData);
drhda47d772002-12-02 04:25:19 +00001785 if( rc!=SQLITE_OK ){
1786 return rc;
1787 }
drhd9b02572001-04-15 00:37:09 +00001788 assert( pPager->state==SQLITE_WRITELOCK );
drhda47d772002-12-02 04:25:19 +00001789 if( !pPager->journalOpen && pPager->useJournal ){
1790 rc = pager_open_journal(pPager);
1791 if( rc!=SQLITE_OK ) return rc;
1792 }
1793 assert( pPager->journalOpen || !pPager->useJournal );
1794 pPager->dirtyFile = 1;
drh6446c4d2001-12-15 14:22:18 +00001795
drhfa86c412002-02-02 15:01:15 +00001796 /* The transaction journal now exists and we have a write lock on the
1797 ** main database file. Write the current page to the transaction
1798 ** journal if it is not there already.
drh6446c4d2001-12-15 14:22:18 +00001799 */
drhdb48ee02003-01-16 13:42:43 +00001800 if( !pPg->inJournal && pPager->useJournal ){
1801 if( (int)pPg->pgno <= pPager->origDbSize ){
drh968af522003-02-11 14:55:40 +00001802 int szPg;
1803 u32 saved;
1804 if( journal_format>=JOURNAL_FORMAT_3 ){
1805 u32 cksum = pager_cksum(pPager, pPg->pgno, pData);
1806 saved = *(u32*)PGHDR_TO_EXTRA(pPg);
1807 store32bits(cksum, pPg, SQLITE_PAGE_SIZE);
1808 szPg = SQLITE_PAGE_SIZE+8;
1809 }else{
1810 szPg = SQLITE_PAGE_SIZE+4;
1811 }
1812 store32bits(pPg->pgno, pPg, -4);
drh9eb9e262004-02-11 02:18:05 +00001813 CODEC(pPager, pData, pPg->pgno, 7);
drh968af522003-02-11 14:55:40 +00001814 rc = sqliteOsWrite(&pPager->jfd, &((char*)pData)[-4], szPg);
drh9eb9e262004-02-11 02:18:05 +00001815 TRACE3("JOURNAL %d %d\n", pPg->pgno, pPg->needSync);
1816 CODEC(pPager, pData, pPg->pgno, 0);
drh968af522003-02-11 14:55:40 +00001817 if( journal_format>=JOURNAL_FORMAT_3 ){
1818 *(u32*)PGHDR_TO_EXTRA(pPg) = saved;
1819 }
drhdb48ee02003-01-16 13:42:43 +00001820 if( rc!=SQLITE_OK ){
drh3aac2dd2004-04-26 14:10:20 +00001821 sqlite3pager_rollback(pPager);
drhdb48ee02003-01-16 13:42:43 +00001822 pPager->errMask |= PAGER_ERR_FULL;
1823 return rc;
1824 }
drh99ee3602003-02-16 19:13:36 +00001825 pPager->nRec++;
drhdb48ee02003-01-16 13:42:43 +00001826 assert( pPager->aInJournal!=0 );
1827 pPager->aInJournal[pPg->pgno/8] |= 1<<(pPg->pgno&7);
1828 pPg->needSync = !pPager->noSync;
1829 pPg->inJournal = 1;
1830 if( pPager->ckptInUse ){
1831 pPager->aInCkpt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
drh3aac2dd2004-04-26 14:10:20 +00001832 page_add_to_stmt_list(pPg);
drhdb48ee02003-01-16 13:42:43 +00001833 }
drhdb48ee02003-01-16 13:42:43 +00001834 }else{
1835 pPg->needSync = !pPager->journalStarted && !pPager->noSync;
1836 TRACE3("APPEND %d %d\n", pPg->pgno, pPg->needSync);
drhd9b02572001-04-15 00:37:09 +00001837 }
drhdb48ee02003-01-16 13:42:43 +00001838 if( pPg->needSync ){
1839 pPager->needSync = 1;
drhfa86c412002-02-02 15:01:15 +00001840 }
drh69688d52001-04-14 16:38:23 +00001841 }
drh6446c4d2001-12-15 14:22:18 +00001842
drhfa86c412002-02-02 15:01:15 +00001843 /* If the checkpoint journal is open and the page is not in it,
drh968af522003-02-11 14:55:40 +00001844 ** then write the current page to the checkpoint journal. Note that
1845 ** the checkpoint journal always uses the simplier format 2 that lacks
1846 ** checksums. The header is also omitted from the checkpoint journal.
drh6446c4d2001-12-15 14:22:18 +00001847 */
drh0f892532002-05-30 12:27:03 +00001848 if( pPager->ckptInUse && !pPg->inCkpt && (int)pPg->pgno<=pPager->ckptSize ){
drh1e336b42002-02-14 12:50:33 +00001849 assert( pPg->inJournal || (int)pPg->pgno>pPager->origDbSize );
drh968af522003-02-11 14:55:40 +00001850 store32bits(pPg->pgno, pPg, -4);
drh9eb9e262004-02-11 02:18:05 +00001851 CODEC(pPager, pData, pPg->pgno, 7);
drhd0ba1932004-02-10 01:54:28 +00001852 rc = sqliteOsWrite(&pPager->cpfd, &((char*)pData)[-4], SQLITE_PAGE_SIZE+4);
drh9eb9e262004-02-11 02:18:05 +00001853 TRACE2("CKPT-JOURNAL %d\n", pPg->pgno);
1854 CODEC(pPager, pData, pPg->pgno, 0);
drhfa86c412002-02-02 15:01:15 +00001855 if( rc!=SQLITE_OK ){
drh3aac2dd2004-04-26 14:10:20 +00001856 sqlite3pager_rollback(pPager);
drhfa86c412002-02-02 15:01:15 +00001857 pPager->errMask |= PAGER_ERR_FULL;
1858 return rc;
1859 }
drh9bd47a92003-01-07 14:46:08 +00001860 pPager->ckptNRec++;
drhfa86c412002-02-02 15:01:15 +00001861 assert( pPager->aInCkpt!=0 );
1862 pPager->aInCkpt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
drh3aac2dd2004-04-26 14:10:20 +00001863 page_add_to_stmt_list(pPg);
drhfa86c412002-02-02 15:01:15 +00001864 }
1865
1866 /* Update the database size and return.
1867 */
drh1ab43002002-01-14 09:28:19 +00001868 if( pPager->dbSize<(int)pPg->pgno ){
drh306dc212001-05-21 13:45:10 +00001869 pPager->dbSize = pPg->pgno;
1870 }
drh69688d52001-04-14 16:38:23 +00001871 return rc;
drhed7c8552001-04-11 14:29:21 +00001872}
1873
1874/*
drhaacc5432002-01-06 17:07:40 +00001875** Return TRUE if the page given in the argument was previously passed
drh3aac2dd2004-04-26 14:10:20 +00001876** to sqlite3pager_write(). In other words, return TRUE if it is ok
drh6019e162001-07-02 17:51:45 +00001877** to change the content of the page.
1878*/
drh3aac2dd2004-04-26 14:10:20 +00001879int sqlite3pager_iswriteable(void *pData){
drh6019e162001-07-02 17:51:45 +00001880 PgHdr *pPg = DATA_TO_PGHDR(pData);
1881 return pPg->dirty;
1882}
1883
1884/*
drh001bbcb2003-03-19 03:14:00 +00001885** Replace the content of a single page with the information in the third
1886** argument.
1887*/
drh3aac2dd2004-04-26 14:10:20 +00001888int sqlite3pager_overwrite(Pager *pPager, Pgno pgno, void *pData){
drh001bbcb2003-03-19 03:14:00 +00001889 void *pPage;
1890 int rc;
1891
drh3aac2dd2004-04-26 14:10:20 +00001892 rc = sqlite3pager_get(pPager, pgno, &pPage);
drh001bbcb2003-03-19 03:14:00 +00001893 if( rc==SQLITE_OK ){
drh3aac2dd2004-04-26 14:10:20 +00001894 rc = sqlite3pager_write(pPage);
drh001bbcb2003-03-19 03:14:00 +00001895 if( rc==SQLITE_OK ){
drhd0ba1932004-02-10 01:54:28 +00001896 memcpy(pPage, pData, SQLITE_PAGE_SIZE);
drh001bbcb2003-03-19 03:14:00 +00001897 }
drh3aac2dd2004-04-26 14:10:20 +00001898 sqlite3pager_unref(pPage);
drh001bbcb2003-03-19 03:14:00 +00001899 }
1900 return rc;
1901}
1902
1903/*
drh30e58752002-03-02 20:41:57 +00001904** A call to this routine tells the pager that it is not necessary to
1905** write the information on page "pgno" back to the disk, even though
1906** that page might be marked as dirty.
1907**
1908** The overlying software layer calls this routine when all of the data
1909** on the given page is unused. The pager marks the page as clean so
1910** that it does not get written to disk.
1911**
1912** Tests show that this optimization, together with the
drh3aac2dd2004-04-26 14:10:20 +00001913** sqlite3pager_dont_rollback() below, more than double the speed
drh30e58752002-03-02 20:41:57 +00001914** of large INSERT operations and quadruple the speed of large DELETEs.
drh8e298f92002-07-06 16:28:47 +00001915**
1916** When this routine is called, set the alwaysRollback flag to true.
drh3aac2dd2004-04-26 14:10:20 +00001917** Subsequent calls to sqlite3pager_dont_rollback() for the same page
drh8e298f92002-07-06 16:28:47 +00001918** will thereafter be ignored. This is necessary to avoid a problem
1919** where a page with data is added to the freelist during one part of
1920** a transaction then removed from the freelist during a later part
1921** of the same transaction and reused for some other purpose. When it
1922** is first added to the freelist, this routine is called. When reused,
1923** the dont_rollback() routine is called. But because the page contains
1924** critical data, we still need to be sure it gets rolled back in spite
1925** of the dont_rollback() call.
drh30e58752002-03-02 20:41:57 +00001926*/
drh3aac2dd2004-04-26 14:10:20 +00001927void sqlite3pager_dont_write(Pager *pPager, Pgno pgno){
drh30e58752002-03-02 20:41:57 +00001928 PgHdr *pPg;
drh8e298f92002-07-06 16:28:47 +00001929
drh30e58752002-03-02 20:41:57 +00001930 pPg = pager_lookup(pPager, pgno);
drh8e298f92002-07-06 16:28:47 +00001931 pPg->alwaysRollback = 1;
drh30e58752002-03-02 20:41:57 +00001932 if( pPg && pPg->dirty ){
drh8124a302002-06-25 14:43:57 +00001933 if( pPager->dbSize==(int)pPg->pgno && pPager->origDbSize<pPager->dbSize ){
1934 /* If this pages is the last page in the file and the file has grown
1935 ** during the current transaction, then do NOT mark the page as clean.
1936 ** When the database file grows, we must make sure that the last page
1937 ** gets written at least once so that the disk file will be the correct
1938 ** size. If you do not write this page and the size of the file
1939 ** on the disk ends up being too small, that can lead to database
1940 ** corruption during the next transaction.
1941 */
1942 }else{
drhdb48ee02003-01-16 13:42:43 +00001943 TRACE2("DONT_WRITE %d\n", pgno);
drh8124a302002-06-25 14:43:57 +00001944 pPg->dirty = 0;
1945 }
drh30e58752002-03-02 20:41:57 +00001946 }
1947}
1948
1949/*
1950** A call to this routine tells the pager that if a rollback occurs,
1951** it is not necessary to restore the data on the given page. This
1952** means that the pager does not have to record the given page in the
1953** rollback journal.
1954*/
drh3aac2dd2004-04-26 14:10:20 +00001955void sqlite3pager_dont_rollback(void *pData){
drh30e58752002-03-02 20:41:57 +00001956 PgHdr *pPg = DATA_TO_PGHDR(pData);
1957 Pager *pPager = pPg->pPager;
1958
1959 if( pPager->state!=SQLITE_WRITELOCK || pPager->journalOpen==0 ) return;
drh193a6b42002-07-07 16:52:46 +00001960 if( pPg->alwaysRollback || pPager->alwaysRollback ) return;
drh30e58752002-03-02 20:41:57 +00001961 if( !pPg->inJournal && (int)pPg->pgno <= pPager->origDbSize ){
1962 assert( pPager->aInJournal!=0 );
1963 pPager->aInJournal[pPg->pgno/8] |= 1<<(pPg->pgno&7);
1964 pPg->inJournal = 1;
drh0f892532002-05-30 12:27:03 +00001965 if( pPager->ckptInUse ){
drh30e58752002-03-02 20:41:57 +00001966 pPager->aInCkpt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
drh3aac2dd2004-04-26 14:10:20 +00001967 page_add_to_stmt_list(pPg);
drh30e58752002-03-02 20:41:57 +00001968 }
drhdb48ee02003-01-16 13:42:43 +00001969 TRACE2("DONT_ROLLBACK %d\n", pPg->pgno);
drh30e58752002-03-02 20:41:57 +00001970 }
drh0f892532002-05-30 12:27:03 +00001971 if( pPager->ckptInUse && !pPg->inCkpt && (int)pPg->pgno<=pPager->ckptSize ){
drh30e58752002-03-02 20:41:57 +00001972 assert( pPg->inJournal || (int)pPg->pgno>pPager->origDbSize );
1973 assert( pPager->aInCkpt!=0 );
1974 pPager->aInCkpt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
drh3aac2dd2004-04-26 14:10:20 +00001975 page_add_to_stmt_list(pPg);
drh30e58752002-03-02 20:41:57 +00001976 }
1977}
1978
1979/*
drhed7c8552001-04-11 14:29:21 +00001980** Commit all changes to the database and release the write lock.
drhd9b02572001-04-15 00:37:09 +00001981**
1982** If the commit fails for any reason, a rollback attempt is made
1983** and an error code is returned. If the commit worked, SQLITE_OK
1984** is returned.
drhed7c8552001-04-11 14:29:21 +00001985*/
drh3aac2dd2004-04-26 14:10:20 +00001986int sqlite3pager_commit(Pager *pPager){
drha1b351a2001-09-14 16:42:12 +00001987 int rc;
drhed7c8552001-04-11 14:29:21 +00001988 PgHdr *pPg;
drhd9b02572001-04-15 00:37:09 +00001989
1990 if( pPager->errMask==PAGER_ERR_FULL ){
drh3aac2dd2004-04-26 14:10:20 +00001991 rc = sqlite3pager_rollback(pPager);
drh4e371ee2002-09-05 16:08:27 +00001992 if( rc==SQLITE_OK ){
1993 rc = SQLITE_FULL;
1994 }
drhd9b02572001-04-15 00:37:09 +00001995 return rc;
1996 }
1997 if( pPager->errMask!=0 ){
1998 rc = pager_errcode(pPager);
1999 return rc;
2000 }
2001 if( pPager->state!=SQLITE_WRITELOCK ){
2002 return SQLITE_ERROR;
2003 }
drhdb48ee02003-01-16 13:42:43 +00002004 TRACE1("COMMIT\n");
drha1680452002-04-18 01:56:57 +00002005 if( pPager->dirtyFile==0 ){
2006 /* Exit early (without doing the time-consuming sqliteOsSync() calls)
2007 ** if there have been no changes to the database file. */
drh341eae82003-01-21 02:39:36 +00002008 assert( pPager->needSync==0 );
drha1680452002-04-18 01:56:57 +00002009 rc = pager_unwritelock(pPager);
2010 pPager->dbSize = -1;
2011 return rc;
2012 }
drhda47d772002-12-02 04:25:19 +00002013 assert( pPager->journalOpen );
drh34e79ce2004-02-08 06:05:46 +00002014 rc = syncJournal(pPager);
drh240c5792004-02-08 00:40:52 +00002015 if( rc!=SQLITE_OK ){
drhd9b02572001-04-15 00:37:09 +00002016 goto commit_abort;
drhed7c8552001-04-11 14:29:21 +00002017 }
drh2554f8b2003-01-22 01:26:44 +00002018 pPg = pager_get_all_dirty_pages(pPager);
2019 if( pPg ){
2020 rc = pager_write_pagelist(pPg);
2021 if( rc || (!pPager->noSync && sqliteOsSync(&pPager->fd)!=SQLITE_OK) ){
2022 goto commit_abort;
2023 }
drh603240c2002-03-05 01:11:12 +00002024 }
drhd9b02572001-04-15 00:37:09 +00002025 rc = pager_unwritelock(pPager);
2026 pPager->dbSize = -1;
2027 return rc;
2028
2029 /* Jump here if anything goes wrong during the commit process.
2030 */
2031commit_abort:
drh3aac2dd2004-04-26 14:10:20 +00002032 rc = sqlite3pager_rollback(pPager);
drhd9b02572001-04-15 00:37:09 +00002033 if( rc==SQLITE_OK ){
2034 rc = SQLITE_FULL;
drhed7c8552001-04-11 14:29:21 +00002035 }
drhed7c8552001-04-11 14:29:21 +00002036 return rc;
2037}
2038
2039/*
2040** Rollback all changes. The database falls back to read-only mode.
2041** All in-memory cache pages revert to their original data contents.
2042** The journal is deleted.
drhd9b02572001-04-15 00:37:09 +00002043**
2044** This routine cannot fail unless some other process is not following
2045** the correct locking protocol (SQLITE_PROTOCOL) or unless some other
2046** process is writing trash into the journal file (SQLITE_CORRUPT) or
2047** unless a prior malloc() failed (SQLITE_NOMEM). Appropriate error
2048** codes are returned for all these occasions. Otherwise,
2049** SQLITE_OK is returned.
drhed7c8552001-04-11 14:29:21 +00002050*/
drh3aac2dd2004-04-26 14:10:20 +00002051int sqlite3pager_rollback(Pager *pPager){
drhed7c8552001-04-11 14:29:21 +00002052 int rc;
drhdb48ee02003-01-16 13:42:43 +00002053 TRACE1("ROLLBACK\n");
drhda47d772002-12-02 04:25:19 +00002054 if( !pPager->dirtyFile || !pPager->journalOpen ){
2055 rc = pager_unwritelock(pPager);
2056 pPager->dbSize = -1;
2057 return rc;
2058 }
drhdb48ee02003-01-16 13:42:43 +00002059
drhd9b02572001-04-15 00:37:09 +00002060 if( pPager->errMask!=0 && pPager->errMask!=PAGER_ERR_FULL ){
drh4b845d72002-03-05 12:41:19 +00002061 if( pPager->state>=SQLITE_WRITELOCK ){
drh99ee3602003-02-16 19:13:36 +00002062 pager_playback(pPager, 1);
drh4b845d72002-03-05 12:41:19 +00002063 }
drhd9b02572001-04-15 00:37:09 +00002064 return pager_errcode(pPager);
drhed7c8552001-04-11 14:29:21 +00002065 }
drhd9b02572001-04-15 00:37:09 +00002066 if( pPager->state!=SQLITE_WRITELOCK ){
2067 return SQLITE_OK;
2068 }
drh99ee3602003-02-16 19:13:36 +00002069 rc = pager_playback(pPager, 1);
drhd9b02572001-04-15 00:37:09 +00002070 if( rc!=SQLITE_OK ){
2071 rc = SQLITE_CORRUPT;
2072 pPager->errMask |= PAGER_ERR_CORRUPT;
2073 }
2074 pPager->dbSize = -1;
drhed7c8552001-04-11 14:29:21 +00002075 return rc;
drh98808ba2001-10-18 12:34:46 +00002076}
drhd9b02572001-04-15 00:37:09 +00002077
2078/*
drh5e00f6c2001-09-13 13:46:56 +00002079** Return TRUE if the database file is opened read-only. Return FALSE
2080** if the database is (in theory) writable.
2081*/
drh3aac2dd2004-04-26 14:10:20 +00002082int sqlite3pager_isreadonly(Pager *pPager){
drhbe0072d2001-09-13 14:46:09 +00002083 return pPager->readOnly;
drh5e00f6c2001-09-13 13:46:56 +00002084}
2085
2086/*
drhd9b02572001-04-15 00:37:09 +00002087** This routine is used for testing and analysis only.
2088*/
drh3aac2dd2004-04-26 14:10:20 +00002089int *sqlite3pager_stats(Pager *pPager){
drhd9b02572001-04-15 00:37:09 +00002090 static int a[9];
2091 a[0] = pPager->nRef;
2092 a[1] = pPager->nPage;
2093 a[2] = pPager->mxPage;
2094 a[3] = pPager->dbSize;
2095 a[4] = pPager->state;
2096 a[5] = pPager->errMask;
2097 a[6] = pPager->nHit;
2098 a[7] = pPager->nMiss;
2099 a[8] = pPager->nOvfl;
2100 return a;
2101}
drhdd793422001-06-28 01:54:48 +00002102
drhfa86c412002-02-02 15:01:15 +00002103/*
2104** Set the checkpoint.
2105**
2106** This routine should be called with the transaction journal already
2107** open. A new checkpoint journal is created that can be used to rollback
drhaaab5722002-02-19 13:39:21 +00002108** changes of a single SQL command within a larger transaction.
drhfa86c412002-02-02 15:01:15 +00002109*/
drh3aac2dd2004-04-26 14:10:20 +00002110int sqlite3pager_stmt_begin(Pager *pPager){
drhfa86c412002-02-02 15:01:15 +00002111 int rc;
2112 char zTemp[SQLITE_TEMPNAME_SIZE];
drhda47d772002-12-02 04:25:19 +00002113 if( !pPager->journalOpen ){
2114 pPager->ckptAutoopen = 1;
2115 return SQLITE_OK;
2116 }
drhfa86c412002-02-02 15:01:15 +00002117 assert( pPager->journalOpen );
drh0f892532002-05-30 12:27:03 +00002118 assert( !pPager->ckptInUse );
drhfa86c412002-02-02 15:01:15 +00002119 pPager->aInCkpt = sqliteMalloc( pPager->dbSize/8 + 1 );
2120 if( pPager->aInCkpt==0 ){
2121 sqliteOsReadLock(&pPager->fd);
2122 return SQLITE_NOMEM;
2123 }
drh968af522003-02-11 14:55:40 +00002124#ifndef NDEBUG
drhfa86c412002-02-02 15:01:15 +00002125 rc = sqliteOsFileSize(&pPager->jfd, &pPager->ckptJSize);
2126 if( rc ) goto ckpt_begin_failed;
drh968af522003-02-11 14:55:40 +00002127 assert( pPager->ckptJSize ==
2128 pPager->nRec*JOURNAL_PG_SZ(journal_format)+JOURNAL_HDR_SZ(journal_format) );
2129#endif
2130 pPager->ckptJSize = pPager->nRec*JOURNAL_PG_SZ(journal_format)
2131 + JOURNAL_HDR_SZ(journal_format);
drh663fc632002-02-02 18:49:19 +00002132 pPager->ckptSize = pPager->dbSize;
drh0f892532002-05-30 12:27:03 +00002133 if( !pPager->ckptOpen ){
drh3aac2dd2004-04-26 14:10:20 +00002134 rc = sqlite3pager_opentemp(zTemp, &pPager->cpfd);
drh0f892532002-05-30 12:27:03 +00002135 if( rc ) goto ckpt_begin_failed;
2136 pPager->ckptOpen = 1;
drh9bd47a92003-01-07 14:46:08 +00002137 pPager->ckptNRec = 0;
drh0f892532002-05-30 12:27:03 +00002138 }
2139 pPager->ckptInUse = 1;
drhfa86c412002-02-02 15:01:15 +00002140 return SQLITE_OK;
2141
2142ckpt_begin_failed:
2143 if( pPager->aInCkpt ){
2144 sqliteFree(pPager->aInCkpt);
2145 pPager->aInCkpt = 0;
2146 }
2147 return rc;
2148}
2149
2150/*
2151** Commit a checkpoint.
2152*/
drh3aac2dd2004-04-26 14:10:20 +00002153int sqlite3pager_stmt_commit(Pager *pPager){
drh0f892532002-05-30 12:27:03 +00002154 if( pPager->ckptInUse ){
drh03eb96a2002-11-10 23:32:56 +00002155 PgHdr *pPg, *pNext;
drh96ddd6d2002-09-05 19:10:33 +00002156 sqliteOsSeek(&pPager->cpfd, 0);
drh9bd47a92003-01-07 14:46:08 +00002157 /* sqliteOsTruncate(&pPager->cpfd, 0); */
2158 pPager->ckptNRec = 0;
drh0f892532002-05-30 12:27:03 +00002159 pPager->ckptInUse = 0;
drh663fc632002-02-02 18:49:19 +00002160 sqliteFree( pPager->aInCkpt );
2161 pPager->aInCkpt = 0;
drh03eb96a2002-11-10 23:32:56 +00002162 for(pPg=pPager->pCkpt; pPg; pPg=pNext){
2163 pNext = pPg->pNextCkpt;
2164 assert( pPg->inCkpt );
drh663fc632002-02-02 18:49:19 +00002165 pPg->inCkpt = 0;
drh03eb96a2002-11-10 23:32:56 +00002166 pPg->pPrevCkpt = pPg->pNextCkpt = 0;
drh663fc632002-02-02 18:49:19 +00002167 }
drh03eb96a2002-11-10 23:32:56 +00002168 pPager->pCkpt = 0;
drh663fc632002-02-02 18:49:19 +00002169 }
drhda47d772002-12-02 04:25:19 +00002170 pPager->ckptAutoopen = 0;
drhfa86c412002-02-02 15:01:15 +00002171 return SQLITE_OK;
2172}
2173
2174/*
2175** Rollback a checkpoint.
2176*/
drh3aac2dd2004-04-26 14:10:20 +00002177int sqlite3pager_stmt_rollback(Pager *pPager){
drhfa86c412002-02-02 15:01:15 +00002178 int rc;
drh0f892532002-05-30 12:27:03 +00002179 if( pPager->ckptInUse ){
drh3aac2dd2004-04-26 14:10:20 +00002180 rc = pager_stmt_playback(pPager);
2181 sqlite3pager_stmt_commit(pPager);
drh663fc632002-02-02 18:49:19 +00002182 }else{
2183 rc = SQLITE_OK;
2184 }
drhda47d772002-12-02 04:25:19 +00002185 pPager->ckptAutoopen = 0;
drhfa86c412002-02-02 15:01:15 +00002186 return rc;
2187}
2188
drh73509ee2003-04-06 20:44:45 +00002189/*
2190** Return the full pathname of the database file.
2191*/
drh3aac2dd2004-04-26 14:10:20 +00002192const char *sqlite3pager_filename(Pager *pPager){
drh73509ee2003-04-06 20:44:45 +00002193 return pPager->zFilename;
2194}
2195
drhb20ea9d2004-02-09 01:20:36 +00002196/*
2197** Set the codec for this pager
2198*/
drh3aac2dd2004-04-26 14:10:20 +00002199void sqlite3pager_set_codec(
drhb20ea9d2004-02-09 01:20:36 +00002200 Pager *pPager,
drh9eb9e262004-02-11 02:18:05 +00002201 void (*xCodec)(void*,void*,Pgno,int),
drhb20ea9d2004-02-09 01:20:36 +00002202 void *pCodecArg
2203){
2204 pPager->xCodec = xCodec;
2205 pPager->pCodecArg = pCodecArg;
2206}
2207
#ifdef SQLITE_TEST
/*
** Print a listing of all referenced pages and their ref count.
**
** Walks the pager's pAll list and prints one line to standard output
** for every page whose nRef is greater than zero.  Only compiled when
** SQLITE_TEST is defined.
*/
void sqlite3pager_refdump(Pager *pPager){
  PgHdr *pPg;
  for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
    if( pPg->nRef<=0 ) continue;
    /* The address is printed with %p: casting the pointer to int for
    ** %x would truncate it wherever pointers are wider than int. */
    printf("PAGE %3d addr=%p nRef=%d\n",
       (int)pPg->pgno, (void*)PGHDR_TO_DATA(pPg), pPg->nRef);
  }
}
#endif