/*
** 2001 September 15
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This is the implementation of the page cache subsystem or "pager".
**
** The pager is used to access a database disk file.  It implements
** atomic commit and rollback through the use of a journal file that
** is separate from the database file.  The pager also implements file
** locking to prevent two processes from writing the same database
** file simultaneously, or one process from reading the database while
** another is writing.
**
** @(#) $Id: pager.c,v 1.110 2004/06/03 16:08:42 danielk1977 Exp $
*/
drh829e8022002-11-06 14:08:11 +000023#include "os.h" /* Must be first to enable large file support */
drhd9b02572001-04-15 00:37:09 +000024#include "sqliteInt.h"
drhed7c8552001-04-11 14:29:21 +000025#include "pager.h"
drhed7c8552001-04-11 14:29:21 +000026#include <assert.h>
drhd9b02572001-04-15 00:37:09 +000027#include <string.h>
drhed7c8552001-04-11 14:29:21 +000028
/*
** Macros for troubleshooting.  Normally turned off.
**
** When enabled (change "#if 0" to "#if 1"):
**
**   SET_PAGER(X)   Remember X as the pager being traced, but only if no
**                  pager is being traced already.
**   CLR_PAGER(X)   Stop tracing if X is the pager being traced.
**   TRACEn(...)    fprintf() the arguments to stderr, but only when the
**                  local variable "pPager" is the pager being traced.
**
** The TRACEn macros therefore require a variable named pPager to be in
** scope at the point of use.  The enabled forms expand to a bare "if"
** statement, so they must not be used as the body of an if/else without
** braces.
**
** When disabled, every macro expands to nothing and its arguments are
** never evaluated.
*/
#if 0
static Pager *mainPager = 0;
#define SET_PAGER(X)  if( mainPager==0 ) mainPager = (X)
#define CLR_PAGER(X)  if( mainPager==(X) ) mainPager = 0
#define TRACE1(X)     if( pPager==mainPager ) fprintf(stderr,X)
#define TRACE2(X,Y)   if( pPager==mainPager ) fprintf(stderr,X,Y)
#define TRACE3(X,Y,Z) if( pPager==mainPager ) fprintf(stderr,X,Y,Z)
#else
#define SET_PAGER(X)
#define CLR_PAGER(X)
#define TRACE1(X)
#define TRACE2(X,Y)
#define TRACE3(X,Y,Z)
#endif
46
47
/*
** The page cache as a whole is always in one of the following
** states:
**
**   SQLITE_UNLOCK       The page cache is not currently reading or
**                       writing the database file.  There is no
**                       data held in memory.  This is the initial
**                       state.
**
**   SQLITE_READLOCK     The page cache is reading the database.
**                       Writing is not permitted.  There can be
**                       multiple readers accessing the same database
**                       file at the same time.
**
**   SQLITE_WRITELOCK    The page cache is writing the database.
**                       Access is exclusive.  No other processes or
**                       threads can be reading or writing while one
**                       process is writing.
**
** The page cache comes up in SQLITE_UNLOCK.  The first time a page is
** fetched, the state transitions to SQLITE_READLOCK.  After all pages
** have been released, the state transitions back to SQLITE_UNLOCK.
** The first time that a page is made writable (sqlite3pager_write()),
** the state transitions to SQLITE_WRITELOCK.  (Note that a page can
** only be made writable while it is outstanding, which means that the
** pager must be in SQLITE_READLOCK before it transitions to
** SQLITE_WRITELOCK.)  Rollback (sqlite3pager_rollback()) and commit
** transition the state from SQLITE_WRITELOCK back to SQLITE_READLOCK.
**
** The numeric values are ordered: code in this file compares states
** with "<" and ">=" (for example "state>=SQLITE_WRITELOCK"), so a
** stronger lock must always have a larger value.
*/
#define SQLITE_UNLOCK      0
#define SQLITE_READLOCK    1
#define SQLITE_WRITELOCK   2
81
drhd9b02572001-04-15 00:37:09 +000082
drhed7c8552001-04-11 14:29:21 +000083/*
84** Each in-memory image of a page begins with the following header.
drhbd03cae2001-06-02 02:40:57 +000085** This header is only visible to this pager module. The client
86** code that calls pager sees only the data that follows the header.
drhf6038712004-02-08 18:07:34 +000087**
drh3aac2dd2004-04-26 14:10:20 +000088** Client code should call sqlite3pager_write() on a page prior to making
89** any modifications to that page. The first time sqlite3pager_write()
drhf6038712004-02-08 18:07:34 +000090** is called, the original page contents are written into the rollback
91** journal and PgHdr.inJournal and PgHdr.needSync are set. Later, once
92** the journal page has made it onto the disk surface, PgHdr.needSync
93** is cleared. The modified page cannot be written back into the original
94** database file until the journal pages has been synced to disk and the
95** PgHdr.needSync has been cleared.
96**
drh3aac2dd2004-04-26 14:10:20 +000097** The PgHdr.dirty flag is set when sqlite3pager_write() is called and
drhf6038712004-02-08 18:07:34 +000098** is cleared again when the page content is written back to the original
99** database file.
drhed7c8552001-04-11 14:29:21 +0000100*/
drhd9b02572001-04-15 00:37:09 +0000101typedef struct PgHdr PgHdr;
drhed7c8552001-04-11 14:29:21 +0000102struct PgHdr {
103 Pager *pPager; /* The pager to which this page belongs */
104 Pgno pgno; /* The page number for this page */
drh69688d52001-04-14 16:38:23 +0000105 PgHdr *pNextHash, *pPrevHash; /* Hash collision chain for PgHdr.pgno */
drhd9b02572001-04-15 00:37:09 +0000106 PgHdr *pNextFree, *pPrevFree; /* Freelist of pages where nRef==0 */
drhac69b052004-05-12 13:30:07 +0000107 PgHdr *pNextAll; /* A list of all pages */
108 PgHdr *pNextStmt, *pPrevStmt; /* List of pages in the statement journal */
drh193a6b42002-07-07 16:52:46 +0000109 u8 inJournal; /* TRUE if has been written to journal */
drhac69b052004-05-12 13:30:07 +0000110 u8 inStmt; /* TRUE if in the statement subjournal */
drh193a6b42002-07-07 16:52:46 +0000111 u8 dirty; /* TRUE if we need to write back changes */
drhdb48ee02003-01-16 13:42:43 +0000112 u8 needSync; /* Sync journal before writing this page */
drh193a6b42002-07-07 16:52:46 +0000113 u8 alwaysRollback; /* Disable dont_rollback() for this page */
drhac69b052004-05-12 13:30:07 +0000114 short int nRef; /* Number of users of this page */
drh2554f8b2003-01-22 01:26:44 +0000115 PgHdr *pDirty; /* Dirty pages sorted by PgHdr.pgno */
drhd0ba1932004-02-10 01:54:28 +0000116 /* SQLITE_PAGE_SIZE bytes of page data follow this header */
drh973b6e32003-02-12 14:09:42 +0000117 /* Pager.nExtra bytes of local data follow the page data */
drhed7c8552001-04-11 14:29:21 +0000118};
119
drhac69b052004-05-12 13:30:07 +0000120/*
121** For an in-memory only database, some extra information is recorded about
122** each page so that changes can be rolled back. (Journal files are not
123** used for in-memory databases.) The following information is added to
124** the end of every EXTRA block for in-memory databases.
125**
126** This information could have been added directly to the PgHdr structure.
127** But then it would take up an extra 8 bytes of storage on every PgHdr
128** even for disk-based databases. Splitting it out saves 8 bytes. This
129** is only a savings of 0.8% but those percentages add up.
130*/
131typedef struct PgHistory PgHistory;
132struct PgHistory {
133 u8 *pOrig; /* Original page text. Restore to this on a full rollback */
134 u8 *pStmt; /* Text as it was at the beginning of the current statement */
135};
drh9eb9e262004-02-11 02:18:05 +0000136
/*
** A macro used for invoking the codec if there is one.
**
**   P   Pager whose xCodec/pCodecArg members are used.
**   D   Pointer to the page data handed to the codec.
**   N   Page number handed to the codec.
**   X   Integer operation code handed to the codec.
**
** Every parameter is parenthesized in the expansion so that an argument
** which is an expression (for example "&pager" or "cond ? a : b") binds
** correctly.  P is evaluated more than once, so it must be free of side
** effects.
**
** The enabled form still expands to a bare "if" statement (kept for
** compatibility with existing call sites), so do not use it as the
** unbraced body of an if/else.  When SQLITE_HAS_CODEC is not defined the
** macro expands to nothing and its arguments are never evaluated.
*/
#ifdef SQLITE_HAS_CODEC
# define CODEC(P,D,N,X) \
    if( (P)->xCodec ){ (P)->xCodec((P)->pCodecArg,(D),(N),(X)); }
#else
# define CODEC(P,D,N,X)
#endif
145
/*
** Convert a pointer to a PgHdr into a pointer to its data
** and back again.
**
**   PGHDR_TO_DATA(P)      Page data: the first byte after the header.
**   DATA_TO_PGHDR(D)      Inverse of PGHDR_TO_DATA().
**   PGHDR_TO_EXTRA(P)     The extra area: SQLITE_PAGE_SIZE bytes past the
**                         start of the page data.
**   PGHDR_TO_HIST(P,PGR)  The PgHistory record: pageSize+nExtra bytes (both
**                         taken from pager PGR) past the start of the
**                         page data.
**
** Note that PGHDR_TO_EXTRA() uses the compile-time SQLITE_PAGE_SIZE while
** PGHDR_TO_HIST() uses the run-time Pager.pageSize; the two agree only
** while Pager.pageSize==SQLITE_PAGE_SIZE.
*/
#define PGHDR_TO_DATA(P)  ((void*)(&(P)[1]))
#define DATA_TO_PGHDR(D)  (&((PgHdr*)(D))[-1])
#define PGHDR_TO_EXTRA(P) ((void*)&((char*)(&(P)[1]))[SQLITE_PAGE_SIZE])
#define PGHDR_TO_HIST(P,PGR)  \
            ((PgHistory*)&((char*)(&(P)[1]))[(PGR)->pageSize+(PGR)->nExtra])
drhed7c8552001-04-11 14:29:21 +0000155
/*
** How big to make the hash table used for locating in-memory pages
** by page number.
**
** The value must be a power of two because pager_hash() reduces a page
** number with a bit mask rather than a modulo.
*/
#define N_PG_HASH 2048
#if (N_PG_HASH & (N_PG_HASH-1))!=0
# error N_PG_HASH must be a power of two
#endif

/*
** Hash a page number.  The result is an index into Pager.aHash[],
** i.e. a value in the range 0..N_PG_HASH-1.
*/
#define pager_hash(PN)  ((PN)&(N_PG_HASH-1))
drhed7c8552001-04-11 14:29:21 +0000166
167/*
168** A open page cache is an instance of the following structure.
169*/
170struct Pager {
171 char *zFilename; /* Name of the database file */
172 char *zJournal; /* Name of the journal file */
drha76c82e2003-07-27 18:59:42 +0000173 char *zDirectory; /* Directory hold database and journal files */
drh8cfbf082001-09-19 13:22:39 +0000174 OsFile fd, jfd; /* File descriptors for database and journal */
drhac69b052004-05-12 13:30:07 +0000175 OsFile stfd; /* File descriptor for the statement subjournal*/
drhed7c8552001-04-11 14:29:21 +0000176 int dbSize; /* Number of pages in the file */
drh69688d52001-04-14 16:38:23 +0000177 int origDbSize; /* dbSize before the current change */
drhac69b052004-05-12 13:30:07 +0000178 int stmtSize; /* Size of database (in pages) at stmt_begin() */
179 off_t stmtJSize; /* Size of journal at stmt_begin() */
drh968af522003-02-11 14:55:40 +0000180 int nRec; /* Number of pages written to the journal */
181 u32 cksumInit; /* Quasi-random value added to every checksum */
drhac69b052004-05-12 13:30:07 +0000182 int stmtNRec; /* Number of records in stmt subjournal */
drh7e3b0a02001-04-28 16:52:40 +0000183 int nExtra; /* Add this many bytes to each in-memory page */
drhb6f41482004-05-14 01:58:11 +0000184 void (*xDestructor)(void*,int); /* Call this routine when freeing pages */
185 int pageSize; /* Number of bytes in a page */
drhed7c8552001-04-11 14:29:21 +0000186 int nPage; /* Total number of in-memory pages */
drhd9b02572001-04-15 00:37:09 +0000187 int nRef; /* Number of in-memory pages with PgHdr.nRef>0 */
drhed7c8552001-04-11 14:29:21 +0000188 int mxPage; /* Maximum number of pages to hold in cache */
drhd9b02572001-04-15 00:37:09 +0000189 int nHit, nMiss, nOvfl; /* Cache hits, missing, and LRU overflows */
drh9eb9e262004-02-11 02:18:05 +0000190 void (*xCodec)(void*,void*,Pgno,int); /* Routine for en/decoding data */
drhb20ea9d2004-02-09 01:20:36 +0000191 void *pCodecArg; /* First argument to xCodec() */
drh603240c2002-03-05 01:11:12 +0000192 u8 journalOpen; /* True if journal file descriptors is valid */
drh34e79ce2004-02-08 06:05:46 +0000193 u8 journalStarted; /* True if header of journal is synced */
194 u8 useJournal; /* Use a rollback journal on this file */
drhac69b052004-05-12 13:30:07 +0000195 u8 stmtOpen; /* True if the statement subjournal is open */
196 u8 stmtInUse; /* True we are in a statement subtransaction */
197 u8 stmtAutoopen; /* Open stmt journal when main journal is opened*/
drh603240c2002-03-05 01:11:12 +0000198 u8 noSync; /* Do not sync the journal if true */
drh968af522003-02-11 14:55:40 +0000199 u8 fullSync; /* Do extra syncs of the journal for robustness */
drh603240c2002-03-05 01:11:12 +0000200 u8 state; /* SQLITE_UNLOCK, _READLOCK or _WRITELOCK */
201 u8 errMask; /* One of several kinds of errors */
202 u8 tempFile; /* zFilename is a temporary file */
203 u8 readOnly; /* True for a read-only database */
204 u8 needSync; /* True if an fsync() is needed on the journal */
drha1680452002-04-18 01:56:57 +0000205 u8 dirtyFile; /* True if database file has changed in any way */
drh193a6b42002-07-07 16:52:46 +0000206 u8 alwaysRollback; /* Disable dont_rollback() for all pages */
drhac69b052004-05-12 13:30:07 +0000207 u8 memDb; /* True to inhibit all file I/O */
drh603240c2002-03-05 01:11:12 +0000208 u8 *aInJournal; /* One bit for each page in the database file */
drhac69b052004-05-12 13:30:07 +0000209 u8 *aInStmt; /* One bit for each page in the database */
drhed7c8552001-04-11 14:29:21 +0000210 PgHdr *pFirst, *pLast; /* List of free pages */
drh341eae82003-01-21 02:39:36 +0000211 PgHdr *pFirstSynced; /* First free page with PgHdr.needSync==0 */
drhd9b02572001-04-15 00:37:09 +0000212 PgHdr *pAll; /* List of all pages */
drhac69b052004-05-12 13:30:07 +0000213 PgHdr *pStmt; /* List of pages in the statement subjournal */
drhed7c8552001-04-11 14:29:21 +0000214 PgHdr *aHash[N_PG_HASH]; /* Hash table to map page number of PgHdr */
danielk197713adf8a2004-06-03 16:08:41 +0000215 int nMaster; /* Number of bytes to reserve for master j.p */
drhd9b02572001-04-15 00:37:09 +0000216};
217
/*
** These are bits that can be set in Pager.errMask.  Several bits may be
** set at once; pager_errcode() converts the mask into a single result
** code.
*/
#define PAGER_ERR_FULL     0x01  /* a write() failed */
#define PAGER_ERR_MEM      0x02  /* malloc() failed */
#define PAGER_ERR_LOCK     0x04  /* error in the locking protocol */
#define PAGER_ERR_CORRUPT  0x08  /* database or journal corruption */
#define PAGER_ERR_DISK     0x10  /* general disk I/O error - bad hard drive? */
drhd9b02572001-04-15 00:37:09 +0000226
227/*
228** The journal file contains page records in the following
229** format.
drh968af522003-02-11 14:55:40 +0000230**
231** Actually, this structure is the complete page record for pager
232** formats less than 3. Beginning with format 3, this record is surrounded
233** by two checksums.
drhd9b02572001-04-15 00:37:09 +0000234*/
235typedef struct PageRecord PageRecord;
236struct PageRecord {
drhb20ea9d2004-02-09 01:20:36 +0000237 Pgno pgno; /* The page number */
drhd0ba1932004-02-10 01:54:28 +0000238 char aData[SQLITE_PAGE_SIZE]; /* Original data for page pgno */
drhd9b02572001-04-15 00:37:09 +0000239};
240
/*
** Journal files begin with the following magic string.  The data
** was obtained from /dev/random.  It is used only as a sanity check.
**
** There are three journal formats (so far).  The 1st journal format writes
** 32-bit integers in the byte-order of the host machine.  Newer
** formats write integers as big-endian.  All new journals use the
** new format, but we have to be able to read an older journal in order
** to rollback journals created by older versions of the library.
**
** The 3rd journal format (added for 2.8.0) adds additional sanity
** checking information to the journal.  If the power fails while the
** journal is being written, semi-random garbage data might appear in
** the journal file after power is restored.  If an attempt is then made
** to roll the journal back, the database could be corrupted.  The additional
** sanity checking data is an attempt to discover the garbage in the
** journal and ignore it.
**
** The sanity checking information for the 3rd journal format consists
** of a 32-bit checksum on each page of data.  The checksum covers both
** the page number and the SQLITE_PAGE_SIZE bytes of data for the page.
** This cksum is initialized to a 32-bit random value that appears in the
** journal file right after the header.  The random initializer is important,
** because garbage data that appears at the end of a journal is likely
** data that was once in other files that have now been deleted.  If the
** garbage data came from an obsolete journal file, the checksums might
** be correct.  But by initializing the checksum to a random value which
** is different for every journal, we minimize that risk.
**
** The three magic strings differ only in their final byte.
*/
static const unsigned char aJournalMagic1[] = {
  0xd9, 0xd5, 0x05, 0xf9, 0x20, 0xa1, 0x63, 0xd4,
};
static const unsigned char aJournalMagic2[] = {
  0xd9, 0xd5, 0x05, 0xf9, 0x20, 0xa1, 0x63, 0xd5,
};
static const unsigned char aJournalMagic3[] = {
  0xd9, 0xd5, 0x05, 0xf9, 0x20, 0xa1, 0x63, 0xd6,
};
#define JOURNAL_FORMAT_1 1
#define JOURNAL_FORMAT_2 2
#define JOURNAL_FORMAT_3 3
drh94f33312002-08-12 12:29:56 +0000282
/*
** The following integer determines what format to use when creating
** new primary journal files.  By default we always use format 3.
** When testing, we can set this value to older journal formats in order to
** make sure that newer versions of the library are able to rollback older
** journal files.
**
** In SQLITE_TEST builds journal_format is a writable global variable; in
** all other builds it is the compile-time constant 3.
**
** Note that statement journals always use format 2 and omit the header.
*/
#ifdef SQLITE_TEST
int journal_format = 3;
#else
# define journal_format 3
#endif
drhed7c8552001-04-11 14:29:21 +0000297
/*
** The size of the header and of each page in the journal varies according
** to which journal format is being used.  The following macros figure out
** the sizes based on format numbers.
**
** JOURNAL_HDR_SZ(pPager, X)
**     Header size for journal format X: the magic string plus one Pgno,
**     and for format 3 and later an additional three u32 values plus
**     pPager->nMaster bytes.
**
** JOURNAL_PG_SZ(X)
**     Size of one page record for journal format X: the page data plus
**     its Pgno, and for format 3 and later one u32 checksum.
*/
#define JOURNAL_HDR_SZ(pPager, X) (\
   sizeof(aJournalMagic1) + \
   sizeof(Pgno) + \
   ((X)>=3 ? 3*sizeof(u32)+(pPager)->nMaster : 0) )
#define JOURNAL_PG_SZ(X) \
   (SQLITE_PAGE_SIZE + sizeof(Pgno) + ((X)>=3)*sizeof(u32))
drh968af522003-02-11 14:55:40 +0000313
danielk197713adf8a2004-06-03 16:08:41 +0000314
/*
** Enable reference count tracking here:
**
** In SQLITE_TEST builds, REFINFO(X) calls pager_refinfo(), which prints
** the page number, the address of the page data and the reference count
** of page X to standard output whenever the global variable
** pager3_refinfo_enable is non-zero.  In all other builds REFINFO(X)
** expands to nothing.
**
** The address is printed with %p.  (Casting the pointer to int, as was
** done previously, discards the upper bits on platforms where pointers
** are wider than int.)
*/
#ifdef SQLITE_TEST
  int pager3_refinfo_enable = 0;
  static void pager_refinfo(PgHdr *p){
    static int cnt = 0;
    if( !pager3_refinfo_enable ) return;
    printf(
       "REFCNT: %4u addr=%p nRef=%d\n",
       (unsigned)p->pgno, PGHDR_TO_DATA(p), (int)p->nRef
    );
    cnt++;   /* Something to set a breakpoint on */
  }
# define REFINFO(X)  pager_refinfo(X)
#else
# define REFINFO(X)
#endif
333
334/*
drh34e79ce2004-02-08 06:05:46 +0000335** Read a 32-bit integer from the given file descriptor. Store the integer
336** that is read in *pRes. Return SQLITE_OK if everything worked, or an
337** error code is something goes wrong.
338**
339** If the journal format is 2 or 3, read a big-endian integer. If the
340** journal format is 1, read an integer in the native byte-order of the
341** host machine.
drh94f33312002-08-12 12:29:56 +0000342*/
drh968af522003-02-11 14:55:40 +0000343static int read32bits(int format, OsFile *fd, u32 *pRes){
drh94f33312002-08-12 12:29:56 +0000344 u32 res;
345 int rc;
danielk19774adee202004-05-08 08:23:19 +0000346 rc = sqlite3OsRead(fd, &res, sizeof(res));
drh968af522003-02-11 14:55:40 +0000347 if( rc==SQLITE_OK && format>JOURNAL_FORMAT_1 ){
drh94f33312002-08-12 12:29:56 +0000348 unsigned char ac[4];
349 memcpy(ac, &res, 4);
350 res = (ac[0]<<24) | (ac[1]<<16) | (ac[2]<<8) | ac[3];
351 }
352 *pRes = res;
353 return rc;
354}
355
356/*
drh34e79ce2004-02-08 06:05:46 +0000357** Write a 32-bit integer into the given file descriptor. Return SQLITE_OK
358** on success or an error code is something goes wrong.
359**
360** If the journal format is 2 or 3, write the integer as 4 big-endian
361** bytes. If the journal format is 1, write the integer in the native
362** byte order. In normal operation, only formats 2 and 3 are used.
363** Journal format 1 is only used for testing.
drh94f33312002-08-12 12:29:56 +0000364*/
365static int write32bits(OsFile *fd, u32 val){
366 unsigned char ac[4];
drh968af522003-02-11 14:55:40 +0000367 if( journal_format<=1 ){
danielk19774adee202004-05-08 08:23:19 +0000368 return sqlite3OsWrite(fd, &val, 4);
drh94f33312002-08-12 12:29:56 +0000369 }
drh94f33312002-08-12 12:29:56 +0000370 ac[0] = (val>>24) & 0xff;
371 ac[1] = (val>>16) & 0xff;
372 ac[2] = (val>>8) & 0xff;
373 ac[3] = val & 0xff;
danielk19774adee202004-05-08 08:23:19 +0000374 return sqlite3OsWrite(fd, ac, 4);
drh94f33312002-08-12 12:29:56 +0000375}
376
drh2554f8b2003-01-22 01:26:44 +0000377/*
378** Write a 32-bit integer into a page header right before the
379** page data. This will overwrite the PgHdr.pDirty pointer.
drh34e79ce2004-02-08 06:05:46 +0000380**
381** The integer is big-endian for formats 2 and 3 and native byte order
382** for journal format 1.
drh2554f8b2003-01-22 01:26:44 +0000383*/
drh968af522003-02-11 14:55:40 +0000384static void store32bits(u32 val, PgHdr *p, int offset){
drh2554f8b2003-01-22 01:26:44 +0000385 unsigned char *ac;
drhec1bd0b2003-08-26 11:41:27 +0000386 ac = &((unsigned char*)PGHDR_TO_DATA(p))[offset];
drh968af522003-02-11 14:55:40 +0000387 if( journal_format<=1 ){
drh2554f8b2003-01-22 01:26:44 +0000388 memcpy(ac, &val, 4);
389 }else{
390 ac[0] = (val>>24) & 0xff;
391 ac[1] = (val>>16) & 0xff;
392 ac[2] = (val>>8) & 0xff;
393 ac[3] = val & 0xff;
394 }
395}
396
drh94f33312002-08-12 12:29:56 +0000397
398/*
drhd9b02572001-04-15 00:37:09 +0000399** Convert the bits in the pPager->errMask into an approprate
400** return code.
401*/
402static int pager_errcode(Pager *pPager){
403 int rc = SQLITE_OK;
404 if( pPager->errMask & PAGER_ERR_LOCK ) rc = SQLITE_PROTOCOL;
drh81a20f22001-10-12 17:30:04 +0000405 if( pPager->errMask & PAGER_ERR_DISK ) rc = SQLITE_IOERR;
drhd9b02572001-04-15 00:37:09 +0000406 if( pPager->errMask & PAGER_ERR_FULL ) rc = SQLITE_FULL;
407 if( pPager->errMask & PAGER_ERR_MEM ) rc = SQLITE_NOMEM;
408 if( pPager->errMask & PAGER_ERR_CORRUPT ) rc = SQLITE_CORRUPT;
409 return rc;
drhed7c8552001-04-11 14:29:21 +0000410}
411
412/*
drh03eb96a2002-11-10 23:32:56 +0000413** Add or remove a page from the list of all pages that are in the
drhac69b052004-05-12 13:30:07 +0000414** statement journal.
drh03eb96a2002-11-10 23:32:56 +0000415**
416** The Pager keeps a separate list of pages that are currently in
drhac69b052004-05-12 13:30:07 +0000417** the statement journal. This helps the sqlite3pager_stmt_commit()
drh03eb96a2002-11-10 23:32:56 +0000418** routine run MUCH faster for the common case where there are many
drhac69b052004-05-12 13:30:07 +0000419** pages in memory but only a few are in the statement journal.
drh03eb96a2002-11-10 23:32:56 +0000420*/
drh3aac2dd2004-04-26 14:10:20 +0000421static void page_add_to_stmt_list(PgHdr *pPg){
drh03eb96a2002-11-10 23:32:56 +0000422 Pager *pPager = pPg->pPager;
drhac69b052004-05-12 13:30:07 +0000423 if( pPg->inStmt ) return;
424 assert( pPg->pPrevStmt==0 && pPg->pNextStmt==0 );
425 pPg->pPrevStmt = 0;
426 if( pPager->pStmt ){
427 pPager->pStmt->pPrevStmt = pPg;
drh03eb96a2002-11-10 23:32:56 +0000428 }
drhac69b052004-05-12 13:30:07 +0000429 pPg->pNextStmt = pPager->pStmt;
430 pPager->pStmt = pPg;
431 pPg->inStmt = 1;
drh03eb96a2002-11-10 23:32:56 +0000432}
drh3aac2dd2004-04-26 14:10:20 +0000433static void page_remove_from_stmt_list(PgHdr *pPg){
drhac69b052004-05-12 13:30:07 +0000434 if( !pPg->inStmt ) return;
435 if( pPg->pPrevStmt ){
436 assert( pPg->pPrevStmt->pNextStmt==pPg );
437 pPg->pPrevStmt->pNextStmt = pPg->pNextStmt;
drh03eb96a2002-11-10 23:32:56 +0000438 }else{
drhac69b052004-05-12 13:30:07 +0000439 assert( pPg->pPager->pStmt==pPg );
440 pPg->pPager->pStmt = pPg->pNextStmt;
drh03eb96a2002-11-10 23:32:56 +0000441 }
drhac69b052004-05-12 13:30:07 +0000442 if( pPg->pNextStmt ){
443 assert( pPg->pNextStmt->pPrevStmt==pPg );
444 pPg->pNextStmt->pPrevStmt = pPg->pPrevStmt;
drh03eb96a2002-11-10 23:32:56 +0000445 }
drhac69b052004-05-12 13:30:07 +0000446 pPg->pNextStmt = 0;
447 pPg->pPrevStmt = 0;
448 pPg->inStmt = 0;
drh03eb96a2002-11-10 23:32:56 +0000449}
450
451/*
drhed7c8552001-04-11 14:29:21 +0000452** Find a page in the hash table given its page number. Return
453** a pointer to the page or NULL if not found.
454*/
drhd9b02572001-04-15 00:37:09 +0000455static PgHdr *pager_lookup(Pager *pPager, Pgno pgno){
drh836faa42003-01-11 13:30:57 +0000456 PgHdr *p = pPager->aHash[pager_hash(pgno)];
drhed7c8552001-04-11 14:29:21 +0000457 while( p && p->pgno!=pgno ){
458 p = p->pNextHash;
459 }
460 return p;
461}
462
463/*
464** Unlock the database and clear the in-memory cache. This routine
465** sets the state of the pager back to what it was when it was first
466** opened. Any outstanding pages are invalidated and subsequent attempts
467** to access those pages will likely result in a coredump.
468*/
drhd9b02572001-04-15 00:37:09 +0000469static void pager_reset(Pager *pPager){
drhed7c8552001-04-11 14:29:21 +0000470 PgHdr *pPg, *pNext;
drhd9b02572001-04-15 00:37:09 +0000471 for(pPg=pPager->pAll; pPg; pPg=pNext){
472 pNext = pPg->pNextAll;
473 sqliteFree(pPg);
drhed7c8552001-04-11 14:29:21 +0000474 }
475 pPager->pFirst = 0;
drh341eae82003-01-21 02:39:36 +0000476 pPager->pFirstSynced = 0;
drhd9b02572001-04-15 00:37:09 +0000477 pPager->pLast = 0;
478 pPager->pAll = 0;
drhed7c8552001-04-11 14:29:21 +0000479 memset(pPager->aHash, 0, sizeof(pPager->aHash));
480 pPager->nPage = 0;
drhfa86c412002-02-02 15:01:15 +0000481 if( pPager->state>=SQLITE_WRITELOCK ){
drh3aac2dd2004-04-26 14:10:20 +0000482 sqlite3pager_rollback(pPager);
drhed7c8552001-04-11 14:29:21 +0000483 }
danielk19774adee202004-05-08 08:23:19 +0000484 sqlite3OsUnlock(&pPager->fd);
drhed7c8552001-04-11 14:29:21 +0000485 pPager->state = SQLITE_UNLOCK;
drhd9b02572001-04-15 00:37:09 +0000486 pPager->dbSize = -1;
drhed7c8552001-04-11 14:29:21 +0000487 pPager->nRef = 0;
drh8cfbf082001-09-19 13:22:39 +0000488 assert( pPager->journalOpen==0 );
drhed7c8552001-04-11 14:29:21 +0000489}
490
491/*
492** When this routine is called, the pager has the journal file open and
493** a write lock on the database. This routine releases the database
494** write lock and acquires a read lock in its place. The journal file
495** is deleted and closed.
drh50457892003-09-06 01:10:47 +0000496**
497** TODO: Consider keeping the journal file open for temporary databases.
498** This might give a performance improvement on windows where opening
499** a file is an expensive operation.
drhed7c8552001-04-11 14:29:21 +0000500*/
drhd9b02572001-04-15 00:37:09 +0000501static int pager_unwritelock(Pager *pPager){
drhed7c8552001-04-11 14:29:21 +0000502 int rc;
drhd9b02572001-04-15 00:37:09 +0000503 PgHdr *pPg;
drhfa86c412002-02-02 15:01:15 +0000504 if( pPager->state<SQLITE_WRITELOCK ) return SQLITE_OK;
drh3aac2dd2004-04-26 14:10:20 +0000505 sqlite3pager_stmt_commit(pPager);
drhac69b052004-05-12 13:30:07 +0000506 if( pPager->stmtOpen ){
507 sqlite3OsClose(&pPager->stfd);
508 pPager->stmtOpen = 0;
drh0f892532002-05-30 12:27:03 +0000509 }
drhda47d772002-12-02 04:25:19 +0000510 if( pPager->journalOpen ){
danielk19774adee202004-05-08 08:23:19 +0000511 sqlite3OsClose(&pPager->jfd);
drhda47d772002-12-02 04:25:19 +0000512 pPager->journalOpen = 0;
danielk19774adee202004-05-08 08:23:19 +0000513 sqlite3OsDelete(pPager->zJournal);
drhda47d772002-12-02 04:25:19 +0000514 sqliteFree( pPager->aInJournal );
515 pPager->aInJournal = 0;
516 for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
517 pPg->inJournal = 0;
518 pPg->dirty = 0;
drhdb48ee02003-01-16 13:42:43 +0000519 pPg->needSync = 0;
drhda47d772002-12-02 04:25:19 +0000520 }
521 }else{
522 assert( pPager->dirtyFile==0 || pPager->useJournal==0 );
drhd9b02572001-04-15 00:37:09 +0000523 }
danielk19774adee202004-05-08 08:23:19 +0000524 rc = sqlite3OsReadLock(&pPager->fd);
drh8e298f92002-07-06 16:28:47 +0000525 if( rc==SQLITE_OK ){
526 pPager->state = SQLITE_READLOCK;
527 }else{
528 /* This can only happen if a process does a BEGIN, then forks and the
529 ** child process does the COMMIT. Because of the semantics of unix
530 ** file locking, the unlock will fail.
531 */
532 pPager->state = SQLITE_UNLOCK;
533 }
drhed7c8552001-04-11 14:29:21 +0000534 return rc;
535}
536
drhed7c8552001-04-11 14:29:21 +0000537/*
drh968af522003-02-11 14:55:40 +0000538** Compute and return a checksum for the page of data.
drh34e79ce2004-02-08 06:05:46 +0000539**
540** This is not a real checksum. It is really just the sum of the
541** random initial value and the page number. We considered do a checksum
542** of the database, but that was found to be too slow.
drh968af522003-02-11 14:55:40 +0000543*/
544static u32 pager_cksum(Pager *pPager, Pgno pgno, const char *aData){
545 u32 cksum = pPager->cksumInit + pgno;
drh968af522003-02-11 14:55:40 +0000546 return cksum;
547}
548
549/*
drhfa86c412002-02-02 15:01:15 +0000550** Read a single page from the journal file opened on file descriptor
551** jfd. Playback this one page.
drh968af522003-02-11 14:55:40 +0000552**
553** There are three different journal formats. The format parameter determines
554** which format is used by the journal that is played back.
drhfa86c412002-02-02 15:01:15 +0000555*/
drh968af522003-02-11 14:55:40 +0000556static int pager_playback_one_page(Pager *pPager, OsFile *jfd, int format){
drhfa86c412002-02-02 15:01:15 +0000557 int rc;
558 PgHdr *pPg; /* An existing page in the cache */
559 PageRecord pgRec;
drh968af522003-02-11 14:55:40 +0000560 u32 cksum;
drhfa86c412002-02-02 15:01:15 +0000561
drh968af522003-02-11 14:55:40 +0000562 rc = read32bits(format, jfd, &pgRec.pgno);
drh99ee3602003-02-16 19:13:36 +0000563 if( rc!=SQLITE_OK ) return rc;
danielk19774adee202004-05-08 08:23:19 +0000564 rc = sqlite3OsRead(jfd, &pgRec.aData, sizeof(pgRec.aData));
drh99ee3602003-02-16 19:13:36 +0000565 if( rc!=SQLITE_OK ) return rc;
drhfa86c412002-02-02 15:01:15 +0000566
drh968af522003-02-11 14:55:40 +0000567 /* Sanity checking on the page. This is more important that I originally
568 ** thought. If a power failure occurs while the journal is being written,
569 ** it could cause invalid data to be written into the journal. We need to
570 ** detect this invalid data (with high probability) and ignore it.
571 */
572 if( pgRec.pgno==0 ){
573 return SQLITE_DONE;
574 }
drh7d02cb72003-06-04 16:24:39 +0000575 if( pgRec.pgno>(unsigned)pPager->dbSize ){
drh968af522003-02-11 14:55:40 +0000576 return SQLITE_OK;
577 }
578 if( format>=JOURNAL_FORMAT_3 ){
579 rc = read32bits(format, jfd, &cksum);
drh99ee3602003-02-16 19:13:36 +0000580 if( rc ) return rc;
drh968af522003-02-11 14:55:40 +0000581 if( pager_cksum(pPager, pgRec.pgno, pgRec.aData)!=cksum ){
582 return SQLITE_DONE;
583 }
584 }
drhfa86c412002-02-02 15:01:15 +0000585
586 /* Playback the page. Update the in-memory copy of the page
587 ** at the same time, if there is one.
588 */
589 pPg = pager_lookup(pPager, pgRec.pgno);
drh99ee3602003-02-16 19:13:36 +0000590 TRACE2("PLAYBACK %d\n", pgRec.pgno);
danielk19774adee202004-05-08 08:23:19 +0000591 sqlite3OsSeek(&pPager->fd, (pgRec.pgno-1)*(off_t)SQLITE_PAGE_SIZE);
592 rc = sqlite3OsWrite(&pPager->fd, pgRec.aData, SQLITE_PAGE_SIZE);
drhfa86c412002-02-02 15:01:15 +0000593 if( pPg ){
drhacf4ac92003-12-17 23:57:34 +0000594 /* No page should ever be rolled back that is in use, except for page
595 ** 1 which is held in use in order to keep the lock on the database
596 ** active.
597 */
drhb6f41482004-05-14 01:58:11 +0000598 void *pData;
drhacf4ac92003-12-17 23:57:34 +0000599 assert( pPg->nRef==0 || pPg->pgno==1 );
drhb6f41482004-05-14 01:58:11 +0000600 pData = PGHDR_TO_DATA(pPg);
601 memcpy(pData, pgRec.aData, pPager->pageSize);
drhde647132004-05-07 17:57:49 +0000602 if( pPager->xDestructor ){
drhb6f41482004-05-14 01:58:11 +0000603 pPager->xDestructor(pData, pPager->pageSize);
drhde647132004-05-07 17:57:49 +0000604 }
drhdb48ee02003-01-16 13:42:43 +0000605 pPg->dirty = 0;
606 pPg->needSync = 0;
drhb6f41482004-05-14 01:58:11 +0000607 CODEC(pPager, pData, pPg->pgno, 3);
drhfa86c412002-02-02 15:01:15 +0000608 }
609 return rc;
610}
611
612/*
danielk197713adf8a2004-06-03 16:08:41 +0000613** Parameter zMaster is the name of a master journal file. A single journal
614** file that referred to the master journal file has just been rolled back.
615** This routine checks if it is possible to delete the master journal file,
616** and does so if it is.
617*/
618static int pager_delmaster(const char *zMaster){
619 int rc;
620 int master_open = 0;
621 OsFile master;
622 char *zMasterJournal = 0; /* Contents of master journal file */
623 off_t nMasterJournal; /* Size of master journal file */
624
625 /* Open the master journal file exclusively in case some other process
626 ** is running this routine also. Not that it makes too much difference.
627 */
628 rc = sqlite3OsOpenExclusive(zMaster, &master, 0);
629 if( rc!=SQLITE_OK ) goto delmaster_out;
630 master_open = 1;
631
632 rc = sqlite3OsFileSize(&master, &nMasterJournal);
633 if( rc!=SQLITE_OK ) goto delmaster_out;
634
635 if( nMasterJournal>0 ){
636 char *zDb;
637 zMasterJournal = (char *)sqliteMalloc(nMasterJournal);
638 if( !zMasterJournal ){
639 rc = SQLITE_NOMEM;
640 goto delmaster_out;
641 }
642 rc = sqlite3OsRead(&master, zMasterJournal, nMasterJournal);
643 if( rc!=SQLITE_OK ) goto delmaster_out;
644
645 zDb = zMasterJournal;
646 while( (zDb-zMasterJournal)<nMasterJournal ){
647 char *zJournal = 0;
648 sqlite3SetString(&zJournal, zDb, "-journal", 0);
649 if( !zJournal ){
650 rc = SQLITE_NOMEM;
651 goto delmaster_out;
652 }
653 if( sqlite3OsFileExists(zJournal) ){
654 /* One of the journals pointed to by the master journal exists.
655 ** Open it and check if it points at the master journal. If
656 ** so, return without deleting the master journal file.
657 */
658 OsFile journal;
659 int nMaster;
660
661 rc = sqlite3OsOpenReadOnly(zJournal, &journal);
662 if( rc!=SQLITE_OK ){
663 sqlite3OsClose(&journal);
664 sqliteFree(zJournal);
665 goto delmaster_out;
666 }
667 sqlite3OsClose(&journal);
668
669 /* Seek to the point in the journal where the master journal name
670 ** is stored. Read the master journal name into memory obtained
671 ** from malloc.
672 */
673 rc = sqlite3OsSeek(&journal, sizeof(aJournalMagic3)+2*sizeof(u32));
674 if( rc!=SQLITE_OK ) goto delmaster_out;
675 rc = read32bits(3, &journal, (u32 *)&nMaster);
676 if( rc!=SQLITE_OK ) goto delmaster_out;
677 if( nMaster>0 && nMaster==strlen(zMaster)+1 ){
678 char *zMasterPtr = (char *)sqliteMalloc(nMaster);
679 if( !zMasterPtr ){
680 rc = SQLITE_NOMEM;
681 }
682 rc = sqlite3OsRead(&journal, zMasterPtr, nMaster);
683 if( rc!=SQLITE_OK ){
684 sqliteFree(zMasterPtr);
685 goto delmaster_out;
686 }
687 if( 0==strncmp(zMasterPtr, zMaster, nMaster) ){
688 /* We have a match. Do not delete the master journal file. */
689 sqliteFree(zMasterPtr);
690 goto delmaster_out;
691 }
692 }
693 }
694 zDb += (strlen(zDb)+1);
695 }
696 }
697
698 sqlite3OsDelete(zMaster);
699
700delmaster_out:
701 if( zMasterJournal ){
702 sqliteFree(zMasterJournal);
703 }
704 if( master_open ){
705 sqlite3OsClose(&master);
706 }
707 return rc;
708}
709
710/*
drhed7c8552001-04-11 14:29:21 +0000711** Playback the journal and thus restore the database file to
712** the state it was in before we started making changes.
713**
drh34e79ce2004-02-08 06:05:46 +0000714** The journal file format is as follows:
715**
716** * 8 byte prefix. One of the aJournalMagic123 vectors defined
717** above. The format of the journal file is determined by which
718** of the three prefix vectors is seen.
719** * 4 byte big-endian integer which is the number of valid page records
720** in the journal. If this value is 0xffffffff, then compute the
721** number of page records from the journal size. This field appears
722** in format 3 only.
723** * 4 byte big-endian integer which is the initial value for the
724** sanity checksum. This field appears in format 3 only.
725** * 4 byte integer which is the number of pages to truncate the
726** database to during a rollback.
727** * Zero or more pages instances, each as follows:
728** + 4 byte page number.
drhd0ba1932004-02-10 01:54:28 +0000729** + SQLITE_PAGE_SIZE bytes of data.
drh34e79ce2004-02-08 06:05:46 +0000730** + 4 byte checksum (format 3 only)
731**
732** When we speak of the journal header, we mean the first 4 bullets above.
733** Each entry in the journal is an instance of the 5th bullet. Note that
734** bullets 2 and 3 only appear in format-3 journals.
735**
736** Call the value from the second bullet "nRec". nRec is the number of
737** valid page entries in the journal. In most cases, you can compute the
738** value of nRec from the size of the journal file. But if a power
739** failure occurred while the journal was being written, it could be the
740** case that the size of the journal file had already been increased but
741** the extra entries had not yet made it safely to disk. In such a case,
742** the value of nRec computed from the file size would be too large. For
743** that reason, we always use the nRec value in the header.
744**
745** If the nRec value is 0xffffffff it means that nRec should be computed
746** from the file size. This value is used when the user selects the
747** no-sync option for the journal. A power failure could lead to corruption
748** in this case. But for things like temporary table (which will be
749** deleted when the power is restored) we don't care.
750**
751** Journal formats 1 and 2 do not have an nRec value in the header so we
752** have to compute nRec from the file size. This has risks (as described
753** above) which is why all persistent tables have been changed to use
754** format 3.
drhed7c8552001-04-11 14:29:21 +0000755**
drhd9b02572001-04-15 00:37:09 +0000756** If the file opened as the journal file is not a well-formed
drh34e79ce2004-02-08 06:05:46 +0000757** journal file then the database will likely already be
758** corrupted, so the PAGER_ERR_CORRUPT bit is set in pPager->errMask
759** and SQLITE_CORRUPT is returned. If it all works, then this routine
760** returns SQLITE_OK.
drhed7c8552001-04-11 14:29:21 +0000761*/
drh99ee3602003-02-16 19:13:36 +0000762static int pager_playback(Pager *pPager, int useJournalSize){
drh968af522003-02-11 14:55:40 +0000763 off_t szJ; /* Size of the journal file in bytes */
764 int nRec; /* Number of Records in the journal */
drhd9b02572001-04-15 00:37:09 +0000765 int i; /* Loop counter */
766 Pgno mxPg = 0; /* Size of the original file in pages */
drh968af522003-02-11 14:55:40 +0000767 int format; /* Format of the journal file. */
768 unsigned char aMagic[sizeof(aJournalMagic1)];
drhed7c8552001-04-11 14:29:21 +0000769 int rc;
danielk197713adf8a2004-06-03 16:08:41 +0000770 char *zMaster = 0; /* Name of master journal file if any */
drhed7c8552001-04-11 14:29:21 +0000771
drhc3a64ba2001-11-22 00:01:27 +0000772 /* Figure out how many records are in the journal. Abort early if
773 ** the journal is empty.
drhed7c8552001-04-11 14:29:21 +0000774 */
drh8cfbf082001-09-19 13:22:39 +0000775 assert( pPager->journalOpen );
danielk19774adee202004-05-08 08:23:19 +0000776 sqlite3OsSeek(&pPager->jfd, 0);
777 rc = sqlite3OsFileSize(&pPager->jfd, &szJ);
drhc3a64ba2001-11-22 00:01:27 +0000778 if( rc!=SQLITE_OK ){
779 goto end_playback;
780 }
drh240c5792004-02-08 00:40:52 +0000781
782 /* If the journal file is too small to contain a complete header,
drh34e79ce2004-02-08 06:05:46 +0000783 ** it must mean that the process that created the journal was just
784 ** beginning to write the journal file when it died. In that case,
785 ** the database file should have still been completely unchanged.
786 ** Nothing needs to be rolled back. We can safely ignore this journal.
drh240c5792004-02-08 00:40:52 +0000787 */
drh968af522003-02-11 14:55:40 +0000788 if( szJ < sizeof(aMagic)+sizeof(Pgno) ){
drhc3a64ba2001-11-22 00:01:27 +0000789 goto end_playback;
790 }
791
792 /* Read the beginning of the journal and truncate the
793 ** database file back to its original size.
794 */
danielk19774adee202004-05-08 08:23:19 +0000795 rc = sqlite3OsRead(&pPager->jfd, aMagic, sizeof(aMagic));
drh94f33312002-08-12 12:29:56 +0000796 if( rc!=SQLITE_OK ){
drh81a20f22001-10-12 17:30:04 +0000797 rc = SQLITE_PROTOCOL;
798 goto end_playback;
drhd9b02572001-04-15 00:37:09 +0000799 }
drh968af522003-02-11 14:55:40 +0000800 if( memcmp(aMagic, aJournalMagic3, sizeof(aMagic))==0 ){
801 format = JOURNAL_FORMAT_3;
802 }else if( memcmp(aMagic, aJournalMagic2, sizeof(aMagic))==0 ){
803 format = JOURNAL_FORMAT_2;
804 }else if( memcmp(aMagic, aJournalMagic1, sizeof(aMagic))==0 ){
805 format = JOURNAL_FORMAT_1;
drh94f33312002-08-12 12:29:56 +0000806 }else{
807 rc = SQLITE_PROTOCOL;
808 goto end_playback;
809 }
drh968af522003-02-11 14:55:40 +0000810 if( format>=JOURNAL_FORMAT_3 ){
danielk197713adf8a2004-06-03 16:08:41 +0000811 if( szJ < sizeof(aMagic) + 4*sizeof(u32) ){
drh240c5792004-02-08 00:40:52 +0000812 /* Ignore the journal if it is too small to contain a complete
813 ** header. We already did this test once above, but at the prior
814 ** test, we did not know the journal format and so we had to assume
815 ** the smallest possible header. Now we know the header is bigger
drh34e79ce2004-02-08 06:05:46 +0000816 ** than the minimum so we test again.
drh240c5792004-02-08 00:40:52 +0000817 */
818 goto end_playback;
819 }
drh133cdf62004-01-07 02:52:07 +0000820 rc = read32bits(format, &pPager->jfd, (u32*)&nRec);
drh968af522003-02-11 14:55:40 +0000821 if( rc ) goto end_playback;
822 rc = read32bits(format, &pPager->jfd, &pPager->cksumInit);
823 if( rc ) goto end_playback;
drh99ee3602003-02-16 19:13:36 +0000824 if( nRec==0xffffffff || useJournalSize ){
danielk197713adf8a2004-06-03 16:08:41 +0000825 nRec = (szJ - JOURNAL_HDR_SZ(pPager, 3))/JOURNAL_PG_SZ(3);
826 }
827
828 /* Check if a master journal file is specified. If one is specified,
829 ** only proceed with the playback if it still exists.
830 */
831 rc = read32bits(format, &pPager->jfd, &pPager->nMaster);
832 if( rc ) goto end_playback;
833 if( pPager->nMaster>0 ){
834 zMaster = sqliteMalloc(pPager->nMaster);
835 if( !zMaster ){
836 rc = SQLITE_NOMEM;
837 goto end_playback;
838 }
839 rc = sqlite3OsRead(&pPager->jfd, zMaster, pPager->nMaster);
840 if( rc!=SQLITE_OK || (strlen(zMaster) && !sqlite3OsFileExists(zMaster)) ){
841 goto end_playback;
842 }
drh968af522003-02-11 14:55:40 +0000843 }
844 }else{
danielk197713adf8a2004-06-03 16:08:41 +0000845 nRec = (szJ - JOURNAL_HDR_SZ(pPager, 2))/JOURNAL_PG_SZ(2);
846 assert( nRec*JOURNAL_PG_SZ(2)+JOURNAL_HDR_SZ(pPager, 2)==szJ );
drh968af522003-02-11 14:55:40 +0000847 }
848 rc = read32bits(format, &pPager->jfd, &mxPg);
drhd9b02572001-04-15 00:37:09 +0000849 if( rc!=SQLITE_OK ){
drh81a20f22001-10-12 17:30:04 +0000850 goto end_playback;
drhd9b02572001-04-15 00:37:09 +0000851 }
drhd8d66e82003-02-12 02:10:15 +0000852 assert( pPager->origDbSize==0 || pPager->origDbSize==mxPg );
danielk19774adee202004-05-08 08:23:19 +0000853 rc = sqlite3OsTruncate(&pPager->fd, SQLITE_PAGE_SIZE*(off_t)mxPg);
drh81a20f22001-10-12 17:30:04 +0000854 if( rc!=SQLITE_OK ){
855 goto end_playback;
856 }
drhd9b02572001-04-15 00:37:09 +0000857 pPager->dbSize = mxPg;
858
drhfa86c412002-02-02 15:01:15 +0000859 /* Copy original pages out of the journal and back into the database file.
drhed7c8552001-04-11 14:29:21 +0000860 */
drh968af522003-02-11 14:55:40 +0000861 for(i=0; i<nRec; i++){
862 rc = pager_playback_one_page(pPager, &pPager->jfd, format);
863 if( rc!=SQLITE_OK ){
864 if( rc==SQLITE_DONE ){
drh968af522003-02-11 14:55:40 +0000865 rc = SQLITE_OK;
866 }
867 break;
868 }
drhed7c8552001-04-11 14:29:21 +0000869 }
drh81a20f22001-10-12 17:30:04 +0000870
drh4a0681e2003-02-13 01:58:20 +0000871 /* Pages that have been written to the journal but never synced
872 ** where not restored by the loop above. We have to restore those
drh240c5792004-02-08 00:40:52 +0000873 ** pages by reading them back from the original database.
drhdb48ee02003-01-16 13:42:43 +0000874 */
875 if( rc==SQLITE_OK ){
876 PgHdr *pPg;
877 for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
drhd0ba1932004-02-10 01:54:28 +0000878 char zBuf[SQLITE_PAGE_SIZE];
drh4a0681e2003-02-13 01:58:20 +0000879 if( !pPg->dirty ) continue;
drhdb48ee02003-01-16 13:42:43 +0000880 if( (int)pPg->pgno <= pPager->origDbSize ){
danielk19774adee202004-05-08 08:23:19 +0000881 sqlite3OsSeek(&pPager->fd, SQLITE_PAGE_SIZE*(off_t)(pPg->pgno-1));
882 rc = sqlite3OsRead(&pPager->fd, zBuf, SQLITE_PAGE_SIZE);
drh9eb9e262004-02-11 02:18:05 +0000883 TRACE2("REFETCH %d\n", pPg->pgno);
884 CODEC(pPager, zBuf, pPg->pgno, 2);
drhdb48ee02003-01-16 13:42:43 +0000885 if( rc ) break;
886 }else{
drhd0ba1932004-02-10 01:54:28 +0000887 memset(zBuf, 0, SQLITE_PAGE_SIZE);
drhdb48ee02003-01-16 13:42:43 +0000888 }
drhd0ba1932004-02-10 01:54:28 +0000889 if( pPg->nRef==0 || memcmp(zBuf, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE) ){
890 memcpy(PGHDR_TO_DATA(pPg), zBuf, SQLITE_PAGE_SIZE);
drh3a840692003-01-29 22:58:26 +0000891 memset(PGHDR_TO_EXTRA(pPg), 0, pPager->nExtra);
892 }
drhdb48ee02003-01-16 13:42:43 +0000893 pPg->needSync = 0;
894 pPg->dirty = 0;
895 }
896 }
drh4a0681e2003-02-13 01:58:20 +0000897
898end_playback:
danielk197713adf8a2004-06-03 16:08:41 +0000899 if( zMaster ){
900 /* If there was a master journal and this routine will return true,
901 ** see if it is possible to delete the master journal. If errors
902 ** occur during this process, ignore them.
903 */
904 if( rc==SQLITE_OK ){
905 pager_delmaster(zMaster);
906 }
907 sqliteFree(zMaster);
908 }
drhd9b02572001-04-15 00:37:09 +0000909 if( rc!=SQLITE_OK ){
danielk197713adf8a2004-06-03 16:08:41 +0000910 /* FIX ME: We shouldn't delete the journal if an error occured during
911 ** rollback. It may have been a transient error and the rollback may
912 ** succeed next time it is attempted.
913 */
drhd9b02572001-04-15 00:37:09 +0000914 pager_unwritelock(pPager);
915 pPager->errMask |= PAGER_ERR_CORRUPT;
916 rc = SQLITE_CORRUPT;
917 }else{
918 rc = pager_unwritelock(pPager);
drhed7c8552001-04-11 14:29:21 +0000919 }
drhd9b02572001-04-15 00:37:09 +0000920 return rc;
drhed7c8552001-04-11 14:29:21 +0000921}
922
923/*
drhac69b052004-05-12 13:30:07 +0000924** Playback the statement journal.
drhfa86c412002-02-02 15:01:15 +0000925**
926** This is similar to playing back the transaction journal but with
927** a few extra twists.
928**
drh663fc632002-02-02 18:49:19 +0000929** (1) The number of pages in the database file at the start of
drhac69b052004-05-12 13:30:07 +0000930** the statement is stored in pPager->stmtSize, not in the
drh663fc632002-02-02 18:49:19 +0000931** journal file itself.
drhfa86c412002-02-02 15:01:15 +0000932**
drhac69b052004-05-12 13:30:07 +0000933** (2) In addition to playing back the statement journal, also
drhfa86c412002-02-02 15:01:15 +0000934** playback all pages of the transaction journal beginning
drhac69b052004-05-12 13:30:07 +0000935** at offset pPager->stmtJSize.
drhfa86c412002-02-02 15:01:15 +0000936*/
drh3aac2dd2004-04-26 14:10:20 +0000937static int pager_stmt_playback(Pager *pPager){
drh968af522003-02-11 14:55:40 +0000938 off_t szJ; /* Size of the full journal */
939 int nRec; /* Number of Records */
drhfa86c412002-02-02 15:01:15 +0000940 int i; /* Loop counter */
941 int rc;
942
943 /* Truncate the database back to its original size.
944 */
drhac69b052004-05-12 13:30:07 +0000945 rc = sqlite3OsTruncate(&pPager->fd, SQLITE_PAGE_SIZE*(off_t)pPager->stmtSize);
946 pPager->dbSize = pPager->stmtSize;
drhfa86c412002-02-02 15:01:15 +0000947
drhac69b052004-05-12 13:30:07 +0000948 /* Figure out how many records are in the statement journal.
drhfa86c412002-02-02 15:01:15 +0000949 */
drhac69b052004-05-12 13:30:07 +0000950 assert( pPager->stmtInUse && pPager->journalOpen );
951 sqlite3OsSeek(&pPager->stfd, 0);
952 nRec = pPager->stmtNRec;
drhfa86c412002-02-02 15:01:15 +0000953
drhac69b052004-05-12 13:30:07 +0000954 /* Copy original pages out of the statement journal and back into the
955 ** database file. Note that the statement journal always uses format
drh968af522003-02-11 14:55:40 +0000956 ** 2 instead of format 3 since it does not need to be concerned with
957 ** power failures corrupting the journal and can thus omit the checksums.
drhfa86c412002-02-02 15:01:15 +0000958 */
959 for(i=nRec-1; i>=0; i--){
drhac69b052004-05-12 13:30:07 +0000960 rc = pager_playback_one_page(pPager, &pPager->stfd, 2);
drh968af522003-02-11 14:55:40 +0000961 assert( rc!=SQLITE_DONE );
drh3aac2dd2004-04-26 14:10:20 +0000962 if( rc!=SQLITE_OK ) goto end_stmt_playback;
drhfa86c412002-02-02 15:01:15 +0000963 }
964
965 /* Figure out how many pages need to be copied out of the transaction
966 ** journal.
967 */
drhac69b052004-05-12 13:30:07 +0000968 rc = sqlite3OsSeek(&pPager->jfd, pPager->stmtJSize);
drhfa86c412002-02-02 15:01:15 +0000969 if( rc!=SQLITE_OK ){
drh3aac2dd2004-04-26 14:10:20 +0000970 goto end_stmt_playback;
drhfa86c412002-02-02 15:01:15 +0000971 }
danielk19774adee202004-05-08 08:23:19 +0000972 rc = sqlite3OsFileSize(&pPager->jfd, &szJ);
drhfa86c412002-02-02 15:01:15 +0000973 if( rc!=SQLITE_OK ){
drh3aac2dd2004-04-26 14:10:20 +0000974 goto end_stmt_playback;
drhfa86c412002-02-02 15:01:15 +0000975 }
drhac69b052004-05-12 13:30:07 +0000976 nRec = (szJ - pPager->stmtJSize)/JOURNAL_PG_SZ(journal_format);
drhfa86c412002-02-02 15:01:15 +0000977 for(i=nRec-1; i>=0; i--){
drh968af522003-02-11 14:55:40 +0000978 rc = pager_playback_one_page(pPager, &pPager->jfd, journal_format);
979 if( rc!=SQLITE_OK ){
980 assert( rc!=SQLITE_DONE );
drh3aac2dd2004-04-26 14:10:20 +0000981 goto end_stmt_playback;
drh968af522003-02-11 14:55:40 +0000982 }
drhfa86c412002-02-02 15:01:15 +0000983 }
984
drh3aac2dd2004-04-26 14:10:20 +0000985end_stmt_playback:
drhfa86c412002-02-02 15:01:15 +0000986 if( rc!=SQLITE_OK ){
drhfa86c412002-02-02 15:01:15 +0000987 pPager->errMask |= PAGER_ERR_CORRUPT;
988 rc = SQLITE_CORRUPT;
drhfa86c412002-02-02 15:01:15 +0000989 }
990 return rc;
991}
992
993/*
drhf57b14a2001-09-14 18:54:08 +0000994** Change the maximum number of in-memory pages that are allowed.
drhcd61c282002-03-06 22:01:34 +0000995**
996** The maximum number is the absolute value of the mxPage parameter.
997** If mxPage is negative, the noSync flag is also set. noSync bypasses
danielk19774adee202004-05-08 08:23:19 +0000998** calls to sqlite3OsSync(). The pager runs much faster with noSync on,
drhcd61c282002-03-06 22:01:34 +0000999** but if the operating system crashes or there is an abrupt power
1000** failure, the database file might be left in an inconsistent and
1001** unrepairable state.
drhf57b14a2001-09-14 18:54:08 +00001002*/
drh3aac2dd2004-04-26 14:10:20 +00001003void sqlite3pager_set_cachesize(Pager *pPager, int mxPage){
drh603240c2002-03-05 01:11:12 +00001004 if( mxPage>=0 ){
drha1680452002-04-18 01:56:57 +00001005 pPager->noSync = pPager->tempFile;
danielk19771d850a72004-05-31 08:26:49 +00001006 if( pPager->noSync ) pPager->needSync = 0;
drh603240c2002-03-05 01:11:12 +00001007 }else{
1008 pPager->noSync = 1;
1009 mxPage = -mxPage;
1010 }
drhf57b14a2001-09-14 18:54:08 +00001011 if( mxPage>10 ){
1012 pPager->mxPage = mxPage;
1013 }
1014}
1015
1016/*
drh973b6e32003-02-12 14:09:42 +00001017** Adjust the robustness of the database to damage due to OS crashes
1018** or power failures by changing the number of syncs()s when writing
1019** the rollback journal. There are three levels:
1020**
danielk19774adee202004-05-08 08:23:19 +00001021** OFF sqlite3OsSync() is never called. This is the default
drh973b6e32003-02-12 14:09:42 +00001022** for temporary and transient files.
1023**
1024** NORMAL The journal is synced once before writes begin on the
1025** database. This is normally adequate protection, but
1026** it is theoretically possible, though very unlikely,
1027** that an inopertune power failure could leave the journal
1028** in a state which would cause damage to the database
1029** when it is rolled back.
1030**
1031** FULL The journal is synced twice before writes begin on the
drh34e79ce2004-02-08 06:05:46 +00001032** database (with some additional information - the nRec field
1033** of the journal header - being written in between the two
1034** syncs). If we assume that writing a
drh973b6e32003-02-12 14:09:42 +00001035** single disk sector is atomic, then this mode provides
1036** assurance that the journal will not be corrupted to the
1037** point of causing damage to the database during rollback.
1038**
1039** Numeric values associated with these states are OFF==1, NORMAL=2,
1040** and FULL=3.
1041*/
drh3aac2dd2004-04-26 14:10:20 +00001042void sqlite3pager_set_safety_level(Pager *pPager, int level){
drh973b6e32003-02-12 14:09:42 +00001043 pPager->noSync = level==1 || pPager->tempFile;
1044 pPager->fullSync = level==3 && !pPager->tempFile;
danielk19771d850a72004-05-31 08:26:49 +00001045 if( pPager->noSync ) pPager->needSync = 0;
drh973b6e32003-02-12 14:09:42 +00001046}
1047
1048/*
drhfa86c412002-02-02 15:01:15 +00001049** Open a temporary file. Write the name of the file into zName
1050** (zName must be at least SQLITE_TEMPNAME_SIZE bytes long.) Write
1051** the file descriptor into *fd. Return SQLITE_OK on success or some
1052** other error code if we fail.
1053**
1054** The OS will automatically delete the temporary file when it is
1055** closed.
1056*/
drh3aac2dd2004-04-26 14:10:20 +00001057static int sqlite3pager_opentemp(char *zFile, OsFile *fd){
drhfa86c412002-02-02 15:01:15 +00001058 int cnt = 8;
1059 int rc;
1060 do{
1061 cnt--;
danielk19774adee202004-05-08 08:23:19 +00001062 sqlite3OsTempFileName(zFile);
1063 rc = sqlite3OsOpenExclusive(zFile, fd, 1);
drhfa86c412002-02-02 15:01:15 +00001064 }while( cnt>0 && rc!=SQLITE_OK );
1065 return rc;
1066}
1067
1068/*
drhed7c8552001-04-11 14:29:21 +00001069** Create a new page cache and put a pointer to the page cache in *ppPager.
drh5e00f6c2001-09-13 13:46:56 +00001070** The file to be cached need not exist. The file is not locked until
drh3aac2dd2004-04-26 14:10:20 +00001071** the first call to sqlite3pager_get() and is only held open until the
1072** last page is released using sqlite3pager_unref().
drh382c0242001-10-06 16:33:02 +00001073**
drh6446c4d2001-12-15 14:22:18 +00001074** If zFilename is NULL then a randomly-named temporary file is created
1075** and used as the file to be cached. The file will be deleted
1076** automatically when it is closed.
drhed7c8552001-04-11 14:29:21 +00001077*/
drh3aac2dd2004-04-26 14:10:20 +00001078int sqlite3pager_open(
drh7e3b0a02001-04-28 16:52:40 +00001079 Pager **ppPager, /* Return the Pager structure here */
1080 const char *zFilename, /* Name of the database file to open */
1081 int mxPage, /* Max number of in-memory cache pages */
drhda47d772002-12-02 04:25:19 +00001082 int nExtra, /* Extra bytes append to each in-memory page */
1083 int useJournal /* TRUE to use a rollback journal on this file */
drh7e3b0a02001-04-28 16:52:40 +00001084){
drhed7c8552001-04-11 14:29:21 +00001085 Pager *pPager;
drh3e7a6092002-12-07 21:45:14 +00001086 char *zFullPathname;
drhed7c8552001-04-11 14:29:21 +00001087 int nameLen;
drh8cfbf082001-09-19 13:22:39 +00001088 OsFile fd;
drha76c82e2003-07-27 18:59:42 +00001089 int rc, i;
drh5e00f6c2001-09-13 13:46:56 +00001090 int tempFile;
drhac69b052004-05-12 13:30:07 +00001091 int memDb = 0;
drh5e00f6c2001-09-13 13:46:56 +00001092 int readOnly = 0;
drh8cfbf082001-09-19 13:22:39 +00001093 char zTemp[SQLITE_TEMPNAME_SIZE];
drhed7c8552001-04-11 14:29:21 +00001094
drhd9b02572001-04-15 00:37:09 +00001095 *ppPager = 0;
danielk19776f8a5032004-05-10 10:34:51 +00001096 if( sqlite3_malloc_failed ){
drhd9b02572001-04-15 00:37:09 +00001097 return SQLITE_NOMEM;
1098 }
drh901afd42003-08-26 11:25:58 +00001099 if( zFilename && zFilename[0] ){
drhac69b052004-05-12 13:30:07 +00001100 if( strcmp(zFilename,":memory:")==0 ){
1101 memDb = 1;
1102 zFullPathname = sqliteMalloc(4);
1103 if( zFullPathname ) strcpy(zFullPathname, "nil");
1104 rc = SQLITE_OK;
1105 }else{
1106 zFullPathname = sqlite3OsFullPathname(zFilename);
1107 rc = sqlite3OsOpenReadWrite(zFullPathname, &fd, &readOnly);
1108 tempFile = 0;
1109 }
drh5e00f6c2001-09-13 13:46:56 +00001110 }else{
drh3aac2dd2004-04-26 14:10:20 +00001111 rc = sqlite3pager_opentemp(zTemp, &fd);
drh5e00f6c2001-09-13 13:46:56 +00001112 zFilename = zTemp;
danielk19774adee202004-05-08 08:23:19 +00001113 zFullPathname = sqlite3OsFullPathname(zFilename);
drh5e00f6c2001-09-13 13:46:56 +00001114 tempFile = 1;
1115 }
danielk19776f8a5032004-05-10 10:34:51 +00001116 if( sqlite3_malloc_failed ){
drh3e7a6092002-12-07 21:45:14 +00001117 return SQLITE_NOMEM;
1118 }
drh8cfbf082001-09-19 13:22:39 +00001119 if( rc!=SQLITE_OK ){
drh3e7a6092002-12-07 21:45:14 +00001120 sqliteFree(zFullPathname);
drhed7c8552001-04-11 14:29:21 +00001121 return SQLITE_CANTOPEN;
1122 }
drh3e7a6092002-12-07 21:45:14 +00001123 nameLen = strlen(zFullPathname);
drha76c82e2003-07-27 18:59:42 +00001124 pPager = sqliteMalloc( sizeof(*pPager) + nameLen*3 + 30 );
drhd9b02572001-04-15 00:37:09 +00001125 if( pPager==0 ){
danielk19774adee202004-05-08 08:23:19 +00001126 sqlite3OsClose(&fd);
drh3e7a6092002-12-07 21:45:14 +00001127 sqliteFree(zFullPathname);
drhd9b02572001-04-15 00:37:09 +00001128 return SQLITE_NOMEM;
1129 }
drhdb48ee02003-01-16 13:42:43 +00001130 SET_PAGER(pPager);
drhed7c8552001-04-11 14:29:21 +00001131 pPager->zFilename = (char*)&pPager[1];
drha76c82e2003-07-27 18:59:42 +00001132 pPager->zDirectory = &pPager->zFilename[nameLen+1];
1133 pPager->zJournal = &pPager->zDirectory[nameLen+1];
drh3e7a6092002-12-07 21:45:14 +00001134 strcpy(pPager->zFilename, zFullPathname);
drha76c82e2003-07-27 18:59:42 +00001135 strcpy(pPager->zDirectory, zFullPathname);
1136 for(i=nameLen; i>0 && pPager->zDirectory[i-1]!='/'; i--){}
1137 if( i>0 ) pPager->zDirectory[i-1] = 0;
drh3e7a6092002-12-07 21:45:14 +00001138 strcpy(pPager->zJournal, zFullPathname);
1139 sqliteFree(zFullPathname);
drhed7c8552001-04-11 14:29:21 +00001140 strcpy(&pPager->zJournal[nameLen], "-journal");
1141 pPager->fd = fd;
drh8cfbf082001-09-19 13:22:39 +00001142 pPager->journalOpen = 0;
drhac69b052004-05-12 13:30:07 +00001143 pPager->useJournal = useJournal && !memDb;
1144 pPager->stmtOpen = 0;
1145 pPager->stmtInUse = 0;
drhed7c8552001-04-11 14:29:21 +00001146 pPager->nRef = 0;
drhac69b052004-05-12 13:30:07 +00001147 pPager->dbSize = memDb-1;
1148 pPager->pageSize = SQLITE_PAGE_SIZE;
1149 pPager->stmtSize = 0;
1150 pPager->stmtJSize = 0;
drhed7c8552001-04-11 14:29:21 +00001151 pPager->nPage = 0;
drhd79caeb2001-04-15 02:27:24 +00001152 pPager->mxPage = mxPage>5 ? mxPage : 10;
drhed7c8552001-04-11 14:29:21 +00001153 pPager->state = SQLITE_UNLOCK;
drhd9b02572001-04-15 00:37:09 +00001154 pPager->errMask = 0;
drh5e00f6c2001-09-13 13:46:56 +00001155 pPager->tempFile = tempFile;
drhac69b052004-05-12 13:30:07 +00001156 pPager->memDb = memDb;
drh5e00f6c2001-09-13 13:46:56 +00001157 pPager->readOnly = readOnly;
drhf57b14a2001-09-14 18:54:08 +00001158 pPager->needSync = 0;
drhda47d772002-12-02 04:25:19 +00001159 pPager->noSync = pPager->tempFile || !useJournal;
drhed7c8552001-04-11 14:29:21 +00001160 pPager->pFirst = 0;
drh341eae82003-01-21 02:39:36 +00001161 pPager->pFirstSynced = 0;
drhed7c8552001-04-11 14:29:21 +00001162 pPager->pLast = 0;
drh7c717f72001-06-24 20:39:41 +00001163 pPager->nExtra = nExtra;
drhed7c8552001-04-11 14:29:21 +00001164 memset(pPager->aHash, 0, sizeof(pPager->aHash));
1165 *ppPager = pPager;
1166 return SQLITE_OK;
1167}
1168
1169/*
drh72f82862001-05-24 21:06:34 +00001170** Set the destructor for this pager. If not NULL, the destructor is called
drh5e00f6c2001-09-13 13:46:56 +00001171** when the reference count on each page reaches zero. The destructor can
1172** be used to clean up information in the extra segment appended to each page.
drh72f82862001-05-24 21:06:34 +00001173**
drh3aac2dd2004-04-26 14:10:20 +00001174** The destructor is not called as a result sqlite3pager_close().
1175** Destructors are only called by sqlite3pager_unref().
drh72f82862001-05-24 21:06:34 +00001176*/
drhb6f41482004-05-14 01:58:11 +00001177void sqlite3pager_set_destructor(Pager *pPager, void (*xDesc)(void*,int)){
drh72f82862001-05-24 21:06:34 +00001178 pPager->xDestructor = xDesc;
1179}
1180
1181/*
drh5e00f6c2001-09-13 13:46:56 +00001182** Return the total number of pages in the disk file associated with
1183** pPager.
drhed7c8552001-04-11 14:29:21 +00001184*/
drh3aac2dd2004-04-26 14:10:20 +00001185int sqlite3pager_pagecount(Pager *pPager){
drh28be87c2002-11-05 23:03:02 +00001186 off_t n;
drhd9b02572001-04-15 00:37:09 +00001187 assert( pPager!=0 );
drhed7c8552001-04-11 14:29:21 +00001188 if( pPager->dbSize>=0 ){
1189 return pPager->dbSize;
1190 }
danielk19774adee202004-05-08 08:23:19 +00001191 if( sqlite3OsFileSize(&pPager->fd, &n)!=SQLITE_OK ){
drh81a20f22001-10-12 17:30:04 +00001192 pPager->errMask |= PAGER_ERR_DISK;
drh8cfbf082001-09-19 13:22:39 +00001193 return 0;
drhed7c8552001-04-11 14:29:21 +00001194 }
drhd0ba1932004-02-10 01:54:28 +00001195 n /= SQLITE_PAGE_SIZE;
drhd9b02572001-04-15 00:37:09 +00001196 if( pPager->state!=SQLITE_UNLOCK ){
drhed7c8552001-04-11 14:29:21 +00001197 pPager->dbSize = n;
1198 }
1199 return n;
1200}
1201
1202/*
drhf7c57532003-04-25 13:22:51 +00001203** Forward declaration
1204*/
danielk197713adf8a2004-06-03 16:08:41 +00001205static int syncJournal(Pager*, const char*);
drhf7c57532003-04-25 13:22:51 +00001206
drhac69b052004-05-12 13:30:07 +00001207
1208/*
1209** Unlink a page from the free list (the list of all pages where nRef==0)
1210** and from its hash collision chain.
1211*/
1212static void unlinkPage(PgHdr *pPg){
1213 Pager *pPager = pPg->pPager;
1214
1215 /* Keep the pFirstSynced pointer pointing at the first synchronized page */
1216 if( pPg==pPager->pFirstSynced ){
1217 PgHdr *p = pPg->pNextFree;
1218 while( p && p->needSync ){ p = p->pNextFree; }
1219 pPager->pFirstSynced = p;
1220 }
1221
1222 /* Unlink from the freelist */
1223 if( pPg->pPrevFree ){
1224 pPg->pPrevFree->pNextFree = pPg->pNextFree;
1225 }else{
1226 assert( pPager->pFirst==pPg );
1227 pPager->pFirst = pPg->pNextFree;
1228 }
1229 if( pPg->pNextFree ){
1230 pPg->pNextFree->pPrevFree = pPg->pPrevFree;
1231 }else{
1232 assert( pPager->pLast==pPg );
1233 pPager->pLast = pPg->pPrevFree;
1234 }
1235 pPg->pNextFree = pPg->pPrevFree = 0;
1236
1237 /* Unlink from the pgno hash table */
1238 if( pPg->pNextHash ){
1239 pPg->pNextHash->pPrevHash = pPg->pPrevHash;
1240 }
1241 if( pPg->pPrevHash ){
1242 pPg->pPrevHash->pNextHash = pPg->pNextHash;
1243 }else{
1244 int h = pager_hash(pPg->pgno);
1245 assert( pPager->aHash[h]==pPg );
1246 pPager->aHash[h] = pPg->pNextHash;
1247 }
1248 pPg->pNextHash = pPg->pPrevHash = 0;
1249}
1250
1251/*
1252** This routine is used to truncate an in-memory database. Delete
1253** every pages whose pgno is larger than pPager->dbSize and is unreferenced.
1254** Referenced pages larger than pPager->dbSize are zeroed.
1255*/
1256static void memoryTruncate(Pager *pPager){
1257 PgHdr *pPg;
1258 PgHdr **ppPg;
1259 int dbSize = pPager->dbSize;
1260
1261 ppPg = &pPager->pAll;
1262 while( (pPg = *ppPg)!=0 ){
1263 if( pPg->pgno<=dbSize ){
1264 ppPg = &pPg->pNextAll;
1265 }else if( pPg->nRef>0 ){
1266 memset(PGHDR_TO_DATA(pPg), 0, pPager->pageSize);
1267 ppPg = &pPg->pNextAll;
1268 }else{
1269 *ppPg = pPg->pNextAll;
1270 unlinkPage(pPg);
1271 sqliteFree(pPg);
1272 pPager->nPage--;
1273 }
1274 }
1275}
1276
drhf7c57532003-04-25 13:22:51 +00001277/*
1278** Truncate the file to the number of pages specified.
1279*/
drh3aac2dd2004-04-26 14:10:20 +00001280int sqlite3pager_truncate(Pager *pPager, Pgno nPage){
drhf7c57532003-04-25 13:22:51 +00001281 int rc;
drh2e6d11b2003-04-25 15:37:57 +00001282 if( pPager->dbSize<0 ){
drh3aac2dd2004-04-26 14:10:20 +00001283 sqlite3pager_pagecount(pPager);
drh2e6d11b2003-04-25 15:37:57 +00001284 }
1285 if( pPager->errMask!=0 ){
1286 rc = pager_errcode(pPager);
1287 return rc;
1288 }
drh7d02cb72003-06-04 16:24:39 +00001289 if( nPage>=(unsigned)pPager->dbSize ){
drhf7c57532003-04-25 13:22:51 +00001290 return SQLITE_OK;
1291 }
drhac69b052004-05-12 13:30:07 +00001292 if( pPager->memDb ){
1293 pPager->dbSize = nPage;
1294 memoryTruncate(pPager);
1295 return SQLITE_OK;
1296 }
danielk197713adf8a2004-06-03 16:08:41 +00001297 syncJournal(pPager, 0);
danielk19774adee202004-05-08 08:23:19 +00001298 rc = sqlite3OsTruncate(&pPager->fd, SQLITE_PAGE_SIZE*(off_t)nPage);
drhf7c57532003-04-25 13:22:51 +00001299 if( rc==SQLITE_OK ){
1300 pPager->dbSize = nPage;
1301 }
1302 return rc;
1303}
1304
1305/*
drhed7c8552001-04-11 14:29:21 +00001306** Shutdown the page cache. Free all memory and close all files.
1307**
1308** If a transaction was in progress when this routine is called, that
1309** transaction is rolled back. All outstanding pages are invalidated
1310** and their memory is freed. Any attempt to use a page associated
1311** with this page cache after this function returns will likely
1312** result in a coredump.
1313*/
drh3aac2dd2004-04-26 14:10:20 +00001314int sqlite3pager_close(Pager *pPager){
drhd9b02572001-04-15 00:37:09 +00001315 PgHdr *pPg, *pNext;
drhed7c8552001-04-11 14:29:21 +00001316 switch( pPager->state ){
1317 case SQLITE_WRITELOCK: {
drh3aac2dd2004-04-26 14:10:20 +00001318 sqlite3pager_rollback(pPager);
drhac69b052004-05-12 13:30:07 +00001319 if( !pPager->memDb ){
1320 sqlite3OsUnlock(&pPager->fd);
1321 }
drh8cfbf082001-09-19 13:22:39 +00001322 assert( pPager->journalOpen==0 );
drhed7c8552001-04-11 14:29:21 +00001323 break;
1324 }
1325 case SQLITE_READLOCK: {
drhac69b052004-05-12 13:30:07 +00001326 if( !pPager->memDb ){
1327 sqlite3OsUnlock(&pPager->fd);
1328 }
drhed7c8552001-04-11 14:29:21 +00001329 break;
1330 }
1331 default: {
1332 /* Do nothing */
1333 break;
1334 }
1335 }
drhd9b02572001-04-15 00:37:09 +00001336 for(pPg=pPager->pAll; pPg; pPg=pNext){
1337 pNext = pPg->pNextAll;
1338 sqliteFree(pPg);
drhed7c8552001-04-11 14:29:21 +00001339 }
drhac69b052004-05-12 13:30:07 +00001340 if( !pPager->memDb ){
1341 sqlite3OsClose(&pPager->fd);
1342 }
drh8cfbf082001-09-19 13:22:39 +00001343 assert( pPager->journalOpen==0 );
drh0f892532002-05-30 12:27:03 +00001344 /* Temp files are automatically deleted by the OS
1345 ** if( pPager->tempFile ){
danielk19774adee202004-05-08 08:23:19 +00001346 ** sqlite3OsDelete(pPager->zFilename);
drh0f892532002-05-30 12:27:03 +00001347 ** }
1348 */
drhdb48ee02003-01-16 13:42:43 +00001349 CLR_PAGER(pPager);
drh73509ee2003-04-06 20:44:45 +00001350 if( pPager->zFilename!=(char*)&pPager[1] ){
drha76c82e2003-07-27 18:59:42 +00001351 assert( 0 ); /* Cannot happen */
drh73509ee2003-04-06 20:44:45 +00001352 sqliteFree(pPager->zFilename);
1353 sqliteFree(pPager->zJournal);
drha76c82e2003-07-27 18:59:42 +00001354 sqliteFree(pPager->zDirectory);
drh73509ee2003-04-06 20:44:45 +00001355 }
drhed7c8552001-04-11 14:29:21 +00001356 sqliteFree(pPager);
1357 return SQLITE_OK;
1358}
1359
1360/*
drh5e00f6c2001-09-13 13:46:56 +00001361** Return the page number for the given page data.
drhed7c8552001-04-11 14:29:21 +00001362*/
drh3aac2dd2004-04-26 14:10:20 +00001363Pgno sqlite3pager_pagenumber(void *pData){
drhed7c8552001-04-11 14:29:21 +00001364 PgHdr *p = DATA_TO_PGHDR(pData);
1365 return p->pgno;
1366}
1367
1368/*
drhc8629a12004-05-08 20:07:40 +00001369** The page_ref() function increments the reference count for a page.
1370** If the page is currently on the freelist (the reference count is zero) then
drh7e3b0a02001-04-28 16:52:40 +00001371** remove it from the freelist.
drhc8629a12004-05-08 20:07:40 +00001372**
1373** For non-test systems, page_ref() is a macro that calls _page_ref()
1374** online of the reference count is zero. For test systems, page_ref()
1375** is a real function so that we can set breakpoints and trace it.
drh7e3b0a02001-04-28 16:52:40 +00001376*/
drh836faa42003-01-11 13:30:57 +00001377static void _page_ref(PgHdr *pPg){
drh7e3b0a02001-04-28 16:52:40 +00001378 if( pPg->nRef==0 ){
1379 /* The page is currently on the freelist. Remove it. */
drh341eae82003-01-21 02:39:36 +00001380 if( pPg==pPg->pPager->pFirstSynced ){
1381 PgHdr *p = pPg->pNextFree;
1382 while( p && p->needSync ){ p = p->pNextFree; }
1383 pPg->pPager->pFirstSynced = p;
1384 }
drh7e3b0a02001-04-28 16:52:40 +00001385 if( pPg->pPrevFree ){
1386 pPg->pPrevFree->pNextFree = pPg->pNextFree;
1387 }else{
1388 pPg->pPager->pFirst = pPg->pNextFree;
1389 }
1390 if( pPg->pNextFree ){
1391 pPg->pNextFree->pPrevFree = pPg->pPrevFree;
1392 }else{
1393 pPg->pPager->pLast = pPg->pPrevFree;
1394 }
1395 pPg->pPager->nRef++;
1396 }
1397 pPg->nRef++;
drhdd793422001-06-28 01:54:48 +00001398 REFINFO(pPg);
drhdf0b3b02001-06-23 11:36:20 +00001399}
#ifdef SQLITE_TEST
  /* Test builds: a real function so that it can be traced in a debugger.
  ** Only the transition away from a zero reference count needs the
  ** freelist handling in _page_ref(). */
  static void page_ref(PgHdr *pPg){
    if( pPg->nRef!=0 ){
      pPg->nRef++;
      REFINFO(pPg);
      return;
    }
    _page_ref(pPg);
  }
#else
  /* Non-test builds: bump the count inline, calling _page_ref() only
  ** when the count is currently zero. */
# define page_ref(P)   ((P)->nRef!=0?(void)(P)->nRef++:_page_ref(P))
#endif
drhdf0b3b02001-06-23 11:36:20 +00001412
1413/*
1414** Increment the reference count for a page. The input pointer is
1415** a reference to the page data.
1416*/
drh3aac2dd2004-04-26 14:10:20 +00001417int sqlite3pager_ref(void *pData){
drhdf0b3b02001-06-23 11:36:20 +00001418 PgHdr *pPg = DATA_TO_PGHDR(pData);
1419 page_ref(pPg);
drh8c42ca92001-06-22 19:15:00 +00001420 return SQLITE_OK;
drh7e3b0a02001-04-28 16:52:40 +00001421}
1422
1423/*
drh34e79ce2004-02-08 06:05:46 +00001424** Sync the journal. In other words, make sure all the pages that have
1425** been written to the journal have actually reached the surface of the
1426** disk. It is not safe to modify the original database file until after
1427** the journal has been synced. If the original database is modified before
1428** the journal is synced and a power failure occurs, the unsynced journal
1429** data would be lost and we would be unable to completely rollback the
1430** database changes. Database corruption would occur.
1431**
1432** This routine also updates the nRec field in the header of the journal.
1433** (See comments on the pager_playback() routine for additional information.)
1434** If the sync mode is FULL, two syncs will occur. First the whole journal
1435** is synced, then the nRec field is updated, then a second sync occurs.
drhb19a2bc2001-09-16 00:13:26 +00001436**
drh34e79ce2004-02-08 06:05:46 +00001437** For temporary databases, we do not care if we are able to rollback
1438** after a power failure, so sync occurs.
drhfa86c412002-02-02 15:01:15 +00001439**
drh34e79ce2004-02-08 06:05:46 +00001440** This routine clears the needSync field of every page current held in
1441** memory.
drh50e5dad2001-09-15 00:57:28 +00001442*/
danielk197713adf8a2004-06-03 16:08:41 +00001443static int syncJournal(Pager *pPager, const char *zMaster){
drh50e5dad2001-09-15 00:57:28 +00001444 PgHdr *pPg;
1445 int rc = SQLITE_OK;
drh03eb96a2002-11-10 23:32:56 +00001446
1447 /* Sync the journal before modifying the main database
1448 ** (assuming there is a journal and it needs to be synced.)
1449 */
danielk197713adf8a2004-06-03 16:08:41 +00001450 if( pPager->needSync || zMaster ){
drhfa86c412002-02-02 15:01:15 +00001451 if( !pPager->tempFile ){
drhdb48ee02003-01-16 13:42:43 +00001452 assert( pPager->journalOpen );
drh946966f2004-02-25 02:20:41 +00001453 /* assert( !pPager->noSync ); // noSync might be set if synchronous
1454 ** was turned off after the transaction was started. Ticket #615 */
drh968af522003-02-11 14:55:40 +00001455#ifndef NDEBUG
1456 {
drh34e79ce2004-02-08 06:05:46 +00001457 /* Make sure the pPager->nRec counter we are keeping agrees
1458 ** with the nRec computed from the size of the journal file.
1459 */
drh4a0681e2003-02-13 01:58:20 +00001460 off_t hdrSz, pgSz, jSz;
danielk197713adf8a2004-06-03 16:08:41 +00001461 hdrSz = JOURNAL_HDR_SZ(pPager, journal_format);
drh968af522003-02-11 14:55:40 +00001462 pgSz = JOURNAL_PG_SZ(journal_format);
danielk19774adee202004-05-08 08:23:19 +00001463 rc = sqlite3OsFileSize(&pPager->jfd, &jSz);
drh968af522003-02-11 14:55:40 +00001464 if( rc!=0 ) return rc;
drh4a0681e2003-02-13 01:58:20 +00001465 assert( pPager->nRec*pgSz+hdrSz==jSz );
drh968af522003-02-11 14:55:40 +00001466 }
1467#endif
drhd8d66e82003-02-12 02:10:15 +00001468 if( journal_format>=3 ){
drh34e79ce2004-02-08 06:05:46 +00001469 /* Write the nRec value into the journal file header */
drhd8d66e82003-02-12 02:10:15 +00001470 off_t szJ;
1471 if( pPager->fullSync ){
1472 TRACE1("SYNC\n");
danielk19774adee202004-05-08 08:23:19 +00001473 rc = sqlite3OsSync(&pPager->jfd);
drhd8d66e82003-02-12 02:10:15 +00001474 if( rc!=0 ) return rc;
1475 }
danielk19774adee202004-05-08 08:23:19 +00001476 sqlite3OsSeek(&pPager->jfd, sizeof(aJournalMagic1));
drh99ee3602003-02-16 19:13:36 +00001477 rc = write32bits(&pPager->jfd, pPager->nRec);
1478 if( rc ) return rc;
danielk197713adf8a2004-06-03 16:08:41 +00001479
1480 /* Write the name of the master journal file if one is specified */
1481 if( zMaster ){
1482 assert( strlen(zMaster)<pPager->nMaster );
1483 rc = sqlite3OsSeek(&pPager->jfd, sizeof(aJournalMagic3) + 3*4);
1484 if( rc ) return rc;
1485 rc = sqlite3OsWrite(&pPager->jfd, zMaster, strlen(zMaster)+1);
1486 if( rc ) return rc;
1487 }
1488
1489 szJ = JOURNAL_HDR_SZ(pPager, journal_format) +
drhd8d66e82003-02-12 02:10:15 +00001490 pPager->nRec*JOURNAL_PG_SZ(journal_format);
danielk19774adee202004-05-08 08:23:19 +00001491 sqlite3OsSeek(&pPager->jfd, szJ);
drh968af522003-02-11 14:55:40 +00001492 }
drhdb48ee02003-01-16 13:42:43 +00001493 TRACE1("SYNC\n");
danielk19774adee202004-05-08 08:23:19 +00001494 rc = sqlite3OsSync(&pPager->jfd);
drhfa86c412002-02-02 15:01:15 +00001495 if( rc!=0 ) return rc;
drhdb48ee02003-01-16 13:42:43 +00001496 pPager->journalStarted = 1;
drhfa86c412002-02-02 15:01:15 +00001497 }
drh50e5dad2001-09-15 00:57:28 +00001498 pPager->needSync = 0;
drh341eae82003-01-21 02:39:36 +00001499
1500 /* Erase the needSync flag from every page.
1501 */
1502 for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
1503 pPg->needSync = 0;
1504 }
1505 pPager->pFirstSynced = pPager->pFirst;
drh50e5dad2001-09-15 00:57:28 +00001506 }
drh03eb96a2002-11-10 23:32:56 +00001507
drh341eae82003-01-21 02:39:36 +00001508#ifndef NDEBUG
1509 /* If the Pager.needSync flag is clear then the PgHdr.needSync
1510 ** flag must also be clear for all pages. Verify that this
1511 ** invariant is true.
drh03eb96a2002-11-10 23:32:56 +00001512 */
drh341eae82003-01-21 02:39:36 +00001513 else{
1514 for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
1515 assert( pPg->needSync==0 );
1516 }
1517 assert( pPager->pFirstSynced==pPager->pFirst );
drh03eb96a2002-11-10 23:32:56 +00001518 }
drh341eae82003-01-21 02:39:36 +00001519#endif
drhdb48ee02003-01-16 13:42:43 +00001520
drh81a20f22001-10-12 17:30:04 +00001521 return rc;
drh50e5dad2001-09-15 00:57:28 +00001522}
1523
1524/*
drh2554f8b2003-01-22 01:26:44 +00001525** Given a list of pages (connected by the PgHdr.pDirty pointer) write
1526** every one of those pages out to the database file and mark them all
1527** as clean.
1528*/
1529static int pager_write_pagelist(PgHdr *pList){
1530 Pager *pPager;
1531 int rc;
1532
1533 if( pList==0 ) return SQLITE_OK;
1534 pPager = pList->pPager;
1535 while( pList ){
1536 assert( pList->dirty );
danielk19774adee202004-05-08 08:23:19 +00001537 sqlite3OsSeek(&pPager->fd, (pList->pgno-1)*(off_t)SQLITE_PAGE_SIZE);
drh9eb9e262004-02-11 02:18:05 +00001538 CODEC(pPager, PGHDR_TO_DATA(pList), pList->pgno, 6);
1539 TRACE2("STORE %d\n", pList->pgno);
danielk19774adee202004-05-08 08:23:19 +00001540 rc = sqlite3OsWrite(&pPager->fd, PGHDR_TO_DATA(pList), SQLITE_PAGE_SIZE);
drh9eb9e262004-02-11 02:18:05 +00001541 CODEC(pPager, PGHDR_TO_DATA(pList), pList->pgno, 0);
drh2554f8b2003-01-22 01:26:44 +00001542 if( rc ) return rc;
1543 pList->dirty = 0;
1544 pList = pList->pDirty;
1545 }
1546 return SQLITE_OK;
1547}
1548
1549/*
1550** Collect every dirty page into a dirty list and
1551** return a pointer to the head of that list. All pages are
1552** collected even if they are still in use.
1553*/
1554static PgHdr *pager_get_all_dirty_pages(Pager *pPager){
1555 PgHdr *p, *pList;
1556 pList = 0;
1557 for(p=pPager->pAll; p; p=p->pNextAll){
1558 if( p->dirty ){
1559 p->pDirty = pList;
1560 pList = p;
1561 }
1562 }
1563 return pList;
1564}
1565
1566/*
drhd9b02572001-04-15 00:37:09 +00001567** Acquire a page.
1568**
drh58a11682001-11-10 13:51:08 +00001569** A read lock on the disk file is obtained when the first page is acquired.
drh5e00f6c2001-09-13 13:46:56 +00001570** This read lock is dropped when the last page is released.
drhd9b02572001-04-15 00:37:09 +00001571**
drh306dc212001-05-21 13:45:10 +00001572** A _get works for any page number greater than 0. If the database
1573** file is smaller than the requested page, then no actual disk
1574** read occurs and the memory image of the page is initialized to
1575** all zeros. The extra data appended to a page is always initialized
1576** to zeros the first time a page is loaded into memory.
1577**
drhd9b02572001-04-15 00:37:09 +00001578** The acquisition might fail for several reasons. In all cases,
1579** an appropriate error code is returned and *ppPage is set to NULL.
drh7e3b0a02001-04-28 16:52:40 +00001580**
drh3aac2dd2004-04-26 14:10:20 +00001581** See also sqlite3pager_lookup(). Both this routine and _lookup() attempt
drh7e3b0a02001-04-28 16:52:40 +00001582** to find a page in the in-memory cache first. If the page is not already
drh5e00f6c2001-09-13 13:46:56 +00001583** in memory, this routine goes to disk to read it in whereas _lookup()
drh7e3b0a02001-04-28 16:52:40 +00001584** just returns 0. This routine acquires a read-lock the first time it
1585** has to go to disk, and could also playback an old journal if necessary.
1586** Since _lookup() never goes to disk, it never has to deal with locks
1587** or journal files.
drhed7c8552001-04-11 14:29:21 +00001588*/
drh3aac2dd2004-04-26 14:10:20 +00001589int sqlite3pager_get(Pager *pPager, Pgno pgno, void **ppPage){
drhed7c8552001-04-11 14:29:21 +00001590 PgHdr *pPg;
drh8766c342002-11-09 00:33:15 +00001591 int rc;
drhed7c8552001-04-11 14:29:21 +00001592
drhd9b02572001-04-15 00:37:09 +00001593 /* Make sure we have not hit any critical errors.
1594 */
drh836faa42003-01-11 13:30:57 +00001595 assert( pPager!=0 );
1596 assert( pgno!=0 );
drh2e6d11b2003-04-25 15:37:57 +00001597 *ppPage = 0;
drhd9b02572001-04-15 00:37:09 +00001598 if( pPager->errMask & ~(PAGER_ERR_FULL) ){
1599 return pager_errcode(pPager);
1600 }
1601
danielk197713adf8a2004-06-03 16:08:41 +00001602 /* If this is the first page accessed, then get a SHARED lock
drhed7c8552001-04-11 14:29:21 +00001603 ** on the database file.
1604 */
drhac69b052004-05-12 13:30:07 +00001605 if( pPager->nRef==0 && !pPager->memDb ){
danielk19774adee202004-05-08 08:23:19 +00001606 rc = sqlite3OsReadLock(&pPager->fd);
drh8766c342002-11-09 00:33:15 +00001607 if( rc!=SQLITE_OK ){
drh8766c342002-11-09 00:33:15 +00001608 return rc;
drhed7c8552001-04-11 14:29:21 +00001609 }
drhd9b02572001-04-15 00:37:09 +00001610 pPager->state = SQLITE_READLOCK;
drhed7c8552001-04-11 14:29:21 +00001611
danielk197713adf8a2004-06-03 16:08:41 +00001612 /* If a journal file exists, and there is no RESERVED lock on the
1613 ** database file, then it either needs to be played back or deleted.
drhed7c8552001-04-11 14:29:21 +00001614 */
danielk197713adf8a2004-06-03 16:08:41 +00001615 if( pPager->useJournal &&
1616 sqlite3OsFileExists(pPager->zJournal) &&
1617 !sqlite3OsCheckWriteLock(&pPager->fd)
1618 ){
drhe2227f02003-06-14 11:42:57 +00001619 int rc;
drhed7c8552001-04-11 14:29:21 +00001620
danielk197713adf8a2004-06-03 16:08:41 +00001621 /* Get an EXCLUSIVE lock on the database file. */
1622 rc = sqlite3OsLock(&pPager->fd, EXCLUSIVE_LOCK);
drha7fcb052001-12-14 15:09:55 +00001623 if( rc!=SQLITE_OK ){
danielk19774adee202004-05-08 08:23:19 +00001624 if( sqlite3OsUnlock(&pPager->fd)!=SQLITE_OK ){
drh8766c342002-11-09 00:33:15 +00001625 /* This should never happen! */
1626 rc = SQLITE_INTERNAL;
1627 }
drh8766c342002-11-09 00:33:15 +00001628 return rc;
drha7fcb052001-12-14 15:09:55 +00001629 }
1630 pPager->state = SQLITE_WRITELOCK;
1631
drhe2227f02003-06-14 11:42:57 +00001632 /* Open the journal for reading only. Return SQLITE_BUSY if
1633 ** we are unable to open the journal file.
drhf57b3392001-10-08 13:22:32 +00001634 **
drhe2227f02003-06-14 11:42:57 +00001635 ** The journal file does not need to be locked itself. The
1636 ** journal file is never open unless the main database file holds
1637 ** a write lock, so there is never any chance of two or more
1638 ** processes opening the journal at the same time.
drhed7c8552001-04-11 14:29:21 +00001639 */
danielk19774adee202004-05-08 08:23:19 +00001640 rc = sqlite3OsOpenReadOnly(pPager->zJournal, &pPager->jfd);
drha7fcb052001-12-14 15:09:55 +00001641 if( rc!=SQLITE_OK ){
danielk19774adee202004-05-08 08:23:19 +00001642 rc = sqlite3OsUnlock(&pPager->fd);
drha7fcb052001-12-14 15:09:55 +00001643 assert( rc==SQLITE_OK );
drhed7c8552001-04-11 14:29:21 +00001644 return SQLITE_BUSY;
1645 }
drha7fcb052001-12-14 15:09:55 +00001646 pPager->journalOpen = 1;
drhdb48ee02003-01-16 13:42:43 +00001647 pPager->journalStarted = 0;
drhed7c8552001-04-11 14:29:21 +00001648
1649 /* Playback and delete the journal. Drop the database write
1650 ** lock and reacquire the read lock.
1651 */
drh99ee3602003-02-16 19:13:36 +00001652 rc = pager_playback(pPager, 0);
drhd9b02572001-04-15 00:37:09 +00001653 if( rc!=SQLITE_OK ){
1654 return rc;
1655 }
drhed7c8552001-04-11 14:29:21 +00001656 }
1657 pPg = 0;
1658 }else{
1659 /* Search for page in cache */
drhd9b02572001-04-15 00:37:09 +00001660 pPg = pager_lookup(pPager, pgno);
drhac69b052004-05-12 13:30:07 +00001661 if( pPager->memDb && pPager->state==SQLITE_UNLOCK ){
1662 pPager->state = SQLITE_READLOCK;
1663 }
drhed7c8552001-04-11 14:29:21 +00001664 }
1665 if( pPg==0 ){
drhd9b02572001-04-15 00:37:09 +00001666 /* The requested page is not in the page cache. */
drhed7c8552001-04-11 14:29:21 +00001667 int h;
drh7e3b0a02001-04-28 16:52:40 +00001668 pPager->nMiss++;
drhac69b052004-05-12 13:30:07 +00001669 if( pPager->nPage<pPager->mxPage || pPager->pFirst==0 || pPager->memDb ){
drhed7c8552001-04-11 14:29:21 +00001670 /* Create a new page */
drhd0ba1932004-02-10 01:54:28 +00001671 pPg = sqliteMallocRaw( sizeof(*pPg) + SQLITE_PAGE_SIZE
drhac69b052004-05-12 13:30:07 +00001672 + sizeof(u32) + pPager->nExtra
1673 + pPager->memDb*sizeof(PgHistory) );
drhd9b02572001-04-15 00:37:09 +00001674 if( pPg==0 ){
drhd9b02572001-04-15 00:37:09 +00001675 pager_unwritelock(pPager);
1676 pPager->errMask |= PAGER_ERR_MEM;
1677 return SQLITE_NOMEM;
1678 }
drh8c1238a2003-01-02 14:43:55 +00001679 memset(pPg, 0, sizeof(*pPg));
drhac69b052004-05-12 13:30:07 +00001680 if( pPager->memDb ){
1681 memset(PGHDR_TO_HIST(pPg, pPager), 0, sizeof(PgHistory));
1682 }
drhed7c8552001-04-11 14:29:21 +00001683 pPg->pPager = pPager;
drhd9b02572001-04-15 00:37:09 +00001684 pPg->pNextAll = pPager->pAll;
drhd79caeb2001-04-15 02:27:24 +00001685 pPager->pAll = pPg;
drhd9b02572001-04-15 00:37:09 +00001686 pPager->nPage++;
drhed7c8552001-04-11 14:29:21 +00001687 }else{
drhdb48ee02003-01-16 13:42:43 +00001688 /* Find a page to recycle. Try to locate a page that does not
1689 ** require us to do an fsync() on the journal.
1690 */
drh341eae82003-01-21 02:39:36 +00001691 pPg = pPager->pFirstSynced;
drhb19a2bc2001-09-16 00:13:26 +00001692
drhdb48ee02003-01-16 13:42:43 +00001693 /* If we could not find a page that does not require an fsync()
1694 ** on the journal file then fsync the journal file. This is a
1695 ** very slow operation, so we work hard to avoid it. But sometimes
1696 ** it can't be helped.
drhb19a2bc2001-09-16 00:13:26 +00001697 */
drh603240c2002-03-05 01:11:12 +00001698 if( pPg==0 ){
danielk197713adf8a2004-06-03 16:08:41 +00001699 int rc = syncJournal(pPager, 0);
drh50e5dad2001-09-15 00:57:28 +00001700 if( rc!=0 ){
drh3aac2dd2004-04-26 14:10:20 +00001701 sqlite3pager_rollback(pPager);
drh50e5dad2001-09-15 00:57:28 +00001702 return SQLITE_IOERR;
1703 }
1704 pPg = pPager->pFirst;
1705 }
drhd9b02572001-04-15 00:37:09 +00001706 assert( pPg->nRef==0 );
drhdb48ee02003-01-16 13:42:43 +00001707
1708 /* Write the page to the database file if it is dirty.
1709 */
1710 if( pPg->dirty ){
1711 assert( pPg->needSync==0 );
drh2554f8b2003-01-22 01:26:44 +00001712 pPg->pDirty = 0;
1713 rc = pager_write_pagelist( pPg );
drhdb48ee02003-01-16 13:42:43 +00001714 if( rc!=SQLITE_OK ){
drh3aac2dd2004-04-26 14:10:20 +00001715 sqlite3pager_rollback(pPager);
drhdb48ee02003-01-16 13:42:43 +00001716 return SQLITE_IOERR;
1717 }
drhdb48ee02003-01-16 13:42:43 +00001718 }
drh50e5dad2001-09-15 00:57:28 +00001719 assert( pPg->dirty==0 );
drhd9b02572001-04-15 00:37:09 +00001720
drhdb48ee02003-01-16 13:42:43 +00001721 /* If the page we are recycling is marked as alwaysRollback, then
drh193a6b42002-07-07 16:52:46 +00001722 ** set the global alwaysRollback flag, thus disabling the
1723 ** sqlite_dont_rollback() optimization for the rest of this transaction.
1724 ** It is necessary to do this because the page marked alwaysRollback
1725 ** might be reloaded at a later time but at that point we won't remember
1726 ** that is was marked alwaysRollback. This means that all pages must
1727 ** be marked as alwaysRollback from here on out.
1728 */
1729 if( pPg->alwaysRollback ){
1730 pPager->alwaysRollback = 1;
1731 }
1732
drhd9b02572001-04-15 00:37:09 +00001733 /* Unlink the old page from the free list and the hash table
1734 */
drhac69b052004-05-12 13:30:07 +00001735 unlinkPage(pPg);
drhd9b02572001-04-15 00:37:09 +00001736 pPager->nOvfl++;
drhed7c8552001-04-11 14:29:21 +00001737 }
1738 pPg->pgno = pgno;
drh1ab43002002-01-14 09:28:19 +00001739 if( pPager->aInJournal && (int)pgno<=pPager->origDbSize ){
danielk19774adee202004-05-08 08:23:19 +00001740 sqlite3CheckMemory(pPager->aInJournal, pgno/8);
drhdb48ee02003-01-16 13:42:43 +00001741 assert( pPager->journalOpen );
drh6019e162001-07-02 17:51:45 +00001742 pPg->inJournal = (pPager->aInJournal[pgno/8] & (1<<(pgno&7)))!=0;
drhdb48ee02003-01-16 13:42:43 +00001743 pPg->needSync = 0;
drh6019e162001-07-02 17:51:45 +00001744 }else{
1745 pPg->inJournal = 0;
drhdb48ee02003-01-16 13:42:43 +00001746 pPg->needSync = 0;
drh6019e162001-07-02 17:51:45 +00001747 }
drhac69b052004-05-12 13:30:07 +00001748 if( pPager->aInStmt && (int)pgno<=pPager->stmtSize
1749 && (pPager->aInStmt[pgno/8] & (1<<(pgno&7)))!=0 ){
drh3aac2dd2004-04-26 14:10:20 +00001750 page_add_to_stmt_list(pPg);
drhfa86c412002-02-02 15:01:15 +00001751 }else{
drh3aac2dd2004-04-26 14:10:20 +00001752 page_remove_from_stmt_list(pPg);
drhfa86c412002-02-02 15:01:15 +00001753 }
drhed7c8552001-04-11 14:29:21 +00001754 pPg->dirty = 0;
1755 pPg->nRef = 1;
drhdd793422001-06-28 01:54:48 +00001756 REFINFO(pPg);
drhd9b02572001-04-15 00:37:09 +00001757 pPager->nRef++;
1758 h = pager_hash(pgno);
drhed7c8552001-04-11 14:29:21 +00001759 pPg->pNextHash = pPager->aHash[h];
1760 pPager->aHash[h] = pPg;
1761 if( pPg->pNextHash ){
1762 assert( pPg->pNextHash->pPrevHash==0 );
1763 pPg->pNextHash->pPrevHash = pPg;
1764 }
drh2e6d11b2003-04-25 15:37:57 +00001765 if( pPager->nExtra>0 ){
1766 memset(PGHDR_TO_EXTRA(pPg), 0, pPager->nExtra);
1767 }
drh3aac2dd2004-04-26 14:10:20 +00001768 if( pPager->dbSize<0 ) sqlite3pager_pagecount(pPager);
drh2e6d11b2003-04-25 15:37:57 +00001769 if( pPager->errMask!=0 ){
drh3aac2dd2004-04-26 14:10:20 +00001770 sqlite3pager_unref(PGHDR_TO_DATA(pPg));
drh2e6d11b2003-04-25 15:37:57 +00001771 rc = pager_errcode(pPager);
1772 return rc;
1773 }
drh1ab43002002-01-14 09:28:19 +00001774 if( pPager->dbSize<(int)pgno ){
drhd0ba1932004-02-10 01:54:28 +00001775 memset(PGHDR_TO_DATA(pPg), 0, SQLITE_PAGE_SIZE);
drh306dc212001-05-21 13:45:10 +00001776 }else{
drh81a20f22001-10-12 17:30:04 +00001777 int rc;
drhac69b052004-05-12 13:30:07 +00001778 assert( pPager->memDb==0 );
danielk19774adee202004-05-08 08:23:19 +00001779 sqlite3OsSeek(&pPager->fd, (pgno-1)*(off_t)SQLITE_PAGE_SIZE);
1780 rc = sqlite3OsRead(&pPager->fd, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE);
drh9eb9e262004-02-11 02:18:05 +00001781 TRACE2("FETCH %d\n", pPg->pgno);
1782 CODEC(pPager, PGHDR_TO_DATA(pPg), pPg->pgno, 3);
drh81a20f22001-10-12 17:30:04 +00001783 if( rc!=SQLITE_OK ){
drh28be87c2002-11-05 23:03:02 +00001784 off_t fileSize;
danielk19774adee202004-05-08 08:23:19 +00001785 if( sqlite3OsFileSize(&pPager->fd,&fileSize)!=SQLITE_OK
drhd0ba1932004-02-10 01:54:28 +00001786 || fileSize>=pgno*SQLITE_PAGE_SIZE ){
drh3aac2dd2004-04-26 14:10:20 +00001787 sqlite3pager_unref(PGHDR_TO_DATA(pPg));
drh4e371ee2002-09-05 16:08:27 +00001788 return rc;
1789 }else{
drhd0ba1932004-02-10 01:54:28 +00001790 memset(PGHDR_TO_DATA(pPg), 0, SQLITE_PAGE_SIZE);
drh4e371ee2002-09-05 16:08:27 +00001791 }
drh81a20f22001-10-12 17:30:04 +00001792 }
drh306dc212001-05-21 13:45:10 +00001793 }
drhed7c8552001-04-11 14:29:21 +00001794 }else{
drhd9b02572001-04-15 00:37:09 +00001795 /* The requested page is in the page cache. */
drh7e3b0a02001-04-28 16:52:40 +00001796 pPager->nHit++;
drhdf0b3b02001-06-23 11:36:20 +00001797 page_ref(pPg);
drhed7c8552001-04-11 14:29:21 +00001798 }
1799 *ppPage = PGHDR_TO_DATA(pPg);
1800 return SQLITE_OK;
1801}
1802
1803/*
drh7e3b0a02001-04-28 16:52:40 +00001804** Acquire a page if it is already in the in-memory cache. Do
1805** not read the page from disk. Return a pointer to the page,
1806** or 0 if the page is not in cache.
1807**
drh3aac2dd2004-04-26 14:10:20 +00001808** See also sqlite3pager_get(). The difference between this routine
1809** and sqlite3pager_get() is that _get() will go to the disk and read
drh7e3b0a02001-04-28 16:52:40 +00001810** in the page if the page is not already in cache. This routine
drh5e00f6c2001-09-13 13:46:56 +00001811** returns NULL if the page is not in cache or if a disk I/O error
1812** has ever happened.
drh7e3b0a02001-04-28 16:52:40 +00001813*/
drh3aac2dd2004-04-26 14:10:20 +00001814void *sqlite3pager_lookup(Pager *pPager, Pgno pgno){
drh7e3b0a02001-04-28 16:52:40 +00001815 PgHdr *pPg;
1816
drh836faa42003-01-11 13:30:57 +00001817 assert( pPager!=0 );
1818 assert( pgno!=0 );
drh7e3b0a02001-04-28 16:52:40 +00001819 if( pPager->errMask & ~(PAGER_ERR_FULL) ){
1820 return 0;
1821 }
drh7e3b0a02001-04-28 16:52:40 +00001822 pPg = pager_lookup(pPager, pgno);
1823 if( pPg==0 ) return 0;
drhdf0b3b02001-06-23 11:36:20 +00001824 page_ref(pPg);
drh7e3b0a02001-04-28 16:52:40 +00001825 return PGHDR_TO_DATA(pPg);
1826}
1827
1828/*
drhed7c8552001-04-11 14:29:21 +00001829** Release a page.
1830**
1831** If the number of references to the page drop to zero, then the
1832** page is added to the LRU list. When all references to all pages
drhd9b02572001-04-15 00:37:09 +00001833** are released, a rollback occurs and the lock on the database is
drhed7c8552001-04-11 14:29:21 +00001834** removed.
1835*/
drh3aac2dd2004-04-26 14:10:20 +00001836int sqlite3pager_unref(void *pData){
drhed7c8552001-04-11 14:29:21 +00001837 PgHdr *pPg;
drhd9b02572001-04-15 00:37:09 +00001838
1839 /* Decrement the reference count for this page
1840 */
drhed7c8552001-04-11 14:29:21 +00001841 pPg = DATA_TO_PGHDR(pData);
1842 assert( pPg->nRef>0 );
drhed7c8552001-04-11 14:29:21 +00001843 pPg->nRef--;
drhdd793422001-06-28 01:54:48 +00001844 REFINFO(pPg);
drhd9b02572001-04-15 00:37:09 +00001845
drh72f82862001-05-24 21:06:34 +00001846 /* When the number of references to a page reach 0, call the
1847 ** destructor and add the page to the freelist.
drhd9b02572001-04-15 00:37:09 +00001848 */
drhed7c8552001-04-11 14:29:21 +00001849 if( pPg->nRef==0 ){
drh1eaa2692001-09-18 02:02:23 +00001850 Pager *pPager;
1851 pPager = pPg->pPager;
drhd9b02572001-04-15 00:37:09 +00001852 pPg->pNextFree = 0;
1853 pPg->pPrevFree = pPager->pLast;
drhed7c8552001-04-11 14:29:21 +00001854 pPager->pLast = pPg;
drhd9b02572001-04-15 00:37:09 +00001855 if( pPg->pPrevFree ){
1856 pPg->pPrevFree->pNextFree = pPg;
drhed7c8552001-04-11 14:29:21 +00001857 }else{
1858 pPager->pFirst = pPg;
1859 }
drh341eae82003-01-21 02:39:36 +00001860 if( pPg->needSync==0 && pPager->pFirstSynced==0 ){
1861 pPager->pFirstSynced = pPg;
1862 }
drh72f82862001-05-24 21:06:34 +00001863 if( pPager->xDestructor ){
drhb6f41482004-05-14 01:58:11 +00001864 pPager->xDestructor(pData, pPager->pageSize);
drh72f82862001-05-24 21:06:34 +00001865 }
drhd9b02572001-04-15 00:37:09 +00001866
1867 /* When all pages reach the freelist, drop the read lock from
1868 ** the database file.
1869 */
1870 pPager->nRef--;
1871 assert( pPager->nRef>=0 );
drhac69b052004-05-12 13:30:07 +00001872 if( pPager->nRef==0 && !pPager->memDb ){
drhd9b02572001-04-15 00:37:09 +00001873 pager_reset(pPager);
1874 }
drhed7c8552001-04-11 14:29:21 +00001875 }
drhd9b02572001-04-15 00:37:09 +00001876 return SQLITE_OK;
drhed7c8552001-04-11 14:29:21 +00001877}
1878
1879/*
drhda47d772002-12-02 04:25:19 +00001880** Create a journal file for pPager. There should already be a write
1881** lock on the database file when this routine is called.
1882**
** Return SQLITE_OK if everything is successful.  Return an error code and
** release the write lock if anything goes wrong.
1885*/
1886static int pager_open_journal(Pager *pPager){
1887 int rc;
1888 assert( pPager->state==SQLITE_WRITELOCK );
1889 assert( pPager->journalOpen==0 );
1890 assert( pPager->useJournal );
drh3aac2dd2004-04-26 14:10:20 +00001891 sqlite3pager_pagecount(pPager);
drhda47d772002-12-02 04:25:19 +00001892 pPager->aInJournal = sqliteMalloc( pPager->dbSize/8 + 1 );
1893 if( pPager->aInJournal==0 ){
danielk19774adee202004-05-08 08:23:19 +00001894 sqlite3OsReadLock(&pPager->fd);
drhda47d772002-12-02 04:25:19 +00001895 pPager->state = SQLITE_READLOCK;
1896 return SQLITE_NOMEM;
1897 }
danielk19774adee202004-05-08 08:23:19 +00001898 rc = sqlite3OsOpenExclusive(pPager->zJournal, &pPager->jfd,pPager->tempFile);
drhda47d772002-12-02 04:25:19 +00001899 if( rc!=SQLITE_OK ){
1900 sqliteFree(pPager->aInJournal);
1901 pPager->aInJournal = 0;
danielk19774adee202004-05-08 08:23:19 +00001902 sqlite3OsReadLock(&pPager->fd);
drhda47d772002-12-02 04:25:19 +00001903 pPager->state = SQLITE_READLOCK;
1904 return SQLITE_CANTOPEN;
1905 }
danielk19774adee202004-05-08 08:23:19 +00001906 sqlite3OsOpenDirectory(pPager->zDirectory, &pPager->jfd);
drhda47d772002-12-02 04:25:19 +00001907 pPager->journalOpen = 1;
drhdb48ee02003-01-16 13:42:43 +00001908 pPager->journalStarted = 0;
drhda47d772002-12-02 04:25:19 +00001909 pPager->needSync = 0;
1910 pPager->alwaysRollback = 0;
drh968af522003-02-11 14:55:40 +00001911 pPager->nRec = 0;
drh2e6d11b2003-04-25 15:37:57 +00001912 if( pPager->errMask!=0 ){
1913 rc = pager_errcode(pPager);
1914 return rc;
1915 }
drhda47d772002-12-02 04:25:19 +00001916 pPager->origDbSize = pPager->dbSize;
drh968af522003-02-11 14:55:40 +00001917 if( journal_format==JOURNAL_FORMAT_3 ){
danielk197713adf8a2004-06-03 16:08:41 +00001918 /* Create the header for a format 3 journal:
1919 ** - 8 bytes: Magic identifying journal format 3.
1920 ** - 4 bytes: Number of records in journal, or -1 no-sync mode is on.
1921 ** - 4 bytes: Magic used for page checksums.
1922 ** - 4 bytes: Number of bytes reserved for master journal ptr (nMaster)
1923 ** - nMaster bytes: Space for a master journal pointer.
1924 ** - 4 bytes: Initial database page count.
1925 */
danielk19774adee202004-05-08 08:23:19 +00001926 rc = sqlite3OsWrite(&pPager->jfd, aJournalMagic3, sizeof(aJournalMagic3));
drh968af522003-02-11 14:55:40 +00001927 if( rc==SQLITE_OK ){
drh4303fee2003-02-15 23:09:17 +00001928 rc = write32bits(&pPager->jfd, pPager->noSync ? 0xffffffff : 0);
drh968af522003-02-11 14:55:40 +00001929 }
1930 if( rc==SQLITE_OK ){
danielk19774adee202004-05-08 08:23:19 +00001931 sqlite3Randomness(sizeof(pPager->cksumInit), &pPager->cksumInit);
drh968af522003-02-11 14:55:40 +00001932 rc = write32bits(&pPager->jfd, pPager->cksumInit);
1933 }
danielk197713adf8a2004-06-03 16:08:41 +00001934 if( rc==SQLITE_OK ){
1935 rc = write32bits(&pPager->jfd, pPager->nMaster);
1936 }
1937
1938 /* Unless the size reserved for the master-journal pointer is 0, set
1939 ** the first byte of the master journal pointer to 0x00. Either way,
1940 ** this is interpreted as 'no master journal' in the event of a
1941 ** rollback after a crash.
1942 */
1943 if( rc==SQLITE_OK && pPager->nMaster>0 ){
1944 rc = sqlite3OsWrite(&pPager->jfd, "", 1);
1945 }
1946 if( rc==SQLITE_OK ){
1947 rc = sqlite3OsSeek(&pPager->jfd,
1948 sizeof(aJournalMagic3) + 3*4 + pPager->nMaster);
1949 }
drh968af522003-02-11 14:55:40 +00001950 }else if( journal_format==JOURNAL_FORMAT_2 ){
danielk19774adee202004-05-08 08:23:19 +00001951 rc = sqlite3OsWrite(&pPager->jfd, aJournalMagic2, sizeof(aJournalMagic2));
drhda47d772002-12-02 04:25:19 +00001952 }else{
drh968af522003-02-11 14:55:40 +00001953 assert( journal_format==JOURNAL_FORMAT_1 );
danielk19774adee202004-05-08 08:23:19 +00001954 rc = sqlite3OsWrite(&pPager->jfd, aJournalMagic1, sizeof(aJournalMagic1));
drhda47d772002-12-02 04:25:19 +00001955 }
1956 if( rc==SQLITE_OK ){
1957 rc = write32bits(&pPager->jfd, pPager->dbSize);
1958 }
drhac69b052004-05-12 13:30:07 +00001959 if( pPager->stmtAutoopen && rc==SQLITE_OK ){
drh3aac2dd2004-04-26 14:10:20 +00001960 rc = sqlite3pager_stmt_begin(pPager);
drhda47d772002-12-02 04:25:19 +00001961 }
1962 if( rc!=SQLITE_OK ){
1963 rc = pager_unwritelock(pPager);
1964 if( rc==SQLITE_OK ){
1965 rc = SQLITE_FULL;
1966 }
1967 }
1968 return rc;
1969}
1970
1971/*
drh4b845d72002-03-05 12:41:19 +00001972** Acquire a write-lock on the database. The lock is removed when
1973** the any of the following happen:
1974**
drh3aac2dd2004-04-26 14:10:20 +00001975** * sqlite3pager_commit() is called.
1976** * sqlite3pager_rollback() is called.
1977** * sqlite3pager_close() is called.
1978** * sqlite3pager_unref() is called to on every outstanding page.
drh4b845d72002-03-05 12:41:19 +00001979**
danielk197713adf8a2004-06-03 16:08:41 +00001980** The first parameter to this routine is a pointer to any open page of the
1981** database file. Nothing changes about the page - it is used merely to
1982** acquire a pointer to the Pager structure and as proof that there is
1983** already a read-lock on the database.
drh4b845d72002-03-05 12:41:19 +00001984**
danielk197713adf8a2004-06-03 16:08:41 +00001985** The second parameter indicates how much space in bytes to reserve for a
1986** master journal file-name at the start of the journal when it is created.
1987**
1988** A journal file is opened if this is not a temporary file. For temporary
1989** files, the opening of the journal file is deferred until there is an
1990** actual need to write to the journal.
drhda47d772002-12-02 04:25:19 +00001991**
drh4b845d72002-03-05 12:41:19 +00001992** If the database is already write-locked, this routine is a no-op.
1993*/
danielk197713adf8a2004-06-03 16:08:41 +00001994int sqlite3pager_begin(void *pData, int nMaster){
drh4b845d72002-03-05 12:41:19 +00001995 PgHdr *pPg = DATA_TO_PGHDR(pData);
1996 Pager *pPager = pPg->pPager;
1997 int rc = SQLITE_OK;
1998 assert( pPg->nRef>0 );
danielk197713adf8a2004-06-03 16:08:41 +00001999 assert( nMaster>=0 );
drh4b845d72002-03-05 12:41:19 +00002000 assert( pPager->state!=SQLITE_UNLOCK );
2001 if( pPager->state==SQLITE_READLOCK ){
2002 assert( pPager->aInJournal==0 );
drhac69b052004-05-12 13:30:07 +00002003 if( pPager->memDb ){
2004 pPager->state = SQLITE_WRITELOCK;
2005 pPager->origDbSize = pPager->dbSize;
2006 }else{
2007 rc = sqlite3OsWriteLock(&pPager->fd);
2008 if( rc!=SQLITE_OK ){
2009 return rc;
2010 }
danielk197713adf8a2004-06-03 16:08:41 +00002011 pPager->nMaster = nMaster;
drhac69b052004-05-12 13:30:07 +00002012 pPager->state = SQLITE_WRITELOCK;
2013 pPager->dirtyFile = 0;
2014 TRACE1("TRANSACTION\n");
2015 if( pPager->useJournal && !pPager->tempFile ){
2016 rc = pager_open_journal(pPager);
2017 }
drh4b845d72002-03-05 12:41:19 +00002018 }
2019 }
2020 return rc;
2021}
2022
2023/*
drhed7c8552001-04-11 14:29:21 +00002024** Mark a data page as writeable. The page is written into the journal
2025** if it is not there already. This routine must be called before making
2026** changes to a page.
2027**
2028** The first time this routine is called, the pager creates a new
2029** journal and acquires a write lock on the database. If the write
2030** lock could not be acquired, this routine returns SQLITE_BUSY. The
drh306dc212001-05-21 13:45:10 +00002031** calling routine must check for that return value and be careful not to
drhed7c8552001-04-11 14:29:21 +00002032** change any page data until this routine returns SQLITE_OK.
drhd9b02572001-04-15 00:37:09 +00002033**
2034** If the journal file could not be written because the disk is full,
2035** then this routine returns SQLITE_FULL and does an immediate rollback.
2036** All subsequent write attempts also return SQLITE_FULL until there
drh3aac2dd2004-04-26 14:10:20 +00002037** is a call to sqlite3pager_commit() or sqlite3pager_rollback() to
drhd9b02572001-04-15 00:37:09 +00002038** reset.
drhed7c8552001-04-11 14:29:21 +00002039*/
drh3aac2dd2004-04-26 14:10:20 +00002040int sqlite3pager_write(void *pData){
drh69688d52001-04-14 16:38:23 +00002041 PgHdr *pPg = DATA_TO_PGHDR(pData);
2042 Pager *pPager = pPg->pPager;
drhd79caeb2001-04-15 02:27:24 +00002043 int rc = SQLITE_OK;
drh69688d52001-04-14 16:38:23 +00002044
drh6446c4d2001-12-15 14:22:18 +00002045 /* Check for errors
2046 */
drhd9b02572001-04-15 00:37:09 +00002047 if( pPager->errMask ){
2048 return pager_errcode(pPager);
2049 }
drh5e00f6c2001-09-13 13:46:56 +00002050 if( pPager->readOnly ){
2051 return SQLITE_PERM;
2052 }
drh6446c4d2001-12-15 14:22:18 +00002053
2054 /* Mark the page as dirty. If the page has already been written
2055 ** to the journal then we can return right away.
2056 */
drhd9b02572001-04-15 00:37:09 +00002057 pPg->dirty = 1;
drhac69b052004-05-12 13:30:07 +00002058 if( pPg->inJournal && (pPg->inStmt || pPager->stmtInUse==0) ){
drha1680452002-04-18 01:56:57 +00002059 pPager->dirtyFile = 1;
drhfa86c412002-02-02 15:01:15 +00002060 return SQLITE_OK;
2061 }
drh6446c4d2001-12-15 14:22:18 +00002062
2063 /* If we get this far, it means that the page needs to be
drhfa86c412002-02-02 15:01:15 +00002064 ** written to the transaction journal or the ckeckpoint journal
2065 ** or both.
2066 **
2067 ** First check to see that the transaction journal exists and
2068 ** create it if it does not.
drh6446c4d2001-12-15 14:22:18 +00002069 */
drhd9b02572001-04-15 00:37:09 +00002070 assert( pPager->state!=SQLITE_UNLOCK );
danielk197713adf8a2004-06-03 16:08:41 +00002071 rc = sqlite3pager_begin(pData, 0);
drhda47d772002-12-02 04:25:19 +00002072 if( rc!=SQLITE_OK ){
2073 return rc;
2074 }
drhd9b02572001-04-15 00:37:09 +00002075 assert( pPager->state==SQLITE_WRITELOCK );
drhda47d772002-12-02 04:25:19 +00002076 if( !pPager->journalOpen && pPager->useJournal ){
2077 rc = pager_open_journal(pPager);
2078 if( rc!=SQLITE_OK ) return rc;
2079 }
2080 assert( pPager->journalOpen || !pPager->useJournal );
2081 pPager->dirtyFile = 1;
drh6446c4d2001-12-15 14:22:18 +00002082
drhfa86c412002-02-02 15:01:15 +00002083 /* The transaction journal now exists and we have a write lock on the
2084 ** main database file. Write the current page to the transaction
2085 ** journal if it is not there already.
drh6446c4d2001-12-15 14:22:18 +00002086 */
drhac69b052004-05-12 13:30:07 +00002087 if( !pPg->inJournal && (pPager->useJournal || pPager->memDb) ){
drhdb48ee02003-01-16 13:42:43 +00002088 if( (int)pPg->pgno <= pPager->origDbSize ){
drh968af522003-02-11 14:55:40 +00002089 int szPg;
2090 u32 saved;
drhac69b052004-05-12 13:30:07 +00002091 if( pPager->memDb ){
2092 PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager);
2093 TRACE2("JOURNAL %d\n", pPg->pgno);
2094 assert( pHist->pOrig==0 );
2095 pHist->pOrig = sqliteMallocRaw( pPager->pageSize );
2096 if( pHist->pOrig ){
2097 memcpy(pHist->pOrig, PGHDR_TO_DATA(pPg), pPager->pageSize);
2098 }
2099 pPg->inJournal = 1;
danielk197713adf8a2004-06-03 16:08:41 +00002100 }else{
drhac69b052004-05-12 13:30:07 +00002101 if( journal_format>=JOURNAL_FORMAT_3 ){
2102 u32 cksum = pager_cksum(pPager, pPg->pgno, pData);
2103 saved = *(u32*)PGHDR_TO_EXTRA(pPg);
2104 store32bits(cksum, pPg, SQLITE_PAGE_SIZE);
2105 szPg = SQLITE_PAGE_SIZE+8;
2106 }else{
2107 szPg = SQLITE_PAGE_SIZE+4;
2108 }
2109 store32bits(pPg->pgno, pPg, -4);
2110 CODEC(pPager, pData, pPg->pgno, 7);
2111 rc = sqlite3OsWrite(&pPager->jfd, &((char*)pData)[-4], szPg);
2112 TRACE3("JOURNAL %d %d\n", pPg->pgno, pPg->needSync);
2113 CODEC(pPager, pData, pPg->pgno, 0);
2114 if( journal_format>=JOURNAL_FORMAT_3 ){
2115 *(u32*)PGHDR_TO_EXTRA(pPg) = saved;
2116 }
2117 if( rc!=SQLITE_OK ){
2118 sqlite3pager_rollback(pPager);
2119 pPager->errMask |= PAGER_ERR_FULL;
2120 return rc;
2121 }
2122 pPager->nRec++;
2123 assert( pPager->aInJournal!=0 );
2124 pPager->aInJournal[pPg->pgno/8] |= 1<<(pPg->pgno&7);
2125 pPg->needSync = !pPager->noSync;
2126 pPg->inJournal = 1;
2127 if( pPager->stmtInUse ){
2128 pPager->aInStmt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
2129 page_add_to_stmt_list(pPg);
2130 }
drhdb48ee02003-01-16 13:42:43 +00002131 }
drhdb48ee02003-01-16 13:42:43 +00002132 }else{
2133 pPg->needSync = !pPager->journalStarted && !pPager->noSync;
2134 TRACE3("APPEND %d %d\n", pPg->pgno, pPg->needSync);
drhd9b02572001-04-15 00:37:09 +00002135 }
drhdb48ee02003-01-16 13:42:43 +00002136 if( pPg->needSync ){
2137 pPager->needSync = 1;
drhfa86c412002-02-02 15:01:15 +00002138 }
drh69688d52001-04-14 16:38:23 +00002139 }
drh6446c4d2001-12-15 14:22:18 +00002140
drhac69b052004-05-12 13:30:07 +00002141 /* If the statement journal is open and the page is not in it,
2142 ** then write the current page to the statement journal. Note that
2143 ** the statement journal always uses the simplier format 2 that lacks
2144 ** checksums. The header is also omitted from the statement journal.
drh6446c4d2001-12-15 14:22:18 +00002145 */
drhac69b052004-05-12 13:30:07 +00002146 if( pPager->stmtInUse && !pPg->inStmt && (int)pPg->pgno<=pPager->stmtSize ){
drh1e336b42002-02-14 12:50:33 +00002147 assert( pPg->inJournal || (int)pPg->pgno>pPager->origDbSize );
drhac69b052004-05-12 13:30:07 +00002148 if( pPager->memDb ){
2149 PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager);
2150 assert( pHist->pStmt==0 );
2151 pHist->pStmt = sqliteMallocRaw( pPager->pageSize );
2152 if( pHist->pStmt ){
2153 memcpy(pHist->pStmt, PGHDR_TO_DATA(pPg), pPager->pageSize);
2154 }
2155 TRACE2("STMT-JOURNAL %d\n", pPg->pgno);
2156 }else{
2157 store32bits(pPg->pgno, pPg, -4);
2158 CODEC(pPager, pData, pPg->pgno, 7);
2159 rc = sqlite3OsWrite(&pPager->stfd, ((char*)pData)-4, SQLITE_PAGE_SIZE+4);
2160 TRACE2("STMT-JOURNAL %d\n", pPg->pgno);
2161 CODEC(pPager, pData, pPg->pgno, 0);
2162 if( rc!=SQLITE_OK ){
2163 sqlite3pager_rollback(pPager);
2164 pPager->errMask |= PAGER_ERR_FULL;
2165 return rc;
2166 }
2167 pPager->stmtNRec++;
2168 assert( pPager->aInStmt!=0 );
2169 pPager->aInStmt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
drhfa86c412002-02-02 15:01:15 +00002170 }
drh3aac2dd2004-04-26 14:10:20 +00002171 page_add_to_stmt_list(pPg);
drhfa86c412002-02-02 15:01:15 +00002172 }
2173
2174 /* Update the database size and return.
2175 */
drh1ab43002002-01-14 09:28:19 +00002176 if( pPager->dbSize<(int)pPg->pgno ){
drh306dc212001-05-21 13:45:10 +00002177 pPager->dbSize = pPg->pgno;
2178 }
drh69688d52001-04-14 16:38:23 +00002179 return rc;
drhed7c8552001-04-11 14:29:21 +00002180}
2181
2182/*
drhaacc5432002-01-06 17:07:40 +00002183** Return TRUE if the page given in the argument was previously passed
drh3aac2dd2004-04-26 14:10:20 +00002184** to sqlite3pager_write(). In other words, return TRUE if it is ok
drh6019e162001-07-02 17:51:45 +00002185** to change the content of the page.
2186*/
drh3aac2dd2004-04-26 14:10:20 +00002187int sqlite3pager_iswriteable(void *pData){
drh6019e162001-07-02 17:51:45 +00002188 PgHdr *pPg = DATA_TO_PGHDR(pData);
2189 return pPg->dirty;
2190}
2191
2192/*
drh001bbcb2003-03-19 03:14:00 +00002193** Replace the content of a single page with the information in the third
2194** argument.
2195*/
drh3aac2dd2004-04-26 14:10:20 +00002196int sqlite3pager_overwrite(Pager *pPager, Pgno pgno, void *pData){
drh001bbcb2003-03-19 03:14:00 +00002197 void *pPage;
2198 int rc;
2199
drh3aac2dd2004-04-26 14:10:20 +00002200 rc = sqlite3pager_get(pPager, pgno, &pPage);
drh001bbcb2003-03-19 03:14:00 +00002201 if( rc==SQLITE_OK ){
drh3aac2dd2004-04-26 14:10:20 +00002202 rc = sqlite3pager_write(pPage);
drh001bbcb2003-03-19 03:14:00 +00002203 if( rc==SQLITE_OK ){
drhd0ba1932004-02-10 01:54:28 +00002204 memcpy(pPage, pData, SQLITE_PAGE_SIZE);
drh001bbcb2003-03-19 03:14:00 +00002205 }
drh3aac2dd2004-04-26 14:10:20 +00002206 sqlite3pager_unref(pPage);
drh001bbcb2003-03-19 03:14:00 +00002207 }
2208 return rc;
2209}
2210
2211/*
drh30e58752002-03-02 20:41:57 +00002212** A call to this routine tells the pager that it is not necessary to
2213** write the information on page "pgno" back to the disk, even though
2214** that page might be marked as dirty.
2215**
2216** The overlying software layer calls this routine when all of the data
2217** on the given page is unused. The pager marks the page as clean so
2218** that it does not get written to disk.
2219**
2220** Tests show that this optimization, together with the
drh3aac2dd2004-04-26 14:10:20 +00002221** sqlite3pager_dont_rollback() below, more than double the speed
drh30e58752002-03-02 20:41:57 +00002222** of large INSERT operations and quadruple the speed of large DELETEs.
drh8e298f92002-07-06 16:28:47 +00002223**
2224** When this routine is called, set the alwaysRollback flag to true.
drh3aac2dd2004-04-26 14:10:20 +00002225** Subsequent calls to sqlite3pager_dont_rollback() for the same page
drh8e298f92002-07-06 16:28:47 +00002226** will thereafter be ignored. This is necessary to avoid a problem
2227** where a page with data is added to the freelist during one part of
2228** a transaction then removed from the freelist during a later part
2229** of the same transaction and reused for some other purpose. When it
2230** is first added to the freelist, this routine is called. When reused,
2231** the dont_rollback() routine is called. But because the page contains
2232** critical data, we still need to be sure it gets rolled back in spite
2233** of the dont_rollback() call.
drh30e58752002-03-02 20:41:57 +00002234*/
drh3aac2dd2004-04-26 14:10:20 +00002235void sqlite3pager_dont_write(Pager *pPager, Pgno pgno){
drh30e58752002-03-02 20:41:57 +00002236 PgHdr *pPg;
drh8e298f92002-07-06 16:28:47 +00002237
drh30e58752002-03-02 20:41:57 +00002238 pPg = pager_lookup(pPager, pgno);
drh8e298f92002-07-06 16:28:47 +00002239 pPg->alwaysRollback = 1;
drh30e58752002-03-02 20:41:57 +00002240 if( pPg && pPg->dirty ){
drh8124a302002-06-25 14:43:57 +00002241 if( pPager->dbSize==(int)pPg->pgno && pPager->origDbSize<pPager->dbSize ){
2242 /* If this pages is the last page in the file and the file has grown
2243 ** during the current transaction, then do NOT mark the page as clean.
2244 ** When the database file grows, we must make sure that the last page
2245 ** gets written at least once so that the disk file will be the correct
2246 ** size. If you do not write this page and the size of the file
2247 ** on the disk ends up being too small, that can lead to database
2248 ** corruption during the next transaction.
2249 */
2250 }else{
drhdb48ee02003-01-16 13:42:43 +00002251 TRACE2("DONT_WRITE %d\n", pgno);
drh8124a302002-06-25 14:43:57 +00002252 pPg->dirty = 0;
2253 }
drh30e58752002-03-02 20:41:57 +00002254 }
2255}
2256
2257/*
2258** A call to this routine tells the pager that if a rollback occurs,
2259** it is not necessary to restore the data on the given page. This
2260** means that the pager does not have to record the given page in the
2261** rollback journal.
2262*/
drh3aac2dd2004-04-26 14:10:20 +00002263void sqlite3pager_dont_rollback(void *pData){
drh30e58752002-03-02 20:41:57 +00002264 PgHdr *pPg = DATA_TO_PGHDR(pData);
2265 Pager *pPager = pPg->pPager;
2266
2267 if( pPager->state!=SQLITE_WRITELOCK || pPager->journalOpen==0 ) return;
drhac69b052004-05-12 13:30:07 +00002268 if( pPg->alwaysRollback || pPager->alwaysRollback || pPager->memDb ) return;
drh30e58752002-03-02 20:41:57 +00002269 if( !pPg->inJournal && (int)pPg->pgno <= pPager->origDbSize ){
2270 assert( pPager->aInJournal!=0 );
2271 pPager->aInJournal[pPg->pgno/8] |= 1<<(pPg->pgno&7);
2272 pPg->inJournal = 1;
drhac69b052004-05-12 13:30:07 +00002273 if( pPager->stmtInUse ){
2274 pPager->aInStmt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
drh3aac2dd2004-04-26 14:10:20 +00002275 page_add_to_stmt_list(pPg);
drh30e58752002-03-02 20:41:57 +00002276 }
drhdb48ee02003-01-16 13:42:43 +00002277 TRACE2("DONT_ROLLBACK %d\n", pPg->pgno);
drh30e58752002-03-02 20:41:57 +00002278 }
drhac69b052004-05-12 13:30:07 +00002279 if( pPager->stmtInUse && !pPg->inStmt && (int)pPg->pgno<=pPager->stmtSize ){
drh30e58752002-03-02 20:41:57 +00002280 assert( pPg->inJournal || (int)pPg->pgno>pPager->origDbSize );
drhac69b052004-05-12 13:30:07 +00002281 assert( pPager->aInStmt!=0 );
2282 pPager->aInStmt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
drh3aac2dd2004-04-26 14:10:20 +00002283 page_add_to_stmt_list(pPg);
drh30e58752002-03-02 20:41:57 +00002284 }
2285}
2286
drhac69b052004-05-12 13:30:07 +00002287
2288/*
2289** Clear a PgHistory block
2290*/
2291static void clearHistory(PgHistory *pHist){
2292 sqliteFree(pHist->pOrig);
2293 sqliteFree(pHist->pStmt);
2294 pHist->pOrig = 0;
2295 pHist->pStmt = 0;
2296}
2297
drh30e58752002-03-02 20:41:57 +00002298/*
drhed7c8552001-04-11 14:29:21 +00002299** Commit all changes to the database and release the write lock.
drhd9b02572001-04-15 00:37:09 +00002300**
2301** If the commit fails for any reason, a rollback attempt is made
2302** and an error code is returned. If the commit worked, SQLITE_OK
2303** is returned.
drhed7c8552001-04-11 14:29:21 +00002304*/
drh3aac2dd2004-04-26 14:10:20 +00002305int sqlite3pager_commit(Pager *pPager){
drha1b351a2001-09-14 16:42:12 +00002306 int rc;
drhed7c8552001-04-11 14:29:21 +00002307 PgHdr *pPg;
drhd9b02572001-04-15 00:37:09 +00002308
2309 if( pPager->errMask==PAGER_ERR_FULL ){
drh3aac2dd2004-04-26 14:10:20 +00002310 rc = sqlite3pager_rollback(pPager);
drh4e371ee2002-09-05 16:08:27 +00002311 if( rc==SQLITE_OK ){
2312 rc = SQLITE_FULL;
2313 }
drhd9b02572001-04-15 00:37:09 +00002314 return rc;
2315 }
2316 if( pPager->errMask!=0 ){
2317 rc = pager_errcode(pPager);
2318 return rc;
2319 }
2320 if( pPager->state!=SQLITE_WRITELOCK ){
2321 return SQLITE_ERROR;
2322 }
drhdb48ee02003-01-16 13:42:43 +00002323 TRACE1("COMMIT\n");
drhac69b052004-05-12 13:30:07 +00002324 if( pPager->memDb ){
2325 pPg = pager_get_all_dirty_pages(pPager);
2326 while( pPg ){
2327 clearHistory(PGHDR_TO_HIST(pPg, pPager));
2328 pPg->dirty = 0;
2329 pPg->inJournal = 0;
2330 pPg->inStmt = 0;
2331 pPg->pPrevStmt = pPg->pNextStmt = 0;
2332 pPg = pPg->pDirty;
2333 }
2334 pPager->pStmt = 0;
2335 pPager->state = SQLITE_READLOCK;
2336 return SQLITE_OK;
2337 }
danielk197713adf8a2004-06-03 16:08:41 +00002338#if 0
drha1680452002-04-18 01:56:57 +00002339 if( pPager->dirtyFile==0 ){
danielk19774adee202004-05-08 08:23:19 +00002340 /* Exit early (without doing the time-consuming sqlite3OsSync() calls)
drha1680452002-04-18 01:56:57 +00002341 ** if there have been no changes to the database file. */
drh341eae82003-01-21 02:39:36 +00002342 assert( pPager->needSync==0 );
drha1680452002-04-18 01:56:57 +00002343 rc = pager_unwritelock(pPager);
2344 pPager->dbSize = -1;
2345 return rc;
2346 }
drhda47d772002-12-02 04:25:19 +00002347 assert( pPager->journalOpen );
danielk197713adf8a2004-06-03 16:08:41 +00002348 rc = syncJournal(pPager, 0);
drh240c5792004-02-08 00:40:52 +00002349 if( rc!=SQLITE_OK ){
drhd9b02572001-04-15 00:37:09 +00002350 goto commit_abort;
drhed7c8552001-04-11 14:29:21 +00002351 }
drh2554f8b2003-01-22 01:26:44 +00002352 pPg = pager_get_all_dirty_pages(pPager);
2353 if( pPg ){
2354 rc = pager_write_pagelist(pPg);
danielk19774adee202004-05-08 08:23:19 +00002355 if( rc || (!pPager->noSync && sqlite3OsSync(&pPager->fd)!=SQLITE_OK) ){
drh2554f8b2003-01-22 01:26:44 +00002356 goto commit_abort;
2357 }
drh603240c2002-03-05 01:11:12 +00002358 }
danielk197713adf8a2004-06-03 16:08:41 +00002359#endif
2360 rc = sqlite3pager_sync(pPager, 0);
2361 if( rc!=SQLITE_OK ) goto commit_abort;
2362
drhd9b02572001-04-15 00:37:09 +00002363 rc = pager_unwritelock(pPager);
2364 pPager->dbSize = -1;
2365 return rc;
2366
2367 /* Jump here if anything goes wrong during the commit process.
2368 */
2369commit_abort:
drh3aac2dd2004-04-26 14:10:20 +00002370 rc = sqlite3pager_rollback(pPager);
drhd9b02572001-04-15 00:37:09 +00002371 if( rc==SQLITE_OK ){
2372 rc = SQLITE_FULL;
drhed7c8552001-04-11 14:29:21 +00002373 }
drhed7c8552001-04-11 14:29:21 +00002374 return rc;
2375}
2376
2377/*
2378** Rollback all changes. The database falls back to read-only mode.
2379** All in-memory cache pages revert to their original data contents.
2380** The journal is deleted.
drhd9b02572001-04-15 00:37:09 +00002381**
2382** This routine cannot fail unless some other process is not following
2383** the correct locking protocol (SQLITE_PROTOCOL) or unless some other
2384** process is writing trash into the journal file (SQLITE_CORRUPT) or
2385** unless a prior malloc() failed (SQLITE_NOMEM). Appropriate error
2386** codes are returned for all these occasions. Otherwise,
2387** SQLITE_OK is returned.
drhed7c8552001-04-11 14:29:21 +00002388*/
drh3aac2dd2004-04-26 14:10:20 +00002389int sqlite3pager_rollback(Pager *pPager){
drhed7c8552001-04-11 14:29:21 +00002390 int rc;
drhdb48ee02003-01-16 13:42:43 +00002391 TRACE1("ROLLBACK\n");
drhac69b052004-05-12 13:30:07 +00002392 if( pPager->memDb ){
2393 PgHdr *p;
2394 for(p=pPager->pAll; p; p=p->pNextAll){
2395 PgHistory *pHist;
2396 if( !p->dirty ) continue;
2397 pHist = PGHDR_TO_HIST(p, pPager);
2398 if( pHist->pOrig ){
2399 memcpy(PGHDR_TO_DATA(p), pHist->pOrig, pPager->pageSize);
2400 TRACE2("ROLLBACK-PAGE %d\n", p->pgno);
2401 }else{
2402 TRACE2("PAGE %d is clean\n", p->pgno);
2403 }
2404 clearHistory(pHist);
2405 p->dirty = 0;
2406 p->inJournal = 0;
2407 p->inStmt = 0;
2408 p->pPrevStmt = p->pNextStmt = 0;
2409 }
2410 pPager->pStmt = 0;
2411 pPager->dbSize = pPager->origDbSize;
2412 memoryTruncate(pPager);
2413 pPager->stmtInUse = 0;
2414 pPager->state = SQLITE_READLOCK;
2415 return SQLITE_OK;
2416 }
2417
drhda47d772002-12-02 04:25:19 +00002418 if( !pPager->dirtyFile || !pPager->journalOpen ){
2419 rc = pager_unwritelock(pPager);
2420 pPager->dbSize = -1;
2421 return rc;
2422 }
drhdb48ee02003-01-16 13:42:43 +00002423
drhd9b02572001-04-15 00:37:09 +00002424 if( pPager->errMask!=0 && pPager->errMask!=PAGER_ERR_FULL ){
drh4b845d72002-03-05 12:41:19 +00002425 if( pPager->state>=SQLITE_WRITELOCK ){
drh99ee3602003-02-16 19:13:36 +00002426 pager_playback(pPager, 1);
drh4b845d72002-03-05 12:41:19 +00002427 }
drhd9b02572001-04-15 00:37:09 +00002428 return pager_errcode(pPager);
drhed7c8552001-04-11 14:29:21 +00002429 }
drhd9b02572001-04-15 00:37:09 +00002430 if( pPager->state!=SQLITE_WRITELOCK ){
2431 return SQLITE_OK;
2432 }
drh99ee3602003-02-16 19:13:36 +00002433 rc = pager_playback(pPager, 1);
drhd9b02572001-04-15 00:37:09 +00002434 if( rc!=SQLITE_OK ){
2435 rc = SQLITE_CORRUPT;
2436 pPager->errMask |= PAGER_ERR_CORRUPT;
2437 }
2438 pPager->dbSize = -1;
drhed7c8552001-04-11 14:29:21 +00002439 return rc;
drh98808ba2001-10-18 12:34:46 +00002440}
drhd9b02572001-04-15 00:37:09 +00002441
2442/*
drh5e00f6c2001-09-13 13:46:56 +00002443** Return TRUE if the database file is opened read-only. Return FALSE
2444** if the database is (in theory) writable.
2445*/
drh3aac2dd2004-04-26 14:10:20 +00002446int sqlite3pager_isreadonly(Pager *pPager){
drhbe0072d2001-09-13 14:46:09 +00002447 return pPager->readOnly;
drh5e00f6c2001-09-13 13:46:56 +00002448}
2449
2450/*
drhd9b02572001-04-15 00:37:09 +00002451** This routine is used for testing and analysis only.
2452*/
drh3aac2dd2004-04-26 14:10:20 +00002453int *sqlite3pager_stats(Pager *pPager){
drhd9b02572001-04-15 00:37:09 +00002454 static int a[9];
2455 a[0] = pPager->nRef;
2456 a[1] = pPager->nPage;
2457 a[2] = pPager->mxPage;
2458 a[3] = pPager->dbSize;
2459 a[4] = pPager->state;
2460 a[5] = pPager->errMask;
2461 a[6] = pPager->nHit;
2462 a[7] = pPager->nMiss;
2463 a[8] = pPager->nOvfl;
2464 return a;
2465}
drhdd793422001-06-28 01:54:48 +00002466
drhfa86c412002-02-02 15:01:15 +00002467/*
drhac69b052004-05-12 13:30:07 +00002468** Set the statement rollback point.
drhfa86c412002-02-02 15:01:15 +00002469**
2470** This routine should be called with the transaction journal already
drhac69b052004-05-12 13:30:07 +00002471** open. A new statement journal is created that can be used to rollback
drhaaab5722002-02-19 13:39:21 +00002472** changes of a single SQL command within a larger transaction.
drhfa86c412002-02-02 15:01:15 +00002473*/
drh3aac2dd2004-04-26 14:10:20 +00002474int sqlite3pager_stmt_begin(Pager *pPager){
drhfa86c412002-02-02 15:01:15 +00002475 int rc;
2476 char zTemp[SQLITE_TEMPNAME_SIZE];
drhac69b052004-05-12 13:30:07 +00002477 assert( !pPager->stmtInUse );
2478 TRACE1("STMT-BEGIN\n");
2479 if( pPager->memDb ){
2480 pPager->stmtInUse = 1;
2481 pPager->stmtSize = pPager->dbSize;
2482 return SQLITE_OK;
2483 }
drhda47d772002-12-02 04:25:19 +00002484 if( !pPager->journalOpen ){
drhac69b052004-05-12 13:30:07 +00002485 pPager->stmtAutoopen = 1;
drhda47d772002-12-02 04:25:19 +00002486 return SQLITE_OK;
2487 }
drhfa86c412002-02-02 15:01:15 +00002488 assert( pPager->journalOpen );
drhac69b052004-05-12 13:30:07 +00002489 pPager->aInStmt = sqliteMalloc( pPager->dbSize/8 + 1 );
2490 if( pPager->aInStmt==0 ){
danielk19774adee202004-05-08 08:23:19 +00002491 sqlite3OsReadLock(&pPager->fd);
drhfa86c412002-02-02 15:01:15 +00002492 return SQLITE_NOMEM;
2493 }
drh968af522003-02-11 14:55:40 +00002494#ifndef NDEBUG
drhac69b052004-05-12 13:30:07 +00002495 rc = sqlite3OsFileSize(&pPager->jfd, &pPager->stmtJSize);
2496 if( rc ) goto stmt_begin_failed;
2497 assert( pPager->stmtJSize ==
danielk197713adf8a2004-06-03 16:08:41 +00002498 pPager->nRec*JOURNAL_PG_SZ(journal_format) +
2499 JOURNAL_HDR_SZ(pPager, journal_format) );
drh968af522003-02-11 14:55:40 +00002500#endif
drhac69b052004-05-12 13:30:07 +00002501 pPager->stmtJSize = pPager->nRec*JOURNAL_PG_SZ(journal_format)
danielk197713adf8a2004-06-03 16:08:41 +00002502 + JOURNAL_HDR_SZ(pPager, journal_format);
drhac69b052004-05-12 13:30:07 +00002503 pPager->stmtSize = pPager->dbSize;
2504 if( !pPager->stmtOpen ){
2505 rc = sqlite3pager_opentemp(zTemp, &pPager->stfd);
2506 if( rc ) goto stmt_begin_failed;
2507 pPager->stmtOpen = 1;
2508 pPager->stmtNRec = 0;
drh0f892532002-05-30 12:27:03 +00002509 }
drhac69b052004-05-12 13:30:07 +00002510 pPager->stmtInUse = 1;
drhfa86c412002-02-02 15:01:15 +00002511 return SQLITE_OK;
2512
drhac69b052004-05-12 13:30:07 +00002513stmt_begin_failed:
2514 if( pPager->aInStmt ){
2515 sqliteFree(pPager->aInStmt);
2516 pPager->aInStmt = 0;
drhfa86c412002-02-02 15:01:15 +00002517 }
2518 return rc;
2519}
2520
2521/*
drhac69b052004-05-12 13:30:07 +00002522** Commit a statement.
drhfa86c412002-02-02 15:01:15 +00002523*/
drh3aac2dd2004-04-26 14:10:20 +00002524int sqlite3pager_stmt_commit(Pager *pPager){
drhac69b052004-05-12 13:30:07 +00002525 if( pPager->stmtInUse ){
drh03eb96a2002-11-10 23:32:56 +00002526 PgHdr *pPg, *pNext;
drhac69b052004-05-12 13:30:07 +00002527 TRACE1("STMT-COMMIT\n");
2528 if( !pPager->memDb ){
2529 sqlite3OsSeek(&pPager->stfd, 0);
2530 /* sqlite3OsTruncate(&pPager->stfd, 0); */
2531 sqliteFree( pPager->aInStmt );
2532 pPager->aInStmt = 0;
drh663fc632002-02-02 18:49:19 +00002533 }
drhac69b052004-05-12 13:30:07 +00002534 for(pPg=pPager->pStmt; pPg; pPg=pNext){
2535 pNext = pPg->pNextStmt;
2536 assert( pPg->inStmt );
2537 pPg->inStmt = 0;
2538 pPg->pPrevStmt = pPg->pNextStmt = 0;
2539 if( pPager->memDb ){
2540 PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager);
2541 sqliteFree(pHist->pStmt);
2542 pHist->pStmt = 0;
2543 }
2544 }
2545 pPager->stmtNRec = 0;
2546 pPager->stmtInUse = 0;
2547 pPager->pStmt = 0;
drh663fc632002-02-02 18:49:19 +00002548 }
drhac69b052004-05-12 13:30:07 +00002549 pPager->stmtAutoopen = 0;
drhfa86c412002-02-02 15:01:15 +00002550 return SQLITE_OK;
2551}
2552
2553/*
drhac69b052004-05-12 13:30:07 +00002554** Rollback a statement.
drhfa86c412002-02-02 15:01:15 +00002555*/
drh3aac2dd2004-04-26 14:10:20 +00002556int sqlite3pager_stmt_rollback(Pager *pPager){
drhfa86c412002-02-02 15:01:15 +00002557 int rc;
drhac69b052004-05-12 13:30:07 +00002558 if( pPager->stmtInUse ){
2559 TRACE1("STMT-ROLLBACK\n");
2560 if( pPager->memDb ){
2561 PgHdr *pPg;
2562 for(pPg=pPager->pStmt; pPg; pPg=pPg->pNextStmt){
2563 PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager);
2564 if( pHist->pStmt ){
2565 memcpy(PGHDR_TO_DATA(pPg), pHist->pStmt, pPager->pageSize);
2566 sqliteFree(pHist->pStmt);
2567 pHist->pStmt = 0;
2568 }
2569 }
2570 pPager->dbSize = pPager->stmtSize;
2571 memoryTruncate(pPager);
2572 rc = SQLITE_OK;
2573 }else{
2574 rc = pager_stmt_playback(pPager);
2575 }
drh3aac2dd2004-04-26 14:10:20 +00002576 sqlite3pager_stmt_commit(pPager);
drh663fc632002-02-02 18:49:19 +00002577 }else{
2578 rc = SQLITE_OK;
2579 }
drhac69b052004-05-12 13:30:07 +00002580 pPager->stmtAutoopen = 0;
drhfa86c412002-02-02 15:01:15 +00002581 return rc;
2582}
2583
drh73509ee2003-04-06 20:44:45 +00002584/*
2585** Return the full pathname of the database file.
2586*/
drh3aac2dd2004-04-26 14:10:20 +00002587const char *sqlite3pager_filename(Pager *pPager){
drh73509ee2003-04-06 20:44:45 +00002588 return pPager->zFilename;
2589}
2590
drhb20ea9d2004-02-09 01:20:36 +00002591/*
2592** Set the codec for this pager
2593*/
drh3aac2dd2004-04-26 14:10:20 +00002594void sqlite3pager_set_codec(
drhb20ea9d2004-02-09 01:20:36 +00002595 Pager *pPager,
drh9eb9e262004-02-11 02:18:05 +00002596 void (*xCodec)(void*,void*,Pgno,int),
drhb20ea9d2004-02-09 01:20:36 +00002597 void *pCodecArg
2598){
2599 pPager->xCodec = xCodec;
2600 pPager->pCodecArg = pCodecArg;
2601}
2602
danielk197713adf8a2004-06-03 16:08:41 +00002603/*
2604** Sync the database file for the pager pPager. zMaster points to the name
2605** of a master journal file that should be written into the individual
2606** journal file. zMaster may be NULL, which is interpreted as no master
2607** journal (a single database transaction).
2608**
2609** This routine ensures that the journal is synced, all dirty pages written
2610** to the database file and the database file synced. The only thing that
2611** remains to commit the transaction is to delete the journal file (or
2612** master journal file if specified).
2613**
2614** Note that if zMaster==NULL, this does not overwrite a previous value
2615** passed to an sqlite3pager_sync() call.
2616*/
2617int sqlite3pager_sync(Pager *pPager, const char *zMaster){
2618 int rc = SQLITE_OK;
2619
2620 /* If this is an in-memory db, or no pages have been written to, this
2621 ** function is a no-op.
2622 */
2623 if( !pPager->memDb && pPager->dirtyFile ){
2624 PgHdr *pPg;
2625 assert( pPager->journalOpen );
2626
2627 /* Sync the journal file */
2628 rc = syncJournal(pPager, zMaster);
2629 if( rc!=SQLITE_OK ) goto sync_exit;
2630
2631 /* Write all dirty pages to the database file */
2632 pPg = pager_get_all_dirty_pages(pPager);
2633 rc = pager_write_pagelist(pPg);
2634 if( rc!=SQLITE_OK ) goto sync_exit;
2635
2636 /* If any pages were actually written, sync the database file */
2637 if( pPg && !pPager->noSync ){
2638 rc = sqlite3OsSync(&pPager->fd);
2639 }
2640 }
2641
2642sync_exit:
2643 return rc;
2644}
2645
#ifdef SQLITE_TEST
/*
** Print a listing of all referenced pages and their ref count.
**
** Only pages with a positive reference count are listed.  The address
** of the page data is printed with %p; the previous code cast the
** pointer to int and printed it with %08x, which loses the upper bits
** wherever pointers are wider than int.
*/
void sqlite3pager_refdump(Pager *pPager){
  PgHdr *pPg;
  for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
    if( pPg->nRef<=0 ) continue;
    printf("PAGE %3d addr=%p nRef=%d\n",
       pPg->pgno, (void*)PGHDR_TO_DATA(pPg), pPg->nRef);
  }
}
#endif