blob: 4c5f27398e526ff8ad01f4f991dc5b8b34ccfa7f [file] [log] [blame]
drhed7c8552001-04-11 14:29:21 +00001/*
drhb19a2bc2001-09-16 00:13:26 +00002** 2001 September 15
drhed7c8552001-04-11 14:29:21 +00003**
drhb19a2bc2001-09-16 00:13:26 +00004** The author disclaims copyright to this source code. In place of
5** a legal notice, here is a blessing:
drhed7c8552001-04-11 14:29:21 +00006**
drhb19a2bc2001-09-16 00:13:26 +00007** May you do good and not evil.
8** May you find forgiveness for yourself and forgive others.
9** May you share freely, never taking more than you give.
drhed7c8552001-04-11 14:29:21 +000010**
11*************************************************************************
drhb19a2bc2001-09-16 00:13:26 +000012** This is the implementation of the page cache subsystem or "pager".
drhed7c8552001-04-11 14:29:21 +000013**
drhb19a2bc2001-09-16 00:13:26 +000014** The pager is used to access a database disk file. It implements
15** atomic commit and rollback through the use of a journal file that
16** is separate from the database file. The pager also implements file
17** locking to prevent two processes from writing the same database
18** file simultaneously, or one process from reading the database while
19** another is writing.
drhed7c8552001-04-11 14:29:21 +000020**
drhaacc5432002-01-06 17:07:40 +000021** @(#) $Id: pager.c,v 1.35 2002/01/06 17:07:40 drh Exp $
drhed7c8552001-04-11 14:29:21 +000022*/
drhd9b02572001-04-15 00:37:09 +000023#include "sqliteInt.h"
drhed7c8552001-04-11 14:29:21 +000024#include "pager.h"
drh8cfbf082001-09-19 13:22:39 +000025#include "os.h"
drhed7c8552001-04-11 14:29:21 +000026#include <assert.h>
drhd9b02572001-04-15 00:37:09 +000027#include <string.h>
drhed7c8552001-04-11 14:29:21 +000028
29/*
30** The page cache as a whole is always in one of the following
31** states:
32**
33** SQLITE_UNLOCK The page cache is not currently reading or
34** writing the database file. There is no
35** data held in memory. This is the initial
36** state.
37**
38** SQLITE_READLOCK The page cache is reading the database.
39** Writing is not permitted. There can be
40** multiple readers accessing the same database
drh69688d52001-04-14 16:38:23 +000041** file at the same time.
drhed7c8552001-04-11 14:29:21 +000042**
43** SQLITE_WRITELOCK The page cache is writing the database.
44** Access is exclusive. No other processes or
45** threads can be reading or writing while one
46** process is writing.
47**
drh306dc212001-05-21 13:45:10 +000048** The page cache comes up in SQLITE_UNLOCK. The first time a
49** sqlite_page_get() occurs, the state transitions to SQLITE_READLOCK.
drhed7c8552001-04-11 14:29:21 +000050** After all pages have been released using sqlite_page_unref(),
drh306dc212001-05-21 13:45:10 +000051** the state transitions back to SQLITE_UNLOCK. The first time
drhed7c8552001-04-11 14:29:21 +000052** that sqlite_page_write() is called, the state transitions to
drh306dc212001-05-21 13:45:10 +000053** SQLITE_WRITELOCK. (Note that sqlite_page_write() can only be
54** called on an outstanding page which means that the pager must
55** be in SQLITE_READLOCK before it transitions to SQLITE_WRITELOCK.)
56** The sqlite_page_rollback() and sqlite_page_commit() functions
57** transition the state from SQLITE_WRITELOCK back to SQLITE_READLOCK.
drhed7c8552001-04-11 14:29:21 +000058*/
/* Values stored in Pager.state. */
#define SQLITE_UNLOCK      0   /* Not reading or writing the database file */
#define SQLITE_READLOCK    1   /* Reading the database; writing not permitted */
#define SQLITE_WRITELOCK   2   /* Writing the database; access is exclusive */
62
drhd9b02572001-04-15 00:37:09 +000063
drhed7c8552001-04-11 14:29:21 +000064/*
65** Each in-memory image of a page begins with the following header.
drhbd03cae2001-06-02 02:40:57 +000066** This header is only visible to this pager module. The client
67** code that calls pager sees only the data that follows the header.
drhed7c8552001-04-11 14:29:21 +000068*/
drhd9b02572001-04-15 00:37:09 +000069typedef struct PgHdr PgHdr;
drhed7c8552001-04-11 14:29:21 +000070struct PgHdr {
71 Pager *pPager; /* The pager to which this page belongs */
72 Pgno pgno; /* The page number for this page */
drh69688d52001-04-14 16:38:23 +000073 PgHdr *pNextHash, *pPrevHash; /* Hash collision chain for PgHdr.pgno */
drhed7c8552001-04-11 14:29:21 +000074 int nRef; /* Number of users of this page */
drhd9b02572001-04-15 00:37:09 +000075 PgHdr *pNextFree, *pPrevFree; /* Freelist of pages where nRef==0 */
76 PgHdr *pNextAll, *pPrevAll; /* A list of all pages */
drhed7c8552001-04-11 14:29:21 +000077 char inJournal; /* TRUE if has been written to journal */
78 char dirty; /* TRUE if we need to write back changes */
drh69688d52001-04-14 16:38:23 +000079 /* SQLITE_PAGE_SIZE bytes of page data follow this header */
drh7e3b0a02001-04-28 16:52:40 +000080 /* Pager.nExtra bytes of local data follow the page data */
drhed7c8552001-04-11 14:29:21 +000081};
82
83/*
drh69688d52001-04-14 16:38:23 +000084** Convert a pointer to a PgHdr into a pointer to its data
85** and back again.
drhed7c8552001-04-11 14:29:21 +000086*/
/* The page data starts immediately after the header... */
#define PGHDR_TO_DATA(P)   ((void*)((P)+1))
/* ...so the header sits one PgHdr before the data pointer. */
#define DATA_TO_PGHDR(D)   (((PgHdr*)(D))-1)
/* The extra area begins SQLITE_PAGE_SIZE bytes past the start of the data. */
#define PGHDR_TO_EXTRA(P)  ((void*)(((char*)((P)+1))+SQLITE_PAGE_SIZE))
drhed7c8552001-04-11 14:29:21 +000090
91/*
drhed7c8552001-04-11 14:29:21 +000092** How big to make the hash table used for locating in-memory pages
drh306dc212001-05-21 13:45:10 +000093** by page number. Knuth says this should be a prime number.
drhed7c8552001-04-11 14:29:21 +000094*/
#define N_PG_HASH   373   /* Number of buckets in Pager.aHash[]; a prime */
drhed7c8552001-04-11 14:29:21 +000096
97/*
** An open page cache is an instance of the following structure.
99*/
100struct Pager {
101 char *zFilename; /* Name of the database file */
102 char *zJournal; /* Name of the journal file */
drh8cfbf082001-09-19 13:22:39 +0000103 OsFile fd, jfd; /* File descriptors for database and journal */
104 int journalOpen; /* True if journal file descriptors is valid */
drhed7c8552001-04-11 14:29:21 +0000105 int dbSize; /* Number of pages in the file */
drh69688d52001-04-14 16:38:23 +0000106 int origDbSize; /* dbSize before the current change */
drh7e3b0a02001-04-28 16:52:40 +0000107 int nExtra; /* Add this many bytes to each in-memory page */
drh72f82862001-05-24 21:06:34 +0000108 void (*xDestructor)(void*); /* Call this routine when freeing pages */
drhed7c8552001-04-11 14:29:21 +0000109 int nPage; /* Total number of in-memory pages */
drhd9b02572001-04-15 00:37:09 +0000110 int nRef; /* Number of in-memory pages with PgHdr.nRef>0 */
drhed7c8552001-04-11 14:29:21 +0000111 int mxPage; /* Maximum number of pages to hold in cache */
drhd9b02572001-04-15 00:37:09 +0000112 int nHit, nMiss, nOvfl; /* Cache hits, missing, and LRU overflows */
113 unsigned char state; /* SQLITE_UNLOCK, _READLOCK or _WRITELOCK */
114 unsigned char errMask; /* One of several kinds of errors */
drh5e00f6c2001-09-13 13:46:56 +0000115 unsigned char tempFile; /* zFilename is a temporary file */
116 unsigned char readOnly; /* True for a read-only database */
drhf57b14a2001-09-14 18:54:08 +0000117 unsigned char needSync; /* True if an fsync() is needed on the journal */
drh6019e162001-07-02 17:51:45 +0000118 unsigned char *aInJournal; /* One bit for each page in the database file */
drhed7c8552001-04-11 14:29:21 +0000119 PgHdr *pFirst, *pLast; /* List of free pages */
drhd9b02572001-04-15 00:37:09 +0000120 PgHdr *pAll; /* List of all pages */
drhed7c8552001-04-11 14:29:21 +0000121 PgHdr *aHash[N_PG_HASH]; /* Hash table to map page number of PgHdr */
drhd9b02572001-04-15 00:37:09 +0000122};
123
124/*
125** These are bits that can be set in Pager.errMask.
126*/
#define PAGER_ERR_FULL     0x01  /* A write() failed */
#define PAGER_ERR_MEM      0x02  /* A malloc() failed */
#define PAGER_ERR_LOCK     0x04  /* Error in the locking protocol */
#define PAGER_ERR_CORRUPT  0x08  /* Database or journal corruption */
#define PAGER_ERR_DISK     0x10  /* General disk I/O error - bad hard drive? */
drhd9b02572001-04-15 00:37:09 +0000132
133/*
134** The journal file contains page records in the following
135** format.
136*/
137typedef struct PageRecord PageRecord;
138struct PageRecord {
139 Pgno pgno; /* The page number */
140 char aData[SQLITE_PAGE_SIZE]; /* Original data for page pgno */
141};
142
143/*
drh5e00f6c2001-09-13 13:46:56 +0000144** Journal files begin with the following magic string. The data
145** was obtained from /dev/random. It is used only as a sanity check.
drhd9b02572001-04-15 00:37:09 +0000146*/
static const unsigned char aJournalMagic[] = {
  0xd9, 0xd5, 0x05, 0xf9,
  0x20, 0xa1, 0x63, 0xd4,
};
150
151/*
152** Hash a page number
153*/
#define pager_hash(PN)  ((PN) % N_PG_HASH)   /* Bucket index for page PN */
drhed7c8552001-04-11 14:29:21 +0000155
156/*
drhdd793422001-06-28 01:54:48 +0000157** Enable reference count tracking here:
158*/
#if SQLITE_TEST
  /* Set non-zero to print a line each time REFINFO() is invoked on a page. */
  int pager_refinfo_enable = 0;

  /*
  ** Print the page number, data address and reference count of page p
  ** when tracing is enabled.
  */
  static void pager_refinfo(PgHdr *p){
    static int cnt = 0;
    if( !pager_refinfo_enable ) return;
    /* The address is printed with %p.  Casting the pointer to int (as was
    ** done before) discards the upper bits where pointers are wider than
    ** int. */
    printf(
       "REFCNT: %4d addr=%p nRef=%d\n",
       p->pgno, PGHDR_TO_DATA(p), p->nRef
    );
    cnt++;   /* Something to set a breakpoint on */
  }
# define REFINFO(X)  pager_refinfo(X)
#else
# define REFINFO(X)
#endif
174
175/*
** Convert the bits in pPager->errMask into an appropriate
** return code.
178*/
179static int pager_errcode(Pager *pPager){
180 int rc = SQLITE_OK;
181 if( pPager->errMask & PAGER_ERR_LOCK ) rc = SQLITE_PROTOCOL;
drh81a20f22001-10-12 17:30:04 +0000182 if( pPager->errMask & PAGER_ERR_DISK ) rc = SQLITE_IOERR;
drhd9b02572001-04-15 00:37:09 +0000183 if( pPager->errMask & PAGER_ERR_FULL ) rc = SQLITE_FULL;
184 if( pPager->errMask & PAGER_ERR_MEM ) rc = SQLITE_NOMEM;
185 if( pPager->errMask & PAGER_ERR_CORRUPT ) rc = SQLITE_CORRUPT;
186 return rc;
drhed7c8552001-04-11 14:29:21 +0000187}
188
189/*
190** Find a page in the hash table given its page number. Return
191** a pointer to the page or NULL if not found.
192*/
drhd9b02572001-04-15 00:37:09 +0000193static PgHdr *pager_lookup(Pager *pPager, Pgno pgno){
drhed7c8552001-04-11 14:29:21 +0000194 PgHdr *p = pPager->aHash[pgno % N_PG_HASH];
195 while( p && p->pgno!=pgno ){
196 p = p->pNextHash;
197 }
198 return p;
199}
200
201/*
202** Unlock the database and clear the in-memory cache. This routine
203** sets the state of the pager back to what it was when it was first
204** opened. Any outstanding pages are invalidated and subsequent attempts
205** to access those pages will likely result in a coredump.
206*/
drhd9b02572001-04-15 00:37:09 +0000207static void pager_reset(Pager *pPager){
drhed7c8552001-04-11 14:29:21 +0000208 PgHdr *pPg, *pNext;
drhd9b02572001-04-15 00:37:09 +0000209 for(pPg=pPager->pAll; pPg; pPg=pNext){
210 pNext = pPg->pNextAll;
211 sqliteFree(pPg);
drhed7c8552001-04-11 14:29:21 +0000212 }
213 pPager->pFirst = 0;
drhd9b02572001-04-15 00:37:09 +0000214 pPager->pLast = 0;
215 pPager->pAll = 0;
drhed7c8552001-04-11 14:29:21 +0000216 memset(pPager->aHash, 0, sizeof(pPager->aHash));
217 pPager->nPage = 0;
218 if( pPager->state==SQLITE_WRITELOCK ){
drhd9b02572001-04-15 00:37:09 +0000219 sqlitepager_rollback(pPager);
drhed7c8552001-04-11 14:29:21 +0000220 }
drha7fcb052001-12-14 15:09:55 +0000221 sqliteOsUnlock(&pPager->fd);
drhed7c8552001-04-11 14:29:21 +0000222 pPager->state = SQLITE_UNLOCK;
drhd9b02572001-04-15 00:37:09 +0000223 pPager->dbSize = -1;
drhed7c8552001-04-11 14:29:21 +0000224 pPager->nRef = 0;
drh8cfbf082001-09-19 13:22:39 +0000225 assert( pPager->journalOpen==0 );
drhed7c8552001-04-11 14:29:21 +0000226}
227
228/*
229** When this routine is called, the pager has the journal file open and
230** a write lock on the database. This routine releases the database
231** write lock and acquires a read lock in its place. The journal file
232** is deleted and closed.
drhed7c8552001-04-11 14:29:21 +0000233*/
drhd9b02572001-04-15 00:37:09 +0000234static int pager_unwritelock(Pager *pPager){
drhed7c8552001-04-11 14:29:21 +0000235 int rc;
drhd9b02572001-04-15 00:37:09 +0000236 PgHdr *pPg;
237 if( pPager->state!=SQLITE_WRITELOCK ) return SQLITE_OK;
drha7fcb052001-12-14 15:09:55 +0000238 sqliteOsClose(&pPager->jfd);
drh8cfbf082001-09-19 13:22:39 +0000239 pPager->journalOpen = 0;
240 sqliteOsDelete(pPager->zJournal);
drha7fcb052001-12-14 15:09:55 +0000241 rc = sqliteOsReadLock(&pPager->fd);
242 assert( rc==SQLITE_OK );
drh6019e162001-07-02 17:51:45 +0000243 sqliteFree( pPager->aInJournal );
244 pPager->aInJournal = 0;
drhd9b02572001-04-15 00:37:09 +0000245 for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
246 pPg->inJournal = 0;
247 pPg->dirty = 0;
248 }
drha7fcb052001-12-14 15:09:55 +0000249 pPager->state = SQLITE_READLOCK;
drhed7c8552001-04-11 14:29:21 +0000250 return rc;
251}
252
drhed7c8552001-04-11 14:29:21 +0000253/*
254** Playback the journal and thus restore the database file to
255** the state it was in before we started making changes.
256**
drhd9b02572001-04-15 00:37:09 +0000257** The journal file format is as follows: There is an initial
258** file-type string for sanity checking. Then there is a single
259** Pgno number which is the number of pages in the database before
260** changes were made. The database is truncated to this size.
drh306dc212001-05-21 13:45:10 +0000261** Next come zero or more page records where each page record
262** consists of a Pgno and SQLITE_PAGE_SIZE bytes of data. See
263** the PageRecord structure for details.
drhed7c8552001-04-11 14:29:21 +0000264**
drh6446c4d2001-12-15 14:22:18 +0000265** For playback, the pages are read from the journal in
drhd9b02572001-04-15 00:37:09 +0000266** reverse order and put back into the original database file.
drh6446c4d2001-12-15 14:22:18 +0000267** It used to be required to replay pages in reverse order because
268** there was a possibility of a page appearing in the journal more
269** than once. In that case, the original value of the page was
270** the first entry so it should be reset last. But now, a bitmap
271** is used to record every page that is in the journal. No pages
272** are ever repeated. So we could, in theory, playback the journal
273** in the forward direction and it would still work.
drhed7c8552001-04-11 14:29:21 +0000274**
** If the file opened as the journal file is not a well-formed
** journal file (as determined by looking at the magic number
** at the beginning), or if any other error occurs during playback,
** the database will likely be corrupted, so the PAGER_ERR_CORRUPT
** bit is set in pPager->errMask and SQLITE_CORRUPT is returned.
** If it all works, then this routine returns SQLITE_OK.
drhed7c8552001-04-11 14:29:21 +0000282*/
drhd9b02572001-04-15 00:37:09 +0000283static int pager_playback(Pager *pPager){
284 int nRec; /* Number of Records */
285 int i; /* Loop counter */
286 Pgno mxPg = 0; /* Size of the original file in pages */
drhd9b02572001-04-15 00:37:09 +0000287 PgHdr *pPg; /* An existing page in the cache */
288 PageRecord pgRec;
289 unsigned char aMagic[sizeof(aJournalMagic)];
drhed7c8552001-04-11 14:29:21 +0000290 int rc;
291
drhc3a64ba2001-11-22 00:01:27 +0000292 /* Figure out how many records are in the journal. Abort early if
293 ** the journal is empty.
drhed7c8552001-04-11 14:29:21 +0000294 */
drh8cfbf082001-09-19 13:22:39 +0000295 assert( pPager->journalOpen );
drha7fcb052001-12-14 15:09:55 +0000296 sqliteOsSeek(&pPager->jfd, 0);
297 rc = sqliteOsFileSize(&pPager->jfd, &nRec);
drhc3a64ba2001-11-22 00:01:27 +0000298 if( rc!=SQLITE_OK ){
299 goto end_playback;
300 }
301 nRec = (nRec - (sizeof(aMagic)+sizeof(Pgno))) / sizeof(PageRecord);
302 if( nRec<=0 ){
303 goto end_playback;
304 }
305
306 /* Read the beginning of the journal and truncate the
307 ** database file back to its original size.
308 */
drha7fcb052001-12-14 15:09:55 +0000309 rc = sqliteOsRead(&pPager->jfd, aMagic, sizeof(aMagic));
drhd9b02572001-04-15 00:37:09 +0000310 if( rc!=SQLITE_OK || memcmp(aMagic,aJournalMagic,sizeof(aMagic))!=0 ){
drh81a20f22001-10-12 17:30:04 +0000311 rc = SQLITE_PROTOCOL;
312 goto end_playback;
drhd9b02572001-04-15 00:37:09 +0000313 }
drha7fcb052001-12-14 15:09:55 +0000314 rc = sqliteOsRead(&pPager->jfd, &mxPg, sizeof(mxPg));
drhd9b02572001-04-15 00:37:09 +0000315 if( rc!=SQLITE_OK ){
drh81a20f22001-10-12 17:30:04 +0000316 goto end_playback;
drhd9b02572001-04-15 00:37:09 +0000317 }
drha7fcb052001-12-14 15:09:55 +0000318 rc = sqliteOsTruncate(&pPager->fd, mxPg*SQLITE_PAGE_SIZE);
drh81a20f22001-10-12 17:30:04 +0000319 if( rc!=SQLITE_OK ){
320 goto end_playback;
321 }
drhd9b02572001-04-15 00:37:09 +0000322 pPager->dbSize = mxPg;
323
drhed7c8552001-04-11 14:29:21 +0000324 /* Process segments beginning with the last and working backwards
325 ** to the first.
326 */
drhd9b02572001-04-15 00:37:09 +0000327 for(i=nRec-1; i>=0; i--){
drhed7c8552001-04-11 14:29:21 +0000328 /* Seek to the beginning of the segment */
drh254cba22001-09-20 01:44:42 +0000329 int ofst;
drhd9b02572001-04-15 00:37:09 +0000330 ofst = i*sizeof(PageRecord) + sizeof(aMagic) + sizeof(Pgno);
drha7fcb052001-12-14 15:09:55 +0000331 rc = sqliteOsSeek(&pPager->jfd, ofst);
drhd9b02572001-04-15 00:37:09 +0000332 if( rc!=SQLITE_OK ) break;
drha7fcb052001-12-14 15:09:55 +0000333 rc = sqliteOsRead(&pPager->jfd, &pgRec, sizeof(pgRec));
drhd9b02572001-04-15 00:37:09 +0000334 if( rc!=SQLITE_OK ) break;
drhed7c8552001-04-11 14:29:21 +0000335
drhd9b02572001-04-15 00:37:09 +0000336 /* Sanity checking on the page */
337 if( pgRec.pgno>mxPg || pgRec.pgno==0 ){
338 rc = SQLITE_CORRUPT;
339 break;
drhed7c8552001-04-11 14:29:21 +0000340 }
341
drhd9b02572001-04-15 00:37:09 +0000342 /* Playback the page. Update the in-memory copy of the page
343 ** at the same time, if there is one.
drhed7c8552001-04-11 14:29:21 +0000344 */
drhd9b02572001-04-15 00:37:09 +0000345 pPg = pager_lookup(pPager, pgRec.pgno);
346 if( pPg ){
347 memcpy(PGHDR_TO_DATA(pPg), pgRec.aData, SQLITE_PAGE_SIZE);
drh6019e162001-07-02 17:51:45 +0000348 memset(PGHDR_TO_EXTRA(pPg), 0, pPager->nExtra);
drhed7c8552001-04-11 14:29:21 +0000349 }
drha7fcb052001-12-14 15:09:55 +0000350 rc = sqliteOsSeek(&pPager->fd, (pgRec.pgno-1)*SQLITE_PAGE_SIZE);
drhd9b02572001-04-15 00:37:09 +0000351 if( rc!=SQLITE_OK ) break;
drha7fcb052001-12-14 15:09:55 +0000352 rc = sqliteOsWrite(&pPager->fd, pgRec.aData, SQLITE_PAGE_SIZE);
drhd9b02572001-04-15 00:37:09 +0000353 if( rc!=SQLITE_OK ) break;
drhed7c8552001-04-11 14:29:21 +0000354 }
drh81a20f22001-10-12 17:30:04 +0000355
356end_playback:
drhd9b02572001-04-15 00:37:09 +0000357 if( rc!=SQLITE_OK ){
358 pager_unwritelock(pPager);
359 pPager->errMask |= PAGER_ERR_CORRUPT;
360 rc = SQLITE_CORRUPT;
361 }else{
362 rc = pager_unwritelock(pPager);
drhed7c8552001-04-11 14:29:21 +0000363 }
drhd9b02572001-04-15 00:37:09 +0000364 return rc;
drhed7c8552001-04-11 14:29:21 +0000365}
366
367/*
drhf57b14a2001-09-14 18:54:08 +0000368** Change the maximum number of in-memory pages that are allowed.
369*/
370void sqlitepager_set_cachesize(Pager *pPager, int mxPage){
371 if( mxPage>10 ){
372 pPager->mxPage = mxPage;
373 }
374}
375
376/*
drhed7c8552001-04-11 14:29:21 +0000377** Create a new page cache and put a pointer to the page cache in *ppPager.
drh5e00f6c2001-09-13 13:46:56 +0000378** The file to be cached need not exist. The file is not locked until
drhd9b02572001-04-15 00:37:09 +0000379** the first call to sqlitepager_get() and is only held open until the
380** last page is released using sqlitepager_unref().
drh382c0242001-10-06 16:33:02 +0000381**
drh6446c4d2001-12-15 14:22:18 +0000382** If zFilename is NULL then a randomly-named temporary file is created
383** and used as the file to be cached. The file will be deleted
384** automatically when it is closed.
drhed7c8552001-04-11 14:29:21 +0000385*/
drh7e3b0a02001-04-28 16:52:40 +0000386int sqlitepager_open(
387 Pager **ppPager, /* Return the Pager structure here */
388 const char *zFilename, /* Name of the database file to open */
389 int mxPage, /* Max number of in-memory cache pages */
390 int nExtra /* Extra bytes append to each in-memory page */
391){
drhed7c8552001-04-11 14:29:21 +0000392 Pager *pPager;
393 int nameLen;
drh8cfbf082001-09-19 13:22:39 +0000394 OsFile fd;
395 int rc;
drh5e00f6c2001-09-13 13:46:56 +0000396 int tempFile;
397 int readOnly = 0;
drh8cfbf082001-09-19 13:22:39 +0000398 char zTemp[SQLITE_TEMPNAME_SIZE];
drhed7c8552001-04-11 14:29:21 +0000399
drhd9b02572001-04-15 00:37:09 +0000400 *ppPager = 0;
401 if( sqlite_malloc_failed ){
402 return SQLITE_NOMEM;
403 }
drh5e00f6c2001-09-13 13:46:56 +0000404 if( zFilename ){
drh8cfbf082001-09-19 13:22:39 +0000405 rc = sqliteOsOpenReadWrite(zFilename, &fd, &readOnly);
drh5e00f6c2001-09-13 13:46:56 +0000406 tempFile = 0;
407 }else{
408 int cnt = 8;
drh8cfbf082001-09-19 13:22:39 +0000409 sqliteOsTempFileName(zTemp);
drh5e00f6c2001-09-13 13:46:56 +0000410 do{
411 cnt--;
drh8cfbf082001-09-19 13:22:39 +0000412 sqliteOsTempFileName(zTemp);
413 rc = sqliteOsOpenExclusive(zTemp, &fd);
414 }while( cnt>0 && rc!=SQLITE_OK );
drh5e00f6c2001-09-13 13:46:56 +0000415 zFilename = zTemp;
416 tempFile = 1;
417 }
drh8cfbf082001-09-19 13:22:39 +0000418 if( rc!=SQLITE_OK ){
drhed7c8552001-04-11 14:29:21 +0000419 return SQLITE_CANTOPEN;
420 }
421 nameLen = strlen(zFilename);
422 pPager = sqliteMalloc( sizeof(*pPager) + nameLen*2 + 30 );
drhd9b02572001-04-15 00:37:09 +0000423 if( pPager==0 ){
drha7fcb052001-12-14 15:09:55 +0000424 sqliteOsClose(&fd);
drhd9b02572001-04-15 00:37:09 +0000425 return SQLITE_NOMEM;
426 }
drhed7c8552001-04-11 14:29:21 +0000427 pPager->zFilename = (char*)&pPager[1];
428 pPager->zJournal = &pPager->zFilename[nameLen+1];
429 strcpy(pPager->zFilename, zFilename);
430 strcpy(pPager->zJournal, zFilename);
431 strcpy(&pPager->zJournal[nameLen], "-journal");
432 pPager->fd = fd;
drh8cfbf082001-09-19 13:22:39 +0000433 pPager->journalOpen = 0;
drhed7c8552001-04-11 14:29:21 +0000434 pPager->nRef = 0;
435 pPager->dbSize = -1;
436 pPager->nPage = 0;
drhd79caeb2001-04-15 02:27:24 +0000437 pPager->mxPage = mxPage>5 ? mxPage : 10;
drhed7c8552001-04-11 14:29:21 +0000438 pPager->state = SQLITE_UNLOCK;
drhd9b02572001-04-15 00:37:09 +0000439 pPager->errMask = 0;
drh5e00f6c2001-09-13 13:46:56 +0000440 pPager->tempFile = tempFile;
441 pPager->readOnly = readOnly;
drhf57b14a2001-09-14 18:54:08 +0000442 pPager->needSync = 0;
drhed7c8552001-04-11 14:29:21 +0000443 pPager->pFirst = 0;
444 pPager->pLast = 0;
drh7c717f72001-06-24 20:39:41 +0000445 pPager->nExtra = nExtra;
drhed7c8552001-04-11 14:29:21 +0000446 memset(pPager->aHash, 0, sizeof(pPager->aHash));
447 *ppPager = pPager;
448 return SQLITE_OK;
449}
450
451/*
drh72f82862001-05-24 21:06:34 +0000452** Set the destructor for this pager. If not NULL, the destructor is called
drh5e00f6c2001-09-13 13:46:56 +0000453** when the reference count on each page reaches zero. The destructor can
454** be used to clean up information in the extra segment appended to each page.
drh72f82862001-05-24 21:06:34 +0000455**
** The destructor is not called as a result of sqlitepager_close().
457** Destructors are only called by sqlitepager_unref().
458*/
459void sqlitepager_set_destructor(Pager *pPager, void (*xDesc)(void*)){
460 pPager->xDestructor = xDesc;
461}
462
463/*
drh5e00f6c2001-09-13 13:46:56 +0000464** Return the total number of pages in the disk file associated with
465** pPager.
drhed7c8552001-04-11 14:29:21 +0000466*/
drhd9b02572001-04-15 00:37:09 +0000467int sqlitepager_pagecount(Pager *pPager){
drhed7c8552001-04-11 14:29:21 +0000468 int n;
drhd9b02572001-04-15 00:37:09 +0000469 assert( pPager!=0 );
drhed7c8552001-04-11 14:29:21 +0000470 if( pPager->dbSize>=0 ){
471 return pPager->dbSize;
472 }
drha7fcb052001-12-14 15:09:55 +0000473 if( sqliteOsFileSize(&pPager->fd, &n)!=SQLITE_OK ){
drh81a20f22001-10-12 17:30:04 +0000474 pPager->errMask |= PAGER_ERR_DISK;
drh8cfbf082001-09-19 13:22:39 +0000475 return 0;
drhed7c8552001-04-11 14:29:21 +0000476 }
drh8cfbf082001-09-19 13:22:39 +0000477 n /= SQLITE_PAGE_SIZE;
drhd9b02572001-04-15 00:37:09 +0000478 if( pPager->state!=SQLITE_UNLOCK ){
drhed7c8552001-04-11 14:29:21 +0000479 pPager->dbSize = n;
480 }
481 return n;
482}
483
484/*
485** Shutdown the page cache. Free all memory and close all files.
486**
487** If a transaction was in progress when this routine is called, that
488** transaction is rolled back. All outstanding pages are invalidated
489** and their memory is freed. Any attempt to use a page associated
490** with this page cache after this function returns will likely
491** result in a coredump.
492*/
drhd9b02572001-04-15 00:37:09 +0000493int sqlitepager_close(Pager *pPager){
494 PgHdr *pPg, *pNext;
drhed7c8552001-04-11 14:29:21 +0000495 switch( pPager->state ){
496 case SQLITE_WRITELOCK: {
drhd9b02572001-04-15 00:37:09 +0000497 sqlitepager_rollback(pPager);
drha7fcb052001-12-14 15:09:55 +0000498 sqliteOsUnlock(&pPager->fd);
drh8cfbf082001-09-19 13:22:39 +0000499 assert( pPager->journalOpen==0 );
drhed7c8552001-04-11 14:29:21 +0000500 break;
501 }
502 case SQLITE_READLOCK: {
drha7fcb052001-12-14 15:09:55 +0000503 sqliteOsUnlock(&pPager->fd);
drhed7c8552001-04-11 14:29:21 +0000504 break;
505 }
506 default: {
507 /* Do nothing */
508 break;
509 }
510 }
drhd9b02572001-04-15 00:37:09 +0000511 for(pPg=pPager->pAll; pPg; pPg=pNext){
512 pNext = pPg->pNextAll;
513 sqliteFree(pPg);
drhed7c8552001-04-11 14:29:21 +0000514 }
drha7fcb052001-12-14 15:09:55 +0000515 sqliteOsClose(&pPager->fd);
drh8cfbf082001-09-19 13:22:39 +0000516 assert( pPager->journalOpen==0 );
drh5e00f6c2001-09-13 13:46:56 +0000517 if( pPager->tempFile ){
drh8cfbf082001-09-19 13:22:39 +0000518 sqliteOsDelete(pPager->zFilename);
drh5e00f6c2001-09-13 13:46:56 +0000519 }
drhed7c8552001-04-11 14:29:21 +0000520 sqliteFree(pPager);
521 return SQLITE_OK;
522}
523
524/*
drh5e00f6c2001-09-13 13:46:56 +0000525** Return the page number for the given page data.
drhed7c8552001-04-11 14:29:21 +0000526*/
drhd9b02572001-04-15 00:37:09 +0000527Pgno sqlitepager_pagenumber(void *pData){
drhed7c8552001-04-11 14:29:21 +0000528 PgHdr *p = DATA_TO_PGHDR(pData);
529 return p->pgno;
530}
531
532/*
drh7e3b0a02001-04-28 16:52:40 +0000533** Increment the reference count for a page. If the page is
534** currently on the freelist (the reference count is zero) then
535** remove it from the freelist.
536*/
drhdf0b3b02001-06-23 11:36:20 +0000537static void page_ref(PgHdr *pPg){
drh7e3b0a02001-04-28 16:52:40 +0000538 if( pPg->nRef==0 ){
539 /* The page is currently on the freelist. Remove it. */
540 if( pPg->pPrevFree ){
541 pPg->pPrevFree->pNextFree = pPg->pNextFree;
542 }else{
543 pPg->pPager->pFirst = pPg->pNextFree;
544 }
545 if( pPg->pNextFree ){
546 pPg->pNextFree->pPrevFree = pPg->pPrevFree;
547 }else{
548 pPg->pPager->pLast = pPg->pPrevFree;
549 }
550 pPg->pPager->nRef++;
551 }
552 pPg->nRef++;
drhdd793422001-06-28 01:54:48 +0000553 REFINFO(pPg);
drhdf0b3b02001-06-23 11:36:20 +0000554}
555
556/*
557** Increment the reference count for a page. The input pointer is
558** a reference to the page data.
559*/
560int sqlitepager_ref(void *pData){
561 PgHdr *pPg = DATA_TO_PGHDR(pData);
562 page_ref(pPg);
drh8c42ca92001-06-22 19:15:00 +0000563 return SQLITE_OK;
drh7e3b0a02001-04-28 16:52:40 +0000564}
565
566/*
drhb19a2bc2001-09-16 00:13:26 +0000567** Sync the journal and then write all free dirty pages to the database
568** file.
569**
570** Writing all free dirty pages to the database after the sync is a
571** non-obvious optimization. fsync() is an expensive operation so we
** want to minimize the number of times it is called.  After an fsync() call,
573** we are free to write dirty pages back to the database. It is best
574** to go ahead and write as many dirty pages as possible to minimize
575** the risk of having to do another fsync() later on. Writing dirty
576** free pages in this way was observed to make database operations go
577** up to 10 times faster.
drh50e5dad2001-09-15 00:57:28 +0000578*/
579static int syncAllPages(Pager *pPager){
580 PgHdr *pPg;
581 int rc = SQLITE_OK;
582 if( pPager->needSync ){
drha7fcb052001-12-14 15:09:55 +0000583 rc = sqliteOsSync(&pPager->jfd);
drh50e5dad2001-09-15 00:57:28 +0000584 if( rc!=0 ) return rc;
585 pPager->needSync = 0;
586 }
587 for(pPg=pPager->pFirst; pPg; pPg=pPg->pNextFree){
588 if( pPg->dirty ){
drha7fcb052001-12-14 15:09:55 +0000589 sqliteOsSeek(&pPager->fd, (pPg->pgno-1)*SQLITE_PAGE_SIZE);
590 rc = sqliteOsWrite(&pPager->fd, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE);
drh50e5dad2001-09-15 00:57:28 +0000591 if( rc!=SQLITE_OK ) break;
592 pPg->dirty = 0;
593 }
594 }
drh81a20f22001-10-12 17:30:04 +0000595 return rc;
drh50e5dad2001-09-15 00:57:28 +0000596}
597
598/*
drhd9b02572001-04-15 00:37:09 +0000599** Acquire a page.
600**
drh58a11682001-11-10 13:51:08 +0000601** A read lock on the disk file is obtained when the first page is acquired.
drh5e00f6c2001-09-13 13:46:56 +0000602** This read lock is dropped when the last page is released.
drhd9b02572001-04-15 00:37:09 +0000603**
drh306dc212001-05-21 13:45:10 +0000604** A _get works for any page number greater than 0. If the database
605** file is smaller than the requested page, then no actual disk
606** read occurs and the memory image of the page is initialized to
607** all zeros. The extra data appended to a page is always initialized
608** to zeros the first time a page is loaded into memory.
609**
drhd9b02572001-04-15 00:37:09 +0000610** The acquisition might fail for several reasons. In all cases,
611** an appropriate error code is returned and *ppPage is set to NULL.
drh7e3b0a02001-04-28 16:52:40 +0000612**
613** See also sqlitepager_lookup(). Both this routine and _lookup() attempt
614** to find a page in the in-memory cache first. If the page is not already
drh5e00f6c2001-09-13 13:46:56 +0000615** in memory, this routine goes to disk to read it in whereas _lookup()
drh7e3b0a02001-04-28 16:52:40 +0000616** just returns 0. This routine acquires a read-lock the first time it
617** has to go to disk, and could also playback an old journal if necessary.
618** Since _lookup() never goes to disk, it never has to deal with locks
619** or journal files.
drhed7c8552001-04-11 14:29:21 +0000620*/
drhd9b02572001-04-15 00:37:09 +0000621int sqlitepager_get(Pager *pPager, Pgno pgno, void **ppPage){
drhed7c8552001-04-11 14:29:21 +0000622 PgHdr *pPg;
623
drhd9b02572001-04-15 00:37:09 +0000624 /* Make sure we have not hit any critical errors.
625 */
626 if( pPager==0 || pgno==0 ){
627 return SQLITE_ERROR;
628 }
629 if( pPager->errMask & ~(PAGER_ERR_FULL) ){
630 return pager_errcode(pPager);
631 }
632
drhed7c8552001-04-11 14:29:21 +0000633 /* If this is the first page accessed, then get a read lock
634 ** on the database file.
635 */
636 if( pPager->nRef==0 ){
drha7fcb052001-12-14 15:09:55 +0000637 if( sqliteOsReadLock(&pPager->fd)!=SQLITE_OK ){
drhed7c8552001-04-11 14:29:21 +0000638 *ppPage = 0;
639 return SQLITE_BUSY;
640 }
drhd9b02572001-04-15 00:37:09 +0000641 pPager->state = SQLITE_READLOCK;
drhed7c8552001-04-11 14:29:21 +0000642
643 /* If a journal file exists, try to play it back.
644 */
drh8cfbf082001-09-19 13:22:39 +0000645 if( sqliteOsFileExists(pPager->zJournal) ){
drhf57b3392001-10-08 13:22:32 +0000646 int rc, dummy;
drhed7c8552001-04-11 14:29:21 +0000647
drha7fcb052001-12-14 15:09:55 +0000648 /* Get a write lock on the database
649 */
650 rc = sqliteOsWriteLock(&pPager->fd);
651 if( rc!=SQLITE_OK ){
drh6446c4d2001-12-15 14:22:18 +0000652 rc = sqliteOsUnlock(&pPager->fd);
drha7fcb052001-12-14 15:09:55 +0000653 assert( rc==SQLITE_OK );
654 *ppPage = 0;
655 return SQLITE_BUSY;
656 }
657 pPager->state = SQLITE_WRITELOCK;
658
drhed7c8552001-04-11 14:29:21 +0000659 /* Open the journal for exclusive access. Return SQLITE_BUSY if
drhf57b3392001-10-08 13:22:32 +0000660 ** we cannot get exclusive access to the journal file.
661 **
662 ** Even though we will only be reading from the journal, not writing,
663 ** we have to open the journal for writing in order to obtain an
664 ** exclusive access lock.
drhed7c8552001-04-11 14:29:21 +0000665 */
drhf57b3392001-10-08 13:22:32 +0000666 rc = sqliteOsOpenReadWrite(pPager->zJournal, &pPager->jfd, &dummy);
drha7fcb052001-12-14 15:09:55 +0000667 if( rc!=SQLITE_OK ){
668 rc = sqliteOsUnlock(&pPager->fd);
669 assert( rc==SQLITE_OK );
drhed7c8552001-04-11 14:29:21 +0000670 *ppPage = 0;
671 return SQLITE_BUSY;
672 }
drha7fcb052001-12-14 15:09:55 +0000673 pPager->journalOpen = 1;
drhed7c8552001-04-11 14:29:21 +0000674
675 /* Playback and delete the journal. Drop the database write
676 ** lock and reacquire the read lock.
677 */
drhd9b02572001-04-15 00:37:09 +0000678 rc = pager_playback(pPager);
679 if( rc!=SQLITE_OK ){
680 return rc;
681 }
drhed7c8552001-04-11 14:29:21 +0000682 }
683 pPg = 0;
684 }else{
685 /* Search for page in cache */
drhd9b02572001-04-15 00:37:09 +0000686 pPg = pager_lookup(pPager, pgno);
drhed7c8552001-04-11 14:29:21 +0000687 }
688 if( pPg==0 ){
drhd9b02572001-04-15 00:37:09 +0000689 /* The requested page is not in the page cache. */
drhed7c8552001-04-11 14:29:21 +0000690 int h;
drh7e3b0a02001-04-28 16:52:40 +0000691 pPager->nMiss++;
drhed7c8552001-04-11 14:29:21 +0000692 if( pPager->nPage<pPager->mxPage || pPager->pFirst==0 ){
693 /* Create a new page */
drh7e3b0a02001-04-28 16:52:40 +0000694 pPg = sqliteMalloc( sizeof(*pPg) + SQLITE_PAGE_SIZE + pPager->nExtra );
drhd9b02572001-04-15 00:37:09 +0000695 if( pPg==0 ){
696 *ppPage = 0;
697 pager_unwritelock(pPager);
698 pPager->errMask |= PAGER_ERR_MEM;
699 return SQLITE_NOMEM;
700 }
drhed7c8552001-04-11 14:29:21 +0000701 pPg->pPager = pPager;
drhd9b02572001-04-15 00:37:09 +0000702 pPg->pNextAll = pPager->pAll;
703 if( pPager->pAll ){
704 pPager->pAll->pPrevAll = pPg;
705 }
706 pPg->pPrevAll = 0;
drhd79caeb2001-04-15 02:27:24 +0000707 pPager->pAll = pPg;
drhd9b02572001-04-15 00:37:09 +0000708 pPager->nPage++;
drhed7c8552001-04-11 14:29:21 +0000709 }else{
drhd9b02572001-04-15 00:37:09 +0000710 /* Recycle an older page. First locate the page to be recycled.
711 ** Try to find one that is not dirty and is near the head of
712 ** of the free list */
drh50e5dad2001-09-15 00:57:28 +0000713 int cnt = pPager->mxPage/2;
drhed7c8552001-04-11 14:29:21 +0000714 pPg = pPager->pFirst;
drh6019e162001-07-02 17:51:45 +0000715 while( pPg->dirty && 0<cnt-- && pPg->pNextFree ){
drhd9b02572001-04-15 00:37:09 +0000716 pPg = pPg->pNextFree;
717 }
drhb19a2bc2001-09-16 00:13:26 +0000718
719 /* If we could not find a page that has not been used recently
720 ** and which is not dirty, then sync the journal and write all
721 ** dirty free pages into the database file, thus making them
722 ** clean pages and available for recycling.
723 **
724 ** We have to sync the journal before writing a page to the main
725 ** database. But syncing is a very slow operation. So after a
726 ** sync, it is best to write everything we can back to the main
727 ** database to minimize the risk of having to sync again in the
728 ** near future. That is way we write all dirty pages after a
729 ** sync.
730 */
drh50e5dad2001-09-15 00:57:28 +0000731 if( pPg==0 || pPg->dirty ){
732 int rc = syncAllPages(pPager);
733 if( rc!=0 ){
734 sqlitepager_rollback(pPager);
735 *ppPage = 0;
736 return SQLITE_IOERR;
737 }
738 pPg = pPager->pFirst;
739 }
drhd9b02572001-04-15 00:37:09 +0000740 assert( pPg->nRef==0 );
drh50e5dad2001-09-15 00:57:28 +0000741 assert( pPg->dirty==0 );
drhd9b02572001-04-15 00:37:09 +0000742
743 /* Unlink the old page from the free list and the hash table
744 */
drh6019e162001-07-02 17:51:45 +0000745 if( pPg->pPrevFree ){
746 pPg->pPrevFree->pNextFree = pPg->pNextFree;
drhed7c8552001-04-11 14:29:21 +0000747 }else{
drh6019e162001-07-02 17:51:45 +0000748 assert( pPager->pFirst==pPg );
749 pPager->pFirst = pPg->pNextFree;
drhed7c8552001-04-11 14:29:21 +0000750 }
drh6019e162001-07-02 17:51:45 +0000751 if( pPg->pNextFree ){
752 pPg->pNextFree->pPrevFree = pPg->pPrevFree;
753 }else{
754 assert( pPager->pLast==pPg );
755 pPager->pLast = pPg->pPrevFree;
756 }
757 pPg->pNextFree = pPg->pPrevFree = 0;
drhed7c8552001-04-11 14:29:21 +0000758 if( pPg->pNextHash ){
759 pPg->pNextHash->pPrevHash = pPg->pPrevHash;
760 }
761 if( pPg->pPrevHash ){
762 pPg->pPrevHash->pNextHash = pPg->pNextHash;
763 }else{
drhd9b02572001-04-15 00:37:09 +0000764 h = pager_hash(pPg->pgno);
drhed7c8552001-04-11 14:29:21 +0000765 assert( pPager->aHash[h]==pPg );
766 pPager->aHash[h] = pPg->pNextHash;
767 }
drh6019e162001-07-02 17:51:45 +0000768 pPg->pNextHash = pPg->pPrevHash = 0;
drhd9b02572001-04-15 00:37:09 +0000769 pPager->nOvfl++;
drhed7c8552001-04-11 14:29:21 +0000770 }
771 pPg->pgno = pgno;
drh6019e162001-07-02 17:51:45 +0000772 if( pPager->aInJournal && pgno<=pPager->origDbSize ){
773 pPg->inJournal = (pPager->aInJournal[pgno/8] & (1<<(pgno&7)))!=0;
774 }else{
775 pPg->inJournal = 0;
776 }
drhed7c8552001-04-11 14:29:21 +0000777 pPg->dirty = 0;
778 pPg->nRef = 1;
drhdd793422001-06-28 01:54:48 +0000779 REFINFO(pPg);
drhd9b02572001-04-15 00:37:09 +0000780 pPager->nRef++;
781 h = pager_hash(pgno);
drhed7c8552001-04-11 14:29:21 +0000782 pPg->pNextHash = pPager->aHash[h];
783 pPager->aHash[h] = pPg;
784 if( pPg->pNextHash ){
785 assert( pPg->pNextHash->pPrevHash==0 );
786 pPg->pNextHash->pPrevHash = pPg;
787 }
drh306dc212001-05-21 13:45:10 +0000788 if( pPager->dbSize<0 ) sqlitepager_pagecount(pPager);
789 if( pPager->dbSize<pgno ){
790 memset(PGHDR_TO_DATA(pPg), 0, SQLITE_PAGE_SIZE);
791 }else{
drh81a20f22001-10-12 17:30:04 +0000792 int rc;
drha7fcb052001-12-14 15:09:55 +0000793 sqliteOsSeek(&pPager->fd, (pgno-1)*SQLITE_PAGE_SIZE);
794 rc = sqliteOsRead(&pPager->fd, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE);
drh81a20f22001-10-12 17:30:04 +0000795 if( rc!=SQLITE_OK ){
796 return rc;
797 }
drh306dc212001-05-21 13:45:10 +0000798 }
drh7e3b0a02001-04-28 16:52:40 +0000799 if( pPager->nExtra>0 ){
800 memset(PGHDR_TO_EXTRA(pPg), 0, pPager->nExtra);
801 }
drhed7c8552001-04-11 14:29:21 +0000802 }else{
drhd9b02572001-04-15 00:37:09 +0000803 /* The requested page is in the page cache. */
drh7e3b0a02001-04-28 16:52:40 +0000804 pPager->nHit++;
drhdf0b3b02001-06-23 11:36:20 +0000805 page_ref(pPg);
drhed7c8552001-04-11 14:29:21 +0000806 }
807 *ppPage = PGHDR_TO_DATA(pPg);
808 return SQLITE_OK;
809}
810
811/*
drh7e3b0a02001-04-28 16:52:40 +0000812** Acquire a page if it is already in the in-memory cache. Do
813** not read the page from disk. Return a pointer to the page,
814** or 0 if the page is not in cache.
815**
816** See also sqlitepager_get(). The difference between this routine
817** and sqlitepager_get() is that _get() will go to the disk and read
818** in the page if the page is not already in cache. This routine
drh5e00f6c2001-09-13 13:46:56 +0000819** returns NULL if the page is not in cache or if a disk I/O error
820** has ever happened.
drh7e3b0a02001-04-28 16:52:40 +0000821*/
822void *sqlitepager_lookup(Pager *pPager, Pgno pgno){
823 PgHdr *pPg;
824
825 /* Make sure we have not hit any critical errors.
826 */
827 if( pPager==0 || pgno==0 ){
828 return 0;
829 }
830 if( pPager->errMask & ~(PAGER_ERR_FULL) ){
831 return 0;
832 }
833 if( pPager->nRef==0 ){
834 return 0;
835 }
836 pPg = pager_lookup(pPager, pgno);
837 if( pPg==0 ) return 0;
drhdf0b3b02001-06-23 11:36:20 +0000838 page_ref(pPg);
drh7e3b0a02001-04-28 16:52:40 +0000839 return PGHDR_TO_DATA(pPg);
840}
841
842/*
drhed7c8552001-04-11 14:29:21 +0000843** Release a page.
844**
845** If the number of references to the page drop to zero, then the
846** page is added to the LRU list. When all references to all pages
drhd9b02572001-04-15 00:37:09 +0000847** are released, a rollback occurs and the lock on the database is
drhed7c8552001-04-11 14:29:21 +0000848** removed.
849*/
drhd9b02572001-04-15 00:37:09 +0000850int sqlitepager_unref(void *pData){
drhed7c8552001-04-11 14:29:21 +0000851 PgHdr *pPg;
drhd9b02572001-04-15 00:37:09 +0000852
853 /* Decrement the reference count for this page
854 */
drhed7c8552001-04-11 14:29:21 +0000855 pPg = DATA_TO_PGHDR(pData);
856 assert( pPg->nRef>0 );
drhed7c8552001-04-11 14:29:21 +0000857 pPg->nRef--;
drhdd793422001-06-28 01:54:48 +0000858 REFINFO(pPg);
drhd9b02572001-04-15 00:37:09 +0000859
drh72f82862001-05-24 21:06:34 +0000860 /* When the number of references to a page reach 0, call the
861 ** destructor and add the page to the freelist.
drhd9b02572001-04-15 00:37:09 +0000862 */
drhed7c8552001-04-11 14:29:21 +0000863 if( pPg->nRef==0 ){
drh1eaa2692001-09-18 02:02:23 +0000864 Pager *pPager;
865 pPager = pPg->pPager;
drhd9b02572001-04-15 00:37:09 +0000866 pPg->pNextFree = 0;
867 pPg->pPrevFree = pPager->pLast;
drhed7c8552001-04-11 14:29:21 +0000868 pPager->pLast = pPg;
drhd9b02572001-04-15 00:37:09 +0000869 if( pPg->pPrevFree ){
870 pPg->pPrevFree->pNextFree = pPg;
drhed7c8552001-04-11 14:29:21 +0000871 }else{
872 pPager->pFirst = pPg;
873 }
drh72f82862001-05-24 21:06:34 +0000874 if( pPager->xDestructor ){
875 pPager->xDestructor(pData);
876 }
drhd9b02572001-04-15 00:37:09 +0000877
878 /* When all pages reach the freelist, drop the read lock from
879 ** the database file.
880 */
881 pPager->nRef--;
882 assert( pPager->nRef>=0 );
883 if( pPager->nRef==0 ){
884 pager_reset(pPager);
885 }
drhed7c8552001-04-11 14:29:21 +0000886 }
drhd9b02572001-04-15 00:37:09 +0000887 return SQLITE_OK;
drhed7c8552001-04-11 14:29:21 +0000888}
889
890/*
891** Mark a data page as writeable. The page is written into the journal
892** if it is not there already. This routine must be called before making
893** changes to a page.
894**
895** The first time this routine is called, the pager creates a new
896** journal and acquires a write lock on the database. If the write
897** lock could not be acquired, this routine returns SQLITE_BUSY. The
drh306dc212001-05-21 13:45:10 +0000898** calling routine must check for that return value and be careful not to
drhed7c8552001-04-11 14:29:21 +0000899** change any page data until this routine returns SQLITE_OK.
drhd9b02572001-04-15 00:37:09 +0000900**
901** If the journal file could not be written because the disk is full,
902** then this routine returns SQLITE_FULL and does an immediate rollback.
903** All subsequent write attempts also return SQLITE_FULL until there
904** is a call to sqlitepager_commit() or sqlitepager_rollback() to
905** reset.
drhed7c8552001-04-11 14:29:21 +0000906*/
drhd9b02572001-04-15 00:37:09 +0000907int sqlitepager_write(void *pData){
drh69688d52001-04-14 16:38:23 +0000908 PgHdr *pPg = DATA_TO_PGHDR(pData);
909 Pager *pPager = pPg->pPager;
drhd79caeb2001-04-15 02:27:24 +0000910 int rc = SQLITE_OK;
drh69688d52001-04-14 16:38:23 +0000911
drh6446c4d2001-12-15 14:22:18 +0000912 /* Check for errors
913 */
drhd9b02572001-04-15 00:37:09 +0000914 if( pPager->errMask ){
915 return pager_errcode(pPager);
916 }
drh5e00f6c2001-09-13 13:46:56 +0000917 if( pPager->readOnly ){
918 return SQLITE_PERM;
919 }
drh6446c4d2001-12-15 14:22:18 +0000920
921 /* Mark the page as dirty. If the page has already been written
922 ** to the journal then we can return right away.
923 */
drhd9b02572001-04-15 00:37:09 +0000924 pPg->dirty = 1;
drh69688d52001-04-14 16:38:23 +0000925 if( pPg->inJournal ){ return SQLITE_OK; }
drh6446c4d2001-12-15 14:22:18 +0000926
927 /* If we get this far, it means that the page needs to be
928 ** written to the journal file. First check to see if the
929 ** journal exists and create it if it does not.
930 */
drhd9b02572001-04-15 00:37:09 +0000931 assert( pPager->state!=SQLITE_UNLOCK );
drhed7c8552001-04-11 14:29:21 +0000932 if( pPager->state==SQLITE_READLOCK ){
drh6019e162001-07-02 17:51:45 +0000933 assert( pPager->aInJournal==0 );
drha7fcb052001-12-14 15:09:55 +0000934 rc = sqliteOsWriteLock(&pPager->fd);
935 if( rc!=SQLITE_OK ){
936 return rc;
937 }
drh6019e162001-07-02 17:51:45 +0000938 pPager->aInJournal = sqliteMalloc( pPager->dbSize/8 + 1 );
939 if( pPager->aInJournal==0 ){
drha7fcb052001-12-14 15:09:55 +0000940 sqliteOsReadLock(&pPager->fd);
drh6019e162001-07-02 17:51:45 +0000941 return SQLITE_NOMEM;
942 }
drh8cfbf082001-09-19 13:22:39 +0000943 rc = sqliteOsOpenExclusive(pPager->zJournal, &pPager->jfd);
944 if( rc!=SQLITE_OK ){
drh6d4abfb2001-10-22 02:58:08 +0000945 sqliteFree(pPager->aInJournal);
drha7fcb052001-12-14 15:09:55 +0000946 pPager->aInJournal = 0;
947 sqliteOsReadLock(&pPager->fd);
drhed7c8552001-04-11 14:29:21 +0000948 return SQLITE_CANTOPEN;
949 }
drh8cfbf082001-09-19 13:22:39 +0000950 pPager->journalOpen = 1;
drhf57b14a2001-09-14 18:54:08 +0000951 pPager->needSync = 0;
drhed7c8552001-04-11 14:29:21 +0000952 pPager->state = SQLITE_WRITELOCK;
drhd9b02572001-04-15 00:37:09 +0000953 sqlitepager_pagecount(pPager);
drh69688d52001-04-14 16:38:23 +0000954 pPager->origDbSize = pPager->dbSize;
drha7fcb052001-12-14 15:09:55 +0000955 rc = sqliteOsWrite(&pPager->jfd, aJournalMagic, sizeof(aJournalMagic));
drhd9b02572001-04-15 00:37:09 +0000956 if( rc==SQLITE_OK ){
drha7fcb052001-12-14 15:09:55 +0000957 rc = sqliteOsWrite(&pPager->jfd, &pPager->dbSize, sizeof(Pgno));
drhd9b02572001-04-15 00:37:09 +0000958 }
959 if( rc!=SQLITE_OK ){
960 rc = pager_unwritelock(pPager);
961 if( rc==SQLITE_OK ) rc = SQLITE_FULL;
962 return rc;
963 }
drhed7c8552001-04-11 14:29:21 +0000964 }
drhd9b02572001-04-15 00:37:09 +0000965 assert( pPager->state==SQLITE_WRITELOCK );
drh8cfbf082001-09-19 13:22:39 +0000966 assert( pPager->journalOpen );
drh6446c4d2001-12-15 14:22:18 +0000967
968 /* The journal now exists and we have a write lock on the
969 ** main database file. Write the current page to the journal.
970 */
drhd9b02572001-04-15 00:37:09 +0000971 if( pPg->pgno <= pPager->origDbSize ){
drha7fcb052001-12-14 15:09:55 +0000972 rc = sqliteOsWrite(&pPager->jfd, &pPg->pgno, sizeof(Pgno));
drhd9b02572001-04-15 00:37:09 +0000973 if( rc==SQLITE_OK ){
drha7fcb052001-12-14 15:09:55 +0000974 rc = sqliteOsWrite(&pPager->jfd, pData, SQLITE_PAGE_SIZE);
drhd9b02572001-04-15 00:37:09 +0000975 }
976 if( rc!=SQLITE_OK ){
977 sqlitepager_rollback(pPager);
978 pPager->errMask |= PAGER_ERR_FULL;
979 return rc;
980 }
drh6019e162001-07-02 17:51:45 +0000981 assert( pPager->aInJournal!=0 );
982 pPager->aInJournal[pPg->pgno/8] |= 1<<(pPg->pgno&7);
drhf57b14a2001-09-14 18:54:08 +0000983 pPager->needSync = 1;
drh69688d52001-04-14 16:38:23 +0000984 }
drh6446c4d2001-12-15 14:22:18 +0000985
986 /* Mark the current page as being in the journal and return.
987 */
drh69688d52001-04-14 16:38:23 +0000988 pPg->inJournal = 1;
drh306dc212001-05-21 13:45:10 +0000989 if( pPager->dbSize<pPg->pgno ){
990 pPager->dbSize = pPg->pgno;
991 }
drh69688d52001-04-14 16:38:23 +0000992 return rc;
drhed7c8552001-04-11 14:29:21 +0000993}
994
995/*
drhaacc5432002-01-06 17:07:40 +0000996** Return TRUE if the page given in the argument was previously passed
drh6019e162001-07-02 17:51:45 +0000997** to sqlitepager_write(). In other words, return TRUE if it is ok
998** to change the content of the page.
999*/
1000int sqlitepager_iswriteable(void *pData){
1001 PgHdr *pPg = DATA_TO_PGHDR(pData);
1002 return pPg->dirty;
1003}
1004
1005/*
drhed7c8552001-04-11 14:29:21 +00001006** Commit all changes to the database and release the write lock.
drhd9b02572001-04-15 00:37:09 +00001007**
1008** If the commit fails for any reason, a rollback attempt is made
1009** and an error code is returned. If the commit worked, SQLITE_OK
1010** is returned.
drhed7c8552001-04-11 14:29:21 +00001011*/
drhd9b02572001-04-15 00:37:09 +00001012int sqlitepager_commit(Pager *pPager){
drha1b351a2001-09-14 16:42:12 +00001013 int rc;
drhed7c8552001-04-11 14:29:21 +00001014 PgHdr *pPg;
drhd9b02572001-04-15 00:37:09 +00001015
1016 if( pPager->errMask==PAGER_ERR_FULL ){
1017 rc = sqlitepager_rollback(pPager);
1018 if( rc==SQLITE_OK ) rc = SQLITE_FULL;
1019 return rc;
1020 }
1021 if( pPager->errMask!=0 ){
1022 rc = pager_errcode(pPager);
1023 return rc;
1024 }
1025 if( pPager->state!=SQLITE_WRITELOCK ){
1026 return SQLITE_ERROR;
1027 }
drh8cfbf082001-09-19 13:22:39 +00001028 assert( pPager->journalOpen );
drha7fcb052001-12-14 15:09:55 +00001029 if( pPager->needSync && sqliteOsSync(&pPager->jfd)!=SQLITE_OK ){
drhd9b02572001-04-15 00:37:09 +00001030 goto commit_abort;
drhed7c8552001-04-11 14:29:21 +00001031 }
drha1b351a2001-09-14 16:42:12 +00001032 for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
1033 if( pPg->dirty==0 ) continue;
drha7fcb052001-12-14 15:09:55 +00001034 rc = sqliteOsSeek(&pPager->fd, (pPg->pgno-1)*SQLITE_PAGE_SIZE);
drha1b351a2001-09-14 16:42:12 +00001035 if( rc!=SQLITE_OK ) goto commit_abort;
drha7fcb052001-12-14 15:09:55 +00001036 rc = sqliteOsWrite(&pPager->fd, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE);
drha1b351a2001-09-14 16:42:12 +00001037 if( rc!=SQLITE_OK ) goto commit_abort;
drhed7c8552001-04-11 14:29:21 +00001038 }
drha7fcb052001-12-14 15:09:55 +00001039 if( sqliteOsSync(&pPager->fd)!=SQLITE_OK ) goto commit_abort;
drhd9b02572001-04-15 00:37:09 +00001040 rc = pager_unwritelock(pPager);
1041 pPager->dbSize = -1;
1042 return rc;
1043
1044 /* Jump here if anything goes wrong during the commit process.
1045 */
1046commit_abort:
1047 rc = sqlitepager_rollback(pPager);
1048 if( rc==SQLITE_OK ){
1049 rc = SQLITE_FULL;
drhed7c8552001-04-11 14:29:21 +00001050 }
drhed7c8552001-04-11 14:29:21 +00001051 return rc;
1052}
1053
1054/*
1055** Rollback all changes. The database falls back to read-only mode.
1056** All in-memory cache pages revert to their original data contents.
1057** The journal is deleted.
drhd9b02572001-04-15 00:37:09 +00001058**
1059** This routine cannot fail unless some other process is not following
1060** the correct locking protocol (SQLITE_PROTOCOL) or unless some other
1061** process is writing trash into the journal file (SQLITE_CORRUPT) or
1062** unless a prior malloc() failed (SQLITE_NOMEM). Appropriate error
1063** codes are returned for all these occasions. Otherwise,
1064** SQLITE_OK is returned.
drhed7c8552001-04-11 14:29:21 +00001065*/
drhd9b02572001-04-15 00:37:09 +00001066int sqlitepager_rollback(Pager *pPager){
drhed7c8552001-04-11 14:29:21 +00001067 int rc;
drhd9b02572001-04-15 00:37:09 +00001068 if( pPager->errMask!=0 && pPager->errMask!=PAGER_ERR_FULL ){
1069 return pager_errcode(pPager);
drhed7c8552001-04-11 14:29:21 +00001070 }
drhd9b02572001-04-15 00:37:09 +00001071 if( pPager->state!=SQLITE_WRITELOCK ){
1072 return SQLITE_OK;
1073 }
1074 rc = pager_playback(pPager);
1075 if( rc!=SQLITE_OK ){
1076 rc = SQLITE_CORRUPT;
1077 pPager->errMask |= PAGER_ERR_CORRUPT;
1078 }
1079 pPager->dbSize = -1;
drhed7c8552001-04-11 14:29:21 +00001080 return rc;
drh98808ba2001-10-18 12:34:46 +00001081}
drhd9b02572001-04-15 00:37:09 +00001082
1083/*
drh5e00f6c2001-09-13 13:46:56 +00001084** Return TRUE if the database file is opened read-only. Return FALSE
1085** if the database is (in theory) writable.
1086*/
1087int sqlitepager_isreadonly(Pager *pPager){
drhbe0072d2001-09-13 14:46:09 +00001088 return pPager->readOnly;
drh5e00f6c2001-09-13 13:46:56 +00001089}
1090
1091/*
drhd9b02572001-04-15 00:37:09 +00001092** This routine is used for testing and analysis only.
1093*/
1094int *sqlitepager_stats(Pager *pPager){
1095 static int a[9];
1096 a[0] = pPager->nRef;
1097 a[1] = pPager->nPage;
1098 a[2] = pPager->mxPage;
1099 a[3] = pPager->dbSize;
1100 a[4] = pPager->state;
1101 a[5] = pPager->errMask;
1102 a[6] = pPager->nHit;
1103 a[7] = pPager->nMiss;
1104 a[8] = pPager->nOvfl;
1105 return a;
1106}
drhdd793422001-06-28 01:54:48 +00001107
#if SQLITE_TEST
/*
** Print a listing of all referenced pages and their ref count.
**
** Every page on the pager's pAll list with a positive reference count
** gets one line on standard output: its page number, the address of
** its data area and its reference count.
*/
void sqlitepager_refdump(Pager *pPager){
  PgHdr *pPg;
  for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
    if( pPg->nRef<=0 ) continue;
    /* The address is printed with %p; squeezing a pointer through
    ** (int) and %x loses the upper bits where pointers are wider
    ** than int.
    */
    printf("PAGE %3d addr=%p nRef=%d\n",
       (int)pPg->pgno, (void*)PGHDR_TO_DATA(pPg), pPg->nRef);
  }
}
#endif