blob: 7134744e69083dc64f53570c1fd79aeff1a9cc2c [file] [log] [blame]
drhed7c8552001-04-11 14:29:21 +00001/*
drhb19a2bc2001-09-16 00:13:26 +00002** 2001 September 15
drhed7c8552001-04-11 14:29:21 +00003**
drhb19a2bc2001-09-16 00:13:26 +00004** The author disclaims copyright to this source code. In place of
5** a legal notice, here is a blessing:
drhed7c8552001-04-11 14:29:21 +00006**
drhb19a2bc2001-09-16 00:13:26 +00007** May you do good and not evil.
8** May you find forgiveness for yourself and forgive others.
9** May you share freely, never taking more than you give.
drhed7c8552001-04-11 14:29:21 +000010**
11*************************************************************************
drhb19a2bc2001-09-16 00:13:26 +000012** This is the implementation of the page cache subsystem or "pager".
drhed7c8552001-04-11 14:29:21 +000013**
drhb19a2bc2001-09-16 00:13:26 +000014** The pager is used to access a database disk file. It implements
15** atomic commit and rollback through the use of a journal file that
16** is separate from the database file. The pager also implements file
17** locking to prevent two processes from writing the same database
18** file simultaneously, or one process from reading the database while
19** another is writing.
drhed7c8552001-04-11 14:29:21 +000020**
drh81a20f22001-10-12 17:30:04 +000021** @(#) $Id: pager.c,v 1.27 2001/10/12 17:30:05 drh Exp $
drhed7c8552001-04-11 14:29:21 +000022*/
drhd9b02572001-04-15 00:37:09 +000023#include "sqliteInt.h"
drhed7c8552001-04-11 14:29:21 +000024#include "pager.h"
drh8cfbf082001-09-19 13:22:39 +000025#include "os.h"
drhed7c8552001-04-11 14:29:21 +000026#include <assert.h>
drhd9b02572001-04-15 00:37:09 +000027#include <string.h>
drhed7c8552001-04-11 14:29:21 +000028
/*
** The page cache as a whole is always in exactly one of these states:
**
**   SQLITE_UNLOCK       The page cache is not currently reading or
**                       writing the database file.  There is no
**                       data held in memory.  This is the initial
**                       state.
**
**   SQLITE_READLOCK     The page cache is reading the database.
**                       Writing is not permitted.  There can be
**                       multiple readers accessing the same database
**                       file at the same time.
**
**   SQLITE_WRITELOCK    The page cache is writing the database.
**                       Access is exclusive.  No other processes or
**                       threads can be reading or writing while one
**                       process is writing.
**
** The page cache comes up in SQLITE_UNLOCK.  The first time a
** sqlitepager_get() occurs, the state transitions to SQLITE_READLOCK.
** After all pages have been released using sqlitepager_unref(),
** the state transitions back to SQLITE_UNLOCK.  The first time
** that sqlitepager_write() is called, the state transitions to
** SQLITE_WRITELOCK.  (Note that sqlitepager_write() can only be
** called on an outstanding page which means that the pager must
** be in SQLITE_READLOCK before it transitions to SQLITE_WRITELOCK.)
** The sqlitepager_rollback() and sqlitepager_commit() functions
** transition the state from SQLITE_WRITELOCK back to SQLITE_READLOCK.
*/
#define SQLITE_UNLOCK      0   /* No lock held, nothing cached */
#define SQLITE_READLOCK    1   /* Shared read access */
#define SQLITE_WRITELOCK   2   /* Exclusive write access */
62
drhd9b02572001-04-15 00:37:09 +000063
drhed7c8552001-04-11 14:29:21 +000064/*
65** Each in-memory image of a page begins with the following header.
drhbd03cae2001-06-02 02:40:57 +000066** This header is only visible to this pager module. The client
67** code that calls pager sees only the data that follows the header.
drhed7c8552001-04-11 14:29:21 +000068*/
drhd9b02572001-04-15 00:37:09 +000069typedef struct PgHdr PgHdr;
drhed7c8552001-04-11 14:29:21 +000070struct PgHdr {
71 Pager *pPager; /* The pager to which this page belongs */
72 Pgno pgno; /* The page number for this page */
drh69688d52001-04-14 16:38:23 +000073 PgHdr *pNextHash, *pPrevHash; /* Hash collision chain for PgHdr.pgno */
drhed7c8552001-04-11 14:29:21 +000074 int nRef; /* Number of users of this page */
drhd9b02572001-04-15 00:37:09 +000075 PgHdr *pNextFree, *pPrevFree; /* Freelist of pages where nRef==0 */
76 PgHdr *pNextAll, *pPrevAll; /* A list of all pages */
drhed7c8552001-04-11 14:29:21 +000077 char inJournal; /* TRUE if has been written to journal */
78 char dirty; /* TRUE if we need to write back changes */
drh69688d52001-04-14 16:38:23 +000079 /* SQLITE_PAGE_SIZE bytes of page data follow this header */
drh7e3b0a02001-04-28 16:52:40 +000080 /* Pager.nExtra bytes of local data follow the page data */
drhed7c8552001-04-11 14:29:21 +000081};
82
/*
** Pointer conversions between a page header and what follows it:
**
**   PGHDR_TO_DATA(P)    header pointer -> page data just past the header
**   DATA_TO_PGHDR(D)    page data pointer -> the header in front of it
**   PGHDR_TO_EXTRA(P)   header pointer -> the bytes that start
**                       SQLITE_PAGE_SIZE past the page data
*/
#define PGHDR_TO_DATA(P)  ((void*)((P)+1))
#define DATA_TO_PGHDR(D)  (((PgHdr*)(D))-1)
#define PGHDR_TO_EXTRA(P) ((void*)(((char*)((P)+1))+SQLITE_PAGE_SIZE))
drhed7c8552001-04-11 14:29:21 +000090
/*
** Number of buckets in the hash table used for locating in-memory
** pages by page number.  Knuth says this should be a prime number.
*/
#define N_PG_HASH 373
drhed7c8552001-04-11 14:29:21 +000096
97/*
98** A open page cache is an instance of the following structure.
99*/
100struct Pager {
101 char *zFilename; /* Name of the database file */
102 char *zJournal; /* Name of the journal file */
drh8cfbf082001-09-19 13:22:39 +0000103 OsFile fd, jfd; /* File descriptors for database and journal */
104 int journalOpen; /* True if journal file descriptors is valid */
drhed7c8552001-04-11 14:29:21 +0000105 int dbSize; /* Number of pages in the file */
drh69688d52001-04-14 16:38:23 +0000106 int origDbSize; /* dbSize before the current change */
drh7e3b0a02001-04-28 16:52:40 +0000107 int nExtra; /* Add this many bytes to each in-memory page */
drh72f82862001-05-24 21:06:34 +0000108 void (*xDestructor)(void*); /* Call this routine when freeing pages */
drhed7c8552001-04-11 14:29:21 +0000109 int nPage; /* Total number of in-memory pages */
drhd9b02572001-04-15 00:37:09 +0000110 int nRef; /* Number of in-memory pages with PgHdr.nRef>0 */
drhed7c8552001-04-11 14:29:21 +0000111 int mxPage; /* Maximum number of pages to hold in cache */
drhd9b02572001-04-15 00:37:09 +0000112 int nHit, nMiss, nOvfl; /* Cache hits, missing, and LRU overflows */
113 unsigned char state; /* SQLITE_UNLOCK, _READLOCK or _WRITELOCK */
114 unsigned char errMask; /* One of several kinds of errors */
drh5e00f6c2001-09-13 13:46:56 +0000115 unsigned char tempFile; /* zFilename is a temporary file */
116 unsigned char readOnly; /* True for a read-only database */
drhf57b14a2001-09-14 18:54:08 +0000117 unsigned char needSync; /* True if an fsync() is needed on the journal */
drh6019e162001-07-02 17:51:45 +0000118 unsigned char *aInJournal; /* One bit for each page in the database file */
drhed7c8552001-04-11 14:29:21 +0000119 PgHdr *pFirst, *pLast; /* List of free pages */
drhd9b02572001-04-15 00:37:09 +0000120 PgHdr *pAll; /* List of all pages */
drhed7c8552001-04-11 14:29:21 +0000121 PgHdr *aHash[N_PG_HASH]; /* Hash table to map page number of PgHdr */
drhd9b02572001-04-15 00:37:09 +0000122};
123
/*
** Bits that can be set in Pager.errMask.  Several may be set at once;
** pager_errcode() turns the mask into a single return code.
*/
#define PAGER_ERR_FULL     0x01  /* A write() failed */
#define PAGER_ERR_MEM      0x02  /* malloc() failed */
#define PAGER_ERR_LOCK     0x04  /* Error in the locking protocol */
#define PAGER_ERR_CORRUPT  0x08  /* Database or journal corruption */
#define PAGER_ERR_DISK     0x10  /* General disk I/O error - bad hard drive? */
drhd9b02572001-04-15 00:37:09 +0000132
133/*
134** The journal file contains page records in the following
135** format.
136*/
137typedef struct PageRecord PageRecord;
138struct PageRecord {
139 Pgno pgno; /* The page number */
140 char aData[SQLITE_PAGE_SIZE]; /* Original data for page pgno */
141};
142
/*
** Every journal file starts with this magic string.  The bytes were
** obtained from /dev/random and serve only as a sanity check.
*/
static const unsigned char aJournalMagic[] = {
  0xd9, 0xd5, 0x05, 0xf9,
  0x20, 0xa1, 0x63, 0xd4,
};
150
/*
** Map a page number onto a bucket index in the range 0..N_PG_HASH-1.
*/
#define pager_hash(PN)  ((PN)%N_PG_HASH)
drhed7c8552001-04-11 14:29:21 +0000155
/*
** Reference count tracing, compiled in only when SQLITE_TEST is set.
**
** When pager_refinfo_enable is non-zero, each REFINFO(pPg) prints one
** line holding the page number, the address of the page data and the
** current reference count of pPg.  In other builds REFINFO() expands
** to nothing.
*/
#if SQLITE_TEST
  int pager_refinfo_enable = 0;
  static void pager_refinfo(PgHdr *p){
    static int cnt = 0;
    if( !pager_refinfo_enable ) return;
    /* The address is printed with %p.  Casting the pointer to int, as
    ** was done before, truncates it wherever pointers are wider than
    ** int.  The page number is cast so that it matches %d exactly. */
    printf(
       "REFCNT: %4d addr=%p nRef=%d\n",
       (int)p->pgno, PGHDR_TO_DATA(p), p->nRef
    );
    cnt++;   /* Something to set a breakpoint on */
  }
# define REFINFO(X)  pager_refinfo(X)
#else
# define REFINFO(X)
#endif
174
175/*
drhd9b02572001-04-15 00:37:09 +0000176** Convert the bits in the pPager->errMask into an approprate
177** return code.
178*/
179static int pager_errcode(Pager *pPager){
180 int rc = SQLITE_OK;
181 if( pPager->errMask & PAGER_ERR_LOCK ) rc = SQLITE_PROTOCOL;
drh81a20f22001-10-12 17:30:04 +0000182 if( pPager->errMask & PAGER_ERR_DISK ) rc = SQLITE_IOERR;
drhd9b02572001-04-15 00:37:09 +0000183 if( pPager->errMask & PAGER_ERR_FULL ) rc = SQLITE_FULL;
184 if( pPager->errMask & PAGER_ERR_MEM ) rc = SQLITE_NOMEM;
185 if( pPager->errMask & PAGER_ERR_CORRUPT ) rc = SQLITE_CORRUPT;
186 return rc;
drhed7c8552001-04-11 14:29:21 +0000187}
188
189/*
190** Find a page in the hash table given its page number. Return
191** a pointer to the page or NULL if not found.
192*/
drhd9b02572001-04-15 00:37:09 +0000193static PgHdr *pager_lookup(Pager *pPager, Pgno pgno){
drhed7c8552001-04-11 14:29:21 +0000194 PgHdr *p = pPager->aHash[pgno % N_PG_HASH];
195 while( p && p->pgno!=pgno ){
196 p = p->pNextHash;
197 }
198 return p;
199}
200
201/*
202** Unlock the database and clear the in-memory cache. This routine
203** sets the state of the pager back to what it was when it was first
204** opened. Any outstanding pages are invalidated and subsequent attempts
205** to access those pages will likely result in a coredump.
206*/
drhd9b02572001-04-15 00:37:09 +0000207static void pager_reset(Pager *pPager){
drhed7c8552001-04-11 14:29:21 +0000208 PgHdr *pPg, *pNext;
drhd9b02572001-04-15 00:37:09 +0000209 for(pPg=pPager->pAll; pPg; pPg=pNext){
210 pNext = pPg->pNextAll;
211 sqliteFree(pPg);
drhed7c8552001-04-11 14:29:21 +0000212 }
213 pPager->pFirst = 0;
drhd9b02572001-04-15 00:37:09 +0000214 pPager->pLast = 0;
215 pPager->pAll = 0;
drhed7c8552001-04-11 14:29:21 +0000216 memset(pPager->aHash, 0, sizeof(pPager->aHash));
217 pPager->nPage = 0;
218 if( pPager->state==SQLITE_WRITELOCK ){
drhd9b02572001-04-15 00:37:09 +0000219 sqlitepager_rollback(pPager);
drhed7c8552001-04-11 14:29:21 +0000220 }
drh8cfbf082001-09-19 13:22:39 +0000221 sqliteOsUnlock(pPager->fd);
drhed7c8552001-04-11 14:29:21 +0000222 pPager->state = SQLITE_UNLOCK;
drhd9b02572001-04-15 00:37:09 +0000223 pPager->dbSize = -1;
drhed7c8552001-04-11 14:29:21 +0000224 pPager->nRef = 0;
drh8cfbf082001-09-19 13:22:39 +0000225 assert( pPager->journalOpen==0 );
drhed7c8552001-04-11 14:29:21 +0000226}
227
228/*
229** When this routine is called, the pager has the journal file open and
230** a write lock on the database. This routine releases the database
231** write lock and acquires a read lock in its place. The journal file
232** is deleted and closed.
233**
234** We have to release the write lock before acquiring the read lock,
235** so there is a race condition where another process can get the lock
236** while we are not holding it. But, no other process should do this
237** because we are also holding a lock on the journal, and no process
238** should get a write lock on the database without first getting a lock
239** on the journal. So this routine should never fail. But it can fail
240** if another process is not playing by the rules. If it does fail,
drhd9b02572001-04-15 00:37:09 +0000241** all in-memory cache pages are invalidated, the PAGER_ERR_LOCK bit
242** is set in pPager->errMask, and this routine returns SQLITE_PROTOCOL.
243** SQLITE_OK is returned on success.
drhed7c8552001-04-11 14:29:21 +0000244*/
drhd9b02572001-04-15 00:37:09 +0000245static int pager_unwritelock(Pager *pPager){
drhed7c8552001-04-11 14:29:21 +0000246 int rc;
drhd9b02572001-04-15 00:37:09 +0000247 PgHdr *pPg;
248 if( pPager->state!=SQLITE_WRITELOCK ) return SQLITE_OK;
drh8cfbf082001-09-19 13:22:39 +0000249 sqliteOsUnlock(pPager->fd);
250 rc = sqliteOsLock(pPager->fd, 0);
251 sqliteOsClose(pPager->jfd);
252 pPager->journalOpen = 0;
253 sqliteOsDelete(pPager->zJournal);
drh6019e162001-07-02 17:51:45 +0000254 sqliteFree( pPager->aInJournal );
255 pPager->aInJournal = 0;
drhd9b02572001-04-15 00:37:09 +0000256 for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
257 pPg->inJournal = 0;
258 pPg->dirty = 0;
259 }
drhed7c8552001-04-11 14:29:21 +0000260 if( rc!=SQLITE_OK ){
261 pPager->state = SQLITE_UNLOCK;
drhed7c8552001-04-11 14:29:21 +0000262 rc = SQLITE_PROTOCOL;
drhd9b02572001-04-15 00:37:09 +0000263 pPager->errMask |= PAGER_ERR_LOCK;
drhed7c8552001-04-11 14:29:21 +0000264 }else{
drhd9b02572001-04-15 00:37:09 +0000265 rc = SQLITE_OK;
drhed7c8552001-04-11 14:29:21 +0000266 pPager->state = SQLITE_READLOCK;
267 }
268 return rc;
269}
270
drhed7c8552001-04-11 14:29:21 +0000271/*
272** Playback the journal and thus restore the database file to
273** the state it was in before we started making changes.
274**
drhd9b02572001-04-15 00:37:09 +0000275** The journal file format is as follows: There is an initial
276** file-type string for sanity checking. Then there is a single
277** Pgno number which is the number of pages in the database before
278** changes were made. The database is truncated to this size.
drh306dc212001-05-21 13:45:10 +0000279** Next come zero or more page records where each page record
280** consists of a Pgno and SQLITE_PAGE_SIZE bytes of data. See
281** the PageRecord structure for details.
drhed7c8552001-04-11 14:29:21 +0000282**
drhd9b02572001-04-15 00:37:09 +0000283** For playback, the pages have to be read from the journal in
284** reverse order and put back into the original database file.
drhed7c8552001-04-11 14:29:21 +0000285**
drhd9b02572001-04-15 00:37:09 +0000286** If the file opened as the journal file is not a well-formed
287** journal file (as determined by looking at the magic number
288** at the beginning) then this routine returns SQLITE_PROTOCOL.
289** If any other errors occur during playback, the database will
290** likely be corrupted, so the PAGER_ERR_CORRUPT bit is set in
291** pPager->errMask and SQLITE_CORRUPT is returned. If it all
292** works, then this routine returns SQLITE_OK.
drhed7c8552001-04-11 14:29:21 +0000293*/
drhd9b02572001-04-15 00:37:09 +0000294static int pager_playback(Pager *pPager){
295 int nRec; /* Number of Records */
296 int i; /* Loop counter */
297 Pgno mxPg = 0; /* Size of the original file in pages */
drhd9b02572001-04-15 00:37:09 +0000298 PgHdr *pPg; /* An existing page in the cache */
299 PageRecord pgRec;
300 unsigned char aMagic[sizeof(aJournalMagic)];
drhed7c8552001-04-11 14:29:21 +0000301 int rc;
302
drhd9b02572001-04-15 00:37:09 +0000303 /* Read the beginning of the journal and truncate the
304 ** database file back to its original size.
drhed7c8552001-04-11 14:29:21 +0000305 */
drh8cfbf082001-09-19 13:22:39 +0000306 assert( pPager->journalOpen );
307 sqliteOsSeek(pPager->jfd, 0);
308 rc = sqliteOsRead(pPager->jfd, aMagic, sizeof(aMagic));
drhd9b02572001-04-15 00:37:09 +0000309 if( rc!=SQLITE_OK || memcmp(aMagic,aJournalMagic,sizeof(aMagic))!=0 ){
drh81a20f22001-10-12 17:30:04 +0000310 rc = SQLITE_PROTOCOL;
311 goto end_playback;
drhd9b02572001-04-15 00:37:09 +0000312 }
drh8cfbf082001-09-19 13:22:39 +0000313 rc = sqliteOsRead(pPager->jfd, &mxPg, sizeof(mxPg));
drhd9b02572001-04-15 00:37:09 +0000314 if( rc!=SQLITE_OK ){
drh81a20f22001-10-12 17:30:04 +0000315 goto end_playback;
drhd9b02572001-04-15 00:37:09 +0000316 }
drh81a20f22001-10-12 17:30:04 +0000317 rc = sqliteOsTruncate(pPager->fd, mxPg*SQLITE_PAGE_SIZE);
318 if( rc!=SQLITE_OK ){
319 goto end_playback;
320 }
drhd9b02572001-04-15 00:37:09 +0000321 pPager->dbSize = mxPg;
322
323 /* Begin reading the journal beginning at the end and moving
324 ** toward the beginning.
325 */
drh81a20f22001-10-12 17:30:04 +0000326 rc = sqliteOsFileSize(pPager->jfd, &nRec);
327 if( rc!=SQLITE_OK ){
328 goto end_playback;
drhed7c8552001-04-11 14:29:21 +0000329 }
drh8cfbf082001-09-19 13:22:39 +0000330 nRec = (nRec - (sizeof(aMagic)+sizeof(Pgno))) / sizeof(PageRecord);
drhed7c8552001-04-11 14:29:21 +0000331
332 /* Process segments beginning with the last and working backwards
333 ** to the first.
334 */
drhd9b02572001-04-15 00:37:09 +0000335 for(i=nRec-1; i>=0; i--){
drhed7c8552001-04-11 14:29:21 +0000336 /* Seek to the beginning of the segment */
drh254cba22001-09-20 01:44:42 +0000337 int ofst;
drhd9b02572001-04-15 00:37:09 +0000338 ofst = i*sizeof(PageRecord) + sizeof(aMagic) + sizeof(Pgno);
drh8cfbf082001-09-19 13:22:39 +0000339 rc = sqliteOsSeek(pPager->jfd, ofst);
drhd9b02572001-04-15 00:37:09 +0000340 if( rc!=SQLITE_OK ) break;
drh8cfbf082001-09-19 13:22:39 +0000341 rc = sqliteOsRead(pPager->jfd, &pgRec, sizeof(pgRec));
drhd9b02572001-04-15 00:37:09 +0000342 if( rc!=SQLITE_OK ) break;
drhed7c8552001-04-11 14:29:21 +0000343
drhd9b02572001-04-15 00:37:09 +0000344 /* Sanity checking on the page */
345 if( pgRec.pgno>mxPg || pgRec.pgno==0 ){
346 rc = SQLITE_CORRUPT;
347 break;
drhed7c8552001-04-11 14:29:21 +0000348 }
349
drhd9b02572001-04-15 00:37:09 +0000350 /* Playback the page. Update the in-memory copy of the page
351 ** at the same time, if there is one.
drhed7c8552001-04-11 14:29:21 +0000352 */
drhd9b02572001-04-15 00:37:09 +0000353 pPg = pager_lookup(pPager, pgRec.pgno);
354 if( pPg ){
355 memcpy(PGHDR_TO_DATA(pPg), pgRec.aData, SQLITE_PAGE_SIZE);
drh6019e162001-07-02 17:51:45 +0000356 memset(PGHDR_TO_EXTRA(pPg), 0, pPager->nExtra);
drhed7c8552001-04-11 14:29:21 +0000357 }
drh8cfbf082001-09-19 13:22:39 +0000358 rc = sqliteOsSeek(pPager->fd, (pgRec.pgno-1)*SQLITE_PAGE_SIZE);
drhd9b02572001-04-15 00:37:09 +0000359 if( rc!=SQLITE_OK ) break;
drh8cfbf082001-09-19 13:22:39 +0000360 rc = sqliteOsWrite(pPager->fd, pgRec.aData, SQLITE_PAGE_SIZE);
drhd9b02572001-04-15 00:37:09 +0000361 if( rc!=SQLITE_OK ) break;
drhed7c8552001-04-11 14:29:21 +0000362 }
drh81a20f22001-10-12 17:30:04 +0000363
364end_playback:
drhd9b02572001-04-15 00:37:09 +0000365 if( rc!=SQLITE_OK ){
366 pager_unwritelock(pPager);
367 pPager->errMask |= PAGER_ERR_CORRUPT;
368 rc = SQLITE_CORRUPT;
369 }else{
370 rc = pager_unwritelock(pPager);
drhed7c8552001-04-11 14:29:21 +0000371 }
drhd9b02572001-04-15 00:37:09 +0000372 return rc;
drhed7c8552001-04-11 14:29:21 +0000373}
374
375/*
drhf57b14a2001-09-14 18:54:08 +0000376** Change the maximum number of in-memory pages that are allowed.
377*/
378void sqlitepager_set_cachesize(Pager *pPager, int mxPage){
379 if( mxPage>10 ){
380 pPager->mxPage = mxPage;
381 }
382}
383
384/*
drhed7c8552001-04-11 14:29:21 +0000385** Create a new page cache and put a pointer to the page cache in *ppPager.
drh5e00f6c2001-09-13 13:46:56 +0000386** The file to be cached need not exist. The file is not locked until
drhd9b02572001-04-15 00:37:09 +0000387** the first call to sqlitepager_get() and is only held open until the
388** last page is released using sqlitepager_unref().
drh382c0242001-10-06 16:33:02 +0000389**
390** If zFilename is NULL then a random temporary file is created and used
391** as the file to be cached. The file will be deleted automatically when
392** it is closed.
drhed7c8552001-04-11 14:29:21 +0000393*/
drh7e3b0a02001-04-28 16:52:40 +0000394int sqlitepager_open(
395 Pager **ppPager, /* Return the Pager structure here */
396 const char *zFilename, /* Name of the database file to open */
397 int mxPage, /* Max number of in-memory cache pages */
398 int nExtra /* Extra bytes append to each in-memory page */
399){
drhed7c8552001-04-11 14:29:21 +0000400 Pager *pPager;
401 int nameLen;
drh8cfbf082001-09-19 13:22:39 +0000402 OsFile fd;
403 int rc;
drh5e00f6c2001-09-13 13:46:56 +0000404 int tempFile;
405 int readOnly = 0;
drh8cfbf082001-09-19 13:22:39 +0000406 char zTemp[SQLITE_TEMPNAME_SIZE];
drhed7c8552001-04-11 14:29:21 +0000407
drhd9b02572001-04-15 00:37:09 +0000408 *ppPager = 0;
409 if( sqlite_malloc_failed ){
410 return SQLITE_NOMEM;
411 }
drh5e00f6c2001-09-13 13:46:56 +0000412 if( zFilename ){
drh8cfbf082001-09-19 13:22:39 +0000413 rc = sqliteOsOpenReadWrite(zFilename, &fd, &readOnly);
drh5e00f6c2001-09-13 13:46:56 +0000414 tempFile = 0;
415 }else{
416 int cnt = 8;
drh8cfbf082001-09-19 13:22:39 +0000417 sqliteOsTempFileName(zTemp);
drh5e00f6c2001-09-13 13:46:56 +0000418 do{
419 cnt--;
drh8cfbf082001-09-19 13:22:39 +0000420 sqliteOsTempFileName(zTemp);
421 rc = sqliteOsOpenExclusive(zTemp, &fd);
422 }while( cnt>0 && rc!=SQLITE_OK );
drh5e00f6c2001-09-13 13:46:56 +0000423 zFilename = zTemp;
424 tempFile = 1;
425 }
drh8cfbf082001-09-19 13:22:39 +0000426 if( rc!=SQLITE_OK ){
drhed7c8552001-04-11 14:29:21 +0000427 return SQLITE_CANTOPEN;
428 }
429 nameLen = strlen(zFilename);
430 pPager = sqliteMalloc( sizeof(*pPager) + nameLen*2 + 30 );
drhd9b02572001-04-15 00:37:09 +0000431 if( pPager==0 ){
drh8cfbf082001-09-19 13:22:39 +0000432 sqliteOsClose(fd);
drhd9b02572001-04-15 00:37:09 +0000433 return SQLITE_NOMEM;
434 }
drhed7c8552001-04-11 14:29:21 +0000435 pPager->zFilename = (char*)&pPager[1];
436 pPager->zJournal = &pPager->zFilename[nameLen+1];
437 strcpy(pPager->zFilename, zFilename);
438 strcpy(pPager->zJournal, zFilename);
439 strcpy(&pPager->zJournal[nameLen], "-journal");
440 pPager->fd = fd;
drh8cfbf082001-09-19 13:22:39 +0000441 pPager->journalOpen = 0;
drhed7c8552001-04-11 14:29:21 +0000442 pPager->nRef = 0;
443 pPager->dbSize = -1;
444 pPager->nPage = 0;
drhd79caeb2001-04-15 02:27:24 +0000445 pPager->mxPage = mxPage>5 ? mxPage : 10;
drhed7c8552001-04-11 14:29:21 +0000446 pPager->state = SQLITE_UNLOCK;
drhd9b02572001-04-15 00:37:09 +0000447 pPager->errMask = 0;
drh5e00f6c2001-09-13 13:46:56 +0000448 pPager->tempFile = tempFile;
449 pPager->readOnly = readOnly;
drhf57b14a2001-09-14 18:54:08 +0000450 pPager->needSync = 0;
drhed7c8552001-04-11 14:29:21 +0000451 pPager->pFirst = 0;
452 pPager->pLast = 0;
drh7c717f72001-06-24 20:39:41 +0000453 pPager->nExtra = nExtra;
drhed7c8552001-04-11 14:29:21 +0000454 memset(pPager->aHash, 0, sizeof(pPager->aHash));
455 *ppPager = pPager;
456 return SQLITE_OK;
457}
458
459/*
drh72f82862001-05-24 21:06:34 +0000460** Set the destructor for this pager. If not NULL, the destructor is called
drh5e00f6c2001-09-13 13:46:56 +0000461** when the reference count on each page reaches zero. The destructor can
462** be used to clean up information in the extra segment appended to each page.
drh72f82862001-05-24 21:06:34 +0000463**
464** The destructor is not called as a result sqlitepager_close().
465** Destructors are only called by sqlitepager_unref().
466*/
467void sqlitepager_set_destructor(Pager *pPager, void (*xDesc)(void*)){
468 pPager->xDestructor = xDesc;
469}
470
471/*
drh5e00f6c2001-09-13 13:46:56 +0000472** Return the total number of pages in the disk file associated with
473** pPager.
drhed7c8552001-04-11 14:29:21 +0000474*/
drhd9b02572001-04-15 00:37:09 +0000475int sqlitepager_pagecount(Pager *pPager){
drhed7c8552001-04-11 14:29:21 +0000476 int n;
drhd9b02572001-04-15 00:37:09 +0000477 assert( pPager!=0 );
drhed7c8552001-04-11 14:29:21 +0000478 if( pPager->dbSize>=0 ){
479 return pPager->dbSize;
480 }
drh8cfbf082001-09-19 13:22:39 +0000481 if( sqliteOsFileSize(pPager->fd, &n)!=SQLITE_OK ){
drh81a20f22001-10-12 17:30:04 +0000482 pPager->errMask |= PAGER_ERR_DISK;
drh8cfbf082001-09-19 13:22:39 +0000483 return 0;
drhed7c8552001-04-11 14:29:21 +0000484 }
drh8cfbf082001-09-19 13:22:39 +0000485 n /= SQLITE_PAGE_SIZE;
drhd9b02572001-04-15 00:37:09 +0000486 if( pPager->state!=SQLITE_UNLOCK ){
drhed7c8552001-04-11 14:29:21 +0000487 pPager->dbSize = n;
488 }
489 return n;
490}
491
492/*
493** Shutdown the page cache. Free all memory and close all files.
494**
495** If a transaction was in progress when this routine is called, that
496** transaction is rolled back. All outstanding pages are invalidated
497** and their memory is freed. Any attempt to use a page associated
498** with this page cache after this function returns will likely
499** result in a coredump.
500*/
drhd9b02572001-04-15 00:37:09 +0000501int sqlitepager_close(Pager *pPager){
502 PgHdr *pPg, *pNext;
drhed7c8552001-04-11 14:29:21 +0000503 switch( pPager->state ){
504 case SQLITE_WRITELOCK: {
drhd9b02572001-04-15 00:37:09 +0000505 sqlitepager_rollback(pPager);
drh8cfbf082001-09-19 13:22:39 +0000506 sqliteOsUnlock(pPager->fd);
507 assert( pPager->journalOpen==0 );
drhed7c8552001-04-11 14:29:21 +0000508 break;
509 }
510 case SQLITE_READLOCK: {
drh8cfbf082001-09-19 13:22:39 +0000511 sqliteOsUnlock(pPager->fd);
drhed7c8552001-04-11 14:29:21 +0000512 break;
513 }
514 default: {
515 /* Do nothing */
516 break;
517 }
518 }
drhd9b02572001-04-15 00:37:09 +0000519 for(pPg=pPager->pAll; pPg; pPg=pNext){
520 pNext = pPg->pNextAll;
521 sqliteFree(pPg);
drhed7c8552001-04-11 14:29:21 +0000522 }
drh8cfbf082001-09-19 13:22:39 +0000523 sqliteOsClose(pPager->fd);
524 assert( pPager->journalOpen==0 );
drh5e00f6c2001-09-13 13:46:56 +0000525 if( pPager->tempFile ){
drh8cfbf082001-09-19 13:22:39 +0000526 sqliteOsDelete(pPager->zFilename);
drh5e00f6c2001-09-13 13:46:56 +0000527 }
drhed7c8552001-04-11 14:29:21 +0000528 sqliteFree(pPager);
529 return SQLITE_OK;
530}
531
532/*
drh5e00f6c2001-09-13 13:46:56 +0000533** Return the page number for the given page data.
drhed7c8552001-04-11 14:29:21 +0000534*/
drhd9b02572001-04-15 00:37:09 +0000535Pgno sqlitepager_pagenumber(void *pData){
drhed7c8552001-04-11 14:29:21 +0000536 PgHdr *p = DATA_TO_PGHDR(pData);
537 return p->pgno;
538}
539
540/*
drh7e3b0a02001-04-28 16:52:40 +0000541** Increment the reference count for a page. If the page is
542** currently on the freelist (the reference count is zero) then
543** remove it from the freelist.
544*/
drhdf0b3b02001-06-23 11:36:20 +0000545static void page_ref(PgHdr *pPg){
drh7e3b0a02001-04-28 16:52:40 +0000546 if( pPg->nRef==0 ){
547 /* The page is currently on the freelist. Remove it. */
548 if( pPg->pPrevFree ){
549 pPg->pPrevFree->pNextFree = pPg->pNextFree;
550 }else{
551 pPg->pPager->pFirst = pPg->pNextFree;
552 }
553 if( pPg->pNextFree ){
554 pPg->pNextFree->pPrevFree = pPg->pPrevFree;
555 }else{
556 pPg->pPager->pLast = pPg->pPrevFree;
557 }
558 pPg->pPager->nRef++;
559 }
560 pPg->nRef++;
drhdd793422001-06-28 01:54:48 +0000561 REFINFO(pPg);
drhdf0b3b02001-06-23 11:36:20 +0000562}
563
564/*
565** Increment the reference count for a page. The input pointer is
566** a reference to the page data.
567*/
568int sqlitepager_ref(void *pData){
569 PgHdr *pPg = DATA_TO_PGHDR(pData);
570 page_ref(pPg);
drh8c42ca92001-06-22 19:15:00 +0000571 return SQLITE_OK;
drh7e3b0a02001-04-28 16:52:40 +0000572}
573
574/*
drhb19a2bc2001-09-16 00:13:26 +0000575** Sync the journal and then write all free dirty pages to the database
576** file.
577**
578** Writing all free dirty pages to the database after the sync is a
579** non-obvious optimization. fsync() is an expensive operation so we
580** want to minimize the number that occur. So after an fsync() is forced
581** and we are free to write dirty pages back to the database, it is best
582** to go ahead and do as much of that as possible to minimize the chance
583** of having to do another fsync() later on. Writing dirty free pages
584** in this way make database operations go up to 10 times faster.
drh50e5dad2001-09-15 00:57:28 +0000585*/
586static int syncAllPages(Pager *pPager){
587 PgHdr *pPg;
588 int rc = SQLITE_OK;
589 if( pPager->needSync ){
drh8cfbf082001-09-19 13:22:39 +0000590 rc = sqliteOsSync(pPager->jfd);
drh50e5dad2001-09-15 00:57:28 +0000591 if( rc!=0 ) return rc;
592 pPager->needSync = 0;
593 }
594 for(pPg=pPager->pFirst; pPg; pPg=pPg->pNextFree){
595 if( pPg->dirty ){
drh8cfbf082001-09-19 13:22:39 +0000596 sqliteOsSeek(pPager->fd, (pPg->pgno-1)*SQLITE_PAGE_SIZE);
597 rc = sqliteOsWrite(pPager->fd, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE);
drh50e5dad2001-09-15 00:57:28 +0000598 if( rc!=SQLITE_OK ) break;
599 pPg->dirty = 0;
600 }
601 }
drh81a20f22001-10-12 17:30:04 +0000602 return rc;
drh50e5dad2001-09-15 00:57:28 +0000603}
604
605/*
drhd9b02572001-04-15 00:37:09 +0000606** Acquire a page.
607**
** A read lock on the disk file is obtained when the first page is acquired.
609** This read lock is dropped when the last page is released.
drhd9b02572001-04-15 00:37:09 +0000610**
drh306dc212001-05-21 13:45:10 +0000611** A _get works for any page number greater than 0. If the database
612** file is smaller than the requested page, then no actual disk
613** read occurs and the memory image of the page is initialized to
614** all zeros. The extra data appended to a page is always initialized
615** to zeros the first time a page is loaded into memory.
616**
drhd9b02572001-04-15 00:37:09 +0000617** The acquisition might fail for several reasons. In all cases,
618** an appropriate error code is returned and *ppPage is set to NULL.
drh7e3b0a02001-04-28 16:52:40 +0000619**
620** See also sqlitepager_lookup(). Both this routine and _lookup() attempt
621** to find a page in the in-memory cache first. If the page is not already
drh5e00f6c2001-09-13 13:46:56 +0000622** in memory, this routine goes to disk to read it in whereas _lookup()
drh7e3b0a02001-04-28 16:52:40 +0000623** just returns 0. This routine acquires a read-lock the first time it
624** has to go to disk, and could also playback an old journal if necessary.
625** Since _lookup() never goes to disk, it never has to deal with locks
626** or journal files.
drhed7c8552001-04-11 14:29:21 +0000627*/
drhd9b02572001-04-15 00:37:09 +0000628int sqlitepager_get(Pager *pPager, Pgno pgno, void **ppPage){
drhed7c8552001-04-11 14:29:21 +0000629 PgHdr *pPg;
630
drhd9b02572001-04-15 00:37:09 +0000631 /* Make sure we have not hit any critical errors.
632 */
633 if( pPager==0 || pgno==0 ){
634 return SQLITE_ERROR;
635 }
636 if( pPager->errMask & ~(PAGER_ERR_FULL) ){
637 return pager_errcode(pPager);
638 }
639
drhed7c8552001-04-11 14:29:21 +0000640 /* If this is the first page accessed, then get a read lock
641 ** on the database file.
642 */
643 if( pPager->nRef==0 ){
drh8cfbf082001-09-19 13:22:39 +0000644 if( sqliteOsLock(pPager->fd, 0)!=SQLITE_OK ){
drhed7c8552001-04-11 14:29:21 +0000645 *ppPage = 0;
646 return SQLITE_BUSY;
647 }
drhd9b02572001-04-15 00:37:09 +0000648 pPager->state = SQLITE_READLOCK;
drhed7c8552001-04-11 14:29:21 +0000649
650 /* If a journal file exists, try to play it back.
651 */
drh8cfbf082001-09-19 13:22:39 +0000652 if( sqliteOsFileExists(pPager->zJournal) ){
drhf57b3392001-10-08 13:22:32 +0000653 int rc, dummy;
drhed7c8552001-04-11 14:29:21 +0000654
655 /* Open the journal for exclusive access. Return SQLITE_BUSY if
drhf57b3392001-10-08 13:22:32 +0000656 ** we cannot get exclusive access to the journal file.
657 **
658 ** Even though we will only be reading from the journal, not writing,
659 ** we have to open the journal for writing in order to obtain an
660 ** exclusive access lock.
drhed7c8552001-04-11 14:29:21 +0000661 */
drhf57b3392001-10-08 13:22:32 +0000662 rc = sqliteOsOpenReadWrite(pPager->zJournal, &pPager->jfd, &dummy);
drh8cfbf082001-09-19 13:22:39 +0000663 if( rc==SQLITE_OK ){
664 pPager->journalOpen = 1;
665 }
666 if( rc!=SQLITE_OK || sqliteOsLock(pPager->jfd, 1)!=SQLITE_OK ){
667 if( pPager->journalOpen ){
668 sqliteOsClose(pPager->jfd);
669 pPager->journalOpen = 0;
670 }
671 sqliteOsUnlock(pPager->fd);
drhed7c8552001-04-11 14:29:21 +0000672 *ppPage = 0;
673 return SQLITE_BUSY;
674 }
675
676 /* Get a write lock on the database */
drh8cfbf082001-09-19 13:22:39 +0000677 sqliteOsUnlock(pPager->fd);
678 if( sqliteOsLock(pPager->fd, 1)!=SQLITE_OK ){
679 sqliteOsClose(pPager->jfd);
680 pPager->journalOpen = 0;
drhed7c8552001-04-11 14:29:21 +0000681 *ppPage = 0;
682 return SQLITE_PROTOCOL;
683 }
drh8cfbf082001-09-19 13:22:39 +0000684 pPager->state = SQLITE_WRITELOCK;
drhed7c8552001-04-11 14:29:21 +0000685
686 /* Playback and delete the journal. Drop the database write
687 ** lock and reacquire the read lock.
688 */
drhd9b02572001-04-15 00:37:09 +0000689 rc = pager_playback(pPager);
690 if( rc!=SQLITE_OK ){
691 return rc;
692 }
drhed7c8552001-04-11 14:29:21 +0000693 }
694 pPg = 0;
695 }else{
696 /* Search for page in cache */
drhd9b02572001-04-15 00:37:09 +0000697 pPg = pager_lookup(pPager, pgno);
drhed7c8552001-04-11 14:29:21 +0000698 }
699 if( pPg==0 ){
drhd9b02572001-04-15 00:37:09 +0000700 /* The requested page is not in the page cache. */
drhed7c8552001-04-11 14:29:21 +0000701 int h;
drh7e3b0a02001-04-28 16:52:40 +0000702 pPager->nMiss++;
drhed7c8552001-04-11 14:29:21 +0000703 if( pPager->nPage<pPager->mxPage || pPager->pFirst==0 ){
704 /* Create a new page */
drh7e3b0a02001-04-28 16:52:40 +0000705 pPg = sqliteMalloc( sizeof(*pPg) + SQLITE_PAGE_SIZE + pPager->nExtra );
drhd9b02572001-04-15 00:37:09 +0000706 if( pPg==0 ){
707 *ppPage = 0;
708 pager_unwritelock(pPager);
709 pPager->errMask |= PAGER_ERR_MEM;
710 return SQLITE_NOMEM;
711 }
drhed7c8552001-04-11 14:29:21 +0000712 pPg->pPager = pPager;
drhd9b02572001-04-15 00:37:09 +0000713 pPg->pNextAll = pPager->pAll;
714 if( pPager->pAll ){
715 pPager->pAll->pPrevAll = pPg;
716 }
717 pPg->pPrevAll = 0;
drhd79caeb2001-04-15 02:27:24 +0000718 pPager->pAll = pPg;
drhd9b02572001-04-15 00:37:09 +0000719 pPager->nPage++;
drhed7c8552001-04-11 14:29:21 +0000720 }else{
drhd9b02572001-04-15 00:37:09 +0000721 /* Recycle an older page. First locate the page to be recycled.
722 ** Try to find one that is not dirty and is near the head of
723 ** of the free list */
drh50e5dad2001-09-15 00:57:28 +0000724 int cnt = pPager->mxPage/2;
drhed7c8552001-04-11 14:29:21 +0000725 pPg = pPager->pFirst;
drh6019e162001-07-02 17:51:45 +0000726 while( pPg->dirty && 0<cnt-- && pPg->pNextFree ){
drhd9b02572001-04-15 00:37:09 +0000727 pPg = pPg->pNextFree;
728 }
drhb19a2bc2001-09-16 00:13:26 +0000729
730 /* If we could not find a page that has not been used recently
731 ** and which is not dirty, then sync the journal and write all
732 ** dirty free pages into the database file, thus making them
733 ** clean pages and available for recycling.
734 **
735 ** We have to sync the journal before writing a page to the main
736 ** database. But syncing is a very slow operation. So after a
737 ** sync, it is best to write everything we can back to the main
738 ** database to minimize the risk of having to sync again in the
739 ** near future. That is way we write all dirty pages after a
740 ** sync.
741 */
drh50e5dad2001-09-15 00:57:28 +0000742 if( pPg==0 || pPg->dirty ){
743 int rc = syncAllPages(pPager);
744 if( rc!=0 ){
745 sqlitepager_rollback(pPager);
746 *ppPage = 0;
747 return SQLITE_IOERR;
748 }
749 pPg = pPager->pFirst;
750 }
drhd9b02572001-04-15 00:37:09 +0000751 assert( pPg->nRef==0 );
drh50e5dad2001-09-15 00:57:28 +0000752 assert( pPg->dirty==0 );
drhd9b02572001-04-15 00:37:09 +0000753
754 /* Unlink the old page from the free list and the hash table
755 */
drh6019e162001-07-02 17:51:45 +0000756 if( pPg->pPrevFree ){
757 pPg->pPrevFree->pNextFree = pPg->pNextFree;
drhed7c8552001-04-11 14:29:21 +0000758 }else{
drh6019e162001-07-02 17:51:45 +0000759 assert( pPager->pFirst==pPg );
760 pPager->pFirst = pPg->pNextFree;
drhed7c8552001-04-11 14:29:21 +0000761 }
drh6019e162001-07-02 17:51:45 +0000762 if( pPg->pNextFree ){
763 pPg->pNextFree->pPrevFree = pPg->pPrevFree;
764 }else{
765 assert( pPager->pLast==pPg );
766 pPager->pLast = pPg->pPrevFree;
767 }
768 pPg->pNextFree = pPg->pPrevFree = 0;
drhed7c8552001-04-11 14:29:21 +0000769 if( pPg->pNextHash ){
770 pPg->pNextHash->pPrevHash = pPg->pPrevHash;
771 }
772 if( pPg->pPrevHash ){
773 pPg->pPrevHash->pNextHash = pPg->pNextHash;
774 }else{
drhd9b02572001-04-15 00:37:09 +0000775 h = pager_hash(pPg->pgno);
drhed7c8552001-04-11 14:29:21 +0000776 assert( pPager->aHash[h]==pPg );
777 pPager->aHash[h] = pPg->pNextHash;
778 }
drh6019e162001-07-02 17:51:45 +0000779 pPg->pNextHash = pPg->pPrevHash = 0;
drhd9b02572001-04-15 00:37:09 +0000780 pPager->nOvfl++;
drhed7c8552001-04-11 14:29:21 +0000781 }
782 pPg->pgno = pgno;
drh6019e162001-07-02 17:51:45 +0000783 if( pPager->aInJournal && pgno<=pPager->origDbSize ){
784 pPg->inJournal = (pPager->aInJournal[pgno/8] & (1<<(pgno&7)))!=0;
785 }else{
786 pPg->inJournal = 0;
787 }
drhed7c8552001-04-11 14:29:21 +0000788 pPg->dirty = 0;
789 pPg->nRef = 1;
drhdd793422001-06-28 01:54:48 +0000790 REFINFO(pPg);
drhd9b02572001-04-15 00:37:09 +0000791 pPager->nRef++;
792 h = pager_hash(pgno);
drhed7c8552001-04-11 14:29:21 +0000793 pPg->pNextHash = pPager->aHash[h];
794 pPager->aHash[h] = pPg;
795 if( pPg->pNextHash ){
796 assert( pPg->pNextHash->pPrevHash==0 );
797 pPg->pNextHash->pPrevHash = pPg;
798 }
drh306dc212001-05-21 13:45:10 +0000799 if( pPager->dbSize<0 ) sqlitepager_pagecount(pPager);
800 if( pPager->dbSize<pgno ){
801 memset(PGHDR_TO_DATA(pPg), 0, SQLITE_PAGE_SIZE);
802 }else{
drh81a20f22001-10-12 17:30:04 +0000803 int rc;
drh8cfbf082001-09-19 13:22:39 +0000804 sqliteOsSeek(pPager->fd, (pgno-1)*SQLITE_PAGE_SIZE);
drh81a20f22001-10-12 17:30:04 +0000805 rc = sqliteOsRead(pPager->fd, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE);
806 if( rc!=SQLITE_OK ){
807 return rc;
808 }
drh306dc212001-05-21 13:45:10 +0000809 }
drh7e3b0a02001-04-28 16:52:40 +0000810 if( pPager->nExtra>0 ){
811 memset(PGHDR_TO_EXTRA(pPg), 0, pPager->nExtra);
812 }
drhed7c8552001-04-11 14:29:21 +0000813 }else{
drhd9b02572001-04-15 00:37:09 +0000814 /* The requested page is in the page cache. */
drh7e3b0a02001-04-28 16:52:40 +0000815 pPager->nHit++;
drhdf0b3b02001-06-23 11:36:20 +0000816 page_ref(pPg);
drhed7c8552001-04-11 14:29:21 +0000817 }
818 *ppPage = PGHDR_TO_DATA(pPg);
819 return SQLITE_OK;
820}
821
822/*
drh7e3b0a02001-04-28 16:52:40 +0000823** Acquire a page if it is already in the in-memory cache. Do
824** not read the page from disk. Return a pointer to the page,
825** or 0 if the page is not in cache.
826**
827** See also sqlitepager_get(). The difference between this routine
828** and sqlitepager_get() is that _get() will go to the disk and read
829** in the page if the page is not already in cache. This routine
drh5e00f6c2001-09-13 13:46:56 +0000830** returns NULL if the page is not in cache or if a disk I/O error
831** has ever happened.
drh7e3b0a02001-04-28 16:52:40 +0000832*/
833void *sqlitepager_lookup(Pager *pPager, Pgno pgno){
834 PgHdr *pPg;
835
836 /* Make sure we have not hit any critical errors.
837 */
838 if( pPager==0 || pgno==0 ){
839 return 0;
840 }
841 if( pPager->errMask & ~(PAGER_ERR_FULL) ){
842 return 0;
843 }
844 if( pPager->nRef==0 ){
845 return 0;
846 }
847 pPg = pager_lookup(pPager, pgno);
848 if( pPg==0 ) return 0;
drhdf0b3b02001-06-23 11:36:20 +0000849 page_ref(pPg);
drh7e3b0a02001-04-28 16:52:40 +0000850 return PGHDR_TO_DATA(pPg);
851}
852
853/*
drhed7c8552001-04-11 14:29:21 +0000854** Release a page.
855**
856** If the number of references to the page drop to zero, then the
857** page is added to the LRU list. When all references to all pages
drhd9b02572001-04-15 00:37:09 +0000858** are released, a rollback occurs and the lock on the database is
drhed7c8552001-04-11 14:29:21 +0000859** removed.
860*/
drhd9b02572001-04-15 00:37:09 +0000861int sqlitepager_unref(void *pData){
drhed7c8552001-04-11 14:29:21 +0000862 PgHdr *pPg;
drhd9b02572001-04-15 00:37:09 +0000863
864 /* Decrement the reference count for this page
865 */
drhed7c8552001-04-11 14:29:21 +0000866 pPg = DATA_TO_PGHDR(pData);
867 assert( pPg->nRef>0 );
drhed7c8552001-04-11 14:29:21 +0000868 pPg->nRef--;
drhdd793422001-06-28 01:54:48 +0000869 REFINFO(pPg);
drhd9b02572001-04-15 00:37:09 +0000870
drh72f82862001-05-24 21:06:34 +0000871 /* When the number of references to a page reach 0, call the
872 ** destructor and add the page to the freelist.
drhd9b02572001-04-15 00:37:09 +0000873 */
drhed7c8552001-04-11 14:29:21 +0000874 if( pPg->nRef==0 ){
drh1eaa2692001-09-18 02:02:23 +0000875 Pager *pPager;
876 pPager = pPg->pPager;
drhd9b02572001-04-15 00:37:09 +0000877 pPg->pNextFree = 0;
878 pPg->pPrevFree = pPager->pLast;
drhed7c8552001-04-11 14:29:21 +0000879 pPager->pLast = pPg;
drhd9b02572001-04-15 00:37:09 +0000880 if( pPg->pPrevFree ){
881 pPg->pPrevFree->pNextFree = pPg;
drhed7c8552001-04-11 14:29:21 +0000882 }else{
883 pPager->pFirst = pPg;
884 }
drh72f82862001-05-24 21:06:34 +0000885 if( pPager->xDestructor ){
886 pPager->xDestructor(pData);
887 }
drhd9b02572001-04-15 00:37:09 +0000888
889 /* When all pages reach the freelist, drop the read lock from
890 ** the database file.
891 */
892 pPager->nRef--;
893 assert( pPager->nRef>=0 );
894 if( pPager->nRef==0 ){
895 pager_reset(pPager);
896 }
drhed7c8552001-04-11 14:29:21 +0000897 }
drhd9b02572001-04-15 00:37:09 +0000898 return SQLITE_OK;
drhed7c8552001-04-11 14:29:21 +0000899}
900
901/*
902** Mark a data page as writeable. The page is written into the journal
903** if it is not there already. This routine must be called before making
904** changes to a page.
905**
906** The first time this routine is called, the pager creates a new
907** journal and acquires a write lock on the database. If the write
908** lock could not be acquired, this routine returns SQLITE_BUSY. The
drh306dc212001-05-21 13:45:10 +0000909** calling routine must check for that return value and be careful not to
drhed7c8552001-04-11 14:29:21 +0000910** change any page data until this routine returns SQLITE_OK.
drhd9b02572001-04-15 00:37:09 +0000911**
912** If the journal file could not be written because the disk is full,
913** then this routine returns SQLITE_FULL and does an immediate rollback.
914** All subsequent write attempts also return SQLITE_FULL until there
915** is a call to sqlitepager_commit() or sqlitepager_rollback() to
916** reset.
drhed7c8552001-04-11 14:29:21 +0000917*/
drhd9b02572001-04-15 00:37:09 +0000918int sqlitepager_write(void *pData){
drh69688d52001-04-14 16:38:23 +0000919 PgHdr *pPg = DATA_TO_PGHDR(pData);
920 Pager *pPager = pPg->pPager;
drhd79caeb2001-04-15 02:27:24 +0000921 int rc = SQLITE_OK;
drh69688d52001-04-14 16:38:23 +0000922
drhd9b02572001-04-15 00:37:09 +0000923 if( pPager->errMask ){
924 return pager_errcode(pPager);
925 }
drh5e00f6c2001-09-13 13:46:56 +0000926 if( pPager->readOnly ){
927 return SQLITE_PERM;
928 }
drhd9b02572001-04-15 00:37:09 +0000929 pPg->dirty = 1;
drh69688d52001-04-14 16:38:23 +0000930 if( pPg->inJournal ){ return SQLITE_OK; }
drhd9b02572001-04-15 00:37:09 +0000931 assert( pPager->state!=SQLITE_UNLOCK );
drhed7c8552001-04-11 14:29:21 +0000932 if( pPager->state==SQLITE_READLOCK ){
drh6019e162001-07-02 17:51:45 +0000933 assert( pPager->aInJournal==0 );
934 pPager->aInJournal = sqliteMalloc( pPager->dbSize/8 + 1 );
935 if( pPager->aInJournal==0 ){
936 return SQLITE_NOMEM;
937 }
drh8cfbf082001-09-19 13:22:39 +0000938 rc = sqliteOsOpenExclusive(pPager->zJournal, &pPager->jfd);
939 if( rc!=SQLITE_OK ){
drhed7c8552001-04-11 14:29:21 +0000940 return SQLITE_CANTOPEN;
941 }
drh8cfbf082001-09-19 13:22:39 +0000942 pPager->journalOpen = 1;
drhf57b14a2001-09-14 18:54:08 +0000943 pPager->needSync = 0;
drh8cfbf082001-09-19 13:22:39 +0000944 if( sqliteOsLock(pPager->jfd, 1)!=SQLITE_OK ){
945 sqliteOsClose(pPager->jfd);
946 pPager->journalOpen = 0;
drhed7c8552001-04-11 14:29:21 +0000947 return SQLITE_BUSY;
948 }
drh8cfbf082001-09-19 13:22:39 +0000949 sqliteOsUnlock(pPager->fd);
950 if( sqliteOsLock(pPager->fd, 1)!=SQLITE_OK ){
951 sqliteOsClose(pPager->jfd);
952 pPager->journalOpen = 0;
drhed7c8552001-04-11 14:29:21 +0000953 pPager->state = SQLITE_UNLOCK;
drhd9b02572001-04-15 00:37:09 +0000954 pPager->errMask |= PAGER_ERR_LOCK;
drhed7c8552001-04-11 14:29:21 +0000955 return SQLITE_PROTOCOL;
956 }
957 pPager->state = SQLITE_WRITELOCK;
drhd9b02572001-04-15 00:37:09 +0000958 sqlitepager_pagecount(pPager);
drh69688d52001-04-14 16:38:23 +0000959 pPager->origDbSize = pPager->dbSize;
drh8cfbf082001-09-19 13:22:39 +0000960 rc = sqliteOsWrite(pPager->jfd, aJournalMagic, sizeof(aJournalMagic));
drhd9b02572001-04-15 00:37:09 +0000961 if( rc==SQLITE_OK ){
drh8cfbf082001-09-19 13:22:39 +0000962 rc = sqliteOsWrite(pPager->jfd, &pPager->dbSize, sizeof(Pgno));
drhd9b02572001-04-15 00:37:09 +0000963 }
964 if( rc!=SQLITE_OK ){
965 rc = pager_unwritelock(pPager);
966 if( rc==SQLITE_OK ) rc = SQLITE_FULL;
967 return rc;
968 }
drhed7c8552001-04-11 14:29:21 +0000969 }
drhd9b02572001-04-15 00:37:09 +0000970 assert( pPager->state==SQLITE_WRITELOCK );
drh8cfbf082001-09-19 13:22:39 +0000971 assert( pPager->journalOpen );
drhd9b02572001-04-15 00:37:09 +0000972 if( pPg->pgno <= pPager->origDbSize ){
drh8cfbf082001-09-19 13:22:39 +0000973 rc = sqliteOsWrite(pPager->jfd, &pPg->pgno, sizeof(Pgno));
drhd9b02572001-04-15 00:37:09 +0000974 if( rc==SQLITE_OK ){
drh8cfbf082001-09-19 13:22:39 +0000975 rc = sqliteOsWrite(pPager->jfd, pData, SQLITE_PAGE_SIZE);
drhd9b02572001-04-15 00:37:09 +0000976 }
977 if( rc!=SQLITE_OK ){
978 sqlitepager_rollback(pPager);
979 pPager->errMask |= PAGER_ERR_FULL;
980 return rc;
981 }
drh6019e162001-07-02 17:51:45 +0000982 assert( pPager->aInJournal!=0 );
983 pPager->aInJournal[pPg->pgno/8] |= 1<<(pPg->pgno&7);
drhf57b14a2001-09-14 18:54:08 +0000984 pPager->needSync = 1;
drh69688d52001-04-14 16:38:23 +0000985 }
drh69688d52001-04-14 16:38:23 +0000986 pPg->inJournal = 1;
drh306dc212001-05-21 13:45:10 +0000987 if( pPager->dbSize<pPg->pgno ){
988 pPager->dbSize = pPg->pgno;
989 }
drh69688d52001-04-14 16:38:23 +0000990 return rc;
drhed7c8552001-04-11 14:29:21 +0000991}
992
993/*
drh6019e162001-07-02 17:51:45 +0000994** Return TRUE if the page given in the argument was previous passed
995** to sqlitepager_write(). In other words, return TRUE if it is ok
996** to change the content of the page.
997*/
998int sqlitepager_iswriteable(void *pData){
999 PgHdr *pPg = DATA_TO_PGHDR(pData);
1000 return pPg->dirty;
1001}
1002
1003/*
drhed7c8552001-04-11 14:29:21 +00001004** Commit all changes to the database and release the write lock.
drhd9b02572001-04-15 00:37:09 +00001005**
1006** If the commit fails for any reason, a rollback attempt is made
1007** and an error code is returned. If the commit worked, SQLITE_OK
1008** is returned.
drhed7c8552001-04-11 14:29:21 +00001009*/
drhd9b02572001-04-15 00:37:09 +00001010int sqlitepager_commit(Pager *pPager){
drha1b351a2001-09-14 16:42:12 +00001011 int rc;
drhed7c8552001-04-11 14:29:21 +00001012 PgHdr *pPg;
drhd9b02572001-04-15 00:37:09 +00001013
1014 if( pPager->errMask==PAGER_ERR_FULL ){
1015 rc = sqlitepager_rollback(pPager);
1016 if( rc==SQLITE_OK ) rc = SQLITE_FULL;
1017 return rc;
1018 }
1019 if( pPager->errMask!=0 ){
1020 rc = pager_errcode(pPager);
1021 return rc;
1022 }
1023 if( pPager->state!=SQLITE_WRITELOCK ){
1024 return SQLITE_ERROR;
1025 }
drh8cfbf082001-09-19 13:22:39 +00001026 assert( pPager->journalOpen );
1027 if( pPager->needSync && sqliteOsSync(pPager->jfd)!=SQLITE_OK ){
drhd9b02572001-04-15 00:37:09 +00001028 goto commit_abort;
drhed7c8552001-04-11 14:29:21 +00001029 }
drha1b351a2001-09-14 16:42:12 +00001030 for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
1031 if( pPg->dirty==0 ) continue;
drh8cfbf082001-09-19 13:22:39 +00001032 rc = sqliteOsSeek(pPager->fd, (pPg->pgno-1)*SQLITE_PAGE_SIZE);
drha1b351a2001-09-14 16:42:12 +00001033 if( rc!=SQLITE_OK ) goto commit_abort;
drh8cfbf082001-09-19 13:22:39 +00001034 rc = sqliteOsWrite(pPager->fd, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE);
drha1b351a2001-09-14 16:42:12 +00001035 if( rc!=SQLITE_OK ) goto commit_abort;
drhed7c8552001-04-11 14:29:21 +00001036 }
drh8cfbf082001-09-19 13:22:39 +00001037 if( sqliteOsSync(pPager->fd)!=SQLITE_OK ) goto commit_abort;
drhd9b02572001-04-15 00:37:09 +00001038 rc = pager_unwritelock(pPager);
1039 pPager->dbSize = -1;
1040 return rc;
1041
1042 /* Jump here if anything goes wrong during the commit process.
1043 */
1044commit_abort:
1045 rc = sqlitepager_rollback(pPager);
1046 if( rc==SQLITE_OK ){
1047 rc = SQLITE_FULL;
drhed7c8552001-04-11 14:29:21 +00001048 }
drhed7c8552001-04-11 14:29:21 +00001049 return rc;
1050}
1051
1052/*
1053** Rollback all changes. The database falls back to read-only mode.
1054** All in-memory cache pages revert to their original data contents.
1055** The journal is deleted.
drhd9b02572001-04-15 00:37:09 +00001056**
1057** This routine cannot fail unless some other process is not following
1058** the correct locking protocol (SQLITE_PROTOCOL) or unless some other
1059** process is writing trash into the journal file (SQLITE_CORRUPT) or
1060** unless a prior malloc() failed (SQLITE_NOMEM). Appropriate error
1061** codes are returned for all these occasions. Otherwise,
1062** SQLITE_OK is returned.
drhed7c8552001-04-11 14:29:21 +00001063*/
drhd9b02572001-04-15 00:37:09 +00001064int sqlitepager_rollback(Pager *pPager){
drhed7c8552001-04-11 14:29:21 +00001065 int rc;
drhd9b02572001-04-15 00:37:09 +00001066 if( pPager->errMask!=0 && pPager->errMask!=PAGER_ERR_FULL ){
1067 return pager_errcode(pPager);
drhed7c8552001-04-11 14:29:21 +00001068 }
drhd9b02572001-04-15 00:37:09 +00001069 if( pPager->state!=SQLITE_WRITELOCK ){
1070 return SQLITE_OK;
1071 }
1072 rc = pager_playback(pPager);
1073 if( rc!=SQLITE_OK ){
1074 rc = SQLITE_CORRUPT;
1075 pPager->errMask |= PAGER_ERR_CORRUPT;
1076 }
1077 pPager->dbSize = -1;
drhed7c8552001-04-11 14:29:21 +00001078 return rc;
1079};
drhd9b02572001-04-15 00:37:09 +00001080
1081/*
drh5e00f6c2001-09-13 13:46:56 +00001082** Return TRUE if the database file is opened read-only. Return FALSE
1083** if the database is (in theory) writable.
1084*/
1085int sqlitepager_isreadonly(Pager *pPager){
drhbe0072d2001-09-13 14:46:09 +00001086 return pPager->readOnly;
drh5e00f6c2001-09-13 13:46:56 +00001087}
1088
1089/*
drhd9b02572001-04-15 00:37:09 +00001090** This routine is used for testing and analysis only.
1091*/
1092int *sqlitepager_stats(Pager *pPager){
1093 static int a[9];
1094 a[0] = pPager->nRef;
1095 a[1] = pPager->nPage;
1096 a[2] = pPager->mxPage;
1097 a[3] = pPager->dbSize;
1098 a[4] = pPager->state;
1099 a[5] = pPager->errMask;
1100 a[6] = pPager->nHit;
1101 a[7] = pPager->nMiss;
1102 a[8] = pPager->nOvfl;
1103 return a;
1104}
drhdd793422001-06-28 01:54:48 +00001105
#if SQLITE_TEST
/*
** Print to standard output one line for every page that currently has
** a positive reference count, showing the page number, the address of
** its data area, and the reference count.
*/
void sqlitepager_refdump(Pager *pPager){
  PgHdr *pPg;
  for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
    if( pPg->nRef<=0 ) continue;
    /* The address is printed with %p: casting a pointer to int would
    ** truncate it where pointers are wider than int.  The page number
    ** is cast so that it matches the %d conversion.
    */
    printf("PAGE %3d addr=%p nRef=%d\n",
       (int)pPg->pgno, (void*)PGHDR_TO_DATA(pPg), pPg->nRef);
  }
}
#endif