blob: 0b8ac722e8e693871e69b01dc3ffd1075c2f9794 [file] [log] [blame]
drhed7c8552001-04-11 14:29:21 +00001/*
drhb19a2bc2001-09-16 00:13:26 +00002** 2001 September 15
drhed7c8552001-04-11 14:29:21 +00003**
drhb19a2bc2001-09-16 00:13:26 +00004** The author disclaims copyright to this source code. In place of
5** a legal notice, here is a blessing:
drhed7c8552001-04-11 14:29:21 +00006**
drhb19a2bc2001-09-16 00:13:26 +00007** May you do good and not evil.
8** May you find forgiveness for yourself and forgive others.
9** May you share freely, never taking more than you give.
drhed7c8552001-04-11 14:29:21 +000010**
11*************************************************************************
drhb19a2bc2001-09-16 00:13:26 +000012** This is the implementation of the page cache subsystem or "pager".
drhed7c8552001-04-11 14:29:21 +000013**
drhb19a2bc2001-09-16 00:13:26 +000014** The pager is used to access a database disk file. It implements
15** atomic commit and rollback through the use of a journal file that
16** is separate from the database file. The pager also implements file
17** locking to prevent two processes from writing the same database
18** file simultaneously, or one process from reading the database while
19** another is writing.
drhed7c8552001-04-11 14:29:21 +000020**
drhb19a2bc2001-09-16 00:13:26 +000021** @(#) $Id: pager.c,v 1.20 2001/09/16 00:13:27 drh Exp $
drhed7c8552001-04-11 14:29:21 +000022*/
drhd9b02572001-04-15 00:37:09 +000023#include "sqliteInt.h"
drhed7c8552001-04-11 14:29:21 +000024#include "pager.h"
25#include <fcntl.h>
26#include <sys/stat.h>
27#include <unistd.h>
28#include <assert.h>
drhd9b02572001-04-15 00:37:09 +000029#include <string.h>
drhed7c8552001-04-11 14:29:21 +000030
/*
** The page cache as a whole is always in exactly one of these states:
**
**   SQLITE_UNLOCK       The page cache is neither reading nor writing
**                       the database file.  No data is held in memory.
**                       This is the initial state.
**
**   SQLITE_READLOCK     The page cache is reading the database.
**                       Writing is not permitted.  Several readers may
**                       access the same database file at once.
**
**   SQLITE_WRITELOCK    The page cache is writing the database.
**                       Access is exclusive: no other process or thread
**                       may read or write while one process is writing.
**
** The page cache starts out in SQLITE_UNLOCK.  The first call to
** sqlitepager_get() moves it to SQLITE_READLOCK.  Once every page has
** been released with sqlitepager_unref() the state drops back to
** SQLITE_UNLOCK.  The first request to write a page moves the state to
** SQLITE_WRITELOCK.  (A page can only be written while it is
** outstanding, so the pager is necessarily in SQLITE_READLOCK before it
** moves to SQLITE_WRITELOCK.)  A rollback (sqlitepager_rollback()) or a
** commit moves the state from SQLITE_WRITELOCK back to SQLITE_READLOCK.
*/
#define SQLITE_UNLOCK      0   /* No lock held, cache empty */
#define SQLITE_READLOCK    1   /* Shared read lock held */
#define SQLITE_WRITELOCK   2   /* Exclusive write lock held */
64
drhd9b02572001-04-15 00:37:09 +000065
drhed7c8552001-04-11 14:29:21 +000066/*
67** Each in-memory image of a page begins with the following header.
drhbd03cae2001-06-02 02:40:57 +000068** This header is only visible to this pager module. The client
69** code that calls pager sees only the data that follows the header.
drhed7c8552001-04-11 14:29:21 +000070*/
drhd9b02572001-04-15 00:37:09 +000071typedef struct PgHdr PgHdr;
drhed7c8552001-04-11 14:29:21 +000072struct PgHdr {
73 Pager *pPager; /* The pager to which this page belongs */
74 Pgno pgno; /* The page number for this page */
drh69688d52001-04-14 16:38:23 +000075 PgHdr *pNextHash, *pPrevHash; /* Hash collision chain for PgHdr.pgno */
drhed7c8552001-04-11 14:29:21 +000076 int nRef; /* Number of users of this page */
drhd9b02572001-04-15 00:37:09 +000077 PgHdr *pNextFree, *pPrevFree; /* Freelist of pages where nRef==0 */
78 PgHdr *pNextAll, *pPrevAll; /* A list of all pages */
drhed7c8552001-04-11 14:29:21 +000079 char inJournal; /* TRUE if has been written to journal */
80 char dirty; /* TRUE if we need to write back changes */
drh69688d52001-04-14 16:38:23 +000081 /* SQLITE_PAGE_SIZE bytes of page data follow this header */
drh7e3b0a02001-04-28 16:52:40 +000082 /* Pager.nExtra bytes of local data follow the page data */
drhed7c8552001-04-11 14:29:21 +000083};
84
/*
** Conversions between a PgHdr pointer and the memory that follows it.
**
**   PGHDR_TO_DATA(P)    page data that starts right after header P
**   DATA_TO_PGHDR(D)    header that sits right before page data D
**   PGHDR_TO_EXTRA(P)   extra bytes that start SQLITE_PAGE_SIZE bytes
**                       past the beginning of the page data of P
*/
#define PGHDR_TO_DATA(P)   ((void*)((P)+1))
#define DATA_TO_PGHDR(D)   (((PgHdr*)(D))-1)
#define PGHDR_TO_EXTRA(P)  ((void*)(((char*)((P)+1))+SQLITE_PAGE_SIZE))
drhed7c8552001-04-11 14:29:21 +000092
/*
** Number of buckets in the hash table that locates in-memory pages by
** page number.  Knuth says this should be a prime number.
*/
#define N_PG_HASH   373
drhed7c8552001-04-11 14:29:21 +000098
99/*
100** A open page cache is an instance of the following structure.
101*/
102struct Pager {
103 char *zFilename; /* Name of the database file */
104 char *zJournal; /* Name of the journal file */
105 int fd, jfd; /* File descriptors for database and journal */
drhed7c8552001-04-11 14:29:21 +0000106 int dbSize; /* Number of pages in the file */
drh69688d52001-04-14 16:38:23 +0000107 int origDbSize; /* dbSize before the current change */
drh7e3b0a02001-04-28 16:52:40 +0000108 int nExtra; /* Add this many bytes to each in-memory page */
drh72f82862001-05-24 21:06:34 +0000109 void (*xDestructor)(void*); /* Call this routine when freeing pages */
drhed7c8552001-04-11 14:29:21 +0000110 int nPage; /* Total number of in-memory pages */
drhd9b02572001-04-15 00:37:09 +0000111 int nRef; /* Number of in-memory pages with PgHdr.nRef>0 */
drhed7c8552001-04-11 14:29:21 +0000112 int mxPage; /* Maximum number of pages to hold in cache */
drhd9b02572001-04-15 00:37:09 +0000113 int nHit, nMiss, nOvfl; /* Cache hits, missing, and LRU overflows */
114 unsigned char state; /* SQLITE_UNLOCK, _READLOCK or _WRITELOCK */
115 unsigned char errMask; /* One of several kinds of errors */
drh5e00f6c2001-09-13 13:46:56 +0000116 unsigned char tempFile; /* zFilename is a temporary file */
117 unsigned char readOnly; /* True for a read-only database */
drhf57b14a2001-09-14 18:54:08 +0000118 unsigned char needSync; /* True if an fsync() is needed on the journal */
drh6019e162001-07-02 17:51:45 +0000119 unsigned char *aInJournal; /* One bit for each page in the database file */
drhed7c8552001-04-11 14:29:21 +0000120 PgHdr *pFirst, *pLast; /* List of free pages */
drhd9b02572001-04-15 00:37:09 +0000121 PgHdr *pAll; /* List of all pages */
drhed7c8552001-04-11 14:29:21 +0000122 PgHdr *aHash[N_PG_HASH]; /* Hash table to map page number of PgHdr */
drhd9b02572001-04-15 00:37:09 +0000123};
124
/*
** Bit values that may be OR-ed together in Pager.errMask.
*/
#define PAGER_ERR_FULL     0x01   /* a write() failed */
#define PAGER_ERR_MEM      0x02   /* malloc() failed */
#define PAGER_ERR_LOCK     0x04   /* error in the locking protocol */
#define PAGER_ERR_CORRUPT  0x08   /* database or journal corruption */
132
133/*
134** The journal file contains page records in the following
135** format.
136*/
137typedef struct PageRecord PageRecord;
138struct PageRecord {
139 Pgno pgno; /* The page number */
140 char aData[SQLITE_PAGE_SIZE]; /* Original data for page pgno */
141};
142
/*
** Every journal file starts with this magic byte string.  The bytes
** were obtained from /dev/random and serve only as a sanity check.
*/
static const unsigned char aJournalMagic[] = {
  0xd9, 0xd5, 0x05, 0xf9,
  0x20, 0xa1, 0x63, 0xd4,
};
150
/*
** Map a page number onto a bucket index of the Pager.aHash[] table.
*/
#define pager_hash(PN)  ((PN)%N_PG_HASH)
drhed7c8552001-04-11 14:29:21 +0000155
/*
** Reference count tracking (test builds only).
**
** When SQLITE_TEST is set and pager_refinfo_enable is non-zero, every
** REFINFO(X) prints the page number, the address of the page data and
** the current reference count of page X.  In all other builds REFINFO()
** expands to nothing.
*/
#if SQLITE_TEST
  int pager_refinfo_enable = 0;
  static void pager_refinfo(PgHdr *p){
    static int cnt = 0;
    if( !pager_refinfo_enable ) return;
    /* Print the address with %p: casting the pointer to int would
    ** truncate it wherever pointers are wider than int. */
    printf(
       "REFCNT: %4d addr=%p nRef=%d\n",
       (int)p->pgno, PGHDR_TO_DATA(p), p->nRef
    );
    cnt++;   /* Something to set a breakpoint on */
  }
# define REFINFO(X)  pager_refinfo(X)
#else
# define REFINFO(X)
#endif
174
/*
** Try to take a lock covering the whole file open on descriptor fd.
** A read lock is requested when wrlock==0 and a write lock otherwise.
** The request does not block.
**
** Return 0 if the lock was obtained and 1 if it was not.
*/
static int pager_lock(int fd, int wrlock){
  struct flock lk;
  if( wrlock ){
    lk.l_type = F_WRLCK;
  }else{
    lk.l_type = F_RDLCK;
  }
  lk.l_whence = SEEK_SET;
  lk.l_len = 0L;           /* Zero length means "through end of file" */
  lk.l_start = 0L;
  return fcntl(fd, F_SETLK, &lk)!=0;
}
189
/*
** Release whatever lock this process holds on the file open on
** descriptor fd.  The unlock request covers the whole file.
**
** Return 0 on success and 1 if fcntl() reported a failure.
*/
static int pager_unlock(int fd){
  int rc;
  struct flock lock;
  lock.l_type = F_UNLCK;
  lock.l_whence = SEEK_SET;
  lock.l_start = lock.l_len = 0L;
  rc = fcntl(fd, F_SETLK, &lock);
  return rc!=0;
}
202
203/*
204** Move the cursor for file descriptor fd to the point whereto from
205** the beginning of the file.
206*/
207static int pager_seek(int fd, off_t whereto){
drh6019e162001-07-02 17:51:45 +0000208 /*printf("SEEK to page %d\n", whereto/SQLITE_PAGE_SIZE + 1);*/
drhd9b02572001-04-15 00:37:09 +0000209 lseek(fd, whereto, SEEK_SET);
210 return SQLITE_OK;
211}
212
213/*
214** Truncate the given file so that it contains exactly mxPg pages
215** of data.
216*/
217static int pager_truncate(int fd, Pgno mxPg){
218 int rc;
219 rc = ftruncate(fd, mxPg*SQLITE_PAGE_SIZE);
220 return rc!=0 ? SQLITE_IOERR : SQLITE_OK;
221}
222
223/*
224** Read nBytes of data from fd into pBuf. If the data cannot be
225** read or only a partial read occurs, then the unread parts of
226** pBuf are filled with zeros and this routine returns SQLITE_IOERR.
227** If the read is completely successful, return SQLITE_OK.
228*/
229static int pager_read(int fd, void *pBuf, int nByte){
230 int rc;
drh6019e162001-07-02 17:51:45 +0000231 /* printf("READ\n");*/
drhd9b02572001-04-15 00:37:09 +0000232 rc = read(fd, pBuf, nByte);
233 if( rc<0 ){
234 memset(pBuf, 0, nByte);
235 return SQLITE_IOERR;
236 }
237 if( rc<nByte ){
238 memset(&((char*)pBuf)[rc], 0, nByte - rc);
239 rc = SQLITE_IOERR;
240 }else{
241 rc = SQLITE_OK;
242 }
243 return rc;
244}
245
246/*
247** Write nBytes of data into fd. If any problem occurs or if the
248** write is incomplete, SQLITE_IOERR is returned. SQLITE_OK is
249** returned upon complete success.
250*/
251static int pager_write(int fd, const void *pBuf, int nByte){
252 int rc;
drh6019e162001-07-02 17:51:45 +0000253 /*printf("WRITE\n");*/
drhd9b02572001-04-15 00:37:09 +0000254 rc = write(fd, pBuf, nByte);
255 if( rc<nByte ){
256 return SQLITE_FULL;
257 }else{
258 return SQLITE_OK;
259 }
260}
261
262/*
263** Convert the bits in the pPager->errMask into an approprate
264** return code.
265*/
266static int pager_errcode(Pager *pPager){
267 int rc = SQLITE_OK;
268 if( pPager->errMask & PAGER_ERR_LOCK ) rc = SQLITE_PROTOCOL;
269 if( pPager->errMask & PAGER_ERR_FULL ) rc = SQLITE_FULL;
270 if( pPager->errMask & PAGER_ERR_MEM ) rc = SQLITE_NOMEM;
271 if( pPager->errMask & PAGER_ERR_CORRUPT ) rc = SQLITE_CORRUPT;
272 return rc;
drhed7c8552001-04-11 14:29:21 +0000273}
274
275/*
276** Find a page in the hash table given its page number. Return
277** a pointer to the page or NULL if not found.
278*/
drhd9b02572001-04-15 00:37:09 +0000279static PgHdr *pager_lookup(Pager *pPager, Pgno pgno){
drhed7c8552001-04-11 14:29:21 +0000280 PgHdr *p = pPager->aHash[pgno % N_PG_HASH];
281 while( p && p->pgno!=pgno ){
282 p = p->pNextHash;
283 }
284 return p;
285}
286
287/*
288** Unlock the database and clear the in-memory cache. This routine
289** sets the state of the pager back to what it was when it was first
290** opened. Any outstanding pages are invalidated and subsequent attempts
291** to access those pages will likely result in a coredump.
292*/
drhd9b02572001-04-15 00:37:09 +0000293static void pager_reset(Pager *pPager){
drhed7c8552001-04-11 14:29:21 +0000294 PgHdr *pPg, *pNext;
drhd9b02572001-04-15 00:37:09 +0000295 for(pPg=pPager->pAll; pPg; pPg=pNext){
296 pNext = pPg->pNextAll;
297 sqliteFree(pPg);
drhed7c8552001-04-11 14:29:21 +0000298 }
299 pPager->pFirst = 0;
drhd9b02572001-04-15 00:37:09 +0000300 pPager->pLast = 0;
301 pPager->pAll = 0;
drhed7c8552001-04-11 14:29:21 +0000302 memset(pPager->aHash, 0, sizeof(pPager->aHash));
303 pPager->nPage = 0;
304 if( pPager->state==SQLITE_WRITELOCK ){
drhd9b02572001-04-15 00:37:09 +0000305 sqlitepager_rollback(pPager);
drhed7c8552001-04-11 14:29:21 +0000306 }
drhd9b02572001-04-15 00:37:09 +0000307 pager_unlock(pPager->fd);
drhed7c8552001-04-11 14:29:21 +0000308 pPager->state = SQLITE_UNLOCK;
drhd9b02572001-04-15 00:37:09 +0000309 pPager->dbSize = -1;
drhed7c8552001-04-11 14:29:21 +0000310 pPager->nRef = 0;
311}
312
313/*
314** When this routine is called, the pager has the journal file open and
315** a write lock on the database. This routine releases the database
316** write lock and acquires a read lock in its place. The journal file
317** is deleted and closed.
318**
319** We have to release the write lock before acquiring the read lock,
320** so there is a race condition where another process can get the lock
321** while we are not holding it. But, no other process should do this
322** because we are also holding a lock on the journal, and no process
323** should get a write lock on the database without first getting a lock
324** on the journal. So this routine should never fail. But it can fail
325** if another process is not playing by the rules. If it does fail,
drhd9b02572001-04-15 00:37:09 +0000326** all in-memory cache pages are invalidated, the PAGER_ERR_LOCK bit
327** is set in pPager->errMask, and this routine returns SQLITE_PROTOCOL.
328** SQLITE_OK is returned on success.
drhed7c8552001-04-11 14:29:21 +0000329*/
drhd9b02572001-04-15 00:37:09 +0000330static int pager_unwritelock(Pager *pPager){
drhed7c8552001-04-11 14:29:21 +0000331 int rc;
drhd9b02572001-04-15 00:37:09 +0000332 PgHdr *pPg;
333 if( pPager->state!=SQLITE_WRITELOCK ) return SQLITE_OK;
334 pager_unlock(pPager->fd);
335 rc = pager_lock(pPager->fd, 0);
drhed7c8552001-04-11 14:29:21 +0000336 unlink(pPager->zJournal);
337 close(pPager->jfd);
338 pPager->jfd = -1;
drh6019e162001-07-02 17:51:45 +0000339 sqliteFree( pPager->aInJournal );
340 pPager->aInJournal = 0;
drhd9b02572001-04-15 00:37:09 +0000341 for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
342 pPg->inJournal = 0;
343 pPg->dirty = 0;
344 }
drhed7c8552001-04-11 14:29:21 +0000345 if( rc!=SQLITE_OK ){
346 pPager->state = SQLITE_UNLOCK;
drhed7c8552001-04-11 14:29:21 +0000347 rc = SQLITE_PROTOCOL;
drhd9b02572001-04-15 00:37:09 +0000348 pPager->errMask |= PAGER_ERR_LOCK;
drhed7c8552001-04-11 14:29:21 +0000349 }else{
drhd9b02572001-04-15 00:37:09 +0000350 rc = SQLITE_OK;
drhed7c8552001-04-11 14:29:21 +0000351 pPager->state = SQLITE_READLOCK;
352 }
353 return rc;
354}
355
drhed7c8552001-04-11 14:29:21 +0000356/*
357** Playback the journal and thus restore the database file to
358** the state it was in before we started making changes.
359**
drhd9b02572001-04-15 00:37:09 +0000360** The journal file format is as follows: There is an initial
361** file-type string for sanity checking. Then there is a single
362** Pgno number which is the number of pages in the database before
363** changes were made. The database is truncated to this size.
drh306dc212001-05-21 13:45:10 +0000364** Next come zero or more page records where each page record
365** consists of a Pgno and SQLITE_PAGE_SIZE bytes of data. See
366** the PageRecord structure for details.
drhed7c8552001-04-11 14:29:21 +0000367**
drhd9b02572001-04-15 00:37:09 +0000368** For playback, the pages have to be read from the journal in
369** reverse order and put back into the original database file.
drhed7c8552001-04-11 14:29:21 +0000370**
drhd9b02572001-04-15 00:37:09 +0000371** If the file opened as the journal file is not a well-formed
372** journal file (as determined by looking at the magic number
373** at the beginning) then this routine returns SQLITE_PROTOCOL.
374** If any other errors occur during playback, the database will
375** likely be corrupted, so the PAGER_ERR_CORRUPT bit is set in
376** pPager->errMask and SQLITE_CORRUPT is returned. If it all
377** works, then this routine returns SQLITE_OK.
drhed7c8552001-04-11 14:29:21 +0000378*/
drhd9b02572001-04-15 00:37:09 +0000379static int pager_playback(Pager *pPager){
380 int nRec; /* Number of Records */
381 int i; /* Loop counter */
382 Pgno mxPg = 0; /* Size of the original file in pages */
383 struct stat statbuf; /* Used to size the journal */
384 PgHdr *pPg; /* An existing page in the cache */
385 PageRecord pgRec;
386 unsigned char aMagic[sizeof(aJournalMagic)];
drhed7c8552001-04-11 14:29:21 +0000387 int rc;
388
drhd9b02572001-04-15 00:37:09 +0000389 /* Read the beginning of the journal and truncate the
390 ** database file back to its original size.
drhed7c8552001-04-11 14:29:21 +0000391 */
drhd9b02572001-04-15 00:37:09 +0000392 assert( pPager->jfd>=0 );
393 pager_seek(pPager->jfd, 0);
394 rc = pager_read(pPager->jfd, aMagic, sizeof(aMagic));
395 if( rc!=SQLITE_OK || memcmp(aMagic,aJournalMagic,sizeof(aMagic))!=0 ){
396 return SQLITE_PROTOCOL;
397 }
398 rc = pager_read(pPager->jfd, &mxPg, sizeof(mxPg));
399 if( rc!=SQLITE_OK ){
400 return SQLITE_PROTOCOL;
401 }
402 pager_truncate(pPager->fd, mxPg);
403 pPager->dbSize = mxPg;
404
405 /* Begin reading the journal beginning at the end and moving
406 ** toward the beginning.
407 */
408 if( fstat(pPager->jfd, &statbuf)!=0 ){
drhed7c8552001-04-11 14:29:21 +0000409 return SQLITE_OK;
410 }
drhd9b02572001-04-15 00:37:09 +0000411 nRec = (statbuf.st_size - (sizeof(aMagic)+sizeof(Pgno))) / sizeof(PageRecord);
drhed7c8552001-04-11 14:29:21 +0000412
413 /* Process segments beginning with the last and working backwards
414 ** to the first.
415 */
drhd9b02572001-04-15 00:37:09 +0000416 for(i=nRec-1; i>=0; i--){
drhed7c8552001-04-11 14:29:21 +0000417 /* Seek to the beginning of the segment */
drhd9b02572001-04-15 00:37:09 +0000418 off_t ofst;
419 ofst = i*sizeof(PageRecord) + sizeof(aMagic) + sizeof(Pgno);
420 rc = pager_seek(pPager->jfd, ofst);
421 if( rc!=SQLITE_OK ) break;
422 rc = pager_read(pPager->jfd, &pgRec, sizeof(pgRec));
423 if( rc!=SQLITE_OK ) break;
drhed7c8552001-04-11 14:29:21 +0000424
drhd9b02572001-04-15 00:37:09 +0000425 /* Sanity checking on the page */
426 if( pgRec.pgno>mxPg || pgRec.pgno==0 ){
427 rc = SQLITE_CORRUPT;
428 break;
drhed7c8552001-04-11 14:29:21 +0000429 }
430
drhd9b02572001-04-15 00:37:09 +0000431 /* Playback the page. Update the in-memory copy of the page
432 ** at the same time, if there is one.
drhed7c8552001-04-11 14:29:21 +0000433 */
drhd9b02572001-04-15 00:37:09 +0000434 pPg = pager_lookup(pPager, pgRec.pgno);
435 if( pPg ){
436 memcpy(PGHDR_TO_DATA(pPg), pgRec.aData, SQLITE_PAGE_SIZE);
drh6019e162001-07-02 17:51:45 +0000437 memset(PGHDR_TO_EXTRA(pPg), 0, pPager->nExtra);
drhed7c8552001-04-11 14:29:21 +0000438 }
drhd9b02572001-04-15 00:37:09 +0000439 rc = pager_seek(pPager->fd, (pgRec.pgno-1)*SQLITE_PAGE_SIZE);
440 if( rc!=SQLITE_OK ) break;
441 rc = pager_write(pPager->fd, pgRec.aData, SQLITE_PAGE_SIZE);
442 if( rc!=SQLITE_OK ) break;
drhed7c8552001-04-11 14:29:21 +0000443 }
drhd9b02572001-04-15 00:37:09 +0000444 if( rc!=SQLITE_OK ){
445 pager_unwritelock(pPager);
446 pPager->errMask |= PAGER_ERR_CORRUPT;
447 rc = SQLITE_CORRUPT;
448 }else{
449 rc = pager_unwritelock(pPager);
drhed7c8552001-04-11 14:29:21 +0000450 }
drhd9b02572001-04-15 00:37:09 +0000451 return rc;
drhed7c8552001-04-11 14:29:21 +0000452}
453
/*
** Pick a directory in which a temporary file could be created.
**
** A fixed list of candidates is tried in order.  The first one that
** exists, is a directory, and passes access() with mode 07 (read,
** write and search permission) is returned.  Return NULL if no
** candidate qualifies.
*/
static const char *findTempDir(void){
  static const char *azDirs[] = {
     ".",
     "/var/tmp",
     "/usr/tmp",
     "/tmp",
     "/temp",
     "./temp",
  };
  const int nDir = sizeof(azDirs)/sizeof(azDirs[0]);
  int i;
  for(i=0; i<nDir; i++){
    struct stat sb;
    const char *zDir = azDirs[i];
    if( stat(zDir, &sb)==0 && S_ISDIR(sb.st_mode) && access(zDir, 07)==0 ){
      return zDir;
    }
  }
  return 0;
}
477
478/*
drhf57b14a2001-09-14 18:54:08 +0000479** Change the maximum number of in-memory pages that are allowed.
480*/
481void sqlitepager_set_cachesize(Pager *pPager, int mxPage){
482 if( mxPage>10 ){
483 pPager->mxPage = mxPage;
484 }
485}
486
487/*
drhed7c8552001-04-11 14:29:21 +0000488** Create a new page cache and put a pointer to the page cache in *ppPager.
drh5e00f6c2001-09-13 13:46:56 +0000489** The file to be cached need not exist. The file is not locked until
drhd9b02572001-04-15 00:37:09 +0000490** the first call to sqlitepager_get() and is only held open until the
491** last page is released using sqlitepager_unref().
drhed7c8552001-04-11 14:29:21 +0000492*/
drh7e3b0a02001-04-28 16:52:40 +0000493int sqlitepager_open(
494 Pager **ppPager, /* Return the Pager structure here */
495 const char *zFilename, /* Name of the database file to open */
496 int mxPage, /* Max number of in-memory cache pages */
497 int nExtra /* Extra bytes append to each in-memory page */
498){
drhed7c8552001-04-11 14:29:21 +0000499 Pager *pPager;
500 int nameLen;
501 int fd;
drh5e00f6c2001-09-13 13:46:56 +0000502 int tempFile;
503 int readOnly = 0;
504 char zTemp[300];
drhed7c8552001-04-11 14:29:21 +0000505
drhd9b02572001-04-15 00:37:09 +0000506 *ppPager = 0;
507 if( sqlite_malloc_failed ){
508 return SQLITE_NOMEM;
509 }
drh5e00f6c2001-09-13 13:46:56 +0000510 if( zFilename ){
511 fd = open(zFilename, O_RDWR|O_CREAT, 0644);
512 if( fd<0 ){
513 fd = open(zFilename, O_RDONLY, 0);
514 readOnly = 1;
515 }
516 tempFile = 0;
517 }else{
518 int cnt = 8;
drhbe0072d2001-09-13 14:46:09 +0000519 const char *zDir = findTempDir();
drh5e00f6c2001-09-13 13:46:56 +0000520 if( zDir==0 ) return SQLITE_CANTOPEN;
521 do{
522 cnt--;
drhbe0072d2001-09-13 14:46:09 +0000523 sprintf(zTemp,"%s/_sqlite_%u", zDir, (unsigned)sqliteRandomInteger());
drh5e00f6c2001-09-13 13:46:56 +0000524 fd = open(zTemp, O_RDWR|O_CREAT|O_EXCL, 0600);
525 }while( cnt>0 && fd<0 );
526 zFilename = zTemp;
527 tempFile = 1;
528 }
drhed7c8552001-04-11 14:29:21 +0000529 if( fd<0 ){
530 return SQLITE_CANTOPEN;
531 }
532 nameLen = strlen(zFilename);
533 pPager = sqliteMalloc( sizeof(*pPager) + nameLen*2 + 30 );
drhd9b02572001-04-15 00:37:09 +0000534 if( pPager==0 ){
535 close(fd);
536 return SQLITE_NOMEM;
537 }
drhed7c8552001-04-11 14:29:21 +0000538 pPager->zFilename = (char*)&pPager[1];
539 pPager->zJournal = &pPager->zFilename[nameLen+1];
540 strcpy(pPager->zFilename, zFilename);
541 strcpy(pPager->zJournal, zFilename);
542 strcpy(&pPager->zJournal[nameLen], "-journal");
543 pPager->fd = fd;
544 pPager->jfd = -1;
545 pPager->nRef = 0;
546 pPager->dbSize = -1;
547 pPager->nPage = 0;
drhd79caeb2001-04-15 02:27:24 +0000548 pPager->mxPage = mxPage>5 ? mxPage : 10;
drhed7c8552001-04-11 14:29:21 +0000549 pPager->state = SQLITE_UNLOCK;
drhd9b02572001-04-15 00:37:09 +0000550 pPager->errMask = 0;
drh5e00f6c2001-09-13 13:46:56 +0000551 pPager->tempFile = tempFile;
552 pPager->readOnly = readOnly;
drhf57b14a2001-09-14 18:54:08 +0000553 pPager->needSync = 0;
drhed7c8552001-04-11 14:29:21 +0000554 pPager->pFirst = 0;
555 pPager->pLast = 0;
drh7c717f72001-06-24 20:39:41 +0000556 pPager->nExtra = nExtra;
drhed7c8552001-04-11 14:29:21 +0000557 memset(pPager->aHash, 0, sizeof(pPager->aHash));
558 *ppPager = pPager;
559 return SQLITE_OK;
560}
561
562/*
drh72f82862001-05-24 21:06:34 +0000563** Set the destructor for this pager. If not NULL, the destructor is called
drh5e00f6c2001-09-13 13:46:56 +0000564** when the reference count on each page reaches zero. The destructor can
565** be used to clean up information in the extra segment appended to each page.
drh72f82862001-05-24 21:06:34 +0000566**
567** The destructor is not called as a result sqlitepager_close().
568** Destructors are only called by sqlitepager_unref().
569*/
570void sqlitepager_set_destructor(Pager *pPager, void (*xDesc)(void*)){
571 pPager->xDestructor = xDesc;
572}
573
574/*
drh5e00f6c2001-09-13 13:46:56 +0000575** Return the total number of pages in the disk file associated with
576** pPager.
drhed7c8552001-04-11 14:29:21 +0000577*/
drhd9b02572001-04-15 00:37:09 +0000578int sqlitepager_pagecount(Pager *pPager){
drhed7c8552001-04-11 14:29:21 +0000579 int n;
580 struct stat statbuf;
drhd9b02572001-04-15 00:37:09 +0000581 assert( pPager!=0 );
drhed7c8552001-04-11 14:29:21 +0000582 if( pPager->dbSize>=0 ){
583 return pPager->dbSize;
584 }
585 if( fstat(pPager->fd, &statbuf)!=0 ){
586 n = 0;
587 }else{
588 n = statbuf.st_size/SQLITE_PAGE_SIZE;
589 }
drhd9b02572001-04-15 00:37:09 +0000590 if( pPager->state!=SQLITE_UNLOCK ){
drhed7c8552001-04-11 14:29:21 +0000591 pPager->dbSize = n;
592 }
593 return n;
594}
595
596/*
597** Shutdown the page cache. Free all memory and close all files.
598**
599** If a transaction was in progress when this routine is called, that
600** transaction is rolled back. All outstanding pages are invalidated
601** and their memory is freed. Any attempt to use a page associated
602** with this page cache after this function returns will likely
603** result in a coredump.
604*/
drhd9b02572001-04-15 00:37:09 +0000605int sqlitepager_close(Pager *pPager){
606 PgHdr *pPg, *pNext;
drhed7c8552001-04-11 14:29:21 +0000607 switch( pPager->state ){
608 case SQLITE_WRITELOCK: {
drhd9b02572001-04-15 00:37:09 +0000609 sqlitepager_rollback(pPager);
610 pager_unlock(pPager->fd);
drhed7c8552001-04-11 14:29:21 +0000611 break;
612 }
613 case SQLITE_READLOCK: {
drhd9b02572001-04-15 00:37:09 +0000614 pager_unlock(pPager->fd);
drhed7c8552001-04-11 14:29:21 +0000615 break;
616 }
617 default: {
618 /* Do nothing */
619 break;
620 }
621 }
drhd9b02572001-04-15 00:37:09 +0000622 for(pPg=pPager->pAll; pPg; pPg=pNext){
623 pNext = pPg->pNextAll;
624 sqliteFree(pPg);
drhed7c8552001-04-11 14:29:21 +0000625 }
626 if( pPager->fd>=0 ) close(pPager->fd);
627 assert( pPager->jfd<0 );
drh5e00f6c2001-09-13 13:46:56 +0000628 if( pPager->tempFile ){
629 unlink(pPager->zFilename);
630 }
drhed7c8552001-04-11 14:29:21 +0000631 sqliteFree(pPager);
632 return SQLITE_OK;
633}
634
635/*
drh5e00f6c2001-09-13 13:46:56 +0000636** Return the page number for the given page data.
drhed7c8552001-04-11 14:29:21 +0000637*/
drhd9b02572001-04-15 00:37:09 +0000638Pgno sqlitepager_pagenumber(void *pData){
drhed7c8552001-04-11 14:29:21 +0000639 PgHdr *p = DATA_TO_PGHDR(pData);
640 return p->pgno;
641}
642
643/*
drh7e3b0a02001-04-28 16:52:40 +0000644** Increment the reference count for a page. If the page is
645** currently on the freelist (the reference count is zero) then
646** remove it from the freelist.
647*/
drhdf0b3b02001-06-23 11:36:20 +0000648static void page_ref(PgHdr *pPg){
drh7e3b0a02001-04-28 16:52:40 +0000649 if( pPg->nRef==0 ){
650 /* The page is currently on the freelist. Remove it. */
651 if( pPg->pPrevFree ){
652 pPg->pPrevFree->pNextFree = pPg->pNextFree;
653 }else{
654 pPg->pPager->pFirst = pPg->pNextFree;
655 }
656 if( pPg->pNextFree ){
657 pPg->pNextFree->pPrevFree = pPg->pPrevFree;
658 }else{
659 pPg->pPager->pLast = pPg->pPrevFree;
660 }
661 pPg->pPager->nRef++;
662 }
663 pPg->nRef++;
drhdd793422001-06-28 01:54:48 +0000664 REFINFO(pPg);
drhdf0b3b02001-06-23 11:36:20 +0000665}
666
667/*
668** Increment the reference count for a page. The input pointer is
669** a reference to the page data.
670*/
671int sqlitepager_ref(void *pData){
672 PgHdr *pPg = DATA_TO_PGHDR(pData);
673 page_ref(pPg);
drh8c42ca92001-06-22 19:15:00 +0000674 return SQLITE_OK;
drh7e3b0a02001-04-28 16:52:40 +0000675}
676
677/*
drhb19a2bc2001-09-16 00:13:26 +0000678** Sync the journal and then write all free dirty pages to the database
679** file.
680**
681** Writing all free dirty pages to the database after the sync is a
682** non-obvious optimization. fsync() is an expensive operation so we
683** want to minimize the number that occur. So after an fsync() is forced
684** and we are free to write dirty pages back to the database, it is best
685** to go ahead and do as much of that as possible to minimize the chance
686** of having to do another fsync() later on. Writing dirty free pages
687** in this way make database operations go up to 10 times faster.
drh50e5dad2001-09-15 00:57:28 +0000688*/
689static int syncAllPages(Pager *pPager){
690 PgHdr *pPg;
691 int rc = SQLITE_OK;
692 if( pPager->needSync ){
693 rc = fsync(pPager->jfd);
694 if( rc!=0 ) return rc;
695 pPager->needSync = 0;
696 }
697 for(pPg=pPager->pFirst; pPg; pPg=pPg->pNextFree){
698 if( pPg->dirty ){
699 pager_seek(pPager->fd, (pPg->pgno-1)*SQLITE_PAGE_SIZE);
700 rc = pager_write(pPager->fd, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE);
701 if( rc!=SQLITE_OK ) break;
702 pPg->dirty = 0;
703 }
704 }
705 return SQLITE_OK;
706}
707
708/*
drhd9b02572001-04-15 00:37:09 +0000709** Acquire a page.
710**
drh5e00f6c2001-09-13 13:46:56 +0000711** A read lock on the disk file is obtained when the first page acquired.
712** This read lock is dropped when the last page is released.
drhd9b02572001-04-15 00:37:09 +0000713**
drh306dc212001-05-21 13:45:10 +0000714** A _get works for any page number greater than 0. If the database
715** file is smaller than the requested page, then no actual disk
716** read occurs and the memory image of the page is initialized to
717** all zeros. The extra data appended to a page is always initialized
718** to zeros the first time a page is loaded into memory.
719**
drhd9b02572001-04-15 00:37:09 +0000720** The acquisition might fail for several reasons. In all cases,
721** an appropriate error code is returned and *ppPage is set to NULL.
drh7e3b0a02001-04-28 16:52:40 +0000722**
723** See also sqlitepager_lookup(). Both this routine and _lookup() attempt
724** to find a page in the in-memory cache first. If the page is not already
drh5e00f6c2001-09-13 13:46:56 +0000725** in memory, this routine goes to disk to read it in whereas _lookup()
drh7e3b0a02001-04-28 16:52:40 +0000726** just returns 0. This routine acquires a read-lock the first time it
727** has to go to disk, and could also playback an old journal if necessary.
728** Since _lookup() never goes to disk, it never has to deal with locks
729** or journal files.
drhed7c8552001-04-11 14:29:21 +0000730*/
drhd9b02572001-04-15 00:37:09 +0000731int sqlitepager_get(Pager *pPager, Pgno pgno, void **ppPage){
drhed7c8552001-04-11 14:29:21 +0000732 PgHdr *pPg;
733
drhd9b02572001-04-15 00:37:09 +0000734 /* Make sure we have not hit any critical errors.
735 */
736 if( pPager==0 || pgno==0 ){
737 return SQLITE_ERROR;
738 }
739 if( pPager->errMask & ~(PAGER_ERR_FULL) ){
740 return pager_errcode(pPager);
741 }
742
drhed7c8552001-04-11 14:29:21 +0000743 /* If this is the first page accessed, then get a read lock
744 ** on the database file.
745 */
746 if( pPager->nRef==0 ){
drhd9b02572001-04-15 00:37:09 +0000747 if( pager_lock(pPager->fd, 0)!=0 ){
drhed7c8552001-04-11 14:29:21 +0000748 *ppPage = 0;
749 return SQLITE_BUSY;
750 }
drhd9b02572001-04-15 00:37:09 +0000751 pPager->state = SQLITE_READLOCK;
drhed7c8552001-04-11 14:29:21 +0000752
753 /* If a journal file exists, try to play it back.
754 */
755 if( access(pPager->zJournal,0)==0 ){
756 int rc;
757
758 /* Open the journal for exclusive access. Return SQLITE_BUSY if
759 ** we cannot get exclusive access to the journal file
760 */
761 pPager->jfd = open(pPager->zJournal, O_RDONLY, 0);
drhd9b02572001-04-15 00:37:09 +0000762 if( pPager->jfd<0 || pager_lock(pPager->jfd, 1)!=0 ){
drhed7c8552001-04-11 14:29:21 +0000763 if( pPager->jfd>=0 ){ close(pPager->jfd); pPager->jfd = -1; }
drhd9b02572001-04-15 00:37:09 +0000764 pager_unlock(pPager->fd);
drhed7c8552001-04-11 14:29:21 +0000765 *ppPage = 0;
766 return SQLITE_BUSY;
767 }
768
769 /* Get a write lock on the database */
drhd9b02572001-04-15 00:37:09 +0000770 pager_unlock(pPager->fd);
771 if( pager_lock(pPager->fd, 1)!=0 ){
772 close(pPager->jfd);
773 pPager->jfd = -1;
drhed7c8552001-04-11 14:29:21 +0000774 *ppPage = 0;
775 return SQLITE_PROTOCOL;
776 }
777
778 /* Playback and delete the journal. Drop the database write
779 ** lock and reacquire the read lock.
780 */
drhd9b02572001-04-15 00:37:09 +0000781 rc = pager_playback(pPager);
782 if( rc!=SQLITE_OK ){
783 return rc;
784 }
drhed7c8552001-04-11 14:29:21 +0000785 }
786 pPg = 0;
787 }else{
788 /* Search for page in cache */
drhd9b02572001-04-15 00:37:09 +0000789 pPg = pager_lookup(pPager, pgno);
drhed7c8552001-04-11 14:29:21 +0000790 }
791 if( pPg==0 ){
drhd9b02572001-04-15 00:37:09 +0000792 /* The requested page is not in the page cache. */
drhed7c8552001-04-11 14:29:21 +0000793 int h;
drh7e3b0a02001-04-28 16:52:40 +0000794 pPager->nMiss++;
drhed7c8552001-04-11 14:29:21 +0000795 if( pPager->nPage<pPager->mxPage || pPager->pFirst==0 ){
796 /* Create a new page */
drh7e3b0a02001-04-28 16:52:40 +0000797 pPg = sqliteMalloc( sizeof(*pPg) + SQLITE_PAGE_SIZE + pPager->nExtra );
drhd9b02572001-04-15 00:37:09 +0000798 if( pPg==0 ){
799 *ppPage = 0;
800 pager_unwritelock(pPager);
801 pPager->errMask |= PAGER_ERR_MEM;
802 return SQLITE_NOMEM;
803 }
drhed7c8552001-04-11 14:29:21 +0000804 pPg->pPager = pPager;
drhd9b02572001-04-15 00:37:09 +0000805 pPg->pNextAll = pPager->pAll;
806 if( pPager->pAll ){
807 pPager->pAll->pPrevAll = pPg;
808 }
809 pPg->pPrevAll = 0;
drhd79caeb2001-04-15 02:27:24 +0000810 pPager->pAll = pPg;
drhd9b02572001-04-15 00:37:09 +0000811 pPager->nPage++;
drhed7c8552001-04-11 14:29:21 +0000812 }else{
drhd9b02572001-04-15 00:37:09 +0000813 /* Recycle an older page. First locate the page to be recycled.
814 ** Try to find one that is not dirty and is near the head of
815 ** of the free list */
drh50e5dad2001-09-15 00:57:28 +0000816 int cnt = pPager->mxPage/2;
drhed7c8552001-04-11 14:29:21 +0000817 pPg = pPager->pFirst;
drh6019e162001-07-02 17:51:45 +0000818 while( pPg->dirty && 0<cnt-- && pPg->pNextFree ){
drhd9b02572001-04-15 00:37:09 +0000819 pPg = pPg->pNextFree;
820 }
drhb19a2bc2001-09-16 00:13:26 +0000821
822 /* If we could not find a page that has not been used recently
823 ** and which is not dirty, then sync the journal and write all
824 ** dirty free pages into the database file, thus making them
825 ** clean pages and available for recycling.
826 **
827 ** We have to sync the journal before writing a page to the main
828 ** database. But syncing is a very slow operation. So after a
829 ** sync, it is best to write everything we can back to the main
830 ** database to minimize the risk of having to sync again in the
831 ** near future. That is way we write all dirty pages after a
832 ** sync.
833 */
drh50e5dad2001-09-15 00:57:28 +0000834 if( pPg==0 || pPg->dirty ){
835 int rc = syncAllPages(pPager);
836 if( rc!=0 ){
837 sqlitepager_rollback(pPager);
838 *ppPage = 0;
839 return SQLITE_IOERR;
840 }
841 pPg = pPager->pFirst;
842 }
drhd9b02572001-04-15 00:37:09 +0000843 assert( pPg->nRef==0 );
drh50e5dad2001-09-15 00:57:28 +0000844 assert( pPg->dirty==0 );
drhd9b02572001-04-15 00:37:09 +0000845
846 /* Unlink the old page from the free list and the hash table
847 */
drh6019e162001-07-02 17:51:45 +0000848 if( pPg->pPrevFree ){
849 pPg->pPrevFree->pNextFree = pPg->pNextFree;
drhed7c8552001-04-11 14:29:21 +0000850 }else{
drh6019e162001-07-02 17:51:45 +0000851 assert( pPager->pFirst==pPg );
852 pPager->pFirst = pPg->pNextFree;
drhed7c8552001-04-11 14:29:21 +0000853 }
drh6019e162001-07-02 17:51:45 +0000854 if( pPg->pNextFree ){
855 pPg->pNextFree->pPrevFree = pPg->pPrevFree;
856 }else{
857 assert( pPager->pLast==pPg );
858 pPager->pLast = pPg->pPrevFree;
859 }
860 pPg->pNextFree = pPg->pPrevFree = 0;
drhed7c8552001-04-11 14:29:21 +0000861 if( pPg->pNextHash ){
862 pPg->pNextHash->pPrevHash = pPg->pPrevHash;
863 }
864 if( pPg->pPrevHash ){
865 pPg->pPrevHash->pNextHash = pPg->pNextHash;
866 }else{
drhd9b02572001-04-15 00:37:09 +0000867 h = pager_hash(pPg->pgno);
drhed7c8552001-04-11 14:29:21 +0000868 assert( pPager->aHash[h]==pPg );
869 pPager->aHash[h] = pPg->pNextHash;
870 }
drh6019e162001-07-02 17:51:45 +0000871 pPg->pNextHash = pPg->pPrevHash = 0;
drhd9b02572001-04-15 00:37:09 +0000872 pPager->nOvfl++;
drhed7c8552001-04-11 14:29:21 +0000873 }
874 pPg->pgno = pgno;
drh6019e162001-07-02 17:51:45 +0000875 if( pPager->aInJournal && pgno<=pPager->origDbSize ){
876 pPg->inJournal = (pPager->aInJournal[pgno/8] & (1<<(pgno&7)))!=0;
877 }else{
878 pPg->inJournal = 0;
879 }
drhed7c8552001-04-11 14:29:21 +0000880 pPg->dirty = 0;
881 pPg->nRef = 1;
drhdd793422001-06-28 01:54:48 +0000882 REFINFO(pPg);
drhd9b02572001-04-15 00:37:09 +0000883 pPager->nRef++;
884 h = pager_hash(pgno);
drhed7c8552001-04-11 14:29:21 +0000885 pPg->pNextHash = pPager->aHash[h];
886 pPager->aHash[h] = pPg;
887 if( pPg->pNextHash ){
888 assert( pPg->pNextHash->pPrevHash==0 );
889 pPg->pNextHash->pPrevHash = pPg;
890 }
drh306dc212001-05-21 13:45:10 +0000891 if( pPager->dbSize<0 ) sqlitepager_pagecount(pPager);
892 if( pPager->dbSize<pgno ){
893 memset(PGHDR_TO_DATA(pPg), 0, SQLITE_PAGE_SIZE);
894 }else{
895 pager_seek(pPager->fd, (pgno-1)*SQLITE_PAGE_SIZE);
896 pager_read(pPager->fd, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE);
897 }
drh7e3b0a02001-04-28 16:52:40 +0000898 if( pPager->nExtra>0 ){
899 memset(PGHDR_TO_EXTRA(pPg), 0, pPager->nExtra);
900 }
drhed7c8552001-04-11 14:29:21 +0000901 }else{
drhd9b02572001-04-15 00:37:09 +0000902 /* The requested page is in the page cache. */
drh7e3b0a02001-04-28 16:52:40 +0000903 pPager->nHit++;
drhdf0b3b02001-06-23 11:36:20 +0000904 page_ref(pPg);
drhed7c8552001-04-11 14:29:21 +0000905 }
906 *ppPage = PGHDR_TO_DATA(pPg);
907 return SQLITE_OK;
908}
909
910/*
drh7e3b0a02001-04-28 16:52:40 +0000911** Acquire a page if it is already in the in-memory cache. Do
912** not read the page from disk. Return a pointer to the page,
913** or 0 if the page is not in cache.
914**
915** See also sqlitepager_get(). The difference between this routine
916** and sqlitepager_get() is that _get() will go to the disk and read
917** in the page if the page is not already in cache. This routine
drh5e00f6c2001-09-13 13:46:56 +0000918** returns NULL if the page is not in cache or if a disk I/O error
919** has ever happened.
drh7e3b0a02001-04-28 16:52:40 +0000920*/
921void *sqlitepager_lookup(Pager *pPager, Pgno pgno){
922 PgHdr *pPg;
923
924 /* Make sure we have not hit any critical errors.
925 */
926 if( pPager==0 || pgno==0 ){
927 return 0;
928 }
929 if( pPager->errMask & ~(PAGER_ERR_FULL) ){
930 return 0;
931 }
932 if( pPager->nRef==0 ){
933 return 0;
934 }
935 pPg = pager_lookup(pPager, pgno);
936 if( pPg==0 ) return 0;
drhdf0b3b02001-06-23 11:36:20 +0000937 page_ref(pPg);
drh7e3b0a02001-04-28 16:52:40 +0000938 return PGHDR_TO_DATA(pPg);
939}
940
941/*
drhed7c8552001-04-11 14:29:21 +0000942** Release a page.
943**
944** If the number of references to the page drop to zero, then the
945** page is added to the LRU list. When all references to all pages
drhd9b02572001-04-15 00:37:09 +0000946** are released, a rollback occurs and the lock on the database is
drhed7c8552001-04-11 14:29:21 +0000947** removed.
948*/
drhd9b02572001-04-15 00:37:09 +0000949int sqlitepager_unref(void *pData){
drhed7c8552001-04-11 14:29:21 +0000950 Pager *pPager;
951 PgHdr *pPg;
drhd9b02572001-04-15 00:37:09 +0000952
953 /* Decrement the reference count for this page
954 */
drhed7c8552001-04-11 14:29:21 +0000955 pPg = DATA_TO_PGHDR(pData);
956 assert( pPg->nRef>0 );
957 pPager = pPg->pPager;
958 pPg->nRef--;
drhdd793422001-06-28 01:54:48 +0000959 REFINFO(pPg);
drhd9b02572001-04-15 00:37:09 +0000960
drh72f82862001-05-24 21:06:34 +0000961 /* When the number of references to a page reach 0, call the
962 ** destructor and add the page to the freelist.
drhd9b02572001-04-15 00:37:09 +0000963 */
drhed7c8552001-04-11 14:29:21 +0000964 if( pPg->nRef==0 ){
drhd9b02572001-04-15 00:37:09 +0000965 pPg->pNextFree = 0;
966 pPg->pPrevFree = pPager->pLast;
drhed7c8552001-04-11 14:29:21 +0000967 pPager->pLast = pPg;
drhd9b02572001-04-15 00:37:09 +0000968 if( pPg->pPrevFree ){
969 pPg->pPrevFree->pNextFree = pPg;
drhed7c8552001-04-11 14:29:21 +0000970 }else{
971 pPager->pFirst = pPg;
972 }
drh72f82862001-05-24 21:06:34 +0000973 if( pPager->xDestructor ){
974 pPager->xDestructor(pData);
975 }
drhd9b02572001-04-15 00:37:09 +0000976
977 /* When all pages reach the freelist, drop the read lock from
978 ** the database file.
979 */
980 pPager->nRef--;
981 assert( pPager->nRef>=0 );
982 if( pPager->nRef==0 ){
983 pager_reset(pPager);
984 }
drhed7c8552001-04-11 14:29:21 +0000985 }
drhd9b02572001-04-15 00:37:09 +0000986 return SQLITE_OK;
drhed7c8552001-04-11 14:29:21 +0000987}
988
989/*
990** Mark a data page as writeable. The page is written into the journal
991** if it is not there already. This routine must be called before making
992** changes to a page.
993**
994** The first time this routine is called, the pager creates a new
995** journal and acquires a write lock on the database. If the write
996** lock could not be acquired, this routine returns SQLITE_BUSY. The
drh306dc212001-05-21 13:45:10 +0000997** calling routine must check for that return value and be careful not to
drhed7c8552001-04-11 14:29:21 +0000998** change any page data until this routine returns SQLITE_OK.
drhd9b02572001-04-15 00:37:09 +0000999**
1000** If the journal file could not be written because the disk is full,
1001** then this routine returns SQLITE_FULL and does an immediate rollback.
1002** All subsequent write attempts also return SQLITE_FULL until there
1003** is a call to sqlitepager_commit() or sqlitepager_rollback() to
1004** reset.
drhed7c8552001-04-11 14:29:21 +00001005*/
drhd9b02572001-04-15 00:37:09 +00001006int sqlitepager_write(void *pData){
drh69688d52001-04-14 16:38:23 +00001007 PgHdr *pPg = DATA_TO_PGHDR(pData);
1008 Pager *pPager = pPg->pPager;
drhd79caeb2001-04-15 02:27:24 +00001009 int rc = SQLITE_OK;
drh69688d52001-04-14 16:38:23 +00001010
drhd9b02572001-04-15 00:37:09 +00001011 if( pPager->errMask ){
1012 return pager_errcode(pPager);
1013 }
drh5e00f6c2001-09-13 13:46:56 +00001014 if( pPager->readOnly ){
1015 return SQLITE_PERM;
1016 }
drhd9b02572001-04-15 00:37:09 +00001017 pPg->dirty = 1;
drh69688d52001-04-14 16:38:23 +00001018 if( pPg->inJournal ){ return SQLITE_OK; }
drhd9b02572001-04-15 00:37:09 +00001019 assert( pPager->state!=SQLITE_UNLOCK );
drhed7c8552001-04-11 14:29:21 +00001020 if( pPager->state==SQLITE_READLOCK ){
drh6019e162001-07-02 17:51:45 +00001021 assert( pPager->aInJournal==0 );
1022 pPager->aInJournal = sqliteMalloc( pPager->dbSize/8 + 1 );
1023 if( pPager->aInJournal==0 ){
1024 return SQLITE_NOMEM;
1025 }
drhed7c8552001-04-11 14:29:21 +00001026 pPager->jfd = open(pPager->zJournal, O_RDWR|O_CREAT, 0644);
1027 if( pPager->jfd<0 ){
1028 return SQLITE_CANTOPEN;
1029 }
drhf57b14a2001-09-14 18:54:08 +00001030 pPager->needSync = 0;
drhd9b02572001-04-15 00:37:09 +00001031 if( pager_lock(pPager->jfd, 1) ){
drhed7c8552001-04-11 14:29:21 +00001032 close(pPager->jfd);
1033 pPager->jfd = -1;
1034 return SQLITE_BUSY;
1035 }
drhd9b02572001-04-15 00:37:09 +00001036 pager_unlock(pPager->fd);
1037 if( pager_lock(pPager->fd, 1) ){
drhed7c8552001-04-11 14:29:21 +00001038 close(pPager->jfd);
1039 pPager->jfd = -1;
1040 pPager->state = SQLITE_UNLOCK;
drhd9b02572001-04-15 00:37:09 +00001041 pPager->errMask |= PAGER_ERR_LOCK;
drhed7c8552001-04-11 14:29:21 +00001042 return SQLITE_PROTOCOL;
1043 }
1044 pPager->state = SQLITE_WRITELOCK;
drhd9b02572001-04-15 00:37:09 +00001045 sqlitepager_pagecount(pPager);
drh69688d52001-04-14 16:38:23 +00001046 pPager->origDbSize = pPager->dbSize;
drhd9b02572001-04-15 00:37:09 +00001047 rc = pager_write(pPager->jfd, aJournalMagic, sizeof(aJournalMagic));
1048 if( rc==SQLITE_OK ){
1049 rc = pager_write(pPager->jfd, &pPager->dbSize, sizeof(Pgno));
1050 }
1051 if( rc!=SQLITE_OK ){
1052 rc = pager_unwritelock(pPager);
1053 if( rc==SQLITE_OK ) rc = SQLITE_FULL;
1054 return rc;
1055 }
drhed7c8552001-04-11 14:29:21 +00001056 }
drhd9b02572001-04-15 00:37:09 +00001057 assert( pPager->state==SQLITE_WRITELOCK );
drh69688d52001-04-14 16:38:23 +00001058 assert( pPager->jfd>=0 );
drhd9b02572001-04-15 00:37:09 +00001059 if( pPg->pgno <= pPager->origDbSize ){
1060 rc = pager_write(pPager->jfd, &pPg->pgno, sizeof(Pgno));
1061 if( rc==SQLITE_OK ){
1062 rc = pager_write(pPager->jfd, pData, SQLITE_PAGE_SIZE);
1063 }
1064 if( rc!=SQLITE_OK ){
1065 sqlitepager_rollback(pPager);
1066 pPager->errMask |= PAGER_ERR_FULL;
1067 return rc;
1068 }
drh6019e162001-07-02 17:51:45 +00001069 assert( pPager->aInJournal!=0 );
1070 pPager->aInJournal[pPg->pgno/8] |= 1<<(pPg->pgno&7);
drhf57b14a2001-09-14 18:54:08 +00001071 pPager->needSync = 1;
drh69688d52001-04-14 16:38:23 +00001072 }
drh69688d52001-04-14 16:38:23 +00001073 pPg->inJournal = 1;
drh306dc212001-05-21 13:45:10 +00001074 if( pPager->dbSize<pPg->pgno ){
1075 pPager->dbSize = pPg->pgno;
1076 }
drh69688d52001-04-14 16:38:23 +00001077 return rc;
drhed7c8552001-04-11 14:29:21 +00001078}
1079
1080/*
drh6019e162001-07-02 17:51:45 +00001081** Return TRUE if the page given in the argument was previous passed
1082** to sqlitepager_write(). In other words, return TRUE if it is ok
1083** to change the content of the page.
1084*/
1085int sqlitepager_iswriteable(void *pData){
1086 PgHdr *pPg = DATA_TO_PGHDR(pData);
1087 return pPg->dirty;
1088}
1089
1090/*
drhed7c8552001-04-11 14:29:21 +00001091** Commit all changes to the database and release the write lock.
drhd9b02572001-04-15 00:37:09 +00001092**
1093** If the commit fails for any reason, a rollback attempt is made
1094** and an error code is returned. If the commit worked, SQLITE_OK
1095** is returned.
drhed7c8552001-04-11 14:29:21 +00001096*/
drhd9b02572001-04-15 00:37:09 +00001097int sqlitepager_commit(Pager *pPager){
drha1b351a2001-09-14 16:42:12 +00001098 int rc;
drhed7c8552001-04-11 14:29:21 +00001099 PgHdr *pPg;
drhd9b02572001-04-15 00:37:09 +00001100
1101 if( pPager->errMask==PAGER_ERR_FULL ){
1102 rc = sqlitepager_rollback(pPager);
1103 if( rc==SQLITE_OK ) rc = SQLITE_FULL;
1104 return rc;
1105 }
1106 if( pPager->errMask!=0 ){
1107 rc = pager_errcode(pPager);
1108 return rc;
1109 }
1110 if( pPager->state!=SQLITE_WRITELOCK ){
1111 return SQLITE_ERROR;
1112 }
drhed7c8552001-04-11 14:29:21 +00001113 assert( pPager->jfd>=0 );
drhf57b14a2001-09-14 18:54:08 +00001114 if( pPager->needSync && fsync(pPager->jfd) ){
drhd9b02572001-04-15 00:37:09 +00001115 goto commit_abort;
drhed7c8552001-04-11 14:29:21 +00001116 }
drha1b351a2001-09-14 16:42:12 +00001117 for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
1118 if( pPg->dirty==0 ) continue;
1119 rc = pager_seek(pPager->fd, (pPg->pgno-1)*SQLITE_PAGE_SIZE);
1120 if( rc!=SQLITE_OK ) goto commit_abort;
1121 rc = pager_write(pPager->fd, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE);
1122 if( rc!=SQLITE_OK ) goto commit_abort;
drhed7c8552001-04-11 14:29:21 +00001123 }
drhd9b02572001-04-15 00:37:09 +00001124 if( fsync(pPager->fd) ) goto commit_abort;
1125 rc = pager_unwritelock(pPager);
1126 pPager->dbSize = -1;
1127 return rc;
1128
1129 /* Jump here if anything goes wrong during the commit process.
1130 */
1131commit_abort:
1132 rc = sqlitepager_rollback(pPager);
1133 if( rc==SQLITE_OK ){
1134 rc = SQLITE_FULL;
drhed7c8552001-04-11 14:29:21 +00001135 }
drhed7c8552001-04-11 14:29:21 +00001136 return rc;
1137}
1138
1139/*
1140** Rollback all changes. The database falls back to read-only mode.
1141** All in-memory cache pages revert to their original data contents.
1142** The journal is deleted.
drhd9b02572001-04-15 00:37:09 +00001143**
1144** This routine cannot fail unless some other process is not following
1145** the correct locking protocol (SQLITE_PROTOCOL) or unless some other
1146** process is writing trash into the journal file (SQLITE_CORRUPT) or
1147** unless a prior malloc() failed (SQLITE_NOMEM). Appropriate error
1148** codes are returned for all these occasions. Otherwise,
1149** SQLITE_OK is returned.
drhed7c8552001-04-11 14:29:21 +00001150*/
drhd9b02572001-04-15 00:37:09 +00001151int sqlitepager_rollback(Pager *pPager){
drhed7c8552001-04-11 14:29:21 +00001152 int rc;
drhd9b02572001-04-15 00:37:09 +00001153 if( pPager->errMask!=0 && pPager->errMask!=PAGER_ERR_FULL ){
1154 return pager_errcode(pPager);
drhed7c8552001-04-11 14:29:21 +00001155 }
drhd9b02572001-04-15 00:37:09 +00001156 if( pPager->state!=SQLITE_WRITELOCK ){
1157 return SQLITE_OK;
1158 }
1159 rc = pager_playback(pPager);
1160 if( rc!=SQLITE_OK ){
1161 rc = SQLITE_CORRUPT;
1162 pPager->errMask |= PAGER_ERR_CORRUPT;
1163 }
1164 pPager->dbSize = -1;
drhed7c8552001-04-11 14:29:21 +00001165 return rc;
1166};
drhd9b02572001-04-15 00:37:09 +00001167
1168/*
drh5e00f6c2001-09-13 13:46:56 +00001169** Return TRUE if the database file is opened read-only. Return FALSE
1170** if the database is (in theory) writable.
1171*/
1172int sqlitepager_isreadonly(Pager *pPager){
drhbe0072d2001-09-13 14:46:09 +00001173 return pPager->readOnly;
drh5e00f6c2001-09-13 13:46:56 +00001174}
1175
1176/*
drhd9b02572001-04-15 00:37:09 +00001177** This routine is used for testing and analysis only.
1178*/
1179int *sqlitepager_stats(Pager *pPager){
1180 static int a[9];
1181 a[0] = pPager->nRef;
1182 a[1] = pPager->nPage;
1183 a[2] = pPager->mxPage;
1184 a[3] = pPager->dbSize;
1185 a[4] = pPager->state;
1186 a[5] = pPager->errMask;
1187 a[6] = pPager->nHit;
1188 a[7] = pPager->nMiss;
1189 a[8] = pPager->nOvfl;
1190 return a;
1191}
drhdd793422001-06-28 01:54:48 +00001192
#if SQLITE_TEST
/*
** Print a listing of all referenced pages and their ref count.
**
** One line is written to standard output for every page on the
** pager's list of all pages whose reference count is positive.  The
** page data address is printed with %p so that it is not truncated
** on platforms where pointers are wider than int.
*/
void sqlitepager_refdump(Pager *pPager){
  PgHdr *pPg;
  for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
    if( pPg->nRef<=0 ) continue;
    printf("PAGE %3d addr=%p nRef=%d\n",
       (int)pPg->pgno, (void*)PGHDR_TO_DATA(pPg), pPg->nRef);
  }
}
#endif