blob: 1cb2bef94590d0e37bec241e7c91cc5bc199a60d [file] [log] [blame]
drhed7c8552001-04-11 14:29:21 +00001/*
drhb19a2bc2001-09-16 00:13:26 +00002** 2001 September 15
drhed7c8552001-04-11 14:29:21 +00003**
drhb19a2bc2001-09-16 00:13:26 +00004** The author disclaims copyright to this source code. In place of
5** a legal notice, here is a blessing:
drhed7c8552001-04-11 14:29:21 +00006**
drhb19a2bc2001-09-16 00:13:26 +00007** May you do good and not evil.
8** May you find forgiveness for yourself and forgive others.
9** May you share freely, never taking more than you give.
drhed7c8552001-04-11 14:29:21 +000010**
11*************************************************************************
drhb19a2bc2001-09-16 00:13:26 +000012** This is the implementation of the page cache subsystem or "pager".
drhed7c8552001-04-11 14:29:21 +000013**
drhb19a2bc2001-09-16 00:13:26 +000014** The pager is used to access a database disk file. It implements
15** atomic commit and rollback through the use of a journal file that
16** is separate from the database file. The pager also implements file
17** locking to prevent two processes from writing the same database
18** file simultaneously, or one process from reading the database while
19** another is writing.
drhed7c8552001-04-11 14:29:21 +000020**
drha7fcb052001-12-14 15:09:55 +000021** @(#) $Id: pager.c,v 1.33 2001/12/14 15:09:57 drh Exp $
drhed7c8552001-04-11 14:29:21 +000022*/
drhd9b02572001-04-15 00:37:09 +000023#include "sqliteInt.h"
drhed7c8552001-04-11 14:29:21 +000024#include "pager.h"
drh8cfbf082001-09-19 13:22:39 +000025#include "os.h"
drhed7c8552001-04-11 14:29:21 +000026#include <assert.h>
drhd9b02572001-04-15 00:37:09 +000027#include <string.h>
drhed7c8552001-04-11 14:29:21 +000028
29/*
30** The page cache as a whole is always in one of the following
31** states:
32**
33** SQLITE_UNLOCK The page cache is not currently reading or
34** writing the database file. There is no
35** data held in memory. This is the initial
36** state.
37**
38** SQLITE_READLOCK The page cache is reading the database.
39** Writing is not permitted. There can be
40** multiple readers accessing the same database
drh69688d52001-04-14 16:38:23 +000041** file at the same time.
drhed7c8552001-04-11 14:29:21 +000042**
43** SQLITE_WRITELOCK The page cache is writing the database.
44** Access is exclusive. No other processes or
45** threads can be reading or writing while one
46** process is writing.
47**
drh306dc212001-05-21 13:45:10 +000048** The page cache comes up in SQLITE_UNLOCK. The first time a
49** sqlite_page_get() occurs, the state transitions to SQLITE_READLOCK.
drhed7c8552001-04-11 14:29:21 +000050** After all pages have been released using sqlite_page_unref(),
drh306dc212001-05-21 13:45:10 +000051** the state transitions back to SQLITE_UNLOCK. The first time
drhed7c8552001-04-11 14:29:21 +000052** that sqlite_page_write() is called, the state transitions to
drh306dc212001-05-21 13:45:10 +000053** SQLITE_WRITELOCK. (Note that sqlite_page_write() can only be
54** called on an outstanding page which means that the pager must
55** be in SQLITE_READLOCK before it transitions to SQLITE_WRITELOCK.)
56** The sqlite_page_rollback() and sqlite_page_commit() functions
57** transition the state from SQLITE_WRITELOCK back to SQLITE_READLOCK.
drhed7c8552001-04-11 14:29:21 +000058*/
/* Lock states of the page cache; one of these is held in Pager.state. */
#define SQLITE_UNLOCK      0   /* Neither reading nor writing (initial state) */
#define SQLITE_READLOCK    1   /* Reading the database; no writes allowed */
#define SQLITE_WRITELOCK   2   /* Writing the database; exclusive access */
62
drhd9b02572001-04-15 00:37:09 +000063
drhed7c8552001-04-11 14:29:21 +000064/*
65** Each in-memory image of a page begins with the following header.
drhbd03cae2001-06-02 02:40:57 +000066** This header is only visible to this pager module. The client
67** code that calls pager sees only the data that follows the header.
drhed7c8552001-04-11 14:29:21 +000068*/
drhd9b02572001-04-15 00:37:09 +000069typedef struct PgHdr PgHdr;
drhed7c8552001-04-11 14:29:21 +000070struct PgHdr {
71 Pager *pPager; /* The pager to which this page belongs */
72 Pgno pgno; /* The page number for this page */
drh69688d52001-04-14 16:38:23 +000073 PgHdr *pNextHash, *pPrevHash; /* Hash collision chain for PgHdr.pgno */
drhed7c8552001-04-11 14:29:21 +000074 int nRef; /* Number of users of this page */
drhd9b02572001-04-15 00:37:09 +000075 PgHdr *pNextFree, *pPrevFree; /* Freelist of pages where nRef==0 */
76 PgHdr *pNextAll, *pPrevAll; /* A list of all pages */
drhed7c8552001-04-11 14:29:21 +000077 char inJournal; /* TRUE if has been written to journal */
78 char dirty; /* TRUE if we need to write back changes */
drh69688d52001-04-14 16:38:23 +000079 /* SQLITE_PAGE_SIZE bytes of page data follow this header */
drh7e3b0a02001-04-28 16:52:40 +000080 /* Pager.nExtra bytes of local data follow the page data */
drhed7c8552001-04-11 14:29:21 +000081};
82
83/*
drh69688d52001-04-14 16:38:23 +000084** Convert a pointer to a PgHdr into a pointer to its data
85** and back again.
drhed7c8552001-04-11 14:29:21 +000086*/
/* The page data sits immediately after the header, and the extra
** area sits SQLITE_PAGE_SIZE bytes after the start of the data. */
#define PGHDR_TO_DATA(P)   ((void*)((P)+1))
#define DATA_TO_PGHDR(D)   (((PgHdr*)(D))-1)
#define PGHDR_TO_EXTRA(P)  ((void*)(((char*)((P)+1))+SQLITE_PAGE_SIZE))
drhed7c8552001-04-11 14:29:21 +000090
91/*
drhed7c8552001-04-11 14:29:21 +000092** How big to make the hash table used for locating in-memory pages
drh306dc212001-05-21 13:45:10 +000093** by page number. Knuth says this should be a prime number.
drhed7c8552001-04-11 14:29:21 +000094*/
#define N_PG_HASH  373   /* Number of buckets in Pager.aHash[] */
drhed7c8552001-04-11 14:29:21 +000096
/*
** An open page cache is an instance of the following structure.
*/
100struct Pager {
101 char *zFilename; /* Name of the database file */
102 char *zJournal; /* Name of the journal file */
drh8cfbf082001-09-19 13:22:39 +0000103 OsFile fd, jfd; /* File descriptors for database and journal */
104 int journalOpen; /* True if journal file descriptors is valid */
drhed7c8552001-04-11 14:29:21 +0000105 int dbSize; /* Number of pages in the file */
drh69688d52001-04-14 16:38:23 +0000106 int origDbSize; /* dbSize before the current change */
drh7e3b0a02001-04-28 16:52:40 +0000107 int nExtra; /* Add this many bytes to each in-memory page */
drh72f82862001-05-24 21:06:34 +0000108 void (*xDestructor)(void*); /* Call this routine when freeing pages */
drhed7c8552001-04-11 14:29:21 +0000109 int nPage; /* Total number of in-memory pages */
drhd9b02572001-04-15 00:37:09 +0000110 int nRef; /* Number of in-memory pages with PgHdr.nRef>0 */
drhed7c8552001-04-11 14:29:21 +0000111 int mxPage; /* Maximum number of pages to hold in cache */
drhd9b02572001-04-15 00:37:09 +0000112 int nHit, nMiss, nOvfl; /* Cache hits, missing, and LRU overflows */
113 unsigned char state; /* SQLITE_UNLOCK, _READLOCK or _WRITELOCK */
114 unsigned char errMask; /* One of several kinds of errors */
drh5e00f6c2001-09-13 13:46:56 +0000115 unsigned char tempFile; /* zFilename is a temporary file */
116 unsigned char readOnly; /* True for a read-only database */
drhf57b14a2001-09-14 18:54:08 +0000117 unsigned char needSync; /* True if an fsync() is needed on the journal */
drh6019e162001-07-02 17:51:45 +0000118 unsigned char *aInJournal; /* One bit for each page in the database file */
drhed7c8552001-04-11 14:29:21 +0000119 PgHdr *pFirst, *pLast; /* List of free pages */
drhd9b02572001-04-15 00:37:09 +0000120 PgHdr *pAll; /* List of all pages */
drhed7c8552001-04-11 14:29:21 +0000121 PgHdr *aHash[N_PG_HASH]; /* Hash table to map page number of PgHdr */
drhd9b02572001-04-15 00:37:09 +0000122};
123
124/*
125** These are bits that can be set in Pager.errMask.
126*/
#define PAGER_ERR_FULL     0x01  /* A write() call failed */
#define PAGER_ERR_MEM      0x02  /* A malloc() call failed */
#define PAGER_ERR_LOCK     0x04  /* The locking protocol was violated */
#define PAGER_ERR_CORRUPT  0x08  /* Database or journal is corrupt */
#define PAGER_ERR_DISK     0x10  /* General disk I/O error */
drhd9b02572001-04-15 00:37:09 +0000132
133/*
134** The journal file contains page records in the following
135** format.
136*/
137typedef struct PageRecord PageRecord;
138struct PageRecord {
139 Pgno pgno; /* The page number */
140 char aData[SQLITE_PAGE_SIZE]; /* Original data for page pgno */
141};
142
143/*
drh5e00f6c2001-09-13 13:46:56 +0000144** Journal files begin with the following magic string. The data
145** was obtained from /dev/random. It is used only as a sanity check.
drhd9b02572001-04-15 00:37:09 +0000146*/
static const unsigned char aJournalMagic[] = {
  0xd9, 0xd5, 0x05, 0xf9,
  0x20, 0xa1, 0x63, 0xd4,
};
150
151/*
152** Hash a page number
153*/
#define pager_hash(PN)  ((PN)%N_PG_HASH)   /* Bucket index for page number PN */
drhed7c8552001-04-11 14:29:21 +0000155
156/*
drhdd793422001-06-28 01:54:48 +0000157** Enable reference count tracking here:
158*/
#if SQLITE_TEST
  /* Set to non-zero to make pager_refinfo() print a trace line. */
  int pager_refinfo_enable = 0;

  /*
  ** Print the page number, data address and reference count of page p.
  ** Does nothing unless pager_refinfo_enable is non-zero.
  **
  ** The data pointer is converted through unsigned long and printed with
  ** %lx so that the argument type matches the format and the address is
  ** not cut down to the width of an int.  The page number is cast to int
  ** to match its %d conversion.
  */
  static void pager_refinfo(PgHdr *p){
    static int cnt = 0;
    if( !pager_refinfo_enable ) return;
    printf(
       "REFCNT: %4d addr=0x%08lx nRef=%d\n",
       (int)p->pgno, (unsigned long)PGHDR_TO_DATA(p), p->nRef
    );
    cnt++;   /* Something to set a breakpoint on */
  }
# define REFINFO(X)  pager_refinfo(X)
#else
# define REFINFO(X)
#endif
174
/*
** Convert the bits in the pPager->errMask into an appropriate
** return code.
*/
179static int pager_errcode(Pager *pPager){
180 int rc = SQLITE_OK;
181 if( pPager->errMask & PAGER_ERR_LOCK ) rc = SQLITE_PROTOCOL;
drh81a20f22001-10-12 17:30:04 +0000182 if( pPager->errMask & PAGER_ERR_DISK ) rc = SQLITE_IOERR;
drhd9b02572001-04-15 00:37:09 +0000183 if( pPager->errMask & PAGER_ERR_FULL ) rc = SQLITE_FULL;
184 if( pPager->errMask & PAGER_ERR_MEM ) rc = SQLITE_NOMEM;
185 if( pPager->errMask & PAGER_ERR_CORRUPT ) rc = SQLITE_CORRUPT;
186 return rc;
drhed7c8552001-04-11 14:29:21 +0000187}
188
189/*
190** Find a page in the hash table given its page number. Return
191** a pointer to the page or NULL if not found.
192*/
drhd9b02572001-04-15 00:37:09 +0000193static PgHdr *pager_lookup(Pager *pPager, Pgno pgno){
drhed7c8552001-04-11 14:29:21 +0000194 PgHdr *p = pPager->aHash[pgno % N_PG_HASH];
195 while( p && p->pgno!=pgno ){
196 p = p->pNextHash;
197 }
198 return p;
199}
200
201/*
202** Unlock the database and clear the in-memory cache. This routine
203** sets the state of the pager back to what it was when it was first
204** opened. Any outstanding pages are invalidated and subsequent attempts
205** to access those pages will likely result in a coredump.
206*/
drhd9b02572001-04-15 00:37:09 +0000207static void pager_reset(Pager *pPager){
drhed7c8552001-04-11 14:29:21 +0000208 PgHdr *pPg, *pNext;
drhd9b02572001-04-15 00:37:09 +0000209 for(pPg=pPager->pAll; pPg; pPg=pNext){
210 pNext = pPg->pNextAll;
211 sqliteFree(pPg);
drhed7c8552001-04-11 14:29:21 +0000212 }
213 pPager->pFirst = 0;
drhd9b02572001-04-15 00:37:09 +0000214 pPager->pLast = 0;
215 pPager->pAll = 0;
drhed7c8552001-04-11 14:29:21 +0000216 memset(pPager->aHash, 0, sizeof(pPager->aHash));
217 pPager->nPage = 0;
218 if( pPager->state==SQLITE_WRITELOCK ){
drhd9b02572001-04-15 00:37:09 +0000219 sqlitepager_rollback(pPager);
drhed7c8552001-04-11 14:29:21 +0000220 }
drha7fcb052001-12-14 15:09:55 +0000221 sqliteOsUnlock(&pPager->fd);
drhed7c8552001-04-11 14:29:21 +0000222 pPager->state = SQLITE_UNLOCK;
drhd9b02572001-04-15 00:37:09 +0000223 pPager->dbSize = -1;
drhed7c8552001-04-11 14:29:21 +0000224 pPager->nRef = 0;
drh8cfbf082001-09-19 13:22:39 +0000225 assert( pPager->journalOpen==0 );
drhed7c8552001-04-11 14:29:21 +0000226}
227
228/*
229** When this routine is called, the pager has the journal file open and
230** a write lock on the database. This routine releases the database
231** write lock and acquires a read lock in its place. The journal file
232** is deleted and closed.
233**
234** We have to release the write lock before acquiring the read lock,
235** so there is a race condition where another process can get the lock
236** while we are not holding it. But, no other process should do this
237** because we are also holding a lock on the journal, and no process
238** should get a write lock on the database without first getting a lock
239** on the journal. So this routine should never fail. But it can fail
240** if another process is not playing by the rules. If it does fail,
drhd9b02572001-04-15 00:37:09 +0000241** all in-memory cache pages are invalidated, the PAGER_ERR_LOCK bit
242** is set in pPager->errMask, and this routine returns SQLITE_PROTOCOL.
243** SQLITE_OK is returned on success.
drhed7c8552001-04-11 14:29:21 +0000244*/
drhd9b02572001-04-15 00:37:09 +0000245static int pager_unwritelock(Pager *pPager){
drhed7c8552001-04-11 14:29:21 +0000246 int rc;
drhd9b02572001-04-15 00:37:09 +0000247 PgHdr *pPg;
248 if( pPager->state!=SQLITE_WRITELOCK ) return SQLITE_OK;
drha7fcb052001-12-14 15:09:55 +0000249 sqliteOsClose(&pPager->jfd);
drh8cfbf082001-09-19 13:22:39 +0000250 pPager->journalOpen = 0;
251 sqliteOsDelete(pPager->zJournal);
drha7fcb052001-12-14 15:09:55 +0000252 rc = sqliteOsReadLock(&pPager->fd);
253 assert( rc==SQLITE_OK );
drh6019e162001-07-02 17:51:45 +0000254 sqliteFree( pPager->aInJournal );
255 pPager->aInJournal = 0;
drhd9b02572001-04-15 00:37:09 +0000256 for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
257 pPg->inJournal = 0;
258 pPg->dirty = 0;
259 }
drha7fcb052001-12-14 15:09:55 +0000260 pPager->state = SQLITE_READLOCK;
drhed7c8552001-04-11 14:29:21 +0000261 return rc;
262}
263
drhed7c8552001-04-11 14:29:21 +0000264/*
265** Playback the journal and thus restore the database file to
266** the state it was in before we started making changes.
267**
drhd9b02572001-04-15 00:37:09 +0000268** The journal file format is as follows: There is an initial
269** file-type string for sanity checking. Then there is a single
270** Pgno number which is the number of pages in the database before
271** changes were made. The database is truncated to this size.
drh306dc212001-05-21 13:45:10 +0000272** Next come zero or more page records where each page record
273** consists of a Pgno and SQLITE_PAGE_SIZE bytes of data. See
274** the PageRecord structure for details.
drhed7c8552001-04-11 14:29:21 +0000275**
drhd9b02572001-04-15 00:37:09 +0000276** For playback, the pages have to be read from the journal in
277** reverse order and put back into the original database file.
drhed7c8552001-04-11 14:29:21 +0000278**
drhd9b02572001-04-15 00:37:09 +0000279** If the file opened as the journal file is not a well-formed
280** journal file (as determined by looking at the magic number
281** at the beginning) then this routine returns SQLITE_PROTOCOL.
282** If any other errors occur during playback, the database will
283** likely be corrupted, so the PAGER_ERR_CORRUPT bit is set in
284** pPager->errMask and SQLITE_CORRUPT is returned. If it all
285** works, then this routine returns SQLITE_OK.
drhed7c8552001-04-11 14:29:21 +0000286*/
drhd9b02572001-04-15 00:37:09 +0000287static int pager_playback(Pager *pPager){
288 int nRec; /* Number of Records */
289 int i; /* Loop counter */
290 Pgno mxPg = 0; /* Size of the original file in pages */
drhd9b02572001-04-15 00:37:09 +0000291 PgHdr *pPg; /* An existing page in the cache */
292 PageRecord pgRec;
293 unsigned char aMagic[sizeof(aJournalMagic)];
drhed7c8552001-04-11 14:29:21 +0000294 int rc;
295
drhc3a64ba2001-11-22 00:01:27 +0000296 /* Figure out how many records are in the journal. Abort early if
297 ** the journal is empty.
drhed7c8552001-04-11 14:29:21 +0000298 */
drh8cfbf082001-09-19 13:22:39 +0000299 assert( pPager->journalOpen );
drha7fcb052001-12-14 15:09:55 +0000300 sqliteOsSeek(&pPager->jfd, 0);
301 rc = sqliteOsFileSize(&pPager->jfd, &nRec);
drhc3a64ba2001-11-22 00:01:27 +0000302 if( rc!=SQLITE_OK ){
303 goto end_playback;
304 }
305 nRec = (nRec - (sizeof(aMagic)+sizeof(Pgno))) / sizeof(PageRecord);
306 if( nRec<=0 ){
307 goto end_playback;
308 }
309
310 /* Read the beginning of the journal and truncate the
311 ** database file back to its original size.
312 */
drha7fcb052001-12-14 15:09:55 +0000313 rc = sqliteOsRead(&pPager->jfd, aMagic, sizeof(aMagic));
drhd9b02572001-04-15 00:37:09 +0000314 if( rc!=SQLITE_OK || memcmp(aMagic,aJournalMagic,sizeof(aMagic))!=0 ){
drh81a20f22001-10-12 17:30:04 +0000315 rc = SQLITE_PROTOCOL;
316 goto end_playback;
drhd9b02572001-04-15 00:37:09 +0000317 }
drha7fcb052001-12-14 15:09:55 +0000318 rc = sqliteOsRead(&pPager->jfd, &mxPg, sizeof(mxPg));
drhd9b02572001-04-15 00:37:09 +0000319 if( rc!=SQLITE_OK ){
drh81a20f22001-10-12 17:30:04 +0000320 goto end_playback;
drhd9b02572001-04-15 00:37:09 +0000321 }
drha7fcb052001-12-14 15:09:55 +0000322 rc = sqliteOsTruncate(&pPager->fd, mxPg*SQLITE_PAGE_SIZE);
drh81a20f22001-10-12 17:30:04 +0000323 if( rc!=SQLITE_OK ){
324 goto end_playback;
325 }
drhd9b02572001-04-15 00:37:09 +0000326 pPager->dbSize = mxPg;
327
drhed7c8552001-04-11 14:29:21 +0000328 /* Process segments beginning with the last and working backwards
329 ** to the first.
330 */
drhd9b02572001-04-15 00:37:09 +0000331 for(i=nRec-1; i>=0; i--){
drhed7c8552001-04-11 14:29:21 +0000332 /* Seek to the beginning of the segment */
drh254cba22001-09-20 01:44:42 +0000333 int ofst;
drhd9b02572001-04-15 00:37:09 +0000334 ofst = i*sizeof(PageRecord) + sizeof(aMagic) + sizeof(Pgno);
drha7fcb052001-12-14 15:09:55 +0000335 rc = sqliteOsSeek(&pPager->jfd, ofst);
drhd9b02572001-04-15 00:37:09 +0000336 if( rc!=SQLITE_OK ) break;
drha7fcb052001-12-14 15:09:55 +0000337 rc = sqliteOsRead(&pPager->jfd, &pgRec, sizeof(pgRec));
drhd9b02572001-04-15 00:37:09 +0000338 if( rc!=SQLITE_OK ) break;
drhed7c8552001-04-11 14:29:21 +0000339
drhd9b02572001-04-15 00:37:09 +0000340 /* Sanity checking on the page */
341 if( pgRec.pgno>mxPg || pgRec.pgno==0 ){
342 rc = SQLITE_CORRUPT;
343 break;
drhed7c8552001-04-11 14:29:21 +0000344 }
345
drhd9b02572001-04-15 00:37:09 +0000346 /* Playback the page. Update the in-memory copy of the page
347 ** at the same time, if there is one.
drhed7c8552001-04-11 14:29:21 +0000348 */
drhd9b02572001-04-15 00:37:09 +0000349 pPg = pager_lookup(pPager, pgRec.pgno);
350 if( pPg ){
351 memcpy(PGHDR_TO_DATA(pPg), pgRec.aData, SQLITE_PAGE_SIZE);
drh6019e162001-07-02 17:51:45 +0000352 memset(PGHDR_TO_EXTRA(pPg), 0, pPager->nExtra);
drhed7c8552001-04-11 14:29:21 +0000353 }
drha7fcb052001-12-14 15:09:55 +0000354 rc = sqliteOsSeek(&pPager->fd, (pgRec.pgno-1)*SQLITE_PAGE_SIZE);
drhd9b02572001-04-15 00:37:09 +0000355 if( rc!=SQLITE_OK ) break;
drha7fcb052001-12-14 15:09:55 +0000356 rc = sqliteOsWrite(&pPager->fd, pgRec.aData, SQLITE_PAGE_SIZE);
drhd9b02572001-04-15 00:37:09 +0000357 if( rc!=SQLITE_OK ) break;
drhed7c8552001-04-11 14:29:21 +0000358 }
drh81a20f22001-10-12 17:30:04 +0000359
360end_playback:
drhd9b02572001-04-15 00:37:09 +0000361 if( rc!=SQLITE_OK ){
362 pager_unwritelock(pPager);
363 pPager->errMask |= PAGER_ERR_CORRUPT;
364 rc = SQLITE_CORRUPT;
365 }else{
366 rc = pager_unwritelock(pPager);
drhed7c8552001-04-11 14:29:21 +0000367 }
drhd9b02572001-04-15 00:37:09 +0000368 return rc;
drhed7c8552001-04-11 14:29:21 +0000369}
370
371/*
drhf57b14a2001-09-14 18:54:08 +0000372** Change the maximum number of in-memory pages that are allowed.
373*/
374void sqlitepager_set_cachesize(Pager *pPager, int mxPage){
375 if( mxPage>10 ){
376 pPager->mxPage = mxPage;
377 }
378}
379
380/*
drhed7c8552001-04-11 14:29:21 +0000381** Create a new page cache and put a pointer to the page cache in *ppPager.
drh5e00f6c2001-09-13 13:46:56 +0000382** The file to be cached need not exist. The file is not locked until
drhd9b02572001-04-15 00:37:09 +0000383** the first call to sqlitepager_get() and is only held open until the
384** last page is released using sqlitepager_unref().
drh382c0242001-10-06 16:33:02 +0000385**
386** If zFilename is NULL then a random temporary file is created and used
387** as the file to be cached. The file will be deleted automatically when
388** it is closed.
drhed7c8552001-04-11 14:29:21 +0000389*/
drh7e3b0a02001-04-28 16:52:40 +0000390int sqlitepager_open(
391 Pager **ppPager, /* Return the Pager structure here */
392 const char *zFilename, /* Name of the database file to open */
393 int mxPage, /* Max number of in-memory cache pages */
394 int nExtra /* Extra bytes append to each in-memory page */
395){
drhed7c8552001-04-11 14:29:21 +0000396 Pager *pPager;
397 int nameLen;
drh8cfbf082001-09-19 13:22:39 +0000398 OsFile fd;
399 int rc;
drh5e00f6c2001-09-13 13:46:56 +0000400 int tempFile;
401 int readOnly = 0;
drh8cfbf082001-09-19 13:22:39 +0000402 char zTemp[SQLITE_TEMPNAME_SIZE];
drhed7c8552001-04-11 14:29:21 +0000403
drhd9b02572001-04-15 00:37:09 +0000404 *ppPager = 0;
405 if( sqlite_malloc_failed ){
406 return SQLITE_NOMEM;
407 }
drh5e00f6c2001-09-13 13:46:56 +0000408 if( zFilename ){
drh8cfbf082001-09-19 13:22:39 +0000409 rc = sqliteOsOpenReadWrite(zFilename, &fd, &readOnly);
drh5e00f6c2001-09-13 13:46:56 +0000410 tempFile = 0;
411 }else{
412 int cnt = 8;
drh8cfbf082001-09-19 13:22:39 +0000413 sqliteOsTempFileName(zTemp);
drh5e00f6c2001-09-13 13:46:56 +0000414 do{
415 cnt--;
drh8cfbf082001-09-19 13:22:39 +0000416 sqliteOsTempFileName(zTemp);
417 rc = sqliteOsOpenExclusive(zTemp, &fd);
418 }while( cnt>0 && rc!=SQLITE_OK );
drh5e00f6c2001-09-13 13:46:56 +0000419 zFilename = zTemp;
420 tempFile = 1;
421 }
drh8cfbf082001-09-19 13:22:39 +0000422 if( rc!=SQLITE_OK ){
drhed7c8552001-04-11 14:29:21 +0000423 return SQLITE_CANTOPEN;
424 }
425 nameLen = strlen(zFilename);
426 pPager = sqliteMalloc( sizeof(*pPager) + nameLen*2 + 30 );
drhd9b02572001-04-15 00:37:09 +0000427 if( pPager==0 ){
drha7fcb052001-12-14 15:09:55 +0000428 sqliteOsClose(&fd);
drhd9b02572001-04-15 00:37:09 +0000429 return SQLITE_NOMEM;
430 }
drhed7c8552001-04-11 14:29:21 +0000431 pPager->zFilename = (char*)&pPager[1];
432 pPager->zJournal = &pPager->zFilename[nameLen+1];
433 strcpy(pPager->zFilename, zFilename);
434 strcpy(pPager->zJournal, zFilename);
435 strcpy(&pPager->zJournal[nameLen], "-journal");
436 pPager->fd = fd;
drh8cfbf082001-09-19 13:22:39 +0000437 pPager->journalOpen = 0;
drhed7c8552001-04-11 14:29:21 +0000438 pPager->nRef = 0;
439 pPager->dbSize = -1;
440 pPager->nPage = 0;
drhd79caeb2001-04-15 02:27:24 +0000441 pPager->mxPage = mxPage>5 ? mxPage : 10;
drhed7c8552001-04-11 14:29:21 +0000442 pPager->state = SQLITE_UNLOCK;
drhd9b02572001-04-15 00:37:09 +0000443 pPager->errMask = 0;
drh5e00f6c2001-09-13 13:46:56 +0000444 pPager->tempFile = tempFile;
445 pPager->readOnly = readOnly;
drhf57b14a2001-09-14 18:54:08 +0000446 pPager->needSync = 0;
drhed7c8552001-04-11 14:29:21 +0000447 pPager->pFirst = 0;
448 pPager->pLast = 0;
drh7c717f72001-06-24 20:39:41 +0000449 pPager->nExtra = nExtra;
drhed7c8552001-04-11 14:29:21 +0000450 memset(pPager->aHash, 0, sizeof(pPager->aHash));
451 *ppPager = pPager;
452 return SQLITE_OK;
453}
454
/*
** Set the destructor for this pager.  If not NULL, the destructor is called
** when the reference count on each page reaches zero.  The destructor can
** be used to clean up information in the extra segment appended to each page.
**
** The destructor is not called as a result of sqlitepager_close().
** Destructors are only called by sqlitepager_unref().
*/
463void sqlitepager_set_destructor(Pager *pPager, void (*xDesc)(void*)){
464 pPager->xDestructor = xDesc;
465}
466
467/*
drh5e00f6c2001-09-13 13:46:56 +0000468** Return the total number of pages in the disk file associated with
469** pPager.
drhed7c8552001-04-11 14:29:21 +0000470*/
drhd9b02572001-04-15 00:37:09 +0000471int sqlitepager_pagecount(Pager *pPager){
drhed7c8552001-04-11 14:29:21 +0000472 int n;
drhd9b02572001-04-15 00:37:09 +0000473 assert( pPager!=0 );
drhed7c8552001-04-11 14:29:21 +0000474 if( pPager->dbSize>=0 ){
475 return pPager->dbSize;
476 }
drha7fcb052001-12-14 15:09:55 +0000477 if( sqliteOsFileSize(&pPager->fd, &n)!=SQLITE_OK ){
drh81a20f22001-10-12 17:30:04 +0000478 pPager->errMask |= PAGER_ERR_DISK;
drh8cfbf082001-09-19 13:22:39 +0000479 return 0;
drhed7c8552001-04-11 14:29:21 +0000480 }
drh8cfbf082001-09-19 13:22:39 +0000481 n /= SQLITE_PAGE_SIZE;
drhd9b02572001-04-15 00:37:09 +0000482 if( pPager->state!=SQLITE_UNLOCK ){
drhed7c8552001-04-11 14:29:21 +0000483 pPager->dbSize = n;
484 }
485 return n;
486}
487
488/*
489** Shutdown the page cache. Free all memory and close all files.
490**
491** If a transaction was in progress when this routine is called, that
492** transaction is rolled back. All outstanding pages are invalidated
493** and their memory is freed. Any attempt to use a page associated
494** with this page cache after this function returns will likely
495** result in a coredump.
496*/
drhd9b02572001-04-15 00:37:09 +0000497int sqlitepager_close(Pager *pPager){
498 PgHdr *pPg, *pNext;
drhed7c8552001-04-11 14:29:21 +0000499 switch( pPager->state ){
500 case SQLITE_WRITELOCK: {
drhd9b02572001-04-15 00:37:09 +0000501 sqlitepager_rollback(pPager);
drha7fcb052001-12-14 15:09:55 +0000502 sqliteOsUnlock(&pPager->fd);
drh8cfbf082001-09-19 13:22:39 +0000503 assert( pPager->journalOpen==0 );
drhed7c8552001-04-11 14:29:21 +0000504 break;
505 }
506 case SQLITE_READLOCK: {
drha7fcb052001-12-14 15:09:55 +0000507 sqliteOsUnlock(&pPager->fd);
drhed7c8552001-04-11 14:29:21 +0000508 break;
509 }
510 default: {
511 /* Do nothing */
512 break;
513 }
514 }
drhd9b02572001-04-15 00:37:09 +0000515 for(pPg=pPager->pAll; pPg; pPg=pNext){
516 pNext = pPg->pNextAll;
517 sqliteFree(pPg);
drhed7c8552001-04-11 14:29:21 +0000518 }
drha7fcb052001-12-14 15:09:55 +0000519 sqliteOsClose(&pPager->fd);
drh8cfbf082001-09-19 13:22:39 +0000520 assert( pPager->journalOpen==0 );
drh5e00f6c2001-09-13 13:46:56 +0000521 if( pPager->tempFile ){
drh8cfbf082001-09-19 13:22:39 +0000522 sqliteOsDelete(pPager->zFilename);
drh5e00f6c2001-09-13 13:46:56 +0000523 }
drhed7c8552001-04-11 14:29:21 +0000524 sqliteFree(pPager);
525 return SQLITE_OK;
526}
527
528/*
drh5e00f6c2001-09-13 13:46:56 +0000529** Return the page number for the given page data.
drhed7c8552001-04-11 14:29:21 +0000530*/
drhd9b02572001-04-15 00:37:09 +0000531Pgno sqlitepager_pagenumber(void *pData){
drhed7c8552001-04-11 14:29:21 +0000532 PgHdr *p = DATA_TO_PGHDR(pData);
533 return p->pgno;
534}
535
536/*
drh7e3b0a02001-04-28 16:52:40 +0000537** Increment the reference count for a page. If the page is
538** currently on the freelist (the reference count is zero) then
539** remove it from the freelist.
540*/
drhdf0b3b02001-06-23 11:36:20 +0000541static void page_ref(PgHdr *pPg){
drh7e3b0a02001-04-28 16:52:40 +0000542 if( pPg->nRef==0 ){
543 /* The page is currently on the freelist. Remove it. */
544 if( pPg->pPrevFree ){
545 pPg->pPrevFree->pNextFree = pPg->pNextFree;
546 }else{
547 pPg->pPager->pFirst = pPg->pNextFree;
548 }
549 if( pPg->pNextFree ){
550 pPg->pNextFree->pPrevFree = pPg->pPrevFree;
551 }else{
552 pPg->pPager->pLast = pPg->pPrevFree;
553 }
554 pPg->pPager->nRef++;
555 }
556 pPg->nRef++;
drhdd793422001-06-28 01:54:48 +0000557 REFINFO(pPg);
drhdf0b3b02001-06-23 11:36:20 +0000558}
559
560/*
561** Increment the reference count for a page. The input pointer is
562** a reference to the page data.
563*/
564int sqlitepager_ref(void *pData){
565 PgHdr *pPg = DATA_TO_PGHDR(pData);
566 page_ref(pPg);
drh8c42ca92001-06-22 19:15:00 +0000567 return SQLITE_OK;
drh7e3b0a02001-04-28 16:52:40 +0000568}
569
570/*
drhb19a2bc2001-09-16 00:13:26 +0000571** Sync the journal and then write all free dirty pages to the database
572** file.
573**
574** Writing all free dirty pages to the database after the sync is a
575** non-obvious optimization. fsync() is an expensive operation so we
576** want to minimize the number that occur. So after an fsync() is forced
577** and we are free to write dirty pages back to the database, it is best
578** to go ahead and do as much of that as possible to minimize the chance
579** of having to do another fsync() later on. Writing dirty free pages
580** in this way make database operations go up to 10 times faster.
drh50e5dad2001-09-15 00:57:28 +0000581*/
582static int syncAllPages(Pager *pPager){
583 PgHdr *pPg;
584 int rc = SQLITE_OK;
585 if( pPager->needSync ){
drha7fcb052001-12-14 15:09:55 +0000586 rc = sqliteOsSync(&pPager->jfd);
drh50e5dad2001-09-15 00:57:28 +0000587 if( rc!=0 ) return rc;
588 pPager->needSync = 0;
589 }
590 for(pPg=pPager->pFirst; pPg; pPg=pPg->pNextFree){
591 if( pPg->dirty ){
drha7fcb052001-12-14 15:09:55 +0000592 sqliteOsSeek(&pPager->fd, (pPg->pgno-1)*SQLITE_PAGE_SIZE);
593 rc = sqliteOsWrite(&pPager->fd, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE);
drh50e5dad2001-09-15 00:57:28 +0000594 if( rc!=SQLITE_OK ) break;
595 pPg->dirty = 0;
596 }
597 }
drh81a20f22001-10-12 17:30:04 +0000598 return rc;
drh50e5dad2001-09-15 00:57:28 +0000599}
600
601/*
drhd9b02572001-04-15 00:37:09 +0000602** Acquire a page.
603**
drh58a11682001-11-10 13:51:08 +0000604** A read lock on the disk file is obtained when the first page is acquired.
drh5e00f6c2001-09-13 13:46:56 +0000605** This read lock is dropped when the last page is released.
drhd9b02572001-04-15 00:37:09 +0000606**
drh306dc212001-05-21 13:45:10 +0000607** A _get works for any page number greater than 0. If the database
608** file is smaller than the requested page, then no actual disk
609** read occurs and the memory image of the page is initialized to
610** all zeros. The extra data appended to a page is always initialized
611** to zeros the first time a page is loaded into memory.
612**
drhd9b02572001-04-15 00:37:09 +0000613** The acquisition might fail for several reasons. In all cases,
614** an appropriate error code is returned and *ppPage is set to NULL.
drh7e3b0a02001-04-28 16:52:40 +0000615**
616** See also sqlitepager_lookup(). Both this routine and _lookup() attempt
617** to find a page in the in-memory cache first. If the page is not already
drh5e00f6c2001-09-13 13:46:56 +0000618** in memory, this routine goes to disk to read it in whereas _lookup()
drh7e3b0a02001-04-28 16:52:40 +0000619** just returns 0. This routine acquires a read-lock the first time it
620** has to go to disk, and could also playback an old journal if necessary.
621** Since _lookup() never goes to disk, it never has to deal with locks
622** or journal files.
drhed7c8552001-04-11 14:29:21 +0000623*/
drhd9b02572001-04-15 00:37:09 +0000624int sqlitepager_get(Pager *pPager, Pgno pgno, void **ppPage){
drhed7c8552001-04-11 14:29:21 +0000625 PgHdr *pPg;
626
drhd9b02572001-04-15 00:37:09 +0000627 /* Make sure we have not hit any critical errors.
628 */
629 if( pPager==0 || pgno==0 ){
630 return SQLITE_ERROR;
631 }
632 if( pPager->errMask & ~(PAGER_ERR_FULL) ){
633 return pager_errcode(pPager);
634 }
635
drhed7c8552001-04-11 14:29:21 +0000636 /* If this is the first page accessed, then get a read lock
637 ** on the database file.
638 */
639 if( pPager->nRef==0 ){
drha7fcb052001-12-14 15:09:55 +0000640 if( sqliteOsReadLock(&pPager->fd)!=SQLITE_OK ){
drhed7c8552001-04-11 14:29:21 +0000641 *ppPage = 0;
642 return SQLITE_BUSY;
643 }
drhd9b02572001-04-15 00:37:09 +0000644 pPager->state = SQLITE_READLOCK;
drhed7c8552001-04-11 14:29:21 +0000645
646 /* If a journal file exists, try to play it back.
647 */
drh8cfbf082001-09-19 13:22:39 +0000648 if( sqliteOsFileExists(pPager->zJournal) ){
drhf57b3392001-10-08 13:22:32 +0000649 int rc, dummy;
drhed7c8552001-04-11 14:29:21 +0000650
drha7fcb052001-12-14 15:09:55 +0000651 /* Get a write lock on the database
652 */
653 rc = sqliteOsWriteLock(&pPager->fd);
654 if( rc!=SQLITE_OK ){
655 rc = sqliteOsReadLock(&pPager->fd);
656 assert( rc==SQLITE_OK );
657 *ppPage = 0;
658 return SQLITE_BUSY;
659 }
660 pPager->state = SQLITE_WRITELOCK;
661
drhed7c8552001-04-11 14:29:21 +0000662 /* Open the journal for exclusive access. Return SQLITE_BUSY if
drhf57b3392001-10-08 13:22:32 +0000663 ** we cannot get exclusive access to the journal file.
664 **
665 ** Even though we will only be reading from the journal, not writing,
666 ** we have to open the journal for writing in order to obtain an
667 ** exclusive access lock.
drhed7c8552001-04-11 14:29:21 +0000668 */
drhf57b3392001-10-08 13:22:32 +0000669 rc = sqliteOsOpenReadWrite(pPager->zJournal, &pPager->jfd, &dummy);
drha7fcb052001-12-14 15:09:55 +0000670 if( rc!=SQLITE_OK ){
671 rc = sqliteOsUnlock(&pPager->fd);
672 assert( rc==SQLITE_OK );
drhed7c8552001-04-11 14:29:21 +0000673 *ppPage = 0;
674 return SQLITE_BUSY;
675 }
drha7fcb052001-12-14 15:09:55 +0000676 pPager->journalOpen = 1;
drhed7c8552001-04-11 14:29:21 +0000677
678 /* Playback and delete the journal. Drop the database write
679 ** lock and reacquire the read lock.
680 */
drhd9b02572001-04-15 00:37:09 +0000681 rc = pager_playback(pPager);
682 if( rc!=SQLITE_OK ){
683 return rc;
684 }
drhed7c8552001-04-11 14:29:21 +0000685 }
686 pPg = 0;
687 }else{
688 /* Search for page in cache */
drhd9b02572001-04-15 00:37:09 +0000689 pPg = pager_lookup(pPager, pgno);
drhed7c8552001-04-11 14:29:21 +0000690 }
691 if( pPg==0 ){
drhd9b02572001-04-15 00:37:09 +0000692 /* The requested page is not in the page cache. */
drhed7c8552001-04-11 14:29:21 +0000693 int h;
drh7e3b0a02001-04-28 16:52:40 +0000694 pPager->nMiss++;
drhed7c8552001-04-11 14:29:21 +0000695 if( pPager->nPage<pPager->mxPage || pPager->pFirst==0 ){
696 /* Create a new page */
drh7e3b0a02001-04-28 16:52:40 +0000697 pPg = sqliteMalloc( sizeof(*pPg) + SQLITE_PAGE_SIZE + pPager->nExtra );
drhd9b02572001-04-15 00:37:09 +0000698 if( pPg==0 ){
699 *ppPage = 0;
700 pager_unwritelock(pPager);
701 pPager->errMask |= PAGER_ERR_MEM;
702 return SQLITE_NOMEM;
703 }
drhed7c8552001-04-11 14:29:21 +0000704 pPg->pPager = pPager;
drhd9b02572001-04-15 00:37:09 +0000705 pPg->pNextAll = pPager->pAll;
706 if( pPager->pAll ){
707 pPager->pAll->pPrevAll = pPg;
708 }
709 pPg->pPrevAll = 0;
drhd79caeb2001-04-15 02:27:24 +0000710 pPager->pAll = pPg;
drhd9b02572001-04-15 00:37:09 +0000711 pPager->nPage++;
drhed7c8552001-04-11 14:29:21 +0000712 }else{
drhd9b02572001-04-15 00:37:09 +0000713 /* Recycle an older page. First locate the page to be recycled.
714 ** Try to find one that is not dirty and is near the head of
715 ** of the free list */
drh50e5dad2001-09-15 00:57:28 +0000716 int cnt = pPager->mxPage/2;
drhed7c8552001-04-11 14:29:21 +0000717 pPg = pPager->pFirst;
drh6019e162001-07-02 17:51:45 +0000718 while( pPg->dirty && 0<cnt-- && pPg->pNextFree ){
drhd9b02572001-04-15 00:37:09 +0000719 pPg = pPg->pNextFree;
720 }
drhb19a2bc2001-09-16 00:13:26 +0000721
722 /* If we could not find a page that has not been used recently
723 ** and which is not dirty, then sync the journal and write all
724 ** dirty free pages into the database file, thus making them
725 ** clean pages and available for recycling.
726 **
727 ** We have to sync the journal before writing a page to the main
728 ** database. But syncing is a very slow operation. So after a
729 ** sync, it is best to write everything we can back to the main
730 ** database to minimize the risk of having to sync again in the
731 ** near future. That is way we write all dirty pages after a
732 ** sync.
733 */
drh50e5dad2001-09-15 00:57:28 +0000734 if( pPg==0 || pPg->dirty ){
735 int rc = syncAllPages(pPager);
736 if( rc!=0 ){
737 sqlitepager_rollback(pPager);
738 *ppPage = 0;
739 return SQLITE_IOERR;
740 }
741 pPg = pPager->pFirst;
742 }
drhd9b02572001-04-15 00:37:09 +0000743 assert( pPg->nRef==0 );
drh50e5dad2001-09-15 00:57:28 +0000744 assert( pPg->dirty==0 );
drhd9b02572001-04-15 00:37:09 +0000745
746 /* Unlink the old page from the free list and the hash table
747 */
drh6019e162001-07-02 17:51:45 +0000748 if( pPg->pPrevFree ){
749 pPg->pPrevFree->pNextFree = pPg->pNextFree;
drhed7c8552001-04-11 14:29:21 +0000750 }else{
drh6019e162001-07-02 17:51:45 +0000751 assert( pPager->pFirst==pPg );
752 pPager->pFirst = pPg->pNextFree;
drhed7c8552001-04-11 14:29:21 +0000753 }
drh6019e162001-07-02 17:51:45 +0000754 if( pPg->pNextFree ){
755 pPg->pNextFree->pPrevFree = pPg->pPrevFree;
756 }else{
757 assert( pPager->pLast==pPg );
758 pPager->pLast = pPg->pPrevFree;
759 }
760 pPg->pNextFree = pPg->pPrevFree = 0;
drhed7c8552001-04-11 14:29:21 +0000761 if( pPg->pNextHash ){
762 pPg->pNextHash->pPrevHash = pPg->pPrevHash;
763 }
764 if( pPg->pPrevHash ){
765 pPg->pPrevHash->pNextHash = pPg->pNextHash;
766 }else{
drhd9b02572001-04-15 00:37:09 +0000767 h = pager_hash(pPg->pgno);
drhed7c8552001-04-11 14:29:21 +0000768 assert( pPager->aHash[h]==pPg );
769 pPager->aHash[h] = pPg->pNextHash;
770 }
drh6019e162001-07-02 17:51:45 +0000771 pPg->pNextHash = pPg->pPrevHash = 0;
drhd9b02572001-04-15 00:37:09 +0000772 pPager->nOvfl++;
drhed7c8552001-04-11 14:29:21 +0000773 }
774 pPg->pgno = pgno;
drh6019e162001-07-02 17:51:45 +0000775 if( pPager->aInJournal && pgno<=pPager->origDbSize ){
776 pPg->inJournal = (pPager->aInJournal[pgno/8] & (1<<(pgno&7)))!=0;
777 }else{
778 pPg->inJournal = 0;
779 }
drhed7c8552001-04-11 14:29:21 +0000780 pPg->dirty = 0;
781 pPg->nRef = 1;
drhdd793422001-06-28 01:54:48 +0000782 REFINFO(pPg);
drhd9b02572001-04-15 00:37:09 +0000783 pPager->nRef++;
784 h = pager_hash(pgno);
drhed7c8552001-04-11 14:29:21 +0000785 pPg->pNextHash = pPager->aHash[h];
786 pPager->aHash[h] = pPg;
787 if( pPg->pNextHash ){
788 assert( pPg->pNextHash->pPrevHash==0 );
789 pPg->pNextHash->pPrevHash = pPg;
790 }
drh306dc212001-05-21 13:45:10 +0000791 if( pPager->dbSize<0 ) sqlitepager_pagecount(pPager);
792 if( pPager->dbSize<pgno ){
793 memset(PGHDR_TO_DATA(pPg), 0, SQLITE_PAGE_SIZE);
794 }else{
drh81a20f22001-10-12 17:30:04 +0000795 int rc;
drha7fcb052001-12-14 15:09:55 +0000796 sqliteOsSeek(&pPager->fd, (pgno-1)*SQLITE_PAGE_SIZE);
797 rc = sqliteOsRead(&pPager->fd, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE);
drh81a20f22001-10-12 17:30:04 +0000798 if( rc!=SQLITE_OK ){
799 return rc;
800 }
drh306dc212001-05-21 13:45:10 +0000801 }
drh7e3b0a02001-04-28 16:52:40 +0000802 if( pPager->nExtra>0 ){
803 memset(PGHDR_TO_EXTRA(pPg), 0, pPager->nExtra);
804 }
drhed7c8552001-04-11 14:29:21 +0000805 }else{
drhd9b02572001-04-15 00:37:09 +0000806 /* The requested page is in the page cache. */
drh7e3b0a02001-04-28 16:52:40 +0000807 pPager->nHit++;
drhdf0b3b02001-06-23 11:36:20 +0000808 page_ref(pPg);
drhed7c8552001-04-11 14:29:21 +0000809 }
810 *ppPage = PGHDR_TO_DATA(pPg);
811 return SQLITE_OK;
812}
813
814/*
drh7e3b0a02001-04-28 16:52:40 +0000815** Acquire a page if it is already in the in-memory cache. Do
816** not read the page from disk. Return a pointer to the page,
817** or 0 if the page is not in cache.
818**
819** See also sqlitepager_get(). The difference between this routine
820** and sqlitepager_get() is that _get() will go to the disk and read
821** in the page if the page is not already in cache. This routine
drh5e00f6c2001-09-13 13:46:56 +0000822** returns NULL if the page is not in cache or if a disk I/O error
823** has ever happened.
drh7e3b0a02001-04-28 16:52:40 +0000824*/
825void *sqlitepager_lookup(Pager *pPager, Pgno pgno){
826 PgHdr *pPg;
827
828 /* Make sure we have not hit any critical errors.
829 */
830 if( pPager==0 || pgno==0 ){
831 return 0;
832 }
833 if( pPager->errMask & ~(PAGER_ERR_FULL) ){
834 return 0;
835 }
836 if( pPager->nRef==0 ){
837 return 0;
838 }
839 pPg = pager_lookup(pPager, pgno);
840 if( pPg==0 ) return 0;
drhdf0b3b02001-06-23 11:36:20 +0000841 page_ref(pPg);
drh7e3b0a02001-04-28 16:52:40 +0000842 return PGHDR_TO_DATA(pPg);
843}
844
845/*
drhed7c8552001-04-11 14:29:21 +0000846** Release a page.
847**
848** If the number of references to the page drop to zero, then the
849** page is added to the LRU list. When all references to all pages
drhd9b02572001-04-15 00:37:09 +0000850** are released, a rollback occurs and the lock on the database is
drhed7c8552001-04-11 14:29:21 +0000851** removed.
852*/
drhd9b02572001-04-15 00:37:09 +0000853int sqlitepager_unref(void *pData){
drhed7c8552001-04-11 14:29:21 +0000854 PgHdr *pPg;
drhd9b02572001-04-15 00:37:09 +0000855
856 /* Decrement the reference count for this page
857 */
drhed7c8552001-04-11 14:29:21 +0000858 pPg = DATA_TO_PGHDR(pData);
859 assert( pPg->nRef>0 );
drhed7c8552001-04-11 14:29:21 +0000860 pPg->nRef--;
drhdd793422001-06-28 01:54:48 +0000861 REFINFO(pPg);
drhd9b02572001-04-15 00:37:09 +0000862
drh72f82862001-05-24 21:06:34 +0000863 /* When the number of references to a page reach 0, call the
864 ** destructor and add the page to the freelist.
drhd9b02572001-04-15 00:37:09 +0000865 */
drhed7c8552001-04-11 14:29:21 +0000866 if( pPg->nRef==0 ){
drh1eaa2692001-09-18 02:02:23 +0000867 Pager *pPager;
868 pPager = pPg->pPager;
drhd9b02572001-04-15 00:37:09 +0000869 pPg->pNextFree = 0;
870 pPg->pPrevFree = pPager->pLast;
drhed7c8552001-04-11 14:29:21 +0000871 pPager->pLast = pPg;
drhd9b02572001-04-15 00:37:09 +0000872 if( pPg->pPrevFree ){
873 pPg->pPrevFree->pNextFree = pPg;
drhed7c8552001-04-11 14:29:21 +0000874 }else{
875 pPager->pFirst = pPg;
876 }
drh72f82862001-05-24 21:06:34 +0000877 if( pPager->xDestructor ){
878 pPager->xDestructor(pData);
879 }
drhd9b02572001-04-15 00:37:09 +0000880
881 /* When all pages reach the freelist, drop the read lock from
882 ** the database file.
883 */
884 pPager->nRef--;
885 assert( pPager->nRef>=0 );
886 if( pPager->nRef==0 ){
887 pager_reset(pPager);
888 }
drhed7c8552001-04-11 14:29:21 +0000889 }
drhd9b02572001-04-15 00:37:09 +0000890 return SQLITE_OK;
drhed7c8552001-04-11 14:29:21 +0000891}
892
893/*
894** Mark a data page as writeable. The page is written into the journal
895** if it is not there already. This routine must be called before making
896** changes to a page.
897**
898** The first time this routine is called, the pager creates a new
899** journal and acquires a write lock on the database. If the write
900** lock could not be acquired, this routine returns SQLITE_BUSY. The
drh306dc212001-05-21 13:45:10 +0000901** calling routine must check for that return value and be careful not to
drhed7c8552001-04-11 14:29:21 +0000902** change any page data until this routine returns SQLITE_OK.
drhd9b02572001-04-15 00:37:09 +0000903**
904** If the journal file could not be written because the disk is full,
905** then this routine returns SQLITE_FULL and does an immediate rollback.
906** All subsequent write attempts also return SQLITE_FULL until there
907** is a call to sqlitepager_commit() or sqlitepager_rollback() to
908** reset.
drhed7c8552001-04-11 14:29:21 +0000909*/
drhd9b02572001-04-15 00:37:09 +0000910int sqlitepager_write(void *pData){
drh69688d52001-04-14 16:38:23 +0000911 PgHdr *pPg = DATA_TO_PGHDR(pData);
912 Pager *pPager = pPg->pPager;
drhd79caeb2001-04-15 02:27:24 +0000913 int rc = SQLITE_OK;
drh69688d52001-04-14 16:38:23 +0000914
drhd9b02572001-04-15 00:37:09 +0000915 if( pPager->errMask ){
916 return pager_errcode(pPager);
917 }
drh5e00f6c2001-09-13 13:46:56 +0000918 if( pPager->readOnly ){
919 return SQLITE_PERM;
920 }
drhd9b02572001-04-15 00:37:09 +0000921 pPg->dirty = 1;
drh69688d52001-04-14 16:38:23 +0000922 if( pPg->inJournal ){ return SQLITE_OK; }
drhd9b02572001-04-15 00:37:09 +0000923 assert( pPager->state!=SQLITE_UNLOCK );
drhed7c8552001-04-11 14:29:21 +0000924 if( pPager->state==SQLITE_READLOCK ){
drh6019e162001-07-02 17:51:45 +0000925 assert( pPager->aInJournal==0 );
drha7fcb052001-12-14 15:09:55 +0000926 rc = sqliteOsWriteLock(&pPager->fd);
927 if( rc!=SQLITE_OK ){
928 return rc;
929 }
drh6019e162001-07-02 17:51:45 +0000930 pPager->aInJournal = sqliteMalloc( pPager->dbSize/8 + 1 );
931 if( pPager->aInJournal==0 ){
drha7fcb052001-12-14 15:09:55 +0000932 sqliteOsReadLock(&pPager->fd);
drh6019e162001-07-02 17:51:45 +0000933 return SQLITE_NOMEM;
934 }
drh8cfbf082001-09-19 13:22:39 +0000935 rc = sqliteOsOpenExclusive(pPager->zJournal, &pPager->jfd);
936 if( rc!=SQLITE_OK ){
drh6d4abfb2001-10-22 02:58:08 +0000937 sqliteFree(pPager->aInJournal);
drha7fcb052001-12-14 15:09:55 +0000938 pPager->aInJournal = 0;
939 sqliteOsReadLock(&pPager->fd);
drhed7c8552001-04-11 14:29:21 +0000940 return SQLITE_CANTOPEN;
941 }
drh8cfbf082001-09-19 13:22:39 +0000942 pPager->journalOpen = 1;
drhf57b14a2001-09-14 18:54:08 +0000943 pPager->needSync = 0;
drhed7c8552001-04-11 14:29:21 +0000944 pPager->state = SQLITE_WRITELOCK;
drhd9b02572001-04-15 00:37:09 +0000945 sqlitepager_pagecount(pPager);
drh69688d52001-04-14 16:38:23 +0000946 pPager->origDbSize = pPager->dbSize;
drha7fcb052001-12-14 15:09:55 +0000947 rc = sqliteOsWrite(&pPager->jfd, aJournalMagic, sizeof(aJournalMagic));
drhd9b02572001-04-15 00:37:09 +0000948 if( rc==SQLITE_OK ){
drha7fcb052001-12-14 15:09:55 +0000949 rc = sqliteOsWrite(&pPager->jfd, &pPager->dbSize, sizeof(Pgno));
drhd9b02572001-04-15 00:37:09 +0000950 }
951 if( rc!=SQLITE_OK ){
952 rc = pager_unwritelock(pPager);
953 if( rc==SQLITE_OK ) rc = SQLITE_FULL;
954 return rc;
955 }
drhed7c8552001-04-11 14:29:21 +0000956 }
drhd9b02572001-04-15 00:37:09 +0000957 assert( pPager->state==SQLITE_WRITELOCK );
drh8cfbf082001-09-19 13:22:39 +0000958 assert( pPager->journalOpen );
drhd9b02572001-04-15 00:37:09 +0000959 if( pPg->pgno <= pPager->origDbSize ){
drha7fcb052001-12-14 15:09:55 +0000960 rc = sqliteOsWrite(&pPager->jfd, &pPg->pgno, sizeof(Pgno));
drhd9b02572001-04-15 00:37:09 +0000961 if( rc==SQLITE_OK ){
drha7fcb052001-12-14 15:09:55 +0000962 rc = sqliteOsWrite(&pPager->jfd, pData, SQLITE_PAGE_SIZE);
drhd9b02572001-04-15 00:37:09 +0000963 }
964 if( rc!=SQLITE_OK ){
965 sqlitepager_rollback(pPager);
966 pPager->errMask |= PAGER_ERR_FULL;
967 return rc;
968 }
drh6019e162001-07-02 17:51:45 +0000969 assert( pPager->aInJournal!=0 );
970 pPager->aInJournal[pPg->pgno/8] |= 1<<(pPg->pgno&7);
drhf57b14a2001-09-14 18:54:08 +0000971 pPager->needSync = 1;
drh69688d52001-04-14 16:38:23 +0000972 }
drh69688d52001-04-14 16:38:23 +0000973 pPg->inJournal = 1;
drh306dc212001-05-21 13:45:10 +0000974 if( pPager->dbSize<pPg->pgno ){
975 pPager->dbSize = pPg->pgno;
976 }
drh69688d52001-04-14 16:38:23 +0000977 return rc;
drhed7c8552001-04-11 14:29:21 +0000978}
979
980/*
drh6019e162001-07-02 17:51:45 +0000981** Return TRUE if the page given in the argument was previous passed
982** to sqlitepager_write(). In other words, return TRUE if it is ok
983** to change the content of the page.
984*/
985int sqlitepager_iswriteable(void *pData){
986 PgHdr *pPg = DATA_TO_PGHDR(pData);
987 return pPg->dirty;
988}
989
990/*
drhed7c8552001-04-11 14:29:21 +0000991** Commit all changes to the database and release the write lock.
drhd9b02572001-04-15 00:37:09 +0000992**
993** If the commit fails for any reason, a rollback attempt is made
994** and an error code is returned. If the commit worked, SQLITE_OK
995** is returned.
drhed7c8552001-04-11 14:29:21 +0000996*/
drhd9b02572001-04-15 00:37:09 +0000997int sqlitepager_commit(Pager *pPager){
drha1b351a2001-09-14 16:42:12 +0000998 int rc;
drhed7c8552001-04-11 14:29:21 +0000999 PgHdr *pPg;
drhd9b02572001-04-15 00:37:09 +00001000
1001 if( pPager->errMask==PAGER_ERR_FULL ){
1002 rc = sqlitepager_rollback(pPager);
1003 if( rc==SQLITE_OK ) rc = SQLITE_FULL;
1004 return rc;
1005 }
1006 if( pPager->errMask!=0 ){
1007 rc = pager_errcode(pPager);
1008 return rc;
1009 }
1010 if( pPager->state!=SQLITE_WRITELOCK ){
1011 return SQLITE_ERROR;
1012 }
drh8cfbf082001-09-19 13:22:39 +00001013 assert( pPager->journalOpen );
drha7fcb052001-12-14 15:09:55 +00001014 if( pPager->needSync && sqliteOsSync(&pPager->jfd)!=SQLITE_OK ){
drhd9b02572001-04-15 00:37:09 +00001015 goto commit_abort;
drhed7c8552001-04-11 14:29:21 +00001016 }
drha1b351a2001-09-14 16:42:12 +00001017 for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
1018 if( pPg->dirty==0 ) continue;
drha7fcb052001-12-14 15:09:55 +00001019 rc = sqliteOsSeek(&pPager->fd, (pPg->pgno-1)*SQLITE_PAGE_SIZE);
drha1b351a2001-09-14 16:42:12 +00001020 if( rc!=SQLITE_OK ) goto commit_abort;
drha7fcb052001-12-14 15:09:55 +00001021 rc = sqliteOsWrite(&pPager->fd, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE);
drha1b351a2001-09-14 16:42:12 +00001022 if( rc!=SQLITE_OK ) goto commit_abort;
drhed7c8552001-04-11 14:29:21 +00001023 }
drha7fcb052001-12-14 15:09:55 +00001024 if( sqliteOsSync(&pPager->fd)!=SQLITE_OK ) goto commit_abort;
drhd9b02572001-04-15 00:37:09 +00001025 rc = pager_unwritelock(pPager);
1026 pPager->dbSize = -1;
1027 return rc;
1028
1029 /* Jump here if anything goes wrong during the commit process.
1030 */
1031commit_abort:
1032 rc = sqlitepager_rollback(pPager);
1033 if( rc==SQLITE_OK ){
1034 rc = SQLITE_FULL;
drhed7c8552001-04-11 14:29:21 +00001035 }
drhed7c8552001-04-11 14:29:21 +00001036 return rc;
1037}
1038
1039/*
1040** Rollback all changes. The database falls back to read-only mode.
1041** All in-memory cache pages revert to their original data contents.
1042** The journal is deleted.
drhd9b02572001-04-15 00:37:09 +00001043**
1044** This routine cannot fail unless some other process is not following
1045** the correct locking protocol (SQLITE_PROTOCOL) or unless some other
1046** process is writing trash into the journal file (SQLITE_CORRUPT) or
1047** unless a prior malloc() failed (SQLITE_NOMEM). Appropriate error
1048** codes are returned for all these occasions. Otherwise,
1049** SQLITE_OK is returned.
drhed7c8552001-04-11 14:29:21 +00001050*/
drhd9b02572001-04-15 00:37:09 +00001051int sqlitepager_rollback(Pager *pPager){
drhed7c8552001-04-11 14:29:21 +00001052 int rc;
drhd9b02572001-04-15 00:37:09 +00001053 if( pPager->errMask!=0 && pPager->errMask!=PAGER_ERR_FULL ){
1054 return pager_errcode(pPager);
drhed7c8552001-04-11 14:29:21 +00001055 }
drhd9b02572001-04-15 00:37:09 +00001056 if( pPager->state!=SQLITE_WRITELOCK ){
1057 return SQLITE_OK;
1058 }
1059 rc = pager_playback(pPager);
1060 if( rc!=SQLITE_OK ){
1061 rc = SQLITE_CORRUPT;
1062 pPager->errMask |= PAGER_ERR_CORRUPT;
1063 }
1064 pPager->dbSize = -1;
drhed7c8552001-04-11 14:29:21 +00001065 return rc;
drh98808ba2001-10-18 12:34:46 +00001066}
drhd9b02572001-04-15 00:37:09 +00001067
1068/*
drh5e00f6c2001-09-13 13:46:56 +00001069** Return TRUE if the database file is opened read-only. Return FALSE
1070** if the database is (in theory) writable.
1071*/
1072int sqlitepager_isreadonly(Pager *pPager){
drhbe0072d2001-09-13 14:46:09 +00001073 return pPager->readOnly;
drh5e00f6c2001-09-13 13:46:56 +00001074}
1075
1076/*
drhd9b02572001-04-15 00:37:09 +00001077** This routine is used for testing and analysis only.
1078*/
1079int *sqlitepager_stats(Pager *pPager){
1080 static int a[9];
1081 a[0] = pPager->nRef;
1082 a[1] = pPager->nPage;
1083 a[2] = pPager->mxPage;
1084 a[3] = pPager->dbSize;
1085 a[4] = pPager->state;
1086 a[5] = pPager->errMask;
1087 a[6] = pPager->nHit;
1088 a[7] = pPager->nMiss;
1089 a[8] = pPager->nOvfl;
1090 return a;
1091}
drhdd793422001-06-28 01:54:48 +00001092
#if SQLITE_TEST
/*
** Print a listing of all referenced pages and their ref count.
**
** One line is written to standard output for every page on the pager's
** list of all pages whose reference count is greater than zero.  The
** data address is printed with %p so that it is not truncated on
** platforms where a pointer is wider than an int.
*/
void sqlitepager_refdump(Pager *pPager){
  PgHdr *pPg;
  for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
    if( pPg->nRef<=0 ) continue;
    printf("PAGE %3d addr=%p nRef=%d\n",
       (int)pPg->pgno, (void*)PGHDR_TO_DATA(pPg), pPg->nRef);
  }
}
#endif
1105#endif