blob: a620a3a3309faf0c291d1baa4cfe784899a616bc [file] [log] [blame]
drhed7c8552001-04-11 14:29:21 +00001/*
drhb19a2bc2001-09-16 00:13:26 +00002** 2001 September 15
drhed7c8552001-04-11 14:29:21 +00003**
drhb19a2bc2001-09-16 00:13:26 +00004** The author disclaims copyright to this source code. In place of
5** a legal notice, here is a blessing:
drhed7c8552001-04-11 14:29:21 +00006**
drhb19a2bc2001-09-16 00:13:26 +00007** May you do good and not evil.
8** May you find forgiveness for yourself and forgive others.
9** May you share freely, never taking more than you give.
drhed7c8552001-04-11 14:29:21 +000010**
11*************************************************************************
drhb19a2bc2001-09-16 00:13:26 +000012** This is the implementation of the page cache subsystem or "pager".
drhed7c8552001-04-11 14:29:21 +000013**
drhb19a2bc2001-09-16 00:13:26 +000014** The pager is used to access a database disk file. It implements
15** atomic commit and rollback through the use of a journal file that
16** is separate from the database file. The pager also implements file
17** locking to prevent two processes from writing the same database
18** file simultaneously, or one process from reading the database while
19** another is writing.
drhed7c8552001-04-11 14:29:21 +000020**
drhc3a64ba2001-11-22 00:01:27 +000021** @(#) $Id: pager.c,v 1.31 2001/11/22 00:01:27 drh Exp $
drhed7c8552001-04-11 14:29:21 +000022*/
drhd9b02572001-04-15 00:37:09 +000023#include "sqliteInt.h"
drhed7c8552001-04-11 14:29:21 +000024#include "pager.h"
drh8cfbf082001-09-19 13:22:39 +000025#include "os.h"
drhed7c8552001-04-11 14:29:21 +000026#include <assert.h>
drhd9b02572001-04-15 00:37:09 +000027#include <string.h>
drhed7c8552001-04-11 14:29:21 +000028
/*
** Lock state of the page cache.  The cache as a whole is always in
** exactly one of the following states:
**
**   SQLITE_UNLOCK      The page cache is neither reading nor writing
**                      the database file and holds no data in memory.
**                      This is the initial state.
**
**   SQLITE_READLOCK    The page cache is reading the database.  Writing
**                      is not permitted.  Several readers may access
**                      the same database file at the same time.
**
**   SQLITE_WRITELOCK   The page cache is writing the database.  Access
**                      is exclusive: no other process or thread may
**                      read or write while one process is writing.
**
** State transitions:
**
**   *  The cache comes up in SQLITE_UNLOCK.
**   *  The first sqlitepager_get() moves it to SQLITE_READLOCK.
**   *  When every page has been released again the state drops back
**      to SQLITE_UNLOCK.
**   *  The first write to a page moves the state to SQLITE_WRITELOCK.
**      Only an outstanding page can be written, so the pager is always
**      in SQLITE_READLOCK before it reaches SQLITE_WRITELOCK.
**   *  Commit and rollback move the state from SQLITE_WRITELOCK back
**      to SQLITE_READLOCK.
*/
#define SQLITE_UNLOCK      0
#define SQLITE_READLOCK    1
#define SQLITE_WRITELOCK   2
62
drhd9b02572001-04-15 00:37:09 +000063
drhed7c8552001-04-11 14:29:21 +000064/*
65** Each in-memory image of a page begins with the following header.
drhbd03cae2001-06-02 02:40:57 +000066** This header is only visible to this pager module. The client
67** code that calls pager sees only the data that follows the header.
drhed7c8552001-04-11 14:29:21 +000068*/
drhd9b02572001-04-15 00:37:09 +000069typedef struct PgHdr PgHdr;
drhed7c8552001-04-11 14:29:21 +000070struct PgHdr {
71 Pager *pPager; /* The pager to which this page belongs */
72 Pgno pgno; /* The page number for this page */
drh69688d52001-04-14 16:38:23 +000073 PgHdr *pNextHash, *pPrevHash; /* Hash collision chain for PgHdr.pgno */
drhed7c8552001-04-11 14:29:21 +000074 int nRef; /* Number of users of this page */
drhd9b02572001-04-15 00:37:09 +000075 PgHdr *pNextFree, *pPrevFree; /* Freelist of pages where nRef==0 */
76 PgHdr *pNextAll, *pPrevAll; /* A list of all pages */
drhed7c8552001-04-11 14:29:21 +000077 char inJournal; /* TRUE if has been written to journal */
78 char dirty; /* TRUE if we need to write back changes */
drh69688d52001-04-14 16:38:23 +000079 /* SQLITE_PAGE_SIZE bytes of page data follow this header */
drh7e3b0a02001-04-28 16:52:40 +000080 /* Pager.nExtra bytes of local data follow the page data */
drhed7c8552001-04-11 14:29:21 +000081};
82
/*
** Pointer conversions between a page header and the memory that
** follows it:
**
**   PGHDR_TO_DATA(P)    header pointer  ->  start of the page data
**   DATA_TO_PGHDR(D)    page data       ->  the header in front of it
**   PGHDR_TO_EXTRA(P)   header pointer  ->  the extra bytes located
**                                           SQLITE_PAGE_SIZE bytes past
**                                           the start of the page data
*/
#define PGHDR_TO_DATA(P)    ((void*)((P)+1))
#define DATA_TO_PGHDR(D)    (((PgHdr*)(D))-1)
#define PGHDR_TO_EXTRA(P)   ((void*)(((char*)((P)+1))+SQLITE_PAGE_SIZE))
drhed7c8552001-04-11 14:29:21 +000090
/*
** Number of buckets in the hash table that maps a page number to its
** in-memory page.  Knuth says this should be a prime number.
*/
#define N_PG_HASH   373
drhed7c8552001-04-11 14:29:21 +000096
97/*
98** A open page cache is an instance of the following structure.
99*/
100struct Pager {
101 char *zFilename; /* Name of the database file */
102 char *zJournal; /* Name of the journal file */
drh8cfbf082001-09-19 13:22:39 +0000103 OsFile fd, jfd; /* File descriptors for database and journal */
104 int journalOpen; /* True if journal file descriptors is valid */
drhed7c8552001-04-11 14:29:21 +0000105 int dbSize; /* Number of pages in the file */
drh69688d52001-04-14 16:38:23 +0000106 int origDbSize; /* dbSize before the current change */
drh7e3b0a02001-04-28 16:52:40 +0000107 int nExtra; /* Add this many bytes to each in-memory page */
drh72f82862001-05-24 21:06:34 +0000108 void (*xDestructor)(void*); /* Call this routine when freeing pages */
drhed7c8552001-04-11 14:29:21 +0000109 int nPage; /* Total number of in-memory pages */
drhd9b02572001-04-15 00:37:09 +0000110 int nRef; /* Number of in-memory pages with PgHdr.nRef>0 */
drhed7c8552001-04-11 14:29:21 +0000111 int mxPage; /* Maximum number of pages to hold in cache */
drhd9b02572001-04-15 00:37:09 +0000112 int nHit, nMiss, nOvfl; /* Cache hits, missing, and LRU overflows */
113 unsigned char state; /* SQLITE_UNLOCK, _READLOCK or _WRITELOCK */
114 unsigned char errMask; /* One of several kinds of errors */
drh5e00f6c2001-09-13 13:46:56 +0000115 unsigned char tempFile; /* zFilename is a temporary file */
116 unsigned char readOnly; /* True for a read-only database */
drhf57b14a2001-09-14 18:54:08 +0000117 unsigned char needSync; /* True if an fsync() is needed on the journal */
drh6019e162001-07-02 17:51:45 +0000118 unsigned char *aInJournal; /* One bit for each page in the database file */
drhed7c8552001-04-11 14:29:21 +0000119 PgHdr *pFirst, *pLast; /* List of free pages */
drhd9b02572001-04-15 00:37:09 +0000120 PgHdr *pAll; /* List of all pages */
drhed7c8552001-04-11 14:29:21 +0000121 PgHdr *aHash[N_PG_HASH]; /* Hash table to map page number of PgHdr */
drhd9b02572001-04-15 00:37:09 +0000122};
123
/*
** Bit values that may be OR-ed into Pager.errMask.  Each bit records
** one kind of failure seen by the pager.
*/
#define PAGER_ERR_FULL      0x01   /* a write() failed */
#define PAGER_ERR_MEM       0x02   /* malloc() failed */
#define PAGER_ERR_LOCK      0x04   /* error in the locking protocol */
#define PAGER_ERR_CORRUPT   0x08   /* database or journal corruption */
#define PAGER_ERR_DISK      0x10   /* general disk I/O error - bad hard drive? */
drhd9b02572001-04-15 00:37:09 +0000132
133/*
134** The journal file contains page records in the following
135** format.
136*/
137typedef struct PageRecord PageRecord;
138struct PageRecord {
139 Pgno pgno; /* The page number */
140 char aData[SQLITE_PAGE_SIZE]; /* Original data for page pgno */
141};
142
143/*
drh5e00f6c2001-09-13 13:46:56 +0000144** Journal files begin with the following magic string. The data
145** was obtained from /dev/random. It is used only as a sanity check.
drhd9b02572001-04-15 00:37:09 +0000146*/
147static const unsigned char aJournalMagic[] = {
148 0xd9, 0xd5, 0x05, 0xf9, 0x20, 0xa1, 0x63, 0xd4,
drhed7c8552001-04-11 14:29:21 +0000149};
150
/*
** Map a page number onto a bucket index of the page hash table.
*/
#define pager_hash(PN)   ((PN) % N_PG_HASH)
drhed7c8552001-04-11 14:29:21 +0000155
156/*
drhdd793422001-06-28 01:54:48 +0000157** Enable reference count tracking here:
158*/
159#if SQLITE_TEST
drh5e00f6c2001-09-13 13:46:56 +0000160 int pager_refinfo_enable = 0;
drhdd793422001-06-28 01:54:48 +0000161 static void pager_refinfo(PgHdr *p){
162 static int cnt = 0;
163 if( !pager_refinfo_enable ) return;
164 printf(
165 "REFCNT: %4d addr=0x%08x nRef=%d\n",
166 p->pgno, (int)PGHDR_TO_DATA(p), p->nRef
167 );
168 cnt++; /* Something to set a breakpoint on */
169 }
170# define REFINFO(X) pager_refinfo(X)
171#else
172# define REFINFO(X)
173#endif
174
175/*
drhd9b02572001-04-15 00:37:09 +0000176** Convert the bits in the pPager->errMask into an approprate
177** return code.
178*/
179static int pager_errcode(Pager *pPager){
180 int rc = SQLITE_OK;
181 if( pPager->errMask & PAGER_ERR_LOCK ) rc = SQLITE_PROTOCOL;
drh81a20f22001-10-12 17:30:04 +0000182 if( pPager->errMask & PAGER_ERR_DISK ) rc = SQLITE_IOERR;
drhd9b02572001-04-15 00:37:09 +0000183 if( pPager->errMask & PAGER_ERR_FULL ) rc = SQLITE_FULL;
184 if( pPager->errMask & PAGER_ERR_MEM ) rc = SQLITE_NOMEM;
185 if( pPager->errMask & PAGER_ERR_CORRUPT ) rc = SQLITE_CORRUPT;
186 return rc;
drhed7c8552001-04-11 14:29:21 +0000187}
188
189/*
190** Find a page in the hash table given its page number. Return
191** a pointer to the page or NULL if not found.
192*/
drhd9b02572001-04-15 00:37:09 +0000193static PgHdr *pager_lookup(Pager *pPager, Pgno pgno){
drhed7c8552001-04-11 14:29:21 +0000194 PgHdr *p = pPager->aHash[pgno % N_PG_HASH];
195 while( p && p->pgno!=pgno ){
196 p = p->pNextHash;
197 }
198 return p;
199}
200
201/*
202** Unlock the database and clear the in-memory cache. This routine
203** sets the state of the pager back to what it was when it was first
204** opened. Any outstanding pages are invalidated and subsequent attempts
205** to access those pages will likely result in a coredump.
206*/
drhd9b02572001-04-15 00:37:09 +0000207static void pager_reset(Pager *pPager){
drhed7c8552001-04-11 14:29:21 +0000208 PgHdr *pPg, *pNext;
drhd9b02572001-04-15 00:37:09 +0000209 for(pPg=pPager->pAll; pPg; pPg=pNext){
210 pNext = pPg->pNextAll;
211 sqliteFree(pPg);
drhed7c8552001-04-11 14:29:21 +0000212 }
213 pPager->pFirst = 0;
drhd9b02572001-04-15 00:37:09 +0000214 pPager->pLast = 0;
215 pPager->pAll = 0;
drhed7c8552001-04-11 14:29:21 +0000216 memset(pPager->aHash, 0, sizeof(pPager->aHash));
217 pPager->nPage = 0;
218 if( pPager->state==SQLITE_WRITELOCK ){
drhd9b02572001-04-15 00:37:09 +0000219 sqlitepager_rollback(pPager);
drhed7c8552001-04-11 14:29:21 +0000220 }
drh8cfbf082001-09-19 13:22:39 +0000221 sqliteOsUnlock(pPager->fd);
drhed7c8552001-04-11 14:29:21 +0000222 pPager->state = SQLITE_UNLOCK;
drhd9b02572001-04-15 00:37:09 +0000223 pPager->dbSize = -1;
drhed7c8552001-04-11 14:29:21 +0000224 pPager->nRef = 0;
drh8cfbf082001-09-19 13:22:39 +0000225 assert( pPager->journalOpen==0 );
drhed7c8552001-04-11 14:29:21 +0000226}
227
228/*
229** When this routine is called, the pager has the journal file open and
230** a write lock on the database. This routine releases the database
231** write lock and acquires a read lock in its place. The journal file
232** is deleted and closed.
233**
234** We have to release the write lock before acquiring the read lock,
235** so there is a race condition where another process can get the lock
236** while we are not holding it. But, no other process should do this
237** because we are also holding a lock on the journal, and no process
238** should get a write lock on the database without first getting a lock
239** on the journal. So this routine should never fail. But it can fail
240** if another process is not playing by the rules. If it does fail,
drhd9b02572001-04-15 00:37:09 +0000241** all in-memory cache pages are invalidated, the PAGER_ERR_LOCK bit
242** is set in pPager->errMask, and this routine returns SQLITE_PROTOCOL.
243** SQLITE_OK is returned on success.
drhed7c8552001-04-11 14:29:21 +0000244*/
drhd9b02572001-04-15 00:37:09 +0000245static int pager_unwritelock(Pager *pPager){
drhed7c8552001-04-11 14:29:21 +0000246 int rc;
drhd9b02572001-04-15 00:37:09 +0000247 PgHdr *pPg;
248 if( pPager->state!=SQLITE_WRITELOCK ) return SQLITE_OK;
drh8cfbf082001-09-19 13:22:39 +0000249 sqliteOsUnlock(pPager->fd);
250 rc = sqliteOsLock(pPager->fd, 0);
251 sqliteOsClose(pPager->jfd);
252 pPager->journalOpen = 0;
253 sqliteOsDelete(pPager->zJournal);
drh6019e162001-07-02 17:51:45 +0000254 sqliteFree( pPager->aInJournal );
255 pPager->aInJournal = 0;
drhd9b02572001-04-15 00:37:09 +0000256 for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
257 pPg->inJournal = 0;
258 pPg->dirty = 0;
259 }
drhed7c8552001-04-11 14:29:21 +0000260 if( rc!=SQLITE_OK ){
261 pPager->state = SQLITE_UNLOCK;
drhed7c8552001-04-11 14:29:21 +0000262 rc = SQLITE_PROTOCOL;
drhd9b02572001-04-15 00:37:09 +0000263 pPager->errMask |= PAGER_ERR_LOCK;
drhed7c8552001-04-11 14:29:21 +0000264 }else{
drhd9b02572001-04-15 00:37:09 +0000265 rc = SQLITE_OK;
drhed7c8552001-04-11 14:29:21 +0000266 pPager->state = SQLITE_READLOCK;
267 }
268 return rc;
269}
270
drhed7c8552001-04-11 14:29:21 +0000271/*
272** Playback the journal and thus restore the database file to
273** the state it was in before we started making changes.
274**
drhd9b02572001-04-15 00:37:09 +0000275** The journal file format is as follows: There is an initial
276** file-type string for sanity checking. Then there is a single
277** Pgno number which is the number of pages in the database before
278** changes were made. The database is truncated to this size.
drh306dc212001-05-21 13:45:10 +0000279** Next come zero or more page records where each page record
280** consists of a Pgno and SQLITE_PAGE_SIZE bytes of data. See
281** the PageRecord structure for details.
drhed7c8552001-04-11 14:29:21 +0000282**
drhd9b02572001-04-15 00:37:09 +0000283** For playback, the pages have to be read from the journal in
284** reverse order and put back into the original database file.
drhed7c8552001-04-11 14:29:21 +0000285**
drhd9b02572001-04-15 00:37:09 +0000286** If the file opened as the journal file is not a well-formed
287** journal file (as determined by looking at the magic number
288** at the beginning) then this routine returns SQLITE_PROTOCOL.
289** If any other errors occur during playback, the database will
290** likely be corrupted, so the PAGER_ERR_CORRUPT bit is set in
291** pPager->errMask and SQLITE_CORRUPT is returned. If it all
292** works, then this routine returns SQLITE_OK.
drhed7c8552001-04-11 14:29:21 +0000293*/
drhd9b02572001-04-15 00:37:09 +0000294static int pager_playback(Pager *pPager){
295 int nRec; /* Number of Records */
296 int i; /* Loop counter */
297 Pgno mxPg = 0; /* Size of the original file in pages */
drhd9b02572001-04-15 00:37:09 +0000298 PgHdr *pPg; /* An existing page in the cache */
299 PageRecord pgRec;
300 unsigned char aMagic[sizeof(aJournalMagic)];
drhed7c8552001-04-11 14:29:21 +0000301 int rc;
302
drhc3a64ba2001-11-22 00:01:27 +0000303 /* Figure out how many records are in the journal. Abort early if
304 ** the journal is empty.
drhed7c8552001-04-11 14:29:21 +0000305 */
drh8cfbf082001-09-19 13:22:39 +0000306 assert( pPager->journalOpen );
307 sqliteOsSeek(pPager->jfd, 0);
drhc3a64ba2001-11-22 00:01:27 +0000308 rc = sqliteOsFileSize(pPager->jfd, &nRec);
309 if( rc!=SQLITE_OK ){
310 goto end_playback;
311 }
312 nRec = (nRec - (sizeof(aMagic)+sizeof(Pgno))) / sizeof(PageRecord);
313 if( nRec<=0 ){
314 goto end_playback;
315 }
316
317 /* Read the beginning of the journal and truncate the
318 ** database file back to its original size.
319 */
drh8cfbf082001-09-19 13:22:39 +0000320 rc = sqliteOsRead(pPager->jfd, aMagic, sizeof(aMagic));
drhd9b02572001-04-15 00:37:09 +0000321 if( rc!=SQLITE_OK || memcmp(aMagic,aJournalMagic,sizeof(aMagic))!=0 ){
drh81a20f22001-10-12 17:30:04 +0000322 rc = SQLITE_PROTOCOL;
323 goto end_playback;
drhd9b02572001-04-15 00:37:09 +0000324 }
drh8cfbf082001-09-19 13:22:39 +0000325 rc = sqliteOsRead(pPager->jfd, &mxPg, sizeof(mxPg));
drhd9b02572001-04-15 00:37:09 +0000326 if( rc!=SQLITE_OK ){
drh81a20f22001-10-12 17:30:04 +0000327 goto end_playback;
drhd9b02572001-04-15 00:37:09 +0000328 }
drh81a20f22001-10-12 17:30:04 +0000329 rc = sqliteOsTruncate(pPager->fd, mxPg*SQLITE_PAGE_SIZE);
330 if( rc!=SQLITE_OK ){
331 goto end_playback;
332 }
drhd9b02572001-04-15 00:37:09 +0000333 pPager->dbSize = mxPg;
334
drhed7c8552001-04-11 14:29:21 +0000335 /* Process segments beginning with the last and working backwards
336 ** to the first.
337 */
drhd9b02572001-04-15 00:37:09 +0000338 for(i=nRec-1; i>=0; i--){
drhed7c8552001-04-11 14:29:21 +0000339 /* Seek to the beginning of the segment */
drh254cba22001-09-20 01:44:42 +0000340 int ofst;
drhd9b02572001-04-15 00:37:09 +0000341 ofst = i*sizeof(PageRecord) + sizeof(aMagic) + sizeof(Pgno);
drh8cfbf082001-09-19 13:22:39 +0000342 rc = sqliteOsSeek(pPager->jfd, ofst);
drhd9b02572001-04-15 00:37:09 +0000343 if( rc!=SQLITE_OK ) break;
drh8cfbf082001-09-19 13:22:39 +0000344 rc = sqliteOsRead(pPager->jfd, &pgRec, sizeof(pgRec));
drhd9b02572001-04-15 00:37:09 +0000345 if( rc!=SQLITE_OK ) break;
drhed7c8552001-04-11 14:29:21 +0000346
drhd9b02572001-04-15 00:37:09 +0000347 /* Sanity checking on the page */
348 if( pgRec.pgno>mxPg || pgRec.pgno==0 ){
349 rc = SQLITE_CORRUPT;
350 break;
drhed7c8552001-04-11 14:29:21 +0000351 }
352
drhd9b02572001-04-15 00:37:09 +0000353 /* Playback the page. Update the in-memory copy of the page
354 ** at the same time, if there is one.
drhed7c8552001-04-11 14:29:21 +0000355 */
drhd9b02572001-04-15 00:37:09 +0000356 pPg = pager_lookup(pPager, pgRec.pgno);
357 if( pPg ){
358 memcpy(PGHDR_TO_DATA(pPg), pgRec.aData, SQLITE_PAGE_SIZE);
drh6019e162001-07-02 17:51:45 +0000359 memset(PGHDR_TO_EXTRA(pPg), 0, pPager->nExtra);
drhed7c8552001-04-11 14:29:21 +0000360 }
drh8cfbf082001-09-19 13:22:39 +0000361 rc = sqliteOsSeek(pPager->fd, (pgRec.pgno-1)*SQLITE_PAGE_SIZE);
drhd9b02572001-04-15 00:37:09 +0000362 if( rc!=SQLITE_OK ) break;
drh8cfbf082001-09-19 13:22:39 +0000363 rc = sqliteOsWrite(pPager->fd, pgRec.aData, SQLITE_PAGE_SIZE);
drhd9b02572001-04-15 00:37:09 +0000364 if( rc!=SQLITE_OK ) break;
drhed7c8552001-04-11 14:29:21 +0000365 }
drh81a20f22001-10-12 17:30:04 +0000366
367end_playback:
drhd9b02572001-04-15 00:37:09 +0000368 if( rc!=SQLITE_OK ){
369 pager_unwritelock(pPager);
370 pPager->errMask |= PAGER_ERR_CORRUPT;
371 rc = SQLITE_CORRUPT;
372 }else{
373 rc = pager_unwritelock(pPager);
drhed7c8552001-04-11 14:29:21 +0000374 }
drhd9b02572001-04-15 00:37:09 +0000375 return rc;
drhed7c8552001-04-11 14:29:21 +0000376}
377
378/*
drhf57b14a2001-09-14 18:54:08 +0000379** Change the maximum number of in-memory pages that are allowed.
380*/
381void sqlitepager_set_cachesize(Pager *pPager, int mxPage){
382 if( mxPage>10 ){
383 pPager->mxPage = mxPage;
384 }
385}
386
387/*
drhed7c8552001-04-11 14:29:21 +0000388** Create a new page cache and put a pointer to the page cache in *ppPager.
drh5e00f6c2001-09-13 13:46:56 +0000389** The file to be cached need not exist. The file is not locked until
drhd9b02572001-04-15 00:37:09 +0000390** the first call to sqlitepager_get() and is only held open until the
391** last page is released using sqlitepager_unref().
drh382c0242001-10-06 16:33:02 +0000392**
393** If zFilename is NULL then a random temporary file is created and used
394** as the file to be cached. The file will be deleted automatically when
395** it is closed.
drhed7c8552001-04-11 14:29:21 +0000396*/
drh7e3b0a02001-04-28 16:52:40 +0000397int sqlitepager_open(
398 Pager **ppPager, /* Return the Pager structure here */
399 const char *zFilename, /* Name of the database file to open */
400 int mxPage, /* Max number of in-memory cache pages */
401 int nExtra /* Extra bytes append to each in-memory page */
402){
drhed7c8552001-04-11 14:29:21 +0000403 Pager *pPager;
404 int nameLen;
drh8cfbf082001-09-19 13:22:39 +0000405 OsFile fd;
406 int rc;
drh5e00f6c2001-09-13 13:46:56 +0000407 int tempFile;
408 int readOnly = 0;
drh8cfbf082001-09-19 13:22:39 +0000409 char zTemp[SQLITE_TEMPNAME_SIZE];
drhed7c8552001-04-11 14:29:21 +0000410
drhd9b02572001-04-15 00:37:09 +0000411 *ppPager = 0;
412 if( sqlite_malloc_failed ){
413 return SQLITE_NOMEM;
414 }
drh5e00f6c2001-09-13 13:46:56 +0000415 if( zFilename ){
drh8cfbf082001-09-19 13:22:39 +0000416 rc = sqliteOsOpenReadWrite(zFilename, &fd, &readOnly);
drh5e00f6c2001-09-13 13:46:56 +0000417 tempFile = 0;
418 }else{
419 int cnt = 8;
drh8cfbf082001-09-19 13:22:39 +0000420 sqliteOsTempFileName(zTemp);
drh5e00f6c2001-09-13 13:46:56 +0000421 do{
422 cnt--;
drh8cfbf082001-09-19 13:22:39 +0000423 sqliteOsTempFileName(zTemp);
424 rc = sqliteOsOpenExclusive(zTemp, &fd);
425 }while( cnt>0 && rc!=SQLITE_OK );
drh5e00f6c2001-09-13 13:46:56 +0000426 zFilename = zTemp;
427 tempFile = 1;
428 }
drh8cfbf082001-09-19 13:22:39 +0000429 if( rc!=SQLITE_OK ){
drhed7c8552001-04-11 14:29:21 +0000430 return SQLITE_CANTOPEN;
431 }
432 nameLen = strlen(zFilename);
433 pPager = sqliteMalloc( sizeof(*pPager) + nameLen*2 + 30 );
drhd9b02572001-04-15 00:37:09 +0000434 if( pPager==0 ){
drh8cfbf082001-09-19 13:22:39 +0000435 sqliteOsClose(fd);
drhd9b02572001-04-15 00:37:09 +0000436 return SQLITE_NOMEM;
437 }
drhed7c8552001-04-11 14:29:21 +0000438 pPager->zFilename = (char*)&pPager[1];
439 pPager->zJournal = &pPager->zFilename[nameLen+1];
440 strcpy(pPager->zFilename, zFilename);
441 strcpy(pPager->zJournal, zFilename);
442 strcpy(&pPager->zJournal[nameLen], "-journal");
443 pPager->fd = fd;
drh8cfbf082001-09-19 13:22:39 +0000444 pPager->journalOpen = 0;
drhed7c8552001-04-11 14:29:21 +0000445 pPager->nRef = 0;
446 pPager->dbSize = -1;
447 pPager->nPage = 0;
drhd79caeb2001-04-15 02:27:24 +0000448 pPager->mxPage = mxPage>5 ? mxPage : 10;
drhed7c8552001-04-11 14:29:21 +0000449 pPager->state = SQLITE_UNLOCK;
drhd9b02572001-04-15 00:37:09 +0000450 pPager->errMask = 0;
drh5e00f6c2001-09-13 13:46:56 +0000451 pPager->tempFile = tempFile;
452 pPager->readOnly = readOnly;
drhf57b14a2001-09-14 18:54:08 +0000453 pPager->needSync = 0;
drhed7c8552001-04-11 14:29:21 +0000454 pPager->pFirst = 0;
455 pPager->pLast = 0;
drh7c717f72001-06-24 20:39:41 +0000456 pPager->nExtra = nExtra;
drhed7c8552001-04-11 14:29:21 +0000457 memset(pPager->aHash, 0, sizeof(pPager->aHash));
458 *ppPager = pPager;
459 return SQLITE_OK;
460}
461
462/*
drh72f82862001-05-24 21:06:34 +0000463** Set the destructor for this pager. If not NULL, the destructor is called
drh5e00f6c2001-09-13 13:46:56 +0000464** when the reference count on each page reaches zero. The destructor can
465** be used to clean up information in the extra segment appended to each page.
drh72f82862001-05-24 21:06:34 +0000466**
467** The destructor is not called as a result sqlitepager_close().
468** Destructors are only called by sqlitepager_unref().
469*/
470void sqlitepager_set_destructor(Pager *pPager, void (*xDesc)(void*)){
471 pPager->xDestructor = xDesc;
472}
473
474/*
drh5e00f6c2001-09-13 13:46:56 +0000475** Return the total number of pages in the disk file associated with
476** pPager.
drhed7c8552001-04-11 14:29:21 +0000477*/
drhd9b02572001-04-15 00:37:09 +0000478int sqlitepager_pagecount(Pager *pPager){
drhed7c8552001-04-11 14:29:21 +0000479 int n;
drhd9b02572001-04-15 00:37:09 +0000480 assert( pPager!=0 );
drhed7c8552001-04-11 14:29:21 +0000481 if( pPager->dbSize>=0 ){
482 return pPager->dbSize;
483 }
drh8cfbf082001-09-19 13:22:39 +0000484 if( sqliteOsFileSize(pPager->fd, &n)!=SQLITE_OK ){
drh81a20f22001-10-12 17:30:04 +0000485 pPager->errMask |= PAGER_ERR_DISK;
drh8cfbf082001-09-19 13:22:39 +0000486 return 0;
drhed7c8552001-04-11 14:29:21 +0000487 }
drh8cfbf082001-09-19 13:22:39 +0000488 n /= SQLITE_PAGE_SIZE;
drhd9b02572001-04-15 00:37:09 +0000489 if( pPager->state!=SQLITE_UNLOCK ){
drhed7c8552001-04-11 14:29:21 +0000490 pPager->dbSize = n;
491 }
492 return n;
493}
494
495/*
496** Shutdown the page cache. Free all memory and close all files.
497**
498** If a transaction was in progress when this routine is called, that
499** transaction is rolled back. All outstanding pages are invalidated
500** and their memory is freed. Any attempt to use a page associated
501** with this page cache after this function returns will likely
502** result in a coredump.
503*/
drhd9b02572001-04-15 00:37:09 +0000504int sqlitepager_close(Pager *pPager){
505 PgHdr *pPg, *pNext;
drhed7c8552001-04-11 14:29:21 +0000506 switch( pPager->state ){
507 case SQLITE_WRITELOCK: {
drhd9b02572001-04-15 00:37:09 +0000508 sqlitepager_rollback(pPager);
drh8cfbf082001-09-19 13:22:39 +0000509 sqliteOsUnlock(pPager->fd);
510 assert( pPager->journalOpen==0 );
drhed7c8552001-04-11 14:29:21 +0000511 break;
512 }
513 case SQLITE_READLOCK: {
drh8cfbf082001-09-19 13:22:39 +0000514 sqliteOsUnlock(pPager->fd);
drhed7c8552001-04-11 14:29:21 +0000515 break;
516 }
517 default: {
518 /* Do nothing */
519 break;
520 }
521 }
drhd9b02572001-04-15 00:37:09 +0000522 for(pPg=pPager->pAll; pPg; pPg=pNext){
523 pNext = pPg->pNextAll;
524 sqliteFree(pPg);
drhed7c8552001-04-11 14:29:21 +0000525 }
drh8cfbf082001-09-19 13:22:39 +0000526 sqliteOsClose(pPager->fd);
527 assert( pPager->journalOpen==0 );
drh5e00f6c2001-09-13 13:46:56 +0000528 if( pPager->tempFile ){
drh8cfbf082001-09-19 13:22:39 +0000529 sqliteOsDelete(pPager->zFilename);
drh5e00f6c2001-09-13 13:46:56 +0000530 }
drhed7c8552001-04-11 14:29:21 +0000531 sqliteFree(pPager);
532 return SQLITE_OK;
533}
534
535/*
drh5e00f6c2001-09-13 13:46:56 +0000536** Return the page number for the given page data.
drhed7c8552001-04-11 14:29:21 +0000537*/
drhd9b02572001-04-15 00:37:09 +0000538Pgno sqlitepager_pagenumber(void *pData){
drhed7c8552001-04-11 14:29:21 +0000539 PgHdr *p = DATA_TO_PGHDR(pData);
540 return p->pgno;
541}
542
543/*
drh7e3b0a02001-04-28 16:52:40 +0000544** Increment the reference count for a page. If the page is
545** currently on the freelist (the reference count is zero) then
546** remove it from the freelist.
547*/
drhdf0b3b02001-06-23 11:36:20 +0000548static void page_ref(PgHdr *pPg){
drh7e3b0a02001-04-28 16:52:40 +0000549 if( pPg->nRef==0 ){
550 /* The page is currently on the freelist. Remove it. */
551 if( pPg->pPrevFree ){
552 pPg->pPrevFree->pNextFree = pPg->pNextFree;
553 }else{
554 pPg->pPager->pFirst = pPg->pNextFree;
555 }
556 if( pPg->pNextFree ){
557 pPg->pNextFree->pPrevFree = pPg->pPrevFree;
558 }else{
559 pPg->pPager->pLast = pPg->pPrevFree;
560 }
561 pPg->pPager->nRef++;
562 }
563 pPg->nRef++;
drhdd793422001-06-28 01:54:48 +0000564 REFINFO(pPg);
drhdf0b3b02001-06-23 11:36:20 +0000565}
566
567/*
568** Increment the reference count for a page. The input pointer is
569** a reference to the page data.
570*/
571int sqlitepager_ref(void *pData){
572 PgHdr *pPg = DATA_TO_PGHDR(pData);
573 page_ref(pPg);
drh8c42ca92001-06-22 19:15:00 +0000574 return SQLITE_OK;
drh7e3b0a02001-04-28 16:52:40 +0000575}
576
577/*
drhb19a2bc2001-09-16 00:13:26 +0000578** Sync the journal and then write all free dirty pages to the database
579** file.
580**
581** Writing all free dirty pages to the database after the sync is a
582** non-obvious optimization. fsync() is an expensive operation so we
583** want to minimize the number that occur. So after an fsync() is forced
584** and we are free to write dirty pages back to the database, it is best
585** to go ahead and do as much of that as possible to minimize the chance
586** of having to do another fsync() later on. Writing dirty free pages
587** in this way make database operations go up to 10 times faster.
drh50e5dad2001-09-15 00:57:28 +0000588*/
589static int syncAllPages(Pager *pPager){
590 PgHdr *pPg;
591 int rc = SQLITE_OK;
592 if( pPager->needSync ){
drh8cfbf082001-09-19 13:22:39 +0000593 rc = sqliteOsSync(pPager->jfd);
drh50e5dad2001-09-15 00:57:28 +0000594 if( rc!=0 ) return rc;
595 pPager->needSync = 0;
596 }
597 for(pPg=pPager->pFirst; pPg; pPg=pPg->pNextFree){
598 if( pPg->dirty ){
drh8cfbf082001-09-19 13:22:39 +0000599 sqliteOsSeek(pPager->fd, (pPg->pgno-1)*SQLITE_PAGE_SIZE);
600 rc = sqliteOsWrite(pPager->fd, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE);
drh50e5dad2001-09-15 00:57:28 +0000601 if( rc!=SQLITE_OK ) break;
602 pPg->dirty = 0;
603 }
604 }
drh81a20f22001-10-12 17:30:04 +0000605 return rc;
drh50e5dad2001-09-15 00:57:28 +0000606}
607
608/*
drhd9b02572001-04-15 00:37:09 +0000609** Acquire a page.
610**
drh58a11682001-11-10 13:51:08 +0000611** A read lock on the disk file is obtained when the first page is acquired.
drh5e00f6c2001-09-13 13:46:56 +0000612** This read lock is dropped when the last page is released.
drhd9b02572001-04-15 00:37:09 +0000613**
drh306dc212001-05-21 13:45:10 +0000614** A _get works for any page number greater than 0. If the database
615** file is smaller than the requested page, then no actual disk
616** read occurs and the memory image of the page is initialized to
617** all zeros. The extra data appended to a page is always initialized
618** to zeros the first time a page is loaded into memory.
619**
drhd9b02572001-04-15 00:37:09 +0000620** The acquisition might fail for several reasons. In all cases,
621** an appropriate error code is returned and *ppPage is set to NULL.
drh7e3b0a02001-04-28 16:52:40 +0000622**
623** See also sqlitepager_lookup(). Both this routine and _lookup() attempt
624** to find a page in the in-memory cache first. If the page is not already
drh5e00f6c2001-09-13 13:46:56 +0000625** in memory, this routine goes to disk to read it in whereas _lookup()
drh7e3b0a02001-04-28 16:52:40 +0000626** just returns 0. This routine acquires a read-lock the first time it
627** has to go to disk, and could also playback an old journal if necessary.
628** Since _lookup() never goes to disk, it never has to deal with locks
629** or journal files.
drhed7c8552001-04-11 14:29:21 +0000630*/
drhd9b02572001-04-15 00:37:09 +0000631int sqlitepager_get(Pager *pPager, Pgno pgno, void **ppPage){
drhed7c8552001-04-11 14:29:21 +0000632 PgHdr *pPg;
633
drhd9b02572001-04-15 00:37:09 +0000634 /* Make sure we have not hit any critical errors.
635 */
636 if( pPager==0 || pgno==0 ){
637 return SQLITE_ERROR;
638 }
639 if( pPager->errMask & ~(PAGER_ERR_FULL) ){
640 return pager_errcode(pPager);
641 }
642
drhed7c8552001-04-11 14:29:21 +0000643 /* If this is the first page accessed, then get a read lock
644 ** on the database file.
645 */
646 if( pPager->nRef==0 ){
drh8cfbf082001-09-19 13:22:39 +0000647 if( sqliteOsLock(pPager->fd, 0)!=SQLITE_OK ){
drhed7c8552001-04-11 14:29:21 +0000648 *ppPage = 0;
649 return SQLITE_BUSY;
650 }
drhd9b02572001-04-15 00:37:09 +0000651 pPager->state = SQLITE_READLOCK;
drhed7c8552001-04-11 14:29:21 +0000652
653 /* If a journal file exists, try to play it back.
654 */
drh8cfbf082001-09-19 13:22:39 +0000655 if( sqliteOsFileExists(pPager->zJournal) ){
drhf57b3392001-10-08 13:22:32 +0000656 int rc, dummy;
drhed7c8552001-04-11 14:29:21 +0000657
658 /* Open the journal for exclusive access. Return SQLITE_BUSY if
drhf57b3392001-10-08 13:22:32 +0000659 ** we cannot get exclusive access to the journal file.
660 **
661 ** Even though we will only be reading from the journal, not writing,
662 ** we have to open the journal for writing in order to obtain an
663 ** exclusive access lock.
drhed7c8552001-04-11 14:29:21 +0000664 */
drhf57b3392001-10-08 13:22:32 +0000665 rc = sqliteOsOpenReadWrite(pPager->zJournal, &pPager->jfd, &dummy);
drh8cfbf082001-09-19 13:22:39 +0000666 if( rc==SQLITE_OK ){
667 pPager->journalOpen = 1;
668 }
669 if( rc!=SQLITE_OK || sqliteOsLock(pPager->jfd, 1)!=SQLITE_OK ){
670 if( pPager->journalOpen ){
671 sqliteOsClose(pPager->jfd);
672 pPager->journalOpen = 0;
673 }
674 sqliteOsUnlock(pPager->fd);
drhed7c8552001-04-11 14:29:21 +0000675 *ppPage = 0;
676 return SQLITE_BUSY;
677 }
678
679 /* Get a write lock on the database */
drh8cfbf082001-09-19 13:22:39 +0000680 sqliteOsUnlock(pPager->fd);
681 if( sqliteOsLock(pPager->fd, 1)!=SQLITE_OK ){
682 sqliteOsClose(pPager->jfd);
683 pPager->journalOpen = 0;
drhed7c8552001-04-11 14:29:21 +0000684 *ppPage = 0;
685 return SQLITE_PROTOCOL;
686 }
drh8cfbf082001-09-19 13:22:39 +0000687 pPager->state = SQLITE_WRITELOCK;
drhed7c8552001-04-11 14:29:21 +0000688
689 /* Playback and delete the journal. Drop the database write
690 ** lock and reacquire the read lock.
691 */
drhd9b02572001-04-15 00:37:09 +0000692 rc = pager_playback(pPager);
693 if( rc!=SQLITE_OK ){
694 return rc;
695 }
drhed7c8552001-04-11 14:29:21 +0000696 }
697 pPg = 0;
698 }else{
699 /* Search for page in cache */
drhd9b02572001-04-15 00:37:09 +0000700 pPg = pager_lookup(pPager, pgno);
drhed7c8552001-04-11 14:29:21 +0000701 }
702 if( pPg==0 ){
drhd9b02572001-04-15 00:37:09 +0000703 /* The requested page is not in the page cache. */
drhed7c8552001-04-11 14:29:21 +0000704 int h;
drh7e3b0a02001-04-28 16:52:40 +0000705 pPager->nMiss++;
drhed7c8552001-04-11 14:29:21 +0000706 if( pPager->nPage<pPager->mxPage || pPager->pFirst==0 ){
707 /* Create a new page */
drh7e3b0a02001-04-28 16:52:40 +0000708 pPg = sqliteMalloc( sizeof(*pPg) + SQLITE_PAGE_SIZE + pPager->nExtra );
drhd9b02572001-04-15 00:37:09 +0000709 if( pPg==0 ){
710 *ppPage = 0;
711 pager_unwritelock(pPager);
712 pPager->errMask |= PAGER_ERR_MEM;
713 return SQLITE_NOMEM;
714 }
drhed7c8552001-04-11 14:29:21 +0000715 pPg->pPager = pPager;
drhd9b02572001-04-15 00:37:09 +0000716 pPg->pNextAll = pPager->pAll;
717 if( pPager->pAll ){
718 pPager->pAll->pPrevAll = pPg;
719 }
720 pPg->pPrevAll = 0;
drhd79caeb2001-04-15 02:27:24 +0000721 pPager->pAll = pPg;
drhd9b02572001-04-15 00:37:09 +0000722 pPager->nPage++;
drhed7c8552001-04-11 14:29:21 +0000723 }else{
drhd9b02572001-04-15 00:37:09 +0000724 /* Recycle an older page. First locate the page to be recycled.
725 ** Try to find one that is not dirty and is near the head of
726 ** of the free list */
drh50e5dad2001-09-15 00:57:28 +0000727 int cnt = pPager->mxPage/2;
drhed7c8552001-04-11 14:29:21 +0000728 pPg = pPager->pFirst;
drh6019e162001-07-02 17:51:45 +0000729 while( pPg->dirty && 0<cnt-- && pPg->pNextFree ){
drhd9b02572001-04-15 00:37:09 +0000730 pPg = pPg->pNextFree;
731 }
drhb19a2bc2001-09-16 00:13:26 +0000732
733 /* If we could not find a page that has not been used recently
734 ** and which is not dirty, then sync the journal and write all
735 ** dirty free pages into the database file, thus making them
736 ** clean pages and available for recycling.
737 **
738 ** We have to sync the journal before writing a page to the main
739 ** database. But syncing is a very slow operation. So after a
740 ** sync, it is best to write everything we can back to the main
741 ** database to minimize the risk of having to sync again in the
742 ** near future. That is way we write all dirty pages after a
743 ** sync.
744 */
drh50e5dad2001-09-15 00:57:28 +0000745 if( pPg==0 || pPg->dirty ){
746 int rc = syncAllPages(pPager);
747 if( rc!=0 ){
748 sqlitepager_rollback(pPager);
749 *ppPage = 0;
750 return SQLITE_IOERR;
751 }
752 pPg = pPager->pFirst;
753 }
drhd9b02572001-04-15 00:37:09 +0000754 assert( pPg->nRef==0 );
drh50e5dad2001-09-15 00:57:28 +0000755 assert( pPg->dirty==0 );
drhd9b02572001-04-15 00:37:09 +0000756
757 /* Unlink the old page from the free list and the hash table
758 */
drh6019e162001-07-02 17:51:45 +0000759 if( pPg->pPrevFree ){
760 pPg->pPrevFree->pNextFree = pPg->pNextFree;
drhed7c8552001-04-11 14:29:21 +0000761 }else{
drh6019e162001-07-02 17:51:45 +0000762 assert( pPager->pFirst==pPg );
763 pPager->pFirst = pPg->pNextFree;
drhed7c8552001-04-11 14:29:21 +0000764 }
drh6019e162001-07-02 17:51:45 +0000765 if( pPg->pNextFree ){
766 pPg->pNextFree->pPrevFree = pPg->pPrevFree;
767 }else{
768 assert( pPager->pLast==pPg );
769 pPager->pLast = pPg->pPrevFree;
770 }
771 pPg->pNextFree = pPg->pPrevFree = 0;
drhed7c8552001-04-11 14:29:21 +0000772 if( pPg->pNextHash ){
773 pPg->pNextHash->pPrevHash = pPg->pPrevHash;
774 }
775 if( pPg->pPrevHash ){
776 pPg->pPrevHash->pNextHash = pPg->pNextHash;
777 }else{
drhd9b02572001-04-15 00:37:09 +0000778 h = pager_hash(pPg->pgno);
drhed7c8552001-04-11 14:29:21 +0000779 assert( pPager->aHash[h]==pPg );
780 pPager->aHash[h] = pPg->pNextHash;
781 }
drh6019e162001-07-02 17:51:45 +0000782 pPg->pNextHash = pPg->pPrevHash = 0;
drhd9b02572001-04-15 00:37:09 +0000783 pPager->nOvfl++;
drhed7c8552001-04-11 14:29:21 +0000784 }
785 pPg->pgno = pgno;
drh6019e162001-07-02 17:51:45 +0000786 if( pPager->aInJournal && pgno<=pPager->origDbSize ){
787 pPg->inJournal = (pPager->aInJournal[pgno/8] & (1<<(pgno&7)))!=0;
788 }else{
789 pPg->inJournal = 0;
790 }
drhed7c8552001-04-11 14:29:21 +0000791 pPg->dirty = 0;
792 pPg->nRef = 1;
drhdd793422001-06-28 01:54:48 +0000793 REFINFO(pPg);
drhd9b02572001-04-15 00:37:09 +0000794 pPager->nRef++;
795 h = pager_hash(pgno);
drhed7c8552001-04-11 14:29:21 +0000796 pPg->pNextHash = pPager->aHash[h];
797 pPager->aHash[h] = pPg;
798 if( pPg->pNextHash ){
799 assert( pPg->pNextHash->pPrevHash==0 );
800 pPg->pNextHash->pPrevHash = pPg;
801 }
drh306dc212001-05-21 13:45:10 +0000802 if( pPager->dbSize<0 ) sqlitepager_pagecount(pPager);
803 if( pPager->dbSize<pgno ){
804 memset(PGHDR_TO_DATA(pPg), 0, SQLITE_PAGE_SIZE);
805 }else{
drh81a20f22001-10-12 17:30:04 +0000806 int rc;
drh8cfbf082001-09-19 13:22:39 +0000807 sqliteOsSeek(pPager->fd, (pgno-1)*SQLITE_PAGE_SIZE);
drh81a20f22001-10-12 17:30:04 +0000808 rc = sqliteOsRead(pPager->fd, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE);
809 if( rc!=SQLITE_OK ){
810 return rc;
811 }
drh306dc212001-05-21 13:45:10 +0000812 }
drh7e3b0a02001-04-28 16:52:40 +0000813 if( pPager->nExtra>0 ){
814 memset(PGHDR_TO_EXTRA(pPg), 0, pPager->nExtra);
815 }
drhed7c8552001-04-11 14:29:21 +0000816 }else{
drhd9b02572001-04-15 00:37:09 +0000817 /* The requested page is in the page cache. */
drh7e3b0a02001-04-28 16:52:40 +0000818 pPager->nHit++;
drhdf0b3b02001-06-23 11:36:20 +0000819 page_ref(pPg);
drhed7c8552001-04-11 14:29:21 +0000820 }
821 *ppPage = PGHDR_TO_DATA(pPg);
822 return SQLITE_OK;
823}
824
825/*
drh7e3b0a02001-04-28 16:52:40 +0000826** Acquire a page if it is already in the in-memory cache. Do
827** not read the page from disk. Return a pointer to the page,
828** or 0 if the page is not in cache.
829**
830** See also sqlitepager_get(). The difference between this routine
831** and sqlitepager_get() is that _get() will go to the disk and read
832** in the page if the page is not already in cache. This routine
drh5e00f6c2001-09-13 13:46:56 +0000833** returns NULL if the page is not in cache or if a disk I/O error
834** has ever happened.
drh7e3b0a02001-04-28 16:52:40 +0000835*/
836void *sqlitepager_lookup(Pager *pPager, Pgno pgno){
837 PgHdr *pPg;
838
839 /* Make sure we have not hit any critical errors.
840 */
841 if( pPager==0 || pgno==0 ){
842 return 0;
843 }
844 if( pPager->errMask & ~(PAGER_ERR_FULL) ){
845 return 0;
846 }
847 if( pPager->nRef==0 ){
848 return 0;
849 }
850 pPg = pager_lookup(pPager, pgno);
851 if( pPg==0 ) return 0;
drhdf0b3b02001-06-23 11:36:20 +0000852 page_ref(pPg);
drh7e3b0a02001-04-28 16:52:40 +0000853 return PGHDR_TO_DATA(pPg);
854}
855
856/*
drhed7c8552001-04-11 14:29:21 +0000857** Release a page.
858**
859** If the number of references to the page drop to zero, then the
860** page is added to the LRU list. When all references to all pages
drhd9b02572001-04-15 00:37:09 +0000861** are released, a rollback occurs and the lock on the database is
drhed7c8552001-04-11 14:29:21 +0000862** removed.
863*/
drhd9b02572001-04-15 00:37:09 +0000864int sqlitepager_unref(void *pData){
drhed7c8552001-04-11 14:29:21 +0000865 PgHdr *pPg;
drhd9b02572001-04-15 00:37:09 +0000866
867 /* Decrement the reference count for this page
868 */
drhed7c8552001-04-11 14:29:21 +0000869 pPg = DATA_TO_PGHDR(pData);
870 assert( pPg->nRef>0 );
drhed7c8552001-04-11 14:29:21 +0000871 pPg->nRef--;
drhdd793422001-06-28 01:54:48 +0000872 REFINFO(pPg);
drhd9b02572001-04-15 00:37:09 +0000873
drh72f82862001-05-24 21:06:34 +0000874 /* When the number of references to a page reach 0, call the
875 ** destructor and add the page to the freelist.
drhd9b02572001-04-15 00:37:09 +0000876 */
drhed7c8552001-04-11 14:29:21 +0000877 if( pPg->nRef==0 ){
drh1eaa2692001-09-18 02:02:23 +0000878 Pager *pPager;
879 pPager = pPg->pPager;
drhd9b02572001-04-15 00:37:09 +0000880 pPg->pNextFree = 0;
881 pPg->pPrevFree = pPager->pLast;
drhed7c8552001-04-11 14:29:21 +0000882 pPager->pLast = pPg;
drhd9b02572001-04-15 00:37:09 +0000883 if( pPg->pPrevFree ){
884 pPg->pPrevFree->pNextFree = pPg;
drhed7c8552001-04-11 14:29:21 +0000885 }else{
886 pPager->pFirst = pPg;
887 }
drh72f82862001-05-24 21:06:34 +0000888 if( pPager->xDestructor ){
889 pPager->xDestructor(pData);
890 }
drhd9b02572001-04-15 00:37:09 +0000891
892 /* When all pages reach the freelist, drop the read lock from
893 ** the database file.
894 */
895 pPager->nRef--;
896 assert( pPager->nRef>=0 );
897 if( pPager->nRef==0 ){
898 pager_reset(pPager);
899 }
drhed7c8552001-04-11 14:29:21 +0000900 }
drhd9b02572001-04-15 00:37:09 +0000901 return SQLITE_OK;
drhed7c8552001-04-11 14:29:21 +0000902}
903
904/*
905** Mark a data page as writeable. The page is written into the journal
906** if it is not there already. This routine must be called before making
907** changes to a page.
908**
909** The first time this routine is called, the pager creates a new
910** journal and acquires a write lock on the database. If the write
911** lock could not be acquired, this routine returns SQLITE_BUSY. The
drh306dc212001-05-21 13:45:10 +0000912** calling routine must check for that return value and be careful not to
drhed7c8552001-04-11 14:29:21 +0000913** change any page data until this routine returns SQLITE_OK.
drhd9b02572001-04-15 00:37:09 +0000914**
915** If the journal file could not be written because the disk is full,
916** then this routine returns SQLITE_FULL and does an immediate rollback.
917** All subsequent write attempts also return SQLITE_FULL until there
918** is a call to sqlitepager_commit() or sqlitepager_rollback() to
919** reset.
drhed7c8552001-04-11 14:29:21 +0000920*/
drhd9b02572001-04-15 00:37:09 +0000921int sqlitepager_write(void *pData){
drh69688d52001-04-14 16:38:23 +0000922 PgHdr *pPg = DATA_TO_PGHDR(pData);
923 Pager *pPager = pPg->pPager;
drhd79caeb2001-04-15 02:27:24 +0000924 int rc = SQLITE_OK;
drh69688d52001-04-14 16:38:23 +0000925
drhd9b02572001-04-15 00:37:09 +0000926 if( pPager->errMask ){
927 return pager_errcode(pPager);
928 }
drh5e00f6c2001-09-13 13:46:56 +0000929 if( pPager->readOnly ){
930 return SQLITE_PERM;
931 }
drhd9b02572001-04-15 00:37:09 +0000932 pPg->dirty = 1;
drh69688d52001-04-14 16:38:23 +0000933 if( pPg->inJournal ){ return SQLITE_OK; }
drhd9b02572001-04-15 00:37:09 +0000934 assert( pPager->state!=SQLITE_UNLOCK );
drhed7c8552001-04-11 14:29:21 +0000935 if( pPager->state==SQLITE_READLOCK ){
drh6019e162001-07-02 17:51:45 +0000936 assert( pPager->aInJournal==0 );
937 pPager->aInJournal = sqliteMalloc( pPager->dbSize/8 + 1 );
938 if( pPager->aInJournal==0 ){
drh6d4abfb2001-10-22 02:58:08 +0000939 sqliteFree(pPager->aInJournal);
drh6019e162001-07-02 17:51:45 +0000940 return SQLITE_NOMEM;
941 }
drh8cfbf082001-09-19 13:22:39 +0000942 rc = sqliteOsOpenExclusive(pPager->zJournal, &pPager->jfd);
943 if( rc!=SQLITE_OK ){
drh6d4abfb2001-10-22 02:58:08 +0000944 sqliteFree(pPager->aInJournal);
drhed7c8552001-04-11 14:29:21 +0000945 return SQLITE_CANTOPEN;
946 }
drh8cfbf082001-09-19 13:22:39 +0000947 pPager->journalOpen = 1;
drhf57b14a2001-09-14 18:54:08 +0000948 pPager->needSync = 0;
drh8cfbf082001-09-19 13:22:39 +0000949 if( sqliteOsLock(pPager->jfd, 1)!=SQLITE_OK ){
drh6d4abfb2001-10-22 02:58:08 +0000950 sqliteFree(pPager->aInJournal);
drh8cfbf082001-09-19 13:22:39 +0000951 sqliteOsClose(pPager->jfd);
drh6d4abfb2001-10-22 02:58:08 +0000952 sqliteOsDelete(pPager->zJournal);
drh8cfbf082001-09-19 13:22:39 +0000953 pPager->journalOpen = 0;
drhed7c8552001-04-11 14:29:21 +0000954 return SQLITE_BUSY;
955 }
drh8cfbf082001-09-19 13:22:39 +0000956 sqliteOsUnlock(pPager->fd);
957 if( sqliteOsLock(pPager->fd, 1)!=SQLITE_OK ){
drh6d4abfb2001-10-22 02:58:08 +0000958 sqliteFree(pPager->aInJournal);
drh8cfbf082001-09-19 13:22:39 +0000959 sqliteOsClose(pPager->jfd);
drh6d4abfb2001-10-22 02:58:08 +0000960 sqliteOsDelete(pPager->zJournal);
drh8cfbf082001-09-19 13:22:39 +0000961 pPager->journalOpen = 0;
drhed7c8552001-04-11 14:29:21 +0000962 pPager->state = SQLITE_UNLOCK;
drhd9b02572001-04-15 00:37:09 +0000963 pPager->errMask |= PAGER_ERR_LOCK;
drhed7c8552001-04-11 14:29:21 +0000964 return SQLITE_PROTOCOL;
965 }
966 pPager->state = SQLITE_WRITELOCK;
drhd9b02572001-04-15 00:37:09 +0000967 sqlitepager_pagecount(pPager);
drh69688d52001-04-14 16:38:23 +0000968 pPager->origDbSize = pPager->dbSize;
drh8cfbf082001-09-19 13:22:39 +0000969 rc = sqliteOsWrite(pPager->jfd, aJournalMagic, sizeof(aJournalMagic));
drhd9b02572001-04-15 00:37:09 +0000970 if( rc==SQLITE_OK ){
drh8cfbf082001-09-19 13:22:39 +0000971 rc = sqliteOsWrite(pPager->jfd, &pPager->dbSize, sizeof(Pgno));
drhd9b02572001-04-15 00:37:09 +0000972 }
973 if( rc!=SQLITE_OK ){
974 rc = pager_unwritelock(pPager);
975 if( rc==SQLITE_OK ) rc = SQLITE_FULL;
976 return rc;
977 }
drhed7c8552001-04-11 14:29:21 +0000978 }
drhd9b02572001-04-15 00:37:09 +0000979 assert( pPager->state==SQLITE_WRITELOCK );
drh8cfbf082001-09-19 13:22:39 +0000980 assert( pPager->journalOpen );
drhd9b02572001-04-15 00:37:09 +0000981 if( pPg->pgno <= pPager->origDbSize ){
drh8cfbf082001-09-19 13:22:39 +0000982 rc = sqliteOsWrite(pPager->jfd, &pPg->pgno, sizeof(Pgno));
drhd9b02572001-04-15 00:37:09 +0000983 if( rc==SQLITE_OK ){
drh8cfbf082001-09-19 13:22:39 +0000984 rc = sqliteOsWrite(pPager->jfd, pData, SQLITE_PAGE_SIZE);
drhd9b02572001-04-15 00:37:09 +0000985 }
986 if( rc!=SQLITE_OK ){
987 sqlitepager_rollback(pPager);
988 pPager->errMask |= PAGER_ERR_FULL;
989 return rc;
990 }
drh6019e162001-07-02 17:51:45 +0000991 assert( pPager->aInJournal!=0 );
992 pPager->aInJournal[pPg->pgno/8] |= 1<<(pPg->pgno&7);
drhf57b14a2001-09-14 18:54:08 +0000993 pPager->needSync = 1;
drh69688d52001-04-14 16:38:23 +0000994 }
drh69688d52001-04-14 16:38:23 +0000995 pPg->inJournal = 1;
drh306dc212001-05-21 13:45:10 +0000996 if( pPager->dbSize<pPg->pgno ){
997 pPager->dbSize = pPg->pgno;
998 }
drh69688d52001-04-14 16:38:23 +0000999 return rc;
drhed7c8552001-04-11 14:29:21 +00001000}
1001
1002/*
drh6019e162001-07-02 17:51:45 +00001003** Return TRUE if the page given in the argument was previous passed
1004** to sqlitepager_write(). In other words, return TRUE if it is ok
1005** to change the content of the page.
1006*/
1007int sqlitepager_iswriteable(void *pData){
1008 PgHdr *pPg = DATA_TO_PGHDR(pData);
1009 return pPg->dirty;
1010}
1011
1012/*
drhed7c8552001-04-11 14:29:21 +00001013** Commit all changes to the database and release the write lock.
drhd9b02572001-04-15 00:37:09 +00001014**
1015** If the commit fails for any reason, a rollback attempt is made
1016** and an error code is returned. If the commit worked, SQLITE_OK
1017** is returned.
drhed7c8552001-04-11 14:29:21 +00001018*/
drhd9b02572001-04-15 00:37:09 +00001019int sqlitepager_commit(Pager *pPager){
drha1b351a2001-09-14 16:42:12 +00001020 int rc;
drhed7c8552001-04-11 14:29:21 +00001021 PgHdr *pPg;
drhd9b02572001-04-15 00:37:09 +00001022
1023 if( pPager->errMask==PAGER_ERR_FULL ){
1024 rc = sqlitepager_rollback(pPager);
1025 if( rc==SQLITE_OK ) rc = SQLITE_FULL;
1026 return rc;
1027 }
1028 if( pPager->errMask!=0 ){
1029 rc = pager_errcode(pPager);
1030 return rc;
1031 }
1032 if( pPager->state!=SQLITE_WRITELOCK ){
1033 return SQLITE_ERROR;
1034 }
drh8cfbf082001-09-19 13:22:39 +00001035 assert( pPager->journalOpen );
1036 if( pPager->needSync && sqliteOsSync(pPager->jfd)!=SQLITE_OK ){
drhd9b02572001-04-15 00:37:09 +00001037 goto commit_abort;
drhed7c8552001-04-11 14:29:21 +00001038 }
drha1b351a2001-09-14 16:42:12 +00001039 for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
1040 if( pPg->dirty==0 ) continue;
drh8cfbf082001-09-19 13:22:39 +00001041 rc = sqliteOsSeek(pPager->fd, (pPg->pgno-1)*SQLITE_PAGE_SIZE);
drha1b351a2001-09-14 16:42:12 +00001042 if( rc!=SQLITE_OK ) goto commit_abort;
drh8cfbf082001-09-19 13:22:39 +00001043 rc = sqliteOsWrite(pPager->fd, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE);
drha1b351a2001-09-14 16:42:12 +00001044 if( rc!=SQLITE_OK ) goto commit_abort;
drhed7c8552001-04-11 14:29:21 +00001045 }
drh8cfbf082001-09-19 13:22:39 +00001046 if( sqliteOsSync(pPager->fd)!=SQLITE_OK ) goto commit_abort;
drhd9b02572001-04-15 00:37:09 +00001047 rc = pager_unwritelock(pPager);
1048 pPager->dbSize = -1;
1049 return rc;
1050
1051 /* Jump here if anything goes wrong during the commit process.
1052 */
1053commit_abort:
1054 rc = sqlitepager_rollback(pPager);
1055 if( rc==SQLITE_OK ){
1056 rc = SQLITE_FULL;
drhed7c8552001-04-11 14:29:21 +00001057 }
drhed7c8552001-04-11 14:29:21 +00001058 return rc;
1059}
1060
1061/*
1062** Rollback all changes. The database falls back to read-only mode.
1063** All in-memory cache pages revert to their original data contents.
1064** The journal is deleted.
drhd9b02572001-04-15 00:37:09 +00001065**
1066** This routine cannot fail unless some other process is not following
1067** the correct locking protocol (SQLITE_PROTOCOL) or unless some other
1068** process is writing trash into the journal file (SQLITE_CORRUPT) or
1069** unless a prior malloc() failed (SQLITE_NOMEM). Appropriate error
1070** codes are returned for all these occasions. Otherwise,
1071** SQLITE_OK is returned.
drhed7c8552001-04-11 14:29:21 +00001072*/
drhd9b02572001-04-15 00:37:09 +00001073int sqlitepager_rollback(Pager *pPager){
drhed7c8552001-04-11 14:29:21 +00001074 int rc;
drhd9b02572001-04-15 00:37:09 +00001075 if( pPager->errMask!=0 && pPager->errMask!=PAGER_ERR_FULL ){
1076 return pager_errcode(pPager);
drhed7c8552001-04-11 14:29:21 +00001077 }
drhd9b02572001-04-15 00:37:09 +00001078 if( pPager->state!=SQLITE_WRITELOCK ){
1079 return SQLITE_OK;
1080 }
1081 rc = pager_playback(pPager);
1082 if( rc!=SQLITE_OK ){
1083 rc = SQLITE_CORRUPT;
1084 pPager->errMask |= PAGER_ERR_CORRUPT;
1085 }
1086 pPager->dbSize = -1;
drhed7c8552001-04-11 14:29:21 +00001087 return rc;
drh98808ba2001-10-18 12:34:46 +00001088}
drhd9b02572001-04-15 00:37:09 +00001089
1090/*
drh5e00f6c2001-09-13 13:46:56 +00001091** Return TRUE if the database file is opened read-only. Return FALSE
1092** if the database is (in theory) writable.
1093*/
1094int sqlitepager_isreadonly(Pager *pPager){
drhbe0072d2001-09-13 14:46:09 +00001095 return pPager->readOnly;
drh5e00f6c2001-09-13 13:46:56 +00001096}
1097
1098/*
drhd9b02572001-04-15 00:37:09 +00001099** This routine is used for testing and analysis only.
1100*/
1101int *sqlitepager_stats(Pager *pPager){
1102 static int a[9];
1103 a[0] = pPager->nRef;
1104 a[1] = pPager->nPage;
1105 a[2] = pPager->mxPage;
1106 a[3] = pPager->dbSize;
1107 a[4] = pPager->state;
1108 a[5] = pPager->errMask;
1109 a[6] = pPager->nHit;
1110 a[7] = pPager->nMiss;
1111 a[8] = pPager->nOvfl;
1112 return a;
1113}
drhdd793422001-06-28 01:54:48 +00001114
#if SQLITE_TEST
/*
** Print a listing of all referenced pages and their ref count.
**
** One line is written to standard output for every page of pPager
** whose reference count is positive.  The page address is printed
** with %p so that it is not truncated where pointers are wider
** than int.
*/
void sqlitepager_refdump(Pager *pPager){
  PgHdr *pPg;
  for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
    if( pPg->nRef<=0 ) continue;
    printf("PAGE %3d addr=%p nRef=%d\n",
       (int)pPg->pgno, (void*)PGHDR_TO_DATA(pPg), pPg->nRef);
  }
}
#endif