blob: 04492d00bacc3388aecab335fb3c804558b49a22 [file] [log] [blame]
drhed7c8552001-04-11 14:29:21 +00001/*
2** Copyright (c) 2001 D. Richard Hipp
3**
4** This program is free software; you can redistribute it and/or
5** modify it under the terms of the GNU General Public
6** License as published by the Free Software Foundation; either
7** version 2 of the License, or (at your option) any later version.
8**
9** This program is distributed in the hope that it will be useful,
10** but WITHOUT ANY WARRANTY; without even the implied warranty of
11** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12** General Public License for more details.
13**
14** You should have received a copy of the GNU General Public
15** License along with this library; if not, write to the
16** Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17** Boston, MA 02111-1307, USA.
18**
19** Author contact information:
20** drh@hwaci.com
21** http://www.hwaci.com/drh/
22**
23*************************************************************************
24** This is the implementation of the page cache subsystem.
25**
26** The page cache is used to access a database file. The pager journals
27** all writes in order to support rollback. Locking is used to limit
** access to one or more readers or one writer.
29**
drhd79caeb2001-04-15 02:27:24 +000030** @(#) $Id: pager.c,v 1.4 2001/04/15 02:27:25 drh Exp $
drhed7c8552001-04-11 14:29:21 +000031*/
drhd9b02572001-04-15 00:37:09 +000032#include "sqliteInt.h"
drhed7c8552001-04-11 14:29:21 +000033#include "pager.h"
34#include <fcntl.h>
35#include <sys/stat.h>
36#include <unistd.h>
37#include <assert.h>
drhd9b02572001-04-15 00:37:09 +000038#include <string.h>
drhed7c8552001-04-11 14:29:21 +000039
40/*
41** The page cache as a whole is always in one of the following
42** states:
43**
44** SQLITE_UNLOCK The page cache is not currently reading or
45** writing the database file. There is no
46** data held in memory. This is the initial
47** state.
48**
49** SQLITE_READLOCK The page cache is reading the database.
50** Writing is not permitted. There can be
51** multiple readers accessing the same database
drh69688d52001-04-14 16:38:23 +000052** file at the same time.
drhed7c8552001-04-11 14:29:21 +000053**
54** SQLITE_WRITELOCK The page cache is writing the database.
55** Access is exclusive. No other processes or
56** threads can be reading or writing while one
57** process is writing.
58**
** The page cache comes up in SQLITE_UNLOCK.  The first time a
** sqlitepager_get() occurs, the state transitions to SQLITE_READLOCK.
** After all pages have been released using sqlitepager_unref(),
** the state transitions back to SQLITE_UNLOCK.  The first time
** that sqlitepager_write() is called, the state transitions to
** SQLITE_WRITELOCK.  The sqlitepager_rollback() and sqlitepager_commit()
** functions transition the state back to SQLITE_READLOCK.
66*/
67#define SQLITE_UNLOCK 0
68#define SQLITE_READLOCK 1
69#define SQLITE_WRITELOCK 2
70
drhd9b02572001-04-15 00:37:09 +000071
drhed7c8552001-04-11 14:29:21 +000072/*
73** Each in-memory image of a page begins with the following header.
74*/
drhd9b02572001-04-15 00:37:09 +000075typedef struct PgHdr PgHdr;
drhed7c8552001-04-11 14:29:21 +000076struct PgHdr {
77 Pager *pPager; /* The pager to which this page belongs */
78 Pgno pgno; /* The page number for this page */
drh69688d52001-04-14 16:38:23 +000079 PgHdr *pNextHash, *pPrevHash; /* Hash collision chain for PgHdr.pgno */
drhed7c8552001-04-11 14:29:21 +000080 int nRef; /* Number of users of this page */
drhd9b02572001-04-15 00:37:09 +000081 PgHdr *pNextFree, *pPrevFree; /* Freelist of pages where nRef==0 */
82 PgHdr *pNextAll, *pPrevAll; /* A list of all pages */
drhed7c8552001-04-11 14:29:21 +000083 char inJournal; /* TRUE if has been written to journal */
84 char dirty; /* TRUE if we need to write back changes */
drh69688d52001-04-14 16:38:23 +000085 /* SQLITE_PAGE_SIZE bytes of page data follow this header */
drhed7c8552001-04-11 14:29:21 +000086};
87
88/*
drh69688d52001-04-14 16:38:23 +000089** Convert a pointer to a PgHdr into a pointer to its data
90** and back again.
drhed7c8552001-04-11 14:29:21 +000091*/
92#define PGHDR_TO_DATA(P) ((void*)(&(P)[1]))
93#define DATA_TO_PGHDR(D) (&((PgHdr*)(D))[-1])
94
95/*
drhed7c8552001-04-11 14:29:21 +000096** How big to make the hash table used for locating in-memory pages
97** by page number.
98*/
drhd9b02572001-04-15 00:37:09 +000099#define N_PG_HASH 101
drhed7c8552001-04-11 14:29:21 +0000100
101/*
102** A open page cache is an instance of the following structure.
103*/
104struct Pager {
105 char *zFilename; /* Name of the database file */
106 char *zJournal; /* Name of the journal file */
107 int fd, jfd; /* File descriptors for database and journal */
drhed7c8552001-04-11 14:29:21 +0000108 int dbSize; /* Number of pages in the file */
drh69688d52001-04-14 16:38:23 +0000109 int origDbSize; /* dbSize before the current change */
drhed7c8552001-04-11 14:29:21 +0000110 int nPage; /* Total number of in-memory pages */
drhd9b02572001-04-15 00:37:09 +0000111 int nRef; /* Number of in-memory pages with PgHdr.nRef>0 */
drhed7c8552001-04-11 14:29:21 +0000112 int mxPage; /* Maximum number of pages to hold in cache */
drhd9b02572001-04-15 00:37:09 +0000113 int nHit, nMiss, nOvfl; /* Cache hits, missing, and LRU overflows */
114 unsigned char state; /* SQLITE_UNLOCK, _READLOCK or _WRITELOCK */
115 unsigned char errMask; /* One of several kinds of errors */
drhed7c8552001-04-11 14:29:21 +0000116 PgHdr *pFirst, *pLast; /* List of free pages */
drhd9b02572001-04-15 00:37:09 +0000117 PgHdr *pAll; /* List of all pages */
drhed7c8552001-04-11 14:29:21 +0000118 PgHdr *aHash[N_PG_HASH]; /* Hash table to map page number of PgHdr */
drhd9b02572001-04-15 00:37:09 +0000119};
120
121/*
122** These are bits that can be set in Pager.errMask.
123*/
124#define PAGER_ERR_FULL 0x01 /* a write() failed */
125#define PAGER_ERR_MEM 0x02 /* malloc() failed */
126#define PAGER_ERR_LOCK 0x04 /* error in the locking protocol */
127#define PAGER_ERR_CORRUPT 0x08 /* database or journal corruption */
128
129/*
130** The journal file contains page records in the following
131** format.
132*/
133typedef struct PageRecord PageRecord;
134struct PageRecord {
135 Pgno pgno; /* The page number */
136 char aData[SQLITE_PAGE_SIZE]; /* Original data for page pgno */
137};
138
/*
** Every journal file starts with these eight bytes.  The values are
** arbitrary; they serve only as a sanity check that a file really is
** a journal.
*/
static const unsigned char aJournalMagic[8] = {
  0xd9, 0xd5, 0x05, 0xf9,
  0x20, 0xa1, 0x63, 0xd4,
};
146
147/*
148** Hash a page number
149*/
drhd9b02572001-04-15 00:37:09 +0000150#define pager_hash(PN) ((PN)%N_PG_HASH)
drhed7c8552001-04-11 14:29:21 +0000151
/*
** Attempt to acquire a read lock (if wrlock==0) or a write lock (if
** wrlock!=0) on the file open on descriptor fd.  The request is made
** with F_SETLK, so it does not wait if the lock is unavailable.
**
** Return 0 on success and 1 if the lock could not be acquired.
*/
static int pager_lock(int fd, int wrlock){
  struct flock lock;

  /* Zero the whole structure so that no indeterminate field (l_pid,
  ** padding) is handed to the kernel. */
  memset(&lock, 0, sizeof(lock));
  lock.l_type = wrlock ? F_WRLCK : F_RDLCK;
  lock.l_whence = SEEK_SET;
  lock.l_start = 0;
  lock.l_len = 0;       /* start 0 with length 0 covers the whole file */
  return fcntl(fd, F_SETLK, &lock)!=0;
}
166
/*
** Release any lock this process holds on the file open on descriptor fd.
**
** Return 0 on success and 1 if the fcntl() call failed.
*/
static int pager_unlock(int fd){
  struct flock lock;

  /* Zero the whole structure so that no indeterminate field (l_pid,
  ** padding) is handed to the kernel. */
  memset(&lock, 0, sizeof(lock));
  lock.l_type = F_UNLCK;
  lock.l_whence = SEEK_SET;
  lock.l_start = 0;
  lock.l_len = 0;       /* start 0 with length 0 covers the whole file */
  return fcntl(fd, F_SETLK, &lock)!=0;
}
179
180/*
181** Move the cursor for file descriptor fd to the point whereto from
182** the beginning of the file.
183*/
184static int pager_seek(int fd, off_t whereto){
185 lseek(fd, whereto, SEEK_SET);
186 return SQLITE_OK;
187}
188
189/*
190** Truncate the given file so that it contains exactly mxPg pages
191** of data.
192*/
193static int pager_truncate(int fd, Pgno mxPg){
194 int rc;
195 rc = ftruncate(fd, mxPg*SQLITE_PAGE_SIZE);
196 return rc!=0 ? SQLITE_IOERR : SQLITE_OK;
197}
198
199/*
200** Read nBytes of data from fd into pBuf. If the data cannot be
201** read or only a partial read occurs, then the unread parts of
202** pBuf are filled with zeros and this routine returns SQLITE_IOERR.
203** If the read is completely successful, return SQLITE_OK.
204*/
205static int pager_read(int fd, void *pBuf, int nByte){
206 int rc;
207 rc = read(fd, pBuf, nByte);
208 if( rc<0 ){
209 memset(pBuf, 0, nByte);
210 return SQLITE_IOERR;
211 }
212 if( rc<nByte ){
213 memset(&((char*)pBuf)[rc], 0, nByte - rc);
214 rc = SQLITE_IOERR;
215 }else{
216 rc = SQLITE_OK;
217 }
218 return rc;
219}
220
221/*
222** Write nBytes of data into fd. If any problem occurs or if the
223** write is incomplete, SQLITE_IOERR is returned. SQLITE_OK is
224** returned upon complete success.
225*/
226static int pager_write(int fd, const void *pBuf, int nByte){
227 int rc;
228 rc = write(fd, pBuf, nByte);
229 if( rc<nByte ){
230 return SQLITE_FULL;
231 }else{
232 return SQLITE_OK;
233 }
234}
235
236/*
237** Convert the bits in the pPager->errMask into an approprate
238** return code.
239*/
240static int pager_errcode(Pager *pPager){
241 int rc = SQLITE_OK;
242 if( pPager->errMask & PAGER_ERR_LOCK ) rc = SQLITE_PROTOCOL;
243 if( pPager->errMask & PAGER_ERR_FULL ) rc = SQLITE_FULL;
244 if( pPager->errMask & PAGER_ERR_MEM ) rc = SQLITE_NOMEM;
245 if( pPager->errMask & PAGER_ERR_CORRUPT ) rc = SQLITE_CORRUPT;
246 return rc;
drhed7c8552001-04-11 14:29:21 +0000247}
248
249/*
250** Find a page in the hash table given its page number. Return
251** a pointer to the page or NULL if not found.
252*/
drhd9b02572001-04-15 00:37:09 +0000253static PgHdr *pager_lookup(Pager *pPager, Pgno pgno){
drhed7c8552001-04-11 14:29:21 +0000254 PgHdr *p = pPager->aHash[pgno % N_PG_HASH];
255 while( p && p->pgno!=pgno ){
256 p = p->pNextHash;
257 }
258 return p;
259}
260
261/*
262** Unlock the database and clear the in-memory cache. This routine
263** sets the state of the pager back to what it was when it was first
264** opened. Any outstanding pages are invalidated and subsequent attempts
265** to access those pages will likely result in a coredump.
266*/
drhd9b02572001-04-15 00:37:09 +0000267static void pager_reset(Pager *pPager){
drhed7c8552001-04-11 14:29:21 +0000268 PgHdr *pPg, *pNext;
drhd9b02572001-04-15 00:37:09 +0000269 for(pPg=pPager->pAll; pPg; pPg=pNext){
270 pNext = pPg->pNextAll;
271 sqliteFree(pPg);
drhed7c8552001-04-11 14:29:21 +0000272 }
273 pPager->pFirst = 0;
drhd9b02572001-04-15 00:37:09 +0000274 pPager->pLast = 0;
275 pPager->pAll = 0;
drhed7c8552001-04-11 14:29:21 +0000276 memset(pPager->aHash, 0, sizeof(pPager->aHash));
277 pPager->nPage = 0;
278 if( pPager->state==SQLITE_WRITELOCK ){
drhd9b02572001-04-15 00:37:09 +0000279 sqlitepager_rollback(pPager);
drhed7c8552001-04-11 14:29:21 +0000280 }
drhd9b02572001-04-15 00:37:09 +0000281 pager_unlock(pPager->fd);
drhed7c8552001-04-11 14:29:21 +0000282 pPager->state = SQLITE_UNLOCK;
drhd9b02572001-04-15 00:37:09 +0000283 pPager->dbSize = -1;
drhed7c8552001-04-11 14:29:21 +0000284 pPager->nRef = 0;
285}
286
287/*
288** When this routine is called, the pager has the journal file open and
289** a write lock on the database. This routine releases the database
290** write lock and acquires a read lock in its place. The journal file
291** is deleted and closed.
292**
293** We have to release the write lock before acquiring the read lock,
294** so there is a race condition where another process can get the lock
295** while we are not holding it. But, no other process should do this
296** because we are also holding a lock on the journal, and no process
297** should get a write lock on the database without first getting a lock
298** on the journal. So this routine should never fail. But it can fail
299** if another process is not playing by the rules. If it does fail,
drhd9b02572001-04-15 00:37:09 +0000300** all in-memory cache pages are invalidated, the PAGER_ERR_LOCK bit
301** is set in pPager->errMask, and this routine returns SQLITE_PROTOCOL.
302** SQLITE_OK is returned on success.
drhed7c8552001-04-11 14:29:21 +0000303*/
drhd9b02572001-04-15 00:37:09 +0000304static int pager_unwritelock(Pager *pPager){
drhed7c8552001-04-11 14:29:21 +0000305 int rc;
drhd9b02572001-04-15 00:37:09 +0000306 PgHdr *pPg;
307 if( pPager->state!=SQLITE_WRITELOCK ) return SQLITE_OK;
308 pager_unlock(pPager->fd);
309 rc = pager_lock(pPager->fd, 0);
drhed7c8552001-04-11 14:29:21 +0000310 unlink(pPager->zJournal);
311 close(pPager->jfd);
312 pPager->jfd = -1;
drhd9b02572001-04-15 00:37:09 +0000313 for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
314 pPg->inJournal = 0;
315 pPg->dirty = 0;
316 }
drhed7c8552001-04-11 14:29:21 +0000317 if( rc!=SQLITE_OK ){
318 pPager->state = SQLITE_UNLOCK;
drhed7c8552001-04-11 14:29:21 +0000319 rc = SQLITE_PROTOCOL;
drhd9b02572001-04-15 00:37:09 +0000320 pPager->errMask |= PAGER_ERR_LOCK;
drhed7c8552001-04-11 14:29:21 +0000321 }else{
drhd9b02572001-04-15 00:37:09 +0000322 rc = SQLITE_OK;
drhed7c8552001-04-11 14:29:21 +0000323 pPager->state = SQLITE_READLOCK;
324 }
325 return rc;
326}
327
drhed7c8552001-04-11 14:29:21 +0000328/*
329** Playback the journal and thus restore the database file to
330** the state it was in before we started making changes.
331**
drhd9b02572001-04-15 00:37:09 +0000332** The journal file format is as follows: There is an initial
333** file-type string for sanity checking. Then there is a single
334** Pgno number which is the number of pages in the database before
335** changes were made. The database is truncated to this size.
336** Next come zero or more page records which each page record
337** consists of a Pgno, SQLITE_PAGE_SIZE bytes of data.
drhed7c8552001-04-11 14:29:21 +0000338**
drhd9b02572001-04-15 00:37:09 +0000339** For playback, the pages have to be read from the journal in
340** reverse order and put back into the original database file.
drhed7c8552001-04-11 14:29:21 +0000341**
drhd9b02572001-04-15 00:37:09 +0000342** If the file opened as the journal file is not a well-formed
343** journal file (as determined by looking at the magic number
344** at the beginning) then this routine returns SQLITE_PROTOCOL.
345** If any other errors occur during playback, the database will
346** likely be corrupted, so the PAGER_ERR_CORRUPT bit is set in
347** pPager->errMask and SQLITE_CORRUPT is returned. If it all
348** works, then this routine returns SQLITE_OK.
drhed7c8552001-04-11 14:29:21 +0000349*/
drhd9b02572001-04-15 00:37:09 +0000350static int pager_playback(Pager *pPager){
351 int nRec; /* Number of Records */
352 int i; /* Loop counter */
353 Pgno mxPg = 0; /* Size of the original file in pages */
354 struct stat statbuf; /* Used to size the journal */
355 PgHdr *pPg; /* An existing page in the cache */
356 PageRecord pgRec;
357 unsigned char aMagic[sizeof(aJournalMagic)];
drhed7c8552001-04-11 14:29:21 +0000358 int rc;
359
drhd9b02572001-04-15 00:37:09 +0000360 /* Read the beginning of the journal and truncate the
361 ** database file back to its original size.
drhed7c8552001-04-11 14:29:21 +0000362 */
drhd9b02572001-04-15 00:37:09 +0000363 assert( pPager->jfd>=0 );
364 pager_seek(pPager->jfd, 0);
365 rc = pager_read(pPager->jfd, aMagic, sizeof(aMagic));
366 if( rc!=SQLITE_OK || memcmp(aMagic,aJournalMagic,sizeof(aMagic))!=0 ){
367 return SQLITE_PROTOCOL;
368 }
369 rc = pager_read(pPager->jfd, &mxPg, sizeof(mxPg));
370 if( rc!=SQLITE_OK ){
371 return SQLITE_PROTOCOL;
372 }
373 pager_truncate(pPager->fd, mxPg);
374 pPager->dbSize = mxPg;
375
376 /* Begin reading the journal beginning at the end and moving
377 ** toward the beginning.
378 */
379 if( fstat(pPager->jfd, &statbuf)!=0 ){
drhed7c8552001-04-11 14:29:21 +0000380 return SQLITE_OK;
381 }
drhd9b02572001-04-15 00:37:09 +0000382 nRec = (statbuf.st_size - (sizeof(aMagic)+sizeof(Pgno))) / sizeof(PageRecord);
drhed7c8552001-04-11 14:29:21 +0000383
384 /* Process segments beginning with the last and working backwards
385 ** to the first.
386 */
drhd9b02572001-04-15 00:37:09 +0000387 for(i=nRec-1; i>=0; i--){
drhed7c8552001-04-11 14:29:21 +0000388 /* Seek to the beginning of the segment */
drhd9b02572001-04-15 00:37:09 +0000389 off_t ofst;
390 ofst = i*sizeof(PageRecord) + sizeof(aMagic) + sizeof(Pgno);
391 rc = pager_seek(pPager->jfd, ofst);
392 if( rc!=SQLITE_OK ) break;
393 rc = pager_read(pPager->jfd, &pgRec, sizeof(pgRec));
394 if( rc!=SQLITE_OK ) break;
drhed7c8552001-04-11 14:29:21 +0000395
drhd9b02572001-04-15 00:37:09 +0000396 /* Sanity checking on the page */
397 if( pgRec.pgno>mxPg || pgRec.pgno==0 ){
398 rc = SQLITE_CORRUPT;
399 break;
drhed7c8552001-04-11 14:29:21 +0000400 }
401
drhd9b02572001-04-15 00:37:09 +0000402 /* Playback the page. Update the in-memory copy of the page
403 ** at the same time, if there is one.
drhed7c8552001-04-11 14:29:21 +0000404 */
drhd9b02572001-04-15 00:37:09 +0000405 pPg = pager_lookup(pPager, pgRec.pgno);
406 if( pPg ){
407 memcpy(PGHDR_TO_DATA(pPg), pgRec.aData, SQLITE_PAGE_SIZE);
drhed7c8552001-04-11 14:29:21 +0000408 }
drhd9b02572001-04-15 00:37:09 +0000409 rc = pager_seek(pPager->fd, (pgRec.pgno-1)*SQLITE_PAGE_SIZE);
410 if( rc!=SQLITE_OK ) break;
411 rc = pager_write(pPager->fd, pgRec.aData, SQLITE_PAGE_SIZE);
412 if( rc!=SQLITE_OK ) break;
drhed7c8552001-04-11 14:29:21 +0000413 }
drhd9b02572001-04-15 00:37:09 +0000414 if( rc!=SQLITE_OK ){
415 pager_unwritelock(pPager);
416 pPager->errMask |= PAGER_ERR_CORRUPT;
417 rc = SQLITE_CORRUPT;
418 }else{
419 rc = pager_unwritelock(pPager);
drhed7c8552001-04-11 14:29:21 +0000420 }
drhd9b02572001-04-15 00:37:09 +0000421 return rc;
drhed7c8552001-04-11 14:29:21 +0000422}
423
424/*
425** Create a new page cache and put a pointer to the page cache in *ppPager.
426** The file to be cached need not exist. The file is not opened until
drhd9b02572001-04-15 00:37:09 +0000427** the first call to sqlitepager_get() and is only held open until the
428** last page is released using sqlitepager_unref().
drhed7c8552001-04-11 14:29:21 +0000429*/
drhd9b02572001-04-15 00:37:09 +0000430int sqlitepager_open(Pager **ppPager, const char *zFilename, int mxPage){
drhed7c8552001-04-11 14:29:21 +0000431 Pager *pPager;
432 int nameLen;
433 int fd;
434
drhd9b02572001-04-15 00:37:09 +0000435 *ppPager = 0;
436 if( sqlite_malloc_failed ){
437 return SQLITE_NOMEM;
438 }
439 fd = open(zFilename, O_RDWR|O_CREAT, 0644);
drhed7c8552001-04-11 14:29:21 +0000440 if( fd<0 ){
441 return SQLITE_CANTOPEN;
442 }
443 nameLen = strlen(zFilename);
444 pPager = sqliteMalloc( sizeof(*pPager) + nameLen*2 + 30 );
drhd9b02572001-04-15 00:37:09 +0000445 if( pPager==0 ){
446 close(fd);
447 return SQLITE_NOMEM;
448 }
drhed7c8552001-04-11 14:29:21 +0000449 pPager->zFilename = (char*)&pPager[1];
450 pPager->zJournal = &pPager->zFilename[nameLen+1];
451 strcpy(pPager->zFilename, zFilename);
452 strcpy(pPager->zJournal, zFilename);
453 strcpy(&pPager->zJournal[nameLen], "-journal");
454 pPager->fd = fd;
455 pPager->jfd = -1;
456 pPager->nRef = 0;
457 pPager->dbSize = -1;
458 pPager->nPage = 0;
drhd79caeb2001-04-15 02:27:24 +0000459 pPager->mxPage = mxPage>5 ? mxPage : 10;
drhed7c8552001-04-11 14:29:21 +0000460 pPager->state = SQLITE_UNLOCK;
drhd9b02572001-04-15 00:37:09 +0000461 pPager->errMask = 0;
drhed7c8552001-04-11 14:29:21 +0000462 pPager->pFirst = 0;
463 pPager->pLast = 0;
464 memset(pPager->aHash, 0, sizeof(pPager->aHash));
465 *ppPager = pPager;
466 return SQLITE_OK;
467}
468
469/*
470** Return the total number of pages in the file opened by pPager.
471*/
drhd9b02572001-04-15 00:37:09 +0000472int sqlitepager_pagecount(Pager *pPager){
drhed7c8552001-04-11 14:29:21 +0000473 int n;
474 struct stat statbuf;
drhd9b02572001-04-15 00:37:09 +0000475 assert( pPager!=0 );
drhed7c8552001-04-11 14:29:21 +0000476 if( pPager->dbSize>=0 ){
477 return pPager->dbSize;
478 }
479 if( fstat(pPager->fd, &statbuf)!=0 ){
480 n = 0;
481 }else{
482 n = statbuf.st_size/SQLITE_PAGE_SIZE;
483 }
drhd9b02572001-04-15 00:37:09 +0000484 if( pPager->state!=SQLITE_UNLOCK ){
drhed7c8552001-04-11 14:29:21 +0000485 pPager->dbSize = n;
486 }
487 return n;
488}
489
490/*
491** Shutdown the page cache. Free all memory and close all files.
492**
493** If a transaction was in progress when this routine is called, that
494** transaction is rolled back. All outstanding pages are invalidated
495** and their memory is freed. Any attempt to use a page associated
496** with this page cache after this function returns will likely
497** result in a coredump.
498*/
drhd9b02572001-04-15 00:37:09 +0000499int sqlitepager_close(Pager *pPager){
500 PgHdr *pPg, *pNext;
drhed7c8552001-04-11 14:29:21 +0000501 switch( pPager->state ){
502 case SQLITE_WRITELOCK: {
drhd9b02572001-04-15 00:37:09 +0000503 sqlitepager_rollback(pPager);
504 pager_unlock(pPager->fd);
drhed7c8552001-04-11 14:29:21 +0000505 break;
506 }
507 case SQLITE_READLOCK: {
drhd9b02572001-04-15 00:37:09 +0000508 pager_unlock(pPager->fd);
drhed7c8552001-04-11 14:29:21 +0000509 break;
510 }
511 default: {
512 /* Do nothing */
513 break;
514 }
515 }
drhd9b02572001-04-15 00:37:09 +0000516 for(pPg=pPager->pAll; pPg; pPg=pNext){
517 pNext = pPg->pNextAll;
518 sqliteFree(pPg);
drhed7c8552001-04-11 14:29:21 +0000519 }
520 if( pPager->fd>=0 ) close(pPager->fd);
521 assert( pPager->jfd<0 );
522 sqliteFree(pPager);
523 return SQLITE_OK;
524}
525
526/*
527** Return the page number for the given page data
528*/
drhd9b02572001-04-15 00:37:09 +0000529Pgno sqlitepager_pagenumber(void *pData){
drhed7c8552001-04-11 14:29:21 +0000530 PgHdr *p = DATA_TO_PGHDR(pData);
531 return p->pgno;
532}
533
534/*
drhd9b02572001-04-15 00:37:09 +0000535** Acquire a page.
536**
537** A read lock is obtained for the first page acquired. The lock
538** is dropped when the last page is released.
539**
540** The acquisition might fail for several reasons. In all cases,
541** an appropriate error code is returned and *ppPage is set to NULL.
drhed7c8552001-04-11 14:29:21 +0000542*/
drhd9b02572001-04-15 00:37:09 +0000543int sqlitepager_get(Pager *pPager, Pgno pgno, void **ppPage){
drhed7c8552001-04-11 14:29:21 +0000544 PgHdr *pPg;
545
drhd9b02572001-04-15 00:37:09 +0000546 /* Make sure we have not hit any critical errors.
547 */
548 if( pPager==0 || pgno==0 ){
549 return SQLITE_ERROR;
550 }
551 if( pPager->errMask & ~(PAGER_ERR_FULL) ){
552 return pager_errcode(pPager);
553 }
554
drhed7c8552001-04-11 14:29:21 +0000555 /* If this is the first page accessed, then get a read lock
556 ** on the database file.
557 */
558 if( pPager->nRef==0 ){
drhd9b02572001-04-15 00:37:09 +0000559 if( pager_lock(pPager->fd, 0)!=0 ){
drhed7c8552001-04-11 14:29:21 +0000560 *ppPage = 0;
561 return SQLITE_BUSY;
562 }
drhd9b02572001-04-15 00:37:09 +0000563 pPager->state = SQLITE_READLOCK;
drhed7c8552001-04-11 14:29:21 +0000564
565 /* If a journal file exists, try to play it back.
566 */
567 if( access(pPager->zJournal,0)==0 ){
568 int rc;
569
570 /* Open the journal for exclusive access. Return SQLITE_BUSY if
571 ** we cannot get exclusive access to the journal file
572 */
573 pPager->jfd = open(pPager->zJournal, O_RDONLY, 0);
drhd9b02572001-04-15 00:37:09 +0000574 if( pPager->jfd<0 || pager_lock(pPager->jfd, 1)!=0 ){
drhed7c8552001-04-11 14:29:21 +0000575 if( pPager->jfd>=0 ){ close(pPager->jfd); pPager->jfd = -1; }
drhd9b02572001-04-15 00:37:09 +0000576 pager_unlock(pPager->fd);
drhed7c8552001-04-11 14:29:21 +0000577 *ppPage = 0;
578 return SQLITE_BUSY;
579 }
580
581 /* Get a write lock on the database */
drhd9b02572001-04-15 00:37:09 +0000582 pager_unlock(pPager->fd);
583 if( pager_lock(pPager->fd, 1)!=0 ){
584 close(pPager->jfd);
585 pPager->jfd = -1;
drhed7c8552001-04-11 14:29:21 +0000586 *ppPage = 0;
587 return SQLITE_PROTOCOL;
588 }
589
590 /* Playback and delete the journal. Drop the database write
591 ** lock and reacquire the read lock.
592 */
drhd9b02572001-04-15 00:37:09 +0000593 rc = pager_playback(pPager);
594 if( rc!=SQLITE_OK ){
595 return rc;
596 }
drhed7c8552001-04-11 14:29:21 +0000597 }
598 pPg = 0;
drhd9b02572001-04-15 00:37:09 +0000599 pPager->nMiss++;
drhed7c8552001-04-11 14:29:21 +0000600 }else{
601 /* Search for page in cache */
drhd9b02572001-04-15 00:37:09 +0000602 pPg = pager_lookup(pPager, pgno);
603 pPager->nHit++;
drhed7c8552001-04-11 14:29:21 +0000604 }
605 if( pPg==0 ){
drhd9b02572001-04-15 00:37:09 +0000606 /* The requested page is not in the page cache. */
drhed7c8552001-04-11 14:29:21 +0000607 int h;
608 if( pPager->nPage<pPager->mxPage || pPager->pFirst==0 ){
609 /* Create a new page */
drhd9b02572001-04-15 00:37:09 +0000610 pPg = sqliteMalloc( sizeof(*pPg) + SQLITE_PAGE_SIZE );
611 if( pPg==0 ){
612 *ppPage = 0;
613 pager_unwritelock(pPager);
614 pPager->errMask |= PAGER_ERR_MEM;
615 return SQLITE_NOMEM;
616 }
drhed7c8552001-04-11 14:29:21 +0000617 pPg->pPager = pPager;
drhd9b02572001-04-15 00:37:09 +0000618 pPg->pNextAll = pPager->pAll;
619 if( pPager->pAll ){
620 pPager->pAll->pPrevAll = pPg;
621 }
622 pPg->pPrevAll = 0;
drhd79caeb2001-04-15 02:27:24 +0000623 pPager->pAll = pPg;
drhd9b02572001-04-15 00:37:09 +0000624 pPager->nPage++;
drhed7c8552001-04-11 14:29:21 +0000625 }else{
drhd9b02572001-04-15 00:37:09 +0000626 /* Recycle an older page. First locate the page to be recycled.
627 ** Try to find one that is not dirty and is near the head of
628 ** of the free list */
629 int cnt = 4;
drhed7c8552001-04-11 14:29:21 +0000630 pPg = pPager->pFirst;
drhd9b02572001-04-15 00:37:09 +0000631 while( pPg->dirty && 0<cnt-- ){
632 pPg = pPg->pNextFree;
633 }
634 if( pPg==0 || pPg->dirty ) pPg = pPager->pFirst;
635 assert( pPg->nRef==0 );
636
637 /* If the page to be recycled is dirty, sync the journal and write
638 ** the old page into the database. */
drhed7c8552001-04-11 14:29:21 +0000639 if( pPg->dirty ){
640 int rc;
drhd9b02572001-04-15 00:37:09 +0000641 assert( pPg->inJournal==1 );
642 assert( pPager->state==SQLITE_WRITELOCK );
643 rc = fsync(pPager->jfd);
644 if( rc!=0 ){
645 rc = sqlitepager_rollback(pPager);
drhed7c8552001-04-11 14:29:21 +0000646 *ppPage = 0;
drhd9b02572001-04-15 00:37:09 +0000647 if( rc==SQLITE_OK ) rc = SQLITE_IOERR;
drhed7c8552001-04-11 14:29:21 +0000648 return rc;
649 }
drhd9b02572001-04-15 00:37:09 +0000650 pager_seek(pPager->fd, (pPg->pgno-1)*SQLITE_PAGE_SIZE);
651 rc = pager_write(pPager->fd, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE);
652 if( rc!=SQLITE_OK ){
653 rc = sqlitepager_rollback(pPager);
654 *ppPage = 0;
655 if( rc==SQLITE_OK ) rc = SQLITE_FULL;
656 return rc;
657 }
658 }
659
660 /* Unlink the old page from the free list and the hash table
661 */
662 pPager->pFirst = pPg->pNextFree;
drhed7c8552001-04-11 14:29:21 +0000663 if( pPager->pFirst ){
drhd9b02572001-04-15 00:37:09 +0000664 pPager->pFirst->pPrevFree = 0;
drhed7c8552001-04-11 14:29:21 +0000665 }else{
666 pPager->pLast = 0;
667 }
668 if( pPg->pNextHash ){
669 pPg->pNextHash->pPrevHash = pPg->pPrevHash;
670 }
671 if( pPg->pPrevHash ){
672 pPg->pPrevHash->pNextHash = pPg->pNextHash;
673 }else{
drhd9b02572001-04-15 00:37:09 +0000674 h = pager_hash(pPg->pgno);
drhed7c8552001-04-11 14:29:21 +0000675 assert( pPager->aHash[h]==pPg );
676 pPager->aHash[h] = pPg->pNextHash;
677 }
drhd9b02572001-04-15 00:37:09 +0000678 pPager->nOvfl++;
drhed7c8552001-04-11 14:29:21 +0000679 }
680 pPg->pgno = pgno;
681 pPg->inJournal = 0;
682 pPg->dirty = 0;
683 pPg->nRef = 1;
drhd9b02572001-04-15 00:37:09 +0000684 pPager->nRef++;
685 h = pager_hash(pgno);
drhed7c8552001-04-11 14:29:21 +0000686 pPg->pNextHash = pPager->aHash[h];
687 pPager->aHash[h] = pPg;
688 if( pPg->pNextHash ){
689 assert( pPg->pNextHash->pPrevHash==0 );
690 pPg->pNextHash->pPrevHash = pPg;
691 }
drhd9b02572001-04-15 00:37:09 +0000692 pager_seek(pPager->fd, (pgno-1)*SQLITE_PAGE_SIZE);
693 pager_read(pPager->fd, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE);
drhed7c8552001-04-11 14:29:21 +0000694 }else{
drhd9b02572001-04-15 00:37:09 +0000695 /* The requested page is in the page cache. */
drhed7c8552001-04-11 14:29:21 +0000696 if( pPg->nRef==0 ){
drhd9b02572001-04-15 00:37:09 +0000697 /* The page is currently on the freelist. Remove it. */
698 if( pPg->pPrevFree ){
699 pPg->pPrevFree->pNextFree = pPg->pNextFree;
drhed7c8552001-04-11 14:29:21 +0000700 }else{
drhd9b02572001-04-15 00:37:09 +0000701 pPager->pFirst = pPg->pNextFree;
drhed7c8552001-04-11 14:29:21 +0000702 }
drhd9b02572001-04-15 00:37:09 +0000703 if( pPg->pNextFree ){
704 pPg->pNextFree->pPrevFree = pPg->pPrevFree;
drhed7c8552001-04-11 14:29:21 +0000705 }else{
drhd9b02572001-04-15 00:37:09 +0000706 pPager->pLast = pPg->pPrevFree;
drhed7c8552001-04-11 14:29:21 +0000707 }
drhd9b02572001-04-15 00:37:09 +0000708 pPager->nRef++;
drhed7c8552001-04-11 14:29:21 +0000709 }
710 pPg->nRef++;
711 }
712 *ppPage = PGHDR_TO_DATA(pPg);
713 return SQLITE_OK;
714}
715
716/*
717** Release a page.
718**
719** If the number of references to the page drop to zero, then the
720** page is added to the LRU list. When all references to all pages
drhd9b02572001-04-15 00:37:09 +0000721** are released, a rollback occurs and the lock on the database is
drhed7c8552001-04-11 14:29:21 +0000722** removed.
723*/
drhd9b02572001-04-15 00:37:09 +0000724int sqlitepager_unref(void *pData){
drhed7c8552001-04-11 14:29:21 +0000725 Pager *pPager;
726 PgHdr *pPg;
drhd9b02572001-04-15 00:37:09 +0000727
728 /* Decrement the reference count for this page
729 */
drhed7c8552001-04-11 14:29:21 +0000730 pPg = DATA_TO_PGHDR(pData);
731 assert( pPg->nRef>0 );
732 pPager = pPg->pPager;
733 pPg->nRef--;
drhd9b02572001-04-15 00:37:09 +0000734
735 /* When the number of references to a page reach 0, add the
736 ** page to the freelist.
737 */
drhed7c8552001-04-11 14:29:21 +0000738 if( pPg->nRef==0 ){
drhd9b02572001-04-15 00:37:09 +0000739 pPg->pNextFree = 0;
740 pPg->pPrevFree = pPager->pLast;
drhed7c8552001-04-11 14:29:21 +0000741 pPager->pLast = pPg;
drhd9b02572001-04-15 00:37:09 +0000742 if( pPg->pPrevFree ){
743 pPg->pPrevFree->pNextFree = pPg;
drhed7c8552001-04-11 14:29:21 +0000744 }else{
745 pPager->pFirst = pPg;
746 }
drhd9b02572001-04-15 00:37:09 +0000747
748 /* When all pages reach the freelist, drop the read lock from
749 ** the database file.
750 */
751 pPager->nRef--;
752 assert( pPager->nRef>=0 );
753 if( pPager->nRef==0 ){
754 pager_reset(pPager);
755 }
drhed7c8552001-04-11 14:29:21 +0000756 }
drhd9b02572001-04-15 00:37:09 +0000757 return SQLITE_OK;
drhed7c8552001-04-11 14:29:21 +0000758}
759
760/*
761** Mark a data page as writeable. The page is written into the journal
762** if it is not there already. This routine must be called before making
763** changes to a page.
764**
765** The first time this routine is called, the pager creates a new
766** journal and acquires a write lock on the database. If the write
767** lock could not be acquired, this routine returns SQLITE_BUSY. The
768** calling routine must check for that routine and be careful not to
769** change any page data until this routine returns SQLITE_OK.
drhd9b02572001-04-15 00:37:09 +0000770**
771** If the journal file could not be written because the disk is full,
772** then this routine returns SQLITE_FULL and does an immediate rollback.
773** All subsequent write attempts also return SQLITE_FULL until there
774** is a call to sqlitepager_commit() or sqlitepager_rollback() to
775** reset.
drhed7c8552001-04-11 14:29:21 +0000776*/
drhd9b02572001-04-15 00:37:09 +0000777int sqlitepager_write(void *pData){
drh69688d52001-04-14 16:38:23 +0000778 PgHdr *pPg = DATA_TO_PGHDR(pData);
779 Pager *pPager = pPg->pPager;
drhd79caeb2001-04-15 02:27:24 +0000780 int rc = SQLITE_OK;
drh69688d52001-04-14 16:38:23 +0000781
drhd9b02572001-04-15 00:37:09 +0000782 if( pPager->errMask ){
783 return pager_errcode(pPager);
784 }
785 pPg->dirty = 1;
drh69688d52001-04-14 16:38:23 +0000786 if( pPg->inJournal ){ return SQLITE_OK; }
drhd9b02572001-04-15 00:37:09 +0000787 assert( pPager->state!=SQLITE_UNLOCK );
drhed7c8552001-04-11 14:29:21 +0000788 if( pPager->state==SQLITE_READLOCK ){
789 pPager->jfd = open(pPager->zJournal, O_RDWR|O_CREAT, 0644);
790 if( pPager->jfd<0 ){
791 return SQLITE_CANTOPEN;
792 }
drhd9b02572001-04-15 00:37:09 +0000793 if( pager_lock(pPager->jfd, 1) ){
drhed7c8552001-04-11 14:29:21 +0000794 close(pPager->jfd);
795 pPager->jfd = -1;
796 return SQLITE_BUSY;
797 }
drhd9b02572001-04-15 00:37:09 +0000798 pager_unlock(pPager->fd);
799 if( pager_lock(pPager->fd, 1) ){
drhed7c8552001-04-11 14:29:21 +0000800 close(pPager->jfd);
801 pPager->jfd = -1;
802 pPager->state = SQLITE_UNLOCK;
drhd9b02572001-04-15 00:37:09 +0000803 pPager->errMask |= PAGER_ERR_LOCK;
drhed7c8552001-04-11 14:29:21 +0000804 return SQLITE_PROTOCOL;
805 }
806 pPager->state = SQLITE_WRITELOCK;
drhd9b02572001-04-15 00:37:09 +0000807 sqlitepager_pagecount(pPager);
drh69688d52001-04-14 16:38:23 +0000808 pPager->origDbSize = pPager->dbSize;
drhd9b02572001-04-15 00:37:09 +0000809 rc = pager_write(pPager->jfd, aJournalMagic, sizeof(aJournalMagic));
810 if( rc==SQLITE_OK ){
811 rc = pager_write(pPager->jfd, &pPager->dbSize, sizeof(Pgno));
812 }
813 if( rc!=SQLITE_OK ){
814 rc = pager_unwritelock(pPager);
815 if( rc==SQLITE_OK ) rc = SQLITE_FULL;
816 return rc;
817 }
drhed7c8552001-04-11 14:29:21 +0000818 }
drhd9b02572001-04-15 00:37:09 +0000819 assert( pPager->state==SQLITE_WRITELOCK );
drh69688d52001-04-14 16:38:23 +0000820 assert( pPager->jfd>=0 );
drhd9b02572001-04-15 00:37:09 +0000821 if( pPg->pgno <= pPager->origDbSize ){
822 rc = pager_write(pPager->jfd, &pPg->pgno, sizeof(Pgno));
823 if( rc==SQLITE_OK ){
824 rc = pager_write(pPager->jfd, pData, SQLITE_PAGE_SIZE);
825 }
826 if( rc!=SQLITE_OK ){
827 sqlitepager_rollback(pPager);
828 pPager->errMask |= PAGER_ERR_FULL;
829 return rc;
830 }
drh69688d52001-04-14 16:38:23 +0000831 }
drh69688d52001-04-14 16:38:23 +0000832 pPg->inJournal = 1;
drh69688d52001-04-14 16:38:23 +0000833 return rc;
drhed7c8552001-04-11 14:29:21 +0000834}
835
836/*
837** Commit all changes to the database and release the write lock.
drhd9b02572001-04-15 00:37:09 +0000838**
839** If the commit fails for any reason, a rollback attempt is made
840** and an error code is returned. If the commit worked, SQLITE_OK
841** is returned.
drhed7c8552001-04-11 14:29:21 +0000842*/
drhd9b02572001-04-15 00:37:09 +0000843int sqlitepager_commit(Pager *pPager){
drhed7c8552001-04-11 14:29:21 +0000844 int i, rc;
845 PgHdr *pPg;
drhd9b02572001-04-15 00:37:09 +0000846
847 if( pPager->errMask==PAGER_ERR_FULL ){
848 rc = sqlitepager_rollback(pPager);
849 if( rc==SQLITE_OK ) rc = SQLITE_FULL;
850 return rc;
851 }
852 if( pPager->errMask!=0 ){
853 rc = pager_errcode(pPager);
854 return rc;
855 }
856 if( pPager->state!=SQLITE_WRITELOCK ){
857 return SQLITE_ERROR;
858 }
drhed7c8552001-04-11 14:29:21 +0000859 assert( pPager->jfd>=0 );
860 if( fsync(pPager->jfd) ){
drhd9b02572001-04-15 00:37:09 +0000861 goto commit_abort;
drhed7c8552001-04-11 14:29:21 +0000862 }
863 for(i=0; i<N_PG_HASH; i++){
864 for(pPg=pPager->aHash[i]; pPg; pPg=pPg->pNextHash){
865 if( pPg->dirty==0 ) continue;
drhd9b02572001-04-15 00:37:09 +0000866 rc = pager_seek(pPager->fd, (pPg->pgno-1)*SQLITE_PAGE_SIZE);
867 if( rc!=SQLITE_OK ) goto commit_abort;
868 rc = pager_write(pPager->fd, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE);
869 if( rc!=SQLITE_OK ) goto commit_abort;
drhed7c8552001-04-11 14:29:21 +0000870 }
871 }
drhd9b02572001-04-15 00:37:09 +0000872 if( fsync(pPager->fd) ) goto commit_abort;
873 rc = pager_unwritelock(pPager);
874 pPager->dbSize = -1;
875 return rc;
876
877 /* Jump here if anything goes wrong during the commit process.
878 */
879commit_abort:
880 rc = sqlitepager_rollback(pPager);
881 if( rc==SQLITE_OK ){
882 rc = SQLITE_FULL;
drhed7c8552001-04-11 14:29:21 +0000883 }
drhed7c8552001-04-11 14:29:21 +0000884 return rc;
885}
886
887/*
888** Rollback all changes. The database falls back to read-only mode.
889** All in-memory cache pages revert to their original data contents.
890** The journal is deleted.
drhd9b02572001-04-15 00:37:09 +0000891**
892** This routine cannot fail unless some other process is not following
893** the correct locking protocol (SQLITE_PROTOCOL) or unless some other
894** process is writing trash into the journal file (SQLITE_CORRUPT) or
895** unless a prior malloc() failed (SQLITE_NOMEM). Appropriate error
896** codes are returned for all these occasions. Otherwise,
897** SQLITE_OK is returned.
drhed7c8552001-04-11 14:29:21 +0000898*/
drhd9b02572001-04-15 00:37:09 +0000899int sqlitepager_rollback(Pager *pPager){
drhed7c8552001-04-11 14:29:21 +0000900 int rc;
drhd9b02572001-04-15 00:37:09 +0000901 if( pPager->errMask!=0 && pPager->errMask!=PAGER_ERR_FULL ){
902 return pager_errcode(pPager);
drhed7c8552001-04-11 14:29:21 +0000903 }
drhd9b02572001-04-15 00:37:09 +0000904 if( pPager->state!=SQLITE_WRITELOCK ){
905 return SQLITE_OK;
906 }
907 rc = pager_playback(pPager);
908 if( rc!=SQLITE_OK ){
909 rc = SQLITE_CORRUPT;
910 pPager->errMask |= PAGER_ERR_CORRUPT;
911 }
912 pPager->dbSize = -1;
drhed7c8552001-04-11 14:29:21 +0000913 return rc;
914};
drhd9b02572001-04-15 00:37:09 +0000915
916/*
917** This routine is used for testing and analysis only.
918*/
919int *sqlitepager_stats(Pager *pPager){
920 static int a[9];
921 a[0] = pPager->nRef;
922 a[1] = pPager->nPage;
923 a[2] = pPager->mxPage;
924 a[3] = pPager->dbSize;
925 a[4] = pPager->state;
926 a[5] = pPager->errMask;
927 a[6] = pPager->nHit;
928 a[7] = pPager->nMiss;
929 a[8] = pPager->nOvfl;
930 return a;
931}