blob: 57866918a49f88ac56d584d2a0c264d11b1882bb [file] [log] [blame]
drhed7c8552001-04-11 14:29:21 +00001/*
2** Copyright (c) 2001 D. Richard Hipp
3**
4** This program is free software; you can redistribute it and/or
5** modify it under the terms of the GNU General Public
6** License as published by the Free Software Foundation; either
7** version 2 of the License, or (at your option) any later version.
8**
9** This program is distributed in the hope that it will be useful,
10** but WITHOUT ANY WARRANTY; without even the implied warranty of
11** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12** General Public License for more details.
13**
14** You should have received a copy of the GNU General Public
15** License along with this library; if not, write to the
16** Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17** Boston, MA 02111-1307, USA.
18**
19** Author contact information:
20** drh@hwaci.com
21** http://www.hwaci.com/drh/
22**
23*************************************************************************
24** This is the implementation of the page cache subsystem.
25**
** The page cache is used to access a database file.  The pager journals
** all writes in order to support rollback.  Locking is used to limit
** access to one or more readers or to a single writer.
drhed7c8552001-04-11 14:29:21 +000029**
drh6019e162001-07-02 17:51:45 +000030** @(#) $Id: pager.c,v 1.13 2001/07/02 17:51:46 drh Exp $
drhed7c8552001-04-11 14:29:21 +000031*/
drhd9b02572001-04-15 00:37:09 +000032#include "sqliteInt.h"
drhed7c8552001-04-11 14:29:21 +000033#include "pager.h"
34#include <fcntl.h>
35#include <sys/stat.h>
36#include <unistd.h>
37#include <assert.h>
drhd9b02572001-04-15 00:37:09 +000038#include <string.h>
drhed7c8552001-04-11 14:29:21 +000039
40/*
41** The page cache as a whole is always in one of the following
42** states:
43**
44** SQLITE_UNLOCK The page cache is not currently reading or
45** writing the database file. There is no
46** data held in memory. This is the initial
47** state.
48**
49** SQLITE_READLOCK The page cache is reading the database.
50** Writing is not permitted. There can be
51** multiple readers accessing the same database
drh69688d52001-04-14 16:38:23 +000052** file at the same time.
drhed7c8552001-04-11 14:29:21 +000053**
54** SQLITE_WRITELOCK The page cache is writing the database.
55** Access is exclusive. No other processes or
56** threads can be reading or writing while one
57** process is writing.
58**
drh306dc212001-05-21 13:45:10 +000059** The page cache comes up in SQLITE_UNLOCK. The first time a
60** sqlite_page_get() occurs, the state transitions to SQLITE_READLOCK.
drhed7c8552001-04-11 14:29:21 +000061** After all pages have been released using sqlite_page_unref(),
drh306dc212001-05-21 13:45:10 +000062** the state transitions back to SQLITE_UNLOCK. The first time
drhed7c8552001-04-11 14:29:21 +000063** that sqlite_page_write() is called, the state transitions to
drh306dc212001-05-21 13:45:10 +000064** SQLITE_WRITELOCK. (Note that sqlite_page_write() can only be
65** called on an outstanding page which means that the pager must
66** be in SQLITE_READLOCK before it transitions to SQLITE_WRITELOCK.)
67** The sqlite_page_rollback() and sqlite_page_commit() functions
68** transition the state from SQLITE_WRITELOCK back to SQLITE_READLOCK.
drhed7c8552001-04-11 14:29:21 +000069*/
70#define SQLITE_UNLOCK 0
71#define SQLITE_READLOCK 1
72#define SQLITE_WRITELOCK 2
73
drhd9b02572001-04-15 00:37:09 +000074
drhed7c8552001-04-11 14:29:21 +000075/*
76** Each in-memory image of a page begins with the following header.
drhbd03cae2001-06-02 02:40:57 +000077** This header is only visible to this pager module. The client
78** code that calls pager sees only the data that follows the header.
drhed7c8552001-04-11 14:29:21 +000079*/
drhd9b02572001-04-15 00:37:09 +000080typedef struct PgHdr PgHdr;
drhed7c8552001-04-11 14:29:21 +000081struct PgHdr {
82 Pager *pPager; /* The pager to which this page belongs */
83 Pgno pgno; /* The page number for this page */
drh69688d52001-04-14 16:38:23 +000084 PgHdr *pNextHash, *pPrevHash; /* Hash collision chain for PgHdr.pgno */
drhed7c8552001-04-11 14:29:21 +000085 int nRef; /* Number of users of this page */
drhd9b02572001-04-15 00:37:09 +000086 PgHdr *pNextFree, *pPrevFree; /* Freelist of pages where nRef==0 */
87 PgHdr *pNextAll, *pPrevAll; /* A list of all pages */
drhed7c8552001-04-11 14:29:21 +000088 char inJournal; /* TRUE if has been written to journal */
89 char dirty; /* TRUE if we need to write back changes */
drh69688d52001-04-14 16:38:23 +000090 /* SQLITE_PAGE_SIZE bytes of page data follow this header */
drh7e3b0a02001-04-28 16:52:40 +000091 /* Pager.nExtra bytes of local data follow the page data */
drhed7c8552001-04-11 14:29:21 +000092};
93
/*
** Pointer conversions between a page header, the page data that
** immediately follows the header, and the extra bytes that follow
** the SQLITE_PAGE_SIZE bytes of page data.
*/
#define PGHDR_TO_DATA(P)   ((void*)((P)+1))
#define DATA_TO_PGHDR(D)   (((PgHdr*)(D))-1)
#define PGHDR_TO_EXTRA(P)  ((void*)(((char*)((P)+1))+SQLITE_PAGE_SIZE))
drhed7c8552001-04-11 14:29:21 +0000101
102/*
drhed7c8552001-04-11 14:29:21 +0000103** How big to make the hash table used for locating in-memory pages
drh306dc212001-05-21 13:45:10 +0000104** by page number. Knuth says this should be a prime number.
drhed7c8552001-04-11 14:29:21 +0000105*/
drhd9b02572001-04-15 00:37:09 +0000106#define N_PG_HASH 101
drhed7c8552001-04-11 14:29:21 +0000107
108/*
109** A open page cache is an instance of the following structure.
110*/
111struct Pager {
112 char *zFilename; /* Name of the database file */
113 char *zJournal; /* Name of the journal file */
114 int fd, jfd; /* File descriptors for database and journal */
drhed7c8552001-04-11 14:29:21 +0000115 int dbSize; /* Number of pages in the file */
drh69688d52001-04-14 16:38:23 +0000116 int origDbSize; /* dbSize before the current change */
drh7e3b0a02001-04-28 16:52:40 +0000117 int nExtra; /* Add this many bytes to each in-memory page */
drh72f82862001-05-24 21:06:34 +0000118 void (*xDestructor)(void*); /* Call this routine when freeing pages */
drhed7c8552001-04-11 14:29:21 +0000119 int nPage; /* Total number of in-memory pages */
drhd9b02572001-04-15 00:37:09 +0000120 int nRef; /* Number of in-memory pages with PgHdr.nRef>0 */
drhed7c8552001-04-11 14:29:21 +0000121 int mxPage; /* Maximum number of pages to hold in cache */
drhd9b02572001-04-15 00:37:09 +0000122 int nHit, nMiss, nOvfl; /* Cache hits, missing, and LRU overflows */
123 unsigned char state; /* SQLITE_UNLOCK, _READLOCK or _WRITELOCK */
124 unsigned char errMask; /* One of several kinds of errors */
drh6019e162001-07-02 17:51:45 +0000125 unsigned char *aInJournal; /* One bit for each page in the database file */
drhed7c8552001-04-11 14:29:21 +0000126 PgHdr *pFirst, *pLast; /* List of free pages */
drhd9b02572001-04-15 00:37:09 +0000127 PgHdr *pAll; /* List of all pages */
drhed7c8552001-04-11 14:29:21 +0000128 PgHdr *aHash[N_PG_HASH]; /* Hash table to map page number of PgHdr */
drhd9b02572001-04-15 00:37:09 +0000129};
130
131/*
132** These are bits that can be set in Pager.errMask.
133*/
134#define PAGER_ERR_FULL 0x01 /* a write() failed */
135#define PAGER_ERR_MEM 0x02 /* malloc() failed */
136#define PAGER_ERR_LOCK 0x04 /* error in the locking protocol */
137#define PAGER_ERR_CORRUPT 0x08 /* database or journal corruption */
138
139/*
140** The journal file contains page records in the following
141** format.
142*/
143typedef struct PageRecord PageRecord;
144struct PageRecord {
145 Pgno pgno; /* The page number */
146 char aData[SQLITE_PAGE_SIZE]; /* Original data for page pgno */
147};
148
149/*
150** Journal files begin with the following magic string. This data
151** is completely random. It is used only as a sanity check.
152*/
153static const unsigned char aJournalMagic[] = {
154 0xd9, 0xd5, 0x05, 0xf9, 0x20, 0xa1, 0x63, 0xd4,
drhed7c8552001-04-11 14:29:21 +0000155};
156
157/*
158** Hash a page number
159*/
drhd9b02572001-04-15 00:37:09 +0000160#define pager_hash(PN) ((PN)%N_PG_HASH)
drhed7c8552001-04-11 14:29:21 +0000161
/*
** Reference count tracing.  This is compiled in only when SQLITE_TEST
** is true.  When pager_refinfo_enable is non-zero, REFINFO(X) prints the
** page number, the address of the page data, and the reference count
** of page X.  Otherwise REFINFO(X) expands to nothing.
*/
#if SQLITE_TEST
int pager_refinfo_enable = 0;
  static void pager_refinfo(PgHdr *p){
    static int cnt = 0;
    if( !pager_refinfo_enable ) return;
    /* Print the address with %p.  Casting the pointer to int and using
    ** %08x discards the upper bits on platforms with 64-bit pointers. */
    printf(
       "REFCNT: %4d addr=%p nRef=%d\n",
       (int)p->pgno, PGHDR_TO_DATA(p), p->nRef
    );
    cnt++;   /* Something to set a breakpoint on */
  }
# define REFINFO(X)  pager_refinfo(X)
#else
# define REFINFO(X)
#endif
180
/*
** Try to lock file descriptor fd without blocking.  A write lock is
** requested when wrlock is non-zero, otherwise a read lock.  The lock
** request uses l_start==0 and l_len==0 relative to SEEK_SET.
**
** Return 0 if the lock was obtained and 1 if fcntl() reported failure.
*/
static int pager_lock(int fd, int wrlock){
  struct flock lock;
  if( wrlock ){
    lock.l_type = F_WRLCK;
  }else{
    lock.l_type = F_RDLCK;
  }
  lock.l_whence = SEEK_SET;
  lock.l_start = 0L;
  lock.l_len = 0L;
  if( fcntl(fd, F_SETLK, &lock)!=0 ){
    return 1;
  }
  return 0;
}
195
/*
** Release any lock this process holds on file descriptor fd.  The
** unlock request uses l_start==0 and l_len==0 relative to SEEK_SET.
**
** Return 0 on success and 1 if fcntl() reported failure.
**
** The parameter is declared "int fd" explicitly.  An untyped parameter
** relies on implicit int, which is not valid since C99.
*/
static int pager_unlock(int fd){
  int rc;
  struct flock lock;
  lock.l_type = F_UNLCK;
  lock.l_whence = SEEK_SET;
  lock.l_start = lock.l_len = 0L;
  rc = fcntl(fd, F_SETLK, &lock);
  return rc!=0;
}
208
209/*
210** Move the cursor for file descriptor fd to the point whereto from
211** the beginning of the file.
212*/
213static int pager_seek(int fd, off_t whereto){
drh6019e162001-07-02 17:51:45 +0000214 /*printf("SEEK to page %d\n", whereto/SQLITE_PAGE_SIZE + 1);*/
drhd9b02572001-04-15 00:37:09 +0000215 lseek(fd, whereto, SEEK_SET);
216 return SQLITE_OK;
217}
218
219/*
220** Truncate the given file so that it contains exactly mxPg pages
221** of data.
222*/
223static int pager_truncate(int fd, Pgno mxPg){
224 int rc;
225 rc = ftruncate(fd, mxPg*SQLITE_PAGE_SIZE);
226 return rc!=0 ? SQLITE_IOERR : SQLITE_OK;
227}
228
229/*
230** Read nBytes of data from fd into pBuf. If the data cannot be
231** read or only a partial read occurs, then the unread parts of
232** pBuf are filled with zeros and this routine returns SQLITE_IOERR.
233** If the read is completely successful, return SQLITE_OK.
234*/
235static int pager_read(int fd, void *pBuf, int nByte){
236 int rc;
drh6019e162001-07-02 17:51:45 +0000237 /* printf("READ\n");*/
drhd9b02572001-04-15 00:37:09 +0000238 rc = read(fd, pBuf, nByte);
239 if( rc<0 ){
240 memset(pBuf, 0, nByte);
241 return SQLITE_IOERR;
242 }
243 if( rc<nByte ){
244 memset(&((char*)pBuf)[rc], 0, nByte - rc);
245 rc = SQLITE_IOERR;
246 }else{
247 rc = SQLITE_OK;
248 }
249 return rc;
250}
251
252/*
253** Write nBytes of data into fd. If any problem occurs or if the
254** write is incomplete, SQLITE_IOERR is returned. SQLITE_OK is
255** returned upon complete success.
256*/
257static int pager_write(int fd, const void *pBuf, int nByte){
258 int rc;
drh6019e162001-07-02 17:51:45 +0000259 /*printf("WRITE\n");*/
drhd9b02572001-04-15 00:37:09 +0000260 rc = write(fd, pBuf, nByte);
261 if( rc<nByte ){
262 return SQLITE_FULL;
263 }else{
264 return SQLITE_OK;
265 }
266}
267
268/*
269** Convert the bits in the pPager->errMask into an approprate
270** return code.
271*/
272static int pager_errcode(Pager *pPager){
273 int rc = SQLITE_OK;
274 if( pPager->errMask & PAGER_ERR_LOCK ) rc = SQLITE_PROTOCOL;
275 if( pPager->errMask & PAGER_ERR_FULL ) rc = SQLITE_FULL;
276 if( pPager->errMask & PAGER_ERR_MEM ) rc = SQLITE_NOMEM;
277 if( pPager->errMask & PAGER_ERR_CORRUPT ) rc = SQLITE_CORRUPT;
278 return rc;
drhed7c8552001-04-11 14:29:21 +0000279}
280
281/*
282** Find a page in the hash table given its page number. Return
283** a pointer to the page or NULL if not found.
284*/
drhd9b02572001-04-15 00:37:09 +0000285static PgHdr *pager_lookup(Pager *pPager, Pgno pgno){
drhed7c8552001-04-11 14:29:21 +0000286 PgHdr *p = pPager->aHash[pgno % N_PG_HASH];
287 while( p && p->pgno!=pgno ){
288 p = p->pNextHash;
289 }
290 return p;
291}
292
293/*
294** Unlock the database and clear the in-memory cache. This routine
295** sets the state of the pager back to what it was when it was first
296** opened. Any outstanding pages are invalidated and subsequent attempts
297** to access those pages will likely result in a coredump.
298*/
drhd9b02572001-04-15 00:37:09 +0000299static void pager_reset(Pager *pPager){
drhed7c8552001-04-11 14:29:21 +0000300 PgHdr *pPg, *pNext;
drhd9b02572001-04-15 00:37:09 +0000301 for(pPg=pPager->pAll; pPg; pPg=pNext){
302 pNext = pPg->pNextAll;
303 sqliteFree(pPg);
drhed7c8552001-04-11 14:29:21 +0000304 }
305 pPager->pFirst = 0;
drhd9b02572001-04-15 00:37:09 +0000306 pPager->pLast = 0;
307 pPager->pAll = 0;
drhed7c8552001-04-11 14:29:21 +0000308 memset(pPager->aHash, 0, sizeof(pPager->aHash));
309 pPager->nPage = 0;
310 if( pPager->state==SQLITE_WRITELOCK ){
drhd9b02572001-04-15 00:37:09 +0000311 sqlitepager_rollback(pPager);
drhed7c8552001-04-11 14:29:21 +0000312 }
drhd9b02572001-04-15 00:37:09 +0000313 pager_unlock(pPager->fd);
drhed7c8552001-04-11 14:29:21 +0000314 pPager->state = SQLITE_UNLOCK;
drhd9b02572001-04-15 00:37:09 +0000315 pPager->dbSize = -1;
drhed7c8552001-04-11 14:29:21 +0000316 pPager->nRef = 0;
317}
318
319/*
320** When this routine is called, the pager has the journal file open and
321** a write lock on the database. This routine releases the database
322** write lock and acquires a read lock in its place. The journal file
323** is deleted and closed.
324**
325** We have to release the write lock before acquiring the read lock,
326** so there is a race condition where another process can get the lock
327** while we are not holding it. But, no other process should do this
328** because we are also holding a lock on the journal, and no process
329** should get a write lock on the database without first getting a lock
330** on the journal. So this routine should never fail. But it can fail
331** if another process is not playing by the rules. If it does fail,
drhd9b02572001-04-15 00:37:09 +0000332** all in-memory cache pages are invalidated, the PAGER_ERR_LOCK bit
333** is set in pPager->errMask, and this routine returns SQLITE_PROTOCOL.
334** SQLITE_OK is returned on success.
drhed7c8552001-04-11 14:29:21 +0000335*/
drhd9b02572001-04-15 00:37:09 +0000336static int pager_unwritelock(Pager *pPager){
drhed7c8552001-04-11 14:29:21 +0000337 int rc;
drhd9b02572001-04-15 00:37:09 +0000338 PgHdr *pPg;
339 if( pPager->state!=SQLITE_WRITELOCK ) return SQLITE_OK;
340 pager_unlock(pPager->fd);
341 rc = pager_lock(pPager->fd, 0);
drhed7c8552001-04-11 14:29:21 +0000342 unlink(pPager->zJournal);
343 close(pPager->jfd);
344 pPager->jfd = -1;
drh6019e162001-07-02 17:51:45 +0000345 sqliteFree( pPager->aInJournal );
346 pPager->aInJournal = 0;
drhd9b02572001-04-15 00:37:09 +0000347 for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
348 pPg->inJournal = 0;
349 pPg->dirty = 0;
350 }
drhed7c8552001-04-11 14:29:21 +0000351 if( rc!=SQLITE_OK ){
352 pPager->state = SQLITE_UNLOCK;
drhed7c8552001-04-11 14:29:21 +0000353 rc = SQLITE_PROTOCOL;
drhd9b02572001-04-15 00:37:09 +0000354 pPager->errMask |= PAGER_ERR_LOCK;
drhed7c8552001-04-11 14:29:21 +0000355 }else{
drhd9b02572001-04-15 00:37:09 +0000356 rc = SQLITE_OK;
drhed7c8552001-04-11 14:29:21 +0000357 pPager->state = SQLITE_READLOCK;
358 }
359 return rc;
360}
361
drhed7c8552001-04-11 14:29:21 +0000362/*
363** Playback the journal and thus restore the database file to
364** the state it was in before we started making changes.
365**
drhd9b02572001-04-15 00:37:09 +0000366** The journal file format is as follows: There is an initial
367** file-type string for sanity checking. Then there is a single
368** Pgno number which is the number of pages in the database before
369** changes were made. The database is truncated to this size.
drh306dc212001-05-21 13:45:10 +0000370** Next come zero or more page records where each page record
371** consists of a Pgno and SQLITE_PAGE_SIZE bytes of data. See
372** the PageRecord structure for details.
drhed7c8552001-04-11 14:29:21 +0000373**
drhd9b02572001-04-15 00:37:09 +0000374** For playback, the pages have to be read from the journal in
375** reverse order and put back into the original database file.
drhed7c8552001-04-11 14:29:21 +0000376**
drhd9b02572001-04-15 00:37:09 +0000377** If the file opened as the journal file is not a well-formed
378** journal file (as determined by looking at the magic number
379** at the beginning) then this routine returns SQLITE_PROTOCOL.
380** If any other errors occur during playback, the database will
381** likely be corrupted, so the PAGER_ERR_CORRUPT bit is set in
382** pPager->errMask and SQLITE_CORRUPT is returned. If it all
383** works, then this routine returns SQLITE_OK.
drhed7c8552001-04-11 14:29:21 +0000384*/
drhd9b02572001-04-15 00:37:09 +0000385static int pager_playback(Pager *pPager){
386 int nRec; /* Number of Records */
387 int i; /* Loop counter */
388 Pgno mxPg = 0; /* Size of the original file in pages */
389 struct stat statbuf; /* Used to size the journal */
390 PgHdr *pPg; /* An existing page in the cache */
391 PageRecord pgRec;
392 unsigned char aMagic[sizeof(aJournalMagic)];
drhed7c8552001-04-11 14:29:21 +0000393 int rc;
394
drhd9b02572001-04-15 00:37:09 +0000395 /* Read the beginning of the journal and truncate the
396 ** database file back to its original size.
drhed7c8552001-04-11 14:29:21 +0000397 */
drhd9b02572001-04-15 00:37:09 +0000398 assert( pPager->jfd>=0 );
399 pager_seek(pPager->jfd, 0);
400 rc = pager_read(pPager->jfd, aMagic, sizeof(aMagic));
401 if( rc!=SQLITE_OK || memcmp(aMagic,aJournalMagic,sizeof(aMagic))!=0 ){
402 return SQLITE_PROTOCOL;
403 }
404 rc = pager_read(pPager->jfd, &mxPg, sizeof(mxPg));
405 if( rc!=SQLITE_OK ){
406 return SQLITE_PROTOCOL;
407 }
408 pager_truncate(pPager->fd, mxPg);
409 pPager->dbSize = mxPg;
410
411 /* Begin reading the journal beginning at the end and moving
412 ** toward the beginning.
413 */
414 if( fstat(pPager->jfd, &statbuf)!=0 ){
drhed7c8552001-04-11 14:29:21 +0000415 return SQLITE_OK;
416 }
drhd9b02572001-04-15 00:37:09 +0000417 nRec = (statbuf.st_size - (sizeof(aMagic)+sizeof(Pgno))) / sizeof(PageRecord);
drhed7c8552001-04-11 14:29:21 +0000418
419 /* Process segments beginning with the last and working backwards
420 ** to the first.
421 */
drhd9b02572001-04-15 00:37:09 +0000422 for(i=nRec-1; i>=0; i--){
drhed7c8552001-04-11 14:29:21 +0000423 /* Seek to the beginning of the segment */
drhd9b02572001-04-15 00:37:09 +0000424 off_t ofst;
425 ofst = i*sizeof(PageRecord) + sizeof(aMagic) + sizeof(Pgno);
426 rc = pager_seek(pPager->jfd, ofst);
427 if( rc!=SQLITE_OK ) break;
428 rc = pager_read(pPager->jfd, &pgRec, sizeof(pgRec));
429 if( rc!=SQLITE_OK ) break;
drhed7c8552001-04-11 14:29:21 +0000430
drhd9b02572001-04-15 00:37:09 +0000431 /* Sanity checking on the page */
432 if( pgRec.pgno>mxPg || pgRec.pgno==0 ){
433 rc = SQLITE_CORRUPT;
434 break;
drhed7c8552001-04-11 14:29:21 +0000435 }
436
drhd9b02572001-04-15 00:37:09 +0000437 /* Playback the page. Update the in-memory copy of the page
438 ** at the same time, if there is one.
drhed7c8552001-04-11 14:29:21 +0000439 */
drhd9b02572001-04-15 00:37:09 +0000440 pPg = pager_lookup(pPager, pgRec.pgno);
441 if( pPg ){
442 memcpy(PGHDR_TO_DATA(pPg), pgRec.aData, SQLITE_PAGE_SIZE);
drh6019e162001-07-02 17:51:45 +0000443 memset(PGHDR_TO_EXTRA(pPg), 0, pPager->nExtra);
drhed7c8552001-04-11 14:29:21 +0000444 }
drhd9b02572001-04-15 00:37:09 +0000445 rc = pager_seek(pPager->fd, (pgRec.pgno-1)*SQLITE_PAGE_SIZE);
446 if( rc!=SQLITE_OK ) break;
447 rc = pager_write(pPager->fd, pgRec.aData, SQLITE_PAGE_SIZE);
448 if( rc!=SQLITE_OK ) break;
drhed7c8552001-04-11 14:29:21 +0000449 }
drhd9b02572001-04-15 00:37:09 +0000450 if( rc!=SQLITE_OK ){
451 pager_unwritelock(pPager);
452 pPager->errMask |= PAGER_ERR_CORRUPT;
453 rc = SQLITE_CORRUPT;
454 }else{
455 rc = pager_unwritelock(pPager);
drhed7c8552001-04-11 14:29:21 +0000456 }
drhd9b02572001-04-15 00:37:09 +0000457 return rc;
drhed7c8552001-04-11 14:29:21 +0000458}
459
460/*
461** Create a new page cache and put a pointer to the page cache in *ppPager.
462** The file to be cached need not exist. The file is not opened until
drhd9b02572001-04-15 00:37:09 +0000463** the first call to sqlitepager_get() and is only held open until the
464** last page is released using sqlitepager_unref().
drhed7c8552001-04-11 14:29:21 +0000465*/
drh7e3b0a02001-04-28 16:52:40 +0000466int sqlitepager_open(
467 Pager **ppPager, /* Return the Pager structure here */
468 const char *zFilename, /* Name of the database file to open */
469 int mxPage, /* Max number of in-memory cache pages */
470 int nExtra /* Extra bytes append to each in-memory page */
471){
drhed7c8552001-04-11 14:29:21 +0000472 Pager *pPager;
473 int nameLen;
474 int fd;
475
drhd9b02572001-04-15 00:37:09 +0000476 *ppPager = 0;
477 if( sqlite_malloc_failed ){
478 return SQLITE_NOMEM;
479 }
480 fd = open(zFilename, O_RDWR|O_CREAT, 0644);
drhed7c8552001-04-11 14:29:21 +0000481 if( fd<0 ){
482 return SQLITE_CANTOPEN;
483 }
484 nameLen = strlen(zFilename);
485 pPager = sqliteMalloc( sizeof(*pPager) + nameLen*2 + 30 );
drhd9b02572001-04-15 00:37:09 +0000486 if( pPager==0 ){
487 close(fd);
488 return SQLITE_NOMEM;
489 }
drhed7c8552001-04-11 14:29:21 +0000490 pPager->zFilename = (char*)&pPager[1];
491 pPager->zJournal = &pPager->zFilename[nameLen+1];
492 strcpy(pPager->zFilename, zFilename);
493 strcpy(pPager->zJournal, zFilename);
494 strcpy(&pPager->zJournal[nameLen], "-journal");
495 pPager->fd = fd;
496 pPager->jfd = -1;
497 pPager->nRef = 0;
498 pPager->dbSize = -1;
499 pPager->nPage = 0;
drhd79caeb2001-04-15 02:27:24 +0000500 pPager->mxPage = mxPage>5 ? mxPage : 10;
drhed7c8552001-04-11 14:29:21 +0000501 pPager->state = SQLITE_UNLOCK;
drhd9b02572001-04-15 00:37:09 +0000502 pPager->errMask = 0;
drhed7c8552001-04-11 14:29:21 +0000503 pPager->pFirst = 0;
504 pPager->pLast = 0;
drh7c717f72001-06-24 20:39:41 +0000505 pPager->nExtra = nExtra;
drhed7c8552001-04-11 14:29:21 +0000506 memset(pPager->aHash, 0, sizeof(pPager->aHash));
507 *ppPager = pPager;
508 return SQLITE_OK;
509}
510
511/*
drh72f82862001-05-24 21:06:34 +0000512** Set the destructor for this pager. If not NULL, the destructor is called
513** when the reference count on the page reaches zero.
514**
515** The destructor is not called as a result sqlitepager_close().
516** Destructors are only called by sqlitepager_unref().
517*/
518void sqlitepager_set_destructor(Pager *pPager, void (*xDesc)(void*)){
519 pPager->xDestructor = xDesc;
520}
521
522/*
drhed7c8552001-04-11 14:29:21 +0000523** Return the total number of pages in the file opened by pPager.
524*/
drhd9b02572001-04-15 00:37:09 +0000525int sqlitepager_pagecount(Pager *pPager){
drhed7c8552001-04-11 14:29:21 +0000526 int n;
527 struct stat statbuf;
drhd9b02572001-04-15 00:37:09 +0000528 assert( pPager!=0 );
drhed7c8552001-04-11 14:29:21 +0000529 if( pPager->dbSize>=0 ){
530 return pPager->dbSize;
531 }
532 if( fstat(pPager->fd, &statbuf)!=0 ){
533 n = 0;
534 }else{
535 n = statbuf.st_size/SQLITE_PAGE_SIZE;
536 }
drhd9b02572001-04-15 00:37:09 +0000537 if( pPager->state!=SQLITE_UNLOCK ){
drhed7c8552001-04-11 14:29:21 +0000538 pPager->dbSize = n;
539 }
540 return n;
541}
542
543/*
544** Shutdown the page cache. Free all memory and close all files.
545**
546** If a transaction was in progress when this routine is called, that
547** transaction is rolled back. All outstanding pages are invalidated
548** and their memory is freed. Any attempt to use a page associated
549** with this page cache after this function returns will likely
550** result in a coredump.
551*/
drhd9b02572001-04-15 00:37:09 +0000552int sqlitepager_close(Pager *pPager){
553 PgHdr *pPg, *pNext;
drhed7c8552001-04-11 14:29:21 +0000554 switch( pPager->state ){
555 case SQLITE_WRITELOCK: {
drhd9b02572001-04-15 00:37:09 +0000556 sqlitepager_rollback(pPager);
557 pager_unlock(pPager->fd);
drhed7c8552001-04-11 14:29:21 +0000558 break;
559 }
560 case SQLITE_READLOCK: {
drhd9b02572001-04-15 00:37:09 +0000561 pager_unlock(pPager->fd);
drhed7c8552001-04-11 14:29:21 +0000562 break;
563 }
564 default: {
565 /* Do nothing */
566 break;
567 }
568 }
drhd9b02572001-04-15 00:37:09 +0000569 for(pPg=pPager->pAll; pPg; pPg=pNext){
570 pNext = pPg->pNextAll;
571 sqliteFree(pPg);
drhed7c8552001-04-11 14:29:21 +0000572 }
573 if( pPager->fd>=0 ) close(pPager->fd);
574 assert( pPager->jfd<0 );
575 sqliteFree(pPager);
576 return SQLITE_OK;
577}
578
579/*
580** Return the page number for the given page data
581*/
drhd9b02572001-04-15 00:37:09 +0000582Pgno sqlitepager_pagenumber(void *pData){
drhed7c8552001-04-11 14:29:21 +0000583 PgHdr *p = DATA_TO_PGHDR(pData);
584 return p->pgno;
585}
586
587/*
drh7e3b0a02001-04-28 16:52:40 +0000588** Increment the reference count for a page. If the page is
589** currently on the freelist (the reference count is zero) then
590** remove it from the freelist.
591*/
drhdf0b3b02001-06-23 11:36:20 +0000592static void page_ref(PgHdr *pPg){
drh7e3b0a02001-04-28 16:52:40 +0000593 if( pPg->nRef==0 ){
594 /* The page is currently on the freelist. Remove it. */
595 if( pPg->pPrevFree ){
596 pPg->pPrevFree->pNextFree = pPg->pNextFree;
597 }else{
598 pPg->pPager->pFirst = pPg->pNextFree;
599 }
600 if( pPg->pNextFree ){
601 pPg->pNextFree->pPrevFree = pPg->pPrevFree;
602 }else{
603 pPg->pPager->pLast = pPg->pPrevFree;
604 }
605 pPg->pPager->nRef++;
606 }
607 pPg->nRef++;
drhdd793422001-06-28 01:54:48 +0000608 REFINFO(pPg);
drhdf0b3b02001-06-23 11:36:20 +0000609}
610
611/*
612** Increment the reference count for a page. The input pointer is
613** a reference to the page data.
614*/
615int sqlitepager_ref(void *pData){
616 PgHdr *pPg = DATA_TO_PGHDR(pData);
617 page_ref(pPg);
drh8c42ca92001-06-22 19:15:00 +0000618 return SQLITE_OK;
drh7e3b0a02001-04-28 16:52:40 +0000619}
620
621/*
drhd9b02572001-04-15 00:37:09 +0000622** Acquire a page.
623**
624** A read lock is obtained for the first page acquired. The lock
625** is dropped when the last page is released.
626**
drh306dc212001-05-21 13:45:10 +0000627** A _get works for any page number greater than 0. If the database
628** file is smaller than the requested page, then no actual disk
629** read occurs and the memory image of the page is initialized to
630** all zeros. The extra data appended to a page is always initialized
631** to zeros the first time a page is loaded into memory.
632**
drhd9b02572001-04-15 00:37:09 +0000633** The acquisition might fail for several reasons. In all cases,
634** an appropriate error code is returned and *ppPage is set to NULL.
drh7e3b0a02001-04-28 16:52:40 +0000635**
636** See also sqlitepager_lookup(). Both this routine and _lookup() attempt
637** to find a page in the in-memory cache first. If the page is not already
638** in cache, this routine goes to disk to read it in whereas _lookup()
639** just returns 0. This routine acquires a read-lock the first time it
640** has to go to disk, and could also playback an old journal if necessary.
641** Since _lookup() never goes to disk, it never has to deal with locks
642** or journal files.
drhed7c8552001-04-11 14:29:21 +0000643*/
drhd9b02572001-04-15 00:37:09 +0000644int sqlitepager_get(Pager *pPager, Pgno pgno, void **ppPage){
drhed7c8552001-04-11 14:29:21 +0000645 PgHdr *pPg;
646
drhd9b02572001-04-15 00:37:09 +0000647 /* Make sure we have not hit any critical errors.
648 */
649 if( pPager==0 || pgno==0 ){
650 return SQLITE_ERROR;
651 }
652 if( pPager->errMask & ~(PAGER_ERR_FULL) ){
653 return pager_errcode(pPager);
654 }
655
drhed7c8552001-04-11 14:29:21 +0000656 /* If this is the first page accessed, then get a read lock
657 ** on the database file.
658 */
659 if( pPager->nRef==0 ){
drhd9b02572001-04-15 00:37:09 +0000660 if( pager_lock(pPager->fd, 0)!=0 ){
drhed7c8552001-04-11 14:29:21 +0000661 *ppPage = 0;
662 return SQLITE_BUSY;
663 }
drhd9b02572001-04-15 00:37:09 +0000664 pPager->state = SQLITE_READLOCK;
drhed7c8552001-04-11 14:29:21 +0000665
666 /* If a journal file exists, try to play it back.
667 */
668 if( access(pPager->zJournal,0)==0 ){
669 int rc;
670
671 /* Open the journal for exclusive access. Return SQLITE_BUSY if
672 ** we cannot get exclusive access to the journal file
673 */
674 pPager->jfd = open(pPager->zJournal, O_RDONLY, 0);
drhd9b02572001-04-15 00:37:09 +0000675 if( pPager->jfd<0 || pager_lock(pPager->jfd, 1)!=0 ){
drhed7c8552001-04-11 14:29:21 +0000676 if( pPager->jfd>=0 ){ close(pPager->jfd); pPager->jfd = -1; }
drhd9b02572001-04-15 00:37:09 +0000677 pager_unlock(pPager->fd);
drhed7c8552001-04-11 14:29:21 +0000678 *ppPage = 0;
679 return SQLITE_BUSY;
680 }
681
682 /* Get a write lock on the database */
drhd9b02572001-04-15 00:37:09 +0000683 pager_unlock(pPager->fd);
684 if( pager_lock(pPager->fd, 1)!=0 ){
685 close(pPager->jfd);
686 pPager->jfd = -1;
drhed7c8552001-04-11 14:29:21 +0000687 *ppPage = 0;
688 return SQLITE_PROTOCOL;
689 }
690
691 /* Playback and delete the journal. Drop the database write
692 ** lock and reacquire the read lock.
693 */
drhd9b02572001-04-15 00:37:09 +0000694 rc = pager_playback(pPager);
695 if( rc!=SQLITE_OK ){
696 return rc;
697 }
drhed7c8552001-04-11 14:29:21 +0000698 }
699 pPg = 0;
700 }else{
701 /* Search for page in cache */
drhd9b02572001-04-15 00:37:09 +0000702 pPg = pager_lookup(pPager, pgno);
drhed7c8552001-04-11 14:29:21 +0000703 }
704 if( pPg==0 ){
drhd9b02572001-04-15 00:37:09 +0000705 /* The requested page is not in the page cache. */
drhed7c8552001-04-11 14:29:21 +0000706 int h;
drh7e3b0a02001-04-28 16:52:40 +0000707 pPager->nMiss++;
drhed7c8552001-04-11 14:29:21 +0000708 if( pPager->nPage<pPager->mxPage || pPager->pFirst==0 ){
709 /* Create a new page */
drh7e3b0a02001-04-28 16:52:40 +0000710 pPg = sqliteMalloc( sizeof(*pPg) + SQLITE_PAGE_SIZE + pPager->nExtra );
drhd9b02572001-04-15 00:37:09 +0000711 if( pPg==0 ){
712 *ppPage = 0;
713 pager_unwritelock(pPager);
714 pPager->errMask |= PAGER_ERR_MEM;
715 return SQLITE_NOMEM;
716 }
drhed7c8552001-04-11 14:29:21 +0000717 pPg->pPager = pPager;
drhd9b02572001-04-15 00:37:09 +0000718 pPg->pNextAll = pPager->pAll;
719 if( pPager->pAll ){
720 pPager->pAll->pPrevAll = pPg;
721 }
722 pPg->pPrevAll = 0;
drhd79caeb2001-04-15 02:27:24 +0000723 pPager->pAll = pPg;
drhd9b02572001-04-15 00:37:09 +0000724 pPager->nPage++;
drhed7c8552001-04-11 14:29:21 +0000725 }else{
drhd9b02572001-04-15 00:37:09 +0000726 /* Recycle an older page. First locate the page to be recycled.
727 ** Try to find one that is not dirty and is near the head of
728 ** of the free list */
drh6019e162001-07-02 17:51:45 +0000729 int cnt = pPager->mxPage/2;
drhed7c8552001-04-11 14:29:21 +0000730 pPg = pPager->pFirst;
drh6019e162001-07-02 17:51:45 +0000731 while( pPg->dirty && 0<cnt-- && pPg->pNextFree ){
drhd9b02572001-04-15 00:37:09 +0000732 pPg = pPg->pNextFree;
733 }
734 if( pPg==0 || pPg->dirty ) pPg = pPager->pFirst;
735 assert( pPg->nRef==0 );
736
737 /* If the page to be recycled is dirty, sync the journal and write
738 ** the old page into the database. */
drhed7c8552001-04-11 14:29:21 +0000739 if( pPg->dirty ){
740 int rc;
drhd9b02572001-04-15 00:37:09 +0000741 assert( pPg->inJournal==1 );
742 assert( pPager->state==SQLITE_WRITELOCK );
743 rc = fsync(pPager->jfd);
744 if( rc!=0 ){
745 rc = sqlitepager_rollback(pPager);
drhed7c8552001-04-11 14:29:21 +0000746 *ppPage = 0;
drhd9b02572001-04-15 00:37:09 +0000747 if( rc==SQLITE_OK ) rc = SQLITE_IOERR;
drhed7c8552001-04-11 14:29:21 +0000748 return rc;
749 }
drhd9b02572001-04-15 00:37:09 +0000750 pager_seek(pPager->fd, (pPg->pgno-1)*SQLITE_PAGE_SIZE);
751 rc = pager_write(pPager->fd, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE);
752 if( rc!=SQLITE_OK ){
753 rc = sqlitepager_rollback(pPager);
754 *ppPage = 0;
755 if( rc==SQLITE_OK ) rc = SQLITE_FULL;
756 return rc;
757 }
758 }
759
760 /* Unlink the old page from the free list and the hash table
761 */
drh6019e162001-07-02 17:51:45 +0000762 if( pPg->pPrevFree ){
763 pPg->pPrevFree->pNextFree = pPg->pNextFree;
drhed7c8552001-04-11 14:29:21 +0000764 }else{
drh6019e162001-07-02 17:51:45 +0000765 assert( pPager->pFirst==pPg );
766 pPager->pFirst = pPg->pNextFree;
drhed7c8552001-04-11 14:29:21 +0000767 }
drh6019e162001-07-02 17:51:45 +0000768 if( pPg->pNextFree ){
769 pPg->pNextFree->pPrevFree = pPg->pPrevFree;
770 }else{
771 assert( pPager->pLast==pPg );
772 pPager->pLast = pPg->pPrevFree;
773 }
774 pPg->pNextFree = pPg->pPrevFree = 0;
drhed7c8552001-04-11 14:29:21 +0000775 if( pPg->pNextHash ){
776 pPg->pNextHash->pPrevHash = pPg->pPrevHash;
777 }
778 if( pPg->pPrevHash ){
779 pPg->pPrevHash->pNextHash = pPg->pNextHash;
780 }else{
drhd9b02572001-04-15 00:37:09 +0000781 h = pager_hash(pPg->pgno);
drhed7c8552001-04-11 14:29:21 +0000782 assert( pPager->aHash[h]==pPg );
783 pPager->aHash[h] = pPg->pNextHash;
784 }
drh6019e162001-07-02 17:51:45 +0000785 pPg->pNextHash = pPg->pPrevHash = 0;
drhd9b02572001-04-15 00:37:09 +0000786 pPager->nOvfl++;
drhed7c8552001-04-11 14:29:21 +0000787 }
788 pPg->pgno = pgno;
drh6019e162001-07-02 17:51:45 +0000789 if( pPager->aInJournal && pgno<=pPager->origDbSize ){
790 pPg->inJournal = (pPager->aInJournal[pgno/8] & (1<<(pgno&7)))!=0;
791 }else{
792 pPg->inJournal = 0;
793 }
drhed7c8552001-04-11 14:29:21 +0000794 pPg->dirty = 0;
795 pPg->nRef = 1;
drhdd793422001-06-28 01:54:48 +0000796 REFINFO(pPg);
drhd9b02572001-04-15 00:37:09 +0000797 pPager->nRef++;
798 h = pager_hash(pgno);
drhed7c8552001-04-11 14:29:21 +0000799 pPg->pNextHash = pPager->aHash[h];
800 pPager->aHash[h] = pPg;
801 if( pPg->pNextHash ){
802 assert( pPg->pNextHash->pPrevHash==0 );
803 pPg->pNextHash->pPrevHash = pPg;
804 }
drh306dc212001-05-21 13:45:10 +0000805 if( pPager->dbSize<0 ) sqlitepager_pagecount(pPager);
806 if( pPager->dbSize<pgno ){
807 memset(PGHDR_TO_DATA(pPg), 0, SQLITE_PAGE_SIZE);
808 }else{
809 pager_seek(pPager->fd, (pgno-1)*SQLITE_PAGE_SIZE);
810 pager_read(pPager->fd, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE);
811 }
drh7e3b0a02001-04-28 16:52:40 +0000812 if( pPager->nExtra>0 ){
813 memset(PGHDR_TO_EXTRA(pPg), 0, pPager->nExtra);
814 }
drhed7c8552001-04-11 14:29:21 +0000815 }else{
drhd9b02572001-04-15 00:37:09 +0000816 /* The requested page is in the page cache. */
drh7e3b0a02001-04-28 16:52:40 +0000817 pPager->nHit++;
drhdf0b3b02001-06-23 11:36:20 +0000818 page_ref(pPg);
drhed7c8552001-04-11 14:29:21 +0000819 }
820 *ppPage = PGHDR_TO_DATA(pPg);
821 return SQLITE_OK;
822}
823
824/*
drh7e3b0a02001-04-28 16:52:40 +0000825** Acquire a page if it is already in the in-memory cache. Do
826** not read the page from disk. Return a pointer to the page,
827** or 0 if the page is not in cache.
828**
829** See also sqlitepager_get(). The difference between this routine
830** and sqlitepager_get() is that _get() will go to the disk and read
831** in the page if the page is not already in cache. This routine
drh306dc212001-05-21 13:45:10 +0000832** returns NULL if the page is not in cache of if a disk I/O has ever
833** happened.
drh7e3b0a02001-04-28 16:52:40 +0000834*/
835void *sqlitepager_lookup(Pager *pPager, Pgno pgno){
836 PgHdr *pPg;
837
838 /* Make sure we have not hit any critical errors.
839 */
840 if( pPager==0 || pgno==0 ){
841 return 0;
842 }
843 if( pPager->errMask & ~(PAGER_ERR_FULL) ){
844 return 0;
845 }
846 if( pPager->nRef==0 ){
847 return 0;
848 }
849 pPg = pager_lookup(pPager, pgno);
850 if( pPg==0 ) return 0;
drhdf0b3b02001-06-23 11:36:20 +0000851 page_ref(pPg);
drh7e3b0a02001-04-28 16:52:40 +0000852 return PGHDR_TO_DATA(pPg);
853}
854
855/*
drhed7c8552001-04-11 14:29:21 +0000856** Release a page.
857**
858** If the number of references to the page drop to zero, then the
859** page is added to the LRU list. When all references to all pages
drhd9b02572001-04-15 00:37:09 +0000860** are released, a rollback occurs and the lock on the database is
drhed7c8552001-04-11 14:29:21 +0000861** removed.
862*/
drhd9b02572001-04-15 00:37:09 +0000863int sqlitepager_unref(void *pData){
drhed7c8552001-04-11 14:29:21 +0000864 Pager *pPager;
865 PgHdr *pPg;
drhd9b02572001-04-15 00:37:09 +0000866
867 /* Decrement the reference count for this page
868 */
drhed7c8552001-04-11 14:29:21 +0000869 pPg = DATA_TO_PGHDR(pData);
870 assert( pPg->nRef>0 );
871 pPager = pPg->pPager;
872 pPg->nRef--;
drhdd793422001-06-28 01:54:48 +0000873 REFINFO(pPg);
drhd9b02572001-04-15 00:37:09 +0000874
drh72f82862001-05-24 21:06:34 +0000875 /* When the number of references to a page reach 0, call the
876 ** destructor and add the page to the freelist.
drhd9b02572001-04-15 00:37:09 +0000877 */
drhed7c8552001-04-11 14:29:21 +0000878 if( pPg->nRef==0 ){
drhd9b02572001-04-15 00:37:09 +0000879 pPg->pNextFree = 0;
880 pPg->pPrevFree = pPager->pLast;
drhed7c8552001-04-11 14:29:21 +0000881 pPager->pLast = pPg;
drhd9b02572001-04-15 00:37:09 +0000882 if( pPg->pPrevFree ){
883 pPg->pPrevFree->pNextFree = pPg;
drhed7c8552001-04-11 14:29:21 +0000884 }else{
885 pPager->pFirst = pPg;
886 }
drh72f82862001-05-24 21:06:34 +0000887 if( pPager->xDestructor ){
888 pPager->xDestructor(pData);
889 }
drhd9b02572001-04-15 00:37:09 +0000890
891 /* When all pages reach the freelist, drop the read lock from
892 ** the database file.
893 */
894 pPager->nRef--;
895 assert( pPager->nRef>=0 );
896 if( pPager->nRef==0 ){
897 pager_reset(pPager);
898 }
drhed7c8552001-04-11 14:29:21 +0000899 }
drhd9b02572001-04-15 00:37:09 +0000900 return SQLITE_OK;
drhed7c8552001-04-11 14:29:21 +0000901}
902
903/*
904** Mark a data page as writeable. The page is written into the journal
905** if it is not there already. This routine must be called before making
906** changes to a page.
907**
908** The first time this routine is called, the pager creates a new
909** journal and acquires a write lock on the database. If the write
910** lock could not be acquired, this routine returns SQLITE_BUSY. The
drh306dc212001-05-21 13:45:10 +0000911** calling routine must check for that return value and be careful not to
drhed7c8552001-04-11 14:29:21 +0000912** change any page data until this routine returns SQLITE_OK.
drhd9b02572001-04-15 00:37:09 +0000913**
914** If the journal file could not be written because the disk is full,
915** then this routine returns SQLITE_FULL and does an immediate rollback.
916** All subsequent write attempts also return SQLITE_FULL until there
917** is a call to sqlitepager_commit() or sqlitepager_rollback() to
918** reset.
drhed7c8552001-04-11 14:29:21 +0000919*/
drhd9b02572001-04-15 00:37:09 +0000920int sqlitepager_write(void *pData){
drh69688d52001-04-14 16:38:23 +0000921 PgHdr *pPg = DATA_TO_PGHDR(pData);
922 Pager *pPager = pPg->pPager;
drhd79caeb2001-04-15 02:27:24 +0000923 int rc = SQLITE_OK;
drh69688d52001-04-14 16:38:23 +0000924
drhd9b02572001-04-15 00:37:09 +0000925 if( pPager->errMask ){
926 return pager_errcode(pPager);
927 }
928 pPg->dirty = 1;
drh69688d52001-04-14 16:38:23 +0000929 if( pPg->inJournal ){ return SQLITE_OK; }
drhd9b02572001-04-15 00:37:09 +0000930 assert( pPager->state!=SQLITE_UNLOCK );
drhed7c8552001-04-11 14:29:21 +0000931 if( pPager->state==SQLITE_READLOCK ){
drh6019e162001-07-02 17:51:45 +0000932 assert( pPager->aInJournal==0 );
933 pPager->aInJournal = sqliteMalloc( pPager->dbSize/8 + 1 );
934 if( pPager->aInJournal==0 ){
935 return SQLITE_NOMEM;
936 }
drhed7c8552001-04-11 14:29:21 +0000937 pPager->jfd = open(pPager->zJournal, O_RDWR|O_CREAT, 0644);
938 if( pPager->jfd<0 ){
939 return SQLITE_CANTOPEN;
940 }
drhd9b02572001-04-15 00:37:09 +0000941 if( pager_lock(pPager->jfd, 1) ){
drhed7c8552001-04-11 14:29:21 +0000942 close(pPager->jfd);
943 pPager->jfd = -1;
944 return SQLITE_BUSY;
945 }
drhd9b02572001-04-15 00:37:09 +0000946 pager_unlock(pPager->fd);
947 if( pager_lock(pPager->fd, 1) ){
drhed7c8552001-04-11 14:29:21 +0000948 close(pPager->jfd);
949 pPager->jfd = -1;
950 pPager->state = SQLITE_UNLOCK;
drhd9b02572001-04-15 00:37:09 +0000951 pPager->errMask |= PAGER_ERR_LOCK;
drhed7c8552001-04-11 14:29:21 +0000952 return SQLITE_PROTOCOL;
953 }
954 pPager->state = SQLITE_WRITELOCK;
drhd9b02572001-04-15 00:37:09 +0000955 sqlitepager_pagecount(pPager);
drh69688d52001-04-14 16:38:23 +0000956 pPager->origDbSize = pPager->dbSize;
drhd9b02572001-04-15 00:37:09 +0000957 rc = pager_write(pPager->jfd, aJournalMagic, sizeof(aJournalMagic));
958 if( rc==SQLITE_OK ){
959 rc = pager_write(pPager->jfd, &pPager->dbSize, sizeof(Pgno));
960 }
961 if( rc!=SQLITE_OK ){
962 rc = pager_unwritelock(pPager);
963 if( rc==SQLITE_OK ) rc = SQLITE_FULL;
964 return rc;
965 }
drhed7c8552001-04-11 14:29:21 +0000966 }
drhd9b02572001-04-15 00:37:09 +0000967 assert( pPager->state==SQLITE_WRITELOCK );
drh69688d52001-04-14 16:38:23 +0000968 assert( pPager->jfd>=0 );
drhd9b02572001-04-15 00:37:09 +0000969 if( pPg->pgno <= pPager->origDbSize ){
970 rc = pager_write(pPager->jfd, &pPg->pgno, sizeof(Pgno));
971 if( rc==SQLITE_OK ){
972 rc = pager_write(pPager->jfd, pData, SQLITE_PAGE_SIZE);
973 }
974 if( rc!=SQLITE_OK ){
975 sqlitepager_rollback(pPager);
976 pPager->errMask |= PAGER_ERR_FULL;
977 return rc;
978 }
drh6019e162001-07-02 17:51:45 +0000979 assert( pPager->aInJournal!=0 );
980 pPager->aInJournal[pPg->pgno/8] |= 1<<(pPg->pgno&7);
drh69688d52001-04-14 16:38:23 +0000981 }
drh69688d52001-04-14 16:38:23 +0000982 pPg->inJournal = 1;
drh306dc212001-05-21 13:45:10 +0000983 if( pPager->dbSize<pPg->pgno ){
984 pPager->dbSize = pPg->pgno;
985 }
drh69688d52001-04-14 16:38:23 +0000986 return rc;
drhed7c8552001-04-11 14:29:21 +0000987}
988
989/*
drh6019e162001-07-02 17:51:45 +0000990** Return TRUE if the page given in the argument was previous passed
991** to sqlitepager_write(). In other words, return TRUE if it is ok
992** to change the content of the page.
993*/
994int sqlitepager_iswriteable(void *pData){
995 PgHdr *pPg = DATA_TO_PGHDR(pData);
996 return pPg->dirty;
997}
998
999/*
drhed7c8552001-04-11 14:29:21 +00001000** Commit all changes to the database and release the write lock.
drhd9b02572001-04-15 00:37:09 +00001001**
1002** If the commit fails for any reason, a rollback attempt is made
1003** and an error code is returned. If the commit worked, SQLITE_OK
1004** is returned.
drhed7c8552001-04-11 14:29:21 +00001005*/
drhd9b02572001-04-15 00:37:09 +00001006int sqlitepager_commit(Pager *pPager){
drhed7c8552001-04-11 14:29:21 +00001007 int i, rc;
1008 PgHdr *pPg;
drhd9b02572001-04-15 00:37:09 +00001009
1010 if( pPager->errMask==PAGER_ERR_FULL ){
1011 rc = sqlitepager_rollback(pPager);
1012 if( rc==SQLITE_OK ) rc = SQLITE_FULL;
1013 return rc;
1014 }
1015 if( pPager->errMask!=0 ){
1016 rc = pager_errcode(pPager);
1017 return rc;
1018 }
1019 if( pPager->state!=SQLITE_WRITELOCK ){
1020 return SQLITE_ERROR;
1021 }
drhed7c8552001-04-11 14:29:21 +00001022 assert( pPager->jfd>=0 );
1023 if( fsync(pPager->jfd) ){
drhd9b02572001-04-15 00:37:09 +00001024 goto commit_abort;
drhed7c8552001-04-11 14:29:21 +00001025 }
1026 for(i=0; i<N_PG_HASH; i++){
1027 for(pPg=pPager->aHash[i]; pPg; pPg=pPg->pNextHash){
1028 if( pPg->dirty==0 ) continue;
drhd9b02572001-04-15 00:37:09 +00001029 rc = pager_seek(pPager->fd, (pPg->pgno-1)*SQLITE_PAGE_SIZE);
1030 if( rc!=SQLITE_OK ) goto commit_abort;
1031 rc = pager_write(pPager->fd, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE);
1032 if( rc!=SQLITE_OK ) goto commit_abort;
drhed7c8552001-04-11 14:29:21 +00001033 }
1034 }
drhd9b02572001-04-15 00:37:09 +00001035 if( fsync(pPager->fd) ) goto commit_abort;
1036 rc = pager_unwritelock(pPager);
1037 pPager->dbSize = -1;
1038 return rc;
1039
1040 /* Jump here if anything goes wrong during the commit process.
1041 */
1042commit_abort:
1043 rc = sqlitepager_rollback(pPager);
1044 if( rc==SQLITE_OK ){
1045 rc = SQLITE_FULL;
drhed7c8552001-04-11 14:29:21 +00001046 }
drhed7c8552001-04-11 14:29:21 +00001047 return rc;
1048}
1049
1050/*
1051** Rollback all changes. The database falls back to read-only mode.
1052** All in-memory cache pages revert to their original data contents.
1053** The journal is deleted.
drhd9b02572001-04-15 00:37:09 +00001054**
1055** This routine cannot fail unless some other process is not following
1056** the correct locking protocol (SQLITE_PROTOCOL) or unless some other
1057** process is writing trash into the journal file (SQLITE_CORRUPT) or
1058** unless a prior malloc() failed (SQLITE_NOMEM). Appropriate error
1059** codes are returned for all these occasions. Otherwise,
1060** SQLITE_OK is returned.
drhed7c8552001-04-11 14:29:21 +00001061*/
drhd9b02572001-04-15 00:37:09 +00001062int sqlitepager_rollback(Pager *pPager){
drhed7c8552001-04-11 14:29:21 +00001063 int rc;
drhd9b02572001-04-15 00:37:09 +00001064 if( pPager->errMask!=0 && pPager->errMask!=PAGER_ERR_FULL ){
1065 return pager_errcode(pPager);
drhed7c8552001-04-11 14:29:21 +00001066 }
drhd9b02572001-04-15 00:37:09 +00001067 if( pPager->state!=SQLITE_WRITELOCK ){
1068 return SQLITE_OK;
1069 }
1070 rc = pager_playback(pPager);
1071 if( rc!=SQLITE_OK ){
1072 rc = SQLITE_CORRUPT;
1073 pPager->errMask |= PAGER_ERR_CORRUPT;
1074 }
1075 pPager->dbSize = -1;
drhed7c8552001-04-11 14:29:21 +00001076 return rc;
1077};
drhd9b02572001-04-15 00:37:09 +00001078
1079/*
1080** This routine is used for testing and analysis only.
1081*/
1082int *sqlitepager_stats(Pager *pPager){
1083 static int a[9];
1084 a[0] = pPager->nRef;
1085 a[1] = pPager->nPage;
1086 a[2] = pPager->mxPage;
1087 a[3] = pPager->dbSize;
1088 a[4] = pPager->state;
1089 a[5] = pPager->errMask;
1090 a[6] = pPager->nHit;
1091 a[7] = pPager->nMiss;
1092 a[8] = pPager->nOvfl;
1093 return a;
1094}
drhdd793422001-06-28 01:54:48 +00001095
#if SQLITE_TEST
/*
** Print a listing of all referenced pages and their ref count.
**
** Every page on the pager's list of all pages is visited.  Pages whose
** reference count is zero or less are skipped.  For each remaining
** page one line is printed to standard output giving the page number,
** the address of the page data, and the reference count.
**
** The address is printed with %p from a void* argument.  Casting the
** pointer to int would truncate it where pointers are wider than int.
*/
void sqlitepager_refdump(Pager *pPager){
  PgHdr *pPg;
  for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
    if( pPg->nRef<=0 ) continue;
    printf("PAGE %3d addr=%p nRef=%d\n",
       pPg->pgno, (void*)PGHDR_TO_DATA(pPg), pPg->nRef);
  }
}
#endif