blob: 4ca2b20a1793fbdf116be39100644c7b939dbac9 [file] [log] [blame]
/*
** Copyright (c) 2001 D. Richard Hipp
**
** This program is free software; you can redistribute it and/or
** modify it under the terms of the GNU General Public
** License as published by the Free Software Foundation; either
** version 2 of the License, or (at your option) any later version.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
** General Public License for more details.
**
** You should have received a copy of the GNU General Public
** License along with this library; if not, write to the
** Free Software Foundation, Inc., 59 Temple Place - Suite 330,
** Boston, MA  02111-1307, USA.
**
** Author contact information:
**   drh@hwaci.com
**   http://www.hwaci.com/drh/
**
*************************************************************************
** This is the implementation of the page cache subsystem.
**
** The page cache is used to access a database file.  The pager journals
** all writes in order to support rollback.  Locking is used to limit
** access to one or more readers or to one writer.
**
** @(#) $Id: pager.c,v 1.19 2001/09/15 00:57:29 drh Exp $
*/
drhd9b02572001-04-15 00:37:09 +000032#include "sqliteInt.h"
drhed7c8552001-04-11 14:29:21 +000033#include "pager.h"
34#include <fcntl.h>
35#include <sys/stat.h>
36#include <unistd.h>
37#include <assert.h>
drhd9b02572001-04-15 00:37:09 +000038#include <string.h>
drhed7c8552001-04-11 14:29:21 +000039
/*
** Locking state of the page cache.  At any moment the cache is in
** exactly one of the following states:
**
**   SQLITE_UNLOCK      Nothing is being read from or written to the
**                      database file and no page data is held in
**                      memory.  Every pager starts out in this state.
**
**   SQLITE_READLOCK    The database is being read.  Writing is not
**                      allowed, but any number of readers may share
**                      the same database file at the same time.
**
**   SQLITE_WRITELOCK   The database is being written.  Access is
**                      exclusive: no other process or thread may read
**                      or write while one process is writing.
**
** Transitions:  The first sqlitepager_get() moves the cache from
** SQLITE_UNLOCK to SQLITE_READLOCK, and releasing the last outstanding
** page with sqlitepager_unref() moves it back to SQLITE_UNLOCK.  The
** first request to write a page moves the cache to SQLITE_WRITELOCK.
** Because only an outstanding page can be written, the cache is always
** in SQLITE_READLOCK before it enters SQLITE_WRITELOCK.  Committing or
** rolling back (see sqlitepager_rollback()) returns the cache from
** SQLITE_WRITELOCK to SQLITE_READLOCK.
*/
#define SQLITE_UNLOCK      0
#define SQLITE_READLOCK    1
#define SQLITE_WRITELOCK   2
73
drhd9b02572001-04-15 00:37:09 +000074
drhed7c8552001-04-11 14:29:21 +000075/*
76** Each in-memory image of a page begins with the following header.
drhbd03cae2001-06-02 02:40:57 +000077** This header is only visible to this pager module. The client
78** code that calls pager sees only the data that follows the header.
drhed7c8552001-04-11 14:29:21 +000079*/
drhd9b02572001-04-15 00:37:09 +000080typedef struct PgHdr PgHdr;
drhed7c8552001-04-11 14:29:21 +000081struct PgHdr {
82 Pager *pPager; /* The pager to which this page belongs */
83 Pgno pgno; /* The page number for this page */
drh69688d52001-04-14 16:38:23 +000084 PgHdr *pNextHash, *pPrevHash; /* Hash collision chain for PgHdr.pgno */
drhed7c8552001-04-11 14:29:21 +000085 int nRef; /* Number of users of this page */
drhd9b02572001-04-15 00:37:09 +000086 PgHdr *pNextFree, *pPrevFree; /* Freelist of pages where nRef==0 */
87 PgHdr *pNextAll, *pPrevAll; /* A list of all pages */
drhed7c8552001-04-11 14:29:21 +000088 char inJournal; /* TRUE if has been written to journal */
89 char dirty; /* TRUE if we need to write back changes */
drh69688d52001-04-14 16:38:23 +000090 /* SQLITE_PAGE_SIZE bytes of page data follow this header */
drh7e3b0a02001-04-28 16:52:40 +000091 /* Pager.nExtra bytes of local data follow the page data */
drhed7c8552001-04-11 14:29:21 +000092};
93
/*
** Conversions between a PgHdr pointer and the memory that trails it:
**
**   PGHDR_TO_DATA(P)    page data stored immediately after header P
**   DATA_TO_PGHDR(D)    header that immediately precedes page data D
**   PGHDR_TO_EXTRA(P)   extra area located SQLITE_PAGE_SIZE bytes past
**                       the start of the page data of header P
*/
#define PGHDR_TO_DATA(P)   ((void*)((P)+1))
#define DATA_TO_PGHDR(D)   (((PgHdr*)(D))-1)
#define PGHDR_TO_EXTRA(P)  ((void*)(((char*)((P)+1))+SQLITE_PAGE_SIZE))
drhed7c8552001-04-11 14:29:21 +0000101
/*
** Number of buckets in the hash table that maps a page number to its
** in-memory page.  Knuth says this should be a prime number.
*/
#define N_PG_HASH   907
drhed7c8552001-04-11 14:29:21 +0000107
108/*
109** A open page cache is an instance of the following structure.
110*/
111struct Pager {
112 char *zFilename; /* Name of the database file */
113 char *zJournal; /* Name of the journal file */
114 int fd, jfd; /* File descriptors for database and journal */
drhed7c8552001-04-11 14:29:21 +0000115 int dbSize; /* Number of pages in the file */
drh69688d52001-04-14 16:38:23 +0000116 int origDbSize; /* dbSize before the current change */
drh7e3b0a02001-04-28 16:52:40 +0000117 int nExtra; /* Add this many bytes to each in-memory page */
drh72f82862001-05-24 21:06:34 +0000118 void (*xDestructor)(void*); /* Call this routine when freeing pages */
drhed7c8552001-04-11 14:29:21 +0000119 int nPage; /* Total number of in-memory pages */
drhd9b02572001-04-15 00:37:09 +0000120 int nRef; /* Number of in-memory pages with PgHdr.nRef>0 */
drhed7c8552001-04-11 14:29:21 +0000121 int mxPage; /* Maximum number of pages to hold in cache */
drhd9b02572001-04-15 00:37:09 +0000122 int nHit, nMiss, nOvfl; /* Cache hits, missing, and LRU overflows */
123 unsigned char state; /* SQLITE_UNLOCK, _READLOCK or _WRITELOCK */
124 unsigned char errMask; /* One of several kinds of errors */
drh5e00f6c2001-09-13 13:46:56 +0000125 unsigned char tempFile; /* zFilename is a temporary file */
126 unsigned char readOnly; /* True for a read-only database */
drhf57b14a2001-09-14 18:54:08 +0000127 unsigned char needSync; /* True if an fsync() is needed on the journal */
drh6019e162001-07-02 17:51:45 +0000128 unsigned char *aInJournal; /* One bit for each page in the database file */
drhed7c8552001-04-11 14:29:21 +0000129 PgHdr *pFirst, *pLast; /* List of free pages */
drhd9b02572001-04-15 00:37:09 +0000130 PgHdr *pAll; /* List of all pages */
drhed7c8552001-04-11 14:29:21 +0000131 PgHdr *aHash[N_PG_HASH]; /* Hash table to map page number of PgHdr */
drhd9b02572001-04-15 00:37:09 +0000132};
133
/*
** Bit flags that may be set in Pager.errMask.  Each flag records one
** kind of failure the pager has run into.
*/
#define PAGER_ERR_FULL      0x01   /* a write() failed */
#define PAGER_ERR_MEM       0x02   /* malloc() failed */
#define PAGER_ERR_LOCK      0x04   /* error in the locking protocol */
#define PAGER_ERR_CORRUPT   0x08   /* database or journal corruption */
141
142/*
143** The journal file contains page records in the following
144** format.
145*/
146typedef struct PageRecord PageRecord;
147struct PageRecord {
148 Pgno pgno; /* The page number */
149 char aData[SQLITE_PAGE_SIZE]; /* Original data for page pgno */
150};
151
/*
** Magic string found at the very start of every journal file.  The
** bytes came from /dev/random and serve only as a sanity check.
*/
static const unsigned char aJournalMagic[] = {
  0xd9, 0xd5, 0x05, 0xf9,
  0x20, 0xa1, 0x63, 0xd4,
};
159
/*
** Map a page number onto a bucket index in the range 0..N_PG_HASH-1.
*/
#define pager_hash(PN)   ((PN)%N_PG_HASH)
drhed7c8552001-04-11 14:29:21 +0000164
/*
** Reference count tracking, compiled in only when SQLITE_TEST is set.
**
** When pager_refinfo_enable is non-zero, pager_refinfo() prints the page
** number, the address of the page data and the current reference count
** of the page it is given.  REFINFO(X) expands to a call to
** pager_refinfo() in test builds and to nothing otherwise.
*/
#if SQLITE_TEST
  int pager_refinfo_enable = 0;
  static void pager_refinfo(PgHdr *p){
    static int cnt = 0;
    if( !pager_refinfo_enable ) return;
    /* Print the address with %p.  Casting the pointer to int and using
    ** %08x would truncate it wherever pointers are wider than int. */
    printf(
       "REFCNT: %4d addr=%p nRef=%d\n",
       (int)p->pgno, PGHDR_TO_DATA(p), p->nRef
    );
    cnt++;   /* Something to set a breakpoint on */
  }
# define REFINFO(X)  pager_refinfo(X)
#else
# define REFINFO(X)
#endif
183
/*
** Try to lock the whole file behind descriptor fd without blocking.
** A write lock is requested when wrlock is non-zero, a read lock
** otherwise.
**
** Returns 0 if the lock was obtained and 1 if it was not.
*/
static int pager_lock(int fd, int wrlock){
  struct flock lk;
  lk.l_whence = SEEK_SET;
  lk.l_start = 0L;
  lk.l_len = 0L;          /* start 0 and length 0: cover the whole file */
  if( wrlock ){
    lk.l_type = F_WRLCK;
  }else{
    lk.l_type = F_RDLCK;
  }
  return fcntl(fd, F_SETLK, &lk)!=0;
}
198
/*
** Release any lock this process holds on the file behind descriptor fd.
**
** Returns 0 on success and 1 if the fcntl() call failed.
*/
static int pager_unlock(int fd){
  int rc;
  struct flock lock;
  lock.l_type = F_UNLCK;
  lock.l_whence = SEEK_SET;
  lock.l_start = lock.l_len = 0L;   /* whole file */
  rc = fcntl(fd, F_SETLK, &lock);
  return rc!=0;
}
211
212/*
213** Move the cursor for file descriptor fd to the point whereto from
214** the beginning of the file.
215*/
216static int pager_seek(int fd, off_t whereto){
drh6019e162001-07-02 17:51:45 +0000217 /*printf("SEEK to page %d\n", whereto/SQLITE_PAGE_SIZE + 1);*/
drhd9b02572001-04-15 00:37:09 +0000218 lseek(fd, whereto, SEEK_SET);
219 return SQLITE_OK;
220}
221
222/*
223** Truncate the given file so that it contains exactly mxPg pages
224** of data.
225*/
226static int pager_truncate(int fd, Pgno mxPg){
227 int rc;
228 rc = ftruncate(fd, mxPg*SQLITE_PAGE_SIZE);
229 return rc!=0 ? SQLITE_IOERR : SQLITE_OK;
230}
231
232/*
233** Read nBytes of data from fd into pBuf. If the data cannot be
234** read or only a partial read occurs, then the unread parts of
235** pBuf are filled with zeros and this routine returns SQLITE_IOERR.
236** If the read is completely successful, return SQLITE_OK.
237*/
238static int pager_read(int fd, void *pBuf, int nByte){
239 int rc;
drh6019e162001-07-02 17:51:45 +0000240 /* printf("READ\n");*/
drhd9b02572001-04-15 00:37:09 +0000241 rc = read(fd, pBuf, nByte);
242 if( rc<0 ){
243 memset(pBuf, 0, nByte);
244 return SQLITE_IOERR;
245 }
246 if( rc<nByte ){
247 memset(&((char*)pBuf)[rc], 0, nByte - rc);
248 rc = SQLITE_IOERR;
249 }else{
250 rc = SQLITE_OK;
251 }
252 return rc;
253}
254
255/*
256** Write nBytes of data into fd. If any problem occurs or if the
257** write is incomplete, SQLITE_IOERR is returned. SQLITE_OK is
258** returned upon complete success.
259*/
260static int pager_write(int fd, const void *pBuf, int nByte){
261 int rc;
drh6019e162001-07-02 17:51:45 +0000262 /*printf("WRITE\n");*/
drhd9b02572001-04-15 00:37:09 +0000263 rc = write(fd, pBuf, nByte);
264 if( rc<nByte ){
265 return SQLITE_FULL;
266 }else{
267 return SQLITE_OK;
268 }
269}
270
271/*
272** Convert the bits in the pPager->errMask into an approprate
273** return code.
274*/
275static int pager_errcode(Pager *pPager){
276 int rc = SQLITE_OK;
277 if( pPager->errMask & PAGER_ERR_LOCK ) rc = SQLITE_PROTOCOL;
278 if( pPager->errMask & PAGER_ERR_FULL ) rc = SQLITE_FULL;
279 if( pPager->errMask & PAGER_ERR_MEM ) rc = SQLITE_NOMEM;
280 if( pPager->errMask & PAGER_ERR_CORRUPT ) rc = SQLITE_CORRUPT;
281 return rc;
drhed7c8552001-04-11 14:29:21 +0000282}
283
284/*
285** Find a page in the hash table given its page number. Return
286** a pointer to the page or NULL if not found.
287*/
drhd9b02572001-04-15 00:37:09 +0000288static PgHdr *pager_lookup(Pager *pPager, Pgno pgno){
drhed7c8552001-04-11 14:29:21 +0000289 PgHdr *p = pPager->aHash[pgno % N_PG_HASH];
290 while( p && p->pgno!=pgno ){
291 p = p->pNextHash;
292 }
293 return p;
294}
295
296/*
297** Unlock the database and clear the in-memory cache. This routine
298** sets the state of the pager back to what it was when it was first
299** opened. Any outstanding pages are invalidated and subsequent attempts
300** to access those pages will likely result in a coredump.
301*/
drhd9b02572001-04-15 00:37:09 +0000302static void pager_reset(Pager *pPager){
drhed7c8552001-04-11 14:29:21 +0000303 PgHdr *pPg, *pNext;
drhd9b02572001-04-15 00:37:09 +0000304 for(pPg=pPager->pAll; pPg; pPg=pNext){
305 pNext = pPg->pNextAll;
306 sqliteFree(pPg);
drhed7c8552001-04-11 14:29:21 +0000307 }
308 pPager->pFirst = 0;
drhd9b02572001-04-15 00:37:09 +0000309 pPager->pLast = 0;
310 pPager->pAll = 0;
drhed7c8552001-04-11 14:29:21 +0000311 memset(pPager->aHash, 0, sizeof(pPager->aHash));
312 pPager->nPage = 0;
313 if( pPager->state==SQLITE_WRITELOCK ){
drhd9b02572001-04-15 00:37:09 +0000314 sqlitepager_rollback(pPager);
drhed7c8552001-04-11 14:29:21 +0000315 }
drhd9b02572001-04-15 00:37:09 +0000316 pager_unlock(pPager->fd);
drhed7c8552001-04-11 14:29:21 +0000317 pPager->state = SQLITE_UNLOCK;
drhd9b02572001-04-15 00:37:09 +0000318 pPager->dbSize = -1;
drhed7c8552001-04-11 14:29:21 +0000319 pPager->nRef = 0;
320}
321
322/*
323** When this routine is called, the pager has the journal file open and
324** a write lock on the database. This routine releases the database
325** write lock and acquires a read lock in its place. The journal file
326** is deleted and closed.
327**
328** We have to release the write lock before acquiring the read lock,
329** so there is a race condition where another process can get the lock
330** while we are not holding it. But, no other process should do this
331** because we are also holding a lock on the journal, and no process
332** should get a write lock on the database without first getting a lock
333** on the journal. So this routine should never fail. But it can fail
334** if another process is not playing by the rules. If it does fail,
drhd9b02572001-04-15 00:37:09 +0000335** all in-memory cache pages are invalidated, the PAGER_ERR_LOCK bit
336** is set in pPager->errMask, and this routine returns SQLITE_PROTOCOL.
337** SQLITE_OK is returned on success.
drhed7c8552001-04-11 14:29:21 +0000338*/
drhd9b02572001-04-15 00:37:09 +0000339static int pager_unwritelock(Pager *pPager){
drhed7c8552001-04-11 14:29:21 +0000340 int rc;
drhd9b02572001-04-15 00:37:09 +0000341 PgHdr *pPg;
342 if( pPager->state!=SQLITE_WRITELOCK ) return SQLITE_OK;
343 pager_unlock(pPager->fd);
344 rc = pager_lock(pPager->fd, 0);
drhed7c8552001-04-11 14:29:21 +0000345 unlink(pPager->zJournal);
346 close(pPager->jfd);
347 pPager->jfd = -1;
drh6019e162001-07-02 17:51:45 +0000348 sqliteFree( pPager->aInJournal );
349 pPager->aInJournal = 0;
drhd9b02572001-04-15 00:37:09 +0000350 for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
351 pPg->inJournal = 0;
352 pPg->dirty = 0;
353 }
drhed7c8552001-04-11 14:29:21 +0000354 if( rc!=SQLITE_OK ){
355 pPager->state = SQLITE_UNLOCK;
drhed7c8552001-04-11 14:29:21 +0000356 rc = SQLITE_PROTOCOL;
drhd9b02572001-04-15 00:37:09 +0000357 pPager->errMask |= PAGER_ERR_LOCK;
drhed7c8552001-04-11 14:29:21 +0000358 }else{
drhd9b02572001-04-15 00:37:09 +0000359 rc = SQLITE_OK;
drhed7c8552001-04-11 14:29:21 +0000360 pPager->state = SQLITE_READLOCK;
361 }
362 return rc;
363}
364
drhed7c8552001-04-11 14:29:21 +0000365/*
366** Playback the journal and thus restore the database file to
367** the state it was in before we started making changes.
368**
drhd9b02572001-04-15 00:37:09 +0000369** The journal file format is as follows: There is an initial
370** file-type string for sanity checking. Then there is a single
371** Pgno number which is the number of pages in the database before
372** changes were made. The database is truncated to this size.
drh306dc212001-05-21 13:45:10 +0000373** Next come zero or more page records where each page record
374** consists of a Pgno and SQLITE_PAGE_SIZE bytes of data. See
375** the PageRecord structure for details.
drhed7c8552001-04-11 14:29:21 +0000376**
drhd9b02572001-04-15 00:37:09 +0000377** For playback, the pages have to be read from the journal in
378** reverse order and put back into the original database file.
drhed7c8552001-04-11 14:29:21 +0000379**
drhd9b02572001-04-15 00:37:09 +0000380** If the file opened as the journal file is not a well-formed
381** journal file (as determined by looking at the magic number
382** at the beginning) then this routine returns SQLITE_PROTOCOL.
383** If any other errors occur during playback, the database will
384** likely be corrupted, so the PAGER_ERR_CORRUPT bit is set in
385** pPager->errMask and SQLITE_CORRUPT is returned. If it all
386** works, then this routine returns SQLITE_OK.
drhed7c8552001-04-11 14:29:21 +0000387*/
drhd9b02572001-04-15 00:37:09 +0000388static int pager_playback(Pager *pPager){
389 int nRec; /* Number of Records */
390 int i; /* Loop counter */
391 Pgno mxPg = 0; /* Size of the original file in pages */
392 struct stat statbuf; /* Used to size the journal */
393 PgHdr *pPg; /* An existing page in the cache */
394 PageRecord pgRec;
395 unsigned char aMagic[sizeof(aJournalMagic)];
drhed7c8552001-04-11 14:29:21 +0000396 int rc;
397
drhd9b02572001-04-15 00:37:09 +0000398 /* Read the beginning of the journal and truncate the
399 ** database file back to its original size.
drhed7c8552001-04-11 14:29:21 +0000400 */
drhd9b02572001-04-15 00:37:09 +0000401 assert( pPager->jfd>=0 );
402 pager_seek(pPager->jfd, 0);
403 rc = pager_read(pPager->jfd, aMagic, sizeof(aMagic));
404 if( rc!=SQLITE_OK || memcmp(aMagic,aJournalMagic,sizeof(aMagic))!=0 ){
405 return SQLITE_PROTOCOL;
406 }
407 rc = pager_read(pPager->jfd, &mxPg, sizeof(mxPg));
408 if( rc!=SQLITE_OK ){
409 return SQLITE_PROTOCOL;
410 }
411 pager_truncate(pPager->fd, mxPg);
412 pPager->dbSize = mxPg;
413
414 /* Begin reading the journal beginning at the end and moving
415 ** toward the beginning.
416 */
417 if( fstat(pPager->jfd, &statbuf)!=0 ){
drhed7c8552001-04-11 14:29:21 +0000418 return SQLITE_OK;
419 }
drhd9b02572001-04-15 00:37:09 +0000420 nRec = (statbuf.st_size - (sizeof(aMagic)+sizeof(Pgno))) / sizeof(PageRecord);
drhed7c8552001-04-11 14:29:21 +0000421
422 /* Process segments beginning with the last and working backwards
423 ** to the first.
424 */
drhd9b02572001-04-15 00:37:09 +0000425 for(i=nRec-1; i>=0; i--){
drhed7c8552001-04-11 14:29:21 +0000426 /* Seek to the beginning of the segment */
drhd9b02572001-04-15 00:37:09 +0000427 off_t ofst;
428 ofst = i*sizeof(PageRecord) + sizeof(aMagic) + sizeof(Pgno);
429 rc = pager_seek(pPager->jfd, ofst);
430 if( rc!=SQLITE_OK ) break;
431 rc = pager_read(pPager->jfd, &pgRec, sizeof(pgRec));
432 if( rc!=SQLITE_OK ) break;
drhed7c8552001-04-11 14:29:21 +0000433
drhd9b02572001-04-15 00:37:09 +0000434 /* Sanity checking on the page */
435 if( pgRec.pgno>mxPg || pgRec.pgno==0 ){
436 rc = SQLITE_CORRUPT;
437 break;
drhed7c8552001-04-11 14:29:21 +0000438 }
439
drhd9b02572001-04-15 00:37:09 +0000440 /* Playback the page. Update the in-memory copy of the page
441 ** at the same time, if there is one.
drhed7c8552001-04-11 14:29:21 +0000442 */
drhd9b02572001-04-15 00:37:09 +0000443 pPg = pager_lookup(pPager, pgRec.pgno);
444 if( pPg ){
445 memcpy(PGHDR_TO_DATA(pPg), pgRec.aData, SQLITE_PAGE_SIZE);
drh6019e162001-07-02 17:51:45 +0000446 memset(PGHDR_TO_EXTRA(pPg), 0, pPager->nExtra);
drhed7c8552001-04-11 14:29:21 +0000447 }
drhd9b02572001-04-15 00:37:09 +0000448 rc = pager_seek(pPager->fd, (pgRec.pgno-1)*SQLITE_PAGE_SIZE);
449 if( rc!=SQLITE_OK ) break;
450 rc = pager_write(pPager->fd, pgRec.aData, SQLITE_PAGE_SIZE);
451 if( rc!=SQLITE_OK ) break;
drhed7c8552001-04-11 14:29:21 +0000452 }
drhd9b02572001-04-15 00:37:09 +0000453 if( rc!=SQLITE_OK ){
454 pager_unwritelock(pPager);
455 pPager->errMask |= PAGER_ERR_CORRUPT;
456 rc = SQLITE_CORRUPT;
457 }else{
458 rc = pager_unwritelock(pPager);
drhed7c8552001-04-11 14:29:21 +0000459 }
drhd9b02572001-04-15 00:37:09 +0000460 return rc;
drhed7c8552001-04-11 14:29:21 +0000461}
462
/*
** Pick a directory in which a temporary file could be created.
**
** A fixed list of candidates is tried in order.  The first one that
** exists, is a directory, and passes access() with mode 07 is returned.
** NULL is returned when no candidate qualifies.
*/
static const char *findTempDir(void){
  static const char *azDirs[] = {
     ".",
     "/var/tmp",
     "/usr/tmp",
     "/tmp",
     "/temp",
     "./temp",
  };
  const int nDir = sizeof(azDirs)/sizeof(azDirs[0]);
  int i = 0;
  while( i<nDir ){
    const char *zDir = azDirs[i++];
    struct stat buf;
    if( stat(zDir, &buf)==0
     && S_ISDIR(buf.st_mode)
     && access(zDir, 07)==0
    ){
      return zDir;
    }
  }
  return 0;
}
486
487/*
drhf57b14a2001-09-14 18:54:08 +0000488** Change the maximum number of in-memory pages that are allowed.
489*/
490void sqlitepager_set_cachesize(Pager *pPager, int mxPage){
491 if( mxPage>10 ){
492 pPager->mxPage = mxPage;
493 }
494}
495
496/*
drhed7c8552001-04-11 14:29:21 +0000497** Create a new page cache and put a pointer to the page cache in *ppPager.
drh5e00f6c2001-09-13 13:46:56 +0000498** The file to be cached need not exist. The file is not locked until
drhd9b02572001-04-15 00:37:09 +0000499** the first call to sqlitepager_get() and is only held open until the
500** last page is released using sqlitepager_unref().
drhed7c8552001-04-11 14:29:21 +0000501*/
drh7e3b0a02001-04-28 16:52:40 +0000502int sqlitepager_open(
503 Pager **ppPager, /* Return the Pager structure here */
504 const char *zFilename, /* Name of the database file to open */
505 int mxPage, /* Max number of in-memory cache pages */
506 int nExtra /* Extra bytes append to each in-memory page */
507){
drhed7c8552001-04-11 14:29:21 +0000508 Pager *pPager;
509 int nameLen;
510 int fd;
drh5e00f6c2001-09-13 13:46:56 +0000511 int tempFile;
512 int readOnly = 0;
513 char zTemp[300];
drhed7c8552001-04-11 14:29:21 +0000514
drhd9b02572001-04-15 00:37:09 +0000515 *ppPager = 0;
516 if( sqlite_malloc_failed ){
517 return SQLITE_NOMEM;
518 }
drh5e00f6c2001-09-13 13:46:56 +0000519 if( zFilename ){
520 fd = open(zFilename, O_RDWR|O_CREAT, 0644);
521 if( fd<0 ){
522 fd = open(zFilename, O_RDONLY, 0);
523 readOnly = 1;
524 }
525 tempFile = 0;
526 }else{
527 int cnt = 8;
drhbe0072d2001-09-13 14:46:09 +0000528 const char *zDir = findTempDir();
drh5e00f6c2001-09-13 13:46:56 +0000529 if( zDir==0 ) return SQLITE_CANTOPEN;
530 do{
531 cnt--;
drhbe0072d2001-09-13 14:46:09 +0000532 sprintf(zTemp,"%s/_sqlite_%u", zDir, (unsigned)sqliteRandomInteger());
drh5e00f6c2001-09-13 13:46:56 +0000533 fd = open(zTemp, O_RDWR|O_CREAT|O_EXCL, 0600);
534 }while( cnt>0 && fd<0 );
535 zFilename = zTemp;
536 tempFile = 1;
537 }
drhed7c8552001-04-11 14:29:21 +0000538 if( fd<0 ){
539 return SQLITE_CANTOPEN;
540 }
541 nameLen = strlen(zFilename);
542 pPager = sqliteMalloc( sizeof(*pPager) + nameLen*2 + 30 );
drhd9b02572001-04-15 00:37:09 +0000543 if( pPager==0 ){
544 close(fd);
545 return SQLITE_NOMEM;
546 }
drhed7c8552001-04-11 14:29:21 +0000547 pPager->zFilename = (char*)&pPager[1];
548 pPager->zJournal = &pPager->zFilename[nameLen+1];
549 strcpy(pPager->zFilename, zFilename);
550 strcpy(pPager->zJournal, zFilename);
551 strcpy(&pPager->zJournal[nameLen], "-journal");
552 pPager->fd = fd;
553 pPager->jfd = -1;
554 pPager->nRef = 0;
555 pPager->dbSize = -1;
556 pPager->nPage = 0;
drhd79caeb2001-04-15 02:27:24 +0000557 pPager->mxPage = mxPage>5 ? mxPage : 10;
drhed7c8552001-04-11 14:29:21 +0000558 pPager->state = SQLITE_UNLOCK;
drhd9b02572001-04-15 00:37:09 +0000559 pPager->errMask = 0;
drh5e00f6c2001-09-13 13:46:56 +0000560 pPager->tempFile = tempFile;
561 pPager->readOnly = readOnly;
drhf57b14a2001-09-14 18:54:08 +0000562 pPager->needSync = 0;
drhed7c8552001-04-11 14:29:21 +0000563 pPager->pFirst = 0;
564 pPager->pLast = 0;
drh7c717f72001-06-24 20:39:41 +0000565 pPager->nExtra = nExtra;
drhed7c8552001-04-11 14:29:21 +0000566 memset(pPager->aHash, 0, sizeof(pPager->aHash));
567 *ppPager = pPager;
568 return SQLITE_OK;
569}
570
571/*
drh72f82862001-05-24 21:06:34 +0000572** Set the destructor for this pager. If not NULL, the destructor is called
drh5e00f6c2001-09-13 13:46:56 +0000573** when the reference count on each page reaches zero. The destructor can
574** be used to clean up information in the extra segment appended to each page.
drh72f82862001-05-24 21:06:34 +0000575**
576** The destructor is not called as a result sqlitepager_close().
577** Destructors are only called by sqlitepager_unref().
578*/
579void sqlitepager_set_destructor(Pager *pPager, void (*xDesc)(void*)){
580 pPager->xDestructor = xDesc;
581}
582
583/*
drh5e00f6c2001-09-13 13:46:56 +0000584** Return the total number of pages in the disk file associated with
585** pPager.
drhed7c8552001-04-11 14:29:21 +0000586*/
drhd9b02572001-04-15 00:37:09 +0000587int sqlitepager_pagecount(Pager *pPager){
drhed7c8552001-04-11 14:29:21 +0000588 int n;
589 struct stat statbuf;
drhd9b02572001-04-15 00:37:09 +0000590 assert( pPager!=0 );
drhed7c8552001-04-11 14:29:21 +0000591 if( pPager->dbSize>=0 ){
592 return pPager->dbSize;
593 }
594 if( fstat(pPager->fd, &statbuf)!=0 ){
595 n = 0;
596 }else{
597 n = statbuf.st_size/SQLITE_PAGE_SIZE;
598 }
drhd9b02572001-04-15 00:37:09 +0000599 if( pPager->state!=SQLITE_UNLOCK ){
drhed7c8552001-04-11 14:29:21 +0000600 pPager->dbSize = n;
601 }
602 return n;
603}
604
605/*
606** Shutdown the page cache. Free all memory and close all files.
607**
608** If a transaction was in progress when this routine is called, that
609** transaction is rolled back. All outstanding pages are invalidated
610** and their memory is freed. Any attempt to use a page associated
611** with this page cache after this function returns will likely
612** result in a coredump.
613*/
drhd9b02572001-04-15 00:37:09 +0000614int sqlitepager_close(Pager *pPager){
615 PgHdr *pPg, *pNext;
drhed7c8552001-04-11 14:29:21 +0000616 switch( pPager->state ){
617 case SQLITE_WRITELOCK: {
drhd9b02572001-04-15 00:37:09 +0000618 sqlitepager_rollback(pPager);
619 pager_unlock(pPager->fd);
drhed7c8552001-04-11 14:29:21 +0000620 break;
621 }
622 case SQLITE_READLOCK: {
drhd9b02572001-04-15 00:37:09 +0000623 pager_unlock(pPager->fd);
drhed7c8552001-04-11 14:29:21 +0000624 break;
625 }
626 default: {
627 /* Do nothing */
628 break;
629 }
630 }
drhd9b02572001-04-15 00:37:09 +0000631 for(pPg=pPager->pAll; pPg; pPg=pNext){
632 pNext = pPg->pNextAll;
633 sqliteFree(pPg);
drhed7c8552001-04-11 14:29:21 +0000634 }
635 if( pPager->fd>=0 ) close(pPager->fd);
636 assert( pPager->jfd<0 );
drh5e00f6c2001-09-13 13:46:56 +0000637 if( pPager->tempFile ){
638 unlink(pPager->zFilename);
639 }
drhed7c8552001-04-11 14:29:21 +0000640 sqliteFree(pPager);
641 return SQLITE_OK;
642}
643
644/*
drh5e00f6c2001-09-13 13:46:56 +0000645** Return the page number for the given page data.
drhed7c8552001-04-11 14:29:21 +0000646*/
drhd9b02572001-04-15 00:37:09 +0000647Pgno sqlitepager_pagenumber(void *pData){
drhed7c8552001-04-11 14:29:21 +0000648 PgHdr *p = DATA_TO_PGHDR(pData);
649 return p->pgno;
650}
651
652/*
drh7e3b0a02001-04-28 16:52:40 +0000653** Increment the reference count for a page. If the page is
654** currently on the freelist (the reference count is zero) then
655** remove it from the freelist.
656*/
drhdf0b3b02001-06-23 11:36:20 +0000657static void page_ref(PgHdr *pPg){
drh7e3b0a02001-04-28 16:52:40 +0000658 if( pPg->nRef==0 ){
659 /* The page is currently on the freelist. Remove it. */
660 if( pPg->pPrevFree ){
661 pPg->pPrevFree->pNextFree = pPg->pNextFree;
662 }else{
663 pPg->pPager->pFirst = pPg->pNextFree;
664 }
665 if( pPg->pNextFree ){
666 pPg->pNextFree->pPrevFree = pPg->pPrevFree;
667 }else{
668 pPg->pPager->pLast = pPg->pPrevFree;
669 }
670 pPg->pPager->nRef++;
671 }
672 pPg->nRef++;
drhdd793422001-06-28 01:54:48 +0000673 REFINFO(pPg);
drhdf0b3b02001-06-23 11:36:20 +0000674}
675
676/*
677** Increment the reference count for a page. The input pointer is
678** a reference to the page data.
679*/
680int sqlitepager_ref(void *pData){
681 PgHdr *pPg = DATA_TO_PGHDR(pData);
682 page_ref(pPg);
drh8c42ca92001-06-22 19:15:00 +0000683 return SQLITE_OK;
drh7e3b0a02001-04-28 16:52:40 +0000684}
685
686/*
drh50e5dad2001-09-15 00:57:28 +0000687** Sync the journal and write all free dirty pages to the database file.
688*/
689static int syncAllPages(Pager *pPager){
690 PgHdr *pPg;
691 int rc = SQLITE_OK;
692 if( pPager->needSync ){
693 rc = fsync(pPager->jfd);
694 if( rc!=0 ) return rc;
695 pPager->needSync = 0;
696 }
697 for(pPg=pPager->pFirst; pPg; pPg=pPg->pNextFree){
698 if( pPg->dirty ){
699 pager_seek(pPager->fd, (pPg->pgno-1)*SQLITE_PAGE_SIZE);
700 rc = pager_write(pPager->fd, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE);
701 if( rc!=SQLITE_OK ) break;
702 pPg->dirty = 0;
703 }
704 }
705 return SQLITE_OK;
706}
707
/*
** Acquire a page.
**
** A read lock on the disk file is obtained when the first page is acquired.
** This read lock is dropped when the last page is released.
**
** A _get works for any page number greater than 0.  If the database
** file is smaller than the requested page, then no actual disk
** read occurs and the memory image of the page is initialized to
** all zeros.  The extra data appended to a page is always initialized
** to zeros the first time a page is loaded into memory.
**
** The acquisition might fail for several reasons.  In all cases,
** an appropriate error code is returned and *ppPage is set to NULL.
**
** See also sqlitepager_lookup().  Both this routine and _lookup() attempt
** to find a page in the in-memory cache first.  If the page is not already
** in memory, this routine goes to disk to read it in whereas _lookup()
** just returns 0.  This routine acquires a read-lock the first time it
** has to go to disk, and could also playback an old journal if necessary.
** Since _lookup() never goes to disk, it never has to deal with locks
** or journal files.
*/
drhd9b02572001-04-15 00:37:09 +0000731int sqlitepager_get(Pager *pPager, Pgno pgno, void **ppPage){
drhed7c8552001-04-11 14:29:21 +0000732 PgHdr *pPg;
733
drhd9b02572001-04-15 00:37:09 +0000734 /* Make sure we have not hit any critical errors.
735 */
736 if( pPager==0 || pgno==0 ){
737 return SQLITE_ERROR;
738 }
739 if( pPager->errMask & ~(PAGER_ERR_FULL) ){
740 return pager_errcode(pPager);
741 }
742
drhed7c8552001-04-11 14:29:21 +0000743 /* If this is the first page accessed, then get a read lock
744 ** on the database file.
745 */
746 if( pPager->nRef==0 ){
drhd9b02572001-04-15 00:37:09 +0000747 if( pager_lock(pPager->fd, 0)!=0 ){
drhed7c8552001-04-11 14:29:21 +0000748 *ppPage = 0;
749 return SQLITE_BUSY;
750 }
drhd9b02572001-04-15 00:37:09 +0000751 pPager->state = SQLITE_READLOCK;
drhed7c8552001-04-11 14:29:21 +0000752
753 /* If a journal file exists, try to play it back.
754 */
755 if( access(pPager->zJournal,0)==0 ){
756 int rc;
757
758 /* Open the journal for exclusive access. Return SQLITE_BUSY if
759 ** we cannot get exclusive access to the journal file
760 */
761 pPager->jfd = open(pPager->zJournal, O_RDONLY, 0);
drhd9b02572001-04-15 00:37:09 +0000762 if( pPager->jfd<0 || pager_lock(pPager->jfd, 1)!=0 ){
drhed7c8552001-04-11 14:29:21 +0000763 if( pPager->jfd>=0 ){ close(pPager->jfd); pPager->jfd = -1; }
drhd9b02572001-04-15 00:37:09 +0000764 pager_unlock(pPager->fd);
drhed7c8552001-04-11 14:29:21 +0000765 *ppPage = 0;
766 return SQLITE_BUSY;
767 }
768
769 /* Get a write lock on the database */
drhd9b02572001-04-15 00:37:09 +0000770 pager_unlock(pPager->fd);
771 if( pager_lock(pPager->fd, 1)!=0 ){
772 close(pPager->jfd);
773 pPager->jfd = -1;
drhed7c8552001-04-11 14:29:21 +0000774 *ppPage = 0;
775 return SQLITE_PROTOCOL;
776 }
777
778 /* Playback and delete the journal. Drop the database write
779 ** lock and reacquire the read lock.
780 */
drhd9b02572001-04-15 00:37:09 +0000781 rc = pager_playback(pPager);
782 if( rc!=SQLITE_OK ){
783 return rc;
784 }
drhed7c8552001-04-11 14:29:21 +0000785 }
786 pPg = 0;
787 }else{
788 /* Search for page in cache */
drhd9b02572001-04-15 00:37:09 +0000789 pPg = pager_lookup(pPager, pgno);
drhed7c8552001-04-11 14:29:21 +0000790 }
791 if( pPg==0 ){
drhd9b02572001-04-15 00:37:09 +0000792 /* The requested page is not in the page cache. */
drhed7c8552001-04-11 14:29:21 +0000793 int h;
drh7e3b0a02001-04-28 16:52:40 +0000794 pPager->nMiss++;
drhed7c8552001-04-11 14:29:21 +0000795 if( pPager->nPage<pPager->mxPage || pPager->pFirst==0 ){
796 /* Create a new page */
drh7e3b0a02001-04-28 16:52:40 +0000797 pPg = sqliteMalloc( sizeof(*pPg) + SQLITE_PAGE_SIZE + pPager->nExtra );
drhd9b02572001-04-15 00:37:09 +0000798 if( pPg==0 ){
799 *ppPage = 0;
800 pager_unwritelock(pPager);
801 pPager->errMask |= PAGER_ERR_MEM;
802 return SQLITE_NOMEM;
803 }
drhed7c8552001-04-11 14:29:21 +0000804 pPg->pPager = pPager;
drhd9b02572001-04-15 00:37:09 +0000805 pPg->pNextAll = pPager->pAll;
806 if( pPager->pAll ){
807 pPager->pAll->pPrevAll = pPg;
808 }
809 pPg->pPrevAll = 0;
drhd79caeb2001-04-15 02:27:24 +0000810 pPager->pAll = pPg;
drhd9b02572001-04-15 00:37:09 +0000811 pPager->nPage++;
drhed7c8552001-04-11 14:29:21 +0000812 }else{
drhd9b02572001-04-15 00:37:09 +0000813 /* Recycle an older page. First locate the page to be recycled.
814 ** Try to find one that is not dirty and is near the head of
815 ** of the free list */
drh50e5dad2001-09-15 00:57:28 +0000816 int cnt = pPager->mxPage/2;
drhed7c8552001-04-11 14:29:21 +0000817 pPg = pPager->pFirst;
drh6019e162001-07-02 17:51:45 +0000818 while( pPg->dirty && 0<cnt-- && pPg->pNextFree ){
drhd9b02572001-04-15 00:37:09 +0000819 pPg = pPg->pNextFree;
820 }
drh50e5dad2001-09-15 00:57:28 +0000821 if( pPg==0 || pPg->dirty ){
822 int rc = syncAllPages(pPager);
823 if( rc!=0 ){
824 sqlitepager_rollback(pPager);
825 *ppPage = 0;
826 return SQLITE_IOERR;
827 }
828 pPg = pPager->pFirst;
829 }
drhd9b02572001-04-15 00:37:09 +0000830 assert( pPg->nRef==0 );
831
drh50e5dad2001-09-15 00:57:28 +0000832
833#if 0
834 /**** Since putting in the call to syncAllPages() above, this code
835 ** is no longer used. I've kept it here for historical reference
836 ** only.
837 */
drhd9b02572001-04-15 00:37:09 +0000838 /* If the page to be recycled is dirty, sync the journal and write
839 ** the old page into the database. */
drhed7c8552001-04-11 14:29:21 +0000840 if( pPg->dirty ){
841 int rc;
drhd9b02572001-04-15 00:37:09 +0000842 assert( pPg->inJournal==1 );
843 assert( pPager->state==SQLITE_WRITELOCK );
drhf57b14a2001-09-14 18:54:08 +0000844 if( pPager->needSync ){
845 rc = fsync(pPager->jfd);
846 if( rc!=0 ){
847 rc = sqlitepager_rollback(pPager);
848 *ppPage = 0;
849 if( rc==SQLITE_OK ) rc = SQLITE_IOERR;
850 return rc;
851 }
852 pPager->needSync = 0;
drhed7c8552001-04-11 14:29:21 +0000853 }
drhd9b02572001-04-15 00:37:09 +0000854 pager_seek(pPager->fd, (pPg->pgno-1)*SQLITE_PAGE_SIZE);
855 rc = pager_write(pPager->fd, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE);
856 if( rc!=SQLITE_OK ){
857 rc = sqlitepager_rollback(pPager);
858 *ppPage = 0;
859 if( rc==SQLITE_OK ) rc = SQLITE_FULL;
860 return rc;
861 }
862 }
drh50e5dad2001-09-15 00:57:28 +0000863#endif
864 assert( pPg->dirty==0 );
drhd9b02572001-04-15 00:37:09 +0000865
866 /* Unlink the old page from the free list and the hash table
867 */
drh6019e162001-07-02 17:51:45 +0000868 if( pPg->pPrevFree ){
869 pPg->pPrevFree->pNextFree = pPg->pNextFree;
drhed7c8552001-04-11 14:29:21 +0000870 }else{
drh6019e162001-07-02 17:51:45 +0000871 assert( pPager->pFirst==pPg );
872 pPager->pFirst = pPg->pNextFree;
drhed7c8552001-04-11 14:29:21 +0000873 }
drh6019e162001-07-02 17:51:45 +0000874 if( pPg->pNextFree ){
875 pPg->pNextFree->pPrevFree = pPg->pPrevFree;
876 }else{
877 assert( pPager->pLast==pPg );
878 pPager->pLast = pPg->pPrevFree;
879 }
880 pPg->pNextFree = pPg->pPrevFree = 0;
drhed7c8552001-04-11 14:29:21 +0000881 if( pPg->pNextHash ){
882 pPg->pNextHash->pPrevHash = pPg->pPrevHash;
883 }
884 if( pPg->pPrevHash ){
885 pPg->pPrevHash->pNextHash = pPg->pNextHash;
886 }else{
drhd9b02572001-04-15 00:37:09 +0000887 h = pager_hash(pPg->pgno);
drhed7c8552001-04-11 14:29:21 +0000888 assert( pPager->aHash[h]==pPg );
889 pPager->aHash[h] = pPg->pNextHash;
890 }
drh6019e162001-07-02 17:51:45 +0000891 pPg->pNextHash = pPg->pPrevHash = 0;
drhd9b02572001-04-15 00:37:09 +0000892 pPager->nOvfl++;
drhed7c8552001-04-11 14:29:21 +0000893 }
894 pPg->pgno = pgno;
drh6019e162001-07-02 17:51:45 +0000895 if( pPager->aInJournal && pgno<=pPager->origDbSize ){
896 pPg->inJournal = (pPager->aInJournal[pgno/8] & (1<<(pgno&7)))!=0;
897 }else{
898 pPg->inJournal = 0;
899 }
drhed7c8552001-04-11 14:29:21 +0000900 pPg->dirty = 0;
901 pPg->nRef = 1;
drhdd793422001-06-28 01:54:48 +0000902 REFINFO(pPg);
drhd9b02572001-04-15 00:37:09 +0000903 pPager->nRef++;
904 h = pager_hash(pgno);
drhed7c8552001-04-11 14:29:21 +0000905 pPg->pNextHash = pPager->aHash[h];
906 pPager->aHash[h] = pPg;
907 if( pPg->pNextHash ){
908 assert( pPg->pNextHash->pPrevHash==0 );
909 pPg->pNextHash->pPrevHash = pPg;
910 }
drh306dc212001-05-21 13:45:10 +0000911 if( pPager->dbSize<0 ) sqlitepager_pagecount(pPager);
912 if( pPager->dbSize<pgno ){
913 memset(PGHDR_TO_DATA(pPg), 0, SQLITE_PAGE_SIZE);
914 }else{
915 pager_seek(pPager->fd, (pgno-1)*SQLITE_PAGE_SIZE);
916 pager_read(pPager->fd, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE);
917 }
drh7e3b0a02001-04-28 16:52:40 +0000918 if( pPager->nExtra>0 ){
919 memset(PGHDR_TO_EXTRA(pPg), 0, pPager->nExtra);
920 }
drhed7c8552001-04-11 14:29:21 +0000921 }else{
drhd9b02572001-04-15 00:37:09 +0000922 /* The requested page is in the page cache. */
drh7e3b0a02001-04-28 16:52:40 +0000923 pPager->nHit++;
drhdf0b3b02001-06-23 11:36:20 +0000924 page_ref(pPg);
drhed7c8552001-04-11 14:29:21 +0000925 }
926 *ppPage = PGHDR_TO_DATA(pPg);
927 return SQLITE_OK;
928}
929
930/*
drh7e3b0a02001-04-28 16:52:40 +0000931** Acquire a page if it is already in the in-memory cache. Do
932** not read the page from disk. Return a pointer to the page,
933** or 0 if the page is not in cache.
934**
935** See also sqlitepager_get(). The difference between this routine
936** and sqlitepager_get() is that _get() will go to the disk and read
937** in the page if the page is not already in cache. This routine
drh5e00f6c2001-09-13 13:46:56 +0000938** returns NULL if the page is not in cache or if a disk I/O error
939** has ever happened.
drh7e3b0a02001-04-28 16:52:40 +0000940*/
941void *sqlitepager_lookup(Pager *pPager, Pgno pgno){
942 PgHdr *pPg;
943
944 /* Make sure we have not hit any critical errors.
945 */
946 if( pPager==0 || pgno==0 ){
947 return 0;
948 }
949 if( pPager->errMask & ~(PAGER_ERR_FULL) ){
950 return 0;
951 }
952 if( pPager->nRef==0 ){
953 return 0;
954 }
955 pPg = pager_lookup(pPager, pgno);
956 if( pPg==0 ) return 0;
drhdf0b3b02001-06-23 11:36:20 +0000957 page_ref(pPg);
drh7e3b0a02001-04-28 16:52:40 +0000958 return PGHDR_TO_DATA(pPg);
959}
960
961/*
drhed7c8552001-04-11 14:29:21 +0000962** Release a page.
963**
964** If the number of references to the page drop to zero, then the
965** page is added to the LRU list. When all references to all pages
drhd9b02572001-04-15 00:37:09 +0000966** are released, a rollback occurs and the lock on the database is
drhed7c8552001-04-11 14:29:21 +0000967** removed.
968*/
drhd9b02572001-04-15 00:37:09 +0000969int sqlitepager_unref(void *pData){
drhed7c8552001-04-11 14:29:21 +0000970 Pager *pPager;
971 PgHdr *pPg;
drhd9b02572001-04-15 00:37:09 +0000972
973 /* Decrement the reference count for this page
974 */
drhed7c8552001-04-11 14:29:21 +0000975 pPg = DATA_TO_PGHDR(pData);
976 assert( pPg->nRef>0 );
977 pPager = pPg->pPager;
978 pPg->nRef--;
drhdd793422001-06-28 01:54:48 +0000979 REFINFO(pPg);
drhd9b02572001-04-15 00:37:09 +0000980
drh72f82862001-05-24 21:06:34 +0000981 /* When the number of references to a page reach 0, call the
982 ** destructor and add the page to the freelist.
drhd9b02572001-04-15 00:37:09 +0000983 */
drhed7c8552001-04-11 14:29:21 +0000984 if( pPg->nRef==0 ){
drhd9b02572001-04-15 00:37:09 +0000985 pPg->pNextFree = 0;
986 pPg->pPrevFree = pPager->pLast;
drhed7c8552001-04-11 14:29:21 +0000987 pPager->pLast = pPg;
drhd9b02572001-04-15 00:37:09 +0000988 if( pPg->pPrevFree ){
989 pPg->pPrevFree->pNextFree = pPg;
drhed7c8552001-04-11 14:29:21 +0000990 }else{
991 pPager->pFirst = pPg;
992 }
drh72f82862001-05-24 21:06:34 +0000993 if( pPager->xDestructor ){
994 pPager->xDestructor(pData);
995 }
drhd9b02572001-04-15 00:37:09 +0000996
997 /* When all pages reach the freelist, drop the read lock from
998 ** the database file.
999 */
1000 pPager->nRef--;
1001 assert( pPager->nRef>=0 );
1002 if( pPager->nRef==0 ){
1003 pager_reset(pPager);
1004 }
drhed7c8552001-04-11 14:29:21 +00001005 }
drhd9b02572001-04-15 00:37:09 +00001006 return SQLITE_OK;
drhed7c8552001-04-11 14:29:21 +00001007}
1008
1009/*
1010** Mark a data page as writeable. The page is written into the journal
1011** if it is not there already. This routine must be called before making
1012** changes to a page.
1013**
1014** The first time this routine is called, the pager creates a new
1015** journal and acquires a write lock on the database. If the write
1016** lock could not be acquired, this routine returns SQLITE_BUSY. The
drh306dc212001-05-21 13:45:10 +00001017** calling routine must check for that return value and be careful not to
drhed7c8552001-04-11 14:29:21 +00001018** change any page data until this routine returns SQLITE_OK.
drhd9b02572001-04-15 00:37:09 +00001019**
1020** If the journal file could not be written because the disk is full,
1021** then this routine returns SQLITE_FULL and does an immediate rollback.
1022** All subsequent write attempts also return SQLITE_FULL until there
1023** is a call to sqlitepager_commit() or sqlitepager_rollback() to
1024** reset.
drhed7c8552001-04-11 14:29:21 +00001025*/
drhd9b02572001-04-15 00:37:09 +00001026int sqlitepager_write(void *pData){
drh69688d52001-04-14 16:38:23 +00001027 PgHdr *pPg = DATA_TO_PGHDR(pData);
1028 Pager *pPager = pPg->pPager;
drhd79caeb2001-04-15 02:27:24 +00001029 int rc = SQLITE_OK;
drh69688d52001-04-14 16:38:23 +00001030
drhd9b02572001-04-15 00:37:09 +00001031 if( pPager->errMask ){
1032 return pager_errcode(pPager);
1033 }
drh5e00f6c2001-09-13 13:46:56 +00001034 if( pPager->readOnly ){
1035 return SQLITE_PERM;
1036 }
drhd9b02572001-04-15 00:37:09 +00001037 pPg->dirty = 1;
drh69688d52001-04-14 16:38:23 +00001038 if( pPg->inJournal ){ return SQLITE_OK; }
drhd9b02572001-04-15 00:37:09 +00001039 assert( pPager->state!=SQLITE_UNLOCK );
drhed7c8552001-04-11 14:29:21 +00001040 if( pPager->state==SQLITE_READLOCK ){
drh6019e162001-07-02 17:51:45 +00001041 assert( pPager->aInJournal==0 );
1042 pPager->aInJournal = sqliteMalloc( pPager->dbSize/8 + 1 );
1043 if( pPager->aInJournal==0 ){
1044 return SQLITE_NOMEM;
1045 }
drhed7c8552001-04-11 14:29:21 +00001046 pPager->jfd = open(pPager->zJournal, O_RDWR|O_CREAT, 0644);
1047 if( pPager->jfd<0 ){
1048 return SQLITE_CANTOPEN;
1049 }
drhf57b14a2001-09-14 18:54:08 +00001050 pPager->needSync = 0;
drhd9b02572001-04-15 00:37:09 +00001051 if( pager_lock(pPager->jfd, 1) ){
drhed7c8552001-04-11 14:29:21 +00001052 close(pPager->jfd);
1053 pPager->jfd = -1;
1054 return SQLITE_BUSY;
1055 }
drhd9b02572001-04-15 00:37:09 +00001056 pager_unlock(pPager->fd);
1057 if( pager_lock(pPager->fd, 1) ){
drhed7c8552001-04-11 14:29:21 +00001058 close(pPager->jfd);
1059 pPager->jfd = -1;
1060 pPager->state = SQLITE_UNLOCK;
drhd9b02572001-04-15 00:37:09 +00001061 pPager->errMask |= PAGER_ERR_LOCK;
drhed7c8552001-04-11 14:29:21 +00001062 return SQLITE_PROTOCOL;
1063 }
1064 pPager->state = SQLITE_WRITELOCK;
drhd9b02572001-04-15 00:37:09 +00001065 sqlitepager_pagecount(pPager);
drh69688d52001-04-14 16:38:23 +00001066 pPager->origDbSize = pPager->dbSize;
drhd9b02572001-04-15 00:37:09 +00001067 rc = pager_write(pPager->jfd, aJournalMagic, sizeof(aJournalMagic));
1068 if( rc==SQLITE_OK ){
1069 rc = pager_write(pPager->jfd, &pPager->dbSize, sizeof(Pgno));
1070 }
1071 if( rc!=SQLITE_OK ){
1072 rc = pager_unwritelock(pPager);
1073 if( rc==SQLITE_OK ) rc = SQLITE_FULL;
1074 return rc;
1075 }
drhed7c8552001-04-11 14:29:21 +00001076 }
drhd9b02572001-04-15 00:37:09 +00001077 assert( pPager->state==SQLITE_WRITELOCK );
drh69688d52001-04-14 16:38:23 +00001078 assert( pPager->jfd>=0 );
drhd9b02572001-04-15 00:37:09 +00001079 if( pPg->pgno <= pPager->origDbSize ){
1080 rc = pager_write(pPager->jfd, &pPg->pgno, sizeof(Pgno));
1081 if( rc==SQLITE_OK ){
1082 rc = pager_write(pPager->jfd, pData, SQLITE_PAGE_SIZE);
1083 }
1084 if( rc!=SQLITE_OK ){
1085 sqlitepager_rollback(pPager);
1086 pPager->errMask |= PAGER_ERR_FULL;
1087 return rc;
1088 }
drh6019e162001-07-02 17:51:45 +00001089 assert( pPager->aInJournal!=0 );
1090 pPager->aInJournal[pPg->pgno/8] |= 1<<(pPg->pgno&7);
drhf57b14a2001-09-14 18:54:08 +00001091 pPager->needSync = 1;
drh69688d52001-04-14 16:38:23 +00001092 }
drh69688d52001-04-14 16:38:23 +00001093 pPg->inJournal = 1;
drh306dc212001-05-21 13:45:10 +00001094 if( pPager->dbSize<pPg->pgno ){
1095 pPager->dbSize = pPg->pgno;
1096 }
drh69688d52001-04-14 16:38:23 +00001097 return rc;
drhed7c8552001-04-11 14:29:21 +00001098}
1099
1100/*
drh6019e162001-07-02 17:51:45 +00001101** Return TRUE if the page given in the argument was previous passed
1102** to sqlitepager_write(). In other words, return TRUE if it is ok
1103** to change the content of the page.
1104*/
1105int sqlitepager_iswriteable(void *pData){
1106 PgHdr *pPg = DATA_TO_PGHDR(pData);
1107 return pPg->dirty;
1108}
1109
1110/*
drhed7c8552001-04-11 14:29:21 +00001111** Commit all changes to the database and release the write lock.
drhd9b02572001-04-15 00:37:09 +00001112**
1113** If the commit fails for any reason, a rollback attempt is made
1114** and an error code is returned. If the commit worked, SQLITE_OK
1115** is returned.
drhed7c8552001-04-11 14:29:21 +00001116*/
drhd9b02572001-04-15 00:37:09 +00001117int sqlitepager_commit(Pager *pPager){
drha1b351a2001-09-14 16:42:12 +00001118 int rc;
drhed7c8552001-04-11 14:29:21 +00001119 PgHdr *pPg;
drhd9b02572001-04-15 00:37:09 +00001120
1121 if( pPager->errMask==PAGER_ERR_FULL ){
1122 rc = sqlitepager_rollback(pPager);
1123 if( rc==SQLITE_OK ) rc = SQLITE_FULL;
1124 return rc;
1125 }
1126 if( pPager->errMask!=0 ){
1127 rc = pager_errcode(pPager);
1128 return rc;
1129 }
1130 if( pPager->state!=SQLITE_WRITELOCK ){
1131 return SQLITE_ERROR;
1132 }
drhed7c8552001-04-11 14:29:21 +00001133 assert( pPager->jfd>=0 );
drhf57b14a2001-09-14 18:54:08 +00001134 if( pPager->needSync && fsync(pPager->jfd) ){
drhd9b02572001-04-15 00:37:09 +00001135 goto commit_abort;
drhed7c8552001-04-11 14:29:21 +00001136 }
drha1b351a2001-09-14 16:42:12 +00001137 for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
1138 if( pPg->dirty==0 ) continue;
1139 rc = pager_seek(pPager->fd, (pPg->pgno-1)*SQLITE_PAGE_SIZE);
1140 if( rc!=SQLITE_OK ) goto commit_abort;
1141 rc = pager_write(pPager->fd, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE);
1142 if( rc!=SQLITE_OK ) goto commit_abort;
drhed7c8552001-04-11 14:29:21 +00001143 }
drhd9b02572001-04-15 00:37:09 +00001144 if( fsync(pPager->fd) ) goto commit_abort;
1145 rc = pager_unwritelock(pPager);
1146 pPager->dbSize = -1;
1147 return rc;
1148
1149 /* Jump here if anything goes wrong during the commit process.
1150 */
1151commit_abort:
1152 rc = sqlitepager_rollback(pPager);
1153 if( rc==SQLITE_OK ){
1154 rc = SQLITE_FULL;
drhed7c8552001-04-11 14:29:21 +00001155 }
drhed7c8552001-04-11 14:29:21 +00001156 return rc;
1157}
1158
1159/*
1160** Rollback all changes. The database falls back to read-only mode.
1161** All in-memory cache pages revert to their original data contents.
1162** The journal is deleted.
drhd9b02572001-04-15 00:37:09 +00001163**
1164** This routine cannot fail unless some other process is not following
1165** the correct locking protocol (SQLITE_PROTOCOL) or unless some other
1166** process is writing trash into the journal file (SQLITE_CORRUPT) or
1167** unless a prior malloc() failed (SQLITE_NOMEM). Appropriate error
1168** codes are returned for all these occasions. Otherwise,
1169** SQLITE_OK is returned.
drhed7c8552001-04-11 14:29:21 +00001170*/
drhd9b02572001-04-15 00:37:09 +00001171int sqlitepager_rollback(Pager *pPager){
drhed7c8552001-04-11 14:29:21 +00001172 int rc;
drhd9b02572001-04-15 00:37:09 +00001173 if( pPager->errMask!=0 && pPager->errMask!=PAGER_ERR_FULL ){
1174 return pager_errcode(pPager);
drhed7c8552001-04-11 14:29:21 +00001175 }
drhd9b02572001-04-15 00:37:09 +00001176 if( pPager->state!=SQLITE_WRITELOCK ){
1177 return SQLITE_OK;
1178 }
1179 rc = pager_playback(pPager);
1180 if( rc!=SQLITE_OK ){
1181 rc = SQLITE_CORRUPT;
1182 pPager->errMask |= PAGER_ERR_CORRUPT;
1183 }
1184 pPager->dbSize = -1;
drhed7c8552001-04-11 14:29:21 +00001185 return rc;
1186};
drhd9b02572001-04-15 00:37:09 +00001187
1188/*
drh5e00f6c2001-09-13 13:46:56 +00001189** Return TRUE if the database file is opened read-only. Return FALSE
1190** if the database is (in theory) writable.
1191*/
1192int sqlitepager_isreadonly(Pager *pPager){
drhbe0072d2001-09-13 14:46:09 +00001193 return pPager->readOnly;
drh5e00f6c2001-09-13 13:46:56 +00001194}
1195
1196/*
drhd9b02572001-04-15 00:37:09 +00001197** This routine is used for testing and analysis only.
1198*/
1199int *sqlitepager_stats(Pager *pPager){
1200 static int a[9];
1201 a[0] = pPager->nRef;
1202 a[1] = pPager->nPage;
1203 a[2] = pPager->mxPage;
1204 a[3] = pPager->dbSize;
1205 a[4] = pPager->state;
1206 a[5] = pPager->errMask;
1207 a[6] = pPager->nHit;
1208 a[7] = pPager->nMiss;
1209 a[8] = pPager->nOvfl;
1210 return a;
1211}
drhdd793422001-06-28 01:54:48 +00001212
#if SQLITE_TEST
/*
** Print a listing of all referenced pages and their ref count.
**
** One line is written to standard output for every page on the pager's
** list of all pages whose reference count is greater than zero.  Each
** line shows the page number, the address of the page data, and the
** reference count.
**
** The address is printed with %p.  Casting the pointer to int for a %x
** conversion would truncate it on platforms where pointers are wider
** than int.
*/
void sqlitepager_refdump(Pager *pPager){
  PgHdr *pPg;
  for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
    if( pPg->nRef<=0 ) continue;
    printf("PAGE %3d addr=%p nRef=%d\n",
       (int)pPg->pgno, (void*)PGHDR_TO_DATA(pPg), pPg->nRef);
  }
}
#endif