blob: cd8b87d8aaecad6d7aad524298c497aee5045cd0 [file] [log] [blame]
/*
** 2008 October 7
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
**
** This file contains code used to implement an in-memory rollback journal.
** The in-memory rollback journal is used to journal transactions for
** ":memory:" databases and when the journal_mode=MEMORY pragma is used.
**
** Update:  The in-memory journal is also used to temporarily cache
** smaller journals that are not critical for power-loss recovery.
** For example, statement journals that are not too big will be held
** entirely in memory, thus reducing the number of file I/O calls, and
** more importantly, reducing temporary file creation events.  If these
** journals become too large for memory, they are spilled to disk.  But
** in the common case, they are usually small and no file I/O needs to
** occur.
*/
danielk197739281b42008-10-17 19:13:04 +000026#include "sqliteInt.h"
27
/*
** Forward declarations of the structures private to this file.  Each
** one is fully defined further down.
*/
typedef struct FileChunk FileChunk;
typedef struct FilePoint FilePoint;
typedef struct MemJournal MemJournal;
32
drh27c3bd72008-10-28 18:12:36 +000033/*
34** The rollback journal is composed of a linked list of these structures.
dan2491de22016-02-27 20:14:55 +000035**
36** The zChunk array is always at least 8 bytes in size - usually much more.
37** Its actual size is stored in the MemJournal.nChunkSize variable.
drh27c3bd72008-10-28 18:12:36 +000038*/
danielk197739281b42008-10-17 19:13:04 +000039struct FileChunk {
drh27c3bd72008-10-28 18:12:36 +000040 FileChunk *pNext; /* Next chunk in the journal */
dan2491de22016-02-27 20:14:55 +000041 u8 zChunk[8]; /* Content of this chunk */
danielk197739281b42008-10-17 19:13:04 +000042};
43
/*
** Default total allocation size, in bytes, of one FileChunk object
** (header plus payload).  sqlite3JournalOpen() uses it when the caller
** passes a negative nSpill.
*/
#define MEMJOURNAL_DFLT_FILECHUNKSIZE 1024
48
/*
** Number of bytes to allocate for a FileChunk whose payload area is
** nChunkSize bytes.  The "-8" removes the 8 payload bytes that are
** already counted inside sizeof(FileChunk).
*/
#define fileChunkSize(nChunkSize) (sizeof(FileChunk) + ((nChunkSize)-8))
54
55/*
drh27c3bd72008-10-28 18:12:36 +000056** An instance of this object serves as a cursor into the rollback journal.
57** The cursor can be either for reading or writing.
58*/
danielk197739281b42008-10-17 19:13:04 +000059struct FilePoint {
drh27c3bd72008-10-28 18:12:36 +000060 sqlite3_int64 iOffset; /* Offset from the beginning of the file */
61 FileChunk *pChunk; /* Specific chunk into which cursor points */
danielk197739281b42008-10-17 19:13:04 +000062};
63
drh27c3bd72008-10-28 18:12:36 +000064/*
dan2491de22016-02-27 20:14:55 +000065** This structure is a subclass of sqlite3_file. Each open memory-journal
drh27c3bd72008-10-28 18:12:36 +000066** is an instance of this class.
67*/
danielk197739281b42008-10-17 19:13:04 +000068struct MemJournal {
dan2491de22016-02-27 20:14:55 +000069 const sqlite3_io_methods *pMethod; /* Parent class. MUST BE FIRST */
70 int nChunkSize; /* In-memory chunk-size */
71
drhc2f18ad2016-03-05 15:35:09 +000072 int nSpill; /* Bytes of data before flushing */
dan2491de22016-02-27 20:14:55 +000073 int nSize; /* Bytes of data currently in memory */
danielk197739281b42008-10-17 19:13:04 +000074 FileChunk *pFirst; /* Head of in-memory chunk-list */
75 FilePoint endpoint; /* Pointer to the end of the file */
76 FilePoint readpoint; /* Pointer to the end of the last xRead() */
dan2491de22016-02-27 20:14:55 +000077
78 int flags; /* xOpen flags */
79 sqlite3_vfs *pVfs; /* The "real" underlying VFS */
80 const char *zJournal; /* Name of the journal file */
danielk197739281b42008-10-17 19:13:04 +000081};
82
83/*
drh2206a2b2009-04-01 23:09:43 +000084** Read data from the in-memory journal file. This is the implementation
85** of the sqlite3_vfs.xRead method.
danielk197739281b42008-10-17 19:13:04 +000086*/
87static int memjrnlRead(
88 sqlite3_file *pJfd, /* The journal file from which to read */
89 void *zBuf, /* Put the results here */
90 int iAmt, /* Number of bytes to read */
91 sqlite_int64 iOfst /* Begin reading at this offset */
92){
93 MemJournal *p = (MemJournal *)pJfd;
drhd93b2b82016-03-09 04:17:17 +000094 u8 *zOut = zBuf;
95 int nRead = iAmt;
96 int iChunkOffset;
97 FileChunk *pChunk;
98
99#ifdef SQLITE_ENABLE_ATOMIC_WRITE
dan7ed40202016-03-08 17:44:08 +0000100 if( (iAmt+iOfst)>p->endpoint.iOffset ){
dan2491de22016-02-27 20:14:55 +0000101 return SQLITE_IOERR_SHORT_READ;
dan2491de22016-02-27 20:14:55 +0000102 }
drhd93b2b82016-03-09 04:17:17 +0000103#endif
104
105 assert( (iAmt+iOfst)<=p->endpoint.iOffset );
drh38b3dde2016-04-07 18:42:23 +0000106 assert( p->readpoint.iOffset==0 || p->readpoint.pChunk!=0 );
drhd93b2b82016-03-09 04:17:17 +0000107 if( p->readpoint.iOffset!=iOfst || iOfst==0 ){
108 sqlite3_int64 iOff = 0;
109 for(pChunk=p->pFirst;
110 ALWAYS(pChunk) && (iOff+p->nChunkSize)<=iOfst;
111 pChunk=pChunk->pNext
112 ){
113 iOff += p->nChunkSize;
114 }
115 }else{
116 pChunk = p->readpoint.pChunk;
drh38b3dde2016-04-07 18:42:23 +0000117 assert( pChunk!=0 );
drhd93b2b82016-03-09 04:17:17 +0000118 }
119
120 iChunkOffset = (int)(iOfst%p->nChunkSize);
121 do {
122 int iSpace = p->nChunkSize - iChunkOffset;
123 int nCopy = MIN(nRead, (p->nChunkSize - iChunkOffset));
124 memcpy(zOut, (u8*)pChunk->zChunk + iChunkOffset, nCopy);
125 zOut += nCopy;
126 nRead -= iSpace;
127 iChunkOffset = 0;
128 } while( nRead>=0 && (pChunk=pChunk->pNext)!=0 && nRead>0 );
drh38b3dde2016-04-07 18:42:23 +0000129 p->readpoint.iOffset = pChunk ? iOfst+iAmt : 0;
drhd93b2b82016-03-09 04:17:17 +0000130 p->readpoint.pChunk = pChunk;
danielk197739281b42008-10-17 19:13:04 +0000131
132 return SQLITE_OK;
133}
134
135/*
dan2491de22016-02-27 20:14:55 +0000136** Free the list of FileChunk structures headed at MemJournal.pFirst.
137*/
138static void memjrnlFreeChunks(MemJournal *p){
139 FileChunk *pIter;
140 FileChunk *pNext;
141 for(pIter=p->pFirst; pIter; pIter=pNext){
142 pNext = pIter->pNext;
143 sqlite3_free(pIter);
144 }
145 p->pFirst = 0;
146}
147
148/*
149** Flush the contents of memory to a real file on disk.
150*/
drhc2f18ad2016-03-05 15:35:09 +0000151static int memjrnlCreateFile(MemJournal *p){
dan7ed40202016-03-08 17:44:08 +0000152 int rc;
153 sqlite3_file *pReal = (sqlite3_file*)p;
154 MemJournal copy = *p;
155
156 memset(p, 0, sizeof(MemJournal));
157 rc = sqlite3OsOpen(copy.pVfs, copy.zJournal, pReal, copy.flags, 0);
158 if( rc==SQLITE_OK ){
159 int nChunk = copy.nChunkSize;
160 i64 iOff = 0;
161 FileChunk *pIter;
drh769b4c92016-03-09 03:44:32 +0000162 for(pIter=copy.pFirst; pIter; pIter=pIter->pNext){
drhd93b2b82016-03-09 04:17:17 +0000163 if( iOff + nChunk > copy.endpoint.iOffset ){
164 nChunk = copy.endpoint.iOffset - iOff;
dan2491de22016-02-27 20:14:55 +0000165 }
drhd93b2b82016-03-09 04:17:17 +0000166 rc = sqlite3OsWrite(pReal, (u8*)pIter->zChunk, nChunk, iOff);
drh769b4c92016-03-09 03:44:32 +0000167 if( rc ) break;
drhd93b2b82016-03-09 04:17:17 +0000168 iOff += nChunk;
dan2491de22016-02-27 20:14:55 +0000169 }
dan7ed40202016-03-08 17:44:08 +0000170 if( rc==SQLITE_OK ){
171 /* No error has occurred. Free the in-memory buffers. */
172 memjrnlFreeChunks(&copy);
173 }
174 }
175 if( rc!=SQLITE_OK ){
176 /* If an error occurred while creating or writing to the file, restore
177 ** the original before returning. This way, SQLite uses the in-memory
178 ** journal data to roll back changes made to the internal page-cache
179 ** before this function was called. */
180 sqlite3OsClose(pReal);
181 *p = copy;
dan2491de22016-02-27 20:14:55 +0000182 }
183 return rc;
184}
185
186
187/*
danielk197739281b42008-10-17 19:13:04 +0000188** Write data to the file.
189*/
190static int memjrnlWrite(
191 sqlite3_file *pJfd, /* The journal file into which to write */
192 const void *zBuf, /* Take data to be written from here */
193 int iAmt, /* Number of bytes to write */
194 sqlite_int64 iOfst /* Begin writing at this offset into the file */
195){
196 MemJournal *p = (MemJournal *)pJfd;
197 int nWrite = iAmt;
198 u8 *zWrite = (u8 *)zBuf;
199
dan7ed40202016-03-08 17:44:08 +0000200 /* If the file should be created now, create it and write the new data
201 ** into the file on disk. */
202 if( p->nSpill>0 && (iAmt+iOfst)>p->nSpill ){
drhc2f18ad2016-03-05 15:35:09 +0000203 int rc = memjrnlCreateFile(p);
dan2491de22016-02-27 20:14:55 +0000204 if( rc==SQLITE_OK ){
dan7ed40202016-03-08 17:44:08 +0000205 rc = sqlite3OsWrite(pJfd, zBuf, iAmt, iOfst);
danielk197739281b42008-10-17 19:13:04 +0000206 }
dan2491de22016-02-27 20:14:55 +0000207 return rc;
208 }
danielk197739281b42008-10-17 19:13:04 +0000209
dan2491de22016-02-27 20:14:55 +0000210 /* If the contents of this write should be stored in memory */
211 else{
212 /* An in-memory journal file should only ever be appended to. Random
213 ** access writes are not required. The only exception to this is when
214 ** the in-memory journal is being used by a connection using the
215 ** atomic-write optimization. In this case the first 28 bytes of the
216 ** journal file may be written as part of committing the transaction. */
217 assert( iOfst==p->endpoint.iOffset || iOfst==0 );
drh273021d2016-03-09 02:03:03 +0000218#ifdef SQLITE_ENABLE_ATOMIC_WRITE
dan2491de22016-02-27 20:14:55 +0000219 if( iOfst==0 && p->pFirst ){
220 assert( p->nChunkSize>iAmt );
drh65a7e762016-03-05 15:03:31 +0000221 memcpy((u8*)p->pFirst->zChunk, zBuf, iAmt);
drh273021d2016-03-09 02:03:03 +0000222 }else
223#else
224 assert( iOfst>0 || p->pFirst==0 );
225#endif
226 {
dan2491de22016-02-27 20:14:55 +0000227 while( nWrite>0 ){
228 FileChunk *pChunk = p->endpoint.pChunk;
229 int iChunkOffset = (int)(p->endpoint.iOffset%p->nChunkSize);
230 int iSpace = MIN(nWrite, p->nChunkSize - iChunkOffset);
231
232 if( iChunkOffset==0 ){
233 /* New chunk is required to extend the file. */
234 FileChunk *pNew = sqlite3_malloc(fileChunkSize(p->nChunkSize));
235 if( !pNew ){
236 return SQLITE_IOERR_NOMEM_BKPT;
237 }
238 pNew->pNext = 0;
239 if( pChunk ){
240 assert( p->pFirst );
241 pChunk->pNext = pNew;
242 }else{
243 assert( !p->pFirst );
244 p->pFirst = pNew;
245 }
246 p->endpoint.pChunk = pNew;
247 }
248
drh65a7e762016-03-05 15:03:31 +0000249 memcpy((u8*)p->endpoint.pChunk->zChunk + iChunkOffset, zWrite, iSpace);
dan2491de22016-02-27 20:14:55 +0000250 zWrite += iSpace;
251 nWrite -= iSpace;
252 p->endpoint.iOffset += iSpace;
253 }
254 p->nSize = iAmt + iOfst;
255 }
danielk197739281b42008-10-17 19:13:04 +0000256 }
257
258 return SQLITE_OK;
259}
260
261/*
262** Truncate the file.
dan5f37ed52016-02-29 20:00:13 +0000263**
264** If the journal file is already on disk, truncate it there. Or, if it
265** is still in main memory but is being truncated to zero bytes in size,
266** ignore
danielk197739281b42008-10-17 19:13:04 +0000267*/
268static int memjrnlTruncate(sqlite3_file *pJfd, sqlite_int64 size){
269 MemJournal *p = (MemJournal *)pJfd;
drh273021d2016-03-09 02:03:03 +0000270 if( ALWAYS(size==0) ){
dan2491de22016-02-27 20:14:55 +0000271 memjrnlFreeChunks(p);
272 p->nSize = 0;
273 p->endpoint.pChunk = 0;
274 p->endpoint.iOffset = 0;
275 p->readpoint.pChunk = 0;
276 p->readpoint.iOffset = 0;
danielk197739281b42008-10-17 19:13:04 +0000277 }
danielk197739281b42008-10-17 19:13:04 +0000278 return SQLITE_OK;
279}
280
281/*
282** Close the file.
283*/
284static int memjrnlClose(sqlite3_file *pJfd){
dan2491de22016-02-27 20:14:55 +0000285 MemJournal *p = (MemJournal *)pJfd;
286 memjrnlFreeChunks(p);
danielk197739281b42008-10-17 19:13:04 +0000287 return SQLITE_OK;
288}
289
danielk197739281b42008-10-17 19:13:04 +0000290/*
291** Sync the file.
drh2206a2b2009-04-01 23:09:43 +0000292**
dan2491de22016-02-27 20:14:55 +0000293** If the real file has been created, call its xSync method. Otherwise,
294** syncing an in-memory journal is a no-op.
danielk197739281b42008-10-17 19:13:04 +0000295*/
dan2491de22016-02-27 20:14:55 +0000296static int memjrnlSync(sqlite3_file *pJfd, int flags){
dan7ed40202016-03-08 17:44:08 +0000297 UNUSED_PARAMETER2(pJfd, flags);
drh09c0f6d2010-04-12 19:44:22 +0000298 return SQLITE_OK;
299}
danielk197739281b42008-10-17 19:13:04 +0000300
301/*
302** Query the size of the file in bytes.
303*/
304static int memjrnlFileSize(sqlite3_file *pJfd, sqlite_int64 *pSize){
305 MemJournal *p = (MemJournal *)pJfd;
306 *pSize = (sqlite_int64) p->endpoint.iOffset;
307 return SQLITE_OK;
308}
309
310/*
311** Table of methods for MemJournal sqlite3_file object.
312*/
drhf83dc1e2010-06-03 12:09:52 +0000313static const struct sqlite3_io_methods MemJournalMethods = {
danielk197739281b42008-10-17 19:13:04 +0000314 1, /* iVersion */
315 memjrnlClose, /* xClose */
316 memjrnlRead, /* xRead */
317 memjrnlWrite, /* xWrite */
318 memjrnlTruncate, /* xTruncate */
319 memjrnlSync, /* xSync */
320 memjrnlFileSize, /* xFileSize */
321 0, /* xLock */
322 0, /* xUnlock */
323 0, /* xCheckReservedLock */
324 0, /* xFileControl */
325 0, /* xSectorSize */
drhff828942010-06-26 21:34:06 +0000326 0, /* xDeviceCharacteristics */
drhff828942010-06-26 21:34:06 +0000327 0, /* xShmMap */
drh6e1f4822010-07-13 23:41:40 +0000328 0, /* xShmLock */
drhff828942010-06-26 21:34:06 +0000329 0, /* xShmBarrier */
drhda8caa02013-04-22 23:38:50 +0000330 0, /* xShmUnmap */
331 0, /* xFetch */
332 0 /* xUnfetch */
danielk197739281b42008-10-17 19:13:04 +0000333};
334
335/*
dan2491de22016-02-27 20:14:55 +0000336** Open a journal file.
337**
338** The behaviour of the journal file depends on the value of parameter
drhc2f18ad2016-03-05 15:35:09 +0000339** nSpill. If nSpill is 0, then the journal file is always create and
340** accessed using the underlying VFS. If nSpill is less than zero, then
341** all content is always stored in main-memory. Finally, if nSpill is a
dan2491de22016-02-27 20:14:55 +0000342** positive value, then the journal file is initially created in-memory
343** but may be flushed to disk later on. In this case the journal file is
drhc2f18ad2016-03-05 15:35:09 +0000344** flushed to disk either when it grows larger than nSpill bytes in size,
dan2491de22016-02-27 20:14:55 +0000345** or when sqlite3JournalCreate() is called.
danielk197739281b42008-10-17 19:13:04 +0000346*/
dan2491de22016-02-27 20:14:55 +0000347int sqlite3JournalOpen(
348 sqlite3_vfs *pVfs, /* The VFS to use for actual file I/O */
349 const char *zName, /* Name of the journal file */
350 sqlite3_file *pJfd, /* Preallocated, blank file handle */
351 int flags, /* Opening flags */
drhc2f18ad2016-03-05 15:35:09 +0000352 int nSpill /* Bytes buffered before opening the file */
dan2491de22016-02-27 20:14:55 +0000353){
354 MemJournal *p = (MemJournal*)pJfd;
355
drhc2f18ad2016-03-05 15:35:09 +0000356 /* Zero the file-handle object. If nSpill was passed zero, initialize
dan2491de22016-02-27 20:14:55 +0000357 ** it using the sqlite3OsOpen() function of the underlying VFS. In this
358 ** case none of the code in this module is executed as a result of calls
359 ** made on the journal file-handle. */
dan7ed40202016-03-08 17:44:08 +0000360 memset(p, 0, sizeof(MemJournal));
drhc2f18ad2016-03-05 15:35:09 +0000361 if( nSpill==0 ){
dan2491de22016-02-27 20:14:55 +0000362 return sqlite3OsOpen(pVfs, zName, pJfd, flags, 0);
363 }
364
drhc2f18ad2016-03-05 15:35:09 +0000365 if( nSpill>0 ){
366 p->nChunkSize = nSpill;
dan2491de22016-02-27 20:14:55 +0000367 }else{
368 p->nChunkSize = 8 + MEMJOURNAL_DFLT_FILECHUNKSIZE - sizeof(FileChunk);
369 assert( MEMJOURNAL_DFLT_FILECHUNKSIZE==fileChunkSize(p->nChunkSize) );
370 }
371
372 p->pMethod = (const sqlite3_io_methods*)&MemJournalMethods;
drhc2f18ad2016-03-05 15:35:09 +0000373 p->nSpill = nSpill;
dan2491de22016-02-27 20:14:55 +0000374 p->flags = flags;
375 p->zJournal = zName;
376 p->pVfs = pVfs;
377 return SQLITE_OK;
danielk197739281b42008-10-17 19:13:04 +0000378}
379
380/*
dan2491de22016-02-27 20:14:55 +0000381** Open an in-memory journal file.
382*/
383void sqlite3MemJournalOpen(sqlite3_file *pJfd){
384 sqlite3JournalOpen(0, 0, pJfd, 0, -1);
385}
386
#ifdef SQLITE_ENABLE_ATOMIC_WRITE
/*
** If p is an in-memory journal that was opened with a positive nSpill
** (one that is allowed to spill to disk), create the underlying file
** now and copy the buffered content into it.  For any other handle this
** is a no-op.  Returns SQLITE_OK or the error from memjrnlCreateFile().
*/
int sqlite3JournalCreate(sqlite3_file *p){
  if( p->pMethods!=&MemJournalMethods ){
    return SQLITE_OK;             /* Not an in-memory journal */
  }
  if( ((MemJournal*)p)->nSpill<=0 ){
    return SQLITE_OK;             /* Journal never spills to disk */
  }
  return memjrnlCreateFile((MemJournal*)p);
}
#endif
dan2491de22016-02-27 20:14:55 +0000402
403/*
dan5f37ed52016-02-29 20:00:13 +0000404** The file-handle passed as the only argument is open on a journal file.
405** Return true if this "journal file" is currently stored in heap memory,
dan2491de22016-02-27 20:14:55 +0000406** or false otherwise.
407*/
408int sqlite3JournalIsInMemory(sqlite3_file *p){
dan7ed40202016-03-08 17:44:08 +0000409 return p->pMethods==&MemJournalMethods;
danielk197739281b42008-10-17 19:13:04 +0000410}
411
412/*
dan2491de22016-02-27 20:14:55 +0000413** Return the number of bytes required to store a JournalFile that uses vfs
414** pVfs to create the underlying on-disk files.
danielk197739281b42008-10-17 19:13:04 +0000415*/
dan2491de22016-02-27 20:14:55 +0000416int sqlite3JournalSize(sqlite3_vfs *pVfs){
drh13969f52016-03-21 22:28:51 +0000417 return MAX(pVfs->szOsFile, (int)sizeof(MemJournal));
danielk197739281b42008-10-17 19:13:04 +0000418}