blob: d9272eef97352f6f75584dfe7466badd5ddc358c [file] [log] [blame]
drhbbd42a62004-05-22 17:41:58 +00001/*
2** 2004 May 22
3**
4** The author disclaims copyright to this source code. In place of
5** a legal notice, here is a blessing:
6**
7** May you do good and not evil.
8** May you find forgiveness for yourself and forgive others.
9** May you share freely, never taking more than you give.
10**
11******************************************************************************
12**
13** This file contains code that is specific to Unix systems.
danielk1977822a5162008-05-16 04:51:54 +000014**
drh40bbb0a2008-09-23 10:23:26 +000015** $Id: os_unix.c,v 1.203 2008/09/23 10:23:26 drh Exp $
drhbbd42a62004-05-22 17:41:58 +000016*/
drhbbd42a62004-05-22 17:41:58 +000017#include "sqliteInt.h"
danielk197729bafea2008-06-26 10:41:19 +000018#if SQLITE_OS_UNIX /* This file is used on unix only */
drh66560ad2006-01-06 14:32:19 +000019
danielk1977e339d652008-06-28 11:23:00 +000020/*
drh40bbb0a2008-09-23 10:23:26 +000021** If SQLITE_ENABLE_LOCKING_STYLE is defined and is non-zero, then several
22** alternative locking implementations are provided:
danielk1977e339d652008-06-28 11:23:00 +000023**
24** * POSIX locking (the default),
25** * No locking,
26** * Dot-file locking,
27** * flock() locking,
28** * AFP locking (OSX only).
drh40bbb0a2008-09-23 10:23:26 +000029**
30** SQLITE_ENABLE_LOCKING_STYLE only works on a Mac. It is turned on by
31** default on a Mac and disabled on all other posix platforms.
danielk1977e339d652008-06-28 11:23:00 +000032*/
#if !defined(SQLITE_ENABLE_LOCKING_STYLE)
/* Apple toolchains predefine __APPLE__.  __DARWIN__ is only set by some
** build systems, so accept either spelling when detecting a Mac. */
#  if defined(__APPLE__) || defined(__DARWIN__)
#    define SQLITE_ENABLE_LOCKING_STYLE 1
#  else
#    define SQLITE_ENABLE_LOCKING_STYLE 0
#  endif
#endif
drhbfe66312006-10-03 17:40:40 +000040
drh9cbe6352005-11-29 03:13:21 +000041/*
42** These #defines should enable >2GB file support on Posix if the
43** underlying operating system supports it. If the OS lacks
drhf1a221e2006-01-15 17:27:17 +000044** large file support, these should be no-ops.
drh9cbe6352005-11-29 03:13:21 +000045**
46** Large file support can be disabled using the -DSQLITE_DISABLE_LFS switch
47** on the compiler command line. This is necessary if you are compiling
48** on a recent machine (ex: RedHat 7.2) but you want your code to work
49** on an older machine (ex: RedHat 6.0). If you compile on RedHat 7.2
50** without this option, LFS is enabled. But LFS does not exist in the kernel
51** in RedHat 6.0, so the code won't work. Hence, for maximum binary
52** portability you should omit LFS.
drh9cbe6352005-11-29 03:13:21 +000053*/
54#ifndef SQLITE_DISABLE_LFS
55# define _LARGE_FILE 1
56# ifndef _FILE_OFFSET_BITS
57# define _FILE_OFFSET_BITS 64
58# endif
59# define _LARGEFILE_SOURCE 1
60#endif
drhbbd42a62004-05-22 17:41:58 +000061
drh9cbe6352005-11-29 03:13:21 +000062/*
63** standard include files.
64*/
65#include <sys/types.h>
66#include <sys/stat.h>
67#include <fcntl.h>
68#include <unistd.h>
drhbbd42a62004-05-22 17:41:58 +000069#include <time.h>
drh19e2d372005-08-29 23:00:03 +000070#include <sys/time.h>
drhbbd42a62004-05-22 17:41:58 +000071#include <errno.h>
danielk1977e339d652008-06-28 11:23:00 +000072
drh40bbb0a2008-09-23 10:23:26 +000073#if SQLITE_ENABLE_LOCKING_STYLE
drhbfe66312006-10-03 17:40:40 +000074#include <sys/ioctl.h>
75#include <sys/param.h>
76#include <sys/mount.h>
77#endif /* SQLITE_ENABLE_LOCKING_STYLE */
drh9cbe6352005-11-29 03:13:21 +000078
79/*
drhf1a221e2006-01-15 17:27:17 +000080** If we are to be thread-safe, include the pthreads header and define
81** the SQLITE_UNIX_THREADS macro.
drh9cbe6352005-11-29 03:13:21 +000082*/
drhd677b3d2007-08-20 22:48:41 +000083#if SQLITE_THREADSAFE
drh9cbe6352005-11-29 03:13:21 +000084# include <pthread.h>
85# define SQLITE_UNIX_THREADS 1
86#endif
87
88/*
89** Default permissions when creating a new file
90*/
91#ifndef SQLITE_DEFAULT_FILE_PERMISSIONS
92# define SQLITE_DEFAULT_FILE_PERMISSIONS 0644
93#endif
94
danielk1977b4b47412007-08-17 15:53:36 +000095/*
96** Maximum supported path-length.
97*/
98#define MAX_PATHNAME 512
drh9cbe6352005-11-29 03:13:21 +000099
100
101/*
danielk1977ad94b582007-08-20 06:44:22 +0000102** The unixFile structure is subclass of sqlite3_file specific for the unix
drh054889e2005-11-30 03:20:31 +0000103** protability layer.
drh9cbe6352005-11-29 03:13:21 +0000104*/
drh054889e2005-11-30 03:20:31 +0000105typedef struct unixFile unixFile;
106struct unixFile {
danielk197762079062007-08-15 17:08:46 +0000107 sqlite3_io_methods const *pMethod; /* Always the first entry */
danielk1977967a4a12007-08-20 14:23:44 +0000108#ifdef SQLITE_TEST
109 /* In test mode, increase the size of this structure a bit so that
110 ** it is larger than the struct CrashFile defined in test6.c.
111 */
112 char aPadding[32];
113#endif
drh9cbe6352005-11-29 03:13:21 +0000114 struct openCnt *pOpen; /* Info about all open fd's on this inode */
115 struct lockInfo *pLock; /* Info about locks on this inode */
drh40bbb0a2008-09-23 10:23:26 +0000116#if SQLITE_ENABLE_LOCKING_STYLE
drhbfe66312006-10-03 17:40:40 +0000117 void *lockingContext; /* Locking style specific state */
danielk1977e339d652008-06-28 11:23:00 +0000118#endif
drh9cbe6352005-11-29 03:13:21 +0000119 int h; /* The file descriptor */
120 unsigned char locktype; /* The type of lock held on this fd */
drh9cbe6352005-11-29 03:13:21 +0000121 int dirfd; /* File descriptor for the directory */
drhd677b3d2007-08-20 22:48:41 +0000122#if SQLITE_THREADSAFE
danielk1977ad94b582007-08-20 06:44:22 +0000123 pthread_t tid; /* The thread that "owns" this unixFile */
drh9cbe6352005-11-29 03:13:21 +0000124#endif
aswift5b1a2562008-08-22 00:22:35 +0000125 int lastErrno; /* The unix errno from the last I/O error */
drh9cbe6352005-11-29 03:13:21 +0000126};
127
drh0ccebe72005-06-07 22:22:50 +0000128/*
drh198bf392006-01-06 21:52:49 +0000129** Include code that is common to all os_*.c files
130*/
131#include "os_common.h"
132
133/*
drh0ccebe72005-06-07 22:22:50 +0000134** Define various macros that are missing from some systems.
135*/
/* O_LARGEFILE is forced to 0 when large file support is disabled, and
** defaults to 0 on systems that do not provide it. */
#ifdef SQLITE_DISABLE_LFS
# undef O_LARGEFILE
#endif
#ifndef O_LARGEFILE
# define O_LARGEFILE 0
#endif

/* Flags that are absent on some systems become no-ops. */
#ifndef O_NOFOLLOW
# define O_NOFOLLOW 0
#endif
#ifndef O_BINARY
# define O_BINARY 0
#endif
149
150/*
151** The DJGPP compiler environment looks mostly like Unix, but it
152** lacks the fcntl() system call. So redefine fcntl() to be something
153** that always succeeds. This means that locking does not occur under
drh85b623f2007-12-13 21:54:09 +0000154** DJGPP. But it is DOS - what did you expect?
drhbbd42a62004-05-22 17:41:58 +0000155*/
156#ifdef __DJGPP__
157# define fcntl(A,B,C) 0
158#endif
159
160/*
drh2b4b5962005-06-15 17:47:55 +0000161** The threadid macro resolves to the thread-id or to 0. Used for
162** testing and debugging only.
163*/
drhd677b3d2007-08-20 22:48:41 +0000164#if SQLITE_THREADSAFE
drh2b4b5962005-06-15 17:47:55 +0000165#define threadid pthread_self()
166#else
167#define threadid 0
168#endif
169
170/*
danielk1977ad94b582007-08-20 06:44:22 +0000171** Set or check the unixFile.tid field. This field is set when an unixFile
172** is first opened. All subsequent uses of the unixFile verify that the
173** same thread is operating on the unixFile. Some operating systems do
drh2b4b5962005-06-15 17:47:55 +0000174** not allow locks to be overridden by other threads and that restriction
175** means that sqlite3* database handles cannot be moved from one thread
176** to another. This logic makes sure a user does not try to do that
177** by mistake.
drhf1a221e2006-01-15 17:27:17 +0000178**
danielk1977ad94b582007-08-20 06:44:22 +0000179** Version 3.3.1 (2006-01-15): unixFile can be moved from one thread to
drhf1a221e2006-01-15 17:27:17 +0000180** another as long as we are running on a system that supports threads
181** overriding each others locks (which is now the most common behavior)
danielk1977ad94b582007-08-20 06:44:22 +0000182** or if no locks are held. But the unixFile.pLock field needs to be
drhf1a221e2006-01-15 17:27:17 +0000183** recomputed because its key includes the thread-id. See the
184** transferOwnership() function below for additional information
drh2b4b5962005-06-15 17:47:55 +0000185*/
#if SQLITE_THREADSAFE
/* SET_THREADID(X) records the calling thread in (X)->tid.
** CHECK_THREADID(X) is true when threads cannot override each others
** locks and the calling thread is not the one recorded in (X)->tid.
** Both expansions are fully parenthesized so that they behave as a single
** expression wherever they are used. */
# define SET_THREADID(X)   ((X)->tid = pthread_self())
# define CHECK_THREADID(X) (threadsOverrideEachOthersLocks==0 && \
                            !pthread_equal((X)->tid, pthread_self()))
#else
/* Without thread support there is nothing to record or to check. */
# define SET_THREADID(X)
# define CHECK_THREADID(X) 0
#endif
194
drhbbd42a62004-05-22 17:41:58 +0000195/*
196** Here is the dirt on POSIX advisory locks: ANSI STD 1003.1 (1996)
197** section 6.5.2.2 lines 483 through 490 specify that when a process
198** sets or clears a lock, that operation overrides any prior locks set
199** by the same process. It does not explicitly say so, but this implies
200** that it overrides locks set by the same process using a different
201** file descriptor. Consider this test case:
** int fd1 = open("./file1", O_RDWR|O_CREAT, 0644);
drhbbd42a62004-05-22 17:41:58 +0000202** int fd2 = open("./file2", O_RDWR|O_CREAT, 0644);
203**
204** Suppose ./file1 and ./file2 are really the same file (because
205** one is a hard or symbolic link to the other) then if you set
206** an exclusive lock on fd1, then try to get an exclusive lock
207** on fd2, it works. I would have expected the second lock to
208** fail since there was already a lock on the file due to fd1.
209** But not so. Since both locks came from the same process, the
210** second overrides the first, even though they were on different
211** file descriptors opened on different file names.
212**
213** Bummer. If you ask me, this is broken. Badly broken. It means
214** that we cannot use POSIX locks to synchronize file access among
215** competing threads of the same process. POSIX locks will work fine
216** to synchronize access for threads in separate processes, but not
217** threads within the same process.
218**
219** To work around the problem, SQLite has to manage file locks internally
220** on its own. Whenever a new database is opened, we have to find the
221** specific inode of the database file (the inode is determined by the
222** st_dev and st_ino fields of the stat structure that fstat() fills in)
223** and check for locks already existing on that inode. When locks are
224** created or removed, we have to look at our own internal record of the
225** locks to see if another thread has previously set a lock on that same
226** inode.
227**
danielk1977ad94b582007-08-20 06:44:22 +0000228** The sqlite3_file structure for POSIX is no longer just an integer file
drhbbd42a62004-05-22 17:41:58 +0000229** descriptor. It is now a structure that holds the integer file
230** descriptor and a pointer to a structure that describes the internal
231** locks on the corresponding inode. There is one locking structure
danielk1977ad94b582007-08-20 06:44:22 +0000232** per inode, so if the same inode is opened twice, both unixFile structures
drhbbd42a62004-05-22 17:41:58 +0000233** point to the same locking structure. The locking structure keeps
234** a reference count (so we will know when to delete it) and a "cnt"
235** field that tells us its internal lock status. cnt==0 means the
236** file is unlocked. cnt==-1 means the file has an exclusive lock.
237** cnt>0 means there are cnt shared locks on the file.
238**
239** Any attempt to lock or unlock a file first checks the locking
240** structure. The fcntl() system call is only invoked to set a
241** POSIX lock if the internal lock structure transitions between
242** a locked and an unlocked state.
243**
244** 2004-Jan-11:
245** More recent discoveries about POSIX advisory locks. (The more
246** I discover, the more I realize that POSIX advisory locks are
247** an abomination.)
248**
249** If you close a file descriptor that points to a file that has locks,
250** all locks on that file that are owned by the current process are
danielk1977ad94b582007-08-20 06:44:22 +0000251** released. To work around this problem, each unixFile structure contains
drhbbd42a62004-05-22 17:41:58 +0000252** a pointer to an openCnt structure. There is one openCnt structure
danielk1977ad94b582007-08-20 06:44:22 +0000253** per open inode, which means that multiple unixFile can point to a single
254** openCnt. When an attempt is made to close an unixFile, if there are
255** other unixFile open on the same inode that are holding locks, the call
drhbbd42a62004-05-22 17:41:58 +0000256** to close() the file descriptor is deferred until all of the locks clear.
257** The openCnt structure keeps a list of file descriptors that need to
258** be closed and that list is walked (and cleared) when the last lock
259** clears.
260**
261** First, under Linux threads, because each thread has a separate
262** process ID, lock operations in one thread do not override locks
263** to the same file in other threads. Linux threads behave like
264** separate processes in this respect. But, if you close a file
265** descriptor in linux threads, all locks are cleared, even locks
266** on other threads and even though the other threads have different
267** process IDs. Linux threads is inconsistent in this respect.
268** (I'm beginning to think that linux threads is an abomination too.)
269** The consequence of this all is that the hash table for the lockInfo
270** structure has to include the process id as part of its key because
271** locks in different threads are treated as distinct. But the
272** openCnt structure should not include the process id in its
273** key because close() clears lock on all threads, not just the current
274** thread. Were it not for this goofiness in linux threads, we could
275** combine the lockInfo and openCnt structures into a single structure.
drh5fdae772004-06-29 03:29:00 +0000276**
277** 2004-Jun-28:
278** On some versions of linux, threads can override each others locks.
279** On others not. Sometimes you can change the behavior on the same
280** system by setting the LD_ASSUME_KERNEL environment variable. The
281** POSIX standard is silent as to which behavior is correct, as far
282** as I can tell, so other versions of unix might show the same
283** inconsistency. There is little doubt in my mind that posix
284** advisory locks and linux threads are profoundly broken.
285**
286** To work around the inconsistencies, we have to test at runtime
287** whether or not threads can override each others locks. This test
288** is run once, the first time any lock is attempted. A static
289** variable is set to record the results of this test for future
290** use.
drhbbd42a62004-05-22 17:41:58 +0000291*/
292
293/*
294** An instance of the following structure serves as the key used
drh5fdae772004-06-29 03:29:00 +0000295** to locate a particular lockInfo structure given its inode.
296**
297** If threads cannot override each others locks, then we set the
298** lockKey.tid field to the thread ID. If threads can override
drhf1a221e2006-01-15 17:27:17 +0000299** each others locks then tid is always set to zero. tid is omitted
300** if we compile without threading support.
drhbbd42a62004-05-22 17:41:58 +0000301*/
struct lockKey {
  dev_t dev;        /* Device number */
  ino_t ino;        /* Inode number */
#if SQLITE_THREADSAFE
  pthread_t tid;    /* Thread ID, or zero if threads can override each other */
#endif
};
309
310/*
311** An instance of the following structure is allocated for each open
312** inode on each thread with a different process ID. (Threads have
313** different process IDs on linux, but not on most other unixes.)
314**
danielk1977ad94b582007-08-20 06:44:22 +0000315** A single inode can have multiple file descriptors, so each unixFile
drhbbd42a62004-05-22 17:41:58 +0000316** structure contains a pointer to an instance of this object and this
danielk1977ad94b582007-08-20 06:44:22 +0000317** object keeps a count of the number of unixFile pointing to it.
drhbbd42a62004-05-22 17:41:58 +0000318*/
319struct lockInfo {
320 struct lockKey key; /* The lookup key */
drh2ac3ee92004-06-07 16:27:46 +0000321 int cnt; /* Number of SHARED locks held */
danielk19779a1d0ab2004-06-01 14:09:28 +0000322 int locktype; /* One of SHARED_LOCK, RESERVED_LOCK etc. */
drhbbd42a62004-05-22 17:41:58 +0000323 int nRef; /* Number of pointers to this structure */
drhda0e7682008-07-30 15:27:54 +0000324 struct lockInfo *pNext, *pPrev; /* List of all lockInfo objects */
drhbbd42a62004-05-22 17:41:58 +0000325};
326
327/*
328** An instance of the following structure serves as the key used
329** to locate a particular openCnt structure given its inode. This
drh5fdae772004-06-29 03:29:00 +0000330** is the same as the lockKey except that the thread ID is omitted.
drhbbd42a62004-05-22 17:41:58 +0000331*/
struct openKey {
  dev_t dev;    /* Device number of the file */
  ino_t ino;    /* Inode number of the file */
};
336
337/*
338** An instance of the following structure is allocated for each open
339** inode. This structure keeps track of the number of locks on that
340** inode. If a close is attempted against an inode that is holding
341** locks, the close is deferred until all locks clear by adding the
342** file descriptor to be closed to the pending list.
343*/
344struct openCnt {
345 struct openKey key; /* The lookup key */
346 int nRef; /* Number of pointers to this structure */
347 int nLock; /* Number of outstanding locks */
348 int nPending; /* Number of pending close() operations */
349 int *aPending; /* Malloced space holding fd's awaiting a close() */
drhda0e7682008-07-30 15:27:54 +0000350 struct openCnt *pNext, *pPrev; /* List of all openCnt objects */
drhbbd42a62004-05-22 17:41:58 +0000351};
352
/*
** Heads of the lists of all lockInfo and openCnt objects.  These used to
** be kept in a hash table.  But the number of objects is rarely more than
** a dozen and never exceeds a few thousand, and lookup is not on a
** critical path, so a simple linked list will suffice.
*/
static struct lockInfo *lockList = 0;   /* All lockInfo objects */
static struct openCnt *openList = 0;    /* All openCnt objects */
drh5fdae772004-06-29 03:29:00 +0000361
drhbfe66312006-10-03 17:40:40 +0000362/*
363** The locking styles are associated with the different file locking
364** capabilities supported by different file systems.
365**
366** POSIX locking style fully supports shared and exclusive byte-range locks
danielk1977e339d652008-06-28 11:23:00 +0000367** AFP locking only supports exclusive byte-range locks
drhbfe66312006-10-03 17:40:40 +0000368** FLOCK only supports a single file-global exclusive lock
369** DOTLOCK isn't a true locking style, it refers to the use of a special
370** file named the same as the database file with a '.lock' extension, this
371** can be used on file systems that do not offer any reliable file locking
372** NO locking means that no locking will be attempted, this is only used for
373** read-only file systems currently
374** UNSUPPORTED means that no locking will be attempted, this is only used for
375** file systems that are known to be unsupported
376*/
#define LOCKING_STYLE_POSIX    1  /* Shared and exclusive byte-range locks */
#define LOCKING_STYLE_NONE     2  /* No locking is attempted */
#define LOCKING_STYLE_DOTFILE  3  /* Lock by means of a '.lock' file */
#define LOCKING_STYLE_FLOCK    4  /* Single file-global exclusive lock */
#define LOCKING_STYLE_AFP      5  /* Exclusive byte-range locks only */
drhbfe66312006-10-03 17:40:40 +0000382
/*
** True if x is a real error code, as opposed to the normal expected
** return codes SQLITE_OK and SQLITE_BUSY.  lastErrno should only be set
** when this is true.
**
** The argument is parenthesized so that an expression such as (a | b)
** is compared as a whole.  Note that x is evaluated twice, so it must
** not have side effects.
*/
#define IS_LOCK_ERROR(x)  (((x) != SQLITE_OK) && ((x) != SQLITE_BUSY))
388
389/*
danielk1977ad94b582007-08-20 06:44:22 +0000390** Helper functions to obtain and relinquish the global mutex.
391*/
danielk1977b4b47412007-08-17 15:53:36 +0000392static void enterMutex(){
danielk197759f8c082008-06-18 17:09:10 +0000393 sqlite3_mutex_enter(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER));
danielk1977b4b47412007-08-17 15:53:36 +0000394}
395static void leaveMutex(){
danielk197759f8c082008-06-18 17:09:10 +0000396 sqlite3_mutex_leave(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER));
danielk1977b4b47412007-08-17 15:53:36 +0000397}
398
drhd677b3d2007-08-20 22:48:41 +0000399#if SQLITE_THREADSAFE
/*
** This variable records whether or not threads can override each others
** locks.
**
**    0:  No.  Threads cannot override each others locks.
**    1:  Yes.  Threads can override each others locks.
**   -1:  We don't know yet.
**
** On some systems, we know at compile-time if threads can override each
** others locks.  On those systems, the SQLITE_THREAD_OVERRIDE_LOCK macro
** will be set appropriately.  On other systems, we have to check at
** runtime.  On these latter systems, SQLITE_THREAD_OVERRIDE_LOCK is
** undefined and defaults to -1 here.
**
** This variable normally has file scope only.  But during testing
** (SQLITE_TEST), we make it a global so that the test code can change its
** value in order to verify that the right stuff happens in either case.
*/
#ifndef SQLITE_THREAD_OVERRIDE_LOCK
# define SQLITE_THREAD_OVERRIDE_LOCK (-1)
#endif
#ifndef SQLITE_TEST
static
#endif
int threadsOverrideEachOthersLocks = SQLITE_THREAD_OVERRIDE_LOCK;
drh5fdae772004-06-29 03:29:00 +0000426
/*
** One instance of this structure is handed to each test thread started
** by the testThreadLockingBehavior() routine.
*/
struct threadTestData {
  int fd;               /* File to be locked */
  struct flock lock;    /* The locking operation */
  int result;           /* Result of the locking operation */
};
436
#ifdef SQLITE_LOCK_TRACE
/*
** Map the l_type field of a struct flock onto a printable name for use
** by lockTrace().  An unrecognized value trips an assert() in debug builds
** and yields "???" otherwise, so that the caller never prints through an
** uninitialized pointer.
*/
static const char *lockTraceTypeName(int lockType){
  if( lockType==F_RDLCK ) return "RDLCK";
  if( lockType==F_WRLCK ) return "WRLCK";
  if( lockType==F_UNLCK ) return "UNLCK";
  assert( 0 );
  return "???";
}

/*
** Print out information about all locking operations.
**
** This routine is used for troubleshooting locks on multithreaded
** platforms.  Enable by compiling with the -DSQLITE_LOCK_TRACE
** command-line option on the compiler.  This code is normally
** turned off.
**
** The routine performs fcntl(fd, op, p), prints a trace line and returns
** the fcntl() result.  errno is restored to the value left by that fcntl()
** call before returning, on every path, so that the tracing output does
** not disturb the caller's error handling.
*/
static int lockTrace(int fd, int op, struct flock *p){
  const char *zOpName;
  const char *zType;
  int s;
  int savedErrno;

  if( op==F_GETLK ){
    zOpName = "GETLK";
  }else if( op==F_SETLK ){
    zOpName = "SETLK";
  }else{
    s = fcntl(fd, op, p);
    savedErrno = errno;
    sqlite3DebugPrintf("fcntl unknown %d %d %d\n", fd, op, s);
    errno = savedErrno;
    return s;
  }
  zType = lockTraceTypeName(p->l_type);
  assert( p->l_whence==SEEK_SET );
  s = fcntl(fd, op, p);
  savedErrno = errno;
  /* NOTE(review): threadid may expand to pthread_self(), whose type need
  ** not match the %d conversion on every platform - confirm for the
  ** platform being debugged. */
  sqlite3DebugPrintf("fcntl %d %d %s %s %d %d %d %d\n",
     threadid, fd, zOpName, zType, (int)p->l_start, (int)p->l_len,
     (int)p->l_pid, s);
  if( s==(-1) && op==F_SETLK && (p->l_type==F_RDLCK || p->l_type==F_WRLCK) ){
    /* The lock attempt failed.  Ask which lock is in the way, and report
    ** it only if that query itself succeeded. */
    struct flock l2;
    l2 = *p;
    if( fcntl(fd, F_GETLK, &l2)!=(-1) ){
      sqlite3DebugPrintf("fcntl-failure-reason: %s %d %d %d\n",
         lockTraceTypeName(l2.l_type), (int)l2.l_start, (int)l2.l_len,
         (int)l2.l_pid);
    }
  }
  errno = savedErrno;
  return s;
}
/* From here on, every fcntl() call in this file goes through lockTrace(). */
#define fcntl lockTrace
#endif /* SQLITE_LOCK_TRACE */
495
drh5fdae772004-06-29 03:29:00 +0000496/*
497** The testThreadLockingBehavior() routine launches two separate
498** threads on this routine. This routine attempts to lock a file
499** descriptor then returns. The success or failure of that attempt
500** allows the testThreadLockingBehavior() procedure to determine
501** whether or not threads can override each others locks.
502*/
503static void *threadLockingTest(void *pArg){
504 struct threadTestData *pData = (struct threadTestData*)pArg;
505 pData->result = fcntl(pData->fd, F_SETLK, &pData->lock);
506 return pArg;
507}
508
509/*
510** This procedure attempts to determine whether or not threads
511** can override each others locks then sets the
512** threadsOverrideEachOthersLocks variable appropriately.
513*/
danielk19774d5238f2006-01-27 06:32:00 +0000514static void testThreadLockingBehavior(int fd_orig){
drh5fdae772004-06-29 03:29:00 +0000515 int fd;
516 struct threadTestData d[2];
517 pthread_t t[2];
518
519 fd = dup(fd_orig);
520 if( fd<0 ) return;
521 memset(d, 0, sizeof(d));
522 d[0].fd = fd;
523 d[0].lock.l_type = F_RDLCK;
524 d[0].lock.l_len = 1;
525 d[0].lock.l_start = 0;
526 d[0].lock.l_whence = SEEK_SET;
527 d[1] = d[0];
528 d[1].lock.l_type = F_WRLCK;
529 pthread_create(&t[0], 0, threadLockingTest, &d[0]);
530 pthread_create(&t[1], 0, threadLockingTest, &d[1]);
531 pthread_join(t[0], 0);
532 pthread_join(t[1], 0);
533 close(fd);
534 threadsOverrideEachOthersLocks = d[0].result==0 && d[1].result==0;
535}
drhd677b3d2007-08-20 22:48:41 +0000536#endif /* SQLITE_THREADSAFE */
drh5fdae772004-06-29 03:29:00 +0000537
drhbbd42a62004-05-22 17:41:58 +0000538/*
539** Release a lockInfo structure previously allocated by findLockInfo().
540*/
541static void releaseLockInfo(struct lockInfo *pLock){
danielk1977e339d652008-06-28 11:23:00 +0000542 if( pLock ){
543 pLock->nRef--;
544 if( pLock->nRef==0 ){
drhda0e7682008-07-30 15:27:54 +0000545 if( pLock->pPrev ){
546 assert( pLock->pPrev->pNext==pLock );
547 pLock->pPrev->pNext = pLock->pNext;
548 }else{
549 assert( lockList==pLock );
550 lockList = pLock->pNext;
551 }
552 if( pLock->pNext ){
553 assert( pLock->pNext->pPrev==pLock );
554 pLock->pNext->pPrev = pLock->pPrev;
555 }
danielk1977e339d652008-06-28 11:23:00 +0000556 sqlite3_free(pLock);
557 }
drhbbd42a62004-05-22 17:41:58 +0000558 }
559}
560
561/*
562** Release a openCnt structure previously allocated by findLockInfo().
563*/
564static void releaseOpenCnt(struct openCnt *pOpen){
danielk1977e339d652008-06-28 11:23:00 +0000565 if( pOpen ){
566 pOpen->nRef--;
567 if( pOpen->nRef==0 ){
drhda0e7682008-07-30 15:27:54 +0000568 if( pOpen->pPrev ){
569 assert( pOpen->pPrev->pNext==pOpen );
570 pOpen->pPrev->pNext = pOpen->pNext;
571 }else{
572 assert( openList==pOpen );
573 openList = pOpen->pNext;
574 }
575 if( pOpen->pNext ){
576 assert( pOpen->pNext->pPrev==pOpen );
577 pOpen->pNext->pPrev = pOpen->pPrev;
578 }
579 sqlite3_free(pOpen->aPending);
danielk1977e339d652008-06-28 11:23:00 +0000580 sqlite3_free(pOpen);
581 }
drhbbd42a62004-05-22 17:41:58 +0000582 }
583}
584
#if SQLITE_ENABLE_LOCKING_STYLE
/*
** Tests a byte-range locking query to see if byte range locks are
** supported.  Returns LOCKING_STYLE_POSIX if the query succeeds and
** LOCKING_STYLE_DOTFILE otherwise.
*/
static int testLockingStyle(int fd){
  struct flock probe;

  /* Test byte-range lock using fcntl().  If the call succeeds,
  ** assume that the file-system supports POSIX style locks.
  **
  ** The structure is zeroed first so that no uninitialized field is
  ** handed to fcntl().
  */
  memset(&probe, 0, sizeof(probe));
  probe.l_len = 1;
  probe.l_start = 0;
  probe.l_whence = SEEK_SET;
  probe.l_type = F_RDLCK;
  if( fcntl(fd, F_GETLK, &probe)!=-1 ){
    return LOCKING_STYLE_POSIX;
  }

  /* Testing for flock() can give false positives.  So if the above
  ** test fails, then we fall back to using dot-file style locking.
  */
  return LOCKING_STYLE_DOTFILE;
}
#endif /* SQLITE_ENABLE_LOCKING_STYLE */
drhbfe66312006-10-03 17:40:40 +0000610
611/*
danielk1977e339d652008-06-28 11:23:00 +0000612** If SQLITE_ENABLE_LOCKING_STYLE is defined, this function Examines the
613** f_fstypename entry in the statfs structure as returned by stat() for
614** the file system hosting the database file and selects the appropriate
615** locking style based on its value. These values and assignments are
616** based on Darwin/OSX behavior and have not been thoroughly tested on
drhbfe66312006-10-03 17:40:40 +0000617** other systems.
danielk1977e339d652008-06-28 11:23:00 +0000618**
619** If SQLITE_ENABLE_LOCKING_STYLE is not defined, this function always
620** returns LOCKING_STYLE_POSIX.
drhbfe66312006-10-03 17:40:40 +0000621*/
danielk1977e339d652008-06-28 11:23:00 +0000622static int detectLockingStyle(
623 sqlite3_vfs *pVfs,
danielk1977ad94b582007-08-20 06:44:22 +0000624 const char *filePath,
625 int fd
626){
drh40bbb0a2008-09-23 10:23:26 +0000627#if SQLITE_ENABLE_LOCKING_STYLE
danielk1977e339d652008-06-28 11:23:00 +0000628 struct Mapping {
629 const char *zFilesystem;
630 int eLockingStyle;
631 } aMap[] = {
632 { "hfs", LOCKING_STYLE_POSIX },
633 { "ufs", LOCKING_STYLE_POSIX },
634 { "afpfs", LOCKING_STYLE_AFP },
aswift5b1a2562008-08-22 00:22:35 +0000635#ifdef SQLITE_ENABLE_AFP_LOCKING_SMB
636 { "smbfs", LOCKING_STYLE_AFP },
637#else
danielk1977e339d652008-06-28 11:23:00 +0000638 { "smbfs", LOCKING_STYLE_FLOCK },
aswift5b1a2562008-08-22 00:22:35 +0000639#endif
danielk1977e339d652008-06-28 11:23:00 +0000640 { "msdos", LOCKING_STYLE_DOTFILE },
641 { "webdav", LOCKING_STYLE_NONE },
642 { 0, 0 }
643 };
644 int i;
drhbfe66312006-10-03 17:40:40 +0000645 struct statfs fsInfo;
646
danielk1977e339d652008-06-28 11:23:00 +0000647 if( !filePath ){
648 return LOCKING_STYLE_NONE;
drh339eb0b2008-03-07 15:34:11 +0000649 }
danielk1977e339d652008-06-28 11:23:00 +0000650 if( pVfs->pAppData ){
aswiftf54b1b32008-08-22 18:41:37 +0000651 return SQLITE_PTR_TO_INT(pVfs->pAppData);
drh339eb0b2008-03-07 15:34:11 +0000652 }
drhbfe66312006-10-03 17:40:40 +0000653
danielk1977e339d652008-06-28 11:23:00 +0000654 if( statfs(filePath, &fsInfo) != -1 ){
655 if( fsInfo.f_flags & MNT_RDONLY ){
656 return LOCKING_STYLE_NONE;
657 }
658 for(i=0; aMap[i].zFilesystem; i++){
659 if( strcmp(fsInfo.f_fstypename, aMap[i].zFilesystem)==0 ){
660 return aMap[i].eLockingStyle;
661 }
662 }
663 }
664
665 /* Default case. Handles, amongst others, "nfs". */
666 return testLockingStyle(fd);
667#endif
668 return LOCKING_STYLE_POSIX;
669}
drhbfe66312006-10-03 17:40:40 +0000670
drhbbd42a62004-05-22 17:41:58 +0000671/*
672** Given a file descriptor, locate lockInfo and openCnt structures that
drh029b44b2006-01-15 00:13:15 +0000673** describes that file descriptor. Create new ones if necessary. The
674** return values might be uninitialized if an error occurs.
drhbbd42a62004-05-22 17:41:58 +0000675**
drh65594042008-05-05 16:56:34 +0000676** Return an appropriate error code.
drhbbd42a62004-05-22 17:41:58 +0000677*/
drh38f82712004-06-18 17:10:16 +0000678static int findLockInfo(
drhbbd42a62004-05-22 17:41:58 +0000679 int fd, /* The file descriptor used in the key */
680 struct lockInfo **ppLock, /* Return the lockInfo structure here */
drh5fdae772004-06-29 03:29:00 +0000681 struct openCnt **ppOpen /* Return the openCnt structure here */
drhbbd42a62004-05-22 17:41:58 +0000682){
683 int rc;
684 struct lockKey key1;
685 struct openKey key2;
686 struct stat statbuf;
687 struct lockInfo *pLock;
688 struct openCnt *pOpen;
689 rc = fstat(fd, &statbuf);
drh65594042008-05-05 16:56:34 +0000690 if( rc!=0 ){
691#ifdef EOVERFLOW
692 if( errno==EOVERFLOW ) return SQLITE_NOLFS;
693#endif
694 return SQLITE_IOERR;
695 }
danielk1977441b09a2006-01-05 13:48:29 +0000696
drh54626242008-07-30 17:28:04 +0000697 /* On OS X on an msdos filesystem, the inode number is reported
698 ** incorrectly for zero-size files. See ticket #3260. To work
699 ** around this problem (we consider it a bug in OS X, not SQLite)
700 ** we always increase the file size to 1 by writing a single byte
701 ** prior to accessing the inode number. The one byte written is
702 ** an ASCII 'S' character which also happens to be the first byte
703 ** in the header of every SQLite database. In this way, if there
704 ** is a race condition such that another thread has already populated
705 ** the first page of the database, no damage is done.
706 */
707 if( statbuf.st_size==0 ){
708 write(fd, "S", 1);
709 rc = fstat(fd, &statbuf);
710 if( rc!=0 ){
711 return SQLITE_IOERR;
712 }
713 }
714
drhbbd42a62004-05-22 17:41:58 +0000715 memset(&key1, 0, sizeof(key1));
716 key1.dev = statbuf.st_dev;
717 key1.ino = statbuf.st_ino;
drhd677b3d2007-08-20 22:48:41 +0000718#if SQLITE_THREADSAFE
drh5fdae772004-06-29 03:29:00 +0000719 if( threadsOverrideEachOthersLocks<0 ){
720 testThreadLockingBehavior(fd);
721 }
722 key1.tid = threadsOverrideEachOthersLocks ? 0 : pthread_self();
723#endif
drhbbd42a62004-05-22 17:41:58 +0000724 memset(&key2, 0, sizeof(key2));
725 key2.dev = statbuf.st_dev;
726 key2.ino = statbuf.st_ino;
drhda0e7682008-07-30 15:27:54 +0000727 pLock = lockList;
728 while( pLock && memcmp(&key1, &pLock->key, sizeof(key1)) ){
729 pLock = pLock->pNext;
730 }
drhbbd42a62004-05-22 17:41:58 +0000731 if( pLock==0 ){
drh17435752007-08-16 04:30:38 +0000732 pLock = sqlite3_malloc( sizeof(*pLock) );
danielk1977441b09a2006-01-05 13:48:29 +0000733 if( pLock==0 ){
drh65594042008-05-05 16:56:34 +0000734 rc = SQLITE_NOMEM;
danielk1977441b09a2006-01-05 13:48:29 +0000735 goto exit_findlockinfo;
736 }
drhbbd42a62004-05-22 17:41:58 +0000737 pLock->key = key1;
738 pLock->nRef = 1;
739 pLock->cnt = 0;
danielk19779a1d0ab2004-06-01 14:09:28 +0000740 pLock->locktype = 0;
drhda0e7682008-07-30 15:27:54 +0000741 pLock->pNext = lockList;
742 pLock->pPrev = 0;
743 if( lockList ) lockList->pPrev = pLock;
744 lockList = pLock;
drhbbd42a62004-05-22 17:41:58 +0000745 }else{
746 pLock->nRef++;
747 }
748 *ppLock = pLock;
drh029b44b2006-01-15 00:13:15 +0000749 if( ppOpen!=0 ){
drhda0e7682008-07-30 15:27:54 +0000750 pOpen = openList;
751 while( pOpen && memcmp(&key2, &pOpen->key, sizeof(key2)) ){
752 pOpen = pOpen->pNext;
753 }
drhbbd42a62004-05-22 17:41:58 +0000754 if( pOpen==0 ){
drh17435752007-08-16 04:30:38 +0000755 pOpen = sqlite3_malloc( sizeof(*pOpen) );
drh029b44b2006-01-15 00:13:15 +0000756 if( pOpen==0 ){
757 releaseLockInfo(pLock);
drh65594042008-05-05 16:56:34 +0000758 rc = SQLITE_NOMEM;
drh029b44b2006-01-15 00:13:15 +0000759 goto exit_findlockinfo;
760 }
761 pOpen->key = key2;
762 pOpen->nRef = 1;
763 pOpen->nLock = 0;
764 pOpen->nPending = 0;
765 pOpen->aPending = 0;
drhda0e7682008-07-30 15:27:54 +0000766 pOpen->pNext = openList;
767 pOpen->pPrev = 0;
768 if( openList ) openList->pPrev = pOpen;
769 openList = pOpen;
drh029b44b2006-01-15 00:13:15 +0000770 }else{
771 pOpen->nRef++;
drhbbd42a62004-05-22 17:41:58 +0000772 }
drh029b44b2006-01-15 00:13:15 +0000773 *ppOpen = pOpen;
drhbbd42a62004-05-22 17:41:58 +0000774 }
danielk1977441b09a2006-01-05 13:48:29 +0000775
776exit_findlockinfo:
danielk1977441b09a2006-01-05 13:48:29 +0000777 return rc;
drhbbd42a62004-05-22 17:41:58 +0000778}
779
#ifdef SQLITE_DEBUG
/*
** Helper function for printing out trace information from debugging
** binaries.  Return the string representation of the supplied integer
** lock-type, or "ERROR" if the value is not one of the known lock types.
*/
static const char *locktypeName(int locktype){
  const char *zName = "ERROR";
  switch( locktype ){
    case NO_LOCK:        zName = "NONE";       break;
    case SHARED_LOCK:    zName = "SHARED";     break;
    case RESERVED_LOCK:  zName = "RESERVED";   break;
    case PENDING_LOCK:   zName = "PENDING";    break;
    case EXCLUSIVE_LOCK: zName = "EXCLUSIVE";  break;
  }
  return zName;
}
#endif
797
/*
** If we are currently in a different thread than the thread that the
** unixFile argument belongs to, then transfer ownership of the unixFile
** over to the current thread.
**
** A unixFile is only owned by a thread on systems where one thread is
** unable to override locks created by a different thread.  RedHat9 is
** an example of such a system.
**
** Ownership transfer is only allowed if the unixFile is currently unlocked.
** If the unixFile is locked and the ownership is wrong, then return
** SQLITE_MISUSE.  SQLITE_OK is returned if everything works.  If the
** lockInfo structure has to be looked up again for the new thread and
** that lookup fails, the error code from findLockInfo() is returned.
*/
#if SQLITE_THREADSAFE
static int transferOwnership(unixFile *pFile){
  int rc;
  pthread_t hSelf;
  if( threadsOverrideEachOthersLocks ){
    /* Ownership transfers not needed on this system */
    return SQLITE_OK;
  }
  hSelf = pthread_self();
  if( pthread_equal(pFile->tid, hSelf) ){
    /* We are still in the same thread */
    OSTRACE1("No-transfer, same thread\n");
    return SQLITE_OK;
  }
  if( pFile->locktype!=NO_LOCK ){
    /* We cannot change ownership while we are holding a lock! */
    return SQLITE_MISUSE;
  }
  OSTRACE4("Transfer ownership of %d from %d to %d\n",
           pFile->h, pFile->tid, hSelf);
  pFile->tid = hSelf;
  if( pFile->pLock==NULL ){
    return SQLITE_OK;
  }

  /* Drop the reference to the old lockInfo and look up the lockInfo
  ** again now that pFile->tid has changed.
  */
  releaseLockInfo(pFile->pLock);
  rc = findLockInfo(pFile->h, &pFile->pLock, 0);
  if( rc==SQLITE_OK ){
    /* Only trace on success: findLockInfo() leaves its output possibly
    ** uninitialized on error, so pFile->pLock must not be dereferenced
    ** when the lookup failed.
    */
    OSTRACE5("LOCK %d is now %s(%s,%d)\n", pFile->h,
             locktypeName(pFile->locktype),
             locktypeName(pFile->pLock->locktype), pFile->pLock->cnt);
  }
  return rc;
}
#else
  /* On single-threaded builds, ownership transfer is a no-op */
# define transferOwnership(X) SQLITE_OK
#endif
847
848/*
danielk19772a6bdf62007-08-20 16:07:00 +0000849** Seek to the offset passed as the second argument, then read cnt
850** bytes into pBuf. Return the number of bytes actually read.
drh9e0ebbf2007-10-23 15:59:18 +0000851**
852** NB: If you define USE_PREAD or USE_PREAD64, then it might also
853** be necessary to define _XOPEN_SOURCE to be 500. This varies from
854** one system to another. Since SQLite does not define USE_PREAD
855** any any form by default, we will not attempt to define _XOPEN_SOURCE.
856** See tickets #2741 and #2681.
drhb912b282006-03-23 22:42:20 +0000857*/
danielk197762079062007-08-15 17:08:46 +0000858static int seekAndRead(unixFile *id, sqlite3_int64 offset, void *pBuf, int cnt){
drhb912b282006-03-23 22:42:20 +0000859 int got;
drh8ebf6702007-02-06 11:11:08 +0000860 i64 newOffset;
drh15d00c42007-02-27 02:01:14 +0000861 TIMER_START;
drh8350a212007-03-22 15:22:06 +0000862#if defined(USE_PREAD)
danielk197762079062007-08-15 17:08:46 +0000863 got = pread(id->h, pBuf, cnt, offset);
drhbb5f18d2007-04-06 18:23:17 +0000864 SimulateIOError( got = -1 );
drh8350a212007-03-22 15:22:06 +0000865#elif defined(USE_PREAD64)
danielk197762079062007-08-15 17:08:46 +0000866 got = pread64(id->h, pBuf, cnt, offset);
drhbb5f18d2007-04-06 18:23:17 +0000867 SimulateIOError( got = -1 );
drhb912b282006-03-23 22:42:20 +0000868#else
danielk197762079062007-08-15 17:08:46 +0000869 newOffset = lseek(id->h, offset, SEEK_SET);
drhbb5f18d2007-04-06 18:23:17 +0000870 SimulateIOError( newOffset-- );
danielk197762079062007-08-15 17:08:46 +0000871 if( newOffset!=offset ){
drh8ebf6702007-02-06 11:11:08 +0000872 return -1;
873 }
drhb912b282006-03-23 22:42:20 +0000874 got = read(id->h, pBuf, cnt);
875#endif
drh15d00c42007-02-27 02:01:14 +0000876 TIMER_END;
shane9bcbdad2008-05-29 20:22:37 +0000877 OSTRACE5("READ %-3d %5d %7lld %llu\n", id->h, got, offset, TIMER_ELAPSED);
drhb912b282006-03-23 22:42:20 +0000878 return got;
879}
880
881/*
drhbbd42a62004-05-22 17:41:58 +0000882** Read data from a file into a buffer. Return SQLITE_OK if all
883** bytes were read successfully and SQLITE_IOERR if anything goes
884** wrong.
885*/
danielk197762079062007-08-15 17:08:46 +0000886static int unixRead(
887 sqlite3_file *id,
888 void *pBuf,
889 int amt,
890 sqlite3_int64 offset
891){
drhbbd42a62004-05-22 17:41:58 +0000892 int got;
drh9cbe6352005-11-29 03:13:21 +0000893 assert( id );
danielk197762079062007-08-15 17:08:46 +0000894 got = seekAndRead((unixFile*)id, offset, pBuf, amt);
drhbbd42a62004-05-22 17:41:58 +0000895 if( got==amt ){
896 return SQLITE_OK;
drh4ac285a2006-09-15 07:28:50 +0000897 }else if( got<0 ){
898 return SQLITE_IOERR_READ;
drhbbd42a62004-05-22 17:41:58 +0000899 }else{
drhbafda092007-01-03 23:36:22 +0000900 memset(&((char*)pBuf)[got], 0, amt-got);
drh4ac285a2006-09-15 07:28:50 +0000901 return SQLITE_IOERR_SHORT_READ;
drhbbd42a62004-05-22 17:41:58 +0000902 }
903}
904
905/*
drhb912b282006-03-23 22:42:20 +0000906** Seek to the offset in id->offset then read cnt bytes into pBuf.
907** Return the number of bytes actually read. Update the offset.
908*/
danielk197762079062007-08-15 17:08:46 +0000909static int seekAndWrite(unixFile *id, i64 offset, const void *pBuf, int cnt){
drhb912b282006-03-23 22:42:20 +0000910 int got;
drh8ebf6702007-02-06 11:11:08 +0000911 i64 newOffset;
drh15d00c42007-02-27 02:01:14 +0000912 TIMER_START;
drh8350a212007-03-22 15:22:06 +0000913#if defined(USE_PREAD)
danielk197762079062007-08-15 17:08:46 +0000914 got = pwrite(id->h, pBuf, cnt, offset);
drh8350a212007-03-22 15:22:06 +0000915#elif defined(USE_PREAD64)
danielk197762079062007-08-15 17:08:46 +0000916 got = pwrite64(id->h, pBuf, cnt, offset);
drhb912b282006-03-23 22:42:20 +0000917#else
danielk197762079062007-08-15 17:08:46 +0000918 newOffset = lseek(id->h, offset, SEEK_SET);
919 if( newOffset!=offset ){
drh8ebf6702007-02-06 11:11:08 +0000920 return -1;
921 }
drhb912b282006-03-23 22:42:20 +0000922 got = write(id->h, pBuf, cnt);
923#endif
drh15d00c42007-02-27 02:01:14 +0000924 TIMER_END;
shane9bcbdad2008-05-29 20:22:37 +0000925 OSTRACE5("WRITE %-3d %5d %7lld %llu\n", id->h, got, offset, TIMER_ELAPSED);
drhb912b282006-03-23 22:42:20 +0000926 return got;
927}
928
929
930/*
drhbbd42a62004-05-22 17:41:58 +0000931** Write data from a buffer into a file. Return SQLITE_OK on success
932** or some other error code on failure.
933*/
danielk197762079062007-08-15 17:08:46 +0000934static int unixWrite(
935 sqlite3_file *id,
936 const void *pBuf,
937 int amt,
938 sqlite3_int64 offset
939){
drhbbd42a62004-05-22 17:41:58 +0000940 int wrote = 0;
drh9cbe6352005-11-29 03:13:21 +0000941 assert( id );
drh4c7f9412005-02-03 00:29:47 +0000942 assert( amt>0 );
danielk197762079062007-08-15 17:08:46 +0000943 while( amt>0 && (wrote = seekAndWrite((unixFile*)id, offset, pBuf, amt))>0 ){
drhbbd42a62004-05-22 17:41:58 +0000944 amt -= wrote;
danielk197762079062007-08-15 17:08:46 +0000945 offset += wrote;
drhbbd42a62004-05-22 17:41:58 +0000946 pBuf = &((char*)pBuf)[wrote];
947 }
drh59685932006-09-14 13:47:11 +0000948 SimulateIOError(( wrote=(-1), amt=1 ));
949 SimulateDiskfullError(( wrote=0, amt=1 ));
drhbbd42a62004-05-22 17:41:58 +0000950 if( amt>0 ){
drh59685932006-09-14 13:47:11 +0000951 if( wrote<0 ){
drh4ac285a2006-09-15 07:28:50 +0000952 return SQLITE_IOERR_WRITE;
drh59685932006-09-14 13:47:11 +0000953 }else{
954 return SQLITE_FULL;
955 }
drhbbd42a62004-05-22 17:41:58 +0000956 }
957 return SQLITE_OK;
958}
959
#if defined(SQLITE_TEST)
/*
** Counters for the number of fullsyncs and of syncs of any kind that
** have been requested.  They are used by tests to verify that syncs and
** fullsyncs are occurring at the right times.
*/
int sqlite3_fullsync_count = 0;
int sqlite3_sync_count = 0;
#endif
968
/*
** The fdatasync() API is only used when the HAVE_FDATASYNC macro is
** defined.  Without it, every use of fdatasync() becomes fsync().
*/
#if !defined(HAVE_FDATASYNC)
# define fdatasync fsync
#endif
976
/*
** HAVE_FULLFSYNC is 1 when the F_FULLFSYNC macro is defined and 0 when
** it is not.  F_FULLFSYNC is currently only available on Mac OS X.
** But that could change.
*/
#ifndef F_FULLFSYNC
# define HAVE_FULLFSYNC 0
#else
# define HAVE_FULLFSYNC 1
#endif
987
drhb851b2c2005-03-10 14:11:12 +0000988
/*
** The fsync() system call does not work as advertised on many
** unix systems.  The following procedure is an attempt to make
** it work better.
**
** fd is the descriptor to sync.  fullSync requests an F_FULLFSYNC where
** that is available (HAVE_FULLFSYNC); dataOnly requests fdatasync()
** instead of fsync() where F_FULLFSYNC is not available.  The return
** value is the result of the last system call made, so zero means
** success.
**
** The SQLITE_NO_SYNC macro disables all fsync()s.  This is useful
** for testing when we want to run through the test suite quickly.
** You are strongly advised *not* to deploy with SQLITE_NO_SYNC
** enabled, however, since with SQLITE_NO_SYNC enabled, an OS crash
** or power failure will likely corrupt the database file.
*/
static int full_fsync(int fd, int fullSync, int dataOnly){
  int rc;

#ifdef SQLITE_TEST
  /* Count every call, and separately count the calls that ask for a
  ** FULLSYNC, so that tests can verify that this procedure gets called
  ** with the correct arguments.
  */
  sqlite3_sync_count++;
  if( fullSync ){
    sqlite3_fullsync_count++;
  }
#endif

#ifdef SQLITE_NO_SYNC
  /* Syncing is a no-op when compiled with the SQLITE_NO_SYNC flag */
  rc = SQLITE_OK;
#else
# if HAVE_FULLFSYNC
  /* Try F_FULLFSYNC when it was asked for.  A non-zero rc, either from
  ** a failed F_FULLFSYNC or because no full sync was requested, leads to
  ** a plain fsync().
  **
  ** It shouldn't be possible for fullfsync to fail on the local
  ** file system (on OSX), so failure indicates that FULLFSYNC
  ** isn't supported for this file system.  So, attempt an fsync
  ** and (for now) ignore the overhead of a superfluous fcntl call.
  ** It'd be better to detect fullfsync support once and avoid
  ** the fcntl call every time sync is called.
  */
  rc = fullSync ? fcntl(fd, F_FULLFSYNC, 0) : 1;
  if( rc ){
    rc = fsync(fd);
  }
# else
  rc = dataOnly ? fdatasync(fd) : fsync(fd);
# endif /* HAVE_FULLFSYNC */
#endif /* defined(SQLITE_NO_SYNC) */

  return rc;
}
1046
1047/*
drhbbd42a62004-05-22 17:41:58 +00001048** Make sure all writes to a particular file are committed to disk.
1049**
drheb796a72005-09-08 12:38:41 +00001050** If dataOnly==0 then both the file itself and its metadata (file
1051** size, access time, etc) are synced. If dataOnly!=0 then only the
1052** file data is synced.
1053**
drhbbd42a62004-05-22 17:41:58 +00001054** Under Unix, also make sure that the directory entry for the file
1055** has been created by fsync-ing the directory that contains the file.
1056** If we do not do this and we encounter a power failure, the directory
1057** entry for the journal might not exist after we reboot. The next
1058** SQLite to access the file will not know that the journal exists (because
1059** the directory entry for the journal was never created) and the transaction
1060** will not roll back - possibly leading to database corruption.
1061*/
danielk197790949c22007-08-17 16:50:38 +00001062static int unixSync(sqlite3_file *id, int flags){
drh59685932006-09-14 13:47:11 +00001063 int rc;
drh054889e2005-11-30 03:20:31 +00001064 unixFile *pFile = (unixFile*)id;
danielk197790949c22007-08-17 16:50:38 +00001065
danielk1977f036aef2007-08-20 05:36:51 +00001066 int isDataOnly = (flags&SQLITE_SYNC_DATAONLY);
1067 int isFullsync = (flags&0x0F)==SQLITE_SYNC_FULL;
1068
danielk1977c16d4632007-08-30 14:49:58 +00001069 /* Check that one of SQLITE_SYNC_NORMAL or FULL was passed */
danielk1977f036aef2007-08-20 05:36:51 +00001070 assert((flags&0x0F)==SQLITE_SYNC_NORMAL
1071 || (flags&0x0F)==SQLITE_SYNC_FULL
danielk1977f036aef2007-08-20 05:36:51 +00001072 );
danielk197790949c22007-08-17 16:50:38 +00001073
danielk1977cd3b3c82008-09-22 11:46:32 +00001074 /* Unix cannot, but some systems may return SQLITE_FULL from here. This
1075 ** line is to test that doing so does not cause any problems.
1076 */
1077 SimulateDiskfullError( return SQLITE_FULL );
1078
drh054889e2005-11-30 03:20:31 +00001079 assert( pFile );
drh4f0c5872007-03-26 22:05:01 +00001080 OSTRACE2("SYNC %-3d\n", pFile->h);
danielk197790949c22007-08-17 16:50:38 +00001081 rc = full_fsync(pFile->h, isFullsync, isDataOnly);
drh59685932006-09-14 13:47:11 +00001082 SimulateIOError( rc=1 );
1083 if( rc ){
drh4ac285a2006-09-15 07:28:50 +00001084 return SQLITE_IOERR_FSYNC;
drhbbd42a62004-05-22 17:41:58 +00001085 }
drh054889e2005-11-30 03:20:31 +00001086 if( pFile->dirfd>=0 ){
drh4f0c5872007-03-26 22:05:01 +00001087 OSTRACE4("DIRSYNC %-3d (have_fullfsync=%d fullsync=%d)\n", pFile->dirfd,
danielk197790949c22007-08-17 16:50:38 +00001088 HAVE_FULLFSYNC, isFullsync);
danielk1977d7c03f72005-11-25 10:38:22 +00001089#ifndef SQLITE_DISABLE_DIRSYNC
drhac530b12006-02-11 01:25:50 +00001090 /* The directory sync is only attempted if full_fsync is
1091 ** turned off or unavailable. If a full_fsync occurred above,
1092 ** then the directory sync is superfluous.
1093 */
danielk197790949c22007-08-17 16:50:38 +00001094 if( (!HAVE_FULLFSYNC || !isFullsync) && full_fsync(pFile->dirfd,0,0) ){
drhac530b12006-02-11 01:25:50 +00001095 /*
1096 ** We have received multiple reports of fsync() returning
drh86631a52006-02-09 23:05:51 +00001097 ** errors when applied to directories on certain file systems.
1098 ** A failed directory sync is not a big deal. So it seems
1099 ** better to ignore the error. Ticket #1657
1100 */
1101 /* return SQLITE_IOERR; */
danielk19770964b232005-11-25 08:47:57 +00001102 }
danielk1977d7c03f72005-11-25 10:38:22 +00001103#endif
drh054889e2005-11-30 03:20:31 +00001104 close(pFile->dirfd); /* Only need to sync once, so close the directory */
1105 pFile->dirfd = -1; /* when we are done. */
drha2854222004-06-17 19:04:17 +00001106 }
drha2854222004-06-17 19:04:17 +00001107 return SQLITE_OK;
drhbbd42a62004-05-22 17:41:58 +00001108}
1109
1110/*
1111** Truncate an open file to a specified size
1112*/
danielk197762079062007-08-15 17:08:46 +00001113static int unixTruncate(sqlite3_file *id, i64 nByte){
drh59685932006-09-14 13:47:11 +00001114 int rc;
drh9cbe6352005-11-29 03:13:21 +00001115 assert( id );
drh93aed5a2008-01-16 17:46:38 +00001116 SimulateIOError( return SQLITE_IOERR_TRUNCATE );
drh63fff5f2007-06-19 10:50:38 +00001117 rc = ftruncate(((unixFile*)id)->h, (off_t)nByte);
drh59685932006-09-14 13:47:11 +00001118 if( rc ){
drh4ac285a2006-09-15 07:28:50 +00001119 return SQLITE_IOERR_TRUNCATE;
drh59685932006-09-14 13:47:11 +00001120 }else{
1121 return SQLITE_OK;
1122 }
drhbbd42a62004-05-22 17:41:58 +00001123}
1124
1125/*
1126** Determine the current size of a file in bytes
1127*/
danielk197762079062007-08-15 17:08:46 +00001128static int unixFileSize(sqlite3_file *id, i64 *pSize){
drh59685932006-09-14 13:47:11 +00001129 int rc;
drhbbd42a62004-05-22 17:41:58 +00001130 struct stat buf;
drh9cbe6352005-11-29 03:13:21 +00001131 assert( id );
drh59685932006-09-14 13:47:11 +00001132 rc = fstat(((unixFile*)id)->h, &buf);
1133 SimulateIOError( rc=1 );
1134 if( rc!=0 ){
drh4ac285a2006-09-15 07:28:50 +00001135 return SQLITE_IOERR_FSTAT;
drhbbd42a62004-05-22 17:41:58 +00001136 }
1137 *pSize = buf.st_size;
drh54626242008-07-30 17:28:04 +00001138
1139 /* When opening a zero-size database, the findLockInfo() procedure
1140 ** writes a single byte into that file in order to work around a bug
1141 ** in the OS-X msdos filesystem. In order to avoid problems with upper
1142 ** layers, we need to report this file size as zero even though it is
1143 ** really 1. Ticket #3260.
1144 */
1145 if( *pSize==1 ) *pSize = 0;
1146
1147
drhbbd42a62004-05-22 17:41:58 +00001148 return SQLITE_OK;
1149}
1150
danielk19779a1d0ab2004-06-01 14:09:28 +00001151/*
aswift5b1a2562008-08-22 00:22:35 +00001152** This routine translates a standard POSIX errno code into something
1153** useful to the clients of the sqlite3 functions. Specifically, it is
1154** intended to translate a variety of "try again" errors into SQLITE_BUSY
1155** and a variety of "please close the file descriptor NOW" errors into
1156** SQLITE_IOERR
1157**
1158** Errors during initialization of locks, or file system support for locks,
1159** should handle ENOLCK, ENOTSUP, EOPNOTSUPP separately.
1160*/
1161static int sqliteErrorFromPosixError(int posixError, int sqliteIOErr) {
1162 switch (posixError) {
1163 case 0:
1164 return SQLITE_OK;
1165
1166 case EAGAIN:
1167 case ETIMEDOUT:
1168 case EBUSY:
1169 case EINTR:
1170 case ENOLCK:
1171 /* random NFS retry error, unless during file system support
1172 * introspection, in which it actually means what it says */
1173 return SQLITE_BUSY;
1174
1175 case EACCES:
1176 /* EACCES is like EAGAIN during locking operations, but not any other time*/
1177 if( (sqliteIOErr == SQLITE_IOERR_LOCK) ||
1178 (sqliteIOErr == SQLITE_IOERR_UNLOCK) ||
1179 (sqliteIOErr == SQLITE_IOERR_RDLOCK) ||
1180 (sqliteIOErr == SQLITE_IOERR_CHECKRESERVEDLOCK) ){
1181 return SQLITE_BUSY;
1182 }
1183 /* else fall through */
1184 case EPERM:
1185 return SQLITE_PERM;
1186
1187 case EDEADLK:
1188 return SQLITE_IOERR_BLOCKED;
1189
drhf489c452008-08-22 00:47:53 +00001190#if EOPNOTSUPP!=ENOTSUP
aswift5b1a2562008-08-22 00:22:35 +00001191 case EOPNOTSUPP:
1192 /* something went terribly awry, unless during file system support
1193 * introspection, in which it actually means what it says */
drhf489c452008-08-22 00:47:53 +00001194#endif
danielk19775ad6a882008-09-15 04:20:31 +00001195#ifdef ENOTSUP
aswift5b1a2562008-08-22 00:22:35 +00001196 case ENOTSUP:
1197 /* invalid fd, unless during file system support introspection, in which
1198 * it actually means what it says */
danielk19775ad6a882008-09-15 04:20:31 +00001199#endif
aswift5b1a2562008-08-22 00:22:35 +00001200 case EIO:
1201 case EBADF:
1202 case EINVAL:
1203 case ENOTCONN:
1204 case ENODEV:
1205 case ENXIO:
1206 case ENOENT:
1207 case ESTALE:
1208 case ENOSYS:
1209 /* these should force the client to close the file and reconnect */
1210
1211 default:
1212 return sqliteIOErr;
1213 }
1214}
1215
1216/*
danielk197713adf8a2004-06-03 16:08:41 +00001217** This routine checks if there is a RESERVED lock held on the specified
aswift5b1a2562008-08-22 00:22:35 +00001218** file by this or any other process. If such a lock is held, set *pResOut
1219** to a non-zero value otherwise *pResOut is set to zero. The return value
1220** is set to SQLITE_OK unless an I/O error occurs during lock checking.
danielk197713adf8a2004-06-03 16:08:41 +00001221*/
danielk1977861f7452008-06-05 11:39:11 +00001222static int unixCheckReservedLock(sqlite3_file *id, int *pResOut){
aswift5b1a2562008-08-22 00:22:35 +00001223 int rc = SQLITE_OK;
1224 int reserved = 0;
drh054889e2005-11-30 03:20:31 +00001225 unixFile *pFile = (unixFile*)id;
danielk197713adf8a2004-06-03 16:08:41 +00001226
danielk1977861f7452008-06-05 11:39:11 +00001227 SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; );
1228
drh054889e2005-11-30 03:20:31 +00001229 assert( pFile );
danielk1977b4b47412007-08-17 15:53:36 +00001230 enterMutex(); /* Because pFile->pLock is shared across threads */
danielk197713adf8a2004-06-03 16:08:41 +00001231
1232 /* Check if a thread in this process holds such a lock */
drh054889e2005-11-30 03:20:31 +00001233 if( pFile->pLock->locktype>SHARED_LOCK ){
aswift5b1a2562008-08-22 00:22:35 +00001234 reserved = 1;
danielk197713adf8a2004-06-03 16:08:41 +00001235 }
1236
drh2ac3ee92004-06-07 16:27:46 +00001237 /* Otherwise see if some other process holds it.
danielk197713adf8a2004-06-03 16:08:41 +00001238 */
aswift5b1a2562008-08-22 00:22:35 +00001239 if( !reserved ){
danielk197713adf8a2004-06-03 16:08:41 +00001240 struct flock lock;
1241 lock.l_whence = SEEK_SET;
drh2ac3ee92004-06-07 16:27:46 +00001242 lock.l_start = RESERVED_BYTE;
1243 lock.l_len = 1;
1244 lock.l_type = F_WRLCK;
aswift5b1a2562008-08-22 00:22:35 +00001245 if (-1 == fcntl(pFile->h, F_GETLK, &lock)) {
1246 int tErrno = errno;
1247 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_CHECKRESERVEDLOCK);
1248 pFile->lastErrno = tErrno;
1249 } else if( lock.l_type!=F_UNLCK ){
1250 reserved = 1;
danielk197713adf8a2004-06-03 16:08:41 +00001251 }
1252 }
1253
danielk1977b4b47412007-08-17 15:53:36 +00001254 leaveMutex();
aswift5b1a2562008-08-22 00:22:35 +00001255 OSTRACE4("TEST WR-LOCK %d %d %d\n", pFile->h, rc, reserved);
danielk197713adf8a2004-06-03 16:08:41 +00001256
aswift5b1a2562008-08-22 00:22:35 +00001257 *pResOut = reserved;
1258 return rc;
danielk197713adf8a2004-06-03 16:08:41 +00001259}
1260
1261/*
danielk19779a1d0ab2004-06-01 14:09:28 +00001262** Lock the file with the lock specified by parameter locktype - one
1263** of the following:
1264**
drh2ac3ee92004-06-07 16:27:46 +00001265** (1) SHARED_LOCK
1266** (2) RESERVED_LOCK
1267** (3) PENDING_LOCK
1268** (4) EXCLUSIVE_LOCK
1269**
drhb3e04342004-06-08 00:47:47 +00001270** Sometimes when requesting one lock state, additional lock states
1271** are inserted in between. The locking might fail on one of the later
1272** transitions leaving the lock state different from what it started but
1273** still short of its goal. The following chart shows the allowed
1274** transitions and the inserted intermediate states:
1275**
1276** UNLOCKED -> SHARED
1277** SHARED -> RESERVED
1278** SHARED -> (PENDING) -> EXCLUSIVE
1279** RESERVED -> (PENDING) -> EXCLUSIVE
1280** PENDING -> EXCLUSIVE
drh2ac3ee92004-06-07 16:27:46 +00001281**
drha6abd042004-06-09 17:37:22 +00001282** This routine will only increase a lock. Use the sqlite3OsUnlock()
1283** routine to lower a locking level.
danielk19779a1d0ab2004-06-01 14:09:28 +00001284*/
danielk197762079062007-08-15 17:08:46 +00001285static int unixLock(sqlite3_file *id, int locktype){
danielk1977f42f25c2004-06-25 07:21:28 +00001286 /* The following describes the implementation of the various locks and
1287 ** lock transitions in terms of the POSIX advisory shared and exclusive
1288 ** lock primitives (called read-locks and write-locks below, to avoid
1289 ** confusion with SQLite lock names). The algorithms are complicated
1290 ** slightly in order to be compatible with windows systems simultaneously
1291 ** accessing the same database file, in case that is ever required.
1292 **
1293 ** Symbols defined in os.h indentify the 'pending byte' and the 'reserved
1294 ** byte', each single bytes at well known offsets, and the 'shared byte
1295 ** range', a range of 510 bytes at a well known offset.
1296 **
1297 ** To obtain a SHARED lock, a read-lock is obtained on the 'pending
1298 ** byte'. If this is successful, a random byte from the 'shared byte
1299 ** range' is read-locked and the lock on the 'pending byte' released.
1300 **
danielk197790ba3bd2004-06-25 08:32:25 +00001301 ** A process may only obtain a RESERVED lock after it has a SHARED lock.
1302 ** A RESERVED lock is implemented by grabbing a write-lock on the
1303 ** 'reserved byte'.
danielk1977f42f25c2004-06-25 07:21:28 +00001304 **
1305 ** A process may only obtain a PENDING lock after it has obtained a
danielk197790ba3bd2004-06-25 08:32:25 +00001306 ** SHARED lock. A PENDING lock is implemented by obtaining a write-lock
1307 ** on the 'pending byte'. This ensures that no new SHARED locks can be
1308 ** obtained, but existing SHARED locks are allowed to persist. A process
1309 ** does not have to obtain a RESERVED lock on the way to a PENDING lock.
1310 ** This property is used by the algorithm for rolling back a journal file
1311 ** after a crash.
danielk1977f42f25c2004-06-25 07:21:28 +00001312 **
danielk197790ba3bd2004-06-25 08:32:25 +00001313 ** An EXCLUSIVE lock, obtained after a PENDING lock is held, is
1314 ** implemented by obtaining a write-lock on the entire 'shared byte
1315 ** range'. Since all other locks require a read-lock on one of the bytes
1316 ** within this range, this ensures that no other locks are held on the
1317 ** database.
danielk1977f42f25c2004-06-25 07:21:28 +00001318 **
1319 ** The reason a single byte cannot be used instead of the 'shared byte
1320 ** range' is that some versions of windows do not support read-locks. By
1321 ** locking a random byte from a range, concurrent SHARED locks may exist
1322 ** even if the locking primitive used is always a write-lock.
1323 */
danielk19779a1d0ab2004-06-01 14:09:28 +00001324 int rc = SQLITE_OK;
drh054889e2005-11-30 03:20:31 +00001325 unixFile *pFile = (unixFile*)id;
1326 struct lockInfo *pLock = pFile->pLock;
danielk19779a1d0ab2004-06-01 14:09:28 +00001327 struct flock lock;
1328 int s;
1329
drh054889e2005-11-30 03:20:31 +00001330 assert( pFile );
drh4f0c5872007-03-26 22:05:01 +00001331 OSTRACE7("LOCK %d %s was %s(%s,%d) pid=%d\n", pFile->h,
drh054889e2005-11-30 03:20:31 +00001332 locktypeName(locktype), locktypeName(pFile->locktype),
1333 locktypeName(pLock->locktype), pLock->cnt , getpid());
danielk19779a1d0ab2004-06-01 14:09:28 +00001334
1335 /* If there is already a lock of this type or more restrictive on the
danielk1977ad94b582007-08-20 06:44:22 +00001336 ** unixFile, do nothing. Don't use the end_lock: exit path, as
danielk1977b4b47412007-08-17 15:53:36 +00001337 ** enterMutex() hasn't been called yet.
danielk19779a1d0ab2004-06-01 14:09:28 +00001338 */
drh054889e2005-11-30 03:20:31 +00001339 if( pFile->locktype>=locktype ){
drh4f0c5872007-03-26 22:05:01 +00001340 OSTRACE3("LOCK %d %s ok (already held)\n", pFile->h,
drh054889e2005-11-30 03:20:31 +00001341 locktypeName(locktype));
danielk19779a1d0ab2004-06-01 14:09:28 +00001342 return SQLITE_OK;
1343 }
1344
drhb3e04342004-06-08 00:47:47 +00001345 /* Make sure the locking sequence is correct
drh2ac3ee92004-06-07 16:27:46 +00001346 */
drh054889e2005-11-30 03:20:31 +00001347 assert( pFile->locktype!=NO_LOCK || locktype==SHARED_LOCK );
drhb3e04342004-06-08 00:47:47 +00001348 assert( locktype!=PENDING_LOCK );
drh054889e2005-11-30 03:20:31 +00001349 assert( locktype!=RESERVED_LOCK || pFile->locktype==SHARED_LOCK );
drh2ac3ee92004-06-07 16:27:46 +00001350
drh054889e2005-11-30 03:20:31 +00001351 /* This mutex is needed because pFile->pLock is shared across threads
drhb3e04342004-06-08 00:47:47 +00001352 */
danielk1977b4b47412007-08-17 15:53:36 +00001353 enterMutex();
danielk19779a1d0ab2004-06-01 14:09:28 +00001354
drh029b44b2006-01-15 00:13:15 +00001355 /* Make sure the current thread owns the pFile.
1356 */
1357 rc = transferOwnership(pFile);
1358 if( rc!=SQLITE_OK ){
danielk1977b4b47412007-08-17 15:53:36 +00001359 leaveMutex();
drh029b44b2006-01-15 00:13:15 +00001360 return rc;
1361 }
drh64b1bea2006-01-15 02:30:57 +00001362 pLock = pFile->pLock;
drh029b44b2006-01-15 00:13:15 +00001363
danielk1977ad94b582007-08-20 06:44:22 +00001364 /* If some thread using this PID has a lock via a different unixFile*
danielk19779a1d0ab2004-06-01 14:09:28 +00001365 ** handle that precludes the requested lock, return BUSY.
1366 */
drh054889e2005-11-30 03:20:31 +00001367 if( (pFile->locktype!=pLock->locktype &&
drh2ac3ee92004-06-07 16:27:46 +00001368 (pLock->locktype>=PENDING_LOCK || locktype>SHARED_LOCK))
danielk19779a1d0ab2004-06-01 14:09:28 +00001369 ){
1370 rc = SQLITE_BUSY;
1371 goto end_lock;
1372 }
1373
1374 /* If a SHARED lock is requested, and some thread using this PID already
1375 ** has a SHARED or RESERVED lock, then increment reference counts and
1376 ** return SQLITE_OK.
1377 */
1378 if( locktype==SHARED_LOCK &&
1379 (pLock->locktype==SHARED_LOCK || pLock->locktype==RESERVED_LOCK) ){
1380 assert( locktype==SHARED_LOCK );
drh054889e2005-11-30 03:20:31 +00001381 assert( pFile->locktype==0 );
danielk1977ecb2a962004-06-02 06:30:16 +00001382 assert( pLock->cnt>0 );
drh054889e2005-11-30 03:20:31 +00001383 pFile->locktype = SHARED_LOCK;
danielk19779a1d0ab2004-06-01 14:09:28 +00001384 pLock->cnt++;
drh054889e2005-11-30 03:20:31 +00001385 pFile->pOpen->nLock++;
danielk19779a1d0ab2004-06-01 14:09:28 +00001386 goto end_lock;
1387 }
1388
danielk197713adf8a2004-06-03 16:08:41 +00001389 lock.l_len = 1L;
drh2b4b5962005-06-15 17:47:55 +00001390
danielk19779a1d0ab2004-06-01 14:09:28 +00001391 lock.l_whence = SEEK_SET;
1392
drh3cde3bb2004-06-12 02:17:14 +00001393 /* A PENDING lock is needed before acquiring a SHARED lock and before
1394 ** acquiring an EXCLUSIVE lock. For the SHARED lock, the PENDING will
1395 ** be released.
danielk19779a1d0ab2004-06-01 14:09:28 +00001396 */
drh3cde3bb2004-06-12 02:17:14 +00001397 if( locktype==SHARED_LOCK
drh054889e2005-11-30 03:20:31 +00001398 || (locktype==EXCLUSIVE_LOCK && pFile->locktype<PENDING_LOCK)
drh3cde3bb2004-06-12 02:17:14 +00001399 ){
danielk1977489468c2004-06-28 08:25:47 +00001400 lock.l_type = (locktype==SHARED_LOCK?F_RDLCK:F_WRLCK);
drh2ac3ee92004-06-07 16:27:46 +00001401 lock.l_start = PENDING_BYTE;
drh054889e2005-11-30 03:20:31 +00001402 s = fcntl(pFile->h, F_SETLK, &lock);
drhe2396a12007-03-29 20:19:58 +00001403 if( s==(-1) ){
aswift5b1a2562008-08-22 00:22:35 +00001404 int tErrno = errno;
1405 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
1406 if( IS_LOCK_ERROR(rc) ){
1407 pFile->lastErrno = tErrno;
1408 }
danielk19779a1d0ab2004-06-01 14:09:28 +00001409 goto end_lock;
1410 }
drh3cde3bb2004-06-12 02:17:14 +00001411 }
1412
1413
1414 /* If control gets to this point, then actually go ahead and make
1415 ** operating system calls for the specified lock.
1416 */
1417 if( locktype==SHARED_LOCK ){
aswift5b1a2562008-08-22 00:22:35 +00001418 int tErrno = 0;
drh3cde3bb2004-06-12 02:17:14 +00001419 assert( pLock->cnt==0 );
1420 assert( pLock->locktype==0 );
danielk19779a1d0ab2004-06-01 14:09:28 +00001421
drh2ac3ee92004-06-07 16:27:46 +00001422 /* Now get the read-lock */
1423 lock.l_start = SHARED_FIRST;
1424 lock.l_len = SHARED_SIZE;
aswift5b1a2562008-08-22 00:22:35 +00001425 if( (s = fcntl(pFile->h, F_SETLK, &lock))==(-1) ){
1426 tErrno = errno;
1427 }
drh2ac3ee92004-06-07 16:27:46 +00001428 /* Drop the temporary PENDING lock */
1429 lock.l_start = PENDING_BYTE;
1430 lock.l_len = 1L;
danielk19779a1d0ab2004-06-01 14:09:28 +00001431 lock.l_type = F_UNLCK;
drh054889e2005-11-30 03:20:31 +00001432 if( fcntl(pFile->h, F_SETLK, &lock)!=0 ){
aswift5b1a2562008-08-22 00:22:35 +00001433 if( s != -1 ){
1434 /* This could happen with a network mount */
1435 tErrno = errno;
1436 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_UNLOCK);
1437 if( IS_LOCK_ERROR(rc) ){
1438 pFile->lastErrno = tErrno;
1439 }
1440 goto end_lock;
1441 }
drh2b4b5962005-06-15 17:47:55 +00001442 }
drhe2396a12007-03-29 20:19:58 +00001443 if( s==(-1) ){
aswift5b1a2562008-08-22 00:22:35 +00001444 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
1445 if( IS_LOCK_ERROR(rc) ){
1446 pFile->lastErrno = tErrno;
1447 }
drhbbd42a62004-05-22 17:41:58 +00001448 }else{
drh054889e2005-11-30 03:20:31 +00001449 pFile->locktype = SHARED_LOCK;
1450 pFile->pOpen->nLock++;
danielk19779a1d0ab2004-06-01 14:09:28 +00001451 pLock->cnt = 1;
drhbbd42a62004-05-22 17:41:58 +00001452 }
drh3cde3bb2004-06-12 02:17:14 +00001453 }else if( locktype==EXCLUSIVE_LOCK && pLock->cnt>1 ){
1454 /* We are trying for an exclusive lock but another thread in this
1455 ** same process is still holding a shared lock. */
1456 rc = SQLITE_BUSY;
drhbbd42a62004-05-22 17:41:58 +00001457 }else{
drh3cde3bb2004-06-12 02:17:14 +00001458 /* The request was for a RESERVED or EXCLUSIVE lock. It is
danielk19779a1d0ab2004-06-01 14:09:28 +00001459 ** assumed that there is a SHARED or greater lock on the file
1460 ** already.
1461 */
drh054889e2005-11-30 03:20:31 +00001462 assert( 0!=pFile->locktype );
danielk19779a1d0ab2004-06-01 14:09:28 +00001463 lock.l_type = F_WRLCK;
1464 switch( locktype ){
1465 case RESERVED_LOCK:
drh2ac3ee92004-06-07 16:27:46 +00001466 lock.l_start = RESERVED_BYTE;
danielk19779a1d0ab2004-06-01 14:09:28 +00001467 break;
danielk19779a1d0ab2004-06-01 14:09:28 +00001468 case EXCLUSIVE_LOCK:
drh2ac3ee92004-06-07 16:27:46 +00001469 lock.l_start = SHARED_FIRST;
1470 lock.l_len = SHARED_SIZE;
danielk19779a1d0ab2004-06-01 14:09:28 +00001471 break;
1472 default:
1473 assert(0);
1474 }
drh054889e2005-11-30 03:20:31 +00001475 s = fcntl(pFile->h, F_SETLK, &lock);
drhe2396a12007-03-29 20:19:58 +00001476 if( s==(-1) ){
aswift5b1a2562008-08-22 00:22:35 +00001477 int tErrno = errno;
1478 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
1479 if( IS_LOCK_ERROR(rc) ){
1480 pFile->lastErrno = tErrno;
1481 }
danielk19779a1d0ab2004-06-01 14:09:28 +00001482 }
drhbbd42a62004-05-22 17:41:58 +00001483 }
danielk19779a1d0ab2004-06-01 14:09:28 +00001484
danielk1977ecb2a962004-06-02 06:30:16 +00001485 if( rc==SQLITE_OK ){
drh054889e2005-11-30 03:20:31 +00001486 pFile->locktype = locktype;
danielk1977ecb2a962004-06-02 06:30:16 +00001487 pLock->locktype = locktype;
drh3cde3bb2004-06-12 02:17:14 +00001488 }else if( locktype==EXCLUSIVE_LOCK ){
drh054889e2005-11-30 03:20:31 +00001489 pFile->locktype = PENDING_LOCK;
drh3cde3bb2004-06-12 02:17:14 +00001490 pLock->locktype = PENDING_LOCK;
danielk1977ecb2a962004-06-02 06:30:16 +00001491 }
danielk19779a1d0ab2004-06-01 14:09:28 +00001492
1493end_lock:
danielk1977b4b47412007-08-17 15:53:36 +00001494 leaveMutex();
drh4f0c5872007-03-26 22:05:01 +00001495 OSTRACE4("LOCK %d %s %s\n", pFile->h, locktypeName(locktype),
danielk19772b444852004-06-29 07:45:33 +00001496 rc==SQLITE_OK ? "ok" : "failed");
drhbbd42a62004-05-22 17:41:58 +00001497 return rc;
1498}
1499
1500/*
drh054889e2005-11-30 03:20:31 +00001501** Lower the locking level on file descriptor pFile to locktype. locktype
drha6abd042004-06-09 17:37:22 +00001502** must be either NO_LOCK or SHARED_LOCK.
1503**
1504** If the locking level of the file descriptor is already at or below
1505** the requested locking level, this routine is a no-op.
drhbbd42a62004-05-22 17:41:58 +00001506*/
danielk197762079062007-08-15 17:08:46 +00001507static int unixUnlock(sqlite3_file *id, int locktype){
drha6abd042004-06-09 17:37:22 +00001508 struct lockInfo *pLock;
1509 struct flock lock;
drh9c105bb2004-10-02 20:38:28 +00001510 int rc = SQLITE_OK;
drh054889e2005-11-30 03:20:31 +00001511 unixFile *pFile = (unixFile*)id;
drh1aa5af12008-03-07 19:51:14 +00001512 int h;
drha6abd042004-06-09 17:37:22 +00001513
drh054889e2005-11-30 03:20:31 +00001514 assert( pFile );
drh4f0c5872007-03-26 22:05:01 +00001515 OSTRACE7("UNLOCK %d %d was %d(%d,%d) pid=%d\n", pFile->h, locktype,
drh054889e2005-11-30 03:20:31 +00001516 pFile->locktype, pFile->pLock->locktype, pFile->pLock->cnt, getpid());
drha6abd042004-06-09 17:37:22 +00001517
1518 assert( locktype<=SHARED_LOCK );
drh054889e2005-11-30 03:20:31 +00001519 if( pFile->locktype<=locktype ){
drha6abd042004-06-09 17:37:22 +00001520 return SQLITE_OK;
1521 }
drhf1a221e2006-01-15 17:27:17 +00001522 if( CHECK_THREADID(pFile) ){
1523 return SQLITE_MISUSE;
1524 }
danielk1977b4b47412007-08-17 15:53:36 +00001525 enterMutex();
drh1aa5af12008-03-07 19:51:14 +00001526 h = pFile->h;
drh054889e2005-11-30 03:20:31 +00001527 pLock = pFile->pLock;
drha6abd042004-06-09 17:37:22 +00001528 assert( pLock->cnt!=0 );
drh054889e2005-11-30 03:20:31 +00001529 if( pFile->locktype>SHARED_LOCK ){
1530 assert( pLock->locktype==pFile->locktype );
drh1aa5af12008-03-07 19:51:14 +00001531 SimulateIOErrorBenign(1);
1532 SimulateIOError( h=(-1) )
1533 SimulateIOErrorBenign(0);
drh9c105bb2004-10-02 20:38:28 +00001534 if( locktype==SHARED_LOCK ){
1535 lock.l_type = F_RDLCK;
1536 lock.l_whence = SEEK_SET;
1537 lock.l_start = SHARED_FIRST;
1538 lock.l_len = SHARED_SIZE;
drh1aa5af12008-03-07 19:51:14 +00001539 if( fcntl(h, F_SETLK, &lock)==(-1) ){
aswift5b1a2562008-08-22 00:22:35 +00001540 int tErrno = errno;
1541 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_RDLOCK);
1542 if( IS_LOCK_ERROR(rc) ){
1543 pFile->lastErrno = tErrno;
1544 }
1545 goto end_unlock;
drh9c105bb2004-10-02 20:38:28 +00001546 }
1547 }
drhbbd42a62004-05-22 17:41:58 +00001548 lock.l_type = F_UNLCK;
1549 lock.l_whence = SEEK_SET;
drha6abd042004-06-09 17:37:22 +00001550 lock.l_start = PENDING_BYTE;
1551 lock.l_len = 2L; assert( PENDING_BYTE+1==RESERVED_BYTE );
drh1aa5af12008-03-07 19:51:14 +00001552 if( fcntl(h, F_SETLK, &lock)!=(-1) ){
drh2b4b5962005-06-15 17:47:55 +00001553 pLock->locktype = SHARED_LOCK;
1554 }else{
aswift5b1a2562008-08-22 00:22:35 +00001555 int tErrno = errno;
1556 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_UNLOCK);
1557 if( IS_LOCK_ERROR(rc) ){
1558 pFile->lastErrno = tErrno;
1559 }
1560 goto end_unlock;
drh2b4b5962005-06-15 17:47:55 +00001561 }
drhbbd42a62004-05-22 17:41:58 +00001562 }
drha6abd042004-06-09 17:37:22 +00001563 if( locktype==NO_LOCK ){
1564 struct openCnt *pOpen;
danielk1977ecb2a962004-06-02 06:30:16 +00001565
drha6abd042004-06-09 17:37:22 +00001566 /* Decrement the shared lock counter. Release the lock using an
1567 ** OS call only when all threads in this same process have released
1568 ** the lock.
1569 */
1570 pLock->cnt--;
1571 if( pLock->cnt==0 ){
1572 lock.l_type = F_UNLCK;
1573 lock.l_whence = SEEK_SET;
1574 lock.l_start = lock.l_len = 0L;
drh1aa5af12008-03-07 19:51:14 +00001575 SimulateIOErrorBenign(1);
1576 SimulateIOError( h=(-1) )
1577 SimulateIOErrorBenign(0);
1578 if( fcntl(h, F_SETLK, &lock)!=(-1) ){
drh2b4b5962005-06-15 17:47:55 +00001579 pLock->locktype = NO_LOCK;
1580 }else{
aswift5b1a2562008-08-22 00:22:35 +00001581 int tErrno = errno;
danielk19775ad6a882008-09-15 04:20:31 +00001582 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_UNLOCK);
aswift5b1a2562008-08-22 00:22:35 +00001583 if( IS_LOCK_ERROR(rc) ){
1584 pFile->lastErrno = tErrno;
1585 }
drh1aa5af12008-03-07 19:51:14 +00001586 pLock->cnt = 1;
aswift5b1a2562008-08-22 00:22:35 +00001587 goto end_unlock;
drh2b4b5962005-06-15 17:47:55 +00001588 }
drha6abd042004-06-09 17:37:22 +00001589 }
1590
drhbbd42a62004-05-22 17:41:58 +00001591 /* Decrement the count of locks against this same file. When the
1592 ** count reaches zero, close any other file descriptors whose close
1593 ** was deferred because of outstanding locks.
1594 */
drh1aa5af12008-03-07 19:51:14 +00001595 if( rc==SQLITE_OK ){
1596 pOpen = pFile->pOpen;
1597 pOpen->nLock--;
1598 assert( pOpen->nLock>=0 );
1599 if( pOpen->nLock==0 && pOpen->nPending>0 ){
1600 int i;
1601 for(i=0; i<pOpen->nPending; i++){
1602 close(pOpen->aPending[i]);
1603 }
drhda0e7682008-07-30 15:27:54 +00001604 sqlite3_free(pOpen->aPending);
drh1aa5af12008-03-07 19:51:14 +00001605 pOpen->nPending = 0;
1606 pOpen->aPending = 0;
drhbbd42a62004-05-22 17:41:58 +00001607 }
drhbbd42a62004-05-22 17:41:58 +00001608 }
1609 }
aswift5b1a2562008-08-22 00:22:35 +00001610
1611end_unlock:
danielk1977b4b47412007-08-17 15:53:36 +00001612 leaveMutex();
drh1aa5af12008-03-07 19:51:14 +00001613 if( rc==SQLITE_OK ) pFile->locktype = locktype;
drh9c105bb2004-10-02 20:38:28 +00001614 return rc;
drhbbd42a62004-05-22 17:41:58 +00001615}
1616
1617/*
danielk1977e339d652008-06-28 11:23:00 +00001618** This function performs the parts of the "close file" operation
1619** common to all locking schemes. It closes the directory and file
1620** handles, if they are valid, and sets all fields of the unixFile
1621** structure to 0.
1622*/
1623static int closeUnixFile(sqlite3_file *id){
1624 unixFile *pFile = (unixFile*)id;
1625 if( pFile ){
1626 if( pFile->dirfd>=0 ){
1627 close(pFile->dirfd);
1628 }
1629 if( pFile->h>=0 ){
1630 close(pFile->h);
1631 }
1632 OSTRACE2("CLOSE %-3d\n", pFile->h);
1633 OpenCounter(-1);
1634 memset(pFile, 0, sizeof(unixFile));
1635 }
1636 return SQLITE_OK;
1637}
1638
1639/*
danielk1977e3026632004-06-22 11:29:02 +00001640** Close a file.
1641*/
danielk197762079062007-08-15 17:08:46 +00001642static int unixClose(sqlite3_file *id){
danielk1977e339d652008-06-28 11:23:00 +00001643 if( id ){
1644 unixFile *pFile = (unixFile *)id;
1645 unixUnlock(id, NO_LOCK);
1646 enterMutex();
danielk19776cb427f2008-06-30 10:16:04 +00001647 if( pFile->pOpen && pFile->pOpen->nLock ){
danielk1977e339d652008-06-28 11:23:00 +00001648 /* If there are outstanding locks, do not actually close the file just
1649 ** yet because that would clear those locks. Instead, add the file
1650 ** descriptor to pOpen->aPending. It will be automatically closed when
1651 ** the last lock is cleared.
1652 */
1653 int *aNew;
1654 struct openCnt *pOpen = pFile->pOpen;
drhda0e7682008-07-30 15:27:54 +00001655 aNew = sqlite3_realloc(pOpen->aPending, (pOpen->nPending+1)*sizeof(int) );
danielk1977e339d652008-06-28 11:23:00 +00001656 if( aNew==0 ){
1657 /* If a malloc fails, just leak the file descriptor */
1658 }else{
1659 pOpen->aPending = aNew;
1660 pOpen->aPending[pOpen->nPending] = pFile->h;
1661 pOpen->nPending++;
1662 pFile->h = -1;
1663 }
danielk1977e3026632004-06-22 11:29:02 +00001664 }
danielk1977e339d652008-06-28 11:23:00 +00001665 releaseLockInfo(pFile->pLock);
1666 releaseOpenCnt(pFile->pOpen);
1667 closeUnixFile(id);
1668 leaveMutex();
danielk1977e3026632004-06-22 11:29:02 +00001669 }
drh02afc862006-01-20 18:10:57 +00001670 return SQLITE_OK;
danielk1977e3026632004-06-22 11:29:02 +00001671}
1672
drhbfe66312006-10-03 17:40:40 +00001673
drh40bbb0a2008-09-23 10:23:26 +00001674#if SQLITE_ENABLE_LOCKING_STYLE
drhbfe66312006-10-03 17:40:40 +00001675#pragma mark AFP Support
1676
/*
** AFP-specific locking state.  The AFP locking routines reach it through
** the lockingContext field of a unixFile.
*/
typedef struct afpLockingContext afpLockingContext;
struct afpLockingContext {
  unsigned long long sharedLockByte;  /* Offset from SHARED_FIRST of the
                                      ** byte locked for a SHARED lock */
  const char *filePath;               /* Path handed to _AFPFSSetLock() */
};
1685
/*
** Parameter block passed to fsctl() with the afpfsByteRangeLock2FSCTL
** request in order to lock or unlock a byte range.
*/
struct ByteRangeLockPB2
{
  unsigned long long offset;        /* offset to first byte to lock */
  unsigned long long length;        /* number of bytes to lock */
  unsigned long long retRangeStart; /* number of 1st byte locked if successful */
  unsigned char unLockFlag;         /* 1 = unlock, 0 = lock */
  unsigned char startEndFlag;       /* 1 = relative to end of fork,
                                    ** 0 = relative to start */
  int fd;                           /* file descriptor to associate this
                                    ** lock with */
};

/* fsctl() request code for the byte-range lock operation */
#define afpfsByteRangeLock2FSCTL        _IOWR('z', 23, struct ByteRangeLockPB2)
drhbfe66312006-10-03 17:40:40 +00001697
danielk1977ad94b582007-08-20 06:44:22 +00001698/*
aswift5b1a2562008-08-22 00:22:35 +00001699 ** Return SQLITE_OK on success, SQLITE_BUSY on failure.
1700 */
danielk1977ad94b582007-08-20 06:44:22 +00001701static int _AFPFSSetLock(
1702 const char *path,
aswift5b1a2562008-08-22 00:22:35 +00001703 unixFile *pFile,
danielk1977ad94b582007-08-20 06:44:22 +00001704 unsigned long long offset,
1705 unsigned long long length,
1706 int setLockFlag
1707){
drhfd131da2007-08-07 17:13:03 +00001708 struct ByteRangeLockPB2 pb;
drhbfe66312006-10-03 17:40:40 +00001709 int err;
1710
1711 pb.unLockFlag = setLockFlag ? 0 : 1;
1712 pb.startEndFlag = 0;
1713 pb.offset = offset;
1714 pb.length = length;
aswift5b1a2562008-08-22 00:22:35 +00001715 pb.fd = pFile->h;
drh4f0c5872007-03-26 22:05:01 +00001716 OSTRACE5("AFPLOCK setting lock %s for %d in range %llx:%llx\n",
aswift5b1a2562008-08-22 00:22:35 +00001717 (setLockFlag?"ON":"OFF"), pFile->h, offset, length);
drhbfe66312006-10-03 17:40:40 +00001718 err = fsctl(path, afpfsByteRangeLock2FSCTL, &pb, 0);
1719 if ( err==-1 ) {
aswift5b1a2562008-08-22 00:22:35 +00001720 int rc;
1721 int tErrno = errno;
1722 OSTRACE4("AFPLOCK failed to fsctl() '%s' %d %s\n", path, tErrno, strerror(tErrno));
1723 rc = sqliteErrorFromPosixError(tErrno, setLockFlag ? SQLITE_IOERR_LOCK : SQLITE_IOERR_UNLOCK); /* error */
1724 if( IS_LOCK_ERROR(rc) ){
1725 pFile->lastErrno = tErrno;
1726 }
1727 return rc;
drhbfe66312006-10-03 17:40:40 +00001728 } else {
aswift5b1a2562008-08-22 00:22:35 +00001729 return SQLITE_OK;
drhbfe66312006-10-03 17:40:40 +00001730 }
1731}
1732
aswift5b1a2562008-08-22 00:22:35 +00001733/* AFP-style reserved lock checking following the behavior of
1734** unixCheckReservedLock, see the unixCheckReservedLock function comments */
danielk1977e339d652008-06-28 11:23:00 +00001735static int afpCheckReservedLock(sqlite3_file *id, int *pResOut){
aswift5b1a2562008-08-22 00:22:35 +00001736 int rc = SQLITE_OK;
1737 int reserved = 0;
drhbfe66312006-10-03 17:40:40 +00001738 unixFile *pFile = (unixFile*)id;
1739
aswift5b1a2562008-08-22 00:22:35 +00001740 SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; );
1741
1742 assert( pFile );
drhbfe66312006-10-03 17:40:40 +00001743 afpLockingContext *context = (afpLockingContext *) pFile->lockingContext;
1744
1745 /* Check if a thread in this process holds such a lock */
1746 if( pFile->locktype>SHARED_LOCK ){
aswift5b1a2562008-08-22 00:22:35 +00001747 reserved = 1;
drhbfe66312006-10-03 17:40:40 +00001748 }
1749
1750 /* Otherwise see if some other process holds it.
1751 */
aswift5b1a2562008-08-22 00:22:35 +00001752 if( !reserved ){
1753 /* lock the RESERVED byte */
1754 int lrc = _AFPFSSetLock(context->filePath, pFile, RESERVED_BYTE, 1,1);
1755 if( SQLITE_OK==lrc ){
drhbfe66312006-10-03 17:40:40 +00001756 /* if we succeeded in taking the reserved lock, unlock it to restore
1757 ** the original state */
aswift5b1a2562008-08-22 00:22:35 +00001758 lrc = _AFPFSSetLock(context->filePath, pFile, RESERVED_BYTE, 1, 0);
1759 } else {
1760 /* if we failed to get the lock then someone else must have it */
1761 reserved = 1;
1762 }
1763 if( IS_LOCK_ERROR(lrc) ){
1764 rc=lrc;
drhbfe66312006-10-03 17:40:40 +00001765 }
1766 }
drhbfe66312006-10-03 17:40:40 +00001767
aswift5b1a2562008-08-22 00:22:35 +00001768 OSTRACE4("TEST WR-LOCK %d %d %d\n", pFile->h, rc, reserved);
1769
1770 *pResOut = reserved;
1771 return rc;
drhbfe66312006-10-03 17:40:40 +00001772}
1773
1774/* AFP-style locking following the behavior of unixLock, see the unixLock
1775** function comments for details of lock management. */
danielk1977e339d652008-06-28 11:23:00 +00001776static int afpLock(sqlite3_file *id, int locktype){
drhbfe66312006-10-03 17:40:40 +00001777 int rc = SQLITE_OK;
1778 unixFile *pFile = (unixFile*)id;
1779 afpLockingContext *context = (afpLockingContext *) pFile->lockingContext;
drhbfe66312006-10-03 17:40:40 +00001780
1781 assert( pFile );
drh4f0c5872007-03-26 22:05:01 +00001782 OSTRACE5("LOCK %d %s was %s pid=%d\n", pFile->h,
drh339eb0b2008-03-07 15:34:11 +00001783 locktypeName(locktype), locktypeName(pFile->locktype), getpid());
1784
drhbfe66312006-10-03 17:40:40 +00001785 /* If there is already a lock of this type or more restrictive on the
drh339eb0b2008-03-07 15:34:11 +00001786 ** unixFile, do nothing. Don't use the afp_end_lock: exit path, as
1787 ** enterMutex() hasn't been called yet.
1788 */
drhbfe66312006-10-03 17:40:40 +00001789 if( pFile->locktype>=locktype ){
drh4f0c5872007-03-26 22:05:01 +00001790 OSTRACE3("LOCK %d %s ok (already held)\n", pFile->h,
drhbfe66312006-10-03 17:40:40 +00001791 locktypeName(locktype));
1792 return SQLITE_OK;
1793 }
1794
1795 /* Make sure the locking sequence is correct
drh339eb0b2008-03-07 15:34:11 +00001796 */
drhbfe66312006-10-03 17:40:40 +00001797 assert( pFile->locktype!=NO_LOCK || locktype==SHARED_LOCK );
1798 assert( locktype!=PENDING_LOCK );
1799 assert( locktype!=RESERVED_LOCK || pFile->locktype==SHARED_LOCK );
1800
1801 /* This mutex is needed because pFile->pLock is shared across threads
drh339eb0b2008-03-07 15:34:11 +00001802 */
danielk1977b4b47412007-08-17 15:53:36 +00001803 enterMutex();
drhbfe66312006-10-03 17:40:40 +00001804
1805 /* Make sure the current thread owns the pFile.
drh339eb0b2008-03-07 15:34:11 +00001806 */
drhbfe66312006-10-03 17:40:40 +00001807 rc = transferOwnership(pFile);
1808 if( rc!=SQLITE_OK ){
danielk1977b4b47412007-08-17 15:53:36 +00001809 leaveMutex();
drhbfe66312006-10-03 17:40:40 +00001810 return rc;
1811 }
1812
1813 /* A PENDING lock is needed before acquiring a SHARED lock and before
drh339eb0b2008-03-07 15:34:11 +00001814 ** acquiring an EXCLUSIVE lock. For the SHARED lock, the PENDING will
1815 ** be released.
1816 */
drhbfe66312006-10-03 17:40:40 +00001817 if( locktype==SHARED_LOCK
1818 || (locktype==EXCLUSIVE_LOCK && pFile->locktype<PENDING_LOCK)
drh339eb0b2008-03-07 15:34:11 +00001819 ){
1820 int failed;
aswift5b1a2562008-08-22 00:22:35 +00001821 failed = _AFPFSSetLock(context->filePath, pFile, PENDING_BYTE, 1, 1);
drhbfe66312006-10-03 17:40:40 +00001822 if (failed) {
aswift5b1a2562008-08-22 00:22:35 +00001823 rc = failed;
drhbfe66312006-10-03 17:40:40 +00001824 goto afp_end_lock;
1825 }
1826 }
1827
1828 /* If control gets to this point, then actually go ahead and make
drh339eb0b2008-03-07 15:34:11 +00001829 ** operating system calls for the specified lock.
1830 */
drhbfe66312006-10-03 17:40:40 +00001831 if( locktype==SHARED_LOCK ){
aswift5b1a2562008-08-22 00:22:35 +00001832 int lk, lrc1, lrc2, lrc1Errno;
drhbfe66312006-10-03 17:40:40 +00001833
aswift5b1a2562008-08-22 00:22:35 +00001834 /* Now get the read-lock SHARED_LOCK */
drhbfe66312006-10-03 17:40:40 +00001835 /* note that the quality of the randomness doesn't matter that much */
1836 lk = random();
1837 context->sharedLockByte = (lk & 0x7fffffff)%(SHARED_SIZE - 1);
aswift5b1a2562008-08-22 00:22:35 +00001838 lrc1 = _AFPFSSetLock(context->filePath, pFile,
1839 SHARED_FIRST+context->sharedLockByte, 1, 1);
1840 if( IS_LOCK_ERROR(lrc1) ){
1841 lrc1Errno = pFile->lastErrno;
drhbfe66312006-10-03 17:40:40 +00001842 }
aswift5b1a2562008-08-22 00:22:35 +00001843 /* Drop the temporary PENDING lock */
1844 lrc2 = _AFPFSSetLock(context->filePath, pFile, PENDING_BYTE, 1, 0);
drhbfe66312006-10-03 17:40:40 +00001845
aswift5b1a2562008-08-22 00:22:35 +00001846 if( IS_LOCK_ERROR(lrc1) ) {
1847 pFile->lastErrno = lrc1Errno;
1848 rc = lrc1;
1849 goto afp_end_lock;
1850 } else if( IS_LOCK_ERROR(lrc2) ){
1851 rc = lrc2;
1852 goto afp_end_lock;
1853 } else if( lrc1 != SQLITE_OK ) {
1854 rc = lrc1;
drhbfe66312006-10-03 17:40:40 +00001855 } else {
1856 pFile->locktype = SHARED_LOCK;
1857 }
1858 }else{
1859 /* The request was for a RESERVED or EXCLUSIVE lock. It is
1860 ** assumed that there is a SHARED or greater lock on the file
1861 ** already.
1862 */
1863 int failed = 0;
1864 assert( 0!=pFile->locktype );
1865 if (locktype >= RESERVED_LOCK && pFile->locktype < RESERVED_LOCK) {
1866 /* Acquire a RESERVED lock */
aswift5b1a2562008-08-22 00:22:35 +00001867 failed = _AFPFSSetLock(context->filePath, pFile, RESERVED_BYTE, 1,1);
drhbfe66312006-10-03 17:40:40 +00001868 }
1869 if (!failed && locktype == EXCLUSIVE_LOCK) {
1870 /* Acquire an EXCLUSIVE lock */
1871
1872 /* Remove the shared lock before trying the range. we'll need to
danielk1977e339d652008-06-28 11:23:00 +00001873 ** reestablish the shared lock if we can't get the afpUnlock
drhbfe66312006-10-03 17:40:40 +00001874 */
aswift5b1a2562008-08-22 00:22:35 +00001875 if (!(failed = _AFPFSSetLock(context->filePath, pFile, SHARED_FIRST +
1876 context->sharedLockByte, 1, 0))) {
drhbfe66312006-10-03 17:40:40 +00001877 /* now attemmpt to get the exclusive lock range */
aswift5b1a2562008-08-22 00:22:35 +00001878 failed = _AFPFSSetLock(context->filePath, pFile, SHARED_FIRST,
drhbfe66312006-10-03 17:40:40 +00001879 SHARED_SIZE, 1);
aswift5b1a2562008-08-22 00:22:35 +00001880 if (failed && (failed = _AFPFSSetLock(context->filePath, pFile,
1881 SHARED_FIRST + context->sharedLockByte, 1, 1))) {
1882 rc = failed;
drhbfe66312006-10-03 17:40:40 +00001883 }
1884 } else {
aswift5b1a2562008-08-22 00:22:35 +00001885 rc = failed;
drhbfe66312006-10-03 17:40:40 +00001886 }
1887 }
aswift5b1a2562008-08-22 00:22:35 +00001888 if( failed ){
1889 rc = failed;
drhbfe66312006-10-03 17:40:40 +00001890 }
1891 }
1892
1893 if( rc==SQLITE_OK ){
1894 pFile->locktype = locktype;
1895 }else if( locktype==EXCLUSIVE_LOCK ){
1896 pFile->locktype = PENDING_LOCK;
1897 }
1898
1899afp_end_lock:
drh339eb0b2008-03-07 15:34:11 +00001900 leaveMutex();
drh4f0c5872007-03-26 22:05:01 +00001901 OSTRACE4("LOCK %d %s %s\n", pFile->h, locktypeName(locktype),
drhbfe66312006-10-03 17:40:40 +00001902 rc==SQLITE_OK ? "ok" : "failed");
1903 return rc;
1904}
1905
1906/*
drh339eb0b2008-03-07 15:34:11 +00001907** Lower the locking level on file descriptor pFile to locktype. locktype
1908** must be either NO_LOCK or SHARED_LOCK.
1909**
1910** If the locking level of the file descriptor is already at or below
1911** the requested locking level, this routine is a no-op.
1912*/
danielk1977e339d652008-06-28 11:23:00 +00001913static int afpUnlock(sqlite3_file *id, int locktype) {
drhbfe66312006-10-03 17:40:40 +00001914 int rc = SQLITE_OK;
1915 unixFile *pFile = (unixFile*)id;
1916 afpLockingContext *context = (afpLockingContext *) pFile->lockingContext;
1917
1918 assert( pFile );
drh4f0c5872007-03-26 22:05:01 +00001919 OSTRACE5("UNLOCK %d %d was %d pid=%d\n", pFile->h, locktype,
drhbfe66312006-10-03 17:40:40 +00001920 pFile->locktype, getpid());
aswift5b1a2562008-08-22 00:22:35 +00001921
drhbfe66312006-10-03 17:40:40 +00001922 assert( locktype<=SHARED_LOCK );
1923 if( pFile->locktype<=locktype ){
1924 return SQLITE_OK;
1925 }
1926 if( CHECK_THREADID(pFile) ){
1927 return SQLITE_MISUSE;
1928 }
danielk1977b4b47412007-08-17 15:53:36 +00001929 enterMutex();
aswift5b1a2562008-08-22 00:22:35 +00001930 int failed = SQLITE_OK;
drhbfe66312006-10-03 17:40:40 +00001931 if( pFile->locktype>SHARED_LOCK ){
1932 if( locktype==SHARED_LOCK ){
drhbfe66312006-10-03 17:40:40 +00001933
1934 /* unlock the exclusive range - then re-establish the shared lock */
1935 if (pFile->locktype==EXCLUSIVE_LOCK) {
aswift5b1a2562008-08-22 00:22:35 +00001936 failed = _AFPFSSetLock(context->filePath, pFile, SHARED_FIRST,
drhbfe66312006-10-03 17:40:40 +00001937 SHARED_SIZE, 0);
1938 if (!failed) {
1939 /* successfully removed the exclusive lock */
aswift5b1a2562008-08-22 00:22:35 +00001940 if ((failed = _AFPFSSetLock(context->filePath, pFile, SHARED_FIRST+
1941 context->sharedLockByte, 1, 1))) {
drhbfe66312006-10-03 17:40:40 +00001942 /* failed to re-establish our shared lock */
aswift5b1a2562008-08-22 00:22:35 +00001943 rc = failed;
drhbfe66312006-10-03 17:40:40 +00001944 }
1945 } else {
aswift5b1a2562008-08-22 00:22:35 +00001946 rc = failed;
drhbfe66312006-10-03 17:40:40 +00001947 }
1948 }
1949 }
1950 if (rc == SQLITE_OK && pFile->locktype>=PENDING_LOCK) {
aswift5b1a2562008-08-22 00:22:35 +00001951 if ((failed = _AFPFSSetLock(context->filePath, pFile,
1952 PENDING_BYTE, 1, 0))){
drhbfe66312006-10-03 17:40:40 +00001953 /* failed to release the pending lock */
aswift5b1a2562008-08-22 00:22:35 +00001954 rc = failed;
drhbfe66312006-10-03 17:40:40 +00001955 }
1956 }
1957 if (rc == SQLITE_OK && pFile->locktype>=RESERVED_LOCK) {
aswift5b1a2562008-08-22 00:22:35 +00001958 if ((failed = _AFPFSSetLock(context->filePath, pFile,
1959 RESERVED_BYTE, 1, 0))) {
drhbfe66312006-10-03 17:40:40 +00001960 /* failed to release the reserved lock */
aswift5b1a2562008-08-22 00:22:35 +00001961 rc = failed;
drhbfe66312006-10-03 17:40:40 +00001962 }
1963 }
1964 }
1965 if( locktype==NO_LOCK ){
aswift5b1a2562008-08-22 00:22:35 +00001966 int failed = _AFPFSSetLock(context->filePath, pFile,
drhbfe66312006-10-03 17:40:40 +00001967 SHARED_FIRST + context->sharedLockByte, 1, 0);
1968 if (failed) {
aswift5b1a2562008-08-22 00:22:35 +00001969 rc = failed;
drhbfe66312006-10-03 17:40:40 +00001970 }
1971 }
1972 if (rc == SQLITE_OK)
1973 pFile->locktype = locktype;
danielk1977b4b47412007-08-17 15:53:36 +00001974 leaveMutex();
drhbfe66312006-10-03 17:40:40 +00001975 return rc;
1976}
1977
1978/*
drh339eb0b2008-03-07 15:34:11 +00001979** Close a file & cleanup AFP specific locking context
1980*/
danielk1977e339d652008-06-28 11:23:00 +00001981static int afpClose(sqlite3_file *id) {
1982 if( id ){
1983 unixFile *pFile = (unixFile*)id;
1984 afpUnlock(id, NO_LOCK);
1985 sqlite3_free(pFile->lockingContext);
1986 }
1987 return closeUnixFile(id);
drhbfe66312006-10-03 17:40:40 +00001988}
1989
1990
1991#pragma mark flock() style locking
1992
/*
** flock() style locking keeps no per-file state, so the
** flockLockingContext is not used.
*/
typedef void flockLockingContext;
1997
aswift5b1a2562008-08-22 00:22:35 +00001998/* flock-style reserved lock checking following the behavior of
1999 ** unixCheckReservedLock, see the unixCheckReservedLock function comments */
danielk1977e339d652008-06-28 11:23:00 +00002000static int flockCheckReservedLock(sqlite3_file *id, int *pResOut){
aswift5b1a2562008-08-22 00:22:35 +00002001 int rc = SQLITE_OK;
2002 int reserved = 0;
drhbfe66312006-10-03 17:40:40 +00002003 unixFile *pFile = (unixFile*)id;
2004
aswift5b1a2562008-08-22 00:22:35 +00002005 SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; );
2006
2007 assert( pFile );
2008
2009 /* Check if a thread in this process holds such a lock */
2010 if( pFile->locktype>SHARED_LOCK ){
2011 reserved = 1;
2012 }
2013
2014 /* Otherwise see if some other process holds it. */
2015 if( !reserved ){
drh3b62b2f2007-06-08 18:27:03 +00002016 /* attempt to get the lock */
aswift5b1a2562008-08-22 00:22:35 +00002017 int lrc = flock(pFile->h, LOCK_EX | LOCK_NB);
2018 if( !lrc ){
drh3b62b2f2007-06-08 18:27:03 +00002019 /* got the lock, unlock it */
aswift5b1a2562008-08-22 00:22:35 +00002020 lrc = flock(pFile->h, LOCK_UN);
2021 if ( lrc ) {
2022 int tErrno = errno;
2023 /* unlock failed with an error */
2024 lrc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_UNLOCK);
2025 if( IS_LOCK_ERROR(lrc) ){
2026 pFile->lastErrno = tErrno;
2027 rc = lrc;
2028 }
2029 }
2030 } else {
2031 int tErrno = errno;
2032 reserved = 1;
2033 /* someone else might have it reserved */
2034 lrc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
2035 if( IS_LOCK_ERROR(lrc) ){
2036 pFile->lastErrno = tErrno;
2037 rc = lrc;
2038 }
drhbfe66312006-10-03 17:40:40 +00002039 }
drhbfe66312006-10-03 17:40:40 +00002040 }
aswift5b1a2562008-08-22 00:22:35 +00002041 OSTRACE4("TEST WR-LOCK %d %d %d\n", pFile->h, rc, reserved);
danielk1977861f7452008-06-05 11:39:11 +00002042
aswift5b1a2562008-08-22 00:22:35 +00002043 *pResOut = reserved;
2044 return rc;
drhbfe66312006-10-03 17:40:40 +00002045}
2046
danielk1977e339d652008-06-28 11:23:00 +00002047static int flockLock(sqlite3_file *id, int locktype) {
aswift5b1a2562008-08-22 00:22:35 +00002048 int rc = SQLITE_OK;
drhbfe66312006-10-03 17:40:40 +00002049 unixFile *pFile = (unixFile*)id;
aswift5b1a2562008-08-22 00:22:35 +00002050
2051 assert( pFile );
2052
drh3b62b2f2007-06-08 18:27:03 +00002053 /* if we already have a lock, it is exclusive.
2054 ** Just adjust level and punt on outta here. */
drhbfe66312006-10-03 17:40:40 +00002055 if (pFile->locktype > NO_LOCK) {
2056 pFile->locktype = locktype;
2057 return SQLITE_OK;
2058 }
2059
drh3b62b2f2007-06-08 18:27:03 +00002060 /* grab an exclusive lock */
aswift5b1a2562008-08-22 00:22:35 +00002061
2062 if (flock(pFile->h, LOCK_EX | LOCK_NB)) {
2063 int tErrno = errno;
drh3b62b2f2007-06-08 18:27:03 +00002064 /* didn't get, must be busy */
aswift5b1a2562008-08-22 00:22:35 +00002065 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
2066 if( IS_LOCK_ERROR(rc) ){
2067 pFile->lastErrno = tErrno;
2068 }
drhbfe66312006-10-03 17:40:40 +00002069 } else {
drh3b62b2f2007-06-08 18:27:03 +00002070 /* got it, set the type and return ok */
drhbfe66312006-10-03 17:40:40 +00002071 pFile->locktype = locktype;
drhbfe66312006-10-03 17:40:40 +00002072 }
aswift5b1a2562008-08-22 00:22:35 +00002073 OSTRACE4("LOCK %d %s %s\n", pFile->h, locktypeName(locktype),
2074 rc==SQLITE_OK ? "ok" : "failed");
2075 return rc;
drhbfe66312006-10-03 17:40:40 +00002076}
2077
danielk1977e339d652008-06-28 11:23:00 +00002078static int flockUnlock(sqlite3_file *id, int locktype) {
drhbfe66312006-10-03 17:40:40 +00002079 unixFile *pFile = (unixFile*)id;
2080
aswift5b1a2562008-08-22 00:22:35 +00002081 assert( pFile );
2082 OSTRACE5("UNLOCK %d %d was %d pid=%d\n", pFile->h, locktype,
2083 pFile->locktype, getpid());
drhbfe66312006-10-03 17:40:40 +00002084 assert( locktype<=SHARED_LOCK );
2085
drh3b62b2f2007-06-08 18:27:03 +00002086 /* no-op if possible */
drhbfe66312006-10-03 17:40:40 +00002087 if( pFile->locktype==locktype ){
2088 return SQLITE_OK;
2089 }
2090
drh3b62b2f2007-06-08 18:27:03 +00002091 /* shared can just be set because we always have an exclusive */
drhbfe66312006-10-03 17:40:40 +00002092 if (locktype==SHARED_LOCK) {
2093 pFile->locktype = locktype;
2094 return SQLITE_OK;
2095 }
2096
drh3b62b2f2007-06-08 18:27:03 +00002097 /* no, really, unlock. */
drhbfe66312006-10-03 17:40:40 +00002098 int rc = flock(pFile->h, LOCK_UN);
aswift5b1a2562008-08-22 00:22:35 +00002099 if (rc) {
2100 int r, tErrno = errno;
2101 r = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_UNLOCK);
2102 if( IS_LOCK_ERROR(r) ){
2103 pFile->lastErrno = tErrno;
2104 }
2105 return r;
2106 } else {
drhbfe66312006-10-03 17:40:40 +00002107 pFile->locktype = NO_LOCK;
2108 return SQLITE_OK;
2109 }
2110}
2111
2112/*
drh339eb0b2008-03-07 15:34:11 +00002113** Close a file.
2114*/
danielk1977e339d652008-06-28 11:23:00 +00002115static int flockClose(sqlite3_file *id) {
2116 if( id ){
2117 flockUnlock(id, NO_LOCK);
2118 }
2119 return closeUnixFile(id);
drhbfe66312006-10-03 17:40:40 +00002120}
2121
2122#pragma mark Old-School .lock file based locking
2123
aswift5b1a2562008-08-22 00:22:35 +00002124/* Dotlock-style reserved lock checking following the behavior of
2125** unixCheckReservedLock, see the unixCheckReservedLock function comments */
danielk1977e339d652008-06-28 11:23:00 +00002126static int dotlockCheckReservedLock(sqlite3_file *id, int *pResOut) {
aswift5b1a2562008-08-22 00:22:35 +00002127 int rc = SQLITE_OK;
2128 int reserved = 0;
drhbfe66312006-10-03 17:40:40 +00002129 unixFile *pFile = (unixFile*)id;
drh339eb0b2008-03-07 15:34:11 +00002130
aswift5b1a2562008-08-22 00:22:35 +00002131 SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; );
2132
2133 assert( pFile );
2134
2135 /* Check if a thread in this process holds such a lock */
2136 if( pFile->locktype>SHARED_LOCK ){
2137 reserved = 1;
2138 }
2139
2140 /* Otherwise see if some other process holds it. */
2141 if( !reserved ){
2142 char *zLockFile = (char *)pFile->lockingContext;
drhbfe66312006-10-03 17:40:40 +00002143 struct stat statBuf;
aswift5b1a2562008-08-22 00:22:35 +00002144
2145 if( lstat(zLockFile, &statBuf)==0 ){
2146 /* file exists, someone else has the lock */
2147 reserved = 1;
2148 }else{
drh3b62b2f2007-06-08 18:27:03 +00002149 /* file does not exist, we could have it if we want it */
aswift5b1a2562008-08-22 00:22:35 +00002150 int tErrno = errno;
2151 if( ENOENT != tErrno ){
2152 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_CHECKRESERVEDLOCK);
2153 pFile->lastErrno = tErrno;
2154 }
drh339eb0b2008-03-07 15:34:11 +00002155 }
drhbfe66312006-10-03 17:40:40 +00002156 }
aswift5b1a2562008-08-22 00:22:35 +00002157 OSTRACE4("TEST WR-LOCK %d %d %d\n", pFile->h, rc, reserved);
danielk1977861f7452008-06-05 11:39:11 +00002158
aswift5b1a2562008-08-22 00:22:35 +00002159 *pResOut = reserved;
2160 return rc;
drhbfe66312006-10-03 17:40:40 +00002161}
2162
danielk1977e339d652008-06-28 11:23:00 +00002163static int dotlockLock(sqlite3_file *id, int locktype) {
drhbfe66312006-10-03 17:40:40 +00002164 unixFile *pFile = (unixFile*)id;
drh339eb0b2008-03-07 15:34:11 +00002165 int fd;
danielk1977e339d652008-06-28 11:23:00 +00002166 char *zLockFile = (char *)pFile->lockingContext;
aswift5b1a2562008-08-22 00:22:35 +00002167 int rc=SQLITE_OK;
drh339eb0b2008-03-07 15:34:11 +00002168
drh3b62b2f2007-06-08 18:27:03 +00002169 /* if we already have a lock, it is exclusive.
2170 ** Just adjust level and punt on outta here. */
drhbfe66312006-10-03 17:40:40 +00002171 if (pFile->locktype > NO_LOCK) {
2172 pFile->locktype = locktype;
2173
2174 /* Always update the timestamp on the old file */
danielk1977e339d652008-06-28 11:23:00 +00002175 utimes(zLockFile, NULL);
aswift5b1a2562008-08-22 00:22:35 +00002176 rc = SQLITE_OK;
2177 goto dotlock_end_lock;
drhbfe66312006-10-03 17:40:40 +00002178 }
2179
drh3b62b2f2007-06-08 18:27:03 +00002180 /* check to see if lock file already exists */
drhbfe66312006-10-03 17:40:40 +00002181 struct stat statBuf;
danielk1977e339d652008-06-28 11:23:00 +00002182 if (lstat(zLockFile,&statBuf) == 0){
aswift5b1a2562008-08-22 00:22:35 +00002183 rc = SQLITE_BUSY; /* it does, busy */
2184 goto dotlock_end_lock;
drhbfe66312006-10-03 17:40:40 +00002185 }
2186
drh3b62b2f2007-06-08 18:27:03 +00002187 /* grab an exclusive lock */
danielk1977e339d652008-06-28 11:23:00 +00002188 fd = open(zLockFile,O_RDONLY|O_CREAT|O_EXCL,0600);
drh339eb0b2008-03-07 15:34:11 +00002189 if( fd<0 ){
drh3b62b2f2007-06-08 18:27:03 +00002190 /* failed to open/create the file, someone else may have stolen the lock */
aswift5b1a2562008-08-22 00:22:35 +00002191 int tErrno = errno;
2192 if( EEXIST == tErrno ){
2193 rc = SQLITE_BUSY;
2194 } else {
2195 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
2196 if( IS_LOCK_ERROR(rc) ){
2197 pFile->lastErrno = tErrno;
2198 }
2199 }
2200 goto dotlock_end_lock;
2201 }
drhbfe66312006-10-03 17:40:40 +00002202 close(fd);
2203
drh3b62b2f2007-06-08 18:27:03 +00002204 /* got it, set the type and return ok */
drhbfe66312006-10-03 17:40:40 +00002205 pFile->locktype = locktype;
aswift5b1a2562008-08-22 00:22:35 +00002206
2207 dotlock_end_lock:
2208 return rc;
drhbfe66312006-10-03 17:40:40 +00002209}
2210
danielk1977e339d652008-06-28 11:23:00 +00002211static int dotlockUnlock(sqlite3_file *id, int locktype) {
drhbfe66312006-10-03 17:40:40 +00002212 unixFile *pFile = (unixFile*)id;
danielk1977e339d652008-06-28 11:23:00 +00002213 char *zLockFile = (char *)pFile->lockingContext;
drh339eb0b2008-03-07 15:34:11 +00002214
aswift5b1a2562008-08-22 00:22:35 +00002215 assert( pFile );
2216 OSTRACE5("UNLOCK %d %d was %d pid=%d\n", pFile->h, locktype,
2217 pFile->locktype, getpid());
drhbfe66312006-10-03 17:40:40 +00002218 assert( locktype<=SHARED_LOCK );
2219
drh3b62b2f2007-06-08 18:27:03 +00002220 /* no-op if possible */
drhbfe66312006-10-03 17:40:40 +00002221 if( pFile->locktype==locktype ){
2222 return SQLITE_OK;
2223 }
2224
drh3b62b2f2007-06-08 18:27:03 +00002225 /* shared can just be set because we always have an exclusive */
drhbfe66312006-10-03 17:40:40 +00002226 if (locktype==SHARED_LOCK) {
2227 pFile->locktype = locktype;
2228 return SQLITE_OK;
2229 }
2230
drh3b62b2f2007-06-08 18:27:03 +00002231 /* no, really, unlock. */
aswift5b1a2562008-08-22 00:22:35 +00002232 if (unlink(zLockFile) ) {
2233 int rc, tErrno = errno;
2234 if( ENOENT != tErrno ){
2235 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_UNLOCK);
2236 }
2237 if( IS_LOCK_ERROR(rc) ){
2238 pFile->lastErrno = tErrno;
2239 }
2240 return rc;
2241 }
drhbfe66312006-10-03 17:40:40 +00002242 pFile->locktype = NO_LOCK;
2243 return SQLITE_OK;
2244}
2245
2246/*
2247 ** Close a file.
2248 */
danielk1977e339d652008-06-28 11:23:00 +00002249static int dotlockClose(sqlite3_file *id) {
2250 if( id ){
2251 unixFile *pFile = (unixFile*)id;
2252 dotlockUnlock(id, NO_LOCK);
2253 sqlite3_free(pFile->lockingContext);
2254 }
2255 return closeUnixFile(id);
drhbfe66312006-10-03 17:40:40 +00002256}
2257
2258
drhda0e7682008-07-30 15:27:54 +00002259#endif /* SQLITE_ENABLE_LOCKING_STYLE */
drhbfe66312006-10-03 17:40:40 +00002260
/*
** The "no locking" style keeps no per-file locking state, so its
** locking context type is simply void.
*/
typedef void nolockLockingContext;
2265
danielk1977e339d652008-06-28 11:23:00 +00002266static int nolockCheckReservedLock(sqlite3_file *id, int *pResOut) {
danielk1977861f7452008-06-05 11:39:11 +00002267 *pResOut = 0;
2268 return SQLITE_OK;
drhbfe66312006-10-03 17:40:40 +00002269}
2270
danielk1977e339d652008-06-28 11:23:00 +00002271static int nolockLock(sqlite3_file *id, int locktype) {
drhbfe66312006-10-03 17:40:40 +00002272 return SQLITE_OK;
2273}
2274
danielk1977e339d652008-06-28 11:23:00 +00002275static int nolockUnlock(sqlite3_file *id, int locktype) {
drhbfe66312006-10-03 17:40:40 +00002276 return SQLITE_OK;
2277}
2278
2279/*
drh339eb0b2008-03-07 15:34:11 +00002280** Close a file.
2281*/
danielk1977e339d652008-06-28 11:23:00 +00002282static int nolockClose(sqlite3_file *id) {
2283 return closeUnixFile(id);
drhbfe66312006-10-03 17:40:40 +00002284}
2285
danielk1977ad94b582007-08-20 06:44:22 +00002286
danielk1977e3026632004-06-22 11:29:02 +00002287/*
drh9e33c2c2007-08-31 18:34:59 +00002288** Information and control of an open file handle.
drh18839212005-11-26 03:43:23 +00002289*/
drhcc6bb3e2007-08-31 16:11:35 +00002290static int unixFileControl(sqlite3_file *id, int op, void *pArg){
drh9e33c2c2007-08-31 18:34:59 +00002291 switch( op ){
2292 case SQLITE_FCNTL_LOCKSTATE: {
2293 *(int*)pArg = ((unixFile*)id)->locktype;
2294 return SQLITE_OK;
2295 }
2296 }
drhcc6bb3e2007-08-31 16:11:35 +00002297 return SQLITE_ERROR;
drh9cbe6352005-11-29 03:13:21 +00002298}
2299
2300/*
danielk1977a3d4c882007-03-23 10:08:38 +00002301** Return the sector size in bytes of the underlying block device for
2302** the specified file. This is almost always 512 bytes, but may be
2303** larger for some devices.
2304**
2305** SQLite code assumes this function cannot fail. It also assumes that
2306** if two files are created in the same file-system directory (i.e.
drh85b623f2007-12-13 21:54:09 +00002307** a database and its journal file) that the sector size will be the
danielk1977a3d4c882007-03-23 10:08:38 +00002308** same for both.
2309*/
danielk197762079062007-08-15 17:08:46 +00002310static int unixSectorSize(sqlite3_file *id){
drh3ceeb752007-03-29 18:19:52 +00002311 return SQLITE_DEFAULT_SECTOR_SIZE;
danielk1977a3d4c882007-03-23 10:08:38 +00002312}
2313
danielk197790949c22007-08-17 16:50:38 +00002314/*
2315** Return the device characteristics for the file. This is always 0.
2316*/
danielk197762079062007-08-15 17:08:46 +00002317static int unixDeviceCharacteristics(sqlite3_file *id){
2318 return 0;
2319}
2320
danielk1977a3d4c882007-03-23 10:08:38 +00002321/*
danielk1977e339d652008-06-28 11:23:00 +00002322** Initialize the contents of the unixFile structure pointed to by pId.
2323**
danielk1977ad94b582007-08-20 06:44:22 +00002324** When locking extensions are enabled, the filepath and locking style
2325** are needed to determine the unixFile pMethod to use for locking operations.
2326** The locking-style specific lockingContext data structure is created
2327** and assigned here also.
2328*/
2329static int fillInUnixFile(
danielk1977e339d652008-06-28 11:23:00 +00002330 sqlite3_vfs *pVfs, /* Pointer to vfs object */
drhbfe66312006-10-03 17:40:40 +00002331 int h, /* Open file descriptor of file being opened */
danielk1977ad94b582007-08-20 06:44:22 +00002332 int dirfd, /* Directory file descriptor */
drh218c5082008-03-07 00:27:10 +00002333 sqlite3_file *pId, /* Write to the unixFile structure here */
drhda0e7682008-07-30 15:27:54 +00002334 const char *zFilename, /* Name of the file being opened */
2335 int noLock /* Omit locking if true */
drhbfe66312006-10-03 17:40:40 +00002336){
drhda0e7682008-07-30 15:27:54 +00002337 int eLockingStyle;
2338 unixFile *pNew = (unixFile *)pId;
2339 int rc = SQLITE_OK;
2340
danielk1977e339d652008-06-28 11:23:00 +00002341 /* Macro to define the static contents of an sqlite3_io_methods
2342 ** structure for a unix backend file. Different locking methods
2343 ** require different functions for the xClose, xLock, xUnlock and
2344 ** xCheckReservedLock methods.
2345 */
2346 #define IOMETHODS(xClose, xLock, xUnlock, xCheckReservedLock) { \
2347 1, /* iVersion */ \
2348 xClose, /* xClose */ \
2349 unixRead, /* xRead */ \
2350 unixWrite, /* xWrite */ \
2351 unixTruncate, /* xTruncate */ \
2352 unixSync, /* xSync */ \
2353 unixFileSize, /* xFileSize */ \
2354 xLock, /* xLock */ \
2355 xUnlock, /* xUnlock */ \
2356 xCheckReservedLock, /* xCheckReservedLock */ \
2357 unixFileControl, /* xFileControl */ \
2358 unixSectorSize, /* xSectorSize */ \
2359 unixDeviceCharacteristics /* xDeviceCapabilities */ \
2360 }
2361 static sqlite3_io_methods aIoMethod[] = {
2362 IOMETHODS(unixClose, unixLock, unixUnlock, unixCheckReservedLock)
danielk1977e339d652008-06-28 11:23:00 +00002363 ,IOMETHODS(nolockClose, nolockLock, nolockUnlock, nolockCheckReservedLock)
drh40bbb0a2008-09-23 10:23:26 +00002364#if SQLITE_ENABLE_LOCKING_STYLE
drhda0e7682008-07-30 15:27:54 +00002365 ,IOMETHODS(dotlockClose, dotlockLock, dotlockUnlock,dotlockCheckReservedLock)
2366 ,IOMETHODS(flockClose, flockLock, flockUnlock, flockCheckReservedLock)
danielk1977e339d652008-06-28 11:23:00 +00002367 ,IOMETHODS(afpClose, afpLock, afpUnlock, afpCheckReservedLock)
drh218c5082008-03-07 00:27:10 +00002368#endif
danielk1977e339d652008-06-28 11:23:00 +00002369 };
drhda0e7682008-07-30 15:27:54 +00002370 /* The order of the IOMETHODS macros above is important. It must be the
2371 ** same order as the LOCKING_STYLE numbers
2372 */
2373 assert(LOCKING_STYLE_POSIX==1);
2374 assert(LOCKING_STYLE_NONE==2);
2375 assert(LOCKING_STYLE_DOTFILE==3);
2376 assert(LOCKING_STYLE_FLOCK==4);
2377 assert(LOCKING_STYLE_AFP==5);
drh218c5082008-03-07 00:27:10 +00002378
danielk197717b90b52008-06-06 11:11:25 +00002379 assert( pNew->pLock==NULL );
2380 assert( pNew->pOpen==NULL );
drh218c5082008-03-07 00:27:10 +00002381
2382 OSTRACE3("OPEN %-3d %s\n", h, zFilename);
danielk1977ad94b582007-08-20 06:44:22 +00002383 pNew->h = h;
drh218c5082008-03-07 00:27:10 +00002384 pNew->dirfd = dirfd;
danielk1977ad94b582007-08-20 06:44:22 +00002385 SET_THREADID(pNew);
drh339eb0b2008-03-07 15:34:11 +00002386
drhda0e7682008-07-30 15:27:54 +00002387 if( noLock ){
2388 eLockingStyle = LOCKING_STYLE_NONE;
2389 }else{
2390 eLockingStyle = detectLockingStyle(pVfs, zFilename, h);
2391 }
danielk1977e339d652008-06-28 11:23:00 +00002392
2393 switch( eLockingStyle ){
2394
2395 case LOCKING_STYLE_POSIX: {
2396 enterMutex();
2397 rc = findLockInfo(h, &pNew->pLock, &pNew->pOpen);
2398 leaveMutex();
drh218c5082008-03-07 00:27:10 +00002399 break;
drhbfe66312006-10-03 17:40:40 +00002400 }
danielk1977e339d652008-06-28 11:23:00 +00002401
drh40bbb0a2008-09-23 10:23:26 +00002402#if SQLITE_ENABLE_LOCKING_STYLE
danielk1977e339d652008-06-28 11:23:00 +00002403 case LOCKING_STYLE_AFP: {
2404 /* AFP locking uses the file path so it needs to be included in
2405 ** the afpLockingContext.
2406 */
2407 afpLockingContext *pCtx;
2408 pNew->lockingContext = pCtx = sqlite3_malloc( sizeof(*pCtx) );
2409 if( pCtx==0 ){
2410 rc = SQLITE_NOMEM;
2411 }else{
2412 /* NB: zFilename exists and remains valid until the file is closed
2413 ** according to requirement F11141. So we do not need to make a
2414 ** copy of the filename. */
2415 pCtx->filePath = zFilename;
2416 srandomdev();
2417 }
drh218c5082008-03-07 00:27:10 +00002418 break;
danielk1977e339d652008-06-28 11:23:00 +00002419 }
2420
2421 case LOCKING_STYLE_DOTFILE: {
2422 /* Dotfile locking uses the file path so it needs to be included in
2423 ** the dotlockLockingContext
2424 */
2425 char *zLockFile;
drh218c5082008-03-07 00:27:10 +00002426 int nFilename;
danielk1977e339d652008-06-28 11:23:00 +00002427 nFilename = strlen(zFilename) + 6;
2428 zLockFile = (char *)sqlite3_malloc(nFilename);
2429 if( zLockFile==0 ){
2430 rc = SQLITE_NOMEM;
2431 }else{
2432 sqlite3_snprintf(nFilename, zLockFile, "%s.lock", zFilename);
drh339eb0b2008-03-07 15:34:11 +00002433 }
danielk1977e339d652008-06-28 11:23:00 +00002434 pNew->lockingContext = zLockFile;
drh218c5082008-03-07 00:27:10 +00002435 break;
2436 }
danielk1977e339d652008-06-28 11:23:00 +00002437
2438 case LOCKING_STYLE_FLOCK:
2439 case LOCKING_STYLE_NONE:
drh218c5082008-03-07 00:27:10 +00002440 break;
drhe78669b2007-06-29 12:04:26 +00002441#endif
danielk1977e339d652008-06-28 11:23:00 +00002442 }
aswift5b1a2562008-08-22 00:22:35 +00002443
2444 pNew->lastErrno = 0;
danielk1977e339d652008-06-28 11:23:00 +00002445 if( rc!=SQLITE_OK ){
danielk19777c055b92007-10-30 17:28:51 +00002446 if( dirfd>=0 ) close(dirfd);
drhbfe66312006-10-03 17:40:40 +00002447 close(h);
danielk1977e339d652008-06-28 11:23:00 +00002448 }else{
danielk19776cb427f2008-06-30 10:16:04 +00002449 pNew->pMethod = &aIoMethod[eLockingStyle-1];
danielk1977e339d652008-06-28 11:23:00 +00002450 OpenCounter(+1);
drhbfe66312006-10-03 17:40:40 +00002451 }
danielk1977e339d652008-06-28 11:23:00 +00002452 return rc;
drh054889e2005-11-30 03:20:31 +00002453}
drh9c06c952005-11-26 00:25:00 +00002454
danielk1977ad94b582007-08-20 06:44:22 +00002455/*
2456** Open a file descriptor to the directory containing file zFilename.
2457** If successful, *pFd is set to the opened file descriptor and
2458** SQLITE_OK is returned. If an error occurs, either SQLITE_NOMEM
2459** or SQLITE_CANTOPEN is returned and *pFd is set to an undefined
2460** value.
2461**
2462** If SQLITE_OK is returned, the caller is responsible for closing
2463** the file descriptor *pFd using close().
2464*/
danielk1977fee2d252007-08-18 10:59:19 +00002465static int openDirectory(const char *zFilename, int *pFd){
danielk1977fee2d252007-08-18 10:59:19 +00002466 int ii;
drh777b17a2007-09-20 10:02:54 +00002467 int fd = -1;
drhf3a65f72007-08-22 20:18:21 +00002468 char zDirname[MAX_PATHNAME+1];
danielk1977fee2d252007-08-18 10:59:19 +00002469
drh153c62c2007-08-24 03:51:33 +00002470 sqlite3_snprintf(MAX_PATHNAME, zDirname, "%s", zFilename);
danielk1977fee2d252007-08-18 10:59:19 +00002471 for(ii=strlen(zDirname); ii>=0 && zDirname[ii]!='/'; ii--);
2472 if( ii>0 ){
2473 zDirname[ii] = '\0';
2474 fd = open(zDirname, O_RDONLY|O_BINARY, 0);
drh777b17a2007-09-20 10:02:54 +00002475 if( fd>=0 ){
danielk1977fee2d252007-08-18 10:59:19 +00002476#ifdef FD_CLOEXEC
2477 fcntl(fd, F_SETFD, fcntl(fd, F_GETFD, 0) | FD_CLOEXEC);
2478#endif
2479 OSTRACE3("OPENDIR %-3d %s\n", fd, zDirname);
2480 }
2481 }
danielk1977fee2d252007-08-18 10:59:19 +00002482 *pFd = fd;
drh777b17a2007-09-20 10:02:54 +00002483 return (fd>=0?SQLITE_OK:SQLITE_CANTOPEN);
danielk1977fee2d252007-08-18 10:59:19 +00002484}
2485
danielk1977b4b47412007-08-17 15:53:36 +00002486/*
danielk197717b90b52008-06-06 11:11:25 +00002487** Create a temporary file name in zBuf. zBuf must be allocated
2488** by the calling process and must be big enough to hold at least
2489** pVfs->mxPathname bytes.
2490*/
2491static int getTempname(int nBuf, char *zBuf){
2492 static const char *azDirs[] = {
2493 0,
2494 "/var/tmp",
2495 "/usr/tmp",
2496 "/tmp",
2497 ".",
2498 };
2499 static const unsigned char zChars[] =
2500 "abcdefghijklmnopqrstuvwxyz"
2501 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
2502 "0123456789";
2503 int i, j;
2504 struct stat buf;
2505 const char *zDir = ".";
2506
2507 /* It's odd to simulate an io-error here, but really this is just
2508 ** using the io-error infrastructure to test that SQLite handles this
2509 ** function failing.
2510 */
2511 SimulateIOError( return SQLITE_IOERR );
2512
2513 azDirs[0] = sqlite3_temp_directory;
2514 for(i=0; i<sizeof(azDirs)/sizeof(azDirs[0]); i++){
2515 if( azDirs[i]==0 ) continue;
2516 if( stat(azDirs[i], &buf) ) continue;
2517 if( !S_ISDIR(buf.st_mode) ) continue;
2518 if( access(azDirs[i], 07) ) continue;
2519 zDir = azDirs[i];
2520 break;
2521 }
2522
2523 /* Check that the output buffer is large enough for the temporary file
2524 ** name. If it is not, return SQLITE_ERROR.
2525 */
2526 if( (strlen(zDir) + strlen(SQLITE_TEMP_FILE_PREFIX) + 17) >= nBuf ){
2527 return SQLITE_ERROR;
2528 }
2529
2530 do{
2531 sqlite3_snprintf(nBuf-17, zBuf, "%s/"SQLITE_TEMP_FILE_PREFIX, zDir);
2532 j = strlen(zBuf);
2533 sqlite3_randomness(15, &zBuf[j]);
2534 for(i=0; i<15; i++, j++){
2535 zBuf[j] = (char)zChars[ ((unsigned char)zBuf[j])%(sizeof(zChars)-1) ];
2536 }
2537 zBuf[j] = 0;
2538 }while( access(zBuf,0)==0 );
2539 return SQLITE_OK;
2540}
2541
2542
2543/*
danielk1977ad94b582007-08-20 06:44:22 +00002544** Open the file zPath.
2545**
danielk1977b4b47412007-08-17 15:53:36 +00002546** Previously, the SQLite OS layer used three functions in place of this
2547** one:
2548**
2549** sqlite3OsOpenReadWrite();
2550** sqlite3OsOpenReadOnly();
2551** sqlite3OsOpenExclusive();
2552**
2553** These calls correspond to the following combinations of flags:
2554**
2555** ReadWrite() -> (READWRITE | CREATE)
2556** ReadOnly() -> (READONLY)
2557** OpenExclusive() -> (READWRITE | CREATE | EXCLUSIVE)
2558**
2559** The old OpenExclusive() accepted a boolean argument - "delFlag". If
2560** true, the file was configured to be automatically deleted when the
2561** file handle closed. To achieve the same effect using this new
2562** interface, add the DELETEONCLOSE flag to those specified above for
2563** OpenExclusive().
2564*/
2565static int unixOpen(
drh153c62c2007-08-24 03:51:33 +00002566 sqlite3_vfs *pVfs,
danielk1977b4b47412007-08-17 15:53:36 +00002567 const char *zPath,
2568 sqlite3_file *pFile,
2569 int flags,
2570 int *pOutFlags
2571){
danielk1977fee2d252007-08-18 10:59:19 +00002572 int fd = 0; /* File descriptor returned by open() */
2573 int dirfd = -1; /* Directory file descriptor */
2574 int oflags = 0; /* Flags to pass to open() */
2575 int eType = flags&0xFFFFFF00; /* Type of file to open */
drhda0e7682008-07-30 15:27:54 +00002576 int noLock; /* True to omit locking primitives */
danielk1977b4b47412007-08-17 15:53:36 +00002577
2578 int isExclusive = (flags & SQLITE_OPEN_EXCLUSIVE);
2579 int isDelete = (flags & SQLITE_OPEN_DELETEONCLOSE);
2580 int isCreate = (flags & SQLITE_OPEN_CREATE);
2581 int isReadonly = (flags & SQLITE_OPEN_READONLY);
2582 int isReadWrite = (flags & SQLITE_OPEN_READWRITE);
2583
danielk1977fee2d252007-08-18 10:59:19 +00002584 /* If creating a master or main-file journal, this function will open
2585 ** a file-descriptor on the directory too. The first time unixSync()
2586 ** is called the directory file descriptor will be fsync()ed and close()d.
2587 */
2588 int isOpenDirectory = (isCreate &&
2589 (eType==SQLITE_OPEN_MASTER_JOURNAL || eType==SQLITE_OPEN_MAIN_JOURNAL)
2590 );
2591
danielk197717b90b52008-06-06 11:11:25 +00002592 /* If argument zPath is a NULL pointer, this function is required to open
2593 ** a temporary file. Use this buffer to store the file name in.
2594 */
2595 char zTmpname[MAX_PATHNAME+1];
2596 const char *zName = zPath;
2597
danielk1977fee2d252007-08-18 10:59:19 +00002598 /* Check the following statements are true:
2599 **
2600 ** (a) Exactly one of the READWRITE and READONLY flags must be set, and
2601 ** (b) if CREATE is set, then READWRITE must also be set, and
2602 ** (c) if EXCLUSIVE is set, then CREATE must also be set.
drh33f4e022007-09-03 15:19:34 +00002603 ** (d) if DELETEONCLOSE is set, then CREATE must also be set.
danielk1977fee2d252007-08-18 10:59:19 +00002604 */
danielk1977b4b47412007-08-17 15:53:36 +00002605 assert((isReadonly==0 || isReadWrite==0) && (isReadWrite || isReadonly));
danielk1977b4b47412007-08-17 15:53:36 +00002606 assert(isCreate==0 || isReadWrite);
danielk1977b4b47412007-08-17 15:53:36 +00002607 assert(isExclusive==0 || isCreate);
drh33f4e022007-09-03 15:19:34 +00002608 assert(isDelete==0 || isCreate);
2609
drh33f4e022007-09-03 15:19:34 +00002610 /* The main DB, main journal, and master journal are never automatically
2611 ** deleted
2612 */
2613 assert( eType!=SQLITE_OPEN_MAIN_DB || !isDelete );
2614 assert( eType!=SQLITE_OPEN_MAIN_JOURNAL || !isDelete );
2615 assert( eType!=SQLITE_OPEN_MASTER_JOURNAL || !isDelete );
danielk1977b4b47412007-08-17 15:53:36 +00002616
danielk1977fee2d252007-08-18 10:59:19 +00002617 /* Assert that the upper layer has set one of the "file-type" flags. */
2618 assert( eType==SQLITE_OPEN_MAIN_DB || eType==SQLITE_OPEN_TEMP_DB
2619 || eType==SQLITE_OPEN_MAIN_JOURNAL || eType==SQLITE_OPEN_TEMP_JOURNAL
2620 || eType==SQLITE_OPEN_SUBJOURNAL || eType==SQLITE_OPEN_MASTER_JOURNAL
drh33f4e022007-09-03 15:19:34 +00002621 || eType==SQLITE_OPEN_TRANSIENT_DB
danielk1977fee2d252007-08-18 10:59:19 +00002622 );
2623
danielk1977e339d652008-06-28 11:23:00 +00002624 memset(pFile, 0, sizeof(unixFile));
2625
danielk197717b90b52008-06-06 11:11:25 +00002626 if( !zName ){
2627 int rc;
2628 assert(isDelete && !isOpenDirectory);
2629 rc = getTempname(MAX_PATHNAME+1, zTmpname);
2630 if( rc!=SQLITE_OK ){
2631 return rc;
2632 }
2633 zName = zTmpname;
2634 }
2635
danielk1977b4b47412007-08-17 15:53:36 +00002636 if( isReadonly ) oflags |= O_RDONLY;
2637 if( isReadWrite ) oflags |= O_RDWR;
2638 if( isCreate ) oflags |= O_CREAT;
2639 if( isExclusive ) oflags |= (O_EXCL|O_NOFOLLOW);
2640 oflags |= (O_LARGEFILE|O_BINARY);
2641
danielk197717b90b52008-06-06 11:11:25 +00002642 fd = open(zName, oflags, isDelete?0600:SQLITE_DEFAULT_FILE_PERMISSIONS);
danielk19772f2d8c72007-08-30 16:13:33 +00002643 if( fd<0 && errno!=EISDIR && isReadWrite && !isExclusive ){
danielk1977b4b47412007-08-17 15:53:36 +00002644 /* Failed to open the file for read/write access. Try read-only. */
2645 flags &= ~(SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE);
2646 flags |= SQLITE_OPEN_READONLY;
drh153c62c2007-08-24 03:51:33 +00002647 return unixOpen(pVfs, zPath, pFile, flags, pOutFlags);
danielk1977b4b47412007-08-17 15:53:36 +00002648 }
2649 if( fd<0 ){
2650 return SQLITE_CANTOPEN;
2651 }
2652 if( isDelete ){
danielk197717b90b52008-06-06 11:11:25 +00002653 unlink(zName);
danielk1977b4b47412007-08-17 15:53:36 +00002654 }
2655 if( pOutFlags ){
2656 *pOutFlags = flags;
2657 }
2658
2659 assert(fd!=0);
danielk1977fee2d252007-08-18 10:59:19 +00002660 if( isOpenDirectory ){
2661 int rc = openDirectory(zPath, &dirfd);
2662 if( rc!=SQLITE_OK ){
2663 close(fd);
2664 return rc;
2665 }
2666 }
danielk1977e339d652008-06-28 11:23:00 +00002667
2668#ifdef FD_CLOEXEC
2669 fcntl(fd, F_SETFD, fcntl(fd, F_GETFD, 0) | FD_CLOEXEC);
2670#endif
2671
drhda0e7682008-07-30 15:27:54 +00002672 noLock = eType!=SQLITE_OPEN_MAIN_DB;
2673 return fillInUnixFile(pVfs, fd, dirfd, pFile, zPath, noLock);
danielk1977b4b47412007-08-17 15:53:36 +00002674}
2675
2676/*
danielk1977fee2d252007-08-18 10:59:19 +00002677** Delete the file at zPath. If the dirSync argument is true, fsync()
2678** the directory after deleting the file.
danielk1977b4b47412007-08-17 15:53:36 +00002679*/
drh153c62c2007-08-24 03:51:33 +00002680static int unixDelete(sqlite3_vfs *pVfs, const char *zPath, int dirSync){
danielk1977fee2d252007-08-18 10:59:19 +00002681 int rc = SQLITE_OK;
danielk1977b4b47412007-08-17 15:53:36 +00002682 SimulateIOError(return SQLITE_IOERR_DELETE);
2683 unlink(zPath);
danielk1977fee2d252007-08-18 10:59:19 +00002684 if( dirSync ){
2685 int fd;
2686 rc = openDirectory(zPath, &fd);
2687 if( rc==SQLITE_OK ){
2688 if( fsync(fd) ){
2689 rc = SQLITE_IOERR_DIR_FSYNC;
2690 }
2691 close(fd);
2692 }
2693 }
2694 return rc;
danielk1977b4b47412007-08-17 15:53:36 +00002695}
2696
danielk197790949c22007-08-17 16:50:38 +00002697/*
2698** Test the existance of or access permissions of file zPath. The
2699** test performed depends on the value of flags:
2700**
2701** SQLITE_ACCESS_EXISTS: Return 1 if the file exists
2702** SQLITE_ACCESS_READWRITE: Return 1 if the file is read and writable.
2703** SQLITE_ACCESS_READONLY: Return 1 if the file is readable.
2704**
2705** Otherwise return 0.
2706*/
danielk1977861f7452008-06-05 11:39:11 +00002707static int unixAccess(
2708 sqlite3_vfs *pVfs,
2709 const char *zPath,
2710 int flags,
2711 int *pResOut
2712){
rse25c0d1a2007-09-20 08:38:14 +00002713 int amode = 0;
danielk1977861f7452008-06-05 11:39:11 +00002714 SimulateIOError( return SQLITE_IOERR_ACCESS; );
danielk1977b4b47412007-08-17 15:53:36 +00002715 switch( flags ){
2716 case SQLITE_ACCESS_EXISTS:
2717 amode = F_OK;
2718 break;
2719 case SQLITE_ACCESS_READWRITE:
2720 amode = W_OK|R_OK;
2721 break;
drh50d3f902007-08-27 21:10:36 +00002722 case SQLITE_ACCESS_READ:
danielk1977b4b47412007-08-17 15:53:36 +00002723 amode = R_OK;
2724 break;
2725
2726 default:
2727 assert(!"Invalid flags argument");
2728 }
danielk1977861f7452008-06-05 11:39:11 +00002729 *pResOut = (access(zPath, amode)==0);
2730 return SQLITE_OK;
danielk1977b4b47412007-08-17 15:53:36 +00002731}
2732
danielk1977b4b47412007-08-17 15:53:36 +00002733
2734/*
2735** Turn a relative pathname into a full pathname. The relative path
2736** is stored as a nul-terminated string in the buffer pointed to by
2737** zPath.
2738**
2739** zOut points to a buffer of at least sqlite3_vfs.mxPathname bytes
2740** (in this case, MAX_PATHNAME bytes). The full-path is written to
2741** this buffer before returning.
2742*/
danielk1977adfb9b02007-09-17 07:02:56 +00002743static int unixFullPathname(
2744 sqlite3_vfs *pVfs, /* Pointer to vfs object */
2745 const char *zPath, /* Possibly relative input path */
2746 int nOut, /* Size of output buffer in bytes */
2747 char *zOut /* Output buffer */
2748){
danielk1977843e65f2007-09-01 16:16:15 +00002749
2750 /* It's odd to simulate an io-error here, but really this is just
2751 ** using the io-error infrastructure to test that SQLite handles this
2752 ** function failing. This function could fail if, for example, the
2753 ** current working directly has been unlinked.
2754 */
2755 SimulateIOError( return SQLITE_ERROR );
2756
drh153c62c2007-08-24 03:51:33 +00002757 assert( pVfs->mxPathname==MAX_PATHNAME );
drh3c7f2dc2007-12-06 13:26:20 +00002758 zOut[nOut-1] = '\0';
danielk1977b4b47412007-08-17 15:53:36 +00002759 if( zPath[0]=='/' ){
drh3c7f2dc2007-12-06 13:26:20 +00002760 sqlite3_snprintf(nOut, zOut, "%s", zPath);
danielk1977b4b47412007-08-17 15:53:36 +00002761 }else{
2762 int nCwd;
drh3c7f2dc2007-12-06 13:26:20 +00002763 if( getcwd(zOut, nOut-1)==0 ){
drh70c01452007-09-03 17:42:17 +00002764 return SQLITE_CANTOPEN;
danielk1977b4b47412007-08-17 15:53:36 +00002765 }
2766 nCwd = strlen(zOut);
drh3c7f2dc2007-12-06 13:26:20 +00002767 sqlite3_snprintf(nOut-nCwd, &zOut[nCwd], "/%s", zPath);
danielk1977b4b47412007-08-17 15:53:36 +00002768 }
2769 return SQLITE_OK;
2770
2771#if 0
2772 /*
2773 ** Remove "/./" path elements and convert "/A/./" path elements
2774 ** to just "/".
2775 */
2776 if( zFull ){
2777 int i, j;
2778 for(i=j=0; zFull[i]; i++){
2779 if( zFull[i]=='/' ){
2780 if( zFull[i+1]=='/' ) continue;
2781 if( zFull[i+1]=='.' && zFull[i+2]=='/' ){
2782 i += 1;
2783 continue;
2784 }
2785 if( zFull[i+1]=='.' && zFull[i+2]=='.' && zFull[i+3]=='/' ){
2786 while( j>0 && zFull[j-1]!='/' ){ j--; }
2787 i += 3;
2788 continue;
2789 }
2790 }
2791 zFull[j++] = zFull[i];
2792 }
2793 zFull[j] = 0;
2794 }
2795#endif
2796}
2797
drh0ccebe72005-06-07 22:22:50 +00002798
drh761df872006-12-21 01:29:22 +00002799#ifndef SQLITE_OMIT_LOAD_EXTENSION
2800/*
2801** Interfaces for opening a shared library, finding entry points
2802** within the shared library, and closing the shared library.
2803*/
2804#include <dlfcn.h>
drh153c62c2007-08-24 03:51:33 +00002805static void *unixDlOpen(sqlite3_vfs *pVfs, const char *zFilename){
drh761df872006-12-21 01:29:22 +00002806 return dlopen(zFilename, RTLD_NOW | RTLD_GLOBAL);
2807}
danielk197795c8a542007-09-01 06:51:27 +00002808
2809/*
2810** SQLite calls this function immediately after a call to unixDlSym() or
2811** unixDlOpen() fails (returns a null pointer). If a more detailed error
2812** message is available, it is written to zBufOut. If no error message
2813** is available, zBufOut is left unmodified and SQLite uses a default
2814** error message.
2815*/
drh153c62c2007-08-24 03:51:33 +00002816static void unixDlError(sqlite3_vfs *pVfs, int nBuf, char *zBufOut){
danielk1977b4b47412007-08-17 15:53:36 +00002817 char *zErr;
2818 enterMutex();
2819 zErr = dlerror();
2820 if( zErr ){
drh153c62c2007-08-24 03:51:33 +00002821 sqlite3_snprintf(nBuf, zBufOut, "%s", zErr);
danielk1977b4b47412007-08-17 15:53:36 +00002822 }
2823 leaveMutex();
2824}
drh46c99e02007-08-27 23:26:59 +00002825static void *unixDlSym(sqlite3_vfs *pVfs, void *pHandle, const char *zSymbol){
drh761df872006-12-21 01:29:22 +00002826 return dlsym(pHandle, zSymbol);
2827}
drh46c99e02007-08-27 23:26:59 +00002828static void unixDlClose(sqlite3_vfs *pVfs, void *pHandle){
danielk1977b4b47412007-08-17 15:53:36 +00002829 dlclose(pHandle);
drh761df872006-12-21 01:29:22 +00002830}
danielk1977b4b47412007-08-17 15:53:36 +00002831#else /* if SQLITE_OMIT_LOAD_EXTENSION is defined: */
2832 #define unixDlOpen 0
2833 #define unixDlError 0
2834 #define unixDlSym 0
2835 #define unixDlClose 0
2836#endif
2837
2838/*
danielk197790949c22007-08-17 16:50:38 +00002839** Write nBuf bytes of random data to the supplied buffer zBuf.
drhbbd42a62004-05-22 17:41:58 +00002840*/
drh153c62c2007-08-24 03:51:33 +00002841static int unixRandomness(sqlite3_vfs *pVfs, int nBuf, char *zBuf){
danielk197790949c22007-08-17 16:50:38 +00002842
2843 assert(nBuf>=(sizeof(time_t)+sizeof(int)));
2844
drhbbd42a62004-05-22 17:41:58 +00002845 /* We have to initialize zBuf to prevent valgrind from reporting
2846 ** errors. The reports issued by valgrind are incorrect - we would
2847 ** prefer that the randomness be increased by making use of the
2848 ** uninitialized space in zBuf - but valgrind errors tend to worry
2849 ** some users. Rather than argue, it seems easier just to initialize
2850 ** the whole array and silence valgrind, even if that means less randomness
2851 ** in the random seed.
2852 **
2853 ** When testing, initializing zBuf[] to zero is all we do. That means
drhf1a221e2006-01-15 17:27:17 +00002854 ** that we always use the same random number sequence. This makes the
drhbbd42a62004-05-22 17:41:58 +00002855 ** tests repeatable.
2856 */
danielk1977b4b47412007-08-17 15:53:36 +00002857 memset(zBuf, 0, nBuf);
drhbbd42a62004-05-22 17:41:58 +00002858#if !defined(SQLITE_TEST)
2859 {
drh842b8642005-01-21 17:53:17 +00002860 int pid, fd;
2861 fd = open("/dev/urandom", O_RDONLY);
2862 if( fd<0 ){
drh07397232006-01-06 14:46:46 +00002863 time_t t;
2864 time(&t);
danielk197790949c22007-08-17 16:50:38 +00002865 memcpy(zBuf, &t, sizeof(t));
2866 pid = getpid();
2867 memcpy(&zBuf[sizeof(t)], &pid, sizeof(pid));
drh842b8642005-01-21 17:53:17 +00002868 }else{
danielk1977b4b47412007-08-17 15:53:36 +00002869 read(fd, zBuf, nBuf);
drh842b8642005-01-21 17:53:17 +00002870 close(fd);
2871 }
drhbbd42a62004-05-22 17:41:58 +00002872 }
2873#endif
2874 return SQLITE_OK;
2875}
2876
danielk1977b4b47412007-08-17 15:53:36 +00002877
drhbbd42a62004-05-22 17:41:58 +00002878/*
2879** Sleep for a little while. Return the amount of time slept.
danielk1977b4b47412007-08-17 15:53:36 +00002880** The argument is the number of microseconds we want to sleep.
drh4a50aac2007-08-23 02:47:53 +00002881** The return value is the number of microseconds of sleep actually
2882** requested from the underlying operating system, a number which
2883** might be greater than or equal to the argument, but not less
2884** than the argument.
drhbbd42a62004-05-22 17:41:58 +00002885*/
drh153c62c2007-08-24 03:51:33 +00002886static int unixSleep(sqlite3_vfs *pVfs, int microseconds){
drhbbd42a62004-05-22 17:41:58 +00002887#if defined(HAVE_USLEEP) && HAVE_USLEEP
danielk1977b4b47412007-08-17 15:53:36 +00002888 usleep(microseconds);
2889 return microseconds;
drhbbd42a62004-05-22 17:41:58 +00002890#else
danielk1977b4b47412007-08-17 15:53:36 +00002891 int seconds = (microseconds+999999)/1000000;
2892 sleep(seconds);
drh4a50aac2007-08-23 02:47:53 +00002893 return seconds*1000000;
drha3fad6f2006-01-18 14:06:37 +00002894#endif
drh88f474a2006-01-02 20:00:12 +00002895}
2896
/*
** The following variable, if set to a non-zero value, becomes the time
** reported by sqlite3OsCurrentTime().  The value is interpreted by
** unixCurrentTime() as a count of seconds since 1970-01-01 00:00:00 UTC
** and is converted to a Julian Day number.  This is used for testing.
*/
2901#ifdef SQLITE_TEST
2902int sqlite3_current_time = 0;
2903#endif
2904
2905/*
2906** Find the current time (in Universal Coordinated Time). Write the
2907** current time and date as a Julian Day number into *prNow and
2908** return 0. Return 1 if the time and date cannot be found.
2909*/
drh153c62c2007-08-24 03:51:33 +00002910static int unixCurrentTime(sqlite3_vfs *pVfs, double *prNow){
drh19e2d372005-08-29 23:00:03 +00002911#ifdef NO_GETTOD
drhbbd42a62004-05-22 17:41:58 +00002912 time_t t;
2913 time(&t);
2914 *prNow = t/86400.0 + 2440587.5;
drh19e2d372005-08-29 23:00:03 +00002915#else
2916 struct timeval sNow;
drhbdcc2762007-04-02 18:06:57 +00002917 gettimeofday(&sNow, 0);
drh19e2d372005-08-29 23:00:03 +00002918 *prNow = 2440587.5 + sNow.tv_sec/86400.0 + sNow.tv_usec/86400000000.0;
2919#endif
drhbbd42a62004-05-22 17:41:58 +00002920#ifdef SQLITE_TEST
2921 if( sqlite3_current_time ){
2922 *prNow = sqlite3_current_time/86400.0 + 2440587.5;
2923 }
2924#endif
2925 return 0;
2926}
danielk1977b4b47412007-08-17 15:53:36 +00002927
danielk1977bcb97fe2008-06-06 15:49:29 +00002928static int unixGetLastError(sqlite3_vfs *pVfs, int nBuf, char *zBuf){
2929 return 0;
2930}
2931
drh153c62c2007-08-24 03:51:33 +00002932/*
danielk1977e339d652008-06-28 11:23:00 +00002933** Initialize the operating system interface.
drh153c62c2007-08-24 03:51:33 +00002934*/
danielk1977c0fa4c52008-06-25 17:19:00 +00002935int sqlite3_os_init(void){
danielk1977e339d652008-06-28 11:23:00 +00002936 /* Macro to define the static contents of an sqlite3_vfs structure for
2937 ** the unix backend. The two parameters are the values to use for
2938 ** the sqlite3_vfs.zName and sqlite3_vfs.pAppData fields, respectively.
2939 **
2940 */
2941 #define UNIXVFS(zVfsName, pVfsAppData) { \
2942 1, /* iVersion */ \
2943 sizeof(unixFile), /* szOsFile */ \
2944 MAX_PATHNAME, /* mxPathname */ \
2945 0, /* pNext */ \
2946 zVfsName, /* zName */ \
2947 (void *)pVfsAppData, /* pAppData */ \
2948 unixOpen, /* xOpen */ \
2949 unixDelete, /* xDelete */ \
2950 unixAccess, /* xAccess */ \
2951 unixFullPathname, /* xFullPathname */ \
2952 unixDlOpen, /* xDlOpen */ \
2953 unixDlError, /* xDlError */ \
2954 unixDlSym, /* xDlSym */ \
2955 unixDlClose, /* xDlClose */ \
2956 unixRandomness, /* xRandomness */ \
2957 unixSleep, /* xSleep */ \
2958 unixCurrentTime, /* xCurrentTime */ \
2959 unixGetLastError /* xGetLastError */ \
2960 }
2961
2962 static sqlite3_vfs unixVfs = UNIXVFS("unix", 0);
drh40bbb0a2008-09-23 10:23:26 +00002963#if SQLITE_ENABLE_LOCKING_STYLE
danielk1977e339d652008-06-28 11:23:00 +00002964 int i;
2965 static sqlite3_vfs aVfs[] = {
2966 UNIXVFS("unix-posix", LOCKING_STYLE_POSIX),
2967 UNIXVFS("unix-afp", LOCKING_STYLE_AFP),
2968 UNIXVFS("unix-flock", LOCKING_STYLE_FLOCK),
2969 UNIXVFS("unix-dotfile", LOCKING_STYLE_DOTFILE),
2970 UNIXVFS("unix-none", LOCKING_STYLE_NONE)
drh153c62c2007-08-24 03:51:33 +00002971 };
danielk1977e339d652008-06-28 11:23:00 +00002972 for(i=0; i<(sizeof(aVfs)/sizeof(sqlite3_vfs)); i++){
2973 sqlite3_vfs_register(&aVfs[i], 0);
2974 }
2975#endif
danielk1977c0fa4c52008-06-25 17:19:00 +00002976 sqlite3_vfs_register(&unixVfs, 1);
2977 return SQLITE_OK;
drh153c62c2007-08-24 03:51:33 +00002978}
danielk1977e339d652008-06-28 11:23:00 +00002979
2980/*
2981** Shutdown the operating system interface. This is a no-op for unix.
2982*/
danielk1977c0fa4c52008-06-25 17:19:00 +00002983int sqlite3_os_end(void){
2984 return SQLITE_OK;
2985}
drhdce8bdb2007-08-16 13:01:44 +00002986
danielk197729bafea2008-06-26 10:41:19 +00002987#endif /* SQLITE_OS_UNIX */