/*
2** 2004 May 22
3**
4** The author disclaims copyright to this source code. In place of
5** a legal notice, here is a blessing:
6**
7** May you do good and not evil.
8** May you find forgiveness for yourself and forgive others.
9** May you share freely, never taking more than you give.
10**
11******************************************************************************
12**
13** This file contains code that is specific to Unix systems.
14*/
drhbbd42a62004-05-22 17:41:58 +000015#include "sqliteInt.h"
drheb206252004-10-01 02:00:31 +000016#include "os.h"
17#if OS_UNIX /* This file is used on unix only */
drh9cbe6352005-11-29 03:13:21 +000018/*
19** These #defines should enable >2GB file support on Posix if the
20** underlying operating system supports it. If the OS lacks
21** large file support, or if the OS is windows, these should be no-ops.
22**
23** Large file support can be disabled using the -DSQLITE_DISABLE_LFS switch
24** on the compiler command line. This is necessary if you are compiling
25** on a recent machine (ex: RedHat 7.2) but you want your code to work
26** on an older machine (ex: RedHat 6.0). If you compile on RedHat 7.2
** without this option, LFS is enabled. But LFS does not exist in the kernel
28** in RedHat 6.0, so the code won't work. Hence, for maximum binary
29** portability you should omit LFS.
30**
31** Similar is true for MacOS. LFS is only supported on MacOS 9 and later.
32*/
#if !defined(SQLITE_DISABLE_LFS)
  /* Large file support is wanted: set the LFS feature-test macros. */
# define _LARGE_FILE       1
# if !defined(_FILE_OFFSET_BITS)
#   define _FILE_OFFSET_BITS 64
# endif
# define _LARGEFILE_SOURCE 1
#endif /* !defined(SQLITE_DISABLE_LFS) */
drhbbd42a62004-05-22 17:41:58 +000040
drh9cbe6352005-11-29 03:13:21 +000041/*
42** standard include files.
43*/
44#include <sys/types.h>
45#include <sys/stat.h>
46#include <fcntl.h>
47#include <unistd.h>
drhbbd42a62004-05-22 17:41:58 +000048#include <time.h>
drh19e2d372005-08-29 23:00:03 +000049#include <sys/time.h>
drhbbd42a62004-05-22 17:41:58 +000050#include <errno.h>
drh9cbe6352005-11-29 03:13:21 +000051
52/*
53** Macros used to determine whether or not to use threads. The
54** SQLITE_UNIX_THREADS macro is defined if we are synchronizing for
55** Posix threads and SQLITE_W32_THREADS is defined if we are
56** synchronizing using Win32 threads.
57*/
#if defined(THREADSAFE)
# if THREADSAFE
    /* Threading requested: pull in pthreads and flag it for this file. */
#   include <pthread.h>
#   define SQLITE_UNIX_THREADS 1
# endif
#endif
62
63/*
64** Default permissions when creating a new file
65*/
#if !defined(SQLITE_DEFAULT_FILE_PERMISSIONS)
  /* May be overridden on the compiler command line. */
# define SQLITE_DEFAULT_FILE_PERMISSIONS 0644
#endif
69
70
71
/*
** The OsFile structure is an operating-system dependent representation
** of an open file handle.  Each architecture supplies its own definition;
** this one is for Unix.
**
** OsFile.locktype takes one of the values SHARED_LOCK, RESERVED_LOCK,
** PENDING_LOCK or EXCLUSIVE_LOCK.
*/
struct OsFile {
  struct openCnt *pOpen;    /* Info about all open fd's on this inode */
  struct lockInfo *pLock;   /* Info about locks on this inode */
  int h;                    /* The file descriptor */
  unsigned char locktype;   /* The type of lock held on this fd */
  unsigned char isOpen;     /* True if needs to be closed */
  unsigned char fullSync;   /* Use F_FULLSYNC if available */
  int dirfd;                /* File descriptor for the directory */
#ifdef SQLITE_UNIX_THREADS
  pthread_t tid;            /* The thread authorized to use this OsFile */
#endif
};
93
drh0ccebe72005-06-07 22:22:50 +000094
95/*
96** Do not include any of the File I/O interface procedures if the
** SQLITE_OMIT_DISKIO macro is defined (indicating that the database
** will be in-memory only)
99*/
100#ifndef SQLITE_OMIT_DISKIO
101
102
103/*
104** Define various macros that are missing from some systems.
105*/
/* With LFS disabled, O_LARGEFILE is forced to be a no-op even if the
** system headers provide it. */
#if defined(SQLITE_DISABLE_LFS) && defined(O_LARGEFILE)
# undef O_LARGEFILE
#endif
#if !defined(O_LARGEFILE)
# define O_LARGEFILE 0
#endif
#if !defined(O_NOFOLLOW)
# define O_NOFOLLOW 0
#endif
#if !defined(O_BINARY)
# define O_BINARY 0
#endif
119
120/*
121** The DJGPP compiler environment looks mostly like Unix, but it
122** lacks the fcntl() system call. So redefine fcntl() to be something
123** that always succeeds. This means that locking does not occur under
danielk197726c5d792005-11-25 09:01:23 +0000124** DJGPP. But it's DOS - what did you expect?
drhbbd42a62004-05-22 17:41:58 +0000125*/
#if defined(__DJGPP__)
  /* Every fcntl() "call" evaluates to 0, i.e. reports success. */
# define fcntl(A,B,C) 0
#endif
129
130/*
drhbbd42a62004-05-22 17:41:58 +0000131** Include code that is common to all os_*.c files
132*/
133#include "os_common.h"
134
drh2b4b5962005-06-15 17:47:55 +0000135/*
136** The threadid macro resolves to the thread-id or to 0. Used for
137** testing and debugging only.
138*/
#if defined(SQLITE_UNIX_THREADS)
# define threadid pthread_self()
#else
# define threadid 0
#endif
144
145/*
146** Set or check the OsFile.tid field. This field is set when an OsFile
147** is first opened. All subsequent uses of the OsFile verify that the
148** same thread is operating on the OsFile. Some operating systems do
149** not allow locks to be overridden by other threads and that restriction
150** means that sqlite3* database handles cannot be moved from one thread
151** to another. This logic makes sure a user does not try to do that
152** by mistake.
153*/
/* Both macros expand to a single parenthesized expression in either
** configuration, so they behave the same way wherever they are used.
** When thread checking is disabled SET_THREADID() does nothing and does
** not evaluate its argument, and CHECK_THREADID() is always false. */
#if defined(SQLITE_UNIX_THREADS) && !defined(SQLITE_ALLOW_XTHREAD_CONNECTIONS)
# define SET_THREADID(X)   ((X)->tid = pthread_self())
# define CHECK_THREADID(X) (!pthread_equal((X)->tid, pthread_self()))
#else
# define SET_THREADID(X)   ((void)0)
# define CHECK_THREADID(X) (0)
#endif
161
drhbbd42a62004-05-22 17:41:58 +0000162/*
163** Here is the dirt on POSIX advisory locks: ANSI STD 1003.1 (1996)
164** section 6.5.2.2 lines 483 through 490 specify that when a process
165** sets or clears a lock, that operation overrides any prior locks set
166** by the same process. It does not explicitly say so, but this implies
167** that it overrides locks set by the same process using a different
168** file descriptor. Consider this test case:
169**
170** int fd1 = open("./file1", O_RDWR|O_CREAT, 0644);
171** int fd2 = open("./file2", O_RDWR|O_CREAT, 0644);
172**
173** Suppose ./file1 and ./file2 are really the same file (because
174** one is a hard or symbolic link to the other) then if you set
175** an exclusive lock on fd1, then try to get an exclusive lock
176** on fd2, it works. I would have expected the second lock to
177** fail since there was already a lock on the file due to fd1.
178** But not so. Since both locks came from the same process, the
179** second overrides the first, even though they were on different
180** file descriptors opened on different file names.
181**
182** Bummer. If you ask me, this is broken. Badly broken. It means
183** that we cannot use POSIX locks to synchronize file access among
184** competing threads of the same process. POSIX locks will work fine
185** to synchronize access for threads in separate processes, but not
186** threads within the same process.
187**
188** To work around the problem, SQLite has to manage file locks internally
189** on its own. Whenever a new database is opened, we have to find the
190** specific inode of the database file (the inode is determined by the
191** st_dev and st_ino fields of the stat structure that fstat() fills in)
192** and check for locks already existing on that inode. When locks are
193** created or removed, we have to look at our own internal record of the
194** locks to see if another thread has previously set a lock on that same
195** inode.
196**
197** The OsFile structure for POSIX is no longer just an integer file
198** descriptor. It is now a structure that holds the integer file
199** descriptor and a pointer to a structure that describes the internal
200** locks on the corresponding inode. There is one locking structure
201** per inode, so if the same inode is opened twice, both OsFile structures
202** point to the same locking structure. The locking structure keeps
203** a reference count (so we will know when to delete it) and a "cnt"
204** field that tells us its internal lock status. cnt==0 means the
205** file is unlocked. cnt==-1 means the file has an exclusive lock.
206** cnt>0 means there are cnt shared locks on the file.
207**
208** Any attempt to lock or unlock a file first checks the locking
209** structure. The fcntl() system call is only invoked to set a
210** POSIX lock if the internal lock structure transitions between
211** a locked and an unlocked state.
212**
213** 2004-Jan-11:
214** More recent discoveries about POSIX advisory locks. (The more
** I discover, the more I realize that POSIX advisory locks are
216** an abomination.)
217**
218** If you close a file descriptor that points to a file that has locks,
219** all locks on that file that are owned by the current process are
220** released. To work around this problem, each OsFile structure contains
221** a pointer to an openCnt structure. There is one openCnt structure
222** per open inode, which means that multiple OsFiles can point to a single
223** openCnt. When an attempt is made to close an OsFile, if there are
224** other OsFiles open on the same inode that are holding locks, the call
225** to close() the file descriptor is deferred until all of the locks clear.
226** The openCnt structure keeps a list of file descriptors that need to
227** be closed and that list is walked (and cleared) when the last lock
228** clears.
229**
230** First, under Linux threads, because each thread has a separate
231** process ID, lock operations in one thread do not override locks
232** to the same file in other threads. Linux threads behave like
233** separate processes in this respect. But, if you close a file
234** descriptor in linux threads, all locks are cleared, even locks
235** on other threads and even though the other threads have different
236** process IDs. Linux threads is inconsistent in this respect.
237** (I'm beginning to think that linux threads is an abomination too.)
238** The consequence of this all is that the hash table for the lockInfo
239** structure has to include the process id as part of its key because
240** locks in different threads are treated as distinct. But the
241** openCnt structure should not include the process id in its
242** key because close() clears lock on all threads, not just the current
243** thread. Were it not for this goofiness in linux threads, we could
244** combine the lockInfo and openCnt structures into a single structure.
drh5fdae772004-06-29 03:29:00 +0000245**
246** 2004-Jun-28:
247** On some versions of linux, threads can override each others locks.
248** On others not. Sometimes you can change the behavior on the same
249** system by setting the LD_ASSUME_KERNEL environment variable. The
250** POSIX standard is silent as to which behavior is correct, as far
251** as I can tell, so other versions of unix might show the same
** inconsistency. There is little doubt in my mind that posix
253** advisory locks and linux threads are profoundly broken.
254**
255** To work around the inconsistencies, we have to test at runtime
256** whether or not threads can override each others locks. This test
257** is run once, the first time any lock is attempted. A static
258** variable is set to record the results of this test for future
259** use.
drhbbd42a62004-05-22 17:41:58 +0000260*/
261
/*
** Hash key used to locate the lockInfo structure for an inode.
**
** If threads cannot override each others locks, lockKey.tid holds the
** thread ID.  If threads can override each others locks, tid is always
** zero.  The tid field is omitted entirely when compiling without
** threading support.
*/
struct lockKey {
  dev_t dev;       /* Device number */
  ino_t ino;       /* Inode number */
#ifdef SQLITE_UNIX_THREADS
  pthread_t tid;   /* Thread ID or zero if threads can override each other */
#endif
};
278
279/*
280** An instance of the following structure is allocated for each open
281** inode on each thread with a different process ID. (Threads have
282** different process IDs on linux, but not on most other unixes.)
283**
284** A single inode can have multiple file descriptors, so each OsFile
285** structure contains a pointer to an instance of this object and this
286** object keeps a count of the number of OsFiles pointing to it.
287*/
288struct lockInfo {
289 struct lockKey key; /* The lookup key */
drh2ac3ee92004-06-07 16:27:46 +0000290 int cnt; /* Number of SHARED locks held */
danielk19779a1d0ab2004-06-01 14:09:28 +0000291 int locktype; /* One of SHARED_LOCK, RESERVED_LOCK etc. */
drhbbd42a62004-05-22 17:41:58 +0000292 int nRef; /* Number of pointers to this structure */
293};
294
/*
** Hash key used to locate the openCnt structure for an inode.  It has
** the same device and inode fields as lockKey but no thread ID.
*/
struct openKey {
  dev_t dev;   /* Device number */
  ino_t ino;   /* Inode number */
};
304
305/*
306** An instance of the following structure is allocated for each open
307** inode. This structure keeps track of the number of locks on that
308** inode. If a close is attempted against an inode that is holding
309** locks, the close is deferred until all locks clear by adding the
310** file descriptor to be closed to the pending list.
311*/
312struct openCnt {
313 struct openKey key; /* The lookup key */
314 int nRef; /* Number of pointers to this structure */
315 int nLock; /* Number of outstanding locks */
316 int nPending; /* Number of pending close() operations */
317 int *aPending; /* Malloced space holding fd's awaiting a close() */
318};
319
320/*
321** These hash table maps inodes and process IDs into lockInfo and openCnt
322** structures. Access to these hash tables must be protected by a mutex.
323*/
324static Hash lockHash = { SQLITE_HASH_BINARY, 0, 0, 0, 0, 0 };
325static Hash openHash = { SQLITE_HASH_BINARY, 0, 0, 0, 0, 0 };
326
drh5fdae772004-06-29 03:29:00 +0000327
328#ifdef SQLITE_UNIX_THREADS
/*
** Records whether or not threads can override each others locks:
**
**    0:  No.  Threads cannot override each others locks.
**    1:  Yes.  Threads can override each others locks.
**   -1:  Not determined yet.
*/
static int threadsOverrideEachOthersLocks = -1;
338
/*
** Argument block handed to each test thread started by the
** testThreadLockingBehavior() routine.
*/
struct threadTestData {
  int fd;              /* File to be locked */
  struct flock lock;   /* The locking operation */
  int result;          /* Result of the locking operation */
};
348
drh2b4b5962005-06-15 17:47:55 +0000349#ifdef SQLITE_LOCK_TRACE
350/*
351** Print out information about all locking operations.
352**
353** This routine is used for troubleshooting locks on multithreaded
354** platforms. Enable by compiling with the -DSQLITE_LOCK_TRACE
355** command-line option on the compiler. This code is normally
356** turnned off.
357*/
358static int lockTrace(int fd, int op, struct flock *p){
359 char *zOpName, *zType;
360 int s;
361 int savedErrno;
362 if( op==F_GETLK ){
363 zOpName = "GETLK";
364 }else if( op==F_SETLK ){
365 zOpName = "SETLK";
366 }else{
367 s = fcntl(fd, op, p);
368 sqlite3DebugPrintf("fcntl unknown %d %d %d\n", fd, op, s);
369 return s;
370 }
371 if( p->l_type==F_RDLCK ){
372 zType = "RDLCK";
373 }else if( p->l_type==F_WRLCK ){
374 zType = "WRLCK";
375 }else if( p->l_type==F_UNLCK ){
376 zType = "UNLCK";
377 }else{
378 assert( 0 );
379 }
380 assert( p->l_whence==SEEK_SET );
381 s = fcntl(fd, op, p);
382 savedErrno = errno;
383 sqlite3DebugPrintf("fcntl %d %d %s %s %d %d %d %d\n",
384 threadid, fd, zOpName, zType, (int)p->l_start, (int)p->l_len,
385 (int)p->l_pid, s);
386 if( s && op==F_SETLK && (p->l_type==F_RDLCK || p->l_type==F_WRLCK) ){
387 struct flock l2;
388 l2 = *p;
389 fcntl(fd, F_GETLK, &l2);
390 if( l2.l_type==F_RDLCK ){
391 zType = "RDLCK";
392 }else if( l2.l_type==F_WRLCK ){
393 zType = "WRLCK";
394 }else if( l2.l_type==F_UNLCK ){
395 zType = "UNLCK";
396 }else{
397 assert( 0 );
398 }
399 sqlite3DebugPrintf("fcntl-failure-reason: %s %d %d %d\n",
400 zType, (int)l2.l_start, (int)l2.l_len, (int)l2.l_pid);
401 }
402 errno = savedErrno;
403 return s;
404}
405#define fcntl lockTrace
406#endif /* SQLITE_LOCK_TRACE */
407
drh5fdae772004-06-29 03:29:00 +0000408/*
409** The testThreadLockingBehavior() routine launches two separate
410** threads on this routine. This routine attempts to lock a file
411** descriptor then returns. The success or failure of that attempt
412** allows the testThreadLockingBehavior() procedure to determine
413** whether or not threads can override each others locks.
414*/
415static void *threadLockingTest(void *pArg){
416 struct threadTestData *pData = (struct threadTestData*)pArg;
417 pData->result = fcntl(pData->fd, F_SETLK, &pData->lock);
418 return pArg;
419}
420
421/*
422** This procedure attempts to determine whether or not threads
423** can override each others locks then sets the
424** threadsOverrideEachOthersLocks variable appropriately.
425*/
426static void testThreadLockingBehavior(fd_orig){
427 int fd;
428 struct threadTestData d[2];
429 pthread_t t[2];
430
431 fd = dup(fd_orig);
432 if( fd<0 ) return;
433 memset(d, 0, sizeof(d));
434 d[0].fd = fd;
435 d[0].lock.l_type = F_RDLCK;
436 d[0].lock.l_len = 1;
437 d[0].lock.l_start = 0;
438 d[0].lock.l_whence = SEEK_SET;
439 d[1] = d[0];
440 d[1].lock.l_type = F_WRLCK;
441 pthread_create(&t[0], 0, threadLockingTest, &d[0]);
442 pthread_create(&t[1], 0, threadLockingTest, &d[1]);
443 pthread_join(t[0], 0);
444 pthread_join(t[1], 0);
445 close(fd);
446 threadsOverrideEachOthersLocks = d[0].result==0 && d[1].result==0;
447}
448#endif /* SQLITE_UNIX_THREADS */
449
drhbbd42a62004-05-22 17:41:58 +0000450/*
451** Release a lockInfo structure previously allocated by findLockInfo().
452*/
453static void releaseLockInfo(struct lockInfo *pLock){
454 pLock->nRef--;
455 if( pLock->nRef==0 ){
456 sqlite3HashInsert(&lockHash, &pLock->key, sizeof(pLock->key), 0);
457 sqliteFree(pLock);
458 }
459}
460
461/*
462** Release a openCnt structure previously allocated by findLockInfo().
463*/
464static void releaseOpenCnt(struct openCnt *pOpen){
465 pOpen->nRef--;
466 if( pOpen->nRef==0 ){
467 sqlite3HashInsert(&openHash, &pOpen->key, sizeof(pOpen->key), 0);
468 sqliteFree(pOpen->aPending);
469 sqliteFree(pOpen);
470 }
471}
472
473/*
474** Given a file descriptor, locate lockInfo and openCnt structures that
475** describes that file descriptor. Create a new ones if necessary. The
476** return values might be unset if an error occurs.
477**
478** Return the number of errors.
479*/
drh38f82712004-06-18 17:10:16 +0000480static int findLockInfo(
drhbbd42a62004-05-22 17:41:58 +0000481 int fd, /* The file descriptor used in the key */
482 struct lockInfo **ppLock, /* Return the lockInfo structure here */
drh5fdae772004-06-29 03:29:00 +0000483 struct openCnt **ppOpen /* Return the openCnt structure here */
drhbbd42a62004-05-22 17:41:58 +0000484){
485 int rc;
486 struct lockKey key1;
487 struct openKey key2;
488 struct stat statbuf;
489 struct lockInfo *pLock;
490 struct openCnt *pOpen;
491 rc = fstat(fd, &statbuf);
492 if( rc!=0 ) return 1;
493 memset(&key1, 0, sizeof(key1));
494 key1.dev = statbuf.st_dev;
495 key1.ino = statbuf.st_ino;
drh5fdae772004-06-29 03:29:00 +0000496#ifdef SQLITE_UNIX_THREADS
497 if( threadsOverrideEachOthersLocks<0 ){
498 testThreadLockingBehavior(fd);
499 }
500 key1.tid = threadsOverrideEachOthersLocks ? 0 : pthread_self();
501#endif
drhbbd42a62004-05-22 17:41:58 +0000502 memset(&key2, 0, sizeof(key2));
503 key2.dev = statbuf.st_dev;
504 key2.ino = statbuf.st_ino;
505 pLock = (struct lockInfo*)sqlite3HashFind(&lockHash, &key1, sizeof(key1));
506 if( pLock==0 ){
507 struct lockInfo *pOld;
508 pLock = sqliteMallocRaw( sizeof(*pLock) );
509 if( pLock==0 ) return 1;
510 pLock->key = key1;
511 pLock->nRef = 1;
512 pLock->cnt = 0;
danielk19779a1d0ab2004-06-01 14:09:28 +0000513 pLock->locktype = 0;
drhbbd42a62004-05-22 17:41:58 +0000514 pOld = sqlite3HashInsert(&lockHash, &pLock->key, sizeof(key1), pLock);
515 if( pOld!=0 ){
516 assert( pOld==pLock );
517 sqliteFree(pLock);
518 return 1;
519 }
520 }else{
521 pLock->nRef++;
522 }
523 *ppLock = pLock;
524 pOpen = (struct openCnt*)sqlite3HashFind(&openHash, &key2, sizeof(key2));
525 if( pOpen==0 ){
526 struct openCnt *pOld;
527 pOpen = sqliteMallocRaw( sizeof(*pOpen) );
528 if( pOpen==0 ){
529 releaseLockInfo(pLock);
530 return 1;
531 }
532 pOpen->key = key2;
533 pOpen->nRef = 1;
534 pOpen->nLock = 0;
535 pOpen->nPending = 0;
536 pOpen->aPending = 0;
537 pOld = sqlite3HashInsert(&openHash, &pOpen->key, sizeof(key2), pOpen);
538 if( pOld!=0 ){
539 assert( pOld==pOpen );
540 sqliteFree(pOpen);
541 releaseLockInfo(pLock);
542 return 1;
543 }
544 }else{
545 pOpen->nRef++;
546 }
547 *ppOpen = pOpen;
548 return 0;
549}
550
551/*
552** Delete the named file
553*/
drh9c06c952005-11-26 00:25:00 +0000554static int unixDelete(const char *zFilename){
drhbbd42a62004-05-22 17:41:58 +0000555 unlink(zFilename);
556 return SQLITE_OK;
557}
558
/*
** Return TRUE (1) if access() reports that the named file exists,
** otherwise FALSE (0).
*/
static int unixFileExists(const char *zFilename){
  int rc = access(zFilename, 0);
  return rc==0;
}
565
566/*
drh9cbe6352005-11-29 03:13:21 +0000567** Allocate memory for an OsFile. Initialize the new OsFile
568** to the value given in pInit and return a pointer to the new
569** OsFile. If we run out of memory, close the file and return NULL.
570*/
571static OsFile *allocateOsFile(OsFile *pInit){
572 OsFile *pNew;
573 pNew = sqliteMalloc( sizeof(OsFile) );
574 if( pNew==0 ){
575 close(pInit->h);
576 }else{
577 *pNew = *pInit;
578 }
579 return pNew;
580}
581
582/*
drhbbd42a62004-05-22 17:41:58 +0000583** Attempt to open a file for both reading and writing. If that
584** fails, try opening it read-only. If the file does not exist,
585** try to create it.
586**
587** On success, a handle for the open file is written to *id
588** and *pReadonly is set to 0 if the file was opened for reading and
589** writing or 1 if the file was opened read-only. The function returns
590** SQLITE_OK.
591**
592** On failure, the function returns SQLITE_CANTOPEN and leaves
593** *id and *pReadonly unchanged.
594*/
drh9c06c952005-11-26 00:25:00 +0000595static int unixOpenReadWrite(
drhbbd42a62004-05-22 17:41:58 +0000596 const char *zFilename,
drh9cbe6352005-11-29 03:13:21 +0000597 OsFile **pId,
drhbbd42a62004-05-22 17:41:58 +0000598 int *pReadonly
599){
600 int rc;
drh9cbe6352005-11-29 03:13:21 +0000601 OsFile f;
602
603 assert( 0==*pId );
604 f.dirfd = -1;
605 SET_THREADID(&f);
606 f.h = open(zFilename, O_RDWR|O_CREAT|O_LARGEFILE|O_BINARY,
drh8e855772005-05-17 11:25:31 +0000607 SQLITE_DEFAULT_FILE_PERMISSIONS);
drh9cbe6352005-11-29 03:13:21 +0000608 if( f.h<0 ){
drh6458e392004-07-20 01:14:13 +0000609#ifdef EISDIR
610 if( errno==EISDIR ){
611 return SQLITE_CANTOPEN;
612 }
613#endif
drh9cbe6352005-11-29 03:13:21 +0000614 f.h = open(zFilename, O_RDONLY|O_LARGEFILE|O_BINARY);
615 if( f.h<0 ){
drhbbd42a62004-05-22 17:41:58 +0000616 return SQLITE_CANTOPEN;
617 }
618 *pReadonly = 1;
619 }else{
620 *pReadonly = 0;
621 }
622 sqlite3OsEnterMutex();
drh9cbe6352005-11-29 03:13:21 +0000623 rc = findLockInfo(f.h, &f.pLock, &f.pOpen);
drhbbd42a62004-05-22 17:41:58 +0000624 sqlite3OsLeaveMutex();
625 if( rc ){
drh9cbe6352005-11-29 03:13:21 +0000626 close(f.h);
drhbbd42a62004-05-22 17:41:58 +0000627 return SQLITE_NOMEM;
628 }
drh9cbe6352005-11-29 03:13:21 +0000629 f.locktype = 0;
630 TRACE3("OPEN %-3d %s\n", f.h, zFilename);
631 *pId = allocateOsFile(&f);
632 if( *pId==0 ){
633 return SQLITE_NOMEM;
634 }else{
635 OpenCounter(+1);
636 return SQLITE_OK;
637 }
drhbbd42a62004-05-22 17:41:58 +0000638}
639
640
641/*
642** Attempt to open a new file for exclusive access by this process.
643** The file will be opened for both reading and writing. To avoid
644** a potential security problem, we do not allow the file to have
645** previously existed. Nor do we allow the file to be a symbolic
646** link.
647**
648** If delFlag is true, then make arrangements to automatically delete
649** the file when it is closed.
650**
651** On success, write the file handle into *id and return SQLITE_OK.
652**
653** On failure, return SQLITE_CANTOPEN.
654*/
drh9cbe6352005-11-29 03:13:21 +0000655static int unixOpenExclusive(const char *zFilename, OsFile **pId, int delFlag){
drhbbd42a62004-05-22 17:41:58 +0000656 int rc;
drh9cbe6352005-11-29 03:13:21 +0000657 OsFile f;
658
659 assert( 0==*pId );
drhbbd42a62004-05-22 17:41:58 +0000660 if( access(zFilename, 0)==0 ){
661 return SQLITE_CANTOPEN;
662 }
drh9cbe6352005-11-29 03:13:21 +0000663 SET_THREADID(&f);
664 f.dirfd = -1;
665 f.h = open(zFilename,
drhd6459672005-08-13 17:17:01 +0000666 O_RDWR|O_CREAT|O_EXCL|O_NOFOLLOW|O_LARGEFILE|O_BINARY,
667 SQLITE_DEFAULT_FILE_PERMISSIONS);
drh9cbe6352005-11-29 03:13:21 +0000668 if( f.h<0 ){
drhbbd42a62004-05-22 17:41:58 +0000669 return SQLITE_CANTOPEN;
670 }
671 sqlite3OsEnterMutex();
drh9cbe6352005-11-29 03:13:21 +0000672 rc = findLockInfo(f.h, &f.pLock, &f.pOpen);
drhbbd42a62004-05-22 17:41:58 +0000673 sqlite3OsLeaveMutex();
674 if( rc ){
drh9cbe6352005-11-29 03:13:21 +0000675 close(f.h);
drhbbd42a62004-05-22 17:41:58 +0000676 unlink(zFilename);
677 return SQLITE_NOMEM;
678 }
drh9cbe6352005-11-29 03:13:21 +0000679 f.locktype = 0;
drhbbd42a62004-05-22 17:41:58 +0000680 if( delFlag ){
681 unlink(zFilename);
682 }
drh9cbe6352005-11-29 03:13:21 +0000683 TRACE3("OPEN-EX %-3d %s\n", f.h, zFilename);
684 *pId = allocateOsFile(&f);
685 if( *pId==0 ){
686 return SQLITE_NOMEM;
687 }else{
688 OpenCounter(+1);
689 return SQLITE_OK;
690 }
drhbbd42a62004-05-22 17:41:58 +0000691}
692
693/*
694** Attempt to open a new file for read-only access.
695**
696** On success, write the file handle into *id and return SQLITE_OK.
697**
698** On failure, return SQLITE_CANTOPEN.
699*/
drh9cbe6352005-11-29 03:13:21 +0000700static int unixOpenReadOnly(const char *zFilename, OsFile **pId){
drhbbd42a62004-05-22 17:41:58 +0000701 int rc;
drh9cbe6352005-11-29 03:13:21 +0000702 OsFile f;
703
704 assert( 0==*pId );
705 SET_THREADID(&f);
706 f.dirfd = -1;
707 f.h = open(zFilename, O_RDONLY|O_LARGEFILE|O_BINARY);
708 if( f.h<0 ){
drhbbd42a62004-05-22 17:41:58 +0000709 return SQLITE_CANTOPEN;
710 }
711 sqlite3OsEnterMutex();
drh9cbe6352005-11-29 03:13:21 +0000712 rc = findLockInfo(f.h, &f.pLock, &f.pOpen);
drhbbd42a62004-05-22 17:41:58 +0000713 sqlite3OsLeaveMutex();
714 if( rc ){
drh9cbe6352005-11-29 03:13:21 +0000715 close(f.h);
drhbbd42a62004-05-22 17:41:58 +0000716 return SQLITE_NOMEM;
717 }
drh9cbe6352005-11-29 03:13:21 +0000718 f.locktype = 0;
719 TRACE3("OPEN-RO %-3d %s\n", f.h, zFilename);
720 *pId = allocateOsFile(&f);
721 if( *pId==0 ){
722 return SQLITE_NOMEM;
723 }else{
724 OpenCounter(+1);
725 return SQLITE_OK;
726 }
drhbbd42a62004-05-22 17:41:58 +0000727}
728
729/*
730** Attempt to open a file descriptor for the directory that contains a
731** file. This file descriptor can be used to fsync() the directory
732** in order to make sure the creation of a new file is actually written
733** to disk.
734**
735** This routine is only meaningful for Unix. It is a no-op under
736** windows since windows does not support hard links.
737**
drh9cbe6352005-11-29 03:13:21 +0000738** On success, a handle for a previously open file at *id is
drhbbd42a62004-05-22 17:41:58 +0000739** updated with the new directory file descriptor and SQLITE_OK is
740** returned.
741**
742** On failure, the function returns SQLITE_CANTOPEN and leaves
743** *id unchanged.
744*/
drh9c06c952005-11-26 00:25:00 +0000745static int unixOpenDirectory(
drhbbd42a62004-05-22 17:41:58 +0000746 const char *zDirname,
747 OsFile *id
748){
drh9cbe6352005-11-29 03:13:21 +0000749 if( id==0 ){
drhbbd42a62004-05-22 17:41:58 +0000750 /* Do not open the directory if the corresponding file is not already
751 ** open. */
752 return SQLITE_CANTOPEN;
753 }
drh2b4b5962005-06-15 17:47:55 +0000754 SET_THREADID(id);
drhbbd42a62004-05-22 17:41:58 +0000755 assert( id->dirfd<0 );
drh8e855772005-05-17 11:25:31 +0000756 id->dirfd = open(zDirname, O_RDONLY|O_BINARY, 0);
drhbbd42a62004-05-22 17:41:58 +0000757 if( id->dirfd<0 ){
758 return SQLITE_CANTOPEN;
759 }
760 TRACE3("OPENDIR %-3d %s\n", id->dirfd, zDirname);
761 return SQLITE_OK;
762}
763
/*
** When this global variable points to a string that names a directory,
** that directory is used to store temporary files.
*/
char *sqlite3_temp_directory = 0;
drhab3f9fe2004-08-14 17:10:10 +0000770
771/*
drhbbd42a62004-05-22 17:41:58 +0000772** Create a temporary file name in zBuf. zBuf must be big enough to
773** hold at least SQLITE_TEMPNAME_SIZE characters.
774*/
drh9c06c952005-11-26 00:25:00 +0000775static int unixTempFileName(char *zBuf){
drhbbd42a62004-05-22 17:41:58 +0000776 static const char *azDirs[] = {
drhab3f9fe2004-08-14 17:10:10 +0000777 0,
drhbbd42a62004-05-22 17:41:58 +0000778 "/var/tmp",
779 "/usr/tmp",
780 "/tmp",
781 ".",
782 };
drh57196282004-10-06 15:41:16 +0000783 static const unsigned char zChars[] =
drhbbd42a62004-05-22 17:41:58 +0000784 "abcdefghijklmnopqrstuvwxyz"
785 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
786 "0123456789";
787 int i, j;
788 struct stat buf;
789 const char *zDir = ".";
drheffd02b2004-08-29 23:42:13 +0000790 azDirs[0] = sqlite3_temp_directory;
drhbbd42a62004-05-22 17:41:58 +0000791 for(i=0; i<sizeof(azDirs)/sizeof(azDirs[0]); i++){
drhab3f9fe2004-08-14 17:10:10 +0000792 if( azDirs[i]==0 ) continue;
drhbbd42a62004-05-22 17:41:58 +0000793 if( stat(azDirs[i], &buf) ) continue;
794 if( !S_ISDIR(buf.st_mode) ) continue;
795 if( access(azDirs[i], 07) ) continue;
796 zDir = azDirs[i];
797 break;
798 }
799 do{
800 sprintf(zBuf, "%s/"TEMP_FILE_PREFIX, zDir);
801 j = strlen(zBuf);
802 sqlite3Randomness(15, &zBuf[j]);
803 for(i=0; i<15; i++, j++){
804 zBuf[j] = (char)zChars[ ((unsigned char)zBuf[j])%(sizeof(zChars)-1) ];
805 }
806 zBuf[j] = 0;
807 }while( access(zBuf,0)==0 );
808 return SQLITE_OK;
809}
810
/*
** Return 1 if zBuf names an existing directory for which access()
** grants read, write and search permission.  Return 0 if zBuf is NULL,
** is an empty string, or fails any of those checks.
**
** When SQLITE_OMIT_PAGER_PRAGMAS is defined no checks are made and the
** routine always returns 1.
*/
static int unixIsDirWritable(char *zBuf){
#ifndef SQLITE_OMIT_PAGER_PRAGMAS
  struct stat sStat;
  if( zBuf==0 || zBuf[0]==0 ) return 0;
  if( stat(zBuf, &sStat)!=0 ) return 0;
  if( !S_ISDIR(sStat.st_mode) ) return 0;
  if( access(zBuf, 07)!=0 ) return 0;
#endif /* SQLITE_OMIT_PAGER_PRAGMAS */
  return 1;
}
826
827/*
drhbbd42a62004-05-22 17:41:58 +0000828** Read data from a file into a buffer. Return SQLITE_OK if all
829** bytes were read successfully and SQLITE_IOERR if anything goes
830** wrong.
831*/
drh9c06c952005-11-26 00:25:00 +0000832static int unixRead(OsFile *id, void *pBuf, int amt){
drhbbd42a62004-05-22 17:41:58 +0000833 int got;
drh9cbe6352005-11-29 03:13:21 +0000834 assert( id );
drhbbd42a62004-05-22 17:41:58 +0000835 SimulateIOError(SQLITE_IOERR);
836 TIMER_START;
drha6abd042004-06-09 17:37:22 +0000837 got = read(id->h, pBuf, amt);
drhbbd42a62004-05-22 17:41:58 +0000838 TIMER_END;
drhe29b9152005-03-18 14:03:15 +0000839 TRACE5("READ %-3d %5d %7d %d\n", id->h, got, last_page, TIMER_ELAPSED);
drhbbd42a62004-05-22 17:41:58 +0000840 SEEK(0);
841 /* if( got<0 ) got = 0; */
842 if( got==amt ){
843 return SQLITE_OK;
844 }else{
845 return SQLITE_IOERR;
846 }
847}
848
849/*
850** Write data from a buffer into a file. Return SQLITE_OK on success
851** or some other error code on failure.
852*/
drh9c06c952005-11-26 00:25:00 +0000853static int unixWrite(OsFile *id, const void *pBuf, int amt){
drhbbd42a62004-05-22 17:41:58 +0000854 int wrote = 0;
drh9cbe6352005-11-29 03:13:21 +0000855 assert( id );
drh4c7f9412005-02-03 00:29:47 +0000856 assert( amt>0 );
drhbbd42a62004-05-22 17:41:58 +0000857 SimulateIOError(SQLITE_IOERR);
drh047d4832004-10-01 14:38:02 +0000858 SimulateDiskfullError;
drhbbd42a62004-05-22 17:41:58 +0000859 TIMER_START;
drha6abd042004-06-09 17:37:22 +0000860 while( amt>0 && (wrote = write(id->h, pBuf, amt))>0 ){
drhbbd42a62004-05-22 17:41:58 +0000861 amt -= wrote;
862 pBuf = &((char*)pBuf)[wrote];
863 }
864 TIMER_END;
drhe29b9152005-03-18 14:03:15 +0000865 TRACE5("WRITE %-3d %5d %7d %d\n", id->h, wrote, last_page, TIMER_ELAPSED);
drhbbd42a62004-05-22 17:41:58 +0000866 SEEK(0);
867 if( amt>0 ){
868 return SQLITE_FULL;
869 }
870 return SQLITE_OK;
871}
872
873/*
874** Move the read/write pointer in a file.
875*/
drh9c06c952005-11-26 00:25:00 +0000876static int unixSeek(OsFile *id, i64 offset){
drh9cbe6352005-11-29 03:13:21 +0000877 assert( id );
drhbbd42a62004-05-22 17:41:58 +0000878 SEEK(offset/1024 + 1);
drhb4746b92005-09-09 01:32:06 +0000879#ifdef SQLITE_TEST
880 if( offset ) SimulateDiskfullError
881#endif
drha6abd042004-06-09 17:37:22 +0000882 lseek(id->h, offset, SEEK_SET);
drhbbd42a62004-05-22 17:41:58 +0000883 return SQLITE_OK;
884}
885
#if defined(SQLITE_TEST)
/*
** Counters for the number of syncs and the number of full syncs that
** have been requested.  They exist only in test builds and are used to
** verify that syncs and fullsyncs are occurring at the right times.
*/
int sqlite3_sync_count = 0;
int sqlite3_fullsync_count = 0;
#endif
894
/*
** The fdatasync() API is used only when the HAVE_FDATASYNC macro is
** defined.  In every other build the name fdatasync is mapped onto
** fsync().
*/
#if !defined(HAVE_FDATASYNC)
# define fdatasync fsync
#endif
902
drhb851b2c2005-03-10 14:11:12 +0000903
/*
** The fsync() system call does not work as advertised on many
** unix systems.  The following procedure is an attempt to make
** it work better.
**
** fd        File descriptor to flush.
** fullSync  Non-zero to request fcntl(F_FULLFSYNC) where that fcntl
**           command exists; ignored elsewhere.
** dataOnly  Non-zero to use fdatasync() instead of fsync().  Only
**           consulted in builds where F_FULLFSYNC is not defined.
**
** The return value is that of the last system call made: zero on
** success, non-zero on failure.
**
** The SQLITE_NO_SYNC macro disables all fsync()s.  This is useful
** for testing when we want to run through the test suite quickly.
** You are strongly advised *not* to deploy with SQLITE_NO_SYNC
** enabled, however, since with SQLITE_NO_SYNC enabled, an OS crash
** or power failure will likely corrupt the database file.
*/
static int full_fsync(int fd, int fullSync, int dataOnly){
  int rc;

  /* In test builds, count every call and, separately, every call that
  ** asked for a full sync, so tests can verify the arguments used. */
#ifdef SQLITE_TEST
  sqlite3_sync_count++;
  if( fullSync ){
    sqlite3_fullsync_count++;
  }
#endif

#if defined(SQLITE_NO_SYNC)
  /* Syncing is compiled out entirely. */
  rc = SQLITE_OK;
#elif defined(F_FULLFSYNC)
  /* Try the full sync when requested.  If it was not requested, or if
  ** it failed, fall back to an ordinary fsync(). */
  rc = fullSync ? fcntl(fd, F_FULLFSYNC, 0) : 1;
  if( rc ){
    rc = fsync(fd);
  }
#else
  rc = dataOnly ? fdatasync(fd) : fsync(fd);
#endif

  return rc;
}
954
955/*
drhbbd42a62004-05-22 17:41:58 +0000956** Make sure all writes to a particular file are committed to disk.
957**
drheb796a72005-09-08 12:38:41 +0000958** If dataOnly==0 then both the file itself and its metadata (file
959** size, access time, etc) are synced. If dataOnly!=0 then only the
960** file data is synced.
961**
drhbbd42a62004-05-22 17:41:58 +0000962** Under Unix, also make sure that the directory entry for the file
963** has been created by fsync-ing the directory that contains the file.
964** If we do not do this and we encounter a power failure, the directory
965** entry for the journal might not exist after we reboot. The next
966** SQLite to access the file will not know that the journal exists (because
967** the directory entry for the journal was never created) and the transaction
968** will not roll back - possibly leading to database corruption.
969*/
drh9c06c952005-11-26 00:25:00 +0000970static int unixSync(OsFile *id, int dataOnly){
drh9cbe6352005-11-29 03:13:21 +0000971 assert( id );
drhbbd42a62004-05-22 17:41:58 +0000972 SimulateIOError(SQLITE_IOERR);
drha6abd042004-06-09 17:37:22 +0000973 TRACE2("SYNC %-3d\n", id->h);
drheb796a72005-09-08 12:38:41 +0000974 if( full_fsync(id->h, id->fullSync, dataOnly) ){
drhbbd42a62004-05-22 17:41:58 +0000975 return SQLITE_IOERR;
drhbbd42a62004-05-22 17:41:58 +0000976 }
drha2854222004-06-17 19:04:17 +0000977 if( id->dirfd>=0 ){
978 TRACE2("DIRSYNC %-3d\n", id->dirfd);
danielk1977d7c03f72005-11-25 10:38:22 +0000979#ifndef SQLITE_DISABLE_DIRSYNC
danielk19770964b232005-11-25 08:47:57 +0000980 if( full_fsync(id->dirfd, id->fullSync, 0) ){
981 return SQLITE_IOERR;
982 }
danielk1977d7c03f72005-11-25 10:38:22 +0000983#endif
drha2854222004-06-17 19:04:17 +0000984 close(id->dirfd); /* Only need to sync once, so close the directory */
985 id->dirfd = -1; /* when we are done. */
986 }
drha2854222004-06-17 19:04:17 +0000987 return SQLITE_OK;
drhbbd42a62004-05-22 17:41:58 +0000988}
989
990/*
danielk1977962398d2004-06-14 09:35:16 +0000991** Sync the directory zDirname. This is a no-op on operating systems other
992** than UNIX.
drhb851b2c2005-03-10 14:11:12 +0000993**
994** This is used to make sure the master journal file has truely been deleted
995** before making changes to individual journals on a multi-database commit.
drhf30cc942005-03-11 17:52:34 +0000996** The F_FULLFSYNC option is not needed here.
danielk1977962398d2004-06-14 09:35:16 +0000997*/
drh9c06c952005-11-26 00:25:00 +0000998static int unixSyncDirectory(const char *zDirname){
danielk1977d7c03f72005-11-25 10:38:22 +0000999#ifdef SQLITE_DISABLE_DIRSYNC
1000 return SQLITE_OK;
1001#else
danielk1977962398d2004-06-14 09:35:16 +00001002 int fd;
1003 int r;
danielk1977369f27e2004-06-15 11:40:04 +00001004 SimulateIOError(SQLITE_IOERR);
drh8e855772005-05-17 11:25:31 +00001005 fd = open(zDirname, O_RDONLY|O_BINARY, 0);
danielk1977369f27e2004-06-15 11:40:04 +00001006 TRACE3("DIRSYNC %-3d (%s)\n", fd, zDirname);
danielk1977962398d2004-06-14 09:35:16 +00001007 if( fd<0 ){
1008 return SQLITE_CANTOPEN;
1009 }
1010 r = fsync(fd);
1011 close(fd);
1012 return ((r==0)?SQLITE_OK:SQLITE_IOERR);
danielk1977d7c03f72005-11-25 10:38:22 +00001013#endif
danielk1977962398d2004-06-14 09:35:16 +00001014}
1015
1016/*
drhbbd42a62004-05-22 17:41:58 +00001017** Truncate an open file to a specified size
1018*/
drh9c06c952005-11-26 00:25:00 +00001019static int unixTruncate(OsFile *id, i64 nByte){
drh9cbe6352005-11-29 03:13:21 +00001020 assert( id );
drhbbd42a62004-05-22 17:41:58 +00001021 SimulateIOError(SQLITE_IOERR);
drha6abd042004-06-09 17:37:22 +00001022 return ftruncate(id->h, nByte)==0 ? SQLITE_OK : SQLITE_IOERR;
drhbbd42a62004-05-22 17:41:58 +00001023}
1024
1025/*
1026** Determine the current size of a file in bytes
1027*/
drh9c06c952005-11-26 00:25:00 +00001028static int unixFileSize(OsFile *id, i64 *pSize){
drhbbd42a62004-05-22 17:41:58 +00001029 struct stat buf;
drh9cbe6352005-11-29 03:13:21 +00001030 assert( id );
drhbbd42a62004-05-22 17:41:58 +00001031 SimulateIOError(SQLITE_IOERR);
drha6abd042004-06-09 17:37:22 +00001032 if( fstat(id->h, &buf)!=0 ){
drhbbd42a62004-05-22 17:41:58 +00001033 return SQLITE_IOERR;
1034 }
1035 *pSize = buf.st_size;
1036 return SQLITE_OK;
1037}
1038
danielk19779a1d0ab2004-06-01 14:09:28 +00001039/*
danielk197713adf8a2004-06-03 16:08:41 +00001040** This routine checks if there is a RESERVED lock held on the specified
1041** file by this or any other process. If such a lock is held, return
drh2ac3ee92004-06-07 16:27:46 +00001042** non-zero. If the file is unlocked or holds only SHARED locks, then
1043** return zero.
danielk197713adf8a2004-06-03 16:08:41 +00001044*/
drh9c06c952005-11-26 00:25:00 +00001045static int unixCheckReservedLock(OsFile *id){
danielk197713adf8a2004-06-03 16:08:41 +00001046 int r = 0;
1047
drh9cbe6352005-11-29 03:13:21 +00001048 assert( id );
drh2b4b5962005-06-15 17:47:55 +00001049 if( CHECK_THREADID(id) ) return SQLITE_MISUSE;
drh2ac3ee92004-06-07 16:27:46 +00001050 sqlite3OsEnterMutex(); /* Needed because id->pLock is shared across threads */
danielk197713adf8a2004-06-03 16:08:41 +00001051
1052 /* Check if a thread in this process holds such a lock */
1053 if( id->pLock->locktype>SHARED_LOCK ){
1054 r = 1;
1055 }
1056
drh2ac3ee92004-06-07 16:27:46 +00001057 /* Otherwise see if some other process holds it.
danielk197713adf8a2004-06-03 16:08:41 +00001058 */
1059 if( !r ){
1060 struct flock lock;
1061 lock.l_whence = SEEK_SET;
drh2ac3ee92004-06-07 16:27:46 +00001062 lock.l_start = RESERVED_BYTE;
1063 lock.l_len = 1;
1064 lock.l_type = F_WRLCK;
drha6abd042004-06-09 17:37:22 +00001065 fcntl(id->h, F_GETLK, &lock);
danielk197713adf8a2004-06-03 16:08:41 +00001066 if( lock.l_type!=F_UNLCK ){
1067 r = 1;
1068 }
1069 }
1070
1071 sqlite3OsLeaveMutex();
drha6abd042004-06-09 17:37:22 +00001072 TRACE3("TEST WR-LOCK %d %d\n", id->h, r);
danielk197713adf8a2004-06-03 16:08:41 +00001073
1074 return r;
1075}
1076
#ifdef SQLITE_DEBUG
/*
** Helper function for printing out trace information from debugging
** binaries. This returns the string representation of the supplied
** integer lock-type, or "ERROR" if the value is not one of the five
** known lock types.
*/
static const char * locktypeName(int locktype){
  if( locktype==NO_LOCK )        return "NONE";
  if( locktype==SHARED_LOCK )    return "SHARED";
  if( locktype==RESERVED_LOCK )  return "RESERVED";
  if( locktype==PENDING_LOCK )   return "PENDING";
  if( locktype==EXCLUSIVE_LOCK ) return "EXCLUSIVE";
  return "ERROR";
}
#endif
1094
danielk197713adf8a2004-06-03 16:08:41 +00001095/*
danielk19779a1d0ab2004-06-01 14:09:28 +00001096** Lock the file with the lock specified by parameter locktype - one
1097** of the following:
1098**
drh2ac3ee92004-06-07 16:27:46 +00001099** (1) SHARED_LOCK
1100** (2) RESERVED_LOCK
1101** (3) PENDING_LOCK
1102** (4) EXCLUSIVE_LOCK
1103**
drhb3e04342004-06-08 00:47:47 +00001104** Sometimes when requesting one lock state, additional lock states
1105** are inserted in between. The locking might fail on one of the later
1106** transitions leaving the lock state different from what it started but
1107** still short of its goal. The following chart shows the allowed
1108** transitions and the inserted intermediate states:
1109**
1110** UNLOCKED -> SHARED
1111** SHARED -> RESERVED
1112** SHARED -> (PENDING) -> EXCLUSIVE
1113** RESERVED -> (PENDING) -> EXCLUSIVE
1114** PENDING -> EXCLUSIVE
drh2ac3ee92004-06-07 16:27:46 +00001115**
drha6abd042004-06-09 17:37:22 +00001116** This routine will only increase a lock. Use the sqlite3OsUnlock()
1117** routine to lower a locking level.
danielk19779a1d0ab2004-06-01 14:09:28 +00001118*/
drh9c06c952005-11-26 00:25:00 +00001119static int unixLock(OsFile *id, int locktype){
danielk1977f42f25c2004-06-25 07:21:28 +00001120 /* The following describes the implementation of the various locks and
1121 ** lock transitions in terms of the POSIX advisory shared and exclusive
1122 ** lock primitives (called read-locks and write-locks below, to avoid
1123 ** confusion with SQLite lock names). The algorithms are complicated
1124 ** slightly in order to be compatible with windows systems simultaneously
1125 ** accessing the same database file, in case that is ever required.
1126 **
1127 ** Symbols defined in os.h indentify the 'pending byte' and the 'reserved
1128 ** byte', each single bytes at well known offsets, and the 'shared byte
1129 ** range', a range of 510 bytes at a well known offset.
1130 **
1131 ** To obtain a SHARED lock, a read-lock is obtained on the 'pending
1132 ** byte'. If this is successful, a random byte from the 'shared byte
1133 ** range' is read-locked and the lock on the 'pending byte' released.
1134 **
danielk197790ba3bd2004-06-25 08:32:25 +00001135 ** A process may only obtain a RESERVED lock after it has a SHARED lock.
1136 ** A RESERVED lock is implemented by grabbing a write-lock on the
1137 ** 'reserved byte'.
danielk1977f42f25c2004-06-25 07:21:28 +00001138 **
1139 ** A process may only obtain a PENDING lock after it has obtained a
danielk197790ba3bd2004-06-25 08:32:25 +00001140 ** SHARED lock. A PENDING lock is implemented by obtaining a write-lock
1141 ** on the 'pending byte'. This ensures that no new SHARED locks can be
1142 ** obtained, but existing SHARED locks are allowed to persist. A process
1143 ** does not have to obtain a RESERVED lock on the way to a PENDING lock.
1144 ** This property is used by the algorithm for rolling back a journal file
1145 ** after a crash.
danielk1977f42f25c2004-06-25 07:21:28 +00001146 **
danielk197790ba3bd2004-06-25 08:32:25 +00001147 ** An EXCLUSIVE lock, obtained after a PENDING lock is held, is
1148 ** implemented by obtaining a write-lock on the entire 'shared byte
1149 ** range'. Since all other locks require a read-lock on one of the bytes
1150 ** within this range, this ensures that no other locks are held on the
1151 ** database.
danielk1977f42f25c2004-06-25 07:21:28 +00001152 **
1153 ** The reason a single byte cannot be used instead of the 'shared byte
1154 ** range' is that some versions of windows do not support read-locks. By
1155 ** locking a random byte from a range, concurrent SHARED locks may exist
1156 ** even if the locking primitive used is always a write-lock.
1157 */
danielk19779a1d0ab2004-06-01 14:09:28 +00001158 int rc = SQLITE_OK;
1159 struct lockInfo *pLock = id->pLock;
1160 struct flock lock;
1161 int s;
1162
drh9cbe6352005-11-29 03:13:21 +00001163 assert( id );
drhe29b9152005-03-18 14:03:15 +00001164 TRACE7("LOCK %d %s was %s(%s,%d) pid=%d\n", id->h, locktypeName(locktype),
danielk19772b444852004-06-29 07:45:33 +00001165 locktypeName(id->locktype), locktypeName(pLock->locktype), pLock->cnt
1166 ,getpid() );
drh2b4b5962005-06-15 17:47:55 +00001167 if( CHECK_THREADID(id) ) return SQLITE_MISUSE;
danielk19779a1d0ab2004-06-01 14:09:28 +00001168
1169 /* If there is already a lock of this type or more restrictive on the
1170 ** OsFile, do nothing. Don't use the end_lock: exit path, as
1171 ** sqlite3OsEnterMutex() hasn't been called yet.
1172 */
danielk197713adf8a2004-06-03 16:08:41 +00001173 if( id->locktype>=locktype ){
drhe29b9152005-03-18 14:03:15 +00001174 TRACE3("LOCK %d %s ok (already held)\n", id->h, locktypeName(locktype));
danielk19779a1d0ab2004-06-01 14:09:28 +00001175 return SQLITE_OK;
1176 }
1177
drhb3e04342004-06-08 00:47:47 +00001178 /* Make sure the locking sequence is correct
drh2ac3ee92004-06-07 16:27:46 +00001179 */
drhb3e04342004-06-08 00:47:47 +00001180 assert( id->locktype!=NO_LOCK || locktype==SHARED_LOCK );
1181 assert( locktype!=PENDING_LOCK );
1182 assert( locktype!=RESERVED_LOCK || id->locktype==SHARED_LOCK );
drh2ac3ee92004-06-07 16:27:46 +00001183
drhb3e04342004-06-08 00:47:47 +00001184 /* This mutex is needed because id->pLock is shared across threads
1185 */
1186 sqlite3OsEnterMutex();
danielk19779a1d0ab2004-06-01 14:09:28 +00001187
1188 /* If some thread using this PID has a lock via a different OsFile*
1189 ** handle that precludes the requested lock, return BUSY.
1190 */
danielk197713adf8a2004-06-03 16:08:41 +00001191 if( (id->locktype!=pLock->locktype &&
drh2ac3ee92004-06-07 16:27:46 +00001192 (pLock->locktype>=PENDING_LOCK || locktype>SHARED_LOCK))
danielk19779a1d0ab2004-06-01 14:09:28 +00001193 ){
1194 rc = SQLITE_BUSY;
1195 goto end_lock;
1196 }
1197
1198 /* If a SHARED lock is requested, and some thread using this PID already
1199 ** has a SHARED or RESERVED lock, then increment reference counts and
1200 ** return SQLITE_OK.
1201 */
1202 if( locktype==SHARED_LOCK &&
1203 (pLock->locktype==SHARED_LOCK || pLock->locktype==RESERVED_LOCK) ){
1204 assert( locktype==SHARED_LOCK );
danielk197713adf8a2004-06-03 16:08:41 +00001205 assert( id->locktype==0 );
danielk1977ecb2a962004-06-02 06:30:16 +00001206 assert( pLock->cnt>0 );
danielk197713adf8a2004-06-03 16:08:41 +00001207 id->locktype = SHARED_LOCK;
danielk19779a1d0ab2004-06-01 14:09:28 +00001208 pLock->cnt++;
1209 id->pOpen->nLock++;
1210 goto end_lock;
1211 }
1212
danielk197713adf8a2004-06-03 16:08:41 +00001213 lock.l_len = 1L;
drh2b4b5962005-06-15 17:47:55 +00001214
danielk19779a1d0ab2004-06-01 14:09:28 +00001215 lock.l_whence = SEEK_SET;
1216
drh3cde3bb2004-06-12 02:17:14 +00001217 /* A PENDING lock is needed before acquiring a SHARED lock and before
1218 ** acquiring an EXCLUSIVE lock. For the SHARED lock, the PENDING will
1219 ** be released.
danielk19779a1d0ab2004-06-01 14:09:28 +00001220 */
drh3cde3bb2004-06-12 02:17:14 +00001221 if( locktype==SHARED_LOCK
1222 || (locktype==EXCLUSIVE_LOCK && id->locktype<PENDING_LOCK)
1223 ){
danielk1977489468c2004-06-28 08:25:47 +00001224 lock.l_type = (locktype==SHARED_LOCK?F_RDLCK:F_WRLCK);
drh2ac3ee92004-06-07 16:27:46 +00001225 lock.l_start = PENDING_BYTE;
drha6abd042004-06-09 17:37:22 +00001226 s = fcntl(id->h, F_SETLK, &lock);
danielk19779a1d0ab2004-06-01 14:09:28 +00001227 if( s ){
1228 rc = (errno==EINVAL) ? SQLITE_NOLFS : SQLITE_BUSY;
1229 goto end_lock;
1230 }
drh3cde3bb2004-06-12 02:17:14 +00001231 }
1232
1233
1234 /* If control gets to this point, then actually go ahead and make
1235 ** operating system calls for the specified lock.
1236 */
1237 if( locktype==SHARED_LOCK ){
1238 assert( pLock->cnt==0 );
1239 assert( pLock->locktype==0 );
danielk19779a1d0ab2004-06-01 14:09:28 +00001240
drh2ac3ee92004-06-07 16:27:46 +00001241 /* Now get the read-lock */
1242 lock.l_start = SHARED_FIRST;
1243 lock.l_len = SHARED_SIZE;
drha6abd042004-06-09 17:37:22 +00001244 s = fcntl(id->h, F_SETLK, &lock);
drh2ac3ee92004-06-07 16:27:46 +00001245
1246 /* Drop the temporary PENDING lock */
1247 lock.l_start = PENDING_BYTE;
1248 lock.l_len = 1L;
danielk19779a1d0ab2004-06-01 14:09:28 +00001249 lock.l_type = F_UNLCK;
drh2b4b5962005-06-15 17:47:55 +00001250 if( fcntl(id->h, F_SETLK, &lock)!=0 ){
1251 rc = SQLITE_IOERR; /* This should never happen */
1252 goto end_lock;
1253 }
danielk19779a1d0ab2004-06-01 14:09:28 +00001254 if( s ){
drhbbd42a62004-05-22 17:41:58 +00001255 rc = (errno==EINVAL) ? SQLITE_NOLFS : SQLITE_BUSY;
1256 }else{
danielk197713adf8a2004-06-03 16:08:41 +00001257 id->locktype = SHARED_LOCK;
danielk1977ecb2a962004-06-02 06:30:16 +00001258 id->pOpen->nLock++;
danielk19779a1d0ab2004-06-01 14:09:28 +00001259 pLock->cnt = 1;
drhbbd42a62004-05-22 17:41:58 +00001260 }
drh3cde3bb2004-06-12 02:17:14 +00001261 }else if( locktype==EXCLUSIVE_LOCK && pLock->cnt>1 ){
1262 /* We are trying for an exclusive lock but another thread in this
1263 ** same process is still holding a shared lock. */
1264 rc = SQLITE_BUSY;
drhbbd42a62004-05-22 17:41:58 +00001265 }else{
drh3cde3bb2004-06-12 02:17:14 +00001266 /* The request was for a RESERVED or EXCLUSIVE lock. It is
danielk19779a1d0ab2004-06-01 14:09:28 +00001267 ** assumed that there is a SHARED or greater lock on the file
1268 ** already.
1269 */
danielk197713adf8a2004-06-03 16:08:41 +00001270 assert( 0!=id->locktype );
danielk19779a1d0ab2004-06-01 14:09:28 +00001271 lock.l_type = F_WRLCK;
1272 switch( locktype ){
1273 case RESERVED_LOCK:
drh2ac3ee92004-06-07 16:27:46 +00001274 lock.l_start = RESERVED_BYTE;
danielk19779a1d0ab2004-06-01 14:09:28 +00001275 break;
danielk19779a1d0ab2004-06-01 14:09:28 +00001276 case EXCLUSIVE_LOCK:
drh2ac3ee92004-06-07 16:27:46 +00001277 lock.l_start = SHARED_FIRST;
1278 lock.l_len = SHARED_SIZE;
danielk19779a1d0ab2004-06-01 14:09:28 +00001279 break;
1280 default:
1281 assert(0);
1282 }
drha6abd042004-06-09 17:37:22 +00001283 s = fcntl(id->h, F_SETLK, &lock);
danielk19779a1d0ab2004-06-01 14:09:28 +00001284 if( s ){
1285 rc = (errno==EINVAL) ? SQLITE_NOLFS : SQLITE_BUSY;
1286 }
drhbbd42a62004-05-22 17:41:58 +00001287 }
danielk19779a1d0ab2004-06-01 14:09:28 +00001288
danielk1977ecb2a962004-06-02 06:30:16 +00001289 if( rc==SQLITE_OK ){
danielk197713adf8a2004-06-03 16:08:41 +00001290 id->locktype = locktype;
danielk1977ecb2a962004-06-02 06:30:16 +00001291 pLock->locktype = locktype;
drh3cde3bb2004-06-12 02:17:14 +00001292 }else if( locktype==EXCLUSIVE_LOCK ){
1293 id->locktype = PENDING_LOCK;
1294 pLock->locktype = PENDING_LOCK;
danielk1977ecb2a962004-06-02 06:30:16 +00001295 }
danielk19779a1d0ab2004-06-01 14:09:28 +00001296
1297end_lock:
drhbbd42a62004-05-22 17:41:58 +00001298 sqlite3OsLeaveMutex();
drhe29b9152005-03-18 14:03:15 +00001299 TRACE4("LOCK %d %s %s\n", id->h, locktypeName(locktype),
danielk19772b444852004-06-29 07:45:33 +00001300 rc==SQLITE_OK ? "ok" : "failed");
drhbbd42a62004-05-22 17:41:58 +00001301 return rc;
1302}
1303
1304/*
drha6abd042004-06-09 17:37:22 +00001305** Lower the locking level on file descriptor id to locktype. locktype
1306** must be either NO_LOCK or SHARED_LOCK.
1307**
1308** If the locking level of the file descriptor is already at or below
1309** the requested locking level, this routine is a no-op.
1310**
drh9c105bb2004-10-02 20:38:28 +00001311** It is not possible for this routine to fail if the second argument
1312** is NO_LOCK. If the second argument is SHARED_LOCK, this routine
1313** might return SQLITE_IOERR instead of SQLITE_OK.
drhbbd42a62004-05-22 17:41:58 +00001314*/
drh9c06c952005-11-26 00:25:00 +00001315static int unixUnlock(OsFile *id, int locktype){
drha6abd042004-06-09 17:37:22 +00001316 struct lockInfo *pLock;
1317 struct flock lock;
drh9c105bb2004-10-02 20:38:28 +00001318 int rc = SQLITE_OK;
drha6abd042004-06-09 17:37:22 +00001319
drh9cbe6352005-11-29 03:13:21 +00001320 assert( id );
drhe29b9152005-03-18 14:03:15 +00001321 TRACE7("UNLOCK %d %d was %d(%d,%d) pid=%d\n", id->h, locktype, id->locktype,
danielk19772b444852004-06-29 07:45:33 +00001322 id->pLock->locktype, id->pLock->cnt, getpid());
drh2b4b5962005-06-15 17:47:55 +00001323 if( CHECK_THREADID(id) ) return SQLITE_MISUSE;
drha6abd042004-06-09 17:37:22 +00001324
1325 assert( locktype<=SHARED_LOCK );
1326 if( id->locktype<=locktype ){
1327 return SQLITE_OK;
1328 }
drhbbd42a62004-05-22 17:41:58 +00001329 sqlite3OsEnterMutex();
drha6abd042004-06-09 17:37:22 +00001330 pLock = id->pLock;
1331 assert( pLock->cnt!=0 );
1332 if( id->locktype>SHARED_LOCK ){
1333 assert( pLock->locktype==id->locktype );
drh9c105bb2004-10-02 20:38:28 +00001334 if( locktype==SHARED_LOCK ){
1335 lock.l_type = F_RDLCK;
1336 lock.l_whence = SEEK_SET;
1337 lock.l_start = SHARED_FIRST;
1338 lock.l_len = SHARED_SIZE;
1339 if( fcntl(id->h, F_SETLK, &lock)!=0 ){
1340 /* This should never happen */
1341 rc = SQLITE_IOERR;
1342 }
1343 }
drhbbd42a62004-05-22 17:41:58 +00001344 lock.l_type = F_UNLCK;
1345 lock.l_whence = SEEK_SET;
drha6abd042004-06-09 17:37:22 +00001346 lock.l_start = PENDING_BYTE;
1347 lock.l_len = 2L; assert( PENDING_BYTE+1==RESERVED_BYTE );
drh2b4b5962005-06-15 17:47:55 +00001348 if( fcntl(id->h, F_SETLK, &lock)==0 ){
1349 pLock->locktype = SHARED_LOCK;
1350 }else{
1351 rc = SQLITE_IOERR; /* This should never happen */
1352 }
drhbbd42a62004-05-22 17:41:58 +00001353 }
drha6abd042004-06-09 17:37:22 +00001354 if( locktype==NO_LOCK ){
1355 struct openCnt *pOpen;
danielk1977ecb2a962004-06-02 06:30:16 +00001356
drha6abd042004-06-09 17:37:22 +00001357 /* Decrement the shared lock counter. Release the lock using an
1358 ** OS call only when all threads in this same process have released
1359 ** the lock.
1360 */
1361 pLock->cnt--;
1362 if( pLock->cnt==0 ){
1363 lock.l_type = F_UNLCK;
1364 lock.l_whence = SEEK_SET;
1365 lock.l_start = lock.l_len = 0L;
drh2b4b5962005-06-15 17:47:55 +00001366 if( fcntl(id->h, F_SETLK, &lock)==0 ){
1367 pLock->locktype = NO_LOCK;
1368 }else{
1369 rc = SQLITE_IOERR; /* This should never happen */
1370 }
drha6abd042004-06-09 17:37:22 +00001371 }
1372
drhbbd42a62004-05-22 17:41:58 +00001373 /* Decrement the count of locks against this same file. When the
1374 ** count reaches zero, close any other file descriptors whose close
1375 ** was deferred because of outstanding locks.
1376 */
drha6abd042004-06-09 17:37:22 +00001377 pOpen = id->pOpen;
drhbbd42a62004-05-22 17:41:58 +00001378 pOpen->nLock--;
1379 assert( pOpen->nLock>=0 );
1380 if( pOpen->nLock==0 && pOpen->nPending>0 ){
1381 int i;
1382 for(i=0; i<pOpen->nPending; i++){
1383 close(pOpen->aPending[i]);
1384 }
1385 sqliteFree(pOpen->aPending);
1386 pOpen->nPending = 0;
1387 pOpen->aPending = 0;
1388 }
1389 }
1390 sqlite3OsLeaveMutex();
drha6abd042004-06-09 17:37:22 +00001391 id->locktype = locktype;
drh9c105bb2004-10-02 20:38:28 +00001392 return rc;
drhbbd42a62004-05-22 17:41:58 +00001393}
1394
1395/*
danielk1977e3026632004-06-22 11:29:02 +00001396** Close a file.
1397*/
drh9cbe6352005-11-29 03:13:21 +00001398static int unixClose(OsFile **pId){
1399 OsFile *id = *pId;
1400 if( !id ) return SQLITE_OK;
drh2b4b5962005-06-15 17:47:55 +00001401 if( CHECK_THREADID(id) ) return SQLITE_MISUSE;
drh9c06c952005-11-26 00:25:00 +00001402 sqlite3Io.xUnlock(id, NO_LOCK);
danielk1977e3026632004-06-22 11:29:02 +00001403 if( id->dirfd>=0 ) close(id->dirfd);
1404 id->dirfd = -1;
1405 sqlite3OsEnterMutex();
1406 if( id->pOpen->nLock ){
1407 /* If there are outstanding locks, do not actually close the file just
1408 ** yet because that would clear those locks. Instead, add the file
1409 ** descriptor to pOpen->aPending. It will be automatically closed when
1410 ** the last lock is cleared.
1411 */
1412 int *aNew;
1413 struct openCnt *pOpen = id->pOpen;
drhad81e872005-08-21 21:45:01 +00001414 aNew = sqliteRealloc( pOpen->aPending, (pOpen->nPending+1)*sizeof(int) );
danielk1977e3026632004-06-22 11:29:02 +00001415 if( aNew==0 ){
1416 /* If a malloc fails, just leak the file descriptor */
1417 }else{
1418 pOpen->aPending = aNew;
drhad81e872005-08-21 21:45:01 +00001419 pOpen->aPending[pOpen->nPending] = id->h;
1420 pOpen->nPending++;
danielk1977e3026632004-06-22 11:29:02 +00001421 }
1422 }else{
1423 /* There are no outstanding locks so we can close the file immediately */
1424 close(id->h);
1425 }
1426 releaseLockInfo(id->pLock);
1427 releaseOpenCnt(id->pOpen);
1428 sqlite3OsLeaveMutex();
1429 id->isOpen = 0;
1430 TRACE2("CLOSE %-3d\n", id->h);
1431 OpenCounter(-1);
drh9cbe6352005-11-29 03:13:21 +00001432 sqliteFree(id);
1433 *pId = 0;
danielk1977e3026632004-06-22 11:29:02 +00001434 return SQLITE_OK;
1435}
1436
/*
** Turn a relative pathname into a full pathname.  Return a pointer
** to the full pathname stored in space obtained from sqliteMalloc().
** The calling function is responsible for freeing this space once it
** is no longer needed.
**
** A name that already begins with '/' is copied unchanged.  Any other
** name is prefixed with the current working directory and a '/'.
** Returns 0 if the scratch buffer for getcwd() cannot be allocated.
**
** NOTE(review): getcwd() returns NULL on failure, and that NULL is
** passed as the first string to sqlite3SetString().  The argument list
** there is ended by a (char*)0 sentinel, so a NULL working directory
** presumably ends the list early - confirm the intended behavior.
*/
static char *unixFullPathname(const char *zRelative){
  char *zFull = 0;
  char *zCwd;

  if( zRelative[0]=='/' ){
    sqlite3SetString(&zFull, zRelative, (char*)0);
    return zFull;
  }

  zCwd = sqliteMalloc(5000);
  if( zCwd==0 ){
    return 0;
  }
  zCwd[0] = 0;
  sqlite3SetString(&zFull, getcwd(zCwd, 5000), "/", zRelative,
                   (char*)0);
  sqliteFree(zCwd);
  return zFull;
}
1459
drh18839212005-11-26 03:43:23 +00001460/*
drh9cbe6352005-11-29 03:13:21 +00001461** Change the value of the fullsync flag in the given file descriptor.
drh18839212005-11-26 03:43:23 +00001462*/
drh9cbe6352005-11-29 03:13:21 +00001463static void unixSetFullSync(OsFile *id, int v){
1464 id->fullSync = v;
1465}
1466
1467/*
1468** Return the underlying file handle for an OsFile
1469*/
1470static int unixFileHandle(OsFile *id){
1471 return id->h;
1472}
1473
1474/*
1475** Return an integer that indices the type of lock currently held
1476** by this handle. (Used for testing and analysis only.)
1477*/
1478static int unixLockState(OsFile *id){
1479 return id->locktype;
drh18839212005-11-26 03:43:23 +00001480}
drh0ccebe72005-06-07 22:22:50 +00001481
drh9c06c952005-11-26 00:25:00 +00001482/*
1483** This is the structure that defines all of the I/O routines.
1484*/
1485struct sqlite3IoVtbl sqlite3Io = {
1486 unixDelete,
1487 unixFileExists,
1488 unixOpenReadWrite,
1489 unixOpenExclusive,
1490 unixOpenReadOnly,
1491 unixOpenDirectory,
1492 unixSyncDirectory,
1493 unixTempFileName,
1494 unixIsDirWritable,
1495 unixClose,
1496 unixRead,
1497 unixWrite,
1498 unixSeek,
1499 unixSync,
1500 unixTruncate,
1501 unixFileSize,
1502 unixFullPathname,
1503 unixLock,
1504 unixUnlock,
1505 unixCheckReservedLock,
drh9cbe6352005-11-29 03:13:21 +00001506 unixSetFullSync,
1507 unixFileHandle,
1508 unixLockState,
drh9c06c952005-11-26 00:25:00 +00001509};
1510
1511
drh0ccebe72005-06-07 22:22:50 +00001512#endif /* SQLITE_OMIT_DISKIO */
/***************************************************************************
** Everything above deals with file I/O.  Everything that follows deals
** with other miscellaneous aspects of the operating system interface
****************************************************************************/
1517
1518
1519/*
drhbbd42a62004-05-22 17:41:58 +00001520** Get information to seed the random number generator. The seed
1521** is written into the buffer zBuf[256]. The calling function must
1522** supply a sufficiently large buffer.
1523*/
1524int sqlite3OsRandomSeed(char *zBuf){
1525 /* We have to initialize zBuf to prevent valgrind from reporting
1526 ** errors. The reports issued by valgrind are incorrect - we would
1527 ** prefer that the randomness be increased by making use of the
1528 ** uninitialized space in zBuf - but valgrind errors tend to worry
1529 ** some users. Rather than argue, it seems easier just to initialize
1530 ** the whole array and silence valgrind, even if that means less randomness
1531 ** in the random seed.
1532 **
1533 ** When testing, initializing zBuf[] to zero is all we do. That means
1534 ** that we always use the same random number sequence.* This makes the
1535 ** tests repeatable.
1536 */
1537 memset(zBuf, 0, 256);
1538#if !defined(SQLITE_TEST)
1539 {
drh842b8642005-01-21 17:53:17 +00001540 int pid, fd;
1541 fd = open("/dev/urandom", O_RDONLY);
1542 if( fd<0 ){
1543 time((time_t*)zBuf);
1544 pid = getpid();
1545 memcpy(&zBuf[sizeof(time_t)], &pid, sizeof(pid));
1546 }else{
1547 read(fd, zBuf, 256);
1548 close(fd);
1549 }
drhbbd42a62004-05-22 17:41:58 +00001550 }
1551#endif
1552 return SQLITE_OK;
1553}
1554
/*
** Sleep for a little while.  Return the amount of time slept, in
** milliseconds.
**
** A request for zero or fewer milliseconds returns 0 without sleeping.
** When usleep() is not available the delay is rounded up to a whole
** number of seconds, and the rounded-up value is what is returned.
*/
int sqlite3OsSleep(int ms){
  /* A negative value must never reach sleep() or usleep(): both take
  ** unsigned arguments, so it would turn into an enormous delay. */
  if( ms<=0 ) return 0;
#if defined(HAVE_USLEEP) && HAVE_USLEEP
  /* POSIX only requires usleep() to accept arguments below 1000000, and
  ** ms*1000 overflows an int for long delays.  So sleep the whole seconds
  ** with sleep() and only the sub-second remainder with usleep(). */
  if( ms>=1000 ) sleep(ms/1000);
  if( ms%1000 ) usleep((ms%1000)*1000);
  return ms;
#else
  {
    int nSec = (ms+999)/1000;     /* Delay rounded up to whole seconds */
    sleep(nSec);
    return 1000*nSec;
  }
#endif
}
1567
/*
** Static variables used for thread synchronization.
**
** inMutex is set to 1 by sqlite3OsEnterMutex() and cleared by
** sqlite3OsLeaveMutex().  It is consulted only by the assert() statements
** in those two routines.  The pthread mutex itself exists only when
** SQLITE_UNIX_THREADS is defined.
*/
static int inMutex = 0;
#ifdef SQLITE_UNIX_THREADS
static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
#endif

/*
** The following pair of routines implements mutual exclusion for
** multi-threaded processes.  Only a single thread is allowed to
** execute code that is surrounded by EnterMutex() and LeaveMutex().
**
** SQLite uses only a single Mutex.  There is not much critical
** code and what little there is executes quickly and without blocking.
**
** Calls must be strictly paired and must not be nested: EnterMutex()
** asserts that the mutex is not already marked as held, and LeaveMutex()
** asserts that it is.
*/
void sqlite3OsEnterMutex(void){
#ifdef SQLITE_UNIX_THREADS
  pthread_mutex_lock(&mutex);
#endif
  /* inMutex is touched only while the lock (if any) is held. */
  assert( !inMutex );
  inMutex = 1;
}
void sqlite3OsLeaveMutex(void){
  assert( inMutex );
  inMutex = 0;
#ifdef SQLITE_UNIX_THREADS
  pthread_mutex_unlock(&mutex);
#endif
}
1598
/*
** The following variable, if set to a non-zero value, becomes the result
** returned from sqlite3OsCurrentTime().  This is used for testing.
**
** The value is divided by 86400.0 and offset by 2440587.5 to form the
** Julian Day number, i.e. it is treated as a count of seconds in the
** same way as the value returned by time().
*/
#ifdef SQLITE_TEST
int sqlite3_current_time = 0;
#endif

/*
** Find the current time (in Universal Coordinated Time).  Write the
** current time and date as a Julian Day number into *prNow and
** return 0.  Return 1 if the time and date cannot be found, in which
** case *prNow is left unchanged.
**
** If NO_GETTOD is defined the time comes from time() and has a
** resolution of one second.  Otherwise gettimeofday() is used and the
** microsecond field contributes to the result.
*/
int sqlite3OsCurrentTime(double *prNow){
#ifdef SQLITE_TEST
  /* A test-supplied time overrides the system clock entirely. */
  if( sqlite3_current_time ){
    *prNow = sqlite3_current_time/86400.0 + 2440587.5;
    return 0;
  }
#endif
  {
#ifdef NO_GETTOD
    time_t t = time(0);
    if( t==(time_t)-1 ) return 1;
    *prNow = t/86400.0 + 2440587.5;
#else
    struct timeval sNow;
    /* The timezone argument is obsolete; POSIX leaves the behavior
    ** unspecified when it is not a null pointer. */
    if( gettimeofday(&sNow, 0) ) return 1;
    *prNow = 2440587.5 + sNow.tv_sec/86400.0 + sNow.tv_usec/86400000000.0;
#endif
  }
  return 0;
}
1630
drhbbd42a62004-05-22 17:41:58 +00001631#endif /* OS_UNIX */