blob: f4e09b5364b4feaea09c06fb459acd12c8c04cf5 [file] [log] [blame]
drhbbd42a62004-05-22 17:41:58 +00001/*
2** 2004 May 22
3**
4** The author disclaims copyright to this source code. In place of
5** a legal notice, here is a blessing:
6**
7** May you do good and not evil.
8** May you find forgiveness for yourself and forgive others.
9** May you share freely, never taking more than you give.
10**
11******************************************************************************
12**
13** This file contains code that is specific to Unix systems.
14*/
drhbbd42a62004-05-22 17:41:58 +000015#include "sqliteInt.h"
drheb206252004-10-01 02:00:31 +000016#include "os.h"
17#if OS_UNIX /* This file is used on unix only */
drhbbd42a62004-05-22 17:41:58 +000018
19
20#include <time.h>
drh19e2d372005-08-29 23:00:03 +000021#include <sys/time.h>
drhbbd42a62004-05-22 17:41:58 +000022#include <errno.h>
23#include <unistd.h>
drh0ccebe72005-06-07 22:22:50 +000024
25/*
26** Do not include any of the File I/O interface procedures if the
** SQLITE_OMIT_DISKIO macro is defined (indicating that the database
28** will be in-memory only)
29*/
30#ifndef SQLITE_OMIT_DISKIO
31
32
/*
** Provide fallback definitions for open() flags that are missing on
** some systems.  A missing flag is defined as 0 so that OR-ing it into
** a flag word is harmless.  When SQLITE_DISABLE_LFS is defined,
** O_LARGEFILE is forced to 0 even if the system supplies it.
*/
#if defined(SQLITE_DISABLE_LFS)
# undef O_LARGEFILE
# define O_LARGEFILE 0
#elif !defined(O_LARGEFILE)
# define O_LARGEFILE 0
#endif
#if !defined(O_NOFOLLOW)
# define O_NOFOLLOW 0
#endif
#if !defined(O_BINARY)
# define O_BINARY 0
#endif

/*
** The DJGPP compiler environment looks mostly like Unix, but it lacks
** the fcntl() system call.  Replace fcntl() with an expression that
** always evaluates to 0 (success).  This means that no locking occurs
** under DJGPP.  But it's DOS - what did you expect?
*/
#if defined(__DJGPP__)
# define fcntl(A,B,C) 0
#endif
59
60/*
drhbbd42a62004-05-22 17:41:58 +000061** Include code that is common to all os_*.c files
62*/
63#include "os_common.h"
64
/*
** The threadid macro expands to the identifier of the calling thread
** when threading support is compiled in (SQLITE_UNIX_THREADS), and to
** the constant 0 otherwise.  Used for testing and debugging only.
*/
#ifndef SQLITE_UNIX_THREADS
# define threadid 0
#else
# define threadid pthread_self()
#endif
74
/*
** Set or check the OsFile.tid field.  This field is set when an OsFile
** is first opened.  All subsequent uses of the OsFile verify that the
** same thread is operating on the OsFile.  Some operating systems do
** not allow locks to be overridden by other threads and that restriction
** means that sqlite3* database handles cannot be moved from one thread
** to another.  This logic makes sure a user does not try to do that
** by mistake.
**
** SET_THREADID(X)    Record the calling thread in X->tid.
** CHECK_THREADID(X)  True (non-zero) if the calling thread is NOT the
**                    thread recorded in X->tid.
**
** Without SQLITE_UNIX_THREADS, SET_THREADID() expands to nothing and
** CHECK_THREADID() is always 0.
**
** The macro argument and the SET_THREADID() expansion are fully
** parenthesized so that the macros behave correctly for any pointer
** expression and in any expression context.
*/
#ifdef SQLITE_UNIX_THREADS
# define SET_THREADID(X)   ((X)->tid = pthread_self())
# define CHECK_THREADID(X) (!pthread_equal((X)->tid, pthread_self()))
#else
# define SET_THREADID(X)
# define CHECK_THREADID(X) 0
#endif
91
drhbbd42a62004-05-22 17:41:58 +000092/*
93** Here is the dirt on POSIX advisory locks: ANSI STD 1003.1 (1996)
94** section 6.5.2.2 lines 483 through 490 specify that when a process
95** sets or clears a lock, that operation overrides any prior locks set
96** by the same process. It does not explicitly say so, but this implies
97** that it overrides locks set by the same process using a different
98** file descriptor. Consider this test case:
99**
100** int fd1 = open("./file1", O_RDWR|O_CREAT, 0644);
101** int fd2 = open("./file2", O_RDWR|O_CREAT, 0644);
102**
103** Suppose ./file1 and ./file2 are really the same file (because
104** one is a hard or symbolic link to the other) then if you set
105** an exclusive lock on fd1, then try to get an exclusive lock
106** on fd2, it works. I would have expected the second lock to
107** fail since there was already a lock on the file due to fd1.
108** But not so. Since both locks came from the same process, the
109** second overrides the first, even though they were on different
110** file descriptors opened on different file names.
111**
112** Bummer. If you ask me, this is broken. Badly broken. It means
113** that we cannot use POSIX locks to synchronize file access among
114** competing threads of the same process. POSIX locks will work fine
115** to synchronize access for threads in separate processes, but not
116** threads within the same process.
117**
118** To work around the problem, SQLite has to manage file locks internally
119** on its own. Whenever a new database is opened, we have to find the
120** specific inode of the database file (the inode is determined by the
121** st_dev and st_ino fields of the stat structure that fstat() fills in)
122** and check for locks already existing on that inode. When locks are
123** created or removed, we have to look at our own internal record of the
124** locks to see if another thread has previously set a lock on that same
125** inode.
126**
127** The OsFile structure for POSIX is no longer just an integer file
128** descriptor. It is now a structure that holds the integer file
129** descriptor and a pointer to a structure that describes the internal
130** locks on the corresponding inode. There is one locking structure
131** per inode, so if the same inode is opened twice, both OsFile structures
132** point to the same locking structure. The locking structure keeps
133** a reference count (so we will know when to delete it) and a "cnt"
134** field that tells us its internal lock status. cnt==0 means the
135** file is unlocked. cnt==-1 means the file has an exclusive lock.
136** cnt>0 means there are cnt shared locks on the file.
137**
138** Any attempt to lock or unlock a file first checks the locking
139** structure. The fcntl() system call is only invoked to set a
140** POSIX lock if the internal lock structure transitions between
141** a locked and an unlocked state.
142**
143** 2004-Jan-11:
144** More recent discoveries about POSIX advisory locks. (The more
** I discover, the more I realize that POSIX advisory locks are
146** an abomination.)
147**
148** If you close a file descriptor that points to a file that has locks,
149** all locks on that file that are owned by the current process are
150** released. To work around this problem, each OsFile structure contains
151** a pointer to an openCnt structure. There is one openCnt structure
152** per open inode, which means that multiple OsFiles can point to a single
153** openCnt. When an attempt is made to close an OsFile, if there are
154** other OsFiles open on the same inode that are holding locks, the call
155** to close() the file descriptor is deferred until all of the locks clear.
156** The openCnt structure keeps a list of file descriptors that need to
157** be closed and that list is walked (and cleared) when the last lock
158** clears.
159**
160** First, under Linux threads, because each thread has a separate
161** process ID, lock operations in one thread do not override locks
162** to the same file in other threads. Linux threads behave like
163** separate processes in this respect. But, if you close a file
164** descriptor in linux threads, all locks are cleared, even locks
165** on other threads and even though the other threads have different
166** process IDs. Linux threads is inconsistent in this respect.
167** (I'm beginning to think that linux threads is an abomination too.)
168** The consequence of this all is that the hash table for the lockInfo
169** structure has to include the process id as part of its key because
170** locks in different threads are treated as distinct. But the
171** openCnt structure should not include the process id in its
172** key because close() clears lock on all threads, not just the current
173** thread. Were it not for this goofiness in linux threads, we could
174** combine the lockInfo and openCnt structures into a single structure.
drh5fdae772004-06-29 03:29:00 +0000175**
176** 2004-Jun-28:
177** On some versions of linux, threads can override each others locks.
178** On others not. Sometimes you can change the behavior on the same
179** system by setting the LD_ASSUME_KERNEL environment variable. The
180** POSIX standard is silent as to which behavior is correct, as far
181** as I can tell, so other versions of unix might show the same
** inconsistency. There is little doubt in my mind that posix
183** advisory locks and linux threads are profoundly broken.
184**
185** To work around the inconsistencies, we have to test at runtime
186** whether or not threads can override each others locks. This test
187** is run once, the first time any lock is attempted. A static
188** variable is set to record the results of this test for future
189** use.
drhbbd42a62004-05-22 17:41:58 +0000190*/
191
/*
** Key used to look up a lockInfo structure in the lockHash table.
** It identifies an inode and, optionally, a thread.
**
** If threads cannot override each others locks, the tid field holds
** the thread ID.  If threads can override each others locks, tid is
** always zero.  When compiled without threading support the tid field
** does not exist at all.
**
** The key is hashed and compared as raw bytes, so findLockInfo()
** zeroes the whole structure with memset() before filling it in.
*/
struct lockKey {
  /* Device number */
  dev_t dev;
  /* Inode number */
  ino_t ino;
#ifdef SQLITE_UNIX_THREADS
  /* Thread ID or zero if threads cannot override each other */
  pthread_t tid;
#endif
};
208
209/*
210** An instance of the following structure is allocated for each open
211** inode on each thread with a different process ID. (Threads have
212** different process IDs on linux, but not on most other unixes.)
213**
214** A single inode can have multiple file descriptors, so each OsFile
215** structure contains a pointer to an instance of this object and this
216** object keeps a count of the number of OsFiles pointing to it.
217*/
218struct lockInfo {
219 struct lockKey key; /* The lookup key */
drh2ac3ee92004-06-07 16:27:46 +0000220 int cnt; /* Number of SHARED locks held */
danielk19779a1d0ab2004-06-01 14:09:28 +0000221 int locktype; /* One of SHARED_LOCK, RESERVED_LOCK etc. */
drhbbd42a62004-05-22 17:41:58 +0000222 int nRef; /* Number of pointers to this structure */
223};
224
/*
** Key used to look up an openCnt structure in the openHash table.
** It has the same device/inode fields as lockKey but carries no
** thread ID.
*/
struct openKey {
  /* Device number */
  dev_t dev;
  /* Inode number */
  ino_t ino;
};
234
235/*
236** An instance of the following structure is allocated for each open
237** inode. This structure keeps track of the number of locks on that
238** inode. If a close is attempted against an inode that is holding
239** locks, the close is deferred until all locks clear by adding the
240** file descriptor to be closed to the pending list.
241*/
242struct openCnt {
243 struct openKey key; /* The lookup key */
244 int nRef; /* Number of pointers to this structure */
245 int nLock; /* Number of outstanding locks */
246 int nPending; /* Number of pending close() operations */
247 int *aPending; /* Malloced space holding fd's awaiting a close() */
248};
249
250/*
251** These hash table maps inodes and process IDs into lockInfo and openCnt
252** structures. Access to these hash tables must be protected by a mutex.
253*/
254static Hash lockHash = { SQLITE_HASH_BINARY, 0, 0, 0, 0, 0 };
255static Hash openHash = { SQLITE_HASH_BINARY, 0, 0, 0, 0, 0 };
256
drh5fdae772004-06-29 03:29:00 +0000257
258#ifdef SQLITE_UNIX_THREADS
/*
** Records whether or not threads can override each others locks:
**
**    -1:  Not yet determined.
**     0:  No.  Threads cannot override each others locks.
**     1:  Yes.  Threads can override each others locks.
**
** The value starts out as -1 and is set by testThreadLockingBehavior().
*/
static int threadsOverrideEachOthersLocks = -1;
268
/*
** Argument block handed to each test thread started by the
** testThreadLockingBehavior() routine.  The thread writes the outcome
** of its lock attempt back into the result field.
*/
struct threadTestData {
  /* File to be locked */
  int fd;
  /* The locking operation */
  struct flock lock;
  /* Result of the locking operation */
  int result;
};
278
#ifdef SQLITE_LOCK_TRACE
/*
** Translate an flock.l_type value into a printable name.
**
** An unexpected value trips an assert() in debug builds.  When asserts
** are compiled out a placeholder string is returned, so the caller
** never prints through an uninitialized pointer.
*/
static const char *lockTraceTypeName(int eType){
  if( eType==F_RDLCK ) return "RDLCK";
  if( eType==F_WRLCK ) return "WRLCK";
  if( eType==F_UNLCK ) return "UNLCK";
  assert( 0 );
  return "?????";
}

/*
** Print out information about all locking operations.
**
** This routine is used for troubleshooting locks on multithreaded
** platforms.  Enable by compiling with the -DSQLITE_LOCK_TRACE
** command-line option on the compiler.  This code is normally
** turned off.
**
** The routine performs fcntl(fd, op, p), prints a description of the
** request and its result, and returns whatever fcntl() returned.  The
** errno value left by that fcntl() call is preserved across the tracing
** output.  When an F_SETLK request for a read or write lock fails, an
** additional F_GETLK query is issued on a copy of *p and its result is
** printed as the failure reason.
**
** Operations other than F_GETLK and F_SETLK are passed straight through
** with a short "unknown" trace line.
*/
static int lockTrace(int fd, int op, struct flock *p){
  const char *zOpName;
  const char *zType;
  int s;
  int savedErrno;
  if( op==F_GETLK ){
    zOpName = "GETLK";
  }else if( op==F_SETLK ){
    zOpName = "SETLK";
  }else{
    s = fcntl(fd, op, p);
    sqlite3DebugPrintf("fcntl unknown %d %d %d\n", fd, op, s);
    return s;
  }
  /* Capture the requested lock type before the call: the type name
  ** printed below describes the request, not whatever fcntl() may
  ** write back into *p. */
  zType = lockTraceTypeName(p->l_type);
  assert( p->l_whence==SEEK_SET );
  s = fcntl(fd, op, p);
  savedErrno = errno;
  sqlite3DebugPrintf("fcntl %d %d %s %s %d %d %d %d\n",
     threadid, fd, zOpName, zType, (int)p->l_start, (int)p->l_len,
     (int)p->l_pid, s);
  if( s && op==F_SETLK && (p->l_type==F_RDLCK || p->l_type==F_WRLCK) ){
    struct flock l2;
    l2 = *p;
    fcntl(fd, F_GETLK, &l2);
    zType = lockTraceTypeName(l2.l_type);
    sqlite3DebugPrintf("fcntl-failure-reason: %s %d %d %d\n",
       zType, (int)l2.l_start, (int)l2.l_len, (int)l2.l_pid);
  }
  errno = savedErrno;
  return s;
}
/* From here on, every fcntl() call in this file goes through lockTrace() */
#define fcntl lockTrace
#endif /* SQLITE_LOCK_TRACE */
337
drh5fdae772004-06-29 03:29:00 +0000338/*
339** The testThreadLockingBehavior() routine launches two separate
340** threads on this routine. This routine attempts to lock a file
341** descriptor then returns. The success or failure of that attempt
342** allows the testThreadLockingBehavior() procedure to determine
343** whether or not threads can override each others locks.
344*/
345static void *threadLockingTest(void *pArg){
346 struct threadTestData *pData = (struct threadTestData*)pArg;
347 pData->result = fcntl(pData->fd, F_SETLK, &pData->lock);
348 return pArg;
349}
350
351/*
352** This procedure attempts to determine whether or not threads
353** can override each others locks then sets the
354** threadsOverrideEachOthersLocks variable appropriately.
355*/
356static void testThreadLockingBehavior(fd_orig){
357 int fd;
358 struct threadTestData d[2];
359 pthread_t t[2];
360
361 fd = dup(fd_orig);
362 if( fd<0 ) return;
363 memset(d, 0, sizeof(d));
364 d[0].fd = fd;
365 d[0].lock.l_type = F_RDLCK;
366 d[0].lock.l_len = 1;
367 d[0].lock.l_start = 0;
368 d[0].lock.l_whence = SEEK_SET;
369 d[1] = d[0];
370 d[1].lock.l_type = F_WRLCK;
371 pthread_create(&t[0], 0, threadLockingTest, &d[0]);
372 pthread_create(&t[1], 0, threadLockingTest, &d[1]);
373 pthread_join(t[0], 0);
374 pthread_join(t[1], 0);
375 close(fd);
376 threadsOverrideEachOthersLocks = d[0].result==0 && d[1].result==0;
377}
378#endif /* SQLITE_UNIX_THREADS */
379
drhbbd42a62004-05-22 17:41:58 +0000380/*
381** Release a lockInfo structure previously allocated by findLockInfo().
382*/
383static void releaseLockInfo(struct lockInfo *pLock){
384 pLock->nRef--;
385 if( pLock->nRef==0 ){
386 sqlite3HashInsert(&lockHash, &pLock->key, sizeof(pLock->key), 0);
387 sqliteFree(pLock);
388 }
389}
390
391/*
392** Release a openCnt structure previously allocated by findLockInfo().
393*/
394static void releaseOpenCnt(struct openCnt *pOpen){
395 pOpen->nRef--;
396 if( pOpen->nRef==0 ){
397 sqlite3HashInsert(&openHash, &pOpen->key, sizeof(pOpen->key), 0);
398 sqliteFree(pOpen->aPending);
399 sqliteFree(pOpen);
400 }
401}
402
403/*
404** Given a file descriptor, locate lockInfo and openCnt structures that
405** describes that file descriptor. Create a new ones if necessary. The
406** return values might be unset if an error occurs.
407**
408** Return the number of errors.
409*/
drh38f82712004-06-18 17:10:16 +0000410static int findLockInfo(
drhbbd42a62004-05-22 17:41:58 +0000411 int fd, /* The file descriptor used in the key */
412 struct lockInfo **ppLock, /* Return the lockInfo structure here */
drh5fdae772004-06-29 03:29:00 +0000413 struct openCnt **ppOpen /* Return the openCnt structure here */
drhbbd42a62004-05-22 17:41:58 +0000414){
415 int rc;
416 struct lockKey key1;
417 struct openKey key2;
418 struct stat statbuf;
419 struct lockInfo *pLock;
420 struct openCnt *pOpen;
421 rc = fstat(fd, &statbuf);
422 if( rc!=0 ) return 1;
423 memset(&key1, 0, sizeof(key1));
424 key1.dev = statbuf.st_dev;
425 key1.ino = statbuf.st_ino;
drh5fdae772004-06-29 03:29:00 +0000426#ifdef SQLITE_UNIX_THREADS
427 if( threadsOverrideEachOthersLocks<0 ){
428 testThreadLockingBehavior(fd);
429 }
430 key1.tid = threadsOverrideEachOthersLocks ? 0 : pthread_self();
431#endif
drhbbd42a62004-05-22 17:41:58 +0000432 memset(&key2, 0, sizeof(key2));
433 key2.dev = statbuf.st_dev;
434 key2.ino = statbuf.st_ino;
435 pLock = (struct lockInfo*)sqlite3HashFind(&lockHash, &key1, sizeof(key1));
436 if( pLock==0 ){
437 struct lockInfo *pOld;
438 pLock = sqliteMallocRaw( sizeof(*pLock) );
439 if( pLock==0 ) return 1;
440 pLock->key = key1;
441 pLock->nRef = 1;
442 pLock->cnt = 0;
danielk19779a1d0ab2004-06-01 14:09:28 +0000443 pLock->locktype = 0;
drhbbd42a62004-05-22 17:41:58 +0000444 pOld = sqlite3HashInsert(&lockHash, &pLock->key, sizeof(key1), pLock);
445 if( pOld!=0 ){
446 assert( pOld==pLock );
447 sqliteFree(pLock);
448 return 1;
449 }
450 }else{
451 pLock->nRef++;
452 }
453 *ppLock = pLock;
454 pOpen = (struct openCnt*)sqlite3HashFind(&openHash, &key2, sizeof(key2));
455 if( pOpen==0 ){
456 struct openCnt *pOld;
457 pOpen = sqliteMallocRaw( sizeof(*pOpen) );
458 if( pOpen==0 ){
459 releaseLockInfo(pLock);
460 return 1;
461 }
462 pOpen->key = key2;
463 pOpen->nRef = 1;
464 pOpen->nLock = 0;
465 pOpen->nPending = 0;
466 pOpen->aPending = 0;
467 pOld = sqlite3HashInsert(&openHash, &pOpen->key, sizeof(key2), pOpen);
468 if( pOld!=0 ){
469 assert( pOld==pOpen );
470 sqliteFree(pOpen);
471 releaseLockInfo(pLock);
472 return 1;
473 }
474 }else{
475 pOpen->nRef++;
476 }
477 *ppOpen = pOpen;
478 return 0;
479}
480
481/*
482** Delete the named file
483*/
484int sqlite3OsDelete(const char *zFilename){
485 unlink(zFilename);
486 return SQLITE_OK;
487}
488
/*
** Return TRUE (1) if the named file exists and FALSE (0) otherwise.
** Existence is probed with access() using mode 0.
*/
int sqlite3OsFileExists(const char *zFilename){
  if( access(zFilename, 0)!=0 ){
    return 0;
  }
  return 1;
}
495
496/*
497** Attempt to open a file for both reading and writing. If that
498** fails, try opening it read-only. If the file does not exist,
499** try to create it.
500**
501** On success, a handle for the open file is written to *id
502** and *pReadonly is set to 0 if the file was opened for reading and
503** writing or 1 if the file was opened read-only. The function returns
504** SQLITE_OK.
505**
506** On failure, the function returns SQLITE_CANTOPEN and leaves
507** *id and *pReadonly unchanged.
508*/
509int sqlite3OsOpenReadWrite(
510 const char *zFilename,
511 OsFile *id,
512 int *pReadonly
513){
514 int rc;
drhda71ce12004-06-21 18:14:45 +0000515 assert( !id->isOpen );
drhbbd42a62004-05-22 17:41:58 +0000516 id->dirfd = -1;
drh2b4b5962005-06-15 17:47:55 +0000517 SET_THREADID(id);
drh8e855772005-05-17 11:25:31 +0000518 id->h = open(zFilename, O_RDWR|O_CREAT|O_LARGEFILE|O_BINARY,
519 SQLITE_DEFAULT_FILE_PERMISSIONS);
drha6abd042004-06-09 17:37:22 +0000520 if( id->h<0 ){
drh6458e392004-07-20 01:14:13 +0000521#ifdef EISDIR
522 if( errno==EISDIR ){
523 return SQLITE_CANTOPEN;
524 }
525#endif
drha6abd042004-06-09 17:37:22 +0000526 id->h = open(zFilename, O_RDONLY|O_LARGEFILE|O_BINARY);
527 if( id->h<0 ){
drhbbd42a62004-05-22 17:41:58 +0000528 return SQLITE_CANTOPEN;
529 }
530 *pReadonly = 1;
531 }else{
532 *pReadonly = 0;
533 }
534 sqlite3OsEnterMutex();
drha6abd042004-06-09 17:37:22 +0000535 rc = findLockInfo(id->h, &id->pLock, &id->pOpen);
drhbbd42a62004-05-22 17:41:58 +0000536 sqlite3OsLeaveMutex();
537 if( rc ){
drha6abd042004-06-09 17:37:22 +0000538 close(id->h);
drhbbd42a62004-05-22 17:41:58 +0000539 return SQLITE_NOMEM;
540 }
danielk197713adf8a2004-06-03 16:08:41 +0000541 id->locktype = 0;
drhda71ce12004-06-21 18:14:45 +0000542 id->isOpen = 1;
drha6abd042004-06-09 17:37:22 +0000543 TRACE3("OPEN %-3d %s\n", id->h, zFilename);
drhbbd42a62004-05-22 17:41:58 +0000544 OpenCounter(+1);
545 return SQLITE_OK;
546}
547
548
549/*
550** Attempt to open a new file for exclusive access by this process.
551** The file will be opened for both reading and writing. To avoid
552** a potential security problem, we do not allow the file to have
553** previously existed. Nor do we allow the file to be a symbolic
554** link.
555**
556** If delFlag is true, then make arrangements to automatically delete
557** the file when it is closed.
558**
559** On success, write the file handle into *id and return SQLITE_OK.
560**
561** On failure, return SQLITE_CANTOPEN.
562*/
563int sqlite3OsOpenExclusive(const char *zFilename, OsFile *id, int delFlag){
564 int rc;
drhda71ce12004-06-21 18:14:45 +0000565 assert( !id->isOpen );
drhbbd42a62004-05-22 17:41:58 +0000566 if( access(zFilename, 0)==0 ){
567 return SQLITE_CANTOPEN;
568 }
drh2b4b5962005-06-15 17:47:55 +0000569 SET_THREADID(id);
drhbbd42a62004-05-22 17:41:58 +0000570 id->dirfd = -1;
drha6abd042004-06-09 17:37:22 +0000571 id->h = open(zFilename,
drhd6459672005-08-13 17:17:01 +0000572 O_RDWR|O_CREAT|O_EXCL|O_NOFOLLOW|O_LARGEFILE|O_BINARY,
573 SQLITE_DEFAULT_FILE_PERMISSIONS);
drha6abd042004-06-09 17:37:22 +0000574 if( id->h<0 ){
drhbbd42a62004-05-22 17:41:58 +0000575 return SQLITE_CANTOPEN;
576 }
577 sqlite3OsEnterMutex();
drha6abd042004-06-09 17:37:22 +0000578 rc = findLockInfo(id->h, &id->pLock, &id->pOpen);
drhbbd42a62004-05-22 17:41:58 +0000579 sqlite3OsLeaveMutex();
580 if( rc ){
drha6abd042004-06-09 17:37:22 +0000581 close(id->h);
drhbbd42a62004-05-22 17:41:58 +0000582 unlink(zFilename);
583 return SQLITE_NOMEM;
584 }
danielk197713adf8a2004-06-03 16:08:41 +0000585 id->locktype = 0;
drhda71ce12004-06-21 18:14:45 +0000586 id->isOpen = 1;
drhbbd42a62004-05-22 17:41:58 +0000587 if( delFlag ){
588 unlink(zFilename);
589 }
drha6abd042004-06-09 17:37:22 +0000590 TRACE3("OPEN-EX %-3d %s\n", id->h, zFilename);
drhbbd42a62004-05-22 17:41:58 +0000591 OpenCounter(+1);
592 return SQLITE_OK;
593}
594
595/*
596** Attempt to open a new file for read-only access.
597**
598** On success, write the file handle into *id and return SQLITE_OK.
599**
600** On failure, return SQLITE_CANTOPEN.
601*/
602int sqlite3OsOpenReadOnly(const char *zFilename, OsFile *id){
603 int rc;
drhda71ce12004-06-21 18:14:45 +0000604 assert( !id->isOpen );
drh2b4b5962005-06-15 17:47:55 +0000605 SET_THREADID(id);
drhbbd42a62004-05-22 17:41:58 +0000606 id->dirfd = -1;
drha6abd042004-06-09 17:37:22 +0000607 id->h = open(zFilename, O_RDONLY|O_LARGEFILE|O_BINARY);
608 if( id->h<0 ){
drhbbd42a62004-05-22 17:41:58 +0000609 return SQLITE_CANTOPEN;
610 }
611 sqlite3OsEnterMutex();
drha6abd042004-06-09 17:37:22 +0000612 rc = findLockInfo(id->h, &id->pLock, &id->pOpen);
drhbbd42a62004-05-22 17:41:58 +0000613 sqlite3OsLeaveMutex();
614 if( rc ){
drha6abd042004-06-09 17:37:22 +0000615 close(id->h);
drhbbd42a62004-05-22 17:41:58 +0000616 return SQLITE_NOMEM;
617 }
danielk197713adf8a2004-06-03 16:08:41 +0000618 id->locktype = 0;
drhda71ce12004-06-21 18:14:45 +0000619 id->isOpen = 1;
drha6abd042004-06-09 17:37:22 +0000620 TRACE3("OPEN-RO %-3d %s\n", id->h, zFilename);
drhbbd42a62004-05-22 17:41:58 +0000621 OpenCounter(+1);
622 return SQLITE_OK;
623}
624
625/*
626** Attempt to open a file descriptor for the directory that contains a
627** file. This file descriptor can be used to fsync() the directory
628** in order to make sure the creation of a new file is actually written
629** to disk.
630**
631** This routine is only meaningful for Unix. It is a no-op under
632** windows since windows does not support hard links.
633**
634** On success, a handle for a previously open file is at *id is
635** updated with the new directory file descriptor and SQLITE_OK is
636** returned.
637**
638** On failure, the function returns SQLITE_CANTOPEN and leaves
639** *id unchanged.
640*/
641int sqlite3OsOpenDirectory(
642 const char *zDirname,
643 OsFile *id
644){
drhda71ce12004-06-21 18:14:45 +0000645 if( !id->isOpen ){
drhbbd42a62004-05-22 17:41:58 +0000646 /* Do not open the directory if the corresponding file is not already
647 ** open. */
648 return SQLITE_CANTOPEN;
649 }
drh2b4b5962005-06-15 17:47:55 +0000650 SET_THREADID(id);
drhbbd42a62004-05-22 17:41:58 +0000651 assert( id->dirfd<0 );
drh8e855772005-05-17 11:25:31 +0000652 id->dirfd = open(zDirname, O_RDONLY|O_BINARY, 0);
drhbbd42a62004-05-22 17:41:58 +0000653 if( id->dirfd<0 ){
654 return SQLITE_CANTOPEN;
655 }
656 TRACE3("OPENDIR %-3d %s\n", id->dirfd, zDirname);
657 return SQLITE_OK;
658}
659
/*
** If this global variable is set to point at a string naming a
** directory, that directory is tried first when choosing where to
** store temporary files.  It is a null pointer by default.
*/
char *sqlite3_temp_directory = 0;
drhab3f9fe2004-08-14 17:10:10 +0000666
667/*
drhbbd42a62004-05-22 17:41:58 +0000668** Create a temporary file name in zBuf. zBuf must be big enough to
669** hold at least SQLITE_TEMPNAME_SIZE characters.
670*/
671int sqlite3OsTempFileName(char *zBuf){
672 static const char *azDirs[] = {
drhab3f9fe2004-08-14 17:10:10 +0000673 0,
drhbbd42a62004-05-22 17:41:58 +0000674 "/var/tmp",
675 "/usr/tmp",
676 "/tmp",
677 ".",
678 };
drh57196282004-10-06 15:41:16 +0000679 static const unsigned char zChars[] =
drhbbd42a62004-05-22 17:41:58 +0000680 "abcdefghijklmnopqrstuvwxyz"
681 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
682 "0123456789";
683 int i, j;
684 struct stat buf;
685 const char *zDir = ".";
drheffd02b2004-08-29 23:42:13 +0000686 azDirs[0] = sqlite3_temp_directory;
drhbbd42a62004-05-22 17:41:58 +0000687 for(i=0; i<sizeof(azDirs)/sizeof(azDirs[0]); i++){
drhab3f9fe2004-08-14 17:10:10 +0000688 if( azDirs[i]==0 ) continue;
drhbbd42a62004-05-22 17:41:58 +0000689 if( stat(azDirs[i], &buf) ) continue;
690 if( !S_ISDIR(buf.st_mode) ) continue;
691 if( access(azDirs[i], 07) ) continue;
692 zDir = azDirs[i];
693 break;
694 }
695 do{
696 sprintf(zBuf, "%s/"TEMP_FILE_PREFIX, zDir);
697 j = strlen(zBuf);
698 sqlite3Randomness(15, &zBuf[j]);
699 for(i=0; i<15; i++, j++){
700 zBuf[j] = (char)zChars[ ((unsigned char)zBuf[j])%(sizeof(zChars)-1) ];
701 }
702 zBuf[j] = 0;
703 }while( access(zBuf,0)==0 );
704 return SQLITE_OK;
705}
706
#ifndef SQLITE_OMIT_PAGER_PRAGMAS
/*
** Check that a given pathname is a directory and is writable.
**
** Returns 1 if zBuf is a non-empty string naming something that stat()
** reports as a directory and for which access() with mode 07 succeeds.
** Returns 0 in every other case, including a null or empty zBuf.
*/
int sqlite3OsIsDirWritable(char *zBuf){
  struct stat buf;
  return zBuf!=0
      && zBuf[0]!=0
      && stat(zBuf, &buf)==0
      && S_ISDIR(buf.st_mode)
      && access(zBuf, 07)==0;
}
#endif /* SQLITE_OMIT_PAGER_PRAGMAS */
tpoindex9a09a3c2004-12-20 19:01:32 +0000722
723/*
drhbbd42a62004-05-22 17:41:58 +0000724** Read data from a file into a buffer. Return SQLITE_OK if all
725** bytes were read successfully and SQLITE_IOERR if anything goes
726** wrong.
727*/
728int sqlite3OsRead(OsFile *id, void *pBuf, int amt){
729 int got;
drhda71ce12004-06-21 18:14:45 +0000730 assert( id->isOpen );
drhbbd42a62004-05-22 17:41:58 +0000731 SimulateIOError(SQLITE_IOERR);
732 TIMER_START;
drha6abd042004-06-09 17:37:22 +0000733 got = read(id->h, pBuf, amt);
drhbbd42a62004-05-22 17:41:58 +0000734 TIMER_END;
drhe29b9152005-03-18 14:03:15 +0000735 TRACE5("READ %-3d %5d %7d %d\n", id->h, got, last_page, TIMER_ELAPSED);
drhbbd42a62004-05-22 17:41:58 +0000736 SEEK(0);
737 /* if( got<0 ) got = 0; */
738 if( got==amt ){
739 return SQLITE_OK;
740 }else{
741 return SQLITE_IOERR;
742 }
743}
744
745/*
746** Write data from a buffer into a file. Return SQLITE_OK on success
747** or some other error code on failure.
748*/
749int sqlite3OsWrite(OsFile *id, const void *pBuf, int amt){
750 int wrote = 0;
drhda71ce12004-06-21 18:14:45 +0000751 assert( id->isOpen );
drh4c7f9412005-02-03 00:29:47 +0000752 assert( amt>0 );
drhbbd42a62004-05-22 17:41:58 +0000753 SimulateIOError(SQLITE_IOERR);
drh047d4832004-10-01 14:38:02 +0000754 SimulateDiskfullError;
drhbbd42a62004-05-22 17:41:58 +0000755 TIMER_START;
drha6abd042004-06-09 17:37:22 +0000756 while( amt>0 && (wrote = write(id->h, pBuf, amt))>0 ){
drhbbd42a62004-05-22 17:41:58 +0000757 amt -= wrote;
758 pBuf = &((char*)pBuf)[wrote];
759 }
760 TIMER_END;
drhe29b9152005-03-18 14:03:15 +0000761 TRACE5("WRITE %-3d %5d %7d %d\n", id->h, wrote, last_page, TIMER_ELAPSED);
drhbbd42a62004-05-22 17:41:58 +0000762 SEEK(0);
763 if( amt>0 ){
764 return SQLITE_FULL;
765 }
766 return SQLITE_OK;
767}
768
769/*
770** Move the read/write pointer in a file.
771*/
drheb206252004-10-01 02:00:31 +0000772int sqlite3OsSeek(OsFile *id, i64 offset){
drhda71ce12004-06-21 18:14:45 +0000773 assert( id->isOpen );
drhbbd42a62004-05-22 17:41:58 +0000774 SEEK(offset/1024 + 1);
drhb4746b92005-09-09 01:32:06 +0000775#ifdef SQLITE_TEST
776 if( offset ) SimulateDiskfullError
777#endif
drha6abd042004-06-09 17:37:22 +0000778 lseek(id->h, offset, SEEK_SET);
drhbbd42a62004-05-22 17:41:58 +0000779 return SQLITE_OK;
780}
781
#ifdef SQLITE_TEST
/*
** Count the number of fullsyncs and normal syncs.  This is used to test
** that syncs and fullsyncs are occurring at the right times.
*/
int sqlite3_sync_count = 0;
int sqlite3_fullsync_count = 0;
#endif


/*
** The fsync() system call does not work as advertised on many
** unix systems.  The following procedure is an attempt to make
** it work better.
**
** fd is the descriptor to sync.  The strategy depends on what the
** platform offers at compile time:
**
**   *  If F_FULLFSYNC is available and fullSync is true,
**      fcntl(F_FULLFSYNC) is tried first, with a fallback to fsync() if
**      it fails.  If fullSync is false, fsync() is used directly.
**      dataOnly is not consulted on such platforms.
**
**   *  Otherwise, fdatasync() is used when dataOnly is true and
**      _POSIX_SYNCHRONIZED_IO is greater than zero; fsync() is used in
**      all remaining cases.
**
** The return value is that of the last system call made: zero on
** success, non-zero on failure.
**
** The SQLITE_NO_SYNC macro disables all fsync()s.  This is useful
** for testing when we want to run through the test suite quickly.
** You are strongly advised *not* to deploy with SQLITE_NO_SYNC
** enabled, however, since with SQLITE_NO_SYNC enabled, an OS crash
** or power failure will likely corrupt the database file.
*/
static int full_fsync(int fd, int fullSync, int dataOnly){
  int rc;

  /* Record the number of times that we do a normal fsync() and
  ** FULLSYNC.  This is used during testing to verify that this procedure
  ** gets called with the correct arguments.
  */
#ifdef SQLITE_TEST
  sqlite3_sync_count++;
  if( fullSync ){
    sqlite3_fullsync_count++;
  }
#endif

#ifdef SQLITE_NO_SYNC
  /* Compiled with SQLITE_NO_SYNC: syncing is a no-op */
  rc = SQLITE_OK;
#else

# ifdef F_FULLFSYNC
  rc = fullSync ? fcntl(fd, F_FULLFSYNC, 0) : 1;
  /* If the FULLSYNC failed (or was not requested), do a normal fsync() */
  if( rc ){
    rc = fsync(fd);
  }
# else /* if !defined(F_FULLFSYNC) */
#  if defined(_POSIX_SYNCHRONIZED_IO) && _POSIX_SYNCHRONIZED_IO>0
  if( dataOnly ){
    rc = fdatasync(fd);
  }else
#  endif /* _POSIX_SYNCHRONIZED_IO > 0 */
  {
    rc = fsync(fd);
  }
# endif /* defined(F_FULLFSYNC) */

#endif /* defined(SQLITE_NO_SYNC) */

  return rc;
}
845
846/*
drhbbd42a62004-05-22 17:41:58 +0000847** Make sure all writes to a particular file are committed to disk.
848**
drheb796a72005-09-08 12:38:41 +0000849** If dataOnly==0 then both the file itself and its metadata (file
850** size, access time, etc) are synced. If dataOnly!=0 then only the
851** file data is synced.
852**
drhbbd42a62004-05-22 17:41:58 +0000853** Under Unix, also make sure that the directory entry for the file
854** has been created by fsync-ing the directory that contains the file.
855** If we do not do this and we encounter a power failure, the directory
856** entry for the journal might not exist after we reboot. The next
857** SQLite to access the file will not know that the journal exists (because
858** the directory entry for the journal was never created) and the transaction
859** will not roll back - possibly leading to database corruption.
860*/
drheb796a72005-09-08 12:38:41 +0000861int sqlite3OsSync(OsFile *id, int dataOnly){
drhda71ce12004-06-21 18:14:45 +0000862 assert( id->isOpen );
drhbbd42a62004-05-22 17:41:58 +0000863 SimulateIOError(SQLITE_IOERR);
drha6abd042004-06-09 17:37:22 +0000864 TRACE2("SYNC %-3d\n", id->h);
drheb796a72005-09-08 12:38:41 +0000865 if( full_fsync(id->h, id->fullSync, dataOnly) ){
drhbbd42a62004-05-22 17:41:58 +0000866 return SQLITE_IOERR;
drhbbd42a62004-05-22 17:41:58 +0000867 }
drha2854222004-06-17 19:04:17 +0000868 if( id->dirfd>=0 ){
869 TRACE2("DIRSYNC %-3d\n", id->dirfd);
drheb796a72005-09-08 12:38:41 +0000870 full_fsync(id->dirfd, id->fullSync, 0);
drha2854222004-06-17 19:04:17 +0000871 close(id->dirfd); /* Only need to sync once, so close the directory */
872 id->dirfd = -1; /* when we are done. */
873 }
drha2854222004-06-17 19:04:17 +0000874 return SQLITE_OK;
drhbbd42a62004-05-22 17:41:58 +0000875}
876
877/*
danielk1977962398d2004-06-14 09:35:16 +0000878** Sync the directory zDirname. This is a no-op on operating systems other
879** than UNIX.
drhb851b2c2005-03-10 14:11:12 +0000880**
881** This is used to make sure the master journal file has truely been deleted
882** before making changes to individual journals on a multi-database commit.
drhf30cc942005-03-11 17:52:34 +0000883** The F_FULLFSYNC option is not needed here.
danielk1977962398d2004-06-14 09:35:16 +0000884*/
885int sqlite3OsSyncDirectory(const char *zDirname){
886 int fd;
887 int r;
danielk1977369f27e2004-06-15 11:40:04 +0000888 SimulateIOError(SQLITE_IOERR);
drh8e855772005-05-17 11:25:31 +0000889 fd = open(zDirname, O_RDONLY|O_BINARY, 0);
danielk1977369f27e2004-06-15 11:40:04 +0000890 TRACE3("DIRSYNC %-3d (%s)\n", fd, zDirname);
danielk1977962398d2004-06-14 09:35:16 +0000891 if( fd<0 ){
892 return SQLITE_CANTOPEN;
893 }
894 r = fsync(fd);
895 close(fd);
896 return ((r==0)?SQLITE_OK:SQLITE_IOERR);
897}
898
899/*
drhbbd42a62004-05-22 17:41:58 +0000900** Truncate an open file to a specified size
901*/
drheb206252004-10-01 02:00:31 +0000902int sqlite3OsTruncate(OsFile *id, i64 nByte){
drhda71ce12004-06-21 18:14:45 +0000903 assert( id->isOpen );
drhbbd42a62004-05-22 17:41:58 +0000904 SimulateIOError(SQLITE_IOERR);
drha6abd042004-06-09 17:37:22 +0000905 return ftruncate(id->h, nByte)==0 ? SQLITE_OK : SQLITE_IOERR;
drhbbd42a62004-05-22 17:41:58 +0000906}
907
908/*
909** Determine the current size of a file in bytes
910*/
drheb206252004-10-01 02:00:31 +0000911int sqlite3OsFileSize(OsFile *id, i64 *pSize){
drhbbd42a62004-05-22 17:41:58 +0000912 struct stat buf;
drhda71ce12004-06-21 18:14:45 +0000913 assert( id->isOpen );
drhbbd42a62004-05-22 17:41:58 +0000914 SimulateIOError(SQLITE_IOERR);
drha6abd042004-06-09 17:37:22 +0000915 if( fstat(id->h, &buf)!=0 ){
drhbbd42a62004-05-22 17:41:58 +0000916 return SQLITE_IOERR;
917 }
918 *pSize = buf.st_size;
919 return SQLITE_OK;
920}
921
danielk19779a1d0ab2004-06-01 14:09:28 +0000922/*
danielk197713adf8a2004-06-03 16:08:41 +0000923** This routine checks if there is a RESERVED lock held on the specified
924** file by this or any other process. If such a lock is held, return
drh2ac3ee92004-06-07 16:27:46 +0000925** non-zero. If the file is unlocked or holds only SHARED locks, then
926** return zero.
danielk197713adf8a2004-06-03 16:08:41 +0000927*/
drha6abd042004-06-09 17:37:22 +0000928int sqlite3OsCheckReservedLock(OsFile *id){
danielk197713adf8a2004-06-03 16:08:41 +0000929 int r = 0;
930
drhda71ce12004-06-21 18:14:45 +0000931 assert( id->isOpen );
drh2b4b5962005-06-15 17:47:55 +0000932 if( CHECK_THREADID(id) ) return SQLITE_MISUSE;
drh2ac3ee92004-06-07 16:27:46 +0000933 sqlite3OsEnterMutex(); /* Needed because id->pLock is shared across threads */
danielk197713adf8a2004-06-03 16:08:41 +0000934
935 /* Check if a thread in this process holds such a lock */
936 if( id->pLock->locktype>SHARED_LOCK ){
937 r = 1;
938 }
939
drh2ac3ee92004-06-07 16:27:46 +0000940 /* Otherwise see if some other process holds it.
danielk197713adf8a2004-06-03 16:08:41 +0000941 */
942 if( !r ){
943 struct flock lock;
944 lock.l_whence = SEEK_SET;
drh2ac3ee92004-06-07 16:27:46 +0000945 lock.l_start = RESERVED_BYTE;
946 lock.l_len = 1;
947 lock.l_type = F_WRLCK;
drha6abd042004-06-09 17:37:22 +0000948 fcntl(id->h, F_GETLK, &lock);
danielk197713adf8a2004-06-03 16:08:41 +0000949 if( lock.l_type!=F_UNLCK ){
950 r = 1;
951 }
952 }
953
954 sqlite3OsLeaveMutex();
drha6abd042004-06-09 17:37:22 +0000955 TRACE3("TEST WR-LOCK %d %d\n", id->h, r);
danielk197713adf8a2004-06-03 16:08:41 +0000956
957 return r;
958}
959
#ifdef SQLITE_DEBUG
/*
** Helper function for printing out trace information from debugging
** binaries.  This returns the string representation of the supplied
** integer lock-type, or "ERROR" if the value is not one of the known
** lock types.
*/
static const char * locktypeName(int locktype){
  const char *zName = "ERROR";
  switch( locktype ){
    case NO_LOCK:        zName = "NONE";       break;
    case SHARED_LOCK:    zName = "SHARED";     break;
    case RESERVED_LOCK:  zName = "RESERVED";   break;
    case PENDING_LOCK:   zName = "PENDING";    break;
    case EXCLUSIVE_LOCK: zName = "EXCLUSIVE";  break;
    default:                                   break;
  }
  return zName;
}
#endif
977
danielk197713adf8a2004-06-03 16:08:41 +0000978/*
danielk19779a1d0ab2004-06-01 14:09:28 +0000979** Lock the file with the lock specified by parameter locktype - one
980** of the following:
981**
drh2ac3ee92004-06-07 16:27:46 +0000982** (1) SHARED_LOCK
983** (2) RESERVED_LOCK
984** (3) PENDING_LOCK
985** (4) EXCLUSIVE_LOCK
986**
drhb3e04342004-06-08 00:47:47 +0000987** Sometimes when requesting one lock state, additional lock states
988** are inserted in between. The locking might fail on one of the later
989** transitions leaving the lock state different from what it started but
990** still short of its goal. The following chart shows the allowed
991** transitions and the inserted intermediate states:
992**
993** UNLOCKED -> SHARED
994** SHARED -> RESERVED
995** SHARED -> (PENDING) -> EXCLUSIVE
996** RESERVED -> (PENDING) -> EXCLUSIVE
997** PENDING -> EXCLUSIVE
drh2ac3ee92004-06-07 16:27:46 +0000998**
drha6abd042004-06-09 17:37:22 +0000999** This routine will only increase a lock. Use the sqlite3OsUnlock()
1000** routine to lower a locking level.
danielk19779a1d0ab2004-06-01 14:09:28 +00001001*/
1002int sqlite3OsLock(OsFile *id, int locktype){
danielk1977f42f25c2004-06-25 07:21:28 +00001003 /* The following describes the implementation of the various locks and
1004 ** lock transitions in terms of the POSIX advisory shared and exclusive
1005 ** lock primitives (called read-locks and write-locks below, to avoid
1006 ** confusion with SQLite lock names). The algorithms are complicated
1007 ** slightly in order to be compatible with windows systems simultaneously
1008 ** accessing the same database file, in case that is ever required.
1009 **
1010 ** Symbols defined in os.h indentify the 'pending byte' and the 'reserved
1011 ** byte', each single bytes at well known offsets, and the 'shared byte
1012 ** range', a range of 510 bytes at a well known offset.
1013 **
1014 ** To obtain a SHARED lock, a read-lock is obtained on the 'pending
1015 ** byte'. If this is successful, a random byte from the 'shared byte
1016 ** range' is read-locked and the lock on the 'pending byte' released.
1017 **
danielk197790ba3bd2004-06-25 08:32:25 +00001018 ** A process may only obtain a RESERVED lock after it has a SHARED lock.
1019 ** A RESERVED lock is implemented by grabbing a write-lock on the
1020 ** 'reserved byte'.
danielk1977f42f25c2004-06-25 07:21:28 +00001021 **
1022 ** A process may only obtain a PENDING lock after it has obtained a
danielk197790ba3bd2004-06-25 08:32:25 +00001023 ** SHARED lock. A PENDING lock is implemented by obtaining a write-lock
1024 ** on the 'pending byte'. This ensures that no new SHARED locks can be
1025 ** obtained, but existing SHARED locks are allowed to persist. A process
1026 ** does not have to obtain a RESERVED lock on the way to a PENDING lock.
1027 ** This property is used by the algorithm for rolling back a journal file
1028 ** after a crash.
danielk1977f42f25c2004-06-25 07:21:28 +00001029 **
danielk197790ba3bd2004-06-25 08:32:25 +00001030 ** An EXCLUSIVE lock, obtained after a PENDING lock is held, is
1031 ** implemented by obtaining a write-lock on the entire 'shared byte
1032 ** range'. Since all other locks require a read-lock on one of the bytes
1033 ** within this range, this ensures that no other locks are held on the
1034 ** database.
danielk1977f42f25c2004-06-25 07:21:28 +00001035 **
1036 ** The reason a single byte cannot be used instead of the 'shared byte
1037 ** range' is that some versions of windows do not support read-locks. By
1038 ** locking a random byte from a range, concurrent SHARED locks may exist
1039 ** even if the locking primitive used is always a write-lock.
1040 */
danielk19779a1d0ab2004-06-01 14:09:28 +00001041 int rc = SQLITE_OK;
1042 struct lockInfo *pLock = id->pLock;
1043 struct flock lock;
1044 int s;
1045
drhda71ce12004-06-21 18:14:45 +00001046 assert( id->isOpen );
drhe29b9152005-03-18 14:03:15 +00001047 TRACE7("LOCK %d %s was %s(%s,%d) pid=%d\n", id->h, locktypeName(locktype),
danielk19772b444852004-06-29 07:45:33 +00001048 locktypeName(id->locktype), locktypeName(pLock->locktype), pLock->cnt
1049 ,getpid() );
drh2b4b5962005-06-15 17:47:55 +00001050 if( CHECK_THREADID(id) ) return SQLITE_MISUSE;
danielk19779a1d0ab2004-06-01 14:09:28 +00001051
1052 /* If there is already a lock of this type or more restrictive on the
1053 ** OsFile, do nothing. Don't use the end_lock: exit path, as
1054 ** sqlite3OsEnterMutex() hasn't been called yet.
1055 */
danielk197713adf8a2004-06-03 16:08:41 +00001056 if( id->locktype>=locktype ){
drhe29b9152005-03-18 14:03:15 +00001057 TRACE3("LOCK %d %s ok (already held)\n", id->h, locktypeName(locktype));
danielk19779a1d0ab2004-06-01 14:09:28 +00001058 return SQLITE_OK;
1059 }
1060
drhb3e04342004-06-08 00:47:47 +00001061 /* Make sure the locking sequence is correct
drh2ac3ee92004-06-07 16:27:46 +00001062 */
drhb3e04342004-06-08 00:47:47 +00001063 assert( id->locktype!=NO_LOCK || locktype==SHARED_LOCK );
1064 assert( locktype!=PENDING_LOCK );
1065 assert( locktype!=RESERVED_LOCK || id->locktype==SHARED_LOCK );
drh2ac3ee92004-06-07 16:27:46 +00001066
drhb3e04342004-06-08 00:47:47 +00001067 /* This mutex is needed because id->pLock is shared across threads
1068 */
1069 sqlite3OsEnterMutex();
danielk19779a1d0ab2004-06-01 14:09:28 +00001070
1071 /* If some thread using this PID has a lock via a different OsFile*
1072 ** handle that precludes the requested lock, return BUSY.
1073 */
danielk197713adf8a2004-06-03 16:08:41 +00001074 if( (id->locktype!=pLock->locktype &&
drh2ac3ee92004-06-07 16:27:46 +00001075 (pLock->locktype>=PENDING_LOCK || locktype>SHARED_LOCK))
danielk19779a1d0ab2004-06-01 14:09:28 +00001076 ){
1077 rc = SQLITE_BUSY;
1078 goto end_lock;
1079 }
1080
1081 /* If a SHARED lock is requested, and some thread using this PID already
1082 ** has a SHARED or RESERVED lock, then increment reference counts and
1083 ** return SQLITE_OK.
1084 */
1085 if( locktype==SHARED_LOCK &&
1086 (pLock->locktype==SHARED_LOCK || pLock->locktype==RESERVED_LOCK) ){
1087 assert( locktype==SHARED_LOCK );
danielk197713adf8a2004-06-03 16:08:41 +00001088 assert( id->locktype==0 );
danielk1977ecb2a962004-06-02 06:30:16 +00001089 assert( pLock->cnt>0 );
danielk197713adf8a2004-06-03 16:08:41 +00001090 id->locktype = SHARED_LOCK;
danielk19779a1d0ab2004-06-01 14:09:28 +00001091 pLock->cnt++;
1092 id->pOpen->nLock++;
1093 goto end_lock;
1094 }
1095
danielk197713adf8a2004-06-03 16:08:41 +00001096 lock.l_len = 1L;
drh2b4b5962005-06-15 17:47:55 +00001097
danielk19779a1d0ab2004-06-01 14:09:28 +00001098 lock.l_whence = SEEK_SET;
1099
drh3cde3bb2004-06-12 02:17:14 +00001100 /* A PENDING lock is needed before acquiring a SHARED lock and before
1101 ** acquiring an EXCLUSIVE lock. For the SHARED lock, the PENDING will
1102 ** be released.
danielk19779a1d0ab2004-06-01 14:09:28 +00001103 */
drh3cde3bb2004-06-12 02:17:14 +00001104 if( locktype==SHARED_LOCK
1105 || (locktype==EXCLUSIVE_LOCK && id->locktype<PENDING_LOCK)
1106 ){
danielk1977489468c2004-06-28 08:25:47 +00001107 lock.l_type = (locktype==SHARED_LOCK?F_RDLCK:F_WRLCK);
drh2ac3ee92004-06-07 16:27:46 +00001108 lock.l_start = PENDING_BYTE;
drha6abd042004-06-09 17:37:22 +00001109 s = fcntl(id->h, F_SETLK, &lock);
danielk19779a1d0ab2004-06-01 14:09:28 +00001110 if( s ){
1111 rc = (errno==EINVAL) ? SQLITE_NOLFS : SQLITE_BUSY;
1112 goto end_lock;
1113 }
drh3cde3bb2004-06-12 02:17:14 +00001114 }
1115
1116
1117 /* If control gets to this point, then actually go ahead and make
1118 ** operating system calls for the specified lock.
1119 */
1120 if( locktype==SHARED_LOCK ){
1121 assert( pLock->cnt==0 );
1122 assert( pLock->locktype==0 );
danielk19779a1d0ab2004-06-01 14:09:28 +00001123
drh2ac3ee92004-06-07 16:27:46 +00001124 /* Now get the read-lock */
1125 lock.l_start = SHARED_FIRST;
1126 lock.l_len = SHARED_SIZE;
drha6abd042004-06-09 17:37:22 +00001127 s = fcntl(id->h, F_SETLK, &lock);
drh2ac3ee92004-06-07 16:27:46 +00001128
1129 /* Drop the temporary PENDING lock */
1130 lock.l_start = PENDING_BYTE;
1131 lock.l_len = 1L;
danielk19779a1d0ab2004-06-01 14:09:28 +00001132 lock.l_type = F_UNLCK;
drh2b4b5962005-06-15 17:47:55 +00001133 if( fcntl(id->h, F_SETLK, &lock)!=0 ){
1134 rc = SQLITE_IOERR; /* This should never happen */
1135 goto end_lock;
1136 }
danielk19779a1d0ab2004-06-01 14:09:28 +00001137 if( s ){
drhbbd42a62004-05-22 17:41:58 +00001138 rc = (errno==EINVAL) ? SQLITE_NOLFS : SQLITE_BUSY;
1139 }else{
danielk197713adf8a2004-06-03 16:08:41 +00001140 id->locktype = SHARED_LOCK;
danielk1977ecb2a962004-06-02 06:30:16 +00001141 id->pOpen->nLock++;
danielk19779a1d0ab2004-06-01 14:09:28 +00001142 pLock->cnt = 1;
drhbbd42a62004-05-22 17:41:58 +00001143 }
drh3cde3bb2004-06-12 02:17:14 +00001144 }else if( locktype==EXCLUSIVE_LOCK && pLock->cnt>1 ){
1145 /* We are trying for an exclusive lock but another thread in this
1146 ** same process is still holding a shared lock. */
1147 rc = SQLITE_BUSY;
drhbbd42a62004-05-22 17:41:58 +00001148 }else{
drh3cde3bb2004-06-12 02:17:14 +00001149 /* The request was for a RESERVED or EXCLUSIVE lock. It is
danielk19779a1d0ab2004-06-01 14:09:28 +00001150 ** assumed that there is a SHARED or greater lock on the file
1151 ** already.
1152 */
danielk197713adf8a2004-06-03 16:08:41 +00001153 assert( 0!=id->locktype );
danielk19779a1d0ab2004-06-01 14:09:28 +00001154 lock.l_type = F_WRLCK;
1155 switch( locktype ){
1156 case RESERVED_LOCK:
drh2ac3ee92004-06-07 16:27:46 +00001157 lock.l_start = RESERVED_BYTE;
danielk19779a1d0ab2004-06-01 14:09:28 +00001158 break;
danielk19779a1d0ab2004-06-01 14:09:28 +00001159 case EXCLUSIVE_LOCK:
drh2ac3ee92004-06-07 16:27:46 +00001160 lock.l_start = SHARED_FIRST;
1161 lock.l_len = SHARED_SIZE;
danielk19779a1d0ab2004-06-01 14:09:28 +00001162 break;
1163 default:
1164 assert(0);
1165 }
drha6abd042004-06-09 17:37:22 +00001166 s = fcntl(id->h, F_SETLK, &lock);
danielk19779a1d0ab2004-06-01 14:09:28 +00001167 if( s ){
1168 rc = (errno==EINVAL) ? SQLITE_NOLFS : SQLITE_BUSY;
1169 }
drhbbd42a62004-05-22 17:41:58 +00001170 }
danielk19779a1d0ab2004-06-01 14:09:28 +00001171
danielk1977ecb2a962004-06-02 06:30:16 +00001172 if( rc==SQLITE_OK ){
danielk197713adf8a2004-06-03 16:08:41 +00001173 id->locktype = locktype;
danielk1977ecb2a962004-06-02 06:30:16 +00001174 pLock->locktype = locktype;
drh3cde3bb2004-06-12 02:17:14 +00001175 }else if( locktype==EXCLUSIVE_LOCK ){
1176 id->locktype = PENDING_LOCK;
1177 pLock->locktype = PENDING_LOCK;
danielk1977ecb2a962004-06-02 06:30:16 +00001178 }
danielk19779a1d0ab2004-06-01 14:09:28 +00001179
1180end_lock:
drhbbd42a62004-05-22 17:41:58 +00001181 sqlite3OsLeaveMutex();
drhe29b9152005-03-18 14:03:15 +00001182 TRACE4("LOCK %d %s %s\n", id->h, locktypeName(locktype),
danielk19772b444852004-06-29 07:45:33 +00001183 rc==SQLITE_OK ? "ok" : "failed");
drhbbd42a62004-05-22 17:41:58 +00001184 return rc;
1185}
1186
1187/*
drha6abd042004-06-09 17:37:22 +00001188** Lower the locking level on file descriptor id to locktype. locktype
1189** must be either NO_LOCK or SHARED_LOCK.
1190**
1191** If the locking level of the file descriptor is already at or below
1192** the requested locking level, this routine is a no-op.
1193**
drh9c105bb2004-10-02 20:38:28 +00001194** It is not possible for this routine to fail if the second argument
1195** is NO_LOCK. If the second argument is SHARED_LOCK, this routine
1196** might return SQLITE_IOERR instead of SQLITE_OK.
drhbbd42a62004-05-22 17:41:58 +00001197*/
drha6abd042004-06-09 17:37:22 +00001198int sqlite3OsUnlock(OsFile *id, int locktype){
1199 struct lockInfo *pLock;
1200 struct flock lock;
drh9c105bb2004-10-02 20:38:28 +00001201 int rc = SQLITE_OK;
drha6abd042004-06-09 17:37:22 +00001202
drhda71ce12004-06-21 18:14:45 +00001203 assert( id->isOpen );
drhe29b9152005-03-18 14:03:15 +00001204 TRACE7("UNLOCK %d %d was %d(%d,%d) pid=%d\n", id->h, locktype, id->locktype,
danielk19772b444852004-06-29 07:45:33 +00001205 id->pLock->locktype, id->pLock->cnt, getpid());
drh2b4b5962005-06-15 17:47:55 +00001206 if( CHECK_THREADID(id) ) return SQLITE_MISUSE;
drha6abd042004-06-09 17:37:22 +00001207
1208 assert( locktype<=SHARED_LOCK );
1209 if( id->locktype<=locktype ){
1210 return SQLITE_OK;
1211 }
drhbbd42a62004-05-22 17:41:58 +00001212 sqlite3OsEnterMutex();
drha6abd042004-06-09 17:37:22 +00001213 pLock = id->pLock;
1214 assert( pLock->cnt!=0 );
1215 if( id->locktype>SHARED_LOCK ){
1216 assert( pLock->locktype==id->locktype );
drh9c105bb2004-10-02 20:38:28 +00001217 if( locktype==SHARED_LOCK ){
1218 lock.l_type = F_RDLCK;
1219 lock.l_whence = SEEK_SET;
1220 lock.l_start = SHARED_FIRST;
1221 lock.l_len = SHARED_SIZE;
1222 if( fcntl(id->h, F_SETLK, &lock)!=0 ){
1223 /* This should never happen */
1224 rc = SQLITE_IOERR;
1225 }
1226 }
drhbbd42a62004-05-22 17:41:58 +00001227 lock.l_type = F_UNLCK;
1228 lock.l_whence = SEEK_SET;
drha6abd042004-06-09 17:37:22 +00001229 lock.l_start = PENDING_BYTE;
1230 lock.l_len = 2L; assert( PENDING_BYTE+1==RESERVED_BYTE );
drh2b4b5962005-06-15 17:47:55 +00001231 if( fcntl(id->h, F_SETLK, &lock)==0 ){
1232 pLock->locktype = SHARED_LOCK;
1233 }else{
1234 rc = SQLITE_IOERR; /* This should never happen */
1235 }
drhbbd42a62004-05-22 17:41:58 +00001236 }
drha6abd042004-06-09 17:37:22 +00001237 if( locktype==NO_LOCK ){
1238 struct openCnt *pOpen;
danielk1977ecb2a962004-06-02 06:30:16 +00001239
drha6abd042004-06-09 17:37:22 +00001240 /* Decrement the shared lock counter. Release the lock using an
1241 ** OS call only when all threads in this same process have released
1242 ** the lock.
1243 */
1244 pLock->cnt--;
1245 if( pLock->cnt==0 ){
1246 lock.l_type = F_UNLCK;
1247 lock.l_whence = SEEK_SET;
1248 lock.l_start = lock.l_len = 0L;
drh2b4b5962005-06-15 17:47:55 +00001249 if( fcntl(id->h, F_SETLK, &lock)==0 ){
1250 pLock->locktype = NO_LOCK;
1251 }else{
1252 rc = SQLITE_IOERR; /* This should never happen */
1253 }
drha6abd042004-06-09 17:37:22 +00001254 }
1255
drhbbd42a62004-05-22 17:41:58 +00001256 /* Decrement the count of locks against this same file. When the
1257 ** count reaches zero, close any other file descriptors whose close
1258 ** was deferred because of outstanding locks.
1259 */
drha6abd042004-06-09 17:37:22 +00001260 pOpen = id->pOpen;
drhbbd42a62004-05-22 17:41:58 +00001261 pOpen->nLock--;
1262 assert( pOpen->nLock>=0 );
1263 if( pOpen->nLock==0 && pOpen->nPending>0 ){
1264 int i;
1265 for(i=0; i<pOpen->nPending; i++){
1266 close(pOpen->aPending[i]);
1267 }
1268 sqliteFree(pOpen->aPending);
1269 pOpen->nPending = 0;
1270 pOpen->aPending = 0;
1271 }
1272 }
1273 sqlite3OsLeaveMutex();
drha6abd042004-06-09 17:37:22 +00001274 id->locktype = locktype;
drh9c105bb2004-10-02 20:38:28 +00001275 return rc;
drhbbd42a62004-05-22 17:41:58 +00001276}
1277
1278/*
danielk1977e3026632004-06-22 11:29:02 +00001279** Close a file.
1280*/
1281int sqlite3OsClose(OsFile *id){
1282 if( !id->isOpen ) return SQLITE_OK;
drh2b4b5962005-06-15 17:47:55 +00001283 if( CHECK_THREADID(id) ) return SQLITE_MISUSE;
danielk1977e3026632004-06-22 11:29:02 +00001284 sqlite3OsUnlock(id, NO_LOCK);
1285 if( id->dirfd>=0 ) close(id->dirfd);
1286 id->dirfd = -1;
1287 sqlite3OsEnterMutex();
1288 if( id->pOpen->nLock ){
1289 /* If there are outstanding locks, do not actually close the file just
1290 ** yet because that would clear those locks. Instead, add the file
1291 ** descriptor to pOpen->aPending. It will be automatically closed when
1292 ** the last lock is cleared.
1293 */
1294 int *aNew;
1295 struct openCnt *pOpen = id->pOpen;
drhad81e872005-08-21 21:45:01 +00001296 aNew = sqliteRealloc( pOpen->aPending, (pOpen->nPending+1)*sizeof(int) );
danielk1977e3026632004-06-22 11:29:02 +00001297 if( aNew==0 ){
1298 /* If a malloc fails, just leak the file descriptor */
1299 }else{
1300 pOpen->aPending = aNew;
drhad81e872005-08-21 21:45:01 +00001301 pOpen->aPending[pOpen->nPending] = id->h;
1302 pOpen->nPending++;
danielk1977e3026632004-06-22 11:29:02 +00001303 }
1304 }else{
1305 /* There are no outstanding locks so we can close the file immediately */
1306 close(id->h);
1307 }
1308 releaseLockInfo(id->pLock);
1309 releaseOpenCnt(id->pOpen);
1310 sqlite3OsLeaveMutex();
1311 id->isOpen = 0;
1312 TRACE2("CLOSE %-3d\n", id->h);
1313 OpenCounter(-1);
1314 return SQLITE_OK;
1315}
1316
/*
** Turn a relative pathname into a full pathname.  Return a pointer
** to the full pathname stored in space obtained from sqliteMalloc().
** The calling function is responsible for freeing this space once it
** is no longer needed.
**
** A name that already begins with '/' is simply copied.  Otherwise the
** current working directory and a '/' are prepended.
**
** Zero is returned if the scratch buffer cannot be allocated or if the
** current working directory cannot be determined.
*/
char *sqlite3OsFullPathname(const char *zRelative){
  char *zFull = 0;
  if( zRelative[0]=='/' ){
    sqlite3SetString(&zFull, zRelative, (char*)0);
  }else{
    const int nBuf = 5000;   /* Size of the scratch buffer for getcwd() */
    char *zBuf = sqliteMalloc(nBuf);
    if( zBuf==0 ){
      return 0;
    }
    /* getcwd() returns NULL on failure.  The argument list handed to
    ** sqlite3SetString() is terminated by a null pointer, so passing that
    ** NULL straight through would end the list before "/" and zRelative
    ** were seen.  Only build the name when the directory is known.
    */
    if( getcwd(zBuf, nBuf)!=0 ){
      sqlite3SetString(&zFull, zBuf, "/", zRelative, (char*)0);
    }
    sqliteFree(zBuf);
  }
  return zFull;
}
1339
1340
1341#endif /* SQLITE_OMIT_DISKIO */
1342/***************************************************************************
1343** Everything above deals with file I/O. Everything that follows deals
** with other miscellaneous aspects of the operating system interface
1345****************************************************************************/
1346
1347
1348/*
drhbbd42a62004-05-22 17:41:58 +00001349** Get information to seed the random number generator. The seed
1350** is written into the buffer zBuf[256]. The calling function must
1351** supply a sufficiently large buffer.
1352*/
1353int sqlite3OsRandomSeed(char *zBuf){
1354 /* We have to initialize zBuf to prevent valgrind from reporting
1355 ** errors. The reports issued by valgrind are incorrect - we would
1356 ** prefer that the randomness be increased by making use of the
1357 ** uninitialized space in zBuf - but valgrind errors tend to worry
1358 ** some users. Rather than argue, it seems easier just to initialize
1359 ** the whole array and silence valgrind, even if that means less randomness
1360 ** in the random seed.
1361 **
1362 ** When testing, initializing zBuf[] to zero is all we do. That means
1363 ** that we always use the same random number sequence.* This makes the
1364 ** tests repeatable.
1365 */
1366 memset(zBuf, 0, 256);
1367#if !defined(SQLITE_TEST)
1368 {
drh842b8642005-01-21 17:53:17 +00001369 int pid, fd;
1370 fd = open("/dev/urandom", O_RDONLY);
1371 if( fd<0 ){
1372 time((time_t*)zBuf);
1373 pid = getpid();
1374 memcpy(&zBuf[sizeof(time_t)], &pid, sizeof(pid));
1375 }else{
1376 read(fd, zBuf, 256);
1377 close(fd);
1378 }
drhbbd42a62004-05-22 17:41:58 +00001379 }
1380#endif
1381 return SQLITE_OK;
1382}
1383
/*
** Sleep for a little while.  Return the amount of time slept, in
** milliseconds.
**
** A request of zero or fewer milliseconds returns 0 immediately.  When
** usleep() is available (HAVE_USLEEP) the delay is honored to the
** millisecond; whole seconds are handed to sleep() so that usleep() is
** never asked for a million microseconds or more.  Without usleep() the
** request is rounded up to a whole number of seconds.
*/
int sqlite3OsSleep(int ms){
  if( ms<=0 ){
    return 0;
  }
#if defined(HAVE_USLEEP) && HAVE_USLEEP
  if( ms>=1000 ){
    sleep(ms/1000);
  }
  if( ms%1000 ){
    usleep((ms%1000)*1000);
  }
  return ms;
#else
  {
    /* Round up to whole seconds without overflowing on large inputs */
    int nSec = ms/1000 + (ms%1000!=0);
    sleep(nSec);
    return 1000*nSec;
  }
#endif
}
1396
/*
** Static variables used for thread synchronization.
**
** inMutex is a flag that is set while the mutex is held; it exists so
** that the assert() statements below can catch unbalanced Enter/Leave
** calls.  The pthread mutex itself is only present when
** SQLITE_UNIX_THREADS is defined.
*/
static int inMutex = 0;
#ifdef SQLITE_UNIX_THREADS
static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
#endif

/*
** The following pair of routines implements mutual exclusion for
** multi-threaded processes.  Only a single thread is allowed to
** execute code that is surrounded by EnterMutex() and LeaveMutex().
**
** SQLite uses only a single Mutex.  There is not much critical
** code and what little there is executes quickly and without blocking.
*/
void sqlite3OsEnterMutex(void){
#ifdef SQLITE_UNIX_THREADS
  pthread_mutex_lock(&mutex);
#endif
  /* The mutex must not already be marked as held at this point */
  assert( inMutex==0 );
  inMutex = 1;
}
void sqlite3OsLeaveMutex(void){
  /* Clear the flag before the lock is actually released */
  assert( inMutex!=0 );
  inMutex = 0;
#ifdef SQLITE_UNIX_THREADS
  pthread_mutex_unlock(&mutex);
#endif
}
1427
/*
** The following variable, if set to a non-zero value, becomes the result
** returned from sqlite3OsCurrentTime().  This is used for testing.
*/
#ifdef SQLITE_TEST
int sqlite3_current_time = 0;
#endif

/*
** Find the current time (in Universal Coordinated Time).  Write the
** current time and date as a Julian Day number into *prNow and
** return 0.  Return 1 if the time and date cannot be found.
**
** With NO_GETTOD defined the time comes from time() and has one-second
** resolution; otherwise gettimeofday() is used and microseconds are
** included.  In SQLITE_TEST builds a non-zero sqlite3_current_time
** overrides whatever the system clock reported.
*/
int sqlite3OsCurrentTime(double *prNow){
  int rc = 0;
#ifdef NO_GETTOD
  time_t t;
  if( time(&t)==(time_t)-1 ){
    rc = 1;
  }else{
    *prNow = t/86400.0 + 2440587.5;
  }
#else
  struct timeval sNow;
  /* The timezone argument is not needed, so a null pointer is passed */
  if( gettimeofday(&sNow, 0)!=0 ){
    rc = 1;
  }else{
    *prNow = 2440587.5 + sNow.tv_sec/86400.0 + sNow.tv_usec/86400000000.0;
  }
#endif
#ifdef SQLITE_TEST
  if( sqlite3_current_time ){
    *prNow = sqlite3_current_time/86400.0 + 2440587.5;
    rc = 0;
  }
#endif
  return rc;
}
1459
drhbbd42a62004-05-22 17:41:58 +00001460#endif /* OS_UNIX */