When a database file is opened, try to find an unused file descriptor to reuse.  This change affects unix (and other systems that use os_unix.c) only. Fix for cvstrac ticket [http://www.sqlite.org/cvstrac/tktview?tn=4018|#4018].

FossilOrigin-Name: 9b4d9ab62d687289837b13b07885e72cc3abe8a9
diff --git a/src/os_unix.c b/src/os_unix.c
index f972b59..458c90e 100644
--- a/src/os_unix.c
+++ b/src/os_unix.c
@@ -181,6 +181,7 @@
   unsigned char locktype;          /* The type of lock held on this fd */
   int lastErrno;                   /* The unix errno from the last I/O error */
   void *lockingContext;            /* Locking style specific state */
+  int flags;                       /* Flags value returned by xOpen() */
 #if SQLITE_ENABLE_LOCKING_STYLE
   int openFlags;                   /* The flags specified at open() */
 #endif
@@ -202,11 +203,6 @@
   unsigned char transCntrChng;   /* True if the transaction counter changed */
   unsigned char dbUpdate;        /* True if any part of database file changed */
   unsigned char inNormalWrite;   /* True if in a normal write operation */
-
-  /* If true, that means we are dealing with a database file that has
-  ** a range of locking bytes from PENDING_BYTE through PENDING_BYTE+511
-  ** which should never be read or written.  Asserts() will verify this */
-  unsigned char isLockable;      /* True if file might be locked */
 #endif
 #ifdef SQLITE_TEST
   /* In test mode, increase the size of this structure a bit so that 
@@ -753,7 +749,10 @@
   int nRef;                   /* Number of pointers to this structure */
   int nLock;                  /* Number of outstanding locks */
   int nPending;               /* Number of pending close() operations */
-  int *aPending;              /* Malloced space holding fds awaiting close() */
+  struct PendingClose {
+    int fd;                   /* File descriptor to close */
+    int flags;                /* Flags this file descriptor was opened with */
+  } *aPending;                /* Malloced space holding fds awaiting close() */
 #if OS_VXWORKS
   sem_t *pSem;                     /* Named POSIX semaphore */
   char aSemName[MAX_PATHNAME+1];   /* Name of that semaphore */
@@ -1406,6 +1405,60 @@
 }
 
 /*
+** Close all file descriptors accumuated in the p->aPending[] array. If
+** all such file descriptors are closed without error, the aPending[] 
+** array is deleted and SQLITE_OK returned.
+**
+** Otherwise, if an error occurs, then successfully closed file descriptor
+** entries in the aPending[] array are set to -1, the aPending[] array
+** not deleted and SQLITE_IOERR_CLOSE returned.
+*/ 
+static int closePendingFds(unixFile *pFile){
+  struct unixOpenCnt *pOpen = pFile->pOpen;
+  struct PendingClose *aPending = pOpen->aPending;
+  int i;
+  int rc = SQLITE_OK;
+  assert( unixMutexHeld() );
+  for(i=0; i<pOpen->nPending; i++){
+    if( aPending[i].fd>=0 ){
+      if( close(aPending[i].fd) ){
+        pFile->lastErrno = errno;
+        rc = SQLITE_IOERR_CLOSE;
+      }else{
+        aPending[i].fd = -1;
+      }
+    }
+  }
+  if( rc==SQLITE_OK ){
+    sqlite3_free(aPending);
+    pOpen->nPending = 0;
+    pOpen->aPending = 0;
+  }
+  return rc;
+}
+
+/*
+** Add the file descriptor used by file handle pFile to the corresponding
+** aPending[] array to be closed after some other connection releases
+** a lock.
+*/
+static void setPendingFd(unixFile *pFile){
+  struct PendingClose *aNew;
+  struct unixOpenCnt *pOpen = pFile->pOpen;
+  int nByte = (pOpen->nPending+1)*sizeof(pOpen->aPending[0]);
+  aNew = sqlite3_realloc(pOpen->aPending, nByte);
+  if( aNew==0 ){
+    /* If a malloc fails, just leak the file descriptor */
+  }else{
+    pOpen->aPending = aNew;
+    pOpen->aPending[pOpen->nPending].fd = pFile->h;
+    pOpen->aPending[pOpen->nPending].flags = pFile->flags;
+    pOpen->nPending++;
+    pFile->h = -1;
+  }
+}
+
+/*
 ** Lower the locking level on file descriptor pFile to locktype.  locktype
 ** must be either NO_LOCK or SHARED_LOCK.
 **
@@ -1487,7 +1540,6 @@
   }
   if( locktype==NO_LOCK ){
     struct unixOpenCnt *pOpen;
-    int rc2 = SQLITE_OK;
 
     /* Decrement the shared lock counter.  Release the lock using an
     ** OS call only when all threads in this same process have released
@@ -1522,27 +1574,10 @@
     pOpen->nLock--;
     assert( pOpen->nLock>=0 );
     if( pOpen->nLock==0 && pOpen->nPending>0 ){
-      int i;
-      for(i=0; i<pOpen->nPending; i++){
-        /* close pending fds, but if closing fails don't free the array
-        ** assign -1 to the successfully closed descriptors and record the
-        ** error.  The next attempt to unlock will try again. */
-        if( pOpen->aPending[i] < 0 ) continue;
-        if( close(pOpen->aPending[i]) ){
-          pFile->lastErrno = errno;
-          rc2 = SQLITE_IOERR_CLOSE;
-        }else{
-          pOpen->aPending[i] = -1;
-        }
+      int rc2 = closePendingFds(pFile);
+      if( rc==SQLITE_OK ){
+        rc = rc2;
       }
-      if( rc2==SQLITE_OK ){
-        sqlite3_free(pOpen->aPending);
-        pOpen->nPending = 0;
-        pOpen->aPending = 0;
-      }
-    }
-    if( rc==SQLITE_OK ){
-      rc = rc2;
     }
   }
 	
@@ -1612,17 +1647,7 @@
       ** descriptor to pOpen->aPending.  It will be automatically closed when
       ** the last lock is cleared.
       */
-      int *aNew;
-      struct unixOpenCnt *pOpen = pFile->pOpen;
-      aNew = sqlite3_realloc(pOpen->aPending, (pOpen->nPending+1)*sizeof(int) );
-      if( aNew==0 ){
-        /* If a malloc fails, just leak the file descriptor */
-      }else{
-        pOpen->aPending = aNew;
-        pOpen->aPending[pOpen->nPending] = pFile->h;
-        pOpen->nPending++;
-        pFile->h = -1;
-      }
+      setPendingFd(pFile);
     }
     releaseLockInfo(pFile->pLock);
     releaseOpenCnt(pFile->pOpen);
@@ -2592,26 +2617,14 @@
       pOpen->nLock--;
       assert( pOpen->nLock>=0 );
       if( pOpen->nLock==0 && pOpen->nPending>0 ){
-        int i;
-        for(i=0; i<pOpen->nPending; i++){
-          if( pOpen->aPending[i] < 0 ) continue;
-          if( close(pOpen->aPending[i]) ){
-            pFile->lastErrno = errno;
-            rc = SQLITE_IOERR_CLOSE;
-          }else{
-            pOpen->aPending[i] = -1;
-          }
-        }
-        if( rc==SQLITE_OK ){
-          sqlite3_free(pOpen->aPending);
-          pOpen->nPending = 0;
-          pOpen->aPending = 0;
-        }
+        rc = closePendingFds(pFile);
       }
     }
   }
   unixLeaveMutex();
-  if( rc==SQLITE_OK ) pFile->locktype = locktype;
+  if( rc==SQLITE_OK ){
+    pFile->locktype = locktype;
+  }
   return rc;
 }
 
@@ -2629,17 +2642,7 @@
       ** descriptor to pOpen->aPending.  It will be automatically closed when
       ** the last lock is cleared.
       */
-      int *aNew;
-      struct unixOpenCnt *pOpen = pFile->pOpen;
-      aNew = sqlite3_realloc(pOpen->aPending, (pOpen->nPending+1)*sizeof(int) );
-      if( aNew==0 ){
-        /* If a malloc fails, just leak the file descriptor */
-      }else{
-        pOpen->aPending = aNew;
-        pOpen->aPending[pOpen->nPending] = pFile->h;
-        pOpen->nPending++;
-        pFile->h = -1;
-      }
+      setPendingFd(pFile);
     }
     releaseOpenCnt(pFile->pOpen);
     sqlite3_free(pFile->lockingContext);
@@ -2725,22 +2728,25 @@
   int amt,
   sqlite3_int64 offset
 ){
+  unixFile *pFile = (unixFile *)id;
   int got;
   assert( id );
 
-  /* Never read or write any of the bytes in the locking range */
-  assert( ((unixFile*)id)->isLockable==0
-          || offset>=PENDING_BYTE+512
-          || offset+amt<=PENDING_BYTE );
+  /* If this is a database file (not a journal, master-journal or temp
+  ** file), the bytes in the locking range should never be read or written. */
+  assert( (pFile->flags&SQLITE_OPEN_MAIN_DB)==0
+       || offset>=PENDING_BYTE+512
+       || offset+amt<=PENDING_BYTE 
+  );
 
-  got = seekAndRead((unixFile*)id, offset, pBuf, amt);
+  got = seekAndRead(pFile, offset, pBuf, amt);
   if( got==amt ){
     return SQLITE_OK;
   }else if( got<0 ){
     /* lastErrno set by seekAndRead */
     return SQLITE_IOERR_READ;
   }else{
-    ((unixFile*)id)->lastErrno = 0; /* not a system error */
+    pFile->lastErrno = 0; /* not a system error */
     /* Unread parts of the buffer must be zero-filled */
     memset(&((char*)pBuf)[got], 0, amt-got);
     return SQLITE_IOERR_SHORT_READ;
@@ -2794,14 +2800,17 @@
   int amt,
   sqlite3_int64 offset 
 ){
+  unixFile *pFile = (unixFile*)id;
   int wrote = 0;
   assert( id );
   assert( amt>0 );
 
-  /* Never read or write any of the bytes in the locking range */
-  assert( ((unixFile*)id)->isLockable==0
-          || offset>=PENDING_BYTE+512
-          || offset+amt<=PENDING_BYTE );
+  /* If this is a database file (not a journal, master-journal or temp
+  ** file), the bytes in the locking range should never be read or written. */
+  assert( (pFile->flags&SQLITE_OPEN_MAIN_DB)==0
+       || offset>=PENDING_BYTE+512
+       || offset+amt<=PENDING_BYTE 
+  );
 
 #ifndef NDEBUG
   /* If we are doing a normal write to a database file (as opposed to
@@ -2810,8 +2819,7 @@
   ** has changed.  If the transaction counter is modified, record that
   ** fact too.
   */
-  if( ((unixFile*)id)->inNormalWrite ){
-    unixFile *pFile = (unixFile*)id;
+  if( pFile->inNormalWrite ){
     pFile->dbUpdate = 1;  /* The database has been modified */
     if( offset<=24 && offset+amt>=27 ){
       int rc;
@@ -2826,7 +2834,7 @@
   }
 #endif
 
-  while( amt>0 && (wrote = seekAndWrite((unixFile*)id, offset, pBuf, amt))>0 ){
+  while( amt>0 && (wrote = seekAndWrite(pFile, offset, pBuf, amt))>0 ){
     amt -= wrote;
     offset += wrote;
     pBuf = &((char*)pBuf)[wrote];
@@ -2838,7 +2846,7 @@
       /* lastErrno set by seekAndWrite */
       return SQLITE_IOERR_WRITE;
     }else{
-      ((unixFile*)id)->lastErrno = 0; /* not a system error */
+      pFile->lastErrno = 0; /* not a system error */
       return SQLITE_FULL;
     }
   }
@@ -3650,6 +3658,58 @@
 static int proxyTransformUnixFile(unixFile*, const char*);
 #endif
 
+/*
+** Search for an unused file descriptor that was opened on the database 
+** file (not a journal or master-journal file) identified by pathname
+** zPath with SQLITE_OPEN_XXX flags matching those passed as the second
+** argument to this function.
+**
+** Such a file descriptor may exist if a database connection was closed
+** but the associated file descriptor could not be closed because some
+** other file descriptor open on the same file is holding a file-lock.
+** Refer to comments in the unixClose() function and the lengthy comment
+** describing "Posix Advisory Locking" at the start of this file for 
+** further details. Also, ticket #4018.
+**
+** If a suitable file descriptor is found, then it is returned. If no
+** such file descriptor is located, -1 is returned.
+*/
+static int findReusableFd(const char *zPath, int flags){
+  int fd = -1;                         /* Return value */
+  struct stat sStat;                   /* Results of stat() call */
+
+  /* A stat() call may fail for various reasons. If this happens, it is
+  ** almost certain that an open() call on the same path will also fail.
+  ** For this reason, if an error occurs in the stat() call here, it is
+  ** ignored and -1 is returned. The caller will try to open a new file
+  ** descriptor on the same path, fail, and return an error to SQLite.
+  **
+  ** Even if a subsequent open() call does succeed, the consequences of
+  ** not searching for a resusable file descriptor are not dire.  */
+  if( 0==stat(zPath, &sStat) ){
+    struct unixOpenCnt *p;
+    struct unixFileId id;
+    id.dev = sStat.st_dev;
+    id.ino = sStat.st_ino;
+
+    unixEnterMutex();
+    for(p=openList; p&& memcmp(&id, &p->fileId, sizeof(id)); p=p->pNext);
+    if( p && p->aPending ){
+      int i;
+      struct PendingClose *aPending = p->aPending;
+      for(i=0; i<p->nPending; i++){
+        if( aPending[i].fd>=0 && flags==aPending[i].flags ){
+          fd = aPending[i].fd;
+          aPending[i].fd = -1;
+          break;
+        }
+      }
+    }
+    unixLeaveMutex();
+  }
+
+  return fd;
+}
 
 /*
 ** Open the file zPath.
@@ -3680,12 +3740,13 @@
   int flags,                   /* Input flags to control the opening */
   int *pOutFlags               /* Output flags returned to SQLite core */
 ){
-  int fd = -1;                    /* File descriptor returned by open() */
+  unixFile *p = (unixFile *)pFile;
+  int fd = -1;                   /* File descriptor returned by open() */
   int dirfd = -1;                /* Directory file descriptor */
   int openFlags = 0;             /* Flags to pass to open() */
   int eType = flags&0xFFFFFF00;  /* Type of file to open */
   int noLock;                    /* True to omit locking primitives */
-  int rc = SQLITE_OK;
+  int rc = SQLITE_OK;            /* Function Return Code */
 
   int isExclusive  = (flags & SQLITE_OPEN_EXCLUSIVE);
   int isDelete     = (flags & SQLITE_OPEN_DELETEONCLOSE);
@@ -3720,11 +3781,10 @@
   assert(isDelete==0 || isCreate);
 
   /* The main DB, main journal, and master journal are never automatically
-  ** deleted
-  */
-  assert( eType!=SQLITE_OPEN_MAIN_DB || !isDelete );
-  assert( eType!=SQLITE_OPEN_MAIN_JOURNAL || !isDelete );
-  assert( eType!=SQLITE_OPEN_MASTER_JOURNAL || !isDelete );
+  ** deleted. Nor are they ever temporary files.  */
+  assert( (!isDelete && zName) || eType!=SQLITE_OPEN_MAIN_DB );
+  assert( (!isDelete && zName) || eType!=SQLITE_OPEN_MAIN_JOURNAL );
+  assert( (!isDelete && zName) || eType!=SQLITE_OPEN_MASTER_JOURNAL );
 
   /* Assert that the upper layer has set one of the "file-type" flags. */
   assert( eType==SQLITE_OPEN_MAIN_DB      || eType==SQLITE_OPEN_TEMP_DB 
@@ -3733,9 +3793,19 @@
        || eType==SQLITE_OPEN_TRANSIENT_DB
   );
 
-  memset(pFile, 0, sizeof(unixFile));
+  memset(p, 0, sizeof(unixFile));
 
-  if( !zName ){
+  if( eType==SQLITE_OPEN_MAIN_DB ){
+    /* Try to find an unused file descriptor to reuse. This is not done
+    ** for vxworks. Not because vxworks would not benefit from the change
+    ** (it might, we're not sure), but because no way to test it is
+    ** currently available. It is better not to risk breaking vxworks for 
+    ** the sake of such an obscure feature.   */
+#if !OS_VXWORKS
+    fd = findReusableFd(zName, flags);
+#endif
+  }else if( !zName ){
+    /* If zName is NULL, the upper layer is requesting a temp file. */
     assert(isDelete && !isOpenDirectory);
     rc = getTempname(MAX_PATHNAME+1, zTmpname);
     if( rc!=SQLITE_OK ){
@@ -3744,23 +3814,35 @@
     zName = zTmpname;
   }
 
+  /* Determine the value of the flags parameter passed to POSIX function
+  ** open(). These must be calculated even if open() is not called, as
+  ** they may be stored as part of the file handle and used by the 
+  ** 'conch file' locking functions later on.  */
   if( isReadonly )  openFlags |= O_RDONLY;
   if( isReadWrite ) openFlags |= O_RDWR;
   if( isCreate )    openFlags |= O_CREAT;
   if( isExclusive ) openFlags |= (O_EXCL|O_NOFOLLOW);
   openFlags |= (O_LARGEFILE|O_BINARY);
 
-  fd = open(zName, openFlags, isDelete?0600:SQLITE_DEFAULT_FILE_PERMISSIONS);
-  OSTRACE4("OPENX   %-3d %s 0%o\n", fd, zName, openFlags);
-  if( fd<0 && errno!=EISDIR && isReadWrite && !isExclusive ){
-    /* Failed to open the file for read/write access. Try read-only. */
-    flags &= ~(SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE);
-    flags |= SQLITE_OPEN_READONLY;
-    return unixOpen(pVfs, zPath, pFile, flags, pOutFlags);
-  }
   if( fd<0 ){
-    return SQLITE_CANTOPEN;
+    fd = open(zName, openFlags, isDelete?0600:SQLITE_DEFAULT_FILE_PERMISSIONS);
+    OSTRACE4("OPENX   %-3d %s 0%o\n", fd, zName, openFlags);
+    if( fd<0 && errno!=EISDIR && isReadWrite && !isExclusive ){
+      /* Failed to open the file for read/write access. Try read-only. */
+      flags &= ~(SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE);
+      flags |= SQLITE_OPEN_READONLY;
+      return unixOpen(pVfs, zPath, pFile, flags, pOutFlags);
+    }
+    if( fd<0 ){
+      return SQLITE_CANTOPEN;
+    }
   }
+  assert( fd>=0 );
+  p->flags = flags;
+  if( pOutFlags ){
+    *pOutFlags = flags;
+  }
+
   if( isDelete ){
 #if OS_VXWORKS
     zPath = zName;
@@ -3770,24 +3852,19 @@
   }
 #if SQLITE_ENABLE_LOCKING_STYLE
   else{
-    ((unixFile*)pFile)->openFlags = openFlags;
-  }
-#endif
-  if( pOutFlags ){
-    *pOutFlags = flags;
-  }
-
-#ifndef NDEBUG
-  if( (flags & SQLITE_OPEN_MAIN_DB)!=0 ){
-    ((unixFile*)pFile)->isLockable = 1;
+    p->openFlags = openFlags;
   }
 #endif
 
-  assert( fd>=0 );
   if( isOpenDirectory ){
     rc = openDirectory(zPath, &dirfd);
     if( rc!=SQLITE_OK ){
-      close(fd); /* silently leak if fail, already in error */
+      /* It is safe to close fd at this point, because it is guaranteed not
+      ** to be open on a database file. If it were open on a database file,
+      ** it would not be safe to close as this would cause any locks held
+      ** on the file by this process to be released.  */
+      assert( eType!=SQLITE_OPEN_MAIN_DB );
+      close(fd);             /* silently leak if fail, already in error */
       return rc;
     }
   }
@@ -3803,16 +3880,14 @@
     char *envforce = getenv("SQLITE_FORCE_PROXY_LOCKING");
     int useProxy = 0;
 
-    /* SQLITE_FORCE_PROXY_LOCKING==1 means force always use proxy, 
-    ** 0 means never use proxy, NULL means use proxy for non-local files only
-    */
+    /* SQLITE_FORCE_PROXY_LOCKING==1 means force always use proxy, 0 means 
+    ** never use proxy, NULL means use proxy for non-local files only.  */
     if( envforce!=NULL ){
       useProxy = atoi(envforce)>0;
     }else{
       struct statfs fsInfo;
-
       if( statfs(zPath, &fsInfo) == -1 ){
-				((unixFile*)pFile)->lastErrno = errno;
+        ((unixFile*)pFile)->lastErrno = errno;
         if( dirfd>=0 ) close(dirfd); /* silently leak if fail, in error */
         close(fd); /* silently leak if fail, in error */
         return SQLITE_IOERR_ACCESS;