diff options
Diffstat (limited to 'Python/fileutils.c')
| -rw-r--r-- | Python/fileutils.c | 260 | 
1 files changed, 226 insertions, 34 deletions
| diff --git a/Python/fileutils.c b/Python/fileutils.c index 901a746efe..e7111c1431 100644 --- a/Python/fileutils.c +++ b/Python/fileutils.c @@ -82,11 +82,11 @@ extern int _Py_normalize_encoding(const char *, char *, size_t);     Values of force_ascii: -       1: the workaround is used: _Py_wchar2char() uses -          encode_ascii_surrogateescape() and _Py_char2wchar() uses +       1: the workaround is used: Py_EncodeLocale() uses +          encode_ascii_surrogateescape() and Py_DecodeLocale() uses            decode_ascii_surrogateescape() -       0: the workaround is not used: _Py_wchar2char() uses wcstombs() and -          _Py_char2wchar() uses mbstowcs() +       0: the workaround is not used: Py_EncodeLocale() uses wcstombs() and +          Py_DecodeLocale() uses mbstowcs()        -1: unknown, need to call check_force_ascii() to get the value  */  static int force_ascii = -1; @@ -244,24 +244,26 @@ decode_ascii_surrogateescape(const char *arg, size_t *size)  /* Decode a byte string from the locale encoding with the -   surrogateescape error handler (undecodable bytes are decoded as characters -   in range U+DC80..U+DCFF). If a byte sequence can be decoded as a surrogate +   surrogateescape error handler: undecodable bytes are decoded as characters +   in range U+DC80..U+DCFF. If a byte sequence can be decoded as a surrogate     character, escape the bytes using the surrogateescape error handler instead     of decoding them. -   Use _Py_wchar2char() to encode the character string back to a byte string. +   Return a pointer to a newly allocated wide character string, use +   PyMem_RawFree() to free the memory. 
If size is not NULL, write the number of +   wide characters excluding the null character into *size -   Return a pointer to a newly allocated wide character string (use -   PyMem_RawFree() to free the memory) and write the number of written wide -   characters excluding the null character into *size if size is not NULL, or -   NULL on error (decoding or memory allocation error). If size is not NULL, -   *size is set to (size_t)-1 on memory error and (size_t)-2 on decoding -   error. +   Return NULL on decoding error or memory allocation error. If *size* is not +   NULL, *size is set to (size_t)-1 on memory error or set to (size_t)-2 on +   decoding error. -   Conversion errors should never happen, unless there is a bug in the C -   library. */ +   Decoding errors should never happen, unless there is a bug in the C +   library. + +   Use the Py_EncodeLocale() function to encode the character string back to a +   byte string. */  wchar_t* -_Py_char2wchar(const char* arg, size_t *size) +Py_DecodeLocale(const char* arg, size_t *size)  {  #ifdef __APPLE__      wchar_t *wstr; @@ -399,19 +401,20 @@ oom:  #endif   /* __APPLE__ */  } -/* Encode a (wide) character string to the locale encoding with the -   surrogateescape error handler (characters in range U+DC80..U+DCFF are -   converted to bytes 0x80..0xFF). +/* Encode a wide character string to the locale encoding with the +   surrogateescape error handler: surrogate characters in the range +   U+DC80..U+DCFF are converted to bytes 0x80..0xFF. -   This function is the reverse of _Py_char2wchar(). +   Return a pointer to a newly allocated byte string, use PyMem_Free() to free +   the memory. Return NULL on encoding or memory allocation error. -   Return a pointer to a newly allocated byte string (use PyMem_Free() to free -   the memory), or NULL on encoding or memory allocation error. 
+   If error_pos is not NULL, *error_pos is set to the index of the invalid +   character on encoding error, or set to (size_t)-1 otherwise. -   If error_pos is not NULL: *error_pos is the index of the invalid character -   on encoding error, or (size_t)-1 otherwise. */ +   Use the Py_DecodeLocale() function to decode the byte string back to a wide +   character string. */  char* -_Py_wchar2char(const wchar_t *text, size_t *error_pos) +Py_EncodeLocale(const wchar_t *text, size_t *error_pos)  {  #ifdef __APPLE__      Py_ssize_t len; @@ -530,7 +533,7 @@ _Py_wstat(const wchar_t* path, struct stat *buf)  {      int err;      char *fname; -    fname = _Py_wchar2char(path, NULL); +    fname = Py_EncodeLocale(path, NULL);      if (fname == NULL) {          errno = EINVAL;          return -1; @@ -541,8 +544,143 @@ _Py_wstat(const wchar_t* path, struct stat *buf)  }  #endif -#ifdef HAVE_STAT +#if defined(HAVE_FSTAT) || defined(MS_WINDOWS) + +#ifdef MS_WINDOWS +static __int64 secs_between_epochs = 11644473600; /* Seconds between 1.1.1601 and 1.1.1970 */ + +static void +FILE_TIME_to_time_t_nsec(FILETIME *in_ptr, time_t *time_out, int* nsec_out) +{ +    /* XXX endianness. Shouldn't matter, as all Windows implementations are little-endian */ +    /* Cannot simply cast and dereference in_ptr, +       since it might not be aligned properly */ +    __int64 in; +    memcpy(&in, in_ptr, sizeof(in)); +    *nsec_out = (int)(in % 10000000) * 100; /* FILETIME is in units of 100 nsec. 
*/ +    *time_out = Py_SAFE_DOWNCAST((in / 10000000) - secs_between_epochs, __int64, time_t); +} + +void +_Py_time_t_to_FILE_TIME(time_t time_in, int nsec_in, FILETIME *out_ptr) +{ +    /* XXX endianness */ +    __int64 out; +    out = time_in + secs_between_epochs; +    out = out * 10000000 + nsec_in / 100; +    memcpy(out_ptr, &out, sizeof(out)); +} + +/* Below, we *know* that ugo+r is 0444 */ +#if _S_IREAD != 0400 +#error Unsupported C library +#endif +static int +attributes_to_mode(DWORD attr) +{ +    int m = 0; +    if (attr & FILE_ATTRIBUTE_DIRECTORY) +        m |= _S_IFDIR | 0111; /* IFEXEC for user,group,other */ +    else +        m |= _S_IFREG; +    if (attr & FILE_ATTRIBUTE_READONLY) +        m |= 0444; +    else +        m |= 0666; +    return m; +} + +void +_Py_attribute_data_to_stat(BY_HANDLE_FILE_INFORMATION *info, ULONG reparse_tag, struct _Py_stat_struct *result) +{ +    memset(result, 0, sizeof(*result)); +    result->st_mode = attributes_to_mode(info->dwFileAttributes); +    result->st_size = (((__int64)info->nFileSizeHigh)<<32) + info->nFileSizeLow; +    result->st_dev = info->dwVolumeSerialNumber; +    result->st_rdev = result->st_dev; +    FILE_TIME_to_time_t_nsec(&info->ftCreationTime, &result->st_ctime, &result->st_ctime_nsec); +    FILE_TIME_to_time_t_nsec(&info->ftLastWriteTime, &result->st_mtime, &result->st_mtime_nsec); +    FILE_TIME_to_time_t_nsec(&info->ftLastAccessTime, &result->st_atime, &result->st_atime_nsec); +    result->st_nlink = info->nNumberOfLinks; +    result->st_ino = (((__int64)info->nFileIndexHigh)<<32) + info->nFileIndexLow; +    if (reparse_tag == IO_REPARSE_TAG_SYMLINK) { +        /* first clear the S_IFMT bits */ +        result->st_mode ^= (result->st_mode & S_IFMT); +        /* now set the bits that make this a symlink */ +        result->st_mode |= S_IFLNK; +    } +    result->st_file_attributes = info->dwFileAttributes; +} +#endif + +/* Return information about a file. + +   On POSIX, use fstat(). 
+ +   On Windows, use GetFileType() and GetFileInformationByHandle() which support +   files larger than 2 GB.  fstat() may fail with EOVERFLOW on files larger +   than 2 GB because the file size type is a signed 32-bit integer: see issue +   #23152. +   */ +int +_Py_fstat(int fd, struct _Py_stat_struct *result) +{ +#ifdef MS_WINDOWS +    BY_HANDLE_FILE_INFORMATION info; +    HANDLE h; +    int type; + +    if (!_PyVerify_fd(fd)) +        h = INVALID_HANDLE_VALUE; +    else +        h = (HANDLE)_get_osfhandle(fd); + +    /* Protocol violation: we explicitly clear errno, instead of +       setting it to a POSIX error. Callers should use GetLastError. */ +    errno = 0; + +    if (h == INVALID_HANDLE_VALUE) { +        /* This is really a C library error (invalid file handle). +           We set the Win32 error to the closest one matching. */ +        SetLastError(ERROR_INVALID_HANDLE); +        return -1; +    } +    memset(result, 0, sizeof(*result)); + +    type = GetFileType(h); +    if (type == FILE_TYPE_UNKNOWN) { +        DWORD error = GetLastError(); +        if (error != 0) { +            return -1; +        } +        /* else: valid but unknown file */ +    } + +    if (type != FILE_TYPE_DISK) { +        if (type == FILE_TYPE_CHAR) +            result->st_mode = _S_IFCHR; +        else if (type == FILE_TYPE_PIPE) +            result->st_mode = _S_IFIFO; +        return 0; +    } + +    if (!GetFileInformationByHandle(h, &info)) { +        return -1; +    } + +    _Py_attribute_data_to_stat(&info, 0, result); +    /* specific to fstat() */ +    result->st_ino = (((__int64)info.nFileIndexHigh)<<32) + info.nFileIndexLow; +    return 0; +#else +    return fstat(fd, result); +#endif +} +#endif   /* HAVE_FSTAT || MS_WINDOWS */ + + +#ifdef HAVE_STAT  /* Call _wstat() on Windows, or encode the path to the filesystem encoding and     call stat() otherwise. Only fill st_mode attribute on Windows. 
@@ -575,7 +713,8 @@ _Py_stat(PyObject *path, struct stat *statbuf)  #endif  } -#endif +#endif   /* HAVE_STAT */ +  static int  get_inheritable(int fd, int raise) @@ -814,7 +953,7 @@ _Py_wfopen(const wchar_t *path, const wchar_t *mode)          errno = EINVAL;          return NULL;      } -    cpath = _Py_wchar2char(path, NULL); +    cpath = Py_EncodeLocale(path, NULL);      if (cpath == NULL)          return NULL;      f = fopen(cpath, cmode); @@ -905,7 +1044,7 @@ _Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t bufsiz)      int res;      size_t r1; -    cpath = _Py_wchar2char(path, NULL); +    cpath = Py_EncodeLocale(path, NULL);      if (cpath == NULL) {          errno = EINVAL;          return -1; @@ -919,7 +1058,7 @@ _Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t bufsiz)          return -1;      }      cbuf[res] = '\0'; /* buf will be null terminated */ -    wbuf = _Py_char2wchar(cbuf, &r1); +    wbuf = Py_DecodeLocale(cbuf, &r1);      if (wbuf == NULL) {          errno = EINVAL;          return -1; @@ -950,7 +1089,7 @@ _Py_wrealpath(const wchar_t *path,      wchar_t *wresolved_path;      char *res;      size_t r; -    cpath = _Py_wchar2char(path, NULL); +    cpath = Py_EncodeLocale(path, NULL);      if (cpath == NULL) {          errno = EINVAL;          return NULL; @@ -960,7 +1099,7 @@ _Py_wrealpath(const wchar_t *path,      if (res == NULL)          return NULL; -    wresolved_path = _Py_char2wchar(cresolved_path, &r); +    wresolved_path = Py_DecodeLocale(cresolved_path, &r);      if (wresolved_path == NULL) {          errno = EINVAL;          return NULL; @@ -993,7 +1132,7 @@ _Py_wgetcwd(wchar_t *buf, size_t size)      if (getcwd(fname, Py_ARRAY_LENGTH(fname)) == NULL)          return NULL; -    wname = _Py_char2wchar(fname, &len); +    wname = Py_DecodeLocale(fname, &len);      if (wname == NULL)          return NULL;      if (size <= len) { @@ -1075,3 +1214,56 @@ _Py_dup(int fd)      return fd;  } +#ifndef MS_WINDOWS +/* Get the blocking 
mode of the file descriptor. +   Return 0 if the O_NONBLOCK flag is set, 1 if the flag is cleared, +   raise an exception and return -1 on error. */ +int +_Py_get_blocking(int fd) +{ +    int flags = fcntl(fd, F_GETFL, 0); +    if (flags < 0) { +        PyErr_SetFromErrno(PyExc_OSError); +        return -1; +    } + +    return !(flags & O_NONBLOCK); +} + +/* Set the blocking mode of the specified file descriptor. + +   Set the O_NONBLOCK flag if blocking is False, clear the O_NONBLOCK flag +   otherwise. + +   Return 0 on success, raise an exception and return -1 on error. */ +int +_Py_set_blocking(int fd, int blocking) +{ +#if defined(HAVE_SYS_IOCTL_H) && defined(FIONBIO) +    int arg = !blocking; +    if (ioctl(fd, FIONBIO, &arg) < 0) +        goto error; +#else +    int flags, res; + +    flags = fcntl(fd, F_GETFL, 0); +    if (flags < 0) +        goto error; + +    if (blocking) +        flags = flags & (~O_NONBLOCK); +    else +        flags = flags | O_NONBLOCK; + +    res = fcntl(fd, F_SETFL, flags); +    if (res < 0) +        goto error; +#endif +    return 0; + +error: +    PyErr_SetFromErrno(PyExc_OSError); +    return -1; +} +#endif + | 
