diff options
Diffstat (limited to 'Python/fileutils.c')
-rw-r--r-- | Python/fileutils.c | 360 |
1 files changed, 326 insertions, 34 deletions
diff --git a/Python/fileutils.c b/Python/fileutils.c index 31689c047b..c6cdb19fbe 100644 --- a/Python/fileutils.c +++ b/Python/fileutils.c @@ -3,6 +3,7 @@ #include <locale.h> #ifdef MS_WINDOWS +# include <malloc.h> # include <windows.h> #endif @@ -82,11 +83,11 @@ extern int _Py_normalize_encoding(const char *, char *, size_t); Values of force_ascii: - 1: the workaround is used: _Py_wchar2char() uses - encode_ascii_surrogateescape() and _Py_char2wchar() uses + 1: the workaround is used: Py_EncodeLocale() uses + encode_ascii_surrogateescape() and Py_DecodeLocale() uses decode_ascii_surrogateescape() - 0: the workaround is not used: _Py_wchar2char() uses wcstombs() and - _Py_char2wchar() uses mbstowcs() + 0: the workaround is not used: Py_EncodeLocale() uses wcstombs() and + Py_DecodeLocale() uses mbstowcs() -1: unknown, need to call check_force_ascii() to get the value */ static int force_ascii = -1; @@ -244,24 +245,26 @@ decode_ascii_surrogateescape(const char *arg, size_t *size) /* Decode a byte string from the locale encoding with the - surrogateescape error handler (undecodable bytes are decoded as characters - in range U+DC80..U+DCFF). If a byte sequence can be decoded as a surrogate + surrogateescape error handler: undecodable bytes are decoded as characters + in range U+DC80..U+DCFF. If a byte sequence can be decoded as a surrogate character, escape the bytes using the surrogateescape error handler instead of decoding them. - Use _Py_wchar2char() to encode the character string back to a byte string. + Return a pointer to a newly allocated wide character string, use + PyMem_RawFree() to free the memory. If size is not NULL, write the number of + wide characters excluding the null character into *size - Return a pointer to a newly allocated wide character string (use - PyMem_RawFree() to free the memory) and write the number of written wide - characters excluding the null character into *size if size is not NULL, or - NULL on error (decoding or memory allocation error). If size is not NULL, - *size is set to (size_t)-1 on memory error and (size_t)-2 on decoding - error. + Return NULL on decoding error or memory allocation error. If *size* is not + NULL, *size is set to (size_t)-1 on memory error or set to (size_t)-2 on + decoding error. - Conversion errors should never happen, unless there is a bug in the C - library. */ + Decoding errors should never happen, unless there is a bug in the C + library. + + Use the Py_EncodeLocale() function to encode the character string back to a + byte string. */ wchar_t* -_Py_char2wchar(const char* arg, size_t *size) +Py_DecodeLocale(const char* arg, size_t *size) { #ifdef __APPLE__ wchar_t *wstr; @@ -399,19 +402,20 @@ oom: #endif /* __APPLE__ */ } -/* Encode a (wide) character string to the locale encoding with the - surrogateescape error handler (characters in range U+DC80..U+DCFF are - converted to bytes 0x80..0xFF). +/* Encode a wide character string to the locale encoding with the + surrogateescape error handler: surrogate characters in the range + U+DC80..U+DCFF are converted to bytes 0x80..0xFF. - This function is the reverse of _Py_char2wchar(). + Return a pointer to a newly allocated byte string, use PyMem_Free() to free + the memory. Return NULL on encoding or memory allocation error. - Return a pointer to a newly allocated byte string (use PyMem_Free() to free - the memory), or NULL on encoding or memory allocation error. + If error_pos is not NULL, *error_pos is set to the index of the invalid + character on encoding error, or set to (size_t)-1 otherwise. - If error_pos is not NULL: *error_pos is the index of the invalid character - on encoding error, or (size_t)-1 otherwise. */ + Use the Py_DecodeLocale() function to decode the bytes string back to a wide + character string. */ char* -_Py_wchar2char(const wchar_t *text, size_t *error_pos) +Py_EncodeLocale(const wchar_t *text, size_t *error_pos) { #ifdef __APPLE__ Py_ssize_t len; @@ -530,7 +534,7 @@ _Py_wstat(const wchar_t* path, struct stat *buf) { int err; char *fname; - fname = _Py_wchar2char(path, NULL); + fname = Py_EncodeLocale(path, NULL); if (fname == NULL) { errno = EINVAL; return -1; @@ -541,8 +545,143 @@ _Py_wstat(const wchar_t* path, struct stat *buf) } #endif -#ifdef HAVE_STAT +#if defined(HAVE_FSTAT) || defined(MS_WINDOWS) + +#ifdef MS_WINDOWS +static __int64 secs_between_epochs = 11644473600; /* Seconds between 1.1.1601 and 1.1.1970 */ + +static void +FILE_TIME_to_time_t_nsec(FILETIME *in_ptr, time_t *time_out, int* nsec_out) +{ + /* XXX endianness. Shouldn't matter, as all Windows implementations are little-endian */ + /* Cannot simply cast and dereference in_ptr, + since it might not be aligned properly */ + __int64 in; + memcpy(&in, in_ptr, sizeof(in)); + *nsec_out = (int)(in % 10000000) * 100; /* FILETIME is in units of 100 nsec. */ + *time_out = Py_SAFE_DOWNCAST((in / 10000000) - secs_between_epochs, __int64, time_t); +} + +void +_Py_time_t_to_FILE_TIME(time_t time_in, int nsec_in, FILETIME *out_ptr) +{ + /* XXX endianness */ + __int64 out; + out = time_in + secs_between_epochs; + out = out * 10000000 + nsec_in / 100; + memcpy(out_ptr, &out, sizeof(out)); +} + +/* Below, we *know* that ugo+r is 0444 */ +#if _S_IREAD != 0400 +#error Unsupported C library +#endif +static int +attributes_to_mode(DWORD attr) +{ + int m = 0; + if (attr & FILE_ATTRIBUTE_DIRECTORY) + m |= _S_IFDIR | 0111; /* IFEXEC for user,group,other */ + else + m |= _S_IFREG; + if (attr & FILE_ATTRIBUTE_READONLY) + m |= 0444; + else + m |= 0666; + return m; +} + +void +_Py_attribute_data_to_stat(BY_HANDLE_FILE_INFORMATION *info, ULONG reparse_tag, struct _Py_stat_struct *result) +{ + memset(result, 0, sizeof(*result)); + result->st_mode = attributes_to_mode(info->dwFileAttributes); + result->st_size = (((__int64)info->nFileSizeHigh)<<32) + info->nFileSizeLow; + result->st_dev = info->dwVolumeSerialNumber; + result->st_rdev = result->st_dev; + FILE_TIME_to_time_t_nsec(&info->ftCreationTime, &result->st_ctime, &result->st_ctime_nsec); + FILE_TIME_to_time_t_nsec(&info->ftLastWriteTime, &result->st_mtime, &result->st_mtime_nsec); + FILE_TIME_to_time_t_nsec(&info->ftLastAccessTime, &result->st_atime, &result->st_atime_nsec); + result->st_nlink = info->nNumberOfLinks; + result->st_ino = (((__int64)info->nFileIndexHigh)<<32) + info->nFileIndexLow; + if (reparse_tag == IO_REPARSE_TAG_SYMLINK) { + /* first clear the S_IFMT bits */ + result->st_mode ^= (result->st_mode & S_IFMT); + /* now set the bits that make this a symlink */ + result->st_mode |= S_IFLNK; + } + result->st_file_attributes = info->dwFileAttributes; +} +#endif + +/* Return information about a file. + + On POSIX, use fstat(). + + On Windows, use GetFileType() and GetFileInformationByHandle() which support + files larger than 2 GB. fstat() may fail with EOVERFLOW on files larger + than 2 GB because the file size type is an signed 32-bit integer: see issue + #23152. + */ +int +_Py_fstat(int fd, struct _Py_stat_struct *result) +{ +#ifdef MS_WINDOWS + BY_HANDLE_FILE_INFORMATION info; + HANDLE h; + int type; + + if (!_PyVerify_fd(fd)) + h = INVALID_HANDLE_VALUE; + else + h = (HANDLE)_get_osfhandle(fd); + + /* Protocol violation: we explicitly clear errno, instead of + setting it to a POSIX error. Callers should use GetLastError. */ + errno = 0; + + if (h == INVALID_HANDLE_VALUE) { + /* This is really a C library error (invalid file handle). + We set the Win32 error to the closes one matching. */ + SetLastError(ERROR_INVALID_HANDLE); + return -1; + } + memset(result, 0, sizeof(*result)); + + type = GetFileType(h); + if (type == FILE_TYPE_UNKNOWN) { + DWORD error = GetLastError(); + if (error != 0) { + return -1; + } + /* else: valid but unknown file */ + } + + if (type != FILE_TYPE_DISK) { + if (type == FILE_TYPE_CHAR) + result->st_mode = _S_IFCHR; + else if (type == FILE_TYPE_PIPE) + result->st_mode = _S_IFIFO; + return 0; + } + + if (!GetFileInformationByHandle(h, &info)) { + return -1; + } + + _Py_attribute_data_to_stat(&info, 0, result); + /* specific to fstat() */ + result->st_ino = (((__int64)info.nFileIndexHigh)<<32) + info.nFileIndexLow; + return 0; +#else + return fstat(fd, result); +#endif +} +#endif /* HAVE_FSTAT || MS_WINDOWS */ + + +#ifdef HAVE_STAT /* Call _wstat() on Windows, or encode the path to the filesystem encoding and call stat() otherwise. Only fill st_mode attribute on Windows. @@ -575,7 +714,8 @@ _Py_stat(PyObject *path, struct stat *statbuf) #endif } -#endif +#endif /* HAVE_STAT */ + static int get_inheritable(int fd, int raise) @@ -814,7 +954,7 @@ _Py_wfopen(const wchar_t *path, const wchar_t *mode) errno = EINVAL; return NULL; } - cpath = _Py_wchar2char(path, NULL); + cpath = Py_EncodeLocale(path, NULL); if (cpath == NULL) return NULL; f = fopen(cpath, cmode); @@ -905,7 +1045,7 @@ _Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t bufsiz) int res; size_t r1; - cpath = _Py_wchar2char(path, NULL); + cpath = Py_EncodeLocale(path, NULL); if (cpath == NULL) { errno = EINVAL; return -1; @@ -919,7 +1059,7 @@ _Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t bufsiz) return -1; } cbuf[res] = '\0'; /* buf will be null terminated */ - wbuf = _Py_char2wchar(cbuf, &r1); + wbuf = Py_DecodeLocale(cbuf, &r1); if (wbuf == NULL) { errno = EINVAL; return -1; @@ -950,7 +1090,7 @@ _Py_wrealpath(const wchar_t *path, wchar_t *wresolved_path; char *res; size_t r; - cpath = _Py_wchar2char(path, NULL); + cpath = Py_EncodeLocale(path, NULL); if (cpath == NULL) { errno = EINVAL; return NULL; @@ -960,7 +1100,7 @@ _Py_wrealpath(const wchar_t *path, if (res == NULL) return NULL; - wresolved_path = _Py_char2wchar(cresolved_path, &r); + wresolved_path = Py_DecodeLocale(cresolved_path, &r); if (wresolved_path == NULL) { errno = EINVAL; return NULL; @@ -993,7 +1133,7 @@ _Py_wgetcwd(wchar_t *buf, size_t size) if (getcwd(fname, Py_ARRAY_LENGTH(fname)) == NULL) return NULL; - wname = _Py_char2wchar(fname, &len); + wname = Py_DecodeLocale(fname, &len); if (wname == NULL) return NULL; if (size <= len) { @@ -1075,3 +1215,155 @@ _Py_dup(int fd) return fd; } +#ifndef MS_WINDOWS +/* Get the blocking mode of the file descriptor. + Return 0 if the O_NONBLOCK flag is set, 1 if the flag is cleared, + raise an exception and return -1 on error. */ +int +_Py_get_blocking(int fd) +{ + int flags = fcntl(fd, F_GETFL, 0); + if (flags < 0) { + PyErr_SetFromErrno(PyExc_OSError); + return -1; + } + + return !(flags & O_NONBLOCK); +} + +/* Set the blocking mode of the specified file descriptor. + + Set the O_NONBLOCK flag if blocking is False, clear the O_NONBLOCK flag + otherwise. + + Return 0 on success, raise an exception and return -1 on error. */ +int +_Py_set_blocking(int fd, int blocking) +{ +#if defined(HAVE_SYS_IOCTL_H) && defined(FIONBIO) + int arg = !blocking; + if (ioctl(fd, FIONBIO, &arg) < 0) + goto error; +#else + int flags, res; + + flags = fcntl(fd, F_GETFL, 0); + if (flags < 0) + goto error; + + if (blocking) + flags = flags & (~O_NONBLOCK); + else + flags = flags | O_NONBLOCK; + + res = fcntl(fd, F_SETFL, flags); + if (res < 0) + goto error; +#endif + return 0; + +error: + PyErr_SetFromErrno(PyExc_OSError); + return -1; +} +#endif + +#ifdef _MSC_VER +#if _MSC_VER >= 1900 + +/* This function lets the Windows CRT validate the file handle without + terminating the process if it's invalid. */ +int +_PyVerify_fd(int fd) +{ + intptr_t osh; + /* Fast check for the only condition we know */ + if (fd < 0) { + _set_errno(EBADF); + return 0; + } + osh = _get_osfhandle(fd); + return osh != (intptr_t)-1; +} + +#elif _MSC_VER >= 1400 +/* Legacy implementation of _PyVerify_fd while transitioning to + * MSVC 14.0. This should eventually be removed. (issue23524) + */ + +/* Microsoft CRT in VS2005 and higher will verify that a filehandle is + * valid and raise an assertion if it isn't. + * Normally, an invalid fd is likely to be a C program error and therefore + * an assertion can be useful, but it does contradict the POSIX standard + * which for write(2) states: + * "Otherwise, -1 shall be returned and errno set to indicate the error." + * "[EBADF] The fildes argument is not a valid file descriptor open for + * writing." + * Furthermore, python allows the user to enter any old integer + * as a fd and should merely raise a python exception on error. + * The Microsoft CRT doesn't provide an official way to check for the + * validity of a file descriptor, but we can emulate its internal behaviour + * by using the exported __pinfo data member and knowledge of the + * internal structures involved. + * The structures below must be updated for each version of visual studio + * according to the file internal.h in the CRT source, until MS comes + * up with a less hacky way to do this. + * (all of this is to avoid globally modifying the CRT behaviour using + * _set_invalid_parameter_handler() and _CrtSetReportMode()) + */ +/* The actual size of the structure is determined at runtime. + * Only the first items must be present. + */ +typedef struct { + intptr_t osfhnd; + char osfile; +} my_ioinfo; + +extern __declspec(dllimport) char * __pioinfo[]; +#define IOINFO_L2E 5 +#define IOINFO_ARRAYS 64 +#define IOINFO_ARRAY_ELTS (1 << IOINFO_L2E) +#define _NHANDLE_ (IOINFO_ARRAYS * IOINFO_ARRAY_ELTS) +#define FOPEN 0x01 +#define _NO_CONSOLE_FILENO (intptr_t)-2 + +/* This function emulates what the windows CRT does to validate file handles */ +int +_PyVerify_fd(int fd) +{ + const int i1 = fd >> IOINFO_L2E; + const int i2 = fd & ((1 << IOINFO_L2E) - 1); + + static size_t sizeof_ioinfo = 0; + + /* Determine the actual size of the ioinfo structure, + * as used by the CRT loaded in memory + */ + if (sizeof_ioinfo == 0 && __pioinfo[0] != NULL) { + sizeof_ioinfo = _msize(__pioinfo[0]) / IOINFO_ARRAY_ELTS; + } + if (sizeof_ioinfo == 0) { + /* This should not happen... */ + goto fail; + } + + /* See that it isn't a special CLEAR fileno */ + if (fd != _NO_CONSOLE_FILENO) { + /* Microsoft CRT would check that 0<=fd<_nhandle but we can't do that. Instead + * we check pointer validity and other info + */ + if (0 <= i1 && i1 < IOINFO_ARRAYS && __pioinfo[i1] != NULL) { + /* finally, check that the file is open */ + my_ioinfo* info = (my_ioinfo*)(__pioinfo[i1] + i2 * sizeof_ioinfo); + if (info->osfile & FOPEN) { + return 1; + } + } + } + fail: + errno = EBADF; + return 0; +} + +#endif /* _MSC_VER >= 1900 || _MSC_VER >= 1400 */ +#endif /* defined _MSC_VER */ |