summaryrefslogtreecommitdiff
path: root/Python/fileutils.c
diff options
context:
space:
mode:
Diffstat (limited to 'Python/fileutils.c')
-rw-r--r--Python/fileutils.c360
1 files changed, 326 insertions, 34 deletions
diff --git a/Python/fileutils.c b/Python/fileutils.c
index 31689c047b..c6cdb19fbe 100644
--- a/Python/fileutils.c
+++ b/Python/fileutils.c
@@ -3,6 +3,7 @@
#include <locale.h>
#ifdef MS_WINDOWS
+# include <malloc.h>
# include <windows.h>
#endif
@@ -82,11 +83,11 @@ extern int _Py_normalize_encoding(const char *, char *, size_t);
Values of force_ascii:
- 1: the workaround is used: _Py_wchar2char() uses
- encode_ascii_surrogateescape() and _Py_char2wchar() uses
+ 1: the workaround is used: Py_EncodeLocale() uses
+ encode_ascii_surrogateescape() and Py_DecodeLocale() uses
decode_ascii_surrogateescape()
- 0: the workaround is not used: _Py_wchar2char() uses wcstombs() and
- _Py_char2wchar() uses mbstowcs()
+ 0: the workaround is not used: Py_EncodeLocale() uses wcstombs() and
+ Py_DecodeLocale() uses mbstowcs()
-1: unknown, need to call check_force_ascii() to get the value
*/
static int force_ascii = -1;
@@ -244,24 +245,26 @@ decode_ascii_surrogateescape(const char *arg, size_t *size)
/* Decode a byte string from the locale encoding with the
- surrogateescape error handler (undecodable bytes are decoded as characters
- in range U+DC80..U+DCFF). If a byte sequence can be decoded as a surrogate
+ surrogateescape error handler: undecodable bytes are decoded as characters
+ in range U+DC80..U+DCFF. If a byte sequence can be decoded as a surrogate
character, escape the bytes using the surrogateescape error handler instead
of decoding them.
- Use _Py_wchar2char() to encode the character string back to a byte string.
+ Return a pointer to a newly allocated wide character string, use
+ PyMem_RawFree() to free the memory. If size is not NULL, write the number of
+ wide characters excluding the null character into *size
- Return a pointer to a newly allocated wide character string (use
- PyMem_RawFree() to free the memory) and write the number of written wide
- characters excluding the null character into *size if size is not NULL, or
- NULL on error (decoding or memory allocation error). If size is not NULL,
- *size is set to (size_t)-1 on memory error and (size_t)-2 on decoding
- error.
+ Return NULL on decoding error or memory allocation error. If *size* is not
+ NULL, *size is set to (size_t)-1 on memory error or set to (size_t)-2 on
+ decoding error.
- Conversion errors should never happen, unless there is a bug in the C
- library. */
+ Decoding errors should never happen, unless there is a bug in the C
+ library.
+
+ Use the Py_EncodeLocale() function to encode the character string back to a
+ byte string. */
wchar_t*
-_Py_char2wchar(const char* arg, size_t *size)
+Py_DecodeLocale(const char* arg, size_t *size)
{
#ifdef __APPLE__
wchar_t *wstr;
@@ -399,19 +402,20 @@ oom:
#endif /* __APPLE__ */
}
-/* Encode a (wide) character string to the locale encoding with the
- surrogateescape error handler (characters in range U+DC80..U+DCFF are
- converted to bytes 0x80..0xFF).
+/* Encode a wide character string to the locale encoding with the
+ surrogateescape error handler: surrogate characters in the range
+ U+DC80..U+DCFF are converted to bytes 0x80..0xFF.
- This function is the reverse of _Py_char2wchar().
+ Return a pointer to a newly allocated byte string, use PyMem_Free() to free
+ the memory. Return NULL on encoding or memory allocation error.
- Return a pointer to a newly allocated byte string (use PyMem_Free() to free
- the memory), or NULL on encoding or memory allocation error.
+ If error_pos is not NULL, *error_pos is set to the index of the invalid
+ character on encoding error, or set to (size_t)-1 otherwise.
- If error_pos is not NULL: *error_pos is the index of the invalid character
- on encoding error, or (size_t)-1 otherwise. */
+ Use the Py_DecodeLocale() function to decode the byte string back to a wide
+ character string. */
char*
-_Py_wchar2char(const wchar_t *text, size_t *error_pos)
+Py_EncodeLocale(const wchar_t *text, size_t *error_pos)
{
#ifdef __APPLE__
Py_ssize_t len;
@@ -530,7 +534,7 @@ _Py_wstat(const wchar_t* path, struct stat *buf)
{
int err;
char *fname;
- fname = _Py_wchar2char(path, NULL);
+ fname = Py_EncodeLocale(path, NULL);
if (fname == NULL) {
errno = EINVAL;
return -1;
@@ -541,8 +545,143 @@ _Py_wstat(const wchar_t* path, struct stat *buf)
}
#endif
-#ifdef HAVE_STAT
+#if defined(HAVE_FSTAT) || defined(MS_WINDOWS)
+
+#ifdef MS_WINDOWS
+static __int64 secs_between_epochs = 11644473600; /* Seconds between 1.1.1601 and 1.1.1970 */
+
+static void
+FILE_TIME_to_time_t_nsec(FILETIME *in_ptr, time_t *time_out, int* nsec_out)
+{
+ /* Split the FILETIME at in_ptr into whole seconds (*time_out) and the
+ sub-second remainder expressed in nanoseconds (*nsec_out). The seconds
+ value has secs_between_epochs (the seconds between 1.1.1601 and
+ 1.1.1970) subtracted from it.
+
+ XXX endianness. Shouldn't matter, as all Windows implementations are little-endian */
+ /* Cannot simply cast and dereference in_ptr,
+ since it might not be aligned properly */
+ __int64 in;
+ memcpy(&in, in_ptr, sizeof(in));
+ *nsec_out = (int)(in % 10000000) * 100; /* FILETIME is in units of 100 nsec. */
+ *time_out = Py_SAFE_DOWNCAST((in / 10000000) - secs_between_epochs, __int64, time_t); /* whole seconds, narrowed from __int64 to time_t */
+}
+
+void
+_Py_time_t_to_FILE_TIME(time_t time_in, int nsec_in, FILETIME *out_ptr)
+{
+ /* Build a FILETIME from time_in seconds plus nsec_in nanoseconds and store
+ it at out_ptr: add secs_between_epochs to the seconds, scale by 10000000
+ (units of 100 nsec), then add nsec_in / 100. The division is an integer
+ division, so any remainder below 100 nsec is dropped.
+
+ XXX endianness */
+ __int64 out;
+ out = time_in + secs_between_epochs;
+ out = out * 10000000 + nsec_in / 100;
+ memcpy(out_ptr, &out, sizeof(out)); /* copied bytewise instead of writing through a cast pointer */
+}
+
+/* Below, we *know* that ugo+r is 0444 */
+#if _S_IREAD != 0400
+#error Unsupported C library
+#endif
+static int
+attributes_to_mode(DWORD attr)
+{
+ int m = 0; /* Translate the Win32 file attribute bits in attr into
+ st_mode-style bits: the file type first, then the
+ permission bits. The accumulated value is returned. */
+ if (attr & FILE_ATTRIBUTE_DIRECTORY)
+ m |= _S_IFDIR | 0111; /* IFEXEC for user,group,other */
+ else
+ m |= _S_IFREG; /* everything that is not a directory is reported as a regular file */
+ if (attr & FILE_ATTRIBUTE_READONLY)
+ m |= 0444; /* read-only: read permission for user, group and other */
+ else
+ m |= 0666; /* otherwise read and write permission for user, group and other */
+ return m;
+}
+
+void
+_Py_attribute_data_to_stat(BY_HANDLE_FILE_INFORMATION *info, ULONG reparse_tag, struct _Py_stat_struct *result)
+{
+ memset(result, 0, sizeof(*result)); /* Fill *result from the Win32 data in
+ *info. Everything is zeroed first, so
+ fields not assigned below stay 0. */
+ result->st_mode = attributes_to_mode(info->dwFileAttributes);
+ result->st_size = (((__int64)info->nFileSizeHigh)<<32) + info->nFileSizeLow; /* join the high and low halves into one 64-bit size */
+ result->st_dev = info->dwVolumeSerialNumber;
+ result->st_rdev = result->st_dev; /* st_rdev simply mirrors st_dev */
+ FILE_TIME_to_time_t_nsec(&info->ftCreationTime, &result->st_ctime, &result->st_ctime_nsec); /* st_ctime is taken from the creation time */
+ FILE_TIME_to_time_t_nsec(&info->ftLastWriteTime, &result->st_mtime, &result->st_mtime_nsec);
+ FILE_TIME_to_time_t_nsec(&info->ftLastAccessTime, &result->st_atime, &result->st_atime_nsec);
+ result->st_nlink = info->nNumberOfLinks;
+ result->st_ino = (((__int64)info->nFileIndexHigh)<<32) + info->nFileIndexLow; /* join the high and low halves of the file index */
+ if (reparse_tag == IO_REPARSE_TAG_SYMLINK) {
+ /* first clear the S_IFMT bits */
+ result->st_mode ^= (result->st_mode & S_IFMT);
+ /* now set the bits that make this a symlink */
+ result->st_mode |= S_IFLNK;
+ }
+ result->st_file_attributes = info->dwFileAttributes; /* raw attribute bits are kept alongside the derived st_mode */
+}
+#endif
+
+/* Return information about a file.
+
+ On POSIX, use fstat().
+
+ On Windows, use GetFileType() and GetFileInformationByHandle() which support
+ files larger than 2 GB. fstat() may fail with EOVERFLOW on files larger
+ than 2 GB because the file size type is a signed 32-bit integer: see issue
+ #23152.
+
+ Return 0 on success and -1 on failure. On POSIX the return value is the
+ one from fstat(). On Windows errno is cleared to 0 instead of being set:
+ callers should use GetLastError() to find out what went wrong.
+
+ On Windows, when the handle is not a disk file, *result is zeroed and at
+ most st_mode is set (_S_IFCHR for a character file, _S_IFIFO for a pipe).
+ When the fd itself is invalid, -1 is returned before *result is touched.
+ */
+int
+_Py_fstat(int fd, struct _Py_stat_struct *result)
+{
+#ifdef MS_WINDOWS
+ BY_HANDLE_FILE_INFORMATION info;
+ HANDLE h;
+ int type;
+
+ if (!_PyVerify_fd(fd))
+ h = INVALID_HANDLE_VALUE;
+ else
+ h = (HANDLE)_get_osfhandle(fd);
+
+ /* Protocol violation: we explicitly clear errno, instead of
+ setting it to a POSIX error. Callers should use GetLastError. */
+ errno = 0;
+
+ if (h == INVALID_HANDLE_VALUE) {
+ /* This is really a C library error (invalid file handle).
+ We set the Win32 error to the closest one matching. */
+ SetLastError(ERROR_INVALID_HANDLE);
+ return -1;
+ }
+ memset(result, 0, sizeof(*result));
+
+ type = GetFileType(h);
+ if (type == FILE_TYPE_UNKNOWN) {
+ DWORD error = GetLastError();
+ if (error != 0) {
+ return -1;
+ }
+ /* else: valid but unknown file */
+ }
+
+ if (type != FILE_TYPE_DISK) {
+ if (type == FILE_TYPE_CHAR)
+ result->st_mode = _S_IFCHR;
+ else if (type == FILE_TYPE_PIPE)
+ result->st_mode = _S_IFIFO;
+ return 0; /* any other non-disk type leaves *result all zero */
+ }
+
+ if (!GetFileInformationByHandle(h, &info)) {
+ return -1;
+ }
+
+ _Py_attribute_data_to_stat(&info, 0, result); /* reparse tag 0: never reported as a symlink here */
+ /* specific to fstat()
+ NOTE(review): _Py_attribute_data_to_stat() already stores this same
+ expression in st_ino - confirm whether this assignment is still needed. */
+ result->st_ino = (((__int64)info.nFileIndexHigh)<<32) + info.nFileIndexLow;
+ return 0;
+#else
+ return fstat(fd, result);
+#endif
+}
+#endif /* HAVE_FSTAT || MS_WINDOWS */
+
+
+#ifdef HAVE_STAT
/* Call _wstat() on Windows, or encode the path to the filesystem encoding and
call stat() otherwise. Only fill st_mode attribute on Windows.
@@ -575,7 +714,8 @@ _Py_stat(PyObject *path, struct stat *statbuf)
#endif
}
-#endif
+#endif /* HAVE_STAT */
+
static int
get_inheritable(int fd, int raise)
@@ -814,7 +954,7 @@ _Py_wfopen(const wchar_t *path, const wchar_t *mode)
errno = EINVAL;
return NULL;
}
- cpath = _Py_wchar2char(path, NULL);
+ cpath = Py_EncodeLocale(path, NULL);
if (cpath == NULL)
return NULL;
f = fopen(cpath, cmode);
@@ -905,7 +1045,7 @@ _Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t bufsiz)
int res;
size_t r1;
- cpath = _Py_wchar2char(path, NULL);
+ cpath = Py_EncodeLocale(path, NULL);
if (cpath == NULL) {
errno = EINVAL;
return -1;
@@ -919,7 +1059,7 @@ _Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t bufsiz)
return -1;
}
cbuf[res] = '\0'; /* buf will be null terminated */
- wbuf = _Py_char2wchar(cbuf, &r1);
+ wbuf = Py_DecodeLocale(cbuf, &r1);
if (wbuf == NULL) {
errno = EINVAL;
return -1;
@@ -950,7 +1090,7 @@ _Py_wrealpath(const wchar_t *path,
wchar_t *wresolved_path;
char *res;
size_t r;
- cpath = _Py_wchar2char(path, NULL);
+ cpath = Py_EncodeLocale(path, NULL);
if (cpath == NULL) {
errno = EINVAL;
return NULL;
@@ -960,7 +1100,7 @@ _Py_wrealpath(const wchar_t *path,
if (res == NULL)
return NULL;
- wresolved_path = _Py_char2wchar(cresolved_path, &r);
+ wresolved_path = Py_DecodeLocale(cresolved_path, &r);
if (wresolved_path == NULL) {
errno = EINVAL;
return NULL;
@@ -993,7 +1133,7 @@ _Py_wgetcwd(wchar_t *buf, size_t size)
if (getcwd(fname, Py_ARRAY_LENGTH(fname)) == NULL)
return NULL;
- wname = _Py_char2wchar(fname, &len);
+ wname = Py_DecodeLocale(fname, &len);
if (wname == NULL)
return NULL;
if (size <= len) {
@@ -1075,3 +1215,155 @@ _Py_dup(int fd)
return fd;
}
+#ifndef MS_WINDOWS
+/* Get the blocking mode of the file descriptor.
+ Return 0 if the O_NONBLOCK flag is set, 1 if the flag is cleared,
+ raise an exception and return -1 on error.
+
+ The flags are read with fcntl(fd, F_GETFL, 0). When that call fails, the
+ exception is an OSError built from errno. */
+int
+_Py_get_blocking(int fd)
+{
+ int flags = fcntl(fd, F_GETFL, 0);
+ if (flags < 0) {
+ PyErr_SetFromErrno(PyExc_OSError);
+ return -1;
+ }
+
+ return !(flags & O_NONBLOCK); /* 1 means blocking, 0 means non-blocking */
+}
+
+/* Set the blocking mode of the specified file descriptor.
+
+ Set the O_NONBLOCK flag if blocking is False, clear the O_NONBLOCK flag
+ otherwise.
+
+ Two implementations are selected at compile time: when both
+ HAVE_SYS_IOCTL_H and FIONBIO are defined, a single ioctl(FIONBIO) call is
+ used; otherwise the flags are read with fcntl(F_GETFL), modified and
+ written back with fcntl(F_SETFL).
+
+ Return 0 on success, raise an exception and return -1 on error. The
+ exception is an OSError built from errno. */
+int
+_Py_set_blocking(int fd, int blocking)
+{
+#if defined(HAVE_SYS_IOCTL_H) && defined(FIONBIO)
+ int arg = !blocking; /* FIONBIO is passed 1 to request non-blocking mode, 0 otherwise */
+ if (ioctl(fd, FIONBIO, &arg) < 0)
+ goto error;
+#else
+ int flags, res;
+
+ flags = fcntl(fd, F_GETFL, 0);
+ if (flags < 0)
+ goto error;
+
+ if (blocking)
+ flags = flags & (~O_NONBLOCK);
+ else
+ flags = flags | O_NONBLOCK;
+
+ res = fcntl(fd, F_SETFL, flags);
+ if (res < 0)
+ goto error;
+#endif
+ return 0;
+
+error:
+ PyErr_SetFromErrno(PyExc_OSError);
+ return -1;
+}
+#endif
+
+#ifdef _MSC_VER
+#if _MSC_VER >= 1900
+
+/* This function lets the Windows CRT validate the file handle without
+ terminating the process if it's invalid.
+
+ Return 1 if fd is considered valid, 0 otherwise. A negative fd is rejected
+ up front with errno set to EBADF; for any other value the answer is
+ whether _get_osfhandle() returns something other than (intptr_t)-1. */
+int
+_PyVerify_fd(int fd)
+{
+ intptr_t osh;
+ /* Fast check for the only condition we know */
+ if (fd < 0) {
+ _set_errno(EBADF);
+ return 0;
+ }
+ osh = _get_osfhandle(fd);
+ return osh != (intptr_t)-1; /* errno is not set explicitly on this path */
+}
+
+#elif _MSC_VER >= 1400
+/* Legacy implementation of _PyVerify_fd while transitioning to
+ * MSVC 14.0. This should eventually be removed. (issue23524)
+ */
+
+/* Microsoft CRT in VS2005 and higher will verify that a filehandle is
+ * valid and raise an assertion if it isn't.
+ * Normally, an invalid fd is likely to be a C program error and therefore
+ * an assertion can be useful, but it does contradict the POSIX standard
+ * which for write(2) states:
+ * "Otherwise, -1 shall be returned and errno set to indicate the error."
+ * "[EBADF] The fildes argument is not a valid file descriptor open for
+ * writing."
+ * Furthermore, python allows the user to enter any old integer
+ * as a fd and should merely raise a python exception on error.
+ * The Microsoft CRT doesn't provide an official way to check for the
+ * validity of a file descriptor, but we can emulate its internal behaviour
+ * by using the exported __pinfo data member and knowledge of the
+ * internal structures involved.
+ * The structures below must be updated for each version of visual studio
+ * according to the file internal.h in the CRT source, until MS comes
+ * up with a less hacky way to do this.
+ * (all of this is to avoid globally modifying the CRT behaviour using
+ * _set_invalid_parameter_handler() and _CrtSetReportMode())
+ */
+/* The actual size of the structure is determined at runtime.
+ * Only the first items must be present.
+ *
+ * _PyVerify_fd() below derives the real element size from _msize() and uses
+ * this type only to reach the leading fields of an entry.
+ */
+typedef struct {
+ intptr_t osfhnd; /* presumably the OS file handle of the descriptor; not read in this part of the file - TODO confirm against the CRT's internal.h */
+ char osfile; /* flag bits; _PyVerify_fd() tests the FOPEN bit here */
+} my_ioinfo;
+
+extern __declspec(dllimport) char * __pioinfo[];
+#define IOINFO_L2E 5
+#define IOINFO_ARRAYS 64
+#define IOINFO_ARRAY_ELTS (1 << IOINFO_L2E)
+#define _NHANDLE_ (IOINFO_ARRAYS * IOINFO_ARRAY_ELTS)
+#define FOPEN 0x01
+#define _NO_CONSOLE_FILENO (intptr_t)-2
+
+/* This function emulates what the windows CRT does to validate file handles
+
+ Return 1 if the entry for fd in the CRT's __pioinfo table has the FOPEN
+ flag set. In every other case set errno to EBADF and return 0. */
+int
+_PyVerify_fd(int fd)
+{
+ const int i1 = fd >> IOINFO_L2E; /* which __pioinfo[] array the fd falls into */
+ const int i2 = fd & ((1 << IOINFO_L2E) - 1); /* index of the entry inside that array */
+
+ static size_t sizeof_ioinfo = 0; /* computed on first use, then kept across calls */
+
+ /* Determine the actual size of the ioinfo structure,
+ * as used by the CRT loaded in memory
+ */
+ if (sizeof_ioinfo == 0 && __pioinfo[0] != NULL) {
+ sizeof_ioinfo = _msize(__pioinfo[0]) / IOINFO_ARRAY_ELTS; /* allocation size of the first array divided by its element count */
+ }
+ if (sizeof_ioinfo == 0) {
+ /* This should not happen... */
+ goto fail;
+ }
+
+ /* See that it isn't a special CLEAR fileno */
+ if (fd != _NO_CONSOLE_FILENO) {
+ /* Microsoft CRT would check that 0<=fd<_nhandle but we can't do that. Instead
+ * we check pointer validity and other info
+ */
+ if (0 <= i1 && i1 < IOINFO_ARRAYS && __pioinfo[i1] != NULL) {
+ /* finally, check that the file is open */
+ my_ioinfo* info = (my_ioinfo*)(__pioinfo[i1] + i2 * sizeof_ioinfo); /* byte offset uses the runtime element size, not sizeof(my_ioinfo) */
+ if (info->osfile & FOPEN) {
+ return 1;
+ }
+ }
+ }
+ fail:
+ errno = EBADF;
+ return 0;
+}
+
+#endif /* _MSC_VER >= 1900 || _MSC_VER >= 1400 */
+#endif /* defined _MSC_VER */