diff options
Diffstat (limited to 'Modules/pyexpat.c')
| -rw-r--r-- | Modules/pyexpat.c | 155 | 
1 files changed, 109 insertions, 46 deletions
| diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c index ae8bf1b666..7c70f1b058 100644 --- a/Modules/pyexpat.c +++ b/Modules/pyexpat.c @@ -60,6 +60,7 @@ typedef struct {      int ordered_attributes;     /* Return attributes as a list. */      int specified_attributes;   /* Report only specified attributes. */      int in_callback;            /* Is a callback active? */ +    PyObject *intern;           /* Dictionary to intern strings */      PyObject **handlers;  } xmlparseobject; @@ -123,7 +124,7 @@ set_error(xmlparseobject *self)     Returns None if str is a null pointer. */  static PyObject * -conv_string_to_unicode(XML_Char *str) +conv_string_to_unicode(const XML_Char *str)  {      /* XXX currently this code assumes that XML_Char is 8-bit,          and hence in UTF-8.  */ @@ -132,8 +133,7 @@ conv_string_to_unicode(XML_Char *str)          Py_INCREF(Py_None);          return Py_None;      } -    return PyUnicode_DecodeUTF8((const char *)str,  -                                strlen((const char *)str),  +    return PyUnicode_DecodeUTF8(str, strlen(str),                                   "strict");  } @@ -155,7 +155,7 @@ conv_string_len_to_unicode(const XML_Char *str, int len)     Returns None if str is a null pointer. */  static PyObject * -conv_string_to_utf8(XML_Char *str) +conv_string_to_utf8(const XML_Char *str)  {      /* XXX currently this code assumes that XML_Char is 8-bit,          and hence in UTF-8.  */ @@ -164,7 +164,7 @@ conv_string_to_utf8(XML_Char *str)          Py_INCREF(Py_None);          return Py_None;      } -    return PyString_FromString((const char *)str); +    return PyString_FromString(str);  }  static PyObject * @@ -275,6 +275,25 @@ call_with_frame(PyCodeObject *c, PyObject* func, PyObject* args)                            ? conv_string_to_unicode : conv_string_to_utf8)  #endif +static PyObject* +string_intern(xmlparseobject *self, const char* str) +{ +    PyObject *result = STRING_CONV_FUNC(str); +    PyObject *value; +    if (!self->intern) +	return result; +    value = PyDict_GetItem(self->intern, result); +    if (!value) { +	if (PyDict_SetItem(self->intern, result, result) == 0) +            return result; +        else +            return NULL; +    } +    Py_INCREF(value); +    Py_DECREF(result); +    return value; +} +  static void  my_StartElementHandler(void *userData,                         const XML_Char *name, const XML_Char **atts) @@ -307,7 +326,7 @@ my_StartElementHandler(void *userData,              return;          }          for (i = 0; i < max; i += 2) { -            PyObject *n = STRING_CONV_FUNC((XML_Char *) atts[i]); +            PyObject *n = string_intern(self, (XML_Char *) atts[i]);              PyObject *v;              if (n == NULL) {                  flag_error(self); @@ -336,7 +355,7 @@ my_StartElementHandler(void *userData,                  Py_DECREF(v);              }          } -        args = Py_BuildValue("(O&N)", STRING_CONV_FUNC,name, container); +	args = Py_BuildValue("(NN)", string_intern(self, name), container);          if (args == NULL) {              Py_DECREF(container);              return; @@ -394,13 +413,13 @@ my_##NAME##Handler PARAMS {\  VOID_HANDLER(EndElement,                (void *userData, const XML_Char *name),  -             ("(O&)", STRING_CONV_FUNC, name)) +             ("(N)", string_intern(self, name)))  VOID_HANDLER(ProcessingInstruction,               (void *userData,                 const XML_Char *target,                 const XML_Char *data), -             ("(O&O&)",STRING_CONV_FUNC,target, STRING_CONV_FUNC,data)) +             ("(NO&)", string_intern(self, target), STRING_CONV_FUNC,data))  #ifndef Py_USING_UNICODE  VOID_HANDLER(CharacterData,  @@ -421,10 +440,10 @@ VOID_HANDLER(UnparsedEntityDecl,                const XML_Char *systemId,                const XML_Char *publicId,                const XML_Char *notationName), -             ("(O&O&O&O&O&)",  -              STRING_CONV_FUNC,entityName, STRING_CONV_FUNC,base,  -              STRING_CONV_FUNC,systemId, STRING_CONV_FUNC,publicId,  -              STRING_CONV_FUNC,notationName)) +             ("(NNNNN)", +              string_intern(self, entityName), string_intern(self, base),  +              string_intern(self, systemId), string_intern(self, publicId),  +              string_intern(self, notationName)))  #ifndef Py_USING_UNICODE  VOID_HANDLER(EntityDecl, @@ -437,11 +456,12 @@ VOID_HANDLER(EntityDecl,                const XML_Char *systemId,                const XML_Char *publicId,                const XML_Char *notationName), -             ("O&iNO&O&O&O&", -              STRING_CONV_FUNC,entityName, is_parameter_entity, +             ("NiNNNNN", +              string_intern(self, entityName), is_parameter_entity,                conv_string_len_to_utf8(value, value_length), -              STRING_CONV_FUNC,base, STRING_CONV_FUNC,systemId, -              STRING_CONV_FUNC,publicId, STRING_CONV_FUNC,notationName)) +              string_intern(self, base), string_intern(self, systemId), +              string_intern(self, publicId), +              string_intern(self, notationName)))  #else  VOID_HANDLER(EntityDecl,               (void *userData, @@ -453,13 +473,14 @@ VOID_HANDLER(EntityDecl,                const XML_Char *systemId,                const XML_Char *publicId,                const XML_Char *notationName), -             ("O&iNO&O&O&O&", -              STRING_CONV_FUNC,entityName, is_parameter_entity, +             ("NiNNNNN", +              string_intern(self, entityName), is_parameter_entity,                (self->returns_unicode                  ? conv_string_len_to_unicode(value, value_length)                  : conv_string_len_to_utf8(value, value_length)), -              STRING_CONV_FUNC,base, STRING_CONV_FUNC,systemId, -              STRING_CONV_FUNC,publicId, STRING_CONV_FUNC,notationName)) +              string_intern(self, base), string_intern(self, systemId), +              string_intern(self, publicId), +              string_intern(self, notationName)))  #endif  VOID_HANDLER(XmlDecl, @@ -473,7 +494,7 @@ VOID_HANDLER(XmlDecl,  static PyObject *  conv_content_model(XML_Content * const model, -                   PyObject *(*conv_string)(XML_Char *)) +                   PyObject *(*conv_string)(const XML_Char *))  {      PyObject *result = NULL;      PyObject *children = PyTuple_New(model->numchildren); @@ -514,8 +535,8 @@ VOID_HANDLER(ElementDecl,               (void *userData,                const XML_Char *name,                XML_Content *model), -             ("O&O&", -              STRING_CONV_FUNC,name, +             ("NO&", +              string_intern(self, name),                (self->returns_unicode ? conv_content_model_unicode                                       : conv_content_model_utf8),model))  #else @@ -523,8 +544,8 @@ VOID_HANDLER(ElementDecl,               (void *userData,                const XML_Char *name,                XML_Content *model), -             ("O&O&", -              STRING_CONV_FUNC,name, conv_content_model_utf8,model)) +             ("NO&", +              string_intern(self, name), conv_content_model_utf8,model))  #endif  VOID_HANDLER(AttlistDecl, @@ -534,8 +555,8 @@ VOID_HANDLER(AttlistDecl,                const XML_Char *att_type,                const XML_Char *dflt,                int isrequired), -             ("(O&O&O&O&i)", -              STRING_CONV_FUNC,elname, STRING_CONV_FUNC,attname, +             ("(NNO&O&i)", +              string_intern(self, elname), string_intern(self, attname),                STRING_CONV_FUNC,att_type, STRING_CONV_FUNC,dflt,                isrequired)) @@ -545,24 +566,25 @@ VOID_HANDLER(NotationDecl,  			const XML_Char *base,  			const XML_Char *systemId,  			const XML_Char *publicId), -                ("(O&O&O&O&)",  -		 STRING_CONV_FUNC,notationName, STRING_CONV_FUNC,base,  -		 STRING_CONV_FUNC,systemId, STRING_CONV_FUNC,publicId)) +                ("(NNNN)", +		 string_intern(self, notationName), string_intern(self, base),  +		 string_intern(self, systemId), string_intern(self, publicId)))  VOID_HANDLER(StartNamespaceDecl,  		(void *userData,  		      const XML_Char *prefix,  		      const XML_Char *uri), -                ("(O&O&)", STRING_CONV_FUNC,prefix, STRING_CONV_FUNC,uri)) +                ("(NN)", +                 string_intern(self, prefix), string_intern(self, uri)))  VOID_HANDLER(EndNamespaceDecl,  		(void *userData,  		    const XML_Char *prefix), -                ("(O&)", STRING_CONV_FUNC,prefix)) +                ("(N)", string_intern(self, prefix)))  VOID_HANDLER(Comment, -               (void *userData, const XML_Char *prefix), -                ("(O&)", STRING_CONV_FUNC,prefix)) +               (void *userData, const XML_Char *data), +                ("(O&)", STRING_CONV_FUNC,data))  VOID_HANDLER(StartCdataSection,                 (void *userData), @@ -605,9 +627,9 @@ RC_HANDLER(int, ExternalEntityRef,  		    const XML_Char *systemId,  		    const XML_Char *publicId),  		int rc=0;, -                ("(O&O&O&O&)",  -		 STRING_CONV_FUNC,context, STRING_CONV_FUNC,base,  -		 STRING_CONV_FUNC,systemId, STRING_CONV_FUNC,publicId), +                ("(O&NNN)", +		 STRING_CONV_FUNC,context, string_intern(self, base),  +		 string_intern(self, systemId), string_intern(self, publicId)),  		rc = PyInt_AsLong(rv);, rc,  		XML_GetUserData(parser)) @@ -617,8 +639,8 @@ VOID_HANDLER(StartDoctypeDecl,               (void *userData, const XML_Char *doctypeName,                const XML_Char *sysid, const XML_Char *pubid,                int has_internal_subset), -             ("(O&O&O&i)", STRING_CONV_FUNC,doctypeName, -              STRING_CONV_FUNC,sysid, STRING_CONV_FUNC,pubid, +             ("(NNNi)", string_intern(self, doctypeName), +              string_intern(self, sysid), string_intern(self, pubid),                has_internal_subset))  VOID_HANDLER(EndDoctypeDecl, (void *userData), ("()")) @@ -856,6 +878,8 @@ xmlparse_ExternalEntityParserCreate(xmlparseobject *self, PyObject *args)      new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context,  							encoding);      new_parser->handlers = 0; +    new_parser->intern = self->intern; +    Py_XINCREF(new_parser->intern);  #ifdef Py_TPFLAGS_HAVE_GC      PyObject_GC_Track(new_parser);  #else @@ -988,7 +1012,7 @@ XML_Encoding * info)  #endif  static PyObject * -newxmlparseobject(char *encoding, char *namespace_separator) +newxmlparseobject(char *encoding, char *namespace_separator, PyObject *intern)  {      int i;      xmlparseobject *self; @@ -1022,6 +1046,8 @@ newxmlparseobject(char *encoding, char *namespace_separator)      else {          self->itself = XML_ParserCreate(encoding);      } +    self->intern = intern; +    Py_XINCREF(self->intern);  #ifdef Py_TPFLAGS_HAVE_GC      PyObject_GC_Track(self);  #else @@ -1074,6 +1100,7 @@ xmlparse_dealloc(xmlparseobject *self)          }          free(self->handlers);      } +    Py_XDECREF(self->intern);  #if PY_MAJOR_VERSION == 1 && PY_MINOR_VERSION < 6      /* Code for versions before 1.6 */      free(self); @@ -1118,6 +1145,16 @@ xmlparse_getattr(xmlparseobject *self, char *name)          return PyInt_FromLong((long) self->returns_unicode);      if (strcmp(name, "specified_attributes") == 0)          return PyInt_FromLong((long) self->specified_attributes); +    if (strcmp(name, "intern") == 0) { +        if (self->intern == NULL) { +            Py_INCREF(Py_None); +            return Py_None; +        } +        else { +            Py_INCREF(self->intern); +            return self->intern; +        } +    }      handlernum = handlername2int(name); @@ -1138,6 +1175,7 @@ xmlparse_getattr(xmlparseobject *self, char *name)          PyList_Append(rc, PyString_FromString("ordered_attributes"));          PyList_Append(rc, PyString_FromString("returns_unicode"));          PyList_Append(rc, PyString_FromString("specified_attributes")); +        PyList_Append(rc, PyString_FromString("intern"));          return rc;      } @@ -1221,6 +1259,8 @@ static int  xmlparse_clear(xmlparseobject *op)  {      clear_handlers(op, 0); +    Py_XDECREF(op->intern); +    op->intern = 0;      return 0;  }  #endif @@ -1275,10 +1315,14 @@ pyexpat_ParserCreate(PyObject *notused, PyObject *args, PyObject *kw)  {      char *encoding = NULL;      char *namespace_separator = NULL; -    static char *kwlist[] = {"encoding", "namespace_separator", NULL}; - -    if (!PyArg_ParseTupleAndKeywords(args, kw, "|zz:ParserCreate", kwlist, -                                     &encoding, &namespace_separator)) +    PyObject *intern = NULL; +    PyObject *result; +    int intern_decref = 0; +    static char *kwlist[] = {"encoding", "namespace_separator",  +			     "intern", NULL}; + +    if (!PyArg_ParseTupleAndKeywords(args, kw, "|zzO:ParserCreate", kwlist, +                                     &encoding, &namespace_separator, &intern))          return NULL;      if (namespace_separator != NULL          && strlen(namespace_separator) > 1) { @@ -1287,7 +1331,26 @@ pyexpat_ParserCreate(PyObject *notused, PyObject *args, PyObject *kw)                          " character, omitted, or None");          return NULL;      } -    return newxmlparseobject(encoding, namespace_separator); +    /* Explicitly passing None means no interning is desired. +       Not passing anything means that a new dictionary is used. */ +    if (intern == Py_None) +	intern = NULL; +    else if (intern == NULL) { +	intern = PyDict_New(); +	if (!intern) +	    return NULL; +	intern_decref = 1; +    }  +    else if (!PyDict_Check(intern)) { +	PyErr_SetString(PyExc_TypeError, "intern must be a dictionary"); +	return NULL; +    } + +    result = newxmlparseobject(encoding, namespace_separator, intern); +    if (intern_decref) { +	Py_DECREF(intern); +    } +    return result;  }  PyDoc_STRVAR(pyexpat_ErrorString__doc__, | 
