diff options
Diffstat (limited to 'Cython/Compiler')
55 files changed, 12362 insertions, 4649 deletions
diff --git a/Cython/Compiler/AnalysedTreeTransforms.py b/Cython/Compiler/AnalysedTreeTransforms.py index 07bf31f3e..d4941606e 100644 --- a/Cython/Compiler/AnalysedTreeTransforms.py +++ b/Cython/Compiler/AnalysedTreeTransforms.py @@ -10,9 +10,9 @@ from . import Symtab class AutoTestDictTransform(ScopeTrackingTransform): # Handles autotestdict directive - blacklist = ['__cinit__', '__dealloc__', '__richcmp__', - '__nonzero__', '__bool__', - '__len__', '__contains__'] + excludelist = ['__cinit__', '__dealloc__', '__richcmp__', + '__nonzero__', '__bool__', + '__len__', '__contains__'] def visit_ModuleNode(self, node): if node.is_pxd: @@ -81,7 +81,7 @@ class AutoTestDictTransform(ScopeTrackingTransform): name = node.entry.name else: name = node.name - if self.scope_type == 'cclass' and name in self.blacklist: + if self.scope_type == 'cclass' and name in self.excludelist: return node if self.scope_type == 'pyclass': class_name = self.scope_node.name diff --git a/Cython/Compiler/Annotate.py b/Cython/Compiler/Annotate.py index 5feac02d8..8e8d2c4a8 100644 --- a/Cython/Compiler/Annotate.py +++ b/Cython/Compiler/Annotate.py @@ -23,8 +23,12 @@ from .. 
import Utils class AnnotationCCodeWriter(CCodeWriter): - def __init__(self, create_from=None, buffer=None, copy_formatting=True): + # also used as marker for detection of complete code emission in tests + COMPLETE_CODE_TITLE = "Complete cythonized code" + + def __init__(self, create_from=None, buffer=None, copy_formatting=True, show_entire_c_code=False, source_desc=None): CCodeWriter.__init__(self, create_from, buffer, copy_formatting=copy_formatting) + self.show_entire_c_code = show_entire_c_code if create_from is None: self.annotation_buffer = StringIO() self.last_annotated_pos = None @@ -45,8 +49,8 @@ class AnnotationCCodeWriter(CCodeWriter): def create_new(self, create_from, buffer, copy_formatting): return AnnotationCCodeWriter(create_from, buffer, copy_formatting) - def write(self, s): - CCodeWriter.write(self, s) + def _write_to_buffer(self, s): + self.buffer.write(s) self.annotation_buffer.write(s) def mark_pos(self, pos, trace=True): @@ -69,7 +73,7 @@ class AnnotationCCodeWriter(CCodeWriter): """css template will later allow to choose a colormap""" css = [self._css_template] for i in range(255): - color = u"FFFF%02x" % int(255/(1+i/10.0)) + color = u"FFFF%02x" % int(255.0 // (1.0 + i/10.0)) css.append('.cython.score-%d {background-color: #%s;}' % (i, color)) try: from pygments.formatters import HtmlFormatter @@ -83,7 +87,7 @@ class AnnotationCCodeWriter(CCodeWriter): body.cython { font-family: courier; font-size: 12; } .cython.tag { } - .cython.line { margin: 0em } + .cython.line { color: #000000; margin: 0em } .cython.code { font-size: 9; color: #444444; display: none; margin: 0px 0px 0px 8px; border-left: 8px none; } .cython.line .run { background-color: #B0FFB0; } @@ -198,17 +202,24 @@ class AnnotationCCodeWriter(CCodeWriter): for line in coverage_data.iterfind('lines/line') ) - def _htmlify_code(self, code): + def _htmlify_code(self, code, language): try: from pygments import highlight - from pygments.lexers import CythonLexer + from pygments.lexers 
import CythonLexer, CppLexer from pygments.formatters import HtmlFormatter except ImportError: # no Pygments, just escape the code return html_escape(code) + if language == "cython": + lexer = CythonLexer(stripnl=False, stripall=False) + elif language == "c/cpp": + lexer = CppLexer(stripnl=False, stripall=False) + else: + # unknown language, use fallback + return html_escape(code) html_code = highlight( - code, CythonLexer(stripnl=False, stripall=False), + code, lexer, HtmlFormatter(nowrap=True)) return html_code @@ -228,7 +239,7 @@ class AnnotationCCodeWriter(CCodeWriter): return u"<span class='%s'>%s</span>" % ( group_name, match.group(group_name)) - lines = self._htmlify_code(cython_code).splitlines() + lines = self._htmlify_code(cython_code, "cython").splitlines() lineno_width = len(str(len(lines))) if not covered_lines: covered_lines = None @@ -279,6 +290,19 @@ class AnnotationCCodeWriter(CCodeWriter): outlist.append(u"<pre class='cython code score-{score} {covered}'>{code}</pre>".format( score=score, covered=covered, code=c_code)) outlist.append(u"</div>") + + # now the whole c-code if needed: + if self.show_entire_c_code: + outlist.append(u'<p><div class="cython">') + onclick_title = u"<pre class='cython line'{onclick}>+ {title}</pre>\n" + outlist.append(onclick_title.format( + onclick=self._onclick_attr, + title=AnnotationCCodeWriter.COMPLETE_CODE_TITLE, + )) + complete_code_as_html = self._htmlify_code(self.buffer.getvalue(), "c/cpp") + outlist.append(u"<pre class='cython code'>{code}</pre>".format(code=complete_code_as_html)) + outlist.append(u"</div></p>") + return outlist diff --git a/Cython/Compiler/AutoDocTransforms.py b/Cython/Compiler/AutoDocTransforms.py index d3c0a1d0d..6c342f7ef 100644 --- a/Cython/Compiler/AutoDocTransforms.py +++ b/Cython/Compiler/AutoDocTransforms.py @@ -3,18 +3,48 @@ from __future__ import absolute_import, print_function from .Visitor import CythonTransform from .StringEncoding import EncodedString from . 
import Options -from . import PyrexTypes, ExprNodes +from . import PyrexTypes from ..CodeWriter import ExpressionWriter +from .Errors import warning class AnnotationWriter(ExpressionWriter): + """ + A Cython code writer for Python expressions in argument/variable annotations. + """ + def __init__(self, description=None): + """description is optional. If specified it is used in + warning messages for the nodes that don't convert to string properly. + If not specified then no messages are generated. + """ + ExpressionWriter.__init__(self) + self.description = description + self.incomplete = False def visit_Node(self, node): self.put(u"<???>") + self.incomplete = True + if self.description: + warning(node.pos, + "Failed to convert code to string representation in {0}".format( + self.description), level=1) def visit_LambdaNode(self, node): # XXX Should we do better? self.put("<lambda>") + self.incomplete = True + if self.description: + warning(node.pos, + "Failed to convert lambda to string representation in {0}".format( + self.description), level=1) + + def visit_UnicodeNode(self, node): + # Discard Unicode prefix in annotations. Any tool looking at them + # would probably expect Py3 string semantics. 
+ self.emit_string(node, "") + + def visit_AnnotationNode(self, node): + self.put(node.string.unicode_value) class EmbedSignature(CythonTransform): @@ -25,6 +55,12 @@ class EmbedSignature(CythonTransform): self.class_node = None def _fmt_expr(self, node): + writer = ExpressionWriter() + result = writer.write(node) + # print(type(node).__name__, '-->', result) + return result + + def _fmt_annotation(self, node): writer = AnnotationWriter() result = writer.write(node) # print(type(node).__name__, '-->', result) @@ -37,7 +73,7 @@ class EmbedSignature(CythonTransform): doc = arg.type.declaration_code(arg.name, for_display=1) if arg.annotation: - annotation = self._fmt_expr(arg.annotation) + annotation = self._fmt_annotation(arg.annotation) doc = doc + (': %s' % annotation) if arg.default: default = self._fmt_expr(arg.default) @@ -50,12 +86,12 @@ class EmbedSignature(CythonTransform): def _fmt_star_arg(self, arg): arg_doc = arg.name if arg.annotation: - annotation = self._fmt_expr(arg.annotation) + annotation = self._fmt_annotation(arg.annotation) arg_doc = arg_doc + (': %s' % annotation) return arg_doc def _fmt_arglist(self, args, - npargs=0, pargs=None, + npoargs=0, npargs=0, pargs=None, nkargs=0, kargs=None, hide_self=False): arglist = [] @@ -65,9 +101,11 @@ class EmbedSignature(CythonTransform): arglist.append(arg_doc) if pargs: arg_doc = self._fmt_star_arg(pargs) - arglist.insert(npargs, '*%s' % arg_doc) + arglist.insert(npargs + npoargs, '*%s' % arg_doc) elif nkargs: - arglist.insert(npargs, '*') + arglist.insert(npargs + npoargs, '*') + if npoargs: + arglist.insert(npoargs, '/') if kargs: arg_doc = self._fmt_star_arg(kargs) arglist.append('**%s' % arg_doc) @@ -80,12 +118,12 @@ class EmbedSignature(CythonTransform): return ret.declaration_code("", for_display=1) def _fmt_signature(self, cls_name, func_name, args, - npargs=0, pargs=None, + npoargs=0, npargs=0, pargs=None, nkargs=0, kargs=None, return_expr=None, return_type=None, hide_self=False): arglist = 
self._fmt_arglist(args, - npargs, pargs, + npoargs, npargs, pargs, nkargs, kargs, hide_self=hide_self) arglist_doc = ', '.join(arglist) @@ -94,7 +132,7 @@ class EmbedSignature(CythonTransform): func_doc = '%s.%s' % (cls_name, func_doc) ret_doc = None if return_expr: - ret_doc = self._fmt_expr(return_expr) + ret_doc = self._fmt_annotation(return_expr) elif return_type: ret_doc = self._fmt_ret_type(return_type) if ret_doc: @@ -147,11 +185,12 @@ class EmbedSignature(CythonTransform): else: class_name, func_name = self.class_name, node.name + npoargs = getattr(node, 'num_posonly_args', 0) nkargs = getattr(node, 'num_kwonly_args', 0) - npargs = len(node.args) - nkargs + npargs = len(node.args) - nkargs - npoargs signature = self._fmt_signature( class_name, func_name, node.args, - npargs, node.star_arg, + npoargs, npargs, node.star_arg, nkargs, node.starstar_arg, return_expr=node.return_type_annotation, return_type=None, hide_self=hide_self) @@ -176,7 +215,7 @@ class EmbedSignature(CythonTransform): def visit_CFuncDefNode(self, node): if not self.current_directives['embedsignature']: return node - if not node.overridable: # not cpdef FOO(...): + if not node.overridable: # not cpdef FOO(...): return node signature = self._fmt_signature( @@ -192,8 +231,9 @@ class EmbedSignature(CythonTransform): old_doc = None new_doc = self._embed_signature(signature, old_doc) node.entry.doc = EncodedString(new_doc) - if hasattr(node, 'py_func') and node.py_func is not None: - node.py_func.entry.doc = EncodedString(new_doc) + py_func = getattr(node, 'py_func', None) + if py_func is not None: + py_func.entry.doc = EncodedString(new_doc) return node def visit_PropertyNode(self, node): diff --git a/Cython/Compiler/Buffer.py b/Cython/Compiler/Buffer.py index c62a24f56..e86e1e9c2 100644 --- a/Cython/Compiler/Buffer.py +++ b/Cython/Compiler/Buffer.py @@ -85,7 +85,7 @@ class IntroduceBufferAuxiliaryVars(CythonTransform): aux_var = scope.declare_var(name=None, cname=cname, type=type, 
pos=node.pos) if entry.is_arg: - aux_var.used = True # otherwise, NameNode will mark whether it is used + aux_var.used = True # otherwise, NameNode will mark whether it is used return aux_var @@ -111,9 +111,9 @@ class IntroduceBufferAuxiliaryVars(CythonTransform): # # Analysis # -buffer_options = ("dtype", "ndim", "mode", "negative_indices", "cast") # ordered! +buffer_options = ("dtype", "ndim", "mode", "negative_indices", "cast") # ordered! buffer_defaults = {"ndim": 1, "mode": "full", "negative_indices": True, "cast": False} -buffer_positional_options_count = 1 # anything beyond this needs keyword argument +buffer_positional_options_count = 1 # anything beyond this needs keyword argument ERR_BUF_OPTION_UNKNOWN = '"%s" is not a buffer option' ERR_BUF_TOO_MANY = 'Too many buffer options' @@ -146,12 +146,12 @@ def analyse_buffer_options(globalpos, env, posargs, dictargs, defaults=None, nee options = {} for name, (value, pos) in dictargs.items(): - if not name in buffer_options: + if name not in buffer_options: raise CompileError(pos, ERR_BUF_OPTION_UNKNOWN % name) options[name] = value for name, (value, pos) in zip(buffer_options, posargs): - if not name in buffer_options: + if name not in buffer_options: raise CompileError(pos, ERR_BUF_OPTION_UNKNOWN % name) if name in options: raise CompileError(pos, ERR_BUF_DUP % name) @@ -159,7 +159,7 @@ def analyse_buffer_options(globalpos, env, posargs, dictargs, defaults=None, nee # Check that they are all there and copy defaults for name in buffer_options: - if not name in options: + if name not in options: try: options[name] = defaults[name] except KeyError: @@ -298,9 +298,10 @@ def put_unpack_buffer_aux_into_scope(buf_entry, code): ln = [] for i in range(buf_entry.type.ndim): for fldname in fldnames: - ln.append("%s.diminfo[%d].%s = %s.rcbuffer->pybuffer.%s[%d];" % \ - (pybuffernd_struct, i, fldname, - pybuffernd_struct, fldname, i)) + ln.append("%s.diminfo[%d].%s = %s.rcbuffer->pybuffer.%s[%d];" % ( + pybuffernd_struct, 
i, fldname, + pybuffernd_struct, fldname, i, + )) code.putln(' '.join(ln)) def put_init_vars(entry, code): @@ -373,7 +374,7 @@ def put_assign_to_buffer(lhs_cname, rhs_cname, buf_entry, code.putln("{") # Set up necessary stack for getbuffer code.putln("__Pyx_BufFmt_StackElem __pyx_stack[%d];" % buffer_type.dtype.struct_nesting_depth()) - getbuffer = get_getbuffer_call(code, "%s", buffer_aux, buffer_type) # fill in object below + getbuffer = get_getbuffer_call(code, "%s", buffer_aux, buffer_type) # fill in object below if is_initialized: # Release any existing buffer @@ -419,7 +420,7 @@ def put_assign_to_buffer(lhs_cname, rhs_cname, buf_entry, put_unpack_buffer_aux_into_scope(buf_entry, code) code.putln('}') - code.putln("}") # Release stack + code.putln("}") # Release stack def put_buffer_lookup_code(entry, index_signeds, index_cnames, directives, @@ -669,17 +670,25 @@ def get_type_information_cname(code, dtype, maxdepth=None): structinfo_name = "NULL" elif dtype.is_struct: struct_scope = dtype.scope - if dtype.is_const: - struct_scope = struct_scope.const_base_type_scope + if dtype.is_cv_qualified: + struct_scope = struct_scope.base_type_scope # Must pre-call all used types in order not to recurse during utility code writing. 
fields = struct_scope.var_entries assert len(fields) > 0 types = [get_type_information_cname(code, f.type, maxdepth - 1) for f in fields] typecode.putln("static __Pyx_StructField %s[] = {" % structinfo_name, safe=True) + + if dtype.is_cv_qualified: + # roughly speaking, remove "const" from struct_type + struct_type = dtype.cv_base_type.empty_declaration_code() + else: + struct_type = dtype.empty_declaration_code() + for f, typeinfo in zip(fields, types): typecode.putln(' {&%s, "%s", offsetof(%s, %s)},' % - (typeinfo, f.name, dtype.empty_declaration_code(), f.cname), safe=True) + (typeinfo, f.name, struct_type, f.cname), safe=True) + typecode.putln(' {NULL, NULL, 0}', safe=True) typecode.putln("};", safe=True) else: @@ -690,10 +699,10 @@ def get_type_information_cname(code, dtype, maxdepth=None): flags = "0" is_unsigned = "0" if dtype is PyrexTypes.c_char_type: - is_unsigned = "IS_UNSIGNED(%s)" % declcode + is_unsigned = "__PYX_IS_UNSIGNED(%s)" % declcode typegroup = "'H'" elif dtype.is_int: - is_unsigned = "IS_UNSIGNED(%s)" % declcode + is_unsigned = "__PYX_IS_UNSIGNED(%s)" % declcode typegroup = "%s ? 'U' : 'I'" % is_unsigned elif complex_possible or dtype.is_complex: typegroup = "'C'" diff --git a/Cython/Compiler/Builtin.py b/Cython/Compiler/Builtin.py index e0d203ae0..78b2638d5 100644 --- a/Cython/Compiler/Builtin.py +++ b/Cython/Compiler/Builtin.py @@ -4,11 +4,11 @@ from __future__ import absolute_import -from .Symtab import BuiltinScope, StructOrUnionScope -from .Code import UtilityCode +from .StringEncoding import EncodedString +from .Symtab import BuiltinScope, StructOrUnionScope, ModuleScope, Entry +from .Code import UtilityCode, TempitaUtilityCode from .TypeSlots import Signature from . import PyrexTypes -from . 
import Options # C-level implementations of builtin types, functions and methods @@ -30,17 +30,19 @@ builtin_utility_code = { class _BuiltinOverride(object): def __init__(self, py_name, args, ret_type, cname, py_equiv="*", utility_code=None, sig=None, func_type=None, - is_strict_signature=False, builtin_return_type=None): + is_strict_signature=False, builtin_return_type=None, + nogil=None): self.py_name, self.cname, self.py_equiv = py_name, cname, py_equiv self.args, self.ret_type = args, ret_type self.func_type, self.sig = func_type, sig self.builtin_return_type = builtin_return_type self.is_strict_signature = is_strict_signature self.utility_code = utility_code + self.nogil = nogil def build_func_type(self, sig=None, self_arg=None): if sig is None: - sig = Signature(self.args, self.ret_type) + sig = Signature(self.args, self.ret_type, nogil=self.nogil) sig.exception_check = False # not needed for the current builtins func_type = sig.function_type(self_arg) if self.is_strict_signature: @@ -54,7 +56,7 @@ class BuiltinAttribute(object): def __init__(self, py_name, cname=None, field_type=None, field_type_name=None): self.py_name = py_name self.cname = cname or py_name - self.field_type_name = field_type_name # can't do the lookup before the type is declared! + self.field_type_name = field_type_name # can't do the lookup before the type is declared! 
self.field_type = field_type def declare_in_type(self, self_type): @@ -89,16 +91,38 @@ class BuiltinMethod(_BuiltinOverride): self.py_name, method_type, self.cname, utility_code=self.utility_code) +class BuiltinProperty(object): + # read only for now + def __init__(self, py_name, property_type, call_cname, + exception_value=None, exception_check=None, utility_code=None): + self.py_name = py_name + self.property_type = property_type + self.call_cname = call_cname + self.utility_code = utility_code + self.exception_value = exception_value + self.exception_check = exception_check + + def declare_in_type(self, self_type): + self_type.scope.declare_cproperty( + self.py_name, + self.property_type, + self.call_cname, + exception_value=self.exception_value, + exception_check=self.exception_check, + utility_code=self.utility_code + ) + + builtin_function_table = [ # name, args, return, C API func, py equiv = "*" BuiltinFunction('abs', "d", "d", "fabs", - is_strict_signature = True), + is_strict_signature=True, nogil=True), BuiltinFunction('abs', "f", "f", "fabsf", - is_strict_signature = True), + is_strict_signature=True, nogil=True), BuiltinFunction('abs', "i", "i", "abs", - is_strict_signature = True), + is_strict_signature=True, nogil=True), BuiltinFunction('abs', "l", "l", "labs", - is_strict_signature = True), + is_strict_signature=True, nogil=True), BuiltinFunction('abs', None, None, "__Pyx_abs_longlong", utility_code = UtilityCode.load("abs_longlong", "Builtins.c"), func_type = PyrexTypes.CFuncType( @@ -209,7 +233,7 @@ builtin_function_table = [ #('sum', "", "", ""), #('sorted', "", "", ""), #('type', "O", "O", "PyObject_Type"), - #('unichr', "", "", ""), + BuiltinFunction('unichr', "i", "O", "PyUnicode_FromOrdinal", builtin_return_type='unicode'), #('unicode', "", "", ""), #('vars', "", "", ""), #('zip', "", "", ""), @@ -268,21 +292,33 @@ builtin_types_table = [ ("basestring", "PyBaseString_Type", [ BuiltinMethod("join", "TO", "T", "__Pyx_PyBaseString_Join", 
utility_code=UtilityCode.load("StringJoin", "StringTools.c")), + BuiltinMethod("__mul__", "Tz", "T", "__Pyx_PySequence_Multiply", + utility_code=UtilityCode.load("PySequenceMultiply", "ObjectHandling.c")), ]), ("bytearray", "PyByteArray_Type", [ + BuiltinMethod("__mul__", "Tz", "T", "__Pyx_PySequence_Multiply", + utility_code=UtilityCode.load("PySequenceMultiply", "ObjectHandling.c")), ]), ("bytes", "PyBytes_Type", [BuiltinMethod("join", "TO", "O", "__Pyx_PyBytes_Join", utility_code=UtilityCode.load("StringJoin", "StringTools.c")), + BuiltinMethod("__mul__", "Tz", "T", "__Pyx_PySequence_Multiply", + utility_code=UtilityCode.load("PySequenceMultiply", "ObjectHandling.c")), ]), ("str", "PyString_Type", [BuiltinMethod("join", "TO", "O", "__Pyx_PyString_Join", builtin_return_type='basestring', utility_code=UtilityCode.load("StringJoin", "StringTools.c")), + BuiltinMethod("__mul__", "Tz", "T", "__Pyx_PySequence_Multiply", + utility_code=UtilityCode.load("PySequenceMultiply", "ObjectHandling.c")), ]), ("unicode", "PyUnicode_Type", [BuiltinMethod("__contains__", "TO", "b", "PyUnicode_Contains"), BuiltinMethod("join", "TO", "T", "PyUnicode_Join"), + BuiltinMethod("__mul__", "Tz", "T", "__Pyx_PySequence_Multiply", + utility_code=UtilityCode.load("PySequenceMultiply", "ObjectHandling.c")), ]), - ("tuple", "PyTuple_Type", []), + ("tuple", "PyTuple_Type", [BuiltinMethod("__mul__", "Tz", "T", "__Pyx_PySequence_Multiply", + utility_code=UtilityCode.load("PySequenceMultiply", "ObjectHandling.c")), + ]), ("list", "PyList_Type", [BuiltinMethod("insert", "TzO", "r", "PyList_Insert"), BuiltinMethod("reverse", "T", "r", "PyList_Reverse"), @@ -290,6 +326,8 @@ builtin_types_table = [ utility_code=UtilityCode.load("ListAppend", "Optimize.c")), BuiltinMethod("extend", "TO", "r", "__Pyx_PyList_Extend", utility_code=UtilityCode.load("ListExtend", "Optimize.c")), + BuiltinMethod("__mul__", "Tz", "T", "__Pyx_PySequence_Multiply", + utility_code=UtilityCode.load("PySequenceMultiply", 
"ObjectHandling.c")), ]), ("dict", "PyDict_Type", [BuiltinMethod("__contains__", "TO", "b", "PyDict_Contains"), @@ -336,18 +374,45 @@ builtin_types_table = [ ("frozenset", "PyFrozenSet_Type", []), ("Exception", "((PyTypeObject*)PyExc_Exception)[0]", []), ("StopAsyncIteration", "((PyTypeObject*)__Pyx_PyExc_StopAsyncIteration)[0]", []), + ("memoryview", "PyMemoryView_Type", [ + # TODO - format would be nice, but hard to get + # __len__ can be accessed through a direct lookup of the buffer (but probably in Optimize.c) + # error checking would ideally be limited api only + BuiltinProperty("ndim", PyrexTypes.c_int_type, '__Pyx_PyMemoryView_Get_ndim', + exception_value="-1", exception_check=True, + utility_code=TempitaUtilityCode.load_cached( + "memoryview_get_from_buffer", "Builtins.c", + context=dict(name="ndim") + ) + ), + BuiltinProperty("readonly", PyrexTypes.c_bint_type, '__Pyx_PyMemoryView_Get_readonly', + exception_value="-1", exception_check=True, + utility_code=TempitaUtilityCode.load_cached( + "memoryview_get_from_buffer", "Builtins.c", + context=dict(name="readonly") + ) + ), + BuiltinProperty("itemsize", PyrexTypes.c_py_ssize_t_type, '__Pyx_PyMemoryView_Get_itemsize', + exception_value="-1", exception_check=True, + utility_code=TempitaUtilityCode.load_cached( + "memoryview_get_from_buffer", "Builtins.c", + context=dict(name="itemsize") + ) + )] + ) ] -types_that_construct_their_instance = set([ +types_that_construct_their_instance = frozenset({ # some builtin types do not always return an instance of # themselves - these do: 'type', 'bool', 'long', 'float', 'complex', 'bytes', 'unicode', 'bytearray', - 'tuple', 'list', 'dict', 'set', 'frozenset' + 'tuple', 'list', 'dict', 'set', 'frozenset', # 'str', # only in Py3.x # 'file', # only in Py2.x -]) + 'memoryview' +}) builtin_structs_table = [ @@ -397,7 +462,13 @@ def init_builtin_types(): objstruct_cname = "PyBaseExceptionObject" else: objstruct_cname = 'Py%sObject' % name.capitalize() - the_type = 
builtin_scope.declare_builtin_type(name, cname, utility, objstruct_cname) + type_class = PyrexTypes.BuiltinObjectType + if name in ['dict', 'list', 'set', 'frozenset']: + type_class = PyrexTypes.BuiltinTypeConstructorObjectType + elif name == 'tuple': + type_class = PyrexTypes.PythonTupleTypeConstructor + the_type = builtin_scope.declare_builtin_type(name, cname, utility, objstruct_cname, + type_class=type_class) builtin_types[name] = the_type for method in methods: method.declare_in_type(the_type) @@ -413,17 +484,20 @@ def init_builtin_structs(): def init_builtins(): + #Errors.init_thread() # hopefully not needed - we should not emit warnings ourselves init_builtin_structs() init_builtin_types() init_builtin_funcs() builtin_scope.declare_var( '__debug__', PyrexTypes.c_const_type(PyrexTypes.c_bint_type), - pos=None, cname='(!Py_OptimizeFlag)', is_cdef=True) + pos=None, cname='__pyx_assertions_enabled()', is_cdef=True) - global list_type, tuple_type, dict_type, set_type, frozenset_type - global bytes_type, str_type, unicode_type, basestring_type, slice_type - global float_type, bool_type, type_type, complex_type, bytearray_type + global type_type, list_type, tuple_type, dict_type, set_type, frozenset_type, slice_type + global bytes_type, str_type, unicode_type, basestring_type, bytearray_type + global float_type, int_type, long_type, bool_type, complex_type + global memoryview_type, py_buffer_type + global sequence_types type_type = builtin_scope.lookup('type').type list_type = builtin_scope.lookup('list').type tuple_type = builtin_scope.lookup('tuple').type @@ -431,14 +505,107 @@ def init_builtins(): set_type = builtin_scope.lookup('set').type frozenset_type = builtin_scope.lookup('frozenset').type slice_type = builtin_scope.lookup('slice').type + bytes_type = builtin_scope.lookup('bytes').type str_type = builtin_scope.lookup('str').type unicode_type = builtin_scope.lookup('unicode').type basestring_type = builtin_scope.lookup('basestring').type bytearray_type = 
builtin_scope.lookup('bytearray').type + memoryview_type = builtin_scope.lookup('memoryview').type + float_type = builtin_scope.lookup('float').type + int_type = builtin_scope.lookup('int').type + long_type = builtin_scope.lookup('long').type bool_type = builtin_scope.lookup('bool').type complex_type = builtin_scope.lookup('complex').type + sequence_types = ( + list_type, + tuple_type, + bytes_type, + str_type, + unicode_type, + basestring_type, + bytearray_type, + memoryview_type, + ) + + # Set up type inference links between equivalent Python/C types + bool_type.equivalent_type = PyrexTypes.c_bint_type + PyrexTypes.c_bint_type.equivalent_type = bool_type + + float_type.equivalent_type = PyrexTypes.c_double_type + PyrexTypes.c_double_type.equivalent_type = float_type + + complex_type.equivalent_type = PyrexTypes.c_double_complex_type + PyrexTypes.c_double_complex_type.equivalent_type = complex_type + + py_buffer_type = builtin_scope.lookup('Py_buffer').type + init_builtins() + +############################## +# Support for a few standard library modules that Cython understands (currently typing and dataclasses) +############################## +_known_module_scopes = {} + +def get_known_standard_library_module_scope(module_name): + mod = _known_module_scopes.get(module_name) + if mod: + return mod + + if module_name == "typing": + mod = ModuleScope(module_name, None, None) + for name, tp in [ + ('Dict', dict_type), + ('List', list_type), + ('Tuple', tuple_type), + ('Set', set_type), + ('FrozenSet', frozenset_type), + ]: + name = EncodedString(name) + entry = mod.declare_type(name, tp, pos = None) + var_entry = Entry(name, None, PyrexTypes.py_object_type) + var_entry.is_pyglobal = True + var_entry.is_variable = True + var_entry.scope = mod + entry.as_variable = var_entry + + for name in ['ClassVar', 'Optional']: + name = EncodedString(name) + indexed_type = PyrexTypes.SpecialPythonTypeConstructor(EncodedString("typing."+name)) + entry = mod.declare_type(name, 
indexed_type, pos = None) + var_entry = Entry(name, None, PyrexTypes.py_object_type) + var_entry.is_pyglobal = True + var_entry.is_variable = True + var_entry.scope = mod + entry.as_variable = var_entry + _known_module_scopes[module_name] = mod + elif module_name == "dataclasses": + mod = ModuleScope(module_name, None, None) + indexed_type = PyrexTypes.SpecialPythonTypeConstructor(EncodedString("dataclasses.InitVar")) + initvar_string = EncodedString("InitVar") + entry = mod.declare_type(initvar_string, indexed_type, pos = None) + var_entry = Entry(initvar_string, None, PyrexTypes.py_object_type) + var_entry.is_pyglobal = True + var_entry.scope = mod + entry.as_variable = var_entry + _known_module_scopes[module_name] = mod + return mod + + +def get_known_standard_library_entry(qualified_name): + name_parts = qualified_name.split(".") + module_name = EncodedString(name_parts[0]) + rest = name_parts[1:] + + if len(rest) > 1: # for now, we don't know how to deal with any nested modules + return None + + mod = get_known_standard_library_module_scope(module_name) + + # eventually handle more sophisticated multiple lookups if needed + if mod and rest: + return mod.lookup_here(rest[0]) + return None diff --git a/Cython/Compiler/CmdLine.py b/Cython/Compiler/CmdLine.py index 470fe6bd4..776636c32 100644 --- a/Cython/Compiler/CmdLine.py +++ b/Cython/Compiler/CmdLine.py @@ -4,237 +4,248 @@ from __future__ import absolute_import -import os import sys +import os +from argparse import ArgumentParser, Action, SUPPRESS from . import Options -usage = """\ -Cython (http://cython.org) is a compiler for code written in the -Cython language. Cython is based on Pyrex by Greg Ewing. - -Usage: cython [options] sourcefile.{pyx,py} ... - -Options: - -V, --version Display version number of cython compiler - -l, --create-listing Write error messages to a listing file - -I, --include-dir <directory> Search for include files in named directory - (multiple include directories are allowed). 
- -o, --output-file <filename> Specify name of generated C file - -t, --timestamps Only compile newer source files - -f, --force Compile all source files (overrides implied -t) - -v, --verbose Be verbose, print file names on multiple compilation - -p, --embed-positions If specified, the positions in Cython files of each - function definition is embedded in its docstring. - --cleanup <level> Release interned objects on python exit, for memory debugging. - Level indicates aggressiveness, default 0 releases nothing. - -w, --working <directory> Sets the working directory for Cython (the directory modules - are searched from) - --gdb Output debug information for cygdb - --gdb-outdir <directory> Specify gdb debug information output directory. Implies --gdb. - - -D, --no-docstrings Strip docstrings from the compiled module. - -a, --annotate Produce a colorized HTML version of the source. - --annotate-coverage <cov.xml> Annotate and include coverage information from cov.xml. - --line-directives Produce #line directives pointing to the .pyx source - --cplus Output a C++ rather than C file. - --embed[=<method_name>] Generate a main() function that embeds the Python interpreter. - -2 Compile based on Python-2 syntax and code semantics. - -3 Compile based on Python-3 syntax and code semantics. - --3str Compile based on Python-3 syntax and code semantics without - assuming unicode by default for string literals under Python 2. - --lenient Change some compile time errors to runtime errors to - improve Python compatibility - --capi-reexport-cincludes Add cincluded headers to any auto-generated header files. - --fast-fail Abort the compilation on the first error - --warning-errors, -Werror Make all warnings into errors - --warning-extra, -Wextra Enable extra warnings - -X, --directive <name>=<value>[,<name=value,...] Overrides a compiler directive - -E, --compile-time-env name=value[,<name=value,...] Provides compile time env like DEF would do. 
- --module-name Fully qualified module name. If not given, it is deduced from the - import path if source file is in a package, or equals the - filename otherwise. - -M, --depfile Produce depfiles for the sources -""" - - -# The following experimental options are supported only on MacOSX: -# -C, --compile Compile generated .c file to .o file -# --link Link .o file to produce extension module (implies -C) -# -+, --cplus Use C++ compiler for compiling and linking -# Additional .o files to link may be supplied when using -X.""" - -def bad_usage(): - sys.stderr.write(usage) - sys.exit(1) -def parse_command_line(args): - from .Main import CompilationOptions, default_options - - pending_arg = [] - - def pop_arg(): - if not args or pending_arg: - bad_usage() - if '=' in args[0] and args[0].startswith('--'): # allow "--long-option=xyz" - name, value = args.pop(0).split('=', 1) - pending_arg.append(value) - return name - return args.pop(0) - - def pop_value(default=None): - if pending_arg: - return pending_arg.pop() - elif default is not None: - return default - elif not args: - bad_usage() - return args.pop(0) - - def get_param(option): - tail = option[2:] - if tail: - return tail - else: - return pop_arg() - - options = CompilationOptions(default_options) - sources = [] - while args: - if args[0].startswith("-"): - option = pop_arg() - if option in ("-V", "--version"): - options.show_version = 1 - elif option in ("-l", "--create-listing"): - options.use_listing_file = 1 - elif option in ("-+", "--cplus"): - options.cplus = 1 - elif option == "--embed": - Options.embed = pop_value("main") - elif option.startswith("-I"): - options.include_path.append(get_param(option)) - elif option == "--include-dir": - options.include_path.append(pop_value()) - elif option in ("-w", "--working"): - options.working_path = pop_value() - elif option in ("-o", "--output-file"): - options.output_file = pop_value() - elif option in ("-t", "--timestamps"): - options.timestamps = 1 - elif option 
in ("-f", "--force"): - options.timestamps = 0 - elif option in ("-v", "--verbose"): - options.verbose += 1 - elif option in ("-p", "--embed-positions"): - Options.embed_pos_in_docstring = 1 - elif option in ("-z", "--pre-import"): - Options.pre_import = pop_value() - elif option == "--cleanup": - Options.generate_cleanup_code = int(pop_value()) - elif option in ("-D", "--no-docstrings"): - Options.docstrings = False - elif option in ("-a", "--annotate"): - Options.annotate = True - elif option == "--annotate-coverage": - Options.annotate = True - Options.annotate_coverage_xml = pop_value() - elif option == "--convert-range": - Options.convert_range = True - elif option == "--line-directives": - options.emit_linenums = True - elif option == "--no-c-in-traceback": - options.c_line_in_traceback = False - elif option == "--gdb": - options.gdb_debug = True - options.output_dir = os.curdir - elif option == "--gdb-outdir": - options.gdb_debug = True - options.output_dir = pop_value() - elif option == "--lenient": - Options.error_on_unknown_names = False - Options.error_on_uninitialized = False - elif option == '-2': - options.language_level = 2 - elif option == '-3': - options.language_level = 3 - elif option == '--3str': - options.language_level = '3str' - elif option == "--capi-reexport-cincludes": - options.capi_reexport_cincludes = True - elif option == "--fast-fail": - Options.fast_fail = True - elif option == "--cimport-from-pyx": - Options.cimport_from_pyx = True - elif option in ('-Werror', '--warning-errors'): - Options.warning_errors = True - elif option in ('-Wextra', '--warning-extra'): - options.compiler_directives.update(Options.extra_warnings) - elif option == "--old-style-globals": - Options.old_style_globals = True - elif option == "--directive" or option.startswith('-X'): - if option.startswith('-X') and option[2:].strip(): - x_args = option[2:] - else: - x_args = pop_value() - try: - options.compiler_directives = Options.parse_directive_list( - x_args, 
relaxed_bool=True, - current_settings=options.compiler_directives) - except ValueError as e: - sys.stderr.write("Error in compiler directive: %s\n" % e.args[0]) - sys.exit(1) - elif option == "--compile-time-env" or option.startswith('-E'): - if option.startswith('-E') and option[2:].strip(): - x_args = option[2:] - else: - x_args = pop_value() - try: - options.compile_time_env = Options.parse_compile_time_env( - x_args, current_settings=options.compile_time_env) - except ValueError as e: - sys.stderr.write("Error in compile-time-env: %s\n" % e.args[0]) - sys.exit(1) - elif option == "--module-name": - options.module_name = pop_value() - elif option in ('-M', '--depfile'): - options.depfile = True - elif option.startswith('--debug'): - option = option[2:].replace('-', '_') - from . import DebugFlags - if option in dir(DebugFlags): - setattr(DebugFlags, option, True) - else: - sys.stderr.write("Unknown debug flag: %s\n" % option) - bad_usage() - elif option in ('-h', '--help'): - sys.stdout.write(usage) - sys.exit(0) +if sys.version_info < (3, 3): + # TODO: This workaround can be removed in Cython 3.1 + FileNotFoundError = IOError + + +class ParseDirectivesAction(Action): + def __call__(self, parser, namespace, values, option_string=None): + old_directives = dict(getattr(namespace, self.dest, + Options.get_directive_defaults())) + directives = Options.parse_directive_list( + values, relaxed_bool=True, current_settings=old_directives) + setattr(namespace, self.dest, directives) + + +class ParseOptionsAction(Action): + def __call__(self, parser, namespace, values, option_string=None): + options = dict(getattr(namespace, self.dest, {})) + for opt in values.split(','): + if '=' in opt: + n, v = opt.split('=', 1) + v = v.lower() not in ('false', 'f', '0', 'no') else: - sys.stderr.write(usage) - sys.stderr.write("Unknown compiler flag: %s\n" % option) - sys.exit(1) + n, v = opt, True + options[n] = v + setattr(namespace, self.dest, options) + + +class 
ParseCompileTimeEnvAction(Action): + def __call__(self, parser, namespace, values, option_string=None): + old_env = dict(getattr(namespace, self.dest, {})) + new_env = Options.parse_compile_time_env(values, current_settings=old_env) + setattr(namespace, self.dest, new_env) + + +class ActivateAllWarningsAction(Action): + def __call__(self, parser, namespace, values, option_string=None): + directives = getattr(namespace, 'compiler_directives', {}) + directives.update(Options.extra_warnings) + namespace.compiler_directives = directives + + +class SetLenientAction(Action): + def __call__(self, parser, namespace, values, option_string=None): + namespace.error_on_unknown_names = False + namespace.error_on_uninitialized = False + + +class SetGDBDebugAction(Action): + def __call__(self, parser, namespace, values, option_string=None): + namespace.gdb_debug = True + namespace.output_dir = os.curdir + + +class SetGDBDebugOutputAction(Action): + def __call__(self, parser, namespace, values, option_string=None): + namespace.gdb_debug = True + namespace.output_dir = values + + +class SetAnnotateCoverageAction(Action): + def __call__(self, parser, namespace, values, option_string=None): + namespace.annotate = True + namespace.annotate_coverage_xml = values + + +def create_cython_argparser(): + description = "Cython (https://cython.org/) is a compiler for code written in the "\ + "Cython language. Cython is based on Pyrex by Greg Ewing." 
+ + parser = ArgumentParser(description=description, argument_default=SUPPRESS) + + parser.add_argument("-V", "--version", dest='show_version', action='store_const', const=1, + help='Display version number of cython compiler') + parser.add_argument("-l", "--create-listing", dest='use_listing_file', action='store_const', const=1, + help='Write error messages to a listing file') + parser.add_argument("-I", "--include-dir", dest='include_path', action='append', + help='Search for include files in named directory ' + '(multiple include directories are allowed).') + parser.add_argument("-o", "--output-file", dest='output_file', action='store', type=str, + help='Specify name of generated C file') + parser.add_argument("-t", "--timestamps", dest='timestamps', action='store_const', const=1, + help='Only compile newer source files') + parser.add_argument("-f", "--force", dest='timestamps', action='store_const', const=0, + help='Compile all source files (overrides implied -t)') + parser.add_argument("-v", "--verbose", dest='verbose', action='count', + help='Be verbose, print file names on multiple compilation') + parser.add_argument("-p", "--embed-positions", dest='embed_pos_in_docstring', action='store_const', const=1, + help='If specified, the positions in Cython files of each ' + 'function definition is embedded in its docstring.') + parser.add_argument("--cleanup", dest='generate_cleanup_code', action='store', type=int, + help='Release interned objects on python exit, for memory debugging. 
' + 'Level indicates aggressiveness, default 0 releases nothing.') + parser.add_argument("-w", "--working", dest='working_path', action='store', type=str, + help='Sets the working directory for Cython (the directory modules are searched from)') + parser.add_argument("--gdb", action=SetGDBDebugAction, nargs=0, + help='Output debug information for cygdb') + parser.add_argument("--gdb-outdir", action=SetGDBDebugOutputAction, type=str, + help='Specify gdb debug information output directory. Implies --gdb.') + parser.add_argument("-D", "--no-docstrings", dest='docstrings', action='store_false', + help='Strip docstrings from the compiled module.') + parser.add_argument('-a', '--annotate', action='store_const', const='default', dest='annotate', + help='Produce a colorized HTML version of the source.') + parser.add_argument('--annotate-fullc', action='store_const', const='fullc', dest='annotate', + help='Produce a colorized HTML version of the source ' + 'which includes entire generated C/C++-code.') + parser.add_argument("--annotate-coverage", dest='annotate_coverage_xml', action=SetAnnotateCoverageAction, type=str, + help='Annotate and include coverage information from cov.xml.') + parser.add_argument("--line-directives", dest='emit_linenums', action='store_true', + help='Produce #line directives pointing to the .pyx source') + parser.add_argument("-+", "--cplus", dest='cplus', action='store_const', const=1, + help='Output a C++ rather than C file.') + parser.add_argument('--embed', action='store_const', const='main', + help='Generate a main() function that embeds the Python interpreter. 
' + 'Pass --embed=<method_name> for a name other than main().') + parser.add_argument('-2', dest='language_level', action='store_const', const=2, + help='Compile based on Python-2 syntax and code semantics.') + parser.add_argument('-3', dest='language_level', action='store_const', const=3, + help='Compile based on Python-3 syntax and code semantics.') + parser.add_argument('--3str', dest='language_level', action='store_const', const='3str', + help='Compile based on Python-3 syntax and code semantics without ' + 'assuming unicode by default for string literals under Python 2.') + parser.add_argument("--lenient", action=SetLenientAction, nargs=0, + help='Change some compile time errors to runtime errors to ' + 'improve Python compatibility') + parser.add_argument("--capi-reexport-cincludes", dest='capi_reexport_cincludes', action='store_true', + help='Add cincluded headers to any auto-generated header files.') + parser.add_argument("--fast-fail", dest='fast_fail', action='store_true', + help='Abort the compilation on the first error') + parser.add_argument("-Werror", "--warning-errors", dest='warning_errors', action='store_true', + help='Make all warnings into errors') + parser.add_argument("-Wextra", "--warning-extra", action=ActivateAllWarningsAction, nargs=0, + help='Enable extra warnings') + + parser.add_argument('-X', '--directive', metavar='NAME=VALUE,...', + dest='compiler_directives', type=str, + action=ParseDirectivesAction, + help='Overrides a compiler directive') + parser.add_argument('-E', '--compile-time-env', metavar='NAME=VALUE,...', + dest='compile_time_env', type=str, + action=ParseCompileTimeEnvAction, + help='Provides compile time env like DEF would do.') + parser.add_argument("--module-name", + dest='module_name', type=str, action='store', + help='Fully qualified module name. 
If not given, is ' + 'deduced from the import path if source file is in ' + 'a package, or equals the filename otherwise.') + parser.add_argument('-M', '--depfile', action='store_true', help='produce depfiles for the sources') + parser.add_argument('sources', nargs='*', default=[]) + + # TODO: add help + parser.add_argument("-z", "--pre-import", dest='pre_import', action='store', type=str, help=SUPPRESS) + parser.add_argument("--convert-range", dest='convert_range', action='store_true', help=SUPPRESS) + parser.add_argument("--no-c-in-traceback", dest='c_line_in_traceback', action='store_false', help=SUPPRESS) + parser.add_argument("--cimport-from-pyx", dest='cimport_from_pyx', action='store_true', help=SUPPRESS) + parser.add_argument("--old-style-globals", dest='old_style_globals', action='store_true', help=SUPPRESS) + + # debug stuff: + from . import DebugFlags + for name in vars(DebugFlags): + if name.startswith("debug"): + option_name = name.replace('_', '-') + parser.add_argument("--" + option_name, action='store_true', help=SUPPRESS) + + return parser + + +def parse_command_line_raw(parser, args): + # special handling for --embed and --embed=xxxx as they aren't correctly parsed + def filter_out_embed_options(args): + with_embed, without_embed = [], [] + for x in args: + if x == '--embed' or x.startswith('--embed='): + with_embed.append(x) + else: + without_embed.append(x) + return with_embed, without_embed + + with_embed, args_without_embed = filter_out_embed_options(args) + + arguments, unknown = parser.parse_known_args(args_without_embed) + + sources = arguments.sources + del arguments.sources + + # unknown can be either debug, embed or input files or really unknown + for option in unknown: + if option.startswith('-'): + parser.error("unknown option " + option) + else: + sources.append(option) + + # embed-stuff must be handled extra: + for x in with_embed: + if x == '--embed': + name = 'main' # default value else: - sources.append(pop_arg()) + name = 
x[len('--embed='):] + setattr(arguments, 'embed', name) - if pending_arg: - bad_usage() + return arguments, sources + + +def parse_command_line(args): + parser = create_cython_argparser() + arguments, sources = parse_command_line_raw(parser, args) + + work_dir = getattr(arguments, 'working_path', '') + for source in sources: + if work_dir and not os.path.isabs(source): + source = os.path.join(work_dir, source) + if not os.path.exists(source): + import errno + raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), source) + + options = Options.CompilationOptions(Options.default_options) + for name, value in vars(arguments).items(): + if name.startswith('debug'): + from . import DebugFlags + if name in dir(DebugFlags): + setattr(DebugFlags, name, value) + else: + parser.error("Unknown debug flag: %s\n" % name) + elif hasattr(Options, name): + setattr(Options, name, value) + else: + setattr(options, name, value) if options.use_listing_file and len(sources) > 1: - sys.stderr.write( - "cython: Only one source file allowed when using -o\n") - sys.exit(1) + parser.error("cython: Only one source file allowed when using -o\n") if len(sources) == 0 and not options.show_version: - bad_usage() + parser.error("cython: Need at least one source file\n") if Options.embed and len(sources) > 1: - sys.stderr.write( - "cython: Only one source file allowed when using --embed\n") - sys.exit(1) + parser.error("cython: Only one source file allowed when using --embed\n") if options.module_name: if options.timestamps: - sys.stderr.write( - "cython: Cannot use --module-name with --timestamps\n") - sys.exit(1) + parser.error("cython: Cannot use --module-name with --timestamps\n") if len(sources) > 1: - sys.stderr.write( - "cython: Only one source file allowed when using --module-name\n") - sys.exit(1) + parser.error("cython: Only one source file allowed when using --module-name\n") return options, sources diff --git a/Cython/Compiler/Code.pxd b/Cython/Compiler/Code.pxd index 
acad0c1cf..4601474b2 100644 --- a/Cython/Compiler/Code.pxd +++ b/Cython/Compiler/Code.pxd @@ -1,5 +1,4 @@ - -from __future__ import absolute_import +# cython: language_level=3 cimport cython from ..StringIOTree cimport StringIOTree @@ -55,6 +54,7 @@ cdef class FunctionState: cdef public object closure_temps cdef public bint should_declare_error_indicator cdef public bint uses_error_indicator + cdef public bint error_without_exception @cython.locals(n=size_t) cpdef new_label(self, name=*) @@ -110,6 +110,9 @@ cdef class CCodeWriter(object): cdef bint bol cpdef write(self, s) + @cython.final + cdef _write_lines(self, s) + cpdef _write_to_buffer(self, s) cpdef put(self, code) cpdef put_safe(self, code) cpdef putln(self, code=*, bint safe=*) @@ -117,6 +120,8 @@ cdef class CCodeWriter(object): cdef increase_indent(self) @cython.final cdef decrease_indent(self) + @cython.final + cdef indent(self) cdef class PyrexCodeWriter: diff --git a/Cython/Compiler/Code.py b/Cython/Compiler/Code.py index d0b4756e5..089685496 100644 --- a/Cython/Compiler/Code.py +++ b/Cython/Compiler/Code.py @@ -1,4 +1,4 @@ -# cython: language_level = 2 +# cython: language_level=3str # cython: auto_pickle=False # # Code output module @@ -13,27 +13,21 @@ cython.declare(os=object, re=object, operator=object, textwrap=object, DebugFlags=object, basestring=object, defaultdict=object, closing=object, partial=object) +import hashlib +import operator import os import re import shutil -import sys -import operator import textwrap from string import Template from functools import partial -from contextlib import closing +from contextlib import closing, contextmanager from collections import defaultdict -try: - import hashlib -except ImportError: - import md5 as hashlib - from . import Naming from . import Options from . import DebugFlags from . import StringEncoding -from . import Version from .. 
import Utils from .Scanning import SourceDescriptor from ..StringIOTree import StringIOTree @@ -43,8 +37,6 @@ try: except ImportError: from builtins import str as basestring -KEYWORDS_MUST_BE_BYTES = sys.version_info < (2, 7) - non_portable_builtins_map = { # builtins that have different names in different Python versions @@ -101,20 +93,18 @@ uncachable_builtins = [ '__build_class__', 'ascii', # might deserve an implementation in Cython #'exec', # implemented in Cython - ## - Py2.7+ - 'memoryview', ## - platform specific 'WindowsError', ## - others '_', # e.g. used by gettext ] -special_py_methods = set([ +special_py_methods = cython.declare(frozenset, frozenset(( '__cinit__', '__dealloc__', '__richcmp__', '__next__', '__await__', '__aiter__', '__anext__', '__getreadbuffer__', '__getwritebuffer__', '__getsegcount__', - '__getcharbuffer__', '__getbuffer__', '__releasebuffer__' -]) + '__getcharbuffer__', '__getbuffer__', '__releasebuffer__', +))) modifier_output_mapper = { 'inline': 'CYTHON_INLINE' @@ -203,6 +193,28 @@ def get_utility_dir(): Cython_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) return os.path.join(Cython_dir, "Utility") +read_utilities_hook = None +""" +Override the hook for reading a utilities file that contains code fragments used +by the codegen. + +The hook functions takes the path of the utilities file, and returns a list +of strings, one per line. + +The default behavior is to open a file relative to get_utility_dir(). +""" + +def read_utilities_from_utility_dir(path): + """ + Read all lines of the file at the provided path from a path relative + to get_utility_dir(). + """ + filename = os.path.join(get_utility_dir(), path) + with closing(Utils.open_source_file(filename, encoding='UTF-8')) as f: + return f.readlines() + +# by default, read utilities from the utility directory. 
+read_utilities_hook = read_utilities_from_utility_dir class UtilityCodeBase(object): """ @@ -224,6 +236,15 @@ class UtilityCodeBase(object): [definitions] + ##### MyUtility ##### + #@subsitute: tempita + + [requires tempita substitution + - context can't be specified here though so only + tempita utility that requires no external context + will benefit from this tag + - only necessary when @required from non-tempita code] + for prototypes and implementation respectively. For non-python or -cython files backslashes should be used instead. 5 to 30 comment characters may be used on either side. @@ -242,8 +263,7 @@ class UtilityCodeBase(object): return code = '\n'.join(lines) - if tags and 'substitute' in tags and tags['substitute'] == set(['naming']): - del tags['substitute'] + if tags and 'substitute' in tags and 'naming' in tags['substitute']: try: code = Template(code).substitute(vars(Naming)) except (KeyError, ValueError) as e: @@ -259,15 +279,11 @@ class UtilityCodeBase(object): utility[1] = code else: all_tags = utility[2] - if KEYWORDS_MUST_BE_BYTES: - type = type.encode('ASCII') all_tags[type] = code if tags: all_tags = utility[2] for name, values in tags.items(): - if KEYWORDS_MUST_BE_BYTES: - name = name.encode('ASCII') all_tags.setdefault(name, set()).update(values) @classmethod @@ -276,7 +292,6 @@ class UtilityCodeBase(object): if utilities: return utilities - filename = os.path.join(get_utility_dir(), path) _, ext = os.path.splitext(path) if ext in ('.pyx', '.py', '.pxd', '.pxi'): comment = '#' @@ -292,8 +307,7 @@ class UtilityCodeBase(object): {'C': comment}).match match_type = re.compile(r'(.+)[.](proto(?:[.]\S+)?|impl|init|cleanup)$').match - with closing(Utils.open_source_file(filename, encoding='UTF-8')) as f: - all_lines = f.readlines() + all_lines = read_utilities_hook(path) utilities = defaultdict(lambda: [None, None, {}]) lines = [] @@ -335,43 +349,22 @@ class UtilityCodeBase(object): return utilities @classmethod - def load(cls, util_code_name, 
from_file=None, **kwargs): + def load(cls, util_code_name, from_file, **kwargs): """ Load utility code from a file specified by from_file (relative to - Cython/Utility) and name util_code_name. If from_file is not given, - load it from the file util_code_name.*. There should be only one - file matched by this pattern. + Cython/Utility) and name util_code_name. """ + if '::' in util_code_name: from_file, util_code_name = util_code_name.rsplit('::', 1) - if not from_file: - utility_dir = get_utility_dir() - prefix = util_code_name + '.' - try: - listing = os.listdir(utility_dir) - except OSError: - # XXX the code below assumes as 'zipimport.zipimporter' instance - # XXX should be easy to generalize, but too lazy right now to write it - import zipfile - global __loader__ - loader = __loader__ - archive = loader.archive - with closing(zipfile.ZipFile(archive)) as fileobj: - listing = [os.path.basename(name) - for name in fileobj.namelist() - if os.path.join(archive, name).startswith(utility_dir)] - files = [filename for filename in listing - if filename.startswith(prefix)] - if not files: - raise ValueError("No match found for utility code " + util_code_name) - if len(files) > 1: - raise ValueError("More than one filename match found for utility code " + util_code_name) - from_file = files[0] - + assert from_file utilities = cls.load_utilities_from_file(from_file) proto, impl, tags = utilities[util_code_name] if tags: + if "substitute" in tags and "tempita" in tags["substitute"]: + if not issubclass(cls, TempitaUtilityCode): + return TempitaUtilityCode.load(util_code_name, from_file, **kwargs) orig_kwargs = kwargs.copy() for name, values in tags.items(): if name in kwargs: @@ -385,6 +378,12 @@ class UtilityCodeBase(object): # dependencies are rarely unique, so use load_cached() when we can values = [cls.load_cached(dep, from_file) for dep in sorted(values)] + elif name == 'substitute': + # don't want to pass "naming" or "tempita" to the constructor + # since these will 
have been handled + values = values - {'naming', 'tempita'} + if not values: + continue elif not values: values = None elif len(values) == 1: @@ -404,11 +403,11 @@ class UtilityCodeBase(object): return cls(**kwargs) @classmethod - def load_cached(cls, utility_code_name, from_file=None, __cache={}): + def load_cached(cls, utility_code_name, from_file, __cache={}): """ Calls .load(), but using a per-type cache based on utility name and file name. """ - key = (cls, from_file, utility_code_name) + key = (utility_code_name, from_file, cls) try: return __cache[key] except KeyError: @@ -417,7 +416,7 @@ class UtilityCodeBase(object): return code @classmethod - def load_as_string(cls, util_code_name, from_file=None, **kwargs): + def load_as_string(cls, util_code_name, from_file, **kwargs): """ Load a utility code as a string. Returns (proto, implementation) """ @@ -437,7 +436,7 @@ class UtilityCodeBase(object): return "<%s(%s)>" % (type(self).__name__, self.name) def get_tree(self, **kwargs): - pass + return None def __deepcopy__(self, memodict=None): # No need to deep-copy utility code since it's essentially immutable. @@ -566,7 +565,7 @@ class UtilityCode(UtilityCodeBase): r'([a-zA-Z_]+),' # type cname r'\s*"([^"]+)",' # method name r'\s*([^),]+)' # object cname - r'((?:,\s*[^),]+)*)' # args* + r'((?:,[^),]+)*)' # args* r'\)', externalise, impl) assert 'CALL_UNBOUND_METHOD(' not in impl @@ -692,6 +691,7 @@ class LazyUtilityCode(UtilityCodeBase): class FunctionState(object): # return_label string function return point label # error_label string error catch point label + # error_without_exception boolean Can go to the error label without an exception (e.g. 
__next__ can return NULL) # continue_label string loop continue point label # break_label string loop break point label # return_from_error_cleanup_label string @@ -740,6 +740,8 @@ class FunctionState(object): self.should_declare_error_indicator = False self.uses_error_indicator = False + self.error_without_exception = False + # safety checks def validate_exit(self): @@ -770,9 +772,9 @@ class FunctionState(object): self.yield_labels.append(num_and_label) return num_and_label - def new_error_label(self): + def new_error_label(self, prefix=""): old_err_lbl = self.error_label - self.error_label = self.new_label('error') + self.error_label = self.new_label(prefix + 'error') return old_err_lbl def get_loop_labels(self): @@ -784,11 +786,11 @@ class FunctionState(object): (self.continue_label, self.break_label) = labels - def new_loop_labels(self): + def new_loop_labels(self, prefix=""): old_labels = self.get_loop_labels() self.set_loop_labels( - (self.new_label("continue"), - self.new_label("break"))) + (self.new_label(prefix + "continue"), + self.new_label(prefix + "break"))) return old_labels def get_all_labels(self): @@ -829,14 +831,14 @@ class FunctionState(object): allocated and released one of the same type). Type is simply registered and handed back, but will usually be a PyrexType. - If type.is_pyobject, manage_ref comes into play. If manage_ref is set to + If type.needs_refcounting, manage_ref comes into play. If manage_ref is set to True, the temp will be decref-ed on return statements and in exception handling clauses. Otherwise the caller has to deal with any reference counting of the variable. - If not type.is_pyobject, then manage_ref will be ignored, but it + If not type.needs_refcounting, then manage_ref will be ignored, but it still has to be passed. It is recommended to pass False by convention - if it is known that type will never be a Python object. + if it is known that type will never be a reference counted type. 
static=True marks the temporary declaration with "static". This is only used when allocating backing store for a module-level @@ -846,14 +848,16 @@ class FunctionState(object): A C string referring to the variable is returned. """ - if type.is_const and not type.is_reference: - type = type.const_base_type + if type.is_cv_qualified and not type.is_reference: + type = type.cv_base_type elif type.is_reference and not type.is_fake_reference: type = type.ref_base_type elif type.is_cfunction: from . import PyrexTypes type = PyrexTypes.c_ptr_type(type) # A function itself isn't an l-value - if not type.is_pyobject and not type.is_memoryviewslice: + elif type.is_cpp_class and not type.is_fake_reference and self.scope.directives['cpp_locals']: + self.scope.use_utility_code(UtilityCode.load_cached("OptionalLocals", "CppSupport.cpp")) + if not type.needs_refcounting: # Make manage_ref canonical, so that manage_ref will always mean # a decref is needed. manage_ref = False @@ -906,17 +910,17 @@ class FunctionState(object): for name, type, manage_ref, static in self.temps_allocated: freelist = self.temps_free.get((type, manage_ref)) if freelist is None or name not in freelist[1]: - used.append((name, type, manage_ref and type.is_pyobject)) + used.append((name, type, manage_ref and type.needs_refcounting)) return used def temps_holding_reference(self): """Return a list of (cname,type) tuples of temp names and their type - that are currently in use. This includes only temps of a - Python object type which owns its reference. + that are currently in use. This includes only temps + with a reference counted type which owns its reference. """ return [(name, type) for name, type, manage_ref in self.temps_in_use() - if manage_ref and type.is_pyobject] + if manage_ref and type.needs_refcounting] def all_managed_temps(self): """Return a list of (cname, type) tuples of refcount-managed Python objects. 
@@ -1121,10 +1125,10 @@ class GlobalState(object): 'h_code', 'filename_table', 'utility_code_proto_before_types', - 'numeric_typedefs', # Let these detailed individual parts stay!, - 'complex_type_declarations', # as the proper solution is to make a full DAG... - 'type_declarations', # More coarse-grained blocks would simply hide - 'utility_code_proto', # the ugliness, not fix it + 'numeric_typedefs', # Let these detailed individual parts stay!, + 'complex_type_declarations', # as the proper solution is to make a full DAG... + 'type_declarations', # More coarse-grained blocks would simply hide + 'utility_code_proto', # the ugliness, not fix it 'module_declarations', 'typeinfo', 'before_global_var', @@ -1132,19 +1136,34 @@ class GlobalState(object): 'string_decls', 'decls', 'late_includes', - 'all_the_rest', + 'module_state', + 'module_state_clear', + 'module_state_traverse', + 'module_state_defines', # redefines names used in module_state/_clear/_traverse + 'module_code', # user code goes here 'pystring_table', 'cached_builtins', 'cached_constants', - 'init_globals', + 'init_constants', + 'init_globals', # (utility code called at init-time) 'init_module', 'cleanup_globals', 'cleanup_module', 'main_method', + 'utility_code_pragmas', # silence some irrelevant warnings in utility code 'utility_code_def', + 'utility_code_pragmas_end', # clean-up the utility_code_pragmas 'end' ] + # h files can only have a much smaller list of sections + h_code_layout = [ + 'h_code', + 'utility_code_proto_before_types', + 'type_declarations', + 'utility_code_proto', + 'end' + ] def __init__(self, writer, module_node, code_config, common_utility_include_dir=None): self.filename_table = {} @@ -1156,8 +1175,8 @@ class GlobalState(object): self.code_config = code_config self.common_utility_include_dir = common_utility_include_dir self.parts = {} - self.module_node = module_node # because some utility code generation needs it - # (generating backwards-compatible Get/ReleaseBuffer + 
self.module_node = module_node # because some utility code generation needs it + # (generating backwards-compatible Get/ReleaseBuffer self.const_cnames_used = {} self.string_const_index = {} @@ -1173,8 +1192,10 @@ class GlobalState(object): def initialize_main_c_code(self): rootwriter = self.rootwriter - for part in self.code_layout: - self.parts[part] = rootwriter.insertion_point() + for i, part in enumerate(self.code_layout): + w = self.parts[part] = rootwriter.insertion_point() + if i > 0: + w.putln("/* #### Code section: %s ### */" % part) if not Options.cache_builtins: del self.parts['cached_builtins'] @@ -1188,13 +1209,18 @@ class GlobalState(object): w.putln("") w.putln("static CYTHON_SMALL_CODE int __Pyx_InitCachedConstants(void) {") w.put_declare_refcount_context() - w.put_setup_refcount_context("__Pyx_InitCachedConstants") + w.put_setup_refcount_context(StringEncoding.EncodedString("__Pyx_InitCachedConstants")) w = self.parts['init_globals'] w.enter_cfunc_scope() w.putln("") w.putln("static CYTHON_SMALL_CODE int __Pyx_InitGlobals(void) {") + w = self.parts['init_constants'] + w.enter_cfunc_scope() + w.putln("") + w.putln("static CYTHON_SMALL_CODE int __Pyx_InitConstants(void) {") + if not Options.generate_cleanup_code: del self.parts['cleanup_globals'] else: @@ -1213,6 +1239,11 @@ class GlobalState(object): code.putln("") code.putln("/* --- Runtime support code --- */") + def initialize_main_h_code(self): + rootwriter = self.rootwriter + for part in self.h_code_layout: + self.parts[part] = rootwriter.insertion_point() + def finalize_main_c_code(self): self.close_global_decls() @@ -1224,6 +1255,18 @@ class GlobalState(object): code.put(util.format_code(util.impl)) code.putln("") + # + # utility code pragmas + # + code = self.parts['utility_code_pragmas'] + util = UtilityCode.load_cached("UtilityCodePragmas", "ModuleSetupCode.c") + code.putln(util.format_code(util.impl)) + code.putln("") + code = self.parts['utility_code_pragmas_end'] + util = 
UtilityCode.load_cached("UtilityCodePragmasEnd", "ModuleSetupCode.c") + code.putln(util.format_code(util.impl)) + code.putln("") + def __getitem__(self, key): return self.parts[key] @@ -1253,13 +1296,14 @@ class GlobalState(object): w.putln("}") w.exit_cfunc_scope() - w = self.parts['init_globals'] - w.putln("return 0;") - if w.label_used(w.error_label): - w.put_label(w.error_label) - w.putln("return -1;") - w.putln("}") - w.exit_cfunc_scope() + for part in ['init_globals', 'init_constants']: + w = self.parts[part] + w.putln("return 0;") + if w.label_used(w.error_label): + w.put_label(w.error_label) + w.putln("return -1;") + w.putln("}") + w.exit_cfunc_scope() if Options.generate_cleanup_code: w = self.parts['cleanup_globals'] @@ -1306,8 +1350,11 @@ class GlobalState(object): return const # create a new Python object constant const = self.new_py_const(type, prefix) - if cleanup_level is not None \ - and cleanup_level <= Options.generate_cleanup_code: + if (cleanup_level is not None + and cleanup_level <= Options.generate_cleanup_code + # Note that this function is used for all argument defaults + # which aren't just Python objects + and type.needs_refcounting): cleanup_writer = self.parts['cleanup_globals'] cleanup_writer.putln('Py_CLEAR(%s);' % const.cname) if dedup_key is not None: @@ -1376,23 +1423,33 @@ class GlobalState(object): value = bytes_value.decode('ASCII', 'ignore') return self.new_const_cname(value=value) - def new_num_const_cname(self, value, py_type): + def unique_const_cname(self, format_str): # type: (str) -> str + used = self.const_cnames_used + cname = value = format_str.format(sep='', counter='') + while cname in used: + counter = used[value] = used[value] + 1 + cname = format_str.format(sep='_', counter=counter) + used[cname] = 1 + return cname + + def new_num_const_cname(self, value, py_type): # type: (str, str) -> str if py_type == 'long': value += 'L' py_type = 'int' prefix = Naming.interned_prefixes[py_type] - cname = "%s%s" % (prefix, 
value) - cname = cname.replace('+', '_').replace('-', 'neg_').replace('.', '_') + + value = value.replace('.', '_').replace('+', '_').replace('-', 'neg_') + if len(value) > 42: + # update tests/run/large_integer_T5290.py in case the amount is changed + cname = self.unique_const_cname( + prefix + "large{counter}_" + value[:18] + "_xxx_" + value[-18:]) + else: + cname = "%s%s" % (prefix, value) return cname def new_const_cname(self, prefix='', value=''): value = replace_identifier('_', value)[:32].strip('_') - used = self.const_cnames_used - name_suffix = value - while name_suffix in used: - counter = used[value] = used[value] + 1 - name_suffix = '%s_%d' % (value, counter) - used[name_suffix] = 1 + name_suffix = self.unique_const_cname(value + "{sep}{counter}") if prefix: prefix = Naming.interned_prefixes[prefix] else: @@ -1460,26 +1517,38 @@ class GlobalState(object): consts = [(len(c.cname), c.cname, c) for c in self.py_constants] consts.sort() - decls_writer = self.parts['decls'] for _, cname, c in consts: - decls_writer.putln( - "static %s;" % c.type.declaration_code(cname)) + self.parts['module_state'].putln("%s;" % c.type.declaration_code(cname)) + self.parts['module_state_defines'].putln( + "#define %s %s->%s" % (cname, Naming.modulestateglobal_cname, cname)) + if not c.type.needs_refcounting: + # Note that py_constants is used for all argument defaults + # which aren't necessarily PyObjects, so aren't appropriate + # to clear. 
+ continue + self.parts['module_state_clear'].putln( + "Py_CLEAR(clear_module_state->%s);" % cname) + self.parts['module_state_traverse'].putln( + "Py_VISIT(traverse_module_state->%s);" % cname) def generate_cached_methods_decls(self): if not self.cached_cmethods: return decl = self.parts['decls'] - init = self.parts['init_globals'] + init = self.parts['init_constants'] cnames = [] for (type_cname, method_name), cname in sorted(self.cached_cmethods.items()): cnames.append(cname) method_name_cname = self.get_interned_identifier(StringEncoding.EncodedString(method_name)).cname - decl.putln('static __Pyx_CachedCFunction %s = {0, &%s, 0, 0, 0};' % ( - cname, method_name_cname)) + decl.putln('static __Pyx_CachedCFunction %s = {0, 0, 0, 0, 0};' % ( + cname)) # split type reference storage as it might not be static init.putln('%s.type = (PyObject*)&%s;' % ( cname, type_cname)) + # method name string isn't static in limited api + init.putln('%s.method_name = &%s;' % ( + cname, method_name_cname)) if Options.generate_cleanup_code: cleanup = self.parts['cleanup_globals'] @@ -1517,13 +1586,18 @@ class GlobalState(object): decls_writer.putln("static Py_UNICODE %s[] = { %s };" % (cname, utf16_array)) decls_writer.putln("#endif") + init_constants = self.parts['init_constants'] if py_strings: self.use_utility_code(UtilityCode.load_cached("InitStrings", "StringTools.c")) py_strings.sort() w = self.parts['pystring_table'] w.putln("") - w.putln("static __Pyx_StringTabEntry %s[] = {" % Naming.stringtab_cname) - for c_cname, _, py_string in py_strings: + w.putln("static int __Pyx_CreateStringTabAndInitStrings(void) {") + # the stringtab is a function local rather than a global to + # ensure that it doesn't conflict with module state + w.putln("__Pyx_StringTabEntry %s[] = {" % Naming.stringtab_cname) + for py_string_args in py_strings: + c_cname, _, py_string = py_string_args if not py_string.is_str or not py_string.encoding or \ py_string.encoding in ('ASCII', 'USASCII', 'US-ASCII', 
'UTF8', 'UTF-8'): @@ -1531,8 +1605,15 @@ class GlobalState(object): else: encoding = '"%s"' % py_string.encoding.lower() - decls_writer.putln( - "static PyObject *%s;" % py_string.cname) + self.parts['module_state'].putln("PyObject *%s;" % py_string.cname) + self.parts['module_state_defines'].putln("#define %s %s->%s" % ( + py_string.cname, + Naming.modulestateglobal_cname, + py_string.cname)) + self.parts['module_state_clear'].putln("Py_CLEAR(clear_module_state->%s);" % + py_string.cname) + self.parts['module_state_traverse'].putln("Py_VISIT(traverse_module_state->%s);" % + py_string.cname) if py_string.py3str_cstring: w.putln("#if PY_MAJOR_VERSION >= 3") w.putln("{&%s, %s, sizeof(%s), %s, %d, %d, %d}," % ( @@ -1556,22 +1637,27 @@ class GlobalState(object): w.putln("#endif") w.putln("{0, 0, 0, 0, 0, 0, 0}") w.putln("};") + w.putln("return __Pyx_InitStrings(%s);" % Naming.stringtab_cname) + w.putln("}") - init_globals = self.parts['init_globals'] - init_globals.putln( - "if (__Pyx_InitStrings(%s) < 0) %s" % ( - Naming.stringtab_cname, - init_globals.error_goto(self.module_pos))) + init_constants.putln( + "if (__Pyx_CreateStringTabAndInitStrings() < 0) %s;" % + init_constants.error_goto(self.module_pos)) def generate_num_constants(self): consts = [(c.py_type, c.value[0] == '-', len(c.value), c.value, c.value_code, c) for c in self.num_const_index.values()] consts.sort() - decls_writer = self.parts['decls'] - init_globals = self.parts['init_globals'] + init_constants = self.parts['init_constants'] for py_type, _, _, value, value_code, c in consts: cname = c.cname - decls_writer.putln("static PyObject *%s;" % cname) + self.parts['module_state'].putln("PyObject *%s;" % cname) + self.parts['module_state_defines'].putln("#define %s %s->%s" % ( + cname, Naming.modulestateglobal_cname, cname)) + self.parts['module_state_clear'].putln( + "Py_CLEAR(clear_module_state->%s);" % cname) + self.parts['module_state_traverse'].putln( + "Py_VISIT(traverse_module_state->%s);" % 
cname) if py_type == 'float': function = 'PyFloat_FromDouble(%s)' elif py_type == 'long': @@ -1582,9 +1668,9 @@ class GlobalState(object): function = "PyInt_FromLong(%sL)" else: function = "PyInt_FromLong(%s)" - init_globals.putln('%s = %s; %s' % ( + init_constants.putln('%s = %s; %s' % ( cname, function % value_code, - init_globals.error_goto_if_null(cname, self.module_pos))) + init_constants.error_goto_if_null(cname, self.module_pos))) # The functions below are there in a transition phase only # and will be deprecated. They are called from Nodes.BlockNode. @@ -1759,10 +1845,21 @@ class CCodeWriter(object): return self.buffer.getvalue() def write(self, s): - # also put invalid markers (lineno 0), to indicate that those lines - # have no Cython source code correspondence - cython_lineno = self.last_marked_pos[1] if self.last_marked_pos else 0 - self.buffer.markers.extend([cython_lineno] * s.count('\n')) + if '\n' in s: + self._write_lines(s) + else: + self._write_to_buffer(s) + + def _write_lines(self, s): + # Cygdb needs to know which Cython source line corresponds to which C line. + # Therefore, we write this information into "self.buffer.markers" and then write it from there + # into cython_debug/cython_debug_info_* (see ModuleNode._serialize_lineno_map). + filename_line = self.last_marked_pos[:2] if self.last_marked_pos else (None, 0) + self.buffer.markers.extend([filename_line] * s.count('\n')) + + self._write_to_buffer(s) + + def _write_to_buffer(self, s): self.buffer.write(s) def insertion_point(self): @@ -1804,13 +1901,37 @@ class CCodeWriter(object): @funccontext_property def yield_labels(self): pass + def label_interceptor(self, new_labels, orig_labels, skip_to_label=None, pos=None, trace=True): + """ + Helper for generating multiple label interceptor code blocks. 
+ + @param new_labels: the new labels that should be intercepted + @param orig_labels: the original labels that we should dispatch to after the interception + @param skip_to_label: a label to skip to before starting the code blocks + @param pos: the node position to mark for each interceptor block + @param trace: add a trace line for the pos marker or not + """ + for label, orig_label in zip(new_labels, orig_labels): + if not self.label_used(label): + continue + if skip_to_label: + # jump over the whole interception block + self.put_goto(skip_to_label) + skip_to_label = None + + if pos is not None: + self.mark_pos(pos, trace=trace) + self.put_label(label) + yield (label, orig_label) + self.put_goto(orig_label) + # Functions delegated to function scope def new_label(self, name=None): return self.funcstate.new_label(name) - def new_error_label(self): return self.funcstate.new_error_label() + def new_error_label(self, *args): return self.funcstate.new_error_label(*args) def new_yield_label(self, *args): return self.funcstate.new_yield_label(*args) def get_loop_labels(self): return self.funcstate.get_loop_labels() def set_loop_labels(self, labels): return self.funcstate.set_loop_labels(labels) - def new_loop_labels(self): return self.funcstate.new_loop_labels() + def new_loop_labels(self, *args): return self.funcstate.new_loop_labels(*args) def get_all_labels(self): return self.funcstate.get_all_labels() def set_all_labels(self, labels): return self.funcstate.set_all_labels(labels) def all_new_labels(self): return self.funcstate.all_new_labels() @@ -1822,6 +1943,7 @@ class CCodeWriter(object): self.funcstate = FunctionState(self, scope=scope) def exit_cfunc_scope(self): + self.funcstate.validate_exit() self.funcstate = None # constant handling @@ -1865,13 +1987,13 @@ class CCodeWriter(object): self.emit_marker() if self.code_config.emit_linenums and self.last_marked_pos: source_desc, line, _ = self.last_marked_pos - self.write('\n#line %s "%s"\n' % (line, 
source_desc.get_escaped_description())) + self._write_lines('\n#line %s "%s"\n' % (line, source_desc.get_escaped_description())) if code: if safe: self.put_safe(code) else: self.put(code) - self.write("\n") + self._write_lines("\n") self.bol = 1 def mark_pos(self, pos, trace=True): @@ -1885,13 +2007,13 @@ class CCodeWriter(object): pos, trace = self.last_pos self.last_marked_pos = pos self.last_pos = None - self.write("\n") + self._write_lines("\n") if self.code_config.emit_code_comments: self.indent() - self.write("/* %s */\n" % self._build_marker(pos)) + self._write_lines("/* %s */\n" % self._build_marker(pos)) if trace and self.funcstate and self.funcstate.can_trace and self.globalstate.directives['linetrace']: self.indent() - self.write('__Pyx_TraceLine(%d,%d,%s)\n' % ( + self._write_lines('__Pyx_TraceLine(%d,%d,%s)\n' % ( pos[1], not self.funcstate.gil_owned, self.error_goto(pos))) def _build_marker(self, pos): @@ -1912,7 +2034,7 @@ class CCodeWriter(object): include_dir = self.globalstate.common_utility_include_dir if include_dir and len(code) > 1024: include_file = "%s_%s.h" % ( - name, hashlib.md5(code.encode('utf8')).hexdigest()) + name, hashlib.sha1(code.encode('utf8')).hexdigest()) path = os.path.join(include_dir, include_file) if not os.path.exists(path): tmp_path = '%s.tmp%s' % (path, os.getpid()) @@ -1968,7 +2090,7 @@ class CCodeWriter(object): self.putln("}") def indent(self): - self.write(" " * self.level) + self._write_to_buffer(" " * self.level) def get_py_version_hex(self, pyversion): return "0x%02X%02X%02X%02X" % (tuple(pyversion) + (0,0,0,0))[:4] @@ -1990,26 +2112,33 @@ class CCodeWriter(object): if entry.visibility == "private" and not entry.used: #print "...private and not used, skipping", entry.cname ### return - if storage_class: - self.put("%s " % storage_class) if not entry.cf_used: self.put('CYTHON_UNUSED ') - self.put(entry.type.declaration_code( - entry.cname, dll_linkage=dll_linkage)) + if storage_class: + self.put("%s " % 
storage_class) + if entry.is_cpp_optional: + self.put(entry.type.cpp_optional_declaration_code( + entry.cname, dll_linkage=dll_linkage)) + else: + self.put(entry.type.declaration_code( + entry.cname, dll_linkage=dll_linkage)) if entry.init is not None: self.put_safe(" = %s" % entry.type.literal_code(entry.init)) elif entry.type.is_pyobject: self.put(" = NULL") self.putln(";") + self.funcstate.scope.use_entry_utility_code(entry) def put_temp_declarations(self, func_context): for name, type, manage_ref, static in func_context.temps_allocated: - decl = type.declaration_code(name) + if type.is_cpp_class and not type.is_fake_reference and func_context.scope.directives['cpp_locals']: + decl = type.cpp_optional_declaration_code(name) + else: + decl = type.declaration_code(name) if type.is_pyobject: self.putln("%s = NULL;" % decl) elif type.is_memoryviewslice: - from . import MemoryView - self.putln("%s = %s;" % (decl, MemoryView.memslice_entry_init)) + self.putln("%s = %s;" % (decl, type.literal_code(type.default_value))) else: self.putln("%s%s;" % (static and "static " or "", decl)) @@ -2024,7 +2153,7 @@ class CCodeWriter(object): self.putln("%sint %s = 0;" % (unused, Naming.clineno_cname)) def put_generated_by(self): - self.putln("/* Generated by Cython %s */" % Version.watermark) + self.putln(Utils.GENERATED_BY_MARKER) self.putln("") def put_h_guard(self, guard): @@ -2047,7 +2176,7 @@ class CCodeWriter(object): def entry_as_pyobject(self, entry): type = entry.type if (not entry.is_self_arg and not entry.type.is_complete() - or entry.type.is_extension_type): + or entry.type.is_extension_type): return "(PyObject *)" + entry.cname else: return entry.cname @@ -2056,123 +2185,89 @@ class CCodeWriter(object): from .PyrexTypes import py_object_type, typecast return typecast(py_object_type, type, cname) - def put_gotref(self, cname): - self.putln("__Pyx_GOTREF(%s);" % cname) + def put_gotref(self, cname, type): + type.generate_gotref(self, cname) - def put_giveref(self, 
cname): - self.putln("__Pyx_GIVEREF(%s);" % cname) + def put_giveref(self, cname, type): + type.generate_giveref(self, cname) - def put_xgiveref(self, cname): - self.putln("__Pyx_XGIVEREF(%s);" % cname) + def put_xgiveref(self, cname, type): + type.generate_xgiveref(self, cname) - def put_xgotref(self, cname): - self.putln("__Pyx_XGOTREF(%s);" % cname) + def put_xgotref(self, cname, type): + type.generate_xgotref(self, cname) def put_incref(self, cname, type, nanny=True): - if nanny: - self.putln("__Pyx_INCREF(%s);" % self.as_pyobject(cname, type)) - else: - self.putln("Py_INCREF(%s);" % self.as_pyobject(cname, type)) + # Note: original put_Memslice_Incref/Decref also added in some utility code + # this is unnecessary since the relevant utility code is loaded anyway if a memoryview is used + # and so has been removed. However, it's potentially a feature that might be useful here + type.generate_incref(self, cname, nanny=nanny) - def put_decref(self, cname, type, nanny=True): - self._put_decref(cname, type, nanny, null_check=False, clear=False) + def put_xincref(self, cname, type, nanny=True): + type.generate_xincref(self, cname, nanny=nanny) - def put_var_gotref(self, entry): - if entry.type.is_pyobject: - self.putln("__Pyx_GOTREF(%s);" % self.entry_as_pyobject(entry)) + def put_decref(self, cname, type, nanny=True, have_gil=True): + type.generate_decref(self, cname, nanny=nanny, have_gil=have_gil) - def put_var_giveref(self, entry): - if entry.type.is_pyobject: - self.putln("__Pyx_GIVEREF(%s);" % self.entry_as_pyobject(entry)) + def put_xdecref(self, cname, type, nanny=True, have_gil=True): + type.generate_xdecref(self, cname, nanny=nanny, have_gil=have_gil) - def put_var_xgotref(self, entry): - if entry.type.is_pyobject: - self.putln("__Pyx_XGOTREF(%s);" % self.entry_as_pyobject(entry)) + def put_decref_clear(self, cname, type, clear_before_decref=False, nanny=True, have_gil=True): + type.generate_decref_clear(self, cname, clear_before_decref=clear_before_decref, 
+ nanny=nanny, have_gil=have_gil) - def put_var_xgiveref(self, entry): - if entry.type.is_pyobject: - self.putln("__Pyx_XGIVEREF(%s);" % self.entry_as_pyobject(entry)) + def put_xdecref_clear(self, cname, type, clear_before_decref=False, nanny=True, have_gil=True): + type.generate_xdecref_clear(self, cname, clear_before_decref=clear_before_decref, + nanny=nanny, have_gil=have_gil) - def put_var_incref(self, entry, nanny=True): - if entry.type.is_pyobject: - if nanny: - self.putln("__Pyx_INCREF(%s);" % self.entry_as_pyobject(entry)) - else: - self.putln("Py_INCREF(%s);" % self.entry_as_pyobject(entry)) + def put_decref_set(self, cname, type, rhs_cname): + type.generate_decref_set(self, cname, rhs_cname) - def put_var_xincref(self, entry): - if entry.type.is_pyobject: - self.putln("__Pyx_XINCREF(%s);" % self.entry_as_pyobject(entry)) + def put_xdecref_set(self, cname, type, rhs_cname): + type.generate_xdecref_set(self, cname, rhs_cname) - def put_decref_clear(self, cname, type, nanny=True, clear_before_decref=False): - self._put_decref(cname, type, nanny, null_check=False, - clear=True, clear_before_decref=clear_before_decref) + def put_incref_memoryviewslice(self, slice_cname, type, have_gil): + # TODO ideally this would just be merged into "put_incref" + type.generate_incref_memoryviewslice(self, slice_cname, have_gil=have_gil) - def put_xdecref(self, cname, type, nanny=True, have_gil=True): - self._put_decref(cname, type, nanny, null_check=True, - have_gil=have_gil, clear=False) + def put_var_incref_memoryviewslice(self, entry, have_gil): + self.put_incref_memoryviewslice(entry.cname, entry.type, have_gil=have_gil) - def put_xdecref_clear(self, cname, type, nanny=True, clear_before_decref=False): - self._put_decref(cname, type, nanny, null_check=True, - clear=True, clear_before_decref=clear_before_decref) + def put_var_gotref(self, entry): + self.put_gotref(entry.cname, entry.type) - def _put_decref(self, cname, type, nanny=True, null_check=False, - have_gil=True, 
clear=False, clear_before_decref=False): - if type.is_memoryviewslice: - self.put_xdecref_memoryviewslice(cname, have_gil=have_gil) - return + def put_var_giveref(self, entry): + self.put_giveref(entry.cname, entry.type) - prefix = '__Pyx' if nanny else 'Py' - X = 'X' if null_check else '' + def put_var_xgotref(self, entry): + self.put_xgotref(entry.cname, entry.type) - if clear: - if clear_before_decref: - if not nanny: - X = '' # CPython doesn't have a Py_XCLEAR() - self.putln("%s_%sCLEAR(%s);" % (prefix, X, cname)) - else: - self.putln("%s_%sDECREF(%s); %s = 0;" % ( - prefix, X, self.as_pyobject(cname, type), cname)) - else: - self.putln("%s_%sDECREF(%s);" % ( - prefix, X, self.as_pyobject(cname, type))) + def put_var_xgiveref(self, entry): + self.put_xgiveref(entry.cname, entry.type) - def put_decref_set(self, cname, rhs_cname): - self.putln("__Pyx_DECREF_SET(%s, %s);" % (cname, rhs_cname)) + def put_var_incref(self, entry, **kwds): + self.put_incref(entry.cname, entry.type, **kwds) - def put_xdecref_set(self, cname, rhs_cname): - self.putln("__Pyx_XDECREF_SET(%s, %s);" % (cname, rhs_cname)) + def put_var_xincref(self, entry, **kwds): + self.put_xincref(entry.cname, entry.type, **kwds) - def put_var_decref(self, entry): - if entry.type.is_pyobject: - self.putln("__Pyx_XDECREF(%s);" % self.entry_as_pyobject(entry)) + def put_var_decref(self, entry, **kwds): + self.put_decref(entry.cname, entry.type, **kwds) - def put_var_xdecref(self, entry, nanny=True): - if entry.type.is_pyobject: - if nanny: - self.putln("__Pyx_XDECREF(%s);" % self.entry_as_pyobject(entry)) - else: - self.putln("Py_XDECREF(%s);" % self.entry_as_pyobject(entry)) - - def put_var_decref_clear(self, entry): - self._put_var_decref_clear(entry, null_check=False) - - def put_var_xdecref_clear(self, entry): - self._put_var_decref_clear(entry, null_check=True) - - def _put_var_decref_clear(self, entry, null_check): - if entry.type.is_pyobject: - if entry.in_closure: - # reset before DECREF to make 
sure closure state is - # consistent during call to DECREF() - self.putln("__Pyx_%sCLEAR(%s);" % ( - null_check and 'X' or '', - entry.cname)) - else: - self.putln("__Pyx_%sDECREF(%s); %s = 0;" % ( - null_check and 'X' or '', - self.entry_as_pyobject(entry), - entry.cname)) + def put_var_xdecref(self, entry, **kwds): + self.put_xdecref(entry.cname, entry.type, **kwds) + + def put_var_decref_clear(self, entry, **kwds): + self.put_decref_clear(entry.cname, entry.type, clear_before_decref=entry.in_closure, **kwds) + + def put_var_decref_set(self, entry, rhs_cname, **kwds): + self.put_decref_set(entry.cname, entry.type, rhs_cname, **kwds) + + def put_var_xdecref_set(self, entry, rhs_cname, **kwds): + self.put_xdecref_set(entry.cname, entry.type, rhs_cname, **kwds) + + def put_var_xdecref_clear(self, entry, **kwds): + self.put_xdecref_clear(entry.cname, entry.type, clear_before_decref=entry.in_closure, **kwds) def put_var_decrefs(self, entries, used_only = 0): for entry in entries: @@ -2190,19 +2285,6 @@ class CCodeWriter(object): for entry in entries: self.put_var_xdecref_clear(entry) - def put_incref_memoryviewslice(self, slice_cname, have_gil=False): - from . import MemoryView - self.globalstate.use_utility_code(MemoryView.memviewslice_init_code) - self.putln("__PYX_INC_MEMVIEW(&%s, %d);" % (slice_cname, int(have_gil))) - - def put_xdecref_memoryviewslice(self, slice_cname, have_gil=False): - from . 
import MemoryView - self.globalstate.use_utility_code(MemoryView.memviewslice_init_code) - self.putln("__PYX_XDEC_MEMVIEW(&%s, %d);" % (slice_cname, int(have_gil))) - - def put_xgiveref_memoryviewslice(self, slice_cname): - self.put_xgiveref("%s.memview" % slice_cname) - def put_init_to_py_none(self, cname, type, nanny=True): from .PyrexTypes import py_object_type, typecast py_none = typecast(type, py_object_type, "Py_None") @@ -2220,8 +2302,11 @@ class CCodeWriter(object): self.put_giveref('Py_None') def put_pymethoddef(self, entry, term, allow_skip=True, wrapper_code_writer=None): + is_reverse_number_slot = False if entry.is_special or entry.name == '__getattribute__': - if entry.name not in special_py_methods: + from . import TypeSlots + is_reverse_number_slot = True + if entry.name not in special_py_methods and not TypeSlots.is_reverse_number_slot(entry.name): if entry.name == '__getattr__' and not self.globalstate.directives['fast_getattr']: pass # Python's typeobject.c will automatically fill in our slot @@ -2233,37 +2318,60 @@ class CCodeWriter(object): method_flags = entry.signature.method_flags() if not method_flags: return - from . import TypeSlots - if entry.is_special or TypeSlots.is_reverse_number_slot(entry.name): + if entry.is_special: method_flags += [TypeSlots.method_coexist] func_ptr = wrapper_code_writer.put_pymethoddef_wrapper(entry) if wrapper_code_writer else entry.func_cname # Add required casts, but try not to shadow real warnings. - cast = '__Pyx_PyCFunctionFast' if 'METH_FASTCALL' in method_flags else 'PyCFunction' - if 'METH_KEYWORDS' in method_flags: - cast += 'WithKeywords' + cast = entry.signature.method_function_type() if cast != 'PyCFunction': func_ptr = '(void*)(%s)%s' % (cast, func_ptr) + entry_name = entry.name.as_c_string_literal() + if is_reverse_number_slot: + # Unlike most special functions, reverse number operator slots are actually generated here + # (to ensure that they can be looked up). 
However, they're sometimes guarded by the preprocessor + # so a bit of extra logic is needed + slot = TypeSlots.get_slot_table(self.globalstate.directives).get_slot_by_method_name(entry.name) + preproc_guard = slot.preprocessor_guard_code() + if preproc_guard: + self.putln(preproc_guard) self.putln( - '{"%s", (PyCFunction)%s, %s, %s}%s' % ( - entry.name, + '{%s, (PyCFunction)%s, %s, %s}%s' % ( + entry_name, func_ptr, "|".join(method_flags), entry.doc_cname if entry.doc else '0', term)) + if is_reverse_number_slot and preproc_guard: + self.putln("#endif") def put_pymethoddef_wrapper(self, entry): func_cname = entry.func_cname if entry.is_special: - method_flags = entry.signature.method_flags() - if method_flags and 'METH_NOARGS' in method_flags: + method_flags = entry.signature.method_flags() or [] + from .TypeSlots import method_noargs + if method_noargs in method_flags: # Special NOARGS methods really take no arguments besides 'self', but PyCFunction expects one. func_cname = Naming.method_wrapper_prefix + func_cname - self.putln("static PyObject *%s(PyObject *self, CYTHON_UNUSED PyObject *arg) {return %s(self);}" % ( - func_cname, entry.func_cname)) + self.putln("static PyObject *%s(PyObject *self, CYTHON_UNUSED PyObject *arg) {" % func_cname) + func_call = "%s(self)" % entry.func_cname + if entry.name == "__next__": + self.putln("PyObject *res = %s;" % func_call) + # tp_iternext can return NULL without an exception + self.putln("if (!res && !PyErr_Occurred()) { PyErr_SetNone(PyExc_StopIteration); }") + self.putln("return res;") + else: + self.putln("return %s;" % func_call) + self.putln("}") return func_cname # GIL methods + def use_fast_gil_utility_code(self): + if self.globalstate.directives['fast_gil']: + self.globalstate.use_utility_code(UtilityCode.load_cached("FastGil", "ModuleSetupCode.c")) + else: + self.globalstate.use_utility_code(UtilityCode.load_cached("NoFastGil", "ModuleSetupCode.c")) + def put_ensure_gil(self, declare_gilstate=True, 
variable=None): """ Acquire the GIL. The generated code is safe even when no PyThreadState @@ -2273,10 +2381,7 @@ class CCodeWriter(object): """ self.globalstate.use_utility_code( UtilityCode.load_cached("ForceInitThreads", "ModuleSetupCode.c")) - if self.globalstate.directives['fast_gil']: - self.globalstate.use_utility_code(UtilityCode.load_cached("FastGil", "ModuleSetupCode.c")) - else: - self.globalstate.use_utility_code(UtilityCode.load_cached("NoFastGil", "ModuleSetupCode.c")) + self.use_fast_gil_utility_code() self.putln("#ifdef WITH_THREAD") if not variable: variable = '__pyx_gilstate_save' @@ -2289,41 +2394,43 @@ class CCodeWriter(object): """ Releases the GIL, corresponds to `put_ensure_gil`. """ - if self.globalstate.directives['fast_gil']: - self.globalstate.use_utility_code(UtilityCode.load_cached("FastGil", "ModuleSetupCode.c")) - else: - self.globalstate.use_utility_code(UtilityCode.load_cached("NoFastGil", "ModuleSetupCode.c")) + self.use_fast_gil_utility_code() if not variable: variable = '__pyx_gilstate_save' self.putln("#ifdef WITH_THREAD") self.putln("__Pyx_PyGILState_Release(%s);" % variable) self.putln("#endif") - def put_acquire_gil(self, variable=None): + def put_acquire_gil(self, variable=None, unknown_gil_state=True): """ Acquire the GIL. 
The thread's thread state must have been initialized by a previous `put_release_gil` """ - if self.globalstate.directives['fast_gil']: - self.globalstate.use_utility_code(UtilityCode.load_cached("FastGil", "ModuleSetupCode.c")) - else: - self.globalstate.use_utility_code(UtilityCode.load_cached("NoFastGil", "ModuleSetupCode.c")) + self.use_fast_gil_utility_code() self.putln("#ifdef WITH_THREAD") self.putln("__Pyx_FastGIL_Forget();") if variable: self.putln('_save = %s;' % variable) + if unknown_gil_state: + self.putln("if (_save) {") self.putln("Py_BLOCK_THREADS") + if unknown_gil_state: + self.putln("}") self.putln("#endif") - def put_release_gil(self, variable=None): + def put_release_gil(self, variable=None, unknown_gil_state=True): "Release the GIL, corresponds to `put_acquire_gil`." - if self.globalstate.directives['fast_gil']: - self.globalstate.use_utility_code(UtilityCode.load_cached("FastGil", "ModuleSetupCode.c")) - else: - self.globalstate.use_utility_code(UtilityCode.load_cached("NoFastGil", "ModuleSetupCode.c")) + self.use_fast_gil_utility_code() self.putln("#ifdef WITH_THREAD") self.putln("PyThreadState *_save;") + self.putln("_save = NULL;") + if unknown_gil_state: + # we don't *know* that we don't have the GIL (since we may be inside a nogil function, + # and Py_UNBLOCK_THREADS is unsafe without the GIL) + self.putln("if (PyGILState_Check()) {") self.putln("Py_UNBLOCK_THREADS") + if unknown_gil_state: + self.putln("}") if variable: self.putln('%s = _save;' % variable) self.putln("__Pyx_FastGIL_Remember();") @@ -2341,23 +2448,34 @@ class CCodeWriter(object): # return self.putln("if (unlikely(%s < 0)) %s" % (value, self.error_goto(pos))) return self.putln("if (%s < 0) %s" % (value, self.error_goto(pos))) - def put_error_if_unbound(self, pos, entry, in_nogil_context=False): - from . 
import ExprNodes + def put_error_if_unbound(self, pos, entry, in_nogil_context=False, unbound_check_code=None): if entry.from_closure: func = '__Pyx_RaiseClosureNameError' self.globalstate.use_utility_code( - ExprNodes.raise_closure_name_error_utility_code) + UtilityCode.load_cached("RaiseClosureNameError", "ObjectHandling.c")) elif entry.type.is_memoryviewslice and in_nogil_context: func = '__Pyx_RaiseUnboundMemoryviewSliceNogil' self.globalstate.use_utility_code( - ExprNodes.raise_unbound_memoryview_utility_code_nogil) + UtilityCode.load_cached("RaiseUnboundMemoryviewSliceNogil", "ObjectHandling.c")) + elif entry.type.is_cpp_class and entry.is_cglobal: + func = '__Pyx_RaiseCppGlobalNameError' + self.globalstate.use_utility_code( + UtilityCode.load_cached("RaiseCppGlobalNameError", "ObjectHandling.c")) + elif entry.type.is_cpp_class and entry.is_variable and not entry.is_member and entry.scope.is_c_class_scope: + # there doesn't seem to be a good way to detecting an instance-attribute of a C class + # (is_member is only set for class attributes) + func = '__Pyx_RaiseCppAttributeError' + self.globalstate.use_utility_code( + UtilityCode.load_cached("RaiseCppAttributeError", "ObjectHandling.c")) else: func = '__Pyx_RaiseUnboundLocalError' self.globalstate.use_utility_code( - ExprNodes.raise_unbound_local_error_utility_code) + UtilityCode.load_cached("RaiseUnboundLocalError", "ObjectHandling.c")) + if not unbound_check_code: + unbound_check_code = entry.type.check_for_null_code(entry.cname) self.putln('if (unlikely(!%s)) { %s("%s"); %s }' % ( - entry.type.check_for_null_code(entry.cname), + unbound_check_code, func, entry.name, self.error_goto(pos))) @@ -2390,7 +2508,8 @@ class CCodeWriter(object): return self.error_goto_if("!%s" % cname, pos) def error_goto_if_neg(self, cname, pos): - return self.error_goto_if("%s < 0" % cname, pos) + # Add extra parentheses to silence clang warnings about constant conditions. 
+ return self.error_goto_if("(%s < 0)" % cname, pos) def error_goto_if_PyErr(self, pos): return self.error_goto_if("PyErr_Occurred()", pos) @@ -2402,13 +2521,14 @@ class CCodeWriter(object): self.putln('__Pyx_RefNannyDeclarations') def put_setup_refcount_context(self, name, acquire_gil=False): + name = name.as_c_string_literal() # handle unicode names if acquire_gil: self.globalstate.use_utility_code( UtilityCode.load_cached("ForceInitThreads", "ModuleSetupCode.c")) - self.putln('__Pyx_RefNannySetupContext("%s", %d);' % (name, acquire_gil and 1 or 0)) + self.putln('__Pyx_RefNannySetupContext(%s, %d);' % (name, acquire_gil and 1 or 0)) - def put_finish_refcount_context(self): - self.putln("__Pyx_RefNannyFinishContext();") + def put_finish_refcount_context(self, nogil=False): + self.putln("__Pyx_RefNannyFinishContextNogil()" if nogil else "__Pyx_RefNannyFinishContext();") def put_add_traceback(self, qualified_name, include_cline=True): """ @@ -2416,14 +2536,16 @@ class CCodeWriter(object): qualified_name should be the qualified name of the function. """ + qualified_name = qualified_name.as_c_string_literal() # handle unicode names format_tuple = ( qualified_name, Naming.clineno_cname if include_cline else 0, Naming.lineno_cname, Naming.filename_cname, ) + self.funcstate.uses_error_indicator = True - self.putln('__Pyx_AddTraceback("%s", %s, %s, %s);' % format_tuple) + self.putln('__Pyx_AddTraceback(%s, %s, %s, %s);' % format_tuple) def put_unraisable(self, qualified_name, nogil=False): """ @@ -2504,16 +2626,16 @@ class PyrexCodeWriter(object): def dedent(self): self.level -= 1 + class PyxCodeWriter(object): """ - Can be used for writing out some Cython code. To use the indenter - functionality, the Cython.Compiler.Importer module will have to be used - to load the code to support python 2.4 + Can be used for writing out some Cython code. 
""" def __init__(self, buffer=None, indent_level=0, context=None, encoding='ascii'): self.buffer = buffer or StringIOTree() self.level = indent_level + self.original_level = indent_level self.context = context self.encoding = encoding @@ -2524,22 +2646,19 @@ class PyxCodeWriter(object): def dedent(self, levels=1): self.level -= levels + @contextmanager def indenter(self, line): """ - Instead of - - with pyx_code.indenter("for i in range(10):"): - pyx_code.putln("print i") - - write - - if pyx_code.indenter("for i in range(10);"): - pyx_code.putln("print i") - pyx_code.dedent() + with pyx_code.indenter("for i in range(10):"): + pyx_code.putln("print i") """ self.putln(line) self.indent() - return True + yield + self.dedent() + + def empty(self): + return self.buffer.empty() def getvalue(self): result = self.buffer.getvalue() @@ -2554,7 +2673,7 @@ class PyxCodeWriter(object): self._putln(line) def _putln(self, line): - self.buffer.write("%s%s\n" % (self.level * " ", line)) + self.buffer.write(u"%s%s\n" % (self.level * u" ", line)) def put_chunk(self, chunk, context=None): context = context or self.context @@ -2566,8 +2685,13 @@ class PyxCodeWriter(object): self._putln(line) def insertion_point(self): - return PyxCodeWriter(self.buffer.insertion_point(), self.level, - self.context) + return type(self)(self.buffer.insertion_point(), self.level, self.context) + + def reset(self): + # resets the buffer so that nothing gets written. Most useful + # for abandoning all work in a specific insertion point + self.buffer.reset() + self.level = self.original_level def named_insertion_point(self, name): setattr(self, name, self.insertion_point()) diff --git a/Cython/Compiler/CythonScope.py b/Cython/Compiler/CythonScope.py index 1c25d1a6b..f73be0070 100644 --- a/Cython/Compiler/CythonScope.py +++ b/Cython/Compiler/CythonScope.py @@ -6,6 +6,7 @@ from .UtilityCode import CythonUtilityCode from .Errors import error from .Scanning import StringSourceDescriptor from . 
import MemoryView +from .StringEncoding import EncodedString class CythonScope(ModuleScope): @@ -50,7 +51,7 @@ class CythonScope(ModuleScope): def find_module(self, module_name, pos): error("cython.%s is not available" % module_name, pos) - def find_submodule(self, module_name): + def find_submodule(self, module_name, as_package=False): entry = self.entries.get(module_name, None) if not entry: self.load_cythonscope() @@ -125,10 +126,26 @@ class CythonScope(ModuleScope): view_utility_scope = MemoryView.view_utility_code.declare_in_scope( self.viewscope, cython_scope=self, - whitelist=MemoryView.view_utility_whitelist) + allowlist=MemoryView.view_utility_allowlist) + + # Marks the types as being cython_builtin_type so that they can be + # extended from without Cython attempting to import cython.view + ext_types = [ entry.type + for entry in view_utility_scope.entries.values() + if entry.type.is_extension_type ] + for ext_type in ext_types: + ext_type.is_cython_builtin_type = 1 # self.entries["array"] = view_utility_scope.entries.pop("array") + # dataclasses scope + dc_str = EncodedString(u'dataclasses') + dataclassesscope = ModuleScope(dc_str, self, context=None) + self.declare_module(dc_str, dataclassesscope, pos=None).as_module = dataclassesscope + dataclassesscope.is_cython_builtin = True + dataclassesscope.pxd_file_loaded = True + # doesn't actually have any contents + def create_cython_scope(context): # One could in fact probably make it a singleton, diff --git a/Cython/Compiler/Dataclass.py b/Cython/Compiler/Dataclass.py new file mode 100644 index 000000000..e775e9182 --- /dev/null +++ b/Cython/Compiler/Dataclass.py @@ -0,0 +1,840 @@ +# functions to transform a c class into a dataclass + +from collections import OrderedDict +from textwrap import dedent +import operator + +from . import ExprNodes +from . import Nodes +from . import PyrexTypes +from . import Builtin +from . 
import Naming +from .Errors import error, warning +from .Code import UtilityCode, TempitaUtilityCode, PyxCodeWriter +from .Visitor import VisitorTransform +from .StringEncoding import EncodedString +from .TreeFragment import TreeFragment +from .ParseTreeTransforms import NormalizeTree, SkipDeclarations +from .Options import copy_inherited_directives + +_dataclass_loader_utilitycode = None + +def make_dataclasses_module_callnode(pos): + global _dataclass_loader_utilitycode + if not _dataclass_loader_utilitycode: + python_utility_code = UtilityCode.load_cached("Dataclasses_fallback", "Dataclasses.py") + python_utility_code = EncodedString(python_utility_code.impl) + _dataclass_loader_utilitycode = TempitaUtilityCode.load( + "SpecificModuleLoader", "Dataclasses.c", + context={'cname': "dataclasses", 'py_code': python_utility_code.as_c_string_literal()}) + return ExprNodes.PythonCapiCallNode( + pos, "__Pyx_Load_dataclasses_Module", + PyrexTypes.CFuncType(PyrexTypes.py_object_type, []), + utility_code=_dataclass_loader_utilitycode, + args=[], + ) + +def make_dataclass_call_helper(pos, callable, kwds): + utility_code = UtilityCode.load_cached("DataclassesCallHelper", "Dataclasses.c") + func_type = PyrexTypes.CFuncType( + PyrexTypes.py_object_type, [ + PyrexTypes.CFuncTypeArg("callable", PyrexTypes.py_object_type, None), + PyrexTypes.CFuncTypeArg("kwds", PyrexTypes.py_object_type, None) + ], + ) + return ExprNodes.PythonCapiCallNode( + pos, + function_name="__Pyx_DataclassesCallHelper", + func_type=func_type, + utility_code=utility_code, + args=[callable, kwds], + ) + + +class RemoveAssignmentsToNames(VisitorTransform, SkipDeclarations): + """ + Cython (and Python) normally treats + + class A: + x = 1 + + as generating a class attribute. However for dataclasses the `= 1` should be interpreted as + a default value to initialize an instance attribute with. 
+ This transform therefore removes the `x=1` assignment so that the class attribute isn't + generated, while recording what it has removed so that it can be used in the initialization. + """ + def __init__(self, names): + super(RemoveAssignmentsToNames, self).__init__() + self.names = names + self.removed_assignments = {} + + def visit_CClassNode(self, node): + self.visitchildren(node) + return node + + def visit_PyClassNode(self, node): + return node # go no further + + def visit_FuncDefNode(self, node): + return node # go no further + + def visit_SingleAssignmentNode(self, node): + if node.lhs.is_name and node.lhs.name in self.names: + if node.lhs.name in self.removed_assignments: + warning(node.pos, ("Multiple assignments for '%s' in dataclass; " + "using most recent") % node.lhs.name, 1) + self.removed_assignments[node.lhs.name] = node.rhs + return [] + return node + + # I believe cascaded assignment is always a syntax error with annotations + # so there's no need to define visit_CascadedAssignmentNode + + def visit_Node(self, node): + self.visitchildren(node) + return node + + +class TemplateCode(object): + """ + Adds the ability to keep track of placeholder argument names to PyxCodeWriter. + + Also adds extra_stats which are nodes bundled at the end when this + is converted to a tree. 
+ """ + _placeholder_count = 0 + + def __init__(self, writer=None, placeholders=None, extra_stats=None): + self.writer = PyxCodeWriter() if writer is None else writer + self.placeholders = {} if placeholders is None else placeholders + self.extra_stats = [] if extra_stats is None else extra_stats + + def add_code_line(self, code_line): + self.writer.putln(code_line) + + def add_code_lines(self, code_lines): + for line in code_lines: + self.writer.putln(line) + + def reset(self): + # don't attempt to reset placeholders - it really doesn't matter if + # we have unused placeholders + self.writer.reset() + + def empty(self): + return self.writer.empty() + + def indenter(self): + return self.writer.indenter() + + def new_placeholder(self, field_names, value): + name = self._new_placeholder_name(field_names) + self.placeholders[name] = value + return name + + def add_extra_statements(self, statements): + if self.extra_stats is None: + assert False, "Can only use add_extra_statements on top-level writer" + self.extra_stats.extend(statements) + + def _new_placeholder_name(self, field_names): + while True: + name = "DATACLASS_PLACEHOLDER_%d" % self._placeholder_count + if (name not in self.placeholders + and name not in field_names): + # make sure name isn't already used and doesn't + # conflict with a variable name (which is unlikely but possible) + break + self._placeholder_count += 1 + return name + + def generate_tree(self, level='c_class'): + stat_list_node = TreeFragment( + self.writer.getvalue(), + level=level, + pipeline=[NormalizeTree(None)], + ).substitute(self.placeholders) + + stat_list_node.stats += self.extra_stats + return stat_list_node + + def insertion_point(self): + new_writer = self.writer.insertion_point() + return TemplateCode( + writer=new_writer, + placeholders=self.placeholders, + extra_stats=self.extra_stats + ) + + +class _MISSING_TYPE(object): + pass +MISSING = _MISSING_TYPE() + + +class Field(object): + """ + Field is based on the 
dataclasses.field class from the standard library module. + It is used internally during the generation of Cython dataclasses to keep track + of the settings for individual attributes. + + Attributes of this class are stored as nodes so they can be used in code construction + more readily (i.e. we store BoolNode rather than bool) + """ + default = MISSING + default_factory = MISSING + private = False + + literal_keys = ("repr", "hash", "init", "compare", "metadata") + + # default values are defined by the CPython dataclasses.field + def __init__(self, pos, default=MISSING, default_factory=MISSING, + repr=None, hash=None, init=None, + compare=None, metadata=None, + is_initvar=False, is_classvar=False, + **additional_kwds): + if default is not MISSING: + self.default = default + if default_factory is not MISSING: + self.default_factory = default_factory + self.repr = repr or ExprNodes.BoolNode(pos, value=True) + self.hash = hash or ExprNodes.NoneNode(pos) + self.init = init or ExprNodes.BoolNode(pos, value=True) + self.compare = compare or ExprNodes.BoolNode(pos, value=True) + self.metadata = metadata or ExprNodes.NoneNode(pos) + self.is_initvar = is_initvar + self.is_classvar = is_classvar + + for k, v in additional_kwds.items(): + # There should not be any additional keywords! 
+ error(v.pos, "cython.dataclasses.field() got an unexpected keyword argument '%s'" % k) + + for field_name in self.literal_keys: + field_value = getattr(self, field_name) + if not field_value.is_literal: + error(field_value.pos, + "cython.dataclasses.field parameter '%s' must be a literal value" % field_name) + + def iterate_record_node_arguments(self): + for key in (self.literal_keys + ('default', 'default_factory')): + value = getattr(self, key) + if value is not MISSING: + yield key, value + + +def process_class_get_fields(node): + var_entries = node.scope.var_entries + # order of definition is used in the dataclass + var_entries = sorted(var_entries, key=operator.attrgetter('pos')) + var_names = [entry.name for entry in var_entries] + + # don't treat `x = 1` as an assignment of a class attribute within the dataclass + transform = RemoveAssignmentsToNames(var_names) + transform(node) + default_value_assignments = transform.removed_assignments + + base_type = node.base_type + fields = OrderedDict() + while base_type: + if base_type.is_external or not base_type.scope.implemented: + warning(node.pos, "Cannot reliably handle Cython dataclasses with base types " + "in external modules since it is not possible to tell what fields they have", 2) + if base_type.dataclass_fields: + fields = base_type.dataclass_fields.copy() + break + base_type = base_type.base_type + + for entry in var_entries: + name = entry.name + is_initvar = entry.declared_with_pytyping_modifier("dataclasses.InitVar") + # TODO - classvars aren't included in "var_entries" so are missed here + # and thus this code is never triggered + is_classvar = entry.declared_with_pytyping_modifier("typing.ClassVar") + if name in default_value_assignments: + assignment = default_value_assignments[name] + if (isinstance(assignment, ExprNodes.CallNode) + and assignment.function.as_cython_attribute() == "dataclasses.field"): + # I believe most of this is well-enforced when it's treated as a directive + # but it 
doesn't hurt to make sure + valid_general_call = (isinstance(assignment, ExprNodes.GeneralCallNode) + and isinstance(assignment.positional_args, ExprNodes.TupleNode) + and not assignment.positional_args.args + and (assignment.keyword_args is None or isinstance(assignment.keyword_args, ExprNodes.DictNode))) + valid_simple_call = (isinstance(assignment, ExprNodes.SimpleCallNode) and not assignment.args) + if not (valid_general_call or valid_simple_call): + error(assignment.pos, "Call to 'cython.dataclasses.field' must only consist " + "of compile-time keyword arguments") + continue + keyword_args = assignment.keyword_args.as_python_dict() if valid_general_call and assignment.keyword_args else {} + if 'default' in keyword_args and 'default_factory' in keyword_args: + error(assignment.pos, "cannot specify both default and default_factory") + continue + field = Field(node.pos, **keyword_args) + else: + if isinstance(assignment, ExprNodes.CallNode): + func = assignment.function + if ((func.is_name and func.name == "field") + or (func.is_attribute and func.attribute == "field")): + warning(assignment.pos, "Do you mean cython.dataclasses.field instead?", 1) + if assignment.type in [Builtin.list_type, Builtin.dict_type, Builtin.set_type]: + # The standard library module generates a TypeError at runtime + # in this situation. 
+ # Error message is copied from CPython + error(assignment.pos, "mutable default <class '{0}'> for field {1} is not allowed: " + "use default_factory".format(assignment.type.name, name)) + + field = Field(node.pos, default=assignment) + else: + field = Field(node.pos) + field.is_initvar = is_initvar + field.is_classvar = is_classvar + if entry.visibility == "private": + field.private = True + fields[name] = field + node.entry.type.dataclass_fields = fields + return fields + + +def handle_cclass_dataclass(node, dataclass_args, analyse_decs_transform): + # default argument values from https://docs.python.org/3/library/dataclasses.html + kwargs = dict(init=True, repr=True, eq=True, + order=False, unsafe_hash=False, + frozen=False, kw_only=False) + if dataclass_args is not None: + if dataclass_args[0]: + error(node.pos, "cython.dataclasses.dataclass takes no positional arguments") + for k, v in dataclass_args[1].items(): + if k not in kwargs: + error(node.pos, + "cython.dataclasses.dataclass() got an unexpected keyword argument '%s'" % k) + if not isinstance(v, ExprNodes.BoolNode): + error(node.pos, + "Arguments passed to cython.dataclasses.dataclass must be True or False") + kwargs[k] = v.value + + kw_only = kwargs['kw_only'] + + fields = process_class_get_fields(node) + + dataclass_module = make_dataclasses_module_callnode(node.pos) + + # create __dataclass_params__ attribute. I try to use the exact + # `_DataclassParams` class defined in the standard library module if at all possible + # for maximum duck-typing compatibility. 
+ dataclass_params_func = ExprNodes.AttributeNode(node.pos, obj=dataclass_module, + attribute=EncodedString("_DataclassParams")) + dataclass_params_keywords = ExprNodes.DictNode.from_pairs( + node.pos, + [ (ExprNodes.IdentifierStringNode(node.pos, value=EncodedString(k)), + ExprNodes.BoolNode(node.pos, value=v)) + for k, v in kwargs.items() ] + + [ (ExprNodes.IdentifierStringNode(node.pos, value=EncodedString(k)), + ExprNodes.BoolNode(node.pos, value=v)) + for k, v in [('kw_only', kw_only), ('match_args', False), + ('slots', False), ('weakref_slot', False)] + ]) + dataclass_params = make_dataclass_call_helper( + node.pos, dataclass_params_func, dataclass_params_keywords) + dataclass_params_assignment = Nodes.SingleAssignmentNode( + node.pos, + lhs = ExprNodes.NameNode(node.pos, name=EncodedString("__dataclass_params__")), + rhs = dataclass_params) + + dataclass_fields_stats = _set_up_dataclass_fields(node, fields, dataclass_module) + + stats = Nodes.StatListNode(node.pos, + stats=[dataclass_params_assignment] + dataclass_fields_stats) + + code = TemplateCode() + generate_init_code(code, kwargs['init'], node, fields, kw_only) + generate_repr_code(code, kwargs['repr'], node, fields) + generate_eq_code(code, kwargs['eq'], node, fields) + generate_order_code(code, kwargs['order'], node, fields) + generate_hash_code(code, kwargs['unsafe_hash'], kwargs['eq'], kwargs['frozen'], node, fields) + + stats.stats += code.generate_tree().stats + + # turn off annotation typing, so all arguments to __init__ are accepted as + # generic objects and thus can accept _HAS_DEFAULT_FACTORY. 
+ # Type conversion comes later + comp_directives = Nodes.CompilerDirectivesNode(node.pos, + directives=copy_inherited_directives(node.scope.directives, annotation_typing=False), + body=stats) + + comp_directives.analyse_declarations(node.scope) + # probably already in this scope, but it doesn't hurt to make sure + analyse_decs_transform.enter_scope(node, node.scope) + analyse_decs_transform.visit(comp_directives) + analyse_decs_transform.exit_scope() + + node.body.stats.append(comp_directives) + + +def generate_init_code(code, init, node, fields, kw_only): + """ + Notes on CPython generated "__init__": + * Implemented in `_init_fn`. + * The use of the `dataclasses._HAS_DEFAULT_FACTORY` sentinel value as + the default argument for fields that need constructing with a factory + function is copied from the CPython implementation. (`None` isn't + suitable because it could also be a value for the user to pass.) + There's no real reason why it needs importing from the dataclasses module + though - it could equally be a value generated by Cython when the module loads. + * seen_default and the associated error message are copied directly from Python + * Call to user-defined __post_init__ function (if it exists) is copied from + CPython. + + Cython behaviour deviates a little here (to be decided if this is right...) + Because the class variable from the assignment does not exist Cython fields will + return None (or whatever their type default is) if not initialized while Python + dataclasses will fall back to looking up the class variable. 
+ """ + if not init or node.scope.lookup_here("__init__"): + return + + # selfname behaviour copied from the cpython module + selfname = "__dataclass_self__" if "self" in fields else "self" + args = [selfname] + + if kw_only: + args.append("*") + + function_start_point = code.insertion_point() + code = code.insertion_point() + + # create a temp to get _HAS_DEFAULT_FACTORY + dataclass_module = make_dataclasses_module_callnode(node.pos) + has_default_factory = ExprNodes.AttributeNode( + node.pos, + obj=dataclass_module, + attribute=EncodedString("_HAS_DEFAULT_FACTORY") + ) + + default_factory_placeholder = code.new_placeholder(fields, has_default_factory) + + seen_default = False + for name, field in fields.items(): + entry = node.scope.lookup(name) + if entry.annotation: + annotation = u": %s" % entry.annotation.string.value + else: + annotation = u"" + assignment = u'' + if field.default is not MISSING or field.default_factory is not MISSING: + seen_default = True + if field.default_factory is not MISSING: + ph_name = default_factory_placeholder + else: + ph_name = code.new_placeholder(fields, field.default) # 'default' should be a node + assignment = u" = %s" % ph_name + elif seen_default and not kw_only and field.init.value: + error(entry.pos, ("non-default argument '%s' follows default argument " + "in dataclass __init__") % name) + code.reset() + return + + if field.init.value: + args.append(u"%s%s%s" % (name, annotation, assignment)) + + if field.is_initvar: + continue + elif field.default_factory is MISSING: + if field.init.value: + code.add_code_line(u" %s.%s = %s" % (selfname, name, name)) + elif assignment: + # not an argument to the function, but is still initialized + code.add_code_line(u" %s.%s%s" % (selfname, name, assignment)) + else: + ph_name = code.new_placeholder(fields, field.default_factory) + if field.init.value: + # close to: + # def __init__(self, name=_PLACEHOLDER_VALUE): + # self.name = name_default_factory() if name is _PLACEHOLDER_VALUE 
else name + code.add_code_line(u" %s.%s = %s() if %s is %s else %s" % ( + selfname, name, ph_name, name, default_factory_placeholder, name)) + else: + # still need to use the default factory to initialize + code.add_code_line(u" %s.%s = %s()" % ( + selfname, name, ph_name)) + + if node.scope.lookup("__post_init__"): + post_init_vars = ", ".join(name for name, field in fields.items() + if field.is_initvar) + code.add_code_line(" %s.__post_init__(%s)" % (selfname, post_init_vars)) + + if code.empty(): + code.add_code_line(" pass") + + args = u", ".join(args) + function_start_point.add_code_line(u"def __init__(%s):" % args) + + +def generate_repr_code(code, repr, node, fields): + """ + The core of the CPython implementation is just: + ['return self.__class__.__qualname__ + f"(' + + ', '.join([f"{f.name}={{self.{f.name}!r}}" + for f in fields]) + + ')"'], + + The only notable difference here is self.__class__.__qualname__ -> type(self).__name__ + which is because Cython currently supports Python 2. + + However, it also has some guards for recursive repr invokations. In the standard + library implementation they're done with a wrapper decorator that captures a set + (with the set keyed by id and thread). Here we create a set as a thread local + variable and key only by id. + """ + if not repr or node.scope.lookup("__repr__"): + return + + # The recursive guard is likely a little costly, so skip it if possible. 
# is_gc_simple defines whether it can contain recursive objects
attributes can be converted to Python objects and stored in a tuple + # + # TODO - better diagnostics of whether the types support comparison before + # generating the code. Plus, do we want to convert C structs to dicts and + # compare them that way (I think not, but it might be in demand)? + checks = [] + for name in names: + checks.append("(self.%s %s other_cast.%s)" % ( + name, op, name)) + + if checks: + code.add_code_line(" return " + " and ".join(checks)) + else: + if "=" in op: + code.add_code_line(" return True") # "() == ()" is True + else: + code.add_code_line(" return False") + + +def generate_eq_code(code, eq, node, fields): + if not eq: + return + generate_cmp_code(code, "==", "__eq__", node, fields) + + +def generate_order_code(code, order, node, fields): + if not order: + return + + for op, name in [("<", "__lt__"), + ("<=", "__le__"), + (">", "__gt__"), + (">=", "__ge__")]: + generate_cmp_code(code, op, name, node, fields) + + +def generate_hash_code(code, unsafe_hash, eq, frozen, node, fields): + """ + Copied from CPython implementation - the intention is to follow this as far as + is possible: + # +------------------- unsafe_hash= parameter + # | +----------- eq= parameter + # | | +--- frozen= parameter + # | | | + # v v v | | | + # | no | yes | <--- class has explicitly defined __hash__ + # +=======+=======+=======+========+========+ + # | False | False | False | | | No __eq__, use the base class __hash__ + # +-------+-------+-------+--------+--------+ + # | False | False | True | | | No __eq__, use the base class __hash__ + # +-------+-------+-------+--------+--------+ + # | False | True | False | None | | <-- the default, not hashable + # +-------+-------+-------+--------+--------+ + # | False | True | True | add | | Frozen, so hashable, allows override + # +-------+-------+-------+--------+--------+ + # | True | False | False | add | raise | Has no __eq__, but hashable + # +-------+-------+-------+--------+--------+ + # | True | False | True 
| add | raise | Has no __eq__, but hashable + # +-------+-------+-------+--------+--------+ + # | True | True | False | add | raise | Not frozen, but hashable + # +-------+-------+-------+--------+--------+ + # | True | True | True | add | raise | Frozen, so hashable + # +=======+=======+=======+========+========+ + # For boxes that are blank, __hash__ is untouched and therefore + # inherited from the base class. If the base is object, then + # id-based hashing is used. + + The Python implementation creates a tuple of all the fields, then hashes them. + This implementation creates a tuple of all the hashes of all the fields and hashes that. + The reason for this slight difference is to avoid to-Python conversions for anything + that Cython knows how to hash directly (It doesn't look like this currently applies to + anything though...). + """ + + hash_entry = node.scope.lookup_here("__hash__") + if hash_entry: + # TODO ideally assignment of __hash__ to None shouldn't trigger this + # but difficult to get the right information here + if unsafe_hash: + # error message taken from CPython dataclasses module + error(node.pos, "Cannot overwrite attribute __hash__ in class %s" % node.class_name) + return + + if not unsafe_hash: + if not eq: + return + if not frozen: + code.add_extra_statements([ + Nodes.SingleAssignmentNode( + node.pos, + lhs=ExprNodes.NameNode(node.pos, name=EncodedString("__hash__")), + rhs=ExprNodes.NoneNode(node.pos), + ) + ]) + return + + names = [ + name for name, field in fields.items() + if not field.is_initvar and ( + field.compare.value if field.hash.value is None else field.hash.value) + ] + + # make a tuple of the hashes + hash_tuple_items = u", ".join(u"self.%s" % name for name in names) + if hash_tuple_items: + hash_tuple_items += u"," # ensure that one arg form is a tuple + + # if we're here we want to generate a hash + code.add_code_lines([ + "def __hash__(self):", + " return hash((%s))" % hash_tuple_items, + ]) + + +def get_field_type(pos, 
entry): + """ + sets the .type attribute for a field + + Returns the annotation if possible (since this is what the dataclasses + module does). If not (for example, attributes defined with cdef) then + it creates a string fallback. + """ + if entry.annotation: + # Right now it doesn't look like cdef classes generate an + # __annotations__ dict, therefore it's safe to just return + # entry.annotation + # (TODO: remove .string if we ditch PEP563) + return entry.annotation.string + # If they do in future then we may need to look up into that + # to duplicating the node. The code below should do this: + #class_name_node = ExprNodes.NameNode(pos, name=entry.scope.name) + #annotations = ExprNodes.AttributeNode( + # pos, obj=class_name_node, + # attribute=EncodedString("__annotations__") + #) + #return ExprNodes.IndexNode( + # pos, base=annotations, + # index=ExprNodes.StringNode(pos, value=entry.name) + #) + else: + # it's slightly unclear what the best option is here - we could + # try to return PyType_Type. This case should only happen with + # attributes defined with cdef so Cython is free to make it's own + # decision + s = EncodedString(entry.type.declaration_code("", for_display=1)) + return ExprNodes.StringNode(pos, value=s) + + +class FieldRecordNode(ExprNodes.ExprNode): + """ + __dataclass_fields__ contains a bunch of field objects recording how each field + of the dataclass was initialized (mainly corresponding to the arguments passed to + the "field" function). This node is used for the attributes of these field objects. + + If possible, coerces `arg` to a Python object. + Otherwise, generates a sensible backup string. 
+ """ + subexprs = ['arg'] + + def __init__(self, pos, arg): + super(FieldRecordNode, self).__init__(pos, arg=arg) + + def analyse_types(self, env): + self.arg.analyse_types(env) + self.type = self.arg.type + return self + + def coerce_to_pyobject(self, env): + if self.arg.type.can_coerce_to_pyobject(env): + return self.arg.coerce_to_pyobject(env) + else: + # A string representation of the code that gave the field seems like a reasonable + # fallback. This'll mostly happen for "default" and "default_factory" where the + # type may be a C-type that can't be converted to Python. + return self._make_string() + + def _make_string(self): + from .AutoDocTransforms import AnnotationWriter + writer = AnnotationWriter(description="Dataclass field") + string = writer.write(self.arg) + return ExprNodes.StringNode(self.pos, value=EncodedString(string)) + + def generate_evaluation_code(self, code): + return self.arg.generate_evaluation_code(code) + + +def _set_up_dataclass_fields(node, fields, dataclass_module): + # For defaults and default_factories containing things like lambda, + # they're already declared in the class scope, and it creates a big + # problem if multiple copies are floating around in both the __init__ + # function, and in the __dataclass_fields__ structure. + # Therefore, create module-level constants holding these values and + # pass those around instead + # + # If possible we use the `Field` class defined in the standard library + # module so that the information stored here is as close to a regular + # dataclass as is possible. 
+ variables_assignment_stats = [] + for name, field in fields.items(): + if field.private: + continue # doesn't appear in the public interface + for attrname in [ "default", "default_factory" ]: + field_default = getattr(field, attrname) + if field_default is MISSING or field_default.is_literal or field_default.is_name: + # some simple cases where we don't need to set up + # the variable as a module-level constant + continue + global_scope = node.scope.global_scope() + module_field_name = global_scope.mangle( + global_scope.mangle(Naming.dataclass_field_default_cname, node.class_name), + name) + # create an entry in the global scope for this variable to live + field_node = ExprNodes.NameNode(field_default.pos, name=EncodedString(module_field_name)) + field_node.entry = global_scope.declare_var( + field_node.name, type=field_default.type or PyrexTypes.unspecified_type, + pos=field_default.pos, cname=field_node.name, is_cdef=True, + # TODO: do we need to set 'pytyping_modifiers' here? + ) + # replace the field so that future users just receive the namenode + setattr(field, attrname, field_node) + + variables_assignment_stats.append( + Nodes.SingleAssignmentNode(field_default.pos, lhs=field_node, rhs=field_default)) + + placeholders = {} + field_func = ExprNodes.AttributeNode(node.pos, obj=dataclass_module, + attribute=EncodedString("field")) + dc_fields = ExprNodes.DictNode(node.pos, key_value_pairs=[]) + dc_fields_namevalue_assignments = [] + + for name, field in fields.items(): + if field.private: + continue # doesn't appear in the public interface + type_placeholder_name = "PLACEHOLDER_%s" % name + placeholders[type_placeholder_name] = get_field_type( + node.pos, node.scope.entries[name] + ) + + # defining these make the fields introspect more like a Python dataclass + field_type_placeholder_name = "PLACEHOLDER_FIELD_TYPE_%s" % name + if field.is_initvar: + placeholders[field_type_placeholder_name] = ExprNodes.AttributeNode( + node.pos, obj=dataclass_module, + 
attribute=EncodedString("_FIELD_INITVAR") + ) + elif field.is_classvar: + # TODO - currently this isn't triggered + placeholders[field_type_placeholder_name] = ExprNodes.AttributeNode( + node.pos, obj=dataclass_module, + attribute=EncodedString("_FIELD_CLASSVAR") + ) + else: + placeholders[field_type_placeholder_name] = ExprNodes.AttributeNode( + node.pos, obj=dataclass_module, + attribute=EncodedString("_FIELD") + ) + + dc_field_keywords = ExprNodes.DictNode.from_pairs( + node.pos, + [(ExprNodes.IdentifierStringNode(node.pos, value=EncodedString(k)), + FieldRecordNode(node.pos, arg=v)) + for k, v in field.iterate_record_node_arguments()] + + ) + dc_field_call = make_dataclass_call_helper( + node.pos, field_func, dc_field_keywords + ) + dc_fields.key_value_pairs.append( + ExprNodes.DictItemNode( + node.pos, + key=ExprNodes.IdentifierStringNode(node.pos, value=EncodedString(name)), + value=dc_field_call)) + dc_fields_namevalue_assignments.append( + dedent(u"""\ + __dataclass_fields__[{0!r}].name = {0!r} + __dataclass_fields__[{0!r}].type = {1} + __dataclass_fields__[{0!r}]._field_type = {2} + """).format(name, type_placeholder_name, field_type_placeholder_name)) + + dataclass_fields_assignment = \ + Nodes.SingleAssignmentNode(node.pos, + lhs = ExprNodes.NameNode(node.pos, + name=EncodedString("__dataclass_fields__")), + rhs = dc_fields) + + dc_fields_namevalue_assignments = u"\n".join(dc_fields_namevalue_assignments) + dc_fields_namevalue_assignments = TreeFragment(dc_fields_namevalue_assignments, + level="c_class", + pipeline=[NormalizeTree(None)]) + dc_fields_namevalue_assignments = dc_fields_namevalue_assignments.substitute(placeholders) + + return (variables_assignment_stats + + [dataclass_fields_assignment] + + dc_fields_namevalue_assignments.stats) diff --git a/Cython/Compiler/Errors.py b/Cython/Compiler/Errors.py index 9761b52c3..bde320732 100644 --- a/Cython/Compiler/Errors.py +++ b/Cython/Compiler/Errors.py @@ -12,6 +12,13 @@ except ImportError: import sys 
from contextlib import contextmanager +try: + from threading import local as _threadlocal +except ImportError: + class _threadlocal(object): pass + +threadlocal = _threadlocal() + from ..Utils import open_new_file from . import DebugFlags from . import Options @@ -24,6 +31,8 @@ class PyrexError(Exception): class PyrexWarning(Exception): pass +class CannotSpecialize(PyrexError): + pass def context(position): source = position[0] @@ -36,7 +45,7 @@ def context(position): s = u"[unprintable code]\n" else: s = u''.join(F[max(0, position[1]-6):position[1]]) - s = u'...\n%s%s^\n' % (s, u' '*(position[2]-1)) + s = u'...\n%s%s^\n' % (s, u' '*(position[2])) s = u'%s\n%s%s\n' % (u'-'*60, s, u'-'*60) return s @@ -60,11 +69,9 @@ class CompileError(PyrexError): self.message_only = message self.formatted_message = format_error(message, position) self.reported = False - # Deprecated and withdrawn in 2.6: - # self.message = message Exception.__init__(self, self.formatted_message) # Python Exception subclass pickling is broken, - # see http://bugs.python.org/issue1692335 + # see https://bugs.python.org/issue1692335 self.args = (position, message) def __str__(self): @@ -74,8 +81,6 @@ class CompileWarning(PyrexWarning): def __init__(self, position = None, message = ""): self.position = position - # Deprecated and withdrawn in 2.6: - # self.message = message Exception.__init__(self, format_position(position) + message) class InternalError(Exception): @@ -114,7 +119,7 @@ class CompilerCrash(CompileError): message += u'%s: %s' % (cause.__class__.__name__, cause) CompileError.__init__(self, pos, message) # Python Exception subclass pickling is broken, - # see http://bugs.python.org/issue1692335 + # see https://bugs.python.org/issue1692335 self.args = (pos, context, message, cause, stacktrace) class NoElementTreeInstalledException(PyrexError): @@ -122,35 +127,29 @@ class NoElementTreeInstalledException(PyrexError): implementation was found """ -listing_file = None -num_errors = 0 
-echo_file = None - -def open_listing_file(path, echo_to_stderr = 1): +def open_listing_file(path, echo_to_stderr=True): # Begin a new error listing. If path is None, no file # is opened, the error counter is just reset. - global listing_file, num_errors, echo_file if path is not None: - listing_file = open_new_file(path) + threadlocal.cython_errors_listing_file = open_new_file(path) else: - listing_file = None + threadlocal.cython_errors_listing_file = None if echo_to_stderr: - echo_file = sys.stderr + threadlocal.cython_errors_echo_file = sys.stderr else: - echo_file = None - num_errors = 0 + threadlocal.cython_errors_echo_file = None + threadlocal.cython_errors_count = 0 def close_listing_file(): - global listing_file - if listing_file: - listing_file.close() - listing_file = None + if threadlocal.cython_errors_listing_file: + threadlocal.cython_errors_listing_file.close() + threadlocal.cython_errors_listing_file = None def report_error(err, use_stack=True): + error_stack = threadlocal.cython_errors_stack if error_stack and use_stack: error_stack[-1].append(err) else: - global num_errors # See Main.py for why dual reporting occurs. Quick fix for now. 
if err.reported: return err.reported = True @@ -159,41 +158,50 @@ def report_error(err, use_stack=True): # Python <= 2.5 does this for non-ASCII Unicode exceptions line = format_error(getattr(err, 'message_only', "[unprintable exception message]"), getattr(err, 'position', None)) + u'\n' + listing_file = threadlocal.cython_errors_listing_file if listing_file: try: listing_file.write(line) except UnicodeEncodeError: listing_file.write(line.encode('ASCII', 'replace')) + echo_file = threadlocal.cython_errors_echo_file if echo_file: try: echo_file.write(line) except UnicodeEncodeError: echo_file.write(line.encode('ASCII', 'replace')) - num_errors += 1 + threadlocal.cython_errors_count += 1 if Options.fast_fail: raise AbortError("fatal errors") - def error(position, message): #print("Errors.error:", repr(position), repr(message)) ### if position is None: raise InternalError(message) err = CompileError(position, message) - if DebugFlags.debug_exception_on_error: raise Exception(err) # debug + if DebugFlags.debug_exception_on_error: raise Exception(err) # debug report_error(err) return err -LEVEL = 1 # warn about all errors level 1 or higher +LEVEL = 1 # warn about all errors level 1 or higher + +def _write_file_encode(file, line): + try: + file.write(line) + except UnicodeEncodeError: + file.write(line.encode('ascii', 'replace')) def message(position, message, level=1): if level < LEVEL: return warn = CompileWarning(position, message) - line = "note: %s\n" % warn + line = u"note: %s\n" % warn + listing_file = threadlocal.cython_errors_listing_file if listing_file: - listing_file.write(line) + _write_file_encode(listing_file, line) + echo_file = threadlocal.cython_errors_echo_file if echo_file: - echo_file.write(line) + _write_file_encode(echo_file, line) return warn @@ -203,63 +211,76 @@ def warning(position, message, level=0): if Options.warning_errors and position: return error(position, message) warn = CompileWarning(position, message) - line = "warning: %s\n" % warn 
+ line = u"warning: %s\n" % warn + listing_file = threadlocal.cython_errors_listing_file if listing_file: - listing_file.write(line) + _write_file_encode(listing_file, line) + echo_file = threadlocal.cython_errors_echo_file if echo_file: - echo_file.write(line) + _write_file_encode(echo_file, line) return warn -_warn_once_seen = {} def warn_once(position, message, level=0): - if level < LEVEL or message in _warn_once_seen: + if level < LEVEL: + return + warn_once_seen = threadlocal.cython_errors_warn_once_seen + if message in warn_once_seen: return warn = CompileWarning(position, message) - line = "warning: %s\n" % warn + line = u"warning: %s\n" % warn + listing_file = threadlocal.cython_errors_listing_file if listing_file: - listing_file.write(line) + _write_file_encode(listing_file, line) + echo_file = threadlocal.cython_errors_echo_file if echo_file: - echo_file.write(line) - _warn_once_seen[message] = True + _write_file_encode(echo_file, line) + warn_once_seen[message] = True return warn # These functions can be used to momentarily suppress errors. -error_stack = [] - - def hold_errors(): - error_stack.append([]) + errors = [] + threadlocal.cython_errors_stack.append(errors) + return errors def release_errors(ignore=False): - held_errors = error_stack.pop() + held_errors = threadlocal.cython_errors_stack.pop() if not ignore: for err in held_errors: report_error(err) def held_errors(): - return error_stack[-1] + return threadlocal.cython_errors_stack[-1] # same as context manager: @contextmanager def local_errors(ignore=False): - errors = [] - error_stack.append(errors) + errors = hold_errors() try: yield errors finally: release_errors(ignore=ignore) -# this module needs a redesign to support parallel cythonisation, but -# for now, the following works at least in sequential compiler runs +# Keep all global state in thread local storage to support parallel cythonisation in distutils. 
+ +def init_thread(): + threadlocal.cython_errors_count = 0 + threadlocal.cython_errors_listing_file = None + threadlocal.cython_errors_echo_file = None + threadlocal.cython_errors_warn_once_seen = set() + threadlocal.cython_errors_stack = [] def reset(): - _warn_once_seen.clear() - del error_stack[:] + threadlocal.cython_errors_warn_once_seen.clear() + del threadlocal.cython_errors_stack[:] + +def get_errors_count(): + return threadlocal.cython_errors_count diff --git a/Cython/Compiler/ExprNodes.py b/Cython/Compiler/ExprNodes.py index 305eae9eb..242a97d6a 100644 --- a/Cython/Compiler/ExprNodes.py +++ b/Cython/Compiler/ExprNodes.py @@ -13,7 +13,8 @@ cython.declare(error=object, warning=object, warn_once=object, InternalError=obj unicode_type=object, str_type=object, bytes_type=object, type_type=object, Builtin=object, Symtab=object, Utils=object, find_coercion_error=object, debug_disposal_code=object, debug_temp_alloc=object, debug_coercion=object, - bytearray_type=object, slice_type=object, _py_int_types=object, + bytearray_type=object, slice_type=object, memoryview_type=object, + builtin_sequence_types=object, _py_int_types=object, IS_PYTHON3=cython.bint) import re @@ -23,26 +24,30 @@ import os.path import operator from .Errors import ( - error, warning, InternalError, CompileError, report_error, local_errors) + error, warning, InternalError, CompileError, report_error, local_errors, + CannotSpecialize) from .Code import UtilityCode, TempitaUtilityCode from . import StringEncoding from . import Naming from . import Nodes -from .Nodes import Node, utility_code_for_imports, analyse_type_annotation +from .Nodes import Node, utility_code_for_imports, SingleAssignmentNode from . import PyrexTypes -from .PyrexTypes import py_object_type, c_long_type, typecast, error_type, \ +from .PyrexTypes import py_object_type, typecast, error_type, \ unspecified_type from . 
import TypeSlots -from .Builtin import list_type, tuple_type, set_type, dict_type, type_type, \ - unicode_type, str_type, bytes_type, bytearray_type, basestring_type, slice_type +from .Builtin import ( + list_type, tuple_type, set_type, dict_type, type_type, + unicode_type, str_type, bytes_type, bytearray_type, basestring_type, + slice_type, long_type, sequence_types as builtin_sequence_types, memoryview_type, +) from . import Builtin from . import Symtab from .. import Utils from .Annotate import AnnotationItem from . import Future from ..Debugging import print_call_chain -from .DebugFlags import debug_disposal_code, debug_temp_alloc, \ - debug_coercion +from .DebugFlags import debug_disposal_code, debug_coercion + from .Pythran import (to_pythran, is_pythran_supported_type, is_pythran_supported_operation_type, is_pythran_expr, pythran_func_type, pythran_binop_type, pythran_unaryop_type, has_np_pythran, pythran_indexing_code, pythran_indexing_type, is_pythran_supported_node_or_none, pythran_type, @@ -182,7 +187,7 @@ def infer_sequence_item_type(env, seq_node, index_node=None, seq_type=None): else: return item.infer_type(env) # if we're lucky, all items have the same type - item_types = set([item.infer_type(env) for item in seq_node.args]) + item_types = {item.infer_type(env) for item in seq_node.args} if len(item_types) == 1: return item_types.pop() return None @@ -205,6 +210,11 @@ def make_dedup_key(outer_type, item_nodes): # For constants, look at the Python value type if we don't know the concrete Cython type. else (node.type, node.constant_result, type(node.constant_result) if node.type is py_object_type else None) if node.has_constant_result() + # IdentifierStringNode doesn't usually have a "constant_result" set because: + # 1. it doesn't usually have unicode_value + # 2. 
it's often created later in the compilation process after ConstantFolding + # but should be cacheable + else (node.type, node.value, node.unicode_value, "IdentifierStringNode") if isinstance(node, IdentifierStringNode) else None # something we cannot handle => short-circuit below for node in item_nodes ] @@ -237,19 +247,23 @@ def get_exception_handler(exception_value): exception_value.entry.cname), False) + def maybe_check_py_error(code, check_py_exception, pos, nogil): if check_py_exception: if nogil: + code.globalstate.use_utility_code( + UtilityCode.load_cached("ErrOccurredWithGIL", "Exceptions.c")) code.putln(code.error_goto_if("__Pyx_ErrOccurredWithGIL()", pos)) else: code.putln(code.error_goto_if("PyErr_Occurred()", pos)) + def translate_cpp_exception(code, pos, inside, py_result, exception_value, nogil): raise_py_exception, check_py_exception = get_exception_handler(exception_value) code.putln("try {") code.putln("%s" % inside) if py_result: - code.putln(code.error_goto_if_null(py_result, pos)) + code.putln(code.error_goto_if_null(py_result, pos)) maybe_check_py_error(code, check_py_exception, pos, nogil) code.putln("} catch(...) {") if nogil: @@ -260,10 +274,24 @@ def translate_cpp_exception(code, pos, inside, py_result, exception_value, nogil code.putln(code.error_goto(pos)) code.putln("}") +def needs_cpp_exception_conversion(node): + assert node.exception_check == "+" + if node.exception_value is None: + return True + # exception_value can be a NameNode + # (in which case it's used as a handler function and no conversion is needed) + if node.exception_value.is_name: + return False + # or a CharNode with a value of "*" + if isinstance(node.exception_value, CharNode) and node.exception_value.value == "*": + return True + # Most other const-nodes are disallowed after "+" by the parser + return False + + # Used to handle the case where an lvalue expression and an overloaded assignment # both have an exception declaration. 
-def translate_double_cpp_exception(code, pos, lhs_type, lhs_code, rhs_code, - lhs_exc_val, assign_exc_val, nogil): +def translate_double_cpp_exception(code, pos, lhs_type, lhs_code, rhs_code, lhs_exc_val, assign_exc_val, nogil): handle_lhs_exc, lhc_check_py_exc = get_exception_handler(lhs_exc_val) handle_assignment_exc, assignment_check_py_exc = get_exception_handler(assign_exc_val) code.putln("try {") @@ -301,23 +329,23 @@ class ExprNode(Node): # is_sequence_constructor # boolean Is a list or tuple constructor expression # is_starred boolean Is a starred expression (e.g. '*a') - # saved_subexpr_nodes - # [ExprNode or [ExprNode or None] or None] - # Cached result of subexpr_nodes() # use_managed_ref boolean use ref-counted temps/assignments/etc. # result_is_used boolean indicates that the result will be dropped and the # result_code/temp_result can safely be set to None # is_numpy_attribute boolean Is a Numpy module attribute # annotation ExprNode or None PEP526 annotation for names or expressions + # generator_arg_tag None or Node A tag to mark ExprNodes that potentially need to + # be changed to a generator argument result_ctype = None type = None annotation = None temp_code = None - old_temp = None # error checker for multiple frees etc. - use_managed_ref = True # can be set by optimisation transforms + old_temp = None # error checker for multiple frees etc. 
+ use_managed_ref = True # can be set by optimisation transforms result_is_used = True is_numpy_attribute = False + generator_arg_tag = None # The Analyse Expressions phase for expressions is split # into two sub-phases: @@ -446,8 +474,8 @@ class ExprNode(Node): is_memview_broadcast = False is_memview_copy_assignment = False - saved_subexpr_nodes = None is_temp = False + has_temp_moved = False # if True then attempting to do anything but free the temp is invalid is_target = False is_starred = False @@ -455,11 +483,13 @@ class ExprNode(Node): child_attrs = property(fget=operator.attrgetter('subexprs')) + def analyse_annotations(self, env): + pass + def not_implemented(self, method_name): - print_call_chain(method_name, "not implemented") ### + print_call_chain(method_name, "not implemented") raise InternalError( - "%s.%s not implemented" % - (self.__class__.__name__, method_name)) + "%s.%s not implemented" % (self.__class__.__name__, method_name)) def is_lvalue(self): return 0 @@ -498,11 +528,27 @@ class ExprNode(Node): else: return self.calculate_result_code() + def _make_move_result_rhs(self, result, optional=False): + if optional and not (self.is_temp and self.type.is_cpp_class and not self.type.is_reference): + return result + self.has_temp_moved = True + return "{}({})".format("__PYX_STD_MOVE_IF_SUPPORTED" if optional else "std::move", result) + + def move_result_rhs(self): + return self._make_move_result_rhs(self.result(), optional=True) + + def move_result_rhs_as(self, type): + result = self.result_as(type) + if not (type.is_reference or type.needs_refcounting): + requires_move = type.is_rvalue_reference and self.is_temp + result = self._make_move_result_rhs(result, optional=not requires_move) + return result + def pythran_result(self, type_=None): if is_pythran_supported_node_or_none(self): return to_pythran(self) - assert(type_ is not None) + assert type_ is not None return to_pythran(self, type_) def is_c_result_required(self): @@ -569,6 +615,9 @@ class 
ExprNode(Node): def analyse_target_declaration(self, env): error(self.pos, "Cannot assign to or delete this") + def analyse_assignment_expression_target_declaration(self, env): + error(self.pos, "Cannot use anything except a name in an assignment expression") + # ------------- Expression Analysis ---------------- def analyse_const_expression(self, env): @@ -614,7 +663,7 @@ class ExprNode(Node): def type_dependencies(self, env): # Returns the list of entries whose types must be determined # before the type of self can be inferred. - if hasattr(self, 'type') and self.type is not None: + if getattr(self, 'type', None) is not None: return () return sum([node.type_dependencies(env) for node in self.subexpr_nodes()], ()) @@ -623,12 +672,13 @@ class ExprNode(Node): # Differs from analyse_types as it avoids unnecessary # analysis of subexpressions, but can assume everything # in self.type_dependencies() has been resolved. - if hasattr(self, 'type') and self.type is not None: - return self.type - elif hasattr(self, 'entry') and self.entry is not None: - return self.entry.type - else: - self.not_implemented("infer_type") + type = getattr(self, 'type', None) + if type is not None: + return type + entry = getattr(self, 'entry', None) + if entry is not None: + return entry.type + self.not_implemented("infer_type") def nonlocally_immutable(self): # Returns whether this variable is a safe reference, i.e. @@ -655,6 +705,19 @@ class ExprNode(Node): # type, return that type, else None. 
return None + def analyse_as_specialized_type(self, env): + type = self.analyse_as_type(env) + if type and type.is_fused and env.fused_to_specific: + # while it would be nice to test "if entry.type in env.fused_to_specific" + # rather than try/catch this doesn't work reliably (mainly for nested fused types) + try: + return type.specialize(env.fused_to_specific) + except KeyError: + pass + if type and type.is_fused: + error(self.pos, "Type is not specific") + return type + def analyse_as_extension_type(self, env): # If this node can be interpreted as a reference to an # extension type or builtin type, return its type, else None. @@ -746,19 +809,20 @@ class ExprNode(Node): def make_owned_reference(self, code): """ - If result is a pyobject, make sure we own a reference to it. + Make sure we own a reference to result. If the result is in a temp, it is already a new reference. """ - if self.type.is_pyobject and not self.result_in_temp(): + if not self.result_in_temp(): code.put_incref(self.result(), self.ctype()) def make_owned_memoryviewslice(self, code): """ Make sure we own the reference to this memoryview slice. 
""" + # TODO ideally this would be shared with "make_owned_reference" if not self.result_in_temp(): - code.put_incref_memoryviewslice(self.result(), - have_gil=self.in_nogil_context) + code.put_incref_memoryviewslice(self.result(), self.type, + have_gil=not self.in_nogil_context) def generate_evaluation_code(self, code): # Generate code to evaluate this node and @@ -785,19 +849,17 @@ class ExprNode(Node): self.not_implemented("generate_result_code") def generate_disposal_code(self, code): + if self.has_temp_moved: + code.globalstate.use_utility_code( + UtilityCode.load_cached("MoveIfSupported", "CppSupport.cpp")) if self.is_temp: if self.type.is_string or self.type.is_pyunicode_ptr: # postponed from self.generate_evaluation_code() self.generate_subexpr_disposal_code(code) self.free_subexpr_temps(code) if self.result(): - if self.type.is_pyobject: - code.put_decref_clear(self.result(), self.ctype()) - elif self.type.is_memoryviewslice: - code.put_xdecref_memoryviewslice( - self.result(), have_gil=not self.in_nogil_context) - code.putln("%s.memview = NULL;" % self.result()) - code.putln("%s.data = NULL;" % self.result()) + code.put_decref_clear(self.result(), self.ctype(), + have_gil=not self.in_nogil_context) else: # Already done if self.is_temp self.generate_subexpr_disposal_code(code) @@ -819,11 +881,15 @@ class ExprNode(Node): elif self.type.is_memoryviewslice: code.putln("%s.memview = NULL;" % self.result()) code.putln("%s.data = NULL;" % self.result()) + + if self.has_temp_moved: + code.globalstate.use_utility_code( + UtilityCode.load_cached("MoveIfSupported", "CppSupport.cpp")) else: self.generate_subexpr_disposal_code(code) def generate_assignment_code(self, rhs, code, overloaded_assignment=False, - exception_check=None, exception_value=None): + exception_check=None, exception_value=None): # Stub method for nodes which are not legal as # the LHS of an assignment. An error will have # been reported earlier. 
@@ -849,6 +915,32 @@ class ExprNode(Node): def generate_function_definitions(self, env, code): pass + # ----Generation of small bits of reference counting -- + + def generate_decref_set(self, code, rhs): + code.put_decref_set(self.result(), self.ctype(), rhs) + + def generate_xdecref_set(self, code, rhs): + code.put_xdecref_set(self.result(), self.ctype(), rhs) + + def generate_gotref(self, code, handle_null=False, + maybe_null_extra_check=True): + if not (handle_null and self.cf_is_null): + if (handle_null and self.cf_maybe_null + and maybe_null_extra_check): + self.generate_xgotref(code) + else: + code.put_gotref(self.result(), self.ctype()) + + def generate_xgotref(self, code): + code.put_xgotref(self.result(), self.ctype()) + + def generate_giveref(self, code): + code.put_giveref(self.result(), self.ctype()) + + def generate_xgiveref(self, code): + code.put_xgiveref(self.result(), self.ctype()) + # ---------------- Annotation --------------------- def annotate(self, code): @@ -883,8 +975,8 @@ class ExprNode(Node): if used_as_reference and not src_type.is_reference: dst_type = dst_type.ref_base_type - if src_type.is_const: - src_type = src_type.const_base_type + if src_type.is_cv_qualified: + src_type = src_type.cv_base_type if src_type.is_fused or dst_type.is_fused: # See if we are coercing a fused function to a pointer to a @@ -970,7 +1062,12 @@ class ExprNode(Node): and src_type != dst_type and dst_type.assignable_from(src_type)): src = CoerceToComplexNode(src, dst_type, env) - else: # neither src nor dst are py types + elif (src_type is PyrexTypes.soft_complex_type + and src_type != dst_type + and not dst_type.assignable_from(src_type)): + src = coerce_from_soft_complex(src, dst_type, env) + else: + # neither src nor dst are py types # Added the string comparison, since for c types that # is enough, but Cython gets confused when the types are # in different pxi files. 
@@ -1010,6 +1107,8 @@ class ExprNode(Node): type = self.type if type.is_enum or type.is_error: return self + elif type is PyrexTypes.c_bint_type: + return self elif type.is_pyobject or type.is_int or type.is_ptr or type.is_float: return CoerceToBooleanNode(self, env) elif type.is_cpp_class and type.scope and type.scope.lookup("operator bool"): @@ -1090,6 +1189,15 @@ class ExprNode(Node): kwargs[attr_name] = value return cls(node.pos, **kwargs) + def get_known_standard_library_import(self): + """ + Gets the module.path that this node was imported from. + + Many nodes do not have one, or it is ambiguous, in which case + this function returns a false value. + """ + return None + class AtomicExprNode(ExprNode): # Abstract base class for expression nodes which have @@ -1108,6 +1216,7 @@ class PyConstNode(AtomicExprNode): is_literal = 1 type = py_object_type + nogil_check = None def is_simple(self): return 1 @@ -1133,8 +1242,6 @@ class NoneNode(PyConstNode): constant_result = None - nogil_check = None - def compile_time_value(self, denv): return None @@ -1204,7 +1311,7 @@ class BoolNode(ConstNode): def calculate_result_code(self): if self.type.is_pyobject: - return self.value and 'Py_True' or 'Py_False' + return 'Py_True' if self.value else 'Py_False' else: return str(int(self.value)) @@ -1256,7 +1363,7 @@ class IntNode(ConstNode): unsigned = "" longness = "" - is_c_literal = None # unknown + is_c_literal = None # unknown def __init__(self, pos, **kwds): ExprNode.__init__(self, pos, **kwds) @@ -1429,18 +1536,26 @@ class FloatNode(ConstNode): def _analyse_name_as_type(name, pos, env): - type = PyrexTypes.parse_basic_type(name) - if type is not None: - return type - - global_entry = env.global_scope().lookup(name) - if global_entry and global_entry.type and ( - global_entry.type.is_extension_type - or global_entry.type.is_struct_or_union - or global_entry.type.is_builtin_type - or global_entry.type.is_cpp_class): - return global_entry.type + ctype = 
PyrexTypes.parse_basic_type(name) + if ctype is not None and env.in_c_type_context: + return ctype + + global_scope = env.global_scope() + global_entry = global_scope.lookup(name) + if global_entry and global_entry.is_type: + type = global_entry.type + if (not env.in_c_type_context + and type is Builtin.int_type + and global_scope.context.language_level == 2): + # While we still support Python2 this needs to be downgraded + # to a generic Python object to include both int and long. + # With language_level > 3, we keep the type but also accept 'long' in Py2. + type = py_object_type + if type and (type.is_pyobject or env.in_c_type_context): + return type + ctype = ctype or type + # This is fairly heavy, so it's worth trying some easier things above. from .TreeFragment import TreeFragment with local_errors(ignore=True): pos = (pos[0], pos[1], pos[2]-7) @@ -1453,8 +1568,11 @@ def _analyse_name_as_type(name, pos, env): if isinstance(sizeof_node, SizeofTypeNode): sizeof_node = sizeof_node.analyse_types(env) if isinstance(sizeof_node, SizeofTypeNode): - return sizeof_node.arg_type - return None + type = sizeof_node.arg_type + if type and (type.is_pyobject or env.in_c_type_context): + return type + ctype = ctype or type + return ctype class BytesNode(ConstNode): @@ -1539,7 +1657,7 @@ class BytesNode(ConstNode): self.result_code = result def get_constant_c_result_code(self): - return None # FIXME + return None # FIXME def calculate_result_code(self): return self.result_code @@ -1594,12 +1712,9 @@ class UnicodeNode(ConstNode): if dst_type.is_string and self.bytes_value is not None: # special case: '-3' enforced unicode literal used in a # C char* context - return BytesNode(self.pos, value=self.bytes_value - ).coerce_to(dst_type, env) + return BytesNode(self.pos, value=self.bytes_value).coerce_to(dst_type, env) if dst_type.is_pyunicode_ptr: - node = UnicodeNode(self.pos, value=self.value) - node.type = dst_type - return node + return UnicodeNode(self.pos, value=self.value, 
type=dst_type) error(self.pos, "Unicode literals do not support coercion to C types other " "than Py_UNICODE/Py_UCS4 (for characters) or Py_UNICODE* " @@ -1784,7 +1899,7 @@ class ImagNode(AtomicExprNode): self.result(), float(self.value), code.error_goto_if_null(self.result(), self.pos))) - code.put_gotref(self.py_result()) + self.generate_gotref(code) class NewExprNode(AtomicExprNode): @@ -1837,7 +1952,7 @@ class NameNode(AtomicExprNode): is_name = True is_cython_module = False cython_attribute = None - lhs_of_first_assignment = False # TODO: remove me + lhs_of_first_assignment = False # TODO: remove me is_used_as_rvalue = 0 entry = None type_entry = None @@ -1919,30 +2034,66 @@ class NameNode(AtomicExprNode): def declare_from_annotation(self, env, as_target=False): """Implements PEP 526 annotation typing in a fairly relaxed way. - Annotations are ignored for global variables, Python class attributes and already declared variables. - String literals are allowed and ignored. - The ambiguous Python types 'int' and 'long' are ignored and the 'cython.int' form must be used instead. + Annotations are ignored for global variables. + All other annotations are stored on the entry in the symbol table. + String literals are allowed and not evaluated. + The ambiguous Python types 'int' and 'long' are not evaluated - the 'cython.int' form must be used instead. 
""" - if not env.directives['annotation_typing']: - return - if env.is_module_scope or env.is_py_class_scope: - # annotations never create global cdef names and Python classes don't support them anyway - return name = self.name - if self.entry or env.lookup_here(name) is not None: - # already declared => ignore annotation - return - annotation = self.annotation - if annotation.is_string_literal: - # name: "description" => not a type, but still a declared variable or attribute - atype = None - else: - _, atype = analyse_type_annotation(annotation, env) - if atype is None: - atype = unspecified_type if as_target and env.directives['infer_types'] != False else py_object_type - self.entry = env.declare_var(name, atype, self.pos, is_cdef=not as_target) - self.entry.annotation = annotation + entry = self.entry or env.lookup_here(name) + if not entry: + # annotations never create global cdef names + if env.is_module_scope: + return + + modifiers = () + if ( + # name: "description" => not a type, but still a declared variable or attribute + annotation.expr.is_string_literal + # don't do type analysis from annotations if not asked to, but still collect the annotation + or not env.directives['annotation_typing'] + ): + atype = None + elif env.is_py_class_scope: + # For Python class scopes every attribute is a Python object + atype = py_object_type + else: + modifiers, atype = annotation.analyse_type_annotation(env) + + if atype is None: + atype = unspecified_type if as_target and env.directives['infer_types'] != False else py_object_type + elif atype.is_fused and env.fused_to_specific: + try: + atype = atype.specialize(env.fused_to_specific) + except CannotSpecialize: + error(self.pos, + "'%s' cannot be specialized since its type is not a fused argument to this function" % + self.name) + atype = error_type + + visibility = 'private' + if env.is_c_dataclass_scope: + # handle "frozen" directive - full inspection of the dataclass directives happens + # in Dataclass.py + 
is_frozen = env.is_c_dataclass_scope == "frozen" + if atype.is_pyobject or atype.can_coerce_to_pyobject(env): + visibility = 'readonly' if is_frozen else 'public' + # If the object can't be coerced that's fine - we just don't create a property + + if as_target and env.is_c_class_scope and not (atype.is_pyobject or atype.is_error): + # TODO: this will need revising slightly if annotated cdef attributes are implemented + atype = py_object_type + warning(annotation.pos, "Annotation ignored since class-level attributes must be Python objects. " + "Were you trying to set up an instance attribute?", 2) + + entry = self.entry = env.declare_var( + name, atype, self.pos, is_cdef=not as_target, visibility=visibility, + pytyping_modifiers=modifiers) + + # Even if the entry already exists, make sure we're supplying an annotation if we can. + if annotation and not entry.annotation: + entry.annotation = annotation def analyse_as_module(self, env): # Try to interpret this as a reference to a cimported module. @@ -1952,22 +2103,50 @@ class NameNode(AtomicExprNode): entry = env.lookup(self.name) if entry and entry.as_module: return entry.as_module + if entry and entry.known_standard_library_import: + scope = Builtin.get_known_standard_library_module_scope(entry.known_standard_library_import) + if scope and scope.is_module_scope: + return scope return None def analyse_as_type(self, env): + type = None if self.cython_attribute: type = PyrexTypes.parse_basic_type(self.cython_attribute) - else: + elif env.in_c_type_context: type = PyrexTypes.parse_basic_type(self.name) if type: return type + entry = self.entry if not entry: entry = env.lookup(self.name) + if entry and not entry.is_type and entry.known_standard_library_import: + entry = Builtin.get_known_standard_library_entry(entry.known_standard_library_import) if entry and entry.is_type: - return entry.type - else: - return None + # Infer equivalent C types instead of Python types when possible. 
+ type = entry.type + if not env.in_c_type_context and type is Builtin.long_type: + # Try to give a helpful warning when users write plain C type names. + warning(self.pos, "Found Python 2.x type 'long' in a Python annotation. Did you mean to use 'cython.long'?") + type = py_object_type + elif type.is_pyobject and type.equivalent_type: + type = type.equivalent_type + elif type is Builtin.int_type and env.global_scope().context.language_level == 2: + # While we still support Python 2 this must be a plain object + # so that it can be either int or long. With language_level=3(str), + # we pick up the type but accept both int and long in Py2. + type = py_object_type + return type + if self.name == 'object': + # This is normally parsed as "simple C type", but not if we don't parse C types. + return py_object_type + + # Try to give a helpful warning when users write plain C type names. + if not env.in_c_type_context and PyrexTypes.parse_basic_type(self.name): + warning(self.pos, "Found C type '%s' in a Python annotation. Did you mean to use 'cython.%s'?" % (self.name, self.name)) + + return None def analyse_as_extension_type(self, env): # Try to interpret this as a reference to an extension type. 
@@ -1981,11 +2160,29 @@ class NameNode(AtomicExprNode): return None def analyse_target_declaration(self, env): + return self._analyse_target_declaration(env, is_assignment_expression=False) + + def analyse_assignment_expression_target_declaration(self, env): + return self._analyse_target_declaration(env, is_assignment_expression=True) + + def _analyse_target_declaration(self, env, is_assignment_expression): + self.is_target = True if not self.entry: - self.entry = env.lookup_here(self.name) + if is_assignment_expression: + self.entry = env.lookup_assignment_expression_target(self.name) + else: + self.entry = env.lookup_here(self.name) + if self.entry: + self.entry.known_standard_library_import = "" # already exists somewhere and so is now ambiguous if not self.entry and self.annotation is not None: # name : type = ... - self.declare_from_annotation(env, as_target=True) + is_dataclass = env.is_c_dataclass_scope + # In a dataclass, an assignment should not prevent a name from becoming an instance attribute. + # Hence, "as_target = not is_dataclass". 
+ self.declare_from_annotation(env, as_target=not is_dataclass) + elif (self.entry and self.entry.is_inherited and + self.annotation and env.is_c_dataclass_scope): + error(self.pos, "Cannot redeclare inherited fields in Cython dataclasses") if not self.entry: if env.directives['warn.undeclared']: warning(self.pos, "implicit declaration of '%s'" % self.name, 1) @@ -1993,7 +2190,10 @@ class NameNode(AtomicExprNode): type = unspecified_type else: type = py_object_type - self.entry = env.declare_var(self.name, type, self.pos) + if is_assignment_expression: + self.entry = env.declare_assignment_expression_target(self.name, type, self.pos) + else: + self.entry = env.declare_var(self.name, type, self.pos) if self.entry.is_declared_generic: self.result_ctype = py_object_type if self.entry.as_module: @@ -2033,8 +2233,6 @@ class NameNode(AtomicExprNode): if self.type.is_const: error(self.pos, "Assignment to const '%s'" % self.name) - if self.type.is_reference: - error(self.pos, "Assignment to reference '%s'" % self.name) if not self.is_lvalue(): error(self.pos, "Assignment to non-lvalue '%s'" % self.name) self.type = PyrexTypes.error_type @@ -2071,7 +2269,7 @@ class NameNode(AtomicExprNode): if self.is_used_as_rvalue: entry = self.entry if entry.is_builtin: - if not entry.is_const: # cached builtins are ok + if not entry.is_const: # cached builtins are ok self.gil_error() elif entry.is_pyglobal: self.gil_error() @@ -2096,7 +2294,7 @@ class NameNode(AtomicExprNode): entry = self.entry if entry.is_type and entry.type.is_extension_type: self.type_entry = entry - if entry.is_type and entry.type.is_enum: + if entry.is_type and (entry.type.is_enum or entry.type.is_cpp_enum): py_entry = Symtab.Entry(self.name, None, py_object_type) py_entry.is_pyglobal = True py_entry.scope = self.entry.scope @@ -2122,7 +2320,7 @@ class NameNode(AtomicExprNode): def may_be_none(self): if self.cf_state and self.type and (self.type.is_pyobject or self.type.is_memoryviewslice): - # gard against 
infinite recursion on self-dependencies + # guard against infinite recursion on self-dependencies if getattr(self, '_none_checking', False): # self-dependency - either this node receives a None # value from *another* node, or it can not reference @@ -2189,24 +2387,25 @@ class NameNode(AtomicExprNode): def calculate_result_code(self): entry = self.entry if not entry: - return "<error>" # There was an error earlier + return "<error>" # There was an error earlier + if self.entry.is_cpp_optional and not self.is_target: + return "(*%s)" % entry.cname return entry.cname def generate_result_code(self, code): - assert hasattr(self, 'entry') entry = self.entry if entry is None: - return # There was an error earlier + return # There was an error earlier if entry.utility_code: code.globalstate.use_utility_code(entry.utility_code) if entry.is_builtin and entry.is_const: - return # Lookup already cached + return # Lookup already cached elif entry.is_pyclass_attr: assert entry.type.is_pyobject, "Python global or builtin not a Python object" interned_cname = code.intern_identifier(self.entry.name) if entry.is_builtin: namespace = Naming.builtins_cname - else: # entry.is_pyglobal + else: # entry.is_pyglobal namespace = entry.scope.namespace_cname if not self.cf_is_null: code.putln( @@ -2225,7 +2424,7 @@ class NameNode(AtomicExprNode): if not self.cf_is_null: code.putln("}") code.putln(code.error_goto_if_null(self.result(), self.pos)) - code.put_gotref(self.py_result()) + self.generate_gotref(code) elif entry.is_builtin and not entry.scope.is_module_scope: # known builtin @@ -2238,7 +2437,7 @@ class NameNode(AtomicExprNode): self.result(), interned_cname, code.error_goto_if_null(self.result(), self.pos))) - code.put_gotref(self.py_result()) + self.generate_gotref(code) elif entry.is_pyglobal or (entry.is_builtin and entry.scope.is_module_scope): # name in class body, global name or unknown builtin @@ -2262,25 +2461,34 @@ class NameNode(AtomicExprNode): entry.scope.namespace_cname, 
interned_cname, code.error_goto_if_null(self.result(), self.pos))) - code.put_gotref(self.py_result()) + self.generate_gotref(code) elif entry.is_local or entry.in_closure or entry.from_closure or entry.type.is_memoryviewslice: # Raise UnboundLocalError for objects and memoryviewslices raise_unbound = ( (self.cf_maybe_null or self.cf_is_null) and not self.allow_null) - null_code = entry.type.check_for_null_code(entry.cname) memslice_check = entry.type.is_memoryviewslice and self.initialized_check + optional_cpp_check = entry.is_cpp_optional and self.initialized_check + + if optional_cpp_check: + unbound_check_code = entry.type.cpp_optional_check_for_null_code(entry.cname) + else: + unbound_check_code = entry.type.check_for_null_code(entry.cname) + + if unbound_check_code and raise_unbound and (entry.type.is_pyobject or memslice_check or optional_cpp_check): + code.put_error_if_unbound(self.pos, entry, self.in_nogil_context, unbound_check_code=unbound_check_code) - if null_code and raise_unbound and (entry.type.is_pyobject or memslice_check): - code.put_error_if_unbound(self.pos, entry, self.in_nogil_context) + elif entry.is_cglobal and entry.is_cpp_optional and self.initialized_check: + unbound_check_code = entry.type.cpp_optional_check_for_null_code(entry.cname) + code.put_error_if_unbound(self.pos, entry, unbound_check_code=unbound_check_code) def generate_assignment_code(self, rhs, code, overloaded_assignment=False, - exception_check=None, exception_value=None): + exception_check=None, exception_value=None): #print "NameNode.generate_assignment_code:", self.name ### entry = self.entry if entry is None: - return # There was an error earlier + return # There was an error earlier if (self.entry.type.is_ptr and isinstance(rhs, ListNode) and not self.lhs_of_first_assignment and not rhs.in_module_scope): @@ -2301,8 +2509,10 @@ class NameNode(AtomicExprNode): setter = 'PyDict_SetItem' namespace = Naming.moddict_cname elif entry.is_pyclass_attr: - 
code.globalstate.use_utility_code(UtilityCode.load_cached("SetNameInClass", "ObjectHandling.c")) - setter = '__Pyx_SetNameInClass' + # Special-case setting __new__ + n = "SetNewInClass" if self.name == "__new__" else "SetNameInClass" + code.globalstate.use_utility_code(UtilityCode.load_cached(n, "ObjectHandling.c")) + setter = '__Pyx_' + n else: assert False, repr(entry) code.put_error_if_neg( @@ -2344,31 +2554,24 @@ class NameNode(AtomicExprNode): rhs.make_owned_reference(code) is_external_ref = entry.is_cglobal or self.entry.in_closure or self.entry.from_closure if is_external_ref: - if not self.cf_is_null: - if self.cf_maybe_null: - code.put_xgotref(self.py_result()) - else: - code.put_gotref(self.py_result()) + self.generate_gotref(code, handle_null=True) assigned = True if entry.is_cglobal: - code.put_decref_set( - self.result(), rhs.result_as(self.ctype())) + self.generate_decref_set(code, rhs.result_as(self.ctype())) else: if not self.cf_is_null: if self.cf_maybe_null: - code.put_xdecref_set( - self.result(), rhs.result_as(self.ctype())) + self.generate_xdecref_set(code, rhs.result_as(self.ctype())) else: - code.put_decref_set( - self.result(), rhs.result_as(self.ctype())) + self.generate_decref_set(code, rhs.result_as(self.ctype())) else: assigned = False if is_external_ref: - code.put_giveref(rhs.py_result()) + rhs.generate_giveref(code) if not self.type.is_memoryviewslice: if not assigned: if overloaded_assignment: - result = rhs.result() + result = rhs.move_result_rhs() if exception_check == '+': translate_cpp_exception( code, self.pos, @@ -2378,7 +2581,7 @@ class NameNode(AtomicExprNode): else: code.putln('%s = %s;' % (self.result(), result)) else: - result = rhs.result_as(self.ctype()) + result = rhs.move_result_rhs_as(self.ctype()) if is_pythran_expr(self.type): code.putln('new (&%s) decltype(%s){%s};' % (self.result(), self.result(), result)) @@ -2431,7 +2634,7 @@ class NameNode(AtomicExprNode): def generate_deletion_code(self, code, 
ignore_nonexisting=False): if self.entry is None: - return # There was an error earlier + return # There was an error earlier elif self.entry.is_pyclass_attr: namespace = self.entry.scope.namespace_cname interned_cname = code.intern_identifier(self.entry.name) @@ -2467,26 +2670,20 @@ class NameNode(AtomicExprNode): if self.cf_maybe_null and not ignore_nonexisting: code.put_error_if_unbound(self.pos, self.entry) - if self.entry.type.is_pyobject: - if self.entry.in_closure: - # generator - if ignore_nonexisting and self.cf_maybe_null: - code.put_xgotref(self.result()) - else: - code.put_gotref(self.result()) - if ignore_nonexisting and self.cf_maybe_null: - code.put_xdecref(self.result(), self.ctype()) - else: - code.put_decref(self.result(), self.ctype()) - code.putln('%s = NULL;' % self.result()) + if self.entry.in_closure: + # generator + self.generate_gotref(code, handle_null=True, maybe_null_extra_check=ignore_nonexisting) + if ignore_nonexisting and self.cf_maybe_null: + code.put_xdecref_clear(self.result(), self.ctype(), + have_gil=not self.nogil) else: - code.put_xdecref_memoryviewslice(self.entry.cname, - have_gil=not self.nogil) + code.put_decref_clear(self.result(), self.ctype(), + have_gil=not self.nogil) else: error(self.pos, "Deletion of C names not supported") def annotate(self, code): - if hasattr(self, 'is_called') and self.is_called: + if getattr(self, 'is_called', False): pos = (self.pos[0], self.pos[1], self.pos[2] - len(self.name) - 1) if self.type.is_pyobject: style, text = 'py_call', 'python function (%s)' @@ -2494,6 +2691,11 @@ class NameNode(AtomicExprNode): style, text = 'c_call', 'c function (%s)' code.annotate(pos, AnnotationItem(style, text % self.type, size=len(self.name))) + def get_known_standard_library_import(self): + if self.entry: + return self.entry.known_standard_library_import + return None + class BackquoteNode(ExprNode): # `expr` # @@ -2520,7 +2722,7 @@ class BackquoteNode(ExprNode): self.result(), self.arg.py_result(), 
code.error_goto_if_null(self.result(), self.pos))) - code.put_gotref(self.py_result()) + self.generate_gotref(code) class ImportNode(ExprNode): @@ -2539,44 +2741,68 @@ class ImportNode(ExprNode): # relative to the current module. # None: decide the level according to language level and # directives + # get_top_level_module int true: return top-level module, false: return imported module + # module_names TupleNode the separate names of the module and submodules, or None type = py_object_type + module_names = None + get_top_level_module = False + is_temp = True - subexprs = ['module_name', 'name_list'] + subexprs = ['module_name', 'name_list', 'module_names'] def analyse_types(self, env): if self.level is None: - if (env.directives['py2_import'] or - Future.absolute_import not in env.global_scope().context.future_directives): + # For modules in packages, and without 'absolute_import' enabled, try relative (Py2) import first. + if env.global_scope().parent_module and ( + env.directives['py2_import'] or + Future.absolute_import not in env.global_scope().context.future_directives): self.level = -1 else: self.level = 0 module_name = self.module_name.analyse_types(env) self.module_name = module_name.coerce_to_pyobject(env) + assert self.module_name.is_string_literal if self.name_list: name_list = self.name_list.analyse_types(env) self.name_list = name_list.coerce_to_pyobject(env) - self.is_temp = 1 + elif '.' 
in self.module_name.value: + self.module_names = TupleNode(self.module_name.pos, args=[ + IdentifierStringNode(self.module_name.pos, value=part, constant_result=part) + for part in map(StringEncoding.EncodedString, self.module_name.value.split('.')) + ]).analyse_types(env) return self gil_message = "Python import" def generate_result_code(self, code): - if self.name_list: - name_list_code = self.name_list.py_result() + assert self.module_name.is_string_literal + module_name = self.module_name.value + + if self.level <= 0 and not self.name_list and not self.get_top_level_module: + if self.module_names: + assert self.module_names.is_literal # make sure we create the tuple only once + if self.level == 0: + utility_code = UtilityCode.load_cached("ImportDottedModule", "ImportExport.c") + helper_func = "__Pyx_ImportDottedModule" + else: + utility_code = UtilityCode.load_cached("ImportDottedModuleRelFirst", "ImportExport.c") + helper_func = "__Pyx_ImportDottedModuleRelFirst" + code.globalstate.use_utility_code(utility_code) + import_code = "%s(%s, %s)" % ( + helper_func, + self.module_name.py_result(), + self.module_names.py_result() if self.module_names else 'NULL', + ) else: - name_list_code = "0" - - code.globalstate.use_utility_code(UtilityCode.load_cached("Import", "ImportExport.c")) - import_code = "__Pyx_Import(%s, %s, %d)" % ( - self.module_name.py_result(), - name_list_code, - self.level) + code.globalstate.use_utility_code(UtilityCode.load_cached("Import", "ImportExport.c")) + import_code = "__Pyx_Import(%s, %s, %d)" % ( + self.module_name.py_result(), + self.name_list.py_result() if self.name_list else '0', + self.level) - if (self.level <= 0 and - self.module_name.is_string_literal and - self.module_name.value in utility_code_for_imports): - helper_func, code_name, code_file = utility_code_for_imports[self.module_name.value] + if self.level <= 0 and module_name in utility_code_for_imports: + helper_func, code_name, code_file = 
utility_code_for_imports[module_name] code.globalstate.use_utility_code(UtilityCode.load_cached(code_name, code_file)) import_code = '%s(%s)' % (helper_func, import_code) @@ -2584,10 +2810,104 @@ class ImportNode(ExprNode): self.result(), import_code, code.error_goto_if_null(self.result(), self.pos))) - code.put_gotref(self.py_result()) + self.generate_gotref(code) + + def get_known_standard_library_import(self): + return self.module_name.value + + +class ScopedExprNode(ExprNode): + # Abstract base class for ExprNodes that have their own local + # scope, such as generator expressions. + # + # expr_scope Scope the inner scope of the expression + subexprs = [] + expr_scope = None + + # does this node really have a local scope, e.g. does it leak loop + # variables or not? non-leaking Py3 behaviour is default, except + # for list comprehensions where the behaviour differs in Py2 and + # Py3 (set in Parsing.py based on parser context) + has_local_scope = True + + def init_scope(self, outer_scope, expr_scope=None): + if expr_scope is not None: + self.expr_scope = expr_scope + elif self.has_local_scope: + self.expr_scope = Symtab.ComprehensionScope(outer_scope) + elif not self.expr_scope: # don't unset if it's already been set + self.expr_scope = None + + def analyse_declarations(self, env): + self.init_scope(env) + + def analyse_scoped_declarations(self, env): + # this is called with the expr_scope as env + pass -class IteratorNode(ExprNode): + def analyse_types(self, env): + # no recursion here, the children will be analysed separately below + return self + + def analyse_scoped_expressions(self, env): + # this is called with the expr_scope as env + return self + + def generate_evaluation_code(self, code): + # set up local variables and free their references on exit + generate_inner_evaluation_code = super(ScopedExprNode, self).generate_evaluation_code + if not self.has_local_scope or not self.expr_scope.var_entries: + # no local variables => delegate, done + 
generate_inner_evaluation_code(code) + return + + code.putln('{ /* enter inner scope */') + py_entries = [] + for _, entry in sorted(item for item in self.expr_scope.entries.items() if item[0]): + if not entry.in_closure: + if entry.type.is_pyobject and entry.used: + py_entries.append(entry) + if not py_entries: + # no local Python references => no cleanup required + generate_inner_evaluation_code(code) + code.putln('} /* exit inner scope */') + return + + # must free all local Python references at each exit point + old_loop_labels = code.new_loop_labels() + old_error_label = code.new_error_label() + + generate_inner_evaluation_code(code) + + # normal (non-error) exit + self._generate_vars_cleanup(code, py_entries) + + # error/loop body exit points + exit_scope = code.new_label('exit_scope') + code.put_goto(exit_scope) + for label, old_label in ([(code.error_label, old_error_label)] + + list(zip(code.get_loop_labels(), old_loop_labels))): + if code.label_used(label): + code.put_label(label) + self._generate_vars_cleanup(code, py_entries) + code.put_goto(old_label) + code.put_label(exit_scope) + code.putln('} /* exit inner scope */') + + code.set_loop_labels(old_loop_labels) + code.error_label = old_error_label + + def _generate_vars_cleanup(self, code, py_entries): + for entry in py_entries: + if entry.is_cglobal: + code.put_var_gotref(entry) + code.put_var_decref_set(entry, "Py_None") + else: + code.put_var_xdecref_clear(entry) + + +class IteratorNode(ScopedExprNode): # Used as part of for statement implementation. 
# # Implements result = iter(sequence) @@ -2597,20 +2917,25 @@ class IteratorNode(ExprNode): type = py_object_type iter_func_ptr = None counter_cname = None - cpp_iterator_cname = None reversed = False # currently only used for list/tuple types (see Optimize.py) is_async = False + has_local_scope = False subexprs = ['sequence'] def analyse_types(self, env): + if self.expr_scope: + env = self.expr_scope # actually evaluate sequence in this scope instead self.sequence = self.sequence.analyse_types(env) if (self.sequence.type.is_array or self.sequence.type.is_ptr) and \ not self.sequence.type.is_string: # C array iteration will be transformed later on self.type = self.sequence.type elif self.sequence.type.is_cpp_class: - self.analyse_cpp_types(env) + return CppIteratorNode(self.pos, sequence=self.sequence).analyse_types(env) + elif self.is_reversed_cpp_iteration(): + sequence = self.sequence.arg_tuple.args[0].arg + return CppIteratorNode(self.pos, sequence=sequence, reversed=True).analyse_types(env) else: self.sequence = self.sequence.coerce_to_pyobject(env) if self.sequence.type in (list_type, tuple_type): @@ -2625,8 +2950,27 @@ class IteratorNode(ExprNode): PyrexTypes.CFuncTypeArg("it", PyrexTypes.py_object_type, None), ])) + def is_reversed_cpp_iteration(self): + """ + Returns True if the 'reversed' function is applied to a C++ iterable. + + This supports C++ classes with reverse_iterator implemented. 
+ """ + if not (isinstance(self.sequence, SimpleCallNode) and + self.sequence.arg_tuple and len(self.sequence.arg_tuple.args) == 1): + return False + func = self.sequence.function + if func.is_name and func.name == "reversed": + if not func.entry.is_builtin: + return False + arg = self.sequence.arg_tuple.args[0] + if isinstance(arg, CoercionNode) and arg.arg.is_name: + arg = arg.arg.entry + return arg.type.is_cpp_class + return False + def type_dependencies(self, env): - return self.sequence.type_dependencies(env) + return self.sequence.type_dependencies(self.expr_scope or env) def infer_type(self, env): sequence_type = self.sequence.infer_type(env) @@ -2640,65 +2984,10 @@ class IteratorNode(ExprNode): return sequence_type return py_object_type - def analyse_cpp_types(self, env): - sequence_type = self.sequence.type - if sequence_type.is_ptr: - sequence_type = sequence_type.base_type - begin = sequence_type.scope.lookup("begin") - end = sequence_type.scope.lookup("end") - if (begin is None - or not begin.type.is_cfunction - or begin.type.args): - error(self.pos, "missing begin() on %s" % self.sequence.type) - self.type = error_type - return - if (end is None - or not end.type.is_cfunction - or end.type.args): - error(self.pos, "missing end() on %s" % self.sequence.type) - self.type = error_type - return - iter_type = begin.type.return_type - if iter_type.is_cpp_class: - if env.lookup_operator_for_types( - self.pos, - "!=", - [iter_type, end.type.return_type]) is None: - error(self.pos, "missing operator!= on result of begin() on %s" % self.sequence.type) - self.type = error_type - return - if env.lookup_operator_for_types(self.pos, '++', [iter_type]) is None: - error(self.pos, "missing operator++ on result of begin() on %s" % self.sequence.type) - self.type = error_type - return - if env.lookup_operator_for_types(self.pos, '*', [iter_type]) is None: - error(self.pos, "missing operator* on result of begin() on %s" % self.sequence.type) - self.type = error_type - 
return - self.type = iter_type - elif iter_type.is_ptr: - if not (iter_type == end.type.return_type): - error(self.pos, "incompatible types for begin() and end()") - self.type = iter_type - else: - error(self.pos, "result type of begin() on %s must be a C++ class or pointer" % self.sequence.type) - self.type = error_type - return - def generate_result_code(self, code): sequence_type = self.sequence.type if sequence_type.is_cpp_class: - if self.sequence.is_name: - # safe: C++ won't allow you to reassign to class references - begin_func = "%s.begin" % self.sequence.result() - else: - sequence_type = PyrexTypes.c_ptr_type(sequence_type) - self.cpp_iterator_cname = code.funcstate.allocate_temp(sequence_type, manage_ref=False) - code.putln("%s = &%s;" % (self.cpp_iterator_cname, self.sequence.result())) - begin_func = "%s->begin" % self.cpp_iterator_cname - # TODO: Limit scope. - code.putln("%s = %s();" % (self.result(), begin_func)) - return + assert False, "Should have been changed to CppIteratorNode" if sequence_type.is_array or sequence_type.is_ptr: raise InternalError("for in carray slice not transformed") @@ -2740,12 +3029,12 @@ class IteratorNode(ExprNode): self.result(), self.sequence.py_result(), code.error_goto_if_null(self.result(), self.pos))) - code.put_gotref(self.py_result()) + self.generate_gotref(code) # PyObject_GetIter() fails if "tp_iternext" is not set, but the check below # makes it visible to the C compiler that the pointer really isn't NULL, so that # it can distinguish between the special cases and the generic case - code.putln("%s = Py_TYPE(%s)->tp_iternext; %s" % ( + code.putln("%s = __Pyx_PyObject_GetIterNextFunc(%s); %s" % ( self.iter_func_ptr, self.py_result(), code.error_goto_if_null(self.iter_func_ptr, self.pos))) if self.may_be_a_sequence: @@ -2787,28 +3076,14 @@ class IteratorNode(ExprNode): self.counter_cname, inc_dec, code.error_goto_if_null(result_name, self.pos))) - code.put_gotref(result_name) + code.put_gotref(result_name, 
py_object_type) code.putln("#endif") def generate_iter_next_result_code(self, result_name, code): sequence_type = self.sequence.type if self.reversed: code.putln("if (%s < 0) break;" % self.counter_cname) - if sequence_type.is_cpp_class: - if self.cpp_iterator_cname: - end_func = "%s->end" % self.cpp_iterator_cname - else: - end_func = "%s.end" % self.sequence.result() - # TODO: Cache end() call? - code.putln("if (!(%s != %s())) break;" % ( - self.result(), - end_func)) - code.putln("%s = *%s;" % ( - result_name, - self.result())) - code.putln("++%s;" % self.result()) - return - elif sequence_type is list_type: + if sequence_type is list_type: self.generate_next_sequence_item('List', result_name, code) return elif sequence_type is tuple_type: @@ -2838,7 +3113,7 @@ class IteratorNode(ExprNode): code.putln("}") code.putln("break;") code.putln("}") - code.put_gotref(result_name) + code.put_gotref(result_name, py_object_type) code.putln("}") def free_temps(self, code): @@ -2847,11 +3122,178 @@ class IteratorNode(ExprNode): if self.iter_func_ptr: code.funcstate.release_temp(self.iter_func_ptr) self.iter_func_ptr = None - if self.cpp_iterator_cname: - code.funcstate.release_temp(self.cpp_iterator_cname) ExprNode.free_temps(self, code) +class CppIteratorNode(ExprNode): + # Iteration over a C++ container. + # Created at the analyse_types stage by IteratorNode + cpp_sequence_cname = None + cpp_attribute_op = "." 
+ extra_dereference = "" + is_temp = True + reversed = False + + subexprs = ['sequence'] + + def get_iterator_func_names(self): + return ("begin", "end") if not self.reversed else ("rbegin", "rend") + + def analyse_types(self, env): + sequence_type = self.sequence.type + if sequence_type.is_ptr: + sequence_type = sequence_type.base_type + begin_name, end_name = self.get_iterator_func_names() + begin = sequence_type.scope.lookup(begin_name) + end = sequence_type.scope.lookup(end_name) + if (begin is None + or not begin.type.is_cfunction + or begin.type.args): + error(self.pos, "missing %s() on %s" % (begin_name, self.sequence.type)) + self.type = error_type + return self + if (end is None + or not end.type.is_cfunction + or end.type.args): + error(self.pos, "missing %s() on %s" % (end_name, self.sequence.type)) + self.type = error_type + return self + iter_type = begin.type.return_type + if iter_type.is_cpp_class: + if env.directives['cpp_locals']: + self.extra_dereference = "*" + if env.lookup_operator_for_types( + self.pos, + "!=", + [iter_type, end.type.return_type]) is None: + error(self.pos, "missing operator!= on result of %s() on %s" % (begin_name, self.sequence.type)) + self.type = error_type + return self + if env.lookup_operator_for_types(self.pos, '++', [iter_type]) is None: + error(self.pos, "missing operator++ on result of %s() on %s" % (begin_name, self.sequence.type)) + self.type = error_type + return self + if env.lookup_operator_for_types(self.pos, '*', [iter_type]) is None: + error(self.pos, "missing operator* on result of %s() on %s" % (begin_name, self.sequence.type)) + self.type = error_type + return self + self.type = iter_type + elif iter_type.is_ptr: + if not (iter_type == end.type.return_type): + error(self.pos, "incompatible types for %s() and %s()" % (begin_name, end_name)) + self.type = iter_type + else: + error(self.pos, "result type of %s() on %s must be a C++ class or pointer" % (begin_name, self.sequence.type)) + self.type = 
error_type + return self + + def generate_result_code(self, code): + sequence_type = self.sequence.type + begin_name, _ = self.get_iterator_func_names() + # essentially 3 options: + if self.sequence.is_simple(): + # 1) Sequence can be accessed directly, like a name; + # assigning to it may break the container, but that's the responsibility + # of the user + code.putln("%s = %s%s%s();" % ( + self.result(), + self.sequence.result(), + self.cpp_attribute_op, + begin_name)) + else: + # (while it'd be nice to limit the scope of the loop temp, it's essentially + # impossible to do while supporting generators) + temp_type = sequence_type + if temp_type.is_reference: + # 2) Sequence is a reference (often obtained by dereferencing a pointer); + # make the temp a pointer so we are not sensitive to users reassigning + # the pointer that it came from + temp_type = PyrexTypes.CPtrType(sequence_type.ref_base_type) + if temp_type.is_ptr or code.globalstate.directives['cpp_locals']: + self.cpp_attribute_op = "->" + # 3) (otherwise) sequence comes from a function call or similar, so we must + # create a temp to store it in + self.cpp_sequence_cname = code.funcstate.allocate_temp(temp_type, manage_ref=False) + code.putln("%s = %s%s;" % (self.cpp_sequence_cname, + "&" if temp_type.is_ptr else "", + self.sequence.move_result_rhs())) + code.putln("%s = %s%s%s();" % ( + self.result(), + self.cpp_sequence_cname, + self.cpp_attribute_op, + begin_name)) + + def generate_iter_next_result_code(self, result_name, code): + # end call isn't cached to support containers that allow adding while iterating + # (much as this is usually a bad idea) + _, end_name = self.get_iterator_func_names() + code.putln("if (!(%s%s != %s%s%s())) break;" % ( + self.extra_dereference, + self.result(), + self.cpp_sequence_cname or self.sequence.result(), + self.cpp_attribute_op, + end_name)) + code.putln("%s = *%s%s;" % ( + result_name, + self.extra_dereference, + self.result())) + code.putln("++%s%s;" % 
(self.extra_dereference, self.result())) + + def generate_subexpr_disposal_code(self, code): + if not self.cpp_sequence_cname: + # the sequence is accessed directly so any temporary result in its + # subexpressions must remain available until the iterator is not needed + return + ExprNode.generate_subexpr_disposal_code(self, code) + + def free_subexpr_temps(self, code): + if not self.cpp_sequence_cname: + # the sequence is accessed directly so any temporary result in its + # subexpressions must remain available until the iterator is not needed + return + ExprNode.free_subexpr_temps(self, code) + + def generate_disposal_code(self, code): + if not self.cpp_sequence_cname: + # postponed from CppIteratorNode.generate_subexpr_disposal_code + # and CppIteratorNode.free_subexpr_temps + ExprNode.generate_subexpr_disposal_code(self, code) + ExprNode.free_subexpr_temps(self, code) + ExprNode.generate_disposal_code(self, code) + + def free_temps(self, code): + if self.cpp_sequence_cname: + code.funcstate.release_temp(self.cpp_sequence_cname) + # skip over IteratorNode since we don't use any of the temps it does + ExprNode.free_temps(self, code) + + +def remove_const(item_type): + """ + Removes the constness of a given type and its underlying templates + if any. + + This is to solve the compilation error when the temporary variable used to + store the result of an iterator cannot be changed due to its constness. + For example, the value_type of std::map, which will also be the type of + the temporary variable, is std::pair<const Key, T>. This means the first + component of the variable cannot be reused to store the result of each + iteration, which leads to a compilation error. 
+ """ + if item_type.is_const: + item_type = item_type.cv_base_type + if item_type.is_typedef: + item_type = remove_const(item_type.typedef_base_type) + if item_type.is_cpp_class and item_type.templates: + templates = [remove_const(t) if t.is_const else t for t in item_type.templates] + template_type = item_type.template_type + item_type = PyrexTypes.CppClassType( + template_type.name, template_type.scope, + template_type.cname, template_type.base_classes, + templates, template_type) + return item_type + + class NextNode(AtomicExprNode): # Used as part of for statement implementation. # Implements result = next(iterator) @@ -2876,16 +3318,12 @@ class NextNode(AtomicExprNode): iterator_type = self.iterator.infer_type(env) if iterator_type.is_ptr or iterator_type.is_array: return iterator_type.base_type - elif self.iterator.sequence.type is bytearray_type: - # This is a temporary work-around to fix bytearray iteration in 0.29.x - # It has been fixed properly in master, refer to ticket: 3473 - return py_object_type elif iterator_type.is_cpp_class: item_type = env.lookup_operator_for_types(self.pos, "*", [iterator_type]).type.return_type if item_type.is_reference: item_type = item_type.ref_base_type - if item_type.is_const: - item_type = item_type.const_base_type + if item_type.is_cv_qualified: + item_type = item_type.cv_base_type return item_type else: # Avoid duplication of complicated logic. @@ -2898,6 +3336,7 @@ class NextNode(AtomicExprNode): def analyse_types(self, env): self.type = self.infer_type(env, self.iterator.type) + self.type = remove_const(self.type) self.is_temp = 1 return self @@ -2905,7 +3344,7 @@ class NextNode(AtomicExprNode): self.iterator.generate_iter_next_result_code(self.result(), code) -class AsyncIteratorNode(ExprNode): +class AsyncIteratorNode(ScopedExprNode): # Used as part of 'async for' statement implementation. 
# # Implements result = sequence.__aiter__() @@ -2917,11 +3356,14 @@ class AsyncIteratorNode(ExprNode): is_async = True type = py_object_type is_temp = 1 + has_local_scope = False def infer_type(self, env): return py_object_type def analyse_types(self, env): + if self.expr_scope: + env = self.expr_scope self.sequence = self.sequence.analyse_types(env) if not self.sequence.type.is_pyobject: error(self.pos, "async for loops not allowed on C/C++ types") @@ -2934,7 +3376,7 @@ class AsyncIteratorNode(ExprNode): self.result(), self.sequence.py_result(), code.error_goto_if_null(self.result(), self.pos))) - code.put_gotref(self.result()) + self.generate_gotref(code) class AsyncNextNode(AtomicExprNode): @@ -2964,7 +3406,7 @@ class AsyncNextNode(AtomicExprNode): self.result(), self.iterator.py_result(), code.error_goto_if_null(self.result(), self.pos))) - code.put_gotref(self.result()) + self.generate_gotref(code) class WithExitCallNode(ExprNode): @@ -3007,7 +3449,7 @@ class WithExitCallNode(ExprNode): self.args.free_temps(code) code.putln(code.error_goto_if_null(result_var, self.pos)) - code.put_gotref(result_var) + code.put_gotref(result_var, py_object_type) if self.await_expr: # FIXME: result_var temp currently leaks into the closure @@ -3072,7 +3514,7 @@ class TempNode(ExprNode): return self def analyse_target_declaration(self, env): - pass + self.is_target = True def generate_result_code(self, code): pass @@ -3139,6 +3581,7 @@ class JoinedStrNode(ExprNode): # type = unicode_type is_temp = True + gil_message = "String concatenation" subexprs = ['values'] @@ -3161,7 +3604,7 @@ class JoinedStrNode(ExprNode): list_var, num_items, code.error_goto_if_null(list_var, self.pos))) - code.put_gotref(list_var) + code.put_gotref(list_var, py_object_type) code.putln("%s = 0;" % ulength_var) code.putln("%s = 127;" % max_char_var) # at least ASCII character range @@ -3205,7 +3648,7 @@ class JoinedStrNode(ExprNode): max_char_var, max_char_value, max_char_var, max_char_value, 
max_char_var)) code.putln("%s += %s;" % (ulength_var, ulength)) - code.put_giveref(node.py_result()) + node.generate_giveref(code) code.putln('PyTuple_SET_ITEM(%s, %s, %s);' % (list_var, i, node.py_result())) node.generate_post_assignment_code(code) node.free_temps(code) @@ -3220,7 +3663,7 @@ class JoinedStrNode(ExprNode): ulength_var, max_char_var, code.error_goto_if_null(self.py_result(), self.pos))) - code.put_gotref(self.py_result()) + self.generate_gotref(code) code.put_decref_clear(list_var, py_object_type) code.funcstate.release_temp(list_var) @@ -3232,7 +3675,7 @@ class FormattedValueNode(ExprNode): # {}-delimited portions of an f-string # # value ExprNode The expression itself - # conversion_char str or None Type conversion (!s, !r, !a, or none, or 'd' for integer conversion) + # conversion_char str or None Type conversion (!s, !r, !a, none, or 'd' for integer conversion) # format_spec JoinedStrNode or None Format string passed to __format__ # c_format_spec str or None If not None, formatting can be done at the C level @@ -3241,6 +3684,7 @@ class FormattedValueNode(ExprNode): type = unicode_type is_temp = True c_format_spec = None + gil_message = "String formatting" find_conversion_func = { 's': 'PyObject_Unicode', @@ -3278,7 +3722,7 @@ class FormattedValueNode(ExprNode): self.result(), convert_func_call, code.error_goto_if_null(self.result(), self.pos))) - code.put_gotref(self.py_result()) + self.generate_gotref(code) return value_result = self.value.py_result() @@ -3317,7 +3761,7 @@ class FormattedValueNode(ExprNode): value_result, format_spec, code.error_goto_if_null(self.result(), self.pos))) - code.put_gotref(self.py_result()) + self.generate_gotref(code) #------------------------------------------------------------------- @@ -3355,7 +3799,7 @@ class ParallelThreadsAvailableNode(AtomicExprNode): return self.temp_code -class ParallelThreadIdNode(AtomicExprNode): #, Nodes.ParallelNode): +class ParallelThreadIdNode(AtomicExprNode): #, 
Nodes.ParallelNode): """ Implements cython.parallel.threadid() """ @@ -3464,9 +3908,9 @@ class IndexNode(_IndexingBaseNode): def analyse_as_type(self, env): base_type = self.base.analyse_as_type(env) - if base_type and not base_type.is_pyobject: - if base_type.is_cpp_class: - if isinstance(self.index, TupleNode): + if base_type: + if base_type.is_cpp_class or base_type.python_type_constructor_name: + if self.index.is_sequence_constructor: template_values = self.index.args else: template_values = [self.index] @@ -3481,7 +3925,7 @@ class IndexNode(_IndexingBaseNode): env.use_utility_code(MemoryView.view_utility_code) axes = [self.index] if self.index.is_slice else list(self.index.args) return PyrexTypes.MemoryViewSliceType(base_type, MemoryView.get_axes_specs(env, axes)) - else: + elif not base_type.is_pyobject: # C array index = self.index.compile_time_value(env) if index is not None: @@ -3494,6 +3938,19 @@ class IndexNode(_IndexingBaseNode): error(self.pos, "Array size must be a compile time constant") return None + def analyse_pytyping_modifiers(self, env): + # Check for declaration modifiers, e.g. "typing.Optional[...]" or "dataclasses.InitVar[...]" + # TODO: somehow bring this together with TemplatedTypeNode.analyse_pytyping_modifiers() + modifiers = [] + modifier_node = self + while modifier_node.is_subscript: + modifier_type = modifier_node.base.analyse_as_type(env) + if (modifier_type and modifier_type.python_type_constructor_name + and modifier_type.modifier_name): + modifiers.append(modifier_type.modifier_name) + modifier_node = modifier_node.index + return modifiers + def type_dependencies(self, env): return self.base.type_dependencies(env) + self.index.type_dependencies(env) @@ -3511,6 +3968,8 @@ class IndexNode(_IndexingBaseNode): bytearray_type, list_type, tuple_type): # slicing these returns the same type return base_type + elif base_type.is_memoryviewslice: + return base_type else: # TODO: Handle buffers (hopefully without too much redundancy). 
return py_object_type @@ -3553,6 +4012,23 @@ class IndexNode(_IndexingBaseNode): index += base_type.size if 0 <= index < base_type.size: return base_type.components[index] + elif base_type.is_memoryviewslice: + if base_type.ndim == 0: + pass # probably an error, but definitely don't know what to do - return pyobject for now + if base_type.ndim == 1: + return base_type.dtype + else: + return PyrexTypes.MemoryViewSliceType(base_type.dtype, base_type.axes[1:]) + + if self.index.is_sequence_constructor and base_type.is_memoryviewslice: + inferred_type = base_type + for a in self.index.args: + if not inferred_type.is_memoryviewslice: + break # something's gone wrong + inferred_type = IndexNode(self.pos, base=ExprNode(self.base.pos, type=inferred_type), + index=a).infer_type(env) + else: + return inferred_type if base_type.is_cpp_class: class FakeOperand: @@ -3630,6 +4106,8 @@ class IndexNode(_IndexingBaseNode): if not base_type.is_cfunction: self.index = self.index.analyse_types(env) self.original_index_type = self.index.type + if self.original_index_type.is_reference: + self.original_index_type = self.original_index_type.ref_base_type if base_type.is_unicode_char: # we infer Py_UNICODE/Py_UCS4 for unicode strings in some @@ -3670,12 +4148,13 @@ class IndexNode(_IndexingBaseNode): self.is_temp = 1 elif self.index.type.is_int and base_type is not dict_type: if (getting + and not env.directives['boundscheck'] and (base_type in (list_type, tuple_type, bytearray_type)) and (not self.index.type.signed or not env.directives['wraparound'] or (isinstance(self.index, IntNode) and self.index.has_constant_result() and self.index.constant_result >= 0)) - and not env.directives['boundscheck']): + ): self.is_temp = 0 else: self.is_temp = 1 @@ -3702,12 +4181,16 @@ class IndexNode(_IndexingBaseNode): if base_type in (list_type, tuple_type) and self.index.type.is_int: item_type = infer_sequence_item_type( env, self.base, self.index, seq_type=base_type) - if item_type is None: - 
item_type = py_object_type - self.type = item_type if base_type in (list_type, tuple_type, dict_type): # do the None check explicitly (not in a helper) to allow optimising it away self.base = self.base.as_none_safe_node("'NoneType' object is not subscriptable") + if item_type is None or not item_type.is_pyobject: + # Even if we inferred a C type as result, we will read a Python object, so trigger coercion if needed. + # We could potentially use "item_type.equivalent_type" here, but that may trigger assumptions + # about the actual runtime item types, rather than just their ability to coerce to the C "item_type". + self.type = py_object_type + else: + self.type = item_type self.wrap_in_nonecheck_node(env, getting) return self @@ -3715,6 +4198,8 @@ class IndexNode(_IndexingBaseNode): def analyse_as_c_array(self, env, is_slice): base_type = self.base.type self.type = base_type.base_type + if self.type.is_cpp_class: + self.type = PyrexTypes.CReferenceType(self.type) if is_slice: self.type = base_type elif self.index.type.is_pyobject: @@ -3739,7 +4224,7 @@ class IndexNode(_IndexingBaseNode): if self.exception_check: if not setting: self.is_temp = True - if self.exception_value is None: + if needs_cpp_exception_conversion(self): env.use_utility_code(UtilityCode.load_cached("CppExceptionConversion", "CppSupport.cpp")) self.index = self.index.coerce_to(func_type.args[0].type, env) self.type = func_type.return_type @@ -4001,6 +4486,7 @@ class IndexNode(_IndexingBaseNode): return utility_code = None + error_value = None if self.type.is_pyobject: error_value = 'NULL' if self.index.type.is_int: @@ -4036,8 +4522,8 @@ class IndexNode(_IndexingBaseNode): error_value = '-1' utility_code = UtilityCode.load_cached("GetItemIntByteArray", "StringTools.c") elif not (self.base.type.is_cpp_class and self.exception_check): - assert False, "unexpected type %s and base type %s for indexing" % ( - self.type, self.base.type) + assert False, "unexpected type %s and base type %s for indexing 
(%s)" % ( + self.type, self.base.type, self.pos) if utility_code is not None: code.globalstate.use_utility_code(utility_code) @@ -4064,7 +4550,7 @@ class IndexNode(_IndexingBaseNode): self.extra_index_params(code), code.error_goto_if(error_check % self.result(), self.pos))) if self.type.is_pyobject: - code.put_gotref(self.py_result()) + self.generate_gotref(code) def generate_setitem_code(self, value_code, code): if self.index.type.is_int: @@ -4100,7 +4586,7 @@ class IndexNode(_IndexingBaseNode): self.pos)) def generate_assignment_code(self, rhs, code, overloaded_assignment=False, - exception_check=None, exception_value=None): + exception_check=None, exception_value=None): self.generate_subexpr_evaluation_code(code) if self.type.is_pyobject: @@ -4109,8 +4595,7 @@ class IndexNode(_IndexingBaseNode): value_code = self._check_byte_value(code, rhs) self.generate_setitem_code(value_code, code) elif self.base.type.is_cpp_class and self.exception_check and self.exception_check == '+': - if overloaded_assignment and exception_check and \ - self.exception_value != exception_value: + if overloaded_assignment and exception_check and self.exception_value != exception_value: # Handle the case that both the index operator and the assignment # operator have a c++ exception handler and they are not the same. 
translate_double_cpp_exception(code, self.pos, self.type, @@ -4357,11 +4842,11 @@ class BufferIndexNode(_IndexingBaseNode): manage_ref=False) rhs_code = rhs.result() code.putln("%s = %s;" % (ptr, ptrexpr)) - code.put_xgotref("*%s" % ptr) + code.put_xgotref("*%s" % ptr, self.buffer_type.dtype) code.putln("__Pyx_INCREF(%s); __Pyx_XDECREF(*%s);" % ( rhs_code, ptr)) code.putln("*%s %s= %s;" % (ptr, op, rhs_code)) - code.put_xgiveref("*%s" % ptr) + code.put_xgiveref("*%s" % ptr, self.buffer_type.dtype) code.funcstate.release_temp(ptr) else: # Simple case @@ -4627,7 +5112,7 @@ class MemoryViewSliceNode(MemoryViewIndexNode): assert not list(it) buffer_entry.generate_buffer_slice_code( - code, self.original_indices, self.result(), + code, self.original_indices, self.result(), self.type, have_gil=have_gil, have_slices=have_slices, directives=code.globalstate.directives) @@ -4730,8 +5215,17 @@ class MemoryCopyScalar(MemoryCopyNode): code.putln("%s __pyx_temp_slice = %s;" % (slice_decl, self.dst.result())) dst_temp = "__pyx_temp_slice" + force_strided = False + indices = self.dst.original_indices + for idx in indices: + if isinstance(idx, SliceNode) and not (idx.start.is_none and + idx.stop.is_none and + idx.step.is_none): + force_strided = True + slice_iter_obj = MemoryView.slice_iter(self.dst.type, dst_temp, - self.dst.type.ndim, code) + self.dst.type.ndim, code, + force_strided=force_strided) p = slice_iter_obj.start_loops() if dtype.is_pyobject: @@ -4753,8 +5247,10 @@ class SliceIndexNode(ExprNode): # start ExprNode or None # stop ExprNode or None # slice ExprNode or None constant slice object + # nogil bool used internally subexprs = ['base', 'start', 'stop', 'slice'] + nogil = False slice = None @@ -4924,7 +5420,7 @@ class SliceIndexNode(ExprNode): def analyse_as_type(self, env): base_type = self.base.analyse_as_type(env) - if base_type and not base_type.is_pyobject: + if base_type: if not self.start and not self.stop: # memory view from . 
import MemoryView @@ -4940,7 +5436,10 @@ class SliceIndexNode(ExprNode): base_type, MemoryView.get_axes_specs(env, [slice_node])) return None - nogil_check = Node.gil_error + def nogil_check(self, env): + self.nogil = env.nogil + return super(SliceIndexNode, self).nogil_check(env) + gil_message = "Slicing Python object" get_slice_utility_code = TempitaUtilityCode.load( @@ -5064,15 +5563,14 @@ class SliceIndexNode(ExprNode): start_code, stop_code, code.error_goto_if_null(result, self.pos))) - code.put_gotref(self.py_result()) + self.generate_gotref(code) def generate_assignment_code(self, rhs, code, overloaded_assignment=False, - exception_check=None, exception_value=None): + exception_check=None, exception_value=None): self.generate_subexpr_evaluation_code(code) if self.type.is_pyobject: code.globalstate.use_utility_code(self.set_slice_utility_code) - (has_c_start, has_c_stop, c_start, c_stop, - py_start, py_stop, py_slice) = self.get_slice_config() + has_c_start, has_c_stop, c_start, c_stop, py_start, py_stop, py_slice = self.get_slice_config() code.put_error_if_neg(self.pos, "__Pyx_PyObject_SetSlice(%s, %s, %s, %s, %s, %s, %s, %d, %d, %d)" % ( self.base.py_result(), @@ -5209,11 +5707,15 @@ class SliceIndexNode(ExprNode): if runtime_check: code.putln("if (unlikely((%s) != (%s))) {" % (runtime_check, target_size)) + if self.nogil: + code.put_ensure_gil() code.putln( 'PyErr_Format(PyExc_ValueError, "Assignment to slice of wrong length,' ' expected %%" CYTHON_FORMAT_SSIZE_T "d, got %%" CYTHON_FORMAT_SSIZE_T "d",' ' (Py_ssize_t)(%s), (Py_ssize_t)(%s));' % ( target_size, runtime_check)) + if self.nogil: + code.put_release_ensured_gil() code.putln(code.error_goto(self.pos)) code.putln("}") @@ -5299,9 +5801,9 @@ class SliceNode(ExprNode): self.stop.py_result(), self.step.py_result(), code.error_goto_if_null(self.result(), self.pos))) - code.put_gotref(self.py_result()) + self.generate_gotref(code) if self.is_literal: - code.put_giveref(self.py_result()) + 
self.generate_giveref(code) class SliceIntNode(SliceNode): # start:stop:step in subscript list @@ -5398,6 +5900,9 @@ class CallNode(ExprNode): return PyrexTypes.c_double_type elif function.entry.name in Builtin.types_that_construct_their_instance: return result_type + func_type = self.function.analyse_as_type(env) + if func_type and (func_type.is_struct_or_union or func_type.is_cpp_class): + return func_type return py_object_type def type_dependencies(self, env): @@ -5523,6 +6028,16 @@ class SimpleCallNode(CallNode): except Exception as e: self.compile_time_value_error(e) + @classmethod + def for_cproperty(cls, pos, obj, entry): + # Create a call node for C property access. + property_scope = entry.scope + getter_entry = property_scope.lookup_here(entry.name) + assert getter_entry, "Getter not found in scope %s: %s" % (property_scope, property_scope.entries) + function = NameNode(pos, name=entry.name, entry=getter_entry, type=getter_entry.type) + node = cls(pos, function=function, args=[obj]) + return node + def analyse_as_type(self, env): attr = self.function.as_cython_attribute() if attr == 'pointer': @@ -5588,6 +6103,7 @@ class SimpleCallNode(CallNode): self.analyse_c_function_call(env) if func_type.exception_check == '+': self.is_temp = True + return self def function_type(self): @@ -5639,8 +6155,8 @@ class SimpleCallNode(CallNode): else: alternatives = overloaded_entry.all_alternatives() - entry = PyrexTypes.best_match( - [arg.type for arg in args], alternatives, self.pos, env, args) + entry = PyrexTypes.best_match([arg.type for arg in args], + alternatives, self.pos, env, args) if not entry: self.type = PyrexTypes.error_type @@ -5723,7 +6239,7 @@ class SimpleCallNode(CallNode): # but we must make sure it cannot be collected # before we return from the function, so we create # an owned temp reference to it - if i > 0: # first argument doesn't matter + if i > 0: # first argument doesn't matter some_args_in_temps = True arg = arg.coerce_to_temp(env) args[i] = 
arg @@ -5752,7 +6268,7 @@ class SimpleCallNode(CallNode): # case) for i in range(actual_nargs-1): if i == 0 and self.self is not None: - continue # self is ok + continue # self is ok arg = args[i] if arg.nonlocally_immutable(): # locals, C functions, unassignable types are safe. @@ -5768,7 +6284,7 @@ class SimpleCallNode(CallNode): else: #self.args[i] = arg.coerce_to_temp(env) # instead: issue a warning - if i > 0 or i == 1 and self.self is not None: # skip first arg + if i > 0 or i == 1 and self.self is not None: # skip first arg warning(arg.pos, "Argument evaluation order in C function call is undefined and may not be as expected", 0) break @@ -5797,15 +6313,9 @@ class SimpleCallNode(CallNode): if self.is_temp and self.type.is_reference: self.type = PyrexTypes.CFakeReferenceType(self.type.ref_base_type) - # Called in 'nogil' context? - self.nogil = env.nogil - if (self.nogil and - func_type.exception_check and - func_type.exception_check != '+'): - env.use_utility_code(pyerr_occurred_withgil_utility_code) # C++ exception handler if func_type.exception_check == '+': - if func_type.exception_value is None: + if needs_cpp_exception_conversion(func_type): env.use_utility_code(UtilityCode.load_cached("CppExceptionConversion", "CppSupport.cpp")) self.overflowcheck = env.directives['overflowcheck'] @@ -5824,8 +6334,8 @@ class SimpleCallNode(CallNode): expected_nargs = max_nargs - func_type.optional_arg_count actual_nargs = len(self.args) for formal_arg, actual_arg in args[:expected_nargs]: - arg_code = actual_arg.result_as(formal_arg.type) - arg_list_code.append(arg_code) + arg_code = actual_arg.move_result_rhs_as(formal_arg.type) + arg_list_code.append(arg_code) if func_type.is_overridable: arg_list_code.append(str(int(self.wrapper_call or self.function.entry.is_unbound_cmethod))) @@ -5838,7 +6348,7 @@ class SimpleCallNode(CallNode): arg_list_code.append(optional_args) for actual_arg in self.args[len(formal_args):]: - arg_list_code.append(actual_arg.result()) + 
arg_list_code.append(actual_arg.move_result_rhs()) result = "%s(%s)" % (self.function.result(), ', '.join(arg_list_code)) return result @@ -5899,7 +6409,7 @@ class SimpleCallNode(CallNode): arg.py_result(), code.error_goto_if_null(self.result(), self.pos))) - code.put_gotref(self.py_result()) + self.generate_gotref(code) for subexpr in subexprs: if subexpr is not None: @@ -5918,8 +6428,9 @@ class SimpleCallNode(CallNode): self.function.py_result(), arg_code, code.error_goto_if_null(self.result(), self.pos))) - code.put_gotref(self.py_result()) + self.generate_gotref(code) elif func_type.is_cfunction: + nogil = not code.funcstate.gil_owned if self.has_optional_args: actual_nargs = len(self.args) expected_nargs = len(func_type.args) - func_type.optional_arg_count @@ -5947,7 +6458,9 @@ class SimpleCallNode(CallNode): if exc_val is not None: exc_checks.append("%s == %s" % (self.result(), func_type.return_type.cast_code(exc_val))) if exc_check: - if self.nogil: + if nogil: + code.globalstate.use_utility_code( + UtilityCode.load_cached("ErrOccurredWithGIL", "Exceptions.c")) exc_checks.append("__Pyx_ErrOccurredWithGIL()") else: exc_checks.append("PyErr_Occurred()") @@ -5965,7 +6478,7 @@ class SimpleCallNode(CallNode): if func_type.exception_check == '+': translate_cpp_exception(code, self.pos, '%s%s;' % (lhs, rhs), self.result() if self.type.is_pyobject else None, - func_type.exception_value, self.nogil) + func_type.exception_value, nogil) else: if exc_checks: goto_error = code.error_goto_if(" && ".join(exc_checks), self.pos) @@ -5973,7 +6486,7 @@ class SimpleCallNode(CallNode): goto_error = "" code.putln("%s%s; %s" % (lhs, rhs, goto_error)) if self.type.is_pyobject and self.result(): - code.put_gotref(self.py_result()) + self.generate_gotref(code) if self.has_optional_args: code.funcstate.release_temp(self.opt_arg_struct) @@ -6039,10 +6552,8 @@ class PyMethodCallNode(SimpleCallNode): self_arg = code.funcstate.allocate_temp(py_object_type, manage_ref=True) code.putln("%s 
= NULL;" % self_arg) - arg_offset_cname = None - if len(args) > 1: - arg_offset_cname = code.funcstate.allocate_temp(PyrexTypes.c_int_type, manage_ref=False) - code.putln("%s = 0;" % arg_offset_cname) + arg_offset_cname = code.funcstate.allocate_temp(PyrexTypes.c_int_type, manage_ref=False) + code.putln("%s = 0;" % arg_offset_cname) def attribute_is_likely_method(attr): obj = attr.obj @@ -6074,116 +6585,35 @@ class PyMethodCallNode(SimpleCallNode): code.put_incref(self_arg, py_object_type) code.put_incref("function", py_object_type) # free method object as early to possible to enable reuse from CPython's freelist - code.put_decref_set(function, "function") - if len(args) > 1: - code.putln("%s = 1;" % arg_offset_cname) + code.put_decref_set(function, py_object_type, "function") + code.putln("%s = 1;" % arg_offset_cname) code.putln("}") code.putln("}") - if not args: - # fastest special case: try to avoid tuple creation - code.globalstate.use_utility_code( - UtilityCode.load_cached("PyObjectCallNoArg", "ObjectHandling.c")) - code.globalstate.use_utility_code( - UtilityCode.load_cached("PyObjectCallOneArg", "ObjectHandling.c")) - code.putln( - "%s = (%s) ? __Pyx_PyObject_CallOneArg(%s, %s) : __Pyx_PyObject_CallNoArg(%s);" % ( - self.result(), self_arg, - function, self_arg, - function)) - code.put_xdecref_clear(self_arg, py_object_type) - code.funcstate.release_temp(self_arg) - code.putln(code.error_goto_if_null(self.result(), self.pos)) - code.put_gotref(self.py_result()) - elif len(args) == 1: - # fastest special case: try to avoid tuple creation - code.globalstate.use_utility_code( - UtilityCode.load_cached("PyObjectCall2Args", "ObjectHandling.c")) - code.globalstate.use_utility_code( - UtilityCode.load_cached("PyObjectCallOneArg", "ObjectHandling.c")) - arg = args[0] - code.putln( - "%s = (%s) ? 
__Pyx_PyObject_Call2Args(%s, %s, %s) : __Pyx_PyObject_CallOneArg(%s, %s);" % ( - self.result(), self_arg, - function, self_arg, arg.py_result(), - function, arg.py_result())) - code.put_xdecref_clear(self_arg, py_object_type) - code.funcstate.release_temp(self_arg) - arg.generate_disposal_code(code) - arg.free_temps(code) - code.putln(code.error_goto_if_null(self.result(), self.pos)) - code.put_gotref(self.py_result()) - else: - code.globalstate.use_utility_code( - UtilityCode.load_cached("PyFunctionFastCall", "ObjectHandling.c")) - code.globalstate.use_utility_code( - UtilityCode.load_cached("PyCFunctionFastCall", "ObjectHandling.c")) - for test_func, call_prefix in [('PyFunction_Check', 'Py'), ('__Pyx_PyFastCFunction_Check', 'PyC')]: - code.putln("#if CYTHON_FAST_%sCALL" % call_prefix.upper()) - code.putln("if (%s(%s)) {" % (test_func, function)) - code.putln("PyObject *%s[%d] = {%s, %s};" % ( - Naming.quick_temp_cname, - len(args)+1, - self_arg, - ', '.join(arg.py_result() for arg in args))) - code.putln("%s = __Pyx_%sFunction_FastCall(%s, %s+1-%s, %d+%s); %s" % ( - self.result(), - call_prefix, - function, - Naming.quick_temp_cname, - arg_offset_cname, - len(args), - arg_offset_cname, - code.error_goto_if_null(self.result(), self.pos))) - code.put_xdecref_clear(self_arg, py_object_type) - code.put_gotref(self.py_result()) - for arg in args: - arg.generate_disposal_code(code) - code.putln("} else") - code.putln("#endif") - - code.putln("{") - args_tuple = code.funcstate.allocate_temp(py_object_type, manage_ref=True) - code.putln("%s = PyTuple_New(%d+%s); %s" % ( - args_tuple, len(args), arg_offset_cname, - code.error_goto_if_null(args_tuple, self.pos))) - code.put_gotref(args_tuple) - - if len(args) > 1: - code.putln("if (%s) {" % self_arg) - code.putln("__Pyx_GIVEREF(%s); PyTuple_SET_ITEM(%s, 0, %s); %s = NULL;" % ( - self_arg, args_tuple, self_arg, self_arg)) # stealing owned ref in this case - code.funcstate.release_temp(self_arg) - if len(args) > 1: - 
code.putln("}") - - for i, arg in enumerate(args): - arg.make_owned_reference(code) - code.put_giveref(arg.py_result()) - code.putln("PyTuple_SET_ITEM(%s, %d+%s, %s);" % ( - args_tuple, i, arg_offset_cname, arg.py_result())) - if len(args) > 1: - code.funcstate.release_temp(arg_offset_cname) - - for arg in args: - arg.generate_post_assignment_code(code) - arg.free_temps(code) - - code.globalstate.use_utility_code( - UtilityCode.load_cached("PyObjectCall", "ObjectHandling.c")) - code.putln( - "%s = __Pyx_PyObject_Call(%s, %s, NULL); %s" % ( - self.result(), - function, args_tuple, - code.error_goto_if_null(self.result(), self.pos))) - code.put_gotref(self.py_result()) + # actually call the function + code.globalstate.use_utility_code( + UtilityCode.load_cached("PyObjectFastCall", "ObjectHandling.c")) - code.put_decref_clear(args_tuple, py_object_type) - code.funcstate.release_temp(args_tuple) + code.putln("{") + code.putln("PyObject *__pyx_callargs[%d] = {%s, %s};" % ( + len(args)+1, + self_arg, + ', '.join(arg.py_result() for arg in args))) + code.putln("%s = __Pyx_PyObject_FastCall(%s, __pyx_callargs+1-%s, %d+%s);" % ( + self.result(), + function, + arg_offset_cname, + len(args), + arg_offset_cname)) - if len(args) == 1: - code.putln("}") - code.putln("}") # !CYTHON_FAST_PYCALL + code.put_xdecref_clear(self_arg, py_object_type) + code.funcstate.release_temp(self_arg) + code.funcstate.release_temp(arg_offset_cname) + for arg in args: + arg.generate_disposal_code(code) + arg.free_temps(code) + code.putln(code.error_goto_if_null(self.result(), self.pos)) + self.generate_gotref(code) if reuse_function_temp: self.function.generate_disposal_code(code) @@ -6191,6 +6621,7 @@ class PyMethodCallNode(SimpleCallNode): else: code.put_decref_clear(function, py_object_type) code.funcstate.release_temp(function) + code.putln("}") class InlinedDefNodeCallNode(CallNode): @@ -6241,7 +6672,7 @@ class InlinedDefNodeCallNode(CallNode): # but we must make sure it cannot be collected # 
before we return from the function, so we create # an owned temp reference to it - if i > 0: # first argument doesn't matter + if i > 0: # first argument doesn't matter some_args_in_temps = True arg = arg.coerce_to_temp(env) self.args[i] = arg @@ -6288,7 +6719,7 @@ class InlinedDefNodeCallNode(CallNode): self.function.def_node.entry.pyfunc_cname, arg_code, code.error_goto_if_null(self.result(), self.pos))) - code.put_gotref(self.py_result()) + self.generate_gotref(code) class PythonCapiFunctionNode(ExprNode): @@ -6358,7 +6789,7 @@ class CachedBuiltinMethodCallNode(CallNode): self.result(), call_code, code.error_goto_if_null(self.result(), self.pos) )) - code.put_gotref(self.result()) + self.generate_gotref(code) class GeneralCallNode(CallNode): @@ -6449,7 +6880,7 @@ class GeneralCallNode(CallNode): kwargs = self.keyword_args declared_args = function_type.args if entry.is_cmethod: - declared_args = declared_args[1:] # skip 'self' + declared_args = declared_args[1:] # skip 'self' if len(pos_args) > len(declared_args): error(self.pos, "function call got too many positional arguments, " @@ -6457,8 +6888,10 @@ class GeneralCallNode(CallNode): len(pos_args))) return None - matched_args = set([ arg.name for arg in declared_args[:len(pos_args)] - if arg.name ]) + matched_args = { + arg.name for arg in declared_args[:len(pos_args)] + if arg.name + } unmatched_args = declared_args[len(pos_args):] matched_kwargs_count = 0 args = list(pos_args) @@ -6576,7 +7009,7 @@ class GeneralCallNode(CallNode): self.positional_args.py_result(), kwargs, code.error_goto_if_null(self.result(), self.pos))) - code.put_gotref(self.py_result()) + self.generate_gotref(code) class AsTupleNode(ExprNode): @@ -6618,7 +7051,7 @@ class AsTupleNode(ExprNode): self.result(), cfunc, self.arg.py_result(), code.error_goto_if_null(self.result(), self.pos))) - code.put_gotref(self.py_result()) + self.generate_gotref(code) class MergedDictNode(ExprNode): @@ -6711,16 +7144,18 @@ class MergedDictNode(ExprNode): 
self.result(), item.py_result(), code.error_goto_if_null(self.result(), item.pos))) - code.put_gotref(self.result()) + self.generate_gotref(code) item.generate_disposal_code(code) if item.type is not dict_type: code.putln('} else {') - code.putln("%s = PyObject_CallFunctionObjArgs((PyObject*)&PyDict_Type, %s, NULL); %s" % ( + code.globalstate.use_utility_code(UtilityCode.load_cached( + "PyObjectCallOneArg", "ObjectHandling.c")) + code.putln("%s = __Pyx_PyObject_CallOneArg((PyObject*)&PyDict_Type, %s); %s" % ( self.result(), item.py_result(), code.error_goto_if_null(self.result(), self.pos))) - code.put_gotref(self.py_result()) + self.generate_gotref(code) item.generate_disposal_code(code) code.putln('}') item.free_temps(code) @@ -6792,7 +7227,6 @@ class AttributeNode(ExprNode): is_attribute = 1 subexprs = ['obj'] - type = PyrexTypes.error_type entry = None is_called = 0 needs_none_check = True @@ -6822,6 +7256,35 @@ class AttributeNode(ExprNode): self.entry = entry.as_variable self.analyse_as_python_attribute(env) return self + elif entry and entry.is_cfunction and self.obj.type is not Builtin.type_type: + # "bound" cdef function. + # This implementation is likely a little inefficient and could be improved. 
+ # Essentially it does: + # __import__("functools").partial(coerce_to_object(self), self.obj) + from .UtilNodes import EvalWithTempExprNode, ResultRefNode + # take self.obj out to a temp because it's used twice + obj_node = ResultRefNode(self.obj, type=self.obj.type) + obj_node.result_ctype = self.obj.result_ctype + self.obj = obj_node + unbound_node = ExprNode.coerce_to(self, dst_type, env) + utility_code=UtilityCode.load_cached( + "PyMethodNew2Arg", "ObjectHandling.c" + ) + func_type = PyrexTypes.CFuncType( + PyrexTypes.py_object_type, [ + PyrexTypes.CFuncTypeArg("func", PyrexTypes.py_object_type, None), + PyrexTypes.CFuncTypeArg("self", PyrexTypes.py_object_type, None) + ], + ) + binding_call = PythonCapiCallNode( + self.pos, + function_name="__Pyx_PyMethod_New2Arg", + func_type=func_type, + args=[unbound_node, obj_node], + utility_code=utility_code, + ) + complete_call = EvalWithTempExprNode(obj_node, binding_call) + return complete_call.analyse_types(env) return ExprNode.coerce_to(self, dst_type, env) def calculate_constant_result(self): @@ -6871,7 +7334,7 @@ class AttributeNode(ExprNode): return self.type def analyse_target_declaration(self, env): - pass + self.is_target = True def analyse_target_types(self, env): node = self.analyse_types(env, target = 1) @@ -6882,6 +7345,8 @@ class AttributeNode(ExprNode): return node def analyse_types(self, env, target = 0): + if not self.type: + self.type = PyrexTypes.error_type # default value if it isn't analysed successfully self.initialized_check = env.directives['initializedcheck'] node = self.analyse_as_cimported_attribute_node(env, target) if node is None and not target: @@ -6889,7 +7354,7 @@ class AttributeNode(ExprNode): if node is None: node = self.analyse_as_ordinary_attribute_node(env, target) assert node is not None - if node.entry: + if (node.is_attribute or node.is_name) and node.entry: node.entry.used = True if node.is_attribute: node.wrap_obj_in_nonecheck(env) @@ -6908,6 +7373,7 @@ class 
AttributeNode(ExprNode): or entry.is_type or entry.is_const): return self.as_name_node(env, entry, target) if self.is_cimported_module_without_shadow(env): + # TODO: search for submodule error(self.pos, "cimported module has no attribute '%s'" % self.attribute) return self return None @@ -6930,31 +7396,13 @@ class AttributeNode(ExprNode): return None ubcm_entry = entry else: - # Create a temporary entry describing the C method - # as an ordinary function. - if entry.func_cname and not hasattr(entry.type, 'op_arg_struct'): - cname = entry.func_cname - if entry.type.is_static_method or ( - env.parent_scope and env.parent_scope.is_cpp_class_scope): - ctype = entry.type - elif type.is_cpp_class: - error(self.pos, "%s not a static member of %s" % (entry.name, type)) - ctype = PyrexTypes.error_type - else: - # Fix self type. - ctype = copy.copy(entry.type) - ctype.args = ctype.args[:] - ctype.args[0] = PyrexTypes.CFuncTypeArg('self', type, 'self', None) - else: - cname = "%s->%s" % (type.vtabptr_cname, entry.cname) - ctype = entry.type - ubcm_entry = Symtab.Entry(entry.name, cname, ctype) - ubcm_entry.is_cfunction = 1 - ubcm_entry.func_cname = entry.func_cname - ubcm_entry.is_unbound_cmethod = 1 - ubcm_entry.scope = entry.scope + ubcm_entry = self._create_unbound_cmethod_entry(type, entry, env) + ubcm_entry.overloaded_alternatives = [ + self._create_unbound_cmethod_entry(type, overloaded_alternative, env) + for overloaded_alternative in entry.overloaded_alternatives + ] return self.as_name_node(env, ubcm_entry, target=False) - elif type.is_enum: + elif type.is_enum or type.is_cpp_enum: if self.attribute in type.values: for entry in type.entry.enum_values: if entry.name == self.attribute: @@ -6965,13 +7413,39 @@ class AttributeNode(ExprNode): error(self.pos, "%s not a known value of %s" % (self.attribute, type)) return None + def _create_unbound_cmethod_entry(self, type, entry, env): + # Create a temporary entry describing the unbound C method in `entry` + # as an 
ordinary function. + if entry.func_cname and entry.type.op_arg_struct is None: + cname = entry.func_cname + if entry.type.is_static_method or ( + env.parent_scope and env.parent_scope.is_cpp_class_scope): + ctype = entry.type + elif type.is_cpp_class: + error(self.pos, "%s not a static member of %s" % (entry.name, type)) + ctype = PyrexTypes.error_type + else: + # Fix self type. + ctype = copy.copy(entry.type) + ctype.args = ctype.args[:] + ctype.args[0] = PyrexTypes.CFuncTypeArg('self', type, 'self', None) + else: + cname = "%s->%s" % (type.vtabptr_cname, entry.cname) + ctype = entry.type + ubcm_entry = Symtab.Entry(entry.name, cname, ctype) + ubcm_entry.is_cfunction = 1 + ubcm_entry.func_cname = entry.func_cname + ubcm_entry.is_unbound_cmethod = 1 + ubcm_entry.scope = entry.scope + return ubcm_entry + def analyse_as_type(self, env): module_scope = self.obj.analyse_as_module(env) if module_scope: return module_scope.lookup_type(self.attribute) if not self.obj.is_string_literal: base_type = self.obj.analyse_as_type(env) - if base_type and hasattr(base_type, 'scope') and base_type.scope is not None: + if base_type and getattr(base_type, 'scope', None) is not None: return base_type.scope.lookup_type(self.attribute) return None @@ -7022,13 +7496,18 @@ class AttributeNode(ExprNode): self.result_ctype = py_object_type elif target and self.obj.type.is_builtin_type: error(self.pos, "Assignment to an immutable object field") + elif self.entry and self.entry.is_cproperty: + if not target: + return SimpleCallNode.for_cproperty(self.pos, self.obj, self.entry).analyse_types(env) + # TODO: implement writable C-properties? + error(self.pos, "Assignment to a read-only property") #elif self.type.is_memoryviewslice and not target: # self.is_temp = True return self def analyse_attribute(self, env, obj_type = None): # Look up attribute and set self.type and self.member. 
- immutable_obj = obj_type is not None # used during type inference + immutable_obj = obj_type is not None # used during type inference self.is_py_attr = 0 self.member = self.attribute if obj_type is None: @@ -7078,7 +7557,10 @@ class AttributeNode(ExprNode): # fused function go through assignment synthesis # (foo = pycfunction(foo_func_obj)) and need to go through # regular Python lookup as well - if (entry.is_variable and not entry.fused_cfunction) or entry.is_cmethod: + if entry.is_cproperty: + self.type = entry.type + return + elif (entry.is_variable and not entry.fused_cfunction) or entry.is_cmethod: self.type = entry.type self.member = entry.cname return @@ -7105,15 +7587,15 @@ class AttributeNode(ExprNode): if not obj_type.is_pyobject and not obj_type.is_error: # Expose python methods for immutable objects. if (obj_type.is_string or obj_type.is_cpp_string - or obj_type.is_buffer or obj_type.is_memoryviewslice - or obj_type.is_numeric - or (obj_type.is_ctuple and obj_type.can_coerce_to_pyobject(env)) - or (obj_type.is_struct and obj_type.can_coerce_to_pyobject(env))): + or obj_type.is_buffer or obj_type.is_memoryviewslice + or obj_type.is_numeric + or (obj_type.is_ctuple and obj_type.can_coerce_to_pyobject(env)) + or (obj_type.is_struct and obj_type.can_coerce_to_pyobject(env))): if not immutable_obj: self.obj = self.obj.coerce_to_pyobject(env) elif (obj_type.is_cfunction and (self.obj.is_name or self.obj.is_attribute) - and self.obj.entry.as_variable - and self.obj.entry.as_variable.type.is_pyobject): + and self.obj.entry.as_variable + and self.obj.entry.as_variable.type.is_pyobject): # might be an optimised builtin function => unpack it if not immutable_obj: self.obj = self.obj.coerce_to_pyobject(env) @@ -7174,9 +7656,14 @@ class AttributeNode(ExprNode): return NameNode.is_ephemeral(self) def calculate_result_code(self): - #print "AttributeNode.calculate_result_code:", self.member ### - #print "...obj node =", self.obj, "code", self.obj.result() ### - 
#print "...obj type", self.obj.type, "ctype", self.obj.ctype() ### + result = self.calculate_access_code() + if self.entry and self.entry.is_cpp_optional and not self.is_target: + result = "(*%s)" % result + return result + + def calculate_access_code(self): + # Does the job of calculate_result_code but doesn't dereference cpp_optionals + # Therefore allowing access to the holder variable obj = self.obj obj_code = obj.result_as(obj.type) #print "...obj_code =", obj_code ### @@ -7227,7 +7714,7 @@ class AttributeNode(ExprNode): self.obj.py_result(), code.intern_identifier(self.attribute), code.error_goto_if_null(self.result(), self.pos))) - code.put_gotref(self.py_result()) + self.generate_gotref(code) elif self.type.is_memoryviewslice: if self.is_memslice_transpose: # transpose the slice @@ -7238,10 +7725,11 @@ class AttributeNode(ExprNode): return code.putln("%s = %s;" % (self.result(), self.obj.result())) - code.put_incref_memoryviewslice(self.result(), have_gil=True) + code.put_incref_memoryviewslice(self.result(), self.type, + have_gil=True) - T = "__pyx_memslice_transpose(&%s) == 0" - code.putln(code.error_goto_if(T % self.result(), self.pos)) + T = "__pyx_memslice_transpose(&%s)" % self.result() + code.putln(code.error_goto_if_neg(T, self.pos)) elif self.initialized_check: code.putln( 'if (unlikely(!%s.memview)) {' @@ -7249,6 +7737,14 @@ class AttributeNode(ExprNode): '"Memoryview is not initialized");' '%s' '}' % (self.result(), code.error_goto(self.pos))) + elif self.entry.is_cpp_optional and self.initialized_check: + if self.is_target: + undereferenced_result = self.result() + else: + assert not self.is_temp # calculate_access_code() only makes sense for non-temps + undereferenced_result = self.calculate_access_code() + unbound_check_code = self.type.cpp_optional_check_for_null_code(undereferenced_result) + code.put_error_if_unbound(self.pos, self.entry, unbound_check_code=unbound_check_code) else: # result_code contains what is needed, but we may need to 
insert # a check and raise an exception @@ -7261,15 +7757,12 @@ class AttributeNode(ExprNode): def generate_disposal_code(self, code): if self.is_temp and self.type.is_memoryviewslice and self.is_memslice_transpose: # mirror condition for putting the memview incref here: - code.put_xdecref_memoryviewslice( - self.result(), have_gil=True) - code.putln("%s.memview = NULL;" % self.result()) - code.putln("%s.data = NULL;" % self.result()) + code.put_xdecref_clear(self.result(), self.type, have_gil=True) else: ExprNode.generate_disposal_code(self, code) def generate_assignment_code(self, rhs, code, overloaded_assignment=False, - exception_check=None, exception_value=None): + exception_check=None, exception_value=None): self.obj.generate_evaluation_code(code) if self.is_py_attr: code.globalstate.use_utility_code( @@ -7282,8 +7775,9 @@ class AttributeNode(ExprNode): rhs.generate_disposal_code(code) rhs.free_temps(code) elif self.obj.type.is_complex: - code.putln("__Pyx_SET_C%s(%s, %s);" % ( + code.putln("__Pyx_SET_C%s%s(%s, %s);" % ( self.member.upper(), + self.obj.type.implementation_suffix, self.obj.result_as(self.obj.type), rhs.result_as(self.ctype()))) rhs.generate_disposal_code(code) @@ -7292,8 +7786,8 @@ class AttributeNode(ExprNode): select_code = self.result() if self.type.is_pyobject and self.use_managed_ref: rhs.make_owned_reference(code) - code.put_giveref(rhs.py_result()) - code.put_gotref(select_code) + rhs.generate_giveref(code) + code.put_gotref(select_code, self.type) code.put_decref(select_code, self.ctype()) elif self.type.is_memoryviewslice: from . 
import MemoryView @@ -7304,7 +7798,7 @@ class AttributeNode(ExprNode): code.putln( "%s = %s;" % ( select_code, - rhs.result_as(self.ctype()))) + rhs.move_result_rhs_as(self.ctype()))) #rhs.result())) rhs.generate_post_assignment_code(code) rhs.free_temps(code) @@ -7333,6 +7827,12 @@ class AttributeNode(ExprNode): style, text = 'c_attr', 'c attribute (%s)' code.annotate(self.pos, AnnotationItem(style, text % self.type, size=len(self.attribute))) + def get_known_standard_library_import(self): + module_name = self.obj.get_known_standard_library_import() + if module_name: + return StringEncoding.EncodedString("%s.%s" % (module_name, self.attribute)) + return None + #------------------------------------------------------------------- # @@ -7435,9 +7935,10 @@ class SequenceNode(ExprNode): arg = arg.analyse_types(env) self.args[i] = arg.coerce_to_pyobject(env) if self.mult_factor: - self.mult_factor = self.mult_factor.analyse_types(env) - if not self.mult_factor.type.is_int: - self.mult_factor = self.mult_factor.coerce_to_pyobject(env) + mult_factor = self.mult_factor.analyse_types(env) + if not mult_factor.type.is_int: + mult_factor = mult_factor.coerce_to_pyobject(env) + self.mult_factor = mult_factor.coerce_to_simple(env) self.is_temp = 1 # not setting self.type here, subtypes do this return self @@ -7539,7 +8040,7 @@ class SequenceNode(ExprNode): len(self.args), ', '.join(arg.py_result() for arg in self.args), code.error_goto_if_null(target, self.pos))) - code.put_gotref(target) + code.put_gotref(target, py_object_type) elif self.type.is_ctuple: for i, arg in enumerate(self.args): code.putln("%s.f%s = %s;" % ( @@ -7556,7 +8057,7 @@ class SequenceNode(ExprNode): code.putln("%s = %s(%s%s); %s" % ( target, create_func, arg_count, size_factor, code.error_goto_if_null(target, self.pos))) - code.put_gotref(target) + code.put_gotref(target, py_object_type) if c_mult: # FIXME: can't use a temp variable here as the code may @@ -7580,7 +8081,7 @@ class SequenceNode(ExprNode): 
arg = self.args[i] if c_mult or not arg.result_in_temp(): code.put_incref(arg.result(), arg.ctype()) - code.put_giveref(arg.py_result()) + arg.generate_giveref(code) code.putln("%s(%s, %s, %s);" % ( set_item_func, target, @@ -7597,7 +8098,7 @@ class SequenceNode(ExprNode): Naming.quick_temp_cname, target, mult_factor.py_result(), code.error_goto_if_null(Naming.quick_temp_cname, self.pos) )) - code.put_gotref(Naming.quick_temp_cname) + code.put_gotref(Naming.quick_temp_cname, py_object_type) code.put_decref(target, py_object_type) code.putln('%s = %s;' % (target, Naming.quick_temp_cname)) code.putln('}') @@ -7619,7 +8120,7 @@ class SequenceNode(ExprNode): self.mult_factor.generate_disposal_code(code) def generate_assignment_code(self, rhs, code, overloaded_assignment=False, - exception_check=None, exception_value=None): + exception_check=None, exception_value=None): if self.starred_assignment: self.generate_starred_assignment_code(rhs, code) else: @@ -7683,10 +8184,12 @@ class SequenceNode(ExprNode): # list/tuple => check size code.putln("Py_ssize_t size = __Pyx_PySequence_SIZE(sequence);") code.putln("if (unlikely(size != %d)) {" % len(self.args)) - code.globalstate.use_utility_code(raise_too_many_values_to_unpack) + code.globalstate.use_utility_code( + UtilityCode.load_cached("RaiseTooManyValuesToUnpack", "ObjectHandling.c")) code.putln("if (size > %d) __Pyx_RaiseTooManyValuesError(%d);" % ( len(self.args), len(self.args))) - code.globalstate.use_utility_code(raise_need_more_values_to_unpack) + code.globalstate.use_utility_code( + UtilityCode.load_cached("RaiseNeedMoreValuesToUnpack", "ObjectHandling.c")) code.putln("else if (size >= 0) __Pyx_RaiseNeedMoreValuesError(size);") # < 0 => exception code.putln(code.error_goto(self.pos)) @@ -7715,7 +8218,7 @@ class SequenceNode(ExprNode): code.putln("%s = PySequence_ITEM(sequence, %d); %s" % ( item.result(), i, code.error_goto_if_null(item.result(), self.pos))) - code.put_gotref(item.result()) + 
code.put_gotref(item.result(), item.type) else: code.putln("{") code.putln("Py_ssize_t i;") @@ -7725,7 +8228,7 @@ class SequenceNode(ExprNode): code.putln("for (i=0; i < %s; i++) {" % len(self.unpacked_items)) code.putln("PyObject* item = PySequence_ITEM(sequence, i); %s" % ( code.error_goto_if_null('item', self.pos))) - code.put_gotref('item') + code.put_gotref('item', py_object_type) code.putln("*(temps[i]) = item;") code.putln("}") code.putln("}") @@ -7749,9 +8252,11 @@ class SequenceNode(ExprNode): code.putln("}") def generate_generic_parallel_unpacking_code(self, code, rhs, unpacked_items, use_loop, terminate=True): - code.globalstate.use_utility_code(raise_need_more_values_to_unpack) - code.globalstate.use_utility_code(UtilityCode.load_cached("IterFinish", "ObjectHandling.c")) - code.putln("Py_ssize_t index = -1;") # must be at the start of a C block! + code.globalstate.use_utility_code( + UtilityCode.load_cached("RaiseNeedMoreValuesToUnpack", "ObjectHandling.c")) + code.globalstate.use_utility_code( + UtilityCode.load_cached("IterFinish", "ObjectHandling.c")) + code.putln("Py_ssize_t index = -1;") # must be at the start of a C block! 
if use_loop: code.putln("PyObject** temps[%s] = {%s};" % ( @@ -7764,11 +8269,11 @@ class SequenceNode(ExprNode): iterator_temp, rhs.py_result(), code.error_goto_if_null(iterator_temp, self.pos))) - code.put_gotref(iterator_temp) + code.put_gotref(iterator_temp, py_object_type) rhs.generate_disposal_code(code) iternext_func = code.funcstate.allocate_temp(self._func_iternext_type, manage_ref=False) - code.putln("%s = Py_TYPE(%s)->tp_iternext;" % ( + code.putln("%s = __Pyx_PyObject_GetIterNextFunc(%s);" % ( iternext_func, iterator_temp)) unpacking_error_label = code.new_label('unpacking_failed') @@ -7777,7 +8282,7 @@ class SequenceNode(ExprNode): code.putln("for (index=0; index < %s; index++) {" % len(unpacked_items)) code.put("PyObject* item = %s; if (unlikely(!item)) " % unpack_code) code.put_goto(unpacking_error_label) - code.put_gotref("item") + code.put_gotref("item", py_object_type) code.putln("*(temps[index]) = item;") code.putln("}") else: @@ -7789,7 +8294,7 @@ class SequenceNode(ExprNode): unpack_code, item.result())) code.put_goto(unpacking_error_label) - code.put_gotref(item.py_result()) + item.generate_gotref(code) if terminate: code.globalstate.use_utility_code( @@ -7842,11 +8347,14 @@ class SequenceNode(ExprNode): starred_target.allocate(code) target_list = starred_target.result() - code.putln("%s = PySequence_List(%s); %s" % ( + code.putln("%s = %s(%s); %s" % ( target_list, + "__Pyx_PySequence_ListKeepNew" if ( + not iterator_temp and rhs.is_temp and rhs.type in (py_object_type, list_type)) + else "PySequence_List", iterator_temp or rhs.py_result(), code.error_goto_if_null(target_list, self.pos))) - code.put_gotref(target_list) + starred_target.generate_gotref(code) if iterator_temp: code.put_decref_clear(iterator_temp, py_object_type) @@ -7855,7 +8363,8 @@ class SequenceNode(ExprNode): rhs.generate_disposal_code(code) if unpacked_fixed_items_right: - code.globalstate.use_utility_code(raise_need_more_values_to_unpack) + 
code.globalstate.use_utility_code( + UtilityCode.load_cached("RaiseNeedMoreValuesToUnpack", "ObjectHandling.c")) length_temp = code.funcstate.allocate_temp(PyrexTypes.c_py_ssize_t_type, manage_ref=False) code.putln('%s = PyList_GET_SIZE(%s);' % (length_temp, target_list)) code.putln("if (unlikely(%s < %d)) {" % (length_temp, len(unpacked_fixed_items_right))) @@ -7877,7 +8386,7 @@ class SequenceNode(ExprNode): code.putln("%s = PySequence_ITEM(%s, %s-%d); " % ( item.py_result(), target_list, length_temp, i+1)) code.putln('#endif') - code.put_gotref(item.py_result()) + item.generate_gotref(code) coerced_arg.generate_evaluation_code(code) code.putln('#if !CYTHON_COMPILING_IN_CPYTHON') @@ -7885,12 +8394,12 @@ class SequenceNode(ExprNode): code.putln('%s = PySequence_GetSlice(%s, 0, %s-%d); %s' % ( sublist_temp, target_list, length_temp, len(unpacked_fixed_items_right), code.error_goto_if_null(sublist_temp, self.pos))) - code.put_gotref(sublist_temp) + code.put_gotref(sublist_temp, py_object_type) code.funcstate.release_temp(length_temp) code.put_decref(target_list, py_object_type) code.putln('%s = %s; %s = NULL;' % (target_list, sublist_temp, sublist_temp)) code.putln('#else') - code.putln('(void)%s;' % sublist_temp) # avoid warning about unused variable + code.putln('CYTHON_UNUSED_VAR(%s);' % sublist_temp) code.funcstate.release_temp(sublist_temp) code.putln('#endif') @@ -7925,6 +8434,12 @@ class TupleNode(SequenceNode): return env.declare_tuple_type(self.pos, arg_types).type def analyse_types(self, env, skip_children=False): + # reset before re-analysing + if self.is_literal: + self.is_literal = False + if self.is_partly_literal: + self.is_partly_literal = False + if len(self.args) == 0: self.is_temp = False self.is_literal = True @@ -7955,7 +8470,7 @@ class TupleNode(SequenceNode): node.is_temp = False node.is_literal = True else: - if not node.mult_factor.type.is_pyobject: + if not node.mult_factor.type.is_pyobject and not node.mult_factor.type.is_int: 
node.mult_factor = node.mult_factor.coerce_to_pyobject(env) node.is_temp = True node.is_partly_literal = True @@ -7977,7 +8492,13 @@ class TupleNode(SequenceNode): return self.coerce_to_ctuple(dst_type, env) elif dst_type is tuple_type or dst_type is py_object_type: coerced_args = [arg.coerce_to_pyobject(env) for arg in self.args] - return TupleNode(self.pos, args=coerced_args, type=tuple_type, is_temp=1).analyse_types(env, skip_children=True) + return TupleNode( + self.pos, + args=coerced_args, + type=tuple_type, + mult_factor=self.mult_factor, + is_temp=1, + ).analyse_types(env, skip_children=True) else: return self.coerce_to_pyobject(env).coerce_to(dst_type, env) elif dst_type.is_ctuple and not self.mult_factor: @@ -8031,15 +8552,23 @@ class TupleNode(SequenceNode): # constant is not yet initialised const_code.mark_pos(self.pos) self.generate_sequence_packing_code(const_code, tuple_target, plain=not self.is_literal) - const_code.put_giveref(tuple_target) + const_code.put_giveref(tuple_target, py_object_type) if self.is_literal: self.result_code = tuple_target + elif self.mult_factor.type.is_int: + code.globalstate.use_utility_code( + UtilityCode.load_cached("PySequenceMultiply", "ObjectHandling.c")) + code.putln('%s = __Pyx_PySequence_Multiply(%s, %s); %s' % ( + self.result(), tuple_target, self.mult_factor.result(), + code.error_goto_if_null(self.result(), self.pos) + )) + self.generate_gotref(code) else: code.putln('%s = PyNumber_Multiply(%s, %s); %s' % ( self.result(), tuple_target, self.mult_factor.py_result(), code.error_goto_if_null(self.result(), self.pos) )) - code.put_gotref(self.py_result()) + self.generate_gotref(code) else: self.type.entry.used = True self.generate_sequence_packing_code(code) @@ -8203,97 +8732,6 @@ class ListNode(SequenceNode): raise InternalError("List type never specified") -class ScopedExprNode(ExprNode): - # Abstract base class for ExprNodes that have their own local - # scope, such as generator expressions. 
- # - # expr_scope Scope the inner scope of the expression - - subexprs = [] - expr_scope = None - - # does this node really have a local scope, e.g. does it leak loop - # variables or not? non-leaking Py3 behaviour is default, except - # for list comprehensions where the behaviour differs in Py2 and - # Py3 (set in Parsing.py based on parser context) - has_local_scope = True - - def init_scope(self, outer_scope, expr_scope=None): - if expr_scope is not None: - self.expr_scope = expr_scope - elif self.has_local_scope: - self.expr_scope = Symtab.GeneratorExpressionScope(outer_scope) - else: - self.expr_scope = None - - def analyse_declarations(self, env): - self.init_scope(env) - - def analyse_scoped_declarations(self, env): - # this is called with the expr_scope as env - pass - - def analyse_types(self, env): - # no recursion here, the children will be analysed separately below - return self - - def analyse_scoped_expressions(self, env): - # this is called with the expr_scope as env - return self - - def generate_evaluation_code(self, code): - # set up local variables and free their references on exit - generate_inner_evaluation_code = super(ScopedExprNode, self).generate_evaluation_code - if not self.has_local_scope or not self.expr_scope.var_entries: - # no local variables => delegate, done - generate_inner_evaluation_code(code) - return - - code.putln('{ /* enter inner scope */') - py_entries = [] - for _, entry in sorted(item for item in self.expr_scope.entries.items() if item[0]): - if not entry.in_closure: - if entry.type.is_pyobject and entry.used: - py_entries.append(entry) - if not py_entries: - # no local Python references => no cleanup required - generate_inner_evaluation_code(code) - code.putln('} /* exit inner scope */') - return - - # must free all local Python references at each exit point - old_loop_labels = code.new_loop_labels() - old_error_label = code.new_error_label() - - generate_inner_evaluation_code(code) - - # normal (non-error) exit - 
self._generate_vars_cleanup(code, py_entries) - - # error/loop body exit points - exit_scope = code.new_label('exit_scope') - code.put_goto(exit_scope) - for label, old_label in ([(code.error_label, old_error_label)] + - list(zip(code.get_loop_labels(), old_loop_labels))): - if code.label_used(label): - code.put_label(label) - self._generate_vars_cleanup(code, py_entries) - code.put_goto(old_label) - code.put_label(exit_scope) - code.putln('} /* exit inner scope */') - - code.set_loop_labels(old_loop_labels) - code.error_label = old_error_label - - def _generate_vars_cleanup(self, code, py_entries): - for entry in py_entries: - if entry.is_cglobal: - code.put_var_gotref(entry) - code.put_decref_set(entry.cname, "Py_None") - else: - code.put_var_xdecref_clear(entry) - - class ComprehensionNode(ScopedExprNode): # A list/set/dict comprehension @@ -8306,8 +8744,14 @@ class ComprehensionNode(ScopedExprNode): return self.type def analyse_declarations(self, env): - self.append.target = self # this is used in the PyList_Append of the inner loop + self.append.target = self # this is used in the PyList_Append of the inner loop self.init_scope(env) + # setup loop scope + if isinstance(self.loop, Nodes._ForInStatNode): + assert isinstance(self.loop.iterator, ScopedExprNode), self.loop.iterator + self.loop.iterator.init_scope(None, env) + else: + assert isinstance(self.loop, Nodes.ForFromStatNode), self.loop def analyse_scoped_declarations(self, env): self.loop.analyse_declarations(env) @@ -8341,7 +8785,7 @@ class ComprehensionNode(ScopedExprNode): self.result(), create_code, code.error_goto_if_null(self.result(), self.pos))) - code.put_gotref(self.result()) + self.generate_gotref(code) self.loop.generate_execution_code(code) def annotate(self, code): @@ -8466,7 +8910,7 @@ class InlinedGeneratorExpressionNode(ExprNode): code.putln("%s = __Pyx_Generator_Next(%s); %s" % ( self.result(), self.gen.result(), code.error_goto_if_null(self.result(), self.pos))) - 
code.put_gotref(self.result()) + self.generate_gotref(code) class MergedSequenceNode(ExprNode): @@ -8574,10 +9018,12 @@ class MergedSequenceNode(ExprNode): else: code.putln("%s = %s(%s); %s" % ( self.result(), - 'PySet_New' if is_set else 'PySequence_List', + 'PySet_New' if is_set + else "__Pyx_PySequence_ListKeepNew" if item.is_temp and item.type in (py_object_type, list_type) + else "PySequence_List", item.py_result(), code.error_goto_if_null(self.result(), self.pos))) - code.put_gotref(self.py_result()) + self.generate_gotref(code) item.generate_disposal_code(code) item.free_temps(code) @@ -8627,7 +9073,7 @@ class MergedSequenceNode(ExprNode): self.result(), Naming.quick_temp_cname, code.error_goto_if_null(self.result(), self.pos))) - code.put_gotref(self.result()) + self.generate_gotref(code) code.putln("}") for helper in sorted(helpers): @@ -8660,7 +9106,7 @@ class SetNode(ExprNode): return False def calculate_constant_result(self): - self.constant_result = set([arg.constant_result for arg in self.args]) + self.constant_result = {arg.constant_result for arg in self.args} def compile_time_value(self, denv): values = [arg.compile_time_value(denv) for arg in self.args] @@ -8677,7 +9123,7 @@ class SetNode(ExprNode): "%s = PySet_New(0); %s" % ( self.result(), code.error_goto_if_null(self.result(), self.pos))) - code.put_gotref(self.py_result()) + self.generate_gotref(code) for arg in self.args: code.put_error_if_neg( self.pos, @@ -8764,7 +9210,7 @@ class DictNode(ExprNode): error(item.key.pos, "Invalid struct field identifier") item.key = StringNode(item.key.pos, value="<error>") else: - key = str(item.key.value) # converts string literals to unicode in Py3 + key = str(item.key.value) # converts string literals to unicode in Py3 member = dst_type.scope.lookup_here(key) if not member: error(item.key.pos, "struct '%s' has no field '%s'" % (dst_type, key)) @@ -8774,8 +9220,7 @@ class DictNode(ExprNode): value = value.arg item.value = value.coerce_to(member.type, env) 
else: - self.type = error_type - error(self.pos, "Cannot interpret dict as type '%s'" % dst_type) + return super(DictNode, self).coerce_to(dst_type, env) return self def release_errors(self): @@ -8799,7 +9244,7 @@ class DictNode(ExprNode): self.result(), len(self.key_value_pairs), code.error_goto_if_null(self.result(), self.pos))) - code.put_gotref(self.py_result()) + self.generate_gotref(code) keys_seen = set() key_type = None @@ -8848,10 +9293,17 @@ class DictNode(ExprNode): if self.exclude_null_values: code.putln('}') else: - code.putln("%s.%s = %s;" % ( - self.result(), - item.key.value, - item.value.result())) + if item.value.type.is_array: + code.putln("memcpy(%s.%s, %s, sizeof(%s));" % ( + self.result(), + item.key.value, + item.value.result(), + item.value.result())) + else: + code.putln("%s.%s = %s;" % ( + self.result(), + item.key.value, + item.value.result())) item.generate_disposal_code(code) item.free_temps(code) @@ -8863,6 +9315,11 @@ class DictNode(ExprNode): for item in self.key_value_pairs: item.annotate(code) + def as_python_dict(self): + # returns a dict with constant keys and Node values + # (only works on DictNodes where the keys are ConstNodes or PyConstNode) + return dict([(key.value, value) for key, value in self.key_value_pairs]) + class DictItemNode(ExprNode): # Represents a single item in a DictNode @@ -8871,7 +9328,7 @@ class DictItemNode(ExprNode): # value ExprNode subexprs = ['key', 'value'] - nogil_check = None # Parent DictNode takes care of it + nogil_check = None # Parent DictNode takes care of it def calculate_constant_result(self): self.constant_result = ( @@ -8927,7 +9384,7 @@ class SortedDictKeysNode(ExprNode): code.putln('%s = PyDict_Keys(%s); %s' % ( self.result(), dict_result, code.error_goto_if_null(self.result(), self.pos))) - code.put_gotref(self.py_result()) + self.generate_gotref(code) else: # originally used PyMapping_Keys() here, but that may return a tuple code.globalstate.use_utility_code(UtilityCode.load_cached( @@ 
-8936,11 +9393,11 @@ class SortedDictKeysNode(ExprNode): code.putln('%s = __Pyx_PyObject_CallMethod0(%s, %s); %s' % ( self.result(), dict_result, keys_cname, code.error_goto_if_null(self.result(), self.pos))) - code.put_gotref(self.py_result()) + self.generate_gotref(code) code.putln("if (unlikely(!PyList_Check(%s))) {" % self.result()) - code.put_decref_set(self.result(), "PySequence_List(%s)" % self.result()) + self.generate_decref_set(code, "PySequence_List(%s)" % self.result()) code.putln(code.error_goto_if_null(self.result(), self.pos)) - code.put_gotref(self.py_result()) + self.generate_gotref(code) code.putln("}") code.put_error_if_neg( self.pos, 'PyList_Sort(%s)' % self.py_result()) @@ -8970,6 +9427,9 @@ class ClassNode(ExprNode, ModuleNameMixin): type = py_object_type is_temp = True + def analyse_annotations(self, env): + pass + def infer_type(self, env): # TODO: could return 'type' in some cases return py_object_type @@ -9008,7 +9468,7 @@ class ClassNode(ExprNode, ModuleNameMixin): qualname, py_mod_name, code.error_goto_if_null(self.result(), self.pos))) - code.put_gotref(self.py_result()) + self.generate_gotref(code) class Py3ClassNode(ExprNode): @@ -9021,9 +9481,11 @@ class Py3ClassNode(ExprNode): # class_def_node PyClassDefNode PyClassDefNode defining this class # calculate_metaclass bool should call CalculateMetaclass() # allow_py2_metaclass bool should look for Py2 metaclass + # force_type bool always create a "new style" class, even with no bases subexprs = [] type = py_object_type + force_type = False is_temp = True def infer_type(self, env): @@ -9038,6 +9500,26 @@ class Py3ClassNode(ExprNode): gil_message = "Constructing Python class" + def analyse_annotations(self, env): + from .AutoDocTransforms import AnnotationWriter + position = self.class_def_node.pos + dict_items = [ + DictItemNode( + entry.pos, + key=IdentifierStringNode(entry.pos, value=entry.name), + value=entry.annotation.string + ) + for entry in env.entries.values() if 
entry.annotation + ] + # Annotations dict shouldn't exist for classes which don't declare any. + if dict_items: + annotations_dict = DictNode(position, key_value_pairs=dict_items) + lhs = NameNode(position, name=StringEncoding.EncodedString(u"__annotations__")) + lhs.entry = env.lookup_here(lhs.name) or env.declare_var(lhs.name, dict_type, position) + node = SingleAssignmentNode(position, lhs=lhs, rhs=annotations_dict) + node.analyse_declarations(env) + self.class_def_node.body.stats.insert(0, node) + def generate_result_code(self, code): code.globalstate.use_utility_code(UtilityCode.load_cached("Py3ClassCreate", "ObjectHandling.c")) cname = code.intern_identifier(self.name) @@ -9045,6 +9527,8 @@ class Py3ClassNode(ExprNode): mkw = class_def_node.mkw.py_result() if class_def_node.mkw else 'NULL' if class_def_node.metaclass: metaclass = class_def_node.metaclass.py_result() + elif self.force_type: + metaclass = "((PyObject*)&PyType_Type)" else: metaclass = "((PyObject*)&__Pyx_DefaultClassType)" code.putln( @@ -9058,7 +9542,7 @@ class Py3ClassNode(ExprNode): self.calculate_metaclass, self.allow_py2_metaclass, code.error_goto_if_null(self.result(), self.pos))) - code.put_gotref(self.py_result()) + self.generate_gotref(code) class PyClassMetaclassNode(ExprNode): @@ -9094,7 +9578,7 @@ class PyClassMetaclassNode(ExprNode): "%s = %s; %s" % ( self.result(), call, code.error_goto_if_null(self.result(), self.pos))) - code.put_gotref(self.py_result()) + self.generate_gotref(code) class PyClassNamespaceNode(ExprNode, ModuleNameMixin): @@ -9136,7 +9620,7 @@ class PyClassNamespaceNode(ExprNode, ModuleNameMixin): py_mod_name, doc_code, code.error_goto_if_null(self.result(), self.pos))) - code.put_gotref(self.py_result()) + self.generate_gotref(code) class ClassCellInjectorNode(ExprNode): @@ -9155,7 +9639,7 @@ class ClassCellInjectorNode(ExprNode): '%s = PyList_New(0); %s' % ( self.result(), code.error_goto_if_null(self.result(), self.pos))) - code.put_gotref(self.result()) + 
self.generate_gotref(code) def generate_injection_code(self, code, classobj_cname): assert self.is_active @@ -9197,7 +9681,6 @@ class PyCFunctionNode(ExprNode, ModuleNameMixin): # from a PyMethodDef struct. # # pymethdef_cname string PyMethodDef structure - # self_object ExprNode or None # binding bool # def_node DefNode the Python function node # module_name EncodedString Name of defining module @@ -9206,7 +9689,6 @@ class PyCFunctionNode(ExprNode, ModuleNameMixin): subexprs = ['code_object', 'defaults_tuple', 'defaults_kwdict', 'annotations_dict'] - self_object = None code_object = None binding = False def_node = None @@ -9255,33 +9737,35 @@ class PyCFunctionNode(ExprNode, ModuleNameMixin): must_use_constants = env.is_c_class_scope or (self.def_node.is_wrapper and env.is_module_scope) for arg in self.def_node.args: - if arg.default and not must_use_constants: - if not arg.default.is_literal: - arg.is_dynamic = True - if arg.type.is_pyobject: - nonliteral_objects.append(arg) + if arg.default: + if not must_use_constants: + if arg.default.is_literal: + arg.default = DefaultLiteralArgNode(arg.pos, arg.default) else: - nonliteral_other.append(arg) - else: - arg.default = DefaultLiteralArgNode(arg.pos, arg.default) - if arg.kw_only: - default_kwargs.append(arg) - else: - default_args.append(arg) + arg.is_dynamic = True + if arg.type.is_pyobject: + nonliteral_objects.append(arg) + else: + nonliteral_other.append(arg) + if arg.default.type and arg.default.type.can_coerce_to_pyobject(env): + if arg.kw_only: + default_kwargs.append(arg) + else: + default_args.append(arg) if arg.annotation: - arg.annotation = self.analyse_annotation(env, arg.annotation) - annotations.append((arg.pos, arg.name, arg.annotation)) + arg.annotation = arg.annotation.analyse_types(env) + annotations.append((arg.pos, arg.name, arg.annotation.string)) for arg in (self.def_node.star_arg, self.def_node.starstar_arg): if arg and arg.annotation: - arg.annotation = self.analyse_annotation(env, 
arg.annotation) - annotations.append((arg.pos, arg.name, arg.annotation)) + arg.annotation = arg.annotation.analyse_types(env) + annotations.append((arg.pos, arg.name, arg.annotation.string)) annotation = self.def_node.return_type_annotation if annotation: - annotation = self.analyse_annotation(env, annotation) - self.def_node.return_type_annotation = annotation - annotations.append((annotation.pos, StringEncoding.EncodedString("return"), annotation)) + self.def_node.return_type_annotation = annotation.analyse_types(env) + annotations.append((annotation.pos, StringEncoding.EncodedString("return"), + annotation.string)) if nonliteral_objects or nonliteral_other: module_scope = env.global_scope() @@ -9289,7 +9773,10 @@ class PyCFunctionNode(ExprNode, ModuleNameMixin): scope = Symtab.StructOrUnionScope(cname) self.defaults = [] for arg in nonliteral_objects: - entry = scope.declare_var(arg.name, arg.type, None, + type_ = arg.type + if type_.is_buffer: + type_ = type_.base + entry = scope.declare_var(arg.name, type_, None, Naming.arg_prefix + arg.name, allow_pyobject=True) self.defaults.append((arg, entry)) @@ -9321,7 +9808,8 @@ class PyCFunctionNode(ExprNode, ModuleNameMixin): value=arg.default) for arg in default_kwargs]) self.defaults_kwdict = defaults_kwdict.analyse_types(env) - else: + elif not self.specialized_cpdefs: + # Fused dispatch functions do not support (dynamic) default arguments, only the specialisations do. if default_args: defaults_tuple = DefaultsTupleNode( self.pos, default_args, self.defaults_struct) @@ -9358,31 +9846,13 @@ class PyCFunctionNode(ExprNode, ModuleNameMixin): for pos, name, value in annotations]) self.annotations_dict = annotations_dict.analyse_types(env) - def analyse_annotation(self, env, annotation): - if annotation is None: - return None - atype = annotation.analyse_as_type(env) - if atype is not None: - # Keep parsed types as strings as they might not be Python representable. 
- annotation = UnicodeNode( - annotation.pos, - value=StringEncoding.EncodedString(atype.declaration_code('', for_display=True))) - annotation = annotation.analyse_types(env) - if not annotation.type.is_pyobject: - annotation = annotation.coerce_to_pyobject(env) - return annotation - def may_be_none(self): return False gil_message = "Constructing Python function" - def self_result_code(self): - if self.self_object is None: - self_result = "NULL" - else: - self_result = self.self_object.py_result() - return self_result + def closure_result_code(self): + return "NULL" def generate_result_code(self, code): if self.binding: @@ -9396,11 +9866,11 @@ class PyCFunctionNode(ExprNode, ModuleNameMixin): '%s = PyCFunction_NewEx(&%s, %s, %s); %s' % ( self.result(), self.pymethdef_cname, - self.self_result_code(), + self.closure_result_code(), py_mod_name, code.error_goto_if_null(self.result(), self.pos))) - code.put_gotref(self.py_result()) + self.generate_gotref(code) def generate_cyfunction_code(self, code): if self.specialized_cpdefs: @@ -9431,6 +9901,9 @@ class PyCFunctionNode(ExprNode, ModuleNameMixin): if def_node.local_scope.parent_scope.is_c_class_scope and not def_node.entry.is_anonymous: flags.append('__Pyx_CYFUNCTION_CCLASS') + if def_node.is_coroutine: + flags.append('__Pyx_CYFUNCTION_COROUTINE') + if flags: flags = ' | '.join(flags) else: @@ -9443,13 +9916,13 @@ class PyCFunctionNode(ExprNode, ModuleNameMixin): self.pymethdef_cname, flags, self.get_py_qualified_name(code), - self.self_result_code(), + self.closure_result_code(), self.get_py_mod_name(code), Naming.moddict_cname, code_object_result, code.error_goto_if_null(self.result(), self.pos))) - code.put_gotref(self.py_result()) + self.generate_gotref(code) if def_node.requires_classobj: assert code.pyclass_stack, "pyclass_stack is empty" @@ -9459,7 +9932,7 @@ class PyCFunctionNode(ExprNode, ModuleNameMixin): 'PyList_Append(%s, %s);' % ( class_node.class_cell.result(), self.result())) - 
code.put_giveref(self.py_result()) + self.generate_giveref(code) if self.defaults: code.putln( @@ -9475,27 +9948,29 @@ class PyCFunctionNode(ExprNode, ModuleNameMixin): if self.defaults_tuple: code.putln('__Pyx_CyFunction_SetDefaultsTuple(%s, %s);' % ( self.result(), self.defaults_tuple.py_result())) - if self.defaults_kwdict: - code.putln('__Pyx_CyFunction_SetDefaultsKwDict(%s, %s);' % ( - self.result(), self.defaults_kwdict.py_result())) - if def_node.defaults_getter and not self.specialized_cpdefs: - # Fused functions do not support dynamic defaults, only their specialisations can have them for now. - code.putln('__Pyx_CyFunction_SetDefaultsGetter(%s, %s);' % ( - self.result(), def_node.defaults_getter.entry.pyfunc_cname)) - if self.annotations_dict: - code.putln('__Pyx_CyFunction_SetAnnotationsDict(%s, %s);' % ( - self.result(), self.annotations_dict.py_result())) + if not self.specialized_cpdefs: + # disable introspection functions for fused dispatcher function since the user never sees it + # TODO: this is mostly disabled because the attributes end up pointing to ones belonging + # to the specializations - ideally this would be fixed instead + if self.defaults_kwdict: + code.putln('__Pyx_CyFunction_SetDefaultsKwDict(%s, %s);' % ( + self.result(), self.defaults_kwdict.py_result())) + if def_node.defaults_getter: + code.putln('__Pyx_CyFunction_SetDefaultsGetter(%s, %s);' % ( + self.result(), def_node.defaults_getter.entry.pyfunc_cname)) + if self.annotations_dict: + code.putln('__Pyx_CyFunction_SetAnnotationsDict(%s, %s);' % ( + self.result(), self.annotations_dict.py_result())) class InnerFunctionNode(PyCFunctionNode): # Special PyCFunctionNode that depends on a closure class - # binding = True - needs_self_code = True + needs_closure_code = True - def self_result_code(self): - if self.needs_self_code: + def closure_result_code(self): + if self.needs_closure_code: return "((PyObject*)%s)" % Naming.cur_scope_cname return "NULL" @@ -9552,10 +10027,17 @@ class 
CodeObjectNode(ExprNode): flags.append('CO_VARARGS') if self.def_node.starstar_arg: flags.append('CO_VARKEYWORDS') - - code.putln("%s = (PyObject*)__Pyx_PyCode_New(%d, %d, %d, 0, %s, %s, %s, %s, %s, %s, %s, %s, %s, %d, %s); %s" % ( + if self.def_node.is_asyncgen: + flags.append('CO_ASYNC_GENERATOR') + elif self.def_node.is_coroutine: + flags.append('CO_COROUTINE') + elif self.def_node.is_generator: + flags.append('CO_GENERATOR') + + code.putln("%s = (PyObject*)__Pyx_PyCode_New(%d, %d, %d, %d, 0, %s, %s, %s, %s, %s, %s, %s, %s, %s, %d, %s); %s" % ( self.result_code, len(func.args) - func.num_kwonly_args, # argcount + func.num_posonly_args, # posonlyargcount (Py3.8+ only) func.num_kwonly_args, # kwonlyargcount (Py3 only) len(self.varnames.args), # nlocals '|'.join(flags) or '0', # flags @@ -9674,11 +10156,14 @@ class LambdaNode(InnerFunctionNode): name = StringEncoding.EncodedString('<lambda>') def analyse_declarations(self, env): + if hasattr(self, "lambda_name"): + # this if-statement makes it safe to run twice + return self.lambda_name = self.def_node.lambda_name = env.next_id('lambda') self.def_node.no_assignment_synthesis = True self.def_node.pymethdef_required = True - self.def_node.analyse_declarations(env) self.def_node.is_cyfunction = True + self.def_node.analyse_declarations(env) self.pymethdef_cname = self.def_node.entry.pymethdef_cname env.add_lambda_def(self.def_node) @@ -9698,11 +10183,22 @@ class GeneratorExpressionNode(LambdaNode): # # loop ForStatNode the for-loop, containing a YieldExprNode # def_node DefNode the underlying generator 'def' node + # call_parameters [ExprNode] (Internal) parameters passed to the DefNode call name = StringEncoding.EncodedString('genexpr') binding = False + child_attrs = LambdaNode.child_attrs + ["call_parameters"] + subexprs = LambdaNode.subexprs + ["call_parameters"] + + def __init__(self, pos, *args, **kwds): + super(GeneratorExpressionNode, self).__init__(pos, *args, **kwds) + self.call_parameters = [] + def 
analyse_declarations(self, env): + if hasattr(self, "genexpr_name"): + # this if-statement makes it safe to run twice + return self.genexpr_name = env.next_id('genexpr') super(GeneratorExpressionNode, self).analyse_declarations(env) # No pymethdef required @@ -9711,15 +10207,24 @@ class GeneratorExpressionNode(LambdaNode): self.def_node.is_cyfunction = False # Force genexpr signature self.def_node.entry.signature = TypeSlots.pyfunction_noargs + # setup loop scope + if isinstance(self.loop, Nodes._ForInStatNode): + assert isinstance(self.loop.iterator, ScopedExprNode) + self.loop.iterator.init_scope(None, env) + else: + assert isinstance(self.loop, Nodes.ForFromStatNode) def generate_result_code(self, code): + args_to_call = ([self.closure_result_code()] + + [ cp.result() for cp in self.call_parameters ]) + args_to_call = ", ".join(args_to_call) code.putln( '%s = %s(%s); %s' % ( self.result(), self.def_node.entry.pyfunc_cname, - self.self_result_code(), + args_to_call, code.error_goto_if_null(self.result(), self.pos))) - code.put_gotref(self.py_result()) + self.generate_gotref(code) class YieldExprNode(ExprNode): @@ -9778,11 +10283,15 @@ class YieldExprNode(ExprNode): for cname, type, manage_ref in code.funcstate.temps_in_use(): save_cname = code.funcstate.closure_temps.allocate_temp(type) saved.append((cname, save_cname, type)) - if type.is_pyobject: - code.put_xgiveref(cname) + if type.is_cpp_class: + code.globalstate.use_utility_code( + UtilityCode.load_cached("MoveIfSupported", "CppSupport.cpp")) + cname = "__PYX_STD_MOVE_IF_SUPPORTED(%s)" % cname + else: + code.put_xgiveref(cname, type) code.putln('%s->%s = %s;' % (Naming.cur_scope_cname, save_cname, cname)) - code.put_xgiveref(Naming.retval_cname) + code.put_xgiveref(Naming.retval_cname, py_object_type) profile = code.globalstate.directives['profile'] linetrace = code.globalstate.directives['linetrace'] if profile or linetrace: @@ -9810,10 +10319,15 @@ class YieldExprNode(ExprNode): code.put_label(label_name) 
for cname, save_cname, type in saved: - code.putln('%s = %s->%s;' % (cname, Naming.cur_scope_cname, save_cname)) + save_cname = "%s->%s" % (Naming.cur_scope_cname, save_cname) + if type.is_cpp_class: + save_cname = "__PYX_STD_MOVE_IF_SUPPORTED(%s)" % save_cname + code.putln('%s = %s;' % (cname, save_cname)) if type.is_pyobject: - code.putln('%s->%s = 0;' % (Naming.cur_scope_cname, save_cname)) - code.put_xgotref(cname) + code.putln('%s = 0;' % save_cname) + code.put_xgotref(cname, type) + elif type.is_memoryviewslice: + code.putln('%s.memview = NULL; %s.data = NULL;' % (save_cname, save_cname)) self.generate_sent_value_handling_code(code, Naming.sent_value_cname) if self.result_is_used: self.allocate_temp_result(code) @@ -9841,7 +10355,7 @@ class _YieldDelegationExprNode(YieldExprNode): self.arg.free_temps(code) elif decref_source: code.put_decref_clear(source_cname, py_object_type) - code.put_xgotref(Naming.retval_cname) + code.put_xgotref(Naming.retval_cname, py_object_type) code.putln("if (likely(%s)) {" % Naming.retval_cname) self.generate_yield_code(code) @@ -9857,7 +10371,7 @@ class _YieldDelegationExprNode(YieldExprNode): # YieldExprNode has allocated the result temp for us code.putln("%s = NULL;" % self.result()) code.put_error_if_neg(self.pos, "__Pyx_PyGen_FetchStopIterationValue(&%s)" % self.result()) - code.put_gotref(self.result()) + self.generate_gotref(code) def handle_iteration_exception(self, code): code.putln("PyObject* exc_type = __Pyx_PyErr_Occurred();") @@ -9949,7 +10463,7 @@ class GlobalsExprNode(AtomicExprNode): code.putln('%s = __Pyx_Globals(); %s' % ( self.result(), code.error_goto_if_null(self.result(), self.pos))) - code.put_gotref(self.result()) + self.generate_gotref(code) class LocalsDictItemNode(DictItemNode): @@ -10037,6 +10551,7 @@ class UnopNode(ExprNode): subexprs = ['operand'] infix = True + is_inc_dec_op = False def calculate_constant_result(self): func = compile_time_unary_operators[self.operator] @@ -10139,7 +10654,7 @@ class 
UnopNode(ExprNode): function, self.operand.py_result(), code.error_goto_if_null(self.result(), self.pos))) - code.put_gotref(self.py_result()) + self.generate_gotref(code) def type_error(self): if not self.operand.type.is_error: @@ -10148,7 +10663,10 @@ class UnopNode(ExprNode): self.type = PyrexTypes.error_type def analyse_cpp_operation(self, env, overload_check=True): - entry = env.lookup_operator(self.operator, [self.operand]) + operand_types = [self.operand.type] + if self.is_inc_dec_op and not self.is_prefix: + operand_types.append(PyrexTypes.c_int_type) + entry = env.lookup_operator_for_types(self.pos, self.operator, operand_types) if overload_check and not entry: self.type_error() return @@ -10157,12 +10675,17 @@ class UnopNode(ExprNode): self.exception_value = entry.type.exception_value if self.exception_check == '+': self.is_temp = True - if self.exception_value is None: + if needs_cpp_exception_conversion(self): env.use_utility_code(UtilityCode.load_cached("CppExceptionConversion", "CppSupport.cpp")) else: self.exception_check = '' self.exception_value = '' - cpp_type = self.operand.type.find_cpp_operation_type(self.operator) + if self.is_inc_dec_op and not self.is_prefix: + cpp_type = self.operand.type.find_cpp_operation_type( + self.operator, operand_type=PyrexTypes.c_int_type + ) + else: + cpp_type = self.operand.type.find_cpp_operation_type(self.operator) if overload_check and cpp_type is None: error(self.pos, "'%s' operator not defined for %s" % ( self.operator, type)) @@ -10291,7 +10814,10 @@ class DereferenceNode(CUnopNode): def analyse_c_operation(self, env): if self.operand.type.is_ptr: - self.type = self.operand.type.base_type + if env.is_cpp: + self.type = PyrexTypes.CReferenceType(self.operand.type.base_type) + else: + self.type = self.operand.type.base_type else: self.type_error() @@ -10301,6 +10827,17 @@ class DereferenceNode(CUnopNode): class DecrementIncrementNode(CUnopNode): # unary ++/-- operator + is_inc_dec_op = True + + def 
type_error(self): + if not self.operand.type.is_error: + if self.is_prefix: + error(self.pos, "No match for 'operator%s' (operand type is '%s')" % + (self.operator, self.operand.type)) + else: + error(self.pos, "No 'operator%s(int)' declared for postfix '%s' (operand type is '%s')" % + (self.operator, self.operator, self.operand.type)) + self.type = PyrexTypes.error_type def analyse_c_operation(self, env): if self.operand.type.is_numeric: @@ -10503,8 +11040,10 @@ class TypecastNode(ExprNode): if self.type.is_complex: operand_result = self.operand.result() if self.operand.type.is_complex: - real_part = self.type.real_type.cast_code("__Pyx_CREAL(%s)" % operand_result) - imag_part = self.type.real_type.cast_code("__Pyx_CIMAG(%s)" % operand_result) + real_part = self.type.real_type.cast_code( + self.operand.type.real_code(operand_result)) + imag_part = self.type.real_type.cast_code( + self.operand.type.imag_code(operand_result)) else: real_part = self.type.real_type.cast_code(operand_result) imag_part = "0" @@ -10717,7 +11256,7 @@ class CythonArrayNode(ExprNode): type_info, code.error_goto_if_null(format_temp, self.pos), )) - code.put_gotref(format_temp) + code.put_gotref(format_temp, py_object_type) buildvalue_fmt = " __PYX_BUILD_PY_SSIZE_T " * len(shapes) code.putln('%s = Py_BuildValue((char*) "(" %s ")", %s); %s' % ( @@ -10726,15 +11265,14 @@ class CythonArrayNode(ExprNode): ", ".join(shapes), code.error_goto_if_null(shapes_temp, self.pos), )) - code.put_gotref(shapes_temp) + code.put_gotref(shapes_temp, py_object_type) - tup = (self.result(), shapes_temp, itemsize, format_temp, - self.mode, self.operand.result()) - code.putln('%s = __pyx_array_new(' - '%s, %s, PyBytes_AS_STRING(%s), ' - '(char *) "%s", (char *) %s);' % tup) - code.putln(code.error_goto_if_null(self.result(), self.pos)) - code.put_gotref(self.result()) + code.putln('%s = __pyx_array_new(%s, %s, PyBytes_AS_STRING(%s), (char *) "%s", (char *) %s); %s' % ( + self.result(), + shapes_temp, itemsize, 
format_temp, self.mode, self.operand.result(), + code.error_goto_if_null(self.result(), self.pos), + )) + self.generate_gotref(code) def dispose(temp): code.put_decref_clear(temp, py_object_type) @@ -10843,7 +11381,11 @@ class SizeofVarNode(SizeofNode): if operand_as_type: self.arg_type = operand_as_type if self.arg_type.is_fused: - self.arg_type = self.arg_type.specialize(env.fused_to_specific) + try: + self.arg_type = self.arg_type.specialize(env.fused_to_specific) + except CannotSpecialize: + error(self.operand.pos, + "Type cannot be specialized since it is not a fused argument to this function") self.__class__ = SizeofTypeNode self.check_type() else: @@ -10864,8 +11406,6 @@ class TypeidNode(ExprNode): # arg_type ExprNode # is_variable boolean - type = PyrexTypes.error_type - subexprs = ['operand'] arg_type = None @@ -10883,19 +11423,25 @@ class TypeidNode(ExprNode): cpp_message = 'typeid operator' def analyse_types(self, env): + if not self.type: + self.type = PyrexTypes.error_type # default value if it isn't analysed successfully self.cpp_check(env) type_info = self.get_type_info_type(env) if not type_info: self.error("The 'libcpp.typeinfo' module must be cimported to use the typeid() operator") return self + if self.operand is None: + return self # already analysed, no need to repeat self.type = type_info - as_type = self.operand.analyse_as_type(env) + as_type = self.operand.analyse_as_specialized_type(env) if as_type: self.arg_type = as_type self.is_type = True + self.operand = None # nothing further uses self.operand - will only cause problems if its used in code generation else: self.arg_type = self.operand.analyse_types(env) self.is_type = False + self.operand = None # nothing further uses self.operand - will only cause problems if its used in code generation if self.arg_type.type.is_pyobject: self.error("Cannot use typeid on a Python object") return self @@ -10937,11 +11483,11 @@ class TypeofNode(ExprNode): literal = None type = py_object_type - subexprs 
= ['literal'] # 'operand' will be ignored after type analysis! + subexprs = ['literal'] # 'operand' will be ignored after type analysis! def analyse_types(self, env): self.operand = self.operand.analyse_types(env) - value = StringEncoding.EncodedString(str(self.operand.type)) #self.operand.type.typeof_name()) + value = StringEncoding.EncodedString(str(self.operand.type)) #self.operand.type.typeof_name()) literal = StringNode(self.pos, value=value) literal = literal.analyse_types(env) self.literal = literal.coerce_to_pyobject(env) @@ -11100,7 +11646,7 @@ class BinopNode(ExprNode): # Used by NumBinopNodes to break up expressions involving multiple # operators so that exceptions can be handled properly. self.is_temp = 1 - if self.exception_value is None: + if needs_cpp_exception_conversion(self): env.use_utility_code(UtilityCode.load_cached("CppExceptionConversion", "CppSupport.cpp")) if func_type.is_ptr: func_type = func_type.base_type @@ -11155,6 +11701,8 @@ class BinopNode(ExprNode): self.operand1.is_ephemeral() or self.operand2.is_ephemeral()) def generate_result_code(self, code): + type1 = self.operand1.type + type2 = self.operand2.type if self.type.is_pythran_expr: code.putln("// Pythran binop") code.putln("__Pyx_call_destructor(%s);" % self.result()) @@ -11171,21 +11719,20 @@ class BinopNode(ExprNode): self.operand1.pythran_result(), self.operator, self.operand2.pythran_result())) - elif self.operand1.type.is_pyobject: + elif type1.is_pyobject or type2.is_pyobject: function = self.py_operation_function(code) - if self.operator == '**': - extra_args = ", Py_None" - else: - extra_args = "" + extra_args = ", Py_None" if self.operator == '**' else "" + op1_result = self.operand1.py_result() if type1.is_pyobject else self.operand1.result() + op2_result = self.operand2.py_result() if type2.is_pyobject else self.operand2.result() code.putln( "%s = %s(%s, %s%s); %s" % ( self.result(), function, - self.operand1.py_result(), - self.operand2.py_result(), + op1_result, + 
op2_result, extra_args, code.error_goto_if_null(self.result(), self.pos))) - code.put_gotref(self.py_result()) + self.generate_gotref(code) elif self.is_temp: # C++ overloaded operators with exception values are currently all # handled through temporaries. @@ -11309,8 +11856,8 @@ class NumBinopNode(BinopNode): def c_types_okay(self, type1, type2): #print "NumBinopNode.c_types_okay:", type1, type2 ### - return (type1.is_numeric or type1.is_enum) \ - and (type2.is_numeric or type2.is_enum) + return (type1.is_numeric or type1.is_enum) \ + and (type2.is_numeric or type2.is_enum) def generate_evaluation_code(self, code): if self.overflow_check: @@ -11421,7 +11968,7 @@ class AddNode(NumBinopNode): def py_operation_function(self, code): type1, type2 = self.operand1.type, self.operand2.type - + func = None if type1 is unicode_type or type2 is unicode_type: if type1 in (unicode_type, str_type) and type2 in (unicode_type, str_type): is_unicode_concat = True @@ -11433,10 +11980,22 @@ class AddNode(NumBinopNode): is_unicode_concat = False if is_unicode_concat: - if self.operand1.may_be_none() or self.operand2.may_be_none(): - return '__Pyx_PyUnicode_ConcatSafe' - else: - return '__Pyx_PyUnicode_Concat' + if self.inplace or self.operand1.is_temp: + code.globalstate.use_utility_code( + UtilityCode.load_cached("UnicodeConcatInPlace", "ObjectHandling.c")) + func = '__Pyx_PyUnicode_Concat' + elif type1 is str_type and type2 is str_type: + code.globalstate.use_utility_code( + UtilityCode.load_cached("StrConcatInPlace", "ObjectHandling.c")) + func = '__Pyx_PyStr_Concat' + + if func: + # any necessary utility code will be got by "NumberAdd" in generate_evaluation_code + if self.inplace or self.operand1.is_temp: + func += 'InPlace' # upper case to indicate unintuitive macro + if self.operand1.may_be_none() or self.operand2.may_be_none(): + func += 'Safe' + return func return super(AddNode, self).py_operation_function(code) @@ -11456,22 +12015,76 @@ class SubNode(NumBinopNode): class 
MulNode(NumBinopNode): # '*' operator. + is_sequence_mul = False + + def analyse_types(self, env): + self.operand1 = self.operand1.analyse_types(env) + self.operand2 = self.operand2.analyse_types(env) + self.is_sequence_mul = self.calculate_is_sequence_mul() + + # TODO: we could also optimise the case of "[...] * 2 * n", i.e. with an existing 'mult_factor' + if self.is_sequence_mul: + operand1 = self.operand1 + operand2 = self.operand2 + if operand1.is_sequence_constructor and operand1.mult_factor is None: + return self.analyse_sequence_mul(env, operand1, operand2) + elif operand2.is_sequence_constructor and operand2.mult_factor is None: + return self.analyse_sequence_mul(env, operand2, operand1) + + self.analyse_operation(env) + return self + + @staticmethod + def is_builtin_seqmul_type(type): + return type.is_builtin_type and type in builtin_sequence_types and type is not memoryview_type + + def calculate_is_sequence_mul(self): + type1 = self.operand1.type + type2 = self.operand2.type + if type1 is long_type or type1.is_int: + # normalise to (X * int) + type1, type2 = type2, type1 + if type2 is long_type or type2.is_int: + if type1.is_string or type1.is_ctuple: + return True + if self.is_builtin_seqmul_type(type1): + return True + return False + + def analyse_sequence_mul(self, env, seq, mult): + assert seq.mult_factor is None + seq = seq.coerce_to_pyobject(env) + seq.mult_factor = mult + return seq.analyse_types(env) + + def coerce_operands_to_pyobjects(self, env): + if self.is_sequence_mul: + # Keep operands as they are, but ctuples must become Python tuples to multiply them. 
+ if self.operand1.type.is_ctuple: + self.operand1 = self.operand1.coerce_to_pyobject(env) + elif self.operand2.type.is_ctuple: + self.operand2 = self.operand2.coerce_to_pyobject(env) + return + super(MulNode, self).coerce_operands_to_pyobjects(env) def is_py_operation_types(self, type1, type2): - if ((type1.is_string and type2.is_int) or - (type2.is_string and type1.is_int)): - return 1 - else: - return NumBinopNode.is_py_operation_types(self, type1, type2) + return self.is_sequence_mul or super(MulNode, self).is_py_operation_types(type1, type2) + + def py_operation_function(self, code): + if self.is_sequence_mul: + code.globalstate.use_utility_code( + UtilityCode.load_cached("PySequenceMultiply", "ObjectHandling.c")) + return "__Pyx_PySequence_Multiply" if self.operand1.type.is_pyobject else "__Pyx_PySequence_Multiply_Left" + return super(MulNode, self).py_operation_function(code) def infer_builtin_types_operation(self, type1, type2): - # let's assume that whatever builtin type you multiply a string with - # will either return a string of the same type or fail with an exception - string_types = (bytes_type, bytearray_type, str_type, basestring_type, unicode_type) - if type1 in string_types and type2.is_builtin_type: - return type1 - if type2 in string_types and type1.is_builtin_type: - return type2 + # let's assume that whatever builtin type you multiply a builtin sequence type with + # will either return a sequence of the same type or fail with an exception + if type1.is_builtin_type and type2.is_builtin_type: + if self.is_builtin_seqmul_type(type1): + return type1 + if self.is_builtin_seqmul_type(type2): + return type2 # multiplication of containers/numbers with an integer value # always (?) 
returns the same type if type1.is_int: @@ -11608,7 +12221,7 @@ class DivNode(NumBinopNode): minus1_check = '(!(((%s)-1) > 0)) && unlikely(%s == (%s)-1)' % ( type_of_op2, self.operand2.result(), type_of_op2) code.putln("else if (sizeof(%s) == sizeof(long) && %s " - " && unlikely(UNARY_NEG_WOULD_OVERFLOW(%s))) {" % ( + " && unlikely(__Pyx_UNARY_NEG_WOULD_OVERFLOW(%s))) {" % ( self.type.empty_declaration_code(), minus1_check, self.operand1.result())) @@ -11676,10 +12289,10 @@ _find_formatting_types = re.compile( br")").findall # These format conversion types can never trigger a Unicode string conversion in Py2. -_safe_bytes_formats = set([ +_safe_bytes_formats = frozenset({ # Excludes 's' and 'r', which can generate non-bytes strings. b'd', b'i', b'o', b'u', b'x', b'X', b'e', b'E', b'f', b'F', b'g', b'G', b'c', b'b', b'a', -]) +}) class ModNode(DivNode): @@ -11779,12 +12392,21 @@ class ModNode(DivNode): class PowNode(NumBinopNode): # '**' operator. + is_cpow = None + type_was_inferred = False # was the result type affected by cpow==False? + # Intended to allow it to be changed if the node is coerced. 
+ + def _check_cpow(self, env): + if self.is_cpow is not None: + return # already set + self.is_cpow = env.directives['cpow'] + + def infer_type(self, env): + self._check_cpow(env) + return super(PowNode, self).infer_type(env) + def analyse_types(self, env): - if not env.directives['cpow']: - # Note - the check here won't catch cpow directives that don't use '**' - # but that's probably OK for a placeholder forward compatibility directive - error(self.pos, "The 'cpow' directive is provided for forward compatibility " - "and must be True") + self._check_cpow(env) return super(PowNode, self).analyse_types(env) def analyse_c_operation(self, env): @@ -11810,6 +12432,48 @@ class PowNode(NumBinopNode): error(self.pos, "got unexpected types for C power operator: %s, %s" % (self.operand1.type, self.operand2.type)) + def compute_c_result_type(self, type1, type2): + from numbers import Real + c_result_type = None + op1_is_definitely_positive = ( + self.operand1.has_constant_result() + and self.operand1.constant_result >= 0 + ) or ( + type1.is_int and type1.signed == 0 # definitely unsigned + ) + type2_is_int = type2.is_int or ( + self.operand2.has_constant_result() and + isinstance(self.operand2.constant_result, Real) and + int(self.operand2.constant_result) == self.operand2.constant_result + ) + needs_widening = False + if self.is_cpow: + c_result_type = super(PowNode, self).compute_c_result_type(type1, type2) + if not self.operand2.has_constant_result(): + needs_widening = ( + isinstance(self.operand2.constant_result, _py_int_types) and self.operand2.constant_result < 0 + ) + elif op1_is_definitely_positive or type2_is_int: # cpow==False + # if type2 is an integer then we can't end up going from real to complex + c_result_type = super(PowNode, self).compute_c_result_type(type1, type2) + if not self.operand2.has_constant_result(): + needs_widening = type2.is_int and type2.signed + if needs_widening: + self.type_was_inferred = True + else: + needs_widening = ( + 
isinstance(self.operand2.constant_result, _py_int_types) and self.operand2.constant_result < 0 + ) + elif self.c_types_okay(type1, type2): + # Allowable result types are double or complex double. + # Return the special "soft complex" type to store it as a + # complex number but with specialized coercions to Python + c_result_type = PyrexTypes.soft_complex_type + self.type_was_inferred = True + if needs_widening: + c_result_type = PyrexTypes.widest_numeric_type(c_result_type, PyrexTypes.c_double_type) + return c_result_type + def calculate_result_code(self): # Work around MSVC overloading ambiguity. def typecast(operand): @@ -11834,6 +12498,50 @@ class PowNode(NumBinopNode): return '__Pyx_PyNumber_PowerOf2' return super(PowNode, self).py_operation_function(code) + def coerce_to(self, dst_type, env): + if dst_type == self.type: + return self + if (self.is_cpow is None and self.type_was_inferred and + (dst_type.is_float or dst_type.is_int)): + # if we're trying to coerce this directly to a C float or int + # then fall back to the cpow == True behaviour since this is + # almost certainly the user intent. 
+ # However, ensure that the operand types are suitable C types + if self.type is PyrexTypes.soft_complex_type: + def check_types(operand, recurse=True): + if operand.type.is_float or operand.type.is_int: + return True, operand + if recurse and isinstance(operand, CoerceToComplexNode): + return check_types(operand.arg, recurse=False), operand.arg + return False, None + msg_detail = "a non-complex C numeric type" + elif dst_type.is_int: + def check_types(operand): + if operand.type.is_int: + return True, operand + else: + # int, int doesn't seem to involve coercion nodes + return False, None + msg_detail = "an integer C numeric type" + else: + def check_types(operand): + return False, None + check_op1, op1 = check_types(self.operand1) + check_op2, op2 = check_types(self.operand2) + if check_op1 and check_op2: + warning(self.pos, "Treating '**' as if 'cython.cpow(True)' since it " + "is directly assigned to a %s. " + "This is likely to be fragile and we recommend setting " + "'cython.cpow' explicitly." % msg_detail) + self.is_cpow = True + self.operand1 = op1 + self.operand2 = op2 + result = self.analyse_types(env) + if result.type != dst_type: + result = result.coerce_to(dst_type, env) + return result + return super(PowNode, self).coerce_to(dst_type, env) + class BoolBinopNode(ExprNode): """ @@ -12080,6 +12788,9 @@ class BoolBinopResultNode(ExprNode): code.putln("}") self.arg.free_temps(code) + def analyse_types(self, env): + return self + class CondExprNode(ExprNode): # Short-circuiting conditional expression. @@ -12217,6 +12928,7 @@ class CmpNode(object): special_bool_cmp_function = None special_bool_cmp_utility_code = None + special_bool_extra_args = [] def infer_type(self, env): # TODO: Actually implement this (after merging with -unstable). 
@@ -12433,6 +13145,21 @@ class CmpNode(object): self.special_bool_cmp_utility_code = UtilityCode.load_cached("StrEquals", "StringTools.c") self.special_bool_cmp_function = "__Pyx_PyString_Equals" return True + elif result_is_bool: + from .Optimize import optimise_numeric_binop + result = optimise_numeric_binop( + "Eq" if self.operator == "==" else "Ne", + self, + PyrexTypes.c_bint_type, + operand1, + self.operand2 + ) + if result: + (self.special_bool_cmp_function, + self.special_bool_cmp_utility_code, + self.special_bool_extra_args, + _) = result + return True elif self.operator in ('in', 'not_in'): if self.operand2.type is Builtin.dict_type: self.operand2 = self.operand2.as_none_safe_node("'NoneType' object is not iterable") @@ -12458,7 +13185,7 @@ class CmpNode(object): return False def generate_operation_code(self, code, result_code, - operand1, op , operand2): + operand1, op, operand2): if self.type.is_pyobject: error_clause = code.error_goto_if_null got_ref = "__Pyx_XGOTREF(%s); " % result_code @@ -12482,6 +13209,9 @@ class CmpNode(object): result2 = operand2.py_result() else: result2 = operand2.result() + special_bool_extra_args_result = ", ".join([ + extra_arg.result() for extra_arg in self.special_bool_extra_args + ]) if self.special_bool_cmp_utility_code: code.globalstate.use_utility_code(self.special_bool_cmp_utility_code) code.putln( @@ -12489,14 +13219,17 @@ class CmpNode(object): result_code, coerce_result, self.special_bool_cmp_function, - result1, result2, richcmp_constants[op], + result1, result2, + special_bool_extra_args_result if self.special_bool_extra_args else richcmp_constants[op], got_ref, error_clause(result_code, self.pos))) elif operand1.type.is_pyobject and op not in ('is', 'is_not'): assert op not in ('in', 'not_in'), op - code.putln("%s = PyObject_RichCompare(%s, %s, %s); %s%s" % ( + assert self.type.is_pyobject or self.type is PyrexTypes.c_bint_type + code.putln("%s = PyObject_RichCompare%s(%s, %s, %s); %s%s" % ( result_code, + "" if 
self.type.is_pyobject else "Bool", operand1.py_result(), operand2.py_result(), richcmp_constants[op], @@ -12563,7 +13296,8 @@ class PrimaryCmpNode(ExprNode, CmpNode): # Instead, we override all the framework methods # which use it. - child_attrs = ['operand1', 'operand2', 'coerced_operand2', 'cascade'] + child_attrs = ['operand1', 'operand2', 'coerced_operand2', 'cascade', + 'special_bool_extra_args'] cascade = None coerced_operand2 = None @@ -12591,6 +13325,12 @@ class PrimaryCmpNode(ExprNode, CmpNode): operand1 = self.operand1.compile_time_value(denv) return self.cascaded_compile_time_value(operand1, denv) + def unify_cascade_type(self): + cdr = self.cascade + while cdr: + cdr.type = self.type + cdr = cdr.cascade + def analyse_types(self, env): self.operand1 = self.operand1.analyse_types(env) self.operand2 = self.operand2.analyse_types(env) @@ -12640,16 +13380,16 @@ class PrimaryCmpNode(ExprNode, CmpNode): elif self.find_special_bool_compare_function(env, self.operand1): if not self.operand1.type.is_pyobject: self.operand1 = self.operand1.coerce_to_pyobject(env) - common_type = None # if coercion needed, the method call above has already done it - self.is_pycmp = False # result is bint + common_type = None # if coercion needed, the method call above has already done it + self.is_pycmp = False # result is bint else: common_type = py_object_type self.is_pycmp = True elif self.find_special_bool_compare_function(env, self.operand1): if not self.operand1.type.is_pyobject: self.operand1 = self.operand1.coerce_to_pyobject(env) - common_type = None # if coercion needed, the method call above has already done it - self.is_pycmp = False # result is bint + common_type = None # if coercion needed, the method call above has already done it + self.is_pycmp = False # result is bint else: common_type = self.find_common_type(env, self.operator, self.operand1) self.is_pycmp = common_type.is_pyobject @@ -12669,10 +13409,7 @@ class PrimaryCmpNode(ExprNode, CmpNode): self.type = 
PyrexTypes.py_object_type else: self.type = PyrexTypes.c_bint_type - cdr = self.cascade - while cdr: - cdr.type = self.type - cdr = cdr.cascade + self.unify_cascade_type() if self.is_pycmp or self.cascade or self.special_bool_cmp_function: # 1) owned reference, 2) reused value, 3) potential function error return value self.is_temp = 1 @@ -12696,7 +13433,7 @@ class PrimaryCmpNode(ExprNode, CmpNode): self.exception_value = func_type.exception_value if self.exception_check == '+': self.is_temp = True - if self.exception_value is None: + if needs_cpp_exception_conversion(self): env.use_utility_code(UtilityCode.load_cached("CppExceptionConversion", "CppSupport.cpp")) if len(func_type.args) == 1: self.operand2 = self.operand2.coerce_to(func_type.args[0].type, env) @@ -12731,6 +13468,7 @@ class PrimaryCmpNode(ExprNode, CmpNode): self.operand2, env, result_is_bool=True) if operand2 is not self.operand2: self.coerced_operand2 = operand2 + self.unify_cascade_type() return self # TODO: check if we can optimise parts of the cascade here return ExprNode.coerce_to_boolean(self, env) @@ -12791,6 +13529,8 @@ class PrimaryCmpNode(ExprNode, CmpNode): def generate_evaluation_code(self, code): self.operand1.generate_evaluation_code(code) self.operand2.generate_evaluation_code(code) + for extra_arg in self.special_bool_extra_args: + extra_arg.generate_evaluation_code(code) if self.is_temp: self.allocate_temp_result(code) self.generate_operation_code(code, self.result(), @@ -12833,11 +13573,12 @@ class CascadedCmpNode(Node, CmpNode): # operand2 ExprNode # cascade CascadedCmpNode - child_attrs = ['operand2', 'coerced_operand2', 'cascade'] + child_attrs = ['operand2', 'coerced_operand2', 'cascade', + 'special_bool_extra_args'] cascade = None coerced_operand2 = None - constant_result = constant_value_not_set # FIXME: where to calculate this? + constant_result = constant_value_not_set # FIXME: where to calculate this? 
def infer_type(self, env): # TODO: Actually implement this (after merging with -unstable). @@ -12897,6 +13638,8 @@ class CascadedCmpNode(Node, CmpNode): if needs_evaluation: operand1.generate_evaluation_code(code) self.operand2.generate_evaluation_code(code) + for extra_arg in self.special_bool_extra_args: + extra_arg.generate_evaluation_code(code) self.generate_operation_code(code, result, operand1, self.operator, self.operand2) if self.cascade: @@ -12984,6 +13727,9 @@ class CoercionNode(ExprNode): code.annotate((file, line, col-1), AnnotationItem( style='coerce', tag='coerce', text='[%s] to [%s]' % (self.arg.type, self.type))) + def analyse_types(self, env): + return self + class CoerceToMemViewSliceNode(CoercionNode): """ @@ -13035,9 +13781,10 @@ class PyTypeTestNode(CoercionNode): exact_builtin_type = True def __init__(self, arg, dst_type, env, notnone=False): - # The arg is know to be a Python object, and + # The arg is known to be a Python object, and # the dst_type is known to be an extension type. 
- assert dst_type.is_extension_type or dst_type.is_builtin_type, "PyTypeTest on non extension type" + assert dst_type.is_extension_type or dst_type.is_builtin_type, \ + "PyTypeTest for %s against non extension type %s" % (arg.type, dst_type) CoercionNode.__init__(self, arg) self.type = dst_type self.result_ctype = arg.ctype() @@ -13088,6 +13835,8 @@ class PyTypeTestNode(CoercionNode): type_test = self.type.type_test_code( self.arg.py_result(), self.notnone, exact=self.exact_builtin_type) + code.globalstate.use_utility_code(UtilityCode.load_cached( + "RaiseUnexpectedTypeError", "ObjectHandling.c")) else: type_test = self.type.type_test_code( self.arg.py_result(), self.notnone) @@ -13131,7 +13880,7 @@ class NoneCheckNode(CoercionNode): self.exception_message = exception_message self.exception_format_args = tuple(exception_format_args or ()) - nogil_check = None # this node only guards an operation that would fail already + nogil_check = None # this node only guards an operation that would fail already def analyse_types(self, env): return self @@ -13254,7 +14003,7 @@ class CoerceToPyTypeNode(CoercionNode): def coerce_to_boolean(self, env): arg_type = self.arg.type if (arg_type == PyrexTypes.c_bint_type or - (arg_type.is_pyobject and arg_type.name == 'bool')): + (arg_type.is_pyobject and arg_type.name == 'bool')): return self.arg.coerce_to_temp(env) else: return CoerceToBooleanNode(self, env) @@ -13278,7 +14027,7 @@ class CoerceToPyTypeNode(CoercionNode): self.target_type), code.error_goto_if_null(self.result(), self.pos))) - code.put_gotref(self.py_result()) + self.generate_gotref(code) class CoerceIntToBytesNode(CoerceToPyTypeNode): @@ -13318,13 +14067,16 @@ class CoerceIntToBytesNode(CoerceToPyTypeNode): code.error_goto_if_null(self.result(), self.pos))) if temp is not None: code.funcstate.release_temp(temp) - code.put_gotref(self.py_result()) + self.generate_gotref(code) class CoerceFromPyTypeNode(CoercionNode): # This node is used to convert a Python object # to a 
C data type. + # Allow 'None' to map to a difference C value independent of the coercion, e.g. to 'NULL' or '0'. + special_none_cvalue = None + def __init__(self, result_type, arg, env): CoercionNode.__init__(self, arg) self.type = result_type @@ -13354,9 +14106,12 @@ class CoerceFromPyTypeNode(CoercionNode): NoneCheckNode.generate_if_needed(self.arg, code, "expected bytes, NoneType found") code.putln(self.type.from_py_call_code( - self.arg.py_result(), self.result(), self.pos, code, from_py_function=from_py_function)) + self.arg.py_result(), self.result(), self.pos, code, + from_py_function=from_py_function, + special_none_cvalue=self.special_none_cvalue, + )) if self.type.is_pyobject: - code.put_gotref(self.py_result()) + self.generate_gotref(code) def nogil_check(self, env): error(self.pos, "Coercion from Python not allowed without the GIL") @@ -13413,6 +14168,9 @@ class CoerceToBooleanNode(CoercionNode): self.arg.py_result(), code.error_goto_if_neg(self.result(), self.pos))) + def analyse_types(self, env): + return self + class CoerceToComplexNode(CoercionNode): @@ -13425,8 +14183,8 @@ class CoerceToComplexNode(CoercionNode): def calculate_result_code(self): if self.arg.type.is_complex: - real_part = "__Pyx_CREAL(%s)" % self.arg.result() - imag_part = "__Pyx_CIMAG(%s)" % self.arg.result() + real_part = self.arg.type.real_code(self.arg.result()) + imag_part = self.arg.type.imag_code(self.arg.result()) else: real_part = self.arg.result() imag_part = "0" @@ -13438,6 +14196,33 @@ class CoerceToComplexNode(CoercionNode): def generate_result_code(self, code): pass + def analyse_types(self, env): + return self + + +def coerce_from_soft_complex(arg, dst_type, env): + from .UtilNodes import HasGilNode + cfunc_type = PyrexTypes.CFuncType( + PyrexTypes.c_double_type, + [ PyrexTypes.CFuncTypeArg("value", PyrexTypes.soft_complex_type, None), + PyrexTypes.CFuncTypeArg("have_gil", PyrexTypes.c_bint_type, None) ], + exception_value="-1", + exception_check=True, + nogil=True # 
We can acquire the GIL internally on failure + ) + call = PythonCapiCallNode( + arg.pos, + "__Pyx_SoftComplexToDouble", + cfunc_type, + utility_code = UtilityCode.load_cached("SoftComplexToDouble", "Complex.c"), + args = [arg, HasGilNode(arg.pos)], + ) + call = call.analyse_types(env) + if call.type != dst_type: + call = call.coerce_to(dst_type, env) + return call + + class CoerceToTempNode(CoercionNode): # This node is used to force the result of another node # to be stored in a temporary. It is only used if the @@ -13457,6 +14242,9 @@ class CoerceToTempNode(CoercionNode): # The arg is always already analysed return self + def may_be_none(self): + return self.arg.may_be_none() + def coerce_to_boolean(self, env): self.arg = self.arg.coerce_to_boolean(env) if self.arg.is_simple(): @@ -13471,11 +14259,12 @@ class CoerceToTempNode(CoercionNode): code.putln("%s = %s;" % ( self.result(), self.arg.result_as(self.ctype()))) if self.use_managed_ref: - if self.type.is_pyobject: + if not self.type.is_memoryviewslice: code.put_incref(self.result(), self.ctype()) - elif self.type.is_memoryviewslice: - code.put_incref_memoryviewslice(self.result(), - not self.in_nogil_context) + else: + code.put_incref_memoryviewslice(self.result(), self.type, + have_gil=not self.in_nogil_context) + class ProxyNode(CoercionNode): """ @@ -13491,22 +14280,24 @@ class ProxyNode(CoercionNode): def __init__(self, arg): super(ProxyNode, self).__init__(arg) self.constant_result = arg.constant_result - self._proxy_type() + self.update_type_and_entry() def analyse_types(self, env): self.arg = self.arg.analyse_expressions(env) - self._proxy_type() + self.update_type_and_entry() return self def infer_type(self, env): return self.arg.infer_type(env) - def _proxy_type(self): - if hasattr(self.arg, 'type'): - self.type = self.arg.type + def update_type_and_entry(self): + type = getattr(self.arg, 'type', None) + if type: + self.type = type self.result_ctype = self.arg.result_ctype - if hasattr(self.arg, 
'entry'): - self.entry = self.arg.entry + arg_entry = getattr(self.arg, 'entry', None) + if arg_entry: + self.entry = arg_entry def generate_result_code(self, code): self.arg.generate_result_code(code) @@ -13537,17 +14328,19 @@ class CloneNode(CoercionNode): # disposal code for it. The original owner of the argument # node is responsible for doing those things. - subexprs = [] # Arg is not considered a subexpr + subexprs = [] # Arg is not considered a subexpr nogil_check = None def __init__(self, arg): CoercionNode.__init__(self, arg) self.constant_result = arg.constant_result - if hasattr(arg, 'type'): - self.type = arg.type + type = getattr(arg, 'type', None) + if type: + self.type = type self.result_ctype = arg.result_ctype - if hasattr(arg, 'entry'): - self.entry = arg.entry + arg_entry = getattr(arg, 'entry', None) + if arg_entry: + self.entry = arg_entry def result(self): return self.arg.result() @@ -13565,8 +14358,9 @@ class CloneNode(CoercionNode): self.type = self.arg.type self.result_ctype = self.arg.result_ctype self.is_temp = 1 - if hasattr(self.arg, 'entry'): - self.entry = self.arg.entry + arg_entry = getattr(self.arg, 'entry', None) + if arg_entry: + self.entry = arg_entry return self def coerce_to(self, dest_type, env): @@ -13575,7 +14369,7 @@ class CloneNode(CoercionNode): return super(CloneNode, self).coerce_to(dest_type, env) def is_simple(self): - return True # result is always in a temp (or a name) + return True # result is always in a temp (or a name) def generate_evaluation_code(self, code): pass @@ -13586,10 +14380,38 @@ class CloneNode(CoercionNode): def generate_disposal_code(self, code): pass + def generate_post_assignment_code(self, code): + # if we're assigning from a CloneNode then it's "giveref"ed away, so it does + # need a matching incref (ideally this should happen before the assignment though) + if self.is_temp: # should usually be true + code.put_incref(self.result(), self.ctype()) + def free_temps(self, code): pass +class 
CppOptionalTempCoercion(CoercionNode): + """ + Used only in CoerceCppTemps - handles cases the temp is actually a OptionalCppClassType (and thus needs dereferencing when on the rhs) + """ + is_temp = False + + @property + def type(self): + return self.arg.type + + def calculate_result_code(self): + return "(*%s)" % self.arg.result() + + def generate_result_code(self, code): + pass + + def _make_move_result_rhs(self, result, optional=False): + # this wouldn't normally get moved (because it isn't a temp), but force it to be because it + # is a thin wrapper around a temp + return super(CppOptionalTempCoercion, self)._make_move_result_rhs(result, optional=False) + + class CMethodSelfCloneNode(CloneNode): # Special CloneNode for the self argument of builtin C methods # that accepts subtypes of the builtin type. This is safe only @@ -13641,77 +14463,215 @@ class DocstringRefNode(ExprNode): self.result(), self.body.result(), code.intern_identifier(StringEncoding.EncodedString("__doc__")), code.error_goto_if_null(self.result(), self.pos))) - code.put_gotref(self.result()) + self.generate_gotref(code) + +class AnnotationNode(ExprNode): + # Deals with the two possible uses of an annotation. + # 1. The post PEP-563 use where an annotation is stored + # as a string + # 2. The Cython use where the annotation can indicate an + # object type + # + # Doesn't handle the pre PEP-563 version where the + # annotation is evaluated into a Python Object. + subexprs = [] + # 'untyped' is set for fused specializations: + # Once a fused function has been created we don't want + # annotations to override an already set type. 
+ untyped = False -#------------------------------------------------------------------------------------ -# -# Runtime support code -# -#------------------------------------------------------------------------------------ - -pyerr_occurred_withgil_utility_code= UtilityCode( -proto = """ -static CYTHON_INLINE int __Pyx_ErrOccurredWithGIL(void); /* proto */ -""", -impl = """ -static CYTHON_INLINE int __Pyx_ErrOccurredWithGIL(void) { - int err; - #ifdef WITH_THREAD - PyGILState_STATE _save = PyGILState_Ensure(); - #endif - err = !!PyErr_Occurred(); - #ifdef WITH_THREAD - PyGILState_Release(_save); - #endif - return err; -} -""" -) + def __init__(self, pos, expr, string=None): + """string is expected to already be a StringNode or None""" + ExprNode.__init__(self, pos) + if string is None: + # import doesn't work at top of file? + from .AutoDocTransforms import AnnotationWriter + string = StringEncoding.EncodedString( + AnnotationWriter(description="annotation").write(expr)) + string = StringNode(pos, unicode_value=string, value=string.as_utf8_string()) + self.string = string + self.expr = expr -#------------------------------------------------------------------------------------ + def analyse_types(self, env): + return self # nothing needs doing -raise_unbound_local_error_utility_code = UtilityCode( -proto = """ -static CYTHON_INLINE void __Pyx_RaiseUnboundLocalError(const char *varname); -""", -impl = """ -static CYTHON_INLINE void __Pyx_RaiseUnboundLocalError(const char *varname) { - PyErr_Format(PyExc_UnboundLocalError, "local variable '%s' referenced before assignment", varname); -} -""") - -raise_closure_name_error_utility_code = UtilityCode( -proto = """ -static CYTHON_INLINE void __Pyx_RaiseClosureNameError(const char *varname); -""", -impl = """ -static CYTHON_INLINE void __Pyx_RaiseClosureNameError(const char *varname) { - PyErr_Format(PyExc_NameError, "free variable '%s' referenced before assignment in enclosing scope", varname); -} -""") - -# Don't inline 
the function, it should really never be called in production -raise_unbound_memoryview_utility_code_nogil = UtilityCode( -proto = """ -static void __Pyx_RaiseUnboundMemoryviewSliceNogil(const char *varname); -""", -impl = """ -static void __Pyx_RaiseUnboundMemoryviewSliceNogil(const char *varname) { - #ifdef WITH_THREAD - PyGILState_STATE gilstate = PyGILState_Ensure(); - #endif - __Pyx_RaiseUnboundLocalError(varname); - #ifdef WITH_THREAD - PyGILState_Release(gilstate); - #endif -} -""", -requires = [raise_unbound_local_error_utility_code]) + def analyse_as_type(self, env): + # for compatibility when used as a return_type_node, have this interface too + return self.analyse_type_annotation(env)[1] + + def _warn_on_unknown_annotation(self, env, annotation): + """Method checks for cases when user should be warned that annotation contains unknown types.""" + if annotation.is_name: + # Validate annotation in form `var: type` + if not env.lookup(annotation.name): + warning(annotation.pos, + "Unknown type declaration '%s' in annotation, ignoring" % self.string.value, level=1) + elif annotation.is_attribute and annotation.obj.is_name: + # Validate annotation in form `var: module.type` + if not env.lookup(annotation.obj.name): + # `module` is undeclared + warning(annotation.pos, + "Unknown type declaration '%s' in annotation, ignoring" % self.string.value, level=1) + elif annotation.obj.is_cython_module: + # `module` is cython + module_scope = annotation.obj.analyse_as_module(env) + if module_scope and not module_scope.lookup_type(annotation.attribute): + error(annotation.pos, + "Unknown type declaration '%s' in annotation" % self.string.value) + else: + module_scope = annotation.obj.analyse_as_module(env) + if module_scope and module_scope.pxd_file_loaded: + warning(annotation.pos, + "Unknown type declaration '%s' in annotation, ignoring" % self.string.value, level=1) + else: + warning(annotation.pos, "Unknown type declaration in annotation, ignoring") + + def 
analyse_type_annotation(self, env, assigned_value=None): + if self.untyped: + # Already applied as a fused type, not re-evaluating it here. + return [], None + annotation = self.expr + explicit_pytype = explicit_ctype = False + if annotation.is_dict_literal: + warning(annotation.pos, + "Dicts should no longer be used as type annotations. Use 'cython.int' etc. directly.", level=1) + for name, value in annotation.key_value_pairs: + if not name.is_string_literal: + continue + if name.value in ('type', b'type'): + explicit_pytype = True + if not explicit_ctype: + annotation = value + elif name.value in ('ctype', b'ctype'): + explicit_ctype = True + annotation = value + if explicit_pytype and explicit_ctype: + warning(annotation.pos, "Duplicate type declarations found in signature annotation", level=1) + elif isinstance(annotation, TupleNode): + warning(annotation.pos, + "Tuples cannot be declared as simple tuples of types. Use 'tuple[type1, type2, ...]'.", level=1) + return [], None + + with env.new_c_type_context(in_c_type_context=explicit_ctype): + arg_type = annotation.analyse_as_type(env) + + if arg_type is None: + self._warn_on_unknown_annotation(env, annotation) + return [], arg_type + + if annotation.is_string_literal: + warning(annotation.pos, + "Strings should no longer be used for type declarations. Use 'cython.int' etc. directly.", + level=1) + if explicit_pytype and not explicit_ctype and not (arg_type.is_pyobject or arg_type.equivalent_type): + warning(annotation.pos, + "Python type declaration in signature annotation does not refer to a Python type") + if arg_type.is_complex: + # creating utility code needs to be special-cased for complex types + arg_type.create_declaration_utility_code(env) + + # Check for declaration modifiers, e.g. 
"typing.Optional[...]" or "dataclasses.InitVar[...]" + modifiers = annotation.analyse_pytyping_modifiers(env) if annotation.is_subscript else [] + + return modifiers, arg_type + + +class AssignmentExpressionNode(ExprNode): + """ + Also known as a named expression or the walrus operator + + Arguments + lhs - NameNode - not stored directly as an attribute of the node + rhs - ExprNode -#------------------------------------------------------------------------------------ + Attributes + rhs - ExprNode + assignment - SingleAssignmentNode + """ + # subexprs and child_attrs are intentionally different here, because the assignment is not an expression + subexprs = ["rhs"] + child_attrs = ["rhs", "assignment"] # This order is important for control-flow (i.e. xdecref) to be right + + is_temp = False + assignment = None + clone_node = None + + def __init__(self, pos, lhs, rhs, **kwds): + super(AssignmentExpressionNode, self).__init__(pos, **kwds) + self.rhs = ProxyNode(rhs) + assign_expr_rhs = CloneNode(self.rhs) + self.assignment = SingleAssignmentNode( + pos, lhs=lhs, rhs=assign_expr_rhs, is_assignment_expression=True) + + @property + def type(self): + return self.rhs.type + + @property + def target_name(self): + return self.assignment.lhs.name -raise_too_many_values_to_unpack = UtilityCode.load_cached("RaiseTooManyValuesToUnpack", "ObjectHandling.c") -raise_need_more_values_to_unpack = UtilityCode.load_cached("RaiseNeedMoreValuesToUnpack", "ObjectHandling.c") -tuple_unpacking_error_code = UtilityCode.load_cached("UnpackTupleError", "ObjectHandling.c") + def infer_type(self, env): + return self.rhs.infer_type(env) + + def analyse_declarations(self, env): + self.assignment.analyse_declarations(env) + + def analyse_types(self, env): + # we're trying to generate code that looks roughly like: + # __pyx_t_1 = rhs + # lhs = __pyx_t_1 + # __pyx_t_1 + # (plus any reference counting that's needed) + + self.rhs = self.rhs.analyse_types(env) + if not self.rhs.arg.is_temp: + if not 
self.rhs.arg.is_literal: + # for anything but the simplest cases (where it can be used directly) + # we convert rhs to a temp, because CloneNode requires arg to be a temp + self.rhs.arg = self.rhs.arg.coerce_to_temp(env) + else: + # For literals we can optimize by just using the literal twice + # + # We aren't including `self.rhs.is_name` in this optimization + # because that goes wrong for assignment expressions run in + # parallel. e.g. `(a := b) + (b := a + c)`) + # This is a special case of https://github.com/cython/cython/issues/4146 + # TODO - once that's fixed general revisit this code and possibly + # use coerce_to_simple + self.assignment.rhs = copy.copy(self.rhs) + + # TODO - there's a missed optimization in the code generation stage + # for self.rhs.arg.is_temp: an incref/decref pair can be removed + # (but needs a general mechanism to do that) + self.assignment = self.assignment.analyse_types(env) + return self + + def coerce_to(self, dst_type, env): + if dst_type == self.assignment.rhs.type: + # in this quite common case (for example, when both lhs, and self are being coerced to Python) + # we can optimize the coercion out by sharing it between + # this and the assignment + old_rhs_arg = self.rhs.arg + if isinstance(old_rhs_arg, CoerceToTempNode): + old_rhs_arg = old_rhs_arg.arg + rhs_arg = old_rhs_arg.coerce_to(dst_type, env) + if rhs_arg is not old_rhs_arg: + self.rhs.arg = rhs_arg + self.rhs.update_type_and_entry() + # clean up the old coercion node that the assignment has likely generated + if (isinstance(self.assignment.rhs, CoercionNode) + and not isinstance(self.assignment.rhs, CloneNode)): + self.assignment.rhs = self.assignment.rhs.arg + self.assignment.rhs.type = self.assignment.rhs.arg.type + return self + return super(AssignmentExpressionNode, self).coerce_to(dst_type, env) + + def calculate_result_code(self): + return self.rhs.result() + + def generate_result_code(self, code): + # we have to do this manually because it isn't a 
subexpression + self.assignment.generate_execution_code(code) diff --git a/Cython/Compiler/FlowControl.pxd b/Cython/Compiler/FlowControl.pxd index c87370b81..5338d4fe4 100644 --- a/Cython/Compiler/FlowControl.pxd +++ b/Cython/Compiler/FlowControl.pxd @@ -1,30 +1,30 @@ -from __future__ import absolute_import +# cython: language_level=3 cimport cython from .Visitor cimport CythonTransform, TreeVisitor cdef class ControlBlock: - cdef public set children - cdef public set parents - cdef public set positions - cdef public list stats - cdef public dict gen - cdef public set bounded - - # Big integer bitsets - cdef public object i_input - cdef public object i_output - cdef public object i_gen - cdef public object i_kill - cdef public object i_state - - cpdef bint empty(self) - cpdef detach(self) - cpdef add_child(self, block) + cdef public set children + cdef public set parents + cdef public set positions + cdef public list stats + cdef public dict gen + cdef public set bounded + + # Big integer bitsets + cdef public object i_input + cdef public object i_output + cdef public object i_gen + cdef public object i_kill + cdef public object i_state + + cpdef bint empty(self) + cpdef detach(self) + cpdef add_child(self, block) cdef class ExitBlock(ControlBlock): - cpdef bint empty(self) + cpdef bint empty(self) cdef class NameAssignment: cdef public bint is_arg @@ -36,6 +36,7 @@ cdef class NameAssignment: cdef public set refs cdef public object bit cdef public object inferred_type + cdef public object rhs_scope cdef class AssignmentList: cdef public object bit @@ -47,51 +48,50 @@ cdef class AssignmentCollector(TreeVisitor): @cython.final cdef class ControlFlow: - cdef public set blocks - cdef public set entries - cdef public list loops - cdef public list exceptions + cdef public set blocks + cdef public set entries + cdef public list loops + cdef public list exceptions + + cdef public ControlBlock entry_point + cdef public ExitBlock exit_point + cdef public ControlBlock block - 
cdef public ControlBlock entry_point - cdef public ExitBlock exit_point - cdef public ControlBlock block + cdef public dict assmts - cdef public dict assmts + cdef public Py_ssize_t in_try_block - cpdef newblock(self, ControlBlock parent=*) - cpdef nextblock(self, ControlBlock parent=*) - cpdef bint is_tracked(self, entry) - cpdef bint is_statically_assigned(self, entry) - cpdef mark_position(self, node) - cpdef mark_assignment(self, lhs, rhs, entry) - cpdef mark_argument(self, lhs, rhs, entry) - cpdef mark_deletion(self, node, entry) - cpdef mark_reference(self, node, entry) + cpdef newblock(self, ControlBlock parent=*) + cpdef nextblock(self, ControlBlock parent=*) + cpdef bint is_tracked(self, entry) + cpdef bint is_statically_assigned(self, entry) + cpdef mark_position(self, node) + cpdef mark_assignment(self, lhs, rhs, entry, rhs_scope=*) + cpdef mark_argument(self, lhs, rhs, entry) + cpdef mark_deletion(self, node, entry) + cpdef mark_reference(self, node, entry) - @cython.locals(block=ControlBlock, parent=ControlBlock, unreachable=set) - cpdef normalize(self) + @cython.locals(block=ControlBlock, parent=ControlBlock, unreachable=set) + cpdef normalize(self) - @cython.locals(bit=object, assmts=AssignmentList, - block=ControlBlock) - cpdef initialize(self) + @cython.locals(bit=object, assmts=AssignmentList, block=ControlBlock) + cpdef initialize(self) - @cython.locals(assmts=AssignmentList, assmt=NameAssignment) - cpdef set map_one(self, istate, entry) + @cython.locals(assmts=AssignmentList, assmt=NameAssignment) + cpdef set map_one(self, istate, entry) - @cython.locals(block=ControlBlock, parent=ControlBlock) - cdef reaching_definitions(self) + @cython.locals(block=ControlBlock, parent=ControlBlock) + cdef reaching_definitions(self) cdef class Uninitialized: - pass + pass cdef class Unknown: pass - cdef class MessageCollection: cdef set messages - @cython.locals(dirty=bint, block=ControlBlock, parent=ControlBlock, assmt=NameAssignment) cdef 
check_definitions(ControlFlow flow, dict compiler_directives) @@ -101,11 +101,11 @@ cdef class ControlFlowAnalysis(CythonTransform): cdef object gv_ctx cdef object constant_folder cdef set reductions - cdef list env_stack - cdef list stack + cdef list stack # a stack of (env, flow) tuples cdef object env cdef ControlFlow flow + cdef object object_expr cdef bint in_inplace_assignment - cpdef mark_assignment(self, lhs, rhs=*) + cpdef mark_assignment(self, lhs, rhs=*, rhs_scope=*) cpdef mark_position(self, node) diff --git a/Cython/Compiler/FlowControl.py b/Cython/Compiler/FlowControl.py index df04471f9..294bce9ee 100644 --- a/Cython/Compiler/FlowControl.py +++ b/Cython/Compiler/FlowControl.py @@ -1,21 +1,22 @@ +# cython: language_level=3str +# cython: auto_pickle=True + from __future__ import absolute_import import cython -cython.declare(PyrexTypes=object, ExprNodes=object, Nodes=object, - Builtin=object, InternalError=object, error=object, warning=object, - py_object_type=object, unspecified_type=object, - object_expr=object, fake_rhs_expr=object, TypedExprNode=object) +cython.declare(PyrexTypes=object, ExprNodes=object, Nodes=object, Builtin=object, + Options=object, TreeVisitor=object, CythonTransform=object, + InternalError=object, error=object, warning=object, + fake_rhs_expr=object, TypedExprNode=object) from . import Builtin from . import ExprNodes from . import Nodes from . import Options -from .PyrexTypes import py_object_type, unspecified_type from . 
import PyrexTypes from .Visitor import TreeVisitor, CythonTransform from .Errors import error, warning, InternalError -from .Optimize import ConstantFolding class TypedExprNode(ExprNodes.ExprNode): @@ -28,9 +29,8 @@ class TypedExprNode(ExprNodes.ExprNode): def may_be_none(self): return self._may_be_none != False -object_expr = TypedExprNode(py_object_type, may_be_none=True) # Fake rhs to silence "unused variable" warning -fake_rhs_expr = TypedExprNode(unspecified_type) +fake_rhs_expr = TypedExprNode(PyrexTypes.unspecified_type) class ControlBlock(object): @@ -52,7 +52,7 @@ class ControlBlock(object): stats = [Assignment(a), NameReference(a), NameReference(c), Assignment(b)] gen = {Entry(a): Assignment(a), Entry(b): Assignment(b)} - bounded = set([Entry(a), Entry(c)]) + bounded = {Entry(a), Entry(c)} """ @@ -110,6 +110,7 @@ class ControlFlow(object): entries set tracked entries loops list stack for loop descriptors exceptions list stack for exception descriptors + in_try_block int track if we're in a try...except or try...finally block """ def __init__(self): @@ -122,6 +123,7 @@ class ControlFlow(object): self.exit_point = ExitBlock() self.blocks.add(self.exit_point) self.block = self.entry_point + self.in_try_block = 0 def newblock(self, parent=None): """Create floating block linked to `parent` if given. 
@@ -160,7 +162,7 @@ class ControlFlow(object): (entry.type.is_struct_or_union or entry.type.is_complex or entry.type.is_array or - entry.type.is_cpp_class)): + (entry.type.is_cpp_class and not entry.is_cpp_optional))): # stack allocated structured variable => never uninitialised return True return False @@ -170,9 +172,9 @@ class ControlFlow(object): if self.block: self.block.positions.add(node.pos[:2]) - def mark_assignment(self, lhs, rhs, entry): + def mark_assignment(self, lhs, rhs, entry, rhs_scope=None): if self.block and self.is_tracked(entry): - assignment = NameAssignment(lhs, rhs, entry) + assignment = NameAssignment(lhs, rhs, entry, rhs_scope=rhs_scope) self.block.stats.append(assignment) self.block.gen[entry] = assignment self.entries.add(entry) @@ -203,7 +205,7 @@ class ControlFlow(object): def normalize(self): """Delete unreachable and orphan blocks.""" - queue = set([self.entry_point]) + queue = {self.entry_point} visited = set() while queue: root = queue.pop() @@ -217,7 +219,7 @@ class ControlFlow(object): visited.remove(self.entry_point) for block in visited: if block.empty(): - for parent in block.parents: # Re-parent + for parent in block.parents: # Re-parent for child in block.children: parent.add_child(child) block.detach() @@ -313,7 +315,7 @@ class ExceptionDescr(object): class NameAssignment(object): - def __init__(self, lhs, rhs, entry): + def __init__(self, lhs, rhs, entry, rhs_scope=None): if lhs.cf_state is None: lhs.cf_state = set() self.lhs = lhs @@ -324,16 +326,18 @@ class NameAssignment(object): self.is_arg = False self.is_deletion = False self.inferred_type = None + # For generator expression targets, the rhs can have a different scope than the lhs. 
+ self.rhs_scope = rhs_scope def __repr__(self): return '%s(entry=%r)' % (self.__class__.__name__, self.entry) def infer_type(self): - self.inferred_type = self.rhs.infer_type(self.entry.scope) + self.inferred_type = self.rhs.infer_type(self.rhs_scope or self.entry.scope) return self.inferred_type def type_dependencies(self): - return self.rhs.type_dependencies(self.entry.scope) + return self.rhs.type_dependencies(self.rhs_scope or self.entry.scope) @property def type(self): @@ -373,9 +377,9 @@ class NameDeletion(NameAssignment): def infer_type(self): inferred_type = self.rhs.infer_type(self.entry.scope) - if (not inferred_type.is_pyobject and - inferred_type.can_coerce_to_pyobject(self.entry.scope)): - return py_object_type + if (not inferred_type.is_pyobject + and inferred_type.can_coerce_to_pyobject(self.entry.scope)): + return PyrexTypes.py_object_type self.inferred_type = inferred_type return inferred_type @@ -455,7 +459,7 @@ class GVContext(object): start = min(block.positions) stop = max(block.positions) srcdescr = start[0] - if not srcdescr in self.sources: + if srcdescr not in self.sources: self.sources[srcdescr] = list(srcdescr.get_lines()) lines = self.sources[srcdescr] return '\\n'.join([l.strip() for l in lines[start[1] - 1:stop[1]]]) @@ -590,7 +594,7 @@ def check_definitions(flow, compiler_directives): if (node.allow_null or entry.from_closure or entry.is_pyclass_attr or entry.type.is_error): pass # Can be uninitialized here - elif node.cf_is_null: + elif node.cf_is_null and not entry.in_closure: if entry.error_on_uninitialized or ( Options.error_on_uninitialized and ( entry.type.is_pyobject or entry.type.is_unspecified)): @@ -604,10 +608,12 @@ def check_definitions(flow, compiler_directives): "local variable '%s' referenced before assignment" % entry.name) elif warn_maybe_uninitialized: + msg = "local variable '%s' might be referenced before assignment" % entry.name + if entry.in_closure: + msg += " (maybe initialized inside a closure)" 
messages.warning( node.pos, - "local variable '%s' might be referenced before assignment" - % entry.name) + msg) elif Unknown in node.cf_state: # TODO: better cross-closure analysis to know when inner functions # are being called before a variable is being set, and when @@ -621,7 +627,7 @@ def check_definitions(flow, compiler_directives): # Unused result for assmt in assignments: if (not assmt.refs and not assmt.entry.is_pyclass_attr - and not assmt.entry.in_closure): + and not assmt.entry.in_closure): if assmt.entry.cf_references and warn_unused_result: if assmt.is_arg: messages.warning(assmt.pos, "Unused argument value '%s'" % @@ -661,7 +667,7 @@ class AssignmentCollector(TreeVisitor): self.assignments = [] def visit_Node(self): - self._visitchildren(self, None) + self._visitchildren(self, None, None) def visit_SingleAssignmentNode(self, node): self.assignments.append((node.lhs, node.rhs)) @@ -673,30 +679,37 @@ class AssignmentCollector(TreeVisitor): class ControlFlowAnalysis(CythonTransform): + def find_in_stack(self, env): + if env == self.env: + return self.flow + for e, flow in reversed(self.stack): + if e is env: + return flow + assert False + def visit_ModuleNode(self, node): - self.gv_ctx = GVContext() + dot_output = self.current_directives['control_flow.dot_output'] + self.gv_ctx = GVContext() if dot_output else None + + from .Optimize import ConstantFolding self.constant_folder = ConstantFolding() # Set of NameNode reductions self.reductions = set() self.in_inplace_assignment = False - self.env_stack = [] self.env = node.scope - self.stack = [] self.flow = ControlFlow() + self.stack = [] # a stack of (env, flow) tuples + self.object_expr = TypedExprNode(PyrexTypes.py_object_type, may_be_none=True) self.visitchildren(node) check_definitions(self.flow, self.current_directives) - dot_output = self.current_directives['control_flow.dot_output'] if dot_output: annotate_defs = self.current_directives['control_flow.dot_annotate_defs'] - fp = open(dot_output, 
'wt') - try: + with open(dot_output, 'wt') as fp: self.gv_ctx.render(fp, 'module', annotate_defs=annotate_defs) - finally: - fp.close() return node def visit_FuncDefNode(self, node): @@ -704,9 +717,8 @@ class ControlFlowAnalysis(CythonTransform): if arg.default: self.visitchildren(arg) self.visitchildren(node, ('decorators',)) - self.env_stack.append(self.env) + self.stack.append((self.env, self.flow)) self.env = node.local_scope - self.stack.append(self.flow) self.flow = ControlFlow() # Collect all entries @@ -744,10 +756,10 @@ class ControlFlowAnalysis(CythonTransform): check_definitions(self.flow, self.current_directives) self.flow.blocks.add(self.flow.entry_point) - self.gv_ctx.add(GV(node.local_scope.name, self.flow)) + if self.gv_ctx is not None: + self.gv_ctx.add(GV(node.local_scope.name, self.flow)) - self.flow = self.stack.pop() - self.env = self.env_stack.pop() + self.env, self.flow = self.stack.pop() return node def visit_DefNode(self, node): @@ -760,7 +772,7 @@ class ControlFlowAnalysis(CythonTransform): def visit_CTypeDefNode(self, node): return node - def mark_assignment(self, lhs, rhs=None): + def mark_assignment(self, lhs, rhs=None, rhs_scope=None): if not self.flow.block: return if self.flow.exceptions: @@ -769,19 +781,22 @@ class ControlFlowAnalysis(CythonTransform): self.flow.nextblock() if not rhs: - rhs = object_expr + rhs = self.object_expr if lhs.is_name: if lhs.entry is not None: entry = lhs.entry else: entry = self.env.lookup(lhs.name) - if entry is None: # TODO: This shouldn't happen... + if entry is None: # TODO: This shouldn't happen... 
return - self.flow.mark_assignment(lhs, rhs, entry) + self.flow.mark_assignment(lhs, rhs, entry, rhs_scope=rhs_scope) elif lhs.is_sequence_constructor: for i, arg in enumerate(lhs.args): - if not rhs or arg.is_starred: - item_node = None + if arg.is_starred: + # "a, *b = x" assigns a list to "b" + item_node = TypedExprNode(Builtin.list_type, may_be_none=False, pos=arg.pos) + elif rhs is self.object_expr: + item_node = rhs else: item_node = rhs.inferable_item_node(i) self.mark_assignment(arg, item_node) @@ -806,7 +821,7 @@ class ControlFlowAnalysis(CythonTransform): return node def visit_AssignmentNode(self, node): - raise InternalError("Unhandled assignment node") + raise InternalError("Unhandled assignment node %s" % type(node)) def visit_SingleAssignmentNode(self, node): self._visit(node.rhs) @@ -916,6 +931,26 @@ class ControlFlowAnalysis(CythonTransform): self.flow.block = None return node + def visit_AssertStatNode(self, node): + """Essentially an if-condition that wraps a RaiseStatNode. 
+ """ + self.mark_position(node) + next_block = self.flow.newblock() + parent = self.flow.block + # failure case + parent = self.flow.nextblock(parent) + self._visit(node.condition) + self.flow.nextblock() + self._visit(node.exception) + if self.flow.block: + self.flow.block.add_child(next_block) + parent.add_child(next_block) + if next_block.parents: + self.flow.block = next_block + else: + self.flow.block = None + return node + def visit_WhileStatNode(self, node): condition_block = self.flow.nextblock() next_block = self.flow.newblock() @@ -951,10 +986,11 @@ class ControlFlowAnalysis(CythonTransform): is_special = False sequence = node.iterator.sequence target = node.target + env = node.iterator.expr_scope or self.env if isinstance(sequence, ExprNodes.SimpleCallNode): function = sequence.function if sequence.self is None and function.is_name: - entry = self.env.lookup(function.name) + entry = env.lookup(function.name) if not entry or entry.is_builtin: if function.name == 'reversed' and len(sequence.args) == 1: sequence = sequence.args[0] @@ -962,30 +998,32 @@ class ControlFlowAnalysis(CythonTransform): if target.is_sequence_constructor and len(target.args) == 2: iterator = sequence.args[0] if iterator.is_name: - iterator_type = iterator.infer_type(self.env) + iterator_type = iterator.infer_type(env) if iterator_type.is_builtin_type: # assume that builtin types have a length within Py_ssize_t self.mark_assignment( target.args[0], ExprNodes.IntNode(target.pos, value='PY_SSIZE_T_MAX', - type=PyrexTypes.c_py_ssize_t_type)) + type=PyrexTypes.c_py_ssize_t_type), + rhs_scope=node.iterator.expr_scope) target = target.args[1] sequence = sequence.args[0] if isinstance(sequence, ExprNodes.SimpleCallNode): function = sequence.function if sequence.self is None and function.is_name: - entry = self.env.lookup(function.name) + entry = env.lookup(function.name) if not entry or entry.is_builtin: if function.name in ('range', 'xrange'): is_special = True for arg in 
sequence.args[:2]: - self.mark_assignment(target, arg) + self.mark_assignment(target, arg, rhs_scope=node.iterator.expr_scope) if len(sequence.args) > 2: self.mark_assignment(target, self.constant_folder( ExprNodes.binop_node(node.pos, '+', sequence.args[0], - sequence.args[2]))) + sequence.args[2])), + rhs_scope=node.iterator.expr_scope) if not is_special: # A for-loop basically translates to subsequent calls to @@ -994,7 +1032,7 @@ class ControlFlowAnalysis(CythonTransform): # Python strings, etc., while correctly falling back to an # object type when the base type cannot be handled. - self.mark_assignment(target, node.item) + self.mark_assignment(target, node.item, rhs_scope=node.iterator.expr_scope) def visit_AsyncForStatNode(self, node): return self.visit_ForInStatNode(node) @@ -1013,7 +1051,7 @@ class ControlFlowAnalysis(CythonTransform): elif isinstance(node, Nodes.AsyncForStatNode): # not entirely correct, but good enough for now self.mark_assignment(node.target, node.item) - else: # Parallel + else: # Parallel self.mark_assignment(node.target) # Body block @@ -1140,7 +1178,9 @@ class ControlFlowAnalysis(CythonTransform): ## XXX: children nodes self.flow.block.add_child(entry_point) self.flow.nextblock() + self.flow.in_try_block += 1 self._visit(node.body) + self.flow.in_try_block -= 1 self.flow.exceptions.pop() # After exception @@ -1200,7 +1240,9 @@ class ControlFlowAnalysis(CythonTransform): self.flow.block = body_block body_block.add_child(entry_point) self.flow.nextblock() + self.flow.in_try_block += 1 self._visit(node.body) + self.flow.in_try_block -= 1 self.flow.exceptions.pop() if self.flow.loops: self.flow.loops[-1].exceptions.pop() @@ -1219,6 +1261,8 @@ class ControlFlowAnalysis(CythonTransform): if self.flow.exceptions: self.flow.block.add_child(self.flow.exceptions[-1].entry_point) self.flow.block = None + if self.flow.in_try_block: + node.in_try_block = True return node def visit_ReraiseStatNode(self, node): @@ -1287,34 +1331,47 @@ class 
ControlFlowAnalysis(CythonTransform): def visit_ComprehensionNode(self, node): if node.expr_scope: - self.env_stack.append(self.env) + self.stack.append((self.env, self.flow)) self.env = node.expr_scope # Skip append node here self._visit(node.loop) if node.expr_scope: - self.env = self.env_stack.pop() + self.env, _ = self.stack.pop() return node def visit_ScopedExprNode(self, node): + # currently this is written to deal with these two types + # (with comprehensions covered in their own function) + assert isinstance(node, (ExprNodes.IteratorNode, ExprNodes.AsyncIteratorNode)), node if node.expr_scope: - self.env_stack.append(self.env) + self.stack.append((self.env, self.flow)) + self.flow = self.find_in_stack(node.expr_scope) self.env = node.expr_scope self.visitchildren(node) if node.expr_scope: - self.env = self.env_stack.pop() + self.env, self.flow = self.stack.pop() return node def visit_PyClassDefNode(self, node): self.visitchildren(node, ('dict', 'metaclass', 'mkw', 'bases', 'class_result')) self.flow.mark_assignment(node.target, node.classobj, - self.env.lookup(node.name)) - self.env_stack.append(self.env) + self.env.lookup(node.target.name)) + self.stack.append((self.env, self.flow)) self.env = node.scope self.flow.nextblock() + if node.doc_node: + self.flow.mark_assignment(node.doc_node, fake_rhs_expr, node.doc_node.entry) self.visitchildren(node, ('body',)) self.flow.nextblock() - self.env = self.env_stack.pop() + self.env, _ = self.stack.pop() + return node + + def visit_CClassDefNode(self, node): + # just make sure the nodes scope is findable in-case there is a list comprehension in it + self.stack.append((node.scope, self.flow)) + self.visitchildren(node) + self.stack.pop() return node def visit_AmpersandNode(self, node): diff --git a/Cython/Compiler/FusedNode.py b/Cython/Compiler/FusedNode.py index 26d6ffd3d..7876916db 100644 --- a/Cython/Compiler/FusedNode.py +++ b/Cython/Compiler/FusedNode.py @@ -7,6 +7,7 @@ from . 
import (ExprNodes, PyrexTypes, MemoryView, from .ExprNodes import CloneNode, ProxyNode, TupleNode from .Nodes import FuncDefNode, CFuncDefNode, StatListNode, DefNode from ..Utils import OrderedSet +from .Errors import error, CannotSpecialize class FusedCFuncDefNode(StatListNode): @@ -141,7 +142,14 @@ class FusedCFuncDefNode(StatListNode): copied_node = copy.deepcopy(self.node) # Make the types in our CFuncType specific. - type = copied_node.type.specialize(fused_to_specific) + try: + type = copied_node.type.specialize(fused_to_specific) + except CannotSpecialize: + # unlike for the argument types, specializing the return type can fail + error(copied_node.pos, "Return type is a fused type that cannot " + "be determined from the function arguments") + self.py_func = None # this is just to let the compiler exit gracefully + return entry = copied_node.entry type.specialize_entry(entry, cname) @@ -220,6 +228,10 @@ class FusedCFuncDefNode(StatListNode): arg.type = arg.type.specialize(fused_to_specific) if arg.type.is_memoryviewslice: arg.type.validate_memslice_dtype(arg.pos) + if arg.annotation: + # TODO might be nice if annotations were specialized instead? + # (Or might be hard to do reliably) + arg.annotation.untyped = True def create_new_local_scope(self, node, env, f2s): """ @@ -264,12 +276,12 @@ class FusedCFuncDefNode(StatListNode): Returns whether an error was issued and whether we should stop in in order to prevent a flood of errors. 
""" - num_errors = Errors.num_errors + num_errors = Errors.get_errors_count() transform = ParseTreeTransforms.ReplaceFusedTypeChecks( copied_node.local_scope) transform(copied_node) - if Errors.num_errors > num_errors: + if Errors.get_errors_count() > num_errors: return False return True @@ -309,25 +321,21 @@ class FusedCFuncDefNode(StatListNode): def _buffer_check_numpy_dtype_setup_cases(self, pyx_code): "Setup some common cases to match dtypes against specializations" - if pyx_code.indenter("if kind in b'iu':"): + with pyx_code.indenter("if kind in b'iu':"): pyx_code.putln("pass") pyx_code.named_insertion_point("dtype_int") - pyx_code.dedent() - if pyx_code.indenter("elif kind == b'f':"): + with pyx_code.indenter("elif kind == b'f':"): pyx_code.putln("pass") pyx_code.named_insertion_point("dtype_float") - pyx_code.dedent() - if pyx_code.indenter("elif kind == b'c':"): + with pyx_code.indenter("elif kind == b'c':"): pyx_code.putln("pass") pyx_code.named_insertion_point("dtype_complex") - pyx_code.dedent() - if pyx_code.indenter("elif kind == b'O':"): + with pyx_code.indenter("elif kind == b'O':"): pyx_code.putln("pass") pyx_code.named_insertion_point("dtype_object") - pyx_code.dedent() match = "dest_sig[{{dest_sig_idx}}] = '{{specialized_type_name}}'" no_match = "dest_sig[{{dest_sig_idx}}] = None" @@ -364,11 +372,10 @@ class FusedCFuncDefNode(StatListNode): if final_type.is_pythran_expr: cond += ' and arg_is_pythran_compatible' - if codewriter.indenter("if %s:" % cond): + with codewriter.indenter("if %s:" % cond): #codewriter.putln("print 'buffer match found based on numpy dtype'") codewriter.putln(self.match) codewriter.putln("break") - codewriter.dedent() def _buffer_parse_format_string_check(self, pyx_code, decl_code, specialized_type, env): @@ -394,15 +401,30 @@ class FusedCFuncDefNode(StatListNode): pyx_code.context.update( specialized_type_name=specialized_type.specialization_string, - sizeof_dtype=self._sizeof_dtype(dtype)) + 
sizeof_dtype=self._sizeof_dtype(dtype), + ndim_dtype=specialized_type.ndim, + dtype_is_struct_obj=int(dtype.is_struct or dtype.is_pyobject)) + # use the memoryview object to check itemsize and ndim. + # In principle it could check more, but these are the easiest to do quickly pyx_code.put_chunk( u""" # try {{dtype}} - if itemsize == -1 or itemsize == {{sizeof_dtype}}: - memslice = {{coerce_from_py_func}}(arg, 0) + if (((itemsize == -1 and arg_as_memoryview.itemsize == {{sizeof_dtype}}) + or itemsize == {{sizeof_dtype}}) + and arg_as_memoryview.ndim == {{ndim_dtype}}): + {{if dtype_is_struct_obj}} + if __PYX_IS_PYPY2: + # I wasn't able to diagnose why, but PyPy2 fails to convert a + # memoryview to a Cython memoryview in this case + memslice = {{coerce_from_py_func}}(arg, 0) + else: + {{else}} + if True: + {{endif}} + memslice = {{coerce_from_py_func}}(arg_as_memoryview, 0) if memslice.memview: - __PYX_XDEC_MEMVIEW(&memslice, 1) + __PYX_XCLEAR_MEMVIEW(&memslice, 1) # print 'found a match for the buffer through format parsing' %s break @@ -410,7 +432,7 @@ class FusedCFuncDefNode(StatListNode): __pyx_PyErr_Clear() """ % self.match) - def _buffer_checks(self, buffer_types, pythran_types, pyx_code, decl_code, env): + def _buffer_checks(self, buffer_types, pythran_types, pyx_code, decl_code, accept_none, env): """ Generate Cython code to match objects to buffer specializations. First try to get a numpy dtype object and match it against the individual @@ -467,9 +489,35 @@ class FusedCFuncDefNode(StatListNode): self._buffer_check_numpy_dtype(pyx_code, buffer_types, pythran_types) pyx_code.dedent(2) - for specialized_type in buffer_types: - self._buffer_parse_format_string_check( - pyx_code, decl_code, specialized_type, env) + if accept_none: + # If None is acceptable, then Cython <3.0 matched None with the + # first type. This behaviour isn't ideal, but keep it for backwards + # compatibility. 
Better behaviour would be to see if subsequent + # arguments give a stronger match. + pyx_code.context.update( + specialized_type_name=buffer_types[0].specialization_string + ) + pyx_code.put_chunk( + """ + if arg is None: + %s + break + """ % self.match) + + # creating a Cython memoryview from a Python memoryview avoids the + # need to get the buffer multiple times, and we can + # also use it to check itemsizes etc + pyx_code.put_chunk( + """ + try: + arg_as_memoryview = memoryview(arg) + except (ValueError, TypeError): + pass + """) + with pyx_code.indenter("else:"): + for specialized_type in buffer_types: + self._buffer_parse_format_string_check( + pyx_code, decl_code, specialized_type, env) def _buffer_declarations(self, pyx_code, decl_code, all_buffer_types, pythran_types): """ @@ -481,8 +529,9 @@ class FusedCFuncDefNode(StatListNode): ctypedef struct {{memviewslice_cname}}: void *memview - void __PYX_XDEC_MEMVIEW({{memviewslice_cname}} *, int have_gil) + void __PYX_XCLEAR_MEMVIEW({{memviewslice_cname}} *, int have_gil) bint __pyx_memoryview_check(object) + bint __PYX_IS_PYPY2 "(CYTHON_COMPILING_IN_PYPY && PY_MAJOR_VERSION == 2)" """) pyx_code.local_variable_declarations.put_chunk( @@ -507,6 +556,12 @@ class FusedCFuncDefNode(StatListNode): ndarray = __Pyx_ImportNumPyArrayTypeIfAvailable() """) + pyx_code.imports.put_chunk( + u""" + cdef memoryview arg_as_memoryview + """ + ) + seen_typedefs = set() seen_int_dtypes = set() for buffer_type in all_buffer_types: @@ -580,6 +635,26 @@ class FusedCFuncDefNode(StatListNode): {{endif}} """) + def _fused_signature_index(self, pyx_code): + """ + Generate Cython code for constructing a persistent nested dictionary index of + fused type specialization signatures. 
+ """ + pyx_code.put_chunk( + u""" + if not _fused_sigindex: + for sig in <dict>signatures: + sigindex_node = _fused_sigindex + *sig_series, last_type = sig.strip('()').split('|') + for sig_type in sig_series: + if sig_type not in sigindex_node: + sigindex_node[sig_type] = sigindex_node = {} + else: + sigindex_node = sigindex_node[sig_type] + sigindex_node[last_type] = sig + """ + ) + def make_fused_cpdef(self, orig_py_func, env, is_def): """ This creates the function that is indexable from Python and does @@ -616,10 +691,14 @@ class FusedCFuncDefNode(StatListNode): pyx_code.put_chunk( u""" - def __pyx_fused_cpdef(signatures, args, kwargs, defaults): + def __pyx_fused_cpdef(signatures, args, kwargs, defaults, _fused_sigindex={}): # FIXME: use a typed signature - currently fails badly because # default arguments inherit the types we specify here! + cdef list search_list + + cdef dict sn, sigindex_node + dest_sig = [None] * {{n_fused}} if kwargs is not None and not kwargs: @@ -630,7 +709,7 @@ class FusedCFuncDefNode(StatListNode): # instance check body """) - pyx_code.indent() # indent following code to function body + pyx_code.indent() # indent following code to function body pyx_code.named_insertion_point("imports") pyx_code.named_insertion_point("func_defs") pyx_code.named_insertion_point("local_variable_declarations") @@ -661,19 +740,20 @@ class FusedCFuncDefNode(StatListNode): self._unpack_argument(pyx_code) # 'unrolled' loop, first match breaks out of it - if pyx_code.indenter("while 1:"): + with pyx_code.indenter("while 1:"): if normal_types: self._fused_instance_checks(normal_types, pyx_code, env) if buffer_types or pythran_types: env.use_utility_code(Code.UtilityCode.load_cached("IsLittleEndian", "ModuleSetupCode.c")) - self._buffer_checks(buffer_types, pythran_types, pyx_code, decl_code, env) + self._buffer_checks( + buffer_types, pythran_types, pyx_code, decl_code, + arg.accept_none, env) if has_object_fallback: 
pyx_code.context.update(specialized_type_name='object') pyx_code.putln(self.match) else: pyx_code.putln(self.no_match) pyx_code.putln("break") - pyx_code.dedent() fused_index += 1 all_buffer_types.update(buffer_types) @@ -687,23 +767,36 @@ class FusedCFuncDefNode(StatListNode): env.use_utility_code(Code.UtilityCode.load_cached("Import", "ImportExport.c")) env.use_utility_code(Code.UtilityCode.load_cached("ImportNumPyArray", "ImportExport.c")) + self._fused_signature_index(pyx_code) + pyx_code.put_chunk( u""" - candidates = [] - for sig in <dict>signatures: - match_found = False - src_sig = sig.strip('()').split('|') - for i in range(len(dest_sig)): - dst_type = dest_sig[i] - if dst_type is not None: - if src_sig[i] == dst_type: - match_found = True - else: - match_found = False - break + sigindex_matches = [] + sigindex_candidates = [_fused_sigindex] + + for dst_type in dest_sig: + found_matches = [] + found_candidates = [] + # Make two seperate lists: One for signature sub-trees + # with at least one definite match, and another for + # signature sub-trees with only ambiguous matches + # (where `dest_sig[i] is None`). 
+ if dst_type is None: + for sn in sigindex_matches: + found_matches.extend(sn.values()) + for sn in sigindex_candidates: + found_candidates.extend(sn.values()) + else: + for search_list in (sigindex_matches, sigindex_candidates): + for sn in search_list: + if dst_type in sn: + found_matches.append(sn[dst_type]) + sigindex_matches = found_matches + sigindex_candidates = found_candidates + if not (found_matches or found_candidates): + break - if match_found: - candidates.append(sig) + candidates = sigindex_matches if not candidates: raise TypeError("No matching signature found") @@ -792,16 +885,18 @@ class FusedCFuncDefNode(StatListNode): for arg in self.node.args: if arg.default: arg.default = arg.default.analyse_expressions(env) - defaults.append(ProxyNode(arg.default)) + # coerce the argument to temp since CloneNode really requires a temp + defaults.append(ProxyNode(arg.default.coerce_to_temp(env))) else: defaults.append(None) for i, stat in enumerate(self.stats): stat = self.stats[i] = stat.analyse_expressions(env) - if isinstance(stat, FuncDefNode): + if isinstance(stat, FuncDefNode) and stat is not self.py_func: + # the dispatcher specifically doesn't want its defaults overriding for arg, default in zip(stat.args, defaults): if default is not None: - arg.default = CloneNode(default).coerce_to(arg.type, env) + arg.default = CloneNode(default).analyse_expressions(env).coerce_to(arg.type, env) if self.py_func: args = [CloneNode(default) for default in defaults if default] @@ -829,6 +924,10 @@ class FusedCFuncDefNode(StatListNode): else: nodes = self.nodes + # For the moment, fused functions do not support METH_FASTCALL + for node in nodes: + node.entry.signature.use_fastcall = False + signatures = [StringEncoding.EncodedString(node.specialized_signature_string) for node in nodes] keys = [ExprNodes.StringNode(node.pos, value=sig) @@ -847,8 +946,10 @@ class FusedCFuncDefNode(StatListNode): self.py_func.pymethdef_required = True 
self.fused_func_assignment.generate_function_definitions(env, code) + from . import Options for stat in self.stats: - if isinstance(stat, FuncDefNode) and stat.entry.used: + from_pyx = Options.cimport_from_pyx and not stat.entry.visibility == 'extern' + if isinstance(stat, FuncDefNode) and (stat.entry.used or from_pyx): code.mark_pos(stat.pos) stat.generate_function_definitions(env, code) @@ -877,7 +978,7 @@ class FusedCFuncDefNode(StatListNode): "((__pyx_FusedFunctionObject *) %s)->__signatures__ = %s;" % (self.resulting_fused_function.result(), self.__signatures__.result())) - code.put_giveref(self.__signatures__.result()) + self.__signatures__.generate_giveref(code) self.__signatures__.generate_post_assignment_code(code) self.__signatures__.free_temps(code) diff --git a/Cython/Compiler/Future.py b/Cython/Compiler/Future.py index 848792e00..8de10c0cb 100644 --- a/Cython/Compiler/Future.py +++ b/Cython/Compiler/Future.py @@ -11,5 +11,6 @@ absolute_import = _get_feature("absolute_import") nested_scopes = _get_feature("nested_scopes") # dummy generators = _get_feature("generators") # dummy generator_stop = _get_feature("generator_stop") +annotations = _get_feature("annotations") del _get_feature diff --git a/Cython/Compiler/Interpreter.py b/Cython/Compiler/Interpreter.py index 9ec391f2a..244397264 100644 --- a/Cython/Compiler/Interpreter.py +++ b/Cython/Compiler/Interpreter.py @@ -47,8 +47,8 @@ def interpret_compiletime_options(optlist, optdict, type_env=None, type_args=()) raise CompileError(node.pos, "Type not allowed here.") else: if (sys.version_info[0] >=3 and - isinstance(node, StringNode) and - node.unicode_value is not None): + isinstance(node, StringNode) and + node.unicode_value is not None): return (node.unicode_value, node.pos) return (node.compile_time_value(empty_scope), node.pos) diff --git a/Cython/Compiler/Lexicon.py b/Cython/Compiler/Lexicon.py index 72c9ceaef..c3ca05b56 100644 --- a/Cython/Compiler/Lexicon.py +++ b/Cython/Compiler/Lexicon.py @@ 
-1,3 +1,4 @@ +# -*- coding: utf-8 -*- # cython: language_level=3, py2_import=True # # Cython Scanner - Lexical Definitions @@ -16,28 +17,43 @@ IDENT = 'IDENT' def make_lexicon(): from ..Plex import \ Str, Any, AnyBut, AnyChar, Rep, Rep1, Opt, Bol, Eol, Eof, \ - TEXT, IGNORE, State, Lexicon - from .Scanning import Method + TEXT, IGNORE, Method, State, Lexicon, Range - letter = Any("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_") + nonzero_digit = Any("123456789") digit = Any("0123456789") bindigit = Any("01") octdigit = Any("01234567") hexdigit = Any("0123456789ABCDEFabcdef") indentation = Bol + Rep(Any(" \t")) + # The list of valid unicode identifier characters are pretty slow to generate at runtime, + # and require Python3, so are just included directly here + # (via the generated code block at the bottom of the file) + unicode_start_character = (Any(unicode_start_ch_any) | Range(unicode_start_ch_range)) + unicode_continuation_character = ( + unicode_start_character | + Any(unicode_continuation_ch_any) | Range(unicode_continuation_ch_range)) + def underscore_digits(d): return Rep1(d) + Rep(Str("_") + Rep1(d)) + def prefixed_digits(prefix, digits): + return prefix + Opt(Str("_")) + underscore_digits(digits) + decimal = underscore_digits(digit) dot = Str(".") exponent = Any("Ee") + Opt(Any("+-")) + decimal decimal_fract = (decimal + dot + Opt(decimal)) | (dot + decimal) - name = letter + Rep(letter | digit) - intconst = decimal | (Str("0") + ((Any("Xx") + underscore_digits(hexdigit)) | - (Any("Oo") + underscore_digits(octdigit)) | - (Any("Bb") + underscore_digits(bindigit)) )) + #name = letter + Rep(letter | digit) + name = unicode_start_character + Rep(unicode_continuation_character) + intconst = (prefixed_digits(nonzero_digit, digit) | # decimal literals with underscores must not start with '0' + (Str("0") + (prefixed_digits(Any("Xx"), hexdigit) | + prefixed_digits(Any("Oo"), octdigit) | + prefixed_digits(Any("Bb"), bindigit) )) | + 
underscore_digits(Str('0')) # 0_0_0_0... is allowed as a decimal literal + | Rep1(digit) # FIXME: remove these Py2 style decimal/octal literals (PY_VERSION_HEX < 3) + ) intsuffix = (Opt(Any("Uu")) + Opt(Any("Ll")) + Opt(Any("Ll"))) | (Opt(Any("Ll")) + Opt(Any("Ll")) + Opt(Any("Uu"))) intliteral = intconst + intsuffix fltconst = (decimal_fract + Opt(exponent)) | (decimal + exponent) @@ -58,10 +74,11 @@ def make_lexicon(): bra = Any("([{") ket = Any(")]}") + ellipsis = Str("...") punct = Any(":,;+-*/|&<>=.%`~^?!@") diphthong = Str("==", "<>", "!=", "<=", ">=", "<<", ">>", "**", "//", "+=", "-=", "*=", "/=", "%=", "|=", "^=", "&=", - "<<=", ">>=", "**=", "//=", "->", "@=") + "<<=", ">>=", "**=", "//=", "->", "@=", "&&", "||", ':=') spaces = Rep1(Any(" \t\f")) escaped_newline = Str("\\\n") lineterm = Eol + Opt(Str("\n")) @@ -69,11 +86,11 @@ def make_lexicon(): comment = Str("#") + Rep(AnyBut("\n")) return Lexicon([ - (name, IDENT), + (name, Method('normalize_ident')), (intliteral, Method('strip_underscores', symbol='INT')), (fltconst, Method('strip_underscores', symbol='FLOAT')), (imagconst, Method('strip_underscores', symbol='IMAG')), - (punct | diphthong, TEXT), + (ellipsis | punct | diphthong, TEXT), (bra, Method('open_bracket_action')), (ket, Method('close_bracket_action')), @@ -136,3 +153,50 @@ def make_lexicon(): #debug_file = scanner_dump_file ) + +# BEGIN GENERATED CODE +# generated with: +# cpython 3.10.0a0 (heads/master:2b0e654f91, May 29 2020, 16:17:52) + +unicode_start_ch_any = ( + u"_ªµºˬˮͿΆΌՙەۿܐޱߺࠚࠤࠨऽॐলঽৎৼਫ਼ઽૐૹଽୱஃஜௐఽಀಽೞഽൎලาຄລາຽໆༀဿၡႎჇჍቘዀៗៜᢪᪧᳺὙ" + u"ὛὝιⁱⁿℂℇℕℤΩℨⅎⴧⴭⵯꣻꧏꩺꪱꫀꫂיִמּﹱﹳﹷﹹﹻﹽ𐠈𐠼𐨀𐼧𑅄𑅇𑅶𑇚𑇜𑊈𑌽𑍐𑓇𑙄𑚸𑤉𑤿𑥁𑧡𑧣𑨀𑨺𑩐𑪝𑱀𑵆𑶘𑾰𖽐𖿣𝒢" + u"𝒻𝕆𞅎𞥋𞸤𞸧𞸹𞸻𞹂𞹇𞹉𞹋𞹔𞹗𞹙𞹛𞹝𞹟𞹤𞹾" +) +unicode_start_ch_range = ( + u"AZazÀÖØöøˁˆˑˠˤͰʹͶͷͻͽΈΊΎΡΣϵϷҁҊԯԱՖՠֈאתׯײؠيٮٯٱۓۥۦۮۯۺۼܒܯݍޥߊߪߴߵࠀࠕ" + u"ࡀࡘࡠࡪࢠࢴࢶࣇऄहक़ॡॱঀঅঌএঐওনপরশহড়ঢ়য়ৡৰৱਅਊਏਐਓਨਪਰਲਲ਼ਵਸ਼ਸਹਖ਼ੜੲੴઅઍએઑઓનપરલળવહ" + u"ૠૡଅଌଏଐଓନପରଲଳଵହଡ଼ଢ଼ୟୡஅஊஎஐஒகஙசஞடணதநபமஹఅఌఎఐఒనపహౘౚౠౡಅಌಎಐಒನಪಳವಹೠೡೱೲ" + u"ഄഌഎഐഒഺൔൖൟൡൺൿඅඖකනඳරවෆกะเๆກຂຆຊຌຣວະເໄໜໟཀཇཉཬྈྌကဪၐၕၚၝၥၦၮၰၵႁႠჅაჺჼቈ" + 
u"ቊቍቐቖቚቝበኈኊኍነኰኲኵኸኾዂዅወዖዘጐጒጕጘፚᎀᎏᎠᏵᏸᏽᐁᙬᙯᙿᚁᚚᚠᛪᛮᛸᜀᜌᜎᜑᜠᜱᝀᝑᝠᝬᝮᝰកឳᠠᡸᢀᢨ" + u"ᢰᣵᤀᤞᥐᥭᥰᥴᦀᦫᦰᧉᨀᨖᨠᩔᬅᬳᭅᭋᮃᮠᮮᮯᮺᯥᰀᰣᱍᱏᱚᱽᲀᲈᲐᲺᲽᲿᳩᳬᳮᳳᳵᳶᴀᶿḀἕἘἝἠὅὈὍὐὗὟώᾀᾴ" + u"ᾶᾼῂῄῆῌῐΐῖΊῠῬῲῴῶῼₐₜℊℓ℘ℝKℹℼℿⅅⅉⅠↈⰀⰮⰰⱞⱠⳤⳫⳮⳲⳳⴀⴥⴰⵧⶀⶖⶠⶦⶨⶮⶰⶶⶸⶾⷀⷆⷈⷎⷐⷖ" + u"ⷘⷞ々〇〡〩〱〵〸〼ぁゖゝゟァヺーヿㄅㄯㄱㆎㆠㆿㇰㇿ㐀䶿一鿼ꀀꒌꓐꓽꔀꘌꘐꘟꘪꘫꙀꙮꙿꚝꚠꛯꜗꜟꜢꞈꞋꞿꟂꟊꟵꠁꠃꠅꠇꠊ" + u"ꠌꠢꡀꡳꢂꢳꣲꣷꣽꣾꤊꤥꤰꥆꥠꥼꦄꦲꧠꧤꧦꧯꧺꧾꨀꨨꩀꩂꩄꩋꩠꩶꩾꪯꪵꪶꪹꪽꫛꫝꫠꫪꫲꫴꬁꬆꬉꬎꬑꬖꬠꬦꬨꬮꬰꭚꭜꭩꭰꯢ" + u"가힣ힰퟆퟋퟻ豈舘並龎ffstﬓﬗײַﬨשׁזּטּלּנּסּףּפּצּﮱﯓﱝﱤﴽﵐﶏﶒﷇﷰﷹﹿﻼAZazヲンᅠ하ᅦᅧᅬᅭᅲᅳᅵ𐀀𐀋𐀍𐀦𐀨𐀺" + u"𐀼𐀽𐀿𐁍𐁐𐁝𐂀𐃺𐅀𐅴𐊀𐊜𐊠𐋐𐌀𐌟𐌭𐍊𐍐𐍵𐎀𐎝𐎠𐏃𐏈𐏏𐏑𐏕𐐀𐒝𐒰𐓓𐓘𐓻𐔀𐔧𐔰𐕣𐘀𐜶𐝀𐝕𐝠𐝧𐠀𐠅𐠊𐠵𐠷𐠸𐠿𐡕𐡠𐡶𐢀𐢞𐣠𐣲𐣴𐣵" + u"𐤀𐤕𐤠𐤹𐦀𐦷𐦾𐦿𐨐𐨓𐨕𐨗𐨙𐨵𐩠𐩼𐪀𐪜𐫀𐫇𐫉𐫤𐬀𐬵𐭀𐭕𐭠𐭲𐮀𐮑𐰀𐱈𐲀𐲲𐳀𐳲𐴀𐴣𐺀𐺩𐺰𐺱𐼀𐼜𐼰𐽅𐾰𐿄𐿠𐿶𑀃𑀷𑂃𑂯𑃐𑃨𑄃𑄦𑅐𑅲" + u"𑆃𑆲𑇁𑇄𑈀𑈑𑈓𑈫𑊀𑊆𑊊𑊍𑊏𑊝𑊟𑊨𑊰𑋞𑌅𑌌𑌏𑌐𑌓𑌨𑌪𑌰𑌲𑌳𑌵𑌹𑍝𑍡𑐀𑐴𑑇𑑊𑑟𑑡𑒀𑒯𑓄𑓅𑖀𑖮𑗘𑗛𑘀𑘯𑚀𑚪𑜀𑜚𑠀𑠫𑢠𑣟𑣿𑤆𑤌𑤓" + u"𑤕𑤖𑤘𑤯𑦠𑦧𑦪𑧐𑨋𑨲𑩜𑪉𑫀𑫸𑰀𑰈𑰊𑰮𑱲𑲏𑴀𑴆𑴈𑴉𑴋𑴰𑵠𑵥𑵧𑵨𑵪𑶉𑻠𑻲𒀀𒎙𒐀𒑮𒒀𒕃𓀀𓐮𔐀𔙆𖠀𖨸𖩀𖩞𖫐𖫭𖬀𖬯𖭀𖭃𖭣𖭷𖭽𖮏𖹀𖹿" + u"𖼀𖽊𖾓𖾟𖿠𖿡𗀀𘟷𘠀𘳕𘴀𘴈𛀀𛄞𛅐𛅒𛅤𛅧𛅰𛋻𛰀𛱪𛱰𛱼𛲀𛲈𛲐𛲙𝐀𝑔𝑖𝒜𝒞𝒟𝒥𝒦𝒩𝒬𝒮𝒹𝒽𝓃𝓅𝔅𝔇𝔊𝔍𝔔𝔖𝔜𝔞𝔹𝔻𝔾𝕀𝕄𝕊𝕐𝕒𝚥" + u"𝚨𝛀𝛂𝛚𝛜𝛺𝛼𝜔𝜖𝜴𝜶𝝎𝝐𝝮𝝰𝞈𝞊𝞨𝞪𝟂𝟄𝟋𞄀𞄬𞄷𞄽𞋀𞋫𞠀𞣄𞤀𞥃𞸀𞸃𞸅𞸟𞸡𞸢𞸩𞸲𞸴𞸷𞹍𞹏𞹑𞹒𞹡𞹢𞹧𞹪𞹬𞹲𞹴𞹷𞹹𞹼𞺀𞺉𞺋𞺛" + u"𞺡𞺣𞺥𞺩𞺫𞺻𠀀𪛝𪜀𫜴𫝀𫠝𫠠𬺡𬺰𮯠丽𪘀" +) +unicode_continuation_ch_any = ( + u"··়ׇֿٰܑ߽ৗ਼৾ੑੵ઼଼ஂௗ಼ൗ්ූัັ༹༵༷࿆᳭ᢩ៝᳴⁔⵿⃡꙯ꠂ꠆ꠋ꠬ꧥꩃﬞꪰ꫁_𑅳𐨿𐇽𐋠𑈾𑍗𑑞𑥀𑧤𑩇𑴺𑵇𖽏𖿤𝩵" + u"𝪄" +) +unicode_continuation_ch_range = ( + u"09ֽׁׂًؚ֑ׅ̀ͯ҃҇ׄؐ٩۪ۭۖۜ۟ۤۧۨ۰۹ܰ݊ަް߀߉࡙࡛࣓ࣣ߫߳ࠖ࠙ࠛࠣࠥࠧࠩ࠭࣡ःऺ़ाॏ॑ॗॢॣ०९ঁঃ" + u"াৄেৈো্ৢৣ০৯ਁਃਾੂੇੈੋ੍੦ੱઁઃાૅેૉો્ૢૣ૦૯ૺ૿ଁଃାୄେୈୋ୍୕ୗୢୣ୦୯ாூெைொ்௦௯ఀఄాౄ" + u"ెైొ్ౕౖౢౣ౦౯ಁಃಾೄೆೈೊ್ೕೖೢೣ೦೯ഀഃ഻഼ാൄെൈൊ്ൢൣ൦൯ඁඃාුෘෟ෦෯ෲෳำฺ็๎๐๙ຳຼ່ໍ໐໙" + u"༘༙༠༩༾༿྄ཱ྆྇ྍྗྙྼါှ၀၉ၖၙၞၠၢၤၧၭၱၴႂႍႏႝ፝፟፩፱ᜒ᜔ᜲ᜴ᝒᝓᝲᝳ឴៓០៩᠋᠍᠐᠙ᤠᤫᤰ᤻᥆᥏᧐᧚" + u"ᨗᨛᩕᩞ᩠᩿᩼᪉᪐᪙᪽ᪿᫀ᪰ᬀᬄ᬴᭄᭐᭙᭫᭳ᮀᮂᮡᮭ᮰᮹᯦᯳ᰤ᰷᱀᱉᱐᱙᳔᳨᳐᳒᳷᷹᷿᳹᷀᷻‿⁀⃥゙゚〪〯⃐⃜⃰⳯⳱ⷠⷿ" + u"꘠꘩ꙴ꙽ꚞꚟ꛰꛱ꠣꠧꢀꢁꢴꣅ꣐꣙꣠꣱ꣿ꤉ꤦ꤭ꥇ꥓ꦀꦃ꦳꧀꧐꧙꧰꧹ꨩꨶꩌꩍ꩐꩙ꩻꩽꪴꪲꪷꪸꪾ꪿ꫫꫯꫵ꫶ꯣꯪ꯬꯭꯰꯹︀️︠︯" + u"︳︴﹍﹏09゙゚𐍶𐍺𐒠𐒩𐨁𐨃𐨅𐨆𐨌𐨺𐫦𐨏𐨸𐫥𐴤𐴧𐴰𐴹𐽆𐽐𐺫𐺬𑀀𑀂𑀸𑁆𑁦𑁯𑁿𑂂𑂰𑂺𑃰𑃹𑄀𑄂𑄧𑄴𑄶𑄿𑅅𑅆𑆀𑆂𑆳𑇀𑇉𑇌𑇎𑇙𑈬𑈷" + u"𑋟𑋪𑋰𑋹𑌀𑌃𑌻𑌼𑌾𑍄𑍇𑍈𑍋𑍍𑍢𑍣𑍦𑍬𑍰𑍴𑐵𑑆𑑐𑑙𑒰𑓃𑓐𑓙𑖯𑖵𑖸𑗀𑗜𑗝𑘰𑙀𑙐𑙙𑚫𑚷𑛀𑛉𑜝𑜫𑜰𑜹𑠬𑠺𑣠𑣩𑤰𑤵𑤷𑤸𑤻𑤾𑥂𑥃𑥐𑥙" + u"𑧑𑧗𑧚𑧠𑨁𑨊𑨳𑨹𑨻𑨾𑩑𑩛𑪊𑪙𑰯𑰶𑰸𑰿𑱐𑱙𑲒𑲧𑲩𑲶𑴱𑴶𑴼𑴽𑴿𑵅𑵐𑵙𑶊𑶎𑶐𑶑𑶓𑶗𑶠𑶩𑻳𑻶𖩠𖩩𖫰𖫴𖬰𖬶𖭐𖭙𖽑𖾇𖾏𖾒𖿰𖿱𛲝𛲞𝅩𝅥" + u"𝅲𝅻𝆂𝆋𝅭𝆅𝆪𝆭𝉂𝉄𝟎𝟿𝨀𝨶𝨻𝩬𝪛𝪟𝪡𝪯𞀀𞀆𞀈𞀘𞀛𞀡𞀣𞀤𞀦𞀪𞄰𞄶𞅀𞅉𞋬𞋹𞥊𞣐𞣖𞥄𞥐𞥙🯰🯹" +) + +# END GENERATED CODE diff --git a/Cython/Compiler/Main.py b/Cython/Compiler/Main.py index 561ac222d..36813975d 100644 --- a/Cython/Compiler/Main.py +++ b/Cython/Compiler/Main.py @@ -2,15 +2,15 @@ # Cython Top Level # -from __future__ import absolute_import +from __future__ import absolute_import, print_function import os import re import sys import io -if 
sys.version_info[:2] < (2, 6) or (3, 0) <= sys.version_info[:2] < (3, 3): - sys.stderr.write("Sorry, Cython requires Python 2.6+ or 3.3+, found %d.%d\n" % tuple(sys.version_info[:2])) +if sys.version_info[:2] < (2, 7) or (3, 0) <= sys.version_info[:2] < (3, 3): + sys.stderr.write("Sorry, Cython requires Python 2.7 or 3.3+, found %d.%d\n" % tuple(sys.version_info[:2])) sys.exit(1) try: @@ -30,30 +30,28 @@ from .Errors import PyrexError, CompileError, error, warning from .Symtab import ModuleScope from .. import Utils from . import Options +from .Options import CompilationOptions, default_options +from .CmdLine import parse_command_line +from .Lexicon import (unicode_start_ch_any, unicode_continuation_ch_any, + unicode_start_ch_range, unicode_continuation_ch_range) -from . import Version # legacy import needed by old PyTables versions -version = Version.version # legacy attribute - use "Cython.__version__" instead -module_name_pattern = re.compile(r"[A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*)*$") +def _make_range_re(chrs): + out = [] + for i in range(0, len(chrs), 2): + out.append(u"{0}-{1}".format(chrs[i], chrs[i+1])) + return u"".join(out) -verbose = 0 +# py2 version looked like r"[A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*)*$" +module_name_pattern = u"[{0}{1}][{0}{2}{1}{3}]*".format( + unicode_start_ch_any, _make_range_re(unicode_start_ch_range), + unicode_continuation_ch_any, + _make_range_re(unicode_continuation_ch_range)) +module_name_pattern = re.compile(u"{0}(\\.{0})*$".format(module_name_pattern)) -standard_include_path = os.path.abspath(os.path.join(os.path.dirname(__file__), - os.path.pardir, 'Includes')) -class CompilationData(object): - # Bundles the information that is passed from transform to transform. - # (For now, this is only) - - # While Context contains every pxd ever loaded, path information etc., - # this only contains the data related to a single compilation pass - # - # pyx ModuleNode Main code tree of this compilation. 
- # pxds {string : ModuleNode} Trees for the pxds used in the pyx. - # codewriter CCodeWriter Where to output final code. - # options CompilationOptions - # result CompilationResult - pass +standard_include_path = os.path.abspath( + os.path.join(os.path.dirname(os.path.dirname(__file__)), 'Includes')) class Context(object): @@ -93,10 +91,17 @@ class Context(object): if language_level is not None: self.set_language_level(language_level) + self.legacy_implicit_noexcept = self.compiler_directives.get('legacy_implicit_noexcept', False) + self.gdb_debug_outputwriter = None + @classmethod + def from_options(cls, options): + return cls(options.include_path, options.compiler_directives, + options.cplus, options.language_level, options=options) + def set_language_level(self, level): - from .Future import print_function, unicode_literals, absolute_import, division + from .Future import print_function, unicode_literals, absolute_import, division, generator_stop future_directives = set() if level == '3str': level = 3 @@ -105,7 +110,7 @@ class Context(object): if level >= 3: future_directives.add(unicode_literals) if level >= 3: - future_directives.update([print_function, absolute_import, division]) + future_directives.update([print_function, absolute_import, division, generator_stop]) self.language_level = level self.future_directives = future_directives if level >= 3: @@ -123,15 +128,6 @@ class Context(object): self._interned[key] = value return value - def intern_value(self, value, *key): - key = (type(value), value) + key - try: - return self._interned[key] - except KeyError: - pass - self._interned[key] = value - return value - # pipeline creation functions can now be found in Pipeline.py def process_pxd(self, source_desc, scope, module_name): @@ -149,6 +145,29 @@ class Context(object): def nonfatal_error(self, exc): return Errors.report_error(exc) + def _split_qualified_name(self, qualified_name): + # Splits qualified_name into parts in form of 2-tuples: (PART_NAME, 
IS_PACKAGE). + qualified_name_parts = qualified_name.split('.') + last_part = qualified_name_parts.pop() + qualified_name_parts = [(p, True) for p in qualified_name_parts] + if last_part != '__init__': + # If Last part is __init__, then it is omitted. Otherwise, we need to check whether we can find + # __init__.pyx/__init__.py file to determine if last part is package or not. + is_package = False + for suffix in ('.py', '.pyx'): + path = self.search_include_directories( + qualified_name, suffix=suffix, source_pos=None, source_file_path=None) + if path: + is_package = self._is_init_file(path) + break + + qualified_name_parts.append((last_part, is_package)) + return qualified_name_parts + + @staticmethod + def _is_init_file(path): + return os.path.basename(path) in ('__init__.pyx', '__init__.py', '__init__.pxd') if path else False + def find_module(self, module_name, relative_to=None, pos=None, need_pxd=1, absolute_fallback=True): # Finds and returns the module scope corresponding to @@ -179,7 +198,7 @@ class Context(object): if not module_name_pattern.match(qualified_name): raise CompileError(pos or (module_name, 0, 0), - "'%s' is not a valid module name" % module_name) + u"'%s' is not a valid module name" % module_name) if relative_to: if debug_find_module: @@ -188,16 +207,16 @@ class Context(object): if not scope: pxd_pathname = self.find_pxd_file(qualified_name, pos) if pxd_pathname: - scope = relative_to.find_submodule(module_name) + is_package = self._is_init_file(pxd_pathname) + scope = relative_to.find_submodule(module_name, as_package=is_package) if not scope: if debug_find_module: print("...trying absolute import") if absolute_fallback: qualified_name = module_name scope = self - for name in qualified_name.split("."): - scope = scope.find_submodule(name) - + for name, is_package in self._split_qualified_name(qualified_name): + scope = scope.find_submodule(name, as_package=is_package) if debug_find_module: print("...scope = %s" % scope) if not 
scope.pxd_file_loaded: @@ -215,8 +234,9 @@ class Context(object): # Set pxd_file_loaded such that we don't need to # look for the non-existing pxd file next time. scope.pxd_file_loaded = True - package_pathname = self.search_include_directories(qualified_name, ".py", pos) - if package_pathname and package_pathname.endswith('__init__.py'): + package_pathname = self.search_include_directories( + qualified_name, suffix=".py", source_pos=pos) + if package_pathname and package_pathname.endswith(Utils.PACKAGE_FILES): pass else: error(pos, "'%s.pxd' not found" % qualified_name.replace('.', os.sep)) @@ -238,7 +258,7 @@ class Context(object): pass return scope - def find_pxd_file(self, qualified_name, pos, sys_path=True): + def find_pxd_file(self, qualified_name, pos=None, sys_path=True, source_file_path=None): # Search include path (and sys.path if sys_path is True) for # the .pxd file corresponding to the given fully-qualified # module name. @@ -247,53 +267,36 @@ class Context(object): # the directory containing the source file is searched first # for a dotted filename, and its containing package root # directory is searched first for a non-dotted filename. 
- pxd = self.search_include_directories(qualified_name, ".pxd", pos, sys_path=sys_path) - if pxd is None: # XXX Keep this until Includes/Deprecated is removed - if (qualified_name.startswith('python') or - qualified_name in ('stdlib', 'stdio', 'stl')): - standard_include_path = os.path.abspath(os.path.normpath( - os.path.join(os.path.dirname(__file__), os.path.pardir, 'Includes'))) - deprecated_include_path = os.path.join(standard_include_path, 'Deprecated') - self.include_directories.append(deprecated_include_path) - try: - pxd = self.search_include_directories(qualified_name, ".pxd", pos) - finally: - self.include_directories.pop() - if pxd: - name = qualified_name - if name.startswith('python'): - warning(pos, "'%s' is deprecated, use 'cpython'" % name, 1) - elif name in ('stdlib', 'stdio'): - warning(pos, "'%s' is deprecated, use 'libc.%s'" % (name, name), 1) - elif name in ('stl'): - warning(pos, "'%s' is deprecated, use 'libcpp.*.*'" % name, 1) + pxd = self.search_include_directories( + qualified_name, suffix=".pxd", source_pos=pos, sys_path=sys_path, source_file_path=source_file_path) if pxd is None and Options.cimport_from_pyx: return self.find_pyx_file(qualified_name, pos) return pxd - def find_pyx_file(self, qualified_name, pos): + def find_pyx_file(self, qualified_name, pos=None, source_file_path=None): # Search include path for the .pyx file corresponding to the # given fully-qualified module name, as for find_pxd_file(). - return self.search_include_directories(qualified_name, ".pyx", pos) + return self.search_include_directories( + qualified_name, suffix=".pyx", source_pos=pos, source_file_path=source_file_path) - def find_include_file(self, filename, pos): + def find_include_file(self, filename, pos=None, source_file_path=None): # Search list of include directories for filename. # Reports an error and returns None if not found. 
- path = self.search_include_directories(filename, "", pos, - include=True) + path = self.search_include_directories( + filename, source_pos=pos, include=True, source_file_path=source_file_path) if not path: error(pos, "'%s' not found" % filename) return path - def search_include_directories(self, qualified_name, suffix, pos, - include=False, sys_path=False): + def search_include_directories(self, qualified_name, + suffix=None, source_pos=None, include=False, sys_path=False, source_file_path=None): include_dirs = self.include_directories if sys_path: include_dirs = include_dirs + sys.path # include_dirs must be hashable for caching in @cached_function include_dirs = tuple(include_dirs + [standard_include_path]) - return search_include_directories(include_dirs, qualified_name, - suffix, pos, include) + return search_include_directories( + include_dirs, qualified_name, suffix or "", source_pos, include, source_file_path) def find_root_package_dir(self, file_path): return Utils.find_root_package_dir(file_path) @@ -307,15 +310,14 @@ class Context(object): c_time = Utils.modification_time(output_path) if Utils.file_newer_than(source_path, c_time): return 1 - pos = [source_path] pxd_path = Utils.replace_suffix(source_path, ".pxd") if os.path.exists(pxd_path) and Utils.file_newer_than(pxd_path, c_time): return 1 for kind, name in self.read_dependency_file(source_path): if kind == "cimport": - dep_path = self.find_pxd_file(name, pos) + dep_path = self.find_pxd_file(name, source_file_path=source_path) elif kind == "include": - dep_path = self.search_include_directories(name, pos) + dep_path = self.search_include_directories(name, source_file_path=source_path) else: continue if dep_path and Utils.file_newer_than(dep_path, c_time): @@ -332,11 +334,10 @@ class Context(object): def read_dependency_file(self, source_path): dep_path = Utils.replace_suffix(source_path, ".dep") if os.path.exists(dep_path): - f = open(dep_path, "rU") - chunks = [ line.strip().split(" ", 1) - for 
line in f.readlines() - if " " in line.strip() ] - f.close() + with open(dep_path, "rU") as f: + chunks = [ line.split(" ", 1) + for line in (l.strip() for l in f) + if " " in line ] return chunks else: return () @@ -345,12 +346,12 @@ class Context(object): # Look up a top-level module. Returns None if not found. return self.modules.get(name, None) - def find_submodule(self, name): + def find_submodule(self, name, as_package=False): # Find a top-level module, creating a new one if needed. scope = self.lookup_submodule(name) if not scope: scope = ModuleScope(name, - parent_module = None, context = self) + parent_module = None, context = self, is_package=as_package) self.modules[name] = scope return scope @@ -360,7 +361,7 @@ class Context(object): source_filename = source_desc.filename scope.cpp = self.cpp # Parse the given source file and return a parse tree. - num_errors = Errors.num_errors + num_errors = Errors.get_errors_count() try: with Utils.open_source_file(source_filename) as f: from . 
import Parsing @@ -379,7 +380,7 @@ class Context(object): #traceback.print_exc() raise self._report_decode_error(source_desc, e) - if Errors.num_errors > num_errors: + if Errors.get_errors_count() > num_errors: raise CompileError() return tree @@ -419,20 +420,19 @@ class Context(object): return ".".join(names) def setup_errors(self, options, result): - Errors.reset() # clear any remaining error state + Errors.init_thread() if options.use_listing_file: path = result.listing_file = Utils.replace_suffix(result.main_source_file, ".lis") else: path = None - Errors.open_listing_file(path=path, - echo_to_stderr=options.errors_to_stderr) + Errors.open_listing_file(path=path, echo_to_stderr=options.errors_to_stderr) def teardown_errors(self, err, options, result): source_desc = result.compilation_source.source_desc if not isinstance(source_desc, FileSourceDescriptor): raise RuntimeError("Only file sources for code supported") Errors.close_listing_file() - result.num_errors = Errors.num_errors + result.num_errors = Errors.get_errors_count() if result.num_errors > 0: err = True if err and result.c_file: @@ -473,22 +473,29 @@ def create_default_resultobj(compilation_source, options): def run_pipeline(source, options, full_module_name=None, context=None): from . 
import Pipeline + # ensure that the inputs are unicode (for Python 2) + if sys.version_info[0] == 2: + source = Utils.decode_filename(source) + if full_module_name: + full_module_name = Utils.decode_filename(full_module_name) + source_ext = os.path.splitext(source)[1] - options.configure_language_defaults(source_ext[1:]) # py/pyx + options.configure_language_defaults(source_ext[1:]) # py/pyx if context is None: - context = options.create_context() + context = Context.from_options(options) # Set up source object cwd = os.getcwd() abs_path = os.path.abspath(source) full_module_name = full_module_name or context.extract_module_name(source, options) + full_module_name = EncodedString(full_module_name) Utils.raise_error_if_module_name_forbidden(full_module_name) if options.relative_path_in_code_position_comments: rel_path = full_module_name.replace('.', os.sep) + source_ext if not abs_path.endswith(rel_path): - rel_path = source # safety measure to prevent printing incorrect paths + rel_path = source # safety measure to prevent printing incorrect paths else: rel_path = abs_path source_desc = FileSourceDescriptor(abs_path, rel_path) @@ -512,6 +519,12 @@ def run_pipeline(source, options, full_module_name=None, context=None): pipeline = Pipeline.create_pyx_pipeline(context, options, result) context.setup_errors(options, result) + + if '.' in full_module_name and '.' in os.path.splitext(os.path.basename(abs_path))[0]: + warning((source_desc, 1, 0), + "Dotted filenames ('%s') are deprecated." + " Please use the normal Python package directory layout." 
% os.path.basename(abs_path), level=1) + err, enddata = Pipeline.run_pipeline(pipeline, source) context.teardown_errors(err, options, result) if err is None and options.depfile: @@ -538,146 +551,6 @@ class CompilationSource(object): self.cwd = cwd -class CompilationOptions(object): - r""" - See default_options at the end of this module for a list of all possible - options and CmdLine.usage and CmdLine.parse_command_line() for their - meaning. - """ - def __init__(self, defaults=None, **kw): - self.include_path = [] - if defaults: - if isinstance(defaults, CompilationOptions): - defaults = defaults.__dict__ - else: - defaults = default_options - - options = dict(defaults) - options.update(kw) - - # let's assume 'default_options' contains a value for most known compiler options - # and validate against them - unknown_options = set(options) - set(default_options) - # ignore valid options that are not in the defaults - unknown_options.difference_update(['include_path']) - if unknown_options: - message = "got unknown compilation option%s, please remove: %s" % ( - 's' if len(unknown_options) > 1 else '', - ', '.join(unknown_options)) - raise ValueError(message) - - directive_defaults = Options.get_directive_defaults() - directives = dict(options['compiler_directives']) # copy mutable field - # check for invalid directives - unknown_directives = set(directives) - set(directive_defaults) - if unknown_directives: - message = "got unknown compiler directive%s: %s" % ( - 's' if len(unknown_directives) > 1 else '', - ', '.join(unknown_directives)) - raise ValueError(message) - options['compiler_directives'] = directives - if directives.get('np_pythran', False) and not options['cplus']: - import warnings - warnings.warn("C++ mode forced when in Pythran mode!") - options['cplus'] = True - if 'language_level' in directives and 'language_level' not in kw: - options['language_level'] = directives['language_level'] - elif not options.get('language_level'): - 
options['language_level'] = directive_defaults.get('language_level') - if 'formal_grammar' in directives and 'formal_grammar' not in kw: - options['formal_grammar'] = directives['formal_grammar'] - if options['cache'] is True: - options['cache'] = os.path.join(Utils.get_cython_cache_dir(), 'compiler') - - self.__dict__.update(options) - - def configure_language_defaults(self, source_extension): - if source_extension == 'py': - if self.compiler_directives.get('binding') is None: - self.compiler_directives['binding'] = True - - def create_context(self): - return Context(self.include_path, self.compiler_directives, - self.cplus, self.language_level, options=self) - - def get_fingerprint(self): - r""" - Return a string that contains all the options that are relevant for cache invalidation. - """ - # Collect only the data that can affect the generated file(s). - data = {} - - for key, value in self.__dict__.items(): - if key in ['show_version', 'errors_to_stderr', 'verbose', 'quiet']: - # verbosity flags have no influence on the compilation result - continue - elif key in ['output_file', 'output_dir']: - # ignore the exact name of the output file - continue - elif key in ['timestamps']: - # the cache cares about the content of files, not about the timestamps of sources - continue - elif key in ['cache']: - # hopefully caching has no influence on the compilation result - continue - elif key in ['compiler_directives']: - # directives passed on to the C compiler do not influence the generated C code - continue - elif key in ['include_path']: - # this path changes which headers are tracked as dependencies, - # it has no influence on the generated C code - continue - elif key in ['working_path']: - # this path changes where modules and pxd files are found; - # their content is part of the fingerprint anyway, their - # absolute path does not matter - continue - elif key in ['create_extension']: - # create_extension() has already mangled the options, e.g., - # 
embedded_metadata, when the fingerprint is computed so we - # ignore it here. - continue - elif key in ['build_dir']: - # the (temporary) directory where we collect dependencies - # has no influence on the C output - continue - elif key in ['use_listing_file', 'generate_pxi', 'annotate', 'annotate_coverage_xml']: - # all output files are contained in the cache so the types of - # files generated must be part of the fingerprint - data[key] = value - elif key in ['formal_grammar', 'evaluate_tree_assertions']: - # these bits can change whether compilation to C passes/fails - data[key] = value - elif key in ['embedded_metadata', 'emit_linenums', 'c_line_in_traceback', 'gdb_debug', 'relative_path_in_code_position_comments']: - # the generated code contains additional bits when these are set - data[key] = value - elif key in ['cplus', 'language_level', 'compile_time_env', 'np_pythran']: - # assorted bits that, e.g., influence the parser - data[key] = value - elif key == ['capi_reexport_cincludes']: - if self.capi_reexport_cincludes: - # our caching implementation does not yet include fingerprints of all the header files - raise NotImplementedError('capi_reexport_cincludes is not compatible with Cython caching') - elif key == ['common_utility_include_dir']: - if self.common_utility_include_dir: - raise NotImplementedError('common_utility_include_dir is not compatible with Cython caching yet') - else: - # any unexpected option should go into the fingerprint; it's better - # to recompile than to return incorrect results from the cache. - data[key] = value - - def to_fingerprint(item): - r""" - Recursively turn item into a string, turning dicts into lists with - deterministic ordering. 
- """ - if isinstance(item, dict): - item = sorted([(repr(key), to_fingerprint(value)) for key, value in item.items()]) - return repr(item) - - return to_fingerprint(data) - - class CompilationResult(object): """ Results from the Cython compiler: @@ -739,11 +612,11 @@ def compile_multiple(sources, options): a CompilationResultSet. Performs timestamp checking and/or recursion if these are specified in the options. """ - if options.module_name and len(sources) > 1: + if len(sources) > 1 and options.module_name: raise RuntimeError('Full module name can only be set ' 'for single source compilation') # run_pipeline creates the context - # context = options.create_context() + # context = Context.from_options(options) sources = [os.path.abspath(source) for source in sources] processed = set() results = CompilationResultSet() @@ -754,7 +627,7 @@ def compile_multiple(sources, options): for source in sources: if source not in processed: if context is None: - context = options.create_context() + context = Context.from_options(options) output_filename = get_output_filename(source, cwd, options) out_of_date = context.c_file_out_of_date(source, output_filename) if (not timestamps) or out_of_date: @@ -789,55 +662,79 @@ def compile(source, options = None, full_module_name = None, **kwds): @Utils.cached_function -def search_include_directories(dirs, qualified_name, suffix, pos, include=False): +def search_include_directories(dirs, qualified_name, suffix="", pos=None, include=False, source_file_path=None): """ Search the list of include directories for the given file name. - If a source file position is given, first searches the directory - containing that file. Returns None if not found, but does not - report an error. + If a source file path or position is given, first searches the directory + containing that file. Returns None if not found, but does not report an error. The 'include' option will disable package dereferencing. 
""" - - if pos: + if pos and not source_file_path: file_desc = pos[0] if not isinstance(file_desc, FileSourceDescriptor): raise RuntimeError("Only file sources for code supported") + source_file_path = file_desc.filename + if source_file_path: if include: - dirs = (os.path.dirname(file_desc.filename),) + dirs + dirs = (os.path.dirname(source_file_path),) + dirs else: - dirs = (Utils.find_root_package_dir(file_desc.filename),) + dirs + dirs = (Utils.find_root_package_dir(source_file_path),) + dirs + # search for dotted filename e.g. <dir>/foo.bar.pxd dotted_filename = qualified_name if suffix: dotted_filename += suffix - if not include: - names = qualified_name.split('.') - package_names = tuple(names[:-1]) - module_name = names[-1] - module_filename = module_name + suffix - package_filename = "__init__" + suffix - for dirname in dirs: path = os.path.join(dirname, dotted_filename) if os.path.exists(path): + if not include and '.' in qualified_name and '.' in os.path.splitext(dotted_filename)[0]: + warning(pos, "Dotted filenames ('%s') are deprecated." + " Please use the normal Python package directory layout." % dotted_filename, level=1) return path - if not include: - package_dir = Utils.check_package_dir(dirname, package_names) + # search for filename in package structure e.g. 
<dir>/foo/bar.pxd or <dir>/foo/bar/__init__.pxd + if not include: + + names = qualified_name.split('.') + package_names = tuple(names[:-1]) + module_name = names[-1] + + # search for standard packages first - PEP420 + namespace_dirs = [] + for dirname in dirs: + package_dir, is_namespace = Utils.check_package_dir(dirname, package_names) if package_dir is not None: - path = os.path.join(package_dir, module_filename) - if os.path.exists(path): - return path - path = os.path.join(package_dir, module_name, - package_filename) - if os.path.exists(path): + if is_namespace: + namespace_dirs.append(package_dir) + continue + path = search_module_in_dir(package_dir, module_name, suffix) + if path: return path + + # search for namespaces second - PEP420 + for package_dir in namespace_dirs: + path = search_module_in_dir(package_dir, module_name, suffix) + if path: + return path + return None +@Utils.cached_function +def search_module_in_dir(package_dir, module_name, suffix): + # matches modules of the form: <dir>/foo/bar.pxd + path = Utils.find_versioned_file(package_dir, module_name, suffix) + + # matches modules of the form: <dir>/foo/bar/__init__.pxd + if not path and suffix: + path = Utils.find_versioned_file(os.path.join(package_dir, module_name), "__init__", suffix) + + return path + + # ------------------------------------------------------------------------ # # Main command-line entry point @@ -852,14 +749,23 @@ def main(command_line = 0): args = sys.argv[1:] any_failures = 0 if command_line: - from .CmdLine import parse_command_line - options, sources = parse_command_line(args) + try: + options, sources = parse_command_line(args) + except IOError as e: + # TODO: IOError can be replaced with FileNotFoundError in Cython 3.1 + import errno + if errno.ENOENT != e.errno: + # Raised IOError is not caused by missing file. 
+ raise + print("{}: No such file or directory: '{}'".format(sys.argv[0], e.filename), file=sys.stderr) + sys.exit(1) else: options = CompilationOptions(default_options) sources = args if options.show_version: - sys.stderr.write("Cython version %s\n" % version) + from .. import __version__ + sys.stderr.write("Cython version %s\n" % __version__) if options.working_path!="": os.chdir(options.working_path) try: @@ -871,44 +777,3 @@ def main(command_line = 0): any_failures = 1 if any_failures: sys.exit(1) - - -# ------------------------------------------------------------------------ -# -# Set the default options depending on the platform -# -# ------------------------------------------------------------------------ - -default_options = dict( - show_version = 0, - use_listing_file = 0, - errors_to_stderr = 1, - cplus = 0, - output_file = None, - depfile = None, - annotate = None, - annotate_coverage_xml = None, - generate_pxi = 0, - capi_reexport_cincludes = 0, - working_path = "", - timestamps = None, - verbose = 0, - quiet = 0, - compiler_directives = {}, - embedded_metadata = {}, - evaluate_tree_assertions = False, - emit_linenums = False, - relative_path_in_code_position_comments = True, - c_line_in_traceback = True, - language_level = None, # warn but default to 2 - formal_grammar = False, - gdb_debug = False, - compile_time_env = None, - common_utility_include_dir = None, - output_dir=None, - build_dir=None, - cache=None, - create_extension=None, - module_name=None, - np_pythran=False -) diff --git a/Cython/Compiler/MemoryView.py b/Cython/Compiler/MemoryView.py index 0406d6c71..5ebd396be 100644 --- a/Cython/Compiler/MemoryView.py +++ b/Cython/Compiler/MemoryView.py @@ -22,10 +22,6 @@ ERR_UNINITIALIZED = ("Cannot check if memoryview %s is initialized without the " "GIL, consider using initializedcheck(False)") -def concat_flags(*flags): - return "(%s)" % "|".join(flags) - - format_flag = "PyBUF_FORMAT" memview_c_contiguous = "(PyBUF_C_CONTIGUOUS | PyBUF_FORMAT)" 
@@ -100,8 +96,15 @@ def put_acquire_memoryviewslice(lhs_cname, lhs_type, lhs_pos, rhs, code, def put_assign_to_memviewslice(lhs_cname, rhs, rhs_cname, memviewslicetype, code, have_gil=False, first_assignment=False): + if lhs_cname == rhs_cname: + # self assignment is tricky because memoryview xdecref clears the memoryview + # thus invalidating both sides of the assignment. Therefore make it actually do nothing + code.putln("/* memoryview self assignment no-op */") + return + if not first_assignment: - code.put_xdecref_memoryviewslice(lhs_cname, have_gil=have_gil) + code.put_xdecref(lhs_cname, memviewslicetype, + have_gil=have_gil) if not rhs.result_in_temp(): rhs.make_owned_memoryviewslice(code) @@ -167,7 +170,7 @@ def valid_memslice_dtype(dtype, i=0): valid_memslice_dtype(dtype.base_type, i + 1)) or dtype.is_numeric or dtype.is_pyobject or - dtype.is_fused or # accept this as it will be replaced by specializations later + dtype.is_fused or # accept this as it will be replaced by specializations later (dtype.is_typedef and valid_memslice_dtype(dtype.typedef_base_type)) ) @@ -248,7 +251,7 @@ class MemoryViewSliceBufferEntry(Buffer.BufferEntry): return bufp - def generate_buffer_slice_code(self, code, indices, dst, have_gil, + def generate_buffer_slice_code(self, code, indices, dst, dst_type, have_gil, have_slices, directives): """ Slice a memoryviewslice. 
@@ -265,7 +268,7 @@ class MemoryViewSliceBufferEntry(Buffer.BufferEntry): code.putln("%(dst)s.data = %(src)s.data;" % locals()) code.putln("%(dst)s.memview = %(src)s.memview;" % locals()) - code.put_incref_memoryviewslice(dst) + code.put_incref_memoryviewslice(dst, dst_type, have_gil=have_gil) all_dimensions_direct = all(access == 'direct' for access, packing in self.type.axes) suboffset_dim_temp = [] @@ -292,7 +295,7 @@ class MemoryViewSliceBufferEntry(Buffer.BufferEntry): dim += 1 access, packing = self.type.axes[dim] - if isinstance(index, ExprNodes.SliceNode): + if index.is_slice: # slice, unspecified dimension, or part of ellipsis d = dict(locals()) for s in "start stop step".split(): @@ -404,8 +407,8 @@ def get_is_contig_utility(contig_type, ndim): return utility -def slice_iter(slice_type, slice_result, ndim, code): - if slice_type.is_c_contig or slice_type.is_f_contig: +def slice_iter(slice_type, slice_result, ndim, code, force_strided=False): + if (slice_type.is_c_contig or slice_type.is_f_contig) and not force_strided: return ContigSliceIter(slice_type, slice_result, ndim, code) else: return StridedSliceIter(slice_type, slice_result, ndim, code) @@ -489,7 +492,7 @@ def copy_c_or_fortran_cname(memview): def get_copy_new_utility(pos, from_memview, to_memview): if (from_memview.dtype != to_memview.dtype and - not (from_memview.dtype.is_const and from_memview.dtype.const_base_type == to_memview.dtype)): + not (from_memview.dtype.is_cv_qualified and from_memview.dtype.cv_base_type == to_memview.dtype)): error(pos, "dtypes must be the same!") return if len(from_memview.axes) != len(to_memview.axes): @@ -507,7 +510,8 @@ def get_copy_new_utility(pos, from_memview, to_memview): if to_memview.is_c_contig: mode = 'c' contig_flag = memview_c_contiguous - elif to_memview.is_f_contig: + else: + assert to_memview.is_f_contig mode = 'fortran' contig_flag = memview_f_contiguous @@ -654,13 +658,13 @@ def is_cf_contig(specs): is_c_contig = True elif (specs[-1] == 
('direct','contig') and - all(axis == ('direct','follow') for axis in specs[:-1])): + all(axis == ('direct','follow') for axis in specs[:-1])): # c_contiguous: 'follow', 'follow', ..., 'follow', 'contig' is_c_contig = True elif (len(specs) > 1 and - specs[0] == ('direct','contig') and - all(axis == ('direct','follow') for axis in specs[1:])): + specs[0] == ('direct','contig') and + all(axis == ('direct','follow') for axis in specs[1:])): # f_contiguous: 'contig', 'follow', 'follow', ..., 'follow' is_f_contig = True @@ -809,7 +813,8 @@ context = { 'memview_struct_name': memview_objstruct_cname, 'max_dims': Options.buffer_max_dims, 'memviewslice_name': memviewslice_cname, - 'memslice_init': memslice_entry_init, + 'memslice_init': PyrexTypes.MemoryViewSliceType.default_value, + 'THREAD_LOCKS_PREALLOCATED': 8, } memviewslice_declare_code = load_memview_c_utility( "MemviewSliceStruct", @@ -835,7 +840,7 @@ overlapping_utility = load_memview_c_utility("OverlappingSlices", context) copy_contents_new_utility = load_memview_c_utility( "MemviewSliceCopyTemplate", context, - requires=[], # require cython_array_utility_code + requires=[], # require cython_array_utility_code ) view_utility_code = load_memview_cy_utility( @@ -848,9 +853,9 @@ view_utility_code = load_memview_cy_utility( is_contig_utility, overlapping_utility, copy_contents_new_utility, - ModuleNode.capsule_utility_code], + ], ) -view_utility_whitelist = ('array', 'memoryview', 'array_cwrapper', +view_utility_allowlist = ('array', 'memoryview', 'array_cwrapper', 'generic', 'strided', 'indirect', 'contiguous', 'indirect_contiguous') diff --git a/Cython/Compiler/ModuleNode.py b/Cython/Compiler/ModuleNode.py index a2400bbe2..34ef35880 100644 --- a/Cython/Compiler/ModuleNode.py +++ b/Cython/Compiler/ModuleNode.py @@ -14,6 +14,7 @@ import json import operator import os import re +import sys from .PyrexTypes import CPtrType from . import Future @@ -26,13 +27,25 @@ from . import TypeSlots from . import PyrexTypes from . 
import Pythran -from .Errors import error, warning +from .Errors import error, warning, CompileError from .PyrexTypes import py_object_type -from ..Utils import open_new_file, replace_suffix, decode_filename, build_hex_version -from .Code import UtilityCode, IncludeCode -from .StringEncoding import EncodedString +from ..Utils import open_new_file, replace_suffix, decode_filename, build_hex_version, is_cython_generated_file +from .Code import UtilityCode, IncludeCode, TempitaUtilityCode +from .StringEncoding import EncodedString, encoded_string_or_bytes_literal from .Pythran import has_np_pythran + +def replace_suffix_encoded(path, newsuf): + # calls replace suffix and returns a EncodedString or BytesLiteral with the encoding set + newpath = replace_suffix(path, newsuf) + return as_encoded_filename(newpath) + +def as_encoded_filename(path): + # wraps the path with either EncodedString or BytesLiteral (depending on its input type) + # and sets the encoding to the file system encoding + return encoded_string_or_bytes_literal(path, sys.getfilesystemencoding()) + + def check_c_declarations_pxd(module_node): module_node.scope.check_c_classes_pxd() return module_node @@ -50,11 +63,50 @@ def generate_c_code_config(env, options): else: emit_linenums = options.emit_linenums + if hasattr(options, "emit_code_comments"): + print('Warning: option emit_code_comments is deprecated. ' + 'Instead, use compiler directive emit_code_comments.') + return Code.CCodeConfig( emit_linenums=emit_linenums, emit_code_comments=env.directives['emit_code_comments'], c_line_in_traceback=options.c_line_in_traceback) +# The code required to generate one comparison from another. +# The keys are (from, to). +# The comparison operator always goes first, with equality possibly second. +# The first value specifies if the comparison is inverted. The second is the +# logic op to use, and the third is if the equality is inverted or not. 
+TOTAL_ORDERING = { + # a > b from (not a < b) and (a != b) + ('__lt__', '__gt__'): (True, '&&', True), + # a <= b from (a < b) or (a == b) + ('__lt__', '__le__'): (False, '||', False), + # a >= b from (not a < b). + ('__lt__', '__ge__'): (True, '', None), + + # a >= b from (not a <= b) or (a == b) + ('__le__', '__ge__'): (True, '||', False), + # a < b, from (a <= b) and (a != b) + ('__le__', '__lt__'): (False, '&&', True), + # a > b from (not a <= b) + ('__le__', '__gt__'): (True, '', None), + + # a < b from (not a > b) and (a != b) + ('__gt__', '__lt__'): (True, '&&', True), + # a >= b from (a > b) or (a == b) + ('__gt__', '__ge__'): (False, '||', False), + # a <= b from (not a > b) + ('__gt__', '__le__'): (True, '', None), + + # Return a <= b from (not a >= b) or (a == b) + ('__ge__', '__le__'): (True, '||', False), + # a > b from (a >= b) and (a != b) + ('__ge__', '__gt__'): (False, '&&', True), + # a < b from (not a >= b) + ('__ge__', '__lt__'): (True, '', None), +} + class ModuleNode(Nodes.Node, Nodes.BlockNode): # doc string or None @@ -69,21 +121,41 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): child_attrs = ["body"] directives = None + # internal - used in merging + pxd_stats = None + utility_code_stats = None - def merge_in(self, tree, scope, merge_scope=False): + + def merge_in(self, tree, scope, stage, merge_scope=False): # Merges in the contents of another tree, and possibly scope. With the # current implementation below, this must be done right prior # to code generation. + # Stage is one of "pxd" or "utility" to indicate pxd file or utility + # code. This helps define the order. # # Note: This way of doing it seems strange -- I believe the # right concept is to split ModuleNode into a ModuleNode and a # CodeGenerator, and tell that CodeGenerator to generate code # from multiple sources. 
assert isinstance(self.body, Nodes.StatListNode) + assert stage in ('pxd', 'utility') + + if self.pxd_stats is None: + self.pxd_stats = Nodes.StatListNode(self.body.pos, stats=[]) + self.utility_code_stats = Nodes.StatListNode(self.body.pos, stats=[]) + self.body.stats.insert(0, self.pxd_stats) + self.body.stats.insert(0, self.utility_code_stats) + + if scope.directives != self.scope.directives: + # merged in nodes should keep their original compiler directives + # (for example inline cdef functions) + tree = Nodes.CompilerDirectivesNode(tree.pos, body=tree, directives=scope.directives) + + target_stats = self.pxd_stats if stage == "pxd" else self.utility_code_stats if isinstance(tree, Nodes.StatListNode): - self.body.stats.extend(tree.stats) + target_stats.stats.extend(tree.stats) else: - self.body.stats.append(tree) + target_stats.stats.append(tree) self.scope.utility_code_list.extend(scope.utility_code_list) @@ -105,6 +177,13 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): self.scope.merge_in(scope) + def with_compiler_directives(self): + # When merging a utility code module into the user code we need to preserve + # the original compiler directives. This returns the body of the module node, + # wrapped in its set of directives. 
+ body = Nodes.CompilerDirectivesNode(self.pos, directives=self.directives, body=self.body) + return body + def analyse_declarations(self, env): if has_np_pythran(env): Pythran.include_pythran_generic(env) @@ -131,8 +210,8 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): self.create_import_star_conversion_utility_code(env) for name, entry in sorted(env.entries.items()): if (entry.create_wrapper and entry.scope is env - and entry.is_type and entry.type.is_enum): - entry.type.create_type_wrapper(env) + and entry.is_type and (entry.type.is_enum or entry.type.is_cpp_enum)): + entry.type.create_type_wrapper(env) def process_implementation(self, options, result): env = self.scope @@ -151,6 +230,14 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): return 1 return 0 + def assure_safe_target(self, path, allow_failed=False): + # Check for a common gotcha for new users: naming your .pyx file after the .c file you want to wrap + if not is_cython_generated_file(path, allow_failed=allow_failed, if_not_found=True): + # Raising a fatal CompileError instead of calling error() to prevent castrating an existing file. 
+ raise CompileError( + self.pos, 'The output file already exists and does not look like it was generated by Cython: "%s"' % + os.path.basename(path)) + def generate_h_code(self, env, options, result): def h_entries(entries, api=0, pxd=0): return [entry for entry in entries @@ -161,65 +248,99 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): h_vars = h_entries(env.var_entries) h_funcs = h_entries(env.cfunc_entries) h_extension_types = h_entries(env.c_class_entries) - if h_types or h_vars or h_funcs or h_extension_types: - result.h_file = replace_suffix(result.c_file, ".h") - h_code = Code.CCodeWriter() + + if h_types or h_vars or h_funcs or h_extension_types: + result.h_file = replace_suffix_encoded(result.c_file, ".h") + self.assure_safe_target(result.h_file) + + h_code_writer = Code.CCodeWriter() c_code_config = generate_c_code_config(env, options) - Code.GlobalState(h_code, self, c_code_config) + globalstate = Code.GlobalState(h_code_writer, self, c_code_config) + globalstate.initialize_main_h_code() # in-case utility code is used in the header + h_code_start = globalstate.parts['h_code'] + h_code_main = globalstate.parts['type_declarations'] + h_code_end = globalstate.parts['end'] if options.generate_pxi: - result.i_file = replace_suffix(result.c_file, ".pxi") + result.i_file = replace_suffix_encoded(result.c_file, ".pxi") i_code = Code.PyrexCodeWriter(result.i_file) else: i_code = None - h_code.put_generated_by() - h_guard = Naming.h_guard_prefix + self.api_name(env) - h_code.put_h_guard(h_guard) - h_code.putln("") - h_code.putln('#include "Python.h"') - self.generate_type_header_code(h_types, h_code) + h_code_start.put_generated_by() + h_guard = self.api_name(Naming.h_guard_prefix, env) + h_code_start.put_h_guard(h_guard) + h_code_start.putln("") + h_code_start.putln('#include "Python.h"') + self.generate_type_header_code(h_types, h_code_start) if options.capi_reexport_cincludes: - self.generate_includes(env, [], h_code) - h_code.putln("") - api_guard = 
Naming.api_guard_prefix + self.api_name(env) - h_code.putln("#ifndef %s" % api_guard) - h_code.putln("") - self.generate_extern_c_macro_definition(h_code) - h_code.putln("") - self.generate_dl_import_macro(h_code) + self.generate_includes(env, [], h_code_start) + h_code_start.putln("") + api_guard = self.api_name(Naming.api_guard_prefix, env) + h_code_start.putln("#ifndef %s" % api_guard) + h_code_start.putln("") + self.generate_extern_c_macro_definition(h_code_start, env.is_cpp()) + h_code_start.putln("") + self.generate_dl_import_macro(h_code_start) if h_extension_types: - h_code.putln("") + h_code_main.putln("") for entry in h_extension_types: - self.generate_cclass_header_code(entry.type, h_code) + self.generate_cclass_header_code(entry.type, h_code_main) if i_code: self.generate_cclass_include_code(entry.type, i_code) if h_funcs: - h_code.putln("") + h_code_main.putln("") for entry in h_funcs: - self.generate_public_declaration(entry, h_code, i_code) + self.generate_public_declaration(entry, h_code_main, i_code) if h_vars: - h_code.putln("") + h_code_main.putln("") for entry in h_vars: - self.generate_public_declaration(entry, h_code, i_code) - h_code.putln("") - h_code.putln("#endif /* !%s */" % api_guard) - h_code.putln("") - h_code.putln("/* WARNING: the interface of the module init function changed in CPython 3.5. */") - h_code.putln("/* It now returns a PyModuleDef instance instead of a PyModule instance. 
*/") - h_code.putln("") - h_code.putln("#if PY_MAJOR_VERSION < 3") - h_code.putln("PyMODINIT_FUNC init%s(void);" % env.module_name) - h_code.putln("#else") - h_code.putln("PyMODINIT_FUNC %s(void);" % self.mod_init_func_cname('PyInit', env)) - h_code.putln("#endif") - h_code.putln("") - h_code.putln("#endif /* !%s */" % h_guard) - - f = open_new_file(result.h_file) - try: - h_code.copyto(f) - finally: - f.close() + self.generate_public_declaration(entry, h_code_main, i_code) + h_code_main.putln("") + h_code_main.putln("#endif /* !%s */" % api_guard) + h_code_main.putln("") + h_code_main.putln("/* WARNING: the interface of the module init function changed in CPython 3.5. */") + h_code_main.putln("/* It now returns a PyModuleDef instance instead of a PyModule instance. */") + h_code_main.putln("") + h_code_main.putln("#if PY_MAJOR_VERSION < 3") + if env.module_name.isascii(): + py2_mod_name = env.module_name + else: + py2_mod_name = env.module_name.encode("ascii", errors="ignore").decode("utf-8") + h_code_main.putln('#error "Unicode module names are not supported in Python 2";') + h_code_main.putln("PyMODINIT_FUNC init%s(void);" % py2_mod_name) + h_code_main.putln("#else") + py3_mod_func_name = self.mod_init_func_cname('PyInit', env) + warning_string = EncodedString('Use PyImport_AppendInittab("%s", %s) instead of calling %s directly.' 
% ( + py2_mod_name, py3_mod_func_name, py3_mod_func_name)) + h_code_main.putln('/* WARNING: %s from Python 3.5 */' % warning_string.rstrip('.')) + h_code_main.putln("PyMODINIT_FUNC %s(void);" % py3_mod_func_name) + h_code_main.putln("") + h_code_main.putln("#if PY_VERSION_HEX >= 0x03050000 " + "&& (defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER) " + "|| (defined(__cplusplus) && __cplusplus >= 201402L))") + h_code_main.putln("#if defined(__cplusplus) && __cplusplus >= 201402L") + h_code_main.putln("[[deprecated(%s)]] inline" % warning_string.as_c_string_literal()) + h_code_main.putln("#elif defined(__GNUC__) || defined(__clang__)") + h_code_main.putln('__attribute__ ((__deprecated__(%s), __unused__)) __inline__' % ( + warning_string.as_c_string_literal())) + h_code_main.putln("#elif defined(_MSC_VER)") + h_code_main.putln('__declspec(deprecated(%s)) __inline' % ( + warning_string.as_c_string_literal())) + h_code_main.putln('#endif') + h_code_main.putln("static PyObject* __PYX_WARN_IF_%s_INIT_CALLED(PyObject* res) {" % py3_mod_func_name) + h_code_main.putln("return res;") + h_code_main.putln("}") + # Function call is converted to warning macro; uncalled (pointer) is not + h_code_main.putln('#define %s() __PYX_WARN_IF_%s_INIT_CALLED(%s())' % ( + py3_mod_func_name, py3_mod_func_name, py3_mod_func_name)) + h_code_main.putln('#endif') + h_code_main.putln('#endif') + + h_code_end.putln("") + h_code_end.putln("#endif /* !%s */" % h_guard) + + with open_new_file(result.h_file) as f: + h_code_writer.copyto(f) def generate_public_declaration(self, entry, h_code, i_code): h_code.putln("%s %s;" % ( @@ -229,8 +350,9 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): i_code.putln("cdef extern %s" % ( entry.type.declaration_code(entry.cname, pyrex=1))) - def api_name(self, env): - return env.qualified_name.replace(".", "__") + def api_name(self, prefix, env): + api_name = self.punycode_module_name(prefix, env.qualified_name) + return api_name.replace(".", "__") def 
generate_api_code(self, env, options, result): def api_entries(entries, pxd=0): @@ -239,13 +361,16 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): api_vars = api_entries(env.var_entries) api_funcs = api_entries(env.cfunc_entries) api_extension_types = api_entries(env.c_class_entries) + if api_vars or api_funcs or api_extension_types: - result.api_file = replace_suffix(result.c_file, "_api.h") + result.api_file = replace_suffix_encoded(result.c_file, "_api.h") + self.assure_safe_target(result.api_file) + h_code = Code.CCodeWriter() c_code_config = generate_c_code_config(env, options) Code.GlobalState(h_code, self, c_code_config) h_code.put_generated_by() - api_guard = Naming.api_guard_prefix + self.api_name(env) + api_guard = self.api_name(Naming.api_guard_prefix, env) h_code.put_h_guard(api_guard) # Work around https://bugs.python.org/issue4709 h_code.putln('#ifdef __MINGW64__') @@ -254,7 +379,9 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): h_code.putln('#include "Python.h"') if result.h_file: - h_code.putln('#include "%s"' % os.path.basename(result.h_file)) + h_filename = os.path.basename(result.h_file) + h_filename = as_encoded_filename(h_filename) + h_code.putln('#include %s' % h_filename.as_c_string_literal()) if api_extension_types: h_code.putln("") for entry in api_extension_types: @@ -274,9 +401,8 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): for entry in api_vars: type = CPtrType(entry.type) cname = env.mangle(Naming.varptr_prefix_api, entry.name) - h_code.putln("static %s = 0;" % type.declaration_code(cname)) + h_code.putln("static %s = 0;" % type.declaration_code(cname)) h_code.putln("#define %s (*%s)" % (entry.name, cname)) - h_code.put(UtilityCode.load_as_string("PyIdentifierFromString", "ImportExport.c")[0]) if api_vars: h_code.put(UtilityCode.load_as_string("VoidPtrImport", "ImportExport.c")[1]) if api_funcs: @@ -285,22 +411,22 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): h_code.put(UtilityCode.load_as_string("TypeImport", 
"ImportExport.c")[0]) h_code.put(UtilityCode.load_as_string("TypeImport", "ImportExport.c")[1]) h_code.putln("") - h_code.putln("static int import_%s(void) {" % self.api_name(env)) + h_code.putln("static int %s(void) {" % self.api_name("import", env)) h_code.putln("PyObject *module = 0;") - h_code.putln('module = PyImport_ImportModule("%s");' % env.qualified_name) + h_code.putln('module = PyImport_ImportModule(%s);' % env.qualified_name.as_c_string_literal()) h_code.putln("if (!module) goto bad;") for entry in api_funcs: cname = env.mangle(Naming.func_prefix_api, entry.name) sig = entry.type.signature_string() h_code.putln( - 'if (__Pyx_ImportFunction_%s(module, "%s", (void (**)(void))&%s, "%s") < 0) goto bad;' - % (Naming.cyversion, entry.name, cname, sig)) + 'if (__Pyx_ImportFunction_%s(module, %s, (void (**)(void))&%s, "%s") < 0) goto bad;' + % (Naming.cyversion, entry.name.as_c_string_literal(), cname, sig)) for entry in api_vars: cname = env.mangle(Naming.varptr_prefix_api, entry.name) sig = entry.type.empty_declaration_code() h_code.putln( - 'if (__Pyx_ImportVoidPtr_%s(module, "%s", (void **)&%s, "%s") < 0) goto bad;' - % (Naming.cyversion, entry.name, cname, sig)) + 'if (__Pyx_ImportVoidPtr_%s(module, %s, (void **)&%s, "%s") < 0) goto bad;' + % (Naming.cyversion, entry.name.as_c_string_literal(), cname, sig)) with ModuleImportGenerator(h_code, imported_modules={env.qualified_name: 'module'}) as import_generator: for entry in api_extension_types: self.generate_type_import_call(entry.type, h_code, import_generator, error_code="goto bad;") @@ -339,10 +465,15 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): i_code.dedent() def generate_c_code(self, env, options, result): + self.assure_safe_target(result.c_file, allow_failed=True) modules = self.referenced_modules if Options.annotate or options.annotate: - rootwriter = Annotate.AnnotationCCodeWriter() + show_entire_c_code = Options.annotate == "fullc" or options.annotate == "fullc" + rootwriter = 
Annotate.AnnotationCCodeWriter( + show_entire_c_code=show_entire_c_code, + source_desc=self.compilation_source.source_desc, + ) else: rootwriter = Code.CCodeWriter() @@ -364,24 +495,24 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): globalstate.use_utility_code(refnanny_utility_code) code = globalstate['before_global_var'] - code.putln('#define __Pyx_MODULE_NAME "%s"' % self.full_module_name) - module_is_main = "%s%s" % (Naming.module_is_main, self.full_module_name.replace('.', '__')) + code.putln('#define __Pyx_MODULE_NAME %s' % + self.full_module_name.as_c_string_literal()) + module_is_main = self.is_main_module_flag_cname() code.putln("extern int %s;" % module_is_main) code.putln("int %s = 0;" % module_is_main) code.putln("") - code.putln("/* Implementation of '%s' */" % env.qualified_name) + code.putln("/* Implementation of %s */" % env.qualified_name.as_c_string_literal()) code = globalstate['late_includes'] - code.putln("/* Late includes */") self.generate_includes(env, modules, code, early=False) - code = globalstate['all_the_rest'] + code = globalstate['module_code'] self.generate_cached_builtins_decls(env, code) - self.generate_lambda_definitions(env, code) + # generate normal variable and function definitions + self.generate_lambda_definitions(env, code) self.generate_variable_definitions(env, code) - self.body.generate_function_definitions(env, code) code.mark_pos(None) @@ -389,11 +520,15 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): self.generate_method_table(env, code) if env.has_import_star: self.generate_import_star(env, code) - self.generate_pymoduledef_struct(env, code) # initialise the macro to reduce the code size of one-time functionality code.putln(UtilityCode.load_as_string("SmallCodeConfig", "ModuleSetupCode.c")[0].strip()) + self.generate_module_state_start(env, globalstate['module_state']) + self.generate_module_state_defines(env, globalstate['module_state_defines']) + self.generate_module_state_clear(env, 
globalstate['module_state_clear']) + self.generate_module_state_traverse(env, globalstate['module_state_traverse']) + # init_globals is inserted before this self.generate_module_init_func(modules[:-1], env, globalstate['init_module']) self.generate_module_cleanup_func(env, globalstate['cleanup_module']) @@ -408,6 +543,8 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): globalstate.use_utility_code(utilcode) globalstate.finalize_main_c_code() + self.generate_module_state_end(env, modules, globalstate) + f = open_new_file(result.c_file) try: rootwriter.copyto(f) @@ -429,11 +566,7 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): except ImportError: import xml.etree.ElementTree as ET coverage_xml = ET.parse(coverage_xml_filename).getroot() - if hasattr(coverage_xml, 'iter'): - iterator = coverage_xml.iter() # Python 2.7 & 3.2+ - else: - iterator = coverage_xml.getiterator() - for el in iterator: + for el in coverage_xml.iter(): el.tail = None # save some memory else: coverage_xml = None @@ -452,7 +585,7 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): if not target_file_dir.startswith(target_dir): # any other directories may not be writable => avoid trying continue - source_file = search_include_file(included_file, "", self.pos, include=True) + source_file = search_include_file(included_file, source_pos=self.pos, include=True) if not source_file: continue if target_file_dir != target_dir and not os.path.exists(target_file_dir): @@ -469,16 +602,18 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): markers = ccodewriter.buffer.allmarkers() d = defaultdict(list) - for c_lineno, cython_lineno in enumerate(markers): - if cython_lineno > 0: - d[cython_lineno].append(c_lineno + 1) + for c_lineno, (src_desc, src_lineno) in enumerate(markers): + if src_lineno > 0 and src_desc.filename is not None: + d[src_desc, src_lineno].append(c_lineno + 1) tb.start('LineNumberMapping') - for cython_lineno, c_linenos in sorted(d.items()): + for (src_desc, src_lineno), c_linenos in 
sorted(d.items()): + assert src_desc.filename is not None tb.add_entry( 'LineNumber', c_linenos=' '.join(map(str, c_linenos)), - cython_lineno=str(cython_lineno), + src_path=src_desc.filename, + src_lineno=str(src_lineno), ) tb.end('LineNumberMapping') tb.serialize() @@ -491,33 +626,36 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): module_list.append(env) def sort_types_by_inheritance(self, type_dict, type_order, getkey): - # copy the types into a list moving each parent type before - # its first child - type_list = [] - for i, key in enumerate(type_order): + subclasses = defaultdict(list) # maps type key to list of subclass keys + for key in type_order: new_entry = type_dict[key] - # collect all base classes to check for children - hierarchy = set() - base = new_entry + base = new_entry.type.base_type while base: - base_type = base.type.base_type - if not base_type: - break - base_key = getkey(base_type) - hierarchy.add(base_key) - base = type_dict.get(base_key) - new_entry.base_keys = hierarchy - - # find the first (sub-)subclass and insert before that - for j in range(i): - entry = type_list[j] - if key in entry.base_keys: - type_list.insert(j, new_entry) + base_key = getkey(base) + subclasses[base_key].append(key) + base_entry = type_dict.get(base_key) + if base_entry is None: break - else: - type_list.append(new_entry) - return type_list + base = base_entry.type.base_type + + # Simple topological sort using recursive DFS, based on + # https://en.wikipedia.org/wiki/Topological_sorting#Depth-first_search + seen = set() + result = [] + def dfs(u): + if u in seen: + return + seen.add(u) + for v in subclasses[getkey(u.type)]: + dfs(type_dict[v]) + result.append(u) + + for key in reversed(type_order): + dfs(type_dict[key]) + + result.reverse() + return result def sort_type_hierarchy(self, module_list, env): # poor developer's OrderedDict @@ -619,12 +757,13 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): for module in modules: defined_here = module is env 
modulecode.putln("") - modulecode.putln("/* Module declarations from '%s' */" % module.qualified_name) - self.generate_c_class_declarations(module, modulecode, defined_here) + modulecode.putln("/* Module declarations from %s */" % module.qualified_name.as_c_string_literal()) + self.generate_c_class_declarations(module, modulecode, defined_here, globalstate) self.generate_cvariable_declarations(module, modulecode, defined_here) self.generate_cfunction_declarations(module, modulecode, defined_here) - def _put_setup_code(self, code, name): + @staticmethod + def _put_setup_code(code, name): code.put(UtilityCode.load_as_string(name, "ModuleSetupCode.c")[1]) def generate_module_preamble(self, env, options, cimported_modules, metadata, code): @@ -638,6 +777,7 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): code.putln("#ifndef PY_SSIZE_T_CLEAN") code.putln("#define PY_SSIZE_T_CLEAN") code.putln("#endif /* PY_SSIZE_T_CLEAN */") + self._put_setup_code(code, "InitLimitedAPI") for inc in sorted(env.c_includes.values(), key=IncludeCode.sortkey): if inc.location == inc.INITIAL: @@ -645,14 +785,16 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): code.putln("#ifndef Py_PYTHON_H") code.putln(" #error Python headers needed to compile C extensions, " "please install development version of Python.") - code.putln("#elif PY_VERSION_HEX < 0x02060000 || " + code.putln("#elif PY_VERSION_HEX < 0x02070000 || " "(0x03000000 <= PY_VERSION_HEX && PY_VERSION_HEX < 0x03030000)") - code.putln(" #error Cython requires Python 2.6+ or Python 3.3+.") + code.putln(" #error Cython requires Python 2.7+ or Python 3.3+.") code.putln("#else") code.globalstate["end"].putln("#endif /* Py_PYTHON_H */") from .. 
import __version__ code.putln('#define CYTHON_ABI "%s"' % __version__.replace('.', '_')) + code.putln('#define __PYX_ABI_MODULE_NAME "_cython_" CYTHON_ABI') + code.putln('#define __PYX_TYPE_MODULE_PREFIX __PYX_ABI_MODULE_NAME "."') code.putln('#define CYTHON_HEX_VERSION %s' % build_hex_version(__version__)) code.putln("#define CYTHON_FUTURE_DIVISION %d" % ( Future.division in env.context.future_directives)) @@ -680,11 +822,11 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): code.putln(" { __PYX_MARK_ERR_POS(f_index, lineno) goto Ln_error; }") code.putln("") - self.generate_extern_c_macro_definition(code) + self.generate_extern_c_macro_definition(code, env.is_cpp()) code.putln("") - code.putln("#define %s" % Naming.h_guard_prefix + self.api_name(env)) - code.putln("#define %s" % Naming.api_guard_prefix + self.api_name(env)) + code.putln("#define %s" % self.api_name(Naming.h_guard_prefix, env)) + code.putln("#define %s" % self.api_name(Naming.api_guard_prefix, env)) code.putln("/* Early includes */") self.generate_includes(env, cimported_modules, code, late=False) code.putln("") @@ -721,6 +863,7 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): code.putln('#define __Pyx_PyObject_FromString __Pyx_Py%s_FromString' % c_string_func_name) code.putln('#define __Pyx_PyObject_FromStringAndSize __Pyx_Py%s_FromStringAndSize' % c_string_func_name) code.put(UtilityCode.load_as_string("TypeConversions", "TypeConversion.c")[0]) + env.use_utility_code(UtilityCode.load_cached("FormatTypeName", "ObjectHandling.c")) # These utility functions are assumed to exist and used elsewhere. 
PyrexTypes.c_long_type.create_to_py_utility_code(env) @@ -730,32 +873,42 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): code.put(Nodes.branch_prediction_macros) code.putln('static CYTHON_INLINE void __Pyx_pretend_to_initialize(void* ptr) { (void)ptr; }') code.putln('') + code.putln('#if !CYTHON_USE_MODULE_STATE') code.putln('static PyObject *%s = NULL;' % env.module_cname) - code.putln('static PyObject *%s;' % env.module_dict_cname) - code.putln('static PyObject *%s;' % Naming.builtins_cname) - code.putln('static PyObject *%s = NULL;' % Naming.cython_runtime_cname) - code.putln('static PyObject *%s;' % Naming.empty_tuple) - code.putln('static PyObject *%s;' % Naming.empty_bytes) - code.putln('static PyObject *%s;' % Naming.empty_unicode) if Options.pre_import is not None: code.putln('static PyObject *%s;' % Naming.preimport_cname) + code.putln('#endif') + code.putln('static int %s;' % Naming.lineno_cname) code.putln('static int %s = 0;' % Naming.clineno_cname) - code.putln('static const char * %s= %s;' % (Naming.cfilenm_cname, Naming.file_c_macro)) + code.putln('static const char * %s = %s;' % (Naming.cfilenm_cname, Naming.file_c_macro)) code.putln('static const char *%s;' % Naming.filename_cname) env.use_utility_code(UtilityCode.load_cached("FastTypeChecks", "ModuleSetupCode.c")) if has_np_pythran(env): env.use_utility_code(UtilityCode.load_cached("PythranConversion", "CppSupport.cpp")) - def generate_extern_c_macro_definition(self, code): + def generate_extern_c_macro_definition(self, code, is_cpp): name = Naming.extern_c_macro - code.putln("#ifndef %s" % name) - code.putln(" #ifdef __cplusplus") - code.putln(' #define %s extern "C"' % name) - code.putln(" #else") - code.putln(" #define %s extern" % name) - code.putln(" #endif") + code.putln("#ifdef CYTHON_EXTERN_C") + # make sure that user overrides always take precedence + code.putln(' #undef %s' % name) + code.putln(' #define %s CYTHON_EXTERN_C' % name) + code.putln("#elif defined(%s)" % name) + 
code.putln(" #ifdef _MSC_VER") + code.putln(" #pragma message (\"Please do not define the '%s' macro externally. Use 'CYTHON_EXTERN_C' instead.\")" % name) + code.putln(" #else") + code.putln(" #warning Please do not define the '%s' macro externally. Use 'CYTHON_EXTERN_C' instead." % name) + code.putln(" #endif") + code.putln("#else") + if is_cpp: + code.putln(' #define %s extern "C++"' % name) + else: + code.putln(" #ifdef __cplusplus") + code.putln(' #define %s extern "C"' % name) + code.putln(" #else") + code.putln(" #define %s extern" % name) + code.putln(" #endif") code.putln("#endif") def generate_dl_import_macro(self, code): @@ -764,7 +917,6 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): code.putln("#endif") def generate_includes(self, env, cimported_modules, code, early=True, late=True): - includes = [] for inc in sorted(env.c_includes.values(), key=IncludeCode.sortkey): if inc.location == inc.EARLY: if early: @@ -785,7 +937,8 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): if isabs(file_path): file_path = basename(file_path) # never include absolute paths escaped_filename = file_path.replace("\\", "\\\\").replace('"', r'\"') - code.putln('"%s",' % escaped_filename) + escaped_filename = as_encoded_filename(escaped_filename) + code.putln('%s,' % escaped_filename.as_c_string_literal()) else: # Some C compilers don't like an empty array code.putln("0") @@ -802,7 +955,7 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): if not entry.in_cinclude: #print "generate_type_header_code:", entry.name, repr(entry.type) ### type = entry.type - if type.is_typedef: # Must test this first! + if type.is_typedef: # Must test this first! pass elif type.is_struct_or_union or type.is_cpp_class: self.generate_struct_union_predeclaration(entry, code) @@ -815,9 +968,9 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): if not entry.in_cinclude: #print "generate_type_header_code:", entry.name, repr(entry.type) ### type = entry.type - if type.is_typedef: # Must test this first! 
+ if type.is_typedef: # Must test this first! self.generate_typedef(entry, code) - elif type.is_enum: + elif type.is_enum or type.is_cpp_enum: self.generate_enum_definition(entry, code) elif type.is_struct_or_union: self.generate_struct_union_definition(entry, code) @@ -844,7 +997,8 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): def generate_typedef(self, entry, code): base_type = entry.type.typedef_base_type - if base_type.is_numeric: + enclosing_scope = entry.scope + if base_type.is_numeric and not enclosing_scope.is_cpp_class_scope: try: writer = code.globalstate['numeric_typedefs'] except KeyError: @@ -894,8 +1048,6 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): code.putln("#endif") code.putln(header) var_entries = scope.var_entries - if not var_entries: - error(entry.pos, "Empty struct or union definition not allowed outside a 'cdef extern from' block") for attr in var_entries: code.putln( "%s;" % attr.type.declaration_code(attr.cname)) @@ -922,6 +1074,7 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): [base_class.empty_declaration_code() for base_class in type.base_classes]) code.put(" : public %s" % base_class_decl) code.putln(" {") + self.generate_type_header_code(scope.type_entries, code) py_attrs = [e for e in scope.entries.values() if e.type.is_pyobject and not e.is_inherited] has_virtual_methods = False @@ -956,67 +1109,69 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): arg_decls = ["void"] arg_names = [] if is_implementing: - code.putln("%s(%s) {" % (type.cname, ", ".join(arg_decls))) - if py_attrs: - code.put_ensure_gil() - for attr in py_attrs: - code.put_init_var_to_py_none(attr, nanny=False); - if constructor: - code.putln("%s(%s);" % (constructor.cname, ", ".join(arg_names))) - if py_attrs: - code.put_release_ensured_gil() - code.putln("}") + code.putln("%s(%s) {" % (type.cname, ", ".join(arg_decls))) + if py_attrs: + code.put_ensure_gil() + for attr in py_attrs: + code.put_init_var_to_py_none(attr, nanny=False) + if constructor: + 
code.putln("%s(%s);" % (constructor.cname, ", ".join(arg_names))) + if py_attrs: + code.put_release_ensured_gil() + code.putln("}") else: - code.putln("%s(%s);" % (type.cname, ", ".join(arg_decls))) + code.putln("%s(%s);" % (type.cname, ", ".join(arg_decls))) if destructor or py_attrs or has_virtual_methods: if has_virtual_methods: code.put("virtual ") if is_implementing: - code.putln("~%s() {" % type.cname) - if py_attrs: - code.put_ensure_gil() - if destructor: - code.putln("%s();" % destructor.cname) - if py_attrs: - for attr in py_attrs: - code.put_var_xdecref(attr, nanny=False); - code.put_release_ensured_gil() - code.putln("}") + code.putln("~%s() {" % type.cname) + if py_attrs: + code.put_ensure_gil() + if destructor: + code.putln("%s();" % destructor.cname) + if py_attrs: + for attr in py_attrs: + code.put_var_xdecref(attr, nanny=False) + code.put_release_ensured_gil() + code.putln("}") else: - code.putln("~%s();" % type.cname) + code.putln("~%s();" % type.cname) if py_attrs: # Also need copy constructor and assignment operators. 
if is_implementing: - code.putln("%s(const %s& __Pyx_other) {" % (type.cname, type.cname)) - code.put_ensure_gil() - for attr in scope.var_entries: - if not attr.type.is_cfunction: - code.putln("%s = __Pyx_other.%s;" % (attr.cname, attr.cname)) - code.put_var_incref(attr, nanny=False) - code.put_release_ensured_gil() - code.putln("}") - code.putln("%s& operator=(const %s& __Pyx_other) {" % (type.cname, type.cname)) - code.putln("if (this != &__Pyx_other) {") - code.put_ensure_gil() - for attr in scope.var_entries: - if not attr.type.is_cfunction: - code.put_var_xdecref(attr, nanny=False); - code.putln("%s = __Pyx_other.%s;" % (attr.cname, attr.cname)) - code.put_var_incref(attr, nanny=False) - code.put_release_ensured_gil() - code.putln("}") - code.putln("return *this;") - code.putln("}") + code.putln("%s(const %s& __Pyx_other) {" % (type.cname, type.cname)) + code.put_ensure_gil() + for attr in scope.var_entries: + if not attr.type.is_cfunction: + code.putln("%s = __Pyx_other.%s;" % (attr.cname, attr.cname)) + code.put_var_incref(attr, nanny=False) + code.put_release_ensured_gil() + code.putln("}") + code.putln("%s& operator=(const %s& __Pyx_other) {" % (type.cname, type.cname)) + code.putln("if (this != &__Pyx_other) {") + code.put_ensure_gil() + for attr in scope.var_entries: + if not attr.type.is_cfunction: + code.put_var_xdecref(attr, nanny=False) + code.putln("%s = __Pyx_other.%s;" % (attr.cname, attr.cname)) + code.put_var_incref(attr, nanny=False) + code.put_release_ensured_gil() + code.putln("}") + code.putln("return *this;") + code.putln("}") else: - code.putln("%s(const %s& __Pyx_other);" % (type.cname, type.cname)) - code.putln("%s& operator=(const %s& __Pyx_other);" % (type.cname, type.cname)) + code.putln("%s(const %s& __Pyx_other);" % (type.cname, type.cname)) + code.putln("%s& operator=(const %s& __Pyx_other);" % (type.cname, type.cname)) code.putln("};") def generate_enum_definition(self, entry, code): code.mark_pos(entry.pos) type = entry.type 
name = entry.cname or entry.name or "" - header, footer = self.sue_header_footer(type, "enum", name) + + kind = "enum class" if entry.type.is_cpp_enum else "enum" + header, footer = self.sue_header_footer(type, kind, name) code.putln(header) enum_values = entry.enum_values if not enum_values: @@ -1030,18 +1185,20 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): for value_entry in enum_values: if value_entry.value_node is None: - value_code = value_entry.cname + value_code = value_entry.cname.split("::")[-1] else: value_code = ("%s = %s" % ( - value_entry.cname, + value_entry.cname.split("::")[-1], value_entry.value_node.result())) if value_entry is not last_entry: value_code += "," code.putln(value_code) code.putln(footer) - if entry.type.typedef_flag: - # Not pre-declared. - code.putln("typedef enum %s %s;" % (name, name)) + + if entry.type.is_enum: + if entry.type.typedef_flag: + # Not pre-declared. + code.putln("typedef enum %s %s;" % (name, name)) def generate_typeobj_predeclaration(self, entry, code): code.putln("") @@ -1120,7 +1277,7 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): # Generate object struct definition for an # extension type. if not type.scope: - return # Forward declared but never defined + return # Forward declared but never defined header, footer = \ self.sue_header_footer(type, "struct", type.objstruct_cname) code.putln(header) @@ -1148,18 +1305,53 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): attr_type = py_object_type else: attr_type = attr.type - code.putln( - "%s;" % attr_type.declaration_code(attr.cname)) + if attr.is_cpp_optional: + decl = attr_type.cpp_optional_declaration_code(attr.cname) + else: + decl = attr_type.declaration_code(attr.cname) + type.scope.use_entry_utility_code(attr) + code.putln("%s;" % decl) code.putln(footer) if type.objtypedef_cname is not None: # Only for exposing public typedef name. 
code.putln("typedef struct %s %s;" % (type.objstruct_cname, type.objtypedef_cname)) - def generate_c_class_declarations(self, env, code, definition): + def generate_c_class_declarations(self, env, code, definition, globalstate): + module_state = globalstate['module_state'] + module_state_defines = globalstate['module_state_defines'] + module_state_clear = globalstate['module_state_clear'] + module_state_traverse = globalstate['module_state_traverse'] + module_state_typeobj = module_state.insertion_point() + module_state_defines_typeobj = module_state_defines.insertion_point() + for writer in [module_state_typeobj, module_state_defines_typeobj]: + writer.putln("#if CYTHON_USE_MODULE_STATE") for entry in env.c_class_entries: if definition or entry.defined_in_pxd: - code.putln("static PyTypeObject *%s = 0;" % ( + module_state.putln("PyTypeObject *%s;" % entry.type.typeptr_cname) + module_state_defines.putln("#define %s %s->%s" % ( + entry.type.typeptr_cname, + Naming.modulestateglobal_cname, entry.type.typeptr_cname)) + module_state_clear.putln( + "Py_CLEAR(clear_module_state->%s);" % + entry.type.typeptr_cname) + module_state_traverse.putln( + "Py_VISIT(traverse_module_state->%s);" % + entry.type.typeptr_cname) + if entry.type.typeobj_cname is not None: + module_state_typeobj.putln("PyObject *%s;" % entry.type.typeobj_cname) + module_state_defines_typeobj.putln("#define %s %s->%s" % ( + entry.type.typeobj_cname, + Naming.modulestateglobal_cname, + entry.type.typeobj_cname)) + module_state_clear.putln( + "Py_CLEAR(clear_module_state->%s);" % ( + entry.type.typeobj_cname)) + module_state_traverse.putln( + "Py_VISIT(traverse_module_state->%s);" % ( + entry.type.typeobj_cname)) + for writer in [module_state_typeobj, module_state_defines_typeobj]: + writer.putln("#endif") def generate_cvariable_declarations(self, env, code, definition): if env.is_cython_builtin: @@ -1199,17 +1391,26 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): if storage_class: code.put("%s " % 
storage_class) - code.put(type.declaration_code( - cname, dll_linkage=dll_linkage)) + if entry.is_cpp_optional: + code.put(type.cpp_optional_declaration_code( + cname, dll_linkage=dll_linkage)) + else: + code.put(type.declaration_code( + cname, dll_linkage=dll_linkage)) if init is not None: code.put_safe(" = %s" % init) code.putln(";") if entry.cname != cname: code.putln("#define %s (*%s)" % (entry.cname, cname)) + env.use_entry_utility_code(entry) def generate_cfunction_declarations(self, env, code, definition): for entry in env.cfunc_entries: - if entry.used or (entry.visibility == 'public' or entry.api): + from_pyx = Options.cimport_from_pyx and not entry.visibility == 'extern' + if (entry.used + or entry.visibility == 'public' + or entry.api + or from_pyx): generate_cfunction_declaration(entry, env, code, definition) def generate_variable_definitions(self, env, code): @@ -1229,14 +1430,12 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): if entry.visibility != 'extern': type = entry.type scope = type.scope - if scope: # could be None if there was an error - if not scope.directives['c_api_binop_methods']: - error(self.pos, - "The 'c_api_binop_methods' directive is only supported for forward compatibility" - " and must be True.") + if scope: # could be None if there was an error self.generate_exttype_vtable(scope, code) self.generate_new_function(scope, code, entry) + self.generate_del_function(scope, code) self.generate_dealloc_function(scope, code) + if scope.needs_gc(): self.generate_traverse_function(scope, code, entry) if scope.needs_tp_clear(): @@ -1264,12 +1463,26 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): self.generate_descr_set_function(scope, code) if not scope.is_closure_class_scope and scope.defines_any(["__dict__"]): self.generate_dict_getter_function(scope, code) + if scope.defines_any_special(TypeSlots.richcmp_special_methods): self.generate_richcmp_function(scope, code) + elif 'total_ordering' in scope.directives: + # Warn if this is used 
when it can't have any effect. + warning(scope.parent_type.pos, + "total_ordering directive used, but no comparison and equality methods defined") + + for slot in TypeSlots.get_slot_table(code.globalstate.directives).PyNumberMethods: + if slot.is_binop and scope.defines_any_special(slot.user_methods): + self.generate_binop_function(scope, slot, code, entry.pos) + self.generate_property_accessors(scope, code) self.generate_method_table(scope, code) self.generate_getset_table(scope, code) + code.putln("#if CYTHON_USE_TYPE_SPECS") + self.generate_typeobj_spec(entry, code) + code.putln("#else") self.generate_typeobj_definition(full_module_name, entry, code) + code.putln("#endif") def generate_exttype_vtable(self, scope, code): # Generate the definition of an extension type's vtable. @@ -1287,8 +1500,11 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): type.empty_declaration_code())) def generate_new_function(self, scope, code, cclass_entry): - tp_slot = TypeSlots.ConstructorSlot("tp_new", '__new__') + tp_slot = TypeSlots.ConstructorSlot("tp_new", "__cinit__") slot_func = scope.mangle_internal("tp_new") + if tp_slot.slot_code(scope) != slot_func: + return # never used + type = scope.parent_type base_type = type.base_type @@ -1298,12 +1514,13 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): if scope.is_internal: # internal classes (should) never need None inits, normal zeroing will do py_attrs = [] - cpp_class_attrs = [entry for entry in scope.var_entries - if entry.type.is_cpp_class] + cpp_constructable_attrs = [entry for entry in scope.var_entries if entry.type.needs_cpp_construction] + + cinit_func_entry = scope.lookup_here("__cinit__") + if cinit_func_entry and not cinit_func_entry.is_special: + cinit_func_entry = None - new_func_entry = scope.lookup_here("__new__") - if base_type or (new_func_entry and new_func_entry.is_special - and not new_func_entry.trivial_signature): + if base_type or (cinit_func_entry and not cinit_func_entry.trivial_signature): unused_marker 
= '' else: unused_marker = 'CYTHON_UNUSED ' @@ -1331,26 +1548,30 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): need_self_cast = (type.vtabslot_cname or (py_buffers or memoryview_slices or py_attrs) or - cpp_class_attrs) + cpp_constructable_attrs) if need_self_cast: code.putln("%s;" % scope.parent_type.declaration_code("p")) if base_type: tp_new = TypeSlots.get_base_slot_function(scope, tp_slot) if tp_new is None: - tp_new = "%s->tp_new" % base_type.typeptr_cname + tp_new = "__Pyx_PyType_GetSlot(%s, tp_new, newfunc)" % base_type.typeptr_cname code.putln("PyObject *o = %s(t, a, k);" % tp_new) else: code.putln("PyObject *o;") + code.putln("#if CYTHON_COMPILING_IN_LIMITED_API") + code.putln("allocfunc alloc_func = (allocfunc)PyType_GetSlot(t, Py_tp_alloc);") + code.putln("o = alloc_func(t, 0);") + code.putln("#else") if freelist_size: code.globalstate.use_utility_code( UtilityCode.load_cached("IncludeStringH", "StringTools.c")) if is_final_type: type_safety_check = '' else: - type_safety_check = ' & ((t->tp_flags & (Py_TPFLAGS_IS_ABSTRACT | Py_TPFLAGS_HEAPTYPE)) == 0)' + type_safety_check = ' & (int)(!__Pyx_PyType_HasFeature(t, (Py_TPFLAGS_IS_ABSTRACT | Py_TPFLAGS_HEAPTYPE)))' obj_struct = type.declaration_code("", deref=True) code.putln( - "if (CYTHON_COMPILING_IN_CPYTHON && likely((%s > 0) & (t->tp_basicsize == sizeof(%s))%s)) {" % ( + "if (CYTHON_COMPILING_IN_CPYTHON && likely((int)(%s > 0) & (int)(t->tp_basicsize == sizeof(%s))%s)) {" % ( freecount_name, obj_struct, type_safety_check)) code.putln("o = (PyObject*)%s[--%s];" % ( freelist_name, freecount_name)) @@ -1360,7 +1581,7 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): code.putln("PyObject_GC_Track(o);") code.putln("} else {") if not is_final_type: - code.putln("if (likely((t->tp_flags & Py_TPFLAGS_IS_ABSTRACT) == 0)) {") + code.putln("if (likely(!__Pyx_PyType_HasFeature(t, Py_TPFLAGS_IS_ABSTRACT))) {") code.putln("o = (*t->tp_alloc)(t, 0);") if not is_final_type: code.putln("} else {") @@ -1369,6 
+1590,8 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): code.putln("if (unlikely(!o)) return 0;") if freelist_size and not base_type: code.putln('}') + if not base_type: + code.putln("#endif") if need_self_cast: code.putln("p = %s;" % type.cast_code("o")) #if need_self_cast: @@ -1389,9 +1612,13 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): type.vtabslot_cname, struct_type_cast, type.vtabptr_cname)) - for entry in cpp_class_attrs: + for entry in cpp_constructable_attrs: + if entry.is_cpp_optional: + decl_code = entry.type.cpp_optional_declaration_code("") + else: + decl_code = entry.type.empty_declaration_code() code.putln("new((void*)&(p->%s)) %s();" % ( - entry.cname, entry.type.empty_declaration_code())) + entry.cname, decl_code)) for entry in py_attrs: if entry.name == "__dict__": @@ -1411,14 +1638,14 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): if cclass_entry.cname == '__pyx_memoryviewslice': code.putln("p->from_slice.memview = NULL;") - if new_func_entry and new_func_entry.is_special: - if new_func_entry.trivial_signature: + if cinit_func_entry: + if cinit_func_entry.trivial_signature: cinit_args = "o, %s, NULL" % Naming.empty_tuple else: cinit_args = "o, a, k" needs_error_cleanup = True code.putln("if (unlikely(%s(%s) < 0)) goto bad;" % ( - new_func_entry.func_cname, cinit_args)) + cinit_func_entry.func_cname, cinit_args)) code.putln( "return o;") @@ -1429,6 +1656,28 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): code.putln( "}") + def generate_del_function(self, scope, code): + tp_slot = TypeSlots.get_slot_by_name("tp_finalize", scope.directives) + slot_func_cname = scope.mangle_internal("tp_finalize") + if tp_slot.slot_code(scope) != slot_func_cname: + return # never used + + entry = scope.lookup_here("__del__") + if entry is None or not entry.is_special: + return # nothing to wrap + code.putln("") + + if tp_slot.used_ifdef: + code.putln("#if %s" % tp_slot.used_ifdef) + code.putln("static void %s(PyObject *o) {" % slot_func_cname) + 
code.putln("PyObject *etype, *eval, *etb;") + code.putln("PyErr_Fetch(&etype, &eval, &etb);") + code.putln("%s(o);" % entry.func_cname) + code.putln("PyErr_Restore(etype, eval, etb);") + code.putln("}") + if tp_slot.used_ifdef: + code.putln("#endif") + def generate_dealloc_function(self, scope, code): tp_slot = TypeSlots.ConstructorSlot("tp_dealloc", '__dealloc__') slot_func = scope.mangle_internal("tp_dealloc") @@ -1443,6 +1692,7 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): is_final_type = scope.parent_type.is_final_type needs_gc = scope.needs_gc() + needs_trashcan = scope.needs_trashcan() weakref_slot = scope.lookup_here("__weakref__") if not scope.is_closure_class_scope else None if weakref_slot not in scope.var_entries: @@ -1453,13 +1703,13 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): dict_slot = None _, (py_attrs, _, memoryview_slices) = scope.get_refcounted_entries() - cpp_class_attrs = [entry for entry in scope.var_entries - if entry.type.is_cpp_class] + cpp_destructable_attrs = [entry for entry in scope.var_entries + if entry.type.needs_cpp_construction] - if py_attrs or cpp_class_attrs or memoryview_slices or weakref_slot or dict_slot: + if py_attrs or cpp_destructable_attrs or memoryview_slices or weakref_slot or dict_slot: self.generate_self_cast(scope, code) - if not is_final_type: + if not is_final_type or scope.may_have_finalize(): # in Py3.4+, call tp_finalize() as early as possible code.putln("#if CYTHON_USE_TP_FINALIZE") if needs_gc: @@ -1468,11 +1718,15 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): finalised_check = ( '(!PyType_IS_GC(Py_TYPE(o)) || !_PyGC_FINALIZED(o))') code.putln( - "if (unlikely(PyType_HasFeature(Py_TYPE(o), Py_TPFLAGS_HAVE_FINALIZE)" - " && Py_TYPE(o)->tp_finalize) && %s) {" % finalised_check) + "if (unlikely(" + "(PY_VERSION_HEX >= 0x03080000 || __Pyx_PyType_HasFeature(Py_TYPE(o), Py_TPFLAGS_HAVE_FINALIZE))" + " && __Pyx_PyObject_GetSlot(o, tp_finalize, destructor)) && %s) {" % finalised_check) + + 
code.putln("if (__Pyx_PyObject_GetSlot(o, tp_dealloc, destructor) == %s) {" % slot_func_cname) # if instance was resurrected by finaliser, return code.putln("if (PyObject_CallFinalizerFromDealloc(o)) return;") code.putln("}") + code.putln("}") code.putln("#endif") if needs_gc: @@ -1481,33 +1735,40 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): # running this destructor. code.putln("PyObject_GC_UnTrack(o);") - # call the user's __dealloc__ - self.generate_usr_dealloc_call(scope, code) + if needs_trashcan: + code.globalstate.use_utility_code( + UtilityCode.load_cached("PyTrashcan", "ExtensionTypes.c")) + code.putln("__Pyx_TRASHCAN_BEGIN(o, %s)" % slot_func_cname) if weakref_slot: + # We must clean the weakreferences before calling the user's __dealloc__ + # because if the __dealloc__ releases the GIL, a weakref can be + # dereferenced accessing the object in an inconsistent state or + # resurrecting it. code.putln("if (p->__weakref__) PyObject_ClearWeakRefs(o);") + # call the user's __dealloc__ + self.generate_usr_dealloc_call(scope, code) + if dict_slot: code.putln("if (p->__dict__) PyDict_Clear(p->__dict__);") - for entry in cpp_class_attrs: + for entry in cpp_destructable_attrs: code.putln("__Pyx_call_destructor(p->%s);" % entry.cname) - for entry in py_attrs: + for entry in (py_attrs + memoryview_slices): code.put_xdecref_clear("p->%s" % entry.cname, entry.type, nanny=False, - clear_before_decref=True) - - for entry in memoryview_slices: - code.put_xdecref_memoryviewslice("p->%s" % entry.cname, - have_gil=True) + clear_before_decref=True, have_gil=True) if base_type: base_cname = base_type.typeptr_cname if needs_gc: # The base class deallocator probably expects this to be tracked, # so undo the untracking above. - if base_type.scope and base_type.scope.needs_gc(): - code.putln("PyObject_GC_Track(o);") + if base_type.scope: + # Assume that we know whether the base class uses GC or not. 
+ if base_type.scope.needs_gc(): + code.putln("PyObject_GC_Track(o);") else: code.putln("if (PyType_IS_GC(%s)) PyObject_GC_Track(o);" % base_cname) @@ -1515,13 +1776,13 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): if tp_dealloc is not None: code.putln("%s(o);" % tp_dealloc) elif base_type.is_builtin_type: - code.putln("%s->tp_dealloc(o);" % base_cname) + code.putln("__Pyx_PyType_GetSlot(%s, tp_dealloc, destructor)(o);" % base_cname) else: # This is an externally defined type. Calling through the # cimported base type pointer directly interacts badly with # the module cleanup, which may already have cleared it. # In that case, fall back to traversing the type hierarchy. - code.putln("if (likely(%s)) %s->tp_dealloc(o); " + code.putln("if (likely(%s)) __Pyx_PyType_GetSlot(%s, tp_dealloc, destructor)(o); " "else __Pyx_call_next_tp_dealloc(o, %s);" % ( base_cname, base_cname, slot_func_cname)) code.globalstate.use_utility_code( @@ -1536,11 +1797,11 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): type_safety_check = '' else: type_safety_check = ( - ' & ((Py_TYPE(o)->tp_flags & (Py_TPFLAGS_IS_ABSTRACT | Py_TPFLAGS_HEAPTYPE)) == 0)') + ' & (int)(!__Pyx_PyType_HasFeature(Py_TYPE(o), (Py_TPFLAGS_IS_ABSTRACT | Py_TPFLAGS_HEAPTYPE)))') type = scope.parent_type code.putln( - "if (CYTHON_COMPILING_IN_CPYTHON && ((%s < %d) & (Py_TYPE(o)->tp_basicsize == sizeof(%s))%s)) {" % ( + "if (CYTHON_COMPILING_IN_CPYTHON && ((int)(%s < %d) & (int)(Py_TYPE(o)->tp_basicsize == sizeof(%s))%s)) {" % ( freecount_name, freelist_size, type.declaration_code("", deref=True), @@ -1551,12 +1812,16 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): code.putln("(*Py_TYPE(o)->tp_free)(o);") if freelist_size: code.putln("}") + + if needs_trashcan: + code.putln("__Pyx_TRASHCAN_END") + code.putln( "}") def generate_usr_dealloc_call(self, scope, code): entry = scope.lookup_here("__dealloc__") - if not entry: + if not entry or not entry.is_special: return code.putln("{") @@ -1632,11 +1897,11 @@ class 
ModuleNode(Nodes.Node, Nodes.BlockNode): code.putln("}") def generate_clear_function(self, scope, code, cclass_entry): - tp_slot = TypeSlots.get_slot_by_name("tp_clear") + tp_slot = TypeSlots.get_slot_by_name("tp_clear", scope.directives) slot_func = scope.mangle_internal("tp_clear") base_type = scope.parent_type.base_type if tp_slot.slot_code(scope) != slot_func: - return # never used + return # never used have_entries, (py_attrs, py_buffers, memoryview_slices) = ( scope.get_refcounted_entries(include_gc_simple=False)) @@ -1694,7 +1959,7 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): code.putln("Py_CLEAR(p->%s.obj);" % entry.cname) if cclass_entry.cname == '__pyx_memoryviewslice': - code.putln("__PYX_XDEC_MEMVIEW(&p->from_slice, 1);") + code.putln("__PYX_XCLEAR_MEMVIEW(&p->from_slice, 1);") code.putln("return 0;") code.putln("}") @@ -1735,12 +2000,18 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): if set_entry: code.putln("return %s(o, i, v);" % set_entry.func_cname) else: + code.putln( + "__Pyx_TypeName o_type_name;") self.generate_guarded_basetype_call( base_type, "tp_as_mapping", "mp_ass_subscript", "o, i, v", code) code.putln( + "o_type_name = __Pyx_PyType_GetName(Py_TYPE(o));") + code.putln( "PyErr_Format(PyExc_NotImplementedError,") code.putln( - ' "Subscript assignment not supported by %.200s", Py_TYPE(o)->tp_name);') + ' "Subscript assignment not supported by " __Pyx_FMT_TYPENAME, o_type_name);') + code.putln( + "__Pyx_DECREF_TypeName(o_type_name);") code.putln( "return -1;") code.putln( @@ -1752,12 +2023,18 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): "return %s(o, i);" % ( del_entry.func_cname)) else: + code.putln( + "__Pyx_TypeName o_type_name;") self.generate_guarded_basetype_call( base_type, "tp_as_mapping", "mp_ass_subscript", "o, i, v", code) code.putln( + "o_type_name = __Pyx_PyType_GetName(Py_TYPE(o));") + code.putln( "PyErr_Format(PyExc_NotImplementedError,") code.putln( - ' "Subscript deletion not supported by %.200s", 
Py_TYPE(o)->tp_name);') + ' "Subscript deletion not supported by " __Pyx_FMT_TYPENAME, o_type_name);') + code.putln( + "__Pyx_DECREF_TypeName(o_type_name);") code.putln( "return -1;") code.putln( @@ -1802,12 +2079,18 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): "return %s(o, i, j, v);" % ( set_entry.func_cname)) else: + code.putln( + "__Pyx_TypeName o_type_name;") self.generate_guarded_basetype_call( base_type, "tp_as_sequence", "sq_ass_slice", "o, i, j, v", code) code.putln( + "o_type_name = __Pyx_PyType_GetName(Py_TYPE(o));") + code.putln( "PyErr_Format(PyExc_NotImplementedError,") code.putln( - ' "2-element slice assignment not supported by %.200s", Py_TYPE(o)->tp_name);') + ' "2-element slice assignment not supported by " __Pyx_FMT_TYPENAME, o_type_name);') + code.putln( + "__Pyx_DECREF_TypeName(o_type_name);") code.putln( "return -1;") code.putln( @@ -1819,12 +2102,18 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): "return %s(o, i, j);" % ( del_entry.func_cname)) else: + code.putln( + "__Pyx_TypeName o_type_name;") self.generate_guarded_basetype_call( base_type, "tp_as_sequence", "sq_ass_slice", "o, i, j, v", code) code.putln( + "o_type_name = __Pyx_PyType_GetName(Py_TYPE(o));") + code.putln( "PyErr_Format(PyExc_NotImplementedError,") code.putln( - ' "2-element slice deletion not supported by %.200s", Py_TYPE(o)->tp_name);') + ' "2-element slice deletion not supported by " __Pyx_FMT_TYPENAME, o_type_name);') + code.putln( + "__Pyx_DECREF_TypeName(o_type_name);") code.putln( "return -1;") code.putln( @@ -1854,37 +2143,112 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): # need to call up into base classes as we may not know all implemented comparison methods extern_parent = cls if cls.typeptr_cname else scope.parent_type.base_type - eq_entry = None - has_ne = False + total_ordering = 'total_ordering' in scope.directives + + comp_entry = {} + for cmp_method in TypeSlots.richcmp_special_methods: for class_scope in class_scopes: entry = 
class_scope.lookup_here(cmp_method) if entry is not None: + comp_entry[cmp_method] = entry break + + if total_ordering: + # Check this is valid - we must have at least 1 operation defined. + comp_names = [from_name for from_name, to_name in TOTAL_ORDERING if from_name in comp_entry] + if not comp_names: + if '__eq__' not in comp_entry and '__ne__' not in comp_entry: + warning(scope.parent_type.pos, + "total_ordering directive used, but no comparison and equality methods defined") + else: + warning(scope.parent_type.pos, + "total_ordering directive used, but no comparison methods defined") + total_ordering = False else: - continue + if '__eq__' not in comp_entry and '__ne__' not in comp_entry: + warning(scope.parent_type.pos, "total_ordering directive used, but no equality method defined") + total_ordering = False + + # Same priority as functools, prefers + # __lt__ to __le__ to __gt__ to __ge__ + ordering_source = max(comp_names) + for cmp_method in TypeSlots.richcmp_special_methods: cmp_type = cmp_method.strip('_').upper() # e.g. "__eq__" -> EQ + entry = comp_entry.get(cmp_method) + if entry is None and (not total_ordering or cmp_type in ('NE', 'EQ')): + # No definition, fall back to superclasses. + # eq/ne methods shouldn't use the total_ordering code. + continue + code.putln("case Py_%s: {" % cmp_type) - if cmp_method == '__eq__': - eq_entry = entry - # Python itself does not do this optimisation, it seems... - #code.putln("if (o1 == o2) return __Pyx_NewRef(Py_True);") - elif cmp_method == '__ne__': - has_ne = True - # Python itself does not do this optimisation, it seems... - #code.putln("if (o1 == o2) return __Pyx_NewRef(Py_False);") - code.putln("return %s(o1, o2);" % entry.func_cname) - code.putln("}") + if entry is None: + assert total_ordering + # We need to generate this from the other methods. + invert_comp, comp_op, invert_equals = TOTAL_ORDERING[ordering_source, cmp_method] + + # First we always do the comparison. 
+ code.putln("PyObject *ret;") + code.putln("ret = %s(o1, o2);" % comp_entry[ordering_source].func_cname) + code.putln("if (likely(ret && ret != Py_NotImplemented)) {") + code.putln("int order_res = __Pyx_PyObject_IsTrue(ret);") + code.putln("Py_DECREF(ret);") + code.putln("if (unlikely(order_res < 0)) return NULL;") + # We may need to check equality too. For some combos it's never required. + if invert_equals is not None: + # Implement the and/or check with an if. + if comp_op == '&&': + code.putln("if (%s order_res) {" % ('!!' if invert_comp else '!')) + code.putln("ret = __Pyx_NewRef(Py_False);") + code.putln("} else {") + elif comp_op == '||': + code.putln("if (%s order_res) {" % ('!' if invert_comp else '')) + code.putln("ret = __Pyx_NewRef(Py_True);") + code.putln("} else {") + else: + raise AssertionError('Unknown op %s' % (comp_op, )) + if '__eq__' in comp_entry: + eq_func = '__eq__' + else: + # Fall back to NE, which is defined here. + eq_func = '__ne__' + invert_equals = not invert_equals + + code.putln("ret = %s(o1, o2);" % comp_entry[eq_func].func_cname) + code.putln("if (likely(ret && ret != Py_NotImplemented)) {") + code.putln("int eq_res = __Pyx_PyObject_IsTrue(ret);") + code.putln("Py_DECREF(ret);") + code.putln("if (unlikely(eq_res < 0)) return NULL;") + if invert_equals: + code.putln("ret = eq_res ? Py_False : Py_True;") + else: + code.putln("ret = eq_res ? Py_True : Py_False;") + code.putln("Py_INCREF(ret);") + code.putln("}") # equals success + code.putln("}") # Needs to try equals + else: + # Convert direct to a boolean. + if invert_comp: + code.putln("ret = order_res ? Py_False : Py_True;") + else: + code.putln("ret = order_res ? 
Py_True : Py_False;") + code.putln("Py_INCREF(ret);") + code.putln("}") # comp_op + code.putln("return ret;") + else: + code.putln("return %s(o1, o2);" % entry.func_cname) + code.putln("}") # Case - if eq_entry and not has_ne and not extern_parent: + if '__eq__' in comp_entry and '__ne__' not in comp_entry and not extern_parent: code.putln("case Py_NE: {") code.putln("PyObject *ret;") # Python itself does not do this optimisation, it seems... #code.putln("if (o1 == o2) return __Pyx_NewRef(Py_False);") - code.putln("ret = %s(o1, o2);" % eq_entry.func_cname) + code.putln("ret = %s(o1, o2);" % comp_entry['__eq__'].func_cname) code.putln("if (likely(ret && ret != Py_NotImplemented)) {") - code.putln("int b = __Pyx_PyObject_IsTrue(ret); Py_DECREF(ret);") + code.putln("int b = __Pyx_PyObject_IsTrue(ret);") + code.putln("Py_DECREF(ret);") code.putln("if (unlikely(b < 0)) return NULL;") code.putln("ret = (b) ? Py_False : Py_True;") code.putln("Py_INCREF(ret);") @@ -1902,6 +2266,73 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): code.putln("}") # switch code.putln("}") + def generate_binop_function(self, scope, slot, code, pos): + func_name = scope.mangle_internal(slot.slot_name) + if scope.directives['c_api_binop_methods']: + code.putln('#define %s %s' % (func_name, slot.left_slot.slot_code(scope))) + return + + code.putln() + preprocessor_guard = slot.preprocessor_guard_code() + if preprocessor_guard: + code.putln(preprocessor_guard) + + if slot.left_slot.signature in (TypeSlots.binaryfunc, TypeSlots.ibinaryfunc): + slot_type = 'binaryfunc' + extra_arg = extra_arg_decl = '' + elif slot.left_slot.signature in (TypeSlots.powternaryfunc, TypeSlots.ipowternaryfunc): + slot_type = 'ternaryfunc' + extra_arg = ', extra_arg' + extra_arg_decl = ', PyObject* extra_arg' + else: + error(pos, "Unexpected type slot signature: %s" % slot) + return + + def get_slot_method_cname(method_name): + entry = scope.lookup(method_name) + return entry.func_cname if entry and entry.is_special 
else None + + def call_slot_method(method_name, reverse): + func_cname = get_slot_method_cname(method_name) + if func_cname: + return "%s(%s%s)" % ( + func_cname, + "right, left" if reverse else "left, right", + extra_arg) + else: + return '%s_maybe_call_slot(__Pyx_PyType_GetSlot(%s, tp_base, PyTypeObject*), left, right %s)' % ( + func_name, + scope.parent_type.typeptr_cname, + extra_arg) + + if get_slot_method_cname(slot.left_slot.method_name) and not get_slot_method_cname(slot.right_slot.method_name): + warning(pos, "Extension type implements %s() but not %s(). " + "The behaviour has changed from previous Cython versions to match Python semantics. " + "You can implement both special methods in a backwards compatible way." % ( + slot.left_slot.method_name, + slot.right_slot.method_name, + )) + + overloads_left = int(bool(get_slot_method_cname(slot.left_slot.method_name))) + overloads_right = int(bool(get_slot_method_cname(slot.right_slot.method_name))) + code.putln( + TempitaUtilityCode.load_as_string( + "BinopSlot", "ExtensionTypes.c", + context={ + "func_name": func_name, + "slot_name": slot.slot_name, + "overloads_left": overloads_left, + "overloads_right": overloads_right, + "call_left": call_slot_method(slot.left_slot.method_name, reverse=False), + "call_right": call_slot_method(slot.right_slot.method_name, reverse=True), + "type_cname": scope.parent_type.typeptr_cname, + "slot_type": slot_type, + "extra_arg": extra_arg, + "extra_arg_decl": extra_arg_decl, + })[1]) + if preprocessor_guard: + code.putln("#endif") + def generate_getattro_function(self, scope, code): # First try to get the attribute using __getattribute__, if defined, or # PyObject_GenericGetAttr. 
@@ -2136,10 +2567,42 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): code.putln( "}") + def generate_typeobj_spec(self, entry, code): + ext_type = entry.type + scope = ext_type.scope + + members_slot = TypeSlots.get_slot_by_name("tp_members", code.globalstate.directives) + members_slot.generate_substructure_spec(scope, code) + + buffer_slot = TypeSlots.get_slot_by_name("tp_as_buffer", code.globalstate.directives) + if not buffer_slot.is_empty(scope): + code.putln("#if !CYTHON_COMPILING_IN_LIMITED_API") + buffer_slot.generate_substructure(scope, code) + code.putln("#endif") + + code.putln("static PyType_Slot %s_slots[] = {" % ext_type.typeobj_cname) + for slot in TypeSlots.get_slot_table(code.globalstate.directives): + slot.generate_spec(scope, code) + code.putln("{0, 0},") + code.putln("};") + + if ext_type.typedef_flag: + objstruct = ext_type.objstruct_cname + else: + objstruct = "struct %s" % ext_type.objstruct_cname + classname = scope.class_name.as_c_string_literal() + code.putln("static PyType_Spec %s_spec = {" % ext_type.typeobj_cname) + code.putln('"%s.%s",' % (self.full_module_name, classname.replace('"', ''))) + code.putln("sizeof(%s)," % objstruct) + code.putln("0,") + code.putln("%s," % TypeSlots.get_slot_by_name("tp_flags", scope.directives).slot_code(scope)) + code.putln("%s_slots," % ext_type.typeobj_cname) + code.putln("};") + def generate_typeobj_definition(self, modname, entry, code): type = entry.type scope = type.scope - for suite in TypeSlots.substructures: + for suite in TypeSlots.get_slot_table(code.globalstate.directives).substructures: suite.generate_substructure(scope, code) code.putln("") if entry.visibility == 'public': @@ -2150,9 +2613,11 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): code.putln(header % type.typeobj_cname) code.putln( "PyVarObject_HEAD_INIT(0, 0)") + classname = scope.class_name.as_c_string_literal() code.putln( - '"%s.%s", /*tp_name*/' % ( - self.full_module_name, scope.class_name)) + '"%s."%s, /*tp_name*/' % ( + 
self.full_module_name, + classname)) if type.typedef_flag: objstruct = type.objstruct_cname else: @@ -2161,7 +2626,7 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): "sizeof(%s), /*tp_basicsize*/" % objstruct) code.putln( "0, /*tp_itemsize*/") - for slot in TypeSlots.slot_table: + for slot in TypeSlots.get_slot_table(code.globalstate.directives): slot.generate(scope, code) code.putln( "};") @@ -2215,12 +2680,12 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): if doc: if doc.is_unicode: doc = doc.as_utf8_string() - doc_code = doc.as_c_string_literal() + doc_code = "PyDoc_STR(%s)" % doc.as_c_string_literal() else: doc_code = "0" code.putln( - '{(char *)"%s", %s, %s, (char *)%s, 0},' % ( - entry.name, + '{(char *)%s, %s, %s, (char *)%s, 0},' % ( + entry.name.as_c_string_literal(), entry.getter_cname or "0", entry.setter_cname or "0", doc_code)) @@ -2296,7 +2761,7 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): if code.label_used(code.error_label): code.put_label(code.error_label) # This helps locate the offending name. 
- code.put_add_traceback(self.full_module_name) + code.put_add_traceback(EncodedString(self.full_module_name)) code.error_label = old_error_label code.putln("bad:") code.putln("return -1;") @@ -2305,20 +2770,213 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): code.putln(UtilityCode.load_as_string("ImportStar", "ImportExport.c")[1]) code.exit_cfunc_scope() # done with labels + def generate_module_state_start(self, env, code): + # TODO: Refactor to move module state struct decl closer to the static decl + code.putln('typedef struct {') + code.putln('PyObject *%s;' % env.module_dict_cname) + code.putln('PyObject *%s;' % Naming.builtins_cname) + code.putln('PyObject *%s;' % Naming.cython_runtime_cname) + code.putln('PyObject *%s;' % Naming.empty_tuple) + code.putln('PyObject *%s;' % Naming.empty_bytes) + code.putln('PyObject *%s;' % Naming.empty_unicode) + if Options.pre_import is not None: + code.putln('PyObject *%s;' % Naming.preimport_cname) + for type_cname, used_name in Naming.used_types_and_macros: + code.putln('#ifdef %s' % used_name) + code.putln('PyTypeObject *%s;' % type_cname) + code.putln('#endif') + + def generate_module_state_end(self, env, modules, globalstate): + module_state = globalstate['module_state'] + module_state_defines = globalstate['module_state_defines'] + module_state_clear = globalstate['module_state_clear'] + module_state_traverse = globalstate['module_state_traverse'] + module_state.putln('} %s;' % Naming.modulestate_cname) + module_state.putln('') + module_state.putln("#if CYTHON_USE_MODULE_STATE") + module_state.putln('#ifdef __cplusplus') + module_state.putln('namespace {') + module_state.putln('extern struct PyModuleDef %s;' % Naming.pymoduledef_cname) + module_state.putln('} /* anonymous namespace */') + module_state.putln('#else') + module_state.putln('static struct PyModuleDef %s;' % Naming.pymoduledef_cname) + module_state.putln('#endif') + module_state.putln('') + module_state.putln('#define %s(o) ((%s 
*)__Pyx_PyModule_GetState(o))' % ( + Naming.modulestate_cname, + Naming.modulestate_cname)) + module_state.putln('') + module_state.putln('#define %s (%s(PyState_FindModule(&%s)))' % ( + Naming.modulestateglobal_cname, + Naming.modulestate_cname, + Naming.pymoduledef_cname)) + module_state.putln('') + module_state.putln('#define %s (PyState_FindModule(&%s))' % ( + env.module_cname, + Naming.pymoduledef_cname)) + module_state.putln("#else") + module_state.putln('static %s %s_static =' % ( + Naming.modulestate_cname, + Naming.modulestateglobal_cname + )) + module_state.putln('#ifdef __cplusplus') + # C++ likes to be initialized with {} to avoid "missing initializer" warnings + # but it isn't valid C + module_state.putln(' {};') + module_state.putln('#else') + module_state.putln(' {0};') + module_state.putln('#endif') + module_state.putln('static %s *%s = &%s_static;' % ( + Naming.modulestate_cname, + Naming.modulestateglobal_cname, + Naming.modulestateglobal_cname + )) + module_state.putln("#endif") + module_state_clear.putln("return 0;") + module_state_clear.putln("}") + module_state_clear.putln("#endif") + module_state_traverse.putln("return 0;") + module_state_traverse.putln("}") + module_state_traverse.putln("#endif") + + def generate_module_state_defines(self, env, code): + code.putln('#define %s %s->%s' % ( + env.module_dict_cname, + Naming.modulestateglobal_cname, + env.module_dict_cname)) + code.putln('#define %s %s->%s' % ( + Naming.builtins_cname, + Naming.modulestateglobal_cname, + Naming.builtins_cname)) + code.putln('#define %s %s->%s' % ( + Naming.cython_runtime_cname, + Naming.modulestateglobal_cname, + Naming.cython_runtime_cname)) + code.putln('#define %s %s->%s' % ( + Naming.empty_tuple, + Naming.modulestateglobal_cname, + Naming.empty_tuple)) + code.putln('#define %s %s->%s' % ( + Naming.empty_bytes, + Naming.modulestateglobal_cname, + Naming.empty_bytes)) + code.putln('#define %s %s->%s' % ( + Naming.empty_unicode, + 
Naming.modulestateglobal_cname, + Naming.empty_unicode)) + if Options.pre_import is not None: + code.putln('#define %s %s->%s' % ( + Naming.preimport_cname, + Naming.modulestateglobal_cname, + Naming.preimport_cname)) + for cname, used_name in Naming.used_types_and_macros: + code.putln('#ifdef %s' % used_name) + code.putln('#define %s %s->%s' % ( + cname, + Naming.modulestateglobal_cname, + cname)) + code.putln('#endif') + + def generate_module_state_clear(self, env, code): + code.putln("#if CYTHON_USE_MODULE_STATE") + code.putln("static int %s_clear(PyObject *m) {" % Naming.module_cname) + code.putln("%s *clear_module_state = %s(m);" % ( + Naming.modulestate_cname, + Naming.modulestate_cname)) + code.putln("if (!clear_module_state) return 0;") + code.putln('Py_CLEAR(clear_module_state->%s);' % + env.module_dict_cname) + code.putln('Py_CLEAR(clear_module_state->%s);' % + Naming.builtins_cname) + code.putln('Py_CLEAR(clear_module_state->%s);' % + Naming.cython_runtime_cname) + code.putln('Py_CLEAR(clear_module_state->%s);' % + Naming.empty_tuple) + code.putln('Py_CLEAR(clear_module_state->%s);' % + Naming.empty_bytes) + code.putln('Py_CLEAR(clear_module_state->%s);' % + Naming.empty_unicode) + code.putln('#ifdef __Pyx_CyFunction_USED') + code.putln('Py_CLEAR(clear_module_state->%s);' % + Naming.cyfunction_type_cname) + code.putln('#endif') + code.putln('#ifdef __Pyx_FusedFunction_USED') + code.putln('Py_CLEAR(clear_module_state->%s);' % + Naming.fusedfunction_type_cname) + code.putln('#endif') + + def generate_module_state_traverse(self, env, code): + code.putln("#if CYTHON_USE_MODULE_STATE") + code.putln("static int %s_traverse(PyObject *m, visitproc visit, void *arg) {" % Naming.module_cname) + code.putln("%s *traverse_module_state = %s(m);" % ( + Naming.modulestate_cname, + Naming.modulestate_cname)) + code.putln("if (!traverse_module_state) return 0;") + code.putln('Py_VISIT(traverse_module_state->%s);' % + env.module_dict_cname) + 
code.putln('Py_VISIT(traverse_module_state->%s);' % + Naming.builtins_cname) + code.putln('Py_VISIT(traverse_module_state->%s);' % + Naming.cython_runtime_cname) + code.putln('Py_VISIT(traverse_module_state->%s);' % + Naming.empty_tuple) + code.putln('Py_VISIT(traverse_module_state->%s);' % + Naming.empty_bytes) + code.putln('Py_VISIT(traverse_module_state->%s);' % + Naming.empty_unicode) + code.putln('#ifdef __Pyx_CyFunction_USED') + code.putln('Py_VISIT(traverse_module_state->%s);' % + Naming.cyfunction_type_cname) + code.putln('#endif') + code.putln('#ifdef __Pyx_FusedFunction_USED') + code.putln('Py_VISIT(traverse_module_state->%s);' % + Naming.fusedfunction_type_cname) + code.putln('#endif') + def generate_module_init_func(self, imported_modules, env, code): subfunction = self.mod_init_subfunction(self.pos, self.scope, code) + self.generate_pymoduledef_struct(env, code) + code.enter_cfunc_scope(self.scope) code.putln("") code.putln(UtilityCode.load_as_string("PyModInitFuncType", "ModuleSetupCode.c")[0]) - header2 = "__Pyx_PyMODINIT_FUNC init%s(void)" % env.module_name + if env.module_name.isascii(): + py2_mod_name = env.module_name + fail_compilation_in_py2 = False + else: + fail_compilation_in_py2 = True + # at this point py2_mod_name is largely a placeholder and the value doesn't matter + py2_mod_name = env.module_name.encode("ascii", errors="ignore").decode("utf8") + + header2 = "__Pyx_PyMODINIT_FUNC init%s(void)" % py2_mod_name header3 = "__Pyx_PyMODINIT_FUNC %s(void)" % self.mod_init_func_cname('PyInit', env) + header3 = EncodedString(header3) code.putln("#if PY_MAJOR_VERSION < 3") # Optimise for small code size as the module init function is only executed once. 
code.putln("%s CYTHON_SMALL_CODE; /*proto*/" % header2) + if fail_compilation_in_py2: + code.putln('#error "Unicode module names are not supported in Python 2";') + if self.scope.is_package: + code.putln("#if !defined(CYTHON_NO_PYINIT_EXPORT) && (defined(_WIN32) || defined(WIN32) || defined(MS_WINDOWS))") + code.putln("__Pyx_PyMODINIT_FUNC init__init__(void) { init%s(); }" % py2_mod_name) + code.putln("#endif") code.putln(header2) code.putln("#else") code.putln("%s CYTHON_SMALL_CODE; /*proto*/" % header3) + if self.scope.is_package: + code.putln("#if !defined(CYTHON_NO_PYINIT_EXPORT) && (defined(_WIN32) || defined(WIN32) || defined(MS_WINDOWS))") + code.putln("__Pyx_PyMODINIT_FUNC PyInit___init__(void) { return %s(); }" % ( + self.mod_init_func_cname('PyInit', env))) + code.putln("#endif") + # Hack for a distutils bug - https://bugs.python.org/issue39432 + # distutils attempts to make visible a slightly wrong PyInitU module name. Just create a dummy + # function to keep it quiet + wrong_punycode_module_name = self.wrong_punycode_module_name(env.module_name) + if wrong_punycode_module_name: + code.putln("#if !defined(CYTHON_NO_PYINIT_EXPORT) && (defined(_WIN32) || defined(WIN32) || defined(MS_WINDOWS))") + code.putln("void %s(void) {} /* workaround for https://bugs.python.org/issue39432 */" % wrong_punycode_module_name) + code.putln("#endif") code.putln(header3) # CPython 3.5+ supports multi-phase module initialisation (gives access to __spec__, __file__, etc.) 
@@ -2333,7 +2991,7 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): code.putln("") # main module init code lives in Py_mod_exec function, not in PyInit function code.putln("static CYTHON_SMALL_CODE int %s(PyObject *%s)" % ( - self.mod_init_func_cname(Naming.pymodule_exec_func_cname, env), + self.module_init_func_cname(), Naming.pymodinit_module_arg)) code.putln("#endif") # PEP489 @@ -2341,12 +2999,18 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): # start of module init/exec function (pre/post PEP 489) code.putln("{") + code.putln('int stringtab_initialized = 0;') + code.putln("#if CYTHON_USE_MODULE_STATE") + code.putln('int pystate_addmodule_run = 0;') + code.putln("#endif") tempdecl_code = code.insertion_point() profile = code.globalstate.directives['profile'] linetrace = code.globalstate.directives['linetrace'] if profile or linetrace: + if linetrace: + code.use_fast_gil_utility_code() code.globalstate.use_utility_code(UtilityCode.load_cached("Profile", "Profile.c")) code.put_declare_refcount_context() @@ -2361,7 +3025,7 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): )) code.putln('PyErr_SetString(PyExc_RuntimeError,' ' "Module \'%s\' has already been imported. 
Re-initialisation is not supported.");' % - env.module_name) + env.module_name.as_c_string_literal()[1:-1]) code.putln("return -1;") code.putln("}") code.putln("#elif PY_MAJOR_VERSION >= 3") @@ -2372,6 +3036,9 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): )) code.putln("#endif") + code.putln("/*--- Module creation code ---*/") + self.generate_module_creation_code(env, code) + if profile or linetrace: tempdecl_code.put_trace_declarations() code.put_trace_frame_init() @@ -2395,7 +3062,7 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): for ext_type in ('CyFunction', 'FusedFunction', 'Coroutine', 'Generator', 'AsyncGen', 'StopAsyncIteration'): code.putln("#ifdef __Pyx_%s_USED" % ext_type) - code.put_error_if_neg(self.pos, "__pyx_%s_init()" % ext_type) + code.put_error_if_neg(self.pos, "__pyx_%s_init(%s)" % (ext_type, env.module_cname)) code.putln("#endif") code.putln("/*--- Library function declarations ---*/") @@ -2408,18 +3075,18 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): code.putln("PyEval_InitThreads();") code.putln("#endif") - code.putln("/*--- Module creation code ---*/") - self.generate_module_creation_code(env, code) - code.putln("/*--- Initialize various global constants etc. 
---*/") - code.put_error_if_neg(self.pos, "__Pyx_InitGlobals()") + code.put_error_if_neg(self.pos, "__Pyx_InitConstants()") + code.putln("stringtab_initialized = 1;") + code.put_error_if_neg(self.pos, "__Pyx_InitGlobals()") # calls any utility code + code.putln("#if PY_MAJOR_VERSION < 3 && (__PYX_DEFAULT_STRING_ENCODING_IS_ASCII || " "__PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT)") code.put_error_if_neg(self.pos, "__Pyx_init_sys_getdefaultencoding_params()") code.putln("#endif") - code.putln("if (%s%s) {" % (Naming.module_is_main, self.full_module_name.replace('.', '__'))) + code.putln("if (%s) {" % self.is_main_module_flag_cname()) code.put_error_if_neg(self.pos, 'PyObject_SetAttr(%s, %s, %s)' % ( env.module_cname, code.intern_identifier(EncodedString("__name__")), @@ -2474,7 +3141,9 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): code.put_trace_call(header3, self.pos, nogil=not code.funcstate.gil_owned) code.funcstate.can_trace = True + code.mark_pos(None) self.body.generate_execution_code(code) + code.mark_pos(None) if profile or linetrace: code.funcstate.can_trace = False @@ -2495,8 +3164,10 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): for cname, type in code.funcstate.all_managed_temps(): code.put_xdecref(cname, type) code.putln('if (%s) {' % env.module_cname) - code.putln('if (%s) {' % env.module_dict_cname) - code.put_add_traceback("init %s" % env.qualified_name) + code.putln('if (%s && stringtab_initialized) {' % env.module_dict_cname) + # We can run into errors before the module or stringtab are initialized. + # In this case it is not safe to add a traceback (because it uses the stringtab) + code.put_add_traceback(EncodedString("init %s" % env.qualified_name)) code.globalstate.use_utility_code(Nodes.traceback_utility_code) # Module reference and module dict are in global variables which might still be needed # for cleanup, atexit code, etc., so leaking is better than crashing. 
@@ -2504,9 +3175,24 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): # user code in atexit or other global registries. ##code.put_decref_clear(env.module_dict_cname, py_object_type, nanny=False) code.putln('}') + code.putln("#if !CYTHON_USE_MODULE_STATE") code.put_decref_clear(env.module_cname, py_object_type, nanny=False, clear_before_decref=True) + code.putln("#else") + # This section is mainly for the limited API. env.module_cname still owns a reference so + # decrement that + code.put_decref(env.module_cname, py_object_type, nanny=False) + # Also remove the failed module from the module state lookup + # fetch/restore the error indicator because PyState_RemvoeModule might fail itself + code.putln("if (pystate_addmodule_run) {") + code.putln("PyObject *tp, *value, *tb;") + code.putln("PyErr_Fetch(&tp, &value, &tb);") + code.putln("PyState_RemoveModule(&%s);" % Naming.pymoduledef_cname) + code.putln("PyErr_Restore(tp, value, tb);") + code.putln("}") + code.putln("#endif") code.putln('} else if (!PyErr_Occurred()) {') - code.putln('PyErr_SetString(PyExc_ImportError, "init %s");' % env.qualified_name) + code.putln('PyErr_SetString(PyExc_ImportError, "init %s");' % + env.qualified_name.as_c_string_literal()[1:-1]) code.putln('}') code.put_label(code.return_label) @@ -2556,7 +3242,7 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): code.putln("static int %s(void) {" % self.cfunc_name) code.put_declare_refcount_context() self.tempdecl_code = code.insertion_point() - code.put_setup_refcount_context(self.cfunc_name) + code.put_setup_refcount_context(EncodedString(self.cfunc_name)) # Leave a grepable marker that makes it easy to find the generator source. 
code.putln("/*--- %s ---*/" % self.description) return code @@ -2613,7 +3299,7 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): EncodedString(decode_filename( os.path.dirname(module_path)))).cname, code.error_goto_if_null(temp, self.pos))) - code.put_gotref(temp) + code.put_gotref(temp, py_object_type) code.putln( 'if (PyObject_SetAttrString(%s, "__path__", %s) < 0) %s;' % ( env.module_cname, temp, code.error_goto(self.pos))) @@ -2637,14 +3323,15 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): # CPython may not have put us into sys.modules yet, but relative imports and reimports require it fq_module_name = self.full_module_name if fq_module_name.endswith('.__init__'): - fq_module_name = fq_module_name[:-len('.__init__')] + fq_module_name = EncodedString(fq_module_name[:-len('.__init__')]) + fq_module_name_cstring = fq_module_name.as_c_string_literal() code.putln("#if PY_MAJOR_VERSION >= 3") code.putln("{") code.putln("PyObject *modules = PyImport_GetModuleDict(); %s" % code.error_goto_if_null("modules", self.pos)) - code.putln('if (!PyDict_GetItemString(modules, "%s")) {' % fq_module_name) - code.putln(code.error_goto_if_neg('PyDict_SetItemString(modules, "%s", %s)' % ( - fq_module_name, env.module_cname), self.pos)) + code.putln('if (!PyDict_GetItemString(modules, %s)) {' % fq_module_name_cstring) + code.putln(code.error_goto_if_neg('PyDict_SetItemString(modules, %s, %s)' % ( + fq_module_name_cstring, env.module_cname), self.pos)) code.putln("}") code.putln("}") code.putln("#endif") @@ -2717,11 +3404,12 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): if Options.pre_import is not None: code.put_decref_clear(Naming.preimport_cname, py_object_type, nanny=False, clear_before_decref=True) - for cname in [env.module_dict_cname, Naming.cython_runtime_cname, Naming.builtins_cname]: + for cname in [Naming.cython_runtime_cname, Naming.builtins_cname]: code.put_decref_clear(cname, py_object_type, nanny=False, clear_before_decref=True) + 
code.put_decref_clear(env.module_dict_cname, py_object_type, nanny=False, clear_before_decref=True) def generate_main_method(self, env, code): - module_is_main = "%s%s" % (Naming.module_is_main, self.full_module_name.replace('.', '__')) + module_is_main = self.is_main_module_flag_cname() if Options.embed == "main": wmain = "wmain" else: @@ -2734,8 +3422,33 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): main_method=Options.embed, wmain_method=wmain)) + def punycode_module_name(self, prefix, name): + # adapted from PEP483 + try: + name = '_' + name.encode('ascii').decode('ascii') + except UnicodeEncodeError: + name = 'U_' + name.encode('punycode').replace(b'-', b'_').decode('ascii') + return "%s%s" % (prefix, name) + + def wrong_punycode_module_name(self, name): + # to work around a distutils bug by also generating an incorrect symbol... + try: + name.encode("ascii") + return None # workaround is not needed + except UnicodeEncodeError: + return "PyInitU" + (u"_"+name).encode('punycode').replace(b'-', b'_').decode('ascii') + def mod_init_func_cname(self, prefix, env): - return '%s_%s' % (prefix, env.module_name) + # from PEP483 + return self.punycode_module_name(prefix, env.module_name) + + # Returns the name of the C-function that corresponds to the module initialisation. + # (module initialisation == the cython code outside of functions) + # Note that this should never be the name of a wrapper and always the name of the + # function containing the actual code. Otherwise, cygdb will experience problems. 
+ def module_init_func_cname(self): + env = self.scope + return self.mod_init_func_cname(Naming.pymodule_exec_func_cname, env) def generate_pymoduledef_struct(self, env, code): if env.doc: @@ -2750,7 +3463,7 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): code.putln("") code.putln("#if PY_MAJOR_VERSION >= 3") code.putln("#if CYTHON_PEP489_MULTI_PHASE_INIT") - exec_func_cname = self.mod_init_func_cname(Naming.pymodule_exec_func_cname, env) + exec_func_cname = self.module_init_func_cname() code.putln("static PyObject* %s(PyObject *spec, PyModuleDef *def); /*proto*/" % Naming.pymodule_create_func_cname) code.putln("static int %s(PyObject* module); /*proto*/" % exec_func_cname) @@ -2760,15 +3473,27 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): code.putln("{Py_mod_exec, (void*)%s}," % exec_func_cname) code.putln("{0, NULL}") code.putln("};") + if not env.module_name.isascii(): + code.putln("#else /* CYTHON_PEP489_MULTI_PHASE_INIT */") + code.putln('#error "Unicode module names are only supported with multi-phase init' + ' as per PEP489"') code.putln("#endif") code.putln("") - code.putln("static struct PyModuleDef %s = {" % Naming.pymoduledef_cname) + code.putln('#ifdef __cplusplus') + code.putln('namespace {') + code.putln("struct PyModuleDef %s =" % Naming.pymoduledef_cname) + code.putln('#else') + code.putln("static struct PyModuleDef %s =" % Naming.pymoduledef_cname) + code.putln('#endif') + code.putln('{') code.putln(" PyModuleDef_HEAD_INIT,") - code.putln(' "%s",' % env.module_name) + code.putln(' %s,' % env.module_name.as_c_string_literal()) code.putln(" %s, /* m_doc */" % doc) code.putln("#if CYTHON_PEP489_MULTI_PHASE_INIT") code.putln(" 0, /* m_size */") + code.putln("#elif CYTHON_USE_MODULE_STATE") # FIXME: should allow combination with PEP-489 + code.putln(" sizeof(%s), /* m_size */" % Naming.modulestate_cname) code.putln("#else") code.putln(" -1, /* m_size */") code.putln("#endif") @@ -2778,10 +3503,19 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): 
code.putln("#else") code.putln(" NULL, /* m_reload */") code.putln("#endif") + code.putln("#if CYTHON_USE_MODULE_STATE") + code.putln(" %s_traverse, /* m_traverse */" % Naming.module_cname) + code.putln(" %s_clear, /* m_clear */" % Naming.module_cname) + code.putln(" %s /* m_free */" % cleanup_func) + code.putln("#else") code.putln(" NULL, /* m_traverse */") code.putln(" NULL, /* m_clear */") code.putln(" %s /* m_free */" % cleanup_func) + code.putln("#endif") code.putln("};") + code.putln('#ifdef __cplusplus') + code.putln('} /* anonymous namespace */') + code.putln('#endif') code.putln("#endif") def generate_module_creation_code(self, env, code): @@ -2800,20 +3534,44 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): code.putln("#else") code.putln("#if PY_MAJOR_VERSION < 3") code.putln( - '%s = Py_InitModule4("%s", %s, %s, 0, PYTHON_API_VERSION); Py_XINCREF(%s);' % ( + '%s = Py_InitModule4(%s, %s, %s, 0, PYTHON_API_VERSION); Py_XINCREF(%s);' % ( env.module_cname, - env.module_name, + env.module_name.as_c_string_literal(), env.method_table_cname, doc, env.module_cname)) - code.putln("#else") + code.putln(code.error_goto_if_null(env.module_cname, self.pos)) + code.putln("#elif CYTHON_USE_MODULE_STATE") + # manage_ref is False (and refnanny calls are omitted) because refnanny isn't yet initialized + module_temp = code.funcstate.allocate_temp(py_object_type, manage_ref=False) + code.putln( + "%s = PyModule_Create(&%s); %s" % ( + module_temp, + Naming.pymoduledef_cname, + code.error_goto_if_null(module_temp, self.pos))) + code.putln("{") + # So that PyState_FindModule works in the init function: + code.putln("int add_module_result = PyState_AddModule(%s, &%s);" % ( + module_temp, Naming.pymoduledef_cname)) + code.putln("%s = 0; /* transfer ownership from %s to %s pseudovariable */" % ( + module_temp, module_temp, env.module_name + )) + # At this stage the module likely has a refcount of 2 - one owned by the list + # inside PyState_AddModule and one owned by "__pyx_m" 
(and returned from this + # function as a new reference). + code.putln(code.error_goto_if_neg("add_module_result", self.pos)) + code.putln("pystate_addmodule_run = 1;") + code.putln("}") + code.funcstate.release_temp(module_temp) + code.putln('#else') code.putln( "%s = PyModule_Create(&%s);" % ( env.module_cname, Naming.pymoduledef_cname)) - code.putln("#endif") code.putln(code.error_goto_if_null(env.module_cname, self.pos)) + code.putln("#endif") code.putln("#endif") # CYTHON_PEP489_MULTI_PHASE_INIT + code.putln("CYTHON_UNUSED_VAR(%s);" % module_temp) # only used in limited API code.putln( "%s = PyModule_GetDict(%s); %s" % ( @@ -2903,8 +3661,8 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): # investigation shows that the resulting binary is smaller with repeated functions calls. for entry in entries: signature = entry.type.signature_string() - code.putln('if (__Pyx_ExportFunction("%s", (void (*)(void))%s, "%s") < 0) %s' % ( - entry.name, + code.putln('if (__Pyx_ExportFunction(%s, (void (*)(void))%s, "%s") < 0) %s' % ( + entry.name.as_c_string_literal(), entry.cname, signature, code.error_goto(self.pos))) @@ -2946,7 +3704,7 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): module.qualified_name, temp, code.error_goto(self.pos))) - code.put_gotref(temp) + code.put_gotref(temp, py_object_type) for entry in entries: if env is module: cname = entry.cname @@ -2977,13 +3735,13 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): module.qualified_name, temp, code.error_goto(self.pos))) - code.put_gotref(temp) + code.put_gotref(temp, py_object_type) for entry in entries: code.putln( - 'if (__Pyx_ImportFunction_%s(%s, "%s", (void (**)(void))&%s, "%s") < 0) %s' % ( + 'if (__Pyx_ImportFunction_%s(%s, %s, (void (**)(void))&%s, "%s") < 0) %s' % ( Naming.cyversion, temp, - entry.name, + entry.name.as_c_string_literal(), entry.cname, entry.type.signature_string(), code.error_goto(self.pos))) @@ -3006,7 +3764,8 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): def 
generate_base_type_import_code(self, env, entry, code, import_generator): base_type = entry.type.base_type if (base_type and base_type.module_name != env.qualified_name and not - base_type.is_builtin_type and not entry.utility_code_definition): + (base_type.is_builtin_type or base_type.is_cython_builtin_type) + and not entry.utility_code_definition): self.generate_type_import_code(env, base_type, self.pos, code, import_generator) def generate_type_import_code(self, env, type, pos, code, import_generator): @@ -3023,7 +3782,7 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): if type.vtabptr_cname: code.globalstate.use_utility_code( UtilityCode.load_cached('GetVTable', 'ImportExport.c')) - code.putln("%s = (struct %s*)__Pyx_GetVtable(%s->tp_dict); %s" % ( + code.putln("%s = (struct %s*)__Pyx_GetVtable(%s); %s" % ( type.vtabptr_cname, type.vtabstruct_cname, type.typeptr_cname, @@ -3064,15 +3823,17 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): module, module_name)) + type_name = type.name.as_c_string_literal() + if condition and replacement: code.putln("") # start in new line code.putln("#if %s" % condition) code.putln('"%s",' % replacement) code.putln("#else") - code.putln('"%s",' % type.name) + code.putln('%s,' % type_name) code.putln("#endif") else: - code.put(' "%s", ' % type.name) + code.put(' %s, ' % type_name) if sizeof_objstruct != objstruct: if not condition: @@ -3080,6 +3841,9 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): code.putln("#if defined(PYPY_VERSION_NUM) && PYPY_VERSION_NUM < 0x050B0000") code.putln('sizeof(%s), __PYX_GET_STRUCT_ALIGNMENT_%s(%s),' % ( objstruct, Naming.cyversion, objstruct)) + code.putln("#elif CYTHON_COMPILING_IN_LIMITED_API") + code.putln('sizeof(%s), __PYX_GET_STRUCT_ALIGNMENT_%s(%s),' % ( + objstruct, Naming.cyversion, objstruct)) code.putln("#else") code.putln('sizeof(%s), __PYX_GET_STRUCT_ALIGNMENT_%s(%s),' % ( sizeof_objstruct, Naming.cyversion, sizeof_objstruct)) @@ -3104,6 +3868,10 @@ class ModuleNode(Nodes.Node, 
Nodes.BlockNode): def generate_type_ready_code(self, entry, code): Nodes.CClassDefNode.generate_type_ready_code(entry, code) + def is_main_module_flag_cname(self): + full_module_name = self.full_module_name.replace('.', '__') + return self.punycode_module_name(Naming.module_is_main, full_module_name) + def generate_exttype_vtable_init_code(self, entry, code): # Generate code to initialise the C method table of an # extension type. @@ -3156,7 +3924,7 @@ class ModuleImportGenerator(object): self.temps.append(temp) code.putln('%s = PyImport_ImportModule(%s); if (unlikely(!%s)) %s' % ( temp, module_name_string, temp, error_code)) - code.put_gotref(temp) + code.put_gotref(temp, py_object_type) self.imported[module_name_string] = temp return temp @@ -3216,5 +3984,3 @@ packed_struct_utility_code = UtilityCode(proto=""" #define __Pyx_PACKED #endif """, impl="", proto_block='utility_code_proto_before_types') - -capsule_utility_code = UtilityCode.load("Capsule") diff --git a/Cython/Compiler/Naming.py b/Cython/Compiler/Naming.py index 4dd6cbbd5..140fe0435 100644 --- a/Cython/Compiler/Naming.py +++ b/Cython/Compiler/Naming.py @@ -15,8 +15,11 @@ codewriter_temp_prefix = pyrex_prefix + "t_" temp_prefix = u"__cyt_" +pyunicode_identifier_prefix = pyrex_prefix + 'U' + builtin_prefix = pyrex_prefix + "builtin_" arg_prefix = pyrex_prefix + "arg_" +genexpr_arg_prefix = pyrex_prefix + "genexpr_arg_" funcdoc_prefix = pyrex_prefix + "doc_" enum_prefix = pyrex_prefix + "e_" func_prefix = pyrex_prefix + "f_" @@ -47,12 +50,18 @@ pybufferstruct_prefix = pyrex_prefix + "pybuffer_" vtable_prefix = pyrex_prefix + "vtable_" vtabptr_prefix = pyrex_prefix + "vtabptr_" vtabstruct_prefix = pyrex_prefix + "vtabstruct_" +unicode_vtabentry_prefix = pyrex_prefix + "Uvtabentry_" +# vtab entries aren't normally mangled, +# but punycode names sometimes start with numbers leading to a C syntax error +unicode_structmember_prefix = pyrex_prefix + "Umember_" +# as above - +# not normally mangled but punycode 
names cause specific problems opt_arg_prefix = pyrex_prefix + "opt_args_" convert_func_prefix = pyrex_prefix + "convert_" closure_scope_prefix = pyrex_prefix + "scope_" closure_class_prefix = pyrex_prefix + "scope_struct_" lambda_func_prefix = pyrex_prefix + "lambda_" -module_is_main = pyrex_prefix + "module_is_main_" +module_is_main = pyrex_prefix + "module_is_main" defaults_struct_prefix = pyrex_prefix + "defaults" dynamic_args_cname = pyrex_prefix + "dynamic_args" @@ -69,6 +78,8 @@ interned_prefixes = { ctuple_type_prefix = pyrex_prefix + "ctuple_" args_cname = pyrex_prefix + "args" +nargs_cname = pyrex_prefix + "nargs" +kwvalues_cname = pyrex_prefix + "kwvalues" generator_cname = pyrex_prefix + "generator" sent_value_cname = pyrex_prefix + "sent_value" pykwdlist_cname = pyrex_prefix + "pyargnames" @@ -87,6 +98,8 @@ clineno_cname = pyrex_prefix + "clineno" cfilenm_cname = pyrex_prefix + "cfilenm" local_tstate_cname = pyrex_prefix + "tstate" module_cname = pyrex_prefix + "m" +modulestate_cname = pyrex_prefix + "mstate" +modulestateglobal_cname = pyrex_prefix + "mstate_global" moddoc_cname = pyrex_prefix + "mdoc" methtable_cname = pyrex_prefix + "methods" retval_cname = pyrex_prefix + "r" @@ -99,7 +112,7 @@ gilstate_cname = pyrex_prefix + "state" skip_dispatch_cname = pyrex_prefix + "skip_dispatch" empty_tuple = pyrex_prefix + "empty_tuple" empty_bytes = pyrex_prefix + "empty_bytes" -empty_unicode = pyrex_prefix + "empty_unicode" +empty_unicode = pyrex_prefix + "empty_unicode" print_function = pyrex_prefix + "print" print_function_kwargs = pyrex_prefix + "print_kwargs" cleanup_cname = pyrex_prefix + "module_cleanup" @@ -116,13 +129,20 @@ cur_scope_cname = pyrex_prefix + "cur_scope" enc_scope_cname = pyrex_prefix + "enc_scope" frame_cname = pyrex_prefix + "frame" frame_code_cname = pyrex_prefix + "frame_code" +error_without_exception_cname = pyrex_prefix + "error_without_exception" binding_cfunc = pyrex_prefix + "binding_PyCFunctionType" fused_func_prefix = 
pyrex_prefix + 'fuse_' -quick_temp_cname = pyrex_prefix + "temp" # temp variable for quick'n'dirty temping +quick_temp_cname = pyrex_prefix + "temp" # temp variable for quick'n'dirty temping tp_dict_version_temp = pyrex_prefix + "tp_dict_version" obj_dict_version_temp = pyrex_prefix + "obj_dict_version" -type_dict_guard_temp = pyrex_prefix + "type_dict_guard" +type_dict_guard_temp = pyrex_prefix + "typedict_guard" cython_runtime_cname = pyrex_prefix + "cython_runtime" +cyfunction_type_cname = pyrex_prefix + "CyFunctionType" +fusedfunction_type_cname = pyrex_prefix + "FusedFunctionType" +# the name "dflt" was picked by analogy with the CPython dataclass module which stores +# the default values in variables named f"_dflt_{field.name}" in a hidden scope that's +# passed to the __init__ function. (The name is unimportant to the exact workings though) +dataclass_field_default_cname = pyrex_prefix + "dataclass_dflt" global_code_object_cache_find = pyrex_prefix + 'find_code_object' global_code_object_cache_insert = pyrex_prefix + 'insert_code_object' @@ -154,11 +174,24 @@ exc_vars = (exc_type_name, exc_value_name, exc_tb_name) api_name = pyrex_prefix + "capi__" -h_guard_prefix = "__PYX_HAVE__" -api_guard_prefix = "__PYX_HAVE_API__" +# the h and api guards get changed to: +# __PYX_HAVE__FILENAME (for ascii filenames) +# __PYX_HAVE_U_PUNYCODEFILENAME (for non-ascii filenames) +h_guard_prefix = "__PYX_HAVE_" +api_guard_prefix = "__PYX_HAVE_API_" api_func_guard = "__PYX_HAVE_API_FUNC_" PYX_NAN = "__PYX_NAN()" def py_version_hex(major, minor=0, micro=0, release_level=0, release_serial=0): return (major << 24) | (minor << 16) | (micro << 8) | (release_level << 4) | (release_serial) + +# there's a few places where it's useful to iterate over all of these +used_types_and_macros = [ + (cyfunction_type_cname, '__Pyx_CyFunction_USED'), + (fusedfunction_type_cname, '__Pyx_FusedFunction_USED'), + ('__pyx_GeneratorType', '__Pyx_Generator_USED'), + ('__pyx_IterableCoroutineType', 
'__Pyx_IterableCoroutine_USED'), + ('__pyx_CoroutineAwaitType', '__Pyx_Coroutine_USED'), + ('__pyx_CoroutineType', '__Pyx_Coroutine_USED'), +] diff --git a/Cython/Compiler/Nodes.py b/Cython/Compiler/Nodes.py index 457ae94ad..230226298 100644 --- a/Cython/Compiler/Nodes.py +++ b/Cython/Compiler/Nodes.py @@ -5,6 +5,7 @@ from __future__ import absolute_import import cython + cython.declare(sys=object, os=object, copy=object, Builtin=object, error=object, warning=object, Naming=object, PyrexTypes=object, py_object_type=object, ModuleScope=object, LocalScope=object, ClosureScope=object, @@ -12,17 +13,18 @@ cython.declare(sys=object, os=object, copy=object, CppClassScope=object, UtilityCode=object, EncodedString=object, error_type=object, _py_int_types=object) -import sys, os, copy +import sys, copy from itertools import chain from . import Builtin -from .Errors import error, warning, InternalError, CompileError +from .Errors import error, warning, InternalError, CompileError, CannotSpecialize from . import Naming from . import PyrexTypes from . import TypeSlots from .PyrexTypes import py_object_type, error_type -from .Symtab import (ModuleScope, LocalScope, ClosureScope, - StructOrUnionScope, PyClassScope, CppClassScope, TemplateScope) +from .Symtab import (ModuleScope, LocalScope, ClosureScope, PropertyScope, + StructOrUnionScope, PyClassScope, CppClassScope, TemplateScope, GeneratorExpressionScope, + CppScopedEnumScope, punycodify_name) from .Code import UtilityCode from .StringEncoding import EncodedString from . 
import Future @@ -38,6 +40,9 @@ else: _py_int_types = (int, long) +IMPLICIT_CLASSMETHODS = {"__init_subclass__", "__class_getitem__"} + + def relative_position(pos): return (pos[0].get_filenametable_entry(), pos[1]) @@ -68,53 +73,6 @@ def embed_position(pos, docstring): return doc -def analyse_type_annotation(annotation, env, assigned_value=None): - base_type = None - is_ambiguous = False - explicit_pytype = explicit_ctype = False - if annotation.is_dict_literal: - warning(annotation.pos, - "Dicts should no longer be used as type annotations. Use 'cython.int' etc. directly.") - for name, value in annotation.key_value_pairs: - if not name.is_string_literal: - continue - if name.value in ('type', b'type'): - explicit_pytype = True - if not explicit_ctype: - annotation = value - elif name.value in ('ctype', b'ctype'): - explicit_ctype = True - annotation = value - if explicit_pytype and explicit_ctype: - warning(annotation.pos, "Duplicate type declarations found in signature annotation") - arg_type = annotation.analyse_as_type(env) - if annotation.is_name and not annotation.cython_attribute and annotation.name in ('int', 'long', 'float'): - # Map builtin numeric Python types to C types in safe cases. - if assigned_value is not None and arg_type is not None and not arg_type.is_pyobject: - assigned_type = assigned_value.infer_type(env) - if assigned_type and assigned_type.is_pyobject: - # C type seems unsafe, e.g. due to 'None' default value => ignore annotation type - is_ambiguous = True - arg_type = None - # ignore 'int' and require 'cython.int' to avoid unsafe integer declarations - if arg_type in (PyrexTypes.c_long_type, PyrexTypes.c_int_type, PyrexTypes.c_float_type): - arg_type = PyrexTypes.c_double_type if annotation.name == 'float' else py_object_type - elif arg_type is not None and annotation.is_string_literal: - warning(annotation.pos, - "Strings should no longer be used for type declarations. Use 'cython.int' etc. 
directly.") - if arg_type is not None: - if explicit_pytype and not explicit_ctype and not arg_type.is_pyobject: - warning(annotation.pos, - "Python type declaration in signature annotation does not refer to a Python type") - base_type = CAnalysedBaseTypeNode( - annotation.pos, type=arg_type, is_arg=True) - elif is_ambiguous: - warning(annotation.pos, "Ambiguous types in annotation, ignoring") - else: - warning(annotation.pos, "Unknown type declaration in annotation, ignoring") - return base_type, arg_type - - def write_func_call(func, codewriter_class): def f(*args, **kwds): if len(args) > 1 and isinstance(args[1], codewriter_class): @@ -125,19 +83,16 @@ def write_func_call(func, codewriter_class): ' ' * code.call_level, node.__class__.__name__, func.__name__, - node.pos[1:]) - pristine = code.buffer.stream.tell() - code.putln(marker) + node.pos[1:], + ) + insertion_point = code.insertion_point() start = code.buffer.stream.tell() code.call_level += 4 res = func(*args, **kwds) code.call_level -= 4 - if start == code.buffer.stream.tell(): - # no code written => undo writing marker - code.buffer.stream.truncate(pristine) - else: - marker = marker.replace('->', '<-', 1) - code.putln(marker) + if start != code.buffer.stream.tell(): + code.putln(marker.replace('->', '<-', 1)) + insertion_point.putln(marker) return res else: return func(*args, **kwds) @@ -160,9 +115,11 @@ class VerboseCodeWriter(type): class CheckAnalysers(type): """Metaclass to check that type analysis functions return a node. 
""" - methods = set(['analyse_types', - 'analyse_expressions', - 'analyse_target_types']) + methods = frozenset({ + 'analyse_types', + 'analyse_expressions', + 'analyse_target_types', + }) def __new__(cls, name, bases, attrs): from types import FunctionType @@ -200,6 +157,8 @@ class Node(object): is_literal = 0 is_terminator = 0 is_wrapper = False # is a DefNode wrapper for a C function + is_cproperty = False + is_templated_type_node = False temps = None # All descendants should set child_attrs to a list of the attributes @@ -254,7 +213,7 @@ class Node(object): # - # There are 3 phases of parse tree processing, applied in order to + # There are 3 main phases of parse tree processing, applied in order to # all the statements in a given scope-block: # # (0) analyse_declarations @@ -266,25 +225,25 @@ class Node(object): # Determine the result types of expressions and fill in the # 'type' attribute of each ExprNode. Insert coercion nodes into the # tree where needed to convert to and from Python objects. - # Allocate temporary locals for intermediate results. Fill - # in the 'result_code' attribute of each ExprNode with a C code - # fragment. + # Replace tree nodes with more appropriate implementations found by + # the type analysis. # # (2) generate_code # Emit C code for all declarations, statements and expressions. - # Recursively applies the 3 processing phases to the bodies of - # functions. + # + # These phases are triggered by tree transformations. + # See the full pipeline in Pipeline.py. 
# def analyse_declarations(self, env): pass def analyse_expressions(self, env): - raise InternalError("analyse_expressions not implemented for %s" % \ + raise InternalError("analyse_expressions not implemented for %s" % self.__class__.__name__) def generate_code(self, code): - raise InternalError("generate_code not implemented for %s" % \ + raise InternalError("generate_code not implemented for %s" % self.__class__.__name__) def annotate(self, code): @@ -360,6 +319,7 @@ class Node(object): return u'"%s":%d:%d\n%s\n' % ( source_desc.get_escaped_description(), line, col, u''.join(lines)) + class CompilerDirectivesNode(Node): """ Sets compiler directives for the children nodes @@ -402,6 +362,7 @@ class CompilerDirectivesNode(Node): self.body.annotate(code) code.globalstate.directives = old + class BlockNode(object): # Mixin class for nodes representing a declaration block. @@ -415,14 +376,15 @@ class BlockNode(object): for node in env.lambda_defs: node.generate_function_definitions(env, code) + class StatListNode(Node): # stats a list of StatNode child_attrs = ["stats"] @staticmethod - def create_analysed(pos, env, *args, **kw): - node = StatListNode(pos, *args, **kw) + def create_analysed(pos, env, **kw): + node = StatListNode(pos, **kw) return node # No node-specific analysis needed def analyse_declarations(self, env): @@ -469,7 +431,7 @@ class StatNode(Node): pass def generate_execution_code(self, code): - raise InternalError("generate_execution_code not implemented for %s" % \ + raise InternalError("generate_execution_code not implemented for %s" % self.__class__.__name__) @@ -499,8 +461,13 @@ class CDefExternNode(StatNode): env.add_include_file(self.include_file, self.verbatim_include, late) def analyse_expressions(self, env): + # Allow C properties, inline methods, etc. also in external types. 
+ self.body = self.body.analyse_expressions(env) return self + def generate_function_definitions(self, env, code): + self.body.generate_function_definitions(env, code) + def generate_execution_code(self, code): pass @@ -525,6 +492,9 @@ class CDeclaratorNode(Node): calling_convention = "" + def declared_name(self): + return None + def analyse_templates(self): # Only C++ functions have templates. return None @@ -539,6 +509,9 @@ class CNameDeclaratorNode(CDeclaratorNode): default = None + def declared_name(self): + return self.name + def analyse(self, base_type, env, nonempty=0, visibility=None, in_pxd=False): if nonempty and self.name == '': # May have mistaken the name for the type. @@ -551,7 +524,12 @@ class CNameDeclaratorNode(CDeclaratorNode): base_type = py_object_type if base_type.is_fused and env.fused_to_specific: - base_type = base_type.specialize(env.fused_to_specific) + try: + base_type = base_type.specialize(env.fused_to_specific) + except CannotSpecialize: + error(self.pos, + "'%s' cannot be specialized since its type is not a fused argument to this function" % + self.name) self.type = base_type return self, base_type @@ -562,6 +540,9 @@ class CPtrDeclaratorNode(CDeclaratorNode): child_attrs = ["base"] + def declared_name(self): + return self.base.declared_name() + def analyse_templates(self): return self.base.analyse_templates() @@ -572,14 +553,17 @@ class CPtrDeclaratorNode(CDeclaratorNode): return self.base.analyse(ptr_type, env, nonempty=nonempty, visibility=visibility, in_pxd=in_pxd) -class CReferenceDeclaratorNode(CDeclaratorNode): - # base CDeclaratorNode - +class _CReferenceDeclaratorBaseNode(CDeclaratorNode): child_attrs = ["base"] + def declared_name(self): + return self.base.declared_name() + def analyse_templates(self): return self.base.analyse_templates() + +class CReferenceDeclaratorNode(_CReferenceDeclaratorBaseNode): def analyse(self, base_type, env, nonempty=0, visibility=None, in_pxd=False): if base_type.is_pyobject: error(self.pos, 
"Reference base type cannot be a Python object") @@ -587,6 +571,14 @@ class CReferenceDeclaratorNode(CDeclaratorNode): return self.base.analyse(ref_type, env, nonempty=nonempty, visibility=visibility, in_pxd=in_pxd) +class CppRvalueReferenceDeclaratorNode(_CReferenceDeclaratorBaseNode): + def analyse(self, base_type, env, nonempty=0, visibility=None, in_pxd=False): + if base_type.is_pyobject: + error(self.pos, "Rvalue-reference base type cannot be a Python object") + ref_type = PyrexTypes.cpp_rvalue_ref_type(base_type) + return self.base.analyse(ref_type, env, nonempty=nonempty, visibility=visibility, in_pxd=in_pxd) + + class CArrayDeclaratorNode(CDeclaratorNode): # base CDeclaratorNode # dimension ExprNode @@ -594,7 +586,9 @@ class CArrayDeclaratorNode(CDeclaratorNode): child_attrs = ["base", "dimension"] def analyse(self, base_type, env, nonempty=0, visibility=None, in_pxd=False): - if (base_type.is_cpp_class and base_type.is_template_type()) or base_type.is_cfunction: + if ((base_type.is_cpp_class and base_type.is_template_type()) or + base_type.is_cfunction or + base_type.python_type_constructor_name): from .ExprNodes import TupleNode if isinstance(self.dimension, TupleNode): args = self.dimension.args @@ -606,7 +600,7 @@ class CArrayDeclaratorNode(CDeclaratorNode): error(args[ix].pos, "Template parameter not a type") base_type = error_type else: - base_type = base_type.specialize_here(self.pos, values) + base_type = base_type.specialize_here(self.pos, env, values) return self.base.analyse(base_type, env, nonempty=nonempty, visibility=visibility, in_pxd=in_pxd) if self.dimension: self.dimension = self.dimension.analyse_const_expression(env) @@ -636,8 +630,8 @@ class CFuncDeclaratorNode(CDeclaratorNode): # args [CArgDeclNode] # templates [TemplatePlaceholderType] # has_varargs boolean - # exception_value ConstNode - # exception_check boolean True if PyErr_Occurred check needed + # exception_value ConstNode or NameNode NameNode when the name of a c++ exception 
conversion function + # exception_check boolean or "+" True if PyErr_Occurred check needed, "+" for a c++ check # nogil boolean Can be called without gil # with_gil boolean Acquire gil around function body # is_const_method boolean Whether this is a const method @@ -649,6 +643,9 @@ class CFuncDeclaratorNode(CDeclaratorNode): is_const_method = 0 templates = None + def declared_name(self): + return self.base.declared_name() + def analyse_templates(self): if isinstance(self.base, CArrayDeclaratorNode): from .ExprNodes import TupleNode, NameNode @@ -718,6 +715,12 @@ class CFuncDeclaratorNode(CDeclaratorNode): env.add_include_file('new') # for std::bad_alloc env.add_include_file('stdexcept') env.add_include_file('typeinfo') # for std::bad_cast + elif return_type.is_pyobject and self.exception_check: + # Functions in pure Python mode default to always check return values for exceptions + # (equivalent to the "except*" declaration). In this case, the exception clause + # is silently ignored for functions returning a Python object. + self.exception_check = False + if (return_type.is_pyobject and (self.exception_value or self.exception_check) and self.exception_check != '+'): @@ -727,15 +730,21 @@ class CFuncDeclaratorNode(CDeclaratorNode): # Use an explicit exception return value to speed up exception checks. # Even if it is not declared, we can use the default exception value of the return type, # unless the function is some kind of external function that we do not control. - if return_type.exception_value is not None and (visibility != 'extern' and not in_pxd): - # Extension types are more difficult because the signature must match the base type signature. - if not env.is_c_class_scope: + if (return_type.exception_value is not None and (visibility != 'extern' and not in_pxd)): + # - We skip this optimization for extension types; they are more difficult because + # the signature must match the base type signature. 
+ # - Same for function pointers, as we want them to be able to match functions + # with any exception value. + # - Ideally the function-pointer test would be better after self.base is analysed + # however that is hard to do with the current implementation so it lives here + # for now. + if not env.is_c_class_scope and not isinstance(self.base, CPtrDeclaratorNode): from .ExprNodes import ConstNode self.exception_value = ConstNode( self.pos, value=return_type.exception_value, type=return_type) if self.exception_value: - self.exception_value = self.exception_value.analyse_const_expression(env) if self.exception_check == '+': + self.exception_value = self.exception_value.analyse_const_expression(env) exc_val_type = self.exception_value.type if (not exc_val_type.is_error and not exc_val_type.is_pyobject @@ -745,19 +754,28 @@ class CFuncDeclaratorNode(CDeclaratorNode): and not (exc_val_type == PyrexTypes.c_char_type and self.exception_value.value == '*')): error(self.exception_value.pos, - "Exception value must be a Python exception or cdef function with no arguments or *.") + "Exception value must be a Python exception, or C++ function with no arguments, or *.") exc_val = self.exception_value else: - self.exception_value = self.exception_value.coerce_to( + self.exception_value = self.exception_value.analyse_types(env).coerce_to( return_type, env).analyse_const_expression(env) exc_val = self.exception_value.get_constant_c_result_code() if exc_val is None: - raise InternalError( - "get_constant_c_result_code not implemented for %s" % - self.exception_value.__class__.__name__) + error(self.exception_value.pos, "Exception value must be constant") if not return_type.assignable_from(self.exception_value.type): error(self.exception_value.pos, "Exception value incompatible with function return type") + if (visibility != 'extern' + and (return_type.is_int or return_type.is_float) + and self.exception_value.has_constant_result()): + try: + type_default_value = 
float(return_type.default_value) + except ValueError: + pass + else: + if self.exception_value.constant_result == type_default_value: + warning(self.pos, "Ambiguous exception value, same as default return value: %r" % + self.exception_value.constant_result) exc_check = self.exception_check if return_type.is_cfunction: error(self.pos, "Function cannot return a function") @@ -789,6 +807,13 @@ class CFuncDeclaratorNode(CDeclaratorNode): error(self.pos, "cannot have both '%s' and '%s' " "calling conventions" % (current, callspec)) func_type.calling_convention = callspec + + if func_type.return_type.is_rvalue_reference: + warning(self.pos, "Rvalue-reference as function return type not supported", 1) + for arg in func_type.args: + if arg.type.is_rvalue_reference and not arg.is_forwarding_reference(): + warning(self.pos, "Rvalue-reference as function argument not supported", 1) + return self.base.analyse(func_type, env, visibility=visibility, in_pxd=in_pxd) def declare_optional_arg_struct(self, func_type, env, fused_cname=None): @@ -850,8 +875,12 @@ class CArgDeclNode(Node): # annotation ExprNode or None Py3 function arg annotation # is_self_arg boolean Is the "self" arg of an extension type method # is_type_arg boolean Is the "class" arg of an extension type classmethod - # is_kw_only boolean Is a keyword-only argument + # kw_only boolean Is a keyword-only argument # is_dynamic boolean Non-literal arg stored inside CyFunction + # pos_only boolean Is a positional-only argument + # + # name_cstring property that converts the name to a cstring taking care of unicode + # and quoting it child_attrs = ["base_type", "declarator", "default", "annotation"] outer_attrs = ["default", "annotation"] @@ -859,7 +888,9 @@ class CArgDeclNode(Node): is_self_arg = 0 is_type_arg = 0 is_generic = 1 + is_special_method_optional = False kw_only = 0 + pos_only = 0 not_none = 0 or_none = 0 type = None @@ -868,59 +899,106 @@ class CArgDeclNode(Node): annotation = None is_dynamic = 0 + def 
declared_name(self): + return self.declarator.declared_name() + + @property + def name_cstring(self): + return self.name.as_c_string_literal() + + @property + def hdr_cname(self): + # done lazily - needs self.entry to be set to get the class-mangled + # name, which means it has to be generated relatively late + if self.needs_conversion: + return punycodify_name(Naming.arg_prefix + self.entry.name) + else: + return punycodify_name(Naming.var_prefix + self.entry.name) + + def analyse(self, env, nonempty=0, is_self_arg=False): if is_self_arg: - self.base_type.is_self_arg = self.is_self_arg = True - if self.type is None: - # The parser may misinterpret names as types. We fix that here. - if isinstance(self.declarator, CNameDeclaratorNode) and self.declarator.name == '': - if nonempty: - if self.base_type.is_basic_c_type: - # char, short, long called "int" - type = self.base_type.analyse(env, could_be_name=True) - arg_name = type.empty_declaration_code() - else: - arg_name = self.base_type.name - self.declarator.name = EncodedString(arg_name) - self.base_type.name = None - self.base_type.is_basic_c_type = False - could_be_name = True - else: - could_be_name = False - self.base_type.is_arg = True - base_type = self.base_type.analyse(env, could_be_name=could_be_name) - if hasattr(self.base_type, 'arg_name') and self.base_type.arg_name: - self.declarator.name = self.base_type.arg_name - - # The parser is unable to resolve the ambiguity of [] as part of the - # type (e.g. in buffers) or empty declarator (as with arrays). - # This is only arises for empty multi-dimensional arrays. 
- if (base_type.is_array - and isinstance(self.base_type, TemplatedTypeNode) - and isinstance(self.declarator, CArrayDeclaratorNode)): - declarator = self.declarator - while isinstance(declarator.base, CArrayDeclaratorNode): - declarator = declarator.base - declarator.base = self.base_type.array_declarator - base_type = base_type.base_type + self.base_type.is_self_arg = self.is_self_arg = is_self_arg + if self.type is not None: + return self.name_declarator, self.type - # inject type declaration from annotations - # this is called without 'env' by AdjustDefByDirectives transform before declaration analysis - if self.annotation and env and env.directives['annotation_typing'] and self.base_type.name is None: - arg_type = self.inject_type_from_annotations(env) - if arg_type is not None: - base_type = arg_type - return self.declarator.analyse(base_type, env, nonempty=nonempty) + # The parser may misinterpret names as types. We fix that here. + if isinstance(self.declarator, CNameDeclaratorNode) and self.declarator.name == '': + if nonempty: + if self.base_type.is_basic_c_type: + # char, short, long called "int" + type = self.base_type.analyse(env, could_be_name=True) + arg_name = type.empty_declaration_code() + else: + arg_name = self.base_type.name + self.declarator.name = EncodedString(arg_name) + self.base_type.name = None + self.base_type.is_basic_c_type = False + could_be_name = True else: - return self.name_declarator, self.type + could_be_name = False + self.base_type.is_arg = True + base_type = self.base_type.analyse(env, could_be_name=could_be_name) + base_arg_name = getattr(self.base_type, 'arg_name', None) + if base_arg_name: + self.declarator.name = base_arg_name + + # The parser is unable to resolve the ambiguity of [] as part of the + # type (e.g. in buffers) or empty declarator (as with arrays). + # This is only arises for empty multi-dimensional arrays. 
+ if (base_type.is_array + and isinstance(self.base_type, TemplatedTypeNode) + and isinstance(self.declarator, CArrayDeclaratorNode)): + declarator = self.declarator + while isinstance(declarator.base, CArrayDeclaratorNode): + declarator = declarator.base + declarator.base = self.base_type.array_declarator + base_type = base_type.base_type + + # inject type declaration from annotations + # this is called without 'env' by AdjustDefByDirectives transform before declaration analysis + if (self.annotation and env and env.directives['annotation_typing'] + # CSimpleBaseTypeNode has a name attribute; CAnalysedBaseTypeNode + # (and maybe other options) doesn't + and getattr(self.base_type, "name", None) is None): + arg_type = self.inject_type_from_annotations(env) + if arg_type is not None: + base_type = arg_type + return self.declarator.analyse(base_type, env, nonempty=nonempty) def inject_type_from_annotations(self, env): annotation = self.annotation if not annotation: return None - base_type, arg_type = analyse_type_annotation(annotation, env, assigned_value=self.default) - if base_type is not None: - self.base_type = base_type + + modifiers, arg_type = annotation.analyse_type_annotation(env, assigned_value=self.default) + if arg_type is not None: + self.base_type = CAnalysedBaseTypeNode( + annotation.pos, type=arg_type, is_arg=True) + + if arg_type: + if "typing.Optional" in modifiers: + # "x: Optional[...]" => explicitly allow 'None' + arg_type = arg_type.resolve() + if arg_type and not arg_type.is_pyobject: + # We probably already reported this as "cannot be applied to non-Python type". + # error(annotation.pos, "Only Python type arguments can use typing.Optional[...]") + pass + else: + self.or_none = True + elif arg_type is py_object_type: + # exclude ": object" from the None check - None is a generic object. + self.or_none = True + elif self.default and self.default.is_none and (arg_type.is_pyobject or arg_type.equivalent_type): + # "x: ... 
= None" => implicitly allow 'None' + if not arg_type.is_pyobject: + arg_type = arg_type.equivalent_type + if not self.or_none: + warning(self.pos, "PEP-484 recommends 'typing.Optional[...]' for arguments that can be None.") + self.or_none = True + elif arg_type.is_pyobject and not self.or_none: + self.not_none = True + return arg_type def calculate_default_value_code(self, code): @@ -947,8 +1025,7 @@ class CArgDeclNode(Node): default.make_owned_reference(code) result = default.result() if overloaded_assignment else default.result_as(self.type) code.putln("%s = %s;" % (target, result)) - if self.type.is_pyobject: - code.put_giveref(default.result()) + code.put_giveref(default.result(), self.type) default.generate_post_assignment_code(code) default.free_temps(code) @@ -989,6 +1066,7 @@ class CSimpleBaseTypeNode(CBaseTypeNode): module_path = [] is_basic_c_type = False complex = False + is_self_arg = False def analyse(self, env, could_be_name=False): # Return type descriptor. @@ -1009,22 +1087,31 @@ class CSimpleBaseTypeNode(CBaseTypeNode): else: type = py_object_type else: + scope = env if self.module_path: # Maybe it's a nested C++ class. - scope = env for item in self.module_path: entry = scope.lookup(item) - if entry is not None and entry.is_cpp_class: + if entry is not None and ( + entry.is_cpp_class or + entry.is_type and entry.type.is_cpp_class + ): scope = entry.type.scope + elif entry and entry.as_module: + scope = entry.as_module else: scope = None break - + if scope is None and len(self.module_path) == 1: + # (may be possible to handle longer module paths?) + # TODO: probably not the best place to declare it? + from .Builtin import get_known_standard_library_module_scope + found_entry = env.lookup(self.module_path[0]) + if found_entry and found_entry.known_standard_library_import: + scope = get_known_standard_library_module_scope(found_entry.known_standard_library_import) if scope is None: # Maybe it's a cimport. 
scope = env.find_imported_module(self.module_path, self.pos) - else: - scope = env if scope: if scope.is_c_class_scope: @@ -1043,7 +1130,7 @@ class CSimpleBaseTypeNode(CBaseTypeNode): self.arg_name = EncodedString(self.name) else: if self.templates: - if not self.name in self.templates: + if self.name not in self.templates: error(self.pos, "'%s' is not a type identifier" % self.name) type = PyrexTypes.TemplatePlaceholderType(self.name) else: @@ -1063,10 +1150,9 @@ class CSimpleBaseTypeNode(CBaseTypeNode): type = PyrexTypes.c_double_complex_type type.create_declaration_utility_code(env) self.complex = True - if type: - return type - else: - return PyrexTypes.error_type + if not type: + type = PyrexTypes.error_type + return type class MemoryViewSliceTypeNode(CBaseTypeNode): @@ -1135,29 +1221,56 @@ class TemplatedTypeNode(CBaseTypeNode): child_attrs = ["base_type_node", "positional_args", "keyword_args", "dtype_node"] + is_templated_type_node = True dtype_node = None - name = None + def _analyse_template_types(self, env, base_type): + require_python_types = base_type.python_type_constructor_name in ( + 'typing.Optional', + 'dataclasses.ClassVar', + ) + in_c_type_context = env.in_c_type_context and not require_python_types + + template_types = [] + for template_node in self.positional_args: + # CBaseTypeNode -> allow C type declarations in a 'cdef' context again + with env.new_c_type_context(in_c_type_context or isinstance(template_node, CBaseTypeNode)): + ttype = template_node.analyse_as_type(env) + if ttype is None: + if base_type.is_cpp_class: + error(template_node.pos, "unknown type in template argument") + ttype = error_type + # For Python generics we can be a bit more flexible and allow None. + elif require_python_types and not ttype.is_pyobject: + if ttype.equivalent_type and not template_node.as_cython_attribute(): + ttype = ttype.equivalent_type + else: + error(template_node.pos, "%s[...] 
cannot be applied to non-Python type %s" % ( + base_type.python_type_constructor_name, + ttype, + )) + ttype = error_type + template_types.append(ttype) + + return template_types + def analyse(self, env, could_be_name=False, base_type=None): if base_type is None: base_type = self.base_type_node.analyse(env) if base_type.is_error: return base_type - if base_type.is_cpp_class and base_type.is_template_type(): - # Templated class + if ((base_type.is_cpp_class and base_type.is_template_type()) or + base_type.python_type_constructor_name): + # Templated class, Python generics, etc. if self.keyword_args and self.keyword_args.key_value_pairs: - error(self.pos, "c++ templates cannot take keyword arguments") + tp = "c++ templates" if base_type.is_cpp_class else "indexed types" + error(self.pos, "%s cannot take keyword arguments" % tp) self.type = PyrexTypes.error_type - else: - template_types = [] - for template_node in self.positional_args: - type = template_node.analyse_as_type(env) - if type is None: - error(template_node.pos, "unknown type in template argument") - type = error_type - template_types.append(type) - self.type = base_type.specialize_here(self.pos, template_types) + return self.type + + template_types = self._analyse_template_types(env, base_type) + self.type = base_type.specialize_here(self.pos, env, template_types) elif base_type.is_pyobject: # Buffer @@ -1198,11 +1311,29 @@ class TemplatedTypeNode(CBaseTypeNode): dimension=dimension) self.type = self.array_declarator.analyse(base_type, env)[1] - if self.type.is_fused and env.fused_to_specific: - self.type = self.type.specialize(env.fused_to_specific) + if self.type and self.type.is_fused and env.fused_to_specific: + try: + self.type = self.type.specialize(env.fused_to_specific) + except CannotSpecialize: + error(self.pos, + "'%s' cannot be specialized since its type is not a fused argument to this function" % + self.name) return self.type + def analyse_pytyping_modifiers(self, env): + # Check for 
declaration modifiers, e.g. "typing.Optional[...]" or "dataclasses.InitVar[...]" + # TODO: somehow bring this together with IndexNode.analyse_pytyping_modifiers() + modifiers = [] + modifier_node = self + while modifier_node.is_templated_type_node and modifier_node.base_type_node and len(modifier_node.positional_args) == 1: + modifier_type = self.base_type_node.analyse_as_type(env) + if modifier_type.python_type_constructor_name and modifier_type.modifier_name: + modifiers.append(modifier_type.modifier_name) + modifier_node = modifier_node.positional_args[0] + + return modifiers + class CComplexBaseTypeNode(CBaseTypeNode): # base_type CBaseTypeNode @@ -1273,8 +1404,10 @@ class FusedTypeNode(CBaseTypeNode): return PyrexTypes.FusedType(types, name=self.name) -class CConstTypeNode(CBaseTypeNode): +class CConstOrVolatileTypeNode(CBaseTypeNode): # base_type CBaseTypeNode + # is_const boolean + # is_volatile boolean child_attrs = ["base_type"] @@ -1282,8 +1415,8 @@ class CConstTypeNode(CBaseTypeNode): base = self.base_type.analyse(env, could_be_name) if base.is_pyobject: error(self.pos, - "Const base type cannot be a Python object") - return PyrexTypes.c_const_type(base) + "Const/volatile base type cannot be a Python object") + return PyrexTypes.c_const_or_volatile_type(base, self.is_const, self.is_volatile) class CVarDefNode(StatNode): @@ -1328,6 +1461,11 @@ class CVarDefNode(StatNode): base_type = self.base_type.analyse(env) + # Check for declaration modifiers, e.g. 
"typing.Optional[...]" or "dataclasses.InitVar[...]" + modifiers = None + if self.base_type.is_templated_type_node: + modifiers = self.base_type.analyse_pytyping_modifiers(env) + if base_type.is_fused and not self.in_pxd and (env.is_c_class_scope or env.is_module_scope): error(self.pos, "Fused types not allowed here") @@ -1369,6 +1507,8 @@ class CVarDefNode(StatNode): return if type.is_reference and self.visibility != 'extern': error(declarator.pos, "C++ references cannot be declared; use a pointer instead") + if type.is_rvalue_reference and self.visibility != 'extern': + error(declarator.pos, "C++ rvalue-references cannot be declared") if type.is_cfunction: if 'staticmethod' in env.directives: type.is_static_method = True @@ -1383,14 +1523,13 @@ class CVarDefNode(StatNode): self.entry.create_wrapper = True else: if self.overridable: - warning(self.pos, "cpdef variables will not be supported in Cython 3; " - "currently they are no different from cdef variables", 2) + error(self.pos, "Variables cannot be declared with 'cpdef'. 
Use 'cdef' instead.") if self.directive_locals: error(self.pos, "Decorators can only be followed by functions") self.entry = dest_scope.declare_var( name, type, declarator.pos, cname=cname, visibility=visibility, in_pxd=self.in_pxd, - api=self.api, is_cdef=1) + api=self.api, is_cdef=True, pytyping_modifiers=modifiers) if Options.docstrings: self.entry.doc = embed_position(self.pos, self.doc) @@ -1499,6 +1638,9 @@ class CppClassNode(CStructOrUnionDefNode, BlockNode): elif isinstance(attr, CompilerDirectivesNode): for sub_attr in func_attributes(attr.body.stats): yield sub_attr + elif isinstance(attr, CppClassNode) and attr.attributes is not None: + for sub_attr in func_attributes(attr.attributes): + yield sub_attr if self.attributes is not None: if self.in_pxd and not env.in_cinclude: self.entry.defined_in_pxd = 1 @@ -1529,36 +1671,62 @@ class CppClassNode(CStructOrUnionDefNode, BlockNode): class CEnumDefNode(StatNode): - # name string or None - # cname string or None - # items [CEnumDefItemNode] - # typedef_flag boolean - # visibility "public" or "private" or "extern" - # api boolean - # in_pxd boolean - # create_wrapper boolean - # entry Entry - - child_attrs = ["items"] + # name string or None + # cname string or None + # scoped boolean Is a C++ scoped enum + # underlying_type CSimpleBaseTypeNode The underlying value type (int or C++ type) + # items [CEnumDefItemNode] + # typedef_flag boolean + # visibility "public" or "private" or "extern" + # api boolean + # in_pxd boolean + # create_wrapper boolean + # entry Entry + # doc EncodedString or None Doc string + + child_attrs = ["items", "underlying_type"] + doc = None def declare(self, env): - self.entry = env.declare_enum( - self.name, self.pos, - cname=self.cname, typedef_flag=self.typedef_flag, - visibility=self.visibility, api=self.api, - create_wrapper=self.create_wrapper) + doc = None + if Options.docstrings: + doc = embed_position(self.pos, self.doc) + + self.entry = env.declare_enum( + self.name, self.pos, 
+ cname=self.cname, + scoped=self.scoped, + typedef_flag=self.typedef_flag, + visibility=self.visibility, api=self.api, + create_wrapper=self.create_wrapper, doc=doc) def analyse_declarations(self, env): + scope = None + underlying_type = self.underlying_type.analyse(env) + + if not underlying_type.is_int: + error(self.underlying_type.pos, "underlying type is not an integral type") + + self.entry.type.underlying_type = underlying_type + + if self.scoped and self.items is not None: + scope = CppScopedEnumScope(self.name, env) + scope.type = self.entry.type + else: + scope = env + if self.items is not None: if self.in_pxd and not env.in_cinclude: self.entry.defined_in_pxd = 1 for item in self.items: - item.analyse_declarations(env, self.entry) + item.analyse_declarations(scope, self.entry) def analyse_expressions(self, env): return self def generate_execution_code(self, code): + if self.scoped: + return # nothing to do here for C++ enums if self.visibility == 'public' or self.api: code.mark_pos(self.pos) temp = code.funcstate.allocate_temp(PyrexTypes.py_object_type, manage_ref=True) @@ -1567,7 +1735,7 @@ class CEnumDefNode(StatNode): temp, item.cname, code.error_goto_if_null(temp, item.pos))) - code.put_gotref(temp) + code.put_gotref(temp, PyrexTypes.py_object_type) code.putln('if (PyDict_SetItemString(%s, "%s", %s) < 0) %s' % ( Naming.moddict_cname, item.name, @@ -1590,9 +1758,15 @@ class CEnumDefItemNode(StatNode): if not self.value.type.is_int: self.value = self.value.coerce_to(PyrexTypes.c_int_type, env) self.value = self.value.analyse_const_expression(env) + + if enum_entry.type.is_cpp_enum: + cname = "%s::%s" % (enum_entry.cname, self.name) + else: + cname = self.cname + entry = env.declare_const( self.name, enum_entry.type, - self.value, self.pos, cname=self.cname, + self.value, self.pos, cname=cname, visibility=enum_entry.visibility, api=enum_entry.api, create_wrapper=enum_entry.create_wrapper and enum_entry.name is None) enum_entry.enum_values.append(entry) 
@@ -1659,6 +1833,9 @@ class FuncDefNode(StatNode, BlockNode): needs_outer_scope = False pymethdef_required = False is_generator = False + is_generator_expression = False # this can be True alongside is_generator + is_coroutine = False + is_asyncgen = False is_generator_body = False is_async_def = False modifiers = [] @@ -1667,6 +1844,9 @@ class FuncDefNode(StatNode, BlockNode): starstar_arg = None is_cyfunction = False code_object = None + return_type_annotation = None + + outer_attrs = None # overridden by some derived classes - to be visited outside the node's scope def analyse_default_values(self, env): default_seen = 0 @@ -1676,6 +1856,10 @@ class FuncDefNode(StatNode, BlockNode): if arg.is_generic: arg.default = arg.default.analyse_types(env) arg.default = arg.default.coerce_to(arg.type, env) + elif arg.is_special_method_optional: + if not arg.default.is_none: + error(arg.pos, "This argument cannot have a non-None default value") + arg.default = None else: error(arg.pos, "This argument cannot have a default value") arg.default = None @@ -1684,18 +1868,12 @@ class FuncDefNode(StatNode, BlockNode): elif default_seen: error(arg.pos, "Non-default argument following default argument") - def analyse_annotation(self, env, annotation): - # Annotations can not only contain valid Python expressions but arbitrary type references. 
- if annotation is None: - return None - if not env.directives['annotation_typing'] or annotation.analyse_as_type(env) is None: - annotation = annotation.analyse_types(env) - return annotation - def analyse_annotations(self, env): for arg in self.args: if arg.annotation: - arg.annotation = self.analyse_annotation(env, arg.annotation) + arg.annotation = arg.annotation.analyse_types(env) + if self.return_type_annotation: + self.return_type_annotation = self.return_type_annotation.analyse_types(env) def align_argument_type(self, env, arg): # @cython.locals() @@ -1718,6 +1896,9 @@ class FuncDefNode(StatNode, BlockNode): error(type_node.pos, "Previous declaration here") else: arg.type = other_type + if arg.type.is_complex: + # utility code for complex types is special-cased and also important to ensure that it's run + arg.type.create_declaration_utility_code(env) return arg def need_gil_acquisition(self, lenv): @@ -1728,7 +1909,8 @@ class FuncDefNode(StatNode, BlockNode): while genv.is_py_class_scope or genv.is_c_class_scope: genv = genv.outer_scope if self.needs_closure: - lenv = ClosureScope(name=self.entry.name, + cls = GeneratorExpressionScope if self.is_generator_expression else ClosureScope + lenv = cls(name=self.entry.name, outer_scope=genv, parent_scope=env, scope_name=self.entry.cname) @@ -1749,8 +1931,6 @@ class FuncDefNode(StatNode, BlockNode): def generate_function_definitions(self, env, code): from . import Buffer - if self.return_type.is_memoryviewslice: - from . 
import MemoryView lenv = self.local_scope if lenv.is_closure_scope and not lenv.is_passthrough: @@ -1778,6 +1958,8 @@ class FuncDefNode(StatNode, BlockNode): profile = code.globalstate.directives['profile'] linetrace = code.globalstate.directives['linetrace'] if profile or linetrace: + if linetrace: + code.use_fast_gil_utility_code() code.globalstate.use_utility_code( UtilityCode.load_cached("Profile", "Profile.c")) @@ -1823,14 +2005,20 @@ class FuncDefNode(StatNode, BlockNode): # Initialize the return variable __pyx_r init = "" - if not self.return_type.is_void: - if self.return_type.is_pyobject: + return_type = self.return_type + if return_type.is_cv_qualified and return_type.is_const: + # Within this function body, we want to be able to set this + # variable, even though the function itself needs to return + # a const version + return_type = return_type.cv_base_type + if not return_type.is_void: + if return_type.is_pyobject: init = " = NULL" - elif self.return_type.is_memoryviewslice: - init = ' = ' + MemoryView.memslice_entry_init + elif return_type.is_memoryviewslice: + init = ' = ' + return_type.literal_code(return_type.default_value) code.putln("%s%s;" % ( - self.return_type.declaration_code(Naming.retval_cname), + return_type.declaration_code(Naming.retval_cname), init)) tempvardecl_code = code.insertion_point() @@ -1862,11 +2050,12 @@ class FuncDefNode(StatNode, BlockNode): use_refnanny = not lenv.nogil or lenv.has_with_gil_block + gilstate_decl = None if acquire_gil or acquire_gil_for_var_decls_only: code.put_ensure_gil() code.funcstate.gil_owned = True - elif lenv.nogil and lenv.has_with_gil_block: - code.declare_gilstate() + else: + gilstate_decl = code.insertion_point() if profile or linetrace: if not self.is_generator: @@ -1908,7 +2097,7 @@ class FuncDefNode(StatNode, BlockNode): code.put_incref("Py_None", py_object_type) code.putln(code.error_goto(self.pos)) code.putln("} else {") - code.put_gotref(Naming.cur_scope_cname) + 
code.put_gotref(Naming.cur_scope_cname, lenv.scope_class.type) code.putln("}") # Note that it is unsafe to decref the scope at this point. if self.needs_outer_scope: @@ -1927,7 +2116,7 @@ class FuncDefNode(StatNode, BlockNode): elif self.needs_closure: # inner closures own a reference to their outer parent code.put_incref(outer_scope_cname, cenv.scope_class.type) - code.put_giveref(outer_scope_cname) + code.put_giveref(outer_scope_cname, cenv.scope_class.type) # ----- Trace function call if profile or linetrace: # this looks a bit late, but if we don't get here due to a @@ -1945,20 +2134,19 @@ class FuncDefNode(StatNode, BlockNode): self.generate_argument_parsing_code(env, code) # If an argument is assigned to in the body, we must # incref it to properly keep track of refcounts. - is_cdef = isinstance(self, CFuncDefNode) for entry in lenv.arg_entries: - if entry.type.is_pyobject: - if (acquire_gil or len(entry.cf_assignments) > 1) and not entry.in_closure: + if not entry.type.is_memoryviewslice: + if (acquire_gil or entry.cf_is_reassigned) and not entry.in_closure: code.put_var_incref(entry) - # Note: defaults are always incref-ed. For def functions, we # we acquire arguments from object conversion, so we have # new references. 
If we are a cdef function, we need to # incref our arguments - elif is_cdef and entry.type.is_memoryviewslice and len(entry.cf_assignments) > 1: - code.put_incref_memoryviewslice(entry.cname, have_gil=code.funcstate.gil_owned) + elif entry.cf_is_reassigned and not entry.in_closure: + code.put_var_incref_memoryviewslice(entry, + have_gil=code.funcstate.gil_owned) for entry in lenv.var_entries: - if entry.is_arg and len(entry.cf_assignments) > 1 and not entry.in_closure: + if entry.is_arg and entry.cf_is_reassigned and not entry.in_closure: if entry.xdecref_cleanup: code.put_var_xincref(entry) else: @@ -1989,27 +2177,45 @@ class FuncDefNode(StatNode, BlockNode): code.putln("") code.putln("/* function exit code */") + gil_owned = { + 'success': code.funcstate.gil_owned, + 'error': code.funcstate.gil_owned, + 'gil_state_declared': gilstate_decl is None, + } + def assure_gil(code_path, code=code): + if not gil_owned[code_path]: + if not gil_owned['gil_state_declared']: + gilstate_decl.declare_gilstate() + gil_owned['gil_state_declared'] = True + code.put_ensure_gil(declare_gilstate=False) + gil_owned[code_path] = True + # ----- Default return value + return_type = self.return_type if not self.body.is_terminator: - if self.return_type.is_pyobject: - #if self.return_type.is_extension_type: + if return_type.is_pyobject: + #if return_type.is_extension_type: # lhs = "(PyObject *)%s" % Naming.retval_cname #else: lhs = Naming.retval_cname - code.put_init_to_py_none(lhs, self.return_type) - else: - val = self.return_type.default_value + assure_gil('success') + code.put_init_to_py_none(lhs, return_type) + elif not return_type.is_memoryviewslice: + # memory view structs receive their default value on initialisation + val = return_type.default_value if val: code.putln("%s = %s;" % (Naming.retval_cname, val)) - elif not self.return_type.is_void: + elif not return_type.is_void: code.putln("__Pyx_pretend_to_initialize(&%s);" % Naming.retval_cname) + # ----- Error cleanup - if 
code.error_label in code.labels_used: + if code.label_used(code.error_label): if not self.body.is_terminator: code.put_goto(code.return_label) code.put_label(code.error_label) for cname, type in code.funcstate.all_managed_temps(): - code.put_xdecref(cname, type, have_gil=not lenv.nogil) + assure_gil('error') + code.put_xdecref(cname, type, have_gil=gil_owned['error']) # Clean up buffers -- this calls a Python function # so need to save and restore error state @@ -2019,6 +2225,7 @@ class FuncDefNode(StatNode, BlockNode): code.globalstate.use_utility_code(restore_exception_utility_code) code.putln("{ PyObject *__pyx_type, *__pyx_value, *__pyx_tb;") code.putln("__Pyx_PyThreadState_declare") + assure_gil('error') code.putln("__Pyx_PyThreadState_assign") code.putln("__Pyx_ErrFetch(&__pyx_type, &__pyx_value, &__pyx_tb);") for entry in used_buffer_entries: @@ -2026,7 +2233,8 @@ class FuncDefNode(StatNode, BlockNode): #code.putln("%s = 0;" % entry.cname) code.putln("__Pyx_ErrRestore(__pyx_type, __pyx_value, __pyx_tb);}") - if self.return_type.is_memoryviewslice: + if return_type.is_memoryviewslice: + from . import MemoryView MemoryView.put_init_entry(Naming.retval_cname, code) err_val = Naming.retval_cname else: @@ -2038,104 +2246,136 @@ class FuncDefNode(StatNode, BlockNode): # code.globalstate.use_utility_code(get_exception_tuple_utility_code) # code.put_trace_exception() - if lenv.nogil and not lenv.has_with_gil_block: - code.putln("{") - code.put_ensure_gil() - + assure_gil('error') + if code.funcstate.error_without_exception: + tempvardecl_code.putln( + "int %s = 0; /* StopIteration */" % Naming.error_without_exception_cname + ) + code.putln("if (!%s) {" % Naming.error_without_exception_cname) code.put_add_traceback(self.entry.qualified_name) - - if lenv.nogil and not lenv.has_with_gil_block: - code.put_release_ensured_gil() + if code.funcstate.error_without_exception: code.putln("}") else: warning(self.entry.pos, "Unraisable exception in function '%s'." 
% self.entry.qualified_name, 0) - code.put_unraisable(self.entry.qualified_name, lenv.nogil) - default_retval = self.return_type.default_value + assure_gil('error') + code.put_unraisable(self.entry.qualified_name) + default_retval = return_type.default_value if err_val is None and default_retval: err_val = default_retval if err_val is not None: if err_val != Naming.retval_cname: code.putln("%s = %s;" % (Naming.retval_cname, err_val)) - elif not self.return_type.is_void: + elif not return_type.is_void: code.putln("__Pyx_pretend_to_initialize(&%s);" % Naming.retval_cname) if is_getbuffer_slot: + assure_gil('error') self.getbuffer_error_cleanup(code) + def align_error_path_gil_to_success_path(code=code.insertion_point()): + # align error and success GIL state when both join + if gil_owned['success']: + assure_gil('error', code=code) + elif gil_owned['error']: + code.put_release_ensured_gil() + gil_owned['error'] = False + assert gil_owned['error'] == gil_owned['success'], "%s: error path %s != success path %s" % ( + self.pos, gil_owned['error'], gil_owned['success']) + # If we are using the non-error cleanup section we should # jump past it if we have an error. The if-test below determine # whether this section is used. - if buffers_present or is_getbuffer_slot or self.return_type.is_memoryviewslice: + if buffers_present or is_getbuffer_slot or return_type.is_memoryviewslice: + # In the buffer cases, we already called assure_gil('error') and own the GIL. + assert gil_owned['error'] or return_type.is_memoryviewslice code.put_goto(code.return_from_error_cleanup_label) + else: + # Adapt the GIL state to the success path right now. + align_error_path_gil_to_success_path() + else: + # No error path, no need to adapt the GIL state. 
+ def align_error_path_gil_to_success_path(): pass # ----- Non-error return cleanup - code.put_label(code.return_label) - for entry in used_buffer_entries: - Buffer.put_release_buffer_code(code, entry) - if is_getbuffer_slot: - self.getbuffer_normal_cleanup(code) + if code.label_used(code.return_label) or not code.label_used(code.error_label): + code.put_label(code.return_label) - if self.return_type.is_memoryviewslice: - # See if our return value is uninitialized on non-error return - # from . import MemoryView - # MemoryView.err_if_nogil_initialized_check(self.pos, env) - cond = code.unlikely(self.return_type.error_condition(Naming.retval_cname)) - code.putln( - 'if (%s) {' % cond) - if env.nogil: - code.put_ensure_gil() - code.putln( - 'PyErr_SetString(PyExc_TypeError, "Memoryview return value is not initialized");') - if env.nogil: - code.put_release_ensured_gil() - code.putln( - '}') + for entry in used_buffer_entries: + assure_gil('success') + Buffer.put_release_buffer_code(code, entry) + if is_getbuffer_slot: + assure_gil('success') + self.getbuffer_normal_cleanup(code) + + if return_type.is_memoryviewslice: + # See if our return value is uninitialized on non-error return + # from . 
import MemoryView + # MemoryView.err_if_nogil_initialized_check(self.pos, env) + cond = code.unlikely(return_type.error_condition(Naming.retval_cname)) + code.putln( + 'if (%s) {' % cond) + if not gil_owned['success']: + code.put_ensure_gil() + code.putln( + 'PyErr_SetString(PyExc_TypeError, "Memoryview return value is not initialized");') + if not gil_owned['success']: + code.put_release_ensured_gil() + code.putln( + '}') # ----- Return cleanup for both error and no-error return - code.put_label(code.return_from_error_cleanup_label) + if code.label_used(code.return_from_error_cleanup_label): + align_error_path_gil_to_success_path() + code.put_label(code.return_from_error_cleanup_label) for entry in lenv.var_entries: if not entry.used or entry.in_closure: continue - if entry.type.is_memoryviewslice: - code.put_xdecref_memoryviewslice(entry.cname, have_gil=not lenv.nogil) - elif entry.type.is_pyobject: - if not entry.is_arg or len(entry.cf_assignments) > 1: - if entry.xdecref_cleanup: - code.put_var_xdecref(entry) - else: - code.put_var_decref(entry) + if entry.type.is_pyobject: + if entry.is_arg and not entry.cf_is_reassigned: + continue + if entry.type.needs_refcounting: + assure_gil('success') + # FIXME ideally use entry.xdecref_cleanup but this currently isn't reliable + code.put_var_xdecref(entry, have_gil=gil_owned['success']) # Decref any increfed args for entry in lenv.arg_entries: - if entry.type.is_pyobject: - if (acquire_gil or len(entry.cf_assignments) > 1) and not entry.in_closure: - code.put_var_decref(entry) - elif (entry.type.is_memoryviewslice and - (not is_cdef or len(entry.cf_assignments) > 1)): + if entry.in_closure: + continue + if entry.type.is_memoryviewslice: # decref slices of def functions and acquired slices from cdef # functions, but not borrowed slices from cdef functions. 
- code.put_xdecref_memoryviewslice(entry.cname, - have_gil=not lenv.nogil) + if not entry.cf_is_reassigned: + continue + else: + if not acquire_gil and not entry.cf_is_reassigned: + continue + if entry.type.needs_refcounting: + assure_gil('success') + + # FIXME use entry.xdecref_cleanup - del arg seems to be the problem + code.put_var_xdecref(entry, have_gil=gil_owned['success']) if self.needs_closure: + assure_gil('success') code.put_decref(Naming.cur_scope_cname, lenv.scope_class.type) # ----- Return # This code is duplicated in ModuleNode.generate_module_init_func if not lenv.nogil: - default_retval = self.return_type.default_value + default_retval = return_type.default_value err_val = self.error_value() if err_val is None and default_retval: err_val = default_retval # FIXME: why is err_val not used? - if self.return_type.is_pyobject: - code.put_xgiveref(self.return_type.as_pyobject(Naming.retval_cname)) + code.put_xgiveref(Naming.retval_cname, return_type) if self.entry.is_special and self.entry.name == "__hash__": # Returning -1 for __hash__ is supposed to signal an error # We do as Python instances and coerce -1 into -2. 
+ assure_gil('success') # in special methods, the GIL is owned anyway code.putln("if (unlikely(%s == -1) && !PyErr_Occurred()) %s = -2;" % ( Naming.retval_cname, Naming.retval_cname)) @@ -2143,23 +2383,22 @@ class FuncDefNode(StatNode, BlockNode): code.funcstate.can_trace = False if not self.is_generator: # generators are traced when iterated, not at creation - if self.return_type.is_pyobject: + if return_type.is_pyobject: code.put_trace_return( - Naming.retval_cname, nogil=not code.funcstate.gil_owned) + Naming.retval_cname, nogil=not gil_owned['success']) else: code.put_trace_return( - "Py_None", nogil=not code.funcstate.gil_owned) + "Py_None", nogil=not gil_owned['success']) - if not lenv.nogil: - # GIL holding function - code.put_finish_refcount_context() + if use_refnanny: + code.put_finish_refcount_context(nogil=not gil_owned['success']) - if acquire_gil or (lenv.nogil and lenv.has_with_gil_block): + if acquire_gil or (lenv.nogil and gil_owned['success']): # release the GIL (note that with-gil blocks acquire it on exit in their EnsureGILNode) code.put_release_ensured_gil() code.funcstate.gil_owned = False - if not self.return_type.is_void: + if not return_type.is_void: code.putln("return %s;" % Naming.retval_cname) code.putln("}") @@ -2194,11 +2433,11 @@ class FuncDefNode(StatNode, BlockNode): typeptr_cname = arg.type.typeptr_cname arg_code = "((PyObject *)%s)" % arg.entry.cname code.putln( - 'if (unlikely(!__Pyx_ArgTypeTest(%s, %s, %d, "%s", %s))) %s' % ( + 'if (unlikely(!__Pyx_ArgTypeTest(%s, %s, %d, %s, %s))) %s' % ( arg_code, typeptr_cname, arg.accept_none, - arg.name, + arg.name_cstring, arg.type.is_builtin_type and arg.type.require_exact, code.error_goto(arg.pos))) else: @@ -2212,8 +2451,8 @@ class FuncDefNode(StatNode, BlockNode): cname = arg.entry.cname code.putln('if (unlikely(((PyObject *)%s) == Py_None)) {' % cname) - code.putln('''PyErr_Format(PyExc_TypeError, "Argument '%%.%ds' must not be None", "%s"); %s''' % ( - max(200, len(arg.name)), 
arg.name, + code.putln('''PyErr_Format(PyExc_TypeError, "Argument '%%.%ds' must not be None", %s); %s''' % ( + max(200, len(arg.name_cstring)), arg.name_cstring, code.error_goto(arg.pos))) code.putln('}') @@ -2223,9 +2462,11 @@ class FuncDefNode(StatNode, BlockNode): def generate_execution_code(self, code): code.mark_pos(self.pos) # Evaluate and store argument default values - for arg in self.args: - if not arg.is_dynamic: - arg.generate_assignment_code(code) + # skip this for wrappers since it's done by wrapped function + if not self.is_wrapper: + for arg in self.args: + if not arg.is_dynamic: + arg.generate_assignment_code(code) # # Special code for the __getbuffer__ function @@ -2249,7 +2490,7 @@ class FuncDefNode(StatNode, BlockNode): def getbuffer_check(self, code): py_buffer, _ = self._get_py_buffer_info() view = py_buffer.cname - code.putln("if (%s == NULL) {" % view) + code.putln("if (unlikely(%s == NULL)) {" % view) code.putln("PyErr_SetString(PyExc_BufferError, " "\"PyObject_GetBuffer: view==NULL argument is obsolete\");") code.putln("return -1;") @@ -2260,7 +2501,7 @@ class FuncDefNode(StatNode, BlockNode): view = py_buffer.cname if obj_type and obj_type.is_pyobject: code.put_init_to_py_none("%s->obj" % view, obj_type) - code.put_giveref("%s->obj" % view) # Do not refnanny object within structs + code.put_giveref("%s->obj" % view, obj_type) # Do not refnanny object within structs else: code.putln("%s->obj = NULL;" % view) @@ -2269,7 +2510,7 @@ class FuncDefNode(StatNode, BlockNode): view = py_buffer.cname if obj_type and obj_type.is_pyobject: code.putln("if (%s->obj != NULL) {" % view) - code.put_gotref("%s->obj" % view) + code.put_gotref("%s->obj" % view, obj_type) code.put_decref_clear("%s->obj" % view, obj_type) code.putln("}") else: @@ -2280,7 +2521,7 @@ class FuncDefNode(StatNode, BlockNode): view = py_buffer.cname if obj_type and obj_type.is_pyobject: code.putln("if (%s->obj == Py_None) {" % view) - code.put_gotref("%s->obj" % view) + 
code.put_gotref("%s->obj" % view, obj_type) code.put_decref_clear("%s->obj" % view, obj_type) code.putln("}") @@ -2288,7 +2529,7 @@ class FuncDefNode(StatNode, BlockNode): if not self.entry.is_special: return None name = self.entry.name - slot = TypeSlots.method_name_to_slot.get(name) + slot = TypeSlots.get_slot_table(self.local_scope.directives).get_slot_by_method_name(name) if not slot: return None if name == '__long__' and not self.entry.scope.lookup_here('__int__'): @@ -2322,7 +2563,8 @@ class CFuncDefNode(FuncDefNode): # is_static_method whether this is a static method # is_c_class_method whether this is a cclass method - child_attrs = ["base_type", "declarator", "body", "py_func_stat"] + child_attrs = ["base_type", "declarator", "body", "decorators", "py_func_stat"] + outer_attrs = ["decorators", "py_func_stat"] inline_in_pxd = False decorators = None @@ -2336,6 +2578,9 @@ class CFuncDefNode(FuncDefNode): def unqualified_name(self): return self.entry.name + def declared_name(self): + return self.declarator.declared_name() + @property def code_object(self): # share the CodeObject with the cpdef wrapper (if available) @@ -2356,20 +2601,20 @@ class CFuncDefNode(FuncDefNode): self.is_static_method = 'staticmethod' in env.directives and not env.lookup_here('staticmethod') # The 2 here is because we need both function and argument names. 
if isinstance(self.declarator, CFuncDeclaratorNode): - name_declarator, type = self.declarator.analyse( + name_declarator, typ = self.declarator.analyse( base_type, env, nonempty=2 * (self.body is not None), directive_locals=self.directive_locals, visibility=self.visibility) else: - name_declarator, type = self.declarator.analyse( + name_declarator, typ = self.declarator.analyse( base_type, env, nonempty=2 * (self.body is not None), visibility=self.visibility) - if not type.is_cfunction: + if not typ.is_cfunction: error(self.pos, "Suite attached to non-function declaration") # Remember the actual type according to the function header # written here, because the type in the symbol table entry # may be different if we're overriding a C method inherited # from the base type of an extension type. - self.type = type - type.is_overridable = self.overridable + self.type = typ + typ.is_overridable = self.overridable declarator = self.declarator while not hasattr(declarator, 'args'): declarator = declarator.base @@ -2382,11 +2627,18 @@ class CFuncDefNode(FuncDefNode): error(self.cfunc_declarator.pos, "Function with optional arguments may not be declared public or api") - if type.exception_check == '+' and self.visibility != 'extern': - warning(self.cfunc_declarator.pos, + if typ.exception_check == '+' and self.visibility != 'extern': + if typ.exception_value and typ.exception_value.is_name: + # it really is impossible to reason about what the user wants to happens + # if they've specified a C++ exception translation function. Therefore, + # raise an error. 
+ error(self.pos, "Only extern functions can throw C++ exceptions.") + else: + warning(self.pos, + "Only extern functions can throw C++ exceptions.", 2) - for formal_arg, type_arg in zip(self.args, type.args): + for formal_arg, type_arg in zip(self.args, typ.args): self.align_argument_type(env, type_arg) formal_arg.type = type_arg.type formal_arg.name = type_arg.name @@ -2407,20 +2659,21 @@ class CFuncDefNode(FuncDefNode): elif 'inline' in self.modifiers: warning(formal_arg.pos, "Buffer unpacking not optimized away.", 1) - self._validate_type_visibility(type.return_type, self.pos, env) + self._validate_type_visibility(typ.return_type, self.pos, env) name = name_declarator.name cname = name_declarator.cname - type.is_const_method = self.is_const_method - type.is_static_method = self.is_static_method + typ.is_const_method = self.is_const_method + typ.is_static_method = self.is_static_method + self.entry = env.declare_cfunction( - name, type, self.pos, + name, typ, self.pos, cname=cname, visibility=self.visibility, api=self.api, defining=self.body is not None, modifiers=self.modifiers, overridable=self.overridable) self.entry.inline_func_in_pxd = self.inline_in_pxd - self.return_type = type.return_type + self.return_type = typ.return_type if self.return_type.is_array and self.visibility != 'extern': error(self.pos, "Function cannot return an array") if self.return_type.is_cpp_class: @@ -2435,38 +2688,45 @@ class CFuncDefNode(FuncDefNode): self.create_local_scope(env) def declare_cpdef_wrapper(self, env): - if self.overridable: - if self.is_static_method: - # TODO(robertwb): Finish this up, perhaps via more function refactoring. 
- error(self.pos, "static cpdef methods not yet supported") - name = self.entry.name - py_func_body = self.call_self_node(is_module_scope=env.is_module_scope) - if self.is_static_method: - from .ExprNodes import NameNode - decorators = [DecoratorNode(self.pos, decorator=NameNode(self.pos, name='staticmethod'))] - decorators[0].decorator.analyse_types(env) + if not self.overridable: + return + if self.is_static_method: + # TODO(robertwb): Finish this up, perhaps via more function refactoring. + error(self.pos, "static cpdef methods not yet supported") + + name = self.entry.name + py_func_body = self.call_self_node(is_module_scope=env.is_module_scope) + if self.is_static_method: + from .ExprNodes import NameNode + decorators = [DecoratorNode(self.pos, decorator=NameNode(self.pos, name=EncodedString('staticmethod')))] + decorators[0].decorator.analyse_types(env) + else: + decorators = [] + self.py_func = DefNode(pos=self.pos, + name=self.entry.name, + args=self.args, + star_arg=None, + starstar_arg=None, + doc=self.doc, + body=py_func_body, + decorators=decorators, + is_wrapper=1) + self.py_func.is_module_scope = env.is_module_scope + self.py_func.analyse_declarations(env) + self.py_func.entry.is_overridable = True + self.py_func_stat = StatListNode(self.pos, stats=[self.py_func]) + self.py_func.type = PyrexTypes.py_object_type + self.entry.as_variable = self.py_func.entry + self.entry.used = self.entry.as_variable.used = True + # Reset scope entry the above cfunction + env.entries[name] = self.entry + if (not self.entry.is_final_cmethod and + (not env.is_module_scope or Options.lookup_module_cpdef)): + if self.override: + # This is a hack: we shouldn't create the wrapper twice, but we do for fused functions. 
+ assert self.entry.is_fused_specialized # should not happen for non-fused cpdef functions + self.override.py_func = self.py_func else: - decorators = [] - self.py_func = DefNode(pos=self.pos, - name=self.entry.name, - args=self.args, - star_arg=None, - starstar_arg=None, - doc=self.doc, - body=py_func_body, - decorators=decorators, - is_wrapper=1) - self.py_func.is_module_scope = env.is_module_scope - self.py_func.analyse_declarations(env) - self.py_func.entry.is_overridable = True - self.py_func_stat = StatListNode(self.pos, stats=[self.py_func]) - self.py_func.type = PyrexTypes.py_object_type - self.entry.as_variable = self.py_func.entry - self.entry.used = self.entry.as_variable.used = True - # Reset scope entry the above cfunction - env.entries[name] = self.entry - if (not self.entry.is_final_cmethod and - (not env.is_module_scope or Options.lookup_module_cpdef)): self.override = OverrideCheckNode(self.pos, py_func=self.py_func) self.body = StatListNode(self.pos, stats=[self.override, self.body]) @@ -2585,7 +2845,7 @@ class CFuncDefNode(FuncDefNode): header = self.return_type.declaration_code(entity, dll_linkage=dll_linkage) #print (storage_class, modifiers, header) - needs_proto = self.is_c_class_method + needs_proto = self.is_c_class_method or self.entry.is_cproperty if self.template_declaration: if needs_proto: code.globalstate.parts['module_declarations'].putln(self.template_declaration) @@ -2638,7 +2898,7 @@ class CFuncDefNode(FuncDefNode): if entry.in_closure and not arg.default: code.putln('%s = %s;' % (entry.cname, entry.original_cname)) if entry.type.is_memoryviewslice: - code.put_incref_memoryviewslice(entry.cname, have_gil=True) + entry.type.generate_incref_memoryviewslice(code, entry.cname, True) else: code.put_var_incref(entry) code.put_var_giveref(entry) @@ -2670,7 +2930,6 @@ class CFuncDefNode(FuncDefNode): if self.return_type.is_pyobject: return "0" else: - #return None return self.entry.type.exception_value def 
caller_will_check_exceptions(self): @@ -2769,7 +3028,7 @@ class DefNode(FuncDefNode): self_in_stararg = 0 py_cfunc_node = None requires_classobj = False - defaults_struct = None # Dynamic kwrds structure name + defaults_struct = None # Dynamic kwrds structure name doc = None fused_py_func = False @@ -2782,14 +3041,17 @@ class DefNode(FuncDefNode): def __init__(self, pos, **kwds): FuncDefNode.__init__(self, pos, **kwds) - k = rk = r = 0 + p = k = rk = r = 0 for arg in self.args: + if arg.pos_only: + p += 1 if arg.kw_only: k += 1 if not arg.default: rk += 1 if not arg.default: r += 1 + self.num_posonly_args = p self.num_kwonly_args = k self.num_required_kw_args = rk self.num_required_args = r @@ -2887,8 +3149,18 @@ class DefNode(FuncDefNode): # staticmethod() was overridden - not much we can do here ... self.is_staticmethod = False - if self.name == '__new__' and env.is_py_class_scope: - self.is_staticmethod = 1 + if env.is_py_class_scope or env.is_c_class_scope: + if self.name == '__new__' and env.is_py_class_scope: + self.is_staticmethod = True + elif self.name == '__init_subclass__' and env.is_c_class_scope: + error(self.pos, "'__init_subclass__' is not supported by extension class") + elif self.name in IMPLICIT_CLASSMETHODS and not self.is_classmethod: + self.is_classmethod = True + # TODO: remove the need to generate a real decorator here, is_classmethod=True should suffice. 
+ from .ExprNodes import NameNode + self.decorators = self.decorators or [] + self.decorators.insert(0, DecoratorNode( + self.pos, decorator=NameNode(self.pos, name=EncodedString('classmethod')))) self.analyse_argument_types(env) if self.name == '<lambda>': @@ -2901,7 +3173,7 @@ class DefNode(FuncDefNode): # if a signature annotation provides a more specific return object type, use it if self.return_type is py_object_type and self.return_type_annotation: if env.directives['annotation_typing'] and not self.entry.is_special: - _, return_type = analyse_type_annotation(self.return_type_annotation, env) + _, return_type = self.return_type_annotation.analyse_type_annotation(env) if return_type and return_type.is_pyobject: self.return_type = return_type @@ -2941,9 +3213,6 @@ class DefNode(FuncDefNode): arg.name = name_declarator.name arg.type = type - if type.is_fused: - self.has_fused_arguments = True - self.align_argument_type(env, arg) if name_declarator and name_declarator.cname: error(self.pos, "Python function argument cannot have C name specification") @@ -2968,12 +3237,15 @@ class DefNode(FuncDefNode): else: # probably just a plain 'object' arg.accept_none = True - else: - arg.accept_none = True # won't be used, but must be there + elif not arg.type.is_error: + arg.accept_none = True # won't be used, but must be there if arg.not_none: error(arg.pos, "Only Python type arguments can have 'not None'") if arg.or_none: error(arg.pos, "Only Python type arguments can have 'or None'") + + if arg.type.is_fused: + self.has_fused_arguments = True env.fused_to_specific = f2s if has_np_pythran(env): @@ -2986,8 +3258,10 @@ class DefNode(FuncDefNode): if self.decorators: error(self.pos, "special functions of cdef classes cannot have decorators") self.entry.trivial_signature = len(self.args) == 1 and not (self.star_arg or self.starstar_arg) - elif not env.directives['always_allow_keywords'] and not (self.star_arg or self.starstar_arg): - # Use the simpler calling signature for 
zero- and one-argument functions. + elif not (self.star_arg or self.starstar_arg) and ( + not env.directives['always_allow_keywords'] + or all([arg.pos_only for arg in self.args])): + # Use the simpler calling signature for zero- and one-argument pos-only functions. if self.entry.signature is TypeSlots.pyfunction_signature: if len(self.args) == 0: self.entry.signature = TypeSlots.pyfunction_noargs @@ -3002,18 +3276,19 @@ class DefNode(FuncDefNode): self.entry.signature = TypeSlots.ibinaryfunc sig = self.entry.signature - nfixed = sig.num_fixed_args() + nfixed = sig.max_num_fixed_args() + min_nfixed = sig.min_num_fixed_args() if (sig is TypeSlots.pymethod_signature and nfixed == 1 and len(self.args) == 0 and self.star_arg): # this is the only case where a diverging number of # arguments is not an error - when we have no explicit # 'self' parameter as in method(*args) - sig = self.entry.signature = TypeSlots.pyfunction_signature # self is not 'really' used + sig = self.entry.signature = TypeSlots.pyfunction_signature # self is not 'really' used self.self_in_stararg = 1 - nfixed = 0 + nfixed = min_nfixed = 0 if self.is_staticmethod and env.is_c_class_scope: - nfixed = 0 + nfixed = min_nfixed = 0 self.self_in_stararg = True # FIXME: why for staticmethods? 
self.entry.signature = sig = copy.copy(sig) @@ -3028,6 +3303,8 @@ class DefNode(FuncDefNode): for i in range(min(nfixed, len(self.args))): arg = self.args[i] arg.is_generic = 0 + if i >= min_nfixed: + arg.is_special_method_optional = True if sig.is_self_arg(i) and not self.is_staticmethod: if self.is_classmethod: arg.is_type_arg = 1 @@ -3043,12 +3320,8 @@ class DefNode(FuncDefNode): arg.needs_type_test = 1 else: arg.needs_conversion = 1 - if arg.needs_conversion: - arg.hdr_cname = Naming.arg_prefix + arg.name - else: - arg.hdr_cname = Naming.var_prefix + arg.name - if nfixed > len(self.args): + if min_nfixed > len(self.args): self.bad_signature() return elif nfixed < len(self.args): @@ -3058,11 +3331,38 @@ class DefNode(FuncDefNode): if arg.is_generic and (arg.type.is_extension_type or arg.type.is_builtin_type): arg.needs_type_test = 1 + # Decide whether to use METH_FASTCALL + # 1. If we use METH_NOARGS or METH_O, keep that. We can only change + # METH_VARARGS to METH_FASTCALL + # 2. Special methods like __call__ always use the METH_VARGARGS + # calling convention + mf = sig.method_flags() + if mf and TypeSlots.method_varargs in mf and not self.entry.is_special: + # 3. If the function uses the full args tuple, it's more + # efficient to use METH_VARARGS. This happens when the function + # takes *args but no other positional arguments (apart from + # possibly self). We don't do the analogous check for keyword + # arguments since the kwargs dict is copied anyway. 
+ if self.star_arg: + uses_args_tuple = True + for arg in self.args: + if (arg.is_generic and not arg.kw_only and + not arg.is_self_arg and not arg.is_type_arg): + # Other positional argument + uses_args_tuple = False + else: + uses_args_tuple = False + + if not uses_args_tuple: + sig = self.entry.signature = sig.with_fastcall() + def bad_signature(self): sig = self.entry.signature - expected_str = "%d" % sig.num_fixed_args() + expected_str = "%d" % sig.min_num_fixed_args() if sig.has_generic_args: expected_str += " or more" + elif sig.optional_object_arg_count: + expected_str += " to %d" % sig.max_num_fixed_args() name = self.name if name.startswith("__") and name.endswith("__"): desc = "Special method" @@ -3083,16 +3383,16 @@ class DefNode(FuncDefNode): entry = env.declare_pyfunction(name, self.pos, allow_redefine=not self.is_wrapper) self.entry = entry prefix = env.next_id(env.scope_prefix) - self.entry.pyfunc_cname = Naming.pyfunc_prefix + prefix + name + self.entry.pyfunc_cname = punycodify_name(Naming.pyfunc_prefix + prefix + name) if Options.docstrings: entry.doc = embed_position(self.pos, self.doc) - entry.doc_cname = Naming.funcdoc_prefix + prefix + name + entry.doc_cname = punycodify_name(Naming.funcdoc_prefix + prefix + name) if entry.is_special: if entry.name in TypeSlots.invisible or not entry.doc or ( entry.name in '__getattr__' and env.directives['fast_getattr']): entry.wrapperbase_cname = None else: - entry.wrapperbase_cname = Naming.wrapperbase_prefix + prefix + name + entry.wrapperbase_cname = punycodify_name(Naming.wrapperbase_prefix + prefix + name) else: entry.doc = None @@ -3135,8 +3435,6 @@ class DefNode(FuncDefNode): self.local_scope.directives = env.directives self.analyse_default_values(env) self.analyse_annotations(env) - if self.return_type_annotation: - self.return_type_annotation = self.analyse_annotation(env, self.return_type_annotation) if not self.needs_assignment_synthesis(env) and self.decorators: for decorator in 
self.decorators[::-1]: @@ -3237,8 +3535,20 @@ class DefNode(FuncDefNode): # Move arguments into closure if required def put_into_closure(entry): if entry.in_closure: - code.putln('%s = %s;' % (entry.cname, entry.original_cname)) - if entry.xdecref_cleanup: + if entry.type.is_array: + # This applies to generator expressions that iterate over C arrays (and need to + # capture them by value), under most other circumstances C array arguments are dropped to + # pointers so this copy isn't used + assert entry.type.size is not None + code.globalstate.use_utility_code(UtilityCode.load_cached("IncludeStringH", "StringTools.c")) + code.putln("memcpy({0}, {1}, sizeof({0}));".format(entry.cname, entry.original_cname)) + else: + code.putln('%s = %s;' % (entry.cname, entry.original_cname)) + if entry.type.is_memoryviewslice: + # TODO - at some point reference count of memoryviews should + # genuinely be unified with PyObjects + entry.type.generate_incref_memoryviewslice(code, entry.cname, True) + elif entry.xdecref_cleanup: # mostly applies to the starstar arg - this can sometimes be NULL # so must be xincrefed instead code.put_var_xincref(entry) @@ -3260,10 +3570,11 @@ class DefNodeWrapper(FuncDefNode): # DefNode python wrapper code generator defnode = None - target = None # Target DefNode + target = None # Target DefNode def __init__(self, *args, **kwargs): FuncDefNode.__init__(self, *args, **kwargs) + self.num_posonly_args = self.target.num_posonly_args self.num_kwonly_args = self.target.num_kwonly_args self.num_required_kw_args = self.target.num_required_kw_args self.num_required_args = self.target.num_required_args @@ -3274,8 +3585,8 @@ class DefNodeWrapper(FuncDefNode): target_entry = self.target.entry name = self.name prefix = env.next_id(env.scope_prefix) - target_entry.func_cname = Naming.pywrap_prefix + prefix + name - target_entry.pymethdef_cname = Naming.pymethdef_prefix + prefix + name + target_entry.func_cname = punycodify_name(Naming.pywrap_prefix + prefix + name) 
+ target_entry.pymethdef_cname = punycodify_name(Naming.pymethdef_prefix + prefix + name) self.signature = target_entry.signature @@ -3289,10 +3600,10 @@ class DefNodeWrapper(FuncDefNode): for arg in self.args: if not arg.type.is_pyobject: if not arg.type.create_from_py_utility_code(env): - pass # will fail later + pass # will fail later elif arg.hdr_type and not arg.hdr_type.is_pyobject: if not arg.hdr_type.create_to_py_utility_code(env): - pass # will fail later + pass # will fail later if self.starstar_arg and not self.starstar_arg.entry.cf_used: # we will set the kwargs argument to NULL instead of a new dict @@ -3319,7 +3630,13 @@ class DefNodeWrapper(FuncDefNode): if self.signature.has_dummy_arg: args.append(Naming.self_cname) for arg in self.args: - if arg.hdr_type and not (arg.type.is_memoryviewslice or + if arg.type.is_cpp_class: + # it's safe to move converted C++ types because they aren't + # used again afterwards + code.globalstate.use_utility_code( + UtilityCode.load_cached("MoveIfSupported", "CppSupport.cpp")) + args.append("__PYX_STD_MOVE_IF_SUPPORTED(%s)" % arg.entry.cname) + elif arg.hdr_type and not (arg.type.is_memoryviewslice or arg.type.is_struct or arg.type.is_complex): args.append(arg.type.cast_code(arg.entry.cname)) @@ -3363,7 +3680,7 @@ class DefNodeWrapper(FuncDefNode): self.return_type.declaration_code(Naming.retval_cname), retval_init)) code.put_declare_refcount_context() - code.put_setup_refcount_context('%s (wrapper)' % self.name) + code.put_setup_refcount_context(EncodedString('%s (wrapper)' % self.name)) self.generate_argument_parsing_code(lenv, code) self.generate_argument_type_tests(code) @@ -3389,8 +3706,20 @@ class DefNodeWrapper(FuncDefNode): # ----- Non-error return cleanup code.put_label(code.return_label) for entry in lenv.var_entries: - if entry.is_arg and entry.type.is_pyobject: - code.put_var_decref(entry) + if entry.is_arg: + # mainly captures the star/starstar args + if entry.xdecref_cleanup: + code.put_var_xdecref(entry) 
+ else: + code.put_var_decref(entry) + for arg in self.args: + if not arg.type.is_pyobject: + # This captures anything that's been converted from a PyObject. + # Primarily memoryviews at the moment + if arg.entry.xdecref_cleanup: + code.put_var_xdecref(arg.entry) + else: + code.put_var_decref(arg.entry) code.put_finish_refcount_context() if not self.return_type.is_void: @@ -3420,12 +3749,20 @@ class DefNodeWrapper(FuncDefNode): entry = self.target.entry if not entry.is_special and sig.method_flags() == [TypeSlots.method_noargs]: arg_code_list.append("CYTHON_UNUSED PyObject *unused") - if entry.scope.is_c_class_scope and entry.name == "__ipow__": - arg_code_list.append("CYTHON_UNUSED PyObject *unused") if sig.has_generic_args: - arg_code_list.append( - "PyObject *%s, PyObject *%s" % ( - Naming.args_cname, Naming.kwds_cname)) + varargs_args = "PyObject *%s, PyObject *%s" % ( + Naming.args_cname, Naming.kwds_cname) + if sig.use_fastcall: + fastcall_args = "PyObject *const *%s, Py_ssize_t %s, PyObject *%s" % ( + Naming.args_cname, Naming.nargs_cname, Naming.kwds_cname) + arg_code_list.append( + "\n#if CYTHON_METH_FASTCALL\n%s\n#else\n%s\n#endif\n" % ( + fastcall_args, varargs_args)) + else: + arg_code_list.append(varargs_args) + if entry.is_special: + for n in range(len(self.args), sig.max_num_fixed_args()): + arg_code_list.append("CYTHON_UNUSED PyObject *unused_arg_%s" % n) arg_code = ", ".join(arg_code_list) # Prevent warning: unused function '__pyx_pw_5numpy_7ndarray_1__getbuffer__' @@ -3436,7 +3773,7 @@ class DefNodeWrapper(FuncDefNode): with_pymethdef = False dc = self.return_type.declaration_code(entry.func_cname) - header = "static %s%s(%s)" % (mf, dc, arg_code) + header = "%sstatic %s(%s)" % (mf, dc, arg_code) code.putln("%s; /*proto*/" % header) if proto_only: @@ -3459,7 +3796,7 @@ class DefNodeWrapper(FuncDefNode): docstr = docstr.as_utf8_string() if not (entry.is_special and entry.name in ('__getbuffer__', '__releasebuffer__')): - code.putln('static char 
%s[] = %s;' % ( + code.putln('PyDoc_STRVAR(%s, %s);' % ( entry.doc_cname, docstr.as_c_string_literal())) @@ -3486,6 +3823,23 @@ class DefNodeWrapper(FuncDefNode): if entry.is_arg: code.put_var_declaration(entry) + # Assign nargs variable as len(args), but avoid an "unused" warning in the few cases where we don't need it. + if self.signature_has_generic_args(): + nargs_code = "CYTHON_UNUSED const Py_ssize_t %s = PyTuple_GET_SIZE(%s);" % ( + Naming.nargs_cname, Naming.args_cname) + if self.signature.use_fastcall: + code.putln("#if !CYTHON_METH_FASTCALL") + code.putln(nargs_code) + code.putln("#endif") + else: + code.putln(nargs_code) + + # Array containing the values of keyword arguments when using METH_FASTCALL. + code.globalstate.use_utility_code( + UtilityCode.load_cached("fastcall", "FunctionArguments.c")) + code.putln('CYTHON_UNUSED PyObject *const *%s = __Pyx_KwValues_%s(%s, %s);' % ( + Naming.kwvalues_cname, self.signature.fastvar, Naming.args_cname, Naming.nargs_cname)) + def generate_argument_parsing_code(self, env, code): # Generate fast equivalent of PyArg_ParseTuple call for # generic arguments, if any, including args/kwargs @@ -3509,6 +3863,8 @@ class DefNodeWrapper(FuncDefNode): elif not self.signature_has_nongeneric_args(): # func(*args) or func(**kw) or func(*args, **kw) + # possibly with a "self" argument but no other non-star + # arguments self.generate_stararg_copy_code(code) else: @@ -3526,6 +3882,11 @@ class DefNodeWrapper(FuncDefNode): code.put_var_xdecref_clear(self.starstar_arg.entry) else: code.put_var_decref_clear(self.starstar_arg.entry) + for arg in self.args: + if not arg.type.is_pyobject and arg.type.needs_refcounting: + # at the moment this just catches memoryviewslices, but in future + # other non-PyObject reference counted types might need cleanup + code.put_var_xdecref(arg.entry) code.put_add_traceback(self.target.entry.qualified_name) code.put_finish_refcount_context() code.putln("return %s;" % self.error_value()) @@ -3544,10 
+3905,9 @@ class DefNodeWrapper(FuncDefNode): if not self.star_arg: code.globalstate.use_utility_code( UtilityCode.load_cached("RaiseArgTupleInvalid", "FunctionArguments.c")) - code.putln("if (unlikely(PyTuple_GET_SIZE(%s) > 0)) {" % - Naming.args_cname) - code.put('__Pyx_RaiseArgtupleInvalid("%s", 1, 0, 0, PyTuple_GET_SIZE(%s)); return %s;' % ( - self.name, Naming.args_cname, self.error_value())) + code.putln("if (unlikely(%s > 0)) {" % Naming.nargs_cname) + code.put('__Pyx_RaiseArgtupleInvalid(%s, 1, 0, 0, %s); return %s;' % ( + self.name.as_c_string_literal(), Naming.nargs_cname, self.error_value())) code.putln("}") if self.starstar_arg: @@ -3556,69 +3916,66 @@ class DefNodeWrapper(FuncDefNode): else: kwarg_check = "%s" % Naming.kwds_cname else: - kwarg_check = "unlikely(%s) && unlikely(PyDict_Size(%s) > 0)" % ( - Naming.kwds_cname, Naming.kwds_cname) + kwarg_check = "unlikely(%s) && __Pyx_NumKwargs_%s(%s)" % ( + Naming.kwds_cname, self.signature.fastvar, Naming.kwds_cname) code.globalstate.use_utility_code( UtilityCode.load_cached("KeywordStringCheck", "FunctionArguments.c")) code.putln( - "if (%s && unlikely(!__Pyx_CheckKeywordStrings(%s, \"%s\", %d))) return %s;" % ( - kwarg_check, Naming.kwds_cname, self.name, + "if (%s && unlikely(!__Pyx_CheckKeywordStrings(%s, %s, %d))) return %s;" % ( + kwarg_check, Naming.kwds_cname, self.name.as_c_string_literal(), bool(self.starstar_arg), self.error_value())) if self.starstar_arg and self.starstar_arg.entry.cf_used: - if all(ref.node.allow_null for ref in self.starstar_arg.entry.cf_references): - code.putln("if (%s) {" % kwarg_check) - code.putln("%s = PyDict_Copy(%s); if (unlikely(!%s)) return %s;" % ( - self.starstar_arg.entry.cname, - Naming.kwds_cname, - self.starstar_arg.entry.cname, - self.error_value())) - code.put_gotref(self.starstar_arg.entry.cname) - code.putln("} else {") - code.putln("%s = NULL;" % (self.starstar_arg.entry.cname,)) - code.putln("}") - self.starstar_arg.entry.xdecref_cleanup = 1 - else: - 
code.put("%s = (%s) ? PyDict_Copy(%s) : PyDict_New(); " % ( - self.starstar_arg.entry.cname, - Naming.kwds_cname, - Naming.kwds_cname)) - code.putln("if (unlikely(!%s)) return %s;" % ( - self.starstar_arg.entry.cname, self.error_value())) - self.starstar_arg.entry.xdecref_cleanup = 0 - code.put_gotref(self.starstar_arg.entry.cname) + code.putln("if (%s) {" % kwarg_check) + code.putln("%s = __Pyx_KwargsAsDict_%s(%s, %s);" % ( + self.starstar_arg.entry.cname, + self.signature.fastvar, + Naming.kwds_cname, + Naming.kwvalues_cname)) + code.putln("if (unlikely(!%s)) return %s;" % ( + self.starstar_arg.entry.cname, self.error_value())) + code.put_gotref(self.starstar_arg.entry.cname, py_object_type) + code.putln("} else {") + code.putln("%s = PyDict_New();" % (self.starstar_arg.entry.cname,)) + code.putln("if (unlikely(!%s)) return %s;" % ( + self.starstar_arg.entry.cname, self.error_value())) + code.put_var_gotref(self.starstar_arg.entry) + self.starstar_arg.entry.xdecref_cleanup = False + code.putln("}") if self.self_in_stararg and not self.target.is_staticmethod: + assert not self.signature.use_fastcall # need to create a new tuple with 'self' inserted as first item - code.put("%s = PyTuple_New(PyTuple_GET_SIZE(%s)+1); if (unlikely(!%s)) " % ( + code.put("%s = PyTuple_New(%s + 1); if (unlikely(!%s)) " % ( self.star_arg.entry.cname, - Naming.args_cname, + Naming.nargs_cname, self.star_arg.entry.cname)) if self.starstar_arg and self.starstar_arg.entry.cf_used: code.putln("{") - code.put_xdecref_clear(self.starstar_arg.entry.cname, py_object_type) + code.put_var_xdecref_clear(self.starstar_arg.entry) code.putln("return %s;" % self.error_value()) code.putln("}") else: code.putln("return %s;" % self.error_value()) - code.put_gotref(self.star_arg.entry.cname) + code.put_var_gotref(self.star_arg.entry) code.put_incref(Naming.self_cname, py_object_type) - code.put_giveref(Naming.self_cname) + code.put_giveref(Naming.self_cname, py_object_type) code.putln("PyTuple_SET_ITEM(%s, 
0, %s);" % ( self.star_arg.entry.cname, Naming.self_cname)) temp = code.funcstate.allocate_temp(PyrexTypes.c_py_ssize_t_type, manage_ref=False) - code.putln("for (%s=0; %s < PyTuple_GET_SIZE(%s); %s++) {" % ( - temp, temp, Naming.args_cname, temp)) + code.putln("for (%s=0; %s < %s; %s++) {" % ( + temp, temp, Naming.nargs_cname, temp)) code.putln("PyObject* item = PyTuple_GET_ITEM(%s, %s);" % ( Naming.args_cname, temp)) code.put_incref("item", py_object_type) - code.put_giveref("item") + code.put_giveref("item", py_object_type) code.putln("PyTuple_SET_ITEM(%s, %s+1, item);" % ( self.star_arg.entry.cname, temp)) code.putln("}") code.funcstate.release_temp(temp) self.star_arg.entry.xdecref_cleanup = 0 elif self.star_arg: + assert not self.signature.use_fastcall code.put_incref(Naming.args_cname, py_object_type) code.putln("%s = %s;" % ( self.star_arg.entry.cname, @@ -3626,11 +3983,17 @@ class DefNodeWrapper(FuncDefNode): self.star_arg.entry.xdecref_cleanup = 0 def generate_tuple_and_keyword_parsing_code(self, args, success_label, code): + code.globalstate.use_utility_code( + UtilityCode.load_cached("fastcall", "FunctionArguments.c")) + + self_name_csafe = self.name.as_c_string_literal() + argtuple_error_label = code.new_label("argtuple_error") positional_args = [] required_kw_only_args = [] optional_kw_only_args = [] + num_pos_only_args = 0 for arg in args: if arg.is_generic: if arg.default: @@ -3643,6 +4006,8 @@ class DefNodeWrapper(FuncDefNode): required_kw_only_args.append(arg) elif not arg.is_self_arg and not arg.is_type_arg: positional_args.append(arg) + if arg.pos_only: + num_pos_only_args += 1 # sort required kw-only args before optional ones to avoid special # cases in the unpacking code @@ -3661,10 +4026,12 @@ class DefNodeWrapper(FuncDefNode): code.putln('{') all_args = tuple(positional_args) + tuple(kw_only_args) - code.putln("static PyObject **%s[] = {%s,0};" % ( + non_posonly_args = [arg for arg in all_args if not arg.pos_only] + non_pos_args_id = 
','.join( + ['&%s' % code.intern_identifier(arg.entry.name) for arg in non_posonly_args] + ['0']) + code.putln("PyObject **%s[] = {%s};" % ( Naming.pykwdlist_cname, - ','.join(['&%s' % code.intern_identifier(arg.name) - for arg in all_args]))) + non_pos_args_id)) # Before being converted and assigned to the target variables, # borrowed references to all unpacked argument values are @@ -3676,14 +4043,43 @@ class DefNodeWrapper(FuncDefNode): # was passed for them. self.generate_argument_values_setup_code(all_args, code) + # If all args are positional-only, we can raise an error + # straight away if we receive a non-empty kw-dict. + # This requires a PyDict_Size call. This call is wasteful + # for functions which do accept kw-args, so we do not generate + # the PyDict_Size call unless all args are positional-only. + accept_kwd_args = non_posonly_args or self.starstar_arg + if accept_kwd_args: + kw_unpacking_condition = Naming.kwds_cname + else: + kw_unpacking_condition = "%s && __Pyx_NumKwargs_%s(%s) > 0" % ( + Naming.kwds_cname, self.signature.fastvar, Naming.kwds_cname) + + if self.num_required_kw_args > 0: + kw_unpacking_condition = "likely(%s)" % kw_unpacking_condition + # --- optimised code when we receive keyword arguments - code.putln("if (%s(%s)) {" % ( - (self.num_required_kw_args > 0) and "likely" or "unlikely", - Naming.kwds_cname)) - self.generate_keyword_unpacking_code( - min_positional_args, max_positional_args, - has_fixed_positional_count, has_kw_only_args, - all_args, argtuple_error_label, code) + code.putln("if (%s) {" % kw_unpacking_condition) + + if accept_kwd_args: + self.generate_keyword_unpacking_code( + min_positional_args, max_positional_args, + has_fixed_positional_count, has_kw_only_args, all_args, argtuple_error_label, code) + else: + # Here we do not accept kw-args but we are passed a non-empty kw-dict. 
+ # We call ParseOptionalKeywords which will raise an appropriate error if + # the kw-args dict passed is non-empty (which it will be, since kw_unpacking_condition is true) + code.globalstate.use_utility_code( + UtilityCode.load_cached("ParseKeywords", "FunctionArguments.c")) + code.putln('if (likely(__Pyx_ParseOptionalKeywords(%s, %s, %s, %s, %s, %s, %s) < 0)) %s' % ( + Naming.kwds_cname, + Naming.kwvalues_cname, + Naming.pykwdlist_cname, + self.starstar_arg.entry.cname if self.starstar_arg else 0, + 'values', + 0, + self_name_csafe, + code.error_goto(self.pos))) # --- optimised code when we do not receive any keyword arguments if (self.num_required_kw_args and min_positional_args > 0) or min_positional_args == max_positional_args: @@ -3693,20 +4089,20 @@ class DefNodeWrapper(FuncDefNode): compare = '!=' else: compare = '<' - code.putln('} else if (PyTuple_GET_SIZE(%s) %s %d) {' % ( - Naming.args_cname, compare, min_positional_args)) + code.putln('} else if (unlikely(%s %s %d)) {' % ( + Naming.nargs_cname, compare, min_positional_args)) code.put_goto(argtuple_error_label) if self.num_required_kw_args: # pure error case: keywords required but not passed if max_positional_args > min_positional_args and not self.star_arg: - code.putln('} else if (PyTuple_GET_SIZE(%s) > %d) {' % ( - Naming.args_cname, max_positional_args)) + code.putln('} else if (unlikely(%s > %d)) {' % ( + Naming.nargs_cname, max_positional_args)) code.put_goto(argtuple_error_label) code.putln('} else {') for i, arg in enumerate(kw_only_args): if not arg.default: - pystring_cname = code.intern_identifier(arg.name) + pystring_cname = code.intern_identifier(arg.entry.name) # required keyword-only argument missing code.globalstate.use_utility_code( UtilityCode.load_cached("RaiseKeywordRequired", "FunctionArguments.c")) @@ -3723,11 +4119,12 @@ class DefNodeWrapper(FuncDefNode): # parse the exact number of positional arguments from # the args tuple for i, arg in enumerate(positional_args): - 
code.putln("values[%d] = PyTuple_GET_ITEM(%s, %d);" % (i, Naming.args_cname, i)) + code.putln("values[%d] = __Pyx_Arg_%s(%s, %d);" % ( + i, self.signature.fastvar, Naming.args_cname, i)) else: # parse the positional arguments from the variable length # args tuple and reject illegal argument tuple sizes - code.putln('switch (PyTuple_GET_SIZE(%s)) {' % Naming.args_cname) + code.putln('switch (%s) {' % Naming.nargs_cname) if self.star_arg: code.putln('default:') reversed_args = list(enumerate(positional_args))[::-1] @@ -3736,7 +4133,8 @@ class DefNodeWrapper(FuncDefNode): if i != reversed_args[0][0]: code.putln('CYTHON_FALLTHROUGH;') code.put('case %2d: ' % (i+1)) - code.putln("values[%d] = PyTuple_GET_ITEM(%s, %d);" % (i, Naming.args_cname, i)) + code.putln("values[%d] = __Pyx_Arg_%s(%s, %d);" % ( + i, self.signature.fastvar, Naming.args_cname, i)) if min_positional_args == 0: code.putln('CYTHON_FALLTHROUGH;') code.put('case 0: ') @@ -3751,7 +4149,7 @@ class DefNodeWrapper(FuncDefNode): code.put_goto(argtuple_error_label) code.putln('}') - code.putln('}') # end of the conditional unpacking blocks + code.putln('}') # end of the conditional unpacking blocks # Convert arg values to their final type and assign them. 
# Also inject non-Python default arguments, which do cannot @@ -3759,17 +4157,17 @@ class DefNodeWrapper(FuncDefNode): for i, arg in enumerate(all_args): self.generate_arg_assignment(arg, "values[%d]" % i, code) - code.putln('}') # end of the whole argument unpacking block + code.putln('}') # end of the whole argument unpacking block if code.label_used(argtuple_error_label): code.put_goto(success_label) code.put_label(argtuple_error_label) code.globalstate.use_utility_code( UtilityCode.load_cached("RaiseArgTupleInvalid", "FunctionArguments.c")) - code.put('__Pyx_RaiseArgtupleInvalid("%s", %d, %d, %d, PyTuple_GET_SIZE(%s)); ' % ( - self.name, has_fixed_positional_count, + code.put('__Pyx_RaiseArgtupleInvalid(%s, %d, %d, %d, %s); ' % ( + self_name_csafe, has_fixed_positional_count, min_positional_args, max_positional_args, - Naming.args_cname)) + Naming.nargs_cname)) code.putln(code.error_goto(self.pos)) def generate_arg_assignment(self, arg, item, code): @@ -3792,8 +4190,7 @@ class DefNodeWrapper(FuncDefNode): arg.entry.cname, arg.calculate_default_value_code(code))) if arg.type.is_memoryviewslice: - code.put_incref_memoryviewslice(arg.entry.cname, - have_gil=True) + code.put_var_incref_memoryviewslice(arg.entry, have_gil=True) code.putln('}') else: error(arg.pos, "Cannot convert Python object argument to type '%s'" % arg.type) @@ -3805,26 +4202,30 @@ class DefNodeWrapper(FuncDefNode): self.starstar_arg.entry.cname, self.starstar_arg.entry.cname, self.error_value())) - code.put_gotref(self.starstar_arg.entry.cname) + code.put_var_gotref(self.starstar_arg.entry) if self.star_arg: self.star_arg.entry.xdecref_cleanup = 0 - code.putln('if (PyTuple_GET_SIZE(%s) > %d) {' % ( - Naming.args_cname, - max_positional_args)) - code.putln('%s = PyTuple_GetSlice(%s, %d, PyTuple_GET_SIZE(%s));' % ( - self.star_arg.entry.cname, Naming.args_cname, - max_positional_args, Naming.args_cname)) - code.putln("if (unlikely(!%s)) {" % self.star_arg.entry.cname) - if self.starstar_arg: - 
code.put_decref_clear(self.starstar_arg.entry.cname, py_object_type) - code.put_finish_refcount_context() - code.putln('return %s;' % self.error_value()) - code.putln('}') - code.put_gotref(self.star_arg.entry.cname) - code.putln('} else {') - code.put("%s = %s; " % (self.star_arg.entry.cname, Naming.empty_tuple)) - code.put_incref(Naming.empty_tuple, py_object_type) - code.putln('}') + if max_positional_args == 0: + # If there are no positional arguments, use the args tuple + # directly + assert not self.signature.use_fastcall + code.put_incref(Naming.args_cname, py_object_type) + code.putln("%s = %s;" % (self.star_arg.entry.cname, Naming.args_cname)) + else: + # It is possible that this is a slice of "negative" length, + # as in args[5:3]. That's not a problem, the function below + # handles that efficiently and returns the empty tuple. + code.putln('%s = __Pyx_ArgsSlice_%s(%s, %d, %s);' % ( + self.star_arg.entry.cname, self.signature.fastvar, + Naming.args_cname, max_positional_args, Naming.nargs_cname)) + code.putln("if (unlikely(!%s)) {" % + self.star_arg.entry.type.nullcheck_string(self.star_arg.entry.cname)) + if self.starstar_arg: + code.put_var_decref_clear(self.starstar_arg.entry) + code.put_finish_refcount_context() + code.putln('return %s;' % self.error_value()) + code.putln('}') + code.put_var_gotref(self.star_arg.entry) def generate_argument_values_setup_code(self, args, code): max_args = len(args) @@ -3846,22 +4247,45 @@ class DefNodeWrapper(FuncDefNode): code.putln('values[%d] = %s;' % (i, arg.type.as_pyobject(default_value))) def generate_keyword_unpacking_code(self, min_positional_args, max_positional_args, - has_fixed_positional_count, has_kw_only_args, - all_args, argtuple_error_label, code): + has_fixed_positional_count, + has_kw_only_args, all_args, argtuple_error_label, code): + # First we count how many arguments must be passed as positional + num_required_posonly_args = num_pos_only_args = 0 + for i, arg in enumerate(all_args): + if 
arg.pos_only: + num_pos_only_args += 1 + if not arg.default: + num_required_posonly_args += 1 + code.putln('Py_ssize_t kw_args;') - code.putln('const Py_ssize_t pos_args = PyTuple_GET_SIZE(%s);' % Naming.args_cname) # copy the values from the args tuple and check that it's not too long - code.putln('switch (pos_args) {') + code.putln('switch (%s) {' % Naming.nargs_cname) if self.star_arg: code.putln('default:') - for i in range(max_positional_args-1, -1, -1): + + for i in range(max_positional_args-1, num_required_posonly_args-1, -1): code.put('case %2d: ' % (i+1)) - code.putln("values[%d] = PyTuple_GET_ITEM(%s, %d);" % ( - i, Naming.args_cname, i)) + code.putln("values[%d] = __Pyx_Arg_%s(%s, %d);" % ( + i, self.signature.fastvar, Naming.args_cname, i)) code.putln('CYTHON_FALLTHROUGH;') - code.putln('case 0: break;') + if num_required_posonly_args > 0: + code.put('case %2d: ' % num_required_posonly_args) + for i in range(num_required_posonly_args-1, -1, -1): + code.putln("values[%d] = __Pyx_Arg_%s(%s, %d);" % ( + i, self.signature.fastvar, Naming.args_cname, i)) + code.putln('break;') + for i in range(num_required_posonly_args-2, -1, -1): + code.put('case %2d: ' % (i+1)) + code.putln('CYTHON_FALLTHROUGH;') + + code.put('case 0: ') + if num_required_posonly_args == 0: + code.putln('break;') + else: + # catch-all for not enough pos-only args passed + code.put_goto(argtuple_error_label) if not self.star_arg: - code.put('default: ') # more arguments than allowed + code.put('default: ') # more arguments than allowed code.put_goto(argtuple_error_label) code.putln('}') @@ -3874,7 +4298,10 @@ class DefNodeWrapper(FuncDefNode): # If we received kwargs, fill up the positional/required # arguments with values from the kw dict - code.putln('kw_args = PyDict_Size(%s);' % Naming.kwds_cname) + self_name_csafe = self.name.as_c_string_literal() + + code.putln('kw_args = __Pyx_NumKwargs_%s(%s);' % ( + self.signature.fastvar, Naming.kwds_cname)) if self.num_required_args or 
max_positional_args > 0: last_required_arg = -1 for i, arg in enumerate(all_args): @@ -3882,30 +4309,32 @@ class DefNodeWrapper(FuncDefNode): last_required_arg = i if last_required_arg < max_positional_args: last_required_arg = max_positional_args-1 - if max_positional_args > 0: - code.putln('switch (pos_args) {') - for i, arg in enumerate(all_args[:last_required_arg+1]): - if max_positional_args > 0 and i <= max_positional_args: - if i != 0: + if max_positional_args > num_pos_only_args: + code.putln('switch (%s) {' % Naming.nargs_cname) + for i, arg in enumerate(all_args[num_pos_only_args:last_required_arg+1], num_pos_only_args): + if max_positional_args > num_pos_only_args and i <= max_positional_args: + if i != num_pos_only_args: code.putln('CYTHON_FALLTHROUGH;') if self.star_arg and i == max_positional_args: code.putln('default:') else: code.putln('case %2d:' % i) - pystring_cname = code.intern_identifier(arg.name) + pystring_cname = code.intern_identifier(arg.entry.name) if arg.default: if arg.kw_only: # optional kw-only args are handled separately below continue code.putln('if (kw_args > 0) {') # don't overwrite default argument - code.putln('PyObject* value = __Pyx_PyDict_GetItemStr(%s, %s);' % ( - Naming.kwds_cname, pystring_cname)) + code.putln('PyObject* value = __Pyx_GetKwValue_%s(%s, %s, %s);' % ( + self.signature.fastvar, Naming.kwds_cname, Naming.kwvalues_cname, pystring_cname)) code.putln('if (value) { values[%d] = value; kw_args--; }' % i) + code.putln('else if (unlikely(PyErr_Occurred())) %s' % code.error_goto(self.pos)) code.putln('}') else: - code.putln('if (likely((values[%d] = __Pyx_PyDict_GetItemStr(%s, %s)) != 0)) kw_args--;' % ( - i, Naming.kwds_cname, pystring_cname)) + code.putln('if (likely((values[%d] = __Pyx_GetKwValue_%s(%s, %s, %s)) != 0)) kw_args--;' % ( + i, self.signature.fastvar, Naming.kwds_cname, Naming.kwvalues_cname, pystring_cname)) + code.putln('else if (unlikely(PyErr_Occurred())) %s' % code.error_goto(self.pos)) if i < 
min_positional_args: if i == 0: # special case: we know arg 0 is missing @@ -3918,8 +4347,8 @@ class DefNodeWrapper(FuncDefNode): code.putln('else {') code.globalstate.use_utility_code( UtilityCode.load_cached("RaiseArgTupleInvalid", "FunctionArguments.c")) - code.put('__Pyx_RaiseArgtupleInvalid("%s", %d, %d, %d, %d); ' % ( - self.name, has_fixed_positional_count, + code.put('__Pyx_RaiseArgtupleInvalid(%s, %d, %d, %d, %d); ' % ( + self_name_csafe, has_fixed_positional_count, min_positional_args, max_positional_args, i)) code.putln(code.error_goto(self.pos)) code.putln('}') @@ -3927,11 +4356,11 @@ class DefNodeWrapper(FuncDefNode): code.putln('else {') code.globalstate.use_utility_code( UtilityCode.load_cached("RaiseKeywordRequired", "FunctionArguments.c")) - code.put('__Pyx_RaiseKeywordRequired("%s", %s); ' % ( - self.name, pystring_cname)) + code.put('__Pyx_RaiseKeywordRequired(%s, %s); ' % ( + self_name_csafe, pystring_cname)) code.putln(code.error_goto(self.pos)) code.putln('}') - if max_positional_args > 0: + if max_positional_args > num_pos_only_args: code.putln('}') if has_kw_only_args: @@ -3947,34 +4376,69 @@ class DefNodeWrapper(FuncDefNode): # arguments, this will always do the right thing for unpacking # keyword arguments, so that we can concentrate on optimising # common cases above. + # + # ParseOptionalKeywords() needs to know how many of the arguments + # that could be passed as keywords have in fact been passed as + # positional args. + if num_pos_only_args > 0: + # There are positional-only arguments which we don't want to count, + # since they cannot be keyword arguments. Subtract the number of + # pos-only arguments from the number of positional arguments we got. + # If we get a negative number then none of the keyword arguments were + # passed as positional args. + code.putln('const Py_ssize_t kwd_pos_args = (unlikely(%s < %d)) ? 
0 : %s - %d;' % ( + Naming.nargs_cname, num_pos_only_args, + Naming.nargs_cname, num_pos_only_args, + )) + elif max_positional_args > 0: + code.putln('const Py_ssize_t kwd_pos_args = %s;' % Naming.nargs_cname) + if max_positional_args == 0: pos_arg_count = "0" elif self.star_arg: - code.putln("const Py_ssize_t used_pos_args = (pos_args < %d) ? pos_args : %d;" % ( - max_positional_args, max_positional_args)) + # If there is a *arg, the number of used positional args could be larger than + # the number of possible keyword arguments. But ParseOptionalKeywords() uses the + # number of positional args as an index into the keyword argument name array, + # if this is larger than the number of kwd args we get a segfault. So round + # this down to max_positional_args - num_pos_only_args (= num possible kwd args). + code.putln("const Py_ssize_t used_pos_args = (kwd_pos_args < %d) ? kwd_pos_args : %d;" % ( + max_positional_args - num_pos_only_args, max_positional_args - num_pos_only_args)) pos_arg_count = "used_pos_args" else: - pos_arg_count = "pos_args" + pos_arg_count = "kwd_pos_args" + if num_pos_only_args < len(all_args): + values_array = 'values + %d' % num_pos_only_args + else: + values_array = 'values' code.globalstate.use_utility_code( UtilityCode.load_cached("ParseKeywords", "FunctionArguments.c")) - code.putln('if (unlikely(__Pyx_ParseOptionalKeywords(%s, %s, %s, values, %s, "%s") < 0)) %s' % ( + code.putln('if (unlikely(__Pyx_ParseOptionalKeywords(%s, %s, %s, %s, %s, %s, %s) < 0)) %s' % ( Naming.kwds_cname, + Naming.kwvalues_cname, Naming.pykwdlist_cname, self.starstar_arg and self.starstar_arg.entry.cname or '0', + values_array, pos_arg_count, - self.name, + self_name_csafe, code.error_goto(self.pos))) code.putln('}') def generate_optional_kwonly_args_unpacking_code(self, all_args, code): optional_args = [] first_optional_arg = -1 + num_posonly_args = 0 for i, arg in enumerate(all_args): + if arg.pos_only: + num_posonly_args += 1 if not arg.kw_only or not 
arg.default: continue if not optional_args: first_optional_arg = i optional_args.append(arg.name) + if num_posonly_args > 0: + posonly_correction = '-%d' % num_posonly_args + else: + posonly_correction = '' if optional_args: if len(optional_args) > 1: # if we receive more than the named kwargs, we either have **kwargs @@ -3990,9 +4454,14 @@ class DefNodeWrapper(FuncDefNode): else: code.putln('if (kw_args == 1) {') code.putln('const Py_ssize_t index = %d;' % first_optional_arg) - code.putln('PyObject* value = __Pyx_PyDict_GetItemStr(%s, *%s[index]);' % ( - Naming.kwds_cname, Naming.pykwdlist_cname)) + code.putln('PyObject* value = __Pyx_GetKwValue_%s(%s, %s, *%s[index%s]);' % ( + self.signature.fastvar, + Naming.kwds_cname, + Naming.kwvalues_cname, + Naming.pykwdlist_cname, + posonly_correction)) code.putln('if (value) { values[index] = value; kw_args--; }') + code.putln('else if (unlikely(PyErr_Occurred())) %s' % code.error_goto(self.pos)) if len(optional_args) > 1: code.putln('}') code.putln('}') @@ -4061,6 +4530,36 @@ class DefNodeWrapper(FuncDefNode): arg.type.is_buffer or arg.type.is_memoryviewslice): self.generate_arg_none_check(arg, code) + if self.target.entry.is_special: + for n in reversed(range(len(self.args), self.signature.max_num_fixed_args())): + # for special functions with optional args (e.g. 
power which can + # take 2 or 3 args), unused args are None since this is what the + # compilers sets + if self.target.entry.name == "__ipow__": + # Bug in Python < 3.8 - __ipow__ is used as a binary function + # and attempts to access the third argument will always fail + code.putln("#if PY_VERSION_HEX >= 0x03080000") + code.putln("if (unlikely(unused_arg_%s != Py_None)) {" % n) + code.putln( + 'PyErr_SetString(PyExc_TypeError, ' + '"%s() takes %s arguments but %s were given");' % ( + self.target.entry.qualified_name, self.signature.max_num_fixed_args(), n)) + code.putln("%s;" % code.error_goto(self.pos)) + code.putln("}") + if self.target.entry.name == "__ipow__": + code.putln("#endif /*PY_VERSION_HEX >= 0x03080000*/") + if self.target.entry.name == "__ipow__" and len(self.args) != 2: + # It's basically impossible to safely support it: + # Class().__ipow__(1) is guaranteed to crash. + # Therefore, raise an error. + # Use "if" instead of "#if" to avoid warnings about unused variables + code.putln("if ((PY_VERSION_HEX < 0x03080000)) {") + code.putln( + 'PyErr_SetString(PyExc_NotImplementedError, ' + '"3-argument %s cannot be used in Python<3.8");' % ( + self.target.entry.qualified_name)) + code.putln("%s;" % code.error_goto(self.pos)) + code.putln('}') def error_value(self): return self.signature.error_value @@ -4073,9 +4572,7 @@ class GeneratorDefNode(DefNode): # is_generator = True - is_coroutine = False is_iterable_coroutine = False - is_asyncgen = False gen_type_name = 'Generator' needs_closure = True @@ -4110,7 +4607,7 @@ class GeneratorDefNode(DefNode): code.putln('%s = __Pyx_CyFunction_GetClassObj(%s);' % ( classobj_cname, Naming.self_cname)) code.put_incref(classobj_cname, py_object_type) - code.put_giveref(classobj_cname) + code.put_giveref(classobj_cname, py_object_type) code.put_finish_refcount_context() code.putln('return (PyObject *) gen;') code.putln('}') @@ -4234,7 +4731,7 @@ class GeneratorBodyDefNode(DefNode): code.putln("%s = %s; %s" % ( 
Naming.retval_cname, comp_init, code.error_goto_if_null(Naming.retval_cname, self.pos))) - code.put_gotref(Naming.retval_cname) + code.put_gotref(Naming.retval_cname, py_object_type) # ----- Function body self.generate_function_body(env, code) @@ -4280,7 +4777,7 @@ class GeneratorBodyDefNode(DefNode): # ----- Non-error return cleanup code.put_label(code.return_label) if self.is_inlined: - code.put_xgiveref(Naming.retval_cname) + code.put_xgiveref(Naming.retval_cname, py_object_type) else: code.put_xdecref_clear(Naming.retval_cname, py_object_type) # For Py3.7, clearing is already done below. @@ -4357,7 +4854,10 @@ class OverrideCheckNode(StatNode): return self def generate_execution_code(self, code): - interned_attr_cname = code.intern_identifier(self.py_func.entry.name) + # For fused functions, look up the dispatch function, not the specialisation. + method_entry = self.py_func.fused_py_func.entry if self.py_func.fused_py_func else self.py_func.entry + interned_attr_cname = code.intern_identifier(method_entry.name) + # Check to see if we are an extension type if self.py_func.is_module_scope: self_arg = "((PyObject *)%s)" % Naming.module_cname @@ -4369,8 +4869,8 @@ class OverrideCheckNode(StatNode): if self.py_func.is_module_scope: code.putln("else {") else: - code.putln("else if (unlikely((Py_TYPE(%s)->tp_dictoffset != 0)" - " || (Py_TYPE(%s)->tp_flags & (Py_TPFLAGS_IS_ABSTRACT | Py_TPFLAGS_HEAPTYPE)))) {" % ( + code.putln("else if (unlikely((Py_TYPE(%s)->tp_dictoffset != 0) || " + "__Pyx_PyType_HasFeature(Py_TYPE(%s), (Py_TPFLAGS_IS_ABSTRACT | Py_TPFLAGS_HEAPTYPE)))) {" % ( self_arg, self_arg)) code.putln("#if CYTHON_USE_DICT_VERSIONS && CYTHON_USE_PYTYPE_LOOKUP && CYTHON_USE_TYPE_SLOTS") @@ -4396,12 +4896,16 @@ class OverrideCheckNode(StatNode): err = code.error_goto_if_null(func_node_temp, self.pos) code.putln("%s = __Pyx_PyObject_GetAttrStr(%s, %s); %s" % ( func_node_temp, self_arg, interned_attr_cname, err)) - code.put_gotref(func_node_temp) + 
code.put_gotref(func_node_temp, py_object_type) - is_builtin_function_or_method = "PyCFunction_Check(%s)" % func_node_temp is_overridden = "(PyCFunction_GET_FUNCTION(%s) != (PyCFunction)(void*)%s)" % ( - func_node_temp, self.py_func.entry.func_cname) - code.putln("if (!%s || %s) {" % (is_builtin_function_or_method, is_overridden)) + func_node_temp, method_entry.func_cname) + code.putln("#ifdef __Pyx_CyFunction_USED") + code.putln("if (!__Pyx_IsCyOrPyCFunction(%s)" % func_node_temp) + code.putln("#else") + code.putln("if (!PyCFunction_Check(%s)" % func_node_temp) + code.putln("#endif") + code.putln(" || %s) {" % is_overridden) self.body.generate_execution_code(code) code.putln("}") @@ -4443,25 +4947,31 @@ class PyClassDefNode(ClassDefNode): # A Python class definition. # # name EncodedString Name of the class - # doc string or None + # doc string or None The class docstring # body StatNode Attribute definition code # entry Symtab.Entry # scope PyClassScope # decorators [DecoratorNode] list of decorators or None + # bases ExprNode Expression that evaluates to a tuple of base classes # # The following subnodes are constructed internally: # + # doc_node NameNode '__doc__' name that is made available to the class body # dict DictNode Class dictionary or Py3 namespace # classobj ClassNode Class object # target NameNode Variable to assign class object to + # orig_bases None or ExprNode "bases" before transformation by PEP560 __mro_entries__, + # used to create the __orig_bases__ attribute - child_attrs = ["body", "dict", "metaclass", "mkw", "bases", "class_result", - "target", "class_cell", "decorators"] + child_attrs = ["doc_node", "body", "dict", "metaclass", "mkw", "bases", "class_result", + "target", "class_cell", "decorators", "orig_bases"] decorators = None class_result = None is_py3_style_class = False # Python3 style class (kwargs) metaclass = None mkw = None + doc_node = None + orig_bases = None def __init__(self, pos, name, bases, doc, body, decorators=None, 
keyword_args=None, force_py3_semantics=False): @@ -4475,6 +4985,7 @@ class PyClassDefNode(ClassDefNode): if self.doc and Options.docstrings: doc = embed_position(self.pos, self.doc) doc_node = ExprNodes.StringNode(pos, value=doc) + self.doc_node = ExprNodes.NameNode(name=EncodedString('__doc__'), type=py_object_type, pos=pos) else: doc_node = None @@ -4523,7 +5034,9 @@ class PyClassDefNode(ClassDefNode): self.classobj = ExprNodes.Py3ClassNode( pos, name=name, class_def_node=self, doc=doc_node, calculate_metaclass=needs_metaclass_calculation, - allow_py2_metaclass=allow_py2_metaclass) + allow_py2_metaclass=allow_py2_metaclass, + force_type=force_py3_semantics, + ) else: # no bases, no metaclass => old style class creation self.dict = ExprNodes.DictNode(pos, key_value_pairs=[]) @@ -4560,7 +5073,7 @@ class PyClassDefNode(ClassDefNode): return cenv def analyse_declarations(self, env): - class_result = self.classobj + unwrapped_class_result = class_result = self.classobj if self.decorators: from .ExprNodes import SimpleCallNode for decorator in self.decorators[::-1]: @@ -4579,9 +5092,27 @@ class PyClassDefNode(ClassDefNode): cenv = self.create_scope(env) cenv.directives = env.directives cenv.class_obj_cname = self.target.entry.cname + if self.doc_node: + self.doc_node.analyse_target_declaration(cenv) self.body.analyse_declarations(cenv) + unwrapped_class_result.analyse_annotations(cenv) + + update_bases_functype = PyrexTypes.CFuncType( + PyrexTypes.py_object_type, [ + PyrexTypes.CFuncTypeArg("bases", PyrexTypes.py_object_type, None) + ]) def analyse_expressions(self, env): + if self.bases and not (self.bases.is_sequence_constructor and len(self.bases.args) == 0): + from .ExprNodes import PythonCapiCallNode, CloneNode + # handle the Python 3.7 __mro_entries__ transformation + orig_bases = self.bases.analyse_expressions(env) + self.bases = PythonCapiCallNode(orig_bases.pos, + function_name="__Pyx_PEP560_update_bases", + func_type=self.update_bases_functype, + 
utility_code=UtilityCode.load_cached('Py3UpdateBases', 'ObjectHandling.c'), + args=[CloneNode(orig_bases)]) + self.orig_bases = orig_bases if self.bases: self.bases = self.bases.analyse_expressions(env) if self.mkw: @@ -4592,7 +5123,7 @@ class PyClassDefNode(ClassDefNode): self.class_result = self.class_result.analyse_expressions(env) cenv = self.scope self.body = self.body.analyse_expressions(cenv) - self.target.analyse_target_expression(env, self.classobj) + self.target = self.target.analyse_target_expression(env, self.classobj) self.class_cell = self.class_cell.analyse_expressions(cenv) return self @@ -4604,6 +5135,8 @@ class PyClassDefNode(ClassDefNode): code.mark_pos(self.pos) code.pyclass_stack.append(self) cenv = self.scope + if self.orig_bases: + self.orig_bases.generate_evaluation_code(code) if self.bases: self.bases.generate_evaluation_code(code) if self.mkw: @@ -4611,6 +5144,17 @@ class PyClassDefNode(ClassDefNode): if self.metaclass: self.metaclass.generate_evaluation_code(code) self.dict.generate_evaluation_code(code) + if self.orig_bases: + # update __orig_bases__ if needed + code.putln("if (%s != %s) {" % (self.bases.result(), self.orig_bases.result())) + code.putln( + code.error_goto_if_neg('PyDict_SetItemString(%s, "__orig_bases__", %s)' % ( + self.dict.result(), self.orig_bases.result()), + self.pos + )) + code.putln("}") + self.orig_bases.generate_disposal_code(code) + self.orig_bases.free_temps(code) cenv.namespace_cname = cenv.class_obj_cname = self.dict.result() class_cell = self.class_cell @@ -4677,6 +5221,10 @@ class CClassDefNode(ClassDefNode): decorators = None shadow = False + @property + def punycode_class_name(self): + return punycodify_name(self.class_name) + def buffer_defaults(self, env): if not hasattr(self, '_buffer_defaults'): from . 
import Buffer @@ -4713,6 +5261,8 @@ class CClassDefNode(ClassDefNode): api=self.api, buffer_defaults=self.buffer_defaults(env), shadow=self.shadow) + if self.bases and len(self.bases.args) > 1: + self.entry.type.multiple_bases = True def analyse_declarations(self, env): #print "CClassDefNode.analyse_declarations:", self.class_name @@ -4757,7 +5307,8 @@ class CClassDefNode(ClassDefNode): error(base.pos, "Base class '%s' of type '%s' is final" % ( base_type, self.class_name)) elif base_type.is_builtin_type and \ - base_type.name in ('tuple', 'str', 'bytes'): + base_type.name in ('tuple', 'bytes'): + # str in Py2 is also included in this, but now checked at run-time error(base.pos, "inheritance from PyVarObject types like '%s' is not currently supported" % base_type.name) else: @@ -4783,7 +5334,7 @@ class CClassDefNode(ClassDefNode): if self.visibility == 'extern': if (self.module_name == '__builtin__' and self.class_name in Builtin.builtin_types and - env.qualified_name[:8] != 'cpython.'): # allow overloaded names for cimporting from cpython + env.qualified_name[:8] != 'cpython.'): # allow overloaded names for cimporting from cpython warning(self.pos, "%s already a builtin Cython type" % self.class_name, 1) self.entry = home_scope.declare_c_class( @@ -4801,6 +5352,8 @@ class CClassDefNode(ClassDefNode): api=self.api, buffer_defaults=self.buffer_defaults(env), shadow=self.shadow) + if self.bases and len(self.bases.args) > 1: + self.entry.type.multiple_bases = True if self.shadow: home_scope.lookup(self.class_name).as_variable = self.entry @@ -4809,6 +5362,15 @@ class CClassDefNode(ClassDefNode): self.scope = scope = self.entry.type.scope if scope is not None: scope.directives = env.directives + if "dataclasses.dataclass" in env.directives: + is_frozen = False + # Retrieve the @dataclass config (args, kwargs), as passed into the decorator. 
+ dataclass_config = env.directives["dataclasses.dataclass"] + if dataclass_config: + decorator_kwargs = dataclass_config[1] + frozen_flag = decorator_kwargs.get('frozen') + is_frozen = frozen_flag and frozen_flag.is_literal and frozen_flag.value + scope.is_c_dataclass_scope = "frozen" if is_frozen else True if self.doc and Options.docstrings: scope.doc = embed_position(self.pos, self.doc) @@ -4868,71 +5430,206 @@ class CClassDefNode(ClassDefNode): # This is needed to generate evaluation code for # default values of method arguments. code.mark_pos(self.pos) - if self.body: - self.body.generate_execution_code(code) if not self.entry.type.early_init: + bases = None if self.type_init_args: + # Extract bases tuple and validate 'best base' by actually calling 'type()'. + bases = code.funcstate.allocate_temp(PyrexTypes.py_object_type, manage_ref=True) + self.type_init_args.generate_evaluation_code(code) - bases = "PyTuple_GET_ITEM(%s, 1)" % self.type_init_args.result() + code.putln("%s = PyTuple_GET_ITEM(%s, 1);" % (bases, self.type_init_args.result())) + code.put_incref(bases, PyrexTypes.py_object_type) + first_base = "((PyTypeObject*)PyTuple_GET_ITEM(%s, 0))" % bases # Let Python do the base types compatibility checking. 
- trial_type = code.funcstate.allocate_temp(PyrexTypes.py_object_type, True) - code.putln("%s = PyType_Type.tp_new(&PyType_Type, %s, NULL);" % ( + trial_type = code.funcstate.allocate_temp(PyrexTypes.py_object_type, manage_ref=True) + code.putln("%s = __Pyx_PyType_GetSlot(&PyType_Type, tp_new, newfunc)(&PyType_Type, %s, NULL);" % ( trial_type, self.type_init_args.result())) code.putln(code.error_goto_if_null(trial_type, self.pos)) - code.put_gotref(trial_type) - code.putln("if (((PyTypeObject*) %s)->tp_base != %s) {" % ( + code.put_gotref(trial_type, py_object_type) + code.putln("if (__Pyx_PyType_GetSlot((PyTypeObject*) %s, tp_base, PyTypeObject*) != %s) {" % ( trial_type, first_base)) - code.putln("PyErr_Format(PyExc_TypeError, \"best base '%s' must be equal to first base '%s'\",") - code.putln(" ((PyTypeObject*) %s)->tp_base->tp_name, %s->tp_name);" % ( - trial_type, first_base)) + trial_type_base = "__Pyx_PyType_GetSlot((PyTypeObject*) %s, tp_base, PyTypeObject*)" % trial_type + code.putln("__Pyx_TypeName base_name = __Pyx_PyType_GetName(%s);" % trial_type_base) + code.putln("__Pyx_TypeName type_name = __Pyx_PyType_GetName(%s);" % first_base) + code.putln("PyErr_Format(PyExc_TypeError, " + "\"best base '\" __Pyx_FMT_TYPENAME \"' must be equal to first base '\" __Pyx_FMT_TYPENAME \"'\",") + code.putln(" base_name, type_name);") + code.putln("__Pyx_DECREF_TypeName(base_name);") + code.putln("__Pyx_DECREF_TypeName(type_name);") code.putln(code.error_goto(self.pos)) code.putln("}") - code.funcstate.release_temp(trial_type) - code.put_incref(bases, PyrexTypes.py_object_type) - code.put_giveref(bases) - code.putln("%s.tp_bases = %s;" % (self.entry.type.typeobj_cname, bases)) + code.put_decref_clear(trial_type, PyrexTypes.py_object_type) + code.funcstate.release_temp(trial_type) + self.type_init_args.generate_disposal_code(code) self.type_init_args.free_temps(code) - self.generate_type_ready_code(self.entry, code, True) + self.generate_type_ready_code(self.entry, code, 
bases_tuple_cname=bases, check_heap_type_bases=True) + if bases is not None: + code.put_decref_clear(bases, PyrexTypes.py_object_type) + code.funcstate.release_temp(bases) + + if self.body: + self.body.generate_execution_code(code) # Also called from ModuleNode for early init types. @staticmethod - def generate_type_ready_code(entry, code, heap_type_bases=False): + def generate_type_ready_code(entry, code, bases_tuple_cname=None, check_heap_type_bases=False): # Generate a call to PyType_Ready for an extension # type defined in this module. type = entry.type - typeobj_cname = type.typeobj_cname + typeptr_cname = type.typeptr_cname scope = type.scope if not scope: # could be None if there was an error return - if entry.visibility != 'extern': - for slot in TypeSlots.slot_table: - slot.generate_dynamic_init_code(scope, code) - if heap_type_bases: - code.globalstate.use_utility_code( - UtilityCode.load_cached('PyType_Ready', 'ExtensionTypes.c')) - readyfunc = "__Pyx_PyType_Ready" + if entry.visibility == 'extern': + # Generate code to initialise the typeptr of an external extension + # type defined in this module to point to its type object. + if type.typeobj_cname: + # FIXME: this should not normally be set :-? 
+ assert not type.typeobj_cname + code.putln("%s = &%s;" % ( + type.typeptr_cname, + type.typeobj_cname, + )) + return + # TODO: remove 'else:' and dedent + else: + assert typeptr_cname + assert type.typeobj_cname + typespec_cname = "%s_spec" % type.typeobj_cname + code.putln("#if CYTHON_USE_TYPE_SPECS") + tuple_temp = None + if not bases_tuple_cname and scope.parent_type.base_type: + tuple_temp = code.funcstate.allocate_temp(py_object_type, manage_ref=True) + code.putln("%s = PyTuple_Pack(1, (PyObject *)%s); %s" % ( + tuple_temp, + scope.parent_type.base_type.typeptr_cname, + code.error_goto_if_null(tuple_temp, entry.pos), + )) + code.put_gotref(tuple_temp, py_object_type) + + if bases_tuple_cname or tuple_temp: + if check_heap_type_bases: + code.globalstate.use_utility_code( + UtilityCode.load_cached('ValidateBasesTuple', 'ExtensionTypes.c')) + code.put_error_if_neg(entry.pos, "__Pyx_validate_bases_tuple(%s.name, %s, %s)" % ( + typespec_cname, + TypeSlots.get_slot_by_name("tp_dictoffset", scope.directives).slot_code(scope), + bases_tuple_cname or tuple_temp, + )) + + code.putln("%s = (PyTypeObject *) __Pyx_PyType_FromModuleAndSpec(%s, &%s, %s);" % ( + typeptr_cname, + Naming.module_cname, + typespec_cname, + bases_tuple_cname or tuple_temp, + )) + if tuple_temp: + code.put_xdecref_clear(tuple_temp, type=py_object_type) + code.funcstate.release_temp(tuple_temp) + code.putln(code.error_goto_if_null(typeptr_cname, entry.pos)) else: - readyfunc = "PyType_Ready" - code.putln( - "if (%s(&%s) < 0) %s" % ( - readyfunc, - typeobj_cname, - code.error_goto(entry.pos))) - # Don't inherit tp_print from builtin types, restoring the + code.putln( + "%s = (PyTypeObject *) __Pyx_PyType_FromModuleAndSpec(%s, &%s, NULL); %s" % ( + typeptr_cname, + Naming.module_cname, + typespec_cname, + code.error_goto_if_null(typeptr_cname, entry.pos), + )) + + # The buffer interface is not currently supported by PyType_FromSpec(). 
+ buffer_slot = TypeSlots.get_slot_by_name("tp_as_buffer", code.globalstate.directives) + if not buffer_slot.is_empty(scope): + code.putln("#if !CYTHON_COMPILING_IN_LIMITED_API") + code.putln("%s->%s = %s;" % ( + typeptr_cname, + buffer_slot.slot_name, + buffer_slot.slot_code(scope), + )) + # Still need to inherit buffer methods since PyType_Ready() didn't do it for us. + for buffer_method_name in ("__getbuffer__", "__releasebuffer__"): + buffer_slot = TypeSlots.get_slot_table( + code.globalstate.directives).get_slot_by_method_name(buffer_method_name) + if buffer_slot.slot_code(scope) == "0" and not TypeSlots.get_base_slot_function(scope, buffer_slot): + code.putln("if (!%s->tp_as_buffer->%s &&" + " %s->tp_base->tp_as_buffer &&" + " %s->tp_base->tp_as_buffer->%s) {" % ( + typeptr_cname, buffer_slot.slot_name, + typeptr_cname, + typeptr_cname, buffer_slot.slot_name, + )) + code.putln("%s->tp_as_buffer->%s = %s->tp_base->tp_as_buffer->%s;" % ( + typeptr_cname, buffer_slot.slot_name, + typeptr_cname, buffer_slot.slot_name, + )) + code.putln("}") + code.putln("#elif defined(Py_bf_getbuffer) && defined(Py_bf_releasebuffer)") + code.putln("/* PY_VERSION_HEX >= 0x03090000 || Py_LIMITED_API >= 0x030B0000 */") + code.putln("#elif defined(_MSC_VER)") + code.putln("#pragma message (\"The buffer protocol is not supported in the Limited C-API < 3.11.\")") + code.putln("#else") + code.putln("#warning \"The buffer protocol is not supported in the Limited C-API < 3.11.\"") + code.putln("#endif") + + code.globalstate.use_utility_code( + UtilityCode.load_cached("FixUpExtensionType", "ExtensionTypes.c")) + code.put_error_if_neg(entry.pos, "__Pyx_fix_up_extension_type_from_spec(&%s, %s)" % ( + typespec_cname, typeptr_cname)) + + code.putln("#else") + if bases_tuple_cname: + code.put_incref(bases_tuple_cname, py_object_type) + code.put_giveref(bases_tuple_cname, py_object_type) + code.putln("%s.tp_bases = %s;" % (type.typeobj_cname, bases_tuple_cname)) + code.putln("%s = &%s;" % ( + 
typeptr_cname, + type.typeobj_cname, + )) + code.putln("#endif") # if CYTHON_USE_TYPE_SPECS + + base_type = type.base_type + while base_type: + if base_type.is_external and not base_type.objstruct_cname == "PyTypeObject": + # 'type' is special-cased because it is actually based on PyHeapTypeObject + # Variable length bases are allowed if the current class doesn't grow + code.putln("if (sizeof(%s%s) != sizeof(%s%s)) {" % ( + "" if type.typedef_flag else "struct ", type.objstruct_cname, + "" if base_type.typedef_flag else "struct ", base_type.objstruct_cname)) + code.globalstate.use_utility_code( + UtilityCode.load_cached("ValidateExternBase", "ExtensionTypes.c")) + code.put_error_if_neg(entry.pos, "__Pyx_validate_extern_base(%s)" % ( + type.base_type.typeptr_cname)) + code.putln("}") + break + base_type = base_type.base_type + + code.putln("#if !CYTHON_COMPILING_IN_LIMITED_API") + # FIXME: these still need to get initialised even with the limited-API + for slot in TypeSlots.get_slot_table(code.globalstate.directives): + slot.generate_dynamic_init_code(scope, code) + code.putln("#endif") + + code.putln("#if !CYTHON_USE_TYPE_SPECS") + code.globalstate.use_utility_code( + UtilityCode.load_cached('PyType_Ready', 'ExtensionTypes.c')) + code.put_error_if_neg(entry.pos, "__Pyx_PyType_Ready(%s)" % typeptr_cname) + code.putln("#endif") + + # Don't inherit tp_print from builtin types in Python 2, restoring the # behavior of using tp_repr or tp_str instead. # ("tp_print" was renamed to "tp_vectorcall_offset" in Py3.8b1) - code.putln("#if PY_VERSION_HEX < 0x030800B1") - code.putln("%s.tp_print = 0;" % typeobj_cname) + code.putln("#if PY_MAJOR_VERSION < 3") + code.putln("%s->tp_print = 0;" % typeptr_cname) code.putln("#endif") # Use specialised attribute lookup for types with generic lookup but no instance dict. 
getattr_slot_func = TypeSlots.get_slot_code_by_name(scope, 'tp_getattro') dictoffset_slot_func = TypeSlots.get_slot_code_by_name(scope, 'tp_dictoffset') if getattr_slot_func == '0' and dictoffset_slot_func == '0': + code.putln("#if !CYTHON_COMPILING_IN_LIMITED_API") # FIXME if type.is_final_type: py_cfunc = "__Pyx_PyObject_GenericGetAttrNoDict" # grepable utility_func = "PyObject_GenericGetAttrNoDict" @@ -4942,11 +5639,12 @@ class CClassDefNode(ClassDefNode): code.globalstate.use_utility_code(UtilityCode.load_cached(utility_func, "ObjectHandling.c")) code.putln("if ((CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP) &&" - " likely(!%s.tp_dictoffset && %s.tp_getattro == PyObject_GenericGetAttr)) {" % ( - typeobj_cname, typeobj_cname)) - code.putln("%s.tp_getattro = %s;" % ( - typeobj_cname, py_cfunc)) + " likely(!%s->tp_dictoffset && %s->tp_getattro == PyObject_GenericGetAttr)) {" % ( + typeptr_cname, typeptr_cname)) + code.putln("%s->tp_getattro = %s;" % ( + typeptr_cname, py_cfunc)) code.putln("}") + code.putln("#endif") # if !CYTHON_COMPILING_IN_LIMITED_API # Fix special method docstrings. 
This is a bit of a hack, but # unless we let PyType_Ready create the slot wrappers we have @@ -4955,19 +5653,20 @@ class CClassDefNode(ClassDefNode): is_buffer = func.name in ('__getbuffer__', '__releasebuffer__') if (func.is_special and Options.docstrings and func.wrapperbase_cname and not is_buffer): - slot = TypeSlots.method_name_to_slot.get(func.name) + slot = TypeSlots.get_slot_table( + entry.type.scope.directives).get_slot_by_method_name(func.name) preprocessor_guard = slot.preprocessor_guard_code() if slot else None if preprocessor_guard: code.putln(preprocessor_guard) code.putln('#if CYTHON_UPDATE_DESCRIPTOR_DOC') code.putln("{") code.putln( - 'PyObject *wrapper = PyObject_GetAttrString((PyObject *)&%s, "%s"); %s' % ( - typeobj_cname, + 'PyObject *wrapper = PyObject_GetAttrString((PyObject *)%s, "%s"); %s' % ( + typeptr_cname, func.name, code.error_goto_if_null('wrapper', entry.pos))) code.putln( - "if (Py_TYPE(wrapper) == &PyWrapperDescr_Type) {") + "if (__Pyx_IS_TYPE(wrapper, &PyWrapperDescr_Type)) {") code.putln( "%s = *((PyWrapperDescrObject *)wrapper)->d_base;" % ( func.wrapperbase_cname)) @@ -4981,34 +5680,34 @@ class CClassDefNode(ClassDefNode): code.putln('#endif') if preprocessor_guard: code.putln('#endif') + if type.vtable_cname: code.globalstate.use_utility_code( UtilityCode.load_cached('SetVTable', 'ImportExport.c')) - code.putln( - "if (__Pyx_SetVtable(%s.tp_dict, %s) < 0) %s" % ( - typeobj_cname, - type.vtabptr_cname, - code.error_goto(entry.pos))) - if heap_type_bases: - code.globalstate.use_utility_code( - UtilityCode.load_cached('MergeVTables', 'ImportExport.c')) - code.putln("if (__Pyx_MergeVtables(&%s) < 0) %s" % ( - typeobj_cname, - code.error_goto(entry.pos))) + code.put_error_if_neg(entry.pos, "__Pyx_SetVtable(%s, %s)" % ( + typeptr_cname, + type.vtabptr_cname, + )) + # TODO: find a way to make this work with the Limited API! 
+ code.putln("#if !CYTHON_COMPILING_IN_LIMITED_API") + code.globalstate.use_utility_code( + UtilityCode.load_cached('MergeVTables', 'ImportExport.c')) + code.put_error_if_neg(entry.pos, "__Pyx_MergeVtables(%s)" % typeptr_cname) + code.putln("#endif") if not type.scope.is_internal and not type.scope.directives.get('internal'): # scope.is_internal is set for types defined by # Cython (such as closures), the 'internal' # directive is set by users - code.putln( - 'if (PyObject_SetAttr(%s, %s, (PyObject *)&%s) < 0) %s' % ( - Naming.module_cname, - code.intern_identifier(scope.class_name), - typeobj_cname, - code.error_goto(entry.pos))) + code.put_error_if_neg(entry.pos, "PyObject_SetAttr(%s, %s, (PyObject *) %s)" % ( + Naming.module_cname, + code.intern_identifier(scope.class_name), + typeptr_cname, + )) + weakref_entry = scope.lookup_here("__weakref__") if not scope.is_closure_class_scope else None if weakref_entry: if weakref_entry.type is py_object_type: - tp_weaklistoffset = "%s.tp_weaklistoffset" % typeobj_cname + tp_weaklistoffset = "%s->tp_weaklistoffset" % typeptr_cname if type.typedef_flag: objstruct = type.objstruct_cname else: @@ -5020,21 +5719,16 @@ class CClassDefNode(ClassDefNode): weakref_entry.cname)) else: error(weakref_entry.pos, "__weakref__ slot must be of type 'object'") + if scope.lookup_here("__reduce_cython__") if not scope.is_closure_class_scope else None: # Unfortunately, we cannot reliably detect whether a # superclass defined __reduce__ at compile time, so we must # do so at runtime. code.globalstate.use_utility_code( UtilityCode.load_cached('SetupReduce', 'ExtensionTypes.c')) - code.putln('if (__Pyx_setup_reduce((PyObject*)&%s) < 0) %s' % ( - typeobj_cname, - code.error_goto(entry.pos))) - # Generate code to initialise the typeptr of an extension - # type defined in this module to point to its type object. 
- if type.typeobj_cname: - code.putln( - "%s = &%s;" % ( - type.typeptr_cname, type.typeobj_cname)) + code.putln("#if !CYTHON_COMPILING_IN_LIMITED_API") # FIXME + code.put_error_if_neg(entry.pos, "__Pyx_setup_reduce((PyObject *) %s)" % typeptr_cname) + code.putln("#endif") def annotate(self, code): if self.type_init_args: @@ -5048,14 +5742,13 @@ class PropertyNode(StatNode): # # name string # doc EncodedString or None Doc string - # entry Symtab.Entry + # entry Symtab.Entry The Entry of the property attribute # body StatListNode child_attrs = ["body"] def analyse_declarations(self, env): self.entry = env.declare_property(self.name, self.doc, self.pos) - self.entry.scope.directives = env.directives self.body.analyse_declarations(self.entry.scope) def analyse_expressions(self, env): @@ -5072,6 +5765,44 @@ class PropertyNode(StatNode): self.body.annotate(code) +class CPropertyNode(StatNode): + """Definition of a C property, backed by a CFuncDefNode getter. + """ + # name string + # doc EncodedString or None Doc string of the property + # entry Symtab.Entry The Entry of the property attribute + # body StatListNode[CFuncDefNode] (for compatibility with PropertyNode) + + child_attrs = ["body"] + is_cproperty = True + + @property + def cfunc(self): + stats = self.body.stats + assert stats and isinstance(stats[0], CFuncDefNode), stats + return stats[0] + + def analyse_declarations(self, env): + scope = PropertyScope(self.name, class_scope=env) + self.body.analyse_declarations(scope) + entry = self.entry = env.declare_property( + self.name, self.doc, self.pos, ctype=self.cfunc.return_type, property_scope=scope) + entry.getter_cname = self.cfunc.entry.cname + + def analyse_expressions(self, env): + self.body = self.body.analyse_expressions(env) + return self + + def generate_function_definitions(self, env, code): + self.body.generate_function_definitions(env, code) + + def generate_execution_code(self, code): + pass + + def annotate(self, code): + self.body.annotate(code) + 
+ class GlobalNode(StatNode): # Global variable declaration. # @@ -5207,12 +5938,14 @@ class SingleAssignmentNode(AssignmentNode): # rhs ExprNode Right hand side # first bool Is this guaranteed the first assignment to lhs? # is_overloaded_assignment bool Is this assignment done via an overloaded operator= + # is_assignment_expression bool Internally SingleAssignmentNode is used to implement assignment expressions # exception_check # exception_value child_attrs = ["lhs", "rhs"] first = False is_overloaded_assignment = False + is_assignment_expression = False declaration_only = False def analyse_declarations(self, env): @@ -5297,7 +6030,17 @@ class SingleAssignmentNode(AssignmentNode): if self.declaration_only: return else: - self.lhs.analyse_target_declaration(env) + if self.is_assignment_expression: + self.lhs.analyse_assignment_expression_target_declaration(env) + else: + self.lhs.analyse_target_declaration(env) + # if an entry doesn't exist that just implies that lhs isn't made up purely + # of AttributeNodes and NameNodes - it isn't useful as a known path to + # a standard library module + if (self.lhs.is_attribute or self.lhs.is_name) and self.lhs.entry and not self.lhs.entry.known_standard_library_import: + stdlib_import_name = self.rhs.get_known_standard_library_import() + if stdlib_import_name: + self.lhs.entry.known_standard_library_import = stdlib_import_name def analyse_types(self, env, use_temp=0): from . 
import ExprNodes @@ -5320,8 +6063,8 @@ class SingleAssignmentNode(AssignmentNode): elif self.lhs.type.is_array: if not isinstance(self.lhs, ExprNodes.SliceIndexNode): # cannot assign to C array, only to its full slice - self.lhs = ExprNodes.SliceIndexNode(self.lhs.pos, base=self.lhs, start=None, stop=None) - self.lhs = self.lhs.analyse_target_types(env) + lhs = ExprNodes.SliceIndexNode(self.lhs.pos, base=self.lhs, start=None, stop=None) + self.lhs = lhs.analyse_target_types(env) if self.lhs.type.is_cpp_class: op = env.lookup_operator_for_types(self.pos, '=', [self.lhs.type, self.rhs.type]) @@ -5821,7 +6564,7 @@ class ExecStatNode(StatNode): arg.free_temps(code) code.putln( code.error_goto_if_null(temp_result, self.pos)) - code.put_gotref(temp_result) + code.put_gotref(temp_result, py_object_type) code.put_decref_clear(temp_result, py_object_type) code.funcstate.release_temp(temp_result) @@ -6062,9 +6805,15 @@ class RaiseStatNode(StatNode): # exc_value ExprNode or None # exc_tb ExprNode or None # cause ExprNode or None + # + # set in FlowControl + # in_try_block bool child_attrs = ["exc_type", "exc_value", "exc_tb", "cause"] is_terminator = True + builtin_exc_name = None + wrap_tuple_value = False + in_try_block = False def analyse_expressions(self, env): if self.exc_type: @@ -6072,6 +6821,12 @@ class RaiseStatNode(StatNode): self.exc_type = exc_type.coerce_to_pyobject(env) if self.exc_value: exc_value = self.exc_value.analyse_types(env) + if self.wrap_tuple_value: + if exc_value.type is Builtin.tuple_type or not exc_value.type.is_builtin_type: + # prevent tuple values from being interpreted as argument value tuples + from .ExprNodes import TupleNode + exc_value = TupleNode(exc_value.pos, args=[exc_value.coerce_to_pyobject(env)], slow=True) + exc_value = exc_value.analyse_types(env, skip_children=True) self.exc_value = exc_value.coerce_to_pyobject(env) if self.exc_tb: exc_tb = self.exc_tb.analyse_types(env) @@ -6080,7 +6835,6 @@ class RaiseStatNode(StatNode): cause 
= self.cause.analyse_types(env) self.cause = cause.coerce_to_pyobject(env) # special cases for builtin exceptions - self.builtin_exc_name = None if self.exc_type and not self.exc_value and not self.exc_tb: exc = self.exc_type from . import ExprNodes @@ -6088,9 +6842,19 @@ class RaiseStatNode(StatNode): not (exc.args or (exc.arg_tuple is not None and exc.arg_tuple.args))): exc = exc.function # extract the exception type if exc.is_name and exc.entry.is_builtin: + from . import Symtab self.builtin_exc_name = exc.name if self.builtin_exc_name == 'MemoryError': - self.exc_type = None # has a separate implementation + self.exc_type = None # has a separate implementation + elif (self.builtin_exc_name == 'StopIteration' and + env.is_local_scope and env.name == "__next__" and + env.parent_scope and env.parent_scope.is_c_class_scope and + not self.in_try_block): + # tp_iternext is allowed to return NULL without raising StopIteration. + # For the sake of simplicity, only allow this to happen when not in + # a try block + self.exc_type = None + return self nogil_check = Node.gil_error @@ -6101,6 +6865,11 @@ class RaiseStatNode(StatNode): if self.builtin_exc_name == 'MemoryError': code.putln('PyErr_NoMemory(); %s' % code.error_goto(self.pos)) return + elif self.builtin_exc_name == 'StopIteration' and not self.exc_type: + code.putln('%s = 1;' % Naming.error_without_exception_cname) + code.putln('%s;' % code.error_goto(None)) + code.funcstate.error_without_exception = True + return if self.exc_type: self.exc_type.generate_evaluation_code(code) @@ -6175,10 +6944,10 @@ class ReraiseStatNode(StatNode): vars = code.funcstate.exc_vars if vars: code.globalstate.use_utility_code(restore_exception_utility_code) - code.put_giveref(vars[0]) - code.put_giveref(vars[1]) + code.put_giveref(vars[0], py_object_type) + code.put_giveref(vars[1], py_object_type) # fresh exceptions may not have a traceback yet (-> finally!) 
- code.put_xgiveref(vars[2]) + code.put_xgiveref(vars[2], py_object_type) code.putln("__Pyx_ErrRestoreWithState(%s, %s, %s);" % tuple(vars)) for varname in vars: code.put("%s = 0; " % varname) @@ -6189,65 +6958,55 @@ class ReraiseStatNode(StatNode): UtilityCode.load_cached("ReRaiseException", "Exceptions.c")) code.putln("__Pyx_ReraiseException(); %s" % code.error_goto(self.pos)) + class AssertStatNode(StatNode): # assert statement # - # cond ExprNode - # value ExprNode or None + # condition ExprNode + # value ExprNode or None + # exception (Raise/GIL)StatNode created from 'value' in PostParse transform - child_attrs = ["cond", "value"] + child_attrs = ["condition", "value", "exception"] + exception = None + + def analyse_declarations(self, env): + assert self.value is None, "Message should have been replaced in PostParse()" + assert self.exception is not None, "Message should have been replaced in PostParse()" + self.exception.analyse_declarations(env) def analyse_expressions(self, env): - self.cond = self.cond.analyse_boolean_expression(env) - if self.value: - value = self.value.analyse_types(env) - if value.type is Builtin.tuple_type or not value.type.is_builtin_type: - # prevent tuple values from being interpreted as argument value tuples - from .ExprNodes import TupleNode - value = TupleNode(value.pos, args=[value], slow=True) - self.value = value.analyse_types(env, skip_children=True).coerce_to_pyobject(env) - else: - self.value = value.coerce_to_pyobject(env) + self.condition = self.condition.analyse_temp_boolean_expression(env) + self.exception = self.exception.analyse_expressions(env) return self - nogil_check = Node.gil_error - gil_message = "Raising exception" - def generate_execution_code(self, code): + code.globalstate.use_utility_code( + UtilityCode.load_cached("AssertionsEnabled", "Exceptions.c")) code.putln("#ifndef CYTHON_WITHOUT_ASSERTIONS") - code.putln("if (unlikely(!Py_OptimizeFlag)) {") + code.putln("if (unlikely(__pyx_assertions_enabled())) 
{") code.mark_pos(self.pos) - self.cond.generate_evaluation_code(code) - code.putln( - "if (unlikely(!%s)) {" % self.cond.result()) - if self.value: - self.value.generate_evaluation_code(code) - code.putln( - "PyErr_SetObject(PyExc_AssertionError, %s);" % self.value.py_result()) - self.value.generate_disposal_code(code) - self.value.free_temps(code) - else: - code.putln( - "PyErr_SetNone(PyExc_AssertionError);") + self.condition.generate_evaluation_code(code) code.putln( - code.error_goto(self.pos)) + "if (unlikely(!%s)) {" % self.condition.result()) + self.exception.generate_execution_code(code) code.putln( "}") - self.cond.generate_disposal_code(code) - self.cond.free_temps(code) + self.condition.generate_disposal_code(code) + self.condition.free_temps(code) code.putln( "}") + code.putln("#else") + # avoid unused labels etc. + code.putln("if ((1)); else %s" % code.error_goto(self.pos, used=False)) code.putln("#endif") def generate_function_definitions(self, env, code): - self.cond.generate_function_definitions(env, code) - if self.value is not None: - self.value.generate_function_definitions(env, code) + self.condition.generate_function_definitions(env, code) + self.exception.generate_function_definitions(env, code) def annotate(self, code): - self.cond.annotate(code) - if self.value: - self.value.annotate(code) + self.condition.annotate(code) + self.exception.annotate(code) class IfStatNode(StatNode): @@ -6274,13 +7033,9 @@ class IfStatNode(StatNode): code.mark_pos(self.pos) end_label = code.new_label() last = len(self.if_clauses) - if self.else_clause: - # If the 'else' clause is 'unlikely', then set the preceding 'if' clause to 'likely' to reflect that. 
- self._set_branch_hint(self.if_clauses[-1], self.else_clause, inverse=True) - else: + if not self.else_clause: last -= 1 # avoid redundant goto at end of last if-clause for i, if_clause in enumerate(self.if_clauses): - self._set_branch_hint(if_clause, if_clause.body) if_clause.generate_execution_code(code, end_label, is_last=i == last) if self.else_clause: code.mark_pos(self.else_clause.pos) @@ -6289,21 +7044,6 @@ class IfStatNode(StatNode): code.putln("}") code.put_label(end_label) - def _set_branch_hint(self, clause, statements_node, inverse=False): - if not statements_node.is_terminator: - return - if not isinstance(statements_node, StatListNode) or not statements_node.stats: - return - # Anything that unconditionally raises exceptions should be considered unlikely. - if isinstance(statements_node.stats[-1], (RaiseStatNode, ReraiseStatNode)): - if len(statements_node.stats) > 1: - # Allow simple statements before the 'raise', but no conditions, loops, etc. - non_branch_nodes = (ExprStatNode, AssignmentNode, DelStatNode, GlobalNode, NonlocalNode) - for node in statements_node.stats[:-1]: - if not isinstance(node, non_branch_nodes): - return - clause.branch_hint = 'likely' if inverse else 'unlikely' - def generate_function_definitions(self, env, code): for clause in self.if_clauses: clause.generate_function_definitions(env, code) @@ -6589,7 +7329,7 @@ class DictIterationNextNode(Node): # evaluate all coercions before the assignments for var, result, target in assignments: - code.put_gotref(var.result()) + var.generate_gotref(code) for var, result, target in assignments: result.generate_evaluation_code(code) for var, result, target in assignments: @@ -6651,7 +7391,7 @@ class SetIterationNextNode(Node): code.funcstate.release_temp(result_temp) # evaluate all coercions before the assignments - code.put_gotref(value_ref.result()) + value_ref.generate_gotref(code) self.coerced_value_var.generate_evaluation_code(code) 
self.value_target.generate_assignment_code(self.coerced_value_var, code) value_ref.release(code) @@ -6719,39 +7459,33 @@ class _ForInStatNode(LoopNode, StatNode): code.mark_pos(self.pos) code.put_label(code.continue_label) code.putln("}") - break_label = code.break_label + + # clean up before we enter the 'else:' branch + self.iterator.generate_disposal_code(code) + + else_label = code.new_label("for_else") if self.else_clause else None + end_label = code.new_label("for_end") + label_intercepts = code.label_interceptor( + [code.break_label], + [end_label], + skip_to_label=else_label or end_label, + pos=self.pos, + ) + + code.mark_pos(self.pos) + for _ in label_intercepts: + self.iterator.generate_disposal_code(code) + code.set_loop_labels(old_loop_labels) + self.iterator.free_temps(code) if self.else_clause: - # In nested loops, the 'else' block can contain 'continue' or 'break' - # statements for the outer loop, but we may need to generate cleanup code - # before taking those paths, so we intercept them here. 
- orig_exit_labels = (code.continue_label, code.break_label) - code.continue_label = code.new_label('outer_continue') - code.break_label = code.new_label('outer_break') - code.putln("/*else*/ {") + code.put_label(else_label) self.else_clause.generate_execution_code(code) code.putln("}") - needs_goto_end = not self.else_clause.is_terminator - for exit_label, orig_exit_label in zip([code.continue_label, code.break_label], orig_exit_labels): - if not code.label_used(exit_label): - continue - if needs_goto_end: - code.put_goto(break_label) - needs_goto_end = False - code.mark_pos(self.pos) - code.put_label(exit_label) - self.iterator.generate_disposal_code(code) - code.put_goto(orig_exit_label) - code.set_loop_labels(old_loop_labels) - - code.mark_pos(self.pos) - if code.label_used(break_label): - code.put_label(break_label) - self.iterator.generate_disposal_code(code) - self.iterator.free_temps(code) + code.put_label(end_label) def generate_function_definitions(self, env, code): self.target.generate_function_definitions(env, code) @@ -6969,7 +7703,7 @@ class ForFromStatNode(LoopNode, StatNode): target_node.result(), interned_cname, code.error_goto_if_null(target_node.result(), self.target.pos))) - code.put_gotref(target_node.result()) + target_node.generate_gotref(code) else: target_node = self.target from_py_node = ExprNodes.CoerceFromPyTypeNode( @@ -7105,7 +7839,7 @@ class WithStatNode(StatNode): code.intern_identifier(EncodedString('__aexit__' if self.is_async else '__exit__')), code.error_goto_if_null(self.exit_var, self.pos), )) - code.put_gotref(self.exit_var) + code.put_gotref(self.exit_var, py_object_type) # need to free exit_var in the face of exceptions during setup old_error_label = code.new_error_label() @@ -7249,17 +7983,17 @@ class TryExceptStatNode(StatNode): save_exc.putln("__Pyx_ExceptionSave(%s);" % ( ', '.join(['&%s' % var for var in exc_save_vars]))) for var in exc_save_vars: - save_exc.put_xgotref(var) + save_exc.put_xgotref(var, py_object_type) 
def restore_saved_exception(): for name in exc_save_vars: - code.put_xgiveref(name) + code.put_xgiveref(name, py_object_type) code.putln("__Pyx_ExceptionReset(%s);" % ', '.join(exc_save_vars)) else: # try block cannot raise exceptions, but we had to allocate the temps above, # so just keep the C compiler from complaining about them being unused - mark_vars_used = ["(void)%s;" % var for var in exc_save_vars] + mark_vars_used = ["(void)%s;" % var for var in exc_save_vars] save_exc.putln("%s /* mark used */" % ' '.join(mark_vars_used)) def restore_saved_exception(): @@ -7297,19 +8031,17 @@ class TryExceptStatNode(StatNode): if not self.has_default_clause: code.put_goto(except_error_label) - for exit_label, old_label in [(except_error_label, old_error_label), - (try_break_label, old_break_label), - (try_continue_label, old_continue_label), - (try_return_label, old_return_label), - (except_return_label, old_return_label)]: - if code.label_used(exit_label): - if not normal_case_terminates and not code.label_used(try_end_label): - code.put_goto(try_end_label) - code.put_label(exit_label) - code.mark_pos(self.pos, trace=False) - if can_raise: - restore_saved_exception() - code.put_goto(old_label) + label_intercepts = code.label_interceptor( + [except_error_label, try_break_label, try_continue_label, try_return_label, except_return_label], + [old_error_label, old_break_label, old_continue_label, old_return_label, old_return_label], + skip_to_label=try_end_label if not normal_case_terminates and not code.label_used(try_end_label) else None, + pos=self.pos, + trace=False, + ) + + for _ in label_intercepts: + if can_raise: + restore_saved_exception() if code.label_used(except_end_label): if not normal_case_terminates and not code.label_used(try_end_label): @@ -7402,17 +8134,40 @@ class ExceptClauseNode(Node): for _ in range(3)] code.globalstate.use_utility_code(UtilityCode.load_cached("PyErrFetchRestore", "Exceptions.c")) code.putln("__Pyx_ErrFetch(&%s, &%s, &%s);" % 
tuple(exc_vars)) - code.globalstate.use_utility_code(UtilityCode.load_cached("FastTypeChecks", "ModuleSetupCode.c")) - exc_test_func = "__Pyx_PyErr_GivenExceptionMatches(%s, %%s)" % exc_vars[0] + exc_type = exc_vars[0] else: - exc_vars = () - code.globalstate.use_utility_code(UtilityCode.load_cached("PyErrExceptionMatches", "Exceptions.c")) - exc_test_func = "__Pyx_PyErr_ExceptionMatches(%s)" + exc_vars = exc_type = None - exc_tests = [] for pattern in self.pattern: pattern.generate_evaluation_code(code) - exc_tests.append(exc_test_func % pattern.py_result()) + patterns = [pattern.py_result() for pattern in self.pattern] + + exc_tests = [] + if exc_type: + code.globalstate.use_utility_code( + UtilityCode.load_cached("FastTypeChecks", "ModuleSetupCode.c")) + if len(patterns) == 2: + exc_tests.append("__Pyx_PyErr_GivenExceptionMatches2(%s, %s, %s)" % ( + exc_type, patterns[0], patterns[1], + )) + else: + exc_tests.extend( + "__Pyx_PyErr_GivenExceptionMatches(%s, %s)" % (exc_type, pattern) + for pattern in patterns + ) + elif len(patterns) == 2: + code.globalstate.use_utility_code( + UtilityCode.load_cached("FastTypeChecks", "ModuleSetupCode.c")) + exc_tests.append("__Pyx_PyErr_ExceptionMatches2(%s, %s)" % ( + patterns[0], patterns[1], + )) + else: + code.globalstate.use_utility_code( + UtilityCode.load_cached("PyErrExceptionMatches", "Exceptions.c")) + exc_tests.extend( + "__Pyx_PyErr_ExceptionMatches(%s)" % pattern + for pattern in patterns + ) match_flag = code.funcstate.allocate_temp(PyrexTypes.c_int_type, manage_ref=False) code.putln("%s = %s;" % (match_flag, ' || '.join(exc_tests))) @@ -7420,7 +8175,7 @@ class ExceptClauseNode(Node): pattern.generate_disposal_code(code) pattern.free_temps(code) - if has_non_literals: + if exc_vars: code.putln("__Pyx_ErrRestore(%s, %s, %s);" % tuple(exc_vars)) code.putln(' '.join(["%s = 0;" % var for var in exc_vars])) for temp in exc_vars: @@ -7455,7 +8210,7 @@ class ExceptClauseNode(Node): code.putln("if (__Pyx_GetException(%s) 
< 0) %s" % ( exc_args, code.error_goto(self.pos))) for var in exc_vars: - code.put_gotref(var) + code.put_gotref(var, py_object_type) if self.target: self.exc_value.set_var(exc_vars[1]) self.exc_value.generate_evaluation_code(code) @@ -7464,9 +8219,7 @@ class ExceptClauseNode(Node): for tempvar, node in zip(exc_vars, self.excinfo_target.args): node.set_var(tempvar) - old_break_label, old_continue_label = code.break_label, code.continue_label - code.break_label = code.new_label('except_break') - code.continue_label = code.new_label('except_continue') + old_loop_labels = code.new_loop_labels("except_") old_exc_vars = code.funcstate.exc_vars code.funcstate.exc_vars = exc_vars @@ -7480,15 +8233,11 @@ class ExceptClauseNode(Node): code.put_xdecref_clear(var, py_object_type) code.put_goto(end_label) - for new_label, old_label in [(code.break_label, old_break_label), - (code.continue_label, old_continue_label)]: - if code.label_used(new_label): - code.put_label(new_label) - for var in exc_vars: - code.put_decref_clear(var, py_object_type) - code.put_goto(old_label) - code.break_label = old_break_label - code.continue_label = old_continue_label + for _ in code.label_interceptor(code.get_loop_labels(), old_loop_labels): + for var in exc_vars: + code.put_decref_clear(var, py_object_type) + + code.set_loop_labels(old_loop_labels) for temp in exc_vars: code.funcstate.release_temp(temp) @@ -7644,12 +8393,8 @@ class TryFinallyStatNode(StatNode): code.funcstate.release_temp(exc_filename_cname) code.put_goto(old_error_label) - for new_label, old_label in zip(code.get_all_labels(), finally_old_labels): - if not code.label_used(new_label): - continue - code.put_label(new_label) + for _ in code.label_interceptor(code.get_all_labels(), finally_old_labels): self.put_error_cleaner(code, exc_vars) - code.put_goto(old_label) for cname in exc_vars: code.funcstate.release_temp(cname) @@ -7659,6 +8404,7 @@ class TryFinallyStatNode(StatNode): return_label = code.return_label exc_vars = () + # 
TODO: use code.label_interceptor()? for i, (new_label, old_label) in enumerate(zip(new_labels, old_labels)): if not code.label_used(new_label): continue @@ -7737,7 +8483,7 @@ class TryFinallyStatNode(StatNode): " unlikely(__Pyx_GetException(&%s, &%s, &%s) < 0)) " "__Pyx_ErrFetch(&%s, &%s, &%s);" % (exc_vars[:3] * 2)) for var in exc_vars: - code.put_xgotref(var) + code.put_xgotref(var, py_object_type) if exc_lineno_cnames: code.putln("%s = %s; %s = %s; %s = %s;" % ( exc_lineno_cnames[0], Naming.lineno_cname, @@ -7758,11 +8504,11 @@ class TryFinallyStatNode(StatNode): # unused utility functions and/or temps code.putln("if (PY_MAJOR_VERSION >= 3) {") for var in exc_vars[3:]: - code.put_xgiveref(var) + code.put_xgiveref(var, py_object_type) code.putln("__Pyx_ExceptionReset(%s, %s, %s);" % exc_vars[3:]) code.putln("}") for var in exc_vars[:3]: - code.put_xgiveref(var) + code.put_xgiveref(var, py_object_type) code.putln("__Pyx_ErrRestore(%s, %s, %s);" % exc_vars[:3]) if self.is_try_finally_in_nogil: @@ -7784,7 +8530,7 @@ class TryFinallyStatNode(StatNode): # unused utility functions and/or temps code.putln("if (PY_MAJOR_VERSION >= 3) {") for var in exc_vars[3:]: - code.put_xgiveref(var) + code.put_xgiveref(var, py_object_type) code.putln("__Pyx_ExceptionReset(%s, %s, %s);" % exc_vars[3:]) code.putln("}") for var in exc_vars[:3]: @@ -7811,11 +8557,16 @@ class GILStatNode(NogilTryFinallyStatNode): # 'with gil' or 'with nogil' statement # # state string 'gil' or 'nogil' + # scope_gil_state_known bool For nogil functions this can be False, since they can also be run with gil + # set to False by GilCheck transform + child_attrs = ["condition"] + NogilTryFinallyStatNode.child_attrs state_temp = None + scope_gil_state_known = True - def __init__(self, pos, state, body): + def __init__(self, pos, state, body, condition=None): self.state = state + self.condition = condition self.create_state_temp_if_needed(pos, state, body) TryFinallyStatNode.__init__( self, pos, @@ -7842,11 
+8593,18 @@ class GILStatNode(NogilTryFinallyStatNode): if self.state == 'gil': env.has_with_gil_block = True + if self.condition is not None: + self.condition.analyse_declarations(env) + return super(GILStatNode, self).analyse_declarations(env) def analyse_expressions(self, env): env.use_utility_code( UtilityCode.load_cached("ForceInitThreads", "ModuleSetupCode.c")) + + if self.condition is not None: + self.condition = self.condition.analyse_expressions(env) + was_nogil = env.nogil env.nogil = self.state == 'nogil' node = TryFinallyStatNode.analyse_expressions(self, env) @@ -7867,7 +8625,7 @@ class GILStatNode(NogilTryFinallyStatNode): code.put_ensure_gil(variable=variable) code.funcstate.gil_owned = True else: - code.put_release_gil(variable=variable) + code.put_release_gil(variable=variable, unknown_gil_state=not self.scope_gil_state_known) code.funcstate.gil_owned = False TryFinallyStatNode.generate_execution_code(self, code) @@ -7884,10 +8642,13 @@ class GILExitNode(StatNode): Used as the 'finally' block in a GILStatNode state string 'gil' or 'nogil' + # scope_gil_state_known bool For nogil functions this can be False, since they can also be run with gil + # set to False by GilCheck transform """ child_attrs = [] state_temp = None + scope_gil_state_known = True def analyse_expressions(self, env): return self @@ -7901,7 +8662,7 @@ class GILExitNode(StatNode): if self.state == 'gil': code.put_release_ensured_gil(variable) else: - code.put_acquire_gil(variable) + code.put_acquire_gil(variable, unknown_gil_state=not self.scope_gil_state_known) class EnsureGILNode(GILExitNode): @@ -7933,6 +8694,31 @@ utility_code_for_imports = { 'inspect': ("__Pyx_patch_inspect", "PatchInspect", "Coroutine.c"), } +def cimport_numpy_check(node, code): + # shared code between CImportStatNode and FromCImportStatNode + # check to ensure that import_array is called + for mod in code.globalstate.module_node.scope.cimported_modules: + if mod.name != node.module_name: + continue + # there 
are sometimes several cimported modules with the same name + # so complete the loop if necessary + import_array = mod.lookup_here("import_array") + _import_array = mod.lookup_here("_import_array") + # at least one entry used + used = (import_array and import_array.used) or (_import_array and _import_array.used) + if ((import_array or _import_array) # at least one entry found + and not used): + # sanity check that this is actually numpy and not a user pxd called "numpy" + if _import_array and _import_array.type.is_cfunction: + # warning is mainly for the sake of testing + warning(node.pos, "'numpy.import_array()' has been added automatically " + "since 'numpy' was cimported but 'numpy.import_array' was not called.", 0) + code.globalstate.use_utility_code( + UtilityCode.load_cached("NumpyImportArray", "NumpyImportArray.c") + ) + return # no need to continue once the utility code is added + + class CImportStatNode(StatNode): # cimport statement @@ -7966,7 +8752,8 @@ class CImportStatNode(StatNode): env.declare_module(top_name, top_module_scope, self.pos) else: name = self.as_name or self.module_name - env.declare_module(name, module_scope, self.pos) + entry = env.declare_module(name, module_scope, self.pos) + entry.known_standard_library_import = self.module_name if self.module_name in utility_code_for_cimports: env.use_utility_code(utility_code_for_cimports[self.module_name]()) @@ -7974,7 +8761,8 @@ class CImportStatNode(StatNode): return self def generate_execution_code(self, code): - pass + if self.module_name == "numpy": + cimport_numpy_check(self, code) class FromCImportStatNode(StatNode): @@ -7982,7 +8770,7 @@ class FromCImportStatNode(StatNode): # # module_name string Qualified name of module # relative_level int or None Relative import: number of dots before module_name - # imported_names [(pos, name, as_name, kind)] Names to be imported + # imported_names [(pos, name, as_name)] Names to be imported child_attrs = [] module_name = None @@ -7993,44 +8781,43 @@ 
class FromCImportStatNode(StatNode): if not env.is_module_scope: error(self.pos, "cimport only allowed at module level") return - if self.relative_level and self.relative_level > env.qualified_name.count('.'): - error(self.pos, "relative cimport beyond main package is not allowed") - return + qualified_name_components = env.qualified_name.count('.') + 1 + if self.relative_level: + if self.relative_level > qualified_name_components: + # 1. case: importing beyond package: from .. import pkg + error(self.pos, "relative cimport beyond main package is not allowed") + return + elif self.relative_level == qualified_name_components and not env.is_package: + # 2. case: importing from same level but current dir is not package: from . import module + error(self.pos, "relative cimport from non-package directory is not allowed") + return module_scope = env.find_module(self.module_name, self.pos, relative_level=self.relative_level) module_name = module_scope.qualified_name env.add_imported_module(module_scope) - for pos, name, as_name, kind in self.imported_names: + for pos, name, as_name in self.imported_names: if name == "*": for local_name, entry in list(module_scope.entries.items()): env.add_imported_entry(local_name, entry, pos) else: entry = module_scope.lookup(name) if entry: - if kind and not self.declaration_matches(entry, kind): - entry.redeclared(pos) entry.used = 1 else: - if kind == 'struct' or kind == 'union': - entry = module_scope.declare_struct_or_union( - name, kind=kind, scope=None, typedef_flag=0, pos=pos) - elif kind == 'class': - entry = module_scope.declare_c_class(name, pos=pos, module_name=module_name) + submodule_scope = env.context.find_module( + name, relative_to=module_scope, pos=self.pos, absolute_fallback=False) + if submodule_scope.parent_module is module_scope: + env.declare_module(as_name or name, submodule_scope, self.pos) else: - submodule_scope = env.context.find_module( - name, relative_to=module_scope, pos=self.pos, absolute_fallback=False) 
- if submodule_scope.parent_module is module_scope: - env.declare_module(as_name or name, submodule_scope, self.pos) - else: - error(pos, "Name '%s' not declared in module '%s'" % (name, module_name)) + error(pos, "Name '%s' not declared in module '%s'" % (name, module_name)) if entry: local_name = as_name or name env.add_imported_entry(local_name, entry, pos) - if module_name.startswith('cpython') or module_name.startswith('cython'): # enough for now + if module_name.startswith('cpython') or module_name.startswith('cython'): # enough for now if module_name in utility_code_for_cimports: env.use_utility_code(utility_code_for_cimports[module_name]()) - for _, name, _, _ in self.imported_names: + for _, name, _ in self.imported_names: fqname = '%s.%s' % (module_name, name) if fqname in utility_code_for_cimports: env.use_utility_code(utility_code_for_cimports[fqname]()) @@ -8053,7 +8840,8 @@ class FromCImportStatNode(StatNode): return self def generate_execution_code(self, code): - pass + if self.module_name == "numpy": + cimport_numpy_check(self, code) class FromImportStatNode(StatNode): @@ -8078,6 +8866,14 @@ class FromImportStatNode(StatNode): self.import_star = 1 else: target.analyse_target_declaration(env) + if target.entry: + if target.get_known_standard_library_import() is None: + target.entry.known_standard_library_import = EncodedString( + "%s.%s" % (self.module.module_name.value, name)) + else: + # it isn't unambiguous + target.entry.known_standard_library_import = "" + def analyse_expressions(self, env): from . 
import ExprNodes @@ -8135,7 +8931,7 @@ class FromImportStatNode(StatNode): self.module.py_result(), code.intern_identifier(name), code.error_goto_if_null(item_temp, self.pos))) - code.put_gotref(item_temp) + code.put_gotref(item_temp, py_object_type) if coerced_item is None: target.generate_assignment_code(self.item, code) else: @@ -8251,7 +9047,7 @@ class ParallelStatNode(StatNode, ParallelNode): seen.add(dictitem.key.value) if dictitem.key.value == 'num_threads': if not dictitem.value.is_none: - self.num_threads = dictitem.value + self.num_threads = dictitem.value elif self.is_prange and dictitem.key.value == 'chunksize': if not dictitem.value.is_none: self.chunksize = dictitem.value @@ -8512,11 +9308,7 @@ class ParallelStatNode(StatNode, ParallelNode): if self.is_parallel and not self.is_nested_prange: code.putln("/* Clean up any temporaries */") for temp, type in sorted(self.temps): - if type.is_memoryviewslice: - code.put_xdecref_memoryviewslice(temp, have_gil=False) - elif type.is_pyobject: - code.put_xdecref(temp, type) - code.putln("%s = NULL;" % temp) + code.put_xdecref_clear(temp, type, have_gil=False) def setup_parallel_control_flow_block(self, code): """ @@ -8549,7 +9341,7 @@ class ParallelStatNode(StatNode, ParallelNode): self.old_return_label = code.return_label code.return_label = code.new_label(name="return") - code.begin_block() # parallel control flow block + code.begin_block() # parallel control flow block self.begin_of_parallel_control_block_point = code.insertion_point() self.begin_of_parallel_control_block_point_after_decls = code.insertion_point() @@ -8679,7 +9471,7 @@ class ParallelStatNode(StatNode, ParallelNode): code.putln_openmp("#pragma omp critical(%s)" % section_name) ParallelStatNode.critical_section_counter += 1 - code.begin_block() # begin critical section + code.begin_block() # begin critical section c = self.begin_of_parallel_control_block_point @@ -8688,7 +9480,10 @@ class ParallelStatNode(StatNode, ParallelNode): if not 
lastprivate or entry.type.is_pyobject: continue - type_decl = entry.type.empty_declaration_code() + if entry.type.is_cpp_class and not entry.type.is_fake_reference and code.globalstate.directives['cpp_locals']: + type_decl = entry.type.cpp_optional_declaration_code("") + else: + type_decl = entry.type.empty_declaration_code() temp_cname = "__pyx_parallel_temp%d" % temp_count private_cname = entry.cname @@ -8702,12 +9497,19 @@ class ParallelStatNode(StatNode, ParallelNode): # Declare the parallel private in the outer block c.putln("%s %s%s;" % (type_decl, temp_cname, init)) + self.parallel_private_temps.append((temp_cname, private_cname, entry.type)) + + if entry.type.is_cpp_class: + # moving is fine because we're quitting the loop and so won't be directly accessing the variable again + code.globalstate.use_utility_code( + UtilityCode.load_cached("MoveIfSupported", "CppSupport.cpp")) + private_cname = "__PYX_STD_MOVE_IF_SUPPORTED(%s)" % private_cname # Initialize before escaping code.putln("%s = %s;" % (temp_cname, private_cname)) - self.parallel_private_temps.append((temp_cname, private_cname)) - code.end_block() # end critical section + + code.end_block() # end critical section def fetch_parallel_exception(self, code): """ @@ -8747,7 +9549,7 @@ class ParallelStatNode(StatNode, ParallelNode): pos_info = chain(*zip(self.parallel_pos_info, self.pos_info)) code.funcstate.uses_error_indicator = True code.putln("%s = %s; %s = %s; %s = %s;" % tuple(pos_info)) - code.put_gotref(Naming.parallel_exc_type) + code.put_gotref(Naming.parallel_exc_type, py_object_type) code.putln( "}") @@ -8760,7 +9562,7 @@ class ParallelStatNode(StatNode, ParallelNode): code.begin_block() code.put_ensure_gil(declare_gilstate=True) - code.put_giveref(Naming.parallel_exc_type) + code.put_giveref(Naming.parallel_exc_type, py_object_type) code.putln("__Pyx_ErrRestoreWithState(%s, %s, %s);" % self.parallel_exc) pos_info = chain(*zip(self.pos_info, self.parallel_pos_info)) code.putln("%s = %s; %s = 
%s; %s = %s;" % tuple(pos_info)) @@ -8826,7 +9628,10 @@ class ParallelStatNode(StatNode, ParallelNode): code.putln( "if (%s) {" % Naming.parallel_why) - for temp_cname, private_cname in self.parallel_private_temps: + for temp_cname, private_cname, temp_type in self.parallel_private_temps: + if temp_type.is_cpp_class: + # utility code was loaded earlier + temp_cname = "__PYX_STD_MOVE_IF_SUPPORTED(%s)" % temp_cname code.putln("%s = %s;" % (private_cname, temp_cname)) code.putln("switch (%s) {" % Naming.parallel_why) @@ -8848,11 +9653,11 @@ class ParallelStatNode(StatNode, ParallelNode): self.restore_parallel_exception(code) code.put_goto(code.error_label) - code.putln("}") # end switch + code.putln("}") # end switch code.putln( - "}") # end if + "}") # end if - code.end_block() # end parallel control flow block + code.end_block() # end parallel control flow block self.redef_builtin_expect_apple_gcc_bug(code) # FIXME: improve with version number for OS X Lion @@ -8971,9 +9776,6 @@ class ParallelRangeNode(ParallelStatNode): else: self.start, self.stop, self.step = self.args - if hasattr(self.schedule, 'decode'): - self.schedule = self.schedule.decode('ascii') - if self.schedule not in (None, 'static', 'dynamic', 'guided', 'runtime'): error(self.pos, "Invalid schedule argument to prange: %s" % (self.schedule,)) @@ -9028,7 +9830,8 @@ class ParallelRangeNode(ParallelStatNode): # ensure lastprivate behaviour and propagation. If the target index is # not a NameNode, it won't have an entry, and an error was issued by # ParallelRangeTransform - if hasattr(self.target, 'entry'): + target_entry = getattr(self.target, 'entry', None) + if target_entry: self.assignments[self.target.entry] = self.target.pos, None node = super(ParallelRangeNode, self).analyse_expressions(env) @@ -9141,9 +9944,12 @@ class ParallelRangeNode(ParallelStatNode): # TODO: check if the step is 0 and if so, raise an exception in a # 'with gil' block. 
For now, just abort - code.putln("if ((%(step)s == 0)) abort();" % fmt_dict) + if self.step is not None and self.step.has_constant_result() and self.step.constant_result == 0: + error(node.pos, "Iteration with step 0 is invalid.") + elif not fmt_dict['step'].isdigit() or int(fmt_dict['step']) == 0: + code.putln("if (((%(step)s) == 0)) abort();" % fmt_dict) - self.setup_parallel_control_flow_block(code) # parallel control flow block + self.setup_parallel_control_flow_block(code) # parallel control flow block # Note: nsteps is private in an outer scope if present code.putln("%(nsteps)s = (%(stop)s - %(start)s + %(step)s - %(step)s/abs(%(step)s)) / %(step)s;" % fmt_dict) @@ -9155,9 +9961,9 @@ class ParallelRangeNode(ParallelStatNode): # erroneously believes that nsteps may be <= 0, leaving the private # target index uninitialized code.putln("if (%(nsteps)s > 0)" % fmt_dict) - code.begin_block() # if block + code.begin_block() # if block self.generate_loop(code, fmt_dict) - code.end_block() # end if block + code.end_block() # end if block self.restore_labels(code) @@ -9165,13 +9971,13 @@ class ParallelRangeNode(ParallelStatNode): if self.breaking_label_used: code.put("if (%s < 2)" % Naming.parallel_why) - code.begin_block() # else block + code.begin_block() # else block code.putln("/* else */") self.else_clause.generate_execution_code(code) - code.end_block() # end else block + code.end_block() # end else block # ------ cleanup ------ - self.end_parallel_control_flow_block(code) # end parallel control flow block + self.end_parallel_control_flow_block(code) # end parallel control flow block # And finally, release our privates and write back any closure # variables @@ -9202,7 +10008,7 @@ class ParallelRangeNode(ParallelStatNode): code.putln("") code.putln("#endif /* _OPENMP */") - code.begin_block() # pragma omp parallel begin block + code.begin_block() # pragma omp parallel begin block # Initialize the GIL if needed for this thread self.begin_parallel_block(code) diff 
--git a/Cython/Compiler/Optimize.py b/Cython/Compiler/Optimize.py index 7e9435ba0..fb6dc5dae 100644 --- a/Cython/Compiler/Optimize.py +++ b/Cython/Compiler/Optimize.py @@ -41,7 +41,7 @@ except ImportError: try: from __builtin__ import basestring except ImportError: - basestring = str # Python 3 + basestring = str # Python 3 def load_c_utility(name): @@ -192,19 +192,9 @@ class IterationTransform(Visitor.EnvTransform): def _optimise_for_loop(self, node, iterable, reversed=False): annotation_type = None if (iterable.is_name or iterable.is_attribute) and iterable.entry and iterable.entry.annotation: - annotation = iterable.entry.annotation + annotation = iterable.entry.annotation.expr if annotation.is_subscript: annotation = annotation.base # container base type - # FIXME: generalise annotation evaluation => maybe provide a "qualified name" also for imported names? - if annotation.is_name: - if annotation.entry and annotation.entry.qualified_name == 'typing.Dict': - annotation_type = Builtin.dict_type - elif annotation.name == 'Dict': - annotation_type = Builtin.dict_type - if annotation.entry and annotation.entry.qualified_name in ('typing.Set', 'typing.FrozenSet'): - annotation_type = Builtin.set_type - elif annotation.name in ('Set', 'FrozenSet'): - annotation_type = Builtin.set_type if Builtin.dict_type in (iterable.type, annotation_type): # like iterating over dict.keys() @@ -228,6 +218,12 @@ class IterationTransform(Visitor.EnvTransform): return self._transform_bytes_iteration(node, iterable, reversed=reversed) if iterable.type is Builtin.unicode_type: return self._transform_unicode_iteration(node, iterable, reversed=reversed) + # in principle _transform_indexable_iteration would work on most of the above, and + # also tuple and list. 
However, it probably isn't quite as optimized + if iterable.type is Builtin.bytearray_type: + return self._transform_indexable_iteration(node, iterable, is_mutable=True, reversed=reversed) + if isinstance(iterable, ExprNodes.CoerceToPyTypeNode) and iterable.arg.type.is_memoryviewslice: + return self._transform_indexable_iteration(node, iterable.arg, is_mutable=False, reversed=reversed) # the rest is based on function calls if not isinstance(iterable, ExprNodes.SimpleCallNode): @@ -323,6 +319,92 @@ class IterationTransform(Visitor.EnvTransform): return self._optimise_for_loop(node, arg, reversed=True) + def _transform_indexable_iteration(self, node, slice_node, is_mutable, reversed=False): + """In principle can handle any iterable that Cython has a len() for and knows how to index""" + unpack_temp_node = UtilNodes.LetRefNode( + slice_node.as_none_safe_node("'NoneType' is not iterable"), + may_hold_none=False, is_temp=True + ) + + start_node = ExprNodes.IntNode( + node.pos, value='0', constant_result=0, type=PyrexTypes.c_py_ssize_t_type) + def make_length_call(): + # helper function since we need to create this node for a couple of places + builtin_len = ExprNodes.NameNode(node.pos, name="len", + entry=Builtin.builtin_scope.lookup("len")) + return ExprNodes.SimpleCallNode(node.pos, + function=builtin_len, + args=[unpack_temp_node] + ) + length_temp = UtilNodes.LetRefNode(make_length_call(), type=PyrexTypes.c_py_ssize_t_type, is_temp=True) + end_node = length_temp + + if reversed: + relation1, relation2 = '>', '>=' + start_node, end_node = end_node, start_node + else: + relation1, relation2 = '<=', '<' + + counter_ref = UtilNodes.LetRefNode(pos=node.pos, type=PyrexTypes.c_py_ssize_t_type) + + target_value = ExprNodes.IndexNode(slice_node.pos, base=unpack_temp_node, + index=counter_ref) + + target_assign = Nodes.SingleAssignmentNode( + pos = node.target.pos, + lhs = node.target, + rhs = target_value) + + # analyse with boundscheck and wraparound + # off (because we're 
confident we know the size) + env = self.current_env() + new_directives = Options.copy_inherited_directives(env.directives, boundscheck=False, wraparound=False) + target_assign = Nodes.CompilerDirectivesNode( + target_assign.pos, + directives=new_directives, + body=target_assign, + ) + + body = Nodes.StatListNode( + node.pos, + stats = [target_assign]) # exclude node.body for now to not reanalyse it + if is_mutable: + # We need to be slightly careful here that we are actually modifying the loop + # bounds and not a temp copy of it. Setting is_temp=True on length_temp seems + # to ensure this. + # If this starts to fail then we could insert an "if out_of_bounds: break" instead + loop_length_reassign = Nodes.SingleAssignmentNode(node.pos, + lhs = length_temp, + rhs = make_length_call()) + body.stats.append(loop_length_reassign) + + loop_node = Nodes.ForFromStatNode( + node.pos, + bound1=start_node, relation1=relation1, + target=counter_ref, + relation2=relation2, bound2=end_node, + step=None, body=body, + else_clause=node.else_clause, + from_range=True) + + ret = UtilNodes.LetNode( + unpack_temp_node, + UtilNodes.LetNode( + length_temp, + # TempResultFromStatNode provides the framework where the "counter_ref" + # temp is set up and can be assigned to. However, we don't need the + # result it returns so wrap it in an ExprStatNode. 
+ Nodes.ExprStatNode(node.pos, + expr=UtilNodes.TempResultFromStatNode( + counter_ref, + loop_node + ) + ) + ) + ).analyse_expressions(env) + body.stats.insert(1, node.body) + return ret + PyBytes_AS_STRING_func_type = PyrexTypes.CFuncType( PyrexTypes.c_char_ptr_type, [ PyrexTypes.CFuncTypeArg("s", Builtin.bytes_type, None) @@ -1144,7 +1226,7 @@ class SwitchTransform(Visitor.EnvTransform): # integers on iteration, whereas Py2 returns 1-char byte # strings characters = string_literal.value - characters = list(set([ characters[i:i+1] for i in range(len(characters)) ])) + characters = list({ characters[i:i+1] for i in range(len(characters)) }) characters.sort() return [ ExprNodes.CharNode(string_literal.pos, value=charval, constant_result=charval) @@ -1156,7 +1238,8 @@ class SwitchTransform(Visitor.EnvTransform): return self.NO_MATCH elif common_var is not None and not is_common_value(var, common_var): return self.NO_MATCH - elif not (var.type.is_int or var.type.is_enum) or sum([not (cond.type.is_int or cond.type.is_enum) for cond in conditions]): + elif not (var.type.is_int or var.type.is_enum) or any( + [not (cond.type.is_int or cond.type.is_enum) for cond in conditions]): return self.NO_MATCH return not_in, var, conditions @@ -1573,7 +1656,7 @@ class EarlyReplaceBuiltinCalls(Visitor.EnvTransform): utility_code = utility_code) def _error_wrong_arg_count(self, function_name, node, args, expected=None): - if not expected: # None or 0 + if not expected: # None or 0 arg_str = '' elif isinstance(expected, basestring) or expected > 1: arg_str = '...' 
@@ -1727,7 +1810,7 @@ class EarlyReplaceBuiltinCalls(Visitor.EnvTransform): arg = pos_args[0] if isinstance(arg, ExprNodes.ComprehensionNode) and arg.type is Builtin.list_type: - list_node = pos_args[0] + list_node = arg loop_node = list_node.loop elif isinstance(arg, ExprNodes.GeneratorExpressionNode): @@ -1757,7 +1840,11 @@ class EarlyReplaceBuiltinCalls(Visitor.EnvTransform): # Interestingly, PySequence_List works on a lot of non-sequence # things as well. list_node = loop_node = ExprNodes.PythonCapiCallNode( - node.pos, "PySequence_List", self.PySequence_List_func_type, + node.pos, + "__Pyx_PySequence_ListKeepNew" + if arg.is_temp and arg.type in (PyrexTypes.py_object_type, Builtin.list_type) + else "PySequence_List", + self.PySequence_List_func_type, args=pos_args, is_temp=True) result_node = UtilNodes.ResultRefNode( @@ -1803,7 +1890,7 @@ class EarlyReplaceBuiltinCalls(Visitor.EnvTransform): if not yield_expression.is_literal or not yield_expression.type.is_int: return node except AttributeError: - return node # in case we don't have a type yet + return node # in case we don't have a type yet # special case: old Py2 backwards compatible "sum([int_const for ...])" # can safely be unpacked into a genexpr @@ -2018,7 +2105,8 @@ class InlineDefNodeCalls(Visitor.NodeRefCleanupMixin, Visitor.EnvTransform): return node inlined = ExprNodes.InlinedDefNodeCallNode( node.pos, function_name=function_name, - function=function, args=node.args) + function=function, args=node.args, + generator_arg_tag=node.generator_arg_tag) if inlined.can_be_inlined(): return self.replace(node, inlined) return node @@ -2097,12 +2185,13 @@ class OptimizeBuiltinCalls(Visitor.NodeRefCleanupMixin, func_arg = arg.args[0] if func_arg.type is Builtin.float_type: return func_arg.as_none_safe_node("float() argument must be a string or a number, not 'NoneType'") - elif func_arg.type.is_pyobject: + elif func_arg.type.is_pyobject and arg.function.cname == "__Pyx_PyObject_AsDouble": return 
ExprNodes.PythonCapiCallNode( node.pos, '__Pyx_PyNumber_Float', self.PyNumber_Float_func_type, args=[func_arg], py_name='float', is_temp=node.is_temp, + utility_code = UtilityCode.load_cached("pynumber_float", "TypeConversion.c"), result_is_used=node.result_is_used, ).coerce_to(node.type, self.current_env()) return node @@ -2210,6 +2299,9 @@ class OptimizeBuiltinCalls(Visitor.NodeRefCleanupMixin, if func_arg.type.is_int or node.type.is_int: if func_arg.type == node.type: return func_arg + elif func_arg.type in (PyrexTypes.c_py_ucs4_type, PyrexTypes.c_py_unicode_type): + # need to parse (<Py_UCS4>'1') as digit 1 + return self._pyucs4_to_number(node, function.name, func_arg) elif node.type.assignable_from(func_arg.type) or func_arg.type.is_float: return ExprNodes.TypecastNode(node.pos, operand=func_arg, type=node.type) elif func_arg.type.is_float and node.type.is_numeric: @@ -2230,13 +2322,40 @@ class OptimizeBuiltinCalls(Visitor.NodeRefCleanupMixin, if func_arg.type.is_float or node.type.is_float: if func_arg.type == node.type: return func_arg + elif func_arg.type in (PyrexTypes.c_py_ucs4_type, PyrexTypes.c_py_unicode_type): + # need to parse (<Py_UCS4>'1') as digit 1 + return self._pyucs4_to_number(node, function.name, func_arg) elif node.type.assignable_from(func_arg.type) or func_arg.type.is_float: return ExprNodes.TypecastNode( node.pos, operand=func_arg, type=node.type) return node + pyucs4_int_func_type = PyrexTypes.CFuncType( + PyrexTypes.c_int_type, [ + PyrexTypes.CFuncTypeArg("arg", PyrexTypes.c_py_ucs4_type, None) + ], + exception_value="-1") + + pyucs4_double_func_type = PyrexTypes.CFuncType( + PyrexTypes.c_double_type, [ + PyrexTypes.CFuncTypeArg("arg", PyrexTypes.c_py_ucs4_type, None) + ], + exception_value="-1.0") + + def _pyucs4_to_number(self, node, py_type_name, func_arg): + assert py_type_name in ("int", "float") + return ExprNodes.PythonCapiCallNode( + node.pos, "__Pyx_int_from_UCS4" if py_type_name == "int" else "__Pyx_double_from_UCS4", + 
func_type=self.pyucs4_int_func_type if py_type_name == "int" else self.pyucs4_double_func_type, + args=[func_arg], + py_name=py_type_name, + is_temp=node.is_temp, + result_is_used=node.result_is_used, + utility_code=UtilityCode.load_cached("int_pyucs4" if py_type_name == "int" else "float_pyucs4", "Builtins.c"), + ).coerce_to(node.type, self.current_env()) + def _error_wrong_arg_count(self, function_name, node, args, expected=None): - if not expected: # None or 0 + if not expected: # None or 0 arg_str = '' elif isinstance(expected, basestring) or expected > 1: arg_str = '...' @@ -2316,6 +2435,38 @@ class OptimizeBuiltinCalls(Visitor.NodeRefCleanupMixin, return ExprNodes.CachedBuiltinMethodCallNode( node, function.obj, attr_name, arg_list) + PyObject_String_func_type = PyrexTypes.CFuncType( + PyrexTypes.py_object_type, [ # Change this to Builtin.str_type when removing Py2 support. + PyrexTypes.CFuncTypeArg("obj", PyrexTypes.py_object_type, None) + ]) + + def _handle_simple_function_str(self, node, function, pos_args): + """Optimize single argument calls to str(). 
+ """ + if len(pos_args) != 1: + if len(pos_args) == 0: + return ExprNodes.StringNode(node.pos, value=EncodedString(), constant_result='') + return node + arg = pos_args[0] + + if arg.type is Builtin.str_type: + if not arg.may_be_none(): + return arg + + cname = "__Pyx_PyStr_Str" + utility_code = UtilityCode.load_cached('PyStr_Str', 'StringTools.c') + else: + cname = '__Pyx_PyObject_Str' + utility_code = UtilityCode.load_cached('PyObject_Str', 'StringTools.c') + + return ExprNodes.PythonCapiCallNode( + node.pos, cname, self.PyObject_String_func_type, + args=pos_args, + is_temp=node.is_temp, + utility_code=utility_code, + py_name="str" + ) + PyObject_Unicode_func_type = PyrexTypes.CFuncType( Builtin.unicode_type, [ PyrexTypes.CFuncTypeArg("obj", PyrexTypes.py_object_type, None) @@ -2387,8 +2538,14 @@ class OptimizeBuiltinCalls(Visitor.NodeRefCleanupMixin, return node arg = pos_args[0] return ExprNodes.PythonCapiCallNode( - node.pos, "PySequence_List", self.PySequence_List_func_type, - args=pos_args, is_temp=node.is_temp) + node.pos, + "__Pyx_PySequence_ListKeepNew" + if node.is_temp and arg.is_temp and arg.type in (PyrexTypes.py_object_type, Builtin.list_type) + else "PySequence_List", + self.PySequence_List_func_type, + args=pos_args, + is_temp=node.is_temp, + ) PyList_AsTuple_func_type = PyrexTypes.CFuncType( Builtin.tuple_type, [ @@ -2489,20 +2646,49 @@ class OptimizeBuiltinCalls(Visitor.NodeRefCleanupMixin, elif len(pos_args) != 1: self._error_wrong_arg_count('float', node, pos_args, '0 or 1') return node + func_arg = pos_args[0] if isinstance(func_arg, ExprNodes.CoerceToPyTypeNode): func_arg = func_arg.arg if func_arg.type is PyrexTypes.c_double_type: return func_arg + elif func_arg.type in (PyrexTypes.c_py_ucs4_type, PyrexTypes.c_py_unicode_type): + # need to parse (<Py_UCS4>'1') as digit 1 + return self._pyucs4_to_number(node, function.name, func_arg) elif node.type.assignable_from(func_arg.type) or func_arg.type.is_numeric: return ExprNodes.TypecastNode( 
node.pos, operand=func_arg, type=node.type) + + arg = None + if func_arg.type is Builtin.bytes_type: + cfunc_name = "__Pyx_PyBytes_AsDouble" + utility_code_name = 'pybytes_as_double' + elif func_arg.type is Builtin.bytearray_type: + cfunc_name = "__Pyx_PyByteArray_AsDouble" + utility_code_name = 'pybytes_as_double' + elif func_arg.type is Builtin.unicode_type: + cfunc_name = "__Pyx_PyUnicode_AsDouble" + utility_code_name = 'pyunicode_as_double' + elif func_arg.type is Builtin.str_type: + cfunc_name = "__Pyx_PyString_AsDouble" + utility_code_name = 'pystring_as_double' + elif func_arg.type is Builtin.long_type: + cfunc_name = "PyLong_AsDouble" + else: + arg = func_arg # no need for an additional None check + cfunc_name = "__Pyx_PyObject_AsDouble" + utility_code_name = 'pyobject_as_double' + + if arg is None: + arg = func_arg.as_none_safe_node( + "float() argument must be a string or a number, not 'NoneType'") + return ExprNodes.PythonCapiCallNode( - node.pos, "__Pyx_PyObject_AsDouble", + node.pos, cfunc_name, self.PyObject_AsDouble_func_type, - args = pos_args, + args = [arg], is_temp = node.is_temp, - utility_code = load_c_utility('pyobject_as_double'), + utility_code = load_c_utility(utility_code_name) if utility_code_name else None, py_name = "float") PyNumber_Int_func_type = PyrexTypes.CFuncType( @@ -2556,17 +2742,59 @@ class OptimizeBuiltinCalls(Visitor.NodeRefCleanupMixin, # coerce back to Python object as that's the result we are expecting return operand.coerce_to_pyobject(self.current_env()) + PyMemoryView_FromObject_func_type = PyrexTypes.CFuncType( + Builtin.memoryview_type, [ + PyrexTypes.CFuncTypeArg("value", PyrexTypes.py_object_type, None) + ]) + + PyMemoryView_FromBuffer_func_type = PyrexTypes.CFuncType( + Builtin.memoryview_type, [ + PyrexTypes.CFuncTypeArg("value", Builtin.py_buffer_type, None) + ]) + + def _handle_simple_function_memoryview(self, node, function, pos_args): + if len(pos_args) != 1: + self._error_wrong_arg_count('memoryview', node, 
pos_args, '1') + return node + else: + if pos_args[0].type.is_pyobject: + return ExprNodes.PythonCapiCallNode( + node.pos, "PyMemoryView_FromObject", + self.PyMemoryView_FromObject_func_type, + args = [pos_args[0]], + is_temp = node.is_temp, + py_name = "memoryview") + elif pos_args[0].type.is_ptr and pos_args[0].base_type is Builtin.py_buffer_type: + # TODO - this currently doesn't work because the buffer fails a + # "can coerce to python object" test earlier. But it'd be nice to support + return ExprNodes.PythonCapiCallNode( + node.pos, "PyMemoryView_FromBuffer", + self.PyMemoryView_FromBuffer_func_type, + args = [pos_args[0]], + is_temp = node.is_temp, + py_name = "memoryview") + return node + + ### builtin functions Pyx_strlen_func_type = PyrexTypes.CFuncType( PyrexTypes.c_size_t_type, [ PyrexTypes.CFuncTypeArg("bytes", PyrexTypes.c_const_char_ptr_type, None) - ]) + ], + nogil=True) + + Pyx_ssize_strlen_func_type = PyrexTypes.CFuncType( + PyrexTypes.c_py_ssize_t_type, [ + PyrexTypes.CFuncTypeArg("bytes", PyrexTypes.c_const_char_ptr_type, None) + ], + exception_value="-1") Pyx_Py_UNICODE_strlen_func_type = PyrexTypes.CFuncType( - PyrexTypes.c_size_t_type, [ + PyrexTypes.c_py_ssize_t_type, [ PyrexTypes.CFuncTypeArg("unicode", PyrexTypes.c_const_py_unicode_ptr_type, None) - ]) + ], + exception_value="-1") PyObject_Size_func_type = PyrexTypes.CFuncType( PyrexTypes.c_py_ssize_t_type, [ @@ -2585,7 +2813,7 @@ class OptimizeBuiltinCalls(Visitor.NodeRefCleanupMixin, Builtin.dict_type: "PyDict_Size", }.get - _ext_types_with_pysize = set(["cpython.array.array"]) + _ext_types_with_pysize = {"cpython.array.array"} def _handle_simple_function_len(self, node, function, pos_args): """Replace len(char*) by the equivalent call to strlen(), @@ -2600,18 +2828,19 @@ class OptimizeBuiltinCalls(Visitor.NodeRefCleanupMixin, arg = arg.arg if arg.type.is_string: new_node = ExprNodes.PythonCapiCallNode( - node.pos, "strlen", self.Pyx_strlen_func_type, + node.pos, "__Pyx_ssize_strlen", 
self.Pyx_ssize_strlen_func_type, args = [arg], is_temp = node.is_temp, - utility_code = UtilityCode.load_cached("IncludeStringH", "StringTools.c")) + utility_code = UtilityCode.load_cached("ssize_strlen", "StringTools.c")) elif arg.type.is_pyunicode_ptr: new_node = ExprNodes.PythonCapiCallNode( - node.pos, "__Pyx_Py_UNICODE_strlen", self.Pyx_Py_UNICODE_strlen_func_type, + node.pos, "__Pyx_Py_UNICODE_ssize_strlen", self.Pyx_Py_UNICODE_strlen_func_type, args = [arg], - is_temp = node.is_temp) + is_temp = node.is_temp, + utility_code = UtilityCode.load_cached("ssize_pyunicode_strlen", "StringTools.c")) elif arg.type.is_memoryviewslice: func_type = PyrexTypes.CFuncType( - PyrexTypes.c_size_t_type, [ + PyrexTypes.c_py_ssize_t_type, [ PyrexTypes.CFuncTypeArg("memoryviewslice", arg.type, None) ], nogil=True) new_node = ExprNodes.PythonCapiCallNode( @@ -2622,7 +2851,7 @@ class OptimizeBuiltinCalls(Visitor.NodeRefCleanupMixin, if cfunc_name is None: arg_type = arg.type if ((arg_type.is_extension_type or arg_type.is_builtin_type) - and arg_type.entry.qualified_name in self._ext_types_with_pysize): + and arg_type.entry.qualified_name in self._ext_types_with_pysize): cfunc_name = 'Py_SIZE' else: return node @@ -2698,6 +2927,9 @@ class OptimizeBuiltinCalls(Visitor.NodeRefCleanupMixin, builtin_type = None if builtin_type is not None: type_check_function = entry.type.type_check_function(exact=False) + if type_check_function == '__Pyx_Py3Int_Check' and builtin_type is Builtin.int_type: + # isinstance(x, int) should really test for 'int' in Py2, not 'int | long' + type_check_function = "PyInt_Check" if type_check_function in tests: continue tests.append(type_check_function) @@ -2781,11 +3013,9 @@ class OptimizeBuiltinCalls(Visitor.NodeRefCleanupMixin, return node type_arg = args[0] if not obj.is_name or not type_arg.is_name: - # play safe - return node + return node # not a simple case if obj.type != Builtin.type_type or type_arg.type != Builtin.type_type: - # not a known type, 
play safe - return node + return node # not a known type if not type_arg.type_entry or not obj.type_entry: if obj.name != type_arg.name: return node @@ -2847,6 +3077,13 @@ class OptimizeBuiltinCalls(Visitor.NodeRefCleanupMixin, is_temp=node.is_temp ) + def _handle_any_slot__class__(self, node, function, args, + is_unbound_method, kwargs=None): + # The purpose of this function is to handle calls to instance.__class__() so that + # it doesn't get handled by the __Pyx_CallUnboundCMethod0 mechanism. + # TODO: optimizations of the instance.__class__() call might be possible in future. + return node + ### methods of builtin types PyObject_Append_func_type = PyrexTypes.CFuncType( @@ -3182,6 +3419,9 @@ class OptimizeBuiltinCalls(Visitor.NodeRefCleanupMixin, def _handle_simple_method_object___sub__(self, node, function, args, is_unbound_method): return self._optimise_num_binop('Subtract', node, function, args, is_unbound_method) + def _handle_simple_method_object___mul__(self, node, function, args, is_unbound_method): + return self._optimise_num_binop('Multiply', node, function, args, is_unbound_method) + def _handle_simple_method_object___eq__(self, node, function, args, is_unbound_method): return self._optimise_num_binop('Eq', node, function, args, is_unbound_method) @@ -3261,6 +3501,9 @@ class OptimizeBuiltinCalls(Visitor.NodeRefCleanupMixin, """ Optimise math operators for (likely) float or small integer operations. """ + if getattr(node, "special_bool_cmp_function", None): + return node # already optimized + if len(args) != 2: return node @@ -3271,66 +3514,15 @@ class OptimizeBuiltinCalls(Visitor.NodeRefCleanupMixin, else: return node - # When adding IntNode/FloatNode to something else, assume other operand is also numeric. - # Prefer constants on RHS as they allows better size control for some operators. 
- num_nodes = (ExprNodes.IntNode, ExprNodes.FloatNode) - if isinstance(args[1], num_nodes): - if args[0].type is not PyrexTypes.py_object_type: - return node - numval = args[1] - arg_order = 'ObjC' - elif isinstance(args[0], num_nodes): - if args[1].type is not PyrexTypes.py_object_type: - return node - numval = args[0] - arg_order = 'CObj' - else: - return node - - if not numval.has_constant_result(): - return node - - is_float = isinstance(numval, ExprNodes.FloatNode) - num_type = PyrexTypes.c_double_type if is_float else PyrexTypes.c_long_type - if is_float: - if operator not in ('Add', 'Subtract', 'Remainder', 'TrueDivide', 'Divide', 'Eq', 'Ne'): - return node - elif operator == 'Divide': - # mixed old-/new-style division is not currently optimised for integers - return node - elif abs(numval.constant_result) > 2**30: - # Cut off at an integer border that is still safe for all operations. + result = optimise_numeric_binop(operator, node, ret_type, args[0], args[1]) + if not result: return node - - if operator in ('TrueDivide', 'FloorDivide', 'Divide', 'Remainder'): - if args[1].constant_result == 0: - # Don't optimise division by 0. :) - return node - - args = list(args) - args.append((ExprNodes.FloatNode if is_float else ExprNodes.IntNode)( - numval.pos, value=numval.value, constant_result=numval.constant_result, - type=num_type)) - inplace = node.inplace if isinstance(node, ExprNodes.NumBinopNode) else False - args.append(ExprNodes.BoolNode(node.pos, value=inplace, constant_result=inplace)) - if is_float or operator not in ('Eq', 'Ne'): - # "PyFloatBinop" and "PyIntBinop" take an additional "check for zero division" argument. 
- zerodivision_check = arg_order == 'CObj' and ( - not node.cdivision if isinstance(node, ExprNodes.DivNode) else False) - args.append(ExprNodes.BoolNode(node.pos, value=zerodivision_check, constant_result=zerodivision_check)) - - utility_code = TempitaUtilityCode.load_cached( - "PyFloatBinop" if is_float else "PyIntCompare" if operator in ('Eq', 'Ne') else "PyIntBinop", - "Optimize.c", - context=dict(op=operator, order=arg_order, ret_type=ret_type)) + func_cname, utility_code, extra_args, num_type = result + args = list(args)+extra_args call_node = self._substitute_method_call( node, function, - "__Pyx_Py%s_%s%s%s" % ( - 'Float' if is_float else 'Int', - '' if ret_type.is_pyobject else 'Bool', - operator, - arg_order), + func_cname, self.Pyx_BinopInt_func_types[(num_type, ret_type)], '__%s__' % operator[:3].lower(), is_unbound_method, args, may_return_none=True, @@ -3389,6 +3581,8 @@ class OptimizeBuiltinCalls(Visitor.NodeRefCleanupMixin, PyrexTypes.CFuncTypeArg("uchar", PyrexTypes.c_py_ucs4_type, None), ]) + # DISABLED: Return value can only be one character, which is not correct. 
+ ''' def _inject_unicode_character_conversion(self, node, function, args, is_unbound_method): if is_unbound_method or len(args) != 1: return node @@ -3407,9 +3601,10 @@ class OptimizeBuiltinCalls(Visitor.NodeRefCleanupMixin, func_call = func_call.coerce_to_pyobject(self.current_env) return func_call - _handle_simple_method_unicode_lower = _inject_unicode_character_conversion - _handle_simple_method_unicode_upper = _inject_unicode_character_conversion - _handle_simple_method_unicode_title = _inject_unicode_character_conversion + #_handle_simple_method_unicode_lower = _inject_unicode_character_conversion + #_handle_simple_method_unicode_upper = _inject_unicode_character_conversion + #_handle_simple_method_unicode_title = _inject_unicode_character_conversion + ''' PyUnicode_Splitlines_func_type = PyrexTypes.CFuncType( Builtin.list_type, [ @@ -3448,6 +3643,8 @@ class OptimizeBuiltinCalls(Visitor.NodeRefCleanupMixin, return node if len(args) < 2: args.append(ExprNodes.NullNode(node.pos)) + else: + self._inject_null_for_none(args, 1) self._inject_int_default_argument( node, args, 2, PyrexTypes.c_py_ssize_t_type, "-1") @@ -3788,7 +3985,7 @@ class OptimizeBuiltinCalls(Visitor.NodeRefCleanupMixin, if not stop: # use strlen() to find the string length, just as CPython would if not string_node.is_name: - string_node = UtilNodes.LetRefNode(string_node) # used twice + string_node = UtilNodes.LetRefNode(string_node) # used twice temps.append(string_node) stop = ExprNodes.PythonCapiCallNode( string_node.pos, "strlen", self.Pyx_strlen_func_type, @@ -3963,13 +4160,35 @@ class OptimizeBuiltinCalls(Visitor.NodeRefCleanupMixin, format_args=[attr_name]) return self_arg + obj_to_obj_func_type = PyrexTypes.CFuncType( + PyrexTypes.py_object_type, [ + PyrexTypes.CFuncTypeArg("obj", PyrexTypes.py_object_type, None) + ]) + + def _inject_null_for_none(self, args, index): + if len(args) <= index: + return + arg = args[index] + args[index] = ExprNodes.NullNode(arg.pos) if arg.is_none else 
ExprNodes.PythonCapiCallNode( + arg.pos, "__Pyx_NoneAsNull", + self.obj_to_obj_func_type, + args=[arg.coerce_to_simple(self.current_env())], + is_temp=0, + ) + def _inject_int_default_argument(self, node, args, arg_index, type, default_value): + # Python usually allows passing None for range bounds, + # so we treat that as requesting the default. assert len(args) >= arg_index - if len(args) == arg_index: + if len(args) == arg_index or args[arg_index].is_none: args.append(ExprNodes.IntNode(node.pos, value=str(default_value), type=type, constant_result=default_value)) else: - args[arg_index] = args[arg_index].coerce_to(type, self.current_env()) + arg = args[arg_index].coerce_to(type, self.current_env()) + if isinstance(arg, ExprNodes.CoerceFromPyTypeNode): + # Add a runtime check for None and map it to the default value. + arg.special_none_cvalue = str(default_value) + args[arg_index] = arg def _inject_bint_default_argument(self, node, args, arg_index, default_value): assert len(args) >= arg_index @@ -3981,6 +4200,75 @@ class OptimizeBuiltinCalls(Visitor.NodeRefCleanupMixin, args[arg_index] = args[arg_index].coerce_to_boolean(self.current_env()) +def optimise_numeric_binop(operator, node, ret_type, arg0, arg1): + """ + Optimise math operators for (likely) float or small integer operations. + """ + # When adding IntNode/FloatNode to something else, assume other operand is also numeric. + # Prefer constants on RHS as they allows better size control for some operators. 
+ num_nodes = (ExprNodes.IntNode, ExprNodes.FloatNode) + if isinstance(arg1, num_nodes): + if arg0.type is not PyrexTypes.py_object_type: + return None + numval = arg1 + arg_order = 'ObjC' + elif isinstance(arg0, num_nodes): + if arg1.type is not PyrexTypes.py_object_type: + return None + numval = arg0 + arg_order = 'CObj' + else: + return None + + if not numval.has_constant_result(): + return None + + # is_float is an instance check rather that numval.type.is_float because + # it will often be a Python float type rather than a C float type + is_float = isinstance(numval, ExprNodes.FloatNode) + num_type = PyrexTypes.c_double_type if is_float else PyrexTypes.c_long_type + if is_float: + if operator not in ('Add', 'Subtract', 'Remainder', 'TrueDivide', 'Divide', 'Eq', 'Ne'): + return None + elif operator == 'Divide': + # mixed old-/new-style division is not currently optimised for integers + return None + elif abs(numval.constant_result) > 2**30: + # Cut off at an integer border that is still safe for all operations. + return None + + if operator in ('TrueDivide', 'FloorDivide', 'Divide', 'Remainder'): + if arg1.constant_result == 0: + # Don't optimise division by 0. :) + return None + + extra_args = [] + + extra_args.append((ExprNodes.FloatNode if is_float else ExprNodes.IntNode)( + numval.pos, value=numval.value, constant_result=numval.constant_result, + type=num_type)) + inplace = node.inplace if isinstance(node, ExprNodes.NumBinopNode) else False + extra_args.append(ExprNodes.BoolNode(node.pos, value=inplace, constant_result=inplace)) + if is_float or operator not in ('Eq', 'Ne'): + # "PyFloatBinop" and "PyIntBinop" take an additional "check for zero division" argument. 
+ zerodivision_check = arg_order == 'CObj' and ( + not node.cdivision if isinstance(node, ExprNodes.DivNode) else False) + extra_args.append(ExprNodes.BoolNode(node.pos, value=zerodivision_check, constant_result=zerodivision_check)) + + utility_code = TempitaUtilityCode.load_cached( + "PyFloatBinop" if is_float else "PyIntCompare" if operator in ('Eq', 'Ne') else "PyIntBinop", + "Optimize.c", + context=dict(op=operator, order=arg_order, ret_type=ret_type)) + + func_cname = "__Pyx_Py%s_%s%s%s" % ( + 'Float' if is_float else 'Int', + '' if ret_type.is_pyobject else 'Bool', + operator, + arg_order) + + return func_cname, utility_code, extra_args, num_type + + unicode_tailmatch_utility_code = UtilityCode.load_cached('unicode_tailmatch', 'StringTools.c') bytes_tailmatch_utility_code = UtilityCode.load_cached('bytes_tailmatch', 'StringTools.c') str_tailmatch_utility_code = UtilityCode.load_cached('str_tailmatch', 'StringTools.c') @@ -4439,25 +4727,25 @@ class ConstantFolding(Visitor.VisitorTransform, SkipDeclarations): args = [] items = [] - def add(arg): + def add(parent, arg): if arg.is_dict_literal: - if items: - items[0].key_value_pairs.extend(arg.key_value_pairs) + if items and items[-1].reject_duplicates == arg.reject_duplicates: + items[-1].key_value_pairs.extend(arg.key_value_pairs) else: items.append(arg) - elif isinstance(arg, ExprNodes.MergedDictNode): + elif isinstance(arg, ExprNodes.MergedDictNode) and parent.reject_duplicates == arg.reject_duplicates: for child_arg in arg.keyword_args: - add(child_arg) + add(arg, child_arg) else: if items: - args.append(items[0]) + args.extend(items) del items[:] args.append(arg) for arg in node.keyword_args: - add(arg) + add(node, arg) if items: - args.append(items[0]) + args.extend(items) if len(args) == 1: arg = args[0] @@ -4546,22 +4834,20 @@ class ConstantFolding(Visitor.VisitorTransform, SkipDeclarations): cascades = [[node.operand1]] final_false_result = [] - def split_cascades(cmp_node): + cmp_node = node + while 
cmp_node is not None: if cmp_node.has_constant_result(): if not cmp_node.constant_result: # False => short-circuit final_false_result.append(self._bool_node(cmp_node, False)) - return + break else: # True => discard and start new cascade cascades.append([cmp_node.operand2]) else: # not constant => append to current cascade cascades[-1].append(cmp_node) - if cmp_node.cascade: - split_cascades(cmp_node.cascade) - - split_cascades(node) + cmp_node = cmp_node.cascade cmp_nodes = [] for cascade in cascades: @@ -4707,6 +4993,30 @@ class ConstantFolding(Visitor.VisitorTransform, SkipDeclarations): return None return node + def visit_GILStatNode(self, node): + self.visitchildren(node) + if node.condition is None: + return node + + if node.condition.has_constant_result(): + # Condition is True - Modify node to be a normal + # GILStatNode with condition=None + if node.condition.constant_result: + node.condition = None + + # Condition is False - the body of the GILStatNode + # should run without changing the state of the gil + # return the body of the GILStatNode + else: + return node.body + + # If condition is not constant we keep the GILStatNode as it is. + # Either it will later become constant (e.g. a `numeric is int` + # expression in a fused type function) and then when ConstantFolding + # runs again it will be handled or a later transform (i.e. 
GilCheck) + # will raise an error + return node + # in the future, other nodes can have their own handler method here # that can replace them with a constant result node @@ -4723,6 +5033,7 @@ class FinalOptimizePhase(Visitor.EnvTransform, Visitor.NodeRefCleanupMixin): - isinstance -> typecheck for cdef types - eliminate checks for None and/or types that became redundant after tree changes - eliminate useless string formatting steps + - inject branch hints for unlikely if-cases that only raise exceptions - replace Python function calls that look like method calls by a faster PyMethodCallNode """ in_loop = False @@ -4821,6 +5132,48 @@ class FinalOptimizePhase(Visitor.EnvTransform, Visitor.NodeRefCleanupMixin): self.in_loop = old_val return node + def visit_IfStatNode(self, node): + """Assign 'unlikely' branch hints to if-clauses that only raise exceptions. + """ + self.visitchildren(node) + last_non_unlikely_clause = None + for i, if_clause in enumerate(node.if_clauses): + self._set_ifclause_branch_hint(if_clause, if_clause.body) + if not if_clause.branch_hint: + last_non_unlikely_clause = if_clause + if node.else_clause and last_non_unlikely_clause: + # If the 'else' clause is 'unlikely', then set the preceding 'if' clause to 'likely' to reflect that. + self._set_ifclause_branch_hint(last_non_unlikely_clause, node.else_clause, inverse=True) + return node + + def _set_ifclause_branch_hint(self, clause, statements_node, inverse=False): + """Inject a branch hint if the if-clause unconditionally leads to a 'raise' statement. + """ + if not statements_node.is_terminator: + return + # Allow simple statements, but no conditions, loops, etc. 
+ non_branch_nodes = ( + Nodes.ExprStatNode, + Nodes.AssignmentNode, + Nodes.AssertStatNode, + Nodes.DelStatNode, + Nodes.GlobalNode, + Nodes.NonlocalNode, + ) + statements = [statements_node] + for next_node_pos, node in enumerate(statements, 1): + if isinstance(node, Nodes.GILStatNode): + statements.insert(next_node_pos, node.body) + continue + if isinstance(node, Nodes.StatListNode): + statements[next_node_pos:next_node_pos] = node.stats + continue + if not isinstance(node, non_branch_nodes): + if next_node_pos == len(statements) and isinstance(node, (Nodes.RaiseStatNode, Nodes.ReraiseStatNode)): + # Anything that unconditionally raises exceptions at the end should be considered unlikely. + clause.branch_hint = 'likely' if inverse else 'unlikely' + break + class ConsolidateOverflowCheck(Visitor.CythonTransform): """ diff --git a/Cython/Compiler/Options.py b/Cython/Compiler/Options.py index d03119fca..cd1bb0431 100644 --- a/Cython/Compiler/Options.py +++ b/Cython/Compiler/Options.py @@ -4,6 +4,10 @@ from __future__ import absolute_import +import os + +from Cython import Utils + class ShouldBeFromDirective(object): @@ -25,15 +29,14 @@ class ShouldBeFromDirective(object): raise RuntimeError(repr(self)) def __repr__(self): - return ( - "Illegal access of '%s' from Options module rather than directive '%s'" - % (self.options_name, self.directive_name)) + return "Illegal access of '%s' from Options module rather than directive '%s'" % ( + self.options_name, self.directive_name) """ The members of this module are documented using autodata in Cython/docs/src/reference/compilation.rst. -See http://www.sphinx-doc.org/en/master/ext/autodoc.html#directive-autoattribute +See https://www.sphinx-doc.org/en/master/usage/extensions/autodoc.html#directive-autoattribute for how autodata works. 
Descriptions of those members should start with a #: Donc forget to keep the docs in sync by removing and adding @@ -48,11 +51,6 @@ docstrings = True #: Embed the source code position in the docstrings of functions and classes. embed_pos_in_docstring = False -#: Copy the original source code line by line into C code comments -#: in the generated code file to help with understanding the output. -#: This is also required for coverage analysis. -emit_code_comments = True - # undocumented pre_import = None @@ -168,8 +166,19 @@ def get_directive_defaults(): _directive_defaults[old_option.directive_name] = value return _directive_defaults +def copy_inherited_directives(outer_directives, **new_directives): + # A few directives are not copied downwards and this function removes them. + # For example, test_assert_path_exists and test_fail_if_path_exists should not be inherited + # otherwise they can produce very misleading test failures + new_directives_out = dict(outer_directives) + for name in ('test_assert_path_exists', 'test_fail_if_path_exists', 'test_assert_c_code_has', 'test_fail_if_c_code_has'): + new_directives_out.pop(name, None) + new_directives_out.update(new_directives) + return new_directives_out + # Declare compiler directives _directive_defaults = { + 'binding': True, # was False before 3.0 'boundscheck' : True, 'nonecheck' : False, 'initializedcheck' : True, @@ -178,11 +187,12 @@ _directive_defaults = { 'auto_pickle': None, 'cdivision': False, # was True before 0.12 'cdivision_warnings': False, - 'c_api_binop_methods': True, - 'cpow': True, + 'cpow': None, # was True before 3.0 + # None (not set by user) is treated as slightly different from False + 'c_api_binop_methods': False, # was True before 3.0 'overflowcheck': False, 'overflowcheck.fold': True, - 'always_allow_keywords': False, + 'always_allow_keywords': True, 'allow_none_for_extension_args': True, 'wraparound' : True, 'ccomplex' : False, # use C99/C++ for complex types and arith @@ -209,6 +219,8 @@ 
_directive_defaults = { 'old_style_globals': False, 'np_pythran': False, 'fast_gil': False, + 'cpp_locals': False, # uses std::optional for C++ locals, so that they work more like Python locals + 'legacy_implicit_noexcept': False, # set __file__ and/or __path__ to known source/target path at import time (instead of not having them available) 'set_initial_path' : None, # SOURCEFILE or "/full/path/to/module" @@ -238,10 +250,10 @@ _directive_defaults = { # test support 'test_assert_path_exists' : [], 'test_fail_if_path_exists' : [], + 'test_assert_c_code_has' : [], + 'test_fail_if_c_code_has' : [], # experimental, subject to change - 'binding': None, - 'formal_grammar': False, } @@ -295,6 +307,12 @@ def normalise_encoding_name(option_name, encoding): return name return encoding +# use as a sential value to defer analysis of the arguments +# instead of analysing them in InterpretCompilerDirectives. The dataclass directives are quite +# complicated and it's easier to deal with them at the point the dataclass is created +class DEFER_ANALYSIS_OF_ARGUMENTS: + pass +DEFER_ANALYSIS_OF_ARGUMENTS = DEFER_ANALYSIS_OF_ARGUMENTS() # Override types possibilities above, if needed directive_types = { @@ -302,12 +320,15 @@ directive_types = { 'auto_pickle': bool, 'locals': dict, 'final' : bool, # final cdef classes and methods + 'collection_type': one_of('sequence'), 'nogil' : bool, 'internal' : bool, # cdef class visibility in the module dict 'infer_types' : bool, # values can be True/None/False 'binding' : bool, 'cfunc' : None, # decorators do not take directive value 'ccall' : None, + 'ufunc': None, + 'cpow' : bool, 'inline' : None, 'staticmethod' : None, 'cclass' : None, @@ -319,7 +340,10 @@ directive_types = { 'freelist': int, 'c_string_type': one_of('bytes', 'bytearray', 'str', 'unicode'), 'c_string_encoding': normalise_encoding_name, - 'cpow': bool + 'trashcan': bool, + 'total_ordering': None, + 'dataclasses.dataclass': DEFER_ANALYSIS_OF_ARGUMENTS, + 'dataclasses.field': 
DEFER_ANALYSIS_OF_ARGUMENTS, } for key, val in _directive_defaults.items(): @@ -330,6 +354,7 @@ directive_scopes = { # defaults to available everywhere # 'module', 'function', 'class', 'with statement' 'auto_pickle': ('module', 'cclass'), 'final' : ('cclass', 'function'), + 'collection_type': ('cclass',), 'nogil' : ('function', 'with statement'), 'inline' : ('function',), 'cfunc' : ('function', 'with statement'), @@ -348,9 +373,10 @@ directive_scopes = { # defaults to available everywhere 'set_initial_path' : ('module',), 'test_assert_path_exists' : ('function', 'class', 'cclass'), 'test_fail_if_path_exists' : ('function', 'class', 'cclass'), + 'test_assert_c_code_has' : ('module',), + 'test_fail_if_c_code_has' : ('module',), 'freelist': ('cclass',), 'emit_code_comments': ('module',), - 'annotation_typing': ('module',), # FIXME: analysis currently lacks more specific function scope # Avoid scope-specific to/from_py_functions for c_string. 'c_string_type': ('module',), 'c_string_encoding': ('module',), @@ -362,6 +388,26 @@ directive_scopes = { # defaults to available everywhere 'np_pythran': ('module',), 'fast_gil': ('module',), 'iterable_coroutine': ('module', 'function'), + 'trashcan' : ('cclass',), + 'total_ordering': ('cclass', ), + 'dataclasses.dataclass' : ('class', 'cclass',), + 'cpp_locals': ('module', 'function', 'cclass'), # I don't think they make sense in a with_statement + 'ufunc': ('function',), + 'legacy_implicit_noexcept': ('module', ), +} + + +# a list of directives that (when used as a decorator) are only applied to +# the object they decorate and not to its children. 
+immediate_decorator_directives = { + 'cfunc', 'ccall', 'cclass', 'dataclasses.dataclass', 'ufunc', + # function signature directives + 'inline', 'exceptval', 'returns', + # class directives + 'freelist', 'no_gc', 'no_gc_clear', 'type_version_tag', 'final', + 'auto_pickle', 'internal', 'collection_type', 'total_ordering', + # testing directives + 'test_fail_if_path_exists', 'test_assert_path_exists', } @@ -476,6 +522,11 @@ def parse_directive_list(s, relaxed_bool=False, ignore_unknown=False, result[directive] = parsed_value if not found and not ignore_unknown: raise ValueError('Unknown option: "%s"' % name) + elif directive_types.get(name) is list: + if name in result: + result[name].append(value) + else: + result[name] = [value] else: parsed_value = parse_directive_value(name, value, relaxed_bool=relaxed_bool) result[name] = parsed_value @@ -550,3 +601,190 @@ def parse_compile_time_env(s, current_settings=None): name, value = [s.strip() for s in item.split('=', 1)] result[name] = parse_variable_value(value) return result + + +# ------------------------------------------------------------------------ +# CompilationOptions are constructed from user input and are the `option` +# object passed throughout the compilation pipeline. + +class CompilationOptions(object): + r""" + See default_options at the end of this module for a list of all possible + options and CmdLine.usage and CmdLine.parse_command_line() for their + meaning. 
+ """ + def __init__(self, defaults=None, **kw): + self.include_path = [] + if defaults: + if isinstance(defaults, CompilationOptions): + defaults = defaults.__dict__ + else: + defaults = default_options + + options = dict(defaults) + options.update(kw) + + # let's assume 'default_options' contains a value for most known compiler options + # and validate against them + unknown_options = set(options) - set(default_options) + # ignore valid options that are not in the defaults + unknown_options.difference_update(['include_path']) + if unknown_options: + message = "got unknown compilation option%s, please remove: %s" % ( + 's' if len(unknown_options) > 1 else '', + ', '.join(unknown_options)) + raise ValueError(message) + + directive_defaults = get_directive_defaults() + directives = dict(options['compiler_directives']) # copy mutable field + # check for invalid directives + unknown_directives = set(directives) - set(directive_defaults) + if unknown_directives: + message = "got unknown compiler directive%s: %s" % ( + 's' if len(unknown_directives) > 1 else '', + ', '.join(unknown_directives)) + raise ValueError(message) + options['compiler_directives'] = directives + if directives.get('np_pythran', False) and not options['cplus']: + import warnings + warnings.warn("C++ mode forced when in Pythran mode!") + options['cplus'] = True + if 'language_level' in directives and 'language_level' not in kw: + options['language_level'] = directives['language_level'] + elif not options.get('language_level'): + options['language_level'] = directive_defaults.get('language_level') + if 'formal_grammar' in directives and 'formal_grammar' not in kw: + options['formal_grammar'] = directives['formal_grammar'] + if options['cache'] is True: + options['cache'] = os.path.join(Utils.get_cython_cache_dir(), 'compiler') + + self.__dict__.update(options) + + def configure_language_defaults(self, source_extension): + if source_extension == 'py': + if self.compiler_directives.get('binding') is 
None: + self.compiler_directives['binding'] = True + + def get_fingerprint(self): + r""" + Return a string that contains all the options that are relevant for cache invalidation. + """ + # Collect only the data that can affect the generated file(s). + data = {} + + for key, value in self.__dict__.items(): + if key in ['show_version', 'errors_to_stderr', 'verbose', 'quiet']: + # verbosity flags have no influence on the compilation result + continue + elif key in ['output_file', 'output_dir']: + # ignore the exact name of the output file + continue + elif key in ['depfile']: + # external build system dependency tracking file does not influence outputs + continue + elif key in ['timestamps']: + # the cache cares about the content of files, not about the timestamps of sources + continue + elif key in ['cache']: + # hopefully caching has no influence on the compilation result + continue + elif key in ['compiler_directives']: + # directives passed on to the C compiler do not influence the generated C code + continue + elif key in ['include_path']: + # this path changes which headers are tracked as dependencies, + # it has no influence on the generated C code + continue + elif key in ['working_path']: + # this path changes where modules and pxd files are found; + # their content is part of the fingerprint anyway, their + # absolute path does not matter + continue + elif key in ['create_extension']: + # create_extension() has already mangled the options, e.g., + # embedded_metadata, when the fingerprint is computed so we + # ignore it here. 
+ continue + elif key in ['build_dir']: + # the (temporary) directory where we collect dependencies + # has no influence on the C output + continue + elif key in ['use_listing_file', 'generate_pxi', 'annotate', 'annotate_coverage_xml']: + # all output files are contained in the cache so the types of + # files generated must be part of the fingerprint + data[key] = value + elif key in ['formal_grammar', 'evaluate_tree_assertions']: + # these bits can change whether compilation to C passes/fails + data[key] = value + elif key in ['embedded_metadata', 'emit_linenums', + 'c_line_in_traceback', 'gdb_debug', + 'relative_path_in_code_position_comments']: + # the generated code contains additional bits when these are set + data[key] = value + elif key in ['cplus', 'language_level', 'compile_time_env', 'np_pythran']: + # assorted bits that, e.g., influence the parser + data[key] = value + elif key == ['capi_reexport_cincludes']: + if self.capi_reexport_cincludes: + # our caching implementation does not yet include fingerprints of all the header files + raise NotImplementedError('capi_reexport_cincludes is not compatible with Cython caching') + elif key == ['common_utility_include_dir']: + if self.common_utility_include_dir: + raise NotImplementedError('common_utility_include_dir is not compatible with Cython caching yet') + else: + # any unexpected option should go into the fingerprint; it's better + # to recompile than to return incorrect results from the cache. + data[key] = value + + def to_fingerprint(item): + r""" + Recursively turn item into a string, turning dicts into lists with + deterministic ordering. 
+ """ + if isinstance(item, dict): + item = sorted([(repr(key), to_fingerprint(value)) for key, value in item.items()]) + return repr(item) + + return to_fingerprint(data) + + +# ------------------------------------------------------------------------ +# +# Set the default options depending on the platform +# +# ------------------------------------------------------------------------ + +default_options = dict( + show_version=0, + use_listing_file=0, + errors_to_stderr=1, + cplus=0, + output_file=None, + depfile=None, + annotate=None, + annotate_coverage_xml=None, + generate_pxi=0, + capi_reexport_cincludes=0, + working_path="", + timestamps=None, + verbose=0, + quiet=0, + compiler_directives={}, + embedded_metadata={}, + evaluate_tree_assertions=False, + emit_linenums=False, + relative_path_in_code_position_comments=True, + c_line_in_traceback=True, + language_level=None, # warn but default to 2 + formal_grammar=False, + gdb_debug=False, + compile_time_env=None, + module_name=None, + common_utility_include_dir=None, + output_dir=None, + build_dir=None, + cache=None, + create_extension=None, + np_pythran=False, + legacy_implicit_noexcept=None, +) diff --git a/Cython/Compiler/ParseTreeTransforms.pxd b/Cython/Compiler/ParseTreeTransforms.pxd index 2c17901fa..efbb14f70 100644 --- a/Cython/Compiler/ParseTreeTransforms.pxd +++ b/Cython/Compiler/ParseTreeTransforms.pxd @@ -1,5 +1,4 @@ - -from __future__ import absolute_import +# cython: language_level=3str cimport cython @@ -7,8 +6,8 @@ from .Visitor cimport ( CythonTransform, VisitorTransform, TreeVisitor, ScopeTrackingTransform, EnvTransform) -cdef class SkipDeclarations: # (object): - pass +# Don't include mixins, only the main classes. 
+#cdef class SkipDeclarations: cdef class NormalizeTree(CythonTransform): cdef bint is_in_statlist @@ -30,7 +29,7 @@ cdef map_starred_assignment(list lhs_targets, list starred_assignments, list lhs #class PxdPostParse(CythonTransform, SkipDeclarations): #class InterpretCompilerDirectives(CythonTransform, SkipDeclarations): -#class WithTransform(CythonTransform, SkipDeclarations): +#class WithTransform(VisitorTransform, SkipDeclarations): #class DecoratorTransform(CythonTransform, SkipDeclarations): #class AnalyseDeclarationsTransform(EnvTransform): diff --git a/Cython/Compiler/ParseTreeTransforms.py b/Cython/Compiler/ParseTreeTransforms.py index 0e86d5b0e..8008cba9a 100644 --- a/Cython/Compiler/ParseTreeTransforms.py +++ b/Cython/Compiler/ParseTreeTransforms.py @@ -1,3 +1,5 @@ +# cython: language_level=3str + from __future__ import absolute_import import cython @@ -54,6 +56,12 @@ class SkipDeclarations(object): def visit_CStructOrUnionDefNode(self, node): return node + def visit_CppClassNode(self, node): + if node.visibility != "extern": + # Need to traverse methods. + self.visitchildren(node) + return node + class NormalizeTree(CythonTransform): """ @@ -81,6 +89,13 @@ class NormalizeTree(CythonTransform): self.is_in_statlist = False self.is_in_expr = False + def visit_ModuleNode(self, node): + self.visitchildren(node) + if not isinstance(node.body, Nodes.StatListNode): + # This can happen when the body only consists of a single (unused) declaration and no statements. + node.body = Nodes.StatListNode(pos=node.pos, stats=[node.body]) + return node + def visit_ExprNode(self, node): stacktmp = self.is_in_expr self.is_in_expr = True @@ -170,8 +185,9 @@ class PostParse(ScopeTrackingTransform): Note: Currently Parsing.py does a lot of interpretation and reorganization that can be refactored into this transform if a more pure Abstract Syntax Tree is wanted. 
- """ + - Some invalid uses of := assignment expressions are detected + """ def __init__(self, context): super(PostParse, self).__init__(context) self.specialattribute_handlers = { @@ -203,7 +219,9 @@ class PostParse(ScopeTrackingTransform): node.def_node = Nodes.DefNode( node.pos, name=node.name, doc=None, args=[], star_arg=None, starstar_arg=None, - body=node.loop, is_async_def=collector.has_await) + body=node.loop, is_async_def=collector.has_await, + is_generator_expression=True) + _AssignmentExpressionChecker.do_checks(node.loop, scope_is_class=self.scope_type in ("pyclass", "cclass")) self.visitchildren(node) return node @@ -214,6 +232,7 @@ class PostParse(ScopeTrackingTransform): collector.visitchildren(node.loop) if collector.has_await: node.has_local_scope = True + _AssignmentExpressionChecker.do_checks(node.loop, scope_is_class=self.scope_type in ("pyclass", "cclass")) self.visitchildren(node) return node @@ -246,7 +265,7 @@ class PostParse(ScopeTrackingTransform): if decl is not declbase: raise PostParseError(decl.pos, ERR_INVALID_SPECIALATTR_TYPE) handler(decl) - continue # Remove declaration + continue # Remove declaration raise PostParseError(decl.pos, ERR_CDEF_INCLASS) first_assignment = self.scope_type != 'module' stats.append(Nodes.SingleAssignmentNode(node.pos, @@ -349,6 +368,141 @@ class PostParse(ScopeTrackingTransform): self.visitchildren(node) return node + def visit_AssertStatNode(self, node): + """Extract the exception raising into a RaiseStatNode to simplify GIL handling. 
+ """ + if node.exception is None: + node.exception = Nodes.RaiseStatNode( + node.pos, + exc_type=ExprNodes.NameNode(node.pos, name=EncodedString("AssertionError")), + exc_value=node.value, + exc_tb=None, + cause=None, + builtin_exc_name="AssertionError", + wrap_tuple_value=True, + ) + node.value = None + self.visitchildren(node) + return node + +class _AssignmentExpressionTargetNameFinder(TreeVisitor): + def __init__(self): + super(_AssignmentExpressionTargetNameFinder, self).__init__() + self.target_names = {} + + def find_target_names(self, target): + if target.is_name: + return [target.name] + elif target.is_sequence_constructor: + names = [] + for arg in target.args: + names.extend(self.find_target_names(arg)) + return names + # other targets are possible, but it isn't necessary to investigate them here + return [] + + def visit_ForInStatNode(self, node): + self.target_names[node] = tuple(self.find_target_names(node.target)) + self.visitchildren(node) + + def visit_ComprehensionNode(self, node): + pass # don't recurse into nested comprehensions + + def visit_LambdaNode(self, node): + pass # don't recurse into nested lambdas/generator expressions + + def visit_Node(self, node): + self.visitchildren(node) + + +class _AssignmentExpressionChecker(TreeVisitor): + """ + Enforces rules on AssignmentExpressions within generator expressions and comprehensions + """ + def __init__(self, loop_node, scope_is_class): + super(_AssignmentExpressionChecker, self).__init__() + + target_name_finder = _AssignmentExpressionTargetNameFinder() + target_name_finder.visit(loop_node) + self.target_names_dict = target_name_finder.target_names + self.in_iterator = False + self.in_nested_generator = False + self.scope_is_class = scope_is_class + self.current_target_names = () + self.all_target_names = set() + for names in self.target_names_dict.values(): + self.all_target_names.update(names) + + def _reset_state(self): + old_state = (self.in_iterator, self.in_nested_generator, 
self.scope_is_class, self.all_target_names, self.current_target_names) + # note: not resetting self.in_iterator here, see visit_LambdaNode() below + self.in_nested_generator = False + self.scope_is_class = False + self.current_target_names = () + self.all_target_names = set() + return old_state + + def _set_state(self, old_state): + self.in_iterator, self.in_nested_generator, self.scope_is_class, self.all_target_names, self.current_target_names = old_state + + @classmethod + def do_checks(cls, loop_node, scope_is_class): + checker = cls(loop_node, scope_is_class) + checker.visit(loop_node) + + def visit_ForInStatNode(self, node): + if self.in_nested_generator: + self.visitchildren(node) # once nested, don't do anything special + return + + current_target_names = self.current_target_names + target_name = self.target_names_dict.get(node, None) + if target_name: + self.current_target_names += target_name + + self.in_iterator = True + self.visit(node.iterator) + self.in_iterator = False + self.visitchildren(node, exclude=("iterator",)) + + self.current_target_names = current_target_names + + def visit_AssignmentExpressionNode(self, node): + if self.in_iterator: + error(node.pos, "assignment expression cannot be used in a comprehension iterable expression") + if self.scope_is_class: + error(node.pos, "assignment expression within a comprehension cannot be used in a class body") + if node.target_name in self.current_target_names: + error(node.pos, "assignment expression cannot rebind comprehension iteration variable '%s'" % + node.target_name) + elif node.target_name in self.all_target_names: + error(node.pos, "comprehension inner loop cannot rebind assignment expression target '%s'" % + node.target_name) + + def visit_LambdaNode(self, node): + # Don't reset "in_iterator" - an assignment expression in a lambda in an + # iterator is explicitly tested by the Python testcases and banned. 
+ old_state = self._reset_state() + # the lambda node's "def_node" is not set up at this point, so we need to recurse into it explicitly. + self.visit(node.result_expr) + self._set_state(old_state) + + def visit_ComprehensionNode(self, node): + in_nested_generator = self.in_nested_generator + self.in_nested_generator = True + self.visitchildren(node) + self.in_nested_generator = in_nested_generator + + def visit_GeneratorExpressionNode(self, node): + in_nested_generator = self.in_nested_generator + self.in_nested_generator = True + # def_node isn't set up yet, so we need to visit the loop directly. + self.visit(node.loop) + self.in_nested_generator = in_nested_generator + + def visit_Node(self, node): + self.visitchildren(node) + def eliminate_rhs_duplicates(expr_list_list, ref_node_sequence): """Replace rhs items by LetRefNodes if they appear more than once. @@ -419,7 +573,7 @@ def sort_common_subsequences(items): return b.is_sequence_constructor and contains(b.args, a) for pos, item in enumerate(items): - key = item[1] # the ResultRefNode which has already been injected into the sequences + key = item[1] # the ResultRefNode which has already been injected into the sequences new_pos = pos for i in range(pos-1, -1, -1): if lower_than(key, items[i][0]): @@ -449,7 +603,7 @@ def flatten_parallel_assignments(input, output): # recursively, so that nested structures get matched as well. 
rhs = input[-1] if (not (rhs.is_sequence_constructor or isinstance(rhs, ExprNodes.UnicodeNode)) - or not sum([lhs.is_sequence_constructor for lhs in input[:-1]])): + or not sum([lhs.is_sequence_constructor for lhs in input[:-1]])): output.append(input) return @@ -533,7 +687,7 @@ def map_starred_assignment(lhs_targets, starred_assignments, lhs_args, rhs_args) targets.append(expr) # the starred target itself, must be assigned a (potentially empty) list - target = lhs_args[starred].target # unpack starred node + target = lhs_args[starred].target # unpack starred node starred_rhs = rhs_args[starred:] if lhs_remaining: starred_rhs = starred_rhs[:-lhs_remaining] @@ -579,19 +733,19 @@ class PxdPostParse(CythonTransform, SkipDeclarations): err = self.ERR_INLINE_ONLY if (isinstance(node, Nodes.DefNode) and self.scope_type == 'cclass' - and node.name in ('__getbuffer__', '__releasebuffer__')): - err = None # allow these slots + and node.name in ('__getbuffer__', '__releasebuffer__')): + err = None # allow these slots if isinstance(node, Nodes.CFuncDefNode): if (u'inline' in node.modifiers and - self.scope_type in ('pxd', 'cclass')): + self.scope_type in ('pxd', 'cclass')): node.inline_in_pxd = True if node.visibility != 'private': err = self.ERR_NOGO_WITH_INLINE % node.visibility elif node.api: err = self.ERR_NOGO_WITH_INLINE % 'api' else: - err = None # allow inline function + err = None # allow inline function else: err = self.ERR_INLINE_ONLY @@ -630,6 +784,9 @@ class InterpretCompilerDirectives(CythonTransform): - Command-line arguments overriding these - @cython.directivename decorators - with cython.directivename: statements + - replaces "cython.compiled" with BoolNode(value=True) + allowing unreachable blocks to be removed at a fairly early stage + before cython typing rules are forced on applied This transform is responsible for interpreting these various sources and store the directive in two ways: @@ -668,17 +825,27 @@ class 
InterpretCompilerDirectives(CythonTransform): 'operator.comma' : ExprNodes.c_binop_constructor(','), } - special_methods = set(['declare', 'union', 'struct', 'typedef', - 'sizeof', 'cast', 'pointer', 'compiled', - 'NULL', 'fused_type', 'parallel']) + special_methods = { + 'declare', 'union', 'struct', 'typedef', + 'sizeof', 'cast', 'pointer', 'compiled', + 'NULL', 'fused_type', 'parallel', + } special_methods.update(unop_method_nodes) - valid_parallel_directives = set([ + valid_cython_submodules = { + 'cimports', + 'dataclasses', + 'operator', + 'parallel', + 'view', + } + + valid_parallel_directives = { "parallel", "prange", "threadid", #"threadsavailable", - ]) + } def __init__(self, context, compilation_directive_defaults): super(InterpretCompilerDirectives, self).__init__(context) @@ -701,6 +868,44 @@ class InterpretCompilerDirectives(CythonTransform): error(pos, "Invalid directive: '%s'." % (directive,)) return True + def _check_valid_cython_module(self, pos, module_name): + if not module_name.startswith("cython."): + return + submodule = module_name.split('.', 2)[1] + if submodule in self.valid_cython_submodules: + return + + extra = "" + # This is very rarely used, so don't waste space on static tuples. + hints = [ + line.split() for line in """\ + imp cimports + cimp cimports + para parallel + parra parallel + dataclass dataclasses + """.splitlines()[:-1] + ] + for wrong, correct in hints: + if module_name.startswith("cython." + wrong): + extra = "Did you mean 'cython.%s' ?" % correct + break + if not extra: + is_simple_cython_name = submodule in Options.directive_types + if not is_simple_cython_name and not submodule.startswith("_"): + # Try to find it in the Shadow module (i.e. the pure Python namespace of cython.*). + # FIXME: use an internal reference of "cython.*" names instead of Shadow.py + from .. 
import Shadow + is_simple_cython_name = hasattr(Shadow, submodule) + if is_simple_cython_name: + extra = "Instead, use 'import cython' and then 'cython.%s'." % submodule + + error(pos, "'%s' is not a valid cython.* module%s%s" % ( + module_name, + ". " if extra else "", + extra, + )) + # Set up processing and handle the cython: comments. def visit_ModuleNode(self, node): for key in sorted(node.directive_comments): @@ -717,6 +922,12 @@ class InterpretCompilerDirectives(CythonTransform): node.cython_module_names = self.cython_module_names return node + def visit_CompilerDirectivesNode(self, node): + old_directives, self.directives = self.directives, node.directives + self.visitchildren(node) + self.directives = old_directives + return node + # The following four functions track imports and cimports that # begin with "cython" def is_cython_directive(self, name): @@ -749,22 +960,36 @@ class InterpretCompilerDirectives(CythonTransform): return result def visit_CImportStatNode(self, node): - if node.module_name == u"cython": + module_name = node.module_name + if module_name == u"cython.cimports": + error(node.pos, "Cannot cimport the 'cython.cimports' package directly, only submodules.") + if module_name.startswith(u"cython.cimports."): + if node.as_name and node.as_name != u'cython': + node.module_name = module_name[len(u"cython.cimports."):] + return node + error(node.pos, + "Python cimports must use 'from cython.cimports... import ...'" + " or 'import ... 
as ...', not just 'import ...'") + + if module_name == u"cython": self.cython_module_names.add(node.as_name or u"cython") - elif node.module_name.startswith(u"cython."): - if node.module_name.startswith(u"cython.parallel."): + elif module_name.startswith(u"cython."): + if module_name.startswith(u"cython.parallel."): error(node.pos, node.module_name + " is not a module") - if node.module_name == u"cython.parallel": + else: + self._check_valid_cython_module(node.pos, module_name) + + if module_name == u"cython.parallel": if node.as_name and node.as_name != u"cython": - self.parallel_directives[node.as_name] = node.module_name + self.parallel_directives[node.as_name] = module_name else: self.cython_module_names.add(u"cython") self.parallel_directives[ - u"cython.parallel"] = node.module_name + u"cython.parallel"] = module_name self.module_scope.use_utility_code( UtilityCode.load_cached("InitThreads", "ModuleSetupCode.c")) elif node.as_name: - self.directive_names[node.as_name] = node.module_name[7:] + self.directive_names[node.as_name] = module_name[7:] else: self.cython_module_names.add(u"cython") # if this cimport was a compiler directive, we don't @@ -773,26 +998,31 @@ class InterpretCompilerDirectives(CythonTransform): return node def visit_FromCImportStatNode(self, node): - if not node.relative_level and ( - node.module_name == u"cython" or node.module_name.startswith(u"cython.")): - submodule = (node.module_name + u".")[7:] + module_name = node.module_name + if module_name == u"cython.cimports" or module_name.startswith(u"cython.cimports."): + # only supported for convenience + return self._create_cimport_from_import( + node.pos, module_name, node.relative_level, node.imported_names) + elif not node.relative_level and ( + module_name == u"cython" or module_name.startswith(u"cython.")): + self._check_valid_cython_module(node.pos, module_name) + submodule = (module_name + u".")[7:] newimp = [] - - for pos, name, as_name, kind in node.imported_names: + for pos, 
name, as_name in node.imported_names: full_name = submodule + name qualified_name = u"cython." + full_name - if self.is_parallel_directive(qualified_name, node.pos): # from cython cimport parallel, or # from cython.parallel cimport parallel, prange, ... self.parallel_directives[as_name or name] = qualified_name elif self.is_cython_directive(full_name): self.directive_names[as_name or name] = full_name - if kind is not None: - self.context.nonfatal_error(PostParseError(pos, - "Compiler directive imports must be plain imports")) + elif full_name in ['dataclasses', 'typing']: + self.directive_names[as_name or name] = full_name + # unlike many directives, still treat it as a regular module + newimp.append((pos, name, as_name)) else: - newimp.append((pos, name, as_name, kind)) + newimp.append((pos, name, as_name)) if not newimp: return None @@ -801,9 +1031,18 @@ class InterpretCompilerDirectives(CythonTransform): return node def visit_FromImportStatNode(self, node): - if (node.module.module_name.value == u"cython") or \ - node.module.module_name.value.startswith(u"cython."): - submodule = (node.module.module_name.value + u".")[7:] + import_node = node.module + module_name = import_node.module_name.value + if module_name == u"cython.cimports" or module_name.startswith(u"cython.cimports."): + imported_names = [] + for name, name_node in node.items: + imported_names.append( + (name_node.pos, name, None if name == name_node.name else name_node.name)) + return self._create_cimport_from_import( + node.pos, module_name, import_node.level, imported_names) + elif module_name == u"cython" or module_name.startswith(u"cython."): + self._check_valid_cython_module(import_node.module_name.pos, module_name) + submodule = (module_name + u".")[7:] newimp = [] for name, name_node in node.items: full_name = submodule + name @@ -819,20 +1058,34 @@ class InterpretCompilerDirectives(CythonTransform): node.items = newimp return node + def _create_cimport_from_import(self, node_pos, 
module_name, level, imported_names): + if module_name == u"cython.cimports" or module_name.startswith(u"cython.cimports."): + module_name = EncodedString(module_name[len(u"cython.cimports."):]) # may be empty + + if module_name: + # from cython.cimports.a.b import x, y, z => from a.b cimport x, y, z + return Nodes.FromCImportStatNode( + node_pos, module_name=module_name, + relative_level=level, + imported_names=imported_names) + else: + # from cython.cimports import x, y, z => cimport x; cimport y; cimport z + return [ + Nodes.CImportStatNode( + pos, + module_name=dotted_name, + as_name=as_name, + is_absolute=level == 0) + for pos, dotted_name, as_name in imported_names + ] + def visit_SingleAssignmentNode(self, node): if isinstance(node.rhs, ExprNodes.ImportNode): module_name = node.rhs.module_name.value - is_parallel = (module_name + u".").startswith(u"cython.parallel.") - - if module_name != u"cython" and not is_parallel: + if module_name != u"cython" and not module_name.startswith("cython."): return node - module_name = node.rhs.module_name.value - as_name = node.lhs.name - - node = Nodes.CImportStatNode(node.pos, - module_name = module_name, - as_name = as_name) + node = Nodes.CImportStatNode(node.pos, module_name=module_name, as_name=node.lhs.name) node = self.visit_CImportStatNode(node) else: self.visitchildren(node) @@ -840,16 +1093,35 @@ class InterpretCompilerDirectives(CythonTransform): return node def visit_NameNode(self, node): + if node.annotation: + self.visitchild(node, 'annotation') if node.name in self.cython_module_names: node.is_cython_module = True else: directive = self.directive_names.get(node.name) if directive is not None: node.cython_attribute = directive + if node.as_cython_attribute() == "compiled": + return ExprNodes.BoolNode(node.pos, value=True) # replace early so unused branches can be dropped + # before they have a chance to cause compile-errors + return node + + def visit_AttributeNode(self, node): + self.visitchildren(node) + if 
node.as_cython_attribute() == "compiled": + return ExprNodes.BoolNode(node.pos, value=True) # replace early so unused branches can be dropped + # before they have a chance to cause compile-errors + return node + + def visit_AnnotationNode(self, node): + # for most transforms annotations are left unvisited (because they're unevaluated) + # however, it is important to pick up compiler directives from them + if node.expr: + self.visit(node.expr) return node def visit_NewExprNode(self, node): - self.visit(node.cppclass) + self.visitchild(node, 'cppclass') self.visitchildren(node) return node @@ -858,7 +1130,7 @@ class InterpretCompilerDirectives(CythonTransform): # decorator), returns a list of (directivename, value) pairs. # Otherwise, returns None if isinstance(node, ExprNodes.CallNode): - self.visit(node.function) + self.visitchild(node, 'function') optname = node.function.as_cython_attribute() if optname: directivetype = Options.directive_types.get(optname) @@ -890,7 +1162,7 @@ class InterpretCompilerDirectives(CythonTransform): if directivetype is bool: arg = ExprNodes.BoolNode(node.pos, value=True) return [self.try_to_parse_directive(optname, [arg], None, node.pos)] - elif directivetype is None: + elif directivetype is None or directivetype is Options.DEFER_ANALYSIS_OF_ARGUMENTS: return [(optname, None)] else: raise PostParseError( @@ -945,7 +1217,7 @@ class InterpretCompilerDirectives(CythonTransform): if len(args) != 0: raise PostParseError(pos, 'The %s directive takes no prepositional arguments' % optname) - return optname, dict([(key.value, value) for key, value in kwds.key_value_pairs]) + return optname, kwds.as_python_dict() elif directivetype is list: if kwds and len(kwds.key_value_pairs) != 0: raise PostParseError(pos, @@ -957,21 +1229,42 @@ class InterpretCompilerDirectives(CythonTransform): raise PostParseError(pos, 'The %s directive takes one compile-time string argument' % optname) return (optname, directivetype(optname, str(args[0].value))) + elif 
directivetype is Options.DEFER_ANALYSIS_OF_ARGUMENTS: + # signal to pass things on without processing + return (optname, (args, kwds.as_python_dict() if kwds else {})) else: assert False - def visit_with_directives(self, node, directives): + def visit_with_directives(self, node, directives, contents_directives): + # contents_directives may be None if not directives: + assert not contents_directives return self.visit_Node(node) old_directives = self.directives - new_directives = dict(old_directives) - new_directives.update(directives) + new_directives = Options.copy_inherited_directives(old_directives, **directives) + if contents_directives is not None: + new_contents_directives = Options.copy_inherited_directives( + old_directives, **contents_directives) + else: + new_contents_directives = new_directives if new_directives == old_directives: return self.visit_Node(node) self.directives = new_directives + if (contents_directives is not None and + new_contents_directives != new_directives): + # we need to wrap the node body in a compiler directives node + node.body = Nodes.StatListNode( + node.body.pos, + stats=[ + Nodes.CompilerDirectivesNode( + node.body.pos, + directives=new_contents_directives, + body=node.body) + ] + ) retbody = self.visit_Node(node) self.directives = old_directives @@ -980,13 +1273,14 @@ class InterpretCompilerDirectives(CythonTransform): return Nodes.CompilerDirectivesNode( pos=retbody.pos, body=retbody, directives=new_directives) + # Handle decorators def visit_FuncDefNode(self, node): - directives = self._extract_directives(node, 'function') - return self.visit_with_directives(node, directives) + directives, contents_directives = self._extract_directives(node, 'function') + return self.visit_with_directives(node, directives, contents_directives) def visit_CVarDefNode(self, node): - directives = self._extract_directives(node, 'function') + directives, _ = self._extract_directives(node, 'function') for name, value in directives.items(): if name 
== 'locals': node.directive_locals = value @@ -995,27 +1289,34 @@ class InterpretCompilerDirectives(CythonTransform): node.pos, "Cdef functions can only take cython.locals(), " "staticmethod, or final decorators, got %s." % name)) - return self.visit_with_directives(node, directives) + return self.visit_with_directives(node, directives, contents_directives=None) def visit_CClassDefNode(self, node): - directives = self._extract_directives(node, 'cclass') - return self.visit_with_directives(node, directives) + directives, contents_directives = self._extract_directives(node, 'cclass') + return self.visit_with_directives(node, directives, contents_directives) def visit_CppClassNode(self, node): - directives = self._extract_directives(node, 'cppclass') - return self.visit_with_directives(node, directives) + directives, contents_directives = self._extract_directives(node, 'cppclass') + return self.visit_with_directives(node, directives, contents_directives) def visit_PyClassDefNode(self, node): - directives = self._extract_directives(node, 'class') - return self.visit_with_directives(node, directives) + directives, contents_directives = self._extract_directives(node, 'class') + return self.visit_with_directives(node, directives, contents_directives) def _extract_directives(self, node, scope_name): + """ + Returns two dicts - directives applied to this function/class + and directives applied to its contents. They aren't always the + same (since e.g. cfunc should not be applied to inner functions) + """ if not node.decorators: - return {} + return {}, {} # Split the decorators into two lists -- real decorators and directives directives = [] realdecs = [] both = [] + current_opt_dict = dict(self.directives) + missing = object() # Decorators coming first take precedence. 
for dec in node.decorators[::-1]: new_directives = self.try_to_parse_directives(dec.decorator) @@ -1023,8 +1324,14 @@ class InterpretCompilerDirectives(CythonTransform): for directive in new_directives: if self.check_directive_scope(node.pos, directive[0], scope_name): name, value = directive - if self.directives.get(name, object()) != value: + if current_opt_dict.get(name, missing) != value: + if name == 'cfunc' and 'ufunc' in current_opt_dict: + error(dec.pos, "Cannot apply @cfunc to @ufunc, please reverse the decorators.") directives.append(directive) + current_opt_dict[name] = value + else: + warning(dec.pos, "Directive does not change previous value (%s%s)" % ( + name, '=%r' % value if value is not None else '')) if directive[0] == 'staticmethod': both.append(dec) # Adapt scope type based on decorators that change it. @@ -1033,13 +1340,21 @@ class InterpretCompilerDirectives(CythonTransform): else: realdecs.append(dec) if realdecs and (scope_name == 'cclass' or - isinstance(node, (Nodes.CFuncDefNode, Nodes.CClassDefNode, Nodes.CVarDefNode))): + isinstance(node, (Nodes.CClassDefNode, Nodes.CVarDefNode))): + for realdec in realdecs: + dec_pos = realdec.pos + realdec = realdec.decorator + if ((realdec.is_name and realdec.name == "dataclass") or + (realdec.is_attribute and realdec.attribute == "dataclass")): + error(dec_pos, + "Use '@cython.dataclasses.dataclass' on cdef classes to create a dataclass") + # Note - arbitrary C function decorators are caught later in DecoratorTransform raise PostParseError(realdecs[0].pos, "Cdef functions/classes cannot take arbitrary decorators.") node.decorators = realdecs[::-1] + both[::-1] # merge or override repeated directives optdict = {} - for directive in directives: - name, value = directive + contents_optdict = {} + for name, value in directives: if name in optdict: old_value = optdict[name] # keywords and arg lists can be merged, everything @@ -1052,7 +1367,9 @@ class InterpretCompilerDirectives(CythonTransform): 
optdict[name] = value else: optdict[name] = value - return optdict + if name not in Options.immediate_decorator_directives: + contents_optdict[name] = value + return optdict, contents_optdict # Handle with-statements def visit_WithStatNode(self, node): @@ -1071,7 +1388,7 @@ class InterpretCompilerDirectives(CythonTransform): if self.check_directive_scope(node.pos, name, 'with statement'): directive_dict[name] = value if directive_dict: - return self.visit_with_directives(node.body, directive_dict) + return self.visit_with_directives(node.body, directive_dict, contents_directives=None) return self.visit_Node(node) @@ -1161,7 +1478,7 @@ class ParallelRangeTransform(CythonTransform, SkipDeclarations): return node def visit_CallNode(self, node): - self.visit(node.function) + self.visitchild(node, 'function') if not self.parallel_directive: self.visitchildren(node, exclude=('function',)) return node @@ -1194,7 +1511,7 @@ class ParallelRangeTransform(CythonTransform, SkipDeclarations): "Nested parallel with blocks are disallowed") self.state = 'parallel with' - body = self.visit(node.body) + body = self.visitchild(node, 'body') self.state = None newnode.body = body @@ -1210,13 +1527,13 @@ class ParallelRangeTransform(CythonTransform, SkipDeclarations): error(node.pos, "The parallel directive must be called") return None - node.body = self.visit(node.body) + self.visitchild(node, 'body') return node def visit_ForInStatNode(self, node): "Rewrite 'for i in cython.parallel.prange(...):'" - self.visit(node.iterator) - self.visit(node.target) + self.visitchild(node, 'iterator') + self.visitchild(node, 'target') in_prange = isinstance(node.iterator.sequence, Nodes.ParallelRangeNode) @@ -1239,9 +1556,9 @@ class ParallelRangeTransform(CythonTransform, SkipDeclarations): self.state = 'prange' - self.visit(node.body) + self.visitchild(node, 'body') self.state = previous_state - self.visit(node.else_clause) + self.visitchild(node, 'else_clause') return node def visit(self, node): @@ 
-1250,12 +1567,13 @@ class ParallelRangeTransform(CythonTransform, SkipDeclarations): return super(ParallelRangeTransform, self).visit(node) -class WithTransform(CythonTransform, SkipDeclarations): +class WithTransform(VisitorTransform, SkipDeclarations): def visit_WithStatNode(self, node): self.visitchildren(node, 'body') pos = node.pos is_async = node.is_async body, target, manager = node.body, node.target, node.manager + manager = node.manager = ExprNodes.ProxyNode(manager) node.enter_call = ExprNodes.SimpleCallNode( pos, function=ExprNodes.AttributeNode( pos, obj=ExprNodes.CloneNode(manager), @@ -1316,6 +1634,130 @@ class WithTransform(CythonTransform, SkipDeclarations): # With statements are never inside expressions. return node + visit_Node = VisitorTransform.recurse_to_children + + +class _GeneratorExpressionArgumentsMarker(TreeVisitor, SkipDeclarations): + # called from "MarkClosureVisitor" + def __init__(self, gen_expr): + super(_GeneratorExpressionArgumentsMarker, self).__init__() + self.gen_expr = gen_expr + + def visit_ExprNode(self, node): + if not node.is_literal: + # Don't bother tagging literal nodes + assert (not node.generator_arg_tag) # nobody has tagged this first + node.generator_arg_tag = self.gen_expr + self.visitchildren(node) + + def visit_Node(self, node): + # We're only interested in the expressions that make up the iterator sequence, + # so don't go beyond ExprNodes (e.g. into ForFromStatNode). + return + + def visit_GeneratorExpressionNode(self, node): + node.generator_arg_tag = self.gen_expr + # don't visit children, can't handle overlapping tags + # (and assume generator expressions don't end up optimized out in a way + # that would require overlapping tags) + + +class _HandleGeneratorArguments(VisitorTransform, SkipDeclarations): + # used from within CreateClosureClasses + + def __call__(self, node): + from . 
import Visitor + assert isinstance(node, ExprNodes.GeneratorExpressionNode) + self.gen_node = node + + self.args = list(node.def_node.args) + self.call_parameters = list(node.call_parameters) + self.tag_count = 0 + self.substitutions = {} + + self.visitchildren(node) + + for k, v in self.substitutions.items(): + # doing another search for replacements here (at the end) allows us to sweep up + # CloneNodes too (which are often generated by the optimizer) + # (it could arguably be done more efficiently with a single traversal though) + Visitor.recursively_replace_node(node, k, v) + + node.def_node.args = self.args + node.call_parameters = self.call_parameters + return node + + def visit_GeneratorExpressionNode(self, node): + # a generator can also be substituted itself, so handle that case + new_node = self._handle_ExprNode(node, do_visit_children=False) + # However do not traverse into it. A new _HandleGeneratorArguments visitor will be used + # elsewhere to do that. + return node + + def _handle_ExprNode(self, node, do_visit_children): + if (node.generator_arg_tag is not None and self.gen_node is not None and + self.gen_node == node.generator_arg_tag): + pos = node.pos + # The reason for using ".x" as the name is that this is how CPython + # tracks internal variables in loops (e.g. + # { locals() for v in range(10) } + # will produce "v" and ".0"). We don't replicate this behaviour completely + # but use it as a starting point + name_source = self.tag_count + self.tag_count += 1 + name = EncodedString(".{0}".format(name_source)) + def_node = self.gen_node.def_node + if not def_node.local_scope.lookup_here(name): + from . 
import Symtab + cname = EncodedString(Naming.genexpr_arg_prefix + Symtab.punycodify_name(str(name_source))) + name_decl = Nodes.CNameDeclaratorNode(pos=pos, name=name) + type = node.type + if type.is_reference and not type.is_fake_reference: + # It isn't obvious whether the right thing to do would be to capture by reference or by + # value (C++ itself doesn't know either for lambda functions and forces a choice). + # However, capture by reference involves converting to FakeReference which would require + # re-analysing AttributeNodes. Therefore I've picked capture-by-value out of convenience + # TODO - could probably be optimized by making the arg a reference but the closure not + # (see https://github.com/cython/cython/issues/2468) + type = type.ref_base_type + + name_decl.type = type + new_arg = Nodes.CArgDeclNode(pos=pos, declarator=name_decl, + base_type=None, default=None, annotation=None) + new_arg.name = name_decl.name + new_arg.type = type + + self.args.append(new_arg) + node.generator_arg_tag = None # avoid the possibility of this being caught again + self.call_parameters.append(node) + new_arg.entry = def_node.declare_argument(def_node.local_scope, new_arg) + new_arg.entry.cname = cname + new_arg.entry.in_closure = True + + if do_visit_children: + # now visit the Nodes's children (but remove self.gen_node to not to further + # argument substitution) + gen_node, self.gen_node = self.gen_node, None + self.visitchildren(node) + self.gen_node = gen_node + + # replace the node inside the generator with a looked-up name + # (initialized_check can safely be False because the source variable will be checked + # before it is captured if the check is required) + name_node = ExprNodes.NameNode(pos, name=name, initialized_check=False) + name_node.entry = self.gen_node.def_node.gbody.local_scope.lookup(name_node.name) + name_node.type = name_node.entry.type + self.substitutions[node] = name_node + return name_node + if do_visit_children: + self.visitchildren(node) + 
return node + + def visit_ExprNode(self, node): + return self._handle_ExprNode(node, True) + + visit_Node = VisitorTransform.recurse_to_children + class DecoratorTransform(ScopeTrackingTransform, SkipDeclarations): """ @@ -1328,16 +1770,16 @@ class DecoratorTransform(ScopeTrackingTransform, SkipDeclarations): _properties = None _map_property_attribute = { - 'getter': '__get__', - 'setter': '__set__', - 'deleter': '__del__', + 'getter': EncodedString('__get__'), + 'setter': EncodedString('__set__'), + 'deleter': EncodedString('__del__'), }.get def visit_CClassDefNode(self, node): if self._properties is None: self._properties = [] self._properties.append({}) - super(DecoratorTransform, self).visit_CClassDefNode(node) + node = super(DecoratorTransform, self).visit_CClassDefNode(node) self._properties.pop() return node @@ -1347,6 +1789,32 @@ class DecoratorTransform(ScopeTrackingTransform, SkipDeclarations): warning(node.pos, "'property %s:' syntax is deprecated, use '@property'" % node.name, level) return node + def visit_CFuncDefNode(self, node): + node = self.visit_FuncDefNode(node) + if not node.decorators: + return node + elif self.scope_type != 'cclass' or self.scope_node.visibility != "extern": + # at the moment cdef functions are very restricted in what decorators they can take + # so it's simple to test for the small number of allowed decorators.... 
+ if not (len(node.decorators) == 1 and node.decorators[0].decorator.is_name and + node.decorators[0].decorator.name == "staticmethod"): + error(node.decorators[0].pos, "Cdef functions cannot take arbitrary decorators.") + return node + + ret_node = node + decorator_node = self._find_property_decorator(node) + if decorator_node: + if decorator_node.decorator.is_name: + name = node.declared_name() + if name: + ret_node = self._add_property(node, name, decorator_node) + else: + error(decorator_node.pos, "C property decorator can only be @property") + + if node.decorators: + return self._reject_decorated_property(node, node.decorators[0]) + return ret_node + def visit_DefNode(self, node): scope_type = self.scope_type node = self.visit_FuncDefNode(node) @@ -1354,28 +1822,12 @@ class DecoratorTransform(ScopeTrackingTransform, SkipDeclarations): return node # transform @property decorators - properties = self._properties[-1] - for decorator_node in node.decorators[::-1]: + decorator_node = self._find_property_decorator(node) + if decorator_node is not None: decorator = decorator_node.decorator - if decorator.is_name and decorator.name == 'property': - if len(node.decorators) > 1: - return self._reject_decorated_property(node, decorator_node) - name = node.name - node.name = EncodedString('__get__') - node.decorators.remove(decorator_node) - stat_list = [node] - if name in properties: - prop = properties[name] - prop.pos = node.pos - prop.doc = node.doc - prop.body.stats = stat_list - return [] - prop = Nodes.PropertyNode(node.pos, name=name) - prop.doc = node.doc - prop.body = Nodes.StatListNode(node.pos, stats=stat_list) - properties[name] = prop - return [prop] - elif decorator.is_attribute and decorator.obj.name in properties: + if decorator.is_name: + return self._add_property(node, node.name, decorator_node) + else: handler_name = self._map_property_attribute(decorator.attribute) if handler_name: if decorator.obj.name != node.name: @@ -1386,7 +1838,7 @@ class 
DecoratorTransform(ScopeTrackingTransform, SkipDeclarations): elif len(node.decorators) > 1: return self._reject_decorated_property(node, decorator_node) else: - return self._add_to_property(properties, node, handler_name, decorator_node) + return self._add_to_property(node, handler_name, decorator_node) # we clear node.decorators, so we need to set the # is_staticmethod/is_classmethod attributes now @@ -1401,6 +1853,18 @@ class DecoratorTransform(ScopeTrackingTransform, SkipDeclarations): node.decorators = None return self.chain_decorators(node, decs, node.name) + def _find_property_decorator(self, node): + properties = self._properties[-1] + for decorator_node in node.decorators[::-1]: + decorator = decorator_node.decorator + if decorator.is_name and decorator.name == 'property': + # @property + return decorator_node + elif decorator.is_attribute and decorator.obj.name in properties: + # @prop.setter etc. + return decorator_node + return None + @staticmethod def _reject_decorated_property(node, decorator_node): # restrict transformation to outermost decorator as wrapped properties will probably not work @@ -1409,9 +1873,42 @@ class DecoratorTransform(ScopeTrackingTransform, SkipDeclarations): error(deco.pos, "Property methods with additional decorators are not supported") return node - @staticmethod - def _add_to_property(properties, node, name, decorator): + def _add_property(self, node, name, decorator_node): + if len(node.decorators) > 1: + return self._reject_decorated_property(node, decorator_node) + node.decorators.remove(decorator_node) + properties = self._properties[-1] + is_cproperty = isinstance(node, Nodes.CFuncDefNode) + body = Nodes.StatListNode(node.pos, stats=[node]) + if is_cproperty: + if name in properties: + error(node.pos, "C property redeclared") + if 'inline' not in node.modifiers: + error(node.pos, "C property method must be declared 'inline'") + prop = Nodes.CPropertyNode(node.pos, doc=node.doc, name=name, body=body) + elif name in 
properties: + prop = properties[name] + if prop.is_cproperty: + error(node.pos, "C property redeclared") + else: + node.name = EncodedString("__get__") + prop.pos = node.pos + prop.doc = node.doc + prop.body.stats = [node] + return None + else: + node.name = EncodedString("__get__") + prop = Nodes.PropertyNode( + node.pos, name=name, doc=node.doc, body=body) + properties[name] = prop + return prop + + def _add_to_property(self, node, name, decorator): + properties = self._properties[-1] prop = properties[node.name] + if prop.is_cproperty: + error(node.pos, "C property redeclared") + return None node.name = name node.decorators.remove(decorator) stats = prop.body.stats @@ -1421,7 +1918,7 @@ class DecoratorTransform(ScopeTrackingTransform, SkipDeclarations): break else: stats.append(node) - return [] + return None @staticmethod def chain_decorators(node, decorators, name): @@ -1499,6 +1996,10 @@ class CnameDirectivesTransform(CythonTransform, SkipDeclarations): class ForwardDeclareTypes(CythonTransform): + """ + Declare all global cdef names that we allow referencing in other places, + before declaring everything (else) in source code order. 
+ """ def visit_CompilerDirectivesNode(self, node): env = self.module_scope @@ -1542,6 +2043,14 @@ class ForwardDeclareTypes(CythonTransform): entry.type.get_all_specialized_function_types() return node + def visit_FuncDefNode(self, node): + # no traversal needed + return node + + def visit_PyClassDefNode(self, node): + # no traversal needed + return node + class AnalyseDeclarationsTransform(EnvTransform): @@ -1622,6 +2131,9 @@ if VALUE is not None: def visit_CClassDefNode(self, node): node = self.visit_ClassDefNode(node) + if node.scope and 'dataclasses.dataclass' in node.scope.directives: + from .Dataclass import handle_cclass_dataclass + handle_cclass_dataclass(node, node.scope.directives['dataclasses.dataclass'], self) if node.scope and node.scope.implemented and node.body: stats = [] for entry in node.scope.var_entries: @@ -1633,8 +2145,8 @@ if VALUE is not None: if stats: node.body.stats += stats if (node.visibility != 'extern' - and not node.scope.lookup('__reduce__') - and not node.scope.lookup('__reduce_ex__')): + and not node.scope.lookup('__reduce__') + and not node.scope.lookup('__reduce_ex__')): self._inject_pickle_methods(node) return node @@ -1688,9 +2200,9 @@ if VALUE is not None: pickle_func = TreeFragment(u""" def __reduce_cython__(self): - raise TypeError("%(msg)s") + raise TypeError, "%(msg)s" def __setstate_cython__(self, __pyx_state): - raise TypeError("%(msg)s") + raise TypeError, "%(msg)s" """ % {'msg': msg}, level='c_class', pipeline=[NormalizeTree(None)]).substitute({}) pickle_func.analyse_declarations(node.scope) @@ -1702,10 +2214,11 @@ if VALUE is not None: if not e.type.is_pyobject: e.type.create_to_py_utility_code(env) e.type.create_from_py_utility_code(env) + all_members_names = [e.name for e in all_members] checksums = _calculate_pickle_checksums(all_members_names) - unpickle_func_name = '__pyx_unpickle_%s' % node.class_name + unpickle_func_name = '__pyx_unpickle_%s' % node.punycode_class_name # TODO(robertwb): Move the state into 
the third argument # so it can be pickled *after* self is memoized. @@ -1715,7 +2228,7 @@ if VALUE is not None: cdef object __pyx_result if __pyx_checksum not in %(checksums)s: from pickle import PickleError as __pyx_PickleError - raise __pyx_PickleError("Incompatible checksums (0x%%x vs %(checksums)s = (%(members)s))" %% __pyx_checksum) + raise __pyx_PickleError, "Incompatible checksums (0x%%x vs %(checksums)s = (%(members)s))" %% __pyx_checksum __pyx_result = %(class_name)s.__new__(__pyx_type) if __pyx_state is not None: %(unpickle_func_name)s__set_state(<%(class_name)s> __pyx_result, __pyx_state) @@ -1783,8 +2296,8 @@ if VALUE is not None: for decorator in old_decorators: func = decorator.decorator if (not func.is_name or - func.name not in ('staticmethod', 'classmethod') or - env.lookup_here(func.name)): + func.name not in ('staticmethod', 'classmethod') or + env.lookup_here(func.name)): # not a static or classmethod decorators.append(decorator) @@ -1802,8 +2315,10 @@ if VALUE is not None: "Handle def or cpdef fused functions" # Create PyCFunction nodes for each specialization node.stats.insert(0, node.py_func) - node.py_func = self.visit(node.py_func) + self.visitchild(node, 'py_func') node.update_fused_defnode_entry(env) + # For the moment, fused functions do not support METH_FASTCALL + node.py_func.entry.signature.use_fastcall = False pycfunc = ExprNodes.PyCFunctionNode.from_defnode(node.py_func, binding=True) pycfunc = ExprNodes.ProxyNode(pycfunc.coerce_to_temp(env)) node.resulting_fused_function = pycfunc @@ -1846,19 +2361,6 @@ if VALUE is not None: return node - def _handle_nogil_cleanup(self, lenv, node): - "Handle cleanup for 'with gil' blocks in nogil functions." - if lenv.nogil and lenv.has_with_gil_block: - # Acquire the GIL for cleanup in 'nogil' functions, by wrapping - # the entire function body in try/finally. 
- # The corresponding release will be taken care of by - # Nodes.FuncDefNode.generate_function_definitions() - node.body = Nodes.NogilTryFinallyStatNode( - node.body.pos, - body=node.body, - finally_clause=Nodes.EnsureGILNode(node.body.pos), - finally_except_clause=Nodes.EnsureGILNode(node.body.pos)) - def _handle_fused(self, node): if node.is_generator and node.has_fused_arguments: node.has_fused_arguments = False @@ -1890,6 +2392,8 @@ if VALUE is not None: for var, type_node in node.directive_locals.items(): if not lenv.lookup_here(var): # don't redeclare args type = type_node.analyse_as_type(lenv) + if type and type.is_fused and lenv.fused_to_specific: + type = type.specialize(lenv.fused_to_specific) if type: lenv.declare_var(var, type, type_node.pos) else: @@ -1899,17 +2403,18 @@ if VALUE is not None: node = self._create_fused_function(env, node) else: node.body.analyse_declarations(lenv) - self._handle_nogil_cleanup(lenv, node) self._super_visit_FuncDefNode(node) self.seen_vars_stack.pop() + + if "ufunc" in lenv.directives: + from . 
import UFuncs + return UFuncs.convert_to_ufunc(node) return node def visit_DefNode(self, node): node = self.visit_FuncDefNode(node) env = self.current_env() - if isinstance(node, Nodes.DefNode) and node.is_wrapper: - env = env.parent_scope if (not isinstance(node, Nodes.DefNode) or node.fused_py_func or node.is_generator_body or not node.needs_assignment_synthesis(env)): @@ -1959,11 +2464,17 @@ if VALUE is not None: assmt.analyse_declarations(env) return assmt + def visit_func_outer_attrs(self, node): + # any names in the outer attrs should not be looked up in the function "seen_vars_stack" + stack = self.seen_vars_stack.pop() + super(AnalyseDeclarationsTransform, self).visit_func_outer_attrs(node) + self.seen_vars_stack.append(stack) + def visit_ScopedExprNode(self, node): env = self.current_env() node.analyse_declarations(env) # the node may or may not have a local scope - if node.has_local_scope: + if node.expr_scope: self.seen_vars_stack.append(set(self.seen_vars_stack[-1])) self.enter_scope(node, node.expr_scope) node.analyse_scoped_declarations(node.expr_scope) @@ -1971,6 +2482,7 @@ if VALUE is not None: self.exit_scope() self.seen_vars_stack.pop() else: + node.analyse_scoped_declarations(env) self.visitchildren(node) return node @@ -1993,7 +2505,7 @@ if VALUE is not None: # (so it can't happen later). # Note that we don't return the original node, as it is # never used after this phase. 
- if True: # private (default) + if True: # private (default) return None self_value = ExprNodes.AttributeNode( @@ -2085,8 +2597,8 @@ if VALUE is not None: if node.name in self.seen_vars_stack[-1]: entry = self.current_env().lookup(node.name) if (entry is None or entry.visibility != 'extern' - and not entry.scope.is_c_class_scope): - warning(node.pos, "cdef variable '%s' declared after it is used" % node.name, 2) + and not entry.scope.is_c_class_scope): + error(node.pos, "cdef variable '%s' declared after it is used" % node.name) self.visitchildren(node) return node @@ -2096,13 +2608,12 @@ if VALUE is not None: return None def visit_CnameDecoratorNode(self, node): - child_node = self.visit(node.node) + child_node = self.visitchild(node, 'node') if not child_node: return None - if type(child_node) is list: # Assignment synthesized - node.child_node = child_node[0] + if type(child_node) is list: # Assignment synthesized + node.node = child_node[0] return [node] + child_node[1:] - node.node = child_node return node def create_Property(self, entry): @@ -2122,6 +2633,11 @@ if VALUE is not None: property.doc = entry.doc return property + def visit_AssignmentExpressionNode(self, node): + self.visitchildren(node) + node.analyse_declarations(self.current_env()) + return node + def _calculate_pickle_checksums(member_names): # Cython 0.x used MD5 for the checksum, which a few Python installations remove for security reasons. 
@@ -2130,7 +2646,7 @@ def _calculate_pickle_checksums(member_names): member_names_string = ' '.join(member_names).encode('utf-8') hash_kwargs = {'usedforsecurity': False} if sys.version_info >= (3, 9) else {} checksums = [] - for algo_name in ['md5', 'sha256', 'sha1']: + for algo_name in ['sha256', 'sha1', 'md5']: try: mkchecksum = getattr(hashlib, algo_name) checksum = mkchecksum(member_names_string, **hash_kwargs).hexdigest() @@ -2219,7 +2735,7 @@ class CalculateQualifiedNamesTransform(EnvTransform): def visit_ClassDefNode(self, node): orig_qualified_name = self.qualified_name[:] entry = (getattr(node, 'entry', None) or # PyClass - self.current_env().lookup_here(node.name)) # CClass + self.current_env().lookup_here(node.target.name)) # CClass self._append_entry(entry) self._super_visit_ClassDefNode(node) self.qualified_name = orig_qualified_name @@ -2328,8 +2844,8 @@ class ExpandInplaceOperators(EnvTransform): operand2 = rhs, inplace=True) # Manually analyse types for new node. - lhs.analyse_target_types(env) - dup.analyse_types(env) + lhs = lhs.analyse_target_types(env) + dup.analyse_types(env) # FIXME: no need to reanalyse the copy, right? 
binop.analyse_operation(env) node = Nodes.SingleAssignmentNode( node.pos, @@ -2379,13 +2895,15 @@ class AdjustDefByDirectives(CythonTransform, SkipDeclarations): return_type_node = self.directives.get('returns') if return_type_node is None and self.directives['annotation_typing']: return_type_node = node.return_type_annotation - # for Python anntations, prefer safe exception handling by default + # for Python annotations, prefer safe exception handling by default if return_type_node is not None and except_val is None: except_val = (None, True) # except * elif except_val is None: - # backward compatible default: no exception check - except_val = (None, False) + # backward compatible default: no exception check, unless there's also a "@returns" declaration + except_val = (None, True if return_type_node else False) if 'ccall' in self.directives: + if 'cfunc' in self.directives: + error(node.pos, "cfunc and ccall directives cannot be combined") node = node.as_cfunction( overridable=True, modifiers=modifiers, nogil=nogil, returns=return_type_node, except_val=except_val) @@ -2437,8 +2955,6 @@ class AlignFunctionDefinitions(CythonTransform): def visit_ModuleNode(self, node): self.scope = node.scope - self.directives = node.directives - self.imported_names = set() # hack, see visit_FromImportStatNode() self.visitchildren(node) return node @@ -2476,15 +2992,45 @@ class AlignFunctionDefinitions(CythonTransform): error(pxd_def.pos, "previous declaration here") return None node = node.as_cfunction(pxd_def) - elif (self.scope.is_module_scope and self.directives['auto_cpdef'] - and not node.name in self.imported_names - and node.is_cdef_func_compatible()): - # FIXME: cpdef-ing should be done in analyse_declarations() - node = node.as_cfunction(scope=self.scope) # Enable this when nested cdef functions are allowed. 
# self.visitchildren(node) return node + def visit_ExprNode(self, node): + # ignore lambdas and everything else that appears in expressions + return node + + +class AutoCpdefFunctionDefinitions(CythonTransform): + + def visit_ModuleNode(self, node): + self.directives = node.directives + self.imported_names = set() # hack, see visit_FromImportStatNode() + self.scope = node.scope + self.visitchildren(node) + return node + + def visit_DefNode(self, node): + if (self.scope.is_module_scope and self.directives['auto_cpdef'] + and node.name not in self.imported_names + and node.is_cdef_func_compatible()): + # FIXME: cpdef-ing should be done in analyse_declarations() + node = node.as_cfunction(scope=self.scope) + return node + + def visit_CClassDefNode(self, node, pxd_def=None): + if pxd_def is None: + pxd_def = self.scope.lookup(node.class_name) + if pxd_def: + if not pxd_def.defined_in_pxd: + return node + outer_scope = self.scope + self.scope = pxd_def.type.scope + self.visitchildren(node) + if pxd_def: + self.scope = outer_scope + return node + def visit_FromImportStatNode(self, node): # hack to prevent conditional import fallback functions from # being cdpef-ed (global Python variables currently conflict @@ -2504,8 +3050,7 @@ class RemoveUnreachableCode(CythonTransform): if not self.current_directives['remove_unreachable']: return node self.visitchildren(node) - for idx, stat in enumerate(node.stats): - idx += 1 + for idx, stat in enumerate(node.stats, 1): if stat.is_terminator: if idx < len(node.stats): if self.current_directives['warn.unreachable']: @@ -2604,6 +3149,8 @@ class YieldNodeCollector(TreeVisitor): class MarkClosureVisitor(CythonTransform): + # In addition to marking closures this is also responsible to finding parts of the + # generator iterable and marking them def visit_ModuleNode(self, node): self.needs_closure = False @@ -2649,7 +3196,8 @@ class MarkClosureVisitor(CythonTransform): star_arg=node.star_arg, starstar_arg=node.starstar_arg, doc=node.doc, 
decorators=node.decorators, gbody=gbody, lambda_name=node.lambda_name, - return_type_annotation=node.return_type_annotation) + return_type_annotation=node.return_type_annotation, + is_generator_expression=node.is_generator_expression) return coroutine def visit_CFuncDefNode(self, node): @@ -2673,6 +3221,19 @@ class MarkClosureVisitor(CythonTransform): self.needs_closure = True return node + def visit_GeneratorExpressionNode(self, node): + node = self.visit_LambdaNode(node) + if not isinstance(node.loop, Nodes._ForInStatNode): + # Possibly should handle ForFromStatNode + # but for now do nothing + return node + itseq = node.loop.iterator.sequence + # literals do not need replacing with an argument + if itseq.is_literal: + return node + _GeneratorExpressionArgumentsMarker(node).visit(itseq) + return node + class CreateClosureClasses(CythonTransform): # Output closure classes in module scope for all functions @@ -2726,7 +3287,7 @@ class CreateClosureClasses(CythonTransform): if not node.py_cfunc_node: raise InternalError("DefNode does not have assignment node") inner_node = node.py_cfunc_node - inner_node.needs_self_code = False + inner_node.needs_closure_code = False node.needs_outer_scope = False if node.is_generator: @@ -2745,6 +3306,7 @@ class CreateClosureClasses(CythonTransform): as_name = '%s_%s' % ( target_module_scope.next_id(Naming.closure_class_prefix), node.entry.cname.replace('.','__')) + as_name = EncodedString(as_name) entry = target_module_scope.declare_c_class( name=as_name, pos=node.pos, defining=True, @@ -2816,6 +3378,10 @@ class CreateClosureClasses(CythonTransform): self.visitchildren(node) return node + def visit_GeneratorExpressionNode(self, node): + node = _HandleGeneratorArguments()(node) + return self.visit_LambdaNode(node) + class InjectGilHandling(VisitorTransform, SkipDeclarations): """ @@ -2824,20 +3390,20 @@ class InjectGilHandling(VisitorTransform, SkipDeclarations): Must run before the AnalyseDeclarationsTransform to make sure the 
GILStatNodes get set up, parallel sections know that the GIL is acquired inside of them, etc. """ - def __call__(self, root): - self.nogil = False - return super(InjectGilHandling, self).__call__(root) + nogil = False # special node handling - def visit_RaiseStatNode(self, node): - """Allow raising exceptions in nogil sections by wrapping them in a 'with gil' block.""" + def _inject_gil_in_nogil(self, node): + """Allow the (Python statement) node in nogil sections by wrapping it in a 'with gil' block.""" if self.nogil: node = Nodes.GILStatNode(node.pos, state='gil', body=node) return node + visit_RaiseStatNode = _inject_gil_in_nogil + visit_PrintStatNode = _inject_gil_in_nogil # sadly, not the function + # further candidates: - # def visit_AssertStatNode(self, node): # def visit_ReraiseStatNode(self, node): # nogil tracking @@ -2905,6 +3471,7 @@ class GilCheck(VisitorTransform): self.env_stack.append(node.local_scope) inner_nogil = node.local_scope.nogil + nogil_declarator_only = self.nogil_declarator_only if inner_nogil: self.nogil_declarator_only = True @@ -2913,13 +3480,20 @@ class GilCheck(VisitorTransform): self._visit_scoped_children(node, inner_nogil) - # This cannot be nested, so it doesn't need backup/restore - self.nogil_declarator_only = False + # FuncDefNodes can be nested, because a cpdef function contains a def function + # inside it. 
Therefore restore to previous state + self.nogil_declarator_only = nogil_declarator_only self.env_stack.pop() return node def visit_GILStatNode(self, node): + if node.condition is not None: + error(node.condition.pos, + "Non-constant condition in a " + "`with %s(<condition>)` statement" % node.state) + return node + if self.nogil and node.nogil_check: node.nogil_check() @@ -2933,6 +3507,8 @@ class GilCheck(VisitorTransform): else: error(node.pos, "Trying to release the GIL while it was " "previously released.") + if self.nogil_declarator_only: + node.scope_gil_state_known = False if isinstance(node.finally_clause, Nodes.StatListNode): # The finally clause of the GILStatNode is a GILExitNode, @@ -2983,6 +3559,12 @@ class GilCheck(VisitorTransform): self.visitchildren(node) return node + def visit_GILExitNode(self, node): + if self.nogil_declarator_only: + node.scope_gil_state_known = False + self.visitchildren(node) + return node + def visit_Node(self, node): if self.env_stack and self.nogil and node.nogil_check: node.nogil_check(self.env_stack[-1]) @@ -2995,6 +3577,32 @@ class GilCheck(VisitorTransform): return node +class CoerceCppTemps(EnvTransform, SkipDeclarations): + """ + For temporary expression that are implemented using std::optional it's necessary the temps are + assigned using `__pyx_t_x = value;` but accessed using `something = (*__pyx_t_x)`. This transform + inserts a coercion node to take care of this, and runs absolutely last (once nothing else can be + inserted into the tree) + + TODO: a possible alternative would be to split ExprNode.result() into ExprNode.rhs_rhs() and ExprNode.lhs_rhs()??? 
+ """ + def visit_ModuleNode(self, node): + if self.current_env().cpp: + # skipping this makes it essentially free for C files + self.visitchildren(node) + return node + + def visit_ExprNode(self, node): + self.visitchildren(node) + if (self.current_env().directives['cpp_locals'] and + node.is_temp and node.type.is_cpp_class and + # Fake references are not replaced with "std::optional()". + not node.type.is_fake_reference): + node = ExprNodes.CppOptionalTempCoercion(node) + + return node + + class TransformBuiltinMethods(EnvTransform): """ Replace Cython's own cython.* builtins by the corresponding tree nodes. @@ -3017,9 +3625,7 @@ class TransformBuiltinMethods(EnvTransform): def visit_cython_attribute(self, node): attribute = node.as_cython_attribute() if attribute: - if attribute == u'compiled': - node = ExprNodes.BoolNode(node.pos, value=True) - elif attribute == u'__version__': + if attribute == u'__version__': from .. import __version__ as version node = ExprNodes.StringNode(node.pos, value=EncodedString(version)) elif attribute == u'NULL': @@ -3064,9 +3670,9 @@ class TransformBuiltinMethods(EnvTransform): error(self.pos, "Builtin 'vars()' called with wrong number of args, expected 0-1, got %d" % len(node.args)) if len(node.args) > 0: - return node # nothing to do + return node # nothing to do return ExprNodes.LocalsExprNode(pos, self.current_scope_node(), lenv) - else: # dir() + else: # dir() if len(node.args) > 1: error(self.pos, "Builtin 'dir()' called with wrong number of args, expected 0-1, got %d" % len(node.args)) @@ -3101,8 +3707,8 @@ class TransformBuiltinMethods(EnvTransform): def _inject_eval(self, node, func_name): lenv = self.current_env() - entry = lenv.lookup_here(func_name) - if entry or len(node.args) != 1: + entry = lenv.lookup(func_name) + if len(node.args) != 1 or (entry and not entry.is_builtin): return node # Inject globals and locals node.args.append(ExprNodes.GlobalsExprNode(node.pos)) @@ -3119,8 +3725,7 @@ class 
TransformBuiltinMethods(EnvTransform): return node # Inject no-args super def_node = self.current_scope_node() - if (not isinstance(def_node, Nodes.DefNode) or not def_node.args or - len(self.env_stack) < 2): + if not isinstance(def_node, Nodes.DefNode) or not def_node.args or len(self.env_stack) < 2: return node class_node, class_scope = self.env_stack[-2] if class_scope.is_py_class_scope: @@ -3259,10 +3864,17 @@ class ReplaceFusedTypeChecks(VisitorTransform): self.visitchildren(node) return self.transform(node) + def visit_GILStatNode(self, node): + """ + Fold constant condition of GILStatNode. + """ + self.visitchildren(node) + return self.transform(node) + def visit_PrimaryCmpNode(self, node): with Errors.local_errors(ignore=True): - type1 = node.operand1.analyse_as_type(self.local_scope) - type2 = node.operand2.analyse_as_type(self.local_scope) + type1 = node.operand1.analyse_as_type(self.local_scope) + type2 = node.operand2.analyse_as_type(self.local_scope) if type1 and type2: false_node = ExprNodes.BoolNode(node.pos, value=False) @@ -3398,9 +4010,14 @@ class DebugTransform(CythonTransform): else: pf_cname = node.py_func.entry.func_cname + # For functions defined using def, cname will be pyfunc_cname=__pyx_pf_* + # For functions defined using cpdef or cdef, cname will be func_cname=__pyx_f_* + # In all cases, cname will be the name of the function containing the actual code + cname = node.entry.pyfunc_cname or node.entry.func_cname + attrs = dict( name=node.entry.name or getattr(node, 'name', '<unknown>'), - cname=node.entry.func_cname, + cname=cname, pf_cname=pf_cname, qualified_name=node.local_scope.qualified_name, lineno=str(node.pos[1])) @@ -3428,10 +4045,10 @@ class DebugTransform(CythonTransform): def visit_NameNode(self, node): if (self.register_stepinto and - node.type is not None and - node.type.is_cfunction and - getattr(node, 'is_called', False) and - node.entry.func_cname is not None): + node.type is not None and + node.type.is_cfunction and + 
getattr(node, 'is_called', False) and + node.entry.func_cname is not None): # don't check node.entry.in_cinclude, as 'cdef extern: ...' # declared functions are not 'in_cinclude'. # This means we will list called 'cdef' functions as @@ -3450,26 +4067,16 @@ class DebugTransform(CythonTransform): it's a "relevant frame" and it will know where to set the breakpoint for 'break modulename'. """ - name = node.full_module_name.rpartition('.')[-1] - - cname_py2 = 'init' + name - cname_py3 = 'PyInit_' + name - - py2_attrs = dict( - name=name, - cname=cname_py2, + self._serialize_modulenode_as_function(node, dict( + name=node.full_module_name.rpartition('.')[-1], + cname=node.module_init_func_cname(), pf_cname='', # Ignore the qualified_name, breakpoints should be set using # `cy break modulename:lineno` for module-level breakpoints. qualified_name='', lineno='1', is_initmodule_function="True", - ) - - py3_attrs = dict(py2_attrs, cname=cname_py3) - - self._serialize_modulenode_as_function(node, py2_attrs) - self._serialize_modulenode_as_function(node, py3_attrs) + )) def _serialize_modulenode_as_function(self, node, attrs): self.tb.start('Function', attrs=attrs) diff --git a/Cython/Compiler/Parsing.pxd b/Cython/Compiler/Parsing.pxd index 25453b39a..72a855fd4 100644 --- a/Cython/Compiler/Parsing.pxd +++ b/Cython/Compiler/Parsing.pxd @@ -1,3 +1,5 @@ +# cython: language_level=3 + # We declare all of these here to type the first argument. 
from __future__ import absolute_import @@ -19,16 +21,17 @@ cdef p_ident_list(PyrexScanner s) cdef tuple p_binop_operator(PyrexScanner s) cdef p_binop_expr(PyrexScanner s, ops, p_sub_expr_func p_sub_expr) -cdef p_lambdef(PyrexScanner s, bint allow_conditional=*) -cdef p_lambdef_nocond(PyrexScanner s) +cdef p_lambdef(PyrexScanner s) cdef p_test(PyrexScanner s) -cdef p_test_nocond(PyrexScanner s) +cdef p_test_allow_walrus_after(PyrexScanner s) +cdef p_namedexpr_test(PyrexScanner s) cdef p_or_test(PyrexScanner s) -cdef p_rassoc_binop_expr(PyrexScanner s, ops, p_sub_expr_func p_subexpr) +cdef p_rassoc_binop_expr(PyrexScanner s, unicode op, p_sub_expr_func p_subexpr) cdef p_and_test(PyrexScanner s) cdef p_not_test(PyrexScanner s) cdef p_comparison(PyrexScanner s) cdef p_test_or_starred_expr(PyrexScanner s) +cdef p_namedexpr_test_or_starred_expr(PyrexScanner s) cdef p_starred_expr(PyrexScanner s) cdef p_cascaded_cmp(PyrexScanner s) cdef p_cmp_op(PyrexScanner s) @@ -82,6 +85,7 @@ cdef p_dict_or_set_maker(PyrexScanner s) cdef p_backquote_expr(PyrexScanner s) cdef p_simple_expr_list(PyrexScanner s, expr=*) cdef p_test_or_starred_expr_list(PyrexScanner s, expr=*) +cdef p_namedexpr_test_or_starred_expr_list(s, expr=*) cdef p_testlist(PyrexScanner s) cdef p_testlist_star_expr(PyrexScanner s) cdef p_testlist_comp(PyrexScanner s) @@ -106,7 +110,7 @@ cdef p_return_statement(PyrexScanner s) cdef p_raise_statement(PyrexScanner s) cdef p_import_statement(PyrexScanner s) cdef p_from_import_statement(PyrexScanner s, bint first_statement = *) -cdef p_imported_name(PyrexScanner s, bint is_cimport) +cdef p_imported_name(PyrexScanner s) cdef p_dotted_name(PyrexScanner s, bint as_allowed) cdef p_as_name(PyrexScanner s) cdef p_assert_statement(PyrexScanner s) @@ -126,6 +130,8 @@ cdef p_except_clause(PyrexScanner s) cdef p_include_statement(PyrexScanner s, ctx) cdef p_with_statement(PyrexScanner s) cdef p_with_items(PyrexScanner s, bint is_async=*) +cdef p_with_items_list(PyrexScanner s, bint 
is_async) +cdef tuple p_with_item(PyrexScanner s, bint is_async) cdef p_with_template(PyrexScanner s) cdef p_simple_statement(PyrexScanner s, bint first_statement = *) cdef p_simple_statement_list(PyrexScanner s, ctx, bint first_statement = *) @@ -139,10 +145,10 @@ cdef tuple p_suite_with_docstring(PyrexScanner s, ctx, bint with_doc_only=*) cdef tuple _extract_docstring(node) cdef p_positional_and_keyword_args(PyrexScanner s, end_sy_set, templates = *) -cpdef p_c_base_type(PyrexScanner s, bint self_flag = *, bint nonempty = *, templates = *) +cpdef p_c_base_type(PyrexScanner s, bint nonempty = *, templates = *) cdef p_calling_convention(PyrexScanner s) cdef p_c_complex_base_type(PyrexScanner s, templates = *) -cdef p_c_simple_base_type(PyrexScanner s, bint self_flag, bint nonempty, templates = *) +cdef p_c_simple_base_type(PyrexScanner s, bint nonempty, templates = *) cdef p_buffer_or_template(PyrexScanner s, base_type_node, templates) cdef p_bracketed_base_type(PyrexScanner s, base_type_node, nonempty, empty) cdef is_memoryviewslice_access(PyrexScanner s) @@ -151,7 +157,6 @@ cdef bint looking_at_name(PyrexScanner s) except -2 cdef object looking_at_expr(PyrexScanner s)# except -2 cdef bint looking_at_base_type(PyrexScanner s) except -2 cdef bint looking_at_dotted_name(PyrexScanner s) except -2 -cdef bint looking_at_call(PyrexScanner s) except -2 cdef p_sign_and_longness(PyrexScanner s) cdef p_opt_cname(PyrexScanner s) cpdef p_c_declarator(PyrexScanner s, ctx = *, bint empty = *, bint is_type = *, bint cmethod_flag = *, @@ -163,7 +168,7 @@ cdef p_c_simple_declarator(PyrexScanner s, ctx, bint empty, bint is_type, bint c bint assignable, bint nonempty) cdef p_nogil(PyrexScanner s) cdef p_with_gil(PyrexScanner s) -cdef p_exception_value_clause(PyrexScanner s) +cdef p_exception_value_clause(PyrexScanner s, ctx) cpdef p_c_arg_list(PyrexScanner s, ctx = *, bint in_pyfunc = *, bint cmethod_flag = *, bint nonempty_declarators = *, bint kw_only = *, bint annotated = *) cdef 
p_optional_ellipsis(PyrexScanner s) @@ -197,3 +202,4 @@ cdef dict p_compiler_directive_comments(PyrexScanner s) cdef p_template_definition(PyrexScanner s) cdef p_cpp_class_definition(PyrexScanner s, pos, ctx) cdef p_cpp_class_attribute(PyrexScanner s, ctx) +cdef p_annotation(PyrexScanner s) diff --git a/Cython/Compiler/Parsing.py b/Cython/Compiler/Parsing.py index 1f20b4c95..d7394ca6f 100644 --- a/Cython/Compiler/Parsing.py +++ b/Cython/Compiler/Parsing.py @@ -14,7 +14,7 @@ cython.declare(Nodes=object, ExprNodes=object, EncodedString=object, Builtin=object, ModuleNode=object, Utils=object, _unicode=object, _bytes=object, re=object, sys=object, _parse_escape_sequences=object, _parse_escape_sequences_raw=object, partial=object, reduce=object, _IS_PY3=cython.bint, _IS_2BYTE_UNICODE=cython.bint, - _CDEF_MODIFIERS=tuple) + _CDEF_MODIFIERS=tuple, COMMON_BINOP_MISTAKES=dict) from io import StringIO import re @@ -22,7 +22,7 @@ import sys from unicodedata import lookup as lookup_unicodechar, category as unicode_category from functools import partial, reduce -from .Scanning import PyrexScanner, FileSourceDescriptor, StringSourceDescriptor +from .Scanning import PyrexScanner, FileSourceDescriptor, tentatively_scan from . import Nodes from . import ExprNodes from . 
import Builtin @@ -65,7 +65,7 @@ class Ctx(object): def p_ident(s, message="Expected an identifier"): if s.sy == 'IDENT': - name = s.systring + name = s.context.intern_ustring(s.systring) s.next() return name else: @@ -74,7 +74,7 @@ def p_ident(s, message="Expected an identifier"): def p_ident_list(s): names = [] while s.sy == 'IDENT': - names.append(s.systring) + names.append(s.context.intern_ustring(s.systring)) s.next() if s.sy != ',': break @@ -103,12 +103,12 @@ def p_binop_expr(s, ops, p_sub_expr): if Future.division in s.context.future_directives: n1.truedivision = True else: - n1.truedivision = None # unknown + n1.truedivision = None # unknown return n1 #lambdef: 'lambda' [varargslist] ':' test -def p_lambdef(s, allow_conditional=True): +def p_lambdef(s): # s.sy == 'lambda' pos = s.position() s.next() @@ -119,23 +119,27 @@ def p_lambdef(s, allow_conditional=True): args, star_arg, starstar_arg = p_varargslist( s, terminator=':', annotated=False) s.expect(':') - if allow_conditional: - expr = p_test(s) - else: - expr = p_test_nocond(s) + expr = p_test(s) return ExprNodes.LambdaNode( pos, args = args, star_arg = star_arg, starstar_arg = starstar_arg, result_expr = expr) -#lambdef_nocond: 'lambda' [varargslist] ':' test_nocond - -def p_lambdef_nocond(s): - return p_lambdef(s, allow_conditional=False) - #test: or_test ['if' or_test 'else' test] | lambdef def p_test(s): + # The check for a following ':=' is only for error reporting purposes. + # It simply changes a + # expected ')', found ':=' + # message into something a bit more descriptive. 
+ # It is close to what the PEG parser does in CPython, where an expression has + # a lookahead assertion that it isn't followed by ':=' + expr = p_test_allow_walrus_after(s) + if s.sy == ':=': + s.error("invalid syntax: assignment expression not allowed in this context") + return expr + +def p_test_allow_walrus_after(s): if s.sy == 'lambda': return p_lambdef(s) pos = s.position() @@ -149,34 +153,52 @@ def p_test(s): else: return expr -#test_nocond: or_test | lambdef_nocond +def p_namedexpr_test(s): + # defined in the LL parser as + # namedexpr_test: test [':=' test] + # The requirement that the LHS is a name is not enforced in the grammar. + # For comparison the PEG parser does: + # 1. look for "name :=", if found it's definitely a named expression + # so look for expression + # 2. Otherwise, look for expression + lhs = p_test_allow_walrus_after(s) + if s.sy == ':=': + position = s.position() + if not lhs.is_name: + s.error("Left-hand side of assignment expression must be an identifier", fatal=False) + s.next() + rhs = p_test(s) + return ExprNodes.AssignmentExpressionNode(position, lhs=lhs, rhs=rhs) + return lhs -def p_test_nocond(s): - if s.sy == 'lambda': - return p_lambdef_nocond(s) - else: - return p_or_test(s) #or_test: and_test ('or' and_test)* +COMMON_BINOP_MISTAKES = {'||': 'or', '&&': 'and'} + def p_or_test(s): - return p_rassoc_binop_expr(s, ('or',), p_and_test) + return p_rassoc_binop_expr(s, u'or', p_and_test) -def p_rassoc_binop_expr(s, ops, p_subexpr): +def p_rassoc_binop_expr(s, op, p_subexpr): n1 = p_subexpr(s) - if s.sy in ops: + if s.sy == op: pos = s.position() op = s.sy s.next() - n2 = p_rassoc_binop_expr(s, ops, p_subexpr) + n2 = p_rassoc_binop_expr(s, op, p_subexpr) n1 = ExprNodes.binop_node(pos, op, n1, n2) + elif s.sy in COMMON_BINOP_MISTAKES and COMMON_BINOP_MISTAKES[s.sy] == op: + # Only report this for the current operator since we pass through here twice for 'and' and 'or'. 
+ warning(s.position(), + "Found the C operator '%s', did you mean the Python operator '%s'?" % (s.sy, op), + level=1) return n1 #and_test: not_test ('and' not_test)* def p_and_test(s): #return p_binop_expr(s, ('and',), p_not_test) - return p_rassoc_binop_expr(s, ('and',), p_not_test) + return p_rassoc_binop_expr(s, u'and', p_not_test) #not_test: 'not' not_test | comparison @@ -209,6 +231,12 @@ def p_test_or_starred_expr(s): else: return p_test(s) +def p_namedexpr_test_or_starred_expr(s): + if s.sy == '*': + return p_starred_expr(s) + else: + return p_namedexpr_test(s) + def p_starred_expr(s): pos = s.position() if s.sy == '*': @@ -250,10 +278,10 @@ def p_cmp_op(s): op = '!=' return op -comparison_ops = cython.declare(set, set([ +comparison_ops = cython.declare(frozenset, frozenset(( '<', '>', '==', '>=', '<=', '<>', '!=', 'in', 'is', 'not' -])) +))) #expr: xor_expr ('|' xor_expr)* @@ -316,10 +344,12 @@ def p_typecast(s): s.next() base_type = p_c_base_type(s) is_memslice = isinstance(base_type, Nodes.MemoryViewSliceTypeNode) - is_template = isinstance(base_type, Nodes.TemplatedTypeNode) - is_const = isinstance(base_type, Nodes.CConstTypeNode) - if (not is_memslice and not is_template and not is_const - and base_type.name is None): + is_other_unnamed_type = isinstance(base_type, ( + Nodes.TemplatedTypeNode, + Nodes.CConstOrVolatileTypeNode, + Nodes.CTupleBaseTypeNode, + )) + if not (is_memslice or is_other_unnamed_type) and base_type.name is None: s.error("Unknown type") declarator = p_c_declarator(s, empty = 1) if s.sy == '?': @@ -330,8 +360,7 @@ def p_typecast(s): s.expect(">") operand = p_factor(s) if is_memslice: - return ExprNodes.CythonArrayNode(pos, base_type_node=base_type, - operand=operand) + return ExprNodes.CythonArrayNode(pos, base_type_node=base_type, operand=operand) return ExprNodes.TypecastNode(pos, base_type = base_type, @@ -444,7 +473,7 @@ def p_trailer(s, node1): return p_call(s, node1) elif s.sy == '[': return p_index(s, node1) - else: # s.sy == 
'.' + else: # s.sy == '.' s.next() name = p_ident(s) return ExprNodes.AttributeNode(pos, @@ -480,7 +509,7 @@ def p_call_parse_args(s, allow_genexp=True): keyword_args.append(p_test(s)) starstar_seen = True else: - arg = p_test(s) + arg = p_namedexpr_test(s) if s.sy == '=': s.next() if not arg.is_name: @@ -628,9 +657,7 @@ def p_slice_element(s, follow_set): return None def expect_ellipsis(s): - s.expect('.') - s.expect('.') - s.expect('.') + s.expect('...') def make_slice_nodes(pos, subscripts): # Convert a list of subscripts as returned @@ -676,7 +703,7 @@ def p_atom(s): return p_dict_or_set_maker(s) elif sy == '`': return p_backquote_expr(s) - elif sy == '.': + elif sy == '...': expect_ellipsis(s) return ExprNodes.EllipsisNode(pos) elif sy == 'INT': @@ -821,7 +848,7 @@ def p_cat_string_literal(s): continue elif next_kind != kind: # concatenating f strings and normal strings is allowed and leads to an f string - if set([kind, next_kind]) in (set(['f', 'u']), set(['f', ''])): + if {kind, next_kind} in ({'f', 'u'}, {'f', ''}): kind = 'f' else: error(pos, "Cannot mix string literals of different types, expected %s'', got %s''" % ( @@ -1073,8 +1100,8 @@ def p_f_string(s, unicode_value, pos, is_raw): if builder.chars: values.append(ExprNodes.UnicodeNode(pos, value=builder.getstring())) builder = StringEncoding.UnicodeLiteralBuilder() - next_start, expr_node = p_f_string_expr(s, unicode_value, pos, next_start, is_raw) - values.append(expr_node) + next_start, expr_nodes = p_f_string_expr(s, unicode_value, pos, next_start, is_raw) + values.extend(expr_nodes) elif c == '}': if part == '}}': builder.append('}') @@ -1090,12 +1117,16 @@ def p_f_string(s, unicode_value, pos, is_raw): def p_f_string_expr(s, unicode_value, pos, starting_index, is_raw): - # Parses a {}-delimited expression inside an f-string. Returns a FormattedValueNode - # and the index in the string that follows the expression. + # Parses a {}-delimited expression inside an f-string. 
Returns a list of nodes + # [UnicodeNode?, FormattedValueNode] and the index in the string that follows + # the expression. + # + # ? = Optional i = starting_index size = len(unicode_value) conversion_char = terminal_char = format_spec = None format_spec_str = None + expr_text = None NO_CHAR = 2**30 nested_depth = 0 @@ -1135,12 +1166,15 @@ def p_f_string_expr(s, unicode_value, pos, starting_index, is_raw): elif c == '#': error(_f_string_error_pos(pos, unicode_value, i), "format string cannot include #") - elif nested_depth == 0 and c in '!:}': - # allow != as a special case - if c == '!' and i + 1 < size and unicode_value[i + 1] == '=': - i += 1 - continue - + elif nested_depth == 0 and c in '><=!:}': + # allow special cases with '!' and '=' + if i + 1 < size and c in '!=><': + if unicode_value[i + 1] == '=': + i += 2 # we checked 2, so we can skip 2: '!=', '==', '>=', '<=' + continue + elif c in '><': # allow single '<' and '>' + i += 1 + continue terminal_char = c break i += 1 @@ -1153,6 +1187,16 @@ def p_f_string_expr(s, unicode_value, pos, starting_index, is_raw): error(_f_string_error_pos(pos, unicode_value, starting_index), "empty expression not allowed in f-string") + if terminal_char == '=': + i += 1 + while i < size and unicode_value[i].isspace(): + i += 1 + + if i < size: + terminal_char = unicode_value[i] + expr_text = unicode_value[starting_index:i] + # otherwise: error will be reported below + if terminal_char == '!': i += 1 if i + 2 > size: @@ -1190,6 +1234,9 @@ def p_f_string_expr(s, unicode_value, pos, starting_index, is_raw): format_spec_str = unicode_value[start_format_spec:i] + if expr_text and conversion_char is None and format_spec_str is None: + conversion_char = 'r' + if terminal_char != '}': error(_f_string_error_pos(pos, unicode_value, i), "missing '}' in format string expression" + ( @@ -1208,13 +1255,17 @@ def p_f_string_expr(s, unicode_value, pos, starting_index, is_raw): if format_spec_str: format_spec = ExprNodes.JoinedStrNode(pos, 
values=p_f_string(s, format_spec_str, pos, is_raw)) - return i + 1, ExprNodes.FormattedValueNode( - pos, value=expr, conversion_char=conversion_char, format_spec=format_spec) + nodes = [] + if expr_text: + nodes.append(ExprNodes.UnicodeNode(pos, value=StringEncoding.EncodedString(expr_text))) + nodes.append(ExprNodes.FormattedValueNode(pos, value=expr, conversion_char=conversion_char, format_spec=format_spec)) + + return i + 1, nodes # since PEP 448: # list_display ::= "[" [listmaker] "]" -# listmaker ::= (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] ) +# listmaker ::= (named_test|star_expr) ( comp_for | (',' (named_test|star_expr))* [','] ) # comp_iter ::= comp_for | comp_if # comp_for ::= ["async"] "for" expression_list "in" testlist [comp_iter] # comp_if ::= "if" test [comp_iter] @@ -1227,7 +1278,7 @@ def p_list_maker(s): s.expect(']') return ExprNodes.ListNode(pos, args=[]) - expr = p_test_or_starred_expr(s) + expr = p_namedexpr_test_or_starred_expr(s) if s.sy in ('for', 'async'): if expr.is_starred: s.error("iterable unpacking cannot be used in comprehension") @@ -1242,7 +1293,7 @@ def p_list_maker(s): # (merged) list literal if s.sy == ',': s.next() - exprs = p_test_or_starred_expr_list(s, expr) + exprs = p_namedexpr_test_or_starred_expr_list(s, expr) else: exprs = [expr] s.expect(']') @@ -1276,7 +1327,12 @@ def p_comp_if(s, body): # s.sy == 'if' pos = s.position() s.next() - test = p_test_nocond(s) + # Note that Python 3.9+ is actually more restrictive here and Cython now follows + # the Python 3.9+ behaviour: https://github.com/python/cpython/issues/86014 + # On Python <3.9 `[i for i in range(10) if lambda: i if True else 1]` was disallowed + # but `[i for i in range(10) if lambda: i]` was allowed. + # On Python >=3.9 they're both disallowed. 
+ test = p_or_test(s) return Nodes.IfStatNode(pos, if_clauses = [Nodes.IfClauseNode(pos, condition = test, body = p_comp_iter(s, body))], @@ -1433,6 +1489,15 @@ def p_test_or_starred_expr_list(s, expr=None): s.next() return exprs +def p_namedexpr_test_or_starred_expr_list(s, expr=None): + exprs = expr is not None and [expr] or [] + while s.sy not in expr_terminators: + exprs.append(p_namedexpr_test_or_starred_expr(s)) + if s.sy != ',': + break + s.next() + return exprs + #testlist: test (',' test)* [','] @@ -1462,10 +1527,10 @@ def p_testlist_star_expr(s): def p_testlist_comp(s): pos = s.position() - expr = p_test_or_starred_expr(s) + expr = p_namedexpr_test_or_starred_expr(s) if s.sy == ',': s.next() - exprs = p_test_or_starred_expr_list(s, expr) + exprs = p_namedexpr_test_or_starred_expr_list(s, expr) return ExprNodes.TupleNode(pos, args = exprs) elif s.sy in ('for', 'async'): return p_genexp(s, expr) @@ -1478,8 +1543,8 @@ def p_genexp(s, expr): expr.pos, expr = ExprNodes.YieldExprNode(expr.pos, arg=expr))) return ExprNodes.GeneratorExpressionNode(expr.pos, loop=loop) -expr_terminators = cython.declare(set, set([ - ')', ']', '}', ':', '=', 'NEWLINE'])) +expr_terminators = cython.declare(frozenset, frozenset(( + ')', ']', '}', ':', '=', 'NEWLINE'))) #------------------------------------------------------- @@ -1505,15 +1570,19 @@ def p_nonlocal_statement(s): def p_expression_or_assignment(s): expr = p_testlist_star_expr(s) + has_annotation = False if s.sy == ':' and (expr.is_name or expr.is_subscript or expr.is_attribute): + has_annotation = True s.next() - expr.annotation = p_test(s) + expr.annotation = p_annotation(s) + if s.sy == '=' and expr.is_starred: # This is a common enough error to make when learning Cython to let # it fail as early as possible and give a very clear error message. 
s.error("a starred assignment target must be in a list or tuple" " - maybe you meant to use an index assignment: var[0] = ...", pos=expr.pos) + expr_list = [expr] while s.sy == '=': s.next() @@ -1545,7 +1614,7 @@ def p_expression_or_assignment(s): rhs = expr_list[-1] if len(expr_list) == 2: - return Nodes.SingleAssignmentNode(rhs.pos, lhs=expr_list[0], rhs=rhs) + return Nodes.SingleAssignmentNode(rhs.pos, lhs=expr_list[0], rhs=rhs, first=has_annotation) else: return Nodes.CascadedAssignmentNode(rhs.pos, lhs_list=expr_list[:-1], rhs=rhs) @@ -1690,11 +1759,6 @@ def p_import_statement(s): as_name=as_name, is_absolute=is_absolute) else: - if as_name and "." in dotted_name: - name_list = ExprNodes.ListNode(pos, args=[ - ExprNodes.IdentifierStringNode(pos, value=s.context.intern_ustring("*"))]) - else: - name_list = None stat = Nodes.SingleAssignmentNode( pos, lhs=ExprNodes.NameNode(pos, name=as_name or target_name), @@ -1702,7 +1766,8 @@ def p_import_statement(s): pos, module_name=ExprNodes.IdentifierStringNode(pos, value=dotted_name), level=0 if is_absolute else None, - name_list=name_list)) + get_top_level_module='.' 
in dotted_name and as_name is None, + name_list=None)) stats.append(stat) return Nodes.StatListNode(pos, stats=stats) @@ -1711,11 +1776,11 @@ def p_from_import_statement(s, first_statement = 0): # s.sy == 'from' pos = s.position() s.next() - if s.sy == '.': + if s.sy in ('.', '...'): # count relative import level level = 0 - while s.sy == '.': - level += 1 + while s.sy in ('.', '...'): + level += len(s.sy) s.next() else: level = None @@ -1734,18 +1799,18 @@ def p_from_import_statement(s, first_statement = 0): is_cimport = kind == 'cimport' is_parenthesized = False if s.sy == '*': - imported_names = [(s.position(), s.context.intern_ustring("*"), None, None)] + imported_names = [(s.position(), s.context.intern_ustring("*"), None)] s.next() else: if s.sy == '(': is_parenthesized = True s.next() - imported_names = [p_imported_name(s, is_cimport)] + imported_names = [p_imported_name(s)] while s.sy == ',': s.next() if is_parenthesized and s.sy == ')': break - imported_names.append(p_imported_name(s, is_cimport)) + imported_names.append(p_imported_name(s)) if is_parenthesized: s.expect(')') if dotted_name == '__future__': @@ -1754,7 +1819,7 @@ def p_from_import_statement(s, first_statement = 0): elif level: s.error("invalid syntax") else: - for (name_pos, name, as_name, kind) in imported_names: + for (name_pos, name, as_name) in imported_names: if name == "braces": s.error("not a chance", name_pos) break @@ -1765,7 +1830,7 @@ def p_from_import_statement(s, first_statement = 0): break s.context.future_directives.add(directive) return Nodes.PassStatNode(pos) - elif kind == 'cimport': + elif is_cimport: return Nodes.FromCImportStatNode( pos, module_name=dotted_name, relative_level=level, @@ -1773,7 +1838,7 @@ def p_from_import_statement(s, first_statement = 0): else: imported_name_strings = [] items = [] - for (name_pos, name, as_name, kind) in imported_names: + for (name_pos, name, as_name) in imported_names: imported_name_strings.append( 
ExprNodes.IdentifierStringNode(name_pos, value=name)) items.append( @@ -1788,19 +1853,11 @@ def p_from_import_statement(s, first_statement = 0): items = items) -imported_name_kinds = cython.declare(set, set(['class', 'struct', 'union'])) - -def p_imported_name(s, is_cimport): +def p_imported_name(s): pos = s.position() - kind = None - if is_cimport and s.systring in imported_name_kinds: - kind = s.systring - warning(pos, 'the "from module cimport %s name" syntax is deprecated and ' - 'will be removed in Cython 3.0' % kind, 2) - s.next() name = p_ident(s) as_name = p_as_name(s) - return (pos, name, as_name, kind) + return (pos, name, as_name) def p_dotted_name(s, as_allowed): @@ -1834,10 +1891,11 @@ def p_assert_statement(s): value = p_test(s) else: value = None - return Nodes.AssertStatNode(pos, cond = cond, value = value) + return Nodes.AssertStatNode(pos, condition=cond, value=value) -statement_terminators = cython.declare(set, set([';', 'NEWLINE', 'EOF'])) +statement_terminators = cython.declare(frozenset, frozenset(( + ';', 'NEWLINE', 'EOF'))) def p_if_statement(s): # s.sy == 'if' @@ -1853,7 +1911,7 @@ def p_if_statement(s): def p_if_clause(s): pos = s.position() - test = p_test(s) + test = p_namedexpr_test(s) body = p_suite(s) return Nodes.IfClauseNode(pos, condition = test, body = body) @@ -1869,7 +1927,7 @@ def p_while_statement(s): # s.sy == 'while' pos = s.position() s.next() - test = p_test(s) + test = p_namedexpr_test(s) body = p_suite(s) else_clause = p_else_clause(s) return Nodes.WhileStatNode(pos, @@ -1947,7 +2005,8 @@ def p_for_from_step(s): else: return None -inequality_relations = cython.declare(set, set(['<', '<=', '>', '>='])) +inequality_relations = cython.declare(frozenset, frozenset(( + '<', '<=', '>', '>='))) def p_target(s, terminator): pos = s.position() @@ -2037,7 +2096,7 @@ def p_except_clause(s): def p_include_statement(s, ctx): pos = s.position() - s.next() # 'include' + s.next() # 'include' unicode_include_file_name = 
p_string_literal(s, 'u')[2] s.expect_newline("Syntax error in include statement") if s.compile_time_eval: @@ -2066,30 +2125,77 @@ def p_with_statement(s): def p_with_items(s, is_async=False): + """ + Copied from CPython: + | 'with' '(' a[asdl_withitem_seq*]=','.with_item+ ','? ')' ':' b=block { + _PyAST_With(a, b, NULL, EXTRA) } + | 'with' a[asdl_withitem_seq*]=','.with_item+ ':' tc=[TYPE_COMMENT] b=block { + _PyAST_With(a, b, NEW_TYPE_COMMENT(p, tc), EXTRA) } + Therefore the first thing to try is the bracket-enclosed + version and if that fails try the regular version + """ + brackets_succeeded = False + items = () # unused, but static analysis fails to track that below + if s.sy == '(': + with tentatively_scan(s) as errors: + s.next() + items = p_with_items_list(s, is_async) + s.expect(")") + if s.sy != ":": + # Fail - the message doesn't matter because we'll try the + # non-bracket version so it'll never be shown + s.error("") + brackets_succeeded = not errors + if not brackets_succeeded: + # try the non-bracket version + items = p_with_items_list(s, is_async) + body = p_suite(s) + for cls, pos, kwds in reversed(items): + # construct the actual nodes now that we know what the body is + body = cls(pos, body=body, **kwds) + return body + + +def p_with_items_list(s, is_async): + items = [] + while True: + items.append(p_with_item(s, is_async)) + if s.sy != ",": + break + s.next() + if s.sy == ")": + # trailing commas allowed + break + return items + + +def p_with_item(s, is_async): + # In contrast to most parsing functions, this returns a tuple of + # class, pos, kwd_dict + # This is because GILStatNode does a reasonable amount of initialization in its + # constructor, and requires "body" to be set, which we don't currently have pos = s.position() if not s.in_python_file and s.sy == 'IDENT' and s.systring in ('nogil', 'gil'): if is_async: s.error("with gil/nogil cannot be async") state = s.systring s.next() - if s.sy == ',': + + # support conditional gil/nogil + 
condition = None + if s.sy == '(': s.next() - body = p_with_items(s) - else: - body = p_suite(s) - return Nodes.GILStatNode(pos, state=state, body=body) + condition = p_test(s) + s.expect(')') + + return Nodes.GILStatNode, pos, {"state": state, "condition": condition} else: manager = p_test(s) target = None if s.sy == 'IDENT' and s.systring == 'as': s.next() target = p_starred_expr(s) - if s.sy == ',': - s.next() - body = p_with_items(s, is_async=is_async) - else: - body = p_suite(s) - return Nodes.WithStatNode(pos, manager=manager, target=target, body=body, is_async=is_async) + return Nodes.WithStatNode, pos, {"manager": manager, "target": target, "is_async": is_async} def p_with_template(s): @@ -2195,7 +2301,7 @@ def p_compile_time_expr(s): def p_DEF_statement(s): pos = s.position() denv = s.compile_time_env - s.next() # 'DEF' + s.next() # 'DEF' name = p_ident(s) s.expect('=') expr = p_compile_time_expr(s) @@ -2213,7 +2319,7 @@ def p_IF_statement(s, ctx): denv = s.compile_time_env result = None while 1: - s.next() # 'IF' or 'ELIF' + s.next() # 'IF' or 'ELIF' expr = p_compile_time_expr(s) s.compile_time_eval = current_eval and bool(expr.compile_time_value(denv)) body = p_suite(s, ctx) @@ -2243,8 +2349,16 @@ def p_statement(s, ctx, first_statement = 0): # error(s.position(), "'api' not allowed with 'ctypedef'") return p_ctypedef_statement(s, ctx) elif s.sy == 'DEF': + warning(s.position(), + "The 'DEF' statement is deprecated and will be removed in a future Cython version. " + "Consider using global variables, constants, and in-place literals instead. " + "See https://github.com/cython/cython/issues/4310", level=1) return p_DEF_statement(s) elif s.sy == 'IF': + warning(s.position(), + "The 'IF' statement is deprecated and will be removed in a future Cython version. " + "Consider using runtime conditions or C macros instead. 
" + "See https://github.com/cython/cython/issues/4310", level=1) return p_IF_statement(s, ctx) elif s.sy == '@': if ctx.level not in ('module', 'class', 'c_class', 'function', 'property', 'module_pxd', 'c_class_pxd', 'other'): @@ -2325,13 +2439,14 @@ def p_statement(s, ctx, first_statement = 0): else: if s.sy == 'IDENT' and s.systring == 'async': ident_name = s.systring + ident_pos = s.position() # PEP 492 enables the async/await keywords when it spots "async def ..." s.next() if s.sy == 'def': return p_async_statement(s, ctx, decorators) elif decorators: s.error("Decorators can only be followed by functions or classes") - s.put_back('IDENT', ident_name) # re-insert original token + s.put_back(u'IDENT', ident_name, ident_pos) # re-insert original token return p_simple_statement_list(s, ctx, first_statement=first_statement) @@ -2399,7 +2514,7 @@ def p_positional_and_keyword_args(s, end_sy_set, templates = None): parsed_type = False if s.sy == 'IDENT' and s.peek()[0] == '=': ident = s.systring - s.next() # s.sy is '=' + s.next() # s.sy is '=' s.next() if looking_at_expr(s): arg = p_test(s) @@ -2436,13 +2551,11 @@ def p_positional_and_keyword_args(s, end_sy_set, templates = None): s.next() return positional_args, keyword_args -def p_c_base_type(s, self_flag = 0, nonempty = 0, templates = None): - # If self_flag is true, this is the base type for the - # self argument of a C method of an extension type. 
+def p_c_base_type(s, nonempty=False, templates=None): if s.sy == '(': return p_c_complex_base_type(s, templates = templates) else: - return p_c_simple_base_type(s, self_flag, nonempty = nonempty, templates = templates) + return p_c_simple_base_type(s, nonempty=nonempty, templates=templates) def p_calling_convention(s): if s.sy == 'IDENT' and s.systring in calling_convention_words: @@ -2453,8 +2566,8 @@ def p_calling_convention(s): return "" -calling_convention_words = cython.declare( - set, set(["__stdcall", "__cdecl", "__fastcall"])) +calling_convention_words = cython.declare(frozenset, frozenset(( + "__stdcall", "__cdecl", "__fastcall"))) def p_c_complex_base_type(s, templates = None): @@ -2486,24 +2599,38 @@ def p_c_complex_base_type(s, templates = None): return type_node -def p_c_simple_base_type(s, self_flag, nonempty, templates = None): - #print "p_c_simple_base_type: self_flag =", self_flag, nonempty +def p_c_simple_base_type(s, nonempty, templates=None): is_basic = 0 signed = 1 longness = 0 complex = 0 module_path = [] pos = s.position() - if not s.sy == 'IDENT': - error(pos, "Expected an identifier, found '%s'" % s.sy) - if s.systring == 'const': + + # Handle const/volatile + is_const = is_volatile = 0 + while s.sy == 'IDENT': + if s.systring == 'const': + if is_const: error(pos, "Duplicate 'const'") + is_const = 1 + elif s.systring == 'volatile': + if is_volatile: error(pos, "Duplicate 'volatile'") + is_volatile = 1 + else: + break s.next() - base_type = p_c_base_type(s, self_flag=self_flag, nonempty=nonempty, templates=templates) + if is_const or is_volatile: + base_type = p_c_base_type(s, nonempty=nonempty, templates=templates) if isinstance(base_type, Nodes.MemoryViewSliceTypeNode): # reverse order to avoid having to write "(const int)[:]" - base_type.base_type_node = Nodes.CConstTypeNode(pos, base_type=base_type.base_type_node) + base_type.base_type_node = Nodes.CConstOrVolatileTypeNode(pos, + base_type=base_type.base_type_node, is_const=is_const, 
is_volatile=is_volatile) return base_type - return Nodes.CConstTypeNode(pos, base_type=base_type) + return Nodes.CConstOrVolatileTypeNode(pos, + base_type=base_type, is_const=is_const, is_volatile=is_volatile) + + if s.sy != 'IDENT': + error(pos, "Expected an identifier, found '%s'" % s.sy) if looking_at_base_type(s): #print "p_c_simple_base_type: looking_at_base_type at", s.position() is_basic = 1 @@ -2531,27 +2658,29 @@ def p_c_simple_base_type(s, self_flag, nonempty, templates = None): name = p_ident(s) else: name = s.systring + name_pos = s.position() s.next() if nonempty and s.sy != 'IDENT': # Make sure this is not a declaration of a variable or function. if s.sy == '(': + old_pos = s.position() s.next() if (s.sy == '*' or s.sy == '**' or s.sy == '&' or (s.sy == 'IDENT' and s.systring in calling_convention_words)): - s.put_back('(', '(') + s.put_back(u'(', u'(', old_pos) else: - s.put_back('(', '(') - s.put_back('IDENT', name) + s.put_back(u'(', u'(', old_pos) + s.put_back(u'IDENT', name, name_pos) name = None elif s.sy not in ('*', '**', '[', '&'): - s.put_back('IDENT', name) + s.put_back(u'IDENT', name, name_pos) name = None type_node = Nodes.CSimpleBaseTypeNode(pos, name = name, module_path = module_path, is_basic_c_type = is_basic, signed = signed, complex = complex, longness = longness, - is_self_arg = self_flag, templates = templates) + templates = templates) # declarations here. if s.sy == '[': @@ -2618,13 +2747,13 @@ def is_memoryviewslice_access(s): # a memoryview slice declaration is distinguishable from a buffer access # declaration by the first entry in the bracketed list. The buffer will # not have an unnested colon in the first entry; the memoryview slice will. 
- saved = [(s.sy, s.systring)] + saved = [(s.sy, s.systring, s.position())] s.next() retval = False if s.systring == ':': retval = True elif s.sy == 'INT': - saved.append((s.sy, s.systring)) + saved.append((s.sy, s.systring, s.position())) s.next() if s.sy == ':': retval = True @@ -2651,7 +2780,7 @@ def p_memoryviewslice_access(s, base_type_node): return result def looking_at_name(s): - return s.sy == 'IDENT' and not s.systring in calling_convention_words + return s.sy == 'IDENT' and s.systring not in calling_convention_words def looking_at_expr(s): if s.systring in base_type_start_words: @@ -2659,15 +2788,16 @@ def looking_at_expr(s): elif s.sy == 'IDENT': is_type = False name = s.systring + name_pos = s.position() dotted_path = [] s.next() while s.sy == '.': s.next() - dotted_path.append(s.systring) + dotted_path.append((s.systring, s.position())) s.expect('IDENT') - saved = s.sy, s.systring + saved = s.sy, s.systring, s.position() if s.sy == 'IDENT': is_type = True elif s.sy == '*' or s.sy == '**': @@ -2685,10 +2815,10 @@ def looking_at_expr(s): dotted_path.reverse() for p in dotted_path: - s.put_back('IDENT', p) - s.put_back('.', '.') + s.put_back(u'IDENT', *p) + s.put_back(u'.', u'.', p[1]) # gets the position slightly wrong - s.put_back('IDENT', name) + s.put_back(u'IDENT', name, name_pos) return not is_type and saved[0] else: return True @@ -2700,26 +2830,17 @@ def looking_at_base_type(s): def looking_at_dotted_name(s): if s.sy == 'IDENT': name = s.systring + name_pos = s.position() s.next() result = s.sy == '.' - s.put_back('IDENT', name) + s.put_back(u'IDENT', name, name_pos) return result else: return 0 -def looking_at_call(s): - "See if we're looking at a.b.c(" - # Don't mess up the original position, so save and restore it. - # Unfortunately there's no good way to handle this, as a subsequent call - # to next() will not advance the position until it reads a new token. 
- position = s.start_line, s.start_col - result = looking_at_expr(s) == u'(' - if not result: - s.start_line, s.start_col = position - return result -basic_c_type_names = cython.declare( - set, set(["void", "char", "int", "float", "double", "bint"])) +basic_c_type_names = cython.declare(frozenset, frozenset(( + "void", "char", "int", "float", "double", "bint"))) special_basic_c_types = cython.declare(dict, { # name : (signed, longness) @@ -2733,17 +2854,17 @@ special_basic_c_types = cython.declare(dict, { "Py_tss_t" : (1, 0), }) -sign_and_longness_words = cython.declare( - set, set(["short", "long", "signed", "unsigned"])) +sign_and_longness_words = cython.declare(frozenset, frozenset(( + "short", "long", "signed", "unsigned"))) base_type_start_words = cython.declare( - set, + frozenset, basic_c_type_names | sign_and_longness_words - | set(special_basic_c_types)) + | frozenset(special_basic_c_types)) -struct_enum_union = cython.declare( - set, set(["struct", "union", "enum", "packed"])) +struct_enum_union = cython.declare(frozenset, frozenset(( + "struct", "union", "enum", "packed"))) def p_sign_and_longness(s): signed = 1 @@ -2798,7 +2919,7 @@ def p_c_declarator(s, ctx = Ctx(), empty = 0, is_type = 0, cmethod_flag = 0, pos = s.position() if s.sy == '[': result = p_c_array_declarator(s, result) - else: # sy == '(' + else: # sy == '(' s.next() result = p_c_func_declarator(s, pos, ctx, result, cmethod_flag) cmethod_flag = 0 @@ -2806,7 +2927,7 @@ def p_c_declarator(s, ctx = Ctx(), empty = 0, is_type = 0, cmethod_flag = 0, def p_c_array_declarator(s, base): pos = s.position() - s.next() # '[' + s.next() # '[' if s.sy != ']': dim = p_testlist(s) else: @@ -2815,14 +2936,22 @@ def p_c_array_declarator(s, base): return Nodes.CArrayDeclaratorNode(pos, base = base, dimension = dim) def p_c_func_declarator(s, pos, ctx, base, cmethod_flag): - # Opening paren has already been skipped + # Opening paren has already been skipped args = p_c_arg_list(s, ctx, cmethod_flag = 
cmethod_flag, nonempty_declarators = 0) ellipsis = p_optional_ellipsis(s) s.expect(')') nogil = p_nogil(s) - exc_val, exc_check = p_exception_value_clause(s) - # TODO - warning to enforce preferred exception specification order + exc_val, exc_check, exc_clause = p_exception_value_clause(s, ctx) + if nogil and exc_clause: + warning( + s.position(), + "The keyword 'nogil' should appear at the end of the " + "function signature line. Placing it before 'except' " + "or 'noexcept' will be disallowed in a future version " + "of Cython.", + level=2 + ) nogil = nogil or p_nogil(s) with_gil = p_with_gil(s) return Nodes.CFuncDeclaratorNode(pos, @@ -2830,49 +2959,43 @@ def p_c_func_declarator(s, pos, ctx, base, cmethod_flag): exception_value = exc_val, exception_check = exc_check, nogil = nogil or ctx.nogil or with_gil, with_gil = with_gil) -supported_overloaded_operators = cython.declare(set, set([ +supported_overloaded_operators = cython.declare(frozenset, frozenset(( '+', '-', '*', '/', '%', '++', '--', '~', '|', '&', '^', '<<', '>>', ',', '==', '!=', '>=', '>', '<=', '<', '[]', '()', '!', '=', 'bool', -])) +))) def p_c_simple_declarator(s, ctx, empty, is_type, cmethod_flag, assignable, nonempty): pos = s.position() calling_convention = p_calling_convention(s) - if s.sy == '*': + if s.sy in ('*', '**'): + # scanner returns '**' as a single token + is_ptrptr = s.sy == '**' s.next() - if s.systring == 'const': - const_pos = s.position() + + const_pos = s.position() + is_const = s.systring == 'const' and s.sy == 'IDENT' + if is_const: s.next() - const_base = p_c_declarator(s, ctx, empty = empty, - is_type = is_type, - cmethod_flag = cmethod_flag, - assignable = assignable, - nonempty = nonempty) - base = Nodes.CConstDeclaratorNode(const_pos, base = const_base) - else: - base = p_c_declarator(s, ctx, empty = empty, is_type = is_type, - cmethod_flag = cmethod_flag, - assignable = assignable, nonempty = nonempty) - result = Nodes.CPtrDeclaratorNode(pos, - base = base) - elif 
s.sy == '**': # scanner returns this as a single token - s.next() - base = p_c_declarator(s, ctx, empty = empty, is_type = is_type, - cmethod_flag = cmethod_flag, - assignable = assignable, nonempty = nonempty) - result = Nodes.CPtrDeclaratorNode(pos, - base = Nodes.CPtrDeclaratorNode(pos, - base = base)) - elif s.sy == '&': - s.next() - base = p_c_declarator(s, ctx, empty = empty, is_type = is_type, - cmethod_flag = cmethod_flag, - assignable = assignable, nonempty = nonempty) - result = Nodes.CReferenceDeclaratorNode(pos, base = base) + + base = p_c_declarator(s, ctx, empty=empty, is_type=is_type, + cmethod_flag=cmethod_flag, + assignable=assignable, nonempty=nonempty) + if is_const: + base = Nodes.CConstDeclaratorNode(const_pos, base=base) + if is_ptrptr: + base = Nodes.CPtrDeclaratorNode(pos, base=base) + result = Nodes.CPtrDeclaratorNode(pos, base=base) + elif s.sy == '&' or (s.sy == '&&' and s.context.cpp): + node_class = Nodes.CppRvalueReferenceDeclaratorNode if s.sy == '&&' else Nodes.CReferenceDeclaratorNode + s.next() + base = p_c_declarator(s, ctx, empty=empty, is_type=is_type, + cmethod_flag=cmethod_flag, + assignable=assignable, nonempty=nonempty) + result = node_class(pos, base=base) else: rhs = None if s.sy == 'IDENT': @@ -2913,7 +3036,7 @@ def p_c_simple_declarator(s, ctx, empty, is_type, cmethod_flag, fatal=False) name += op elif op == 'IDENT': - op = s.systring; + op = s.systring if op not in supported_overloaded_operators: s.error("Overloading operator '%s' not yet supported." % op, fatal=False) @@ -2939,22 +3062,54 @@ def p_with_gil(s): else: return 0 -def p_exception_value_clause(s): +def p_exception_value_clause(s, ctx): + """ + Parse exception value clause. 
+ + Maps clauses to exc_check / exc_value / exc_clause as follows: + ______________________________________________________________________ + | | | | | + | Clause | exc_check | exc_value | exc_clause | + | ___________________________ | ___________ | ___________ | __________ | + | | | | | + | <nothing> (default func.) | True | None | False | + | <nothing> (cdef extern) | False | None | False | + | noexcept | False | None | True | + | except <val> | False | <val> | True | + | except? <val> | True | <val> | True | + | except * | True | None | True | + | except + | '+' | None | True | + | except +* | '+' | '*' | True | + | except +<PyErr> | '+' | <PyErr> | True | + | ___________________________ | ___________ | ___________ | __________ | + + Note that the only reason we need `exc_clause` is to raise a + warning when `'except'` or `'noexcept'` is placed after the + `'nogil'` keyword. + """ + exc_clause = False exc_val = None - exc_check = 0 + if ctx.visibility == 'extern': + exc_check = False + else: + exc_check = True if s.sy == 'IDENT' and s.systring == 'noexcept': + exc_clause = True s.next() - exc_check = False # No-op in Cython 0.29.x + exc_check = False elif s.sy == 'except': + exc_clause = True s.next() if s.sy == '*': - exc_check = 1 + exc_check = True s.next() elif s.sy == '+': exc_check = '+' s.next() - if s.sy == 'IDENT': + if p_nogil(s): + ctx.nogil = True + elif s.sy == 'IDENT': name = s.systring s.next() exc_val = p_name(s, name) @@ -2963,12 +3118,19 @@ def p_exception_value_clause(s): s.next() else: if s.sy == '?': - exc_check = 1 + exc_check = True s.next() + else: + exc_check = False + # exc_val can be non-None even if exc_check is False, c.f. "except -1" exc_val = p_test(s) - return exc_val, exc_check + if not exc_clause and ctx.visibility != 'extern' and s.context.legacy_implicit_noexcept: + exc_check = False + warning(s.position(), "Implicit noexcept declaration is deprecated. 
Function declaration should contain 'noexcept' keyword.", level=2) + return exc_val, exc_check, exc_clause -c_arg_list_terminators = cython.declare(set, set(['*', '**', '.', ')', ':'])) +c_arg_list_terminators = cython.declare(frozenset, frozenset(( + '*', '**', '...', ')', ':', '/'))) def p_c_arg_list(s, ctx = Ctx(), in_pyfunc = 0, cmethod_flag = 0, nonempty_declarators = 0, kw_only = 0, annotated = 1): @@ -2987,7 +3149,7 @@ def p_c_arg_list(s, ctx = Ctx(), in_pyfunc = 0, cmethod_flag = 0, return args def p_optional_ellipsis(s): - if s.sy == '.': + if s.sy == '...': expect_ellipsis(s) return 1 else: @@ -3007,7 +3169,7 @@ def p_c_arg_decl(s, ctx, in_pyfunc, cmethod_flag = 0, nonempty = 0, complex = 0, longness = 0, is_self_arg = cmethod_flag, templates = None) else: - base_type = p_c_base_type(s, cmethod_flag, nonempty = nonempty) + base_type = p_c_base_type(s, nonempty=nonempty) declarator = p_c_declarator(s, ctx, nonempty = nonempty) if s.sy in ('not', 'or') and not s.in_python_file: kind = s.sy @@ -3022,7 +3184,7 @@ def p_c_arg_decl(s, ctx, in_pyfunc, cmethod_flag = 0, nonempty = 0, not_none = kind == 'not' if annotated and s.sy == ':': s.next() - annotation = p_test(s) + annotation = p_annotation(s) if s.sy == '=': s.next() if 'pxd' in ctx.level: @@ -3124,6 +3286,12 @@ def p_cdef_extern_block(s, pos, ctx): def p_c_enum_definition(s, pos, ctx): # s.sy == ident 'enum' s.next() + + scoped = False + if s.context.cpp and (s.sy == 'class' or (s.sy == 'IDENT' and s.systring == 'struct')): + scoped = True + s.next() + if s.sy == 'IDENT': name = s.systring s.next() @@ -3131,24 +3299,51 @@ def p_c_enum_definition(s, pos, ctx): if cname is None and ctx.namespace is not None: cname = ctx.namespace + "::" + name else: - name = None - cname = None - items = None + name = cname = None + if scoped: + s.error("Unnamed scoped enum not allowed") + + if scoped and s.sy == '(': + s.next() + underlying_type = p_c_base_type(s) + s.expect(')') + else: + underlying_type = 
Nodes.CSimpleBaseTypeNode( + pos, + name="int", + module_path = [], + is_basic_c_type = True, + signed = 1, + complex = 0, + longness = 0 + ) + s.expect(':') items = [] + + doc = None if s.sy != 'NEWLINE': p_c_enum_line(s, ctx, items) else: - s.next() # 'NEWLINE' + s.next() # 'NEWLINE' s.expect_indent() + doc = p_doc_string(s) + while s.sy not in ('DEDENT', 'EOF'): p_c_enum_line(s, ctx, items) + s.expect_dedent() + + if not items and ctx.visibility != "extern": + error(pos, "Empty enum definition not allowed outside a 'cdef extern from' block") + return Nodes.CEnumDefNode( - pos, name = name, cname = cname, items = items, - typedef_flag = ctx.typedef_flag, visibility = ctx.visibility, - create_wrapper = ctx.overridable, - api = ctx.api, in_pxd = ctx.level == 'module_pxd') + pos, name=name, cname=cname, + scoped=scoped, items=items, + underlying_type=underlying_type, + typedef_flag=ctx.typedef_flag, visibility=ctx.visibility, + create_wrapper=ctx.overridable, + api=ctx.api, in_pxd=ctx.level == 'module_pxd', doc=doc) def p_c_enum_line(s, ctx, items): if s.sy != 'pass': @@ -3192,20 +3387,28 @@ def p_c_struct_or_union_definition(s, pos, ctx): attributes = None if s.sy == ':': s.next() - s.expect('NEWLINE') - s.expect_indent() attributes = [] - body_ctx = Ctx() - while s.sy != 'DEDENT': - if s.sy != 'pass': - attributes.append( - p_c_func_or_var_declaration(s, s.position(), body_ctx)) - else: - s.next() - s.expect_newline("Expected a newline") - s.expect_dedent() + if s.sy == 'pass': + s.next() + s.expect_newline("Expected a newline", ignore_semicolon=True) + else: + s.expect('NEWLINE') + s.expect_indent() + body_ctx = Ctx(visibility=ctx.visibility) + while s.sy != 'DEDENT': + if s.sy != 'pass': + attributes.append( + p_c_func_or_var_declaration(s, s.position(), body_ctx)) + else: + s.next() + s.expect_newline("Expected a newline") + s.expect_dedent() + + if not attributes and ctx.visibility != "extern": + error(pos, "Empty struct or union definition not allowed outside 
a 'cdef extern from' block") else: s.expect_newline("Syntax error in struct or union definition") + return Nodes.CStructOrUnionDefNode(pos, name = name, cname = cname, kind = kind, attributes = attributes, typedef_flag = ctx.typedef_flag, visibility = ctx.visibility, @@ -3232,7 +3435,7 @@ def p_fused_definition(s, pos, ctx): while s.sy != 'DEDENT': if s.sy != 'pass': #types.append(p_c_declarator(s)) - types.append(p_c_base_type(s)) #, nonempty=1)) + types.append(p_c_base_type(s)) #, nonempty=1)) else: s.next() @@ -3363,14 +3566,7 @@ def p_decorators(s): while s.sy == '@': pos = s.position() s.next() - decstring = p_dotted_name(s, as_allowed=0)[2] - names = decstring.split('.') - decorator = ExprNodes.NameNode(pos, name=s.context.intern_ustring(names[0])) - for name in names[1:]: - decorator = ExprNodes.AttributeNode( - pos, attribute=s.context.intern_ustring(name), obj=decorator) - if s.sy == '(': - decorator = p_call(s, decorator) + decorator = p_namedexpr_test(s) decorators.append(Nodes.DecoratorNode(pos, decorator=decorator)) s.expect_newline("Expected a newline after decorator") return decorators @@ -3388,7 +3584,7 @@ def _reject_cdef_modifier_in_py(s, name): def p_def_statement(s, decorators=None, is_async_def=False): # s.sy == 'def' - pos = s.position() + pos = decorators[0].pos if decorators else s.position() # PEP 492 switches the async/await keywords on in "async def" functions if is_async_def: s.enter_async() @@ -3405,7 +3601,7 @@ def p_def_statement(s, decorators=None, is_async_def=False): return_type_annotation = None if s.sy == '->': s.next() - return_type_annotation = p_test(s) + return_type_annotation = p_annotation(s) _reject_cdef_modifier_in_py(s, s.systring) doc, body = p_suite_with_docstring(s, Ctx(level='function')) @@ -3423,6 +3619,20 @@ def p_varargslist(s, terminator=')', annotated=1): annotated = annotated) star_arg = None starstar_arg = None + if s.sy == '/': + if len(args) == 0: + s.error("Got zero positional-only arguments despite 
presence of " + "positional-only specifier '/'") + s.next() + # Mark all args to the left as pos only + for arg in args: + arg.pos_only = 1 + if s.sy == ',': + s.next() + args.extend(p_c_arg_list(s, in_pyfunc = 1, + nonempty_declarators = 1, annotated = annotated)) + elif s.sy != terminator: + s.error("Syntax error in Python function argument list") if s.sy == '*': s.next() if s.sy == 'IDENT': @@ -3446,7 +3656,7 @@ def p_py_arg_decl(s, annotated = 1): annotation = None if annotated and s.sy == ':': s.next() - annotation = p_test(s) + annotation = p_annotation(s) return Nodes.PyArgDeclNode(pos, name = name, annotation = annotation) @@ -3673,6 +3883,9 @@ def p_compiler_directive_comments(s): for name in new_directives: if name not in result: pass + elif Options.directive_types.get(name) is list: + result[name] += new_directives[name] + new_directives[name] = result[name] elif new_directives[name] == result[name]: warning(pos, "Duplicate directive found: %s" % (name,)) else: @@ -3682,6 +3895,9 @@ def p_compiler_directive_comments(s): if 'language_level' in new_directives: # Make sure we apply the language level already to the first token that follows the comments. s.context.set_language_level(new_directives['language_level']) + if 'legacy_implicit_noexcept' in new_directives: + s.context.legacy_implicit_noexcept = new_directives['legacy_implicit_noexcept'] + result.update(new_directives) @@ -3696,22 +3912,18 @@ def p_module(s, pxd, full_module_name, ctx=Ctx): s.parse_comments = False if s.context.language_level is None: - s.context.set_language_level(2) + s.context.set_language_level('3str') if pos[0].filename: import warnings warnings.warn( - "Cython directive 'language_level' not set, using 2 for now (Py2). " - "This will change in a later release! File: %s" % pos[0].filename, + "Cython directive 'language_level' not set, using '3str' for now (Py3). " + "This has changed from earlier releases! 
File: %s" % pos[0].filename, FutureWarning, stacklevel=1 if cython.compiled else 2, ) + level = 'module_pxd' if pxd else 'module' doc = p_doc_string(s) - if pxd: - level = 'module_pxd' - else: - level = 'module' - body = p_statement_list(s, ctx(level=level), first_statement = 1) if s.sy != 'EOF': s.error("Syntax error in statement [%s,%s]" % ( @@ -3733,7 +3945,6 @@ def p_template_definition(s): def p_cpp_class_definition(s, pos, ctx): # s.sy == 'cppclass' s.next() - module_path = [] class_name = p_ident(s) cname = p_opt_cname(s) if cname is None and ctx.namespace is not None: @@ -3767,6 +3978,10 @@ def p_cpp_class_definition(s, pos, ctx): s.next() s.expect('NEWLINE') s.expect_indent() + # Allow a cppclass to have docstrings. It will be discarded as comment. + # The goal of this is consistency: we can make docstrings inside cppclass methods, + # so why not on the cppclass itself ? + p_doc_string(s) attributes = [] body_ctx = Ctx(visibility = ctx.visibility, level='cpp_class', nogil=nogil or ctx.nogil) body_ctx.templates = template_names @@ -3850,3 +4065,14 @@ def print_parse_tree(f, node, level, key = None): f.write("%s]\n" % ind) return f.write("%s%s\n" % (ind, node)) + +def p_annotation(s): + """An annotation just has the "test" syntax, but also stores the string it came from + + Note that the string is *allowed* to be changed/processed (although isn't here) + so may not exactly match the string generated by Python, and if it doesn't + then it is not a bug. + """ + pos = s.position() + expr = p_test(s) + return ExprNodes.AnnotationNode(pos, expr=expr) diff --git a/Cython/Compiler/Pipeline.py b/Cython/Compiler/Pipeline.py index 5194c3e49..834eb0e6a 100644 --- a/Cython/Compiler/Pipeline.py +++ b/Cython/Compiler/Pipeline.py @@ -19,7 +19,7 @@ def dumptree(t): def abort_on_errors(node): # Stop the pipeline if there are any errors. 
- if Errors.num_errors != 0: + if Errors.get_errors_count() != 0: raise AbortError("pipeline break") return node @@ -58,7 +58,7 @@ def generate_pyx_code_stage_factory(options, result): def inject_pxd_code_stage_factory(context): def inject_pxd_code_stage(module_node): for name, (statlistnode, scope) in context.pxds.items(): - module_node.merge_in(statlistnode, scope) + module_node.merge_in(statlistnode, scope, stage="pxd") return module_node return inject_pxd_code_stage @@ -80,56 +80,60 @@ def use_utility_code_definitions(scope, target, seen=None): use_utility_code_definitions(entry.as_module, target, seen) -def sort_utility_codes(utilcodes): +def sorted_utility_codes_and_deps(utilcodes): ranks = {} - def get_rank(utilcode): - if utilcode not in ranks: + get_rank = ranks.get + + def calculate_rank(utilcode): + rank = get_rank(utilcode) + if rank is None: ranks[utilcode] = 0 # prevent infinite recursion on circular dependencies original_order = len(ranks) - ranks[utilcode] = 1 + min([get_rank(dep) for dep in utilcode.requires or ()] or [-1]) + original_order * 1e-8 - return ranks[utilcode] - for utilcode in utilcodes: - get_rank(utilcode) - return [utilcode for utilcode, _ in sorted(ranks.items(), key=lambda kv: kv[1])] - + rank = ranks[utilcode] = 1 + ( + min([calculate_rank(dep) for dep in utilcode.requires]) if utilcode.requires else -1 + ) + original_order * 1e-8 + return rank -def normalize_deps(utilcodes): - deps = {} for utilcode in utilcodes: - deps[utilcode] = utilcode + calculate_rank(utilcode) + + # include all recursively collected dependencies + return sorted(ranks, key=get_rank) - def unify_dep(dep): - if dep in deps: - return deps[dep] - else: - deps[dep] = dep - return dep +def normalize_deps(utilcodes): + deps = {utilcode:utilcode for utilcode in utilcodes} for utilcode in utilcodes: - utilcode.requires = [unify_dep(dep) for dep in utilcode.requires or ()] + utilcode.requires = [deps.setdefault(dep, dep) for dep in utilcode.requires or ()] def 
inject_utility_code_stage_factory(context): def inject_utility_code_stage(module_node): module_node.prepare_utility_code() use_utility_code_definitions(context.cython_scope, module_node.scope) - module_node.scope.utility_code_list = sort_utility_codes(module_node.scope.utility_code_list) - normalize_deps(module_node.scope.utility_code_list) - added = [] + + utility_code_list = module_node.scope.utility_code_list + utility_code_list[:] = sorted_utility_codes_and_deps(utility_code_list) + normalize_deps(utility_code_list) + + added = set() # Note: the list might be extended inside the loop (if some utility code # pulls in other utility code, explicitly or implicitly) - for utilcode in module_node.scope.utility_code_list: + for utilcode in utility_code_list: if utilcode in added: continue - added.append(utilcode) + added.add(utilcode) if utilcode.requires: for dep in utilcode.requires: - if dep not in added and dep not in module_node.scope.utility_code_list: - module_node.scope.utility_code_list.append(dep) + if dep not in added: + utility_code_list.append(dep) tree = utilcode.get_tree(cython_scope=context.cython_scope) if tree: - module_node.merge_in(tree.body, tree.scope, merge_scope=True) + module_node.merge_in(tree.with_compiler_directives(), + tree.scope, stage="utility", + merge_scope=True) return module_node + return inject_utility_code_stage @@ -148,8 +152,8 @@ def create_pipeline(context, mode, exclude_classes=()): from .ParseTreeTransforms import ExpandInplaceOperators, ParallelRangeTransform from .ParseTreeTransforms import CalculateQualifiedNamesTransform from .TypeInference import MarkParallelAssignments, MarkOverflowingArithmetic - from .ParseTreeTransforms import AdjustDefByDirectives, AlignFunctionDefinitions - from .ParseTreeTransforms import RemoveUnreachableCode, GilCheck + from .ParseTreeTransforms import AdjustDefByDirectives, AlignFunctionDefinitions, AutoCpdefFunctionDefinitions + from .ParseTreeTransforms import RemoveUnreachableCode, GilCheck, 
CoerceCppTemps from .FlowControl import ControlFlowAnalysis from .AnalysedTreeTransforms import AutoTestDictTransform from .AutoDocTransforms import EmbedSignature @@ -185,10 +189,11 @@ def create_pipeline(context, mode, exclude_classes=()): TrackNumpyAttributes(), InterpretCompilerDirectives(context, context.compiler_directives), ParallelRangeTransform(context), + WithTransform(), AdjustDefByDirectives(context), - WithTransform(context), - MarkClosureVisitor(context), _align_function_definitions, + MarkClosureVisitor(context), + AutoCpdefFunctionDefinitions(context), RemoveUnreachableCode(context), ConstantFolding(), FlattenInListTransform(), @@ -219,26 +224,26 @@ def create_pipeline(context, mode, exclude_classes=()): ConsolidateOverflowCheck(context), DropRefcountingTransform(), FinalOptimizePhase(context), + CoerceCppTemps(context), GilCheck(), ] - filtered_stages = [] - for s in stages: - if s.__class__ not in exclude_classes: - filtered_stages.append(s) - return filtered_stages + if exclude_classes: + stages = [s for s in stages if s.__class__ not in exclude_classes] + return stages def create_pyx_pipeline(context, options, result, py=False, exclude_classes=()): - if py: - mode = 'py' - else: - mode = 'pyx' + mode = 'py' if py else 'pyx' + test_support = [] + ctest_support = [] if options.evaluate_tree_assertions: from ..TestUtils import TreeAssertVisitor - test_support.append(TreeAssertVisitor()) + test_validator = TreeAssertVisitor() + test_support.append(test_validator) + ctest_support.append(test_validator.create_c_file_validator()) if options.gdb_debug: - from ..Debugger import DebugWriter # requires Py2.5+ + from ..Debugger import DebugWriter # requires Py2.5+ from .ParseTreeTransforms import DebugTransform context.gdb_debug_outputwriter = DebugWriter.CythonDebugWriter( options.output_dir) @@ -254,7 +259,9 @@ def create_pyx_pipeline(context, options, result, py=False, exclude_classes=()): inject_utility_code_stage_factory(context), abort_on_errors], 
debug_transform, - [generate_pyx_code_stage_factory(options, result)])) + [generate_pyx_code_stage_factory(options, result)], + ctest_support, + )) def create_pxd_pipeline(context, scope, module_name): from .CodeGeneration import ExtractPxdCode @@ -284,11 +291,25 @@ def create_pyx_as_pxd_pipeline(context, result): FlattenInListTransform, WithTransform ]) + from .Visitor import VisitorTransform + class SetInPxdTransform(VisitorTransform): + # A number of nodes have an "in_pxd" attribute which affects AnalyseDeclarationsTransform + # (for example controlling pickling generation). Set it, to make sure we don't mix them up with + # the importing main module. + # FIXME: This should be done closer to the parsing step. + def visit_StatNode(self, node): + if hasattr(node, "in_pxd"): + node.in_pxd = True + self.visitchildren(node) + return node + + visit_Node = VisitorTransform.recurse_to_children + for stage in pyx_pipeline: pipeline.append(stage) if isinstance(stage, AnalyseDeclarationsTransform): - # This is the last stage we need. - break + pipeline.insert(-1, SetInPxdTransform()) + break # This is the last stage we need. 
def fake_pxd(root): for entry in root.scope.entries.values(): if not entry.in_cinclude: @@ -326,11 +347,30 @@ def insert_into_pipeline(pipeline, transform, before=None, after=None): _pipeline_entry_points = {} +try: + from threading import local as _threadlocal +except ImportError: + class _threadlocal(object): pass + +threadlocal = _threadlocal() + + +def get_timings(): + try: + return threadlocal.cython_pipeline_timings + except AttributeError: + return {} + def run_pipeline(pipeline, source, printtree=True): from .Visitor import PrintTree exec_ns = globals().copy() if DebugFlags.debug_verbose_pipeline else None + try: + timings = threadlocal.cython_pipeline_timings + except AttributeError: + timings = threadlocal.cython_pipeline_timings = {} + def run(phase, data): return phase(data) @@ -339,29 +379,39 @@ def run_pipeline(pipeline, source, printtree=True): try: try: for phase in pipeline: - if phase is not None: - if not printtree and isinstance(phase, PrintTree): - continue - if DebugFlags.debug_verbose_pipeline: - t = time() - print("Entering pipeline phase %r" % phase) - # create a new wrapper for each step to show the name in profiles - phase_name = getattr(phase, '__name__', type(phase).__name__) - try: - run = _pipeline_entry_points[phase_name] - except KeyError: - exec("def %s(phase, data): return phase(data)" % phase_name, exec_ns) - run = _pipeline_entry_points[phase_name] = exec_ns[phase_name] - data = run(phase, data) - if DebugFlags.debug_verbose_pipeline: - print(" %.3f seconds" % (time() - t)) + if phase is None: + continue + if not printtree and isinstance(phase, PrintTree): + continue + + phase_name = getattr(phase, '__name__', type(phase).__name__) + if DebugFlags.debug_verbose_pipeline: + print("Entering pipeline phase %r" % phase) + # create a new wrapper for each step to show the name in profiles + try: + run = _pipeline_entry_points[phase_name] + except KeyError: + exec("def %s(phase, data): return phase(data)" % phase_name, exec_ns) + run = 
_pipeline_entry_points[phase_name] = exec_ns[phase_name] + + t = time() + data = run(phase, data) + t = time() - t + + try: + old_t, count = timings[phase_name] + except KeyError: + old_t, count = 0, 0 + timings[phase_name] = (old_t + int(t * 1000000), count + 1) + if DebugFlags.debug_verbose_pipeline: + print(" %.3f seconds" % t) except CompileError as err: # err is set Errors.report_error(err, use_stack=False) error = err except InternalError as err: # Only raise if there was not an earlier error - if Errors.num_errors == 0: + if Errors.get_errors_count() == 0: raise error = err except AbortError as err: diff --git a/Cython/Compiler/PyrexTypes.py b/Cython/Compiler/PyrexTypes.py index c309bd04b..19d193dcf 100644 --- a/Cython/Compiler/PyrexTypes.py +++ b/Cython/Compiler/PyrexTypes.py @@ -12,13 +12,15 @@ try: reduce except NameError: from functools import reduce +from functools import partial +from itertools import product from Cython.Utils import cached_function from .Code import UtilityCode, LazyUtilityCode, TempitaUtilityCode from . import StringEncoding from . 
import Naming -from .Errors import error, warning +from .Errors import error, CannotSpecialize class BaseType(object): @@ -46,7 +48,9 @@ class BaseType(object): def cast_code(self, expr_code): return "((%s)%s)" % (self.empty_declaration_code(), expr_code) - def empty_declaration_code(self): + def empty_declaration_code(self, pyrex=False): + if pyrex: + return self.declaration_code('', pyrex=True) if self._empty_declaration is None: self._empty_declaration = self.declaration_code('') return self._empty_declaration @@ -75,7 +79,7 @@ class BaseType(object): """ return self - def get_fused_types(self, result=None, seen=None, subtypes=None): + def get_fused_types(self, result=None, seen=None, subtypes=None, include_function_return_type=False): subtypes = subtypes or self.subtypes if not subtypes: return None @@ -88,10 +92,10 @@ class BaseType(object): list_or_subtype = getattr(self, attr) if list_or_subtype: if isinstance(list_or_subtype, BaseType): - list_or_subtype.get_fused_types(result, seen) + list_or_subtype.get_fused_types(result, seen, include_function_return_type=include_function_return_type) else: for subtype in list_or_subtype: - subtype.get_fused_types(result, seen) + subtype.get_fused_types(result, seen, include_function_return_type=include_function_return_type) return result @@ -115,7 +119,7 @@ class BaseType(object): Deduce any template params in this (argument) type given the actual argument type. - http://en.cppreference.com/w/cpp/language/function_template#Template_argument_deduction + https://en.cppreference.com/w/cpp/language/function_template#Template_argument_deduction """ return {} @@ -176,15 +180,22 @@ class PyrexType(BaseType): # is_ptr boolean Is a C pointer type # is_null_ptr boolean Is the type of NULL # is_reference boolean Is a C reference type - # is_const boolean Is a C const type. 
+ # is_rvalue_reference boolean Is a C++ rvalue reference type + # is_const boolean Is a C const type + # is_volatile boolean Is a C volatile type + # is_cv_qualified boolean Is a C const or volatile type # is_cfunction boolean Is a C function type # is_struct_or_union boolean Is a C struct or union type # is_struct boolean Is a C struct type + # is_cpp_class boolean Is a C++ class + # is_optional_cpp_class boolean Is a C++ class with variable lifetime handled with std::optional # is_enum boolean Is a C enum type + # is_cpp_enum boolean Is a C++ scoped enum type # is_typedef boolean Is a typedef type # is_string boolean Is a C char * type # is_pyunicode_ptr boolean Is a C PyUNICODE * type # is_cpp_string boolean Is a C++ std::string type + # python_type_constructor_name string or None non-None if it is a Python type constructor that can be indexed/"templated" # is_unicode_char boolean Is either Py_UCS4 or Py_UNICODE # is_returncode boolean Is used only to signal exceptions # is_error boolean Is the dummy error type @@ -192,6 +203,10 @@ class PyrexType(BaseType): # is_pythran_expr boolean Is Pythran expr # is_numpy_buffer boolean Is Numpy array buffer # has_attributes boolean Has C dot-selectable attributes + # needs_cpp_construction boolean Needs C++ constructor and destructor when used in a cdef class + # needs_refcounting boolean Needs code to be generated similar to incref/gotref/decref. + # Largely used internally. + # equivalent_type type A C or Python type that is equivalent to this Python or C type. # default_value string Initial value that can be assigned before first user assignment. # declaration_value string The value statically assigned on declaration (if any). 
# entry Entry The Entry for this type @@ -226,6 +241,7 @@ class PyrexType(BaseType): is_extension_type = 0 is_final_type = 0 is_builtin_type = 0 + is_cython_builtin_type = 0 is_numeric = 0 is_int = 0 is_float = 0 @@ -235,13 +251,20 @@ class PyrexType(BaseType): is_ptr = 0 is_null_ptr = 0 is_reference = 0 + is_fake_reference = 0 + is_rvalue_reference = 0 is_const = 0 + is_volatile = 0 + is_cv_qualified = 0 is_cfunction = 0 is_struct_or_union = 0 is_cpp_class = 0 + is_optional_cpp_class = 0 + python_type_constructor_name = None is_cpp_string = 0 is_struct = 0 is_enum = 0 + is_cpp_enum = False is_typedef = 0 is_string = 0 is_pyunicode_ptr = 0 @@ -254,6 +277,9 @@ class PyrexType(BaseType): is_pythran_expr = 0 is_numpy_buffer = 0 has_attributes = 0 + needs_cpp_construction = 0 + needs_refcounting = 0 + equivalent_type = None default_value = "" declaration_value = "" @@ -262,7 +288,8 @@ class PyrexType(BaseType): return self def specialize(self, values): - # TODO(danilo): Override wherever it makes sense. + # Returns the concrete type if this is a fused type, or otherwise the type itself. + # May raise Errors.CannotSpecialize on failure return self def literal_code(self, value): @@ -317,7 +344,8 @@ class PyrexType(BaseType): return 0 def _assign_from_py_code(self, source_code, result_code, error_pos, code, - from_py_function=None, error_condition=None, extra_args=None): + from_py_function=None, error_condition=None, extra_args=None, + special_none_cvalue=None): args = ', ' + ', '.join('%s' % arg for arg in extra_args) if extra_args else '' convert_call = "%s(%s%s)" % ( from_py_function or self.from_py_function, @@ -326,11 +354,44 @@ class PyrexType(BaseType): ) if self.is_enum: convert_call = typecast(self, c_long_type, convert_call) + if special_none_cvalue: + # NOTE: requires 'source_code' to be simple! + convert_call = "(__Pyx_Py_IsNone(%s) ? 
(%s) : (%s))" % ( + source_code, special_none_cvalue, convert_call) return '%s = %s; %s' % ( result_code, convert_call, code.error_goto_if(error_condition or self.error_condition(result_code), error_pos)) + def _generate_dummy_refcounting(self, code, *ignored_args, **ignored_kwds): + if self.needs_refcounting: + raise NotImplementedError("Ref-counting operation not yet implemented for type %s" % + self) + + def _generate_dummy_refcounting_assignment(self, code, cname, rhs_cname, *ignored_args, **ignored_kwds): + if self.needs_refcounting: + raise NotImplementedError("Ref-counting operation not yet implemented for type %s" % + self) + code.putln("%s = %s" % (cname, rhs_cname)) + + generate_incref = generate_xincref = generate_decref = generate_xdecref \ + = generate_decref_clear = generate_xdecref_clear \ + = generate_gotref = generate_xgotref = generate_giveref = generate_xgiveref \ + = _generate_dummy_refcounting + + generate_decref_set = generate_xdecref_set = _generate_dummy_refcounting_assignment + + def nullcheck_string(self, code, cname): + if self.needs_refcounting: + raise NotImplementedError("Ref-counting operation not yet implemented for type %s" % + self) + code.putln("1") + + def cpp_optional_declaration_code(self, entity_code, dll_linkage=None): + # declares an std::optional c++ variable + raise NotImplementedError( + "cpp_optional_declaration_code only implemented for c++ classes and not type %s" % self) + def public_decl(base_code, dll_linkage): if dll_linkage: @@ -338,20 +399,15 @@ def public_decl(base_code, dll_linkage): else: return base_code -def create_typedef_type(name, base_type, cname, is_external=0, namespace=None): - is_fused = base_type.is_fused - if base_type.is_complex or is_fused: - if is_external: - if is_fused: - msg = "Fused" - else: - msg = "Complex" - - raise ValueError("%s external typedefs not supported" % msg) +def create_typedef_type(name, base_type, cname, is_external=0, namespace=None): + if is_external: + if 
base_type.is_complex or base_type.is_fused: + raise ValueError("%s external typedefs not supported" % ( + "Fused" if base_type.is_fused else "Complex")) + if base_type.is_complex or base_type.is_fused: return base_type - else: - return CTypedefType(name, base_type, cname, is_external, namespace) + return CTypedefType(name, base_type, cname, is_external, namespace) class CTypedefType(BaseType): @@ -447,9 +503,9 @@ class CTypedefType(BaseType): "TO_PY_FUNCTION": self.to_py_function})) return True elif base_type.is_float: - pass # XXX implement! + pass # XXX implement! elif base_type.is_complex: - pass # XXX implement! + pass # XXX implement! pass elif base_type.is_cpp_string: cname = "__pyx_convert_PyObject_string_to_py_%s" % type_identifier(self) @@ -476,13 +532,16 @@ class CTypedefType(BaseType): self.from_py_function = "__Pyx_PyInt_As_" + self.specialization_name() env.use_utility_code(TempitaUtilityCode.load_cached( "CIntFromPy", "TypeConversion.c", - context={"TYPE": self.empty_declaration_code(), - "FROM_PY_FUNCTION": self.from_py_function})) + context={ + "TYPE": self.empty_declaration_code(), + "FROM_PY_FUNCTION": self.from_py_function, + "IS_ENUM": base_type.is_enum, + })) return True elif base_type.is_float: - pass # XXX implement! + pass # XXX implement! elif base_type.is_complex: - pass # XXX implement! + pass # XXX implement! 
elif base_type.is_cpp_string: cname = '__pyx_convert_string_from_py_%s' % type_identifier(self) context = { @@ -507,11 +566,13 @@ class CTypedefType(BaseType): source_code, result_code, result_type, to_py_function) def from_py_call_code(self, source_code, result_code, error_pos, code, - from_py_function=None, error_condition=None): + from_py_function=None, error_condition=None, + special_none_cvalue=None): return self.typedef_base_type.from_py_call_code( source_code, result_code, error_pos, code, from_py_function or self.from_py_function, - error_condition or self.error_condition(result_code) + error_condition or self.error_condition(result_code), + special_none_cvalue=special_none_cvalue, ) def overflow_check_binop(self, binop, env, const_rhs=False): @@ -561,8 +622,13 @@ class CTypedefType(BaseType): class MemoryViewSliceType(PyrexType): is_memoryviewslice = 1 + default_value = "{ 0, 0, { 0 }, { 0 }, { 0 } }" has_attributes = 1 + needs_refcounting = 1 # Ideally this would be true and reference counting for + # memoryview and pyobject code could be generated in the same way. + # However, memoryviews are sufficiently specialized that this doesn't + # seem practical. Implement a limited version of it for now scope = None # These are special cased in Defnode @@ -591,7 +657,7 @@ class MemoryViewSliceType(PyrexType): 'ptr' -- Pointer stored in this dimension. 'full' -- Check along this dimension, don't assume either. - the packing specifiers specify how the array elements are layed-out + the packing specifiers specify how the array elements are laid-out in memory. 'contig' -- The data is contiguous in memory along this dimension. 
@@ -633,6 +699,10 @@ class MemoryViewSliceType(PyrexType): else: return False + def __ne__(self, other): + # TODO drop when Python2 is dropped + return not (self == other) + def same_as_resolved_type(self, other_type): return ((other_type.is_memoryviewslice and #self.writable_needed == other_type.writable_needed and # FIXME: should be only uni-directional @@ -650,10 +720,10 @@ class MemoryViewSliceType(PyrexType): def declaration_code(self, entity_code, for_display = 0, dll_linkage = None, pyrex = 0): # XXX: we put these guards in for now... - assert not pyrex assert not dll_linkage from . import MemoryView - base_code = str(self) if for_display else MemoryView.memviewslice_cname + base_code = StringEncoding.EncodedString( + str(self) if pyrex or for_display else MemoryView.memviewslice_cname) return self.base_declaration_code( base_code, entity_code) @@ -713,8 +783,8 @@ class MemoryViewSliceType(PyrexType): to_axes_f = contig_dim + follow_dim * (ndim -1) dtype = self.dtype - if dtype.is_const: - dtype = dtype.const_base_type + if dtype.is_cv_qualified: + dtype = dtype.cv_base_type to_memview_c = MemoryViewSliceType(dtype, to_axes_c) to_memview_f = MemoryViewSliceType(dtype, to_axes_f) @@ -791,17 +861,20 @@ class MemoryViewSliceType(PyrexType): # return False src_dtype, dst_dtype = src.dtype, dst.dtype - if dst_dtype.is_const: - # Requesting read-only views is always ok => consider only the non-const base type. - dst_dtype = dst_dtype.const_base_type - if src_dtype.is_const: - # When assigning between read-only views, compare only the non-const base types. - src_dtype = src_dtype.const_base_type - elif copying and src_dtype.is_const: - # Copying by value => ignore const on source. 
- src_dtype = src_dtype.const_base_type - - if src_dtype != dst_dtype: + # We can add but not remove const/volatile modifiers + # (except if we are copying by value, then anything is fine) + if not copying: + if src_dtype.is_const and not dst_dtype.is_const: + return False + if src_dtype.is_volatile and not dst_dtype.is_volatile: + return False + # const/volatile checks are done, remove those qualifiers + if src_dtype.is_cv_qualified: + src_dtype = src_dtype.cv_base_type + if dst_dtype.is_cv_qualified: + dst_dtype = dst_dtype.cv_base_type + + if not src_dtype.same_as(dst_dtype): return False if src.ndim != dst.ndim: @@ -918,13 +991,16 @@ class MemoryViewSliceType(PyrexType): return True def from_py_call_code(self, source_code, result_code, error_pos, code, - from_py_function=None, error_condition=None): + from_py_function=None, error_condition=None, + special_none_cvalue=None): # NOTE: auto-detection of readonly buffers is disabled: # writable = self.writable_needed or not self.dtype.is_const writable = not self.dtype.is_const return self._assign_from_py_code( source_code, result_code, error_pos, code, from_py_function, error_condition, - extra_args=['PyBUF_WRITABLE' if writable else '0']) + extra_args=['PyBUF_WRITABLE' if writable else '0'], + special_none_cvalue=special_none_cvalue, + ) def create_to_py_utility_code(self, env): self._dtype_to_py_func, self._dtype_from_py_func = self.dtype_object_conversion_funcs(env) @@ -1032,6 +1108,36 @@ class MemoryViewSliceType(PyrexType): def cast_code(self, expr_code): return expr_code + # When memoryviews are increfed currently seems heavily special-cased. 
+ # Therefore, use our own function for now + def generate_incref(self, code, name, **kwds): + pass + + def generate_incref_memoryviewslice(self, code, slice_cname, have_gil): + # TODO ideally would be done separately + code.putln("__PYX_INC_MEMVIEW(&%s, %d);" % (slice_cname, int(have_gil))) + + # decref however did look to always apply for memoryview slices + # with "have_gil" set to True by default + def generate_xdecref(self, code, cname, nanny, have_gil): + code.putln("__PYX_XCLEAR_MEMVIEW(&%s, %d);" % (cname, int(have_gil))) + + def generate_decref(self, code, cname, nanny, have_gil): + # Fall back to xdecref since we don't care to have a separate decref version for this. + self.generate_xdecref(code, cname, nanny, have_gil) + + def generate_xdecref_clear(self, code, cname, clear_before_decref, **kwds): + self.generate_xdecref(code, cname, **kwds) + code.putln("%s.memview = NULL; %s.data = NULL;" % (cname, cname)) + + def generate_decref_clear(self, code, cname, **kwds): + # memoryviews don't currently distinguish between xdecref and decref + self.generate_xdecref_clear(code, cname, **kwds) + + # memoryviews don't participate in giveref/gotref + generate_gotref = generate_xgotref = generate_xgiveref = generate_giveref = lambda *args: None + + class BufferType(BaseType): # @@ -1129,6 +1235,8 @@ class PyObjectType(PyrexType): is_extern = False is_subclassed = False is_gc_simple = False + builtin_trashcan = False # builtin type using trashcan + needs_refcounting = True def __str__(self): return "Python object" @@ -1181,11 +1289,85 @@ class PyObjectType(PyrexType): def check_for_null_code(self, cname): return cname + def generate_incref(self, code, cname, nanny): + if nanny: + code.putln("__Pyx_INCREF(%s);" % self.as_pyobject(cname)) + else: + code.putln("Py_INCREF(%s);" % self.as_pyobject(cname)) + + def generate_xincref(self, code, cname, nanny): + if nanny: + code.putln("__Pyx_XINCREF(%s);" % self.as_pyobject(cname)) + else: + code.putln("Py_XINCREF(%s);" % 
self.as_pyobject(cname)) + + def generate_decref(self, code, cname, nanny, have_gil): + # have_gil is for the benefit of memoryviewslice - it's ignored here + assert have_gil + self._generate_decref(code, cname, nanny, null_check=False, clear=False) + + def generate_xdecref(self, code, cname, nanny, have_gil): + # in this (and other) PyObjectType functions, have_gil is being + # passed to provide a common interface with MemoryviewSlice. + # It's ignored here + self._generate_decref(code, cname, nanny, null_check=True, + clear=False) + + def generate_decref_clear(self, code, cname, clear_before_decref, nanny, have_gil): + self._generate_decref(code, cname, nanny, null_check=False, + clear=True, clear_before_decref=clear_before_decref) + + def generate_xdecref_clear(self, code, cname, clear_before_decref=False, nanny=True, have_gil=None): + self._generate_decref(code, cname, nanny, null_check=True, + clear=True, clear_before_decref=clear_before_decref) + + def generate_gotref(self, code, cname): + code.putln("__Pyx_GOTREF(%s);" % self.as_pyobject(cname)) + + def generate_xgotref(self, code, cname): + code.putln("__Pyx_XGOTREF(%s);" % self.as_pyobject(cname)) + + def generate_giveref(self, code, cname): + code.putln("__Pyx_GIVEREF(%s);" % self.as_pyobject(cname)) + + def generate_xgiveref(self, code, cname): + code.putln("__Pyx_XGIVEREF(%s);" % self.as_pyobject(cname)) + + def generate_decref_set(self, code, cname, rhs_cname): + code.putln("__Pyx_DECREF_SET(%s, %s);" % (cname, rhs_cname)) + + def generate_xdecref_set(self, code, cname, rhs_cname): + code.putln("__Pyx_XDECREF_SET(%s, %s);" % (cname, rhs_cname)) + + def _generate_decref(self, code, cname, nanny, null_check=False, + clear=False, clear_before_decref=False): + prefix = '__Pyx' if nanny else 'Py' + X = 'X' if null_check else '' + + if clear: + if clear_before_decref: + if not nanny: + X = '' # CPython doesn't have a Py_XCLEAR() + code.putln("%s_%sCLEAR(%s);" % (prefix, X, cname)) + else: + 
code.putln("%s_%sDECREF(%s); %s = 0;" % ( + prefix, X, self.as_pyobject(cname), cname)) + else: + code.putln("%s_%sDECREF(%s);" % ( + prefix, X, self.as_pyobject(cname))) + + def nullcheck_string(self, cname): + return cname + + +builtin_types_that_cannot_create_refcycles = frozenset({ + 'object', 'bool', 'int', 'long', 'float', 'complex', + 'bytearray', 'bytes', 'unicode', 'str', 'basestring', +}) -builtin_types_that_cannot_create_refcycles = set([ - 'bool', 'int', 'long', 'float', 'complex', - 'bytearray', 'bytes', 'unicode', 'str', 'basestring' -]) +builtin_types_with_trashcan = frozenset({ + 'dict', 'list', 'set', 'frozenset', 'tuple', 'type', +}) class BuiltinObjectType(PyObjectType): @@ -1211,6 +1393,7 @@ class BuiltinObjectType(PyObjectType): self.typeptr_cname = "(&%s)" % cname self.objstruct_cname = objstruct_cname self.is_gc_simple = name in builtin_types_that_cannot_create_refcycles + self.builtin_trashcan = name in builtin_types_with_trashcan if name == 'type': # Special case the type type, as many C API calls (and other # libraries) actually expect a PyTypeObject* for type arguments. @@ -1276,6 +1459,12 @@ class BuiltinObjectType(PyObjectType): type_check = 'PyByteArray_Check' elif type_name == 'frozenset': type_check = 'PyFrozenSet_Check' + elif type_name == 'int': + # For backwards compatibility of (Py3) 'x: int' annotations in Py2, we also allow 'long' there. + type_check = '__Pyx_Py3Int_Check' + elif type_name == "memoryview": + # captialize doesn't catch the 'V' + type_check = "PyMemoryView_Check" else: type_check = 'Py%s_Check' % type_name.capitalize() if exact and type_name not in ('bool', 'slice', 'Exception'): @@ -1292,14 +1481,9 @@ class BuiltinObjectType(PyObjectType): check += '||((%s) == Py_None)' % arg if self.name == 'basestring': name = '(PY_MAJOR_VERSION < 3 ? 
"basestring" : "str")' - space_for_name = 16 else: name = '"%s"' % self.name - # avoid wasting too much space but limit number of different format strings - space_for_name = (len(self.name) // 16 + 1) * 16 - error = '((void)PyErr_Format(PyExc_TypeError, "Expected %%.%ds, got %%.200s", %s, Py_TYPE(%s)->tp_name), 0)' % ( - space_for_name, name, arg) - return check + '||' + error + return check + ' || __Pyx_RaiseUnexpectedTypeError(%s, %s)' % (name, arg) def declaration_code(self, entity_code, for_display = 0, dll_linkage = None, pyrex = 0): @@ -1318,7 +1502,7 @@ class BuiltinObjectType(PyObjectType): def cast_code(self, expr_code, to_object_struct = False): return "((%s*)%s)" % ( - to_object_struct and self.objstruct_cname or self.decl_type, # self.objstruct_cname may be None + to_object_struct and self.objstruct_cname or self.decl_type, # self.objstruct_cname may be None expr_code) def py_type_name(self): @@ -1332,7 +1516,6 @@ class PyExtensionType(PyObjectType): # # name string # scope CClassScope Attribute namespace - # visibility string # typedef_flag boolean # base_type PyExtensionType or None # module_name string or None Qualified name of defining module @@ -1346,13 +1529,20 @@ class PyExtensionType(PyObjectType): # vtable_cname string Name of C method table definition # early_init boolean Whether to initialize early (as opposed to during module execution). 
# defered_declarations [thunk] Used to declare class hierarchies in order + # is_external boolean Defined in a extern block # check_size 'warn', 'error', 'ignore' What to do if tp_basicsize does not match + # dataclass_fields OrderedDict nor None Used for inheriting from dataclasses + # multiple_bases boolean Does this class have multiple bases + # has_sequence_flag boolean Set Py_TPFLAGS_SEQUENCE is_extension_type = 1 has_attributes = 1 early_init = 1 objtypedef_cname = None + dataclass_fields = None + multiple_bases = False + has_sequence_flag = False def __init__(self, name, typedef_flag, base_type, is_external=0, check_size=None): self.name = name @@ -1510,9 +1700,11 @@ class CType(PyrexType): source_code or 'NULL') def from_py_call_code(self, source_code, result_code, error_pos, code, - from_py_function=None, error_condition=None): + from_py_function=None, error_condition=None, + special_none_cvalue=None): return self._assign_from_py_code( - source_code, result_code, error_pos, code, from_py_function, error_condition) + source_code, result_code, error_pos, code, from_py_function, error_condition, + special_none_cvalue=special_none_cvalue) @@ -1543,8 +1735,14 @@ class PythranExpr(CType): self.scope = scope = Symtab.CClassScope('', None, visibility="extern") scope.parent_type = self scope.directives = {} - scope.declare_var("shape", CPtrType(c_long_type), None, cname="_shape", is_cdef=True) - scope.declare_var("ndim", c_long_type, None, cname="value", is_cdef=True) + + scope.declare_var("ndim", c_long_type, pos=None, cname="value", is_cdef=True) + scope.declare_cproperty( + "shape", c_ptr_type(c_long_type), "__Pyx_PythranShapeAccessor", + doc="Pythran array shape", + visibility="extern", + nogil=True, + ) return True @@ -1558,59 +1756,76 @@ class PythranExpr(CType): return hash(self.pythran_type) -class CConstType(BaseType): +class CConstOrVolatileType(BaseType): + "A C const or volatile type" - is_const = 1 - subtypes = ['const_base_type'] + subtypes = 
['cv_base_type'] - def __init__(self, const_base_type): - self.const_base_type = const_base_type - if const_base_type.has_attributes and const_base_type.scope is not None: - from . import Symtab - self.scope = Symtab.CConstScope(const_base_type.scope) + is_cv_qualified = 1 + + def __init__(self, base_type, is_const=0, is_volatile=0): + self.cv_base_type = base_type + self.is_const = is_const + self.is_volatile = is_volatile + if base_type.has_attributes and base_type.scope is not None: + from .Symtab import CConstOrVolatileScope + self.scope = CConstOrVolatileScope(base_type.scope, is_const, is_volatile) + + def cv_string(self): + cvstring = "" + if self.is_const: + cvstring = "const " + cvstring + if self.is_volatile: + cvstring = "volatile " + cvstring + return cvstring def __repr__(self): - return "<CConstType %s>" % repr(self.const_base_type) + return "<CConstOrVolatileType %s%r>" % (self.cv_string(), self.cv_base_type) def __str__(self): return self.declaration_code("", for_display=1) def declaration_code(self, entity_code, for_display = 0, dll_linkage = None, pyrex = 0): + cv = self.cv_string() if for_display or pyrex: - return "const " + self.const_base_type.declaration_code(entity_code, for_display, dll_linkage, pyrex) + return cv + self.cv_base_type.declaration_code(entity_code, for_display, dll_linkage, pyrex) else: - return self.const_base_type.declaration_code("const %s" % entity_code, for_display, dll_linkage, pyrex) + return self.cv_base_type.declaration_code(cv + entity_code, for_display, dll_linkage, pyrex) def specialize(self, values): - base_type = self.const_base_type.specialize(values) - if base_type == self.const_base_type: + base_type = self.cv_base_type.specialize(values) + if base_type == self.cv_base_type: return self - else: - return CConstType(base_type) + return CConstOrVolatileType(base_type, + self.is_const, self.is_volatile) def deduce_template_params(self, actual): - return self.const_base_type.deduce_template_params(actual) + return 
self.cv_base_type.deduce_template_params(actual) def can_coerce_to_pyobject(self, env): - return self.const_base_type.can_coerce_to_pyobject(env) + return self.cv_base_type.can_coerce_to_pyobject(env) def can_coerce_from_pyobject(self, env): - return self.const_base_type.can_coerce_from_pyobject(env) + return self.cv_base_type.can_coerce_from_pyobject(env) def create_to_py_utility_code(self, env): - if self.const_base_type.create_to_py_utility_code(env): - self.to_py_function = self.const_base_type.to_py_function + if self.cv_base_type.create_to_py_utility_code(env): + self.to_py_function = self.cv_base_type.to_py_function return True def same_as_resolved_type(self, other_type): - if other_type.is_const: - return self.const_base_type.same_as_resolved_type(other_type.const_base_type) - # Accept const LHS <- non-const RHS. - return self.const_base_type.same_as_resolved_type(other_type) + if other_type.is_cv_qualified: + return self.cv_base_type.same_as_resolved_type(other_type.cv_base_type) + # Accept cv LHS <- non-cv RHS. + return self.cv_base_type.same_as_resolved_type(other_type) def __getattr__(self, name): - return getattr(self.const_base_type, name) + return getattr(self.cv_base_type, name) + + +def CConstType(base_type): + return CConstOrVolatileType(base_type, is_const=1) class FusedType(CType): @@ -1634,7 +1849,27 @@ class FusedType(CType): for t in types: if t.is_fused: # recursively merge in subtypes - for subtype in t.types: + if isinstance(t, FusedType): + t_types = t.types + else: + # handle types that aren't a fused type themselves but contain fused types + # for example a C++ template where the template type is fused. 
+ t_fused_types = t.get_fused_types() + t_types = [] + for substitution in product( + *[fused_type.types for fused_type in t_fused_types] + ): + t_types.append( + t.specialize( + { + fused_type: sub + for fused_type, sub in zip( + t_fused_types, substitution + ) + } + ) + ) + for subtype in t_types: if subtype not in flattened_types: flattened_types.append(subtype) elif t not in flattened_types: @@ -1653,9 +1888,12 @@ class FusedType(CType): return 'FusedType(name=%r)' % self.name def specialize(self, values): - return values[self] + if self in values: + return values[self] + else: + raise CannotSpecialize() - def get_fused_types(self, result=None, seen=None): + def get_fused_types(self, result=None, seen=None, include_function_return_type=False): if result is None: return [self] @@ -1738,6 +1976,7 @@ class CNumericType(CType): base_code = type_name.replace('PY_LONG_LONG', 'long long') else: base_code = public_decl(type_name, dll_linkage) + base_code = StringEncoding.EncodedString(base_code) return self.base_declaration_code(base_code, entity_code) def attributes_known(self): @@ -1807,8 +2046,11 @@ class CIntLike(object): self.from_py_function = "__Pyx_PyInt_As_" + self.specialization_name() env.use_utility_code(TempitaUtilityCode.load_cached( "CIntFromPy", "TypeConversion.c", - context={"TYPE": self.empty_declaration_code(), - "FROM_PY_FUNCTION": self.from_py_function})) + context={ + "TYPE": self.empty_declaration_code(), + "FROM_PY_FUNCTION": self.from_py_function, + "IS_ENUM": self.is_enum, + })) return True @staticmethod @@ -2012,7 +2254,7 @@ class CPyUCS4IntType(CIntType): # is 0..1114111, which is checked when converting from an integer # value. 
- to_py_function = "PyUnicode_FromOrdinal" + to_py_function = "__Pyx_PyUnicode_FromOrdinal" from_py_function = "__Pyx_PyObject_AsPy_UCS4" def can_coerce_to_pystring(self, env, format_spec=None): @@ -2036,7 +2278,7 @@ class CPyUnicodeIntType(CIntType): # Py_UNICODE is 0..1114111, which is checked when converting from # an integer value. - to_py_function = "PyUnicode_FromOrdinal" + to_py_function = "__Pyx_PyUnicode_FromOrdinal" from_py_function = "__Pyx_PyObject_AsPy_UNICODE" def can_coerce_to_pystring(self, env, format_spec=None): @@ -2110,14 +2352,27 @@ class CFloatType(CNumericType): class CComplexType(CNumericType): is_complex = 1 - to_py_function = "__pyx_PyComplex_FromComplex" has_attributes = 1 scope = None + @property + def to_py_function(self): + return "__pyx_PyComplex_FromComplex%s" % self.implementation_suffix + def __init__(self, real_type): while real_type.is_typedef and not real_type.typedef_is_external: real_type = real_type.typedef_base_type self.funcsuffix = "_%s" % real_type.specialization_name() + if not real_type.is_float: + # neither C nor C++ supports non-floating complex numbers, + # so fall back the on Cython implementation. + self.implementation_suffix = "_Cy" + elif real_type.is_typedef and real_type.typedef_is_external: + # C can't handle typedefs in complex numbers, + # so in this case also fall back on the Cython implementation. 
+ self.implementation_suffix = "_CyTypedef" + else: + self.implementation_suffix = "" if real_type.is_float: self.math_h_modifier = real_type.math_h_modifier else: @@ -2170,8 +2425,8 @@ class CComplexType(CNumericType): def assignable_from(self, src_type): # Temporary hack/feature disabling, see #441 if (not src_type.is_complex and src_type.is_numeric and src_type.is_typedef - and src_type.typedef_is_external): - return False + and src_type.typedef_is_external): + return False elif src_type.is_pyobject: return True else: @@ -2179,8 +2434,8 @@ class CComplexType(CNumericType): def assignable_from_resolved_type(self, src_type): return (src_type.is_complex and self.real_type.assignable_from_resolved_type(src_type.real_type) - or src_type.is_numeric and self.real_type.assignable_from_resolved_type(src_type) - or src_type is error_type) + or src_type.is_numeric and self.real_type.assignable_from_resolved_type(src_type) + or src_type is error_type) def attributes_known(self): if self.scope is None: @@ -2209,18 +2464,23 @@ class CComplexType(CNumericType): 'real_type': self.real_type.empty_declaration_code(), 'func_suffix': self.funcsuffix, 'm': self.math_h_modifier, - 'is_float': int(self.real_type.is_float) + 'is_float': int(self.real_type.is_float), + 'is_extern_float_typedef': int( + self.real_type.is_float and self.real_type.is_typedef and self.real_type.typedef_is_external) } def create_declaration_utility_code(self, env): # This must always be run, because a single CComplexType instance can be shared # across multiple compilations (the one created in the module scope) - env.use_utility_code(UtilityCode.load_cached('Header', 'Complex.c')) - env.use_utility_code(UtilityCode.load_cached('RealImag', 'Complex.c')) + if self.real_type.is_float: + env.use_utility_code(UtilityCode.load_cached('Header', 'Complex.c')) + utility_code_context = self._utility_code_context() + env.use_utility_code(UtilityCode.load_cached( + 'RealImag' + self.implementation_suffix, 'Complex.c')) 
env.use_utility_code(TempitaUtilityCode.load_cached( - 'Declarations', 'Complex.c', self._utility_code_context())) + 'Declarations', 'Complex.c', utility_code_context)) env.use_utility_code(TempitaUtilityCode.load_cached( - 'Arithmetic', 'Complex.c', self._utility_code_context())) + 'Arithmetic', 'Complex.c', utility_code_context)) return True def can_coerce_to_pyobject(self, env): @@ -2230,7 +2490,8 @@ class CComplexType(CNumericType): return True def create_to_py_utility_code(self, env): - env.use_utility_code(UtilityCode.load_cached('ToPy', 'Complex.c')) + env.use_utility_code(TempitaUtilityCode.load_cached( + 'ToPy', 'Complex.c', self._utility_code_context())) return True def create_from_py_utility_code(self, env): @@ -2263,6 +2524,12 @@ class CComplexType(CNumericType): def cast_code(self, expr_code): return expr_code + def real_code(self, expr_code): + return "__Pyx_CREAL%s(%s)" % (self.implementation_suffix, expr_code) + + def imag_code(self, expr_code): + return "__Pyx_CIMAG%s(%s)" % (self.implementation_suffix, expr_code) + complex_ops = { (1, '-'): 'neg', (1, 'zero'): 'is_zero', @@ -2275,6 +2542,42 @@ complex_ops = { } +class SoftCComplexType(CComplexType): + """ + a**b in Python can return either a complex or a float + depending on the sign of a. This "soft complex" type is + stored as a C complex (and so is a little slower than a + direct C double) but it prints/coerces to a float if + the imaginary part is 0. Therefore it provides a C + representation of the Python behaviour. 
+ """ + + to_py_function = "__pyx_Py_FromSoftComplex" + + def __init__(self): + super(SoftCComplexType, self).__init__(c_double_type) + + def declaration_code(self, entity_code, for_display=0, dll_linkage=None, pyrex=0): + base_result = super(SoftCComplexType, self).declaration_code( + entity_code, + for_display=for_display, + dll_linkage=dll_linkage, + pyrex=pyrex, + ) + if for_display: + return "soft %s" % base_result + else: + return base_result + + def create_to_py_utility_code(self, env): + env.use_utility_code(UtilityCode.load_cached('SoftComplexToPy', 'Complex.c')) + return True + + def __repr__(self): + result = super(SoftCComplexType, self).__repr__() + assert result[-1] == ">" + return "%s (soft)%s" % (result[:-1], result[-1]) + class CPyTSSTType(CType): # # PEP-539 "Py_tss_t" type @@ -2303,8 +2606,8 @@ class CPointerBaseType(CType): def __init__(self, base_type): self.base_type = base_type - if base_type.is_const: - base_type = base_type.const_base_type + if base_type.is_cv_qualified: + base_type = base_type.cv_base_type for char_type in (c_char_type, c_uchar_type, c_schar_type): if base_type.same_as(char_type): self.is_string = 1 @@ -2347,6 +2650,7 @@ class CPointerBaseType(CType): if self.is_string: assert isinstance(value, str) return '"%s"' % StringEncoding.escape_byte_string(value) + return str(value) class CArrayType(CPointerBaseType): @@ -2366,7 +2670,7 @@ class CArrayType(CPointerBaseType): return False def __hash__(self): - return hash(self.base_type) + 28 # arbitrarily chosen offset + return hash(self.base_type) + 28 # arbitrarily chosen offset def __repr__(self): return "<CArrayType %s %s>" % (self.size, repr(self.base_type)) @@ -2483,13 +2787,21 @@ class CArrayType(CPointerBaseType): return True def from_py_call_code(self, source_code, result_code, error_pos, code, - from_py_function=None, error_condition=None): + from_py_function=None, error_condition=None, + special_none_cvalue=None): assert not error_condition, '%s: %s' % (error_pos, 
error_condition) + assert not special_none_cvalue, '%s: %s' % (error_pos, special_none_cvalue) # not currently supported call_code = "%s(%s, %s, %s)" % ( from_py_function or self.from_py_function, source_code, result_code, self.size) return code.error_goto_if_neg(call_code, error_pos) + def error_condition(self, result_code): + # It isn't possible to use CArrays as return type so the error_condition + # is irrelevant. Returning a falsy value does avoid an error when getting + # from_py_call_code from a typedef. + return "" + class CPtrType(CPointerBaseType): # base_type CType Reference type @@ -2498,7 +2810,7 @@ class CPtrType(CPointerBaseType): default_value = "0" def __hash__(self): - return hash(self.base_type) + 27 # arbitrarily chosen offset + return hash(self.base_type) + 27 # arbitrarily chosen offset def __eq__(self, other): if isinstance(other, CType) and other.is_ptr: @@ -2528,8 +2840,8 @@ class CPtrType(CPointerBaseType): return 1 if other_type.is_null_ptr: return 1 - if self.base_type.is_const: - self = CPtrType(self.base_type.const_base_type) + if self.base_type.is_cv_qualified: + self = CPtrType(self.base_type.cv_base_type) if self.base_type.is_cfunction: if other_type.is_ptr: other_type = other_type.base_type.resolve() @@ -2565,32 +2877,30 @@ class CPtrType(CPointerBaseType): return self.base_type.find_cpp_operation_type(operator, operand_type) return None + def get_fused_types(self, result=None, seen=None, include_function_return_type=False): + # For function pointers, include the return type - unlike for fused functions themselves, + # where the return type cannot be an independent fused type (i.e. is derived or non-fused). 
+ return super(CPointerBaseType, self).get_fused_types(result, seen, include_function_return_type=True) + class CNullPtrType(CPtrType): is_null_ptr = 1 -class CReferenceType(BaseType): +class CReferenceBaseType(BaseType): - is_reference = 1 is_fake_reference = 0 + # Common base type for C reference and C++ rvalue reference types. + + subtypes = ['ref_base_type'] + def __init__(self, base_type): self.ref_base_type = base_type def __repr__(self): - return "<CReferenceType %s>" % repr(self.ref_base_type) - - def __str__(self): - return "%s &" % self.ref_base_type - - def declaration_code(self, entity_code, - for_display = 0, dll_linkage = None, pyrex = 0): - #print "CReferenceType.declaration_code: pointer to", self.base_type ### - return self.ref_base_type.declaration_code( - "&%s" % entity_code, - for_display, dll_linkage, pyrex) + return "<%r %s>" % (self.__class__.__name__, self.ref_base_type) def specialize(self, values): base_type = self.ref_base_type.specialize(values) @@ -2606,13 +2916,25 @@ class CReferenceType(BaseType): return getattr(self.ref_base_type, name) +class CReferenceType(CReferenceBaseType): + + is_reference = 1 + + def __str__(self): + return "%s &" % self.ref_base_type + + def declaration_code(self, entity_code, + for_display = 0, dll_linkage = None, pyrex = 0): + #print "CReferenceType.declaration_code: pointer to", self.base_type ### + return self.ref_base_type.declaration_code( + "&%s" % entity_code, + for_display, dll_linkage, pyrex) + + class CFakeReferenceType(CReferenceType): is_fake_reference = 1 - def __repr__(self): - return "<CFakeReferenceType %s>" % repr(self.ref_base_type) - def __str__(self): return "%s [&]" % self.ref_base_type @@ -2622,6 +2944,20 @@ class CFakeReferenceType(CReferenceType): return "__Pyx_FakeReference<%s> %s" % (self.ref_base_type.empty_declaration_code(), entity_code) +class CppRvalueReferenceType(CReferenceBaseType): + + is_rvalue_reference = 1 + + def __str__(self): + return "%s &&" % self.ref_base_type + + 
def declaration_code(self, entity_code, + for_display = 0, dll_linkage = None, pyrex = 0): + return self.ref_base_type.declaration_code( + "&&%s" % entity_code, + for_display, dll_linkage, pyrex) + + class CFuncType(CType): # return_type CType # args [CFuncTypeArg] @@ -2639,12 +2975,14 @@ class CFuncType(CType): # (used for optimisation overrides) # is_const_method boolean # is_static_method boolean + # op_arg_struct CPtrType Pointer to optional argument struct is_cfunction = 1 original_sig = None cached_specialized_types = None from_fused = False is_const_method = False + op_arg_struct = None subtypes = ['return_type', 'args'] @@ -2793,8 +3131,8 @@ class CFuncType(CType): # is performed elsewhere). for i in range(as_cmethod, len(other_type.args)): if not self.args[i].type.same_as( - other_type.args[i].type): - return 0 + other_type.args[i].type): + return 0 if self.has_varargs != other_type.has_varargs: return 0 if not self.return_type.subtype_of_resolved_type(other_type.return_type): @@ -2814,6 +3152,9 @@ class CFuncType(CType): # must catch C++ exceptions if we raise them return 0 if not other_type.exception_check or other_type.exception_value is not None: + # There's no problem if this type doesn't emit exceptions but the other type checks + if other_type.exception_check and not (self.exception_check or self.exception_value): + return 1 # if other does not *always* check exceptions, self must comply if not self._same_exception_value(other_type.exception_value): return 0 @@ -2899,8 +3240,10 @@ class CFuncType(CType): if (pyrex or for_display) and not self.return_type.is_pyobject: if self.exception_value and self.exception_check: trailer = " except? 
%s" % self.exception_value - elif self.exception_value: + elif self.exception_value and not self.exception_check: trailer = " except %s" % self.exception_value + elif not self.exception_value and not self.exception_check: + trailer = " noexcept" elif self.exception_check == '+': trailer = " except +" elif self.exception_check and for_display: @@ -3021,10 +3364,13 @@ class CFuncType(CType): return result - def get_fused_types(self, result=None, seen=None, subtypes=None): + def get_fused_types(self, result=None, seen=None, subtypes=None, include_function_return_type=False): """Return fused types in the order they appear as parameter types""" - return super(CFuncType, self).get_fused_types(result, seen, - subtypes=['args']) + return super(CFuncType, self).get_fused_types( + result, seen, + # for function pointer types, we consider the result type; for plain function + # types we don't (because it must be derivable from the arguments) + subtypes=self.subtypes if include_function_return_type else ['args']) def specialize_entry(self, entry, cname): assert not self.is_fused @@ -3050,8 +3396,16 @@ class CFuncType(CType): if not self.can_coerce_to_pyobject(env): return False from .UtilityCode import CythonUtilityCode - safe_typename = re.sub('[^a-zA-Z0-9]', '__', self.declaration_code("", pyrex=1)) - to_py_function = "__Pyx_CFunc_%s_to_py" % safe_typename + + # include argument names into the c function name to ensure cname is unique + # between functions with identical types but different argument names + from .Symtab import punycodify_name + def arg_name_part(arg): + return "%s%s" % (len(arg.name), punycodify_name(arg.name)) if arg.name else "0" + arg_names = [ arg_name_part(arg) for arg in self.args ] + arg_names = "_".join(arg_names) + safe_typename = type_identifier(self, pyrex=True) + to_py_function = "__Pyx_CFunc_%s_to_py_%s" % (safe_typename, arg_names) for arg in self.args: if not arg.type.is_pyobject and not arg.type.create_from_py_utility_code(env): @@ -3243,7 
+3597,7 @@ class CFuncTypeArg(BaseType): self.annotation = annotation self.type = type self.pos = pos - self.needs_type_test = False # TODO: should these defaults be set in analyse_types()? + self.needs_type_test = False # TODO: should these defaults be set in analyse_types()? def __repr__(self): return "%s:%s" % (self.name, repr(self.type)) @@ -3254,6 +3608,12 @@ class CFuncTypeArg(BaseType): def specialize(self, values): return CFuncTypeArg(self.name, self.type.specialize(values), self.pos, self.cname) + def is_forwarding_reference(self): + if self.type.is_rvalue_reference: + if (isinstance(self.type.ref_base_type, TemplatePlaceholderType) + and not self.type.ref_base_type.is_cv_qualified): + return True + return False class ToPyStructUtilityCode(object): @@ -3321,7 +3681,7 @@ class CStructOrUnionType(CType): has_attributes = 1 exception_check = True - def __init__(self, name, kind, scope, typedef_flag, cname, packed=False): + def __init__(self, name, kind, scope, typedef_flag, cname, packed=False, in_cpp=False): self.name = name self.cname = cname self.kind = kind @@ -3336,6 +3696,7 @@ class CStructOrUnionType(CType): self._convert_to_py_code = None self._convert_from_py_code = None self.packed = packed + self.needs_cpp_construction = self.is_struct and in_cpp def can_coerce_to_pyobject(self, env): if self._convert_to_py_code is False: @@ -3413,6 +3774,7 @@ class CStructOrUnionType(CType): var_entries=self.scope.var_entries, funcname=self.from_py_function, ) + env.use_utility_code(UtilityCode.load_cached("RaiseUnexpectedTypeError", "ObjectHandling.c")) from .UtilityCode import CythonUtilityCode self._convert_from_py_code = CythonUtilityCode.load( "FromPyStructUtility" if self.is_struct else "FromPyUnionUtility", @@ -3505,6 +3867,7 @@ class CppClassType(CType): is_cpp_class = 1 has_attributes = 1 + needs_cpp_construction = 1 exception_check = True namespace = None @@ -3578,10 +3941,12 @@ class CppClassType(CType): 'maybe_unordered': self.maybe_unordered(), 
'type': self.cname, }) + # Override directives that should not be inherited from user code. from .UtilityCode import CythonUtilityCode + directives = CythonUtilityCode.filter_inherited_directives(env.directives) env.use_utility_code(CythonUtilityCode.load( cls.replace('unordered_', '') + ".from_py", "CppConvert.pyx", - context=context, compiler_directives=env.directives)) + context=context, compiler_directives=directives)) self.from_py_function = cname return True @@ -3624,16 +3989,18 @@ class CppClassType(CType): 'type': self.cname, }) from .UtilityCode import CythonUtilityCode + # Override directives that should not be inherited from user code. + directives = CythonUtilityCode.filter_inherited_directives(env.directives) env.use_utility_code(CythonUtilityCode.load( cls.replace('unordered_', '') + ".to_py", "CppConvert.pyx", - context=context, compiler_directives=env.directives)) + context=context, compiler_directives=directives)) self.to_py_function = cname return True def is_template_type(self): return self.templates is not None and self.template_type is None - def get_fused_types(self, result=None, seen=None): + def get_fused_types(self, result=None, seen=None, include_function_return_type=False): if result is None: result = [] seen = set() @@ -3644,7 +4011,7 @@ class CppClassType(CType): T.get_fused_types(result, seen) return result - def specialize_here(self, pos, template_values=None): + def specialize_here(self, pos, env, template_values=None): if not self.is_template_type(): error(pos, "'%s' type is not a template" % self) return error_type @@ -3669,10 +4036,12 @@ class CppClassType(CType): return error_type has_object_template_param = False for value in template_values: - if value.is_pyobject: + if value.is_pyobject or value.needs_refcounting: has_object_template_param = True + type_description = "Python object" if value.is_pyobject else "Reference-counted" error(pos, - "Python object type '%s' cannot be used as a template argument" % value) + "%s type 
'%s' cannot be used as a template argument" % ( + type_description, value)) if has_object_template_param: return error_type return self.specialize(dict(zip(self.templates, template_values))) @@ -3695,23 +4064,23 @@ class CppClassType(CType): specialized.namespace = self.namespace.specialize(values) specialized.scope = self.scope.specialize(values, specialized) if self.cname == 'std::vector': - # vector<bool> is special cased in the C++ standard, and its - # accessors do not necessarily return references to the underlying - # elements (which may be bit-packed). - # http://www.cplusplus.com/reference/vector/vector-bool/ - # Here we pretend that the various methods return bool values - # (as the actual returned values are coercable to such, and - # we don't support call expressions as lvalues). - T = values.get(self.templates[0], None) - if T and not T.is_fused and T.empty_declaration_code() == 'bool': - for bit_ref_returner in ('at', 'back', 'front'): - if bit_ref_returner in specialized.scope.entries: - specialized.scope.entries[bit_ref_returner].type.return_type = T + # vector<bool> is special cased in the C++ standard, and its + # accessors do not necessarily return references to the underlying + # elements (which may be bit-packed). + # http://www.cplusplus.com/reference/vector/vector-bool/ + # Here we pretend that the various methods return bool values + # (as the actual returned values are coercable to such, and + # we don't support call expressions as lvalues). 
+ T = values.get(self.templates[0], None) + if T and not T.is_fused and T.empty_declaration_code() == 'bool': + for bit_ref_returner in ('at', 'back', 'front'): + if bit_ref_returner in specialized.scope.entries: + specialized.scope.entries[bit_ref_returner].type.return_type = T return specialized def deduce_template_params(self, actual): - if actual.is_const: - actual = actual.const_base_type + if actual.is_cv_qualified: + actual = actual.cv_base_type if actual.is_reference: actual = actual.ref_base_type if self == actual: @@ -3765,6 +4134,12 @@ class CppClassType(CType): base_code = public_decl(base_code, dll_linkage) return self.base_declaration_code(base_code, entity_code) + def cpp_optional_declaration_code(self, entity_code, dll_linkage=None, template_params=None): + return "__Pyx_Optional_Type<%s> %s" % ( + self.declaration_code("", False, dll_linkage, False, + template_params), + entity_code) + def is_subclass(self, other_type): if self.same_as_resolved_type(other_type): return 1 @@ -3847,6 +4222,101 @@ class CppClassType(CType): if constructor is not None and best_match([], constructor.all_alternatives()) is None: error(pos, "C++ class must have a nullary constructor to be %s" % msg) + def cpp_optional_check_for_null_code(self, cname): + # only applies to c++ classes that are being declared as std::optional + return "(%s.has_value())" % cname + + +class CppScopedEnumType(CType): + # name string + # doc string or None + # cname string + + is_cpp_enum = True + + def __init__(self, name, cname, underlying_type, namespace=None, doc=None): + self.name = name + self.doc = doc + self.cname = cname + self.values = [] + self.underlying_type = underlying_type + self.namespace = namespace + + def __str__(self): + return self.name + + def declaration_code(self, entity_code, + for_display=0, dll_linkage=None, pyrex=0): + if pyrex or for_display: + type_name = self.name + else: + if self.namespace: + type_name = "%s::%s" % ( + self.namespace.empty_declaration_code(), + 
self.cname + ) + else: + type_name = "__PYX_ENUM_CLASS_DECL %s" % self.cname + type_name = public_decl(type_name, dll_linkage) + return self.base_declaration_code(type_name, entity_code) + + def create_from_py_utility_code(self, env): + if self.from_py_function: + return True + if self.underlying_type.create_from_py_utility_code(env): + self.from_py_function = '(%s)%s' % ( + self.cname, self.underlying_type.from_py_function + ) + return True + + def create_to_py_utility_code(self, env): + if self.to_py_function is not None: + return True + if self.entry.create_wrapper: + from .UtilityCode import CythonUtilityCode + self.to_py_function = "__Pyx_Enum_%s_to_py" % self.name + if self.entry.scope != env.global_scope(): + module_name = self.entry.scope.qualified_name + else: + module_name = None + env.use_utility_code(CythonUtilityCode.load( + "EnumTypeToPy", "CpdefEnums.pyx", + context={"funcname": self.to_py_function, + "name": self.name, + "items": tuple(self.values), + "underlying_type": self.underlying_type.empty_declaration_code(), + "module_name": module_name, + "is_flag": False, + }, + outer_module_scope=self.entry.scope # ensure that "name" is findable + )) + return True + if self.underlying_type.create_to_py_utility_code(env): + # Using a C++11 lambda here, which is fine since + # scoped enums are a C++11 feature + self.to_py_function = '[](const %s& x){return %s((%s)x);}' % ( + self.cname, + self.underlying_type.to_py_function, + self.underlying_type.empty_declaration_code() + ) + return True + + def create_type_wrapper(self, env): + from .UtilityCode import CythonUtilityCode + rst = CythonUtilityCode.load( + "CppScopedEnumType", "CpdefEnums.pyx", + context={ + "name": self.name, + "cname": self.cname.split("::")[-1], + "items": tuple(self.values), + "underlying_type": self.underlying_type.empty_declaration_code(), + "enum_doc": self.doc, + "static_modname": env.qualified_name, + }, + outer_module_scope=env.global_scope()) + + env.use_utility_code(rst) + class 
TemplatePlaceholderType(CType): @@ -3897,16 +4367,18 @@ def is_optional_template_param(type): class CEnumType(CIntLike, CType): # name string + # doc string or None # cname string or None # typedef_flag boolean # values [string], populated during declaration analysis is_enum = 1 signed = 1 - rank = -1 # Ranks below any integer type + rank = -1 # Ranks below any integer type - def __init__(self, name, cname, typedef_flag, namespace=None): + def __init__(self, name, cname, typedef_flag, namespace=None, doc=None): self.name = name + self.doc = doc self.cname = cname self.values = [] self.typedef_flag = typedef_flag @@ -3945,12 +4417,47 @@ class CEnumType(CIntLike, CType): def create_type_wrapper(self, env): from .UtilityCode import CythonUtilityCode + # Generate "int"-like conversion function + old_to_py_function = self.to_py_function + self.to_py_function = None + CIntLike.create_to_py_utility_code(self, env) + enum_to_pyint_func = self.to_py_function + self.to_py_function = old_to_py_function # we don't actually want to overwrite this + env.use_utility_code(CythonUtilityCode.load( "EnumType", "CpdefEnums.pyx", context={"name": self.name, - "items": tuple(self.values)}, + "items": tuple(self.values), + "enum_doc": self.doc, + "enum_to_pyint_func": enum_to_pyint_func, + "static_modname": env.qualified_name, + }, outer_module_scope=env.global_scope())) + def create_to_py_utility_code(self, env): + if self.to_py_function is not None: + return self.to_py_function + if not self.entry.create_wrapper: + return super(CEnumType, self).create_to_py_utility_code(env) + from .UtilityCode import CythonUtilityCode + self.to_py_function = "__Pyx_Enum_%s_to_py" % self.name + if self.entry.scope != env.global_scope(): + module_name = self.entry.scope.qualified_name + else: + module_name = None + env.use_utility_code(CythonUtilityCode.load( + "EnumTypeToPy", "CpdefEnums.pyx", + context={"funcname": self.to_py_function, + "name": self.name, + "items": tuple(self.values), + 
"underlying_type": "int", + "module_name": module_name, + "is_flag": True, + }, + outer_module_scope=self.entry.scope # ensure that "name" is findable + )) + return True + class CTupleType(CType): # components [PyrexType] @@ -3966,6 +4473,9 @@ class CTupleType(CType): self.exception_check = True self._convert_to_py_code = None self._convert_from_py_code = None + # equivalent_type must be set now because it isn't available at import time + from .Builtin import tuple_type + self.equivalent_type = tuple_type def __str__(self): return "(%s)" % ", ".join(str(c) for c in self.components) @@ -3973,7 +4483,7 @@ class CTupleType(CType): def declaration_code(self, entity_code, for_display = 0, dll_linkage = None, pyrex = 0): if pyrex or for_display: - return str(self) + return "%s %s" % (str(self), entity_code) else: return self.base_declaration_code(self.cname, entity_code) @@ -4085,21 +4595,91 @@ class ErrorType(PyrexType): return "dummy" +class PythonTypeConstructorMixin(object): + """Used to help Cython interpret indexed types from the typing module (or similar) + """ + modifier_name = None + + def set_python_type_constructor_name(self, name): + self.python_type_constructor_name = name + + def specialize_here(self, pos, env, template_values=None): + # for a lot of the typing classes it doesn't really matter what the template is + # (i.e. 
typing.Dict[int] is really just a dict) + return self + + def __repr__(self): + if self.base_type: + return "%s[%r]" % (self.name, self.base_type) + else: + return self.name + + def is_template_type(self): + return True + + +class BuiltinTypeConstructorObjectType(BuiltinObjectType, PythonTypeConstructorMixin): + """ + builtin types like list, dict etc which can be subscripted in annotations + """ + def __init__(self, name, cname, objstruct_cname=None): + super(BuiltinTypeConstructorObjectType, self).__init__( + name, cname, objstruct_cname=objstruct_cname) + self.set_python_type_constructor_name(name) + + +class PythonTupleTypeConstructor(BuiltinTypeConstructorObjectType): + def specialize_here(self, pos, env, template_values=None): + if (template_values and None not in template_values and + not any(v.is_pyobject for v in template_values)): + entry = env.declare_tuple_type(pos, template_values) + if entry: + entry.used = True + return entry.type + return super(PythonTupleTypeConstructor, self).specialize_here(pos, env, template_values) + + +class SpecialPythonTypeConstructor(PyObjectType, PythonTypeConstructorMixin): + """ + For things like ClassVar, Optional, etc, which are not types and disappear during type analysis. + """ + + def __init__(self, name): + super(SpecialPythonTypeConstructor, self).__init__() + self.set_python_type_constructor_name(name) + self.modifier_name = name + + def __repr__(self): + return self.name + + def resolve(self): + return self + + def specialize_here(self, pos, env, template_values=None): + if len(template_values) != 1: + error(pos, "'%s' takes exactly one template argument." % self.name) + return error_type + if template_values[0] is None: + # FIXME: allowing unknown types for now since we don't recognise all Python types. + return None + # Replace this type with the actual 'template' argument. 
+ return template_values[0].resolve() + + rank_to_type_name = ( - "char", # 0 - "short", # 1 - "int", # 2 - "long", # 3 - "PY_LONG_LONG", # 4 - "float", # 5 - "double", # 6 - "long double", # 7 + "char", # 0 + "short", # 1 + "int", # 2 + "long", # 3 + "PY_LONG_LONG", # 4 + "float", # 5 + "double", # 6 + "long double", # 7 ) -_rank_to_type_name = list(rank_to_type_name) -RANK_INT = _rank_to_type_name.index('int') -RANK_LONG = _rank_to_type_name.index('long') -RANK_FLOAT = _rank_to_type_name.index('float') +RANK_INT = rank_to_type_name.index('int') +RANK_LONG = rank_to_type_name.index('long') +RANK_FLOAT = rank_to_type_name.index('float') UNSIGNED = 0 SIGNED = 2 @@ -4136,6 +4716,8 @@ c_float_complex_type = CComplexType(c_float_type) c_double_complex_type = CComplexType(c_double_type) c_longdouble_complex_type = CComplexType(c_longdouble_type) +soft_complex_type = SoftCComplexType() + c_anon_enum_type = CAnonEnumType(-1) c_returncode_type = CReturnCodeType(RANK_INT) c_bint_type = CBIntType(RANK_INT) @@ -4303,8 +4885,7 @@ def best_match(arg_types, functions, pos=None, env=None, args=None): # Check no. of args max_nargs = len(func_type.args) min_nargs = max_nargs - func_type.optional_arg_count - if actual_nargs < min_nargs or \ - (not func_type.has_varargs and actual_nargs > max_nargs): + if actual_nargs < min_nargs or (not func_type.has_varargs and actual_nargs > max_nargs): if max_nargs == min_nargs and not func_type.has_varargs: expectation = max_nargs elif actual_nargs < min_nargs: @@ -4316,12 +4897,23 @@ def best_match(arg_types, functions, pos=None, env=None, args=None): errors.append((func, error_mesg)) continue if func_type.templates: + # For any argument/parameter pair A/P, if P is a forwarding reference, + # use lvalue-reference-to-A for deduction in place of A when the + # function call argument is an lvalue. 
See: + # https://en.cppreference.com/w/cpp/language/template_argument_deduction#Deduction_from_a_function_call + arg_types_for_deduction = list(arg_types) + if func.type.is_cfunction and args: + for i, formal_arg in enumerate(func.type.args): + if formal_arg.is_forwarding_reference(): + if args[i].is_lvalue(): + arg_types_for_deduction[i] = c_ref_type(arg_types[i]) deductions = reduce( merge_template_deductions, - [pattern.type.deduce_template_params(actual) for (pattern, actual) in zip(func_type.args, arg_types)], + [pattern.type.deduce_template_params(actual) for (pattern, actual) in zip(func_type.args, arg_types_for_deduction)], {}) if deductions is None: - errors.append((func, "Unable to deduce type parameters for %s given (%s)" % (func_type, ', '.join(map(str, arg_types))))) + errors.append((func, "Unable to deduce type parameters for %s given (%s)" % ( + func_type, ', '.join(map(str, arg_types_for_deduction))))) elif len(deductions) < len(func_type.templates): errors.append((func, "Unable to deduce type parameter %s" % ( ", ".join([param.name for param in set(func_type.templates) - set(deductions.keys())])))) @@ -4456,10 +5048,10 @@ def widest_numeric_type(type1, type2): type1 = type1.ref_base_type if type2.is_reference: type2 = type2.ref_base_type - if type1.is_const: - type1 = type1.const_base_type - if type2.is_const: - type2 = type2.const_base_type + if type1.is_cv_qualified: + type1 = type1.cv_base_type + if type2.is_cv_qualified: + type2 = type2.cv_base_type if type1 == type2: widest_type = type1 elif type1.is_complex or type2.is_complex: @@ -4471,6 +5063,14 @@ def widest_numeric_type(type1, type2): widest_numeric_type( real_type(type1), real_type(type2))) + if type1 is soft_complex_type or type2 is soft_complex_type: + type1_is_other_complex = type1 is not soft_complex_type and type1.is_complex + type2_is_other_complex = type2 is not soft_complex_type and type2.is_complex + if (not type1_is_other_complex and not type2_is_other_complex and + 
widest_type.real_type == soft_complex_type.real_type): + # ensure we can do an actual "is" comparison + # (this possibly goes slightly wrong when mixing long double and soft complex) + widest_type = soft_complex_type elif type1.is_enum and type2.is_enum: widest_type = c_int_type elif type1.rank < type2.rank: @@ -4505,14 +5105,19 @@ def independent_spanning_type(type1, type2): type1 = type1.ref_base_type else: type2 = type2.ref_base_type - if type1 == type2: + + resolved_type1 = type1.resolve() + resolved_type2 = type2.resolve() + if resolved_type1 == resolved_type2: return type1 - elif (type1 is c_bint_type or type2 is c_bint_type) and (type1.is_numeric and type2.is_numeric): + elif ((resolved_type1 is c_bint_type or resolved_type2 is c_bint_type) + and (type1.is_numeric and type2.is_numeric)): # special case: if one of the results is a bint and the other # is another C integer, we must prevent returning a numeric # type so that we do not lose the ability to coerce to a # Python bool if we have to. return py_object_type + span_type = _spanning_type(type1, type2) if span_type is None: return error_type @@ -4649,35 +5254,38 @@ def parse_basic_type(name): name = 'int' return simple_c_type(signed, longness, name) -def c_array_type(base_type, size): - # Construct a C array type. + +def _construct_type_from_base(cls, base_type, *args): if base_type is error_type: return error_type - else: - return CArrayType(base_type, size) + return cls(base_type, *args) + +def c_array_type(base_type, size): + # Construct a C array type. + return _construct_type_from_base(CArrayType, base_type, size) def c_ptr_type(base_type): # Construct a C pointer type. 
- if base_type is error_type: - return error_type - elif base_type.is_reference: - return CPtrType(base_type.ref_base_type) - else: - return CPtrType(base_type) + if base_type.is_reference: + base_type = base_type.ref_base_type + return _construct_type_from_base(CPtrType, base_type) def c_ref_type(base_type): # Construct a C reference type - if base_type is error_type: - return error_type - else: - return CReferenceType(base_type) + return _construct_type_from_base(CReferenceType, base_type) + +def cpp_rvalue_ref_type(base_type): + # Construct a C++ rvalue reference type + return _construct_type_from_base(CppRvalueReferenceType, base_type) def c_const_type(base_type): # Construct a C const type. - if base_type is error_type: - return error_type - else: - return CConstType(base_type) + return _construct_type_from_base(CConstType, base_type) + +def c_const_or_volatile_type(base_type, is_const, is_volatile): + # Construct a C const/volatile type. + return _construct_type_from_base(CConstOrVolatileType, base_type, is_const, is_volatile) + def same_type(type1, type2): return type1.same_as(type2) @@ -4703,28 +5311,42 @@ def typecast(to_type, from_type, expr_code): def type_list_identifier(types): return cap_length('__and_'.join(type_identifier(type) for type in types)) +_special_type_characters = { + '__': '__dunder', + 'const ': '__const_', + ' ': '__space_', + '*': '__ptr', + '&': '__ref', + '&&': '__fwref', + '[': '__lArr', + ']': '__rArr', + '<': '__lAng', + '>': '__rAng', + '(': '__lParen', + ')': '__rParen', + ',': '__comma_', + '...': '__EL', + '::': '__in_', + ':': '__D', +} + +_escape_special_type_characters = partial(re.compile( + # join substrings in reverse order to put longer matches first, e.g. "::" before ":" + " ?(%s) ?" 
% "|".join(re.escape(s) for s in sorted(_special_type_characters, reverse=True)) +).sub, lambda match: _special_type_characters[match.group(1)]) + +def type_identifier(type, pyrex=False): + decl = type.empty_declaration_code(pyrex=pyrex) + return type_identifier_from_declaration(decl) + _type_identifier_cache = {} -def type_identifier(type): - decl = type.empty_declaration_code() +def type_identifier_from_declaration(decl): safe = _type_identifier_cache.get(decl) if safe is None: safe = decl safe = re.sub(' +', ' ', safe) - safe = re.sub(' ([^a-zA-Z0-9_])', r'\1', safe) - safe = re.sub('([^a-zA-Z0-9_]) ', r'\1', safe) - safe = (safe.replace('__', '__dunder') - .replace('const ', '__const_') - .replace(' ', '__space_') - .replace('*', '__ptr') - .replace('&', '__ref') - .replace('[', '__lArr') - .replace(']', '__rArr') - .replace('<', '__lAng') - .replace('>', '__rAng') - .replace('(', '__lParen') - .replace(')', '__rParen') - .replace(',', '__comma_') - .replace('::', '__in_')) + safe = re.sub(' ?([^a-zA-Z0-9_]) ?', r'\1', safe) + safe = _escape_special_type_characters(safe) safe = cap_length(re.sub('[^a-zA-Z0-9_]', lambda x: '__%X' % ord(x.group(0)), safe)) _type_identifier_cache[decl] = safe return safe diff --git a/Cython/Compiler/Scanning.pxd b/Cython/Compiler/Scanning.pxd index 59593f88a..2d64565c0 100644 --- a/Cython/Compiler/Scanning.pxd +++ b/Cython/Compiler/Scanning.pxd @@ -1,4 +1,4 @@ -from __future__ import absolute_import +# cython: language_level=3 import cython @@ -9,11 +9,6 @@ cdef unicode any_string_prefix, IDENT cdef get_lexicon() cdef initial_compile_time_env() -cdef class Method: - cdef object name - cdef dict kwargs - cdef readonly object __name__ # for tracing the scanner - ## methods commented with '##' out are used by Parsing.py when compiled. 
@cython.final @@ -39,10 +34,11 @@ cdef class PyrexScanner(Scanner): cdef public indentation_char cdef public int bracket_nesting_level cdef readonly bint async_enabled - cdef public sy - cdef public systring + cdef public unicode sy + cdef public systring # EncodedString + cdef public list put_back_on_failure - cdef long current_level(self) + cdef Py_ssize_t current_level(self) #cpdef commentline(self, text) #cpdef open_bracket_action(self, text) #cpdef close_bracket_action(self, text) @@ -50,13 +46,12 @@ cdef class PyrexScanner(Scanner): #cpdef begin_string_action(self, text) #cpdef end_string_action(self, text) #cpdef unclosed_string_action(self, text) - @cython.locals(current_level=cython.long, new_level=cython.long) + @cython.locals(current_level=Py_ssize_t, new_level=Py_ssize_t) cpdef indentation_action(self, text) #cpdef eof_action(self, text) ##cdef next(self) ##cdef peek(self) #cpdef put_back(self, sy, systring) - #cdef unread(self, token, value) ##cdef bint expect(self, what, message = *) except -2 ##cdef expect_keyword(self, what, message = *) ##cdef expected(self, what, message = *) @@ -65,3 +60,4 @@ cdef class PyrexScanner(Scanner): ##cdef expect_newline(self, message=*, bint ignore_semicolon=*) ##cdef int enter_async(self) except -1 ##cdef int exit_async(self) except -1 + cdef void error_at_scanpos(self, str message) except * diff --git a/Cython/Compiler/Scanning.py b/Cython/Compiler/Scanning.py index f61144033..d12d9d305 100644 --- a/Cython/Compiler/Scanning.py +++ b/Cython/Compiler/Scanning.py @@ -1,4 +1,4 @@ -# cython: infer_types=True, language_level=3, py2_import=True, auto_pickle=False +# cython: infer_types=True, language_level=3, auto_pickle=False # # Cython Scanner # @@ -12,11 +12,13 @@ cython.declare(make_lexicon=object, lexicon=object, import os import platform +from unicodedata import normalize +from contextlib import contextmanager from .. 
import Utils from ..Plex.Scanners import Scanner from ..Plex.Errors import UnrecognizedInput -from .Errors import error, warning +from .Errors import error, warning, hold_errors, release_errors, CompileError from .Lexicon import any_string_prefix, make_lexicon, IDENT from .Future import print_function @@ -51,25 +53,6 @@ pyx_reserved_words = py_reserved_words + [ ] -class Method(object): - - def __init__(self, name, **kwargs): - self.name = name - self.kwargs = kwargs or None - self.__name__ = name # for Plex tracing - - def __call__(self, stream, text): - method = getattr(stream, self.name) - # self.kwargs is almost always unused => avoid call overhead - return method(text, **self.kwargs) if self.kwargs is not None else method(text) - - def __copy__(self): - return self # immutable, no need to copy - - def __deepcopy__(self, memo): - return self # immutable, no need to copy - - #------------------------------------------------------------------ class CompileTimeScope(object): @@ -154,7 +137,7 @@ class SourceDescriptor(object): _escaped_description = None _cmp_name = '' def __str__(self): - assert False # To catch all places where a descriptor is used directly as a filename + assert False # To catch all places where a descriptor is used directly as a filename def set_file_type_from_name(self, filename): name, ext = os.path.splitext(filename) @@ -295,7 +278,7 @@ class StringSourceDescriptor(SourceDescriptor): get_error_description = get_description def get_filenametable_entry(self): - return "stringsource" + return "<stringsource>" def __hash__(self): return id(self) @@ -318,6 +301,8 @@ class PyrexScanner(Scanner): # compile_time_env dict Environment for conditional compilation # compile_time_eval boolean In a true conditional compilation context # compile_time_expr boolean In a compile-time expression context + # put_back_on_failure list or None If set, this records states so the tentatively_scan + # contextmanager can restore it def __init__(self, file, filename, 
parent_scanner=None, scope=None, context=None, source_encoding=None, parse_comments=True, initial_pos=None): @@ -356,10 +341,19 @@ class PyrexScanner(Scanner): self.indentation_char = None self.bracket_nesting_level = 0 + self.put_back_on_failure = None + self.begin('INDENT') self.sy = '' self.next() + def normalize_ident(self, text): + try: + text.encode('ascii') # really just name.isascii but supports Python 2 and 3 + except UnicodeEncodeError: + text = normalize('NFKC', text) + self.produce(IDENT, text) + def commentline(self, text): if self.parse_comments: self.produce('commentline', text) @@ -402,7 +396,7 @@ class PyrexScanner(Scanner): def unclosed_string_action(self, text): self.end_string_action(text) - self.error("Unclosed string literal") + self.error_at_scanpos("Unclosed string literal") def indentation_action(self, text): self.begin('') @@ -418,9 +412,9 @@ class PyrexScanner(Scanner): #print "Scanner.indentation_action: setting indent_char to", repr(c) else: if self.indentation_char != c: - self.error("Mixed use of tabs and spaces") + self.error_at_scanpos("Mixed use of tabs and spaces") if text.replace(c, "") != "": - self.error("Mixed use of tabs and spaces") + self.error_at_scanpos("Mixed use of tabs and spaces") # Figure out how many indents/dedents to do current_level = self.current_level() new_level = len(text) @@ -438,7 +432,7 @@ class PyrexScanner(Scanner): self.produce('DEDENT', '') #print "...current level now", self.current_level() ### if new_level != self.current_level(): - self.error("Inconsistent indentation") + self.error_at_scanpos("Inconsistent indentation") def eof_action(self, text): while len(self.indentation_stack) > 1: @@ -450,7 +444,7 @@ class PyrexScanner(Scanner): try: sy, systring = self.read() except UnrecognizedInput: - self.error("Unrecognized character") + self.error_at_scanpos("Unrecognized character") return # just a marker, error() always raises if sy == IDENT: if systring in self.keywords: @@ -461,9 +455,11 @@ class 
PyrexScanner(Scanner): else: sy = systring systring = self.context.intern_ustring(systring) + if self.put_back_on_failure is not None: + self.put_back_on_failure.append((sy, systring, self.position())) self.sy = sy self.systring = systring - if False: # debug_scanner: + if False: # debug_scanner: _, line, col = self.position() if not self.systring or self.sy == self.systring: t = self.sy @@ -473,20 +469,20 @@ class PyrexScanner(Scanner): def peek(self): saved = self.sy, self.systring + saved_pos = self.position() self.next() next = self.sy, self.systring - self.unread(*next) + self.unread(self.sy, self.systring, self.position()) self.sy, self.systring = saved + self.last_token_position_tuple = saved_pos return next - def put_back(self, sy, systring): - self.unread(self.sy, self.systring) + def put_back(self, sy, systring, pos): + self.unread(self.sy, self.systring, self.last_token_position_tuple) self.sy = sy self.systring = systring + self.last_token_position_tuple = pos - def unread(self, token, value): - # This method should be added to Plex - self.queue.insert(0, (token, value)) def error(self, message, pos=None, fatal=True): if pos is None: @@ -496,6 +492,12 @@ class PyrexScanner(Scanner): err = error(pos, message) if fatal: raise err + def error_at_scanpos(self, message): + # Like error(fatal=True), but gets the current scanning position rather than + # the position of the last token read. 
+ pos = self.get_current_scan_pos() + self.error(message, pos, True) + def expect(self, what, message=None): if self.sy == what: self.next() @@ -549,3 +551,30 @@ class PyrexScanner(Scanner): self.keywords.discard('async') if self.sy in ('async', 'await'): self.sy, self.systring = IDENT, self.context.intern_ustring(self.sy) + +@contextmanager +@cython.locals(scanner=Scanner) +def tentatively_scan(scanner): + errors = hold_errors() + try: + put_back_on_failure = scanner.put_back_on_failure + scanner.put_back_on_failure = [] + initial_state = (scanner.sy, scanner.systring, scanner.position()) + try: + yield errors + except CompileError as e: + pass + finally: + if errors: + if scanner.put_back_on_failure: + for put_back in reversed(scanner.put_back_on_failure[:-1]): + scanner.put_back(*put_back) + # we need to restore the initial state too + scanner.put_back(*initial_state) + elif put_back_on_failure is not None: + # the outer "tentatively_scan" block that we're in might still + # want to undo this block + put_back_on_failure.extend(scanner.put_back_on_failure) + scanner.put_back_on_failure = put_back_on_failure + finally: + release_errors(ignore=True) diff --git a/Cython/Compiler/StringEncoding.py b/Cython/Compiler/StringEncoding.py index c37e8aab7..192fc3de3 100644 --- a/Cython/Compiler/StringEncoding.py +++ b/Cython/Compiler/StringEncoding.py @@ -138,6 +138,24 @@ class EncodedString(_unicode): def as_utf8_string(self): return bytes_literal(self.utf8encode(), 'utf8') + def as_c_string_literal(self): + # first encodes the string then produces a c string literal + if self.encoding is None: + s = self.as_utf8_string() + else: + s = bytes_literal(self.byteencode(), self.encoding) + return s.as_c_string_literal() + + if not hasattr(_unicode, "isascii"): + def isascii(self): + # not defined for Python3.7+ since the class already has it + try: + self.encode("ascii") + except UnicodeEncodeError: + return False + else: + return True + def string_contains_surrogates(ustring): 
""" @@ -211,6 +229,11 @@ class BytesLiteral(_bytes): value = split_string_literal(escape_byte_string(self)) return '"%s"' % value + if not hasattr(_bytes, "isascii"): + def isascii(self): + # already defined for Python3.7+ + return True + def bytes_literal(s, encoding): assert isinstance(s, bytes) @@ -226,6 +249,12 @@ def encoded_string(s, encoding): s.encoding = encoding return s +def encoded_string_or_bytes_literal(s, encoding): + if isinstance(s, bytes): + return bytes_literal(s, encoding) + else: + return encoded_string(s, encoding) + char_from_escape_sequence = { r'\a' : u'\a', @@ -291,7 +320,7 @@ def escape_byte_string(s): """ s = _replace_specials(s) try: - return s.decode("ASCII") # trial decoding: plain ASCII => done + return s.decode("ASCII") # trial decoding: plain ASCII => done except UnicodeDecodeError: pass if IS_PYTHON3: @@ -324,7 +353,7 @@ def split_string_literal(s, limit=2000): while start < len(s): end = start + limit if len(s) > end-4 and '\\' in s[end-4:end]: - end -= 4 - s[end-4:end].find('\\') # just before the backslash + end -= 4 - s[end-4:end].find('\\') # just before the backslash while s[end-1] == '\\': end -= 1 if end == start: diff --git a/Cython/Compiler/Symtab.py b/Cython/Compiler/Symtab.py index 7361a55ae..608d68c22 100644 --- a/Cython/Compiler/Symtab.py +++ b/Cython/Compiler/Symtab.py @@ -13,6 +13,7 @@ try: except ImportError: # Py3 import builtins +from ..Utils import try_finally_contextmanager from .Errors import warning, error, InternalError from .StringEncoding import EncodedString from . import Options, Naming @@ -20,18 +21,19 @@ from . import PyrexTypes from .PyrexTypes import py_object_type, unspecified_type from .TypeSlots import ( pyfunction_signature, pymethod_signature, richcmp_special_methods, - get_special_method_signature, get_property_accessor_signature) + get_slot_table, get_property_accessor_signature) from . import Future from . 
import Code -iso_c99_keywords = set( -['auto', 'break', 'case', 'char', 'const', 'continue', 'default', 'do', +iso_c99_keywords = { + 'auto', 'break', 'case', 'char', 'const', 'continue', 'default', 'do', 'double', 'else', 'enum', 'extern', 'float', 'for', 'goto', 'if', 'int', 'long', 'register', 'return', 'short', 'signed', 'sizeof', 'static', 'struct', 'switch', 'typedef', 'union', 'unsigned', 'void', 'volatile', 'while', - '_Bool', '_Complex'', _Imaginary', 'inline', 'restrict']) + '_Bool', '_Complex'', _Imaginary', 'inline', 'restrict', +} def c_safe_identifier(cname): @@ -42,6 +44,28 @@ def c_safe_identifier(cname): cname = Naming.pyrex_prefix + cname return cname +def punycodify_name(cname, mangle_with=None): + # if passed the mangle_with should be a byte string + # modified from PEP489 + try: + cname.encode('ascii') + except UnicodeEncodeError: + cname = cname.encode('punycode').replace(b'-', b'_').decode('ascii') + if mangle_with: + # sometimes it necessary to mangle unicode names alone where + # they'll be inserted directly into C, because the punycode + # transformation can turn them into invalid identifiers + cname = "%s_%s" % (mangle_with, cname) + elif cname.startswith(Naming.pyrex_prefix): + # a punycode name could also be a valid ascii variable name so + # change the prefix to distinguish + cname = cname.replace(Naming.pyrex_prefix, + Naming.pyunicode_identifier_prefix, 1) + + return cname + + + class BufferAux(object): writable_needed = False @@ -87,6 +111,7 @@ class Entry(object): # doc_cname string or None C const holding the docstring # getter_cname string C func for getting property # setter_cname string C func for setting or deleting property + # is_cproperty boolean Is an inline property of an external type # is_self_arg boolean Is the "self" arg of an exttype method # is_arg boolean Is the arg of a method # is_local boolean Is a local variable @@ -134,6 +159,12 @@ class Entry(object): # cf_used boolean Entry is used # is_fused_specialized 
boolean Whether this entry of a cdef or def function # is a specialization + # is_cgetter boolean Is a c-level getter function + # is_cpp_optional boolean Entry should be declared as std::optional (cpp_locals directive) + # known_standard_library_import Either None (default), an empty string (definitely can't be determined) + # or a string of "modulename.something.attribute" + # Used for identifying imports from typing/dataclasses etc + # pytyping_modifiers Python type modifiers like "typing.ClassVar" but also "dataclasses.InitVar" # TODO: utility_code and utility_code_definition serves the same purpose... @@ -141,6 +172,7 @@ class Entry(object): borrowed = 0 init = "" annotation = None + pep563_annotation = None visibility = 'private' is_builtin = 0 is_cglobal = 0 @@ -160,6 +192,7 @@ class Entry(object): is_cpp_class = 0 is_const = 0 is_property = 0 + is_cproperty = 0 doc_cname = None getter_cname = None setter_cname = None @@ -203,6 +236,10 @@ class Entry(object): error_on_uninitialized = False cf_used = True outer_entry = None + is_cgetter = False + is_cpp_optional = False + known_standard_library_import = None + pytyping_modifiers = None def __init__(self, name, cname, type, pos = None, init = None): self.name = name @@ -238,6 +275,19 @@ class Entry(object): else: return NotImplemented + @property + def cf_is_reassigned(self): + return len(self.cf_assignments) > 1 + + def make_cpp_optional(self): + assert self.type.is_cpp_class + self.is_cpp_optional = True + assert not self.utility_code # we're not overwriting anything? 
+ self.utility_code_definition = Code.UtilityCode.load_cached("OptionalLocals", "CppSupport.cpp") + + def declared_with_pytyping_modifier(self, modifier_name): + return modifier_name in self.pytyping_modifiers if self.pytyping_modifiers else False + class InnerEntry(Entry): """ @@ -262,6 +312,7 @@ class InnerEntry(Entry): self.cf_assignments = outermost_entry.cf_assignments self.cf_references = outermost_entry.cf_references self.overloaded_alternatives = outermost_entry.overloaded_alternatives + self.is_cpp_optional = outermost_entry.is_cpp_optional self.inner_entries.append(self) def __getattr__(self, name): @@ -291,10 +342,13 @@ class Scope(object): # is_builtin_scope boolean Is the builtin scope of Python/Cython # is_py_class_scope boolean Is a Python class scope # is_c_class_scope boolean Is an extension type scope + # is_local_scope boolean Is a local (i.e. function/method/generator) scope # is_closure_scope boolean Is a closure scope + # is_generator_expression_scope boolean A subset of closure scope used for generator expressions # is_passthrough boolean Outer scope is passed directly # is_cpp_class_scope boolean Is a C++ class scope # is_property_scope boolean Is a extension type property scope + # is_c_dataclass_scope boolean or "frozen" is a cython.dataclasses.dataclass # scope_prefix string Disambiguator for C names # in_cinclude boolean Suppress C declaration code # qualified_name string "modname" or "modname.classname" @@ -308,17 +362,22 @@ class Scope(object): is_py_class_scope = 0 is_c_class_scope = 0 is_closure_scope = 0 - is_genexpr_scope = 0 + is_local_scope = False + is_generator_expression_scope = 0 + is_comprehension_scope = 0 is_passthrough = 0 is_cpp_class_scope = 0 is_property_scope = 0 is_module_scope = 0 + is_c_dataclass_scope = False is_internal = 0 scope_prefix = "" in_cinclude = 0 nogil = 0 fused_to_specific = None return_type = None + # Do ambiguous type names like 'int' and 'float' refer to the C types? (Otherwise, Python types.) 
+ in_c_type_context = True def __init__(self, name, outer_scope, parent_scope): # The outer_scope is the next scope in the lookup chain. @@ -347,7 +406,6 @@ class Scope(object): self.defined_c_classes = [] self.imported_c_classes = {} self.cname_to_entry = {} - self.string_to_entry = {} self.identifier_to_entry = {} self.num_to_entry = {} self.obj_to_entry = {} @@ -358,11 +416,11 @@ class Scope(object): def __deepcopy__(self, memo): return self - def merge_in(self, other, merge_unused=True, whitelist=None): + def merge_in(self, other, merge_unused=True, allowlist=None): # Use with care... entries = [] for name, entry in other.entries.items(): - if not whitelist or name in whitelist: + if not allowlist or name in allowlist: if entry.used or merge_unused: entries.append((name, entry)) @@ -390,7 +448,7 @@ class Scope(object): def mangle(self, prefix, name = None): if name: - return "%s%s%s" % (prefix, self.scope_prefix, name) + return punycodify_name("%s%s%s" % (prefix, self.scope_prefix, name)) else: return self.parent_scope.mangle(prefix, self.name) @@ -436,17 +494,26 @@ class Scope(object): for scope in sorted(self.subscopes, key=operator.attrgetter('scope_prefix')): yield scope + @try_finally_contextmanager + def new_c_type_context(self, in_c_type_context=None): + old_c_type_context = self.in_c_type_context + if in_c_type_context is not None: + self.in_c_type_context = in_c_type_context + yield + self.in_c_type_context = old_c_type_context + def declare(self, name, cname, type, pos, visibility, shadow = 0, is_type = 0, create_wrapper = 0): # Create new entry, and add to dictionary if # name is not None. Reports a warning if already # declared. 
- if type.is_buffer and not isinstance(self, LocalScope): # and not is_type: + if type.is_buffer and not isinstance(self, LocalScope): # and not is_type: error(pos, 'Buffer types only allowed as function local variables') if not self.in_cinclude and cname and re.match("^_[_A-Z]+$", cname): - # See http://www.gnu.org/software/libc/manual/html_node/Reserved-Names.html#Reserved-Names + # See https://www.gnu.org/software/libc/manual/html_node/Reserved-Names.html#Reserved-Names warning(pos, "'%s' is a reserved name in C." % cname, -1) + entries = self.entries - if name and name in entries and not shadow: + if name and name in entries and not shadow and not self.is_builtin_scope: old_entry = entries[name] # Reject redeclared C++ functions only if they have the same type signature. @@ -487,8 +554,7 @@ class Scope(object): entries[name] = entry if type.is_memoryviewslice: - from . import MemoryView - entry.init = MemoryView.memslice_entry_init + entry.init = type.default_value entry.scope = self entry.visibility = visibility @@ -522,7 +588,8 @@ class Scope(object): if defining: self.type_entries.append(entry) - if not template: + # don't replace an entry that's already set + if not template and getattr(type, "entry", None) is None: type.entry = entry # here we would set as_variable to an object representing this type @@ -563,8 +630,10 @@ class Scope(object): cname = self.mangle(Naming.type_prefix, name) entry = self.lookup_here(name) if not entry: + in_cpp = self.is_cpp() type = PyrexTypes.CStructOrUnionType( - name, kind, scope, typedef_flag, cname, packed) + name, kind, scope, typedef_flag, cname, packed, + in_cpp = in_cpp) entry = self.declare_type(name, type, pos, cname, visibility = visibility, api = api, defining = scope is not None) @@ -650,12 +719,12 @@ class Scope(object): error(pos, "'%s' previously declared as '%s'" % ( entry.name, entry.visibility)) - def declare_enum(self, name, pos, cname, typedef_flag, - visibility = 'private', api = 0, create_wrapper = 0): 
+ def declare_enum(self, name, pos, cname, scoped, typedef_flag, + visibility='private', api=0, create_wrapper=0, doc=None): if name: if not cname: if (self.in_cinclude or visibility == 'public' - or visibility == 'extern' or api): + or visibility == 'extern' or api): cname = name else: cname = self.mangle(Naming.type_prefix, name) @@ -663,13 +732,21 @@ class Scope(object): namespace = self.outer_scope.lookup(self.name).type else: namespace = None - type = PyrexTypes.CEnumType(name, cname, typedef_flag, namespace) + + if scoped: + type = PyrexTypes.CppScopedEnumType(name, cname, namespace, doc=doc) + else: + type = PyrexTypes.CEnumType(name, cname, typedef_flag, namespace, doc=doc) else: type = PyrexTypes.c_anon_enum_type entry = self.declare_type(name, type, pos, cname = cname, visibility = visibility, api = api) + if scoped: + entry.utility_code = Code.UtilityCode.load_cached("EnumClassDecl", "CppSupport.cpp") + self.use_entry_utility_code(entry) entry.create_wrapper = create_wrapper entry.enum_values = [] + self.sue_entries.append(entry) return entry @@ -677,27 +754,45 @@ class Scope(object): return self.outer_scope.declare_tuple_type(pos, components) def declare_var(self, name, type, pos, - cname = None, visibility = 'private', - api = 0, in_pxd = 0, is_cdef = 0): + cname=None, visibility='private', + api=False, in_pxd=False, is_cdef=False, pytyping_modifiers=None): # Add an entry for a variable. 
if not cname: if visibility != 'private' or api: cname = name else: cname = self.mangle(Naming.var_prefix, name) - if type.is_cpp_class and visibility != 'extern': - type.check_nullary_constructor(pos) entry = self.declare(name, cname, type, pos, visibility) entry.is_variable = 1 + if type.is_cpp_class and visibility != 'extern': + if self.directives['cpp_locals']: + entry.make_cpp_optional() + else: + type.check_nullary_constructor(pos) if in_pxd and visibility != 'extern': entry.defined_in_pxd = 1 entry.used = 1 if api: entry.api = 1 entry.used = 1 + if pytyping_modifiers: + entry.pytyping_modifiers = pytyping_modifiers return entry + def _reject_pytyping_modifiers(self, pos, modifiers, allowed=()): + if not modifiers: + return + for modifier in modifiers: + if modifier not in allowed: + error(pos, "Modifier '%s' is not allowed here." % modifier) + + def declare_assignment_expression_target(self, name, type, pos): + # In most cases declares the variable as normal. + # For generator expressions and comprehensions the variable is declared in their parent + return self.declare_var(name, type, pos) + def declare_builtin(self, name, pos): + name = self.mangle_class_private_name(name) return self.outer_scope.declare_builtin(name, pos) def _declare_pyfunction(self, name, pos, visibility='extern', entry=None): @@ -719,7 +814,7 @@ class Scope(object): entry.type = py_object_type elif entry.type is not py_object_type: return self._declare_pyfunction(name, pos, visibility=visibility, entry=entry) - else: # declare entry stub + else: # declare entry stub self.declare_var(name, py_object_type, pos, visibility=visibility) entry = self.declare_var(None, py_object_type, pos, cname=name, visibility='private') @@ -736,7 +831,7 @@ class Scope(object): qualified_name = self.qualify_name(lambda_name) entry = self.declare(None, func_cname, py_object_type, pos, 'private') - entry.name = lambda_name + entry.name = EncodedString(lambda_name) entry.qualified_name = qualified_name 
entry.pymethdef_cname = pymethdef_cname entry.func_cname = func_cname @@ -769,7 +864,8 @@ class Scope(object): entry.cname = cname entry.func_cname = cname if visibility != 'private' and visibility != entry.visibility: - warning(pos, "Function '%s' previously declared as '%s', now as '%s'" % (name, entry.visibility, visibility), 1) + warning(pos, "Function '%s' previously declared as '%s', now as '%s'" % ( + name, entry.visibility, visibility), 1) if overridable != entry.is_overridable: warning(pos, "Function '%s' previously declared as '%s'" % ( name, 'cpdef' if overridable else 'cdef'), 1) @@ -778,15 +874,15 @@ class Scope(object): entry.type = entry.type.with_with_gil(type.with_gil) else: if visibility == 'extern' and entry.visibility == 'extern': - can_override = False + can_override = self.is_builtin_scope if self.is_cpp(): can_override = True - elif cname: + elif cname and not can_override: # if all alternatives have different cnames, # it's safe to allow signature overrides for alt_entry in entry.all_alternatives(): if not alt_entry.cname or cname == alt_entry.cname: - break # cname not unique! + break # cname not unique! 
else: can_override = True if can_override: @@ -830,6 +926,23 @@ class Scope(object): type.entry = entry return entry + def declare_cgetter(self, name, return_type, pos=None, cname=None, + visibility="private", modifiers=(), defining=False, **cfunc_type_config): + assert all( + k in ('exception_value', 'exception_check', 'nogil', 'with_gil', 'is_const_method', 'is_static_method') + for k in cfunc_type_config + ) + cfunc_type = PyrexTypes.CFuncType( + return_type, + [PyrexTypes.CFuncTypeArg("self", self.parent_type, None)], + **cfunc_type_config) + entry = self.declare_cfunction( + name, cfunc_type, pos, cname=None, visibility=visibility, modifiers=modifiers, defining=defining) + entry.is_cgetter = True + if cname is not None: + entry.func_cname = cname + return entry + def add_cfunction(self, name, type, pos, cname, visibility, modifiers, inherited=False): # Add a C function entry without giving it a func_cname. entry = self.declare(name, cname, type, pos, visibility) @@ -875,37 +988,81 @@ class Scope(object): def lookup(self, name): # Look up name in this scope or an enclosing one. # Return None if not found. - return (self.lookup_here(name) - or (self.outer_scope and self.outer_scope.lookup(name)) - or None) + + mangled_name = self.mangle_class_private_name(name) + entry = (self.lookup_here(name) # lookup here also does mangling + or (self.outer_scope and self.outer_scope.lookup(mangled_name)) + or None) + if entry: + return entry + + # look up the original name in the outer scope + # Not strictly Python behaviour but see https://github.com/cython/cython/issues/3544 + entry = (self.outer_scope and self.outer_scope.lookup(name)) or None + if entry and entry.is_pyglobal: + self._emit_class_private_warning(entry.pos, name) + return entry def lookup_here(self, name): # Look up in this scope only, return None if not found. 
+ + entry = self.entries.get(self.mangle_class_private_name(name), None) + if entry: + return entry + # Also check the unmangled name in the current scope + # (even if mangling should give us something else). + # This is to support things like global __foo which makes a declaration for __foo + return self.entries.get(name, None) + + def lookup_here_unmangled(self, name): return self.entries.get(name, None) + def lookup_assignment_expression_target(self, name): + # For most cases behaves like "lookup_here". + # However, it does look outwards for comprehension and generator expression scopes + return self.lookup_here(name) + def lookup_target(self, name): # Look up name in this scope only. Declare as Python # variable if not found. entry = self.lookup_here(name) if not entry: + entry = self.lookup_here_unmangled(name) + if entry and entry.is_pyglobal: + self._emit_class_private_warning(entry.pos, name) + if not entry: entry = self.declare_var(name, py_object_type, None) return entry - def lookup_type(self, name): - entry = self.lookup(name) + def _type_or_specialized_type_from_entry(self, entry): if entry and entry.is_type: if entry.type.is_fused and self.fused_to_specific: return entry.type.specialize(self.fused_to_specific) return entry.type + def lookup_type(self, name): + entry = self.lookup(name) + # The logic here is: + # 1. if entry is a type then return it (and maybe specialize it) + # 2. if the entry comes from a known standard library import then follow that + # 3. 
repeat step 1 with the (possibly) updated entry + + tp = self._type_or_specialized_type_from_entry(entry) + if tp: + return tp + # allow us to find types from the "typing" module and similar + if entry and entry.known_standard_library_import: + from .Builtin import get_known_standard_library_entry + entry = get_known_standard_library_entry(entry.known_standard_library_import) + return self._type_or_specialized_type_from_entry(entry) + def lookup_operator(self, operator, operands): if operands[0].type.is_cpp_class: obj_type = operands[0].type method = obj_type.scope.lookup("operator%s" % operator) if method is not None: arg_types = [arg.type for arg in operands[1:]] - res = PyrexTypes.best_match([arg.type for arg in operands[1:]], - method.all_alternatives()) + res = PyrexTypes.best_match(arg_types, method.all_alternatives()) if res is not None: return res function = self.lookup("operator%s" % operator) @@ -915,7 +1072,7 @@ class Scope(object): # look-up nonmember methods listed within a class method_alternatives = [] - if len(operands)==2: # binary operators only + if len(operands) == 2: # binary operators only for n in range(2): if operands[n].type.is_cpp_class: obj_type = operands[n].type @@ -939,6 +1096,11 @@ class Scope(object): operands = [FakeOperand(pos, type=type) for type in types] return self.lookup_operator(operator, operands) + def _emit_class_private_warning(self, pos, name): + warning(pos, "Global name %s matched from within class scope " + "in contradiction to to Python 'class private name' rules. " + "This may change in a future release." 
% name, 1) + def use_utility_code(self, new_code): self.global_scope().use_utility_code(new_code) @@ -1000,9 +1162,9 @@ class BuiltinScope(Scope): Scope.__init__(self, "__builtin__", PreImportScope(), None) self.type_names = {} - for name, definition in sorted(self.builtin_entries.items()): - cname, type = definition - self.declare_var(name, type, None, cname) + # Most entries are initialized in init_builtins, except for "bool" + # which is apparently a special case because it conflicts with C++ bool + self.declare_var("bool", py_object_type, None, "((PyObject*)&PyBool_Type)") def lookup(self, name, language_level=None, str_is_str=None): # 'language_level' and 'str_is_str' are passed by ModuleScope @@ -1027,8 +1189,7 @@ class BuiltinScope(Scope): # If python_equiv == "*", the Python equivalent has the same name # as the entry, otherwise it has the name specified by python_equiv. name = EncodedString(name) - entry = self.declare_cfunction(name, type, None, cname, visibility='extern', - utility_code=utility_code) + entry = self.declare_cfunction(name, type, None, cname, visibility='extern', utility_code=utility_code) if python_equiv: if python_equiv == "*": python_equiv = name @@ -1043,9 +1204,10 @@ class BuiltinScope(Scope): entry.as_variable = var_entry return entry - def declare_builtin_type(self, name, cname, utility_code = None, objstruct_cname = None): + def declare_builtin_type(self, name, cname, utility_code=None, + objstruct_cname=None, type_class=PyrexTypes.BuiltinObjectType): name = EncodedString(name) - type = PyrexTypes.BuiltinObjectType(name, cname, objstruct_cname) + type = type_class(name, cname, objstruct_cname) scope = CClassScope(name, outer_scope=None, visibility='extern') scope.directives = {} if name == 'bool': @@ -1055,10 +1217,12 @@ class BuiltinScope(Scope): entry = self.declare_type(name, type, None, visibility='extern') entry.utility_code = utility_code - var_entry = Entry(name = entry.name, - type = self.lookup('type').type, # make sure 
"type" is the first type declared... - pos = entry.pos, - cname = entry.type.typeptr_cname) + var_entry = Entry( + name=entry.name, + type=self.lookup('type').type, # make sure "type" is the first type declared... + pos=entry.pos, + cname=entry.type.typeptr_cname, + ) var_entry.qualified_name = self.qualify_name(name) var_entry.is_variable = 1 var_entry.is_cglobal = 1 @@ -1075,36 +1239,8 @@ class BuiltinScope(Scope): def builtin_scope(self): return self - builtin_entries = { - - "type": ["((PyObject*)&PyType_Type)", py_object_type], - - "bool": ["((PyObject*)&PyBool_Type)", py_object_type], - "int": ["((PyObject*)&PyInt_Type)", py_object_type], - "long": ["((PyObject*)&PyLong_Type)", py_object_type], - "float": ["((PyObject*)&PyFloat_Type)", py_object_type], - "complex":["((PyObject*)&PyComplex_Type)", py_object_type], - - "bytes": ["((PyObject*)&PyBytes_Type)", py_object_type], - "bytearray": ["((PyObject*)&PyByteArray_Type)", py_object_type], - "str": ["((PyObject*)&PyString_Type)", py_object_type], - "unicode":["((PyObject*)&PyUnicode_Type)", py_object_type], - - "tuple": ["((PyObject*)&PyTuple_Type)", py_object_type], - "list": ["((PyObject*)&PyList_Type)", py_object_type], - "dict": ["((PyObject*)&PyDict_Type)", py_object_type], - "set": ["((PyObject*)&PySet_Type)", py_object_type], - "frozenset": ["((PyObject*)&PyFrozenSet_Type)", py_object_type], - - "slice": ["((PyObject*)&PySlice_Type)", py_object_type], -# "file": ["((PyObject*)&PyFile_Type)", py_object_type], # not in Py3 - - "None": ["Py_None", py_object_type], - "False": ["Py_False", py_object_type], - "True": ["Py_True", py_object_type], - } -const_counter = 1 # As a temporary solution for compiling code in pxds +const_counter = 1 # As a temporary solution for compiling code in pxds class ModuleScope(Scope): # module_name string Python name of the module @@ -1116,7 +1252,6 @@ class ModuleScope(Scope): # utility_code_list [UtilityCode] Queuing utility codes for forwarding to Code.py # c_includes {key: 
IncludeCode} C headers or verbatim code to be generated # See process_include() for more documentation - # string_to_entry {string : Entry} Map string const to entry # identifier_to_entry {string : Entry} Map identifier string const to entry # context Context # parent_module Scope Parent in the import namespace @@ -1136,19 +1271,13 @@ class ModuleScope(Scope): is_cython_builtin = 0 old_style_globals = 0 - def __init__(self, name, parent_module, context): + def __init__(self, name, parent_module, context, is_package=False): from . import Builtin self.parent_module = parent_module outer_scope = Builtin.builtin_scope Scope.__init__(self, name, outer_scope, parent_module) - if name == "__init__": - # Treat Spam/__init__.pyx specially, so that when Python loads - # Spam/__init__.so, initSpam() is defined. - self.module_name = parent_module.module_name - self.is_package = True - else: - self.module_name = name - self.is_package = False + self.is_package = is_package + self.module_name = name self.module_name = EncodedString(self.module_name) self.context = context self.module_cname = Naming.module_cname @@ -1239,7 +1368,7 @@ class ModuleScope(Scope): entry = self.declare(None, None, py_object_type, pos, 'private') if Options.cache_builtins and name not in Code.uncachable_builtins: entry.is_builtin = 1 - entry.is_const = 1 # cached + entry.is_const = 1 # cached entry.name = name entry.cname = Naming.builtin_prefix + name self.cached_builtins.append(entry) @@ -1261,9 +1390,16 @@ class ModuleScope(Scope): # explicit relative cimport # error of going beyond top-level is handled in cimport node relative_to = self - while relative_level > 0 and relative_to: + + top_level = 1 if self.is_package else 0 + # * top_level == 1 when file is __init__.pyx, current package (relative_to) is the current module + # i.e. dot in `from . import ...` points to the current package + # * top_level == 0 when file is regular module, current package (relative_to) is parent module + # i.e. 
dot in `from . import ...` points to the package where module is placed + while relative_level > top_level and relative_to: relative_to = relative_to.parent_module relative_level -= 1 + elif relative_level != 0: # -1 or None: try relative cimport first, then absolute relative_to = self.parent_module @@ -1273,7 +1409,7 @@ class ModuleScope(Scope): return module_scope.context.find_module( module_name, relative_to=relative_to, pos=pos, absolute_fallback=absolute_fallback) - def find_submodule(self, name): + def find_submodule(self, name, as_package=False): # Find and return scope for a submodule of this module, # creating a new empty one if necessary. Doesn't parse .pxd. if '.' in name: @@ -1282,10 +1418,10 @@ class ModuleScope(Scope): submodule = None scope = self.lookup_submodule(name) if not scope: - scope = ModuleScope(name, parent_module=self, context=self.context) + scope = ModuleScope(name, parent_module=self, context=self.context, is_package=True if submodule else as_package) self.module_entries[name] = scope if submodule: - scope = scope.find_submodule(submodule) + scope = scope.find_submodule(submodule, as_package=as_package) return scope def lookup_submodule(self, name): @@ -1364,7 +1500,7 @@ class ModuleScope(Scope): entry = self.lookup_here(name) if entry: if entry.is_pyglobal and entry.as_module is scope: - return entry # Already declared as the same module + return entry # Already declared as the same module if not (entry.is_pyglobal and not entry.as_module): # SAGE -- I put this here so Pyrex # cimport's work across directories. @@ -1382,14 +1518,15 @@ class ModuleScope(Scope): return entry def declare_var(self, name, type, pos, - cname = None, visibility = 'private', - api = 0, in_pxd = 0, is_cdef = 0): + cname=None, visibility='private', + api=False, in_pxd=False, is_cdef=False, pytyping_modifiers=None): # Add an entry for a global variable. 
If it is a Python # object type, and not declared with cdef, it will live # in the module dictionary, otherwise it will be a C # global variable. - if not visibility in ('private', 'public', 'extern'): + if visibility not in ('private', 'public', 'extern'): error(pos, "Module-level variable cannot be declared %s" % visibility) + self._reject_pytyping_modifiers(pos, pytyping_modifiers, ('typing.Optional',)) # let's allow at least this one if not is_cdef: if type is unspecified_type: type = py_object_type @@ -1425,7 +1562,7 @@ class ModuleScope(Scope): entry = Scope.declare_var(self, name, type, pos, cname=cname, visibility=visibility, - api=api, in_pxd=in_pxd, is_cdef=is_cdef) + api=api, in_pxd=in_pxd, is_cdef=is_cdef, pytyping_modifiers=pytyping_modifiers) if is_cdef: entry.is_cglobal = 1 if entry.type.declaration_value: @@ -1454,7 +1591,7 @@ class ModuleScope(Scope): entry = self.lookup_here(name) if entry and entry.defined_in_pxd: if entry.visibility != "private": - mangled_cname = self.mangle(Naming.var_prefix, name) + mangled_cname = self.mangle(Naming.func_prefix, name) if entry.cname == mangled_cname: cname = name entry.cname = cname @@ -1505,7 +1642,7 @@ class ModuleScope(Scope): if entry and not shadow: type = entry.type if not (entry.is_type and type.is_extension_type): - entry = None # Will cause redeclaration and produce an error + entry = None # Will cause redeclaration and produce an error else: scope = type.scope if typedef_flag and (not scope or scope.defined): @@ -1585,6 +1722,15 @@ class ModuleScope(Scope): if self.directives.get('final'): entry.type.is_final_type = True + collection_type = self.directives.get('collection_type') + if collection_type: + from .UtilityCode import NonManglingModuleScope + if not isinstance(self, NonManglingModuleScope): + # TODO - DW would like to make it public, but I'm making it internal-only + # for now to avoid adding new features without consensus + error(pos, "'collection_type' is not a public cython directive") 
+ if collection_type == 'sequence': + entry.type.has_sequence_flag = True # cdef classes are always exported, but we need to set it to # distinguish between unused Cython utility code extension classes @@ -1727,6 +1873,7 @@ class ModuleScope(Scope): class LocalScope(Scope): + is_local_scope = True # Does the function have a 'with gil:' block? has_with_gil_block = False @@ -1740,10 +1887,11 @@ class LocalScope(Scope): Scope.__init__(self, name, outer_scope, parent_scope) def mangle(self, prefix, name): - return prefix + name + return punycodify_name(prefix + name) def declare_arg(self, name, type, pos): # Add an entry for an argument of a function. + name = self.mangle_class_private_name(name) cname = self.mangle(Naming.var_prefix, name) entry = self.declare(name, cname, type, pos, 'private') entry.is_variable = 1 @@ -1755,14 +1903,15 @@ class LocalScope(Scope): return entry def declare_var(self, name, type, pos, - cname = None, visibility = 'private', - api = 0, in_pxd = 0, is_cdef = 0): + cname=None, visibility='private', + api=False, in_pxd=False, is_cdef=False, pytyping_modifiers=None): + name = self.mangle_class_private_name(name) # Add an entry for a local variable. 
if visibility in ('public', 'readonly'): error(pos, "Local variable cannot be declared %s" % visibility) entry = Scope.declare_var(self, name, type, pos, cname=cname, visibility=visibility, - api=api, in_pxd=in_pxd, is_cdef=is_cdef) + api=api, in_pxd=in_pxd, is_cdef=is_cdef, pytyping_modifiers=pytyping_modifiers) if entry.type.declaration_value: entry.init = entry.type.declaration_value entry.is_local = 1 @@ -1790,24 +1939,28 @@ class LocalScope(Scope): if entry is None or not entry.from_closure: error(pos, "no binding for nonlocal '%s' found" % name) + def _create_inner_entry_for_closure(self, name, entry): + entry.in_closure = True + inner_entry = InnerEntry(entry, self) + inner_entry.is_variable = True + self.entries[name] = inner_entry + return inner_entry + def lookup(self, name): # Look up name in this scope or an enclosing one. # Return None if not found. + entry = Scope.lookup(self, name) if entry is not None: entry_scope = entry.scope - while entry_scope.is_genexpr_scope: + while entry_scope.is_comprehension_scope: entry_scope = entry_scope.outer_scope if entry_scope is not self and entry_scope.is_closure_scope: if hasattr(entry.scope, "scope_class"): raise InternalError("lookup() after scope class created.") # The actual c fragment for the different scopes differs # on the outside and inside, so we make a new entry - entry.in_closure = True - inner_entry = InnerEntry(entry, self) - inner_entry.is_variable = True - self.entries[name] = inner_entry - return inner_entry + return self._create_inner_entry_for_closure(name, entry) return entry def mangle_closure_cnames(self, outer_scope_cname): @@ -1824,19 +1977,21 @@ class LocalScope(Scope): elif entry.in_closure: entry.original_cname = entry.cname entry.cname = "%s->%s" % (Naming.cur_scope_cname, entry.cname) + if entry.type.is_cpp_class and entry.scope.directives['cpp_locals']: + entry.make_cpp_optional() -class GeneratorExpressionScope(Scope): - """Scope for generator expressions and comprehensions. 
As opposed - to generators, these can be easily inlined in some cases, so all +class ComprehensionScope(Scope): + """Scope for comprehensions (but not generator expressions, which use ClosureScope). + As opposed to generators, these can be easily inlined in some cases, so all we really need is a scope that holds the loop variable(s). """ - is_genexpr_scope = True + is_comprehension_scope = True def __init__(self, outer_scope): parent_scope = outer_scope # TODO: also ignore class scopes? - while parent_scope.is_genexpr_scope: + while parent_scope.is_comprehension_scope: parent_scope = parent_scope.parent_scope name = parent_scope.global_scope().next_id(Naming.genexpr_id_ref) Scope.__init__(self, name, outer_scope, parent_scope) @@ -1845,7 +2000,7 @@ class GeneratorExpressionScope(Scope): # Class/ExtType scopes are filled at class creation time, i.e. from the # module init function or surrounding function. - while outer_scope.is_genexpr_scope or outer_scope.is_c_class_scope or outer_scope.is_py_class_scope: + while outer_scope.is_comprehension_scope or outer_scope.is_c_class_scope or outer_scope.is_py_class_scope: outer_scope = outer_scope.outer_scope self.var_entries = outer_scope.var_entries # keep declarations outside outer_scope.subscopes.add(self) @@ -1854,13 +2009,14 @@ class GeneratorExpressionScope(Scope): return '%s%s' % (self.genexp_prefix, self.parent_scope.mangle(prefix, name)) def declare_var(self, name, type, pos, - cname = None, visibility = 'private', - api = 0, in_pxd = 0, is_cdef = True): + cname=None, visibility='private', + api=False, in_pxd=False, is_cdef=True, pytyping_modifiers=None): if type is unspecified_type: # if the outer scope defines a type for this variable, inherit it outer_entry = self.outer_scope.lookup(name) if outer_entry and outer_entry.is_variable: - type = outer_entry.type # may still be 'unspecified_type' ! + type = outer_entry.type # may still be 'unspecified_type' ! 
+ self._reject_pytyping_modifiers(pos, pytyping_modifiers) # the parent scope needs to generate code for the variable, but # this scope must hold its name exclusively cname = '%s%s' % (self.genexp_prefix, self.parent_scope.mangle(Naming.var_prefix, name or self.next_id())) @@ -1875,6 +2031,10 @@ class GeneratorExpressionScope(Scope): self.entries[name] = entry return entry + def declare_assignment_expression_target(self, name, type, pos): + # should be declared in the parent scope instead + return self.parent_scope.declare_var(name, type, pos) + def declare_pyfunction(self, name, pos, allow_redefine=False): return self.outer_scope.declare_pyfunction( name, pos, allow_redefine) @@ -1885,6 +2045,12 @@ class GeneratorExpressionScope(Scope): def add_lambda_def(self, def_node): return self.outer_scope.add_lambda_def(def_node) + def lookup_assignment_expression_target(self, name): + entry = self.lookup_here(name) + if not entry: + entry = self.parent_scope.lookup_assignment_expression_target(name) + return entry + class ClosureScope(LocalScope): @@ -1906,17 +2072,36 @@ class ClosureScope(LocalScope): def declare_pyfunction(self, name, pos, allow_redefine=False): return LocalScope.declare_pyfunction(self, name, pos, allow_redefine, visibility='private') + def declare_assignment_expression_target(self, name, type, pos): + return self.declare_var(name, type, pos) + + +class GeneratorExpressionScope(ClosureScope): + is_generator_expression_scope = True + + def declare_assignment_expression_target(self, name, type, pos): + entry = self.parent_scope.declare_var(name, type, pos) + return self._create_inner_entry_for_closure(name, entry) + + def lookup_assignment_expression_target(self, name): + entry = self.lookup_here(name) + if not entry: + entry = self.parent_scope.lookup_assignment_expression_target(name) + if entry: + return self._create_inner_entry_for_closure(name, entry) + return entry + class StructOrUnionScope(Scope): # Namespace of a C struct or union. 
def __init__(self, name="?"): - Scope.__init__(self, name, None, None) + Scope.__init__(self, name, outer_scope=None, parent_scope=None) def declare_var(self, name, type, pos, - cname = None, visibility = 'private', - api = 0, in_pxd = 0, is_cdef = 0, - allow_pyobject=False, allow_memoryview=False): + cname=None, visibility='private', + api=False, in_pxd=False, is_cdef=False, pytyping_modifiers=None, + allow_pyobject=False, allow_memoryview=False, allow_refcounted=False): # Add an entry for an attribute. if not cname: cname = name @@ -1924,16 +2109,20 @@ class StructOrUnionScope(Scope): cname = c_safe_identifier(cname) if type.is_cfunction: type = PyrexTypes.CPtrType(type) + self._reject_pytyping_modifiers(pos, pytyping_modifiers) entry = self.declare(name, cname, type, pos, visibility) entry.is_variable = 1 self.var_entries.append(entry) - if type.is_pyobject and not allow_pyobject: - error(pos, "C struct/union member cannot be a Python object") - elif type.is_memoryviewslice and not allow_memoryview: - # Memory views wrap their buffer owner as a Python object. - error(pos, "C struct/union member cannot be a memory view") - if visibility != 'private': - error(pos, "C struct/union member cannot be declared %s" % visibility) + if type.is_pyobject: + if not allow_pyobject: + error(pos, "C struct/union member cannot be a Python object") + elif type.is_memoryviewslice: + if not allow_memoryview: + # Memory views wrap their buffer owner as a Python object. 
+ error(pos, "C struct/union member cannot be a memory view") + elif type.needs_refcounting: + if not allow_refcounted: + error(pos, "C struct/union member cannot be reference-counted type '%s'" % type) return entry def declare_cfunction(self, name, type, pos, @@ -1954,6 +2143,14 @@ class ClassScope(Scope): # declared in the class # doc string or None Doc string + def mangle_class_private_name(self, name): + # a few utilitycode names need to specifically be ignored + if name and name.lower().startswith("__pyx_"): + return name + if name and name.startswith('__') and not name.endswith('__'): + name = EncodedString('_%s%s' % (self.class_name.lstrip('_'), name)) + return name + def __init__(self, name, outer_scope): Scope.__init__(self, name, outer_scope, outer_scope) self.class_name = name @@ -1987,28 +2184,16 @@ class PyClassScope(ClassScope): is_py_class_scope = 1 - def mangle_class_private_name(self, name): - return self.mangle_special_name(name) - - def mangle_special_name(self, name): - if name and name.startswith('__') and not name.endswith('__'): - name = EncodedString('_%s%s' % (self.class_name.lstrip('_'), name)) - return name - - def lookup_here(self, name): - name = self.mangle_special_name(name) - return ClassScope.lookup_here(self, name) - def declare_var(self, name, type, pos, - cname = None, visibility = 'private', - api = 0, in_pxd = 0, is_cdef = 0): - name = self.mangle_special_name(name) + cname=None, visibility='private', + api=False, in_pxd=False, is_cdef=False, pytyping_modifiers=None): + name = self.mangle_class_private_name(name) if type is unspecified_type: type = py_object_type # Add an entry for a class attribute. 
entry = Scope.declare_var(self, name, type, pos, cname=cname, visibility=visibility, - api=api, in_pxd=in_pxd, is_cdef=is_cdef) + api=api, in_pxd=in_pxd, is_cdef=is_cdef, pytyping_modifiers=pytyping_modifiers) entry.is_pyglobal = 1 entry.is_pyclass_attr = 1 return entry @@ -2043,7 +2228,7 @@ class PyClassScope(ClassScope): class CClassScope(ClassScope): # Namespace of an extension type. # - # parent_type CClassType + # parent_type PyExtensionType # #typeobj_cname string or None # #objstruct_cname string # method_table_cname string @@ -2062,7 +2247,7 @@ class CClassScope(ClassScope): has_pyobject_attrs = False has_memoryview_attrs = False - has_cpp_class_attrs = False + has_cpp_constructable_attrs = False has_cyclic_pyobject_attrs = False defined = False implemented = False @@ -2087,6 +2272,22 @@ class CClassScope(ClassScope): return not self.parent_type.is_gc_simple return False + def needs_trashcan(self): + # If the trashcan directive is explicitly set to False, + # unconditionally disable the trashcan. + directive = self.directives.get('trashcan') + if directive is False: + return False + # If the directive is set to True and the class has Python-valued + # C attributes, then it should use the trashcan in tp_dealloc. + if directive and self.has_cyclic_pyobject_attrs: + return True + # Use the trashcan if the base class uses it + base_type = self.parent_type.base_type + if base_type and base_type.scope is not None: + return base_type.scope.needs_trashcan() + return self.parent_type.builtin_trashcan + def needs_tp_clear(self): """ Do we need to generate an implementation for the tp_clear slot? Can @@ -2094,6 +2295,25 @@ class CClassScope(ClassScope): """ return self.needs_gc() and not self.directives.get('no_gc_clear', False) + def may_have_finalize(self): + """ + This covers cases where we definitely have a __del__ function + and also cases where one of the base classes could have a __del__ + function but we don't know. 
+ """ + current_type_scope = self + while current_type_scope: + del_entry = current_type_scope.lookup_here("__del__") + if del_entry and del_entry.is_special: + return True + if (current_type_scope.parent_type.is_extern or not current_type_scope.implemented or + current_type_scope.parent_type.multiple_bases): + # we don't know if we have __del__, so assume we do and call it + return True + current_base_type = current_type_scope.parent_type.base_type + current_type_scope = current_base_type.scope if current_base_type else None + return False + def get_refcounted_entries(self, include_weakref=False, include_gc_simple=True): py_attrs = [] @@ -2114,15 +2334,30 @@ class CClassScope(ClassScope): return have_entries, (py_attrs, py_buffers, memoryview_slices) def declare_var(self, name, type, pos, - cname = None, visibility = 'private', - api = 0, in_pxd = 0, is_cdef = 0): + cname=None, visibility='private', + api=False, in_pxd=False, is_cdef=False, pytyping_modifiers=None): + name = self.mangle_class_private_name(name) + + if pytyping_modifiers: + if "typing.ClassVar" in pytyping_modifiers: + is_cdef = 0 + if not type.is_pyobject: + if not type.equivalent_type: + warning(pos, "ClassVar[] requires the type to be a Python object type. Found '%s', using object instead." % type) + type = py_object_type + else: + type = type.equivalent_type + if "dataclasses.InitVar" in pytyping_modifiers and not self.is_c_dataclass_scope: + error(pos, "Use of cython.dataclasses.InitVar does not make sense outside a dataclass") + if is_cdef: # Add an entry for an attribute. if self.defined: error(pos, "C attributes cannot be added in implementation part of" " extension type defined in a pxd") - if not self.is_closure_class_scope and get_special_method_signature(name): + if (not self.is_closure_class_scope and + get_slot_table(self.directives).get_special_method_signature(name)): error(pos, "The name '%s' is reserved for a special method." 
% name) @@ -2130,16 +2365,21 @@ class CClassScope(ClassScope): cname = name if visibility == 'private': cname = c_safe_identifier(cname) - if type.is_cpp_class and visibility != 'extern': - type.check_nullary_constructor(pos) - self.use_utility_code(Code.UtilityCode("#include <new>")) + cname = punycodify_name(cname, Naming.unicode_structmember_prefix) entry = self.declare(name, cname, type, pos, visibility) entry.is_variable = 1 self.var_entries.append(entry) + entry.pytyping_modifiers = pytyping_modifiers + if type.is_cpp_class and visibility != 'extern': + if self.directives['cpp_locals']: + entry.make_cpp_optional() + else: + type.check_nullary_constructor(pos) if type.is_memoryviewslice: self.has_memoryview_attrs = True - elif type.is_cpp_class: - self.has_cpp_class_attrs = True + elif type.needs_cpp_construction: + self.use_utility_code(Code.UtilityCode("#include <new>")) + self.has_cpp_constructable_attrs = True elif type.is_pyobject and (self.is_closure_class_scope or name != '__weakref__'): self.has_pyobject_attrs = True if (not type.is_builtin_type @@ -2167,12 +2407,13 @@ class CClassScope(ClassScope): # Add an entry for a class attribute. entry = Scope.declare_var(self, name, type, pos, cname=cname, visibility=visibility, - api=api, in_pxd=in_pxd, is_cdef=is_cdef) + api=api, in_pxd=in_pxd, is_cdef=is_cdef, pytyping_modifiers=pytyping_modifiers) entry.is_member = 1 - entry.is_pyglobal = 1 # xxx: is_pyglobal changes behaviour in so many places that - # I keep it in for now. is_member should be enough - # later on + # xxx: is_pyglobal changes behaviour in so many places that I keep it in for now. + # is_member should be enough later on + entry.is_pyglobal = 1 self.namespace_cname = "(PyObject *)%s" % self.parent_type.typeptr_cname + return entry def declare_pyfunction(self, name, pos, allow_redefine=False): @@ -2189,7 +2430,7 @@ class CClassScope(ClassScope): "in a future version of Pyrex and Cython. 
Use __cinit__ instead.") entry = self.declare_var(name, py_object_type, pos, visibility='extern') - special_sig = get_special_method_signature(name) + special_sig = get_slot_table(self.directives).get_special_method_signature(name) if special_sig: # Special methods get put in the method table with a particular # signature declared in advance. @@ -2220,7 +2461,9 @@ class CClassScope(ClassScope): def declare_cfunction(self, name, type, pos, cname=None, visibility='private', api=0, in_pxd=0, defining=0, modifiers=(), utility_code=None, overridable=False): - if get_special_method_signature(name) and not self.parent_type.is_builtin_type: + name = self.mangle_class_private_name(name) + if (get_slot_table(self.directives).get_special_method_signature(name) + and not self.parent_type.is_builtin_type): error(pos, "Special methods must be declared with 'def', not 'cdef'") args = type.args if not type.is_static_method: @@ -2231,10 +2474,11 @@ class CClassScope(ClassScope): (args[0].type, name, self.parent_type)) entry = self.lookup_here(name) if cname is None: - cname = c_safe_identifier(name) + cname = punycodify_name(c_safe_identifier(name), Naming.unicode_vtabentry_prefix) if entry: if not entry.is_cfunction: - warning(pos, "'%s' redeclared " % name, 0) + error(pos, "'%s' redeclared " % name) + entry.already_declared_here() else: if defining and entry.func_cname: error(pos, "'%s' already defined" % name) @@ -2246,13 +2490,14 @@ class CClassScope(ClassScope): entry.type = entry.type.with_with_gil(type.with_gil) elif type.compatible_signature_with(entry.type, as_cmethod = 1) and type.nogil == entry.type.nogil: if (self.defined and not in_pxd - and not type.same_c_signature_as_resolved_type(entry.type, as_cmethod = 1, as_pxd_definition = 1)): + and not type.same_c_signature_as_resolved_type( + entry.type, as_cmethod=1, as_pxd_definition=1)): # TODO(robertwb): Make this an error. 
warning(pos, "Compatible but non-identical C method '%s' not redeclared " "in definition part of extension type '%s'. " "This may cause incorrect vtables to be generated." % ( - name, self.class_name), 2) + name, self.class_name), 2) warning(entry.pos, "Previous declaration is here", 2) entry = self.add_cfunction(name, type, pos, cname, visibility='ignore', modifiers=modifiers) else: @@ -2272,8 +2517,7 @@ class CClassScope(ClassScope): if u'inline' in modifiers: entry.is_inline_cmethod = True - if (self.parent_type.is_final_type or entry.is_inline_cmethod or - self.directives.get('final')): + if self.parent_type.is_final_type or entry.is_inline_cmethod or self.directives.get('final'): entry.is_final_cmethod = True entry.final_func_cname = entry.func_cname @@ -2282,8 +2526,8 @@ class CClassScope(ClassScope): def add_cfunction(self, name, type, pos, cname, visibility, modifiers, inherited=False): # Add a cfunction entry without giving it a func_cname. prev_entry = self.lookup_here(name) - entry = ClassScope.add_cfunction(self, name, type, pos, cname, - visibility, modifiers, inherited=inherited) + entry = ClassScope.add_cfunction( + self, name, type, pos, cname, visibility, modifiers, inherited=inherited) entry.is_cmethod = 1 entry.prev_entry = prev_entry return entry @@ -2292,8 +2536,8 @@ class CClassScope(ClassScope): # overridden methods of builtin types still have their Python # equivalent that must be accessible to support bound methods name = EncodedString(name) - entry = self.declare_cfunction(name, type, None, cname, visibility='extern', - utility_code=utility_code) + entry = self.declare_cfunction( + name, type, pos=None, cname=cname, visibility='extern', utility_code=utility_code) var_entry = Entry(name, name, py_object_type) var_entry.qualified_name = name var_entry.is_variable = 1 @@ -2303,18 +2547,44 @@ class CClassScope(ClassScope): entry.as_variable = var_entry return entry - def declare_property(self, name, doc, pos): + def declare_property(self, 
name, doc, pos, ctype=None, property_scope=None): entry = self.lookup_here(name) if entry is None: - entry = self.declare(name, name, py_object_type, pos, 'private') - entry.is_property = 1 + entry = self.declare(name, name, py_object_type if ctype is None else ctype, pos, 'private') + entry.is_property = True + if ctype is not None: + entry.is_cproperty = True entry.doc = doc - entry.scope = PropertyScope(name, - outer_scope = self.global_scope(), parent_scope = self) - entry.scope.parent_type = self.parent_type + if property_scope is None: + entry.scope = PropertyScope(name, class_scope=self) + else: + entry.scope = property_scope self.property_entries.append(entry) return entry + def declare_cproperty(self, name, type, cfunc_name, doc=None, pos=None, visibility='extern', + nogil=False, with_gil=False, exception_value=None, exception_check=False, + utility_code=None): + """Internal convenience method to declare a C property function in one go. + """ + property_entry = self.declare_property(name, doc=doc, ctype=type, pos=pos) + cfunc_entry = property_entry.scope.declare_cfunction( + name=name, + type=PyrexTypes.CFuncType( + type, + [PyrexTypes.CFuncTypeArg("self", self.parent_type, pos=None)], + nogil=nogil, + with_gil=with_gil, + exception_value=exception_value, + exception_check=exception_check, + ), + cname=cfunc_name, + utility_code=utility_code, + visibility=visibility, + pos=pos, + ) + return property_entry, cfunc_entry + def declare_inherited_c_attributes(self, base_scope): # Declare entries for all the C attributes of an # inherited type, with cnames modified appropriately @@ -2328,6 +2598,8 @@ class CClassScope(ClassScope): base_entry.name, adapt(base_entry.cname), base_entry.type, None, 'private') entry.is_variable = 1 + entry.is_inherited = True + entry.annotation = base_entry.annotation self.inherited_var_entries.append(entry) # If the class defined in a pxd, specific entries have not been added. 
@@ -2343,9 +2615,9 @@ class CClassScope(ClassScope): is_builtin = var_entry and var_entry.is_builtin if not is_builtin: cname = adapt(cname) - entry = self.add_cfunction(base_entry.name, base_entry.type, - base_entry.pos, cname, - base_entry.visibility, base_entry.func_modifiers, inherited=True) + entry = self.add_cfunction( + base_entry.name, base_entry.type, base_entry.pos, cname, + base_entry.visibility, base_entry.func_modifiers, inherited=True) entry.is_inherited = 1 if base_entry.is_final_cmethod: entry.is_final_cmethod = True @@ -2379,11 +2651,12 @@ class CppClassScope(Scope): template_entry.is_type = 1 def declare_var(self, name, type, pos, - cname = None, visibility = 'extern', - api = 0, in_pxd = 0, is_cdef = 0, defining = 0): + cname=None, visibility='extern', + api=False, in_pxd=False, is_cdef=False, defining=False, pytyping_modifiers=None): # Add an entry for an attribute. if not cname: cname = name + self._reject_pytyping_modifiers(pos, pytyping_modifiers) entry = self.lookup_here(name) if defining and entry is not None: if entry.type.same_as(type): @@ -2409,7 +2682,7 @@ class CppClassScope(Scope): class_name = self.name.split('::')[-1] if name in (class_name, '__init__') and cname is None: cname = "%s__init__%s" % (Naming.func_prefix, class_name) - name = '<init>' + name = EncodedString('<init>') type.return_type = PyrexTypes.CVoidType() # This is called by the actual constructor, but need to support # arguments that cannot by called by value. 
@@ -2423,7 +2696,7 @@ class CppClassScope(Scope): type.args = [maybe_ref(arg) for arg in type.args] elif name == '__dealloc__' and cname is None: cname = "%s__dealloc__%s" % (Naming.func_prefix, class_name) - name = '<del>' + name = EncodedString('<del>') type.return_type = PyrexTypes.CVoidType() if name in ('<init>', '<del>') and type.nogil: for base in self.type.base_classes: @@ -2453,19 +2726,18 @@ class CppClassScope(Scope): # Declare entries for all the C++ attributes of an # inherited type, with cnames modified appropriately # to work with this type. - for base_entry in \ - base_scope.inherited_var_entries + base_scope.var_entries: - #constructor/destructor is not inherited - if base_entry.name in ("<init>", "<del>"): - continue - #print base_entry.name, self.entries - if base_entry.name in self.entries: - base_entry.name # FIXME: is there anything to do in this case? - entry = self.declare(base_entry.name, base_entry.cname, - base_entry.type, None, 'extern') - entry.is_variable = 1 - entry.is_inherited = 1 - self.inherited_var_entries.append(entry) + for base_entry in base_scope.inherited_var_entries + base_scope.var_entries: + #constructor/destructor is not inherited + if base_entry.name in ("<init>", "<del>"): + continue + #print base_entry.name, self.entries + if base_entry.name in self.entries: + base_entry.name # FIXME: is there anything to do in this case? 
+ entry = self.declare(base_entry.name, base_entry.cname, + base_entry.type, None, 'extern') + entry.is_variable = 1 + entry.is_inherited = 1 + self.inherited_var_entries.append(entry) for base_entry in base_scope.cfunc_entries: entry = self.declare_cfunction(base_entry.name, base_entry.type, base_entry.pos, base_entry.cname, @@ -2477,7 +2749,7 @@ class CppClassScope(Scope): if base_entry.name not in base_templates: entry = self.declare_type(base_entry.name, base_entry.type, base_entry.pos, base_entry.cname, - base_entry.visibility) + base_entry.visibility, defining=False) entry.is_inherited = 1 def specialize(self, values, type_entry): @@ -2507,6 +2779,23 @@ class CppClassScope(Scope): return scope +class CppScopedEnumScope(Scope): + # Namespace of a ScopedEnum + + def __init__(self, name, outer_scope): + Scope.__init__(self, name, outer_scope, None) + + def declare_var(self, name, type, pos, + cname=None, visibility='extern', pytyping_modifiers=None): + # Add an entry for an attribute. + if not cname: + cname = name + self._reject_pytyping_modifiers(pos, pytyping_modifiers) + entry = self.declare(name, cname, type, pos, visibility) + entry.is_variable = True + return entry + + class PropertyScope(Scope): # Scope holding the __get__, __set__ and __del__ methods for # a property of an extension type. @@ -2515,6 +2804,31 @@ class PropertyScope(Scope): is_property_scope = 1 + def __init__(self, name, class_scope): + # outer scope is None for some internal properties + outer_scope = class_scope.global_scope() if class_scope.outer_scope else None + Scope.__init__(self, name, outer_scope, parent_scope=class_scope) + self.parent_type = class_scope.parent_type + self.directives = class_scope.directives + + def declare_cfunction(self, name, type, pos, *args, **kwargs): + """Declare a C property function. 
+ """ + if type.return_type.is_void: + error(pos, "C property method cannot return 'void'") + + if type.args and type.args[0].type is py_object_type: + # Set 'self' argument type to extension type. + type.args[0].type = self.parent_scope.parent_type + elif len(type.args) != 1: + error(pos, "C property method must have a single (self) argument") + elif not (type.args[0].type.is_pyobject or type.args[0].type is self.parent_scope.parent_type): + error(pos, "C property method must have a single (object) argument") + + entry = Scope.declare_cfunction(self, name, type, pos, *args, **kwargs) + entry.is_cproperty = True + return entry + def declare_pyfunction(self, name, pos, allow_redefine=False): # Add an entry for a method. signature = get_property_accessor_signature(name) @@ -2529,23 +2843,27 @@ class PropertyScope(Scope): return None -class CConstScope(Scope): +class CConstOrVolatileScope(Scope): - def __init__(self, const_base_type_scope): + def __init__(self, base_type_scope, is_const=0, is_volatile=0): Scope.__init__( self, - 'const_' + const_base_type_scope.name, - const_base_type_scope.outer_scope, - const_base_type_scope.parent_scope) - self.const_base_type_scope = const_base_type_scope + 'cv_' + base_type_scope.name, + base_type_scope.outer_scope, + base_type_scope.parent_scope) + self.base_type_scope = base_type_scope + self.is_const = is_const + self.is_volatile = is_volatile def lookup_here(self, name): - entry = self.const_base_type_scope.lookup_here(name) + entry = self.base_type_scope.lookup_here(name) if entry is not None: entry = copy.copy(entry) - entry.type = PyrexTypes.c_const_type(entry.type) + entry.type = PyrexTypes.c_const_or_volatile_type( + entry.type, self.is_const, self.is_volatile) return entry + class TemplateScope(Scope): def __init__(self, name, outer_scope): Scope.__init__(self, name, outer_scope, None) diff --git a/Cython/Compiler/Tests/TestBuffer.py b/Cython/Compiler/Tests/TestBuffer.py index 1f69d9652..2f653d0ff 100644 --- 
a/Cython/Compiler/Tests/TestBuffer.py +++ b/Cython/Compiler/Tests/TestBuffer.py @@ -55,7 +55,7 @@ class TestBufferOptions(CythonTest): root = self.fragment(s, pipeline=[NormalizeTree(self), PostParse(self)]).root if not expect_error: vardef = root.stats[0].body.stats[0] - assert isinstance(vardef, CVarDefNode) # use normal assert as this is to validate the test code + assert isinstance(vardef, CVarDefNode) # use normal assert as this is to validate the test code buftype = vardef.base_type self.assertTrue(isinstance(buftype, TemplatedTypeNode)) self.assertTrue(isinstance(buftype.base_type_node, CSimpleBaseTypeNode)) @@ -99,7 +99,7 @@ class TestBufferOptions(CythonTest): # add exotic and impossible combinations as they come along... + if __name__ == '__main__': import unittest unittest.main() - diff --git a/Cython/Compiler/Tests/TestCmdLine.py b/Cython/Compiler/Tests/TestCmdLine.py index bd31da000..290efd1d7 100644 --- a/Cython/Compiler/Tests/TestCmdLine.py +++ b/Cython/Compiler/Tests/TestCmdLine.py @@ -1,8 +1,12 @@ - +import os import sys import re from unittest import TestCase try: + from unittest.mock import patch, Mock +except ImportError: # Py2 + from mock import patch, Mock +try: from StringIO import StringIO except ImportError: from io import StringIO # doesn't accept 'str' in Py2 @@ -10,38 +14,39 @@ except ImportError: from .. 
import Options from ..CmdLine import parse_command_line +from .Utils import backup_Options, restore_Options, check_global_options -def check_global_options(expected_options, white_list=[]): - """ - returns error message of "" if check Ok - """ - no_value = object() - for name, orig_value in expected_options.items(): - if name not in white_list: - if getattr(Options, name, no_value) != orig_value: - return "error in option " + name - return "" +unpatched_exists = os.path.exists +def patched_exists(path): + # avoid the Cython command raising a file not found error + if path in ( + 'source.pyx', + os.path.join('/work/dir', 'source.pyx'), + os.path.join('my_working_path', 'source.pyx'), + 'file.pyx', + 'file1.pyx', + 'file2.pyx', + 'file3.pyx', + 'foo.pyx', + 'bar.pyx', + ): + return True + return unpatched_exists(path) +@patch('os.path.exists', new=Mock(side_effect=patched_exists)) class CmdLineParserTest(TestCase): def setUp(self): - backup = {} - for name, value in vars(Options).items(): - backup[name] = value - self._options_backup = backup + self._options_backup = backup_Options() def tearDown(self): - no_value = object() - for name, orig_value in self._options_backup.items(): - if getattr(Options, name, no_value) != orig_value: - setattr(Options, name, orig_value) + restore_Options(self._options_backup) def check_default_global_options(self, white_list=[]): self.assertEqual(check_global_options(self._options_backup, white_list), "") def check_default_options(self, options, white_list=[]): - from ..Main import CompilationOptions, default_options - default_options = CompilationOptions(default_options) + default_options = Options.CompilationOptions(Options.default_options) no_value = object() for name in default_options.__dict__.keys(): if name not in white_list: @@ -121,6 +126,397 @@ class CmdLineParserTest(TestCase): self.assertEqual(Options.annotate_coverage_xml, 'cov.xml') self.assertTrue(options.gdb_debug) self.assertEqual(options.output_dir, '/gdb/outdir') + 
self.assertEqual(options.compiler_directives['wraparound'], False) + + def test_embed_before_positional(self): + options, sources = parse_command_line([ + '--embed', + 'source.pyx', + ]) + self.assertEqual(sources, ['source.pyx']) + self.assertEqual(Options.embed, 'main') + + def test_two_embeds(self): + options, sources = parse_command_line([ + '--embed', '--embed=huhu', + 'source.pyx', + ]) + self.assertEqual(sources, ['source.pyx']) + self.assertEqual(Options.embed, 'huhu') + + def test_two_embeds2(self): + options, sources = parse_command_line([ + '--embed=huhu', '--embed', + 'source.pyx', + ]) + self.assertEqual(sources, ['source.pyx']) + self.assertEqual(Options.embed, 'main') + + def test_no_annotate(self): + options, sources = parse_command_line([ + '--embed=huhu', 'source.pyx' + ]) + self.assertFalse(Options.annotate) + + def test_annotate_short(self): + options, sources = parse_command_line([ + '-a', + 'source.pyx', + ]) + self.assertEqual(Options.annotate, 'default') + + def test_annotate_long(self): + options, sources = parse_command_line([ + '--annotate', + 'source.pyx', + ]) + self.assertEqual(Options.annotate, 'default') + + def test_annotate_fullc(self): + options, sources = parse_command_line([ + '--annotate-fullc', + 'source.pyx', + ]) + self.assertEqual(Options.annotate, 'fullc') + + def test_short_w(self): + options, sources = parse_command_line([ + '-w', 'my_working_path', + 'source.pyx' + ]) + self.assertEqual(options.working_path, 'my_working_path') + self.check_default_global_options() + self.check_default_options(options, ['working_path']) + + def test_short_o(self): + options, sources = parse_command_line([ + '-o', 'my_output', + 'source.pyx' + ]) + self.assertEqual(options.output_file, 'my_output') + self.check_default_global_options() + self.check_default_options(options, ['output_file']) + + def test_short_z(self): + options, sources = parse_command_line([ + '-z', 'my_preimport', + 'source.pyx' + ]) + 
self.assertEqual(Options.pre_import, 'my_preimport') + self.check_default_global_options(['pre_import']) + self.check_default_options(options) + + def test_convert_range(self): + options, sources = parse_command_line([ + '--convert-range', + 'source.pyx' + ]) + self.assertEqual(Options.convert_range, True) + self.check_default_global_options(['convert_range']) + self.check_default_options(options) + + def test_line_directives(self): + options, sources = parse_command_line([ + '--line-directives', + 'source.pyx' + ]) + self.assertEqual(options.emit_linenums, True) + self.check_default_global_options() + self.check_default_options(options, ['emit_linenums']) + + def test_no_c_in_traceback(self): + options, sources = parse_command_line([ + '--no-c-in-traceback', + 'source.pyx' + ]) + self.assertEqual(options.c_line_in_traceback, False) + self.check_default_global_options() + self.check_default_options(options, ['c_line_in_traceback']) + + def test_gdb(self): + options, sources = parse_command_line([ + '--gdb', + 'source.pyx' + ]) + self.assertEqual(options.gdb_debug, True) + self.assertEqual(options.output_dir, os.curdir) + self.check_default_global_options() + self.check_default_options(options, ['gdb_debug', 'output_dir']) + + def test_3str(self): + options, sources = parse_command_line([ + '--3str', + 'source.pyx' + ]) + self.assertEqual(options.language_level, '3str') + self.check_default_global_options() + self.check_default_options(options, ['language_level']) + + def test_capi_reexport_cincludes(self): + options, sources = parse_command_line([ + '--capi-reexport-cincludes', + 'source.pyx' + ]) + self.assertEqual(options.capi_reexport_cincludes, True) + self.check_default_global_options() + self.check_default_options(options, ['capi_reexport_cincludes']) + + def test_fast_fail(self): + options, sources = parse_command_line([ + '--fast-fail', + 'source.pyx' + ]) + self.assertEqual(Options.fast_fail, True) + self.check_default_global_options(['fast_fail']) + 
self.check_default_options(options) + + def test_cimport_from_pyx(self): + options, sources = parse_command_line([ + '--cimport-from-pyx', + 'source.pyx' + ]) + self.assertEqual(Options.cimport_from_pyx, True) + self.check_default_global_options(['cimport_from_pyx']) + self.check_default_options(options) + + def test_Werror(self): + options, sources = parse_command_line([ + '-Werror', + 'source.pyx' + ]) + self.assertEqual(Options.warning_errors, True) + self.check_default_global_options(['warning_errors']) + self.check_default_options(options) + + def test_warning_errors(self): + options, sources = parse_command_line([ + '--warning-errors', + 'source.pyx' + ]) + self.assertEqual(Options.warning_errors, True) + self.check_default_global_options(['warning_errors']) + self.check_default_options(options) + + def test_Wextra(self): + options, sources = parse_command_line([ + '-Wextra', + 'source.pyx' + ]) + self.assertEqual(options.compiler_directives, Options.extra_warnings) + self.check_default_global_options() + self.check_default_options(options, ['compiler_directives']) + + def test_warning_extra(self): + options, sources = parse_command_line([ + '--warning-extra', + 'source.pyx' + ]) + self.assertEqual(options.compiler_directives, Options.extra_warnings) + self.check_default_global_options() + self.check_default_options(options, ['compiler_directives']) + + def test_old_style_globals(self): + options, sources = parse_command_line([ + '--old-style-globals', + 'source.pyx' + ]) + self.assertEqual(Options.old_style_globals, True) + self.check_default_global_options(['old_style_globals']) + self.check_default_options(options) + + def test_directive_multiple(self): + options, source = parse_command_line([ + '-X', 'cdivision=True', + '-X', 'c_string_type=bytes', + 'source.pyx' + ]) + self.assertEqual(options.compiler_directives['cdivision'], True) + self.assertEqual(options.compiler_directives['c_string_type'], 'bytes') + self.check_default_global_options() + 
self.check_default_options(options, ['compiler_directives']) + + def test_directive_multiple_v2(self): + options, source = parse_command_line([ + '-X', 'cdivision=True,c_string_type=bytes', + 'source.pyx' + ]) + self.assertEqual(options.compiler_directives['cdivision'], True) + self.assertEqual(options.compiler_directives['c_string_type'], 'bytes') + self.check_default_global_options() + self.check_default_options(options, ['compiler_directives']) + + def test_directive_value_yes(self): + options, source = parse_command_line([ + '-X', 'cdivision=YeS', + 'source.pyx' + ]) + self.assertEqual(options.compiler_directives['cdivision'], True) + self.check_default_global_options() + self.check_default_options(options, ['compiler_directives']) + + def test_directive_value_no(self): + options, source = parse_command_line([ + '-X', 'cdivision=no', + 'source.pyx' + ]) + self.assertEqual(options.compiler_directives['cdivision'], False) + self.check_default_global_options() + self.check_default_options(options, ['compiler_directives']) + + def test_directive_value_invalid(self): + self.assertRaises(ValueError, parse_command_line, [ + '-X', 'cdivision=sadfasd', + 'source.pyx' + ]) + + def test_directive_key_invalid(self): + self.assertRaises(ValueError, parse_command_line, [ + '-X', 'abracadabra', + 'source.pyx' + ]) + + def test_directive_no_value(self): + self.assertRaises(ValueError, parse_command_line, [ + '-X', 'cdivision', + 'source.pyx' + ]) + + def test_compile_time_env_short(self): + options, source = parse_command_line([ + '-E', 'MYSIZE=10', + 'source.pyx' + ]) + self.assertEqual(options.compile_time_env['MYSIZE'], 10) + self.check_default_global_options() + self.check_default_options(options, ['compile_time_env']) + + def test_compile_time_env_long(self): + options, source = parse_command_line([ + '--compile-time-env', 'MYSIZE=10', + 'source.pyx' + ]) + self.assertEqual(options.compile_time_env['MYSIZE'], 10) + self.check_default_global_options() + 
self.check_default_options(options, ['compile_time_env']) + + def test_compile_time_env_multiple(self): + options, source = parse_command_line([ + '-E', 'MYSIZE=10', '-E', 'ARRSIZE=11', + 'source.pyx' + ]) + self.assertEqual(options.compile_time_env['MYSIZE'], 10) + self.assertEqual(options.compile_time_env['ARRSIZE'], 11) + self.check_default_global_options() + self.check_default_options(options, ['compile_time_env']) + + def test_compile_time_env_multiple_v2(self): + options, source = parse_command_line([ + '-E', 'MYSIZE=10,ARRSIZE=11', + 'source.pyx' + ]) + self.assertEqual(options.compile_time_env['MYSIZE'], 10) + self.assertEqual(options.compile_time_env['ARRSIZE'], 11) + self.check_default_global_options() + self.check_default_options(options, ['compile_time_env']) + + def test_option_first(self): + options, sources = parse_command_line(['-V', 'file.pyx']) + self.assertEqual(sources, ['file.pyx']) + + def test_file_inbetween(self): + options, sources = parse_command_line(['-V', 'file.pyx', '-a']) + self.assertEqual(sources, ['file.pyx']) + + def test_option_trailing(self): + options, sources = parse_command_line(['file.pyx', '-V']) + self.assertEqual(sources, ['file.pyx']) + + def test_multiple_files(self): + options, sources = parse_command_line([ + 'file1.pyx', '-V', + 'file2.pyx', '-a', + 'file3.pyx' + ]) + self.assertEqual(sources, ['file1.pyx', 'file2.pyx', 'file3.pyx']) + + def test_debug_flags(self): + options, sources = parse_command_line([ + '--debug-disposal-code', '--debug-coercion', + 'file3.pyx' + ]) + from Cython.Compiler import DebugFlags + for name in ['debug_disposal_code', 'debug_temp_alloc', 'debug_coercion']: + self.assertEqual(getattr(DebugFlags, name), name in ['debug_disposal_code', 'debug_coercion']) + setattr(DebugFlags, name, 0) # restore original value + + def test_gdb_overwrites_gdb_outdir(self): + options, sources = parse_command_line([ + '--gdb-outdir=my_dir', '--gdb', + 'file3.pyx' + ]) + self.assertEqual(options.gdb_debug, 
True) + self.assertEqual(options.output_dir, os.curdir) + self.check_default_global_options() + self.check_default_options(options, ['gdb_debug', 'output_dir']) + + def test_gdb_first(self): + options, sources = parse_command_line([ + '--gdb', '--gdb-outdir=my_dir', + 'file3.pyx' + ]) + self.assertEqual(options.gdb_debug, True) + self.assertEqual(options.output_dir, 'my_dir') + self.check_default_global_options() + self.check_default_options(options, ['gdb_debug', 'output_dir']) + + def test_coverage_overwrites_annotation(self): + options, sources = parse_command_line([ + '--annotate-fullc', '--annotate-coverage=my.xml', + 'file3.pyx' + ]) + self.assertEqual(Options.annotate, True) + self.assertEqual(Options.annotate_coverage_xml, 'my.xml') + self.check_default_global_options(['annotate', 'annotate_coverage_xml']) + self.check_default_options(options) + + def test_coverage_first(self): + options, sources = parse_command_line([ + '--annotate-coverage=my.xml', '--annotate-fullc', + 'file3.pyx' + ]) + self.assertEqual(Options.annotate, 'fullc') + self.assertEqual(Options.annotate_coverage_xml, 'my.xml') + self.check_default_global_options(['annotate', 'annotate_coverage_xml']) + self.check_default_options(options) + + def test_annotate_first_fullc_second(self): + options, sources = parse_command_line([ + '--annotate', '--annotate-fullc', + 'file3.pyx' + ]) + self.assertEqual(Options.annotate, 'fullc') + self.check_default_global_options(['annotate']) + self.check_default_options(options) + + def test_annotate_fullc_first(self): + options, sources = parse_command_line([ + '--annotate-fullc', '--annotate', + 'file3.pyx' + ]) + self.assertEqual(Options.annotate, 'default') + self.check_default_global_options(['annotate']) + self.check_default_options(options) + + def test_warning_extra_dont_overwrite(self): + options, sources = parse_command_line([ + '-X', 'cdivision=True', + '--warning-extra', + '-X', 'c_string_type=bytes', + 'source.pyx' + ]) + 
self.assertTrue(len(options.compiler_directives), len(Options.extra_warnings) + 1) + self.check_default_global_options() + self.check_default_options(options, ['compiler_directives']) def test_module_name(self): options, sources = parse_command_line([ @@ -145,25 +541,38 @@ class CmdLineParserTest(TestCase): self.assertRaises(SystemExit, parse_command_line, list(args)) finally: sys.stderr = old_stderr - msg = stderr.getvalue().strip() - self.assertTrue(msg) + msg = stderr.getvalue() + err_msg = 'Message "{}"'.format(msg.strip()) + self.assertTrue(msg.startswith('usage: '), + '%s does not start with "usage :"' % err_msg) + self.assertTrue(': error: ' in msg, + '%s does not contain ": error :"' % err_msg) if regex: self.assertTrue(re.search(regex, msg), - '"%s" does not match search "%s"' % - (msg, regex)) + '%s does not match search "%s"' % + (err_msg, regex)) error(['-1'], - 'Unknown compiler flag: -1') - error(['-I']) - error(['--version=-a']) - error(['--version=--annotate=true']) - error(['--working']) - error(['--verbose=1']) - error(['--cleanup']) + 'unknown option -1') + error(['-I'], + 'argument -I/--include-dir: expected one argument') + error(['--version=-a'], + "argument -V/--version: ignored explicit argument '-a'") + error(['--version=--annotate=true'], + "argument -V/--version: ignored explicit argument " + "'--annotate=true'") + error(['--working'], + "argument -w/--working: expected one argument") + error(['--verbose=1'], + "argument -v/--verbose: ignored explicit argument '1'") + error(['--cleanup'], + "argument --cleanup: expected one argument") error(['--debug-disposal-code-wrong-name', 'file3.pyx'], - "Unknown debug flag: debug_disposal_code_wrong_name") - error(['--module-name', 'foo.pyx']) - error(['--module-name', 'foo.bar']) + "unknown option --debug-disposal-code-wrong-name") + error(['--module-name', 'foo.pyx'], + "Need at least one source file") + error(['--module-name', 'foo.bar'], + "Need at least one source file") error(['--module-name', 
'foo.bar', 'foo.pyx', 'bar.pyx'], "Only one source file allowed when using --module-name") error(['--module-name', 'foo.bar', '--timestamps', 'foo.pyx'], diff --git a/Cython/Compiler/Tests/TestGrammar.py b/Cython/Compiler/Tests/TestGrammar.py index 3dddc960b..852b48c33 100644 --- a/Cython/Compiler/Tests/TestGrammar.py +++ b/Cython/Compiler/Tests/TestGrammar.py @@ -7,9 +7,12 @@ Uses TreeFragment to test invalid syntax. from __future__ import absolute_import +import ast +import textwrap + from ...TestUtils import CythonTest -from ..Errors import CompileError from .. import ExprNodes +from ..Errors import CompileError # Copied from CPython's test_grammar.py VALID_UNDERSCORE_LITERALS = [ @@ -27,7 +30,15 @@ VALID_UNDERSCORE_LITERALS = [ '1e1_0', '.1_4', '.1_4e1', + '0b_0', + '0x_f', + '0o_5', + '1_00_00j', + '1_00_00.5j', + '1_00_00e5_1j', '.1_4j', + '(1_2.5+3_3j)', + '(.5_6j)', ] # Copied from CPython's test_grammar.py @@ -36,22 +47,29 @@ INVALID_UNDERSCORE_LITERALS = [ '0_', '42_', '1.4j_', + '0x_', '0b1_', '0xf_', '0o5_', + '0 if 1_Else 1', # Underscores in the base selector: '0_b0', '0_xf', '0_o5', - # Underscore right after the base selector: - '0b_0', - '0x_f', - '0o_5', # Old-style octal, still disallowed: - #'0_7', - #'09_99', - # Special case with exponent: - '0 if 1_Else 1', + # FIXME: still need to support PY_VERSION_HEX < 3 + '0_7', + '09_99', + # Multiple consecutive underscores: + '4_______2', + '0.1__4', + '0.1__4j', + '0b1001__0100', + '0xffff__ffff', + '0x___', + '0o5__77', + '1e1__0', + '1e1__0j', # Underscore right before a dot: '1_.4', '1_.4j', @@ -59,24 +77,24 @@ INVALID_UNDERSCORE_LITERALS = [ '1._4', '1._4j', '._5', + '._5j', # Underscore right after a sign: '1.0e+_1', - # Multiple consecutive underscores: - '4_______2', - '0.1__4', - '0b1001__0100', - '0xffff__ffff', - '0o5__77', - '1e1__0', + '1.0e+_1j', # Underscore right before j: '1.4_j', '1.4e5_j', # Underscore right before e: '1_e1', '1.4_e1', + '1.4_e1j', # Underscore right after e: 
'1e_1', '1.4e_1', + '1.4e_1j', + # Complex cases with parens: + '(1+1.5_j_)', + '(1+1.5_j)', # Whitespace in literals '1_ 2', '1 _2', @@ -88,6 +106,39 @@ INVALID_UNDERSCORE_LITERALS = [ ] +INVALID_ELLIPSIS = [ + (". . .", 2, 0), + (". ..", 2, 0), + (".. .", 2, 0), + (". ...", 2, 0), + (". ... .", 2, 0), + (".. ... .", 2, 0), + (". ... ..", 2, 0), + (""" + ( + . + .. + ) + """, 3, 4), + (""" + [ + .. + ., + None + ] + """, 3, 4), + (""" + { + None, + . + . + + . + } + """, 4, 4) +] + + class TestGrammar(CythonTest): def test_invalid_number_literals(self): @@ -117,11 +168,34 @@ class TestGrammar(CythonTest): # Add/MulNode() -> literal is first or second operand literal_node = literal_node.operand2 if i % 2 else literal_node.operand1 if 'j' in literal or 'J' in literal: - assert isinstance(literal_node, ExprNodes.ImagNode) + if '+' in literal: + # FIXME: tighten this test + assert isinstance(literal_node, ExprNodes.AddNode), (literal, literal_node) + else: + assert isinstance(literal_node, ExprNodes.ImagNode), (literal, literal_node) elif '.' 
in literal or 'e' in literal or 'E' in literal and not ('0x' in literal or '0X' in literal): - assert isinstance(literal_node, ExprNodes.FloatNode) + assert isinstance(literal_node, ExprNodes.FloatNode), (literal, literal_node) else: - assert isinstance(literal_node, ExprNodes.IntNode) + assert isinstance(literal_node, ExprNodes.IntNode), (literal, literal_node) + + def test_invalid_ellipsis(self): + ERR = ":{0}:{1}: Expected an identifier or literal" + for code, line, col in INVALID_ELLIPSIS: + try: + ast.parse(textwrap.dedent(code)) + except SyntaxError as exc: + assert True + else: + assert False, "Invalid Python code '%s' failed to raise an exception" % code + + try: + self.fragment(u'''\ + # cython: language_level=3 + ''' + code) + except CompileError as exc: + assert ERR.format(line, col) in str(exc), str(exc) + else: + assert False, "Invalid Cython code '%s' failed to raise an exception" % code if __name__ == "__main__": diff --git a/Cython/Compiler/Tests/TestMemView.py b/Cython/Compiler/Tests/TestMemView.py index 3792f26e9..1d04a17fc 100644 --- a/Cython/Compiler/Tests/TestMemView.py +++ b/Cython/Compiler/Tests/TestMemView.py @@ -53,11 +53,11 @@ class TestMemviewParsing(CythonTest): # we also test other similar declarations (buffers, anonymous C arrays) # since the parsing has to distinguish between them. 
- def disable_test_no_buf_arg(self): # TODO + def disable_test_no_buf_arg(self): # TODO self.not_parseable(u"Expected ']'", u"cdef extern foo(object[int, ndim=2])") - def disable_test_parse_sizeof(self): # TODO + def disable_test_parse_sizeof(self): # TODO self.parse(u"sizeof(int[NN])") self.parse(u"sizeof(int[])") self.parse(u"sizeof(int[][NN])") diff --git a/Cython/Compiler/Tests/TestParseTreeTransforms.py b/Cython/Compiler/Tests/TestParseTreeTransforms.py index 8a16f98cc..6e29263e5 100644 --- a/Cython/Compiler/Tests/TestParseTreeTransforms.py +++ b/Cython/Compiler/Tests/TestParseTreeTransforms.py @@ -5,7 +5,7 @@ from Cython.TestUtils import TransformTest from Cython.Compiler.ParseTreeTransforms import * from Cython.Compiler.ParseTreeTransforms import _calculate_pickle_checksums from Cython.Compiler.Nodes import * -from Cython.Compiler import Main, Symtab +from Cython.Compiler import Main, Symtab, Options class TestNormalizeTree(TransformTest): @@ -91,7 +91,7 @@ class TestNormalizeTree(TransformTest): t = self.run_pipeline([NormalizeTree(None)], u"pass") self.assertTrue(len(t.stats) == 0) -class TestWithTransform(object): # (TransformTest): # Disabled! +class TestWithTransform(object): # (TransformTest): # Disabled! 
def test_simplified(self): t = self.run_pipeline([WithTransform(None)], u""" @@ -179,8 +179,8 @@ class TestInterpretCompilerDirectives(TransformTest): def setUp(self): super(TestInterpretCompilerDirectives, self).setUp() - compilation_options = Main.CompilationOptions(Main.default_options) - ctx = compilation_options.create_context() + compilation_options = Options.CompilationOptions(Options.default_options) + ctx = Main.Context.from_options(compilation_options) transform = InterpretCompilerDirectives(ctx, ctx.compiler_directives) transform.module_scope = Symtab.ModuleScope('__main__', None, ctx) diff --git a/Cython/Compiler/Tests/TestScanning.py b/Cython/Compiler/Tests/TestScanning.py new file mode 100644 index 000000000..e9cac1b47 --- /dev/null +++ b/Cython/Compiler/Tests/TestScanning.py @@ -0,0 +1,136 @@ +from __future__ import unicode_literals + +import unittest +from io import StringIO +import string + +from .. import Scanning +from ..Symtab import ModuleScope +from ..TreeFragment import StringParseContext +from ..Errors import init_thread + +# generate some fake code - just a bunch of lines of the form "a0 a1 ..." 
+code = [] +for ch in string.ascii_lowercase: + line = " ".join(["%s%s" % (ch, n) for n in range(10)]) + code.append(line) +code = "\n".join(code) + +init_thread() + + +class TestScanning(unittest.TestCase): + def make_scanner(self): + source = Scanning.StringSourceDescriptor("fake code", code) + buf = StringIO(code) + context = StringParseContext("fake context") + scope = ModuleScope("fake_module", None, None) + + return Scanning.PyrexScanner(buf, source, scope=scope, context=context) + + def test_put_back_positions(self): + scanner = self.make_scanner() + + self.assertEqual(scanner.sy, "IDENT") + self.assertEqual(scanner.systring, "a0") + scanner.next() + self.assertEqual(scanner.sy, "IDENT") + self.assertEqual(scanner.systring, "a1") + a1pos = scanner.position() + self.assertEqual(a1pos[1:], (1, 3)) + a2peek = scanner.peek() # shouldn't mess up the position + self.assertEqual(a1pos, scanner.position()) + scanner.next() + self.assertEqual(a2peek, (scanner.sy, scanner.systring)) + + # find next line + while scanner.sy != "NEWLINE": + scanner.next() + + line_sy = [] + line_systring = [] + line_pos = [] + + scanner.next() + while scanner.sy != "NEWLINE": + line_sy.append(scanner.sy) + line_systring.append(scanner.systring) + line_pos.append(scanner.position()) + scanner.next() + + for sy, systring, pos in zip( + line_sy[::-1], line_systring[::-1], line_pos[::-1] + ): + scanner.put_back(sy, systring, pos) + + n = 0 + while scanner.sy != "NEWLINE": + self.assertEqual(scanner.sy, line_sy[n]) + self.assertEqual(scanner.systring, line_systring[n]) + self.assertEqual(scanner.position(), line_pos[n]) + scanner.next() + n += 1 + + self.assertEqual(n, len(line_pos)) + + def test_tentatively_scan(self): + scanner = self.make_scanner() + with Scanning.tentatively_scan(scanner) as errors: + while scanner.sy != "NEWLINE": + scanner.next() + self.assertFalse(errors) + + scanner.next() + self.assertEqual(scanner.systring, "b0") + pos = scanner.position() + with 
Scanning.tentatively_scan(scanner) as errors: + while scanner.sy != "NEWLINE": + scanner.next() + if scanner.systring == "b7": + scanner.error("Oh no not b7!") + break + self.assertTrue(errors) + self.assertEqual(scanner.systring, "b0") # state has been restored + self.assertEqual(scanner.position(), pos) + scanner.next() + self.assertEqual(scanner.systring, "b1") # and we can keep going again + scanner.next() + self.assertEqual(scanner.systring, "b2") # and we can keep going again + + with Scanning.tentatively_scan(scanner) as error: + scanner.error("Something has gone wrong with the current symbol") + self.assertEqual(scanner.systring, "b2") + scanner.next() + self.assertEqual(scanner.systring, "b3") + + # test a few combinations of nested scanning + sy1, systring1 = scanner.sy, scanner.systring + pos1 = scanner.position() + with Scanning.tentatively_scan(scanner): + scanner.next() + sy2, systring2 = scanner.sy, scanner.systring + pos2 = scanner.position() + with Scanning.tentatively_scan(scanner): + with Scanning.tentatively_scan(scanner): + scanner.next() + scanner.next() + scanner.error("Ooops") + self.assertEqual((scanner.sy, scanner.systring), (sy2, systring2)) + self.assertEqual((scanner.sy, scanner.systring), (sy2, systring2)) + scanner.error("eee") + self.assertEqual((scanner.sy, scanner.systring), (sy1, systring1)) + with Scanning.tentatively_scan(scanner): + scanner.next() + scanner.next() + with Scanning.tentatively_scan(scanner): + scanner.next() + # no error - but this block should be unwound by the outer block too + scanner.next() + scanner.error("Oooops") + self.assertEqual((scanner.sy, scanner.systring), (sy1, systring1)) + + + + +if __name__ == "__main__": + unittest.main() diff --git a/Cython/Compiler/Tests/TestSignatureMatching.py b/Cython/Compiler/Tests/TestSignatureMatching.py index 166bb225b..57647c873 100644 --- a/Cython/Compiler/Tests/TestSignatureMatching.py +++ b/Cython/Compiler/Tests/TestSignatureMatching.py @@ -47,7 +47,7 @@ class 
SignatureMatcherTest(unittest.TestCase): self.assertMatches(function_types[1], [pt.c_long_type, pt.c_int_type], functions) def test_cpp_reference_cpp_class(self): - classes = [ cppclasstype("Test%d"%i, []) for i in range(2) ] + classes = [ cppclasstype("Test%d" % i, []) for i in range(2) ] function_types = [ cfunctype(pt.CReferenceType(classes[0])), cfunctype(pt.CReferenceType(classes[1])), @@ -58,7 +58,7 @@ class SignatureMatcherTest(unittest.TestCase): self.assertMatches(function_types[1], [classes[1]], functions) def test_cpp_reference_cpp_class_and_int(self): - classes = [ cppclasstype("Test%d"%i, []) for i in range(2) ] + classes = [ cppclasstype("Test%d" % i, []) for i in range(2) ] function_types = [ cfunctype(pt.CReferenceType(classes[0]), pt.c_int_type), cfunctype(pt.CReferenceType(classes[0]), pt.c_long_type), diff --git a/Cython/Compiler/Tests/TestTreeFragment.py b/Cython/Compiler/Tests/TestTreeFragment.py index 9ee8da547..d2006b402 100644 --- a/Cython/Compiler/Tests/TestTreeFragment.py +++ b/Cython/Compiler/Tests/TestTreeFragment.py @@ -2,7 +2,6 @@ from Cython.TestUtils import CythonTest from Cython.Compiler.TreeFragment import * from Cython.Compiler.Nodes import * from Cython.Compiler.UtilNodes import * -import Cython.Compiler.Naming as Naming class TestTreeFragments(CythonTest): diff --git a/Cython/Compiler/Tests/TestTreePath.py b/Cython/Compiler/Tests/TestTreePath.py index bee53b3d2..b2013086b 100644 --- a/Cython/Compiler/Tests/TestTreePath.py +++ b/Cython/Compiler/Tests/TestTreePath.py @@ -1,5 +1,4 @@ import unittest -from Cython.Compiler.Visitor import PrintTree from Cython.TestUtils import TransformTest from Cython.Compiler.TreePath import find_first, find_all from Cython.Compiler import Nodes, ExprNodes diff --git a/Cython/Compiler/Tests/TestTypes.py b/Cython/Compiler/Tests/TestTypes.py index f2f6f3773..4693dd806 100644 --- a/Cython/Compiler/Tests/TestTypes.py +++ b/Cython/Compiler/Tests/TestTypes.py @@ -17,3 +17,59 @@ class 
TestMethodDispatcherTransform(unittest.TestCase): cenum = PT.CEnumType("E", "cenum", typedef_flag=False) assert_widest(PT.c_int_type, cenum, PT.c_int_type) + + +class TestTypeIdentifiers(unittest.TestCase): + + TEST_DATA = [ + ("char*", "char__ptr"), + ("char *", "char__ptr"), + ("char **", "char__ptr__ptr"), + ("_typedef", "_typedef"), + ("__typedef", "__dundertypedef"), + ("___typedef", "__dunder_typedef"), + ("____typedef", "__dunder__dundertypedef"), + ("_____typedef", "__dunder__dunder_typedef"), + ("const __typedef", "__const___dundertypedef"), + ("int[42]", "int__lArr42__rArr"), + ("int[:]", "int__lArr__D__rArr"), + ("int[:,:]", "int__lArr__D__comma___D__rArr"), + ("int[:,:,:]", "int__lArr__D__comma___D__comma___D__rArr"), + ("int[:,:,...]", "int__lArr__D__comma___D__comma___EL__rArr"), + ("std::vector", "std__in_vector"), + ("std::vector&&", "std__in_vector__fwref"), + ("const std::vector", "__const_std__in_vector"), + ("const std::vector&", "__const_std__in_vector__ref"), + ("const_std", "const_std"), + ] + + def test_escape_special_type_characters(self): + test_func = PT._escape_special_type_characters # keep test usage visible for IDEs + function_name = "_escape_special_type_characters" + self._test_escape(function_name) + + def test_type_identifier_for_declaration(self): + test_func = PT.type_identifier_from_declaration # keep test usage visible for IDEs + function_name = test_func.__name__ + self._test_escape(function_name) + + # differences due to whitespace removal + test_data = [ + ("const &std::vector", "const__refstd__in_vector"), + ("const &std::vector<int>", "const__refstd__in_vector__lAngint__rAng"), + ("const &&std::vector", "const__fwrefstd__in_vector"), + ("const &&&std::vector", "const__fwref__refstd__in_vector"), + ("const &&std::vector", "const__fwrefstd__in_vector"), + ("void (*func)(int x, float y)", + "975d51__void__lParen__ptrfunc__rParen__lParenint__space_x__comma_float__space_y__rParen__etc"), + ("float ** (*func)(int x, int[:] y)", 
+ "31883a__float__ptr__ptr__lParen__ptrfunc__rParen__lParenint__space_x__comma_int__lArr__D__rArry__rParen__etc"), + ] + self._test_escape(function_name, test_data) + + def _test_escape(self, func_name, test_data=TEST_DATA): + escape = getattr(PT, func_name) + for declaration, expected in test_data: + escaped_value = escape(declaration) + self.assertEqual(escaped_value, expected, "%s('%s') == '%s' != '%s'" % ( + func_name, declaration, escaped_value, expected)) diff --git a/Cython/Compiler/Tests/TestUtilityLoad.py b/Cython/Compiler/Tests/TestUtilityLoad.py index 3d1906ca0..44ae461ab 100644 --- a/Cython/Compiler/Tests/TestUtilityLoad.py +++ b/Cython/Compiler/Tests/TestUtilityLoad.py @@ -22,14 +22,11 @@ class TestUtilityLoader(unittest.TestCase): cls = Code.UtilityCode def test_load_as_string(self): - got = strip_2tup(self.cls.load_as_string(self.name)) - self.assertEqual(got, self.expected) - got = strip_2tup(self.cls.load_as_string(self.name, self.filename)) self.assertEqual(got, self.expected) def test_load(self): - utility = self.cls.load(self.name) + utility = self.cls.load(self.name, from_file=self.filename) got = strip_2tup((utility.proto, utility.impl)) self.assertEqual(got, self.expected) @@ -37,10 +34,6 @@ class TestUtilityLoader(unittest.TestCase): got = strip_2tup((required.proto, required.impl)) self.assertEqual(got, self.required) - utility = self.cls.load(self.name, from_file=self.filename) - got = strip_2tup((utility.proto, utility.impl)) - self.assertEqual(got, self.expected) - utility = self.cls.load_cached(self.name, from_file=self.filename) got = strip_2tup((utility.proto, utility.impl)) self.assertEqual(got, self.expected) @@ -59,11 +52,11 @@ class TestTempitaUtilityLoader(TestUtilityLoader): cls = Code.TempitaUtilityCode def test_load_as_string(self): - got = strip_2tup(self.cls.load_as_string(self.name, context=self.context)) + got = strip_2tup(self.cls.load_as_string(self.name, self.filename, context=self.context)) self.assertEqual(got, 
self.expected_tempita) def test_load(self): - utility = self.cls.load(self.name, context=self.context) + utility = self.cls.load(self.name, self.filename, context=self.context) got = strip_2tup((utility.proto, utility.impl)) self.assertEqual(got, self.expected_tempita) diff --git a/Cython/Compiler/Tests/Utils.py b/Cython/Compiler/Tests/Utils.py new file mode 100644 index 000000000..a158ecc50 --- /dev/null +++ b/Cython/Compiler/Tests/Utils.py @@ -0,0 +1,36 @@ +import copy + +from .. import Options + + +def backup_Options(): + backup = {} + for name, value in vars(Options).items(): + # we need a deep copy of _directive_defaults, because they can be changed + if name == '_directive_defaults': + value = copy.deepcopy(value) + backup[name] = value + return backup + + +def restore_Options(backup): + no_value = object() + for name, orig_value in backup.items(): + if getattr(Options, name, no_value) != orig_value: + setattr(Options, name, orig_value) + # strip Options from new keys that might have been added: + for name in vars(Options).keys(): + if name not in backup: + delattr(Options, name) + + +def check_global_options(expected_options, white_list=[]): + """ + returns error message of "" if check Ok + """ + no_value = object() + for name, orig_value in expected_options.items(): + if name not in white_list: + if getattr(Options, name, no_value) != orig_value: + return "error in option " + name + return "" diff --git a/Cython/Compiler/TreeFragment.py b/Cython/Compiler/TreeFragment.py index b85da8191..cef4469b5 100644 --- a/Cython/Compiler/TreeFragment.py +++ b/Cython/Compiler/TreeFragment.py @@ -29,8 +29,7 @@ class StringParseContext(Main.Context): include_directories = [] if compiler_directives is None: compiler_directives = {} - # TODO: see if "language_level=3" also works for our internal code here. 
- Main.Context.__init__(self, include_directories, compiler_directives, cpp=cpp, language_level=2) + Main.Context.__init__(self, include_directories, compiler_directives, cpp=cpp, language_level='3str') self.module_name = name def find_module(self, module_name, relative_to=None, pos=None, need_pxd=1, absolute_fallback=True): @@ -179,7 +178,7 @@ class TemplateTransform(VisitorTransform): if pos is None: pos = node.pos return ApplyPositionAndCopy(pos)(sub) else: - return self.visit_Node(node) # make copy as usual + return self.visit_Node(node) # make copy as usual def visit_NameNode(self, node): temphandle = self.tempmap.get(node.name) @@ -235,7 +234,7 @@ class TreeFragment(object): fmt_pxds[key] = fmt(value) mod = t = parse_from_strings(name, fmt_code, fmt_pxds, level=level, initial_pos=initial_pos) if level is None: - t = t.body # Make sure a StatListNode is at the top + t = t.body # Make sure a StatListNode is at the top if not isinstance(t, StatListNode): t = StatListNode(pos=mod.pos, stats=[t]) for transform in pipeline: diff --git a/Cython/Compiler/TypeInference.py b/Cython/Compiler/TypeInference.py index c7ffee7d2..447d352be 100644 --- a/Cython/Compiler/TypeInference.py +++ b/Cython/Compiler/TypeInference.py @@ -104,10 +104,11 @@ class MarkParallelAssignments(EnvTransform): is_special = False sequence = node.iterator.sequence target = node.target + iterator_scope = node.iterator.expr_scope or self.current_env() if isinstance(sequence, ExprNodes.SimpleCallNode): function = sequence.function if sequence.self is None and function.is_name: - entry = self.current_env().lookup(function.name) + entry = iterator_scope.lookup(function.name) if not entry or entry.is_builtin: if function.name == 'reversed' and len(sequence.args) == 1: sequence = sequence.args[0] @@ -115,7 +116,7 @@ class MarkParallelAssignments(EnvTransform): if target.is_sequence_constructor and len(target.args) == 2: iterator = sequence.args[0] if iterator.is_name: - iterator_type = 
iterator.infer_type(self.current_env()) + iterator_type = iterator.infer_type(iterator_scope) if iterator_type.is_builtin_type: # assume that builtin types have a length within Py_ssize_t self.mark_assignment( @@ -127,7 +128,7 @@ class MarkParallelAssignments(EnvTransform): if isinstance(sequence, ExprNodes.SimpleCallNode): function = sequence.function if sequence.self is None and function.is_name: - entry = self.current_env().lookup(function.name) + entry = iterator_scope.lookup(function.name) if not entry or entry.is_builtin: if function.name in ('range', 'xrange'): is_special = True @@ -140,7 +141,6 @@ class MarkParallelAssignments(EnvTransform): '+', sequence.args[0], sequence.args[2])) - if not is_special: # A for-loop basically translates to subsequent calls to # __getitem__(), so using an IndexNode here allows us to @@ -178,7 +178,7 @@ class MarkParallelAssignments(EnvTransform): return node def visit_FromCImportStatNode(self, node): - pass # Can't be assigned to... + return node # Can't be assigned to... def visit_FromImportStatNode(self, node): for name, target in node.items: @@ -308,10 +308,10 @@ class MarkOverflowingArithmetic(CythonTransform): def visit_SimpleCallNode(self, node): if node.function.is_name and node.function.name == 'abs': - # Overflows for minimum value of fixed size ints. - return self.visit_dangerous_node(node) + # Overflows for minimum value of fixed size ints. + return self.visit_dangerous_node(node) else: - return self.visit_neutral_node(node) + return self.visit_neutral_node(node) visit_UnopNode = visit_neutral_node @@ -359,10 +359,17 @@ class SimpleAssignmentTypeInferer(object): Note: in order to support cross-closure type inference, this must be applies to nested scopes in top-down order. 
""" - def set_entry_type(self, entry, entry_type): - entry.type = entry_type + def set_entry_type(self, entry, entry_type, scope): for e in entry.all_entries(): e.type = entry_type + if e.type.is_memoryviewslice: + # memoryview slices crash if they don't get initialized + e.init = e.type.default_value + if e.type.is_cpp_class: + if scope.directives['cpp_locals']: + e.make_cpp_optional() + else: + e.type.check_nullary_constructor(entry.pos) def infer_types(self, scope): enabled = scope.directives['infer_types'] @@ -370,12 +377,12 @@ class SimpleAssignmentTypeInferer(object): if enabled == True: spanning_type = aggressive_spanning_type - elif enabled is None: # safe mode + elif enabled is None: # safe mode spanning_type = safe_spanning_type else: for entry in scope.entries.values(): if entry.type is unspecified_type: - self.set_entry_type(entry, py_object_type) + self.set_entry_type(entry, py_object_type, scope) return # Set of assignments @@ -404,7 +411,7 @@ class SimpleAssignmentTypeInferer(object): else: entry = node.entry node_type = spanning_type( - types, entry.might_overflow, entry.pos, scope) + types, entry.might_overflow, scope) node.inferred_type = node_type def infer_name_node_type_partial(node): @@ -413,7 +420,7 @@ class SimpleAssignmentTypeInferer(object): if not types: return entry = node.entry - return spanning_type(types, entry.might_overflow, entry.pos, scope) + return spanning_type(types, entry.might_overflow, scope) def inferred_types(entry): has_none = False @@ -488,9 +495,9 @@ class SimpleAssignmentTypeInferer(object): types = inferred_types(entry) if types and all(types): entry_type = spanning_type( - types, entry.might_overflow, entry.pos, scope) + types, entry.might_overflow, scope) inferred.add(entry) - self.set_entry_type(entry, entry_type) + self.set_entry_type(entry, entry_type, scope) def reinfer(): dirty = False @@ -498,9 +505,9 @@ class SimpleAssignmentTypeInferer(object): for assmt in entry.cf_assignments: assmt.infer_type() types = 
inferred_types(entry) - new_type = spanning_type(types, entry.might_overflow, entry.pos, scope) + new_type = spanning_type(types, entry.might_overflow, scope) if new_type != entry.type: - self.set_entry_type(entry, new_type) + self.set_entry_type(entry, new_type, scope) dirty = True return dirty @@ -530,22 +537,20 @@ def find_spanning_type(type1, type2): return PyrexTypes.c_double_type return result_type -def simply_type(result_type, pos): +def simply_type(result_type): if result_type.is_reference: result_type = result_type.ref_base_type - if result_type.is_const: - result_type = result_type.const_base_type - if result_type.is_cpp_class: - result_type.check_nullary_constructor(pos) + if result_type.is_cv_qualified: + result_type = result_type.cv_base_type if result_type.is_array: result_type = PyrexTypes.c_ptr_type(result_type.base_type) return result_type -def aggressive_spanning_type(types, might_overflow, pos, scope): - return simply_type(reduce(find_spanning_type, types), pos) +def aggressive_spanning_type(types, might_overflow, scope): + return simply_type(reduce(find_spanning_type, types)) -def safe_spanning_type(types, might_overflow, pos, scope): - result_type = simply_type(reduce(find_spanning_type, types), pos) +def safe_spanning_type(types, might_overflow, scope): + result_type = simply_type(reduce(find_spanning_type, types)) if result_type.is_pyobject: # In theory, any specific Python type is always safe to # infer. However, inferring str can cause some existing code @@ -555,9 +560,11 @@ def safe_spanning_type(types, might_overflow, pos, scope): return py_object_type else: return result_type - elif result_type is PyrexTypes.c_double_type: + elif (result_type is PyrexTypes.c_double_type or + result_type is PyrexTypes.c_float_type): # Python's float type is just a C double, so it's safe to use - # the C type instead + # the C type instead. 
Similarly if given a C float, it leads to + # a small loss of precision vs Python but is otherwise the same return result_type elif result_type is PyrexTypes.c_bint_type: # find_spanning_type() only returns 'bint' for clean boolean @@ -577,8 +584,12 @@ def safe_spanning_type(types, might_overflow, pos, scope): # used, won't arise in pure Python, and there shouldn't be side # effects, so I'm declaring this safe. return result_type - # TODO: double complex should be OK as well, but we need - # to make sure everything is supported. + elif result_type.is_memoryviewslice: + return result_type + elif result_type is PyrexTypes.soft_complex_type: + return result_type + elif result_type == PyrexTypes.c_double_complex_type: + return result_type elif (result_type.is_int or result_type.is_enum) and not might_overflow: return result_type elif (not result_type.can_coerce_to_pyobject(scope) diff --git a/Cython/Compiler/TypeSlots.py b/Cython/Compiler/TypeSlots.py index 0ef17ede6..2c891f648 100644 --- a/Cython/Compiler/TypeSlots.py +++ b/Cython/Compiler/TypeSlots.py @@ -9,6 +9,8 @@ from . import Naming from . import PyrexTypes from .Errors import error +import copy + invisible = ['__cinit__', '__dealloc__', '__richcmp__', '__nonzero__', '__bool__'] @@ -23,6 +25,7 @@ class Signature(object): # fixed_arg_format string # ret_format string # error_value string + # use_fastcall boolean # # The formats are strings made up of the following # characters: @@ -49,6 +52,7 @@ class Signature(object): # '*' rest of args passed as generic Python # arg tuple and kw dict (must be last # char in format string) + # '?' 
optional object arg (currently for pow only) format_map = { 'O': PyrexTypes.py_object_type, @@ -68,6 +72,7 @@ class Signature(object): 'S': PyrexTypes.c_char_ptr_ptr_type, 'r': PyrexTypes.c_returncode_type, 'B': PyrexTypes.c_py_buffer_ptr_type, + '?': PyrexTypes.py_object_type # 'T', '-' and '*' are handled otherwise # and are not looked up in here } @@ -86,20 +91,27 @@ class Signature(object): 'z': "-1", } - def __init__(self, arg_format, ret_format): - self.has_dummy_arg = 0 - self.has_generic_args = 0 + # Use METH_FASTCALL instead of METH_VARARGS + use_fastcall = False + + def __init__(self, arg_format, ret_format, nogil=False): + self.has_dummy_arg = False + self.has_generic_args = False + self.optional_object_arg_count = 0 if arg_format[:1] == '-': - self.has_dummy_arg = 1 + self.has_dummy_arg = True arg_format = arg_format[1:] if arg_format[-1:] == '*': - self.has_generic_args = 1 + self.has_generic_args = True arg_format = arg_format[:-1] + if arg_format[-1:] == '?': + self.optional_object_arg_count += 1 self.fixed_arg_format = arg_format self.ret_format = ret_format self.error_value = self.error_value_map.get(ret_format, None) self.exception_check = ret_format != 'r' and self.error_value is not None self.is_staticmethod = False + self.nogil = nogil def __repr__(self): return '<Signature[%s(%s%s)]>' % ( @@ -107,7 +119,10 @@ class Signature(object): ', '.join(self.fixed_arg_format), '*' if self.has_generic_args else '') - def num_fixed_args(self): + def min_num_fixed_args(self): + return self.max_num_fixed_args() - self.optional_object_arg_count + + def max_num_fixed_args(self): return len(self.fixed_arg_format) def is_self_arg(self, i): @@ -135,7 +150,7 @@ class Signature(object): def function_type(self, self_arg_override=None): # Construct a C function type descriptor for this signature args = [] - for i in range(self.num_fixed_args()): + for i in range(self.max_num_fixed_args()): if self_arg_override is not None and self.is_self_arg(i): assert 
isinstance(self_arg_override, PyrexTypes.CFuncTypeArg) args.append(self_arg_override) @@ -149,7 +164,8 @@ class Signature(object): exc_value = self.exception_value() return PyrexTypes.CFuncType( ret_type, args, exception_value=exc_value, - exception_check=self.exception_check) + exception_check=self.exception_check, + nogil=self.nogil) def method_flags(self): if self.ret_format == "O": @@ -157,17 +173,50 @@ class Signature(object): if self.has_dummy_arg: full_args = "O" + full_args if full_args in ["O", "T"]: - if self.has_generic_args: - return [method_varargs, method_keywords] - else: + if not self.has_generic_args: return [method_noargs] + elif self.use_fastcall: + return [method_fastcall, method_keywords] + else: + return [method_varargs, method_keywords] elif full_args in ["OO", "TO"] and not self.has_generic_args: return [method_onearg] if self.is_staticmethod: - return [method_varargs, method_keywords] + if self.use_fastcall: + return [method_fastcall, method_keywords] + else: + return [method_varargs, method_keywords] return None + def method_function_type(self): + # Return the C function type + mflags = self.method_flags() + kw = "WithKeywords" if (method_keywords in mflags) else "" + for m in mflags: + if m == method_noargs or m == method_onearg: + return "PyCFunction" + if m == method_varargs: + return "PyCFunction" + kw + if m == method_fastcall: + return "__Pyx_PyCFunction_FastCall" + kw + return None + + def with_fastcall(self): + # Return a copy of this Signature with use_fastcall=True + sig = copy.copy(self) + sig.use_fastcall = True + return sig + + @property + def fastvar(self): + # Used to select variants of functions, one dealing with METH_VARARGS + # and one dealing with __Pyx_METH_FASTCALL + if self.use_fastcall: + return "FASTCALL" + else: + return "VARARGS" + class SlotDescriptor(object): # Abstract base class for type slot descriptors. 
@@ -178,15 +227,26 @@ class SlotDescriptor(object): # py3 Indicates presence of slot in Python 3 # py2 Indicates presence of slot in Python 2 # ifdef Full #ifdef string that slot is wrapped in. Using this causes py3, py2 and flags to be ignored.) + # used_ifdef Full #ifdef string that the slot value is wrapped in (otherwise it is assigned NULL) + # Unlike "ifdef" the slot is defined and this just controls if it receives a value def __init__(self, slot_name, dynamic=False, inherited=False, - py3=True, py2=True, ifdef=None): + py3=True, py2=True, ifdef=None, is_binop=False, + used_ifdef=None): self.slot_name = slot_name self.is_initialised_dynamically = dynamic self.is_inherited = inherited self.ifdef = ifdef + self.used_ifdef = used_ifdef self.py3 = py3 self.py2 = py2 + self.is_binop = is_binop + + def slot_code(self, scope): + raise NotImplementedError() + + def spec_value(self, scope): + return self.slot_code(scope) def preprocessor_guard_code(self): ifdef = self.ifdef @@ -194,13 +254,30 @@ class SlotDescriptor(object): py3 = self.py3 guard = None if ifdef: - guard = ("#if %s" % ifdef) + guard = "#if %s" % ifdef elif not py3 or py3 == '<RESERVED>': - guard = ("#if PY_MAJOR_VERSION < 3") + guard = "#if PY_MAJOR_VERSION < 3" elif not py2: - guard = ("#if PY_MAJOR_VERSION >= 3") + guard = "#if PY_MAJOR_VERSION >= 3" return guard + def generate_spec(self, scope, code): + if self.is_initialised_dynamically: + return + value = self.spec_value(scope) + if value == "0": + return + preprocessor_guard = self.preprocessor_guard_code() + if not preprocessor_guard: + if self.py3 and self.slot_name.startswith('bf_'): + # The buffer protocol requires Limited API 3.11, so check if the spec slots are available. 
+ preprocessor_guard = "#if defined(Py_%s)" % self.slot_name + if preprocessor_guard: + code.putln(preprocessor_guard) + code.putln("{Py_%s, (void *)%s}," % (self.slot_name, value)) + if preprocessor_guard: + code.putln("#endif") + def generate(self, scope, code): preprocessor_guard = self.preprocessor_guard_code() if preprocessor_guard: @@ -215,7 +292,7 @@ class SlotDescriptor(object): # PyPy currently has a broken PyType_Ready() that fails to # inherit some slots. To work around this, we explicitly # set inherited slots here, but only in PyPy since CPython - # handles this better than we do. + # handles this better than we do (except for buffer slots in type specs). inherited_value = value current_scope = scope while (inherited_value == "0" @@ -225,12 +302,20 @@ class SlotDescriptor(object): current_scope = current_scope.parent_type.base_type.scope inherited_value = self.slot_code(current_scope) if inherited_value != "0": - code.putln("#if CYTHON_COMPILING_IN_PYPY") + # we always need inherited buffer slots for the type spec + is_buffer_slot = int(self.slot_name in ("bf_getbuffer", "bf_releasebuffer")) + code.putln("#if CYTHON_COMPILING_IN_PYPY || %d" % is_buffer_slot) code.putln("%s, /*%s*/" % (inherited_value, self.slot_name)) code.putln("#else") end_pypy_guard = True + if self.used_ifdef: + code.putln("#if %s" % self.used_ifdef) code.putln("%s, /*%s*/" % (value, self.slot_name)) + if self.used_ifdef: + code.putln("#else") + code.putln("NULL, /*%s*/" % self.slot_name) + code.putln("#endif") if end_pypy_guard: code.putln("#endif") @@ -248,14 +333,20 @@ class SlotDescriptor(object): def generate_dynamic_init_code(self, scope, code): if self.is_initialised_dynamically: - value = self.slot_code(scope) - if value != "0": - code.putln("%s.%s = %s;" % ( - scope.parent_type.typeobj_cname, - self.slot_name, - value - ) - ) + self.generate_set_slot_code( + self.slot_code(scope), scope, code) + + def generate_set_slot_code(self, value, scope, code): + if value == "0": + 
return + + if scope.parent_type.typeptr_cname: + target = "%s->%s" % (scope.parent_type.typeptr_cname, self.slot_name) + else: + assert scope.parent_type.typeobj_cname + target = "%s.%s" % (scope.parent_type.typeobj_cname, self.slot_name) + + code.putln("%s = %s;" % (target, value)) class FixedSlot(SlotDescriptor): @@ -285,8 +376,8 @@ class MethodSlot(SlotDescriptor): # method_name string The __xxx__ name of the method # alternatives [string] Alternative list of __xxx__ names for the method - def __init__(self, signature, slot_name, method_name, fallback=None, - py3=True, py2=True, ifdef=None, inherited=True): + def __init__(self, signature, slot_name, method_name, method_name_to_slot, + fallback=None, py3=True, py2=True, ifdef=None, inherited=True): SlotDescriptor.__init__(self, slot_name, py3=py3, py2=py2, ifdef=ifdef, inherited=inherited) self.signature = signature @@ -360,30 +451,61 @@ class GCClearReferencesSlot(GCDependentSlot): class ConstructorSlot(InternalMethodSlot): # Descriptor for tp_new and tp_dealloc. - def __init__(self, slot_name, method, **kargs): + def __init__(self, slot_name, method=None, **kargs): InternalMethodSlot.__init__(self, slot_name, **kargs) self.method = method - def slot_code(self, scope): - entry = scope.lookup_here(self.method) - if (self.slot_name != 'tp_new' - and scope.parent_type.base_type + def _needs_own(self, scope): + if (scope.parent_type.base_type and not scope.has_pyobject_attrs and not scope.has_memoryview_attrs - and not scope.has_cpp_class_attrs - and not (entry and entry.is_special)): + and not scope.has_cpp_constructable_attrs + and not (self.slot_name == 'tp_new' and scope.parent_type.vtabslot_cname)): + entry = scope.lookup_here(self.method) if self.method else None + if not (entry and entry.is_special): + return False + # Unless we can safely delegate to the parent, all types need a tp_new(). 
+ return True + + def _parent_slot_function(self, scope): + parent_type_scope = scope.parent_type.base_type.scope + if scope.parent_scope is parent_type_scope.parent_scope: + entry = scope.parent_scope.lookup_here(scope.parent_type.base_type.name) + if entry.visibility != 'extern': + return self.slot_code(parent_type_scope) + return None + + def slot_code(self, scope): + if not self._needs_own(scope): # if the type does not have object attributes, it can # delegate GC methods to its parent - iff the parent # functions are defined in the same module - parent_type_scope = scope.parent_type.base_type.scope - if scope.parent_scope is parent_type_scope.parent_scope: - entry = scope.parent_scope.lookup_here(scope.parent_type.base_type.name) - if entry.visibility != 'extern': - return self.slot_code(parent_type_scope) - if entry and not entry.is_special: - return "0" + slot_code = self._parent_slot_function(scope) + return slot_code or '0' return InternalMethodSlot.slot_code(self, scope) + def spec_value(self, scope): + slot_function = self.slot_code(scope) + if self.slot_name == "tp_dealloc" and slot_function != scope.mangle_internal("tp_dealloc"): + # Not used => inherit from base type. + return "0" + return slot_function + + def generate_dynamic_init_code(self, scope, code): + if self.slot_code(scope) != '0': + return + # If we don't have our own slot function and don't know the + # parent function statically, copy it dynamically. 
+ base_type = scope.parent_type.base_type + if base_type.typeptr_cname: + src = '%s->%s' % (base_type.typeptr_cname, self.slot_name) + elif base_type.is_extension_type and base_type.typeobj_cname: + src = '%s.%s' % (base_type.typeobj_cname, self.slot_name) + else: + return + + self.generate_set_slot_code(src, scope, code) + class SyntheticSlot(InternalMethodSlot): # Type slot descriptor for a synthesized method which @@ -404,6 +526,20 @@ class SyntheticSlot(InternalMethodSlot): else: return self.default_value + def spec_value(self, scope): + return self.slot_code(scope) + + +class BinopSlot(SyntheticSlot): + def __init__(self, signature, slot_name, left_method, method_name_to_slot, **kargs): + assert left_method.startswith('__') + right_method = '__r' + left_method[2:] + SyntheticSlot.__init__( + self, slot_name, [left_method, right_method], "0", is_binop=True, **kargs) + # MethodSlot causes special method registration. + self.left_slot = MethodSlot(signature, "", left_method, method_name_to_slot, **kargs) + self.right_slot = MethodSlot(signature, "", right_method, method_name_to_slot, **kargs) + class RichcmpSlot(MethodSlot): def slot_code(self, scope): @@ -432,8 +568,16 @@ class TypeFlagsSlot(SlotDescriptor): value += "|Py_TPFLAGS_BASETYPE" if scope.needs_gc(): value += "|Py_TPFLAGS_HAVE_GC" + if scope.may_have_finalize(): + value += "|Py_TPFLAGS_HAVE_FINALIZE" + if scope.parent_type.has_sequence_flag: + value += "|Py_TPFLAGS_SEQUENCE" return value + def generate_spec(self, scope, code): + # Flags are stored in the PyType_Spec, not in a PyType_Slot. + return + class DocStringSlot(SlotDescriptor): # Descriptor for the docstring slot. 
@@ -444,7 +588,7 @@ class DocStringSlot(SlotDescriptor): return "0" if doc.is_unicode: doc = doc.as_utf8_string() - return doc.as_c_string_literal() + return "PyDoc_STR(%s)" % doc.as_c_string_literal() class SuiteSlot(SlotDescriptor): @@ -452,7 +596,7 @@ class SuiteSlot(SlotDescriptor): # # sub_slots [SlotDescriptor] - def __init__(self, sub_slots, slot_type, slot_name, ifdef=None): + def __init__(self, sub_slots, slot_type, slot_name, substructures, ifdef=None): SlotDescriptor.__init__(self, slot_name, ifdef=ifdef) self.sub_slots = sub_slots self.slot_type = slot_type @@ -487,7 +631,9 @@ class SuiteSlot(SlotDescriptor): if self.ifdef: code.putln("#endif") -substructures = [] # List of all SuiteSlot instances + def generate_spec(self, scope, code): + for slot in self.sub_slots: + slot.generate_spec(scope, code) class MethodTableSlot(SlotDescriptor): # Slot descriptor for the method table. @@ -503,8 +649,42 @@ class MemberTableSlot(SlotDescriptor): # Slot descriptor for the table of Python-accessible attributes. def slot_code(self, scope): + # Only used in specs. 
return "0" + def get_member_specs(self, scope): + return [ + get_slot_by_name("tp_dictoffset", scope.directives).members_slot_value(scope), + #get_slot_by_name("tp_weaklistoffset").spec_value(scope), + ] + + def is_empty(self, scope): + for member_entry in self.get_member_specs(scope): + if member_entry: + return False + return True + + def substructure_cname(self, scope): + return "%s%s_%s" % (Naming.pyrex_prefix, self.slot_name, scope.class_name) + + def generate_substructure_spec(self, scope, code): + if self.is_empty(scope): + return + from .Code import UtilityCode + code.globalstate.use_utility_code(UtilityCode.load_cached("IncludeStructmemberH", "ModuleSetupCode.c")) + + code.putln("static struct PyMemberDef %s[] = {" % self.substructure_cname(scope)) + for member_entry in self.get_member_specs(scope): + if member_entry: + code.putln(member_entry) + code.putln("{NULL, 0, 0, 0, NULL}") + code.putln("};") + + def spec_value(self, scope): + if self.is_empty(scope): + return "0" + return self.substructure_cname(scope) + class GetSetSlot(SlotDescriptor): # Slot descriptor for the table of attribute get & set methods. @@ -520,13 +700,13 @@ class BaseClassSlot(SlotDescriptor): # Slot descriptor for the base class slot. def __init__(self, name): - SlotDescriptor.__init__(self, name, dynamic = 1) + SlotDescriptor.__init__(self, name, dynamic=True) def generate_dynamic_init_code(self, scope, code): base_type = scope.parent_type.base_type if base_type: - code.putln("%s.%s = %s;" % ( - scope.parent_type.typeobj_cname, + code.putln("%s->%s = %s;" % ( + scope.parent_type.typeptr_cname, self.slot_name, base_type.typeptr_cname)) @@ -551,10 +731,11 @@ class DictOffsetSlot(SlotDescriptor): else: return "0" - -# The following dictionary maps __xxx__ method names to slot descriptors. 
- -method_name_to_slot = {} + def members_slot_value(self, scope): + dict_offset = self.slot_code(scope) + if dict_offset == "0": + return None + return '{"__dictoffset__", T_PYSSIZET, %s, READONLY, NULL},' % dict_offset ## The following slots are (or could be) initialised with an ## extern function pointer. @@ -569,17 +750,6 @@ method_name_to_slot = {} # #------------------------------------------------------------------------------------------ -def get_special_method_signature(name): - # Given a method name, if it is a special method, - # return its signature, else return None. - slot = method_name_to_slot.get(name) - if slot: - return slot.signature - elif name in richcmp_special_methods: - return ibinaryfunc - else: - return None - def get_property_accessor_signature(name): # Return signature of accessor for an extension type @@ -592,7 +762,7 @@ def get_base_slot_function(scope, slot): # This is useful for enabling the compiler to optimize calls # that recursively climb the class hierarchy. base_type = scope.parent_type.base_type - if scope.parent_scope is base_type.scope.parent_scope: + if base_type and scope.parent_scope is base_type.scope.parent_scope: parent_slot = slot.slot_code(base_type.scope) if parent_slot != '0': entry = scope.parent_scope.lookup_here(scope.parent_type.base_type.name) @@ -613,16 +783,16 @@ def get_slot_function(scope, slot): return None -def get_slot_by_name(slot_name): +def get_slot_by_name(slot_name, compiler_directives): # For now, only search the type struct, no referenced sub-structs. 
- for slot in slot_table: + for slot in get_slot_table(compiler_directives).slot_table: if slot.slot_name == slot_name: return slot assert False, "Slot not found: %s" % slot_name def get_slot_code_by_name(scope, slot_name): - slot = get_slot_by_name(slot_name) + slot = get_slot_by_name(slot_name, scope.directives) return slot.slot_code(scope) def is_reverse_number_slot(name): @@ -634,8 +804,8 @@ def is_reverse_number_slot(name): """ if name.startswith("__r") and name.endswith("__"): forward_name = name.replace("r", "", 1) - for meth in PyNumberMethods: - if getattr(meth, "method_name", None) == forward_name: + for meth in get_slot_table(None).PyNumberMethods: + if hasattr(meth, "right_slot"): return True return False @@ -668,8 +838,8 @@ pyfunction_onearg = Signature("-O", "O") unaryfunc = Signature("T", "O") # typedef PyObject * (*unaryfunc)(PyObject *); binaryfunc = Signature("OO", "O") # typedef PyObject * (*binaryfunc)(PyObject *, PyObject *); ibinaryfunc = Signature("TO", "O") # typedef PyObject * (*binaryfunc)(PyObject *, PyObject *); -ternaryfunc = Signature("OOO", "O") # typedef PyObject * (*ternaryfunc)(PyObject *, PyObject *, PyObject *); -iternaryfunc = Signature("TOO", "O") # typedef PyObject * (*ternaryfunc)(PyObject *, PyObject *, PyObject *); +powternaryfunc = Signature("OO?", "O") # typedef PyObject * (*ternaryfunc)(PyObject *, PyObject *, PyObject *); +ipowternaryfunc = Signature("TO?", "O") # typedef PyObject * (*ternaryfunc)(PyObject *, PyObject *, PyObject *); callfunc = Signature("T*", "O") # typedef PyObject * (*ternaryfunc)(PyObject *, PyObject *, PyObject *); inquiry = Signature("T", "i") # typedef int (*inquiry)(PyObject *); lenfunc = Signature("T", "z") # typedef Py_ssize_t (*lenfunc)(PyObject *); @@ -682,7 +852,7 @@ ssizessizeargfunc = Signature("Tzz", "O") # typedef PyObject *(*ssizessizeargfu intobjargproc = Signature("TiO", 'r') # typedef int(*intobjargproc)(PyObject *, int, PyObject *); ssizeobjargproc = Signature("TzO", 'r') # typedef 
int(*ssizeobjargproc)(PyObject *, Py_ssize_t, PyObject *); intintobjargproc = Signature("TiiO", 'r') # typedef int(*intintobjargproc)(PyObject *, int, int, PyObject *); -ssizessizeobjargproc = Signature("TzzO", 'r') # typedef int(*ssizessizeobjargproc)(PyObject *, Py_ssize_t, Py_ssize_t, PyObject *); +ssizessizeobjargproc = Signature("TzzO", 'r') # typedef int(*ssizessizeobjargproc)(PyObject *, Py_ssize_t, Py_ssize_t, PyObject *); intintargproc = Signature("Tii", 'r') ssizessizeargproc = Signature("Tzz", 'r') @@ -732,103 +902,8 @@ property_accessor_signatures = { '__del__': Signature("T", 'r') } -#------------------------------------------------------------------------------------------ -# -# Descriptor tables for the slots of the various type object -# substructures, in the order they appear in the structure. -# -#------------------------------------------------------------------------------------------ -PyNumberMethods_Py3_GUARD = "PY_MAJOR_VERSION < 3 || (CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX < 0x03050000)" - -PyNumberMethods = ( - MethodSlot(binaryfunc, "nb_add", "__add__"), - MethodSlot(binaryfunc, "nb_subtract", "__sub__"), - MethodSlot(binaryfunc, "nb_multiply", "__mul__"), - MethodSlot(binaryfunc, "nb_divide", "__div__", ifdef = PyNumberMethods_Py3_GUARD), - MethodSlot(binaryfunc, "nb_remainder", "__mod__"), - MethodSlot(binaryfunc, "nb_divmod", "__divmod__"), - MethodSlot(ternaryfunc, "nb_power", "__pow__"), - MethodSlot(unaryfunc, "nb_negative", "__neg__"), - MethodSlot(unaryfunc, "nb_positive", "__pos__"), - MethodSlot(unaryfunc, "nb_absolute", "__abs__"), - MethodSlot(inquiry, "nb_nonzero", "__nonzero__", py3 = ("nb_bool", "__bool__")), - MethodSlot(unaryfunc, "nb_invert", "__invert__"), - MethodSlot(binaryfunc, "nb_lshift", "__lshift__"), - MethodSlot(binaryfunc, "nb_rshift", "__rshift__"), - MethodSlot(binaryfunc, "nb_and", "__and__"), - MethodSlot(binaryfunc, "nb_xor", "__xor__"), - MethodSlot(binaryfunc, "nb_or", "__or__"), - 
EmptySlot("nb_coerce", ifdef = PyNumberMethods_Py3_GUARD), - MethodSlot(unaryfunc, "nb_int", "__int__", fallback="__long__"), - MethodSlot(unaryfunc, "nb_long", "__long__", fallback="__int__", py3 = "<RESERVED>"), - MethodSlot(unaryfunc, "nb_float", "__float__"), - MethodSlot(unaryfunc, "nb_oct", "__oct__", ifdef = PyNumberMethods_Py3_GUARD), - MethodSlot(unaryfunc, "nb_hex", "__hex__", ifdef = PyNumberMethods_Py3_GUARD), - - # Added in release 2.0 - MethodSlot(ibinaryfunc, "nb_inplace_add", "__iadd__"), - MethodSlot(ibinaryfunc, "nb_inplace_subtract", "__isub__"), - MethodSlot(ibinaryfunc, "nb_inplace_multiply", "__imul__"), - MethodSlot(ibinaryfunc, "nb_inplace_divide", "__idiv__", ifdef = PyNumberMethods_Py3_GUARD), - MethodSlot(ibinaryfunc, "nb_inplace_remainder", "__imod__"), - MethodSlot(ibinaryfunc, "nb_inplace_power", "__ipow__"), # actually ternaryfunc!!! - MethodSlot(ibinaryfunc, "nb_inplace_lshift", "__ilshift__"), - MethodSlot(ibinaryfunc, "nb_inplace_rshift", "__irshift__"), - MethodSlot(ibinaryfunc, "nb_inplace_and", "__iand__"), - MethodSlot(ibinaryfunc, "nb_inplace_xor", "__ixor__"), - MethodSlot(ibinaryfunc, "nb_inplace_or", "__ior__"), - - # Added in release 2.2 - # The following require the Py_TPFLAGS_HAVE_CLASS flag - MethodSlot(binaryfunc, "nb_floor_divide", "__floordiv__"), - MethodSlot(binaryfunc, "nb_true_divide", "__truediv__"), - MethodSlot(ibinaryfunc, "nb_inplace_floor_divide", "__ifloordiv__"), - MethodSlot(ibinaryfunc, "nb_inplace_true_divide", "__itruediv__"), - - # Added in release 2.5 - MethodSlot(unaryfunc, "nb_index", "__index__"), - - # Added in release 3.5 - MethodSlot(binaryfunc, "nb_matrix_multiply", "__matmul__", ifdef="PY_VERSION_HEX >= 0x03050000"), - MethodSlot(ibinaryfunc, "nb_inplace_matrix_multiply", "__imatmul__", ifdef="PY_VERSION_HEX >= 0x03050000"), -) - -PySequenceMethods = ( - MethodSlot(lenfunc, "sq_length", "__len__"), - EmptySlot("sq_concat"), # nb_add used instead - EmptySlot("sq_repeat"), # nb_multiply used 
instead - SyntheticSlot("sq_item", ["__getitem__"], "0"), #EmptySlot("sq_item"), # mp_subscript used instead - MethodSlot(ssizessizeargfunc, "sq_slice", "__getslice__"), - EmptySlot("sq_ass_item"), # mp_ass_subscript used instead - SyntheticSlot("sq_ass_slice", ["__setslice__", "__delslice__"], "0"), - MethodSlot(cmpfunc, "sq_contains", "__contains__"), - EmptySlot("sq_inplace_concat"), # nb_inplace_add used instead - EmptySlot("sq_inplace_repeat"), # nb_inplace_multiply used instead -) - -PyMappingMethods = ( - MethodSlot(lenfunc, "mp_length", "__len__"), - MethodSlot(objargfunc, "mp_subscript", "__getitem__"), - SyntheticSlot("mp_ass_subscript", ["__setitem__", "__delitem__"], "0"), -) - -PyBufferProcs = ( - MethodSlot(readbufferproc, "bf_getreadbuffer", "__getreadbuffer__", py3 = False), - MethodSlot(writebufferproc, "bf_getwritebuffer", "__getwritebuffer__", py3 = False), - MethodSlot(segcountproc, "bf_getsegcount", "__getsegcount__", py3 = False), - MethodSlot(charbufferproc, "bf_getcharbuffer", "__getcharbuffer__", py3 = False), - - MethodSlot(getbufferproc, "bf_getbuffer", "__getbuffer__"), - MethodSlot(releasebufferproc, "bf_releasebuffer", "__releasebuffer__") -) - -PyAsyncMethods = ( - MethodSlot(unaryfunc, "am_await", "__await__"), - MethodSlot(unaryfunc, "am_aiter", "__aiter__"), - MethodSlot(unaryfunc, "am_anext", "__anext__"), - EmptySlot("am_send", ifdef="PY_VERSION_HEX >= 0x030A00A3"), -) +PyNumberMethods_Py2only_GUARD = "PY_MAJOR_VERSION < 3 || (CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX < 0x03050000)" #------------------------------------------------------------------------------------------ # @@ -836,100 +911,262 @@ PyAsyncMethods = ( # top-level type slots, beginning with tp_dealloc, in the order they # appear in the type object. 
# +# It depends on some compiler directives (currently c_api_binop_methods), so the +# slot tables for each set of compiler directives are generated lazily and put in +# the _slot_table_dict +# #------------------------------------------------------------------------------------------ -slot_table = ( - ConstructorSlot("tp_dealloc", '__dealloc__'), - EmptySlot("tp_print", ifdef="PY_VERSION_HEX < 0x030800b4"), - EmptySlot("tp_vectorcall_offset", ifdef="PY_VERSION_HEX >= 0x030800b4"), - EmptySlot("tp_getattr"), - EmptySlot("tp_setattr"), - - # tp_compare (Py2) / tp_reserved (Py3<3.5) / tp_as_async (Py3.5+) is always used as tp_as_async in Py3 - MethodSlot(cmpfunc, "tp_compare", "__cmp__", ifdef="PY_MAJOR_VERSION < 3"), - SuiteSlot(PyAsyncMethods, "__Pyx_PyAsyncMethodsStruct", "tp_as_async", ifdef="PY_MAJOR_VERSION >= 3"), - - MethodSlot(reprfunc, "tp_repr", "__repr__"), - - SuiteSlot(PyNumberMethods, "PyNumberMethods", "tp_as_number"), - SuiteSlot(PySequenceMethods, "PySequenceMethods", "tp_as_sequence"), - SuiteSlot(PyMappingMethods, "PyMappingMethods", "tp_as_mapping"), - - MethodSlot(hashfunc, "tp_hash", "__hash__", inherited=False), # Py3 checks for __richcmp__ - MethodSlot(callfunc, "tp_call", "__call__"), - MethodSlot(reprfunc, "tp_str", "__str__"), - - SyntheticSlot("tp_getattro", ["__getattr__","__getattribute__"], "0"), #"PyObject_GenericGetAttr"), - SyntheticSlot("tp_setattro", ["__setattr__", "__delattr__"], "0"), #"PyObject_GenericSetAttr"), - - SuiteSlot(PyBufferProcs, "PyBufferProcs", "tp_as_buffer"), - - TypeFlagsSlot("tp_flags"), - DocStringSlot("tp_doc"), - - GCDependentSlot("tp_traverse"), - GCClearReferencesSlot("tp_clear"), - - RichcmpSlot(richcmpfunc, "tp_richcompare", "__richcmp__", inherited=False), # Py3 checks for __hash__ - - EmptySlot("tp_weaklistoffset"), - - MethodSlot(getiterfunc, "tp_iter", "__iter__"), - MethodSlot(iternextfunc, "tp_iternext", "__next__"), +class SlotTable(object): + def __init__(self, old_binops): + # The following 
dictionary maps __xxx__ method names to slot descriptors. + method_name_to_slot = {} + self._get_slot_by_method_name = method_name_to_slot.get + self.substructures = [] # List of all SuiteSlot instances + + bf = binaryfunc if old_binops else ibinaryfunc + ptf = powternaryfunc if old_binops else ipowternaryfunc + + # Descriptor tables for the slots of the various type object + # substructures, in the order they appear in the structure. + self.PyNumberMethods = ( + BinopSlot(bf, "nb_add", "__add__", method_name_to_slot), + BinopSlot(bf, "nb_subtract", "__sub__", method_name_to_slot), + BinopSlot(bf, "nb_multiply", "__mul__", method_name_to_slot), + BinopSlot(bf, "nb_divide", "__div__", method_name_to_slot, + ifdef = PyNumberMethods_Py2only_GUARD), + BinopSlot(bf, "nb_remainder", "__mod__", method_name_to_slot), + BinopSlot(bf, "nb_divmod", "__divmod__", method_name_to_slot), + BinopSlot(ptf, "nb_power", "__pow__", method_name_to_slot), + MethodSlot(unaryfunc, "nb_negative", "__neg__", method_name_to_slot), + MethodSlot(unaryfunc, "nb_positive", "__pos__", method_name_to_slot), + MethodSlot(unaryfunc, "nb_absolute", "__abs__", method_name_to_slot), + MethodSlot(inquiry, "nb_bool", "__bool__", method_name_to_slot, + py2 = ("nb_nonzero", "__nonzero__")), + MethodSlot(unaryfunc, "nb_invert", "__invert__", method_name_to_slot), + BinopSlot(bf, "nb_lshift", "__lshift__", method_name_to_slot), + BinopSlot(bf, "nb_rshift", "__rshift__", method_name_to_slot), + BinopSlot(bf, "nb_and", "__and__", method_name_to_slot), + BinopSlot(bf, "nb_xor", "__xor__", method_name_to_slot), + BinopSlot(bf, "nb_or", "__or__", method_name_to_slot), + EmptySlot("nb_coerce", ifdef = PyNumberMethods_Py2only_GUARD), + MethodSlot(unaryfunc, "nb_int", "__int__", method_name_to_slot, fallback="__long__"), + MethodSlot(unaryfunc, "nb_long", "__long__", method_name_to_slot, + fallback="__int__", py3 = "<RESERVED>"), + MethodSlot(unaryfunc, "nb_float", "__float__", method_name_to_slot), + 
MethodSlot(unaryfunc, "nb_oct", "__oct__", method_name_to_slot, + ifdef = PyNumberMethods_Py2only_GUARD), + MethodSlot(unaryfunc, "nb_hex", "__hex__", method_name_to_slot, + ifdef = PyNumberMethods_Py2only_GUARD), + + # Added in release 2.0 + MethodSlot(ibinaryfunc, "nb_inplace_add", "__iadd__", method_name_to_slot), + MethodSlot(ibinaryfunc, "nb_inplace_subtract", "__isub__", method_name_to_slot), + MethodSlot(ibinaryfunc, "nb_inplace_multiply", "__imul__", method_name_to_slot), + MethodSlot(ibinaryfunc, "nb_inplace_divide", "__idiv__", method_name_to_slot, + ifdef = PyNumberMethods_Py2only_GUARD), + MethodSlot(ibinaryfunc, "nb_inplace_remainder", "__imod__", method_name_to_slot), + MethodSlot(ptf, "nb_inplace_power", "__ipow__", method_name_to_slot), + MethodSlot(ibinaryfunc, "nb_inplace_lshift", "__ilshift__", method_name_to_slot), + MethodSlot(ibinaryfunc, "nb_inplace_rshift", "__irshift__", method_name_to_slot), + MethodSlot(ibinaryfunc, "nb_inplace_and", "__iand__", method_name_to_slot), + MethodSlot(ibinaryfunc, "nb_inplace_xor", "__ixor__", method_name_to_slot), + MethodSlot(ibinaryfunc, "nb_inplace_or", "__ior__", method_name_to_slot), + + # Added in release 2.2 + # The following require the Py_TPFLAGS_HAVE_CLASS flag + BinopSlot(bf, "nb_floor_divide", "__floordiv__", method_name_to_slot), + BinopSlot(bf, "nb_true_divide", "__truediv__", method_name_to_slot), + MethodSlot(ibinaryfunc, "nb_inplace_floor_divide", "__ifloordiv__", method_name_to_slot), + MethodSlot(ibinaryfunc, "nb_inplace_true_divide", "__itruediv__", method_name_to_slot), + + # Added in release 2.5 + MethodSlot(unaryfunc, "nb_index", "__index__", method_name_to_slot), + + # Added in release 3.5 + BinopSlot(bf, "nb_matrix_multiply", "__matmul__", method_name_to_slot, + ifdef="PY_VERSION_HEX >= 0x03050000"), + MethodSlot(ibinaryfunc, "nb_inplace_matrix_multiply", "__imatmul__", method_name_to_slot, + ifdef="PY_VERSION_HEX >= 0x03050000"), + ) + + self.PySequenceMethods = ( + 
MethodSlot(lenfunc, "sq_length", "__len__", method_name_to_slot), + EmptySlot("sq_concat"), # nb_add used instead + EmptySlot("sq_repeat"), # nb_multiply used instead + SyntheticSlot("sq_item", ["__getitem__"], "0"), #EmptySlot("sq_item"), # mp_subscript used instead + MethodSlot(ssizessizeargfunc, "sq_slice", "__getslice__", method_name_to_slot), + EmptySlot("sq_ass_item"), # mp_ass_subscript used instead + SyntheticSlot("sq_ass_slice", ["__setslice__", "__delslice__"], "0"), + MethodSlot(cmpfunc, "sq_contains", "__contains__", method_name_to_slot), + EmptySlot("sq_inplace_concat"), # nb_inplace_add used instead + EmptySlot("sq_inplace_repeat"), # nb_inplace_multiply used instead + ) + + self.PyMappingMethods = ( + MethodSlot(lenfunc, "mp_length", "__len__", method_name_to_slot), + MethodSlot(objargfunc, "mp_subscript", "__getitem__", method_name_to_slot), + SyntheticSlot("mp_ass_subscript", ["__setitem__", "__delitem__"], "0"), + ) + + self.PyBufferProcs = ( + MethodSlot(readbufferproc, "bf_getreadbuffer", "__getreadbuffer__", method_name_to_slot, + py3 = False), + MethodSlot(writebufferproc, "bf_getwritebuffer", "__getwritebuffer__", method_name_to_slot, + py3 = False), + MethodSlot(segcountproc, "bf_getsegcount", "__getsegcount__", method_name_to_slot, + py3 = False), + MethodSlot(charbufferproc, "bf_getcharbuffer", "__getcharbuffer__", method_name_to_slot, + py3 = False), + + MethodSlot(getbufferproc, "bf_getbuffer", "__getbuffer__", method_name_to_slot), + MethodSlot(releasebufferproc, "bf_releasebuffer", "__releasebuffer__", method_name_to_slot) + ) + + self.PyAsyncMethods = ( + MethodSlot(unaryfunc, "am_await", "__await__", method_name_to_slot), + MethodSlot(unaryfunc, "am_aiter", "__aiter__", method_name_to_slot), + MethodSlot(unaryfunc, "am_anext", "__anext__", method_name_to_slot), + EmptySlot("am_send", ifdef="PY_VERSION_HEX >= 0x030A00A3"), + ) + + self.slot_table = ( + ConstructorSlot("tp_dealloc", '__dealloc__'), + EmptySlot("tp_print", 
ifdef="PY_VERSION_HEX < 0x030800b4"), + EmptySlot("tp_vectorcall_offset", ifdef="PY_VERSION_HEX >= 0x030800b4"), + EmptySlot("tp_getattr"), + EmptySlot("tp_setattr"), + + # tp_compare (Py2) / tp_reserved (Py3<3.5) / tp_as_async (Py3.5+) is always used as tp_as_async in Py3 + MethodSlot(cmpfunc, "tp_compare", "__cmp__", method_name_to_slot, ifdef="PY_MAJOR_VERSION < 3"), + SuiteSlot(self. PyAsyncMethods, "__Pyx_PyAsyncMethodsStruct", "tp_as_async", + self.substructures, ifdef="PY_MAJOR_VERSION >= 3"), + + MethodSlot(reprfunc, "tp_repr", "__repr__", method_name_to_slot), + + SuiteSlot(self.PyNumberMethods, "PyNumberMethods", "tp_as_number", self.substructures), + SuiteSlot(self.PySequenceMethods, "PySequenceMethods", "tp_as_sequence", self.substructures), + SuiteSlot(self.PyMappingMethods, "PyMappingMethods", "tp_as_mapping", self.substructures), + + MethodSlot(hashfunc, "tp_hash", "__hash__", method_name_to_slot, + inherited=False), # Py3 checks for __richcmp__ + MethodSlot(callfunc, "tp_call", "__call__", method_name_to_slot), + MethodSlot(reprfunc, "tp_str", "__str__", method_name_to_slot), + + SyntheticSlot("tp_getattro", ["__getattr__","__getattribute__"], "0"), #"PyObject_GenericGetAttr"), + SyntheticSlot("tp_setattro", ["__setattr__", "__delattr__"], "0"), #"PyObject_GenericSetAttr"), + + SuiteSlot(self.PyBufferProcs, "PyBufferProcs", "tp_as_buffer", self.substructures), + + TypeFlagsSlot("tp_flags"), + DocStringSlot("tp_doc"), + + GCDependentSlot("tp_traverse"), + GCClearReferencesSlot("tp_clear"), + + RichcmpSlot(richcmpfunc, "tp_richcompare", "__richcmp__", method_name_to_slot, + inherited=False), # Py3 checks for __hash__ + + EmptySlot("tp_weaklistoffset"), + + MethodSlot(getiterfunc, "tp_iter", "__iter__", method_name_to_slot), + MethodSlot(iternextfunc, "tp_iternext", "__next__", method_name_to_slot), + + MethodTableSlot("tp_methods"), + MemberTableSlot("tp_members"), + GetSetSlot("tp_getset"), + + BaseClassSlot("tp_base"), #EmptySlot("tp_base"), + 
EmptySlot("tp_dict"), + + SyntheticSlot("tp_descr_get", ["__get__"], "0"), + SyntheticSlot("tp_descr_set", ["__set__", "__delete__"], "0"), + + DictOffsetSlot("tp_dictoffset", ifdef="!CYTHON_USE_TYPE_SPECS"), # otherwise set via "__dictoffset__" member + + MethodSlot(initproc, "tp_init", "__init__", method_name_to_slot), + EmptySlot("tp_alloc"), #FixedSlot("tp_alloc", "PyType_GenericAlloc"), + ConstructorSlot("tp_new", "__cinit__"), + EmptySlot("tp_free"), + + EmptySlot("tp_is_gc"), + EmptySlot("tp_bases"), + EmptySlot("tp_mro"), + EmptySlot("tp_cache"), + EmptySlot("tp_subclasses"), + EmptySlot("tp_weaklist"), + EmptySlot("tp_del"), + EmptySlot("tp_version_tag"), + SyntheticSlot("tp_finalize", ["__del__"], "0", ifdef="PY_VERSION_HEX >= 0x030400a1", + used_ifdef="CYTHON_USE_TP_FINALIZE"), + EmptySlot("tp_vectorcall", ifdef="PY_VERSION_HEX >= 0x030800b1 && (!CYTHON_COMPILING_IN_PYPY || PYPY_VERSION_NUM >= 0x07030800)"), + EmptySlot("tp_print", ifdef="__PYX_NEED_TP_PRINT_SLOT == 1"), + EmptySlot("tp_watched", ifdef="PY_VERSION_HEX >= 0x030C0000"), + # PyPy specific extension - only here to avoid C compiler warnings. + EmptySlot("tp_pypy_flags", ifdef="CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX >= 0x03090000 && PY_VERSION_HEX < 0x030a0000"), + ) + + #------------------------------------------------------------------------------------------ + # + # Descriptors for special methods which don't appear directly + # in the type object or its substructures. These methods are + # called from slot functions synthesized by Cython. 
+ # + #------------------------------------------------------------------------------------------ + + MethodSlot(initproc, "", "__cinit__", method_name_to_slot) + MethodSlot(destructor, "", "__dealloc__", method_name_to_slot) + MethodSlot(destructor, "", "__del__", method_name_to_slot) + MethodSlot(objobjargproc, "", "__setitem__", method_name_to_slot) + MethodSlot(objargproc, "", "__delitem__", method_name_to_slot) + MethodSlot(ssizessizeobjargproc, "", "__setslice__", method_name_to_slot) + MethodSlot(ssizessizeargproc, "", "__delslice__", method_name_to_slot) + MethodSlot(getattrofunc, "", "__getattr__", method_name_to_slot) + MethodSlot(getattrofunc, "", "__getattribute__", method_name_to_slot) + MethodSlot(setattrofunc, "", "__setattr__", method_name_to_slot) + MethodSlot(delattrofunc, "", "__delattr__", method_name_to_slot) + MethodSlot(descrgetfunc, "", "__get__", method_name_to_slot) + MethodSlot(descrsetfunc, "", "__set__", method_name_to_slot) + MethodSlot(descrdelfunc, "", "__delete__", method_name_to_slot) + + def get_special_method_signature(self, name): + # Given a method name, if it is a special method, + # return its signature, else return None. + slot = self._get_slot_by_method_name(name) + if slot: + return slot.signature + elif name in richcmp_special_methods: + return ibinaryfunc + else: + return None - MethodTableSlot("tp_methods"), - MemberTableSlot("tp_members"), - GetSetSlot("tp_getset"), + def get_slot_by_method_name(self, method_name): + # For now, only search the type struct, no referenced sub-structs. 
+ return self._get_slot_by_method_name(method_name) - BaseClassSlot("tp_base"), #EmptySlot("tp_base"), - EmptySlot("tp_dict"), + def __iter__(self): + # make it easier to iterate over all the slots + return iter(self.slot_table) - SyntheticSlot("tp_descr_get", ["__get__"], "0"), - SyntheticSlot("tp_descr_set", ["__set__", "__delete__"], "0"), - DictOffsetSlot("tp_dictoffset"), +_slot_table_dict = {} - MethodSlot(initproc, "tp_init", "__init__"), - EmptySlot("tp_alloc"), #FixedSlot("tp_alloc", "PyType_GenericAlloc"), - InternalMethodSlot("tp_new"), - EmptySlot("tp_free"), +def get_slot_table(compiler_directives): + if not compiler_directives: + # fetch default directives here since the builtin type classes don't have + # directives set + from .Options import get_directive_defaults + compiler_directives = get_directive_defaults() - EmptySlot("tp_is_gc"), - EmptySlot("tp_bases"), - EmptySlot("tp_mro"), - EmptySlot("tp_cache"), - EmptySlot("tp_subclasses"), - EmptySlot("tp_weaklist"), - EmptySlot("tp_del"), - EmptySlot("tp_version_tag"), - EmptySlot("tp_finalize", ifdef="PY_VERSION_HEX >= 0x030400a1"), - EmptySlot("tp_vectorcall", ifdef="PY_VERSION_HEX >= 0x030800b1 && (!CYTHON_COMPILING_IN_PYPY || PYPY_VERSION_NUM >= 0x07030800)"), - EmptySlot("tp_print", ifdef="PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000"), - # PyPy specific extension - only here to avoid C compiler warnings. - EmptySlot("tp_pypy_flags", ifdef="CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX >= 0x03090000 && PY_VERSION_HEX < 0x030a0000"), -) + old_binops = compiler_directives['c_api_binop_methods'] + key = (old_binops,) + if key not in _slot_table_dict: + _slot_table_dict[key] = SlotTable(old_binops=old_binops) + return _slot_table_dict[key] -#------------------------------------------------------------------------------------------ -# -# Descriptors for special methods which don't appear directly -# in the type object or its substructures. 
These methods are -# called from slot functions synthesized by Cython. -# -#------------------------------------------------------------------------------------------ -MethodSlot(initproc, "", "__cinit__") -MethodSlot(destructor, "", "__dealloc__") -MethodSlot(objobjargproc, "", "__setitem__") -MethodSlot(objargproc, "", "__delitem__") -MethodSlot(ssizessizeobjargproc, "", "__setslice__") -MethodSlot(ssizessizeargproc, "", "__delslice__") -MethodSlot(getattrofunc, "", "__getattr__") -MethodSlot(getattrofunc, "", "__getattribute__") -MethodSlot(setattrofunc, "", "__setattr__") -MethodSlot(delattrofunc, "", "__delattr__") -MethodSlot(descrgetfunc, "", "__get__") -MethodSlot(descrsetfunc, "", "__set__") -MethodSlot(descrdelfunc, "", "__delete__") +# Populate "special_method_names" based on the default directives (so it can always be accessed quickly). +special_method_names = set(get_slot_table(compiler_directives=None)) # Method flags for python-exposed methods. @@ -937,5 +1174,6 @@ MethodSlot(descrdelfunc, "", "__delete__") method_noargs = "METH_NOARGS" method_onearg = "METH_O" method_varargs = "METH_VARARGS" +method_fastcall = "__Pyx_METH_FASTCALL" # Actually VARARGS on versions < 3.7 method_keywords = "METH_KEYWORDS" method_coexist = "METH_COEXIST" diff --git a/Cython/Compiler/UFuncs.py b/Cython/Compiler/UFuncs.py new file mode 100644 index 000000000..5e641d785 --- /dev/null +++ b/Cython/Compiler/UFuncs.py @@ -0,0 +1,286 @@ +from . import ( + Nodes, + ExprNodes, + FusedNode, + TreeFragment, + Pipeline, + ParseTreeTransforms, + Naming, + UtilNodes, +) +from .Errors import error +from . 
import PyrexTypes +from .UtilityCode import CythonUtilityCode +from .Code import TempitaUtilityCode, UtilityCode +from .Visitor import PrintTree, TreeVisitor, VisitorTransform + +numpy_int_types = [ + "NPY_BYTE", + "NPY_INT8", + "NPY_SHORT", + "NPY_INT16", + "NPY_INT", + "NPY_INT32", + "NPY_LONG", + "NPY_LONGLONG", + "NPY_INT64", +] +numpy_uint_types = [tp.replace("NPY_", "NPY_U") for tp in numpy_int_types] +# note: half float type is deliberately omitted +numpy_numeric_types = ( + numpy_int_types + + numpy_uint_types + + [ + "NPY_FLOAT", + "NPY_FLOAT32", + "NPY_DOUBLE", + "NPY_FLOAT64", + "NPY_LONGDOUBLE", + ] +) + + +def _get_type_constant(pos, type_): + if type_.is_complex: + # 'is' checks don't seem to work for complex types + if type_ == PyrexTypes.c_float_complex_type: + return "NPY_CFLOAT" + elif type_ == PyrexTypes.c_double_complex_type: + return "NPY_CDOUBLE" + elif type_ == PyrexTypes.c_longdouble_complex_type: + return "NPY_CLONGDOUBLE" + elif type_.is_numeric: + postfix = type_.empty_declaration_code().upper().replace(" ", "") + typename = "NPY_%s" % postfix + if typename in numpy_numeric_types: + return typename + elif type_.is_pyobject: + return "NPY_OBJECT" + # TODO possible NPY_BOOL to bint but it needs a cast? 
+ # TODO NPY_DATETIME, NPY_TIMEDELTA, NPY_STRING, NPY_UNICODE and maybe NPY_VOID might be handleable + error(pos, "Type '%s' cannot be used as a ufunc argument" % type_) + + +class _FindCFuncDefNode(TreeVisitor): + """ + Finds the CFuncDefNode in the tree + + The assumption is that there's only one CFuncDefNode + """ + + found_node = None + + def visit_Node(self, node): + if self.found_node: + return + else: + self.visitchildren(node) + + def visit_CFuncDefNode(self, node): + self.found_node = node + + def __call__(self, tree): + self.visit(tree) + return self.found_node + + +def get_cfunc_from_tree(tree): + return _FindCFuncDefNode()(tree) + + +class _ArgumentInfo(object): + """ + Everything related to defining an input/output argument for a ufunc + + type - PyrexType + type_constant - str such as "NPY_INT8" representing numpy dtype constants + """ + + def __init__(self, type, type_constant): + self.type = type + self.type_constant = type_constant + + +class UFuncConversion(object): + def __init__(self, node): + self.node = node + self.global_scope = node.local_scope.global_scope() + + self.in_definitions = self.get_in_type_info() + self.out_definitions = self.get_out_type_info() + + def get_in_type_info(self): + definitions = [] + for n, arg in enumerate(self.node.args): + type_const = _get_type_constant(self.node.pos, arg.type) + definitions.append(_ArgumentInfo(arg.type, type_const)) + return definitions + + def get_out_type_info(self): + if self.node.return_type.is_ctuple: + components = self.node.return_type.components + else: + components = [self.node.return_type] + definitions = [] + for n, type in enumerate(components): + definitions.append( + _ArgumentInfo(type, _get_type_constant(self.node.pos, type)) + ) + return definitions + + def generate_cy_utility_code(self): + arg_types = [a.type for a in self.in_definitions] + out_types = [a.type for a in self.out_definitions] + inline_func_decl = self.node.entry.type.declaration_code( + self.node.entry.cname, 
pyrex=True + ) + self.node.entry.used = True + + ufunc_cname = self.global_scope.next_id(self.node.entry.name + "_ufunc_def") + + will_be_called_without_gil = not (any(t.is_pyobject for t in arg_types) or + any(t.is_pyobject for t in out_types)) + + context = dict( + func_cname=ufunc_cname, + in_types=arg_types, + out_types=out_types, + inline_func_call=self.node.entry.cname, + inline_func_declaration=inline_func_decl, + nogil=self.node.entry.type.nogil, + will_be_called_without_gil=will_be_called_without_gil, + ) + + code = CythonUtilityCode.load( + "UFuncDefinition", + "UFuncs.pyx", + context=context, + outer_module_scope=self.global_scope, + ) + + tree = code.get_tree(entries_only=True) + return tree + + def use_generic_utility_code(self): + # use the invariant C utility code + self.global_scope.use_utility_code( + UtilityCode.load_cached("UFuncsInit", "UFuncs_C.c") + ) + self.global_scope.use_utility_code( + UtilityCode.load_cached("NumpyImportUFunc", "NumpyImportArray.c") + ) + + +def convert_to_ufunc(node): + if isinstance(node, Nodes.CFuncDefNode): + if node.local_scope.parent_scope.is_c_class_scope: + error(node.pos, "Methods cannot currently be converted to a ufunc") + return node + converters = [UFuncConversion(node)] + original_node = node + elif isinstance(node, FusedNode.FusedCFuncDefNode) and isinstance( + node.node, Nodes.CFuncDefNode + ): + if node.node.local_scope.parent_scope.is_c_class_scope: + error(node.pos, "Methods cannot currently be converted to a ufunc") + return node + converters = [UFuncConversion(n) for n in node.nodes] + original_node = node.node + else: + error(node.pos, "Only C functions can be converted to a ufunc") + return node + + if not converters: + return # this path probably shouldn't happen + + del converters[0].global_scope.entries[original_node.entry.name] + # the generic utility code is generic, so there's no reason to do it multiple times + converters[0].use_generic_utility_code() + return [node] + 
_generate_stats_from_converters(converters, original_node) + + +def generate_ufunc_initialization(converters, cfunc_nodes, original_node): + global_scope = converters[0].global_scope + ufunc_funcs_name = global_scope.next_id(Naming.pyrex_prefix + "funcs") + ufunc_types_name = global_scope.next_id(Naming.pyrex_prefix + "types") + ufunc_data_name = global_scope.next_id(Naming.pyrex_prefix + "data") + type_constants = [] + narg_in = None + narg_out = None + for c in converters: + in_const = [d.type_constant for d in c.in_definitions] + if narg_in is not None: + assert narg_in == len(in_const) + else: + narg_in = len(in_const) + type_constants.extend(in_const) + out_const = [d.type_constant for d in c.out_definitions] + if narg_out is not None: + assert narg_out == len(out_const) + else: + narg_out = len(out_const) + type_constants.extend(out_const) + + func_cnames = [cfnode.entry.cname for cfnode in cfunc_nodes] + + context = dict( + ufunc_funcs_name=ufunc_funcs_name, + func_cnames=func_cnames, + ufunc_types_name=ufunc_types_name, + type_constants=type_constants, + ufunc_data_name=ufunc_data_name, + ) + global_scope.use_utility_code( + TempitaUtilityCode.load("UFuncConsts", "UFuncs_C.c", context=context) + ) + + pos = original_node.pos + func_name = original_node.entry.name + docstr = original_node.doc + args_to_func = '%s(), %s, %s(), %s, %s, %s, PyUFunc_None, "%s", %s, 0' % ( + ufunc_funcs_name, + ufunc_data_name, + ufunc_types_name, + len(func_cnames), + narg_in, + narg_out, + func_name, + docstr.as_c_string_literal() if docstr else "NULL", + ) + + call_node = ExprNodes.PythonCapiCallNode( + pos, + function_name="PyUFunc_FromFuncAndData", + # use a dummy type because it's honestly too fiddly + func_type=PyrexTypes.CFuncType( + PyrexTypes.py_object_type, + [PyrexTypes.CFuncTypeArg("dummy", PyrexTypes.c_void_ptr_type, None)], + ), + args=[ + ExprNodes.ConstNode( + pos, type=PyrexTypes.c_void_ptr_type, value=args_to_func + ) + ], + ) + lhs_entry = 
global_scope.declare_var(func_name, PyrexTypes.py_object_type, pos) + assgn_node = Nodes.SingleAssignmentNode( + pos, + lhs=ExprNodes.NameNode( + pos, name=func_name, type=PyrexTypes.py_object_type, entry=lhs_entry + ), + rhs=call_node, + ) + return assgn_node + + +def _generate_stats_from_converters(converters, node): + stats = [] + for converter in converters: + tree = converter.generate_cy_utility_code() + ufunc_node = get_cfunc_from_tree(tree) + # merge in any utility code + converter.global_scope.utility_code_list.extend(tree.scope.utility_code_list) + stats.append(ufunc_node) + + stats.append(generate_ufunc_initialization(converters, stats, node)) + return stats diff --git a/Cython/Compiler/UtilNodes.py b/Cython/Compiler/UtilNodes.py index c41748ace..81d3038ea 100644 --- a/Cython/Compiler/UtilNodes.py +++ b/Cython/Compiler/UtilNodes.py @@ -10,7 +10,7 @@ from . import Nodes from . import ExprNodes from .Nodes import Node from .ExprNodes import AtomicExprNode -from .PyrexTypes import c_ptr_type +from .PyrexTypes import c_ptr_type, c_bint_type class TempHandle(object): @@ -45,7 +45,7 @@ class TempRefNode(AtomicExprNode): def calculate_result_code(self): result = self.handle.temp - if result is None: result = "<error>" # might be called and overwritten + if result is None: result = "<error>" # might be called and overwritten return result def generate_result_code(self, code): @@ -122,8 +122,7 @@ class ResultRefNode(AtomicExprNode): self.may_hold_none = may_hold_none if expression is not None: self.pos = expression.pos - if hasattr(expression, "type"): - self.type = expression.type + self.type = getattr(expression, "type", None) if pos is not None: self.pos = pos if type is not None: @@ -144,13 +143,17 @@ class ResultRefNode(AtomicExprNode): def update_expression(self, expression): self.expression = expression - if hasattr(expression, "type"): - self.type = expression.type + type = getattr(expression, "type", None) + if type: + self.type = type + + def 
analyse_target_declaration(self, env): + pass # OK - we can assign to this def analyse_types(self, env): if self.expression is not None: if not self.expression.type: - self.expression = self.expression.analyse_types(env) + self.expression = self.expression.analyse_types(env) self.type = self.expression.type return self @@ -175,7 +178,7 @@ class ResultRefNode(AtomicExprNode): return self.expression.may_be_none() if self.type is not None: return self.type.is_pyobject - return True # play safe + return True # play it safe def is_simple(self): return True @@ -233,7 +236,10 @@ class LetNodeMixin: if self._result_in_temp: self.temp = self.temp_expression.result() else: - self.temp_expression.make_owned_reference(code) + if self.temp_type.is_memoryviewslice: + self.temp_expression.make_owned_memoryviewslice(code) + else: + self.temp_expression.make_owned_reference(code) self.temp = code.funcstate.allocate_temp( self.temp_type, manage_ref=True) code.putln("%s = %s;" % (self.temp, self.temp_expression.result())) @@ -246,7 +252,7 @@ class LetNodeMixin: self.temp_expression.generate_disposal_code(code) self.temp_expression.free_temps(code) else: - if self.temp_type.is_pyobject: + if self.temp_type.needs_refcounting: code.put_decref_clear(self.temp, self.temp_type) code.funcstate.release_temp(self.temp) @@ -354,6 +360,29 @@ class TempResultFromStatNode(ExprNodes.ExprNode): self.body = self.body.analyse_expressions(env) return self + def may_be_none(self): + return self.result_ref.may_be_none() + def generate_result_code(self, code): self.result_ref.result_code = self.result() self.body.generate_execution_code(code) + + def generate_function_definitions(self, env, code): + self.body.generate_function_definitions(env, code) + + +class HasGilNode(AtomicExprNode): + """ + Simple node that evaluates to 0 or 1 depending on whether we're + in a nogil context + """ + type = c_bint_type + + def analyse_types(self, env): + return self + + def generate_result_code(self, code): + 
self.has_gil = code.funcstate.gil_owned + + def calculate_result_code(self): + return "1" if self.has_gil else "0" diff --git a/Cython/Compiler/UtilityCode.py b/Cython/Compiler/UtilityCode.py index 98e9ab5bf..e2df2586b 100644 --- a/Cython/Compiler/UtilityCode.py +++ b/Cython/Compiler/UtilityCode.py @@ -131,7 +131,7 @@ class CythonUtilityCode(Code.UtilityCodeBase): p = [] for t in pipeline: p.append(t) - if isinstance(p, ParseTreeTransforms.AnalyseDeclarationsTransform): + if isinstance(t, ParseTreeTransforms.AnalyseDeclarationsTransform): break pipeline = p @@ -173,8 +173,15 @@ class CythonUtilityCode(Code.UtilityCodeBase): if self.context_types: # inject types into module scope def scope_transform(module_node): + dummy_entry = object() for name, type in self.context_types.items(): + # Restore the old type entry after declaring the type. + # We need to access types in the scope, but this shouldn't alter the entry + # that is visible from everywhere else + old_type_entry = getattr(type, "entry", dummy_entry) entry = module_node.scope.declare_type(name, type, None, visibility='extern') + if old_type_entry is not dummy_entry: + type.entry = old_type_entry entry.in_cinclude = True return module_node @@ -196,10 +203,10 @@ class CythonUtilityCode(Code.UtilityCodeBase): Load a utility code as a string. Returns (proto, implementation) """ util = cls.load(util_code_name, from_file, **kwargs) - return util.proto, util.impl # keep line numbers => no lstrip() + return util.proto, util.impl # keep line numbers => no lstrip() def declare_in_scope(self, dest_scope, used=False, cython_scope=None, - whitelist=None): + allowlist=None): """ Declare all entries from the utility code in dest_scope. Code will only be included for used entries. 
If module_name is given, declare the @@ -218,7 +225,7 @@ class CythonUtilityCode(Code.UtilityCodeBase): entry.used = used original_scope = tree.scope - dest_scope.merge_in(original_scope, merge_unused=True, whitelist=whitelist) + dest_scope.merge_in(original_scope, merge_unused=True, allowlist=allowlist) tree.scope = dest_scope for dep in self.requires: @@ -227,6 +234,27 @@ class CythonUtilityCode(Code.UtilityCodeBase): return original_scope + @staticmethod + def filter_inherited_directives(current_directives): + """ + Cython utility code should usually only pick up a few directives from the + environment (those that intentionally control its function) and ignore most + other compiler directives. This function provides a sensible default list + of directives to copy. + """ + from .Options import _directive_defaults + utility_code_directives = dict(_directive_defaults) + inherited_directive_names = ( + 'binding', 'always_allow_keywords', 'allow_none_for_extension_args', + 'auto_pickle', 'ccomplex', + 'c_string_type', 'c_string_encoding', + 'optimize.inline_defnode_calls', 'optimize.unpack_method_calls', + 'optimize.unpack_method_calls_in_pyinit', 'optimize.use_switch') + for name in inherited_directive_names: + if name in current_directives: + utility_code_directives[name] = current_directives[name] + return utility_code_directives + def declare_declarations_in_scope(declaration_string, env, private_type=True, *args, **kwargs): diff --git a/Cython/Compiler/Visitor.pxd b/Cython/Compiler/Visitor.pxd index d5d5692aa..9c5aac6dc 100644 --- a/Cython/Compiler/Visitor.pxd +++ b/Cython/Compiler/Visitor.pxd @@ -1,4 +1,4 @@ -from __future__ import absolute_import +# cython: language_level=3str cimport cython @@ -10,15 +10,15 @@ cdef class TreeVisitor: cdef _visit(self, obj) cdef find_handler(self, obj) cdef _visitchild(self, child, parent, attrname, idx) - cdef dict _visitchildren(self, parent, attrs) - cpdef visitchildren(self, parent, attrs=*) + cdef dict 
_visitchildren(self, parent, attrs, exclude) + cpdef visitchildren(self, parent, attrs=*, exclude=*) cdef _raise_compiler_error(self, child, e) cdef class VisitorTransform(TreeVisitor): - cdef dict _process_children(self, parent, attrs=*) + cdef dict _process_children(self, parent, attrs=*, exclude=*) cpdef visitchildren(self, parent, attrs=*, exclude=*) cdef list _flatten_list(self, list orig_list) - cdef list _select_attrs(self, attrs, exclude) + cpdef visitchild(self, parent, str attr, idx=*) cdef class CythonTransform(VisitorTransform): cdef public context @@ -47,8 +47,8 @@ cdef class MethodDispatcherTransform(EnvTransform): node, function, arg_list, kwargs) cdef class RecursiveNodeReplacer(VisitorTransform): - cdef public orig_node - cdef public new_node + cdef public orig_node + cdef public new_node cdef class NodeFinder(TreeVisitor): cdef node diff --git a/Cython/Compiler/Visitor.py b/Cython/Compiler/Visitor.py index a35d13e1d..92e2eb9c0 100644 --- a/Cython/Compiler/Visitor.py +++ b/Cython/Compiler/Visitor.py @@ -1,5 +1,5 @@ # cython: infer_types=True -# cython: language_level=3 +# cython: language_level=3str # cython: auto_pickle=False # @@ -80,7 +80,7 @@ class TreeVisitor(object): def dump_node(self, node): ignored = list(node.child_attrs or []) + [ - u'child_attrs', u'pos', u'gil_message', u'cpp_message', u'subexprs'] + 'child_attrs', 'pos', 'gil_message', 'cpp_message', 'subexprs'] values = [] pos = getattr(node, 'pos', None) if pos: @@ -116,7 +116,7 @@ class TreeVisitor(object): nodes = [] while hasattr(stacktrace, 'tb_frame'): frame = stacktrace.tb_frame - node = frame.f_locals.get(u'self') + node = frame.f_locals.get('self') if isinstance(node, Nodes.Node): code = frame.f_code method_name = code.co_name @@ -153,12 +153,12 @@ class TreeVisitor(object): def find_handler(self, obj): # to resolve, try entire hierarchy cls = type(obj) - pattern = "visit_%s" mro = inspect.getmro(cls) for mro_cls in mro: - handler_method = getattr(self, pattern % 
mro_cls.__name__, None) + handler_method = getattr(self, "visit_" + mro_cls.__name__, None) if handler_method is not None: return handler_method + print(type(self), cls) if self.access_path: print(self.access_path) @@ -167,10 +167,12 @@ class TreeVisitor(object): raise RuntimeError("Visitor %r does not accept object: %s" % (self, obj)) def visit(self, obj): + # generic def entry point for calls from Python subclasses return self._visit(obj) @cython.final def _visit(self, obj): + # fast cdef entry point for calls from Cython subclasses try: try: handler_method = self.dispatch_table[type(obj)] @@ -189,17 +191,20 @@ class TreeVisitor(object): @cython.final def _visitchild(self, child, parent, attrname, idx): + # fast cdef entry point for calls from Cython subclasses self.access_path.append((parent, attrname, idx)) result = self._visit(child) self.access_path.pop() return result - def visitchildren(self, parent, attrs=None): - return self._visitchildren(parent, attrs) + def visitchildren(self, parent, attrs=None, exclude=None): + # generic def entry point for calls from Python subclasses + return self._visitchildren(parent, attrs, exclude) @cython.final @cython.locals(idx=cython.Py_ssize_t) - def _visitchildren(self, parent, attrs): + def _visitchildren(self, parent, attrs, exclude): + # fast cdef entry point for calls from Cython subclasses """ Visits the children of the given parent. If parent is None, returns immediately (returning None). 
@@ -213,6 +218,7 @@ class TreeVisitor(object): result = {} for attr in parent.child_attrs: if attrs is not None and attr not in attrs: continue + if exclude is not None and attr in exclude: continue child = getattr(parent, attr) if child is not None: if type(child) is list: @@ -246,18 +252,12 @@ class VisitorTransform(TreeVisitor): """ def visitchildren(self, parent, attrs=None, exclude=None): # generic def entry point for calls from Python subclasses - if exclude is not None: - attrs = self._select_attrs(parent.child_attrs if attrs is None else attrs, exclude) - return self._process_children(parent, attrs) + return self._process_children(parent, attrs, exclude) @cython.final - def _select_attrs(self, attrs, exclude): - return [name for name in attrs if name not in exclude] - - @cython.final - def _process_children(self, parent, attrs=None): + def _process_children(self, parent, attrs=None, exclude=None): # fast cdef entry point for calls from Cython subclasses - result = self._visitchildren(parent, attrs) + result = self._visitchildren(parent, attrs, exclude) for attr, newnode in result.items(): if type(newnode) is list: newnode = self._flatten_list(newnode) @@ -276,6 +276,16 @@ class VisitorTransform(TreeVisitor): newlist.append(x) return newlist + def visitchild(self, parent, attr, idx=0): + # Helper to visit specific children from Python subclasses + child = getattr(parent, attr) + if child is not None: + node = self._visitchild(child, parent, attr, idx) + if node is not child: + setattr(parent, attr, node) + child = node + return child + def recurse_to_children(self, node): self._process_children(node) return node @@ -296,8 +306,8 @@ class CythonTransform(VisitorTransform): self.context = context def __call__(self, node): - from . 
import ModuleNode - if isinstance(node, ModuleNode.ModuleNode): + from .ModuleNode import ModuleNode + if isinstance(node, ModuleNode): self.current_directives = node.directives return super(CythonTransform, self).__call__(node) @@ -370,11 +380,15 @@ class EnvTransform(CythonTransform): self.env_stack.pop() def visit_FuncDefNode(self, node): + self.visit_func_outer_attrs(node) self.enter_scope(node, node.local_scope) - self._process_children(node) + self.visitchildren(node, attrs=None, exclude=node.outer_attrs) self.exit_scope() return node + def visit_func_outer_attrs(self, node): + self.visitchildren(node, attrs=node.outer_attrs) + def visit_GeneratorBodyDefNode(self, node): self._process_children(node) return node @@ -569,7 +583,18 @@ class MethodDispatcherTransform(EnvTransform): ### dispatch to specific handlers def _find_handler(self, match_name, has_kwargs): - call_type = has_kwargs and 'general' or 'simple' + try: + match_name.encode('ascii') + except UnicodeEncodeError: + # specifically when running the Cython compiler under Python 2 + # getattr can't take a unicode string. + # Classes with unicode names won't have specific handlers and thus it + # should be OK to return None. 
+ # Doing the test here ensures that the same code gets run on + # Python 2 and 3 + return None + + call_type = 'general' if has_kwargs else 'simple' handler = getattr(self, '_handle_%s_%s' % (call_type, match_name), None) if handler is None: handler = getattr(self, '_handle_any_%s' % match_name, None) @@ -662,8 +687,8 @@ class MethodDispatcherTransform(EnvTransform): method_handler = self._find_handler( "method_%s_%s" % (type_name, attr_name), kwargs) if method_handler is None: - if (attr_name in TypeSlots.method_name_to_slot - or attr_name == '__new__'): + if (attr_name in TypeSlots.special_method_names + or attr_name in ['__new__', '__class__']): method_handler = self._find_handler( "slot%s" % attr_name, kwargs) if method_handler is None: @@ -733,7 +758,7 @@ class NodeFinder(TreeVisitor): elif node is self.node: self.found = True else: - self._visitchildren(node, None) + self._visitchildren(node, None, None) def tree_contains(tree, node): finder = NodeFinder(node) @@ -821,6 +846,12 @@ class PrintTree(TreeVisitor): result += "(type=%s, name=\"%s\")" % (repr(node.type), node.name) elif isinstance(node, Nodes.DefNode): result += "(name=\"%s\")" % node.name + elif isinstance(node, Nodes.CFuncDefNode): + result += "(name=\"%s\")" % node.declared_name() + elif isinstance(node, ExprNodes.AttributeNode): + result += "(type=%s, attribute=\"%s\")" % (repr(node.type), node.attribute) + elif isinstance(node, (ExprNodes.ConstNode, ExprNodes.PyConstNode)): + result += "(type=%s, value=%r)" % (repr(node.type), node.value) elif isinstance(node, ExprNodes.ExprNode): t = node.type result += "(type=%s)" % repr(t) |