init

2022-03-31 20:20:07 -05:00 · 2022-03-31 20:20:07 -05:00 · 38355d2442
commit 38355d2442
9083 changed files with 1225834 additions and 0 deletions
--- a/.venv/lib/python3.8/site-packages/mypyc/lib-rt/str_ops.c
+++ b/.venv/lib/python3.8/site-packages/mypyc/lib-rt/str_ops.c
@ -0,0 +1,241 @@
+// String primitive operations
+//
+// These are registered in mypyc.primitives.str_ops.
+
+#include <Python.h>
+#include "CPy.h"
+
+// Return str[index] as a new one-character string, or NULL with an
+// exception set.
+//
+// Negative indexes count from the end of the string. Raises IndexError when
+// the (adjusted) index is out of range and OverflowError when the index is
+// not a short tagged integer.
+PyObject *CPyStr_GetItem(PyObject *str, CPyTagged index) {
+    if (PyUnicode_READY(str) != -1) {
+        if (CPyTagged_CheckShort(index)) {
+            Py_ssize_t n = CPyTagged_ShortAsSsize_t(index);
+            Py_ssize_t size = PyUnicode_GET_LENGTH(str);
+            if (n < 0)
+                n += size;
+            if (n < 0 || n >= size) {
+                PyErr_SetString(PyExc_IndexError, "string index out of range");
+                return NULL;
+            }
+            enum PyUnicode_Kind kind = (enum PyUnicode_Kind)PyUnicode_KIND(str);
+            void *data = PyUnicode_DATA(str);
+            Py_UCS4 ch = PyUnicode_READ(kind, data, n);
+            // Allocate a string just wide enough to hold this one character.
+            PyObject *unicode = PyUnicode_New(1, ch);
+            if (unicode == NULL)
+                return NULL;
+
+            // Store the character using whichever width PyUnicode_New() chose.
+            if (PyUnicode_KIND(unicode) == PyUnicode_1BYTE_KIND) {
+                PyUnicode_1BYTE_DATA(unicode)[0] = (Py_UCS1)ch;
+            } else if (PyUnicode_KIND(unicode) == PyUnicode_2BYTE_KIND) {
+                PyUnicode_2BYTE_DATA(unicode)[0] = (Py_UCS2)ch;
+            } else {
+                assert(PyUnicode_KIND(unicode) == PyUnicode_4BYTE_KIND);
+                PyUnicode_4BYTE_DATA(unicode)[0] = ch;
+            }
+            return unicode;
+        } else {
+            PyErr_SetString(PyExc_OverflowError, CPYTHON_LARGE_INT_ERRMSG);
+            return NULL;
+        }
+    } else {
+        // Generic fallback through the object protocol.
+        PyObject *index_obj = CPyTagged_AsObject(index);
+        if (index_obj == NULL)
+            return NULL;
+        PyObject *item = PyObject_GetItem(str, index_obj);
+        // PyObject_GetItem() does not steal its key, so the reference
+        // obtained from CPyTagged_AsObject() must be released here;
+        // returning directly would leak it.
+        Py_DECREF(index_obj);
+        return item;
+    }
+}
+
+// Concatenate `len` str objects, passed as variadic PyObject * arguments,
+// into a single new string.
+//
+// A simplification of _PyUnicode_JoinArray() from CPython 3.9.6.
+//
+// Returns the new string, or NULL with an exception set: TypeError if an
+// argument is not a str, OverflowError if the combined length would exceed
+// PY_SSIZE_T_MAX, or the error left by PyUnicode_READY()/PyUnicode_New().
+PyObject *CPyStr_Build(Py_ssize_t len, ...) {
+    Py_ssize_t i;
+    va_list args;
+
+    // Calculate the total amount of space and check
+    // whether all components have the same kind.
+    Py_ssize_t sz = 0;
+    Py_UCS4 maxchar = 0;
+    int use_memcpy = 1; // Use memcpy by default
+    PyObject *last_obj = NULL;
+
+    // Every exit from this loop, including the error returns, must be
+    // paired with va_end(): leaving the function after va_start() without
+    // va_end() is undefined behaviour.
+    va_start(args, len);
+    for (i = 0; i < len; i++) {
+        PyObject *item = va_arg(args, PyObject *);
+        if (!PyUnicode_Check(item)) {
+            PyErr_Format(PyExc_TypeError,
+                         "sequence item %zd: expected str instance,"
+                         " %.80s found",
+                         i, Py_TYPE(item)->tp_name);
+            va_end(args);
+            return NULL;
+        }
+        if (PyUnicode_READY(item) == -1) {
+            va_end(args);
+            return NULL;
+        }
+
+        size_t add_sz = PyUnicode_GET_LENGTH(item);
+        Py_UCS4 item_maxchar = PyUnicode_MAX_CHAR_VALUE(item);
+        maxchar = Py_MAX(maxchar, item_maxchar);
+
+        // Using size_t to avoid overflow during arithmetic calculation
+        if (add_sz > (size_t)(PY_SSIZE_T_MAX - sz)) {
+            PyErr_SetString(PyExc_OverflowError,
+                            "join() result is too long for a Python string");
+            va_end(args);
+            return NULL;
+        }
+        sz += add_sz;
+
+        // If these strings have different kind, we would call
+        // _PyUnicode_FastCopyCharacters() in the following part.
+        if (use_memcpy && last_obj != NULL) {
+            if (PyUnicode_KIND(last_obj) != PyUnicode_KIND(item))
+                use_memcpy = 0;
+        }
+        last_obj = item;
+    }
+    va_end(args);
+
+    // Construct the string
+    PyObject *res = PyUnicode_New(sz, maxchar);
+    if (res == NULL)
+        return NULL;
+
+    if (use_memcpy) {
+        // All items share one kind, so their buffers can be copied verbatim;
+        // `kind` is used as the per-character width in bytes.
+        unsigned char *res_data = PyUnicode_1BYTE_DATA(res);
+        unsigned int kind = PyUnicode_KIND(res);
+
+        va_start(args, len);
+        for (i = 0; i < len; ++i) {
+            PyObject *item = va_arg(args, PyObject *);
+            Py_ssize_t itemlen = PyUnicode_GET_LENGTH(item);
+            if (itemlen != 0) {
+                memcpy(res_data, PyUnicode_DATA(item), kind * itemlen);
+                res_data += kind * itemlen;
+            }
+        }
+        va_end(args);
+        assert(res_data == PyUnicode_1BYTE_DATA(res) + kind * PyUnicode_GET_LENGTH(res));
+    } else {
+        // Mixed kinds: copy item by item at a running character offset.
+        Py_ssize_t res_offset = 0;
+
+        va_start(args, len);
+        for (i = 0; i < len; ++i) {
+            PyObject *item = va_arg(args, PyObject *);
+            Py_ssize_t itemlen = PyUnicode_GET_LENGTH(item);
+            if (itemlen != 0) {
+                _PyUnicode_FastCopyCharacters(res, res_offset, item, 0, itemlen);
+                res_offset += itemlen;
+            }
+        }
+        va_end(args);
+        assert(res_offset == PyUnicode_GET_LENGTH(res));
+    }
+
+    assert(_PyUnicode_CheckConsistency(res, 1));
+    return res;
+}
+
+// Split str on sep with PyUnicode_Split(), using max_split as the split
+// limit.
+//
+// Returns NULL with OverflowError set when CPyTagged_AsSsize_t() yields -1
+// while an exception is pending.
+PyObject *CPyStr_Split(PyObject *str, PyObject *sep, CPyTagged max_split) {
+    Py_ssize_t limit = CPyTagged_AsSsize_t(max_split);
+    int conversion_failed = (limit == -1) && PyErr_Occurred();
+
+    if (!conversion_failed) {
+        return PyUnicode_Split(str, sep, limit);
+    }
+    PyErr_SetString(PyExc_OverflowError, CPYTHON_LARGE_INT_ERRMSG);
+    return NULL;
+}
+
+// Replace occurrences of old_substr in str with new_substr through
+// PyUnicode_Replace(), using max_replace as the replacement limit.
+//
+// Returns NULL with OverflowError set when CPyTagged_AsSsize_t() yields -1
+// while an exception is pending.
+PyObject *CPyStr_Replace(PyObject *str, PyObject *old_substr,
+                         PyObject *new_substr, CPyTagged max_replace) {
+    Py_ssize_t limit = CPyTagged_AsSsize_t(max_replace);
+    int conversion_failed = (limit == -1) && PyErr_Occurred();
+
+    if (!conversion_failed) {
+        return PyUnicode_Replace(str, old_substr, new_substr, limit);
+    }
+    PyErr_SetString(PyExc_OverflowError, CPYTHON_LARGE_INT_ERRMSG);
+    return NULL;
+}
+
+// Report whether self starts with subobj, matching over the full length of
+// self (PyUnicode_Tailmatch() with direction -1).
+//
+// NOTE(review): PyUnicode_Tailmatch() is documented to return -1 on error;
+// converted to bool that reads as true. Confirm callers cannot reach the
+// error path.
+bool CPyStr_Startswith(PyObject *self, PyObject *subobj) {
+    return PyUnicode_Tailmatch(self, subobj, 0, PyUnicode_GET_LENGTH(self), -1);
+}
+
+// Report whether self ends with subobj, matching over the full length of
+// self (PyUnicode_Tailmatch() with direction 1).
+//
+// NOTE(review): PyUnicode_Tailmatch() is documented to return -1 on error;
+// converted to bool that reads as true. Confirm callers cannot reach the
+// error path.
+bool CPyStr_Endswith(PyObject *self, PyObject *subobj) {
+    return PyUnicode_Tailmatch(self, subobj, 0, PyUnicode_GET_LENGTH(self), 1);
+}
+
+/* Append o2 to o1 through PyUnicode_Append() -- a dodgy attempt to append
+   in place -- and return whatever that call leaves in the slot for o1. */
+PyObject *CPyStr_Append(PyObject *o1, PyObject *o2) {
+    PyObject *result = o1;
+    PyUnicode_Append(&result, o2);
+    return result;
+}
+
+// Return obj[start:end].
+//
+// Exact str objects with short tagged bounds take a fast path: negative
+// bounds are offset by the string length and clamped at zero, then
+// PyUnicode_Substring() is called. Anything else is delegated to
+// CPyObject_GetSlice().
+PyObject *CPyStr_GetSlice(PyObject *obj, CPyTagged start, CPyTagged end) {
+    int fast_path = PyUnicode_CheckExact(obj)
+                    && CPyTagged_CheckShort(start) && CPyTagged_CheckShort(end);
+    if (!likely(fast_path)) {
+        return CPyObject_GetSlice(obj, start, end);
+    }
+
+    Py_ssize_t lo = CPyTagged_ShortAsSsize_t(start);
+    Py_ssize_t hi = CPyTagged_ShortAsSsize_t(end);
+
+    // A negative bound is relative to the end; clamp it at the beginning.
+    if (lo < 0) {
+        lo += PyUnicode_GET_LENGTH(obj);
+        lo = (lo < 0) ? 0 : lo;
+    }
+    if (hi < 0) {
+        hi += PyUnicode_GET_LENGTH(obj);
+        hi = (hi < 0) ? 0 : hi;
+    }
+    return PyUnicode_Substring(obj, lo, hi);
+}
+
+/* Check if the given string is true (i.e. its length isn't zero) */
+bool CPyStr_IsTrue(PyObject *obj) {
+    return PyUnicode_GET_LENGTH(obj) != 0;
+}
+
+// Return the length of str as reported by PyUnicode_GET_LENGTH(), or -1
+// when PyUnicode_READY() fails.
+Py_ssize_t CPyStr_Size_size_t(PyObject *str) {
+    if (PyUnicode_READY(str) == -1) {
+        return -1;
+    }
+    return PyUnicode_GET_LENGTH(str);
+}
+
+// Decode obj to a str.
+//
+// encoding and errors may each be NULL, in which case NULL is forwarded to
+// the decoder; otherwise each is converted to UTF-8 first, and NULL is
+// returned if that conversion fails. bytes objects are decoded straight
+// from their buffer with PyUnicode_Decode(); any other object goes through
+// PyUnicode_FromEncodedObject().
+PyObject *CPy_Decode(PyObject *obj, PyObject *encoding, PyObject *errors) {
+    const char *enc_name = NULL;
+    const char *err_mode = NULL;
+
+    if (encoding != NULL) {
+        enc_name = PyUnicode_AsUTF8AndSize(encoding, NULL);
+        if (enc_name == NULL) {
+            return NULL;
+        }
+    }
+    if (errors != NULL) {
+        err_mode = PyUnicode_AsUTF8AndSize(errors, NULL);
+        if (err_mode == NULL) {
+            return NULL;
+        }
+    }
+
+    if (!PyBytes_Check(obj)) {
+        return PyUnicode_FromEncodedObject(obj, enc_name, err_mode);
+    }
+    return PyUnicode_Decode(PyBytes_AS_STRING(obj), PyBytes_GET_SIZE(obj),
+                            enc_name, err_mode);
+}
+
+// Encode the str obj with PyUnicode_AsEncodedString().
+//
+// encoding and errors may each be NULL, in which case NULL is forwarded to
+// the encoder; otherwise each is converted to UTF-8 first, and NULL is
+// returned if that conversion fails. A non-str obj is rejected with
+// PyErr_BadArgument().
+PyObject *CPy_Encode(PyObject *obj, PyObject *encoding, PyObject *errors) {
+    const char *enc_name = NULL;
+    const char *err_mode = NULL;
+
+    if (encoding != NULL) {
+        enc_name = PyUnicode_AsUTF8AndSize(encoding, NULL);
+        if (enc_name == NULL) {
+            return NULL;
+        }
+    }
+    if (errors != NULL) {
+        err_mode = PyUnicode_AsUTF8AndSize(errors, NULL);
+        if (err_mode == NULL) {
+            return NULL;
+        }
+    }
+
+    if (!PyUnicode_Check(obj)) {
+        PyErr_BadArgument();
+        return NULL;
+    }
+    return PyUnicode_AsEncodedString(obj, enc_name, err_mode);
+}