Changing pybind11::str to exclusively hold PyUnicodeObject (#2409)

* Changing pybind11::str to exclusively hold PyUnicodeObject
This commit is contained in:
Ralf W. Grosse-Kunstleve
2021-01-29 12:41:42 -05:00
committed by GitHub
parent 587d5f840a
commit 0432ae7c52
8 changed files with 136 additions and 13 deletions

View File

@@ -120,14 +120,17 @@ def test_str(doc):
assert s1 == s2
malformed_utf8 = b"\x80"
assert m.str_from_object(malformed_utf8) is malformed_utf8 # To be fixed; see #2380
if hasattr(m, "PYBIND11_STR_LEGACY_PERMISSIVE"):
assert m.str_from_object(malformed_utf8) is malformed_utf8
elif env.PY2:
with pytest.raises(UnicodeDecodeError):
m.str_from_object(malformed_utf8)
else:
assert m.str_from_object(malformed_utf8) == "b'\\x80'"
if env.PY2:
# with pytest.raises(UnicodeDecodeError):
# m.str_from_object(malformed_utf8)
with pytest.raises(UnicodeDecodeError):
m.str_from_handle(malformed_utf8)
else:
# assert m.str_from_object(malformed_utf8) == "b'\\x80'"
assert m.str_from_handle(malformed_utf8) == "b'\\x80'"
@@ -303,13 +306,26 @@ def test_pybind11_str_raw_str():
valid_orig = u"DZ"
valid_utf8 = valid_orig.encode("utf-8")
valid_cvt = cvt(valid_utf8)
assert type(valid_cvt) == bytes # Probably surprising.
assert valid_cvt == b"\xc7\xb1"
if hasattr(m, "PYBIND11_STR_LEGACY_PERMISSIVE"):
assert valid_cvt is valid_utf8
else:
assert type(valid_cvt) is unicode if env.PY2 else str # noqa: F821
if env.PY2:
assert valid_cvt == valid_orig
else:
assert valid_cvt == "b'\\xc7\\xb1'"
malformed_utf8 = b"\x80"
malformed_cvt = cvt(malformed_utf8)
assert type(malformed_cvt) == bytes # Probably surprising.
assert malformed_cvt == b"\x80"
if hasattr(m, "PYBIND11_STR_LEGACY_PERMISSIVE"):
assert cvt(malformed_utf8) is malformed_utf8
else:
if env.PY2:
with pytest.raises(UnicodeDecodeError):
cvt(malformed_utf8)
else:
malformed_cvt = cvt(malformed_utf8)
assert type(malformed_cvt) is str
assert malformed_cvt == "b'\\x80'"
def test_implicit_casting():
@@ -488,3 +504,40 @@ def test_builtin_functions():
"object of type 'generator' has no len()",
"'generator' has no length",
] # PyPy
def test_isinstance_string_types():
assert m.isinstance_pybind11_bytes(b"")
assert not m.isinstance_pybind11_bytes(u"")
assert m.isinstance_pybind11_str(u"")
if hasattr(m, "PYBIND11_STR_LEGACY_PERMISSIVE"):
assert m.isinstance_pybind11_str(b"")
else:
assert not m.isinstance_pybind11_str(b"")
def test_pass_bytes_or_unicode_to_string_types():
assert m.pass_to_pybind11_bytes(b"Bytes") == 5
with pytest.raises(TypeError):
m.pass_to_pybind11_bytes(u"Str")
if hasattr(m, "PYBIND11_STR_LEGACY_PERMISSIVE") or env.PY2:
assert m.pass_to_pybind11_str(b"Bytes") == 5
else:
with pytest.raises(TypeError):
m.pass_to_pybind11_str(b"Bytes")
assert m.pass_to_pybind11_str(u"Str") == 3
assert m.pass_to_std_string(b"Bytes") == 5
assert m.pass_to_std_string(u"Str") == 3
malformed_utf8 = b"\x80"
if hasattr(m, "PYBIND11_STR_LEGACY_PERMISSIVE"):
assert m.pass_to_pybind11_str(malformed_utf8) == 1
elif env.PY2:
with pytest.raises(UnicodeDecodeError):
m.pass_to_pybind11_str(malformed_utf8)
else:
with pytest.raises(TypeError):
m.pass_to_pybind11_str(malformed_utf8)