From 22ad81305832fc8ef16175cef01ebb66e121e9a2 Mon Sep 17 00:00:00 2001 From: George Robertson <41301972+georgeRobertson2@users.noreply.github.com> Date: Mon, 11 May 2026 17:13:07 +0100 Subject: [PATCH 01/12] build: bump python to 3.12 (#101) * build: allow python 3.12 install with DVE deal with typing issue on get_type_hints generated from bumping from 3.11 to 3.12 * style: upgrade pylint and dependencies and fix linting following upgrade * build: upgrade pydantic from 1.10.16 -> 1.10.19 removed previous typing fix as pydantic upgrade fixes the typing issues in 3.12 * refactor: remove previously missed typing fix no longer needed * build: lock all dependency versions * build: upgrade moto to a supported python 3.12 version * build: upgrade isort to version supported by python 3.12 * build: upgrade mypy to version supporting python 3.12 * build: update xlsx2csv to supported python 3.12 version and removed faker dependency --- .mise.toml | 2 +- poetry.lock | 508 ++++++++---------- pylint_checkers/check_typing_imports.py | 113 ---- pyproject.toml | 45 +- .../backends/implementations/duckdb/rules.py | 2 +- .../core_engine/backends/metadata/contract.py | 4 +- src/dve/core_engine/backends/utilities.py | 2 +- .../core_engine/configuration/v1/__init__.py | 18 +- src/dve/core_engine/engine.py | 20 +- src/dve/core_engine/type_hints.py | 2 +- src/dve/metadata_parser/domain_types.py | 2 +- src/dve/metadata_parser/models.py | 10 +- src/dve/parser/file_handling/helpers.py | 4 +- src/dve/pipeline/pipeline.py | 2 +- 14 files changed, 286 insertions(+), 448 deletions(-) delete mode 100644 pylint_checkers/check_typing_imports.py diff --git a/.mise.toml b/.mise.toml index befe0ad..d41748e 100644 --- a/.mise.toml +++ b/.mise.toml @@ -1,4 +1,4 @@ [tools] -python="3.11" +python="3.12" poetry="2.3.3" java="liberica-1.8.0" diff --git a/poetry.lock b/poetry.lock index 634f9e1..5548276 100644 --- a/poetry.lock +++ b/poetry.lock @@ -17,23 +17,18 @@ test = ["coverage", "mypy", "pexpect", 
"ruff", "wheel"] [[package]] name = "astroid" -version = "2.14.2" +version = "3.3.9" description = "An abstract syntax tree for Python with inference support." optional = false -python-versions = ">=3.7.2" +python-versions = ">=3.9.0" groups = ["dev", "lint"] files = [ - {file = "astroid-2.14.2-py3-none-any.whl", hash = "sha256:0e0e3709d64fbffd3037e4ff403580550f14471fd3eaae9fa11cc9a5c7901153"}, - {file = "astroid-2.14.2.tar.gz", hash = "sha256:a3cf9f02c53dd259144a7e8f3ccd75d67c9a8c716ef183e0c1f291bc5d7bb3cf"}, + {file = "astroid-3.3.9-py3-none-any.whl", hash = "sha256:d05bfd0acba96a7bd43e222828b7d9bc1e138aaeb0649707908d3702a9831248"}, + {file = "astroid-3.3.9.tar.gz", hash = "sha256:622cc8e3048684aa42c820d9d218978021c3c3d174fb03a9f0d615921744f550"}, ] [package.dependencies] -lazy-object-proxy = ">=1.4.0" typing-extensions = {version = ">=4.0.0", markers = "python_version < \"3.11\""} -wrapt = [ - {version = ">=1.11,<2", markers = "python_version < \"3.11\""}, - {version = ">=1.14,<2", markers = "python_version >= \"3.11\""}, -] [[package]] name = "behave" @@ -1225,21 +1220,6 @@ typing-extensions = {version = ">=4.6.0", markers = "python_version < \"3.13\""} [package.extras] test = ["pytest (>=6)"] -[[package]] -name = "faker" -version = "18.11.1" -description = "Faker is a Python package that generates fake data for you." -optional = false -python-versions = ">=3.7" -groups = ["dev", "test"] -files = [ - {file = "Faker-18.11.1-py3-none-any.whl", hash = "sha256:02980fe15acd58861305568bae0a277680792a505a5a45a309d352f79c452dd1"}, - {file = "Faker-18.11.1.tar.gz", hash = "sha256:df4ee36d058a6a96de9d5e645571ef8536946a0b62db841494f8a3bc3bcdc5af"}, -] - -[package.dependencies] -python-dateutil = ">=2.4" - [[package]] name = "filelock" version = "3.29.0" @@ -1353,21 +1333,18 @@ files = [ [[package]] name = "isort" -version = "5.11.5" +version = "5.13.2" description = "A Python utility / library to sort Python imports." 
optional = false -python-versions = ">=3.7.0" +python-versions = ">=3.8.0" groups = ["dev", "lint"] files = [ - {file = "isort-5.11.5-py3-none-any.whl", hash = "sha256:ba1d72fb2595a01c7895a5128f9585a5cc4b6d395f1c8d514989b9a7eb2a8746"}, - {file = "isort-5.11.5.tar.gz", hash = "sha256:6be1f76a507cb2ecf16c7cf14a37e41609ca082330be4e3436a18ef74add55db"}, + {file = "isort-5.13.2-py3-none-any.whl", hash = "sha256:8ca5e72a8d85860d5a3fa69b8745237f2939afe12dbf656afbcb47fe72d947a6"}, + {file = "isort-5.13.2.tar.gz", hash = "sha256:48fdfcb9face5d58a4f6dde2e72a1fb8dcaf8ab26f95ab49fab84c2ddefb0109"}, ] [package.extras] -colors = ["colorama (>=0.4.3,<0.5.0)"] -pipfile-deprecated-finder = ["pip-shims (>=0.5.2)", "pipreqs", "requirementslib"] -plugins = ["setuptools"] -requirements-deprecated-finder = ["pip-api", "pipreqs"] +colors = ["colorama (>=0.4.6)"] [[package]] name = "jinja2" @@ -1399,60 +1376,6 @@ files = [ {file = "jmespath-1.1.0.tar.gz", hash = "sha256:472c87d80f36026ae83c6ddd0f1d05d4e510134ed462851fd5f754c8c3cbb88d"}, ] -[[package]] -name = "lazy-object-proxy" -version = "1.12.0" -description = "A fast and thorough lazy object proxy." 
-optional = false -python-versions = ">=3.9" -groups = ["dev", "lint"] -files = [ - {file = "lazy_object_proxy-1.12.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:61d5e3310a4aa5792c2b599a7a78ccf8687292c8eb09cf187cca8f09cf6a7519"}, - {file = "lazy_object_proxy-1.12.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c1ca33565f698ac1aece152a10f432415d1a2aa9a42dfe23e5ba2bc255ab91f6"}, - {file = "lazy_object_proxy-1.12.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d01c7819a410f7c255b20799b65d36b414379a30c6f1684c7bd7eb6777338c1b"}, - {file = "lazy_object_proxy-1.12.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:029d2b355076710505c9545aef5ab3f750d89779310e26ddf2b7b23f6ea03cd8"}, - {file = "lazy_object_proxy-1.12.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:cc6e3614eca88b1c8a625fc0a47d0d745e7c3255b21dac0e30b3037c5e3deeb8"}, - {file = "lazy_object_proxy-1.12.0-cp310-cp310-win_amd64.whl", hash = "sha256:be5fe974e39ceb0d6c9db0663c0464669cf866b2851c73971409b9566e880eab"}, - {file = "lazy_object_proxy-1.12.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1cf69cd1a6c7fe2dbcc3edaa017cf010f4192e53796538cc7d5e1fedbfa4bcff"}, - {file = "lazy_object_proxy-1.12.0-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:efff4375a8c52f55a145dc8487a2108c2140f0bec4151ab4e1843e52eb9987ad"}, - {file = "lazy_object_proxy-1.12.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1192e8c2f1031a6ff453ee40213afa01ba765b3dc861302cd91dbdb2e2660b00"}, - {file = "lazy_object_proxy-1.12.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:3605b632e82a1cbc32a1e5034278a64db555b3496e0795723ee697006b980508"}, - {file = "lazy_object_proxy-1.12.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:a61095f5d9d1a743e1e20ec6d6db6c2ca511961777257ebd9b288951b23b44fa"}, - {file = 
"lazy_object_proxy-1.12.0-cp311-cp311-win_amd64.whl", hash = "sha256:997b1d6e10ecc6fb6fe0f2c959791ae59599f41da61d652f6c903d1ee58b7370"}, - {file = "lazy_object_proxy-1.12.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8ee0d6027b760a11cc18281e702c0309dd92da458a74b4c15025d7fc490deede"}, - {file = "lazy_object_proxy-1.12.0-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:4ab2c584e3cc8be0dfca422e05ad30a9abe3555ce63e9ab7a559f62f8dbc6ff9"}, - {file = "lazy_object_proxy-1.12.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:14e348185adbd03ec17d051e169ec45686dcd840a3779c9d4c10aabe2ca6e1c0"}, - {file = "lazy_object_proxy-1.12.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:c4fcbe74fb85df8ba7825fa05eddca764138da752904b378f0ae5ab33a36c308"}, - {file = "lazy_object_proxy-1.12.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:563d2ec8e4d4b68ee7848c5ab4d6057a6d703cb7963b342968bb8758dda33a23"}, - {file = "lazy_object_proxy-1.12.0-cp312-cp312-win_amd64.whl", hash = "sha256:53c7fd99eb156bbb82cbc5d5188891d8fdd805ba6c1e3b92b90092da2a837073"}, - {file = "lazy_object_proxy-1.12.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:86fd61cb2ba249b9f436d789d1356deae69ad3231dc3c0f17293ac535162672e"}, - {file = "lazy_object_proxy-1.12.0-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:81d1852fb30fab81696f93db1b1e55a5d1ff7940838191062f5f56987d5fcc3e"}, - {file = "lazy_object_proxy-1.12.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:be9045646d83f6c2664c1330904b245ae2371b5c57a3195e4028aedc9f999655"}, - {file = "lazy_object_proxy-1.12.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:67f07ab742f1adfb3966c40f630baaa7902be4222a17941f3d85fd1dae5565ff"}, - {file = "lazy_object_proxy-1.12.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = 
"sha256:75ba769017b944fcacbf6a80c18b2761a1795b03f8899acdad1f1c39db4409be"}, - {file = "lazy_object_proxy-1.12.0-cp313-cp313-win_amd64.whl", hash = "sha256:7b22c2bbfb155706b928ac4d74c1a63ac8552a55ba7fff4445155523ea4067e1"}, - {file = "lazy_object_proxy-1.12.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:4a79b909aa16bde8ae606f06e6bbc9d3219d2e57fb3e0076e17879072b742c65"}, - {file = "lazy_object_proxy-1.12.0-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:338ab2f132276203e404951205fe80c3fd59429b3a724e7b662b2eb539bb1be9"}, - {file = "lazy_object_proxy-1.12.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8c40b3c9faee2e32bfce0df4ae63f4e73529766893258eca78548bac801c8f66"}, - {file = "lazy_object_proxy-1.12.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:717484c309df78cedf48396e420fa57fc8a2b1f06ea889df7248fdd156e58847"}, - {file = "lazy_object_proxy-1.12.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:a6b7ea5ea1ffe15059eb44bcbcb258f97bcb40e139b88152c40d07b1a1dfc9ac"}, - {file = "lazy_object_proxy-1.12.0-cp313-cp313t-win_amd64.whl", hash = "sha256:08c465fb5cd23527512f9bd7b4c7ba6cec33e28aad36fbbe46bf7b858f9f3f7f"}, - {file = "lazy_object_proxy-1.12.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c9defba70ab943f1df98a656247966d7729da2fe9c2d5d85346464bf320820a3"}, - {file = "lazy_object_proxy-1.12.0-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:6763941dbf97eea6b90f5b06eb4da9418cc088fce0e3883f5816090f9afcde4a"}, - {file = "lazy_object_proxy-1.12.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:fdc70d81235fc586b9e3d1aeef7d1553259b62ecaae9db2167a5d2550dcc391a"}, - {file = "lazy_object_proxy-1.12.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:0a83c6f7a6b2bfc11ef3ed67f8cbe99f8ff500b05655d8e7df9aab993a6abc95"}, - {file = 
"lazy_object_proxy-1.12.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:256262384ebd2a77b023ad02fbcc9326282bcfd16484d5531154b02bc304f4c5"}, - {file = "lazy_object_proxy-1.12.0-cp314-cp314-win_amd64.whl", hash = "sha256:7601ec171c7e8584f8ff3f4e440aa2eebf93e854f04639263875b8c2971f819f"}, - {file = "lazy_object_proxy-1.12.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ae575ad9b674d0029fc077c5231b3bc6b433a3d1a62a8c363df96974b5534728"}, - {file = "lazy_object_proxy-1.12.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:31020c84005d3daa4cc0fa5a310af2066efe6b0d82aeebf9ab199292652ff036"}, - {file = "lazy_object_proxy-1.12.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:800f32b00a47c27446a2b767df7538e6c66a3488632c402b4fb2224f9794f3c0"}, - {file = "lazy_object_proxy-1.12.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:15400b18893f345857b9e18b9bd87bd06aba84af6ed086187add70aeaa3f93f1"}, - {file = "lazy_object_proxy-1.12.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:3d3964fbd326578bcdfffd017ef101b6fb0484f34e731fe060ba9b8816498c36"}, - {file = "lazy_object_proxy-1.12.0-cp39-cp39-win_amd64.whl", hash = "sha256:424a8ab6695400845c39f13c685050eab69fa0bbac5790b201cd27375e5e41d7"}, - {file = "lazy_object_proxy-1.12.0-pp39.pp310.pp311.graalpy311-none-any.whl", hash = "sha256:c3b2e0af1f7f77c4263759c4824316ce458fabe0fceadcd24ef8ca08b2d1e402"}, - {file = "lazy_object_proxy-1.12.0.tar.gz", hash = "sha256:1f5a462d92fd0cfb82f1fab28b51bfb209fabbe6aabf7f0d51472c0c124c0c61"}, -] - [[package]] name = "lxml" version = "4.9.4" @@ -1812,14 +1735,14 @@ typing-extensions = {version = ">=4.0", markers = "python_version < \"3.11\""} [[package]] name = "moto" -version = "4.0.13" -description = "A library that allows your python tests to easily mock out the boto library" +version = "4.2.14" +description = "" optional = false -python-versions = ">=3.6" +python-versions = ">=3.7" groups 
= ["dev", "test"] files = [ - {file = "moto-4.0.13-py3-none-any.whl", hash = "sha256:e73400c6d3fe06028aa7f07bb6f276f14260d289b70f38928a98e3d3d968352d"}, - {file = "moto-4.0.13.tar.gz", hash = "sha256:baf7d6969cf837990c730e6e648315bebc2e1c0038d9d8fc4f59d03561484469"}, + {file = "moto-4.2.14-py2.py3-none-any.whl", hash = "sha256:6d242dbbabe925bb385ddb6958449e5c827670b13b8e153ed63f91dbdb50372c"}, + {file = "moto-4.2.14.tar.gz", hash = "sha256:8f9263ca70b646f091edcc93e97cda864a542e6d16ed04066b1370ed217bd190"}, ] [package.dependencies] @@ -1827,7 +1750,7 @@ boto3 = ">=1.9.201" botocore = ">=1.12.201" cryptography = ">=3.3.1" Jinja2 = ">=2.10.1" -MarkupSafe = "!=2.0.0a1" +py-partiql-parser = {version = "0.5.0", optional = true, markers = "extra == \"s3\""} python-dateutil = ">=2.1,<3.0.0" PyYAML = {version = ">=5.1", optional = true, markers = "extra == \"s3\""} requests = ">=2.5" @@ -1836,78 +1759,73 @@ werkzeug = ">=0.5,<2.2.0 || >2.2.0,<2.2.1 || >2.2.1" xmltodict = "*" [package.extras] -all = ["PyYAML (>=5.1)", "aws-xray-sdk (>=0.93,!=0.96)", "cfn-lint (>=0.40.0)", "docker (>=2.5.1)", "ecdsa (!=0.15)", "graphql-core", "idna (>=2.5,<4)", "jsondiff (>=1.1.2)", "openapi-spec-validator (>=0.2.8)", "pyparsing (>=3.0.7)", "python-jose[cryptography] (>=3.1.0,<4.0.0)", "setuptools", "sshpubkeys (>=3.1.0)"] -apigateway = ["PyYAML (>=5.1)", "ecdsa (!=0.15)", "openapi-spec-validator (>=0.2.8)", "python-jose[cryptography] (>=3.1.0,<4.0.0)"] +all = ["PyYAML (>=5.1)", "aws-xray-sdk (>=0.93,!=0.96)", "cfn-lint (>=0.40.0)", "docker (>=3.0.0)", "ecdsa (!=0.15)", "graphql-core", "jsondiff (>=1.1.2)", "multipart", "openapi-spec-validator (>=0.5.0)", "py-partiql-parser (==0.5.0)", "pyparsing (>=3.0.7)", "python-jose[cryptography] (>=3.1.0,<4.0.0)", "setuptools", "sshpubkeys (>=3.1.0)"] +apigateway = ["PyYAML (>=5.1)", "ecdsa (!=0.15)", "openapi-spec-validator (>=0.5.0)", "python-jose[cryptography] (>=3.1.0,<4.0.0)"] apigatewayv2 = ["PyYAML (>=5.1)"] appsync = ["graphql-core"] -awslambda 
= ["docker (>=2.5.1)"] -batch = ["docker (>=2.5.1)"] -cloudformation = ["PyYAML (>=5.1)", "aws-xray-sdk (>=0.93,!=0.96)", "cfn-lint (>=0.40.0)", "docker (>=2.5.1)", "ecdsa (!=0.15)", "graphql-core", "idna (>=2.5,<4)", "jsondiff (>=1.1.2)", "openapi-spec-validator (>=0.2.8)", "pyparsing (>=3.0.7)", "python-jose[cryptography] (>=3.1.0,<4.0.0)", "setuptools", "sshpubkeys (>=3.1.0)"] +awslambda = ["docker (>=3.0.0)"] +batch = ["docker (>=3.0.0)"] +cloudformation = ["PyYAML (>=5.1)", "aws-xray-sdk (>=0.93,!=0.96)", "cfn-lint (>=0.40.0)", "docker (>=3.0.0)", "ecdsa (!=0.15)", "graphql-core", "jsondiff (>=1.1.2)", "openapi-spec-validator (>=0.5.0)", "py-partiql-parser (==0.5.0)", "pyparsing (>=3.0.7)", "python-jose[cryptography] (>=3.1.0,<4.0.0)", "setuptools", "sshpubkeys (>=3.1.0)"] cognitoidp = ["ecdsa (!=0.15)", "python-jose[cryptography] (>=3.1.0,<4.0.0)"] -ds = ["sshpubkeys (>=3.1.0)"] -dynamodb = ["docker (>=2.5.1)"] -dynamodb2 = ["docker (>=2.5.1)"] -dynamodbstreams = ["docker (>=2.5.1)"] -ebs = ["sshpubkeys (>=3.1.0)"] +dynamodb = ["docker (>=3.0.0)", "py-partiql-parser (==0.5.0)"] +dynamodbstreams = ["docker (>=3.0.0)", "py-partiql-parser (==0.5.0)"] ec2 = ["sshpubkeys (>=3.1.0)"] -efs = ["sshpubkeys (>=3.1.0)"] glue = ["pyparsing (>=3.0.7)"] iotdata = ["jsondiff (>=1.1.2)"] -route53resolver = ["sshpubkeys (>=3.1.0)"] -s3 = ["PyYAML (>=5.1)"] -server = ["PyYAML (>=5.1)", "aws-xray-sdk (>=0.93,!=0.96)", "cfn-lint (>=0.40.0)", "docker (>=2.5.1)", "ecdsa (!=0.15)", "flask (!=2.2.0,!=2.2.1)", "flask-cors", "graphql-core", "idna (>=2.5,<4)", "jsondiff (>=1.1.2)", "openapi-spec-validator (>=0.2.8)", "pyparsing (>=3.0.7)", "python-jose[cryptography] (>=3.1.0,<4.0.0)", "setuptools", "sshpubkeys (>=3.1.0)"] -ssm = ["PyYAML (>=5.1)", "dataclasses ; python_version < \"3.7\""] +proxy = ["PyYAML (>=5.1)", "aws-xray-sdk (>=0.93,!=0.96)", "cfn-lint (>=0.40.0)", "docker (>=2.5.1)", "ecdsa (!=0.15)", "graphql-core", "jsondiff (>=1.1.2)", "multipart", "openapi-spec-validator 
(>=0.5.0)", "py-partiql-parser (==0.5.0)", "pyparsing (>=3.0.7)", "python-jose[cryptography] (>=3.1.0,<4.0.0)", "setuptools", "sshpubkeys (>=3.1.0)"] +resourcegroupstaggingapi = ["PyYAML (>=5.1)", "cfn-lint (>=0.40.0)", "docker (>=3.0.0)", "ecdsa (!=0.15)", "graphql-core", "jsondiff (>=1.1.2)", "openapi-spec-validator (>=0.5.0)", "py-partiql-parser (==0.5.0)", "pyparsing (>=3.0.7)", "python-jose[cryptography] (>=3.1.0,<4.0.0)"] +s3 = ["PyYAML (>=5.1)", "py-partiql-parser (==0.5.0)"] +s3crc32c = ["PyYAML (>=5.1)", "crc32c", "py-partiql-parser (==0.5.0)"] +server = ["PyYAML (>=5.1)", "aws-xray-sdk (>=0.93,!=0.96)", "cfn-lint (>=0.40.0)", "docker (>=3.0.0)", "ecdsa (!=0.15)", "flask (!=2.2.0,!=2.2.1)", "flask-cors", "graphql-core", "jsondiff (>=1.1.2)", "openapi-spec-validator (>=0.5.0)", "py-partiql-parser (==0.5.0)", "pyparsing (>=3.0.7)", "python-jose[cryptography] (>=3.1.0,<4.0.0)", "setuptools", "sshpubkeys (>=3.1.0)"] +ssm = ["PyYAML (>=5.1)"] xray = ["aws-xray-sdk (>=0.93,!=0.96)", "setuptools"] [[package]] name = "mypy" -version = "0.991" +version = "1.11.2" description = "Optional static typing for Python" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" groups = ["dev", "lint"] files = [ - {file = "mypy-0.991-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7d17e0a9707d0772f4a7b878f04b4fd11f6f5bcb9b3813975a9b13c9332153ab"}, - {file = "mypy-0.991-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0714258640194d75677e86c786e80ccf294972cc76885d3ebbb560f11db0003d"}, - {file = "mypy-0.991-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0c8f3be99e8a8bd403caa8c03be619544bc2c77a7093685dcf308c6b109426c6"}, - {file = "mypy-0.991-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc9ec663ed6c8f15f4ae9d3c04c989b744436c16d26580eaa760ae9dd5d662eb"}, - {file = "mypy-0.991-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:4307270436fd7694b41f913eb09210faff27ea4979ecbcd849e57d2da2f65305"}, - {file = 
"mypy-0.991-cp310-cp310-win_amd64.whl", hash = "sha256:901c2c269c616e6cb0998b33d4adbb4a6af0ac4ce5cd078afd7bc95830e62c1c"}, - {file = "mypy-0.991-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:d13674f3fb73805ba0c45eb6c0c3053d218aa1f7abead6e446d474529aafc372"}, - {file = "mypy-0.991-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1c8cd4fb70e8584ca1ed5805cbc7c017a3d1a29fb450621089ffed3e99d1857f"}, - {file = "mypy-0.991-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:209ee89fbb0deed518605edddd234af80506aec932ad28d73c08f1400ef80a33"}, - {file = "mypy-0.991-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:37bd02ebf9d10e05b00d71302d2c2e6ca333e6c2a8584a98c00e038db8121f05"}, - {file = "mypy-0.991-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:26efb2fcc6b67e4d5a55561f39176821d2adf88f2745ddc72751b7890f3194ad"}, - {file = "mypy-0.991-cp311-cp311-win_amd64.whl", hash = "sha256:3a700330b567114b673cf8ee7388e949f843b356a73b5ab22dd7cff4742a5297"}, - {file = "mypy-0.991-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:1f7d1a520373e2272b10796c3ff721ea1a0712288cafaa95931e66aa15798813"}, - {file = "mypy-0.991-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:641411733b127c3e0dab94c45af15fea99e4468f99ac88b39efb1ad677da5711"}, - {file = "mypy-0.991-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:3d80e36b7d7a9259b740be6d8d906221789b0d836201af4234093cae89ced0cd"}, - {file = "mypy-0.991-cp37-cp37m-win_amd64.whl", hash = "sha256:e62ebaad93be3ad1a828a11e90f0e76f15449371ffeecca4a0a0b9adc99abcef"}, - {file = "mypy-0.991-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:b86ce2c1866a748c0f6faca5232059f881cda6dda2a893b9a8373353cfe3715a"}, - {file = "mypy-0.991-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ac6e503823143464538efda0e8e356d871557ef60ccd38f8824a4257acc18d93"}, - {file = "mypy-0.991-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:0cca5adf694af539aeaa6ac633a7afe9bbd760df9d31be55ab780b77ab5ae8bf"}, 
- {file = "mypy-0.991-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a12c56bf73cdab116df96e4ff39610b92a348cc99a1307e1da3c3768bbb5b135"}, - {file = "mypy-0.991-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:652b651d42f155033a1967739788c436491b577b6a44e4c39fb340d0ee7f0d70"}, - {file = "mypy-0.991-cp38-cp38-win_amd64.whl", hash = "sha256:4175593dc25d9da12f7de8de873a33f9b2b8bdb4e827a7cae952e5b1a342e243"}, - {file = "mypy-0.991-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:98e781cd35c0acf33eb0295e8b9c55cdbef64fcb35f6d3aa2186f289bed6e80d"}, - {file = "mypy-0.991-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6d7464bac72a85cb3491c7e92b5b62f3dcccb8af26826257760a552a5e244aa5"}, - {file = "mypy-0.991-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c9166b3f81a10cdf9b49f2d594b21b31adadb3d5e9db9b834866c3258b695be3"}, - {file = "mypy-0.991-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b8472f736a5bfb159a5e36740847808f6f5b659960115ff29c7cecec1741c648"}, - {file = "mypy-0.991-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5e80e758243b97b618cdf22004beb09e8a2de1af481382e4d84bc52152d1c476"}, - {file = "mypy-0.991-cp39-cp39-win_amd64.whl", hash = "sha256:74e259b5c19f70d35fcc1ad3d56499065c601dfe94ff67ae48b85596b9ec1461"}, - {file = "mypy-0.991-py3-none-any.whl", hash = "sha256:de32edc9b0a7e67c2775e574cb061a537660e51210fbf6006b0b36ea695ae9bb"}, - {file = "mypy-0.991.tar.gz", hash = "sha256:3c0165ba8f354a6d9881809ef29f1a9318a236a6d81c690094c5df32107bde06"}, + {file = "mypy-1.11.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d42a6dd818ffce7be66cce644f1dff482f1d97c53ca70908dff0b9ddc120b77a"}, + {file = "mypy-1.11.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:801780c56d1cdb896eacd5619a83e427ce436d86a3bdf9112527f24a66618fef"}, + {file = "mypy-1.11.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:41ea707d036a5307ac674ea172875f40c9d55c5394f888b168033177fce47383"}, + {file = "mypy-1.11.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:6e658bd2d20565ea86da7d91331b0eed6d2eee22dc031579e6297f3e12c758c8"}, + {file = "mypy-1.11.2-cp310-cp310-win_amd64.whl", hash = "sha256:478db5f5036817fe45adb7332d927daa62417159d49783041338921dcf646fc7"}, + {file = "mypy-1.11.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:75746e06d5fa1e91bfd5432448d00d34593b52e7e91a187d981d08d1f33d4385"}, + {file = "mypy-1.11.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a976775ab2256aadc6add633d44f100a2517d2388906ec4f13231fafbb0eccca"}, + {file = "mypy-1.11.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:cd953f221ac1379050a8a646585a29574488974f79d8082cedef62744f0a0104"}, + {file = "mypy-1.11.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:57555a7715c0a34421013144a33d280e73c08df70f3a18a552938587ce9274f4"}, + {file = "mypy-1.11.2-cp311-cp311-win_amd64.whl", hash = "sha256:36383a4fcbad95f2657642a07ba22ff797de26277158f1cc7bd234821468b1b6"}, + {file = "mypy-1.11.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:e8960dbbbf36906c5c0b7f4fbf2f0c7ffb20f4898e6a879fcf56a41a08b0d318"}, + {file = "mypy-1.11.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:06d26c277962f3fb50e13044674aa10553981ae514288cb7d0a738f495550b36"}, + {file = "mypy-1.11.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6e7184632d89d677973a14d00ae4d03214c8bc301ceefcdaf5c474866814c987"}, + {file = "mypy-1.11.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:3a66169b92452f72117e2da3a576087025449018afc2d8e9bfe5ffab865709ca"}, + {file = "mypy-1.11.2-cp312-cp312-win_amd64.whl", hash = "sha256:969ea3ef09617aff826885a22ece0ddef69d95852cdad2f60c8bb06bf1f71f70"}, + {file = "mypy-1.11.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = 
"sha256:37c7fa6121c1cdfcaac97ce3d3b5588e847aa79b580c1e922bb5d5d2902df19b"}, + {file = "mypy-1.11.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:4a8a53bc3ffbd161b5b2a4fff2f0f1e23a33b0168f1c0778ec70e1a3d66deb86"}, + {file = "mypy-1.11.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2ff93107f01968ed834f4256bc1fc4475e2fecf6c661260066a985b52741ddce"}, + {file = "mypy-1.11.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:edb91dded4df17eae4537668b23f0ff6baf3707683734b6a818d5b9d0c0c31a1"}, + {file = "mypy-1.11.2-cp38-cp38-win_amd64.whl", hash = "sha256:ee23de8530d99b6db0573c4ef4bd8f39a2a6f9b60655bf7a1357e585a3486f2b"}, + {file = "mypy-1.11.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:801ca29f43d5acce85f8e999b1e431fb479cb02d0e11deb7d2abb56bdaf24fd6"}, + {file = "mypy-1.11.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:af8d155170fcf87a2afb55b35dc1a0ac21df4431e7d96717621962e4b9192e70"}, + {file = "mypy-1.11.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f7821776e5c4286b6a13138cc935e2e9b6fde05e081bdebf5cdb2bb97c9df81d"}, + {file = "mypy-1.11.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:539c570477a96a4e6fb718b8d5c3e0c0eba1f485df13f86d2970c91f0673148d"}, + {file = "mypy-1.11.2-cp39-cp39-win_amd64.whl", hash = "sha256:3f14cd3d386ac4d05c5a39a51b84387403dadbd936e17cb35882134d4f8f0d24"}, + {file = "mypy-1.11.2-py3-none-any.whl", hash = "sha256:b499bc07dbdcd3de92b0a8b29fdf592c111276f6a12fe29c30f6c417dd546d12"}, + {file = "mypy-1.11.2.tar.gz", hash = "sha256:7f9993ad3e0ffdc95c2a14b66dee63729f021968bff8ad911867579c65d13a79"}, ] [package.dependencies] -mypy-extensions = ">=0.4.3" +mypy-extensions = ">=1.0.0" tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} -typing-extensions = ">=3.10" +typing-extensions = ">=4.6.0" [package.extras] dmypy = ["psutil (>=4.0)"] install-types = ["pip"] -python2 = ["typed-ast (>=1.4.0,<2)"] +mypyc = 
["setuptools (>=50)"] reports = ["lxml"] [[package]] @@ -2159,6 +2077,7 @@ files = [ [package.dependencies] numpy = [ {version = ">=1.22.4", markers = "python_version < \"3.11\""}, + {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, {version = ">=1.23.2", markers = "python_version == \"3.11\""}, ] python-dateutil = ">=2.8.2" @@ -2354,6 +2273,21 @@ files = [ [package.dependencies] wcwidth = "*" +[[package]] +name = "py-partiql-parser" +version = "0.5.0" +description = "Pure Python PartiQL Parser" +optional = false +python-versions = "*" +groups = ["dev", "test"] +files = [ + {file = "py-partiql-parser-0.5.0.tar.gz", hash = "sha256:427a662e87d51a0a50150fc8b75c9ebb4a52d49129684856c40c88b8c8e027e4"}, + {file = "py_partiql_parser-0.5.0-py3-none-any.whl", hash = "sha256:dc454c27526adf62deca5177ea997bf41fac4fd109c5d4c8d81f984de738ba8f"}, +] + +[package.extras] +dev = ["black (==22.6.0)", "flake8", "mypy", "pytest"] + [[package]] name = "py4j" version = "0.10.9.7" @@ -2433,48 +2367,55 @@ files = [ [[package]] name = "pydantic" -version = "1.10.16" +version = "1.10.19" description = "Data validation and settings management using python type hints" optional = false python-versions = ">=3.7" groups = ["main"] files = [ - {file = "pydantic-1.10.16-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1a539ac40551b01a85e899829aa43ca8036707474af8d74b48be288d4d2d2846"}, - {file = "pydantic-1.10.16-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8a4fcc7b0b8038dbda2dda642cff024032dfae24a7960cc58e57a39eb1949b9b"}, - {file = "pydantic-1.10.16-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4660dd697de1ae2d4305a85161312611f64d5360663a9ba026cd6ad9e3fe14c3"}, - {file = "pydantic-1.10.16-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:900a787c574f903a97d0bf52a43ff3b6cf4fa0119674bcfc0e5fd1056d388ad9"}, - {file = "pydantic-1.10.16-cp310-cp310-musllinux_1_1_i686.whl", hash = 
"sha256:d30192a63e6d3334c3f0c0506dd6ae9f1dce7b2f8845518915291393a5707a22"}, - {file = "pydantic-1.10.16-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:16cf23ed599ca5ca937e37ba50ab114e6b5c387eb43a6cc533701605ad1be611"}, - {file = "pydantic-1.10.16-cp310-cp310-win_amd64.whl", hash = "sha256:8d23111f41d1e19334edd51438fd57933f3eee7d9d2fa8cc3f5eda515a272055"}, - {file = "pydantic-1.10.16-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ef287b8d7fc0e86a8bd1f902c61aff6ba9479c50563242fe88ba39692e98e1e0"}, - {file = "pydantic-1.10.16-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b9ded699bfd3b3912d796ff388b0c607e6d35d41053d37aaf8fd6082c660de9a"}, - {file = "pydantic-1.10.16-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:daeb199814333e4426c5e86d7fb610f4e230289f28cab90eb4de27330bef93cf"}, - {file = "pydantic-1.10.16-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5973843f1fa99ec6c3ac8d1a8698ac9340b35e45cca6c3e5beb5c3bd1ef15de6"}, - {file = "pydantic-1.10.16-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:c6b8a7788a8528a558828fe4a48783cafdcf2612d13c491594a8161dc721629c"}, - {file = "pydantic-1.10.16-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:8abaecf54dacc9d991dda93c3b880d41092a8924cde94eeb811d7d9ab55df7d8"}, - {file = "pydantic-1.10.16-cp311-cp311-win_amd64.whl", hash = "sha256:ddc7b682fbd23f051edc419dc6977e11dd2dbdd0cef9d05f0e15d1387862d230"}, - {file = "pydantic-1.10.16-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:067c2b5539f7839653ad8c3d1fc2f1343338da8677b7b2172abf3cd3fdc8f719"}, - {file = "pydantic-1.10.16-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4d1fc943583c046ecad0ff5d6281ee571b64e11b5503d9595febdce54f38b290"}, - {file = "pydantic-1.10.16-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:18548b30ccebe71d380b0886cc44ea5d80afbcc155e3518792f13677ad06097d"}, - {file 
= "pydantic-1.10.16-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:4e92292f9580fc5ea517618580fac24e9f6dc5657196e977c194a8e50e14f5a9"}, - {file = "pydantic-1.10.16-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:5da8bc4bb4f85b8c97cc7f11141fddbbd29eb25e843672e5807e19cc3d7c1b7f"}, - {file = "pydantic-1.10.16-cp37-cp37m-win_amd64.whl", hash = "sha256:a04ee1ea34172b87707a6ecfcdb120d7656892206b7c4dbdb771a73e90179fcb"}, - {file = "pydantic-1.10.16-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:4fa86469fd46e732242c7acb83282d33f83591a7e06f840481327d5bf6d96112"}, - {file = "pydantic-1.10.16-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:89c2783dc261726fe7a5ce1121bce29a2f7eb9b1e704c68df2b117604e3b346f"}, - {file = "pydantic-1.10.16-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:78e59fa919fa7a192f423d190d8660c35dd444efa9216662273f36826765424b"}, - {file = "pydantic-1.10.16-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b7e82a80068c77f4b074032e031e642530b6d45cb8121fc7c99faa31fb6c6b72"}, - {file = "pydantic-1.10.16-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:d82d5956cee27a30e26a5b88d00a6a2a15a4855e13c9baf50175976de0dc282c"}, - {file = "pydantic-1.10.16-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:4b7b99424cc0970ff08deccb549b5a6ec1040c0b449eab91723e64df2bd8fdca"}, - {file = "pydantic-1.10.16-cp38-cp38-win_amd64.whl", hash = "sha256:d97a35e1ba59442775201657171f601a2879e63517a55862a51f8d67cdfc0017"}, - {file = "pydantic-1.10.16-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9d91f6866fd3e303c632207813ef6bc4d86055e21c5e5a0a311983a9ac5f0192"}, - {file = "pydantic-1.10.16-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:d8d3c71d14c8bd26d2350c081908dbf59d5a6a8f9596d9ef2b09cc1e61c8662b"}, - {file = "pydantic-1.10.16-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b73e6386b439b4881d79244e9fc1e32d1e31e8d784673f5d58a000550c94a6c0"}, - {file = 
"pydantic-1.10.16-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5f039881fb2ef86f6de6eacce6e71701b47500355738367413ccc1550b2a69cf"}, - {file = "pydantic-1.10.16-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:3895ddb26f22bdddee7e49741486aa7b389258c6f6771943e87fc00eabd79134"}, - {file = "pydantic-1.10.16-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:55b945da2756b5cef93d792521ad0d457fdf2f69fd5a2d10a27513f5281717dd"}, - {file = "pydantic-1.10.16-cp39-cp39-win_amd64.whl", hash = "sha256:22dd265c77c3976a34be78409b128cb84629284dfd1b69d2fa1507a36f84dc8b"}, - {file = "pydantic-1.10.16-py3-none-any.whl", hash = "sha256:aa2774ba5412fd1c5cb890d08e8b0a3bb5765898913ba1f61a65a4810f03cf29"}, - {file = "pydantic-1.10.16.tar.gz", hash = "sha256:8bb388f6244809af69ee384900b10b677a69f1980fdc655ea419710cffcb5610"}, + {file = "pydantic-1.10.19-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a415b9e95fa602b10808113967f72b2da8722061265d6af69268c111c254832d"}, + {file = "pydantic-1.10.19-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:11965f421f7eb026439d4eb7464e9182fe6d69c3d4d416e464a4485d1ba61ab6"}, + {file = "pydantic-1.10.19-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f5bb81fcfc6d5bff62cd786cbd87480a11d23f16d5376ad2e057c02b3b44df96"}, + {file = "pydantic-1.10.19-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:83ee8c9916689f8e6e7d90161e6663ac876be2efd32f61fdcfa3a15e87d4e413"}, + {file = "pydantic-1.10.19-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:0399094464ae7f28482de22383e667625e38e1516d6b213176df1acdd0c477ea"}, + {file = "pydantic-1.10.19-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8b2cf5e26da84f2d2dee3f60a3f1782adedcee785567a19b68d0af7e1534bd1f"}, + {file = "pydantic-1.10.19-cp310-cp310-win_amd64.whl", hash = "sha256:1fc8cc264afaf47ae6a9bcbd36c018d0c6b89293835d7fb0e5e1a95898062d59"}, + {file = 
"pydantic-1.10.19-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d7a8a1dd68bac29f08f0a3147de1885f4dccec35d4ea926e6e637fac03cdb4b3"}, + {file = "pydantic-1.10.19-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:07d00ca5ef0de65dd274005433ce2bb623730271d495a7d190a91c19c5679d34"}, + {file = "pydantic-1.10.19-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ad57004e5d73aee36f1e25e4e73a4bc853b473a1c30f652dc8d86b0a987ffce3"}, + {file = "pydantic-1.10.19-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dce355fe7ae53e3090f7f5fa242423c3a7b53260747aa398b4b3aaf8b25f41c3"}, + {file = "pydantic-1.10.19-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:0d32227ea9a3bf537a2273fd2fdb6d64ab4d9b83acd9e4e09310a777baaabb98"}, + {file = "pydantic-1.10.19-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e351df83d1c9cffa53d4e779009a093be70f1d5c6bb7068584086f6a19042526"}, + {file = "pydantic-1.10.19-cp311-cp311-win_amd64.whl", hash = "sha256:d8d72553d2f3f57ce547de4fa7dc8e3859927784ab2c88343f1fc1360ff17a08"}, + {file = "pydantic-1.10.19-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:d5b5b7c6bafaef90cbb7dafcb225b763edd71d9e22489647ee7df49d6d341890"}, + {file = "pydantic-1.10.19-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:570ad0aeaf98b5e33ff41af75aba2ef6604ee25ce0431ecd734a28e74a208555"}, + {file = "pydantic-1.10.19-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0890fbd7fec9e151c7512941243d830b2d6076d5df159a2030952d480ab80a4e"}, + {file = "pydantic-1.10.19-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ec5c44e6e9eac5128a9bfd21610df3b8c6b17343285cc185105686888dc81206"}, + {file = "pydantic-1.10.19-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:6eb56074b11a696e0b66c7181da682e88c00e5cebe6570af8013fcae5e63e186"}, + {file = "pydantic-1.10.19-cp312-cp312-musllinux_1_1_x86_64.whl", hash = 
"sha256:9d7d48fbc5289efd23982a0d68e973a1f37d49064ccd36d86de4543aff21e086"}, + {file = "pydantic-1.10.19-cp312-cp312-win_amd64.whl", hash = "sha256:fd34012691fbd4e67bdf4accb1f0682342101015b78327eaae3543583fcd451e"}, + {file = "pydantic-1.10.19-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:4a5d5b877c7d3d9e17399571a8ab042081d22fe6904416a8b20f8af5909e6c8f"}, + {file = "pydantic-1.10.19-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9c46f58ef2df958ed2ea7437a8be0897d5efe9ee480818405338c7da88186fb3"}, + {file = "pydantic-1.10.19-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6d8a38a44bb6a15810084316ed69c854a7c06e0c99c5429f1d664ad52cec353c"}, + {file = "pydantic-1.10.19-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:a82746c6d6e91ca17e75f7f333ed41d70fce93af520a8437821dec3ee52dfb10"}, + {file = "pydantic-1.10.19-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:566bebdbe6bc0ac593fa0f67d62febbad9f8be5433f686dc56401ba4aab034e3"}, + {file = "pydantic-1.10.19-cp37-cp37m-win_amd64.whl", hash = "sha256:22a1794e01591884741be56c6fba157c4e99dcc9244beb5a87bd4aa54b84ea8b"}, + {file = "pydantic-1.10.19-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:076c49e24b73d346c45f9282d00dbfc16eef7ae27c970583d499f11110d9e5b0"}, + {file = "pydantic-1.10.19-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:5d4320510682d5a6c88766b2a286d03b87bd3562bf8d78c73d63bab04b21e7b4"}, + {file = "pydantic-1.10.19-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e66aa0fa7f8aa9d0a620361834f6eb60d01d3e9cea23ca1a92cda99e6f61dac"}, + {file = "pydantic-1.10.19-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d216f8d0484d88ab72ab45d699ac669fe031275e3fa6553e3804e69485449fa0"}, + {file = "pydantic-1.10.19-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:9f28a81978e936136c44e6a70c65bde7548d87f3807260f73aeffbf76fb94c2f"}, + {file = 
"pydantic-1.10.19-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:d3449633c207ec3d2d672eedb3edbe753e29bd4e22d2e42a37a2c1406564c20f"}, + {file = "pydantic-1.10.19-cp38-cp38-win_amd64.whl", hash = "sha256:7ea24e8614f541d69ea72759ff635df0e612b7dc9d264d43f51364df310081a3"}, + {file = "pydantic-1.10.19-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:573254d844f3e64093f72fcd922561d9c5696821ff0900a0db989d8c06ab0c25"}, + {file = "pydantic-1.10.19-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ff09600cebe957ecbb4a27496fe34c1d449e7957ed20a202d5029a71a8af2e35"}, + {file = "pydantic-1.10.19-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4739c206bfb6bb2bdc78dcd40bfcebb2361add4ceac6d170e741bb914e9eff0f"}, + {file = "pydantic-1.10.19-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0bfb5b378b78229119d66ced6adac2e933c67a0aa1d0a7adffbe432f3ec14ce4"}, + {file = "pydantic-1.10.19-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:7f31742c95e3f9443b8c6fa07c119623e61d76603be9c0d390bcf7e888acabcb"}, + {file = "pydantic-1.10.19-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:c6444368b651a14c2ce2fb22145e1496f7ab23cbdb978590d47c8d34a7bc0289"}, + {file = "pydantic-1.10.19-cp39-cp39-win_amd64.whl", hash = "sha256:945407f4d08cd12485757a281fca0e5b41408606228612f421aa4ea1b63a095d"}, + {file = "pydantic-1.10.19-py3-none-any.whl", hash = "sha256:2206a1752d9fac011e95ca83926a269fb0ef5536f7e053966d058316e24d929f"}, + {file = "pydantic-1.10.19.tar.gz", hash = "sha256:fea36c2065b7a1d28c6819cc2e93387b43dd5d3cf5a1e82d8132ee23f36d1f10"}, ] [package.dependencies] @@ -2501,27 +2442,28 @@ windows-terminal = ["colorama (>=0.4.6)"] [[package]] name = "pylint" -version = "2.16.4" +version = "3.3.9" description = "python code static checker" optional = false -python-versions = ">=3.7.2" +python-versions = ">=3.9.0" groups = ["dev", "lint"] files = [ - {file = "pylint-2.16.4-py3-none-any.whl", hash = 
"sha256:4a770bb74fde0550fa0ab4248a2ad04e7887462f9f425baa0cd8d3c1d098eaee"}, - {file = "pylint-2.16.4.tar.gz", hash = "sha256:8841f26a0dbc3503631b6a20ee368b3f5e0e5461a1d95cf15d103dab748a0db3"}, + {file = "pylint-3.3.9-py3-none-any.whl", hash = "sha256:01f9b0462c7730f94786c283f3e52a1fbdf0494bbe0971a78d7277ef46a751e7"}, + {file = "pylint-3.3.9.tar.gz", hash = "sha256:d312737d7b25ccf6b01cc4ac629b5dcd14a0fcf3ec392735ac70f137a9d5f83a"}, ] [package.dependencies] -astroid = ">=2.14.2,<=2.16.0.dev0" +astroid = ">=3.3.8,<=3.4.0.dev0" colorama = {version = ">=0.4.5", markers = "sys_platform == \"win32\""} dill = [ {version = ">=0.2", markers = "python_version < \"3.11\""}, - {version = ">=0.3.6", markers = "python_version >= \"3.11\""}, + {version = ">=0.3.7", markers = "python_version >= \"3.12\""}, + {version = ">=0.3.6", markers = "python_version == \"3.11\""}, ] -isort = ">=4.2.5,<6" +isort = ">=4.2.5,<5.13 || >5.13,<7" mccabe = ">=0.6,<0.8" -platformdirs = ">=2.2.0" -tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} +platformdirs = ">=2.2" +tomli = {version = ">=1.1", markers = "python_version < \"3.11\""} tomlkit = ">=0.10.1" [package.extras] @@ -3173,105 +3115,117 @@ watchdog = ["watchdog (>=2.3)"] [[package]] name = "wrapt" -version = "1.17.3" +version = "2.1.2" description = "Module for decorators, wrappers and monkey patching." 
optional = false -python-versions = ">=3.8" -groups = ["dev", "lint"] +python-versions = ">=3.9" +groups = ["dev"] files = [ - {file = "wrapt-1.17.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:88bbae4d40d5a46142e70d58bf664a89b6b4befaea7b2ecc14e03cedb8e06c04"}, - {file = "wrapt-1.17.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e6b13af258d6a9ad602d57d889f83b9d5543acd471eee12eb51f5b01f8eb1bc2"}, - {file = "wrapt-1.17.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fd341868a4b6714a5962c1af0bd44f7c404ef78720c7de4892901e540417111c"}, - {file = "wrapt-1.17.3-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:f9b2601381be482f70e5d1051a5965c25fb3625455a2bf520b5a077b22afb775"}, - {file = "wrapt-1.17.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:343e44b2a8e60e06a7e0d29c1671a0d9951f59174f3709962b5143f60a2a98bd"}, - {file = "wrapt-1.17.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:33486899acd2d7d3066156b03465b949da3fd41a5da6e394ec49d271baefcf05"}, - {file = "wrapt-1.17.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:e6f40a8aa5a92f150bdb3e1c44b7e98fb7113955b2e5394122fa5532fec4b418"}, - {file = "wrapt-1.17.3-cp310-cp310-win32.whl", hash = "sha256:a36692b8491d30a8c75f1dfee65bef119d6f39ea84ee04d9f9311f83c5ad9390"}, - {file = "wrapt-1.17.3-cp310-cp310-win_amd64.whl", hash = "sha256:afd964fd43b10c12213574db492cb8f73b2f0826c8df07a68288f8f19af2ebe6"}, - {file = "wrapt-1.17.3-cp310-cp310-win_arm64.whl", hash = "sha256:af338aa93554be859173c39c85243970dc6a289fa907402289eeae7543e1ae18"}, - {file = "wrapt-1.17.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:273a736c4645e63ac582c60a56b0acb529ef07f78e08dc6bfadf6a46b19c0da7"}, - {file = "wrapt-1.17.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5531d911795e3f935a9c23eb1c8c03c211661a5060aab167065896bbf62a5f85"}, - {file = "wrapt-1.17.3-cp311-cp311-macosx_11_0_arm64.whl", hash = 
"sha256:0610b46293c59a3adbae3dee552b648b984176f8562ee0dba099a56cfbe4df1f"}, - {file = "wrapt-1.17.3-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:b32888aad8b6e68f83a8fdccbf3165f5469702a7544472bdf41f582970ed3311"}, - {file = "wrapt-1.17.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8cccf4f81371f257440c88faed6b74f1053eef90807b77e31ca057b2db74edb1"}, - {file = "wrapt-1.17.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d8a210b158a34164de8bb68b0e7780041a903d7b00c87e906fb69928bf7890d5"}, - {file = "wrapt-1.17.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:79573c24a46ce11aab457b472efd8d125e5a51da2d1d24387666cd85f54c05b2"}, - {file = "wrapt-1.17.3-cp311-cp311-win32.whl", hash = "sha256:c31eebe420a9a5d2887b13000b043ff6ca27c452a9a22fa71f35f118e8d4bf89"}, - {file = "wrapt-1.17.3-cp311-cp311-win_amd64.whl", hash = "sha256:0b1831115c97f0663cb77aa27d381237e73ad4f721391a9bfb2fe8bc25fa6e77"}, - {file = "wrapt-1.17.3-cp311-cp311-win_arm64.whl", hash = "sha256:5a7b3c1ee8265eb4c8f1b7d29943f195c00673f5ab60c192eba2d4a7eae5f46a"}, - {file = "wrapt-1.17.3-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:ab232e7fdb44cdfbf55fc3afa31bcdb0d8980b9b95c38b6405df2acb672af0e0"}, - {file = "wrapt-1.17.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:9baa544e6acc91130e926e8c802a17f3b16fbea0fd441b5a60f5cf2cc5c3deba"}, - {file = "wrapt-1.17.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6b538e31eca1a7ea4605e44f81a48aa24c4632a277431a6ed3f328835901f4fd"}, - {file = "wrapt-1.17.3-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:042ec3bb8f319c147b1301f2393bc19dba6e176b7da446853406d041c36c7828"}, - {file = "wrapt-1.17.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3af60380ba0b7b5aeb329bc4e402acd25bd877e98b3727b0135cb5c2efdaefe9"}, - {file = 
"wrapt-1.17.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0b02e424deef65c9f7326d8c19220a2c9040c51dc165cddb732f16198c168396"}, - {file = "wrapt-1.17.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:74afa28374a3c3a11b3b5e5fca0ae03bef8450d6aa3ab3a1e2c30e3a75d023dc"}, - {file = "wrapt-1.17.3-cp312-cp312-win32.whl", hash = "sha256:4da9f45279fff3543c371d5ababc57a0384f70be244de7759c85a7f989cb4ebe"}, - {file = "wrapt-1.17.3-cp312-cp312-win_amd64.whl", hash = "sha256:e71d5c6ebac14875668a1e90baf2ea0ef5b7ac7918355850c0908ae82bcb297c"}, - {file = "wrapt-1.17.3-cp312-cp312-win_arm64.whl", hash = "sha256:604d076c55e2fdd4c1c03d06dc1a31b95130010517b5019db15365ec4a405fc6"}, - {file = "wrapt-1.17.3-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:a47681378a0439215912ef542c45a783484d4dd82bac412b71e59cf9c0e1cea0"}, - {file = "wrapt-1.17.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:54a30837587c6ee3cd1a4d1c2ec5d24e77984d44e2f34547e2323ddb4e22eb77"}, - {file = "wrapt-1.17.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:16ecf15d6af39246fe33e507105d67e4b81d8f8d2c6598ff7e3ca1b8a37213f7"}, - {file = "wrapt-1.17.3-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:6fd1ad24dc235e4ab88cda009e19bf347aabb975e44fd5c2fb22a3f6e4141277"}, - {file = "wrapt-1.17.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0ed61b7c2d49cee3c027372df5809a59d60cf1b6c2f81ee980a091f3afed6a2d"}, - {file = "wrapt-1.17.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:423ed5420ad5f5529db9ce89eac09c8a2f97da18eb1c870237e84c5a5c2d60aa"}, - {file = "wrapt-1.17.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e01375f275f010fcbf7f643b4279896d04e571889b8a5b3f848423d91bf07050"}, - {file = "wrapt-1.17.3-cp313-cp313-win32.whl", hash = "sha256:53e5e39ff71b3fc484df8a522c933ea2b7cdd0d5d15ae82e5b23fde87d44cbd8"}, - {file = "wrapt-1.17.3-cp313-cp313-win_amd64.whl", hash = 
"sha256:1f0b2f40cf341ee8cc1a97d51ff50dddb9fcc73241b9143ec74b30fc4f44f6cb"}, - {file = "wrapt-1.17.3-cp313-cp313-win_arm64.whl", hash = "sha256:7425ac3c54430f5fc5e7b6f41d41e704db073309acfc09305816bc6a0b26bb16"}, - {file = "wrapt-1.17.3-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:cf30f6e3c077c8e6a9a7809c94551203c8843e74ba0c960f4a98cd80d4665d39"}, - {file = "wrapt-1.17.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:e228514a06843cae89621384cfe3a80418f3c04aadf8a3b14e46a7be704e4235"}, - {file = "wrapt-1.17.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:5ea5eb3c0c071862997d6f3e02af1d055f381b1d25b286b9d6644b79db77657c"}, - {file = "wrapt-1.17.3-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:281262213373b6d5e4bb4353bc36d1ba4084e6d6b5d242863721ef2bf2c2930b"}, - {file = "wrapt-1.17.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:dc4a8d2b25efb6681ecacad42fca8859f88092d8732b170de6a5dddd80a1c8fa"}, - {file = "wrapt-1.17.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:373342dd05b1d07d752cecbec0c41817231f29f3a89aa8b8843f7b95992ed0c7"}, - {file = "wrapt-1.17.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:d40770d7c0fd5cbed9d84b2c3f2e156431a12c9a37dc6284060fb4bec0b7ffd4"}, - {file = "wrapt-1.17.3-cp314-cp314-win32.whl", hash = "sha256:fbd3c8319de8e1dc79d346929cd71d523622da527cca14e0c1d257e31c2b8b10"}, - {file = "wrapt-1.17.3-cp314-cp314-win_amd64.whl", hash = "sha256:e1a4120ae5705f673727d3253de3ed0e016f7cd78dc463db1b31e2463e1f3cf6"}, - {file = "wrapt-1.17.3-cp314-cp314-win_arm64.whl", hash = "sha256:507553480670cab08a800b9463bdb881b2edeed77dc677b0a5915e6106e91a58"}, - {file = "wrapt-1.17.3-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:ed7c635ae45cfbc1a7371f708727bf74690daedc49b4dba310590ca0bd28aa8a"}, - {file = "wrapt-1.17.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = 
"sha256:249f88ed15503f6492a71f01442abddd73856a0032ae860de6d75ca62eed8067"}, - {file = "wrapt-1.17.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:5a03a38adec8066d5a37bea22f2ba6bbf39fcdefbe2d91419ab864c3fb515454"}, - {file = "wrapt-1.17.3-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:5d4478d72eb61c36e5b446e375bbc49ed002430d17cdec3cecb36993398e1a9e"}, - {file = "wrapt-1.17.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:223db574bb38637e8230eb14b185565023ab624474df94d2af18f1cdb625216f"}, - {file = "wrapt-1.17.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e405adefb53a435f01efa7ccdec012c016b5a1d3f35459990afc39b6be4d5056"}, - {file = "wrapt-1.17.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:88547535b787a6c9ce4086917b6e1d291aa8ed914fdd3a838b3539dc95c12804"}, - {file = "wrapt-1.17.3-cp314-cp314t-win32.whl", hash = "sha256:41b1d2bc74c2cac6f9074df52b2efbef2b30bdfe5f40cb78f8ca22963bc62977"}, - {file = "wrapt-1.17.3-cp314-cp314t-win_amd64.whl", hash = "sha256:73d496de46cd2cdbdbcce4ae4bcdb4afb6a11234a1df9c085249d55166b95116"}, - {file = "wrapt-1.17.3-cp314-cp314t-win_arm64.whl", hash = "sha256:f38e60678850c42461d4202739f9bf1e3a737c7ad283638251e79cc49effb6b6"}, - {file = "wrapt-1.17.3-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:70d86fa5197b8947a2fa70260b48e400bf2ccacdcab97bb7de47e3d1e6312225"}, - {file = "wrapt-1.17.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:df7d30371a2accfe4013e90445f6388c570f103d61019b6b7c57e0265250072a"}, - {file = "wrapt-1.17.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:caea3e9c79d5f0d2c6d9ab96111601797ea5da8e6d0723f77eabb0d4068d2b2f"}, - {file = "wrapt-1.17.3-cp38-cp38-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:758895b01d546812d1f42204bd443b8c433c44d090248bf22689df673ccafe00"}, - {file = 
"wrapt-1.17.3-cp38-cp38-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:02b551d101f31694fc785e58e0720ef7d9a10c4e62c1c9358ce6f63f23e30a56"}, - {file = "wrapt-1.17.3-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:656873859b3b50eeebe6db8b1455e99d90c26ab058db8e427046dbc35c3140a5"}, - {file = "wrapt-1.17.3-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:a9a2203361a6e6404f80b99234fe7fb37d1fc73487b5a78dc1aa5b97201e0f22"}, - {file = "wrapt-1.17.3-cp38-cp38-win32.whl", hash = "sha256:55cbbc356c2842f39bcc553cf695932e8b30e30e797f961860afb308e6b1bb7c"}, - {file = "wrapt-1.17.3-cp38-cp38-win_amd64.whl", hash = "sha256:ad85e269fe54d506b240d2d7b9f5f2057c2aa9a2ea5b32c66f8902f768117ed2"}, - {file = "wrapt-1.17.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:30ce38e66630599e1193798285706903110d4f057aab3168a34b7fdc85569afc"}, - {file = "wrapt-1.17.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:65d1d00fbfb3ea5f20add88bbc0f815150dbbde3b026e6c24759466c8b5a9ef9"}, - {file = "wrapt-1.17.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a7c06742645f914f26c7f1fa47b8bc4c91d222f76ee20116c43d5ef0912bba2d"}, - {file = "wrapt-1.17.3-cp39-cp39-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:7e18f01b0c3e4a07fe6dfdb00e29049ba17eadbc5e7609a2a3a4af83ab7d710a"}, - {file = "wrapt-1.17.3-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0f5f51a6466667a5a356e6381d362d259125b57f059103dd9fdc8c0cf1d14139"}, - {file = "wrapt-1.17.3-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:59923aa12d0157f6b82d686c3fd8e1166fa8cdfb3e17b42ce3b6147ff81528df"}, - {file = "wrapt-1.17.3-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:46acc57b331e0b3bcb3e1ca3b421d65637915cfcd65eb783cb2f78a511193f9b"}, - {file = "wrapt-1.17.3-cp39-cp39-win32.whl", hash = "sha256:3e62d15d3cfa26e3d0788094de7b64efa75f3a53875cdbccdf78547aed547a81"}, - {file = 
"wrapt-1.17.3-cp39-cp39-win_amd64.whl", hash = "sha256:1f23fa283f51c890eda8e34e4937079114c74b4c81d2b2f1f1d94948f5cc3d7f"}, - {file = "wrapt-1.17.3-cp39-cp39-win_arm64.whl", hash = "sha256:24c2ed34dc222ed754247a2702b1e1e89fdbaa4016f324b4b8f1a802d4ffe87f"}, - {file = "wrapt-1.17.3-py3-none-any.whl", hash = "sha256:7171ae35d2c33d326ac19dd8facb1e82e5fd04ef8c6c0e394d7af55a55051c22"}, - {file = "wrapt-1.17.3.tar.gz", hash = "sha256:f66eb08feaa410fe4eebd17f2a2c8e2e46d3476e9f8c783daa8e09e0faa666d0"}, + {file = "wrapt-2.1.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:4b7a86d99a14f76facb269dc148590c01aaf47584071809a70da30555228158c"}, + {file = "wrapt-2.1.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a819e39017f95bf7aede768f75915635aa8f671f2993c036991b8d3bfe8dbb6f"}, + {file = "wrapt-2.1.2-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:5681123e60aed0e64c7d44f72bbf8b4ce45f79d81467e2c4c728629f5baf06eb"}, + {file = "wrapt-2.1.2-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2b8b28e97a44d21836259739ae76284e180b18abbb4dcfdff07a415cf1016c3e"}, + {file = "wrapt-2.1.2-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:cef91c95a50596fcdc31397eb6955476f82ae8a3f5a8eabdc13611b60ee380ba"}, + {file = "wrapt-2.1.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:dad63212b168de8569b1c512f4eac4b57f2c6934b30df32d6ee9534a79f1493f"}, + {file = "wrapt-2.1.2-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:d307aa6888d5efab2c1cde09843d48c843990be13069003184b67d426d145394"}, + {file = "wrapt-2.1.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:c87cf3f0c85e27b3ac7d9ad95da166bf8739ca215a8b171e8404a2d739897a45"}, + {file = "wrapt-2.1.2-cp310-cp310-win32.whl", hash = "sha256:d1c5fea4f9fe3762e2b905fdd67df51e4be7a73b7674957af2d2ade71a5c075d"}, + {file = "wrapt-2.1.2-cp310-cp310-win_amd64.whl", hash = 
"sha256:d8f7740e1af13dff2684e4d56fe604a7e04d6c94e737a60568d8d4238b9a0c71"}, + {file = "wrapt-2.1.2-cp310-cp310-win_arm64.whl", hash = "sha256:1c6cc827c00dc839350155f316f1f8b4b0c370f52b6a19e782e2bda89600c7dc"}, + {file = "wrapt-2.1.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:96159a0ee2b0277d44201c3b5be479a9979cf154e8c82fa5df49586a8e7679bb"}, + {file = "wrapt-2.1.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:98ba61833a77b747901e9012072f038795de7fc77849f1faa965464f3f87ff2d"}, + {file = "wrapt-2.1.2-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:767c0dbbe76cae2a60dd2b235ac0c87c9cccf4898aef8062e57bead46b5f6894"}, + {file = "wrapt-2.1.2-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9c691a6bc752c0cc4711cc0c00896fcd0f116abc253609ef64ef930032821842"}, + {file = "wrapt-2.1.2-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:f3b7d73012ea75aee5844de58c88f44cf62d0d62711e39da5a82824a7c4626a8"}, + {file = "wrapt-2.1.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:577dff354e7acd9d411eaf4bfe76b724c89c89c8fc9b7e127ee28c5f7bcb25b6"}, + {file = "wrapt-2.1.2-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:3d7b6fd105f8b24e5bd23ccf41cb1d1099796524bcc6f7fbb8fe576c44befbc9"}, + {file = "wrapt-2.1.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:866abdbf4612e0b34764922ef8b1c5668867610a718d3053d59e24a5e5fcfc15"}, + {file = "wrapt-2.1.2-cp311-cp311-win32.whl", hash = "sha256:5a0a0a3a882393095573344075189eb2d566e0fd205a2b6414e9997b1b800a8b"}, + {file = "wrapt-2.1.2-cp311-cp311-win_amd64.whl", hash = "sha256:64a07a71d2730ba56f11d1a4b91f7817dc79bc134c11516b75d1921a7c6fcda1"}, + {file = "wrapt-2.1.2-cp311-cp311-win_arm64.whl", hash = "sha256:b89f095fe98bc12107f82a9f7d570dc83a0870291aeb6b1d7a7d35575f55d98a"}, + {file = "wrapt-2.1.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = 
"sha256:ff2aad9c4cda28a8f0653fc2d487596458c2a3f475e56ba02909e950a9efa6a9"}, + {file = "wrapt-2.1.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6433ea84e1cfacf32021d2a4ee909554ade7fd392caa6f7c13f1f4bf7b8e8748"}, + {file = "wrapt-2.1.2-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:c20b757c268d30d6215916a5fa8461048d023865d888e437fab451139cad6c8e"}, + {file = "wrapt-2.1.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:79847b83eb38e70d93dc392c7c5b587efe65b3e7afcc167aa8abd5d60e8761c8"}, + {file = "wrapt-2.1.2-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:f8fba1bae256186a83d1875b2b1f4e2d1242e8fac0f58ec0d7e41b26967b965c"}, + {file = "wrapt-2.1.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e3d3b35eedcf5f7d022291ecd7533321c4775f7b9cd0050a31a68499ba45757c"}, + {file = "wrapt-2.1.2-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:6f2c5390460de57fa9582bc8a1b7a6c86e1a41dfad74c5225fc07044c15cc8d1"}, + {file = "wrapt-2.1.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:7dfa9f2cf65d027b951d05c662cc99ee3bd01f6e4691ed39848a7a5fffc902b2"}, + {file = "wrapt-2.1.2-cp312-cp312-win32.whl", hash = "sha256:eba8155747eb2cae4a0b913d9ebd12a1db4d860fc4c829d7578c7b989bd3f2f0"}, + {file = "wrapt-2.1.2-cp312-cp312-win_amd64.whl", hash = "sha256:1c51c738d7d9faa0b3601708e7e2eda9bf779e1b601dce6c77411f2a1b324a63"}, + {file = "wrapt-2.1.2-cp312-cp312-win_arm64.whl", hash = "sha256:c8e46ae8e4032792eb2f677dbd0d557170a8e5524d22acc55199f43efedd39bf"}, + {file = "wrapt-2.1.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:787fd6f4d67befa6fe2abdffcbd3de2d82dfc6fb8a6d850407c53332709d030b"}, + {file = "wrapt-2.1.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:4bdf26e03e6d0da3f0e9422fd36bcebf7bc0eeb55fdf9c727a09abc6b9fe472e"}, + {file = "wrapt-2.1.2-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = 
"sha256:bbac24d879aa22998e87f6b3f481a5216311e7d53c7db87f189a7a0266dafffb"}, + {file = "wrapt-2.1.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:16997dfb9d67addc2e3f41b62a104341e80cac52f91110dece393923c0ebd5ca"}, + {file = "wrapt-2.1.2-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:162e4e2ba7542da9027821cb6e7c5e068d64f9a10b5f15512ea28e954893a267"}, + {file = "wrapt-2.1.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f29c827a8d9936ac320746747a016c4bc66ef639f5cd0d32df24f5eacbf9c69f"}, + {file = "wrapt-2.1.2-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:a9dd9813825f7ecb018c17fd147a01845eb330254dff86d3b5816f20f4d6aaf8"}, + {file = "wrapt-2.1.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6f8dbdd3719e534860d6a78526aafc220e0241f981367018c2875178cf83a413"}, + {file = "wrapt-2.1.2-cp313-cp313-win32.whl", hash = "sha256:5c35b5d82b16a3bc6e0a04349b606a0582bc29f573786aebe98e0c159bc48db6"}, + {file = "wrapt-2.1.2-cp313-cp313-win_amd64.whl", hash = "sha256:f8bc1c264d8d1cf5b3560a87bbdd31131573eb25f9f9447bb6252b8d4c44a3a1"}, + {file = "wrapt-2.1.2-cp313-cp313-win_arm64.whl", hash = "sha256:3beb22f674550d5634642c645aba4c72a2c66fb185ae1aebe1e955fae5a13baf"}, + {file = "wrapt-2.1.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0fc04bc8664a8bc4c8e00b37b5355cffca2535209fba1abb09ae2b7c76ddf82b"}, + {file = "wrapt-2.1.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:a9b9d50c9af998875a1482a038eb05755dfd6fe303a313f6a940bb53a83c3f18"}, + {file = "wrapt-2.1.2-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:2d3ff4f0024dd224290c0eabf0240f1bfc1f26363431505fb1b0283d3b08f11d"}, + {file = "wrapt-2.1.2-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3278c471f4468ad544a691b31bb856374fbdefb7fee1a152153e64019379f015"}, + {file = 
"wrapt-2.1.2-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:a8914c754d3134a3032601c6984db1c576e6abaf3fc68094bb8ab1379d75ff92"}, + {file = "wrapt-2.1.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:ff95d4264e55839be37bafe1536db2ab2de19da6b65f9244f01f332b5286cfbf"}, + {file = "wrapt-2.1.2-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:76405518ca4e1b76fbb1b9f686cff93aebae03920cc55ceeec48ff9f719c5f67"}, + {file = "wrapt-2.1.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:c0be8b5a74c5824e9359b53e7e58bef71a729bacc82e16587db1c4ebc91f7c5a"}, + {file = "wrapt-2.1.2-cp313-cp313t-win32.whl", hash = "sha256:f01277d9a5fc1862f26f7626da9cf443bebc0abd2f303f41c5e995b15887dabd"}, + {file = "wrapt-2.1.2-cp313-cp313t-win_amd64.whl", hash = "sha256:84ce8f1c2104d2f6daa912b1b5b039f331febfeee74f8042ad4e04992bd95c8f"}, + {file = "wrapt-2.1.2-cp313-cp313t-win_arm64.whl", hash = "sha256:a93cd767e37faeddbe07d8fc4212d5cba660af59bdb0f6372c93faaa13e6e679"}, + {file = "wrapt-2.1.2-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:1370e516598854e5b4366e09ce81e08bfe94d42b0fd569b88ec46cc56d9164a9"}, + {file = "wrapt-2.1.2-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:6de1a3851c27e0bd6a04ca993ea6f80fc53e6c742ee1601f486c08e9f9b900a9"}, + {file = "wrapt-2.1.2-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:de9f1a2bbc5ac7f6012ec24525bdd444765a2ff64b5985ac6e0692144838542e"}, + {file = "wrapt-2.1.2-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:970d57ed83fa040d8b20c52fe74a6ae7e3775ae8cff5efd6a81e06b19078484c"}, + {file = "wrapt-2.1.2-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:3969c56e4563c375861c8df14fa55146e81ac11c8db49ea6fb7f2ba58bc1ff9a"}, + {file = "wrapt-2.1.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:57d7c0c980abdc5f1d98b11a2aa3bb159790add80258c717fa49a99921456d90"}, + {file = 
"wrapt-2.1.2-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:776867878e83130c7a04237010463372e877c1c994d449ca6aaafeab6aab2586"}, + {file = "wrapt-2.1.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:fab036efe5464ec3291411fabb80a7a39e2dd80bae9bcbeeca5087fdfa891e19"}, + {file = "wrapt-2.1.2-cp314-cp314-win32.whl", hash = "sha256:e6ed62c82ddf58d001096ae84ce7f833db97ae2263bff31c9b336ba8cfe3f508"}, + {file = "wrapt-2.1.2-cp314-cp314-win_amd64.whl", hash = "sha256:467e7c76315390331c67073073d00662015bb730c566820c9ca9b54e4d67fd04"}, + {file = "wrapt-2.1.2-cp314-cp314-win_arm64.whl", hash = "sha256:da1f00a557c66225d53b095a97eace0fc5349e3bfda28fa34ffae238978ee575"}, + {file = "wrapt-2.1.2-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:62503ffbc2d3a69891cf29beeaccdb4d5e0a126e2b6a851688d4777e01428dbb"}, + {file = "wrapt-2.1.2-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:c7e6cd120ef837d5b6f860a6ea3745f8763805c418bb2f12eeb1fa6e25f22d22"}, + {file = "wrapt-2.1.2-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:3769a77df8e756d65fbc050333f423c01ae012b4f6731aaf70cf2bef61b34596"}, + {file = "wrapt-2.1.2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a76d61a2e851996150ba0f80582dd92a870643fa481f3b3846f229de88caf044"}, + {file = "wrapt-2.1.2-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:6f97edc9842cf215312b75fe737ee7c8adda75a89979f8e11558dfff6343cc4b"}, + {file = "wrapt-2.1.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:4006c351de6d5007aa33a551f600404ba44228a89e833d2fadc5caa5de8edfbf"}, + {file = "wrapt-2.1.2-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:a9372fc3639a878c8e7d87e1556fa209091b0a66e912c611e3f833e2c4202be2"}, + {file = "wrapt-2.1.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:3144b027ff30cbd2fca07c0a87e67011adb717eb5f5bd8496325c17e454257a3"}, + {file = 
"wrapt-2.1.2-cp314-cp314t-win32.whl", hash = "sha256:3b8d15e52e195813efe5db8cec156eebe339aaf84222f4f4f051a6c01f237ed7"}, + {file = "wrapt-2.1.2-cp314-cp314t-win_amd64.whl", hash = "sha256:08ffa54146a7559f5b8df4b289b46d963a8e74ed16ba3687f99896101a3990c5"}, + {file = "wrapt-2.1.2-cp314-cp314t-win_arm64.whl", hash = "sha256:72aaa9d0d8e4ed0e2e98019cea47a21f823c9dd4b43c7b77bba6679ffcca6a00"}, + {file = "wrapt-2.1.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5e0fa9cc32300daf9eb09a1f5bdc6deb9a79defd70d5356ba453bcd50aef3742"}, + {file = "wrapt-2.1.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:710f6e5dfaf6a5d5c397d2d6758a78fecd9649deb21f1b645f5b57a328d63050"}, + {file = "wrapt-2.1.2-cp39-cp39-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:305d8a1755116bfdad5dda9e771dcb2138990a1d66e9edd81658816edf51aed1"}, + {file = "wrapt-2.1.2-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f0d8fc30a43b5fe191cf2b1a0c82bab2571dadd38e7c0062ee87d6df858dd06e"}, + {file = "wrapt-2.1.2-cp39-cp39-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:a5d516e22aedb7c9c1d47cba1c63160b1a6f61ec2f3948d127cd38d5cfbb556f"}, + {file = "wrapt-2.1.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:45914e8efbe4b9d5102fcf0e8e2e3258b83a5d5fba9f8f7b6d15681e9d29ffe0"}, + {file = "wrapt-2.1.2-cp39-cp39-musllinux_1_2_riscv64.whl", hash = "sha256:478282ebd3795a089154fb16d3db360e103aa13d3b2ad30f8f6aac0d2207de0e"}, + {file = "wrapt-2.1.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:3756219045f73fb28c5d7662778e4156fbd06cf823c4d2d4b19f97305e52819c"}, + {file = "wrapt-2.1.2-cp39-cp39-win32.whl", hash = "sha256:b8aefb4dbb18d904b96827435a763fa42fc1f08ea096a391710407a60983ced8"}, + {file = "wrapt-2.1.2-cp39-cp39-win_amd64.whl", hash = "sha256:e5aeab8fe15c3dff75cfee94260dcd9cded012d4ff06add036c28fae7718593b"}, + {file = "wrapt-2.1.2-cp39-cp39-win_arm64.whl", hash = 
"sha256:f069e113743a21a3defac6677f000068ebb931639f789b5b226598e247a4c89e"}, + {file = "wrapt-2.1.2-py3-none-any.whl", hash = "sha256:b8fd6fa2b2c4e7621808f8c62e8317f4aae56e59721ad933bac5239d913cf0e8"}, + {file = "wrapt-2.1.2.tar.gz", hash = "sha256:3996a67eecc2c68fd47b4e3c564405a5777367adfd9b8abb58387b63ee83b21e"}, ] +[package.extras] +dev = ["pytest", "setuptools"] + [[package]] name = "xlsx2csv" -version = "0.8.2" +version = "0.8.4" description = "xlsx to csv converter" optional = false python-versions = "*" groups = ["dev", "test"] files = [ - {file = "xlsx2csv-0.8.2-py3-none-any.whl", hash = "sha256:adcc76589d664f2bd597564851cc97c4d6b8825bb19d09c621e36baefa2d4274"}, - {file = "xlsx2csv-0.8.2.tar.gz", hash = "sha256:cdd272c82f8b32f1cee76aeaef87b2ee3549661fddf90f7ecf2310967a16fc84"}, + {file = "xlsx2csv-0.8.4-py3-none-any.whl", hash = "sha256:52ab873fc7b2f2ca75d14aee8bd1985a9f5c1bcb3cc7b80df7a5d57a40a67473"}, + {file = "xlsx2csv-0.8.4.tar.gz", hash = "sha256:2aa809888826f6af5b26c77fc7f613f2bbeada0d8cc09e5a58e0f59684bb6911"}, ] [[package]] @@ -3343,5 +3297,5 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.1" -python-versions = ">=3.10,<3.12" -content-hash = "2655344a334cdaeec3e76c4c3752c89b2db1bbe1bc0532b1eaa5bb8018a914bc" +python-versions = ">=3.10,<3.13" +content-hash = "34d27f814106c1acab20b1dd8907f568ab59f7e7d0c0b13b087e80770b117652" diff --git a/pylint_checkers/check_typing_imports.py b/pylint_checkers/check_typing_imports.py deleted file mode 100644 index 8e35529..0000000 --- a/pylint_checkers/check_typing_imports.py +++ /dev/null @@ -1,113 +0,0 @@ -""" -A pylint checker to check that deprecated imports are not being -taken from `typing`. - -No longer in use since downgrade to Python 3.7. 
-""" - -from typing import TYPE_CHECKING - -from astroid.node_classes import Attribute, Import, ImportFrom, Name -from pylint.checkers import BaseChecker - -if TYPE_CHECKING: - from pylint.lint import PyLinter - - -DEPRECATED_REPLACEMENTS: dict[str, str] = { - "Tuple": "tuple", - "Callable": "collections.abc.Callable", - "Type": "type", - "Dict": "dict", - "List": "list", - "Set": "set", - "FrozenSet": "frozenset", - "DefaultDict": "collections.defaultdict", - "OrderedDict": "collections.OrderedDict", - "ChainMap": "collections.ChainMap", - "Counter": "collections.counter", - "Deque": "collections.deque", - "Pattern": "re.Pattern", - "Match": "re.Match", - "AbstractSet": "collections.abc.AbstractSet", - "ByteString": "collections.abc.ByteString", - "Collection": "collections.abc.Collection", - "Container": "collections.abc.Container", - "ItemsView": "collections.abc.ItemsView", - "KeysView": "collections.abc.KeysView", - "Mapping": "collections.abc.Mapping", - "MappingView": "collections.abc.MappingView", - "MutableMapping": "collections.abc.MutableMapping", - "MutableSequence": "collections.abc.MutableSequence", - "MutableSet": "collections.abc.MutableSet", - "Sequence": "collections.abc.Sequence", - "ValuesView": "collections.abc.ValuesView", - "Iterable": "collections.abc.Iterable", - "Iterator": "collections.abc.Iterator", - "Generator": "collections.abc.Generator", - "Hashable": "collections.abc.Hashable", - "Reversible": "collections.abc.Reversible", - "Sized": "collections.abc.Sized", - "Coroutine": "collections.abc.Coroutine", - "AsyncGenerator": "collections.abc.AsyncGenerator", - "AsyncIterable": "collections.abc.AsyncIterable", - "AsyncIterator": "collections.abc.AsyncIterator", - "Awaitable": "collections.abc.Awaitable", - "ContextManager": "contextlib.AbstractContextManager", - "AsyncContextManager": "contextlib.AbstractAsyncContextManager", -} -"""Deprecated typing imports and their replacements.""" - - -class TypingImportChecker(BaseChecker): - """A 
pylint 'checker' to validate that deprecated typing generics are not being used.""" - - name = "deprecated-typing-imports-checker" - msgs = { - "W1901": ( - "Using deprecated 'typing.%s' instead of %s'%s'", - "deprecated-typing-generic", - "Emitted when deprecated typing generics are used instead of their replacements.", - {"minversion": (3, 9)}, - ), - } - - def visit_import(self, node: Import): - """Check import usage.""" - for import_name, import_alias in node.names: - if import_name != "typing": - continue - - typing_name = import_alias or import_name - for attr_node in node.root().nodes_of_class(Attribute): - name_node = attr_node.expr - - if not isinstance(name_node, Name) or name_node.name != typing_name: - continue - - name = attr_node.attrname - replacement = DEPRECATED_REPLACEMENTS.get(name) - qualifier = "builtin " if "." not in replacement else "" - self.add_message( - "deprecated-typing-generic", - node=attr_node, - args=(name, qualifier, replacement), - ) - - def visit_importfrom(self, node: ImportFrom): - """Check import from usage.""" - if not node.modname == "typing": - return - - for name, _ in node.names: - replacement = DEPRECATED_REPLACEMENTS.get(name) - if replacement is not None: - qualifier = "builtin " if "." 
not in replacement else "" - self.add_message( - "deprecated-typing-generic", node=node, args=(name, qualifier, replacement) - ) - - -def register(linter: "PyLinter"): - """Register the checker.""" - linter.register_checker(TypingImportChecker(linter)) diff --git a/pyproject.toml b/pyproject.toml index fb633c3..4aa4d3f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,21 +30,21 @@ packages = [ ] [tool.poetry.dependencies] -python = ">=3.10,<3.12" +python = ">=3.10,<3.13" # breaking changes beyond 3.12 boto3 = ">=1.34.162,<1.36" # breaking change beyond 1.36 botocore = ">=1.34.162,<1.36" # breaking change beyond 1.36 delta-spark = "2.4.*" -duckdb = "1.1.*" # breaking changes beyond 1.1 -Jinja2 = "3.1.*" -lxml = "^4.9.1" +duckdb = "1.1.3" # breaking changes beyond 1.1 +Jinja2 = "3.1.6" +lxml = "4.9.4" numpy = "1.26.4" -openpyxl = "^3.1" -pandas = "^2.2.2" -polars = "0.20.*" -pyarrow = "^17.0.0" -pydantic = "1.10.16" +openpyxl = "3.1.5" +pandas = "2.3.3" +polars = "0.20.31" +pyarrow = "17.0.0" +pydantic = "1.10.19" pyspark = "3.4.*" -typing_extensions = "^4.6.2" +typing_extensions = "4.15.0" [tool.poetry.group.dev] optional = true @@ -58,30 +58,29 @@ commitizen = "4.9.1" pre-commit = "4.3.0" charset-normalizer = "3.4.6" python-discovery = "1.2.0" -requests = "2.33.0" [tool.poetry.group.test] optional = true [tool.poetry.group.test.dependencies] -faker = "18.11.1" behave = "1.3.3" coverage = "7.11.0" -moto = {extras = ["s3"], version = "4.0.13"} +moto = {extras = ["s3"], version = "4.2.14"} +requests = "2.33.0" # dependency of `moto` Werkzeug = "3.1.6" pytest = "8.4.2" pytest-lazy-fixtures = "1.4.0" # switched from https://github.com/TvoroG/pytest-lazy-fixture as it's no longer supported -xlsx2csv = "0.8.2" +xlsx2csv = "0.8.4" # polars requirement [tool.poetry.group.lint] optional = true [tool.poetry.group.lint.dependencies] black = "24.3.0" -astroid = "2.14.2" -isort = "5.11.5" -pylint = "2.16.4" -mypy = "0.991" +astroid = "3.3.9" +isort = "5.13.2" +pylint = 
"3.3.9" +mypy = "1.11.2" boto3-stubs = {extras = ["essential"], version = "1.26.72"} botocore-stubs = "1.29.72" pandas-stubs = "1.2.0.62" @@ -100,7 +99,7 @@ optional = true [tool.poetry.group.docs.dependencies] click = "8.2.1" -mkdocs = "^1.6.1" +mkdocs = "1.6.1" mkdocstrings = { version = "1.0.3", extras = ["python"] } griffelib = "2.0.1" pymdown-extensions = "10.21.2" @@ -142,10 +141,6 @@ source_pkgs = [ [tool.coverage.report] show_missing = true -[tool.pylint] -init-hook = "import sys; sys.path.append('./pylint_checkers')" -load-plugins = "check_typing_imports" - [tool.pylint.main] extension-pkg-allow-list = ["pyspark", "lxml", "pydantic"] fail-under = 10.0 @@ -194,7 +189,7 @@ max-statements = 50 min-public-methods = 2 [tool.pylint.exceptions] -overgeneral-exceptions = ["BaseException", "Exception"] +overgeneral-exceptions = ["builtins.BaseException", "builtins.Exception"] [tool.pylint.format] ignore-long-lines = "^\\s*(# )??$" @@ -229,6 +224,8 @@ disable = [ "use-symbolic-message-instead", "logging-fstring-interpolation", "fixme", + "too-many-positional-arguments", + "too-many-arguments", ] enable = ["c-extension-no-member"] diff --git a/src/dve/core_engine/backends/implementations/duckdb/rules.py b/src/dve/core_engine/backends/implementations/duckdb/rules.py index c93a6dd..debb8fe 100644 --- a/src/dve/core_engine/backends/implementations/duckdb/rules.py +++ b/src/dve/core_engine/backends/implementations/duckdb/rules.py @@ -152,7 +152,7 @@ def _add_cnst_field(rel: DuckDBPyRelation) -> tuple[str, DuckDBPyRelation]: group_pl = entity.pl().pivot( columns=[config.pivot_column], values=agg_cols, - index=(group_cols or [const_fld]), + index=(group_cols or [const_fld]), # pylint: disable=E0606 aggregate_function=config.agg_function, ) if const_fld in group_pl.columns: diff --git a/src/dve/core_engine/backends/metadata/contract.py b/src/dve/core_engine/backends/metadata/contract.py index 12beb41..234a1e9 100644 --- a/src/dve/core_engine/backends/metadata/contract.py 
+++ b/src/dve/core_engine/backends/metadata/contract.py @@ -44,8 +44,8 @@ def schemas(self) -> dict[EntityName, type[BaseModel]]: """The per-entity schemas, as pydantic models.""" if not self._schemas: for entity_name, validator in self.validators.items(): - self._schemas[entity_name] = validator.model # type: ignore - return self._schemas.copy() + self._schemas[entity_name] = validator.model # type: ignore # pylint: disable=E1137 + return self._schemas.copy() # pylint: disable=E1101 @root_validator(allow_reuse=True) @classmethod diff --git a/src/dve/core_engine/backends/utilities.py b/src/dve/core_engine/backends/utilities.py index d94d0bf..ee4c2ab 100644 --- a/src/dve/core_engine/backends/utilities.py +++ b/src/dve/core_engine/backends/utilities.py @@ -52,7 +52,7 @@ def stringify_type(type_: Union[type, GenericAlias]) -> type: origin = get_origin(type_) if origin is None: # A non-generic container type, return as-is - return type_ + return type_ # type: ignore type_args = get_args(type_) if not type_args: diff --git a/src/dve/core_engine/configuration/v1/__init__.py b/src/dve/core_engine/configuration/v1/__init__.py index 89174be..04d5e07 100644 --- a/src/dve/core_engine/configuration/v1/__init__.py +++ b/src/dve/core_engine/configuration/v1/__init__.py @@ -38,7 +38,7 @@ FieldName = str """The name of a field within a model/schema.""" -TypeOrDef = Union[ +TypeOrDef = Union[ # pylint: disable=C0103 TypeName, "_CallableTypeDefinition", "_ModelTypeDefinition", "_TypeAliasDefinition" ] """The name or definition of a type.""" @@ -181,7 +181,7 @@ class V1EngineConfig(BaseEngineConfig): @validate_arguments def _update_rule_store(self, rule_store: dict[RuleName, BusinessComponentSpecConfigUnion]): """Update the rule store rules to add/override the rules from the new store.""" - self._rule_store_rules.update(rule_store) + self._rule_store_rules.update(rule_store) # pylint: disable=E1101 def _load_rule_store(self, uri: URI): """Load a JSON rule store from the provided URI 
and update the stored @@ -198,7 +198,7 @@ def _load_rule_store(self, uri: URI): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) uri_prefix = self.location.rsplit("/", 1)[0] - for rule_store_config in self.transformations.rule_stores: + for rule_store_config in self.transformations.rule_stores: # pylint: disable=E1101 uri = joinuri(uri_prefix, rule_store_config.filename) self._load_rule_store(uri) @@ -281,7 +281,7 @@ def _load_rules_and_vars(self) -> tuple[list[Rule], list[TemplateVariables]]: rules, local_variable_list = [], [] added_rules: set[RuleName] = set() - for index, complex_rule_config in enumerate(self.transformations.complex_rules): + for index, complex_rule_config in enumerate(self.transformations.complex_rules): # pylint: disable=E1101 rule, local_params, deps = self._resolve_business_rule(complex_rule_config) missing_rules = deps - added_rules if missing_rules: @@ -295,9 +295,9 @@ def _load_rules_and_vars(self) -> tuple[list[Rule], list[TemplateVariables]]: rule, local_params = self._create_rule( name="root", - rules=self.transformations.rules, - filters=self.transformations.filters, - post_filter_rules=self.transformations.post_filter_rules, + rules=self.transformations.rules, # pylint: disable=E1101 + filters=self.transformations.filters, # pylint: disable=E1101 + post_filter_rules=self.transformations.post_filter_rules, # pylint: disable=E1101 ) rules.append(rule) local_variable_list.append(local_params) @@ -338,7 +338,7 @@ def load_error_message_info(self, uri): def get_reference_data_config(self) -> dict[EntityName, ReferenceConfig]: # type: ignore """Gets the reference data configuration from the transformations""" - return self.transformations.reference_data + return self.transformations.reference_data # pylint: disable=E1101 def get_rule_metadata(self) -> RuleMetadata: """Gets the rule metadata from the Engine configuration""" @@ -346,6 +346,6 @@ def get_rule_metadata(self) -> RuleMetadata: return RuleMetadata( 
rules=rules, local_variables=local_variables, - global_variables=self.transformations.parameters, + global_variables=self.transformations.parameters, # pylint: disable=E1101 reference_data_config=self.get_reference_data_config(), ) diff --git a/src/dve/core_engine/engine.py b/src/dve/core_engine/engine.py index c5d1ba9..b2ec9a7 100644 --- a/src/dve/core_engine/engine.py +++ b/src/dve/core_engine/engine.py @@ -137,7 +137,7 @@ def build( debug=debug, **kwargs, ) - self.main_log.info(f"Output path: {self.output_prefix_uri!r}") + self.main_log.info(f"Output path: {self.output_prefix_uri!r}") # pylint: disable=E1101 return self @classmethod @@ -155,13 +155,13 @@ def build_from_model(cls, model_str: JSONstring): return cls.build(**EngineRunValidation(**json.loads(model_str)).dict()) def __enter__(self) -> "CoreEngine": - self.main_log.info("Entering pipeline context.") + self.main_log.info("Entering pipeline context.") # pylint: disable=E1101 if self._cache_dir is not None: raise ValueError("Pipeline already within context") self._cache_dir = TemporaryPrefix(self.cache_prefix_uri) self._cache_dir.__enter__() - self.main_log.info(f"Pipeline will cache to {self.cache_prefix!r}") + self.main_log.info(f"Pipeline will cache to {self.cache_prefix!r}") # pylint: disable=E1101 return self def __exit__( @@ -170,14 +170,14 @@ def __exit__( exc_value: Optional[Exception], traceback: Optional[TracebackType], ) -> None: - self.main_log.info(f"Exiting pipeline context, clearing {self.cache_prefix!r}") + self.main_log.info(f"Exiting pipeline context, clearing {self.cache_prefix!r}") # pylint: disable=E1101 cache_dir = self._cache_dir self._cache_dir = None if cache_dir is not None: cache_dir.__exit__(exc_type, exc_value, traceback) - self.main_log.info("Cleared cache.") + self.main_log.info("Cleared cache.") # pylint: disable=E1101 @property def cache_prefix(self) -> URI: @@ -198,17 +198,17 @@ def _write_entity_outputs(self, entities: SparkEntities) -> SparkEntities: """ 
output_entities = {} - self.main_log.info(f"Writing entities to the output location: {self.output_prefix_uri}") + self.main_log.info(f"Writing entities to the output location: {self.output_prefix_uri}") # pylint: disable=E1101 for entity_name, entity in entities.items(): entity = entity.drop(RECORD_INDEX_COLUMN_NAME) - self.main_log.info(f"Entity: {entity_name} {type(entity)}") + self.main_log.info(f"Entity: {entity_name} {type(entity)}") # pylint: disable=E1101 output_uri = joinuri(self.output_prefix_uri, entity_name) if get_resource_exists(output_uri): - self.main_log.info(f"{output_uri} already exists - will be overwritten") + self.main_log.info(f"{output_uri} already exists - will be overwritten") # pylint: disable=E1101 - self.main_log.info(f"+ Writing parquet output to {output_uri!r}") + self.main_log.info(f"+ Writing parquet output to {output_uri!r}") # pylint: disable=E1101 entity.write.mode("overwrite").parquet(output_uri) spark_session = SparkSession.builder.getOrCreate() output_entities[entity_name] = spark_session.read.format("parquet").load( @@ -228,7 +228,7 @@ def _write_outputs(self, entities: SparkEntities) -> SparkEntities: def _show_available_entities(self, entities: SparkEntities, *, verbose: bool = False) -> None: """Print current entities.""" - self.main_log.info("Displaying available dataframes in this run:") + self.main_log.info("Displaying available dataframes in this run:") # pylint: disable=E1101 for entity_name, entity in entities.items(): # FIXME: Currently a print statement because log messages diff --git a/src/dve/core_engine/type_hints.py b/src/dve/core_engine/type_hints.py index 3112e28..154ada6 100644 --- a/src/dve/core_engine/type_hints.py +++ b/src/dve/core_engine/type_hints.py @@ -5,7 +5,7 @@ from multiprocessing import Queue as ProcessQueue from pathlib import Path from queue import Queue as ThreadQueue -from typing import TYPE_CHECKING, Any, List, Optional, TypeVar, Union # pylint: disable=W1901 +from typing import 
TYPE_CHECKING, Any, List, Optional, TypeVar, Union from pyspark.sql import DataFrame from pyspark.sql.types import StructType diff --git a/src/dve/metadata_parser/domain_types.py b/src/dve/metadata_parser/domain_types.py index 6e102ea..84dc5a7 100644 --- a/src/dve/metadata_parser/domain_types.py +++ b/src/dve/metadata_parser/domain_types.py @@ -152,7 +152,7 @@ def check_validates(cls, value: Optional[str]) -> bool: return is_valid @classmethod - def validate(cls, value: Optional[str], field: fields.ModelField) -> str: # type: ignore + def validate(cls, value: Optional[str], field: fields.ModelField) -> str: # type: ignore # pylint: disable=W0221 """Validates the given postcode""" nhs_number = cls.ensure_format(value) diff --git a/src/dve/metadata_parser/models.py b/src/dve/metadata_parser/models.py index 73e6b5c..1d64160 100644 --- a/src/dve/metadata_parser/models.py +++ b/src/dve/metadata_parser/models.py @@ -81,6 +81,7 @@ def validate_error_message(cls, value: str, values: dict[str, Any]) -> str: def get_field_validator(self, field_name: str, **extra_kwargs: Any) -> classmethod: """Get a validator a given field.""" func = getattr(function_library, self.name) + _kwargs = self.kwargs_ | extra_kwargs return create_validator( func, field_name, @@ -88,8 +89,7 @@ def get_field_validator(self, field_name: str, **extra_kwargs: Any) -> classmeth self.error_message, return_result=True, fields=self.fields, - **self.kwargs_, - **extra_kwargs, + **_kwargs, ) @@ -257,7 +257,7 @@ def get_type_and_validators( python_type_callable = chain_get(self.callable, *type_mappings, pyd, dt, __builtins__) if not callable(python_type_callable): raise ValueError("Fetched callable is not callable") - python_type = python_type_callable(**self.constraints) + python_type = python_type_callable(**self.constraints) # pylint: disable=E1134 else: raise ValueError("No field type set") @@ -360,7 +360,7 @@ def as_model( field_name, *type_mappings, schemas=schemas, - is_mandatory=field_name in 
self.mandatory_fields, + is_mandatory=field_name in self.mandatory_fields, # pylint: disable=E1135 ) pyd_fields[field_name] = (python_type, default) validators.update(field_validators) @@ -398,7 +398,7 @@ def load_models( ) -> dict[EntityName, pyd.main.ModelMetaclass]: """Load the models from the dataset definition.""" loaded_schemas: dict[EntityName, pyd.main.ModelMetaclass] = {} - for model_name, specification in self.schemas.items(): + for model_name, specification in self.schemas.items(): # pylint: disable=E1101 loaded_schemas[model_name] = specification.as_model( model_name, self.types, *type_mappings, schemas=loaded_schemas ) diff --git a/src/dve/parser/file_handling/helpers.py b/src/dve/parser/file_handling/helpers.py index 9079e53..7ff9046 100644 --- a/src/dve/parser/file_handling/helpers.py +++ b/src/dve/parser/file_handling/helpers.py @@ -12,13 +12,13 @@ class NonClosingTextIOWrapper(TextIOWrapper): """ - def __exit__(self, *_): # pragma: no cover + def __exit__(self, *_): # pragma: no cover # pylint: disable=R1711 """Exits the context and detaches""" try: self.detach() except ValueError: # Assume all ValuesErrors are safe to absorb. 
- return + return # pylint: disable=R1711 def parse_uri(uri: URI) -> tuple[Scheme, Hostname, URIPath]: diff --git a/src/dve/pipeline/pipeline.py b/src/dve/pipeline/pipeline.py index fae60b6..67fdf88 100644 --- a/src/dve/pipeline/pipeline.py +++ b/src/dve/pipeline/pipeline.py @@ -108,7 +108,7 @@ def data_contract(self) -> BaseDataContract: @property def step_implementations(self) -> Optional[BaseStepImplementations[EntityType]]: """The step implementations to apply the business rules to a given dataset""" - return self._step_implementations + return self._step_implementations # type: ignore @property def backend_reader_kwargs(self) -> dict[str, Any] | None: From 8d4b3c49e2845c0b34aabe7576afba661ed37f74 Mon Sep 17 00:00:00 2001 From: George Robertson <41301972+georgeRobertson2@users.noreply.github.com> Date: Mon, 11 May 2026 17:56:39 +0100 Subject: [PATCH 02/12] build: update pyspark to v3.5 (#102) * build: upgrade pyspark pyspark from 3.4 -> 3.5.2 and delta spark 2.4 -> 3.2.0 --- poetry.lock | 22 +++++++++++----------- pyproject.toml | 4 ++-- tests/features/patches.py | 2 +- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/poetry.lock b/poetry.lock index 5548276..dfe26c4 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1067,19 +1067,19 @@ dev = ["black", "build", "mypy", "pytest", "pyupgrade", "twine", "validate-pypro [[package]] name = "delta-spark" -version = "2.4.0" +version = "3.2.0" description = "Python APIs for using Delta Lake with Apache Spark" optional = false python-versions = ">=3.6" groups = ["main"] files = [ - {file = "delta-spark-2.4.0.tar.gz", hash = "sha256:ef776e325e80d98e3920cab982c747b094acc46599d62dfcdc9035fb112ba6a9"}, - {file = "delta_spark-2.4.0-py3-none-any.whl", hash = "sha256:7204142a97ef16367403b020d810d0c37f4ae8275b4997de4056423cf69b3a4b"}, + {file = "delta-spark-3.2.0.tar.gz", hash = "sha256:641967828e47c64805f8c746513da80bea24b5f19b069cdcf64561cd3692e11d"}, + {file = "delta_spark-3.2.0-py3-none-any.whl", hash = 
"sha256:c4ff3fa7218e58a702cb71eb64384b0005c4d6f0bbdd0fe0b38a53564d946e09"}, ] [package.dependencies] importlib-metadata = ">=1.0.0" -pyspark = ">=3.4.0,<3.5.0" +pyspark = ">=3.5.0,<3.6.0" [[package]] name = "deprecated" @@ -2491,24 +2491,24 @@ extra = ["pygments (>=2.19.1)"] [[package]] name = "pyspark" -version = "3.4.4" +version = "3.5.2" description = "Apache Spark Python API" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" groups = ["main"] files = [ - {file = "pyspark-3.4.4.tar.gz", hash = "sha256:b831a99c1aeb058bc4a2498665de10006b899b2438ba15470cc88f577755ebf8"}, + {file = "pyspark-3.5.2.tar.gz", hash = "sha256:bbb36eba09fa24e86e0923d7e7a986041b90c714e11c6aa976f9791fe9edde5e"}, ] [package.dependencies] py4j = "0.10.9.7" [package.extras] -connect = ["googleapis-common-protos (>=1.56.4)", "grpcio (>=1.48.1)", "grpcio-status (>=1.48.1)", "numpy (>=1.15,<2)", "pandas (>=1.0.5)", "pyarrow (>=1.0.0)"] +connect = ["googleapis-common-protos (>=1.56.4)", "grpcio (>=1.56.0)", "grpcio-status (>=1.56.0)", "numpy (>=1.15,<2)", "pandas (>=1.0.5)", "pyarrow (>=4.0.0)"] ml = ["numpy (>=1.15,<2)"] mllib = ["numpy (>=1.15,<2)"] -pandas-on-spark = ["numpy (>=1.15,<2)", "pandas (>=1.0.5)", "pyarrow (>=1.0.0)"] -sql = ["numpy (>=1.15,<2)", "pandas (>=1.0.5)", "pyarrow (>=1.0.0)"] +pandas-on-spark = ["numpy (>=1.15,<2)", "pandas (>=1.0.5)", "pyarrow (>=4.0.0)"] +sql = ["numpy (>=1.15,<2)", "pandas (>=1.0.5)", "pyarrow (>=4.0.0)"] [[package]] name = "pytest" @@ -3298,4 +3298,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.1" python-versions = ">=3.10,<3.13" -content-hash = "34d27f814106c1acab20b1dd8907f568ab59f7e7d0c0b13b087e80770b117652" +content-hash = "dc39317cbf4102e2fd18a441031ed03d8fb788fb0b54c88e6a648ab301ba1e55" diff --git a/pyproject.toml b/pyproject.toml index 4aa4d3f..59c1d2d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,7 +33,7 @@ packages = [ python = ">=3.10,<3.13" # breaking changes beyond 3.12 boto3 = ">=1.34.162,<1.36" # 
breaking change beyond 1.36 botocore = ">=1.34.162,<1.36" # breaking change beyond 1.36 -delta-spark = "2.4.*" +delta-spark = "3.2.0" duckdb = "1.1.3" # breaking changes beyond 1.1 Jinja2 = "3.1.6" lxml = "4.9.4" @@ -43,7 +43,7 @@ pandas = "2.3.3" polars = "0.20.31" pyarrow = "17.0.0" pydantic = "1.10.19" -pyspark = "3.4.*" +pyspark = "3.5.2" typing_extensions = "4.15.0" [tool.poetry.group.dev] diff --git a/tests/features/patches.py b/tests/features/patches.py index 6db6a8c..c31aef4 100644 --- a/tests/features/patches.py +++ b/tests/features/patches.py @@ -89,7 +89,7 @@ def get_spark_session() -> SparkSession: os.environ["PYSPARK_SUBMIT_ARGS"] = " ".join( [ "--packages", - "com.databricks:spark-xml_2.12:0.16.0,io.delta:delta-core_2.12:2.4.0", + "com.databricks:spark-xml_2.12:0.16.0,io.delta:delta-spark_2.12:3.2.0", "pyspark-shell", ] ) From f6d6424af8ad0b06f5e357d5408ced224c113b06 Mon Sep 17 00:00:00 2001 From: George Robertson <50412379+georgeRobertson@users.noreply.github.com> Date: Fri, 5 Jun 2026 18:31:15 +0100 Subject: [PATCH 03/12] feat: add additional fields check into csv readers (#109) * feat: add additional fields check into csv readers refactored the missing check in csv readers as well * fix: add all available model fields to ensure that multiple models don't trigger additional fields check for csv restructured the reader tests as well * style: linting fixes --- src/dve/core_engine/backends/base/reader.py | 17 +- .../implementations/duckdb/readers/csv.py | 96 ++++--- .../implementations/duckdb/readers/json.py | 12 +- .../implementations/duckdb/readers/xml.py | 8 +- .../implementations/spark/readers/csv.py | 37 ++- .../implementations/spark/readers/json.py | 7 +- .../implementations/spark/readers/xml.py | 7 +- src/dve/core_engine/backends/readers/csv.py | 42 +++ .../core_engine/backends/readers/utilities.py | 58 +++- src/dve/core_engine/backends/readers/xml.py | 1 + src/dve/pipeline/pipeline.py | 7 +- .../test_backends/test_readers/test_csv.py | 52 +++- 
.../test_readers/test_ddb_csv.py | 241 ---------------- .../test_readers/test_duckdb/test_ddb_csv.py | 268 ++++++++++++++++++ .../{ => test_duckdb}/test_ddb_json.py | 0 .../{ => test_duckdb}/test_ddb_xml.py | 0 .../{test_spark.py => test_spark_csv.py} | 18 +- .../{ => test_spark}/test_spark_json.py | 0 .../test_readers/test_utilities.py | 79 ++---- tests/testdata/planets/planets_add_fields.csv | 2 + 20 files changed, 586 insertions(+), 366 deletions(-) delete mode 100644 tests/test_core_engine/test_backends/test_readers/test_ddb_csv.py create mode 100644 tests/test_core_engine/test_backends/test_readers/test_duckdb/test_ddb_csv.py rename tests/test_core_engine/test_backends/test_readers/{ => test_duckdb}/test_ddb_json.py (100%) rename tests/test_core_engine/test_backends/test_readers/{ => test_duckdb}/test_ddb_xml.py (100%) rename tests/test_core_engine/test_backends/test_readers/test_spark/{test_spark.py => test_spark_csv.py} (72%) rename tests/test_core_engine/test_backends/test_readers/{ => test_spark}/test_spark_json.py (100%) create mode 100644 tests/testdata/planets/planets_add_fields.csv diff --git a/src/dve/core_engine/backends/base/reader.py b/src/dve/core_engine/backends/base/reader.py index ac30111..e9ed9e8 100644 --- a/src/dve/core_engine/backends/base/reader.py +++ b/src/dve/core_engine/backends/base/reader.py @@ -90,6 +90,7 @@ def read_to_py_iterator( resource: URI, entity_name: EntityName, schema: type[BaseModel], + all_model_fields: Optional[set[str]] = None, ) -> Iterator[dict[str, Any]]: """Iterate through the contents of the resource, yielding dicts representing each record. @@ -107,6 +108,7 @@ def read_to_entity_type( resource: URI, entity_name: EntityName, schema: type[BaseModel], + all_model_fields: Optional[set[str]] = None, ) -> EntityType: """Read to the specified entity type, if supported. 
@@ -116,7 +118,12 @@ def read_to_entity_type( """ if entity_name == Iterator[dict[str, Any]]: - return self.read_to_py_iterator(resource, entity_name, schema) # type: ignore + return self.read_to_py_iterator( + resource, + entity_name, + schema, # type: ignore + all_model_fields + ) self.raise_if_not_sensible_file(resource, entity_name) @@ -125,7 +132,13 @@ def read_to_entity_type( except KeyError as err: raise ReaderLacksEntityTypeSupport(entity_type=entity_type) from err - return reader_func(self, resource, entity_name, schema) + return reader_func( + self, + resource, + entity_name, + schema, + all_model_fields=all_model_fields # type: ignore + ) def add_record_index(self, entity: EntityType, **kwargs) -> EntityType: """Add a record index to the entity""" diff --git a/src/dve/core_engine/backends/implementations/duckdb/readers/csv.py b/src/dve/core_engine/backends/implementations/duckdb/readers/csv.py index 17d7635..27960d6 100644 --- a/src/dve/core_engine/backends/implementations/duckdb/readers/csv.py +++ b/src/dve/core_engine/backends/implementations/duckdb/readers/csv.py @@ -9,7 +9,7 @@ from duckdb import DuckDBPyConnection, DuckDBPyRelation, StarExpression, read_csv from pydantic import BaseModel -from dve.core_engine.backends.base.reader import BaseFileReader, read_function +from dve.core_engine.backends.base.reader import read_function from dve.core_engine.backends.exceptions import EmptyFileError, MessageBearingError from dve.core_engine.backends.implementations.duckdb.duckdb_helpers import ( duckdb_record_index, @@ -17,7 +17,7 @@ get_duckdb_type_from_annotation, ) from dve.core_engine.backends.implementations.duckdb.types import SQLType -from dve.core_engine.backends.readers.utilities import check_csv_header_expected +from dve.core_engine.backends.readers.csv import CSVFileReader from dve.core_engine.backends.utilities import get_polars_type_from_annotation, polars_record_index from dve.core_engine.constants import RECORD_INDEX_COLUMN_NAME from 
dve.core_engine.message import FeedbackMessage @@ -27,7 +27,7 @@ @duckdb_record_index @duckdb_write_parquet -class DuckDBCSVReader(BaseFileReader): +class DuckDBCSVReader(CSVFileReader): """A reader for CSV files including the ability to compare the passed model to the file header, if it exists. @@ -47,66 +47,57 @@ def __init__( quotechar: str = '"', connection: Optional[DuckDBPyConnection] = None, field_check: bool = False, - field_check_error_code: Optional[str] = "ExpectedVsActualFieldMismatch", - field_check_error_message: Optional[str] = "The submitted header is missing fields", + field_check_error_code: str = "ExpectedVsActualFieldMismatch", + field_check_error_message: str = "The submitted header is missing fields", null_empty_strings: bool = False, **_, ): - self.header = header - self.delim = delim - self.quotechar = quotechar self._connection = connection if connection else ddb.connect(":memory:") - self.field_check = field_check - self.field_check_error_code = field_check_error_code - self.field_check_error_message = field_check_error_message self.null_empty_strings = null_empty_strings - super().__init__() - - def perform_field_check( - self, resource: URI, entity_name: str, expected_schema: type[BaseModel] - ): - """Check that the header of the CSV aligns with the provided model""" - if not self.header: - raise ValueError("Cannot perform field check without a CSV header") - - if missing := check_csv_header_expected(resource, expected_schema, self.delim): - raise MessageBearingError( - "The CSV header doesn't match what is expected", - messages=[ - FeedbackMessage( - entity=entity_name, - record={"missing_fields": missing}, - failure_type="submission", - error_location="Whole File", - reporting_field="missing_fields", - error_code=self.field_check_error_code, - error_message=f"{self.field_check_error_message}", # pylint: disable=line-too-long - ) - ], - ) + super().__init__( + header=header, + delimiter=delim, + quote_char=quotechar, + 
field_check=field_check, + field_check_error_code=field_check_error_code, + field_check_error_message=field_check_error_message + ) def read_to_py_iterator( - self, resource: URI, entity_name: EntityName, schema: type[BaseModel] + self, + resource: URI, + entity_name: EntityName, + schema: type[BaseModel], + all_model_fields: Optional[set[str]] = None, ) -> Iterator[dict[str, Any]]: """Creates an iterable object of rows as dictionaries""" - yield from self.read_to_relation(resource, entity_name, schema).pl().iter_rows(named=True) + yield from self.read_to_relation( + resource, + entity_name, + schema, + all_model_fields, + ).pl().iter_rows(named=True) @read_function(DuckDBPyRelation) def read_to_relation( # pylint: disable=unused-argument - self, resource: URI, entity_name: EntityName, schema: type[BaseModel] + self, + resource: URI, + entity_name: EntityName, + schema: type[BaseModel], + all_model_fields: Optional[set[str]] = None, ) -> DuckDBPyRelation: """Returns a relation object from the source csv""" if get_content_length(resource) == 0: raise EmptyFileError(f"File at {resource} is empty.") if self.field_check: - self.perform_field_check(resource, entity_name, schema) + self.perform_field_check(resource, entity_name, schema, all_model_fields) reader_options: dict[str, Any] = { "header": self.header, - "delimiter": self.delim, - "quotechar": self.quotechar, + "delimiter": self.delimiter, + "quotechar": self.quote_char, } ddb_schema: dict[str, SQLType] = { @@ -138,19 +129,23 @@ class PolarsToDuckDBCSVReader(DuckDBCSVReader): @read_function(DuckDBPyRelation) def read_to_relation( # pylint: disable=unused-argument - self, resource: URI, entity_name: EntityName, schema: type[BaseModel] + self, + resource: URI, + entity_name: EntityName, + schema: type[BaseModel], + all_model_fields: Optional[set[str]] = None, ) -> DuckDBPyRelation: """Returns a relation object from the source csv""" if get_content_length(resource) == 0: raise EmptyFileError(f"File at {resource} is 
empty.") if self.field_check: - self.perform_field_check(resource, entity_name, schema) + self.perform_field_check(resource, entity_name, schema, all_model_fields) reader_options: dict[str, Any] = { "has_header": self.header, - "separator": self.delim, - "quote_char": self.quotechar, + "separator": self.delimiter, + "quote_char": self.quote_char, } polars_types = { @@ -216,10 +211,17 @@ def __init__( @read_function(DuckDBPyRelation) def read_to_relation( # pylint: disable=unused-argument - self, resource: URI, entity_name: EntityName, schema: type[BaseModel] + self, + resource: URI, + entity_name: EntityName, + schema: type[BaseModel], + all_model_fields: Optional[set[str]] = None, ) -> DuckDBPyRelation: entity: DuckDBPyRelation = super().read_to_relation( - resource=resource, entity_name=entity_name, schema=schema + resource=resource, + entity_name=entity_name, + schema=schema, + all_model_fields=all_model_fields ) entity = entity.select(StarExpression(exclude=[RECORD_INDEX_COLUMN_NAME])).distinct() no_records = entity.shape[0] diff --git a/src/dve/core_engine/backends/implementations/duckdb/readers/json.py b/src/dve/core_engine/backends/implementations/duckdb/readers/json.py index cf0fa82..09c2384 100644 --- a/src/dve/core_engine/backends/implementations/duckdb/readers/json.py +++ b/src/dve/core_engine/backends/implementations/duckdb/readers/json.py @@ -36,14 +36,22 @@ def __init__( super().__init__() def read_to_py_iterator( - self, resource: URI, entity_name: EntityName, schema: type[BaseModel] + self, + resource: URI, + entity_name: EntityName, + schema: type[BaseModel], + all_model_fields: Optional[set[str]] = None, ) -> Iterator[dict[str, Any]]: """Creates an iterable object of rows as dictionaries""" return self.read_to_relation(resource, entity_name, schema).pl().iter_rows(named=True) @read_function(DuckDBPyRelation) def read_to_relation( # pylint: disable=unused-argument - self, resource: URI, entity_name: EntityName, schema: type[BaseModel] + self, + 
resource: URI, + entity_name: EntityName, + schema: type[BaseModel], + **_, ) -> DuckDBPyRelation: """Returns a relation object from the source json""" diff --git a/src/dve/core_engine/backends/implementations/duckdb/readers/xml.py b/src/dve/core_engine/backends/implementations/duckdb/readers/xml.py index c63e464..00d81a6 100644 --- a/src/dve/core_engine/backends/implementations/duckdb/readers/xml.py +++ b/src/dve/core_engine/backends/implementations/duckdb/readers/xml.py @@ -30,7 +30,13 @@ def __init__(self, *, connection: Optional[DuckDBPyConnection] = None, **kwargs) super().__init__(**kwargs) @read_function(DuckDBPyRelation) - def read_to_relation(self, resource: URI, entity_name: str, schema: type[BaseModel]): + def read_to_relation( + self, + resource: URI, + entity_name: str, + schema: type[BaseModel], + **_, + ): """Returns a relation object from the source xml""" if self.xsd_location: msg = self._run_xmllint(file_uri=resource) diff --git a/src/dve/core_engine/backends/implementations/spark/readers/csv.py b/src/dve/core_engine/backends/implementations/spark/readers/csv.py index 8c2b137..c983cdf 100644 --- a/src/dve/core_engine/backends/implementations/spark/readers/csv.py +++ b/src/dve/core_engine/backends/implementations/spark/readers/csv.py @@ -8,7 +8,8 @@ from pyspark.sql import DataFrame, SparkSession from pyspark.sql.types import StructType -from dve.core_engine.backends.base.reader import BaseFileReader, read_function +from dve.core_engine.backends.base.reader import read_function +from dve.core_engine.backends.readers.csv import CSVFileReader from dve.core_engine.backends.exceptions import EmptyFileError from dve.core_engine.backends.implementations.spark.spark_helpers import ( get_type_from_annotation, @@ -21,9 +22,10 @@ @spark_record_index @spark_write_parquet -class SparkCSVReader(BaseFileReader): +class SparkCSVReader(CSVFileReader): """A Spark reader for CSV files.""" + # pylint: disable=R0902 def __init__( self, *, @@ -35,24 +37,35 @@ def 
__init__( encoding: str = "utf-8-sig", null_empty_strings: bool = False, spark_session: Optional[SparkSession] = None, + field_check: bool = False, + field_check_error_code: str = "ExpectedVsActualFieldMismatch", + field_check_error_message: str = "The submitted header is missing fields", **_, ) -> None: - self.delimiter = delimiter - self.escape_char = escape_char - self.encoding = encoding - self.quote_char = quote_char - self.header = header self.multi_line = multi_line self.null_empty_strings = null_empty_strings self.spark_session = spark_session if spark_session else SparkSession.builder.getOrCreate() # type: ignore # pylint: disable=C0301 - super().__init__() + super().__init__( + delimiter=delimiter, + escape_char=escape_char, + encoding=encoding, + quote_char=quote_char, + header=header, + field_check=field_check, + field_check_error_code=field_check_error_code, + field_check_error_message=field_check_error_message, + ) def read_to_py_iterator( - self, resource: URI, entity_name: EntityName, schema: type[BaseModel] + self, + resource: URI, + entity_name: EntityName, + schema: type[BaseModel], + all_model_fields: Optional[set[str]] = None, ) -> Iterator[dict[URI, Any]]: - df = self.read_to_dataframe(resource, entity_name, schema) + df = self.read_to_dataframe(resource, entity_name, schema, all_model_fields) yield from (record.asDict(True) for record in df.toLocalIterator()) @read_function(DataFrame) @@ -61,11 +74,15 @@ def read_to_dataframe( resource: URI, entity_name: EntityName, # pylint: disable=unused-argument schema: type[BaseModel], + all_model_fields: Optional[set[str]] = None, ) -> DataFrame: """Read a CSV file directly to a Spark DataFrame.""" if get_content_length(resource) == 0: raise EmptyFileError(f"File at {resource} is empty.") + if self.field_check: + self.perform_field_check(resource, entity_name, schema, all_model_fields) + spark_schema: StructType = get_type_from_annotation(schema) kwargs = { "sep": self.delimiter, diff --git 
a/src/dve/core_engine/backends/implementations/spark/readers/json.py b/src/dve/core_engine/backends/implementations/spark/readers/json.py index 0b4a09f..6123009 100644 --- a/src/dve/core_engine/backends/implementations/spark/readers/json.py +++ b/src/dve/core_engine/backends/implementations/spark/readers/json.py @@ -39,7 +39,11 @@ def __init__( super().__init__() def read_to_py_iterator( - self, resource: URI, entity_name: EntityName, schema: type[BaseModel] + self, + resource: URI, + entity_name: EntityName, + schema: type[BaseModel], + all_model_fields: Optional[set[str]] = None, ) -> Iterator[dict[URI, Any]]: df = self.read_to_dataframe(resource, entity_name, schema) yield from (record.asDict(True) for record in df.toLocalIterator()) @@ -50,6 +54,7 @@ def read_to_dataframe( resource: URI, entity_name: EntityName, # pylint: disable=unused-argument schema: type[BaseModel], + **_, ) -> DataFrame: """Read a JSON file directly to a Spark DataFrame.""" if get_content_length(resource) == 0: diff --git a/src/dve/core_engine/backends/implementations/spark/readers/xml.py b/src/dve/core_engine/backends/implementations/spark/readers/xml.py index 39433b3..ba42d29 100644 --- a/src/dve/core_engine/backends/implementations/spark/readers/xml.py +++ b/src/dve/core_engine/backends/implementations/spark/readers/xml.py @@ -104,7 +104,11 @@ def __init__( self.namespace = namespace def read_to_py_iterator( - self, resource: URI, entity_name: EntityName, schema: type[BaseModel] + self, + resource: URI, + entity_name: EntityName, + schema: type[BaseModel], + all_model_fields: Optional[set[str]] = None, ) -> Iterator[dict[URI, Any]]: df = self.read_to_dataframe(resource, entity_name, schema) yield from (record.asDict(True) for record in df.toLocalIterator()) @@ -115,6 +119,7 @@ def read_to_dataframe( resource: URI, entity_name: EntityName, # pylint: disable=unused-argument schema: type[BaseModel], + **_, ) -> DataFrame: """Read an XML file directly to a Spark DataFrame using the 
Databricks XML reader package. diff --git a/src/dve/core_engine/backends/readers/csv.py b/src/dve/core_engine/backends/readers/csv.py index edd6bf0..9d37b06 100644 --- a/src/dve/core_engine/backends/readers/csv.py +++ b/src/dve/core_engine/backends/readers/csv.py @@ -15,8 +15,12 @@ FieldCountMismatch, MissingHeaderError, ) +from dve.core_engine.backends.readers.utilities import ( + raise_message_bearing_error_on_header_differences +) from dve.core_engine.backends.utilities import get_polars_type_from_annotation, stringify_model from dve.core_engine.constants import RECORD_INDEX_COLUMN_NAME +from dve.core_engine.backends.readers.utilities import get_all_model_fields from dve.core_engine.type_hints import EntityName from dve.parser.file_handling import get_content_length, open_stream from dve.parser.file_handling.implementations.file import file_uri_to_local_path @@ -37,6 +41,9 @@ def __init__( trim_cells: bool = True, null_values: Collection[str] = frozenset({"NULL", "null", ""}), encoding: str = "utf-8-sig", + field_check: bool = False, + field_check_error_code: str = "CSVFieldMismatch", + field_check_error_message: str = "The submitted header is invalid", **_, ): """Init function for the base CSV reader. 
@@ -80,6 +87,12 @@ def __init__( """A container of values to replace with null if encountered in a cell.""" self.encoding = encoding """Encoding of the CSV file.""" + self.field_check = field_check + """Whether to check the fields are correct in the supplied header or not""" + self.field_check_error_code = field_check_error_code + """Error code to raise when fields are missing or unexpected""" + self.field_check_error_message = field_check_error_message + """Error message to raise when fields are missing or unexpected""" def _get_reader_args(self) -> dict[str, Any]: reader_args: dict[str, Any] = { @@ -186,16 +199,45 @@ def _coerce( new_row[field_name] = value return new_row + def perform_field_check( + self, + resource: URI, + entity_name: str, + expected_schema: type[BaseModel], + all_model_fields: Optional[set[str]] = None, + ): + """Check that the header of the CSV aligns with the provided model""" + if not self.header: + raise ValueError("Cannot perform field check without a CSV header") + + if not all_model_fields: + all_model_fields = get_all_model_fields([expected_schema]) + + raise_message_bearing_error_on_header_differences( + resource, + entity_name, + expected_schema, + all_model_fields, + self.field_check_error_code, + self.field_check_error_message, + self.delimiter, + self.quote_char, + ) + def read_to_py_iterator( self, resource: URI, entity_name: EntityName, schema: type[BaseModel], + all_model_fields: Optional[set[str]] = None, ) -> Iterator[dict[str, Any]]: """Reads the data to an iterator of dictionaries""" if get_content_length(resource) == 0: raise EmptyFileError(f"File at {resource!r} is empty") + if self.field_check: + self.perform_field_check(resource, entity_name, schema, all_model_fields) + field_names = list(schema.__fields__.keys()) with open_stream(resource, "r", self.encoding) as stream: reader = csv.DictReader( diff --git a/src/dve/core_engine/backends/readers/utilities.py b/src/dve/core_engine/backends/readers/utilities.py index 
642c0b2..a636167 100644 --- a/src/dve/core_engine/backends/readers/utilities.py +++ b/src/dve/core_engine/backends/readers/utilities.py @@ -1,21 +1,73 @@ """General utilities for file readers""" +from collections.abc import Iterable from typing import Optional from pydantic import BaseModel -from dve.core_engine.type_hints import URI +from dve.core_engine.backends.exceptions import MessageBearingError +from dve.core_engine.message import FeedbackMessage +from dve.core_engine.type_hints import URI, EntityName from dve.parser.file_handling.service import open_stream def check_csv_header_expected( resource: URI, expected_schema: type[BaseModel], + all_model_fields: set[str], delimiter: Optional[str] = ",", quote_char: str = '"', -) -> set[str]: +) -> tuple[set[str], set[str]]: """Check the header of a CSV matches the expected fields""" with open_stream(resource) as fle: header_fields = fle.readline().rstrip().replace(quote_char, "").split(delimiter) expected_fields = expected_schema.__fields__.keys() - return set(expected_fields).difference(header_fields) + + missing = set(expected_fields).difference(header_fields) + additional = set(header_fields).difference(all_model_fields) + + return missing, additional + + +def raise_message_bearing_error_on_header_differences( + resource: URI, + entity_name: EntityName, + expected_schema: type[BaseModel], + all_model_fields: set[str], + field_check_error_code: str, + field_check_error_message: str, + delimiter: Optional[str] = ",", + quote_char: str = '"', +) -> None: + """ + Raise a FeedbackMessage if differences found between the actual header and the expected + header or vice versa. 
+ """ + missing, additional = check_csv_header_expected( + resource, + expected_schema, + all_model_fields, + delimiter, + quote_char + ) + + if missing or additional: + raise MessageBearingError( + "The CSV header doesn't match what is expected", + messages=[ + FeedbackMessage( + entity=entity_name, + record={"missing_fields": missing, "additional_fields": additional}, + failure_type="submission", + error_location="Whole File", + reporting_field="csv_header", + error_code=field_check_error_code, + error_message=field_check_error_message, + ) + ], + ) + + +def get_all_model_fields(models: Iterable[type[BaseModel]]) -> set[str]: + """Return all field names from all available models""" + return {field for model in models for field in model.__fields__.keys()} diff --git a/src/dve/core_engine/backends/readers/xml.py b/src/dve/core_engine/backends/readers/xml.py index 4620402..13a7c38 100644 --- a/src/dve/core_engine/backends/readers/xml.py +++ b/src/dve/core_engine/backends/readers/xml.py @@ -296,6 +296,7 @@ def read_to_py_iterator( resource: URI, entity_name: EntityName, schema: type[BaseModel], + all_model_fields: Optional[set[str]] = None, ) -> Iterator[dict[str, Any]]: """Iterate through the contents of the file at URI, yielding rows containing the data. 
diff --git a/src/dve/pipeline/pipeline.py b/src/dve/pipeline/pipeline.py index 67fdf88..00a0c51 100644 --- a/src/dve/pipeline/pipeline.py +++ b/src/dve/pipeline/pipeline.py @@ -30,6 +30,7 @@ from dve.core_engine.backends.base.rules import BaseStepImplementations from dve.core_engine.backends.exceptions import MessageBearingError from dve.core_engine.backends.readers import BaseFileReader +from dve.core_engine.backends.readers.utilities import get_all_model_fields from dve.core_engine.backends.types import EntityType from dve.core_engine.backends.utilities import stringify_model from dve.core_engine.exceptions import CriticalProcessingError @@ -219,7 +220,10 @@ def write_file_to_parquet( if not entity_type: reader.write_parquet( reader.read_to_py_iterator( - submission_file_uri, model_name, stringify_model(model) # type: ignore + submission_file_uri, + model_name, + stringify_model(model), # type: ignore + get_all_model_fields(models.values()) # type: ignore ), f"{out}{model_name}", ) @@ -230,6 +234,7 @@ def write_file_to_parquet( submission_file_uri, model_name, stringify_model(model), # type: ignore + get_all_model_fields(models.values()), # type: ignore ), f"{out}{model_name}", ) diff --git a/tests/test_core_engine/test_backends/test_readers/test_csv.py b/tests/test_core_engine/test_backends/test_readers/test_csv.py index 0737ad2..da867ab 100644 --- a/tests/test_core_engine/test_backends/test_readers/test_csv.py +++ b/tests/test_core_engine/test_backends/test_readers/test_csv.py @@ -10,8 +10,9 @@ import pytest from pydantic import BaseModel -from dve.core_engine.backends.exceptions import EmptyFileError, FieldCountMismatch +from dve.core_engine.backends.exceptions import EmptyFileError, FieldCountMismatch, MessageBearingError from dve.core_engine.backends.readers import CSVFileReader +from dve.core_engine.backends.readers.utilities import get_all_model_fields from dve.core_engine.constants import RECORD_INDEX_COLUMN_NAME from ....conftest import 
get_test_file_path @@ -24,6 +25,12 @@ def planet_location() -> Iterator[str]: yield get_test_file_path("planets/planets.csv").as_uri() +@pytest.fixture(scope="module") +def planet_additional_field_location() -> Iterator[str]: + """The URI for planet data with additional columns""" + yield get_test_file_path("planets/planets_add_fields.csv").as_uri() + + @pytest.fixture(scope="function") def planet_data() -> Iterator[Dict[str, Dict[str, str]]]: """The expected planet data after reading, as loaded by Python's default parser.""" @@ -104,6 +111,12 @@ class Planets(PlanetsSubset): has_global_magnetic_field: str +class PlanetsWithExtra(Planets): + """A subset of the planets data with an extra field.""" + + random_null: str + + class SingleColumnModel(BaseModel): Column: str @@ -261,3 +274,40 @@ def test_base_csv_reader_parquet_write( pd.read_parquet(target_location).to_dict(orient="records"), key=lambda x: x.get(RECORD_INDEX_COLUMN_NAME), ) == sorted([dict(val) for val in planet_data.values()], key=lambda x: x.get(RECORD_INDEX_COLUMN_NAME)) + + def test_base_csv_reader_with_additional_fields( + self, + planet_additional_field_location: str + ): + """Test that message bearing error raised when additional fields provided""" + reader = CSVFileReader(field_check=True) + with pytest.raises(MessageBearingError) as exc_info: + list(reader.read_to_py_iterator( + planet_additional_field_location, + "test", + Planets, + get_all_model_fields([Planets]) + )) + + error_msg = exc_info.value.messages[0] + assert error_msg.record["additional_fields"] == {"add_field1", "add_field2"} + assert not error_msg.record["missing_fields"] + + def test_base_csv_reader_with_missing_fields( + self, + planet_location: str + ): + """Test that message bearing error raised when fields are missing from the expected schema""" + reader = CSVFileReader(field_check=True) + + with pytest.raises(MessageBearingError) as exc_info: + list(reader.read_to_py_iterator( + planet_location, + "test", + 
PlanetsWithExtra, + get_all_model_fields([PlanetsWithExtra]) + )) + + error_msg = exc_info.value.messages[0] + assert not error_msg.record["additional_fields"] + assert error_msg.record["missing_fields"] == {"random_null"} diff --git a/tests/test_core_engine/test_backends/test_readers/test_ddb_csv.py b/tests/test_core_engine/test_backends/test_readers/test_ddb_csv.py deleted file mode 100644 index 78974eb..0000000 --- a/tests/test_core_engine/test_backends/test_readers/test_ddb_csv.py +++ /dev/null @@ -1,241 +0,0 @@ -from datetime import date, datetime -from pathlib import Path -from tempfile import TemporaryDirectory - -import duckdb -import polars as pl -import pytest -from duckdb import DuckDBPyRelation -from pydantic import BaseModel - -from dve.core_engine.backends.exceptions import EmptyFileError, MessageBearingError -from dve.core_engine.backends.implementations.duckdb.duckdb_helpers import ( - get_duckdb_type_from_annotation, -) -from dve.core_engine.backends.implementations.duckdb.readers.csv import ( - DuckDBCSVReader, - DuckDBCSVRepeatingHeaderReader, - PolarsToDuckDBCSVReader, -) -from dve.core_engine.backends.utilities import stringify_model -from dve.core_engine.constants import RECORD_INDEX_COLUMN_NAME -from tests.test_core_engine.test_backends.fixtures import duckdb_connection - -# pylint: disable=C0116 - - -class SimpleModel(BaseModel): - varchar_field: str - bigint_field: int - date_field: date - timestamp_field: datetime - - -class SimpleHeaderModel(BaseModel): - header_1: str - header_2: str - - -class VerySimpleModel(BaseModel): - test_col: str - - -@pytest.fixture -def temp_dir(): - with TemporaryDirectory(prefix="ddb_test_csv_reader") as temp_dir: - yield Path(temp_dir) - - -@pytest.fixture -def temp_csv_file(temp_dir: Path): - header: str = "varchar_field,bigint_field,date_field,timestamp_field" - typed_data = [ - ["hi", 1, date(2023, 1, 3), datetime(2023, 1, 3, 12, 0, 3)], - ["bye", 2, date(2023, 3, 7), datetime(2023, 5, 9, 15, 21, 53)], - 
] - - with open(temp_dir.joinpath("dummy.csv"), mode="w") as csv_file: - csv_file.write(header + "\n") - for rw in typed_data: - csv_file.write(",".join([str(val) for val in rw]) + "\n") - - yield temp_dir.joinpath("dummy.csv"), header, typed_data, SimpleModel - - -@pytest.fixture -def temp_empty_csv_file(temp_dir: Path): - with open(temp_dir.joinpath("empty.csv"), mode="w"): - pass - - yield temp_dir.joinpath("empty.csv"), SimpleModel - - -def test_ddb_csv_reader_all_str(temp_csv_file): - uri, header, data, mdl = temp_csv_file - reader = DuckDBCSVReader(header=True, delim=",", connection=duckdb.connect()) - rel: DuckDBPyRelation = reader.read_to_entity_type( - DuckDBPyRelation, str(uri), "test", stringify_model(mdl) - ) - expected_dtypes = {**{fld: "VARCHAR" for fld in header.split(",")}, RECORD_INDEX_COLUMN_NAME: "BIGINT"} - expected_data = [(*[str(val) for val in rw], idx) for idx, rw in enumerate(data, start=1)] - assert rel.columns == header.split(",") + [RECORD_INDEX_COLUMN_NAME] - assert dict(zip(rel.columns, rel.dtypes)) == expected_dtypes - assert rel.fetchall() == expected_data - - -def test_ddb_csv_reader_cast(temp_csv_file): - uri, header, data, mdl = temp_csv_file - reader = DuckDBCSVReader(header=True, delim=",", connection=duckdb.connect()) - rel: DuckDBPyRelation = reader.read_to_entity_type(DuckDBPyRelation, str(uri), "test", mdl) - expected_dtypes = {**{ - fld.name: str(get_duckdb_type_from_annotation(fld.annotation)) - for fld in mdl.__fields__.values() - }, RECORD_INDEX_COLUMN_NAME: get_duckdb_type_from_annotation(int)} - expected_data = [(*rw, idx) for idx, rw in enumerate(data, start=1)] - assert rel.columns == header.split(",") + [RECORD_INDEX_COLUMN_NAME] - assert dict(zip(rel.columns, rel.dtypes)) == expected_dtypes - assert rel.fetchall() == expected_data - - -def test_ddb_csv_write_parquet(temp_csv_file): - uri, header, data, mdl = temp_csv_file - reader = DuckDBCSVReader(header=True, delim=",", connection=duckdb.connect()) - rel: 
DuckDBPyRelation = reader.read_to_entity_type( - DuckDBPyRelation, str(uri), "test", stringify_model(mdl) - ) - target_loc: Path = uri.parent.joinpath("test_parquet.parquet").as_posix() - reader.write_parquet(rel, target_loc) - parquet_rel = reader._connection.read_parquet(target_loc) - assert sorted(parquet_rel.df().to_dict(orient="records"), key=lambda x: x.get(RECORD_INDEX_COLUMN_NAME)) == sorted([{**rec, RECORD_INDEX_COLUMN_NAME: idx} for idx, rec in enumerate(rel.df().to_dict(orient="records"), start=1)], key=lambda x: x.get(RECORD_INDEX_COLUMN_NAME)) - - -def test_ddb_csv_read_empty_file(temp_empty_csv_file): - uri, mdl = temp_empty_csv_file - reader = DuckDBCSVReader(header=True, delim=",", connection=duckdb.connect()) - - with pytest.raises(EmptyFileError): - reader.read_to_relation(str(uri), "test", mdl) - - -def test_polars_to_ddb_csv_reader(temp_csv_file): - uri, header, data, mdl = temp_csv_file - reader = PolarsToDuckDBCSVReader( - header=True, delim=",", quotechar='"', connection=duckdb.connect() - ) - entity = reader.read_to_relation(str(uri), "test", mdl) - - assert entity.shape[0] == 2 - - -def test_ddb_csv_repeating_header_reader_non_duplicate(temp_dir): - header = "header_1,header_2,non_header_1" - typed_data = [ - ["hvalue1", "hvalue1", "nhvalue1"], - ["hvalue1", "hvalue1", "nhvalue2"], - ["hvalue1", "hvalue1", "nhvalue3"], - ] - with open(temp_dir.joinpath("test_header.csv"), mode="w") as csv_file: - csv_file.write(header + "\n") - for rw in typed_data: - csv_file.write(",".join([str(val) for val in rw]) + "\n") - - file_uri = temp_dir.joinpath("test_header.csv") - - reader = DuckDBCSVRepeatingHeaderReader( - header=True, delim=",", quotechar='"', connection=duckdb.connect() - ) - entity = reader.read_to_relation(str(file_uri), "test", SimpleHeaderModel) - - assert entity.shape[0] == 1 - - -def test_ddb_csv_repeating_header_reader_with_more_than_one_set_of_distinct_values(temp_dir): - header = "header_1,header_2,non_header_1" - typed_data = [ - 
["hvalue1", "hvalue2", "nhvalue1"], - ["hvalue2", "hvalue2", "nhvalue2"], - ["hvalue1", "hvalue1", "nhvalue3"], - ] - with open(temp_dir.joinpath("test_header.csv"), mode="w") as csv_file: - csv_file.write(header + "\n") - for rw in typed_data: - csv_file.write(",".join([str(val) for val in rw]) + "\n") - - file_uri = temp_dir.joinpath("test_header.csv") - reader = DuckDBCSVRepeatingHeaderReader( - header=True, delim=",", quotechar='"', connection=duckdb.connect() - ) - - with pytest.raises(MessageBearingError): - reader.read_to_relation(str(file_uri), "test", SimpleHeaderModel) - - -def test_DuckDBCSVReader_with_null_empty_strings(temp_dir): - test_df = pl.DataFrame({"test_col": ["fine", " ", " "]}) - file_uri = temp_dir.joinpath("test_empty_string1.csv").as_posix() - test_df.write_csv( - file_uri, - include_header=True, - quote_style="always" - ) - - reader = DuckDBCSVReader( - header=True, - delim=",", - quotechar='"', - connection=duckdb.connect(), - null_empty_strings=True, - ) - - entity = reader.read_to_relation(file_uri, "test", VerySimpleModel) - - assert entity.shape[0] == 3 - assert entity.filter("test_col IS NULL").shape[0] == 2 - - -def test_DuckDBCSVRepeatingHeaderReader_with_null_empty_strings(temp_dir): - test_df = pl.DataFrame({ - "header_1": ["fine",], "header_2": [" "], - }) - file_uri = temp_dir.joinpath("test_empty_string2.csv").as_posix() - test_df.write_csv( - file_uri, - include_header=True, - quote_style="always" - ) - - reader = DuckDBCSVRepeatingHeaderReader( - header=True, - delim=",", - quotechar='"', - connection=duckdb.connect(), - null_empty_strings=True, - ) - - entity = reader.read_to_relation(file_uri, "test", SimpleHeaderModel) - - assert entity.shape[0] == 1 - assert entity.filter("header_2 IS NULL").shape[0] == 1 - - -def test_PolarsToDuckDBCSVReader_with_null_empty_strings(temp_dir): - test_df = pl.DataFrame({"test_col": ["fine", " ", " "]}) - file_uri = temp_dir.joinpath("test_empty_string3.csv").as_posix() - 
test_df.write_csv( - file_uri, - include_header=True, - quote_style="always" - ) - - reader = PolarsToDuckDBCSVReader( - header=True, - delim=",", - quotechar='"', - connection=duckdb.connect(), - null_empty_strings=True, - ) - - entity = reader.read_to_relation(file_uri, "test", VerySimpleModel) - - assert entity.shape[0] == 3 - assert entity.filter("test_col IS NULL").shape[0] == 2 diff --git a/tests/test_core_engine/test_backends/test_readers/test_duckdb/test_ddb_csv.py b/tests/test_core_engine/test_backends/test_readers/test_duckdb/test_ddb_csv.py new file mode 100644 index 0000000..6fdc2a6 --- /dev/null +++ b/tests/test_core_engine/test_backends/test_readers/test_duckdb/test_ddb_csv.py @@ -0,0 +1,268 @@ +from collections.abc import Iterator +from datetime import date, datetime +from pathlib import Path +from tempfile import TemporaryDirectory + +import duckdb +import polars as pl +import pytest +from duckdb import DuckDBPyRelation +from pydantic import BaseModel + +from dve.core_engine.backends.exceptions import EmptyFileError, MessageBearingError +from dve.core_engine.backends.implementations.duckdb.duckdb_helpers import ( + get_duckdb_type_from_annotation, +) +from dve.core_engine.backends.implementations.duckdb.readers.csv import ( + DuckDBCSVReader, + DuckDBCSVRepeatingHeaderReader, + PolarsToDuckDBCSVReader, +) +from dve.core_engine.backends.utilities import stringify_model +from dve.core_engine.constants import RECORD_INDEX_COLUMN_NAME + +# pylint: disable=C0115,C0116,W0621 + + +class SimpleModel(BaseModel): + varchar_field: str + bigint_field: int + date_field: date + timestamp_field: datetime + + +class SimpleHeaderModel(BaseModel): + header_1: str + header_2: str + + +class VerySimpleModel(BaseModel): + test_col: str + + +@pytest.fixture +def temp_dir(): + with TemporaryDirectory(prefix="ddb_test_csv_reader") as temp_dir: + yield Path(temp_dir) + + +@pytest.fixture(scope="function") +def temp_csv_file(temp_dir: Path): + header: str = 
"varchar_field,bigint_field,date_field,timestamp_field" + typed_data = [ + ["hi", 1, date(2023, 1, 3), datetime(2023, 1, 3, 12, 0, 3)], + ["bye", 2, date(2023, 3, 7), datetime(2023, 5, 9, 15, 21, 53)], + ] + + with open(temp_dir.joinpath("dummy.csv"), mode="w") as csv_file: + csv_file.write(header + "\n") + for rw in typed_data: + csv_file.write(",".join([str(val) for val in rw]) + "\n") + + yield temp_dir.joinpath("dummy.csv"), header, typed_data, SimpleModel + + +@pytest.fixture(scope="function") +def temp_csv_file_additional_fields(temp_dir: Path) -> Iterator[str]: + test_df = pl.DataFrame({"test_col": ["fine"], "test_col2": ["wow"]}) + file_uri = temp_dir.joinpath("test_additional_fields.csv").as_posix() + test_df.write_csv( + file_uri, + include_header=True, + quote_style="always" + ) + + yield file_uri + + +@pytest.fixture(scope="function") +def temp_csv_file_missing_fields(temp_dir: Path) -> Iterator[str]: + test_df = pl.DataFrame({"header_1": ["fine"]}) + file_uri = temp_dir.joinpath("test_missing_fields.csv").as_posix() + test_df.write_csv( + file_uri, + include_header=True, + quote_style="always" + ) + + yield file_uri + + +@pytest.fixture +def temp_empty_csv_file(temp_dir: Path): + with open(temp_dir.joinpath("empty.csv"), mode="w"): + pass + + yield temp_dir.joinpath("empty.csv"), SimpleModel + + +class TestDuckDBCSVReader: + """Test DuckDBCSVReader""" + + def test_ddb_csv_reader_all_str(self, temp_csv_file): + uri, header, data, mdl = temp_csv_file + reader = DuckDBCSVReader(header=True, delim=",", connection=duckdb.connect()) + rel: DuckDBPyRelation = reader.read_to_entity_type( + DuckDBPyRelation, str(uri), "test", stringify_model(mdl) + ) + expected_dtypes = {**{fld: "VARCHAR" for fld in header.split(",")}, RECORD_INDEX_COLUMN_NAME: "BIGINT"} + expected_data = [(*[str(val) for val in rw], idx) for idx, rw in enumerate(data, start=1)] + assert rel.columns == header.split(",") + [RECORD_INDEX_COLUMN_NAME] + assert dict(zip(rel.columns, rel.dtypes)) == 
expected_dtypes + assert rel.fetchall() == expected_data + + def test_ddb_csv_reader_cast(self, temp_csv_file): + uri, header, data, mdl = temp_csv_file + reader = DuckDBCSVReader(header=True, delim=",", connection=duckdb.connect()) + rel: DuckDBPyRelation = reader.read_to_entity_type(DuckDBPyRelation, str(uri), "test", mdl) + expected_dtypes = {**{ + fld.name: str(get_duckdb_type_from_annotation(fld.annotation)) + for fld in mdl.__fields__.values() + }, RECORD_INDEX_COLUMN_NAME: get_duckdb_type_from_annotation(int)} + expected_data = [(*rw, idx) for idx, rw in enumerate(data, start=1)] + assert rel.columns == header.split(",") + [RECORD_INDEX_COLUMN_NAME] + assert dict(zip(rel.columns, rel.dtypes)) == expected_dtypes + assert rel.fetchall() == expected_data + + def test_ddb_csv_write_parquet(self, temp_csv_file): + uri, header, data, mdl = temp_csv_file + reader = DuckDBCSVReader(header=True, delim=",", connection=duckdb.connect()) + rel: DuckDBPyRelation = reader.read_to_entity_type( + DuckDBPyRelation, str(uri), "test", stringify_model(mdl) + ) + target_loc: Path = uri.parent.joinpath("test_parquet.parquet").as_posix() + reader.write_parquet(rel, target_loc) + parquet_rel = reader._connection.read_parquet(target_loc) + assert sorted(parquet_rel.df().to_dict(orient="records"), key=lambda x: x.get(RECORD_INDEX_COLUMN_NAME)) == sorted([{**rec, RECORD_INDEX_COLUMN_NAME: idx} for idx, rec in enumerate(rel.df().to_dict(orient="records"), start=1)], key=lambda x: x.get(RECORD_INDEX_COLUMN_NAME)) + + def test_ddb_csv_read_empty_file(self, temp_empty_csv_file): + uri, mdl = temp_empty_csv_file + reader = DuckDBCSVReader(header=True, delim=",", connection=duckdb.connect()) + + with pytest.raises(EmptyFileError): + reader.read_to_relation(str(uri), "test", mdl) + + def test_DuckDBCSVReader_with_null_empty_strings(self, temp_dir): + test_df = pl.DataFrame({"test_col": ["fine", " ", " "]}) + file_uri = temp_dir.joinpath("test_empty_string1.csv").as_posix() + 
test_df.write_csv( + file_uri, + include_header=True, + quote_style="always" + ) + + reader = DuckDBCSVReader( + header=True, + delim=",", + quotechar='"', + connection=duckdb.connect(), + null_empty_strings=True, + ) + + entity = reader.read_to_relation(file_uri, "test", VerySimpleModel) + + assert entity.shape[0] == 3 + assert entity.filter("test_col IS NULL").shape[0] == 2 + + +class TestPolarsToDuckDBCSVReader: + """Test PolarsToDuckDBCSVReader""" + + def test_polars_to_ddb_csv_reader(self, temp_csv_file): + uri, header, data, mdl = temp_csv_file + reader = PolarsToDuckDBCSVReader( + header=True, delim=",", quotechar='"', connection=duckdb.connect() + ) + entity = reader.read_to_relation(str(uri), "test", mdl) + + assert entity.shape[0] == 2 + + def test_PolarsToDuckDBCSVReader_with_null_empty_strings(self, temp_dir): + test_df = pl.DataFrame({"test_col": ["fine", " ", " "]}) + file_uri = temp_dir.joinpath("test_empty_string3.csv").as_posix() + test_df.write_csv( + file_uri, + include_header=True, + quote_style="always" + ) + + reader = PolarsToDuckDBCSVReader( + header=True, + delim=",", + quotechar='"', + connection=duckdb.connect(), + null_empty_strings=True, + ) + + entity = reader.read_to_relation(file_uri, "test", VerySimpleModel) + + assert entity.shape[0] == 3 + assert entity.filter("test_col IS NULL").shape[0] == 2 + +class TestDuckDBCSVRepeatingHeaderReader: + """Test DuckDBCSVRepeatingHeaderReader""" + + def test_ddb_csv_repeating_header_reader_non_duplicate(self, temp_dir): + header = "header_1,header_2,non_header_1" + typed_data = [ + ["hvalue1", "hvalue1", "nhvalue1"], + ["hvalue1", "hvalue1", "nhvalue2"], + ["hvalue1", "hvalue1", "nhvalue3"], + ] + with open(temp_dir.joinpath("test_header.csv"), mode="w") as csv_file: + csv_file.write(header + "\n") + for rw in typed_data: + csv_file.write(",".join([str(val) for val in rw]) + "\n") + + file_uri = temp_dir.joinpath("test_header.csv") + + reader = DuckDBCSVRepeatingHeaderReader( + header=True, 
delim=",", quotechar='"', connection=duckdb.connect() + ) + entity = reader.read_to_relation(str(file_uri), "test", SimpleHeaderModel) + + assert entity.shape[0] == 1 + + def test_ddb_csv_repeating_header_reader_with_more_than_one_set_of_distinct_values(self, temp_dir): + header = "header_1,header_2,non_header_1" + typed_data = [ + ["hvalue1", "hvalue2", "nhvalue1"], + ["hvalue2", "hvalue2", "nhvalue2"], + ["hvalue1", "hvalue1", "nhvalue3"], + ] + with open(temp_dir.joinpath("test_header.csv"), mode="w") as csv_file: + csv_file.write(header + "\n") + for rw in typed_data: + csv_file.write(",".join([str(val) for val in rw]) + "\n") + + file_uri = temp_dir.joinpath("test_header.csv") + reader = DuckDBCSVRepeatingHeaderReader( + header=True, delim=",", quotechar='"', connection=duckdb.connect() + ) + + with pytest.raises(MessageBearingError): + reader.read_to_relation(str(file_uri), "test", SimpleHeaderModel) + + def test_DuckDBCSVRepeatingHeaderReader_with_null_empty_strings(self, temp_dir): + test_df = pl.DataFrame({ + "header_1": ["fine",], "header_2": [" "], + }) + file_uri = temp_dir.joinpath("test_empty_string2.csv").as_posix() + test_df.write_csv( + file_uri, + include_header=True, + quote_style="always" + ) + + reader = DuckDBCSVRepeatingHeaderReader( + header=True, + delim=",", + quotechar='"', + connection=duckdb.connect(), + null_empty_strings=True, + ) + + entity = reader.read_to_relation(file_uri, "test", SimpleHeaderModel) + + assert entity.shape[0] == 1 + assert entity.filter("header_2 IS NULL").shape[0] == 1 diff --git a/tests/test_core_engine/test_backends/test_readers/test_ddb_json.py b/tests/test_core_engine/test_backends/test_readers/test_duckdb/test_ddb_json.py similarity index 100% rename from tests/test_core_engine/test_backends/test_readers/test_ddb_json.py rename to tests/test_core_engine/test_backends/test_readers/test_duckdb/test_ddb_json.py diff --git a/tests/test_core_engine/test_backends/test_readers/test_ddb_xml.py 
b/tests/test_core_engine/test_backends/test_readers/test_duckdb/test_ddb_xml.py similarity index 100% rename from tests/test_core_engine/test_backends/test_readers/test_ddb_xml.py rename to tests/test_core_engine/test_backends/test_readers/test_duckdb/test_ddb_xml.py diff --git a/tests/test_core_engine/test_backends/test_readers/test_spark/test_spark.py b/tests/test_core_engine/test_backends/test_readers/test_spark/test_spark_csv.py similarity index 72% rename from tests/test_core_engine/test_backends/test_readers/test_spark/test_spark.py rename to tests/test_core_engine/test_backends/test_readers/test_spark/test_spark_csv.py index d5dccc1..f28f0ac 100644 --- a/tests/test_core_engine/test_backends/test_readers/test_spark/test_spark.py +++ b/tests/test_core_engine/test_backends/test_readers/test_spark/test_spark_csv.py @@ -14,6 +14,7 @@ from pyspark.sql import DataFrame, Row, SparkSession from pyspark.sql.types import StringType, StructField, StructType +from dve.core_engine.backends.exceptions import MessageBearingError from dve.core_engine.backends.implementations.spark.readers.csv import SparkCSVReader @@ -21,7 +22,11 @@ class SparkCSVTestModel(BaseModel): test_col: str -@pytest.fixture +class SparkCSVTestModelAdditionalField(SparkCSVTestModel): + test_col2: str + + +@pytest.fixture(scope="function") def spark_null_csv_resource(): test_df = pl.DataFrame({"test_col": ["fine", " ", " "]}) @@ -32,6 +37,17 @@ def spark_null_csv_resource(): yield resource_uri +@pytest.fixture(scope="function") +def spark_additional_fields(): + test_df = pl.DataFrame({"test_col": ["abc"], "test_col2": ["def"]}) + + with tempfile.TemporaryDirectory() as tdir: + resource_uri = Path(tdir, "test_spark_csv_reader_add_fields.csv").as_posix() + test_df.write_csv(resource_uri, include_header=True, quote_style="always") + + yield resource_uri + + def test_SparkCSVReader_clean_empty_strings(spark: SparkSession, spark_null_csv_resource): resource_uri = spark_null_csv_resource expected_df = 
spark.createDataFrame( diff --git a/tests/test_core_engine/test_backends/test_readers/test_spark_json.py b/tests/test_core_engine/test_backends/test_readers/test_spark/test_spark_json.py similarity index 100% rename from tests/test_core_engine/test_backends/test_readers/test_spark_json.py rename to tests/test_core_engine/test_backends/test_readers/test_spark/test_spark_json.py diff --git a/tests/test_core_engine/test_backends/test_readers/test_utilities.py b/tests/test_core_engine/test_backends/test_readers/test_utilities.py index 4426769..1e2be41 100644 --- a/tests/test_core_engine/test_backends/test_readers/test_utilities.py +++ b/tests/test_core_engine/test_backends/test_readers/test_utilities.py @@ -1,55 +1,24 @@ -import datetime as dt -from pathlib import Path -import tempfile -from uuid import uuid4 - -import pytest -from pydantic import BaseModel, create_model - -from dve.core_engine.backends.readers.utilities import check_csv_header_expected - -@pytest.mark.parametrize( - ["header_row", "delim", "schema", "expected"], - [ - ( - "field1,field2,field3", - ",", - {"field1": (str, ...), "field2": (int, ...), "field3": (float, 1.2)}, - set(), - ), - ( - "field2,field3,field1", - ",", - {"field1": (str, ...), "field2": (int, ...), "field3": (float, 1.2)}, - set(), - ), - ( - "str_field|int_field|date_field|", - ",", - {"str_field": (str, ...), "int_field": (int, ...), "date_field": (dt.date, dt.date.today())}, - {"str_field","int_field","date_field"}, - ), - ( - '"str_field"|"int_field"|"date_field"', - "|", - {"str_field": (str, ...), "int_field": (int, ...), "date_field": (dt.date, dt.date.today())}, - set(), - ), - ( - 'str_field,int_field,date_field\n', - ",", - {"str_field": (str, ...), "int_field": (int, ...), "date_field": (dt.date, dt.date.today())}, - set(), - ), - - ], -) -def test_check_csv_header_expected( - header_row: str, delim: str, schema: type[BaseModel], expected: set[str] -): - mdl = create_model("TestModel", **schema) - with 
tempfile.TemporaryDirectory() as tmpdir: - fle = Path(tmpdir).joinpath(f"test_file_{uuid4().hex}.csv") - fle.open("w+").write(header_row) - res = check_csv_header_expected(fle.as_posix(), mdl, delim) - assert res == expected \ No newline at end of file +"""Test utility functions & objects for readers""" + +from pydantic import BaseModel + +from dve.core_engine.backends.readers.utilities import get_all_model_fields + + +class Model1(BaseModel): # pylint: disable=C0115 + model1_field_1: str + model1_field_2: int + + +class Model2(BaseModel): # pylint: disable=C0115 + model2_field_1: str + + +def test_get_all_model_fields(): + """Test get_all_model_fields returns a unique set of fields from multiple models""" + md1 = Model1(model1_field_1="hello", model1_field_2=123) + md2 = Model2(model2_field_1="world") + + result = get_all_model_fields([md1, md2]) + + assert result == {"model1_field_1", "model1_field_2", "model2_field_1"} diff --git a/tests/testdata/planets/planets_add_fields.csv b/tests/testdata/planets/planets_add_fields.csv new file mode 100644 index 0000000..59b6c39 --- /dev/null +++ b/tests/testdata/planets/planets_add_fields.csv @@ -0,0 +1,2 @@ +planet,mass,diameter,density,gravity,escape_velocity,rotation_period,length_of_day,distance_from_sun,perihelion,aphelion,orbital_period,orbital_velocity,orbital_inclination,orbital_eccentricity,obliquity_to_orbit,mean_temperature,surface_pressure,number_of_moons,has_ring_system,has_global_magnetic_field,add_field1,add_field2 +Mercury,0.330,4879,5427,3.7,4.3,1407.6,4222.6,57.9,46.0,69.8,88.0,47.4,7.0,0.205,0.034,167,0,0,No,Yes,1,2 \ No newline at end of file From 00ba8bdaee3b62b93a31fd83d2128550e9ac18b7 Mon Sep 17 00:00:00 2001 From: "george.robertson1" <50412379+georgeRobertson@users.noreply.github.com> Date: Fri, 5 Jun 2026 19:20:44 +0100 Subject: [PATCH 04/12] build(deps-dev): bump idna from 3.13 to 3.15 resolve CVE-2024-3651 --- poetry.lock | 8 ++++---- pyproject.toml | 1 + 2 files changed, 5 insertions(+), 4 
deletions(-) diff --git a/poetry.lock b/poetry.lock index dfe26c4..909e192 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1282,14 +1282,14 @@ license = ["ukkonen"] [[package]] name = "idna" -version = "3.13" +version = "3.15" description = "Internationalized Domain Names in Applications (IDNA)" optional = false python-versions = ">=3.8" groups = ["dev", "test"] files = [ - {file = "idna-3.13-py3-none-any.whl", hash = "sha256:892ea0cde124a99ce773decba204c5552b69c3c67ffd5f232eb7696135bc8bb3"}, - {file = "idna-3.13.tar.gz", hash = "sha256:585ea8fe5d69b9181ec1afba340451fba6ba764af97026f92a91d4eef164a242"}, + {file = "idna-3.15-py3-none-any.whl", hash = "sha256:048adeaf8c2d788c40fee287673ccaa74c24ffd8dcf09ffa555a2fbb59f10ac8"}, + {file = "idna-3.15.tar.gz", hash = "sha256:ca962446ea538f7092a95e057da437618e886f4d349216d2b1e294abfdb65fdc"}, ] [package.extras] @@ -3298,4 +3298,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.1" python-versions = ">=3.10,<3.13" -content-hash = "dc39317cbf4102e2fd18a441031ed03d8fb788fb0b54c88e6a648ab301ba1e55" +content-hash = "06ec8617339ec74207912a7dd8e915a0b83ce7d95160ca9df977fb03d862f005" diff --git a/pyproject.toml b/pyproject.toml index 59c1d2d..2be8bb4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -58,6 +58,7 @@ commitizen = "4.9.1" pre-commit = "4.3.0" charset-normalizer = "3.4.6" python-discovery = "1.2.0" +idna = "3.15" # dependency of requests [tool.poetry.group.test] optional = true From e3f7708856182d893311cdfc221ad64aa3c74af2 Mon Sep 17 00:00:00 2001 From: "george.robertson1" <50412379+georgeRobertson@users.noreply.github.com> Date: Fri, 5 Jun 2026 19:25:30 +0100 Subject: [PATCH 05/12] build(deps-docs): bump pymdown-extensions from 10.21.2 to 10.21.3 resolve regression on CVE-2023-32309 --- poetry.lock | 8 ++++---- pyproject.toml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/poetry.lock b/poetry.lock index 909e192..38e9fed 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2472,14 +2472,14 @@ 
testutils = ["gitpython (>3)"] [[package]] name = "pymdown-extensions" -version = "10.21.2" +version = "10.21.3" description = "Extension pack for Python Markdown." optional = false python-versions = ">=3.9" groups = ["docs"] files = [ - {file = "pymdown_extensions-10.21.2-py3-none-any.whl", hash = "sha256:5c0fd2a2bea14eb39af8ff284f1066d898ab2187d81b889b75d46d4348c01638"}, - {file = "pymdown_extensions-10.21.2.tar.gz", hash = "sha256:c3f55a5b8a1d0edf6699e35dcbea71d978d34ff3fa79f3d807b8a5b3fa90fbdc"}, + {file = "pymdown_extensions-10.21.3-py3-none-any.whl", hash = "sha256:d7a5d08014fc571e80ca21dd6f854e31f94c489800350564d55d15b3c41e76b6"}, + {file = "pymdown_extensions-10.21.3.tar.gz", hash = "sha256:72cfcf55f07aea0d4af2c4f11dd4e52466ddfb1bb819673146398e0bd3a77354"}, ] [package.dependencies] @@ -3298,4 +3298,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.1" python-versions = ">=3.10,<3.13" -content-hash = "06ec8617339ec74207912a7dd8e915a0b83ce7d95160ca9df977fb03d862f005" +content-hash = "ce4a194b359c2132bbedc0cb8d04da77de7581316d025d3afecfed017f90f3e5" diff --git a/pyproject.toml b/pyproject.toml index 2be8bb4..9becc7a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -103,7 +103,7 @@ click = "8.2.1" mkdocs = "1.6.1" mkdocstrings = { version = "1.0.3", extras = ["python"] } griffelib = "2.0.1" -pymdown-extensions = "10.21.2" +pymdown-extensions = "10.21.3" zensical = "0.0.31" [tool.ruff] From cb8f45c1f1f21e677acd40023fa20cb8d5623e19 Mon Sep 17 00:00:00 2001 From: "george.robertson1" <50412379+georgeRobertson@users.noreply.github.com> Date: Fri, 5 Jun 2026 19:46:56 +0100 Subject: [PATCH 06/12] build(deps): bump urllib3 from 2.6.3 to 2.7.0 upgrade urllib3 to prevent sensitive headers forwarded across origins in proxied low-level redirects --- poetry.lock | 10 +++++----- pyproject.toml | 1 + 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/poetry.lock b/poetry.lock index 38e9fed..666aa3f 100644 --- a/poetry.lock +++ b/poetry.lock @@ -3005,14 
+3005,14 @@ files = [ [[package]] name = "urllib3" -version = "2.6.3" +version = "2.7.0" description = "HTTP library with thread-safe connection pooling, file post, and more." optional = false -python-versions = ">=3.9" +python-versions = ">=3.10" groups = ["main", "dev", "test"] files = [ - {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"}, - {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"}, + {file = "urllib3-2.7.0-py3-none-any.whl", hash = "sha256:9fb4c81ebbb1ce9531cce37674bbc6f1360472bc18ca9a553ede278ef7276897"}, + {file = "urllib3-2.7.0.tar.gz", hash = "sha256:231e0ec3b63ceb14667c67be60f2f2c40a518cb38b03af60abc813da26505f4c"}, ] [package.extras] @@ -3298,4 +3298,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.1" python-versions = ">=3.10,<3.13" -content-hash = "ce4a194b359c2132bbedc0cb8d04da77de7581316d025d3afecfed017f90f3e5" +content-hash = "40a54c1e2151430fc7f7e6a900e501cb378c76d4efe2af1b962b3f59af314911" diff --git a/pyproject.toml b/pyproject.toml index 9becc7a..7df1f51 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -45,6 +45,7 @@ pyarrow = "17.0.0" pydantic = "1.10.19" pyspark = "3.5.2" typing_extensions = "4.15.0" +urllib3 = "2.7.0" # dependency of boto3 & botocore [tool.poetry.group.dev] optional = true From 61dc981c0dd8711fcf152f8d579a6778c4e8cda9 Mon Sep 17 00:00:00 2001 From: "george.robertson1" <50412379+georgeRobertson@users.noreply.github.com> Date: Fri, 5 Jun 2026 20:00:14 +0100 Subject: [PATCH 07/12] build(deps-test): bump pytest from 8.4.2 to 9.0.3 resolve vuln tmpdir handling in pytest 8.4.2 --- poetry.lock | 14 +++++++------- pyproject.toml | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/poetry.lock b/poetry.lock index 666aa3f..f043df0 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2512,21 +2512,21 @@ sql = ["numpy (>=1.15,<2)", "pandas (>=1.0.5)", "pyarrow (>=4.0.0)"] 
[[package]] name = "pytest" -version = "8.4.2" +version = "9.0.3" description = "pytest: simple powerful testing with Python" optional = false -python-versions = ">=3.9" +python-versions = ">=3.10" groups = ["dev", "test"] files = [ - {file = "pytest-8.4.2-py3-none-any.whl", hash = "sha256:872f880de3fc3a5bdc88a11b39c9710c3497a547cfa9320bc3c5e62fbf272e79"}, - {file = "pytest-8.4.2.tar.gz", hash = "sha256:86c0d0b93306b961d58d62a4db4879f27fe25513d4b969df351abdddb3c30e01"}, + {file = "pytest-9.0.3-py3-none-any.whl", hash = "sha256:2c5efc453d45394fdd706ade797c0a81091eccd1d6e4bccfcd476e2b8e0ab5d9"}, + {file = "pytest-9.0.3.tar.gz", hash = "sha256:b86ada508af81d19edeb213c681b1d48246c1a91d304c6c81a427674c17eb91c"}, ] [package.dependencies] colorama = {version = ">=0.4", markers = "sys_platform == \"win32\""} exceptiongroup = {version = ">=1", markers = "python_version < \"3.11\""} -iniconfig = ">=1" -packaging = ">=20" +iniconfig = ">=1.0.1" +packaging = ">=22" pluggy = ">=1.5,<2" pygments = ">=2.7.2" tomli = {version = ">=1", markers = "python_version < \"3.11\""} @@ -3298,4 +3298,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.1" python-versions = ">=3.10,<3.13" -content-hash = "40a54c1e2151430fc7f7e6a900e501cb378c76d4efe2af1b962b3f59af314911" +content-hash = "2eecfaa8b0b6ea083dea25e2fcb01b54b6ac269315856bf42fd7e87a0c3582a9" diff --git a/pyproject.toml b/pyproject.toml index 7df1f51..f886e63 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -70,7 +70,7 @@ coverage = "7.11.0" moto = {extras = ["s3"], version = "4.2.14"} requests = "2.33.0" # dependency of `moto` Werkzeug = "3.1.6" -pytest = "8.4.2" +pytest = "9.0.3" pytest-lazy-fixtures = "1.4.0" # switched from https://github.com/TvoroG/pytest-lazy-fixture as it's no longer supported xlsx2csv = "0.8.4" # polars requirement From b2916ba009c28cc69520a89f1153768b93f96b69 Mon Sep 17 00:00:00 2001 From: "george.robertson1" <50412379+georgeRobertson@users.noreply.github.com> Date: Mon, 8 Jun 2026 20:00:27 +0100 
Subject: [PATCH 08/12] fix: adjust csv header check feedback message to be more detailed --- src/dve/core_engine/backends/readers/utilities.py | 8 +++++--- .../test_backends/test_readers/test_csv.py | 8 ++++---- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/dve/core_engine/backends/readers/utilities.py b/src/dve/core_engine/backends/readers/utilities.py index a636167..5a5df6c 100644 --- a/src/dve/core_engine/backends/readers/utilities.py +++ b/src/dve/core_engine/backends/readers/utilities.py @@ -52,14 +52,16 @@ def raise_message_bearing_error_on_header_differences( ) if missing or additional: + record_details_missing = f"missing fields: {', '.join(missing)};" if missing else "" + record_details_additional = f"additional fields: {', '.join(additional)};" if additional else "" # pylint: disable=C0301 raise MessageBearingError( "The CSV header doesn't match what is expected", messages=[ FeedbackMessage( - entity=entity_name, - record={"missing_fields": missing, "additional_fields": additional}, + entity="Pre-validation", + record={entity_name: f"{record_details_missing}{record_details_additional}"}, failure_type="submission", - error_location="Whole File", + error_location=entity_name, reporting_field="csv_header", error_code=field_check_error_code, error_message=field_check_error_message, diff --git a/tests/test_core_engine/test_backends/test_readers/test_csv.py b/tests/test_core_engine/test_backends/test_readers/test_csv.py index da867ab..9e07f67 100644 --- a/tests/test_core_engine/test_backends/test_readers/test_csv.py +++ b/tests/test_core_engine/test_backends/test_readers/test_csv.py @@ -290,8 +290,8 @@ def test_base_csv_reader_with_additional_fields( )) error_msg = exc_info.value.messages[0] - assert error_msg.record["additional_fields"] == {"add_field1", "add_field2"} - assert not error_msg.record["missing_fields"] + assert error_msg.record["test"] == "additional fields: add_field2, add_field1;" + assert "missing_fields" not in 
error_msg.record["test"] def test_base_csv_reader_with_missing_fields( self, @@ -309,5 +309,5 @@ def test_base_csv_reader_with_missing_fields( )) error_msg = exc_info.value.messages[0] - assert not error_msg.record["additional_fields"] - assert error_msg.record["missing_fields"] == {"random_null"} + assert "additional_fields" not in error_msg.record["test"] + assert error_msg.record["test"] == "missing fields: random_null;" From 65dbb8e4d72a9fd7583817e8f3bc0fa2fb564ccd Mon Sep 17 00:00:00 2001 From: "george.robertson1" <50412379+georgeRobertson@users.noreply.github.com> Date: Mon, 8 Jun 2026 22:57:06 +0100 Subject: [PATCH 09/12] test: ensure that additional/missing fields are sorted consistently --- src/dve/core_engine/backends/readers/utilities.py | 4 ++-- tests/test_core_engine/test_backends/test_readers/test_csv.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/dve/core_engine/backends/readers/utilities.py b/src/dve/core_engine/backends/readers/utilities.py index 5a5df6c..c46cac7 100644 --- a/src/dve/core_engine/backends/readers/utilities.py +++ b/src/dve/core_engine/backends/readers/utilities.py @@ -52,8 +52,8 @@ def raise_message_bearing_error_on_header_differences( ) if missing or additional: - record_details_missing = f"missing fields: {', '.join(missing)};" if missing else "" - record_details_additional = f"additional fields: {', '.join(additional)};" if additional else "" # pylint: disable=C0301 + record_details_missing = f"missing fields: {', '.join(sorted(missing))};" if missing else "" # pylint: disable=C0301 + record_details_additional = f"additional fields: {', '.join(sorted(additional))};" if additional else "" # pylint: disable=C0301 raise MessageBearingError( "The CSV header doesn't match what is expected", messages=[ diff --git a/tests/test_core_engine/test_backends/test_readers/test_csv.py b/tests/test_core_engine/test_backends/test_readers/test_csv.py index 9e07f67..25cfd71 100644 --- 
a/tests/test_core_engine/test_backends/test_readers/test_csv.py +++ b/tests/test_core_engine/test_backends/test_readers/test_csv.py @@ -290,7 +290,7 @@ def test_base_csv_reader_with_additional_fields( )) error_msg = exc_info.value.messages[0] - assert error_msg.record["test"] == "additional fields: add_field2, add_field1;" + assert error_msg.record["test"] == "additional fields: add_field1, add_field2;" assert "missing_fields" not in error_msg.record["test"] def test_base_csv_reader_with_missing_fields( From 7968f99f6c364bf7017594a95d47931705468912 Mon Sep 17 00:00:00 2001 From: George Robertson <50412379+georgeRobertson@users.noreply.github.com> Date: Wed, 10 Jun 2026 12:37:56 +0100 Subject: [PATCH 10/12] fix: add greater error handling around polars and duckdb csv reader (#112) --- src/dve/core_engine/backends/exceptions.py | 27 ++++++++++ .../implementations/duckdb/readers/csv.py | 49 ++++++++++++++++--- .../test_backends/test_readers/test_csv.py | 8 ++- .../test_readers/test_duckdb/test_ddb_csv.py | 48 +++++++++++++++++- 4 files changed, 121 insertions(+), 11 deletions(-) diff --git a/src/dve/core_engine/backends/exceptions.py b/src/dve/core_engine/backends/exceptions.py index 6878fc2..c72f252 100644 --- a/src/dve/core_engine/backends/exceptions.py +++ b/src/dve/core_engine/backends/exceptions.py @@ -33,6 +33,33 @@ def __init__(self, *args: object, messages: Messages) -> None: """The messages to be returned as part of the error.""" +class UnableToParseCSVError(MessageBearingError): + """An error raised when unable to parse a CSV file""" + + def __init__( + self, + entity_name: str, + field_check_error_message: str, + field_check_error_code: str + ): + super().__init__( + messages=[ + FeedbackMessage( + entity="csv_structure", + record={ + entity_name: "Unable to parse file. Please check the structure of the file." 
+ }, + failure_type="submission", + is_informational=False, + error_type="csv read", + error_location=entity_name, + error_message=field_check_error_message, + error_code=field_check_error_code, + ) + ] + ) + + class BackendErrorMixin(ABC, BackendError): """A mixin used to create backend error type.""" diff --git a/src/dve/core_engine/backends/implementations/duckdb/readers/csv.py b/src/dve/core_engine/backends/implementations/duckdb/readers/csv.py index 27960d6..717ced2 100644 --- a/src/dve/core_engine/backends/implementations/duckdb/readers/csv.py +++ b/src/dve/core_engine/backends/implementations/duckdb/readers/csv.py @@ -6,11 +6,21 @@ import duckdb as ddb import polars as pl -from duckdb import DuckDBPyConnection, DuckDBPyRelation, StarExpression, read_csv +from duckdb import ( + DuckDBPyConnection, + DuckDBPyRelation, + InvalidInputException, + StarExpression, + read_csv, +) from pydantic import BaseModel from dve.core_engine.backends.base.reader import read_function -from dve.core_engine.backends.exceptions import EmptyFileError, MessageBearingError +from dve.core_engine.backends.exceptions import ( + EmptyFileError, + MessageBearingError, + UnableToParseCSVError, +) from dve.core_engine.backends.implementations.duckdb.duckdb_helpers import ( duckdb_record_index, duckdb_write_parquet, @@ -107,7 +117,14 @@ def read_to_relation( # pylint: disable=unused-argument reader_options["columns"] = ddb_schema - rel = self.add_record_index(read_csv(resource, **reader_options, parallel=False)) + try: + rel = self.add_record_index(read_csv(resource, **reader_options, parallel=False)) + except InvalidInputException as exc: + raise UnableToParseCSVError( + entity_name="csv_structure", + field_check_error_message=self.field_check_error_message, + field_check_error_code=self.field_check_error_code, + ) from exc if self.null_empty_strings: cleaned_cols = ",".join( @@ -156,11 +173,18 @@ def read_to_relation( # pylint: disable=unused-argument # there is a raise_if_empty arg for 
0.18+. Future reference when upgrading. Makes L85 # redundant - df = self.add_record_index( # pylint: disable=W0612 - pl.scan_csv(resource, **reader_options).select( # type: ignore - list(polars_types.keys()) + try: + df = self.add_record_index( # pylint: disable=W0612 + pl.scan_csv(resource, **reader_options).select( # type: ignore + list(polars_types.keys()) + ) ) - ) + except pl.exceptions.PolarsError as exc: + raise UnableToParseCSVError( + entity_name="csv_structure", + field_check_error_message=self.field_check_error_message, + field_check_error_code=self.field_check_error_code, + ) from exc if self.null_empty_strings: pl_exprs = [ @@ -170,7 +194,16 @@ def read_to_relation( # pylint: disable=unused-argument ] + [pl.col(RECORD_INDEX_COLUMN_NAME)] df = df.select(pl_exprs) - return self._connection.sql("SELECT * FROM df") + entity = self._connection.sql("SELECT * FROM df") + + if entity.pl().shape[0] == 0: + raise UnableToParseCSVError( + entity_name="csv_structure", + field_check_error_message=self.field_check_error_message, + field_check_error_code=self.field_check_error_code, + ) + + return entity class DuckDBCSVRepeatingHeaderReader(PolarsToDuckDBCSVReader): diff --git a/tests/test_core_engine/test_backends/test_readers/test_csv.py b/tests/test_core_engine/test_backends/test_readers/test_csv.py index 25cfd71..f47ce6b 100644 --- a/tests/test_core_engine/test_backends/test_readers/test_csv.py +++ b/tests/test_core_engine/test_backends/test_readers/test_csv.py @@ -5,12 +5,18 @@ import csv from pathlib import Path from typing import Dict, Iterator, Optional +from uuid import uuid4 import pandas as pd import pytest from pydantic import BaseModel -from dve.core_engine.backends.exceptions import EmptyFileError, FieldCountMismatch, MessageBearingError +from dve.core_engine.backends.exceptions import ( + EmptyFileError, + FieldCountMismatch, + MessageBearingError, + UnableToParseCSVError, +) from dve.core_engine.backends.readers import CSVFileReader from 
dve.core_engine.backends.readers.utilities import get_all_model_fields from dve.core_engine.constants import RECORD_INDEX_COLUMN_NAME diff --git a/tests/test_core_engine/test_backends/test_readers/test_duckdb/test_ddb_csv.py b/tests/test_core_engine/test_backends/test_readers/test_duckdb/test_ddb_csv.py index 6fdc2a6..b3178cd 100644 --- a/tests/test_core_engine/test_backends/test_readers/test_duckdb/test_ddb_csv.py +++ b/tests/test_core_engine/test_backends/test_readers/test_duckdb/test_ddb_csv.py @@ -2,6 +2,7 @@ from datetime import date, datetime from pathlib import Path from tempfile import TemporaryDirectory +from uuid import uuid4 import duckdb import polars as pl @@ -9,7 +10,11 @@ from duckdb import DuckDBPyRelation from pydantic import BaseModel -from dve.core_engine.backends.exceptions import EmptyFileError, MessageBearingError +from dve.core_engine.backends.exceptions import ( + EmptyFileError, + MessageBearingError, + UnableToParseCSVError, +) from dve.core_engine.backends.implementations.duckdb.duckdb_helpers import ( get_duckdb_type_from_annotation, ) @@ -21,7 +26,7 @@ from dve.core_engine.backends.utilities import stringify_model from dve.core_engine.constants import RECORD_INDEX_COLUMN_NAME -# pylint: disable=C0115,C0116,W0621 +# pylint: disable=C0103,C0115,C0116,W0621 class SimpleModel(BaseModel): @@ -164,6 +169,25 @@ def test_DuckDBCSVReader_with_null_empty_strings(self, temp_dir): assert entity.shape[0] == 3 assert entity.filter("test_col IS NULL").shape[0] == 2 + def test_DuckDBCSVReader_with_malformed_header(self, temp_dir): + test_data_headers = '"varchar_field,bigint_field,date_field,timestamp_field"' + row_data = "hello,1,2023-04-01,2023-04-01T12:30:00" + temp_id = uuid4().hex + fqp = Path(temp_dir, f"{temp_id}.csv") + + with open(fqp, mode="w", encoding="utf-8") as f: + f.write(f"{test_data_headers}\n{row_data}") + + reader = DuckDBCSVReader( + header=True, + delim=",", + connection=duckdb.connect(), + ) + + with 
pytest.raises(UnableToParseCSVError) as err: + reader.read_to_relation(fqp.as_posix(), "test", SimpleModel) + assert len(err.messages) == 1 + class TestPolarsToDuckDBCSVReader: """Test PolarsToDuckDBCSVReader""" @@ -199,6 +223,26 @@ def test_PolarsToDuckDBCSVReader_with_null_empty_strings(self, temp_dir): assert entity.shape[0] == 3 assert entity.filter("test_col IS NULL").shape[0] == 2 + def test_PolarsToDuckDBCSVReader_with_malformed_header(self, temp_dir): + test_data_headers = '"varchar_field,bigint_field,date_field,timestamp_field"' + row_data = "hello,1,2023-04-01,2023-04-01T12:30:00" + temp_id = uuid4().hex + fqp = Path(temp_dir, f"{temp_id}.csv") + + with open(fqp, mode="w", encoding="utf-8") as f: + f.write(f"{test_data_headers}\n{row_data}") + + reader = PolarsToDuckDBCSVReader( + header=True, + delim=",", + connection=duckdb.connect(), + ) + + with pytest.raises(UnableToParseCSVError) as err: + reader.read_to_relation(fqp.as_posix(), "test", SimpleModel) + assert len(err.messages) == 1 + + class TestDuckDBCSVRepeatingHeaderReader: """Test DuckDBCSVRepeatingHeaderReader""" From 57dcba9410b77ecec4f6f23c26d57a1ac4e8c873 Mon Sep 17 00:00:00 2001 From: "george.robertson1" <50412379+georgeRobertson@users.noreply.github.com> Date: Wed, 10 Jun 2026 13:12:09 +0100 Subject: [PATCH 11/12] docs: update documentation for v0.8 release --- README.md | 13 ++++++++----- docs/user_guidance/data_contract.md | 6 +++--- docs/user_guidance/implementations/spark.md | 2 +- docs/user_guidance/install.md | 3 ++- 4 files changed, 14 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index b3038b7..1f3ae54 100644 --- a/README.md +++ b/README.md @@ -60,13 +60,16 @@ Below is a list of features that we would like to implement or have been request | ------------------------------------------------------------------------------- | ----------------- | --------- | | Open source release | 0.1.0 | Yes | | Uplift to Python 3.11 | 0.2.0 | Yes | -| Uplift Pyspark to 3.5 | TBA | 
No | -| Allow DVE to run on Python 3.12+ | TBA | No | -| Upgrade to Pydantic 2.0 | TBA | No | +| Uplift Pyspark to 3.5 | 0.8.0 | Yes | +| Allow DVE to run on Python 3.12+ | 0.8.0 | Yes | +| Upgrade to Pydantic 2.0 | 0.9.0 | No | | Uplift Pyspark to 4.0+ | TBA | No | -| Create a more user friendly interface for building and modifying dischema files | Not yet confirmed | No | +| Polars upgrade to v1+ | TBA | No | +| DuckDB upgrade to v1.5+ | TBA | No | +| Python 3.13 & 3.14 upgrade | TBA | No | +| Create a more user friendly interface for building and modifying dischema files | TBA | No | -Beyond the Python and Pydantic upgrade, we cannot confirm the other features will be made available anytime soon. Therefore, if you have the interest and desire to make these features available, then please read the [Contributing](#Contributing) section and get involved. +If you would like to help make any of the unreleased features listed above available, please read the [Contributing](#Contributing) section and then submit a pull request. ## Contributing Please see guidance [here](https://github.com/NHSDigital/data-validation-engine/blob/main/CONTRIBUTE.md). diff --git a/docs/user_guidance/data_contract.md b/docs/user_guidance/data_contract.md index b49e12f..88b0c3f 100644 --- a/docs/user_guidance/data_contract.md +++ b/docs/user_guidance/data_contract.md @@ -6,11 +6,11 @@ tags: - Domain Types --- -The Data Contract defines the structure (models) of your data and controls how it is typecast. We use [Pydantic](https://docs.pydantic.dev/1.10/) to generate and validate the models. This page is meant to give you greater details on how you should write your Data Contract. If you want a summary of how the Data Contract works, please refer to the [Getting Started](./getting_started.md#rules-configuration-introduction) page. +The Data Contract defines the structure (models) of your data and controls how it is typecast. 
We use [Pydantic](https://pydantic.dev/docs/validation/1.10/overview/) to generate and validate the models. This page is meant to give you greater details on how you should write your Data Contract. If you want a summary of how the Data Contract works, please refer to the [Getting Started](./getting_started.md#rules-configuration-introduction) page. !!! Note - We plan to migrate to Pydantic v2+ in a future release. This page currently reflects what is available through Pydantic v1. + We plan to migrate to Pydantic v2+ in v0.9.0. This page currently reflects what is available through Pydantic v1. ## Models @@ -206,7 +206,7 @@ If you want to read more about the readers, please see the [File Transformation] Within the `fields` section of the contract you must define what data type a given field should be. Depending on how strict/lenient you want your types to be, a number of types are available to use. The types available are: -- [Built-in standard library](https://docs.python.org/3.11/library/stdtypes.html) types (such as `int`, `str`, `date`) available with your version of Python installed for the DVE. +- [Built-in standard library](https://docs.python.org/3.12/library/stdtypes.html) types (such as `int`, `str`, `date`) available with your version of Python installed for the DVE. 
- [Pydantic v1 types](https://docs.pydantic.dev/1.10/usage/types/) - [Custom Types](./data_contract.md#custom-types) - [Domain types](./data_contract.md#domain-types) diff --git a/docs/user_guidance/implementations/spark.md b/docs/user_guidance/implementations/spark.md index 23b82d9..6bbdb30 100644 --- a/docs/user_guidance/implementations/spark.md +++ b/docs/user_guidance/implementations/spark.md @@ -25,7 +25,7 @@ def get_spark_session() -> SparkSession: os.environ["PYSPARK_SUBMIT_ARGS"] = " ".join( [ "--packages", - "com.databricks:spark-xml_2.12:0.16.0,io.delta:delta-core_2.12:2.4.0", + "com.databricks:spark-xml_2.12:0.16.0,io.delta:delta-spark_2.12:3.2.0", "pyspark-shell", ] ) diff --git a/docs/user_guidance/install.md b/docs/user_guidance/install.md index 4792a1f..2996758 100644 --- a/docs/user_guidance/install.md +++ b/docs/user_guidance/install.md @@ -8,7 +8,7 @@ tags: !!! warning **DVE is currently an unstable package. Expect breaking changes between every minor patch**. We intend to follow semantic versioning of `major.minor.patch` more strictly after a 1.0 release. Until then, we recommend that you pin your install to the latest version available and keep an eye on [future releases](https://github.com/NHSDigital/data-validation-engine/releases). - **Please note that we only support Python runtimes of 3.10 and 3.11.** In the future we will look to add support for Python versions greater than 3.11, but it's not an immediate priority. + **Please note that we only support Python runtimes of 3.10, 3.11 & 3.12.** In the future we will look to add support for Python versions greater than 3.12, but it's not an immediate priority. If working on Python 3.7, the `0.1` release supports this (and only this) version of Python. However, we have not been updating that version with any bugfixes, performance improvements etc. 
There are also a number of vulnerable dependencies on version `0.1` release due to [Python 3.7 being depreciated](https://devguide.python.org/versions/) and a number of packages dropping support. **If you choose to install `0.1`, you accept the risks of doing so and additional support will not be provided.** @@ -83,6 +83,7 @@ Once you have installed the DVE you are almost ready to use it. To be able to ru | DVE Version | Python Version | DuckDB Version | Spark Version | Pydantic Version | | ------------ | -------------- | -------------- | ------------- | ---------------- | +| >=0.8.0 | >=3.10,<3.13 | 1.1.3 | 3.5.2 | 1.10.19 | | >=0.7.2 | >=3.10,<3.12 | 1.1.* | 3.4.* | 1.10.16 | | >=0.6 | >=3.10,<3.12 | 1.1.* | 3.4.* | 1.10.15 | | >=0.2,<0.6 | >=3.10,<3.12 | 1.1.0 | 3.4.4 | 1.10.15 | From de02375b0eacfcd86403b50628c3717ea73afb6d Mon Sep 17 00:00:00 2001 From: stevenhsd <56357022+stevenhsd@users.noreply.github.com> Date: Wed, 10 Jun 2026 13:37:28 +0100 Subject: [PATCH 12/12] =?UTF-8?q?bump:=20version=200.7.6=20=E2=86=92=200.8?= =?UTF-8?q?.0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGELOG.md | 11 +++++++++++ pyproject.toml | 2 +- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b9b14f7..b5d0173 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,14 @@ +## v0.8.0 (2026-06-10) + +### Feat + +- add additional fields check into csv readers (#109) + +### Fix + +- add greater error handling around polars and duckdb csv reader (#112) +- adjust csv header check feedback message to be more detailed + ## v0.7.6 (2026-04-30) ### Fix diff --git a/pyproject.toml b/pyproject.toml index f886e63..3f34c26 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,7 +24,7 @@ Issues = "https://github.com/NHSDigital/data-validation-engine/issues" Changelog = "https://github.com/NHSDigital/data-validation-engine/blob/main/CHANGELOG.md" [tool.poetry] -version = "0.7.6" +version = 
"0.8.0" packages = [ { include = "dve", from = "src" }, ]